testing.pipeline_class_test
PyTest suite for testing all module specifications:
deeptest_graph(pipeline, definition, path='ROOT')
Recursively compare a pipeline object to a serialized definition [EXPERIMENTAL]
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pipeline |
json |
source for the comparison |
required |
definition |
json |
target/reference for the comparison |
required |
path |
str |
current path of the comparison (in the json tree) |
'ROOT' |
Returns:
Type | Description |
---|---|
|
None |
Source code in shrike/pipeline/testing/pipeline_class_test.py
def deeptest_graph(pipeline, definition, path="ROOT"):
    """Recursively compare a pipeline object to a serialized definition [EXPERIMENTAL]

    The definition acts as the reference: only the keys / list entries present
    in the definition are checked, so the pipeline may carry additional keys
    or additional trailing list entries.

    Args:
        pipeline (json): source for the comparison
        definition (json): target/reference for the comparison
        path (str): current path of the comparison (in the json tree)

    Returns:
        None

    Raises:
        AssertionError: if a key from the definition is missing in the
            pipeline, if a pipeline list is shorter than the definition list,
            or if two leaf values differ.
    """
    if definition is None:
        # no definition provided, let's stop inspection at this path
        log.info(f"deeptest_graph @ {path}: nop, definition is None")
        return

    # is inspecting a dictionary structure, iterate on keys
    if isinstance(pipeline, dict) and isinstance(definition, dict):
        log.info(f"deeptest_graph @ {path}: checking dictionary")
        for key in definition:
            assert (
                key in pipeline
            ), f"pipeline graph does not have key {key} at level @ {path}"

            # ignoring all ids: skip this key only, the remaining keys of the
            # dictionary still have to be compared
            if key in {"id", "node_id", "module_id", "dataset_id"}:
                log.info(f"deeptest_graph @ {path}: ignore id key {key}")
                continue

            if (
                key in {"run_settings", "compute_run_settings"}
                and definition[key] is not None
            ):
                # this is a specific kind of key containing a list we're
                # transforming into a dict (indexed by each entry's "name")
                # so that the comparison does not depend on the list order
                log.info(f"deeptest_graph @ {path}: refactoring key {key} as dict")
                pipeline_run_settings = {
                    entry["name"]: entry for entry in pipeline[key]
                }
                definition_run_settings = {
                    entry["name"]: entry for entry in definition[key]
                }
                deeptest_graph(
                    pipeline_run_settings,
                    definition_run_settings,
                    path + ".(runsettings)" + key,
                )
            else:
                deeptest_graph(pipeline[key], definition[key], path + "." + key)
        return

    # is inspecting a list structure, each element MUST pass
    # NOTE: this should be improved in case list can be shuffled ?
    if isinstance(pipeline, list) and isinstance(definition, list):
        log.info(f"deeptest_graph @ {path}: checking list")
        # report a readable failure instead of an IndexError when the
        # pipeline has fewer entries than the reference
        assert len(pipeline) >= len(definition), (
            f"pipeline graph has {len(pipeline)} entries but definition"
            f" expects {len(definition)} at level @ {path}"
        )
        for index, entry in enumerate(definition):
            deeptest_graph(pipeline[index], entry, path + "[" + str(index) + "]")
        return

    # if anything else (int, str, unknown), just test plain equality
    log.info(f"deeptest_graph @ {path}: checking equality {pipeline} == {definition}")
    assert pipeline == definition, f"values mismatch @ {path}"
deeptest_graph_comparison(pipeline_export_file, pipeline_definition_file)
Compare a pipeline object to a serialized definition [EXPERIMENTAL]
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pipeline_export_file |
str |
path to pipeline exported file |
required |
pipeline_definition_file |
str |
path to reference file |
required |
Returns:
Type | Description |
---|---|
|
None |
Source code in shrike/pipeline/testing/pipeline_class_test.py
def deeptest_graph_comparison(pipeline_export_file, pipeline_definition_file):
    """Compare a pipeline object to a serialized definition [EXPERIMENTAL]

    Both files are parsed as json and handed to deeptest_graph(), the
    definition file being the reference.

    Args:
        pipeline_export_file (str): path to pipeline exported file
        pipeline_definition_file (str): path to reference file

    Returns:
        None

    Raises:
        AssertionError: if one of the paths is not an existing file, if one of
            the files parses to None, or if the comparison fails.
    """
    # both paths must point to existing files
    assert os.path.isfile(
        pipeline_export_file
    ), f"deeptest_graph_comparison() expects a file as first argument but {pipeline_export_file} does not exist."
    assert os.path.isfile(
        pipeline_definition_file
    ), f"deeptest_graph_comparison() expects a file as second argument but {pipeline_definition_file} does not exist."

    # read the exported graph
    with open(pipeline_export_file, "r") as export_file:
        pipeline = json.load(export_file)
    assert (
        pipeline is not None
    ), "deeptest_graph_comparison() expects first argument to be a parsable json, instead it found None"

    # read the reference definition
    with open(pipeline_definition_file, "r") as definition_file:
        definition = json.load(definition_file)
    assert (
        definition is not None
    ), "deeptest_graph_comparison() expects second argument to be a parsable json, instead it found None"

    deeptest_graph(pipeline, definition)
get_config_class(pipeline_class)
Test if the get_config_class() method is in there and behaves correctly
Source code in shrike/pipeline/testing/pipeline_class_test.py
def get_config_class(pipeline_class):
    """Test if the get_config_class() method is in there and behaves correctly

    Args:
        pipeline_class: class expected to expose a get_config_class() method

    Returns:
        None

    Raises:
        AssertionError: if calling pipeline_class.get_config_class() raises
            an exception (including the method being absent).
    """
    try:
        pipeline_class.get_config_class()
    except Exception as err:
        # only regular exceptions are turned into a test failure;
        # KeyboardInterrupt / SystemExit are left to propagate untouched
        raise AssertionError(
            "getting config class for pipeline class {} resulted in an exception: {}".format(
                pipeline_class.__name__, traceback.format_exc()
            )
        ) from err
pipeline_required_modules(pipeline_class)
Test if the required_modules() returns the right list of modules with all required keys
Source code in shrike/pipeline/testing/pipeline_class_test.py
def pipeline_required_modules(pipeline_class):
    """Test if the required_modules() returns the right list of modules with all required keys

    Every value of the returned dictionary must itself be a dictionary
    providing the keys "yaml_spec" and "remote_module_name". All problems are
    collected first and reported together in a single assertion.

    Args:
        pipeline_class: class exposing a required_modules() method

    Returns:
        None

    Raises:
        AssertionError: if required_modules() does not return a dictionary or
            if any entry fails validation.
    """
    modules_manifest = pipeline_class.required_modules()

    assert isinstance(
        modules_manifest, dict
    ), "required_modules() must return a dictionary."

    error_log = []
    for module_key, module_description in modules_manifest.items():
        if not isinstance(module_description, dict):
            # report the type of the offending value; instances do not carry
            # a __name__ attribute, their type does
            error_log.append(
                f"values in dictionary returned by required_modules() must be dictionaries (under key={module_key}, found value of type={type(module_description).__name__})"
            )
            continue

        if "yaml_spec" not in module_description:
            error_log.append(
                f"In pipeline class module {pipeline_class.__name__},"
                + f" module under key={module_key} (in required_modules() function)"
                + " does not provide any yaml_spec key."
                + " You need to give such a yaml_spec path before creating your pull request"
                + " so that we're able to consume this module when running pre-merge tests (detonation chamber)"
            )

        if "remote_module_name" not in module_description:
            error_log.append(
                f"In pipeline class module {pipeline_class.__name__},"
                + f" module under key={module_key} (in required_modules() function)"
                + " does not provide any remote_module_name."
                + " You need to give such a name before creating your pull request"
                + " so that we're able to consume this module when running in production."
            )

        # NOTE: the "namespace" key is currently not validated.
        # TODO : verify if the version exists or is in the yaml spec?

    assert not (error_log), (
        f"In pipeline class module {pipeline_class.__name__}, validation of the dictionary returned by required_modules() method shows errors:\n"
        + "\n".join(error_log)
    )
pipeline_required_subgraphs(pipeline_class)
Tests if required_subgraphs() returns a dictionary whose values are all subclasses of AMLPipelineHelper
Source code in shrike/pipeline/testing/pipeline_class_test.py
def pipeline_required_subgraphs(pipeline_class):
    """Tests if required_subgraphs() returns a dictionary of AMLPipelineHelper subclasses

    All offending entries are collected first and reported together in a
    single assertion.

    Args:
        pipeline_class: class exposing a required_subgraphs() method

    Returns:
        None

    Raises:
        AssertionError: if required_subgraphs() does not return a dictionary
            or if any value is not a subclass of AMLPipelineHelper.
    """
    subgraphs_manifest = pipeline_class.required_subgraphs()

    assert isinstance(
        subgraphs_manifest, dict
    ), "required_subgraphs() must return a dictionary."

    error_log = []
    for subgraph_key, subgraph_class in subgraphs_manifest.items():
        # issubclass() raises TypeError on non-class values, so make sure the
        # value is a class before testing its ancestry
        is_valid = isinstance(subgraph_class, type) and issubclass(
            subgraph_class, AMLPipelineHelper
        )
        if not is_valid:
            # non-class values have no __name__, fall back on their repr
            found_name = getattr(subgraph_class, "__name__", repr(subgraph_class))
            error_log.append(
                f"In pipeline class module {pipeline_class.__name__}, values in dictionary returned by required_subgraphs() must be subclass of AMLPipelineHelper (under key={subgraph_key}, found object {found_name})"
            )

    assert not (error_log), (
        f"In pipeline class module {pipeline_class.__name__}, validation of the dictionary returned by required_subgraphs() shows errors: "
        + "\n".join(error_log)
    )