Source code for dkutils.datakitchen_api.tests_utils

import csv
import json
import logging

from dataclasses import dataclass
from pathlib import Path
from typing import List

from dkutils.constants import VALID_TEST_DIRECTORIES

logger = logging.getLogger(__name__)


@dataclass
class TestInfo:
    """
    Metadata record describing a single test defined in a recipe file.

    Besides plain attribute access, the class offers ``keys()`` and ``get()`` so that an
    instance can be consumed like a read-only mapping (this module passes instances to
    ``csv.DictWriter`` as rows).
    """

    test: str  # name of the test (key in the file's "tests" section)
    datestamp: str  # approximate time at which the test was extracted
    description: str  # free-text description; empty string when the test has none
    kitchen: str  # kitchen the test was read from
    recipe: str  # recipe the test was read from
    node: str  # node (second component of the file's directory path)
    failure_action: str  # value of the test's "action" field
    variable: str  # value of the test's "test-variable" field
    metric: str  # "test-metric" when test-logic is a mapping, otherwise ''
    comparison: str  # "test-compare" when test-logic is a mapping, otherwise ''
    expression: str  # raw test-logic when it is not a mapping, otherwise ''

    @classmethod
    def keys(cls):
        """Return a view over the dataclass field names, in declaration order."""
        field_map = cls.__dataclass_fields__
        return field_map.keys()

    def get(self, field_name, default=None):
        """Return the attribute called ``field_name``, or ``default`` if it does not exist."""
        return getattr(self, field_name, default)
def is_valid_test_directory(base_path, file_depth) -> bool:
    """
    Decide whether a recipe subdirectory is allowed to hold test definitions.

    Tests may live only in a node directory itself or in one of a fixed set of its immediate
    subdirectories; the ``resources`` directory never contains tests.

    Parameters
    ----------
    base_path : Path
        Recipe subdirectory path, relative to the recipe's root directory and starting with the
        recipe name (e.g. Recipe/Node/data_sinks).
    file_depth : int
        Depth of the directory holding the recipe files (e.g. Recipe/Node/notebook.json has
        file_depth == 2). Only depths 2 and 3 can qualify.

    Returns
    -------
    boolean
        True if base_path may contain tests, False otherwise.
    """
    # Anything shallower than a node directory or deeper than its direct children is rejected
    if not 2 <= file_depth <= 3:
        return False

    # parts[0] is the recipe name, parts[1] is the node name (or "resources")
    path_parts = base_path.parts
    if path_parts[1] == 'resources':
        return False

    # The node directory itself always qualifies
    if file_depth != 3:
        return True

    # A node subdirectory qualifies only when it is one of the recognised test directories
    return path_parts[2] in VALID_TEST_DIRECTORIES
def is_valid_test_file(file_path, file_depth) -> bool:
    """
    Decide whether a recipe file is one of the kinds that may hold test definitions.

    A file qualifies when it is named ``notebook.json``, or when it is any ``.json`` file
    located at depth 3 (i.e. inside a node subdirectory).

    Parameters
    ----------
    file_path : Path
        Path to a recipe file relative to the root of the recipe
        (e.g. Recipe/Node/notebook.json)
    file_depth : int
        Subdirectory depth of the recipe file (e.g. Recipe/Node/notebook.json has depth 2)

    Returns
    -------
    boolean
        True if the file may contain tests, otherwise False.
    """
    is_notebook = file_path.name == 'notebook.json'
    is_subdirectory_json = file_depth == 3 and file_path.suffix == '.json'
    return is_notebook or is_subdirectory_json
def get_recipe_test_paths(client, kitchen=None, recipe=None) -> List[str]:
    """
    Return a list of paths to all recipe files that potentially contain tests.

    The kitchen and recipe that are interrogated for tests are derived from the client, unless
    otherwise specified as optional input arguments. If kitchen and/or recipe arguments are
    provided, they are set accordingly on the provided client.

    Parameters
    ----------
    client : DataKitchenClient
        DataKitchenClient instance with kitchen and/or recipe set accordingly, unless optional
        kitchen and/or recipe arguments are provided
    kitchen : str, optional
        If None, use the kitchen currently set on the client, otherwise set the kitchen
        accordingly.
    recipe : str, optional
        If None, use the recipe currently set on the client, otherwise, set the recipe
        accordingly.

    Returns
    -------
    list
        List of paths to all recipe files that potentially contain tests. Paths are relative to
        the recipe root (the leading recipe name is removed) and always use forward slashes.
    """
    if kitchen:
        client.kitchen = kitchen
    if recipe:
        client.recipe = recipe

    logger.info(f'Finding test files in recipe: {client.recipe}')
    json_response = client.get_recipe(recipe_files=['description.json'], include_recipe_tree=True)

    # Number of characters taken up by the leading "<recipe name>/" in each recipe tree path
    recipe_prefix_length = len(client.recipe) + 1

    test_paths = []
    for recipe_contents in json_response['recipe-tree'].values():
        for file_dir, files in recipe_contents.items():
            base_path = Path(file_dir)
            file_depth = len(base_path.parts)
            if not is_valid_test_directory(base_path, file_depth):
                continue
            for file_info in files:
                file_path = base_path / file_info["filename"]
                if is_valid_test_file(file_path, file_depth):
                    # as_posix() keeps forward slashes on every platform; str() would produce
                    # backslash-separated paths on Windows, and these paths are handed back to
                    # the recipe API by callers.
                    test_paths.append(file_path.as_posix()[recipe_prefix_length:])

    logger.info(f'Finished finding test files in recipe: {client.recipe}')
    return test_paths
def extract_tests_from_files(
    client, datestamp, test_paths, kitchen=None, recipe=None
) -> List[TestInfo]:
    """
    Extract tests from the provided test_paths.

    The kitchen and recipe are derived from the client, unless otherwise specified as optional
    input arguments. If kitchen and/or recipe arguments are provided, they are set accordingly
    on the provided client.

    Extraction is best-effort per file: if the tests of a file cannot be parsed, a warning is
    logged and processing continues with the next file. Tests already extracted from that file
    before the failure are kept.

    Parameters
    ----------
    client : DataKitchenClient
        DataKitchenClient instance with kitchen and/or recipe set accordingly, unless optional
        kitchen and/or recipe arguments are provided
    datestamp : datetime
        Datestamp indicating approximate time when tests were extracted. Stored unchanged on
        every returned TestInfo.
    test_paths : list
        List of paths to all recipe files that potentially contain tests.
    kitchen : str, optional
        If None, use the kitchen currently set on the client, otherwise set the kitchen
        accordingly.
    recipe : str, optional
        If None, use the recipe currently set on the client, otherwise, set the recipe
        accordingly.

    Returns
    -------
    list
        List of TestInfo objects, one per test found in the provided test_paths.

    Raises
    ------
    Exception
        Any error raised by the client while retrieving the recipe files is logged and
        re-raised unchanged.
    """
    if kitchen:
        client.kitchen = kitchen
    if recipe:
        client.recipe = recipe

    logger.info(f'Extracting tests from files in recipe: {client.recipe}')
    try:
        json_response = client.get_recipe(recipe_files=test_paths, include_recipe_tree=False)
    except Exception:
        logger.error(f'Failed to retrieve recipe files containing tests: {test_paths}')
        raise

    test_infos = []
    for recipe_contents in json_response['recipes'].values():
        for file_dir, files in recipe_contents.items():
            base_path = Path(file_dir)
            # Directory paths start with the recipe name, followed by the node name
            node_name = base_path.parts[1]
            logger.info(f'Processing node: {node_name}')
            for file_info in files:
                if 'json' not in file_info:
                    logger.info(
                        f"JSON field not present in node {node_name}'s file {file_info['filename']}"
                    )
                    continue

                json_contents = json.loads(file_info['json'])
                try:
                    if 'tests' in json_contents:
                        for test, fields in json_contents['tests'].items():
                            # test-logic is either a mapping holding a metric and a comparison,
                            # or a single expression
                            test_logic = fields['test-logic']
                            metric = ''
                            comparison = ''
                            expression = ''
                            if isinstance(test_logic, dict):
                                metric = test_logic['test-metric']
                                comparison = test_logic['test-compare']
                            else:
                                expression = test_logic

                            test_infos.append(
                                TestInfo(
                                    test=test,
                                    datestamp=datestamp,
                                    description=fields.get('description', ''),
                                    kitchen=client.kitchen,
                                    recipe=client.recipe,
                                    node=node_name,
                                    failure_action=fields['action'],
                                    variable=fields['test-variable'],
                                    metric=metric,
                                    comparison=comparison,
                                    expression=expression,
                                )
                            )
                except Exception as e:
                    # Deliberately best-effort, but a skipped file is a problem worth surfacing:
                    # report it at WARNING so it is not lost among INFO progress messages.
                    logger.warning(
                        f'Failed to parse tests from {base_path / file_info["filename"]}: {str(e)}'
                    )
            logger.info(f'Finished processing node: {node_name}')

    logger.info(f'Finished extracting tests from files in recipe: {client.recipe}')
    return test_infos
def get_test_infos(client, datestamp, recipes, kitchen=None) -> List[TestInfo]:
    """
    For a set of recipes in a kitchen, retrieve all the defined tests and their associated
    metadata. Return a list of TestInfo objects, one per test.

    Parameters
    ----------
    client : DataKitchenClient
        DataKitchenClient instance with kitchen set accordingly, unless optional kitchen
        argument is provided
    datestamp : datetime
        Datestamp indicating approximate time when tests were extracted.
    recipes : list
        List of recipe names to search for tests. Each recipe is set on the client in turn.
    kitchen : str, optional
        If None, use the kitchen currently set on the client, otherwise set the kitchen
        accordingly.

    Returns
    -------
    list
        List of TestInfo objects, one per test found in the provided kitchen recipes.
    """
    if kitchen:
        client.kitchen = kitchen

    # Report the kitchen actually in effect: the kitchen argument is None whenever the caller
    # relies on the kitchen already set on the client.
    logger.info(f'Finding tests in kitchen: {client.kitchen}')

    test_infos = []
    for recipe in recipes:
        test_paths = get_recipe_test_paths(client, recipe=recipe)
        test_infos.extend(extract_tests_from_files(client, datestamp, test_paths))
    return test_infos
def write_test_infos_csv(test_infos, output_csv_path) -> None:
    """
    Write a list of TestInfo objects to a CSV file.

    The file is created (or overwritten) with a header row made of the TestInfo field names,
    followed by one row per TestInfo.

    Parameters
    ----------
    test_infos : list
        List of TestInfo objects
    output_csv_path : str
        Output CSV file path
    """
    # The csv writer emits its own line terminators, so the file must be opened with
    # newline='' -- otherwise rows end in "\r\r\n" on platforms that translate "\n".
    with open(output_csv_path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=TestInfo.keys())
        writer.writeheader()
        writer.writerows(test_infos)