Source code for dgitcore.datasets.validation

#!/usr/bin/env python

import os, sys, json
from ..plugins.common import plugins_get_mgr

#####################################################
# Exports
#####################################################

__all__ = ['validate']

#####################################################
# Validate content
#####################################################

def instantiate(repo, validator_name=None, filename=None, rulesfiles=None):
    """
    Instantiate the validation specification
    """

    default_validators = repo.options.get('validator', {})

    validators = {}
    if validator_name is not None:
        # Handle the case validator is specified..
        if validator_name in default_validators:
            validators = {
                validator_name : default_validators[validator_name]
            }
        else:
            validators = {
                validator_name : {
                    'files': [],
                    'rules': {},
                    'rules-files': []
                }
            }
    else:
        validators = default_validators

    #=========================================
    # Insert the file names
    #=========================================
    if filename is not None:
        matching_files = repo.find_matching_files([filename])
        if len(matching_files) == 0:
            print("Filename could not be found", filename)
            raise Exception("Invalid filename pattern")
        for v in validators:
            validators[v]['files'] = matching_files
    else:
        # Instantiate the files from the patterns specified
        for v in validators:
            if 'files' not in validators[v]:
                validators[v]['files'] = []
            elif len(validators[v]['files']) > 0:
                matching_files = repo.find_matching_files(validators[v]['files'])
                validators[v]['files'] = matching_files

    #=========================================
    # Insert the rules files..
    #=========================================
    if rulesfiles is not None:
        # Command lines...
        matching_files = repo.find_matching_files([rulesfiles])
        if len(matching_files) == 0:
            print("Could not find matching rules files ({}) for {}".format(rulesfiles,v))
            raise Exception("Invalid rules")
        for v in validators:
            validators[v]['rules-files'] = matching_files
    else:
        # Instantiate the files from the patterns specified
        for v in validators:
            if 'rules-files' not in validators[v]:
                validators[v]['rules-files'] = []
            else:
                rulesfiles = validators[v]['rules-files']
                matching_files = repo.find_matching_files(rulesfiles)
                if len(matching_files) == 0:
                    print("Could not find matching rules files ({}) for {}".format(rules,v))
                    raise Exception("Invalid rules")
                validators[v]['rules-files'] = matching_files

    return validators

[docs]def validate(repo, validator_name=None, filename=None, rulesfiles=None, args=[]): """ Validate the content of the files for consistency. Validators can look as deeply as needed into the files. dgit treats them all as black boxes. Parameters ---------- repo: Repository object validator_name: Name of validator, if any. If none, then all validators specified in dgit.json will be included. filename: Pattern that specifies files that must be processed by the validators selected. If none, then the default specification in dgit.json is used. rules: Pattern specifying the files that have rules that validators will use show: Print the validation results on the terminal Returns ------- status: A list of dictionaries, each with target file processed, rules file applied, status of the validation and any error message. """ mgr = plugins_get_mgr() # Expand the specification. Now we have full file paths validator_specs = instantiate(repo, validator_name, filename, rulesfiles) # Run the validators with rules files... allresults = [] for v in validator_specs: keys = mgr.search(what='validator',name=v)['validator'] for k in keys: validator = mgr.get_by_key('validator', k) result = validator.evaluate(repo, validator_specs[v], args) allresults.extend(result) return allresults