Source code for dgitcore.contrib.validators.regression_quality
#!/usr/bin/env python
import os, sys, glob2, json
from collections import OrderedDict
import re
from dgitcore.plugins.validator import ValidatorBase
from dgitcore.config import get_config
from dgitcore.helper import compute_sha256, cd
from dgitcore.exceptions import *
class RegressionQualityValidator(ValidatorBase):
    """
    Validate the quality (R-squared) of regression model output.

    Each target file is searched for text of the form
    ``R-squared: <number>``. The first value found is compared against
    the ``min-r2`` threshold supplied by the validator's rules.
    """

    def __init__(self):
        self.enable = 'y'
        super(RegressionQualityValidator, self).__init__('regression-quality-validator',
                                                         'v0',
                                                         "Check R2 of regression model")

    def config(self, what='get', params=None):
        """
        Describe or apply this validator's configuration.

        Parameters
        ----------
        what : str
            ``'get'`` returns the configuration description; any other
            value applies ``params``.
        params : mapping or None
            Configuration keyed by plugin name. Only
            ``params['regression-quality-validator']['enable']`` is read.

        Returns
        -------
        dict or None
            The configuration description when ``what == 'get'``,
            otherwise ``None``.
        """
        if what == 'get':
            return {
                'name': 'regression-quality-validator',
                'nature': 'validator',
                'variables': ['enable'],
                'defaults': {
                    'enable': {
                        'value': "y",
                        "description": "Enable repository regression-quality checker"
                    },
                }
            }

        # ``params`` defaults to None; treat that as "nothing configured"
        # rather than failing on the membership test below.
        params = params or {}
        section = 'regression-quality-validator'
        if section in params and 'enable' in params[section]:
            self.enable = params[section]['enable']
        else:
            self.enable = 'n'

    def autooptions(self):
        """
        Return the default options for this validator.

        Returns
        -------
        OrderedDict
            ``files`` (patterns to check), ``rules`` (inline rules with a
            ``min-r2`` threshold) and ``rules-files`` (empty list).
        """
        return OrderedDict([
            ("files", ["*.txt"]),
            ("rules", OrderedDict([
                ("min-r2", 0.25)
            ])),
            ("rules-files", [])
        ])

    def _result(self, target, rules, status, message):
        """Build one status record for ``target`` checked against ``rules``."""
        return {
            'target': target,
            'validator': self.name,
            'description': self.description,
            'rules': rules,
            'status': status,
            'message': message
        }

    def evaluate(self, repo, spec, args):
        """
        Check the R-squared value reported in each file against the rules.

        Parameters
        ----------
        repo : object
            Repository object; only ``repo.rootdir`` is used, as the
            working directory while files are read.
        spec : mapping
            Must contain ``files`` (paths to check). Rules come from
            ``rules-files`` (JSON files) if that list is non-empty,
            otherwise from the inline ``rules`` entry.
        args : object
            Not used by this validator.

        Returns
        -------
        list of dict
            One record per (rule set, file) pair with keys ``target``,
            ``validator``, ``description``, ``rules``, ``status`` and
            ``message``. Rule sets without ``min-r2`` produce no records.

        Raises
        ------
        InvalidParameters
            If neither ``rules-files`` nor ``rules`` yields any rules.
        """
        status = []

        # Nothing to do when no files were selected.
        if len(spec['files']) == 0:
            return status

        # The decimal point is escaped so that only a real number matches;
        # an unescaped "." would accept text such as "0x55" and then make
        # float() raise instead of reporting invalid output.
        r2_pattern = re.compile(r"R-squared:\s+(\d\.\d+)")

        with cd(repo.rootdir):

            rules = None
            if 'rules-files' in spec and len(spec['rules-files']) > 0:
                rules = OrderedDict()
                for path in spec['rules-files']:
                    # Context manager closes the handle promptly.
                    with open(path) as fd:
                        rules[path] = json.load(fd)
            elif 'rules' in spec:
                rules = {
                    'inline': spec['rules']
                }

            if rules is None or len(rules) == 0:
                print("Regression quality validation has been enabled but no rules file has been specified")
                print("Example: { 'min-r2': 0.25 }. Put this either in file or in dgit.json")
                raise InvalidParameters("Regression quality checking rules missing")

            # Read every target once up front; each rule set reuses the text.
            files = OrderedDict()
            for path in spec['files']:
                with open(path) as fd:
                    files[path] = fd.read()

            for r in rules:
                if 'min-r2' not in rules[r]:
                    continue
                minr2 = float(rules[r]['min-r2'])
                for f in files:
                    match = r2_pattern.search(files[f])
                    if match is None:
                        status.append(self._result(f, r, "ERROR",
                                                   "Invalid model output"))
                        continue

                    r2 = float(match.group(1))
                    # Strict comparison: a value equal to min-r2 is rejected.
                    if r2 > minr2:
                        status.append(self._result(f, r, "OK",
                                                   "Acceptable R2"))
                    else:
                        status.append(self._result(f, r, "ERROR",
                                                   "R2 is too low"))

        return status
def setup(mgr):
    """Create a RegressionQualityValidator and register it with ``mgr`` under 'validator'."""
    validator = RegressionQualityValidator()
    mgr.register('validator', validator)