Skip to content

Commit

Permalink
Check that each value of cmenergies keyword is numeric (closes #18)
Browse files Browse the repository at this point in the history
* Move _get_major_version to Validator for SubmissionFileValidator use.
* Add check that each value of cmenergies is numeric or a range.
* Add test for check that cmenergies values are numeric or a range.
* Modify valid_submission.yaml for example of cmenergies as a range.
* Correct duplicate test_valid_submission_yaml_v1 function name.
* Add Python 3.7 and 3.8 to versions checked in Travis CI.
* Explain in README.rst how to pass 'data' for SubmissionFileValidator.
  • Loading branch information
GraemeWatt committed Jun 7, 2020
1 parent 97089c0 commit 1f98598
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 11 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ language: python
python:
- "2.7"
- "3.6"
- "3.7"
- "3.8"

cache:
- pip
Expand Down
14 changes: 14 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,20 @@ for the error message lookup map.
data_file_validator.print_errors('data.yaml')
For the analogous case of the ``SubmissionFileValidator``:

.. code:: python
from hepdata_validator.submission_file_validator import SubmissionFileValidator
import yaml
submission_file_path = 'submission.yaml'
# convert a generator returned by yaml.safe_load_all into a list
docs = list(yaml.safe_load_all(open(submission_file_path, 'r')))
submission_file_validator = SubmissionFileValidator()
is_valid_submission_file = submission_file_validator.validate(file_path=submission_file_path, data=docs)
submission_file_validator.print_errors(submission_file_path)
An example `offline validation script <https://github.com/HEPData/hepdata-submission/blob/master/scripts/check.py>`_
uses the ``hepdata_validator`` package to validate the ``submission.yaml`` file and all YAML data files of a
Expand Down
8 changes: 8 additions & 0 deletions hepdata_validator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ def __init__(self, *args, **kwargs):
if self.schema_version not in VALID_SCHEMA_VERSIONS:
raise ValueError('Invalid schema version ' + self.schema_version)

def _get_major_version(self):
"""
Parses the major version of the validator.
:return: integer corresponding to the validator major version
"""
return int(self.schema_version.split('.')[0])

def _get_schema_filepath(self, schema_filename):
full_filepath = os.path.join(self.base_path,
self.schema_folder,
Expand Down
8 changes: 0 additions & 8 deletions hepdata_validator/data_file_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,6 @@ def __init__(self, *args, **kwargs):
super(DataFileValidator, self).__init__(*args, **kwargs)
self.default_schema_file = self._get_schema_filepath(self.schema_name)

def _get_major_version(self):
"""
Parses the major version of the validator.
:return: integer corresponding to the validator major version
"""
return int(self.schema_version.split('.')[0])

def load_custom_schema(self, type, schema_file_path=None):
"""
Loads a custom schema, or will use a stored version for the given type if available.
Expand Down
23 changes: 23 additions & 0 deletions hepdata_validator/submission_file_validator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from jsonschema import validate, ValidationError
import os
import re
import yaml
from yaml.scanner import ScannerError

Expand Down Expand Up @@ -69,6 +70,8 @@ def validate(self, **kwargs):
validate(data_item, additional_file_section_schema)
else:
validate(data_item, submission_file_schema)
if self._get_major_version() > 0:
check_cmenergies(data_item)

except ValidationError as ve:
self.add_validation_message(
Expand Down Expand Up @@ -99,3 +102,23 @@ def validate(self, **kwargs):
data_file_handle.close()

return return_value


def check_cmenergies(data_item):
    """
    Check that 'cmenergies' values are numeric unless a range like 1.7-4.7.

    Every value of a keyword named 'cmenergies' must be convertible to float.
    The one exception is a range written as two unsigned numbers joined by a
    hyphen (e.g. 1.75-4.75), which is accepted only when it is the sole value
    given for that keyword.

    :param data_item: YAML document from submission.yaml
    :return: None; raise ValidationError if a value is neither numeric nor an allowed range
    """
    # Allow any number of decimal digits on either side of the hyphen,
    # so that ranges such as 1.75-4.75 are accepted as well as 1.7-4.7.
    range_pattern = re.compile(r'^\d+\.?\d*-\d+\.?\d*$')

    # A document without 'keywords' simply has nothing to check here.
    for keyword in data_item.get('keywords') or ():
        if keyword.get('name') != 'cmenergies':
            continue
        cmenergies = keyword.get('values') or ()
        for cmenergy in cmenergies:
            try:
                float(cmenergy)
            except (TypeError, ValueError):
                # TypeError covers non-string, non-numeric YAML values (None, lists,
                # mappings): they can never be a range, so report them as invalid
                # instead of letting an unrelated exception escape.
                try:
                    is_range = range_pattern.match(cmenergy) is not None
                except TypeError:
                    is_range = False
                # A range is only meaningful as the single value of the keyword.
                if not is_range or len(cmenergies) > 1:
                    raise ValidationError("Invalid value (in GeV) for cmenergies: %s" % (cmenergy,),
                                          instance=data_item)
10 changes: 10 additions & 0 deletions testsuite/test_data/invalid_cmenergies.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# This is Table 2.
name: "Table 2"
location: Page 20 of preprint
description: The measured total cross sections. The first systematic uncertainty is the combined systematic uncertainty excluding luminosity, the second is the luminosity
keywords: # used for searching, possibly multiple values for each keyword
- { name: reactions, values: [P P --> Z0 Z0 X]}
- { name: observables, values: [SIG]}
- { name: cmenergies, values: [7000 GeV]}
data_file: data2.yaml
2 changes: 1 addition & 1 deletion testsuite/test_data/valid_submission.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,5 @@ description: The measured total cross sections. The first systematic uncertaint
keywords: # used for searching, possibly multiple values for each keyword
- { name: reactions, values: [P P --> Z0 Z0 X]}
- { name: observables, values: [SIG]}
- { name: cmenergies, values: [7000]}
- { name: cmenergies, values: [7000.0-8000.0]}
data_file: data2.yaml
19 changes: 17 additions & 2 deletions testsuite/test_submission_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def test_invalid_schema_file():
VALID_SCHEMA_VERSIONS.pop()


def test_valid_submission_yaml_v1(validator_v1, data_path):
def test_data_schema_submission_yaml_v1(validator_v1, data_path):
"""
Tests the SubmissionFileValidator V1 against a valid YAML with a data_schema key
"""
Expand All @@ -230,4 +230,19 @@ def test_valid_submission_yaml_v1(validator_v1, data_path):
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
validator_v1.print_errors(file)

assert is_valid is True
assert is_valid is True


def test_invalid_cmenergies_submission_yaml_v1(validator_v1, data_path):
    """
    Checks that the V1 SubmissionFileValidator rejects an invalid cmenergies value
    """
    submission_path = os.path.join(data_path, 'invalid_cmenergies.yaml')

    with open(submission_path, 'r') as handle:
        # load_all is lazy, so the documents must be consumed while the file is open
        documents = yaml.load_all(handle, Loader=Loader)
        is_valid = validator_v1.validate(file_path=submission_path, data=documents)
        validator_v1.print_errors(submission_path)

    assert is_valid is False

0 comments on commit 1f98598

Please sign in to comment.