From 266d1d2dd1c429e56ff3fdcd0e46546151b2f72d Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Wed, 30 Oct 2024 22:02:28 +1100 Subject: [PATCH 1/3] dev: update dependencies - no formatting changes, just keeping pace with ruff really --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 85d1aa4f..f0b7476d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,8 +18,8 @@ dependencies = [ dev = [ "formencode==2.1.0", # Compare XML "lxml==5.3.0", # XPath test expressions - "psutil==6.0.0", # Process info for performance tests - "ruff==0.6.4", # Format and lint + "psutil==6.1.0", # Process info for performance tests + "ruff==0.7.1", # Format and lint ] [project.urls] From b81e816a90b5a7b3b0e90194cfed46b342585e0e Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Wed, 30 Oct 2024 22:13:33 +1100 Subject: [PATCH 2/3] chg: add all choices in the XLSForm to the XForm even if not referenced - allows users to include reference data for expressions via choices without needing a hidden dummy question. Includes all choices rather than parsing expressions to find references, because it seems unlikely that both a XLSForm would include completely irrelevant choices and have so many such choices that the XForm document size is a problem. - test_xls2json_xls.py - moved loop test into test_loop.py and modified to use the same style as the existing test - test_loop.py - use xls2xform.convert rather than internals directly --- pyxform/xls2json.py | 15 +-- tests/test_choices_sheet.py | 21 ++++ .../test_expected_output/repeat_date_test.xml | 100 +++++++++++++++++- tests/test_expected_output/simple_loop.json | 57 ---------- tests/test_expected_output/xml_escaping.xml | 100 +++++++++++++++++- tests/test_loop.py | 76 +++++++++++-- tests/test_xls2json_xls.py | 12 --- 7 files changed, 289 insertions(+), 92 deletions(-) delete mode 100644 tests/test_expected_output/simple_loop.json diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py index 1e340b35..3b260e22 100644 --- a/pyxform/xls2json.py +++ b/pyxform/xls2json.py @@ -520,13 +520,14 @@ def workbook_to_json( use_double_colons=use_double_colons, default_language=default_language, ) - combined_lists = group_dictionaries_by_key( + choices = group_dictionaries_by_key( list_of_dicts=choices_sheet.data, key=constants.LIST_NAME_S ) + if 0 < len(choices): + json_dict[constants.CHOICES] = choices # To combine the warning into one message, the check for missing choices translation # columns is run with Survey sheet below. - choices = combined_lists # Make sure all the options have the required properties: warnedabout = set() for list_name, options in choices.items(): @@ -1242,16 +1243,6 @@ def workbook_to_json( choice_filter=row.get(constants.CHOICE_FILTER), file_extension=file_extension, ) - # Add the choice to the survey choices if it's being used in an itemset. - if ( - constants.ITEMSET in new_json_dict - and new_json_dict[constants.ITEMSET] == list_name - and list_name in choices - ): - # Initialise choices output if none added already. - if constants.CHOICES not in json_dict: - json_dict[constants.CHOICES] = {} - json_dict[constants.CHOICES][list_name] = choices[list_name] # Code to deal with table_list appearance flags # (for groups of selects) diff --git a/tests/test_choices_sheet.py b/tests/test_choices_sheet.py index 114ccfc3..084b9b56 100644 --- a/tests/test_choices_sheet.py +++ b/tests/test_choices_sheet.py @@ -133,3 +133,24 @@ def test_choices_extra_columns_output_order_matches_xlsform(self): """, ], ) + + def test_unreferenced_lists_included_in_output(self): + """Should find all specified choice lists in the output, even if unreferenced.""" + md = """ + | survey | + | | type | name | label | + | | select_one choices | a | A | + | choices | + | | list_name | name | label | + | | choices | 1 | Y | + | | choices | 2 | N | + | | choices2 | 1 | Y | + | | choices2 | 2 | N | + """ + self.assertPyxformXform( + md=md, + xml__xpath_match=[ + xpc.model_instance_choices_label("choices", (("1", "Y"), ("2", "N"))), + xpc.model_instance_choices_label("choices2", (("1", "Y"), ("2", "N"))), + ], + ) diff --git a/tests/test_expected_output/repeat_date_test.xml b/tests/test_expected_output/repeat_date_test.xml index 2212f014..0840bb55 100644 --- a/tests/test_expected_output/repeat_date_test.xml +++ b/tests/test_expected_output/repeat_date_test.xml @@ -3,6 +3,22 @@ repeat_date_test + + + + jr://images/a.jpg + + + jr://images/b.jpg + + + jr://images/happy.jpg + + + jr://images/sad.jpg + + + @@ -21,15 +37,95 @@ + + + + a + + + + b + + + + c + + + + d + + + + + + + + 1 + + + + 2 + + + + 3 + + + + 4 + + + + 5 + + + + 6 + + + + 7 + + + + 8 + + + + - yes + - no + + + + + + + + a_b-0 + a + + + a_b-1 + b + + + + + + + happy_sad-0 + happy + + + happy_sad-1 + sad diff --git a/tests/test_expected_output/simple_loop.json b/tests/test_expected_output/simple_loop.json deleted file mode 100644 index 6f2bba94..00000000 --- a/tests/test_expected_output/simple_loop.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "name": "simple_loop", - "title": "simple_loop", - "sms_keyword": "simple_loop", - "default_language": "default", - "id_string": "simple_loop", - "type": "survey", - "children": [ - { - "children": [ - { - "type": "integer", - "name": "count", - "label": { - "English": "How many are there in this group?" - } - } - ], - "type": "loop", - "name": "my_table", - "columns": [ - { - "name": "col1", - "label": { - "English": "Column 1" - } - }, - { - "name": "col2", - "label": { - "English": "Column 2" - } - } - ], - "label": { - "English": "My Table" - } - }, - { - "control": { - "bodyless": true - }, - "type": "group", - "name": "meta", - "children": [ - { - "bind": { - "readonly": "true()", - "jr:preload": "uid" - }, - "type": "calculate", - "name": "instanceID" - } - ] - } - ] -} \ No newline at end of file diff --git a/tests/test_expected_output/xml_escaping.xml b/tests/test_expected_output/xml_escaping.xml index dd6b849d..f74eb588 100644 --- a/tests/test_expected_output/xml_escaping.xml +++ b/tests/test_expected_output/xml_escaping.xml @@ -3,6 +3,22 @@ xml_escaping + + + + jr://images/a.jpg + + + jr://images/b.jpg + + + jr://images/happy.jpg + + + jr://images/sad.jpg + + + @@ -13,15 +29,95 @@ + + + + a + + + + b + + + + c + + + + d + + + + + + + + 1 + + + + 2 + + + + 3 + + + + 4 + + + + 5 + + + + 6 + + + + 7 + + + + 8 + + + + - yes + - no + + + + + + + + a_b-0 + a + + + a_b-1 + b + + + + + + + happy_sad-0 + happy + + + happy_sad-1 + sad diff --git a/tests/test_loop.py b/tests/test_loop.py index d4ca0d61..0e8689f4 100644 --- a/tests/test_loop.py +++ b/tests/test_loop.py @@ -4,18 +4,71 @@ from unittest import TestCase -from pyxform.builder import create_survey_from_xls +from pyxform.xls2xform import convert from tests import utils -class LoopTests(TestCase): +class TestLoop(TestCase): + maxDiff = None + + def test_table(self): + path = utils.path_to_text_fixture("simple_loop.xls") + observed = convert(xlsform=path)._pyxform + + expected = { + "name": "data", + "title": "simple_loop", + "sms_keyword": "simple_loop", + "default_language": "default", + "id_string": "simple_loop", + "type": "survey", + "children": [ + { + "children": [ + { + "type": "integer", + "name": "count", + "label": {"English": "How many are there in this group?"}, + } + ], + "type": "loop", + "name": "my_table", + "columns": [ + {"name": "col1", "label": {"English": "Column 1"}}, + {"name": "col2", "label": {"English": "Column 2"}}, + ], + "label": {"English": "My Table"}, + }, + { + "control": {"bodyless": True}, + "type": "group", + "name": "meta", + "children": [ + { + "bind": {"readonly": "true()", "jr:preload": "uid"}, + "type": "calculate", + "name": "instanceID", + } + ], + }, + ], + "choices": { + "my_columns": [ + {"label": {"English": "Column 1"}, "name": "col1"}, + {"label": {"English": "Column 2"}, "name": "col2"}, + ] + }, + } + self.assertEqual(expected, observed) + def test_loop(self): path = utils.path_to_text_fixture("another_loop.xls") - survey = create_survey_from_xls(path, "another_loop") - self.maxDiff = None - expected_dict = { - "name": "another_loop", + observed = convert(xlsform=path)._survey.to_json_dict() + observed.pop("_translations", None) + observed.pop("_xpath", None) + expected = { + "name": "data", "id_string": "another_loop", "sms_keyword": "another_loop", "default_language": "default", @@ -89,5 +142,14 @@ def test_loop(self): "type": "group", }, ], + "choices": { + "types": [ + {"label": {"English": "Car", "French": "Voiture"}, "name": "car"}, + { + "label": {"English": "Motorcycle", "French": "Moto"}, + "name": "motor_cycle", + }, + ] + }, } - self.assertEqual(survey.to_json_dict(), expected_dict) + self.assertEqual(expected, observed) diff --git a/tests/test_xls2json_xls.py b/tests/test_xls2json_xls.py index 349bc3ab..b362a6a6 100644 --- a/tests/test_xls2json_xls.py +++ b/tests/test_xls2json_xls.py @@ -103,18 +103,6 @@ def test_text_and_integer(self): self.assertEqual(x.to_json_dict()["children"], expected_dict) - def test_table(self): - filename = "simple_loop.xls" - path_to_excel_file = Path(example_xls.PATH) / filename - expected_output_path = Path(test_expected_output.PATH) / ( - path_to_excel_file.stem + ".json" - ) - result = convert( - xlsform=path_to_excel_file, warnings=[], form_name=path_to_excel_file.stem - ) - with open(expected_output_path, encoding="utf-8") as expected: - self.assertEqual(json.load(expected), result._pyxform) - def test_choice_filter_choice_fields(self): """ Test that the choice filter fields appear on children field of json From 4f1116e6b2876ecb123db25ae4e9bee6518414fc Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Fri, 1 Nov 2024 01:57:16 +1100 Subject: [PATCH 3/3] chg: simplify validation of choices and tag names - xls2json.py: - move choices checks to new validator module - include row reference in all errors/warnings. To have a consistent error message structure, each duplicate choice gets a copy of the message (in one error) rather than being smushed into one message. - xlsparseutils.py - replace separate regex for xml tags with expression parser and use that instead, for consistent parsing rules and to use cache - move the remaining sheet misspellings check to new validator module - expression.py - fix issue where a name containing "or" or "and" was parsed as an operator rather than a name. These tokens are only valid when surrounded by spaces so the regex is updated accordingly. - add tests for positive and negative match cases for tag names. --- pyxform/entities/entities_parsing.py | 7 +- pyxform/parsing/expression.py | 18 ++- pyxform/survey_element.py | 4 +- pyxform/validators/pyxform/choices.py | 59 ++++++++++ pyxform/validators/pyxform/question_types.py | 6 +- .../pyxform/sheet_misspellings.py} | 13 --- pyxform/xls2json.py | 108 ++++++------------ tests/parsing/__init__.py | 0 tests/parsing/test_expression.py | 49 ++++++++ tests/test_fields.py | 14 +-- tests/test_sheet_columns.py | 4 +- tests/test_xlsform_spec.py | 10 +- tests/xform_test_case/test_bugs.py | 7 +- 13 files changed, 189 insertions(+), 110 deletions(-) create mode 100644 pyxform/validators/pyxform/choices.py rename pyxform/{xlsparseutils.py => validators/pyxform/sheet_misspellings.py} (76%) create mode 100644 tests/parsing/__init__.py create mode 100644 tests/parsing/test_expression.py diff --git a/pyxform/entities/entities_parsing.py b/pyxform/entities/entities_parsing.py index 017b6751..34f44e7c 100644 --- a/pyxform/entities/entities_parsing.py +++ b/pyxform/entities/entities_parsing.py @@ -3,7 +3,8 @@ from pyxform import constants as const from pyxform.aliases import yes_no from pyxform.errors import PyXFormError -from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag +from pyxform.parsing.expression import is_xml_tag +from pyxform.validators.pyxform.sheet_misspellings import find_sheet_misspellings EC = const.EntityColumns @@ -73,7 +74,7 @@ def get_validated_dataset_name(entity): f"Invalid entity list name: '{dataset}'. Names may not include periods." ) - if not is_valid_xml_tag(dataset): + if not is_xml_tag(dataset): if isinstance(dataset, bytes): dataset = dataset.decode("utf-8") @@ -118,7 +119,7 @@ def validate_entity_saveto( f"{error_start} the entity property name '{save_to}' starts with reserved prefix {const.ENTITIES_RESERVED_PREFIX}." ) - if not is_valid_xml_tag(save_to): + if not is_xml_tag(save_to): if isinstance(save_to, bytes): save_to = save_to.decode("utf-8") diff --git a/pyxform/parsing/expression.py b/pyxform/parsing/expression.py index af919859..29df4fa1 100644 --- a/pyxform/parsing/expression.py +++ b/pyxform/parsing/expression.py @@ -37,9 +37,9 @@ def get_expression_lexer() -> re.Scanner: "TIME": time_regex, "NUMBER": r"-?\d+\.\d*|-?\.\d+|-?\d+", # https://www.w3.org/TR/1999/REC-xpath-19991116/#exprlex - "OPS_MATH": r"[\*\+\-]|mod|div", + "OPS_MATH": r"[\*\+\-]| mod | div ", "OPS_COMP": r"\=|\!\=|\<|\>|\<=|>=", - "OPS_BOOL": r"and|or", + "OPS_BOOL": r" and | or ", "OPS_UNION": r"\|", "OPEN_PAREN": r"\(", "CLOSE_PAREN": r"\)", @@ -107,3 +107,17 @@ def is_single_token_expression(expression: str, token_types: Iterable[str]) -> b return True else: return False + + +def is_pyxform_reference(value: str) -> bool: + """ + Does the input string contain only a valid Pyxform reference? e.g. ${my_question} + """ + return is_single_token_expression(expression=value, token_types=("PYXFORM_REF",)) + + +def is_xml_tag(value: str) -> bool: + """ + Does the input string contain only a valid XML tag / element name? + """ + return is_single_token_expression(expression=value, token_types=("NAME",)) diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py index 504e83d0..3468f92f 100644 --- a/pyxform/survey_element.py +++ b/pyxform/survey_element.py @@ -11,6 +11,7 @@ from pyxform import aliases as alias from pyxform import constants as const from pyxform.errors import PyXFormError +from pyxform.parsing.expression import is_xml_tag from pyxform.question_type_dictionary import QUESTION_TYPE_DICT from pyxform.utils import ( BRACKETED_TAG_REGEX, @@ -19,7 +20,6 @@ node, ) from pyxform.xls2json import print_pyobj_to_json -from pyxform.xlsparseutils import is_valid_xml_tag if TYPE_CHECKING: from pyxform.utils import DetachableElement @@ -140,7 +140,7 @@ def add_children(self, children): SUPPORTED_MEDIA = ("image", "big-image", "audio", "video") def validate(self): - if not is_valid_xml_tag(self.name): + if not is_xml_tag(self.name): invalid_char = re.search(INVALID_XFORM_TAG_REGEXP, self.name) raise PyXFormError( f"The name '{self.name}' contains an invalid character '{invalid_char.group(0)}'. Names {const.XML_IDENTIFIER_ERROR_MESSAGE}" diff --git a/pyxform/validators/pyxform/choices.py b/pyxform/validators/pyxform/choices.py new file mode 100644 index 00000000..c97298e7 --- /dev/null +++ b/pyxform/validators/pyxform/choices.py @@ -0,0 +1,59 @@ +from pyxform import constants +from pyxform.errors import PyXFormError + +INVALID_NAME = ( + "[row : {row}] On the 'choices' sheet, the 'name' value is invalid. " + "Choices must have a name. " + "Learn more: https://xlsform.org/en/#setting-up-your-worksheets" +) +INVALID_LABEL = ( + "[row : {row}] On the 'choices' sheet, the 'label' value is invalid. " + "Choices should have a label. " + "Learn more: https://xlsform.org/en/#setting-up-your-worksheets" +) +INVALID_HEADER = ( + "[row : 1] On the 'choices' sheet, the '{column}' value is invalid. " + "Column headers must not be empty and must not contain spaces. " + "Learn more: https://xlsform.org/en/#setting-up-your-worksheets" +) +INVALID_DUPLICATE = ( + "[row : {row}] On the 'choices' sheet, the 'name' value is invalid. " + "Choice names must be unique for each choice list. " + "If this is intentional, use the setting 'allow_choice_duplicates'. " + "Learn more: https://xlsform.org/#choice-names." +) + + +def validate_headers( + headers: tuple[tuple[str, ...], ...], warnings: list[str] +) -> list[str]: + def check(): + for header in headers: + header = header[0] + if header != constants.LIST_NAME_S and (" " in header or header == ""): + warnings.append(INVALID_HEADER.format(column=header)) + yield header + + return list(check()) + + +def validate_choices( + options: list[dict], warnings: list[str], allow_duplicates: bool = False +) -> None: + seen_options = set() + duplicate_errors = [] + for option in options: + if "name" not in option: + raise PyXFormError(INVALID_NAME.format(row=option["__row"])) + elif "label" not in option: + warnings.append(INVALID_LABEL.format(row=option["__row"])) + + if not allow_duplicates: + name = option["name"] + if name in seen_options: + duplicate_errors.append(INVALID_DUPLICATE.format(row=option["__row"])) + else: + seen_options.add(name) + + if 0 < len(duplicate_errors): + raise PyXFormError("\n".join(duplicate_errors)) diff --git a/pyxform/validators/pyxform/question_types.py b/pyxform/validators/pyxform/question_types.py index 7ec18edd..3c62673f 100644 --- a/pyxform/validators/pyxform/question_types.py +++ b/pyxform/validators/pyxform/question_types.py @@ -3,7 +3,7 @@ """ from pyxform.errors import PyXFormError -from pyxform.parsing.expression import is_single_token_expression +from pyxform.parsing.expression import is_pyxform_reference from pyxform.utils import PYXFORM_REFERENCE_REGEX BACKGROUND_GEOPOINT_CALCULATION = "[row : {r}] For 'background-geopoint' questions, the 'calculation' column must be empty." @@ -25,9 +25,7 @@ def validate_background_geopoint_calculation(row: dict, row_num: int) -> bool: def validate_background_geopoint_trigger(row: dict, row_num: int) -> bool: """A background-geopoint must have a trigger.""" - if not row.get("trigger", False) or not is_single_token_expression( - expression=row["trigger"], token_types=["PYXFORM_REF"] - ): + if not row.get("trigger", False) or not is_pyxform_reference(value=row["trigger"]): raise PyXFormError(TRIGGER_INVALID.format(r=row_num, t=row["type"])) return True diff --git a/pyxform/xlsparseutils.py b/pyxform/validators/pyxform/sheet_misspellings.py similarity index 76% rename from pyxform/xlsparseutils.py rename to pyxform/validators/pyxform/sheet_misspellings.py index 280f706a..c83fef31 100644 --- a/pyxform/xlsparseutils.py +++ b/pyxform/validators/pyxform/sheet_misspellings.py @@ -1,14 +1,8 @@ -import re from collections.abc import KeysView from pyxform import constants from pyxform.utils import levenshtein_distance -# http://www.w3.org/TR/REC-xml/ -TAG_START_CHAR = r"[a-zA-Z:_]" -TAG_CHAR = r"[a-zA-Z:_0-9\-.]" -XFORM_TAG_REGEXP = re.compile(rf"^{TAG_START_CHAR}{TAG_CHAR}*$") - def find_sheet_misspellings(key: str, keys: "KeysView") -> "str | None": """ @@ -36,10 +30,3 @@ def find_sheet_misspellings(key: str, keys: "KeysView") -> "str | None": return msg else: return None - - -def is_valid_xml_tag(tag): - """ - Use a regex to see if there are any invalid characters (i.e. spaces). - """ - return re.search(XFORM_TAG_REGEXP, tag) diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py index 3b260e22..7e4e6ebe 100644 --- a/pyxform/xls2json.py +++ b/pyxform/xls2json.py @@ -6,7 +6,6 @@ import os import re import sys -from collections import Counter from typing import IO, Any from pyxform import aliases, constants @@ -21,15 +20,16 @@ validate_entity_saveto, ) from pyxform.errors import PyXFormError -from pyxform.parsing.expression import is_single_token_expression +from pyxform.parsing.expression import is_pyxform_reference, is_xml_tag from pyxform.utils import PYXFORM_REFERENCE_REGEX, coalesce, default_is_dynamic +from pyxform.validators.pyxform import choices as vc from pyxform.validators.pyxform import parameters_generic, select_from_file from pyxform.validators.pyxform import question_types as qt from pyxform.validators.pyxform.android_package_name import validate_android_package_name from pyxform.validators.pyxform.pyxform_reference import validate_pyxform_reference_syntax +from pyxform.validators.pyxform.sheet_misspellings import find_sheet_misspellings from pyxform.validators.pyxform.translations_checks import SheetTranslations from pyxform.xls2json_backends import csv_to_dict, xls_to_dict, xlsx_to_dict -from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'} RE_SMART_QUOTES = re.compile(r"|".join(re.escape(old) for old in SMART_QUOTES)) @@ -175,7 +175,12 @@ def dealias_types(dict_array): return dict_array -def clean_text_values(sheet_name: str, data: list[dict], strip_whitespace: bool = False): +def clean_text_values( + sheet_name: str, + data: list[dict], + strip_whitespace: bool = False, + add_row_number: bool = False, +) -> list[dict]: """ Go though the dict array and strips all text values. Also replaces multiple spaces with single spaces. @@ -192,6 +197,8 @@ def clean_text_values(sheet_name: str, data: list[dict], strip_whitespace: bool validate_pyxform_reference_syntax( value=value, sheet_name=sheet_name, row_number=row_number, key=key ) + if add_row_number: + row["__row"] = row_number return data @@ -513,7 +520,11 @@ def workbook_to_json( # ########## Choices sheet ########## choices_sheet = workbook_dict.get(constants.CHOICES, []) - choices_sheet = clean_text_values(sheet_name=constants.CHOICES, data=choices_sheet) + choices_sheet = clean_text_values( + sheet_name=constants.CHOICES, + data=choices_sheet, + add_row_number=True, + ) choices_sheet = dealias_and_group_headers( dict_array=choices_sheet, header_aliases=aliases.list_header, @@ -523,73 +534,27 @@ def workbook_to_json( choices = group_dictionaries_by_key( list_of_dicts=choices_sheet.data, key=constants.LIST_NAME_S ) - if 0 < len(choices): - json_dict[constants.CHOICES] = choices # To combine the warning into one message, the check for missing choices translation # columns is run with Survey sheet below. - # Make sure all the options have the required properties: - warnedabout = set() - for list_name, options in choices.items(): + # Warn and remove invalid headers in case the form uses headers for notes. + invalid_headers = vc.validate_headers(choices_sheet.headers, warnings) + allow_duplicates = aliases.yes_no.get( + settings.get("allow_choice_duplicates", False), False + ) + for options in choices.values(): + vc.validate_choices( + options=options, + warnings=warnings, + allow_duplicates=allow_duplicates, + ) for option in options: - if "name" not in option: - info = "[list_name : " + list_name + "]" - raise PyXFormError( - "On the choices sheet there is a option with no name. " + info - ) - if "label" not in option: - info = "[list_name : " + list_name + "]" - warnings.append( - "On the choices sheet there is a option with no label. " + info - ) - # chrislrobert's fix for a cryptic error message: - # see: https://code.google.com/p/opendatakit/issues/detail?id=833&start=200 - option_keys = list(option.keys()) - for headername in option_keys: - # Using warnings and removing the bad columns - # instead of throwing errors because some forms - # use choices column headers for notes. - if " " in headername: - if headername not in warnedabout: - warnedabout.add(headername) - warnings.append( - "On the choices sheet there is " - + 'a column ("' - + headername - + '") with an illegal header. ' - + "Headers cannot include spaces." - ) - del option[headername] - elif headername == "": - warnings.append( - "On the choices sheet there is a value" - + " in a column with no header." - ) - del option[headername] - list_name_choices = [option.get("name") for option in options] - if len(list_name_choices) != len(set(list_name_choices)): - duplicate_setting = settings.get("allow_choice_duplicates") - for v in Counter(list_name_choices).values(): - if v > 1: - if not duplicate_setting or duplicate_setting.capitalize() != "Yes": - choice_duplicates = [ - item - for item, count in Counter(list_name_choices).items() - if count > 1 - ] - - if choice_duplicates: - raise PyXFormError( - "The name column for the '{}' choice list contains these duplicates: {}. Duplicate names " - "will be impossible to identify in analysis unless a previous value in a cascading " - "select differentiates them. If this is intentional, you can set the " - "allow_choice_duplicates setting to 'yes'. Learn more: https://xlsform.org/#choice-names.".format( - list_name, - ", ".join( - [f"'{dupe}'" for dupe in choice_duplicates] - ), - ) - ) + for invalid_header in invalid_headers: + option.pop(invalid_header, None) + del option["__row"] + + if 0 < len(choices): + json_dict[constants.CHOICES] = choices # ########## Entities sheet ########### entities_sheet = workbook_dict.get(constants.ENTITIES, []) @@ -945,7 +910,7 @@ def workbook_to_json( ROW_FORMAT_STRING % row_number + " Question or group with no name." ) question_name = str(row[constants.NAME]) - if not is_valid_xml_tag(question_name): + if not is_xml_tag(question_name): if isinstance(question_name, bytes): question_name = question_name.decode("utf-8") @@ -1022,10 +987,7 @@ def workbook_to_json( repeat_count_expression = new_json_dict.get("control", {}).get("jr:count") if repeat_count_expression: # Simple expressions don't require a new node, they can reference directly. - simple_expression = is_single_token_expression( - expression=repeat_count_expression, token_types=["PYXFORM_REF"] - ) - if not simple_expression: + if not is_pyxform_reference(value=repeat_count_expression): generated_node_name = new_json_dict["name"] + "_count" parent_children_array.append( { diff --git a/tests/parsing/__init__.py b/tests/parsing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/parsing/test_expression.py b/tests/parsing/test_expression.py new file mode 100644 index 00000000..1fe3ad42 --- /dev/null +++ b/tests/parsing/test_expression.py @@ -0,0 +1,49 @@ +from pyxform.parsing.expression import is_xml_tag + +from tests.pyxform_test_case import PyxformTestCase + +positive = [ + ("A", "Single uppercase letter"), + ("ab", "Lowercase letters"), + ("_u", "Leading underscore"), + ("A12", "Leading uppercase letter with digit"), + ("A-1.23", "Leading uppercase letter with hyphen, period, and digit"), + ("Name123-456", "Mixed case, digits, hyphen"), + ("𐐀n", "Leading unicode"), + ("Αλ", "Following unicode"), + ("name:name", "NCName, colon, NCName"), + ("name_with_colon:_and_extras", "NCName, colon, NCName (non-letter characters)"), + # Other special character tokens are excluded by ncname_regex. + ("nameor", "Contains another parser token (or)"), + ("nameand", "Contains another parser token (and)"), + ("namemod", "Contains another parser token (mod)"), + ("namediv", "Contains another parser token (div)"), +] + +negative = [ + ("", "Empty string"), + (" ", "Space"), + ("123name", "Leading digit"), + ("-name", "Leading hyphen"), + (".name", "Leading period"), + (":name", "Leading colon"), + ("name$", "Invalid character ($)"), + ("name with space", "Invalid character (space)"), + ("na@me", "Invalid character (@)"), + ("na#me", "Invalid character (#)"), + ("name:name:name", "Invalid structure (multiple colons)"), +] + + +class TestExpression(PyxformTestCase): + def test_is_xml_tag__positive(self): + """Should accept positive match cases i.e. valid xml tag names.""" + for case, description in positive: + with self.subTest(case=case, description=description): + self.assertTrue(is_xml_tag(case)) + + def test_is_xml_tag__negative(self): + """Should reject negative match cases i.e. invalid xml tag names.""" + for case, description in negative: + with self.subTest(case=case, description=description): + self.assertFalse(is_xml_tag(case)) diff --git a/tests/test_fields.py b/tests/test_fields.py index 6c44fc9e..55591535 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -2,6 +2,8 @@ Test duplicate survey question field name. """ +from pyxform.validators.pyxform import choices as vc + from tests.pyxform_test_case import PyxformTestCase from tests.xpath_helpers.choices import xpc from tests.xpath_helpers.questions import xpq @@ -57,9 +59,7 @@ def test_duplicate_choices_without_setting(self): | | list | b | option c | """, errored=True, - error__contains=[ - "The name column for the 'list' choice list contains these duplicates: 'b'" - ], + error__contains=[vc.INVALID_DUPLICATE.format(row=4)], ) def test_multiple_duplicate_choices_without_setting(self): @@ -77,8 +77,10 @@ def test_multiple_duplicate_choices_without_setting(self): """, errored=True, error__contains=[ - "The name column for the 'list' choice list contains these duplicates: 'a', 'b'" + vc.INVALID_DUPLICATE.format(row=3), + vc.INVALID_DUPLICATE.format(row=5), ], + debug=True, ) def test_duplicate_choices_with_setting_not_set_to_yes(self): @@ -97,9 +99,7 @@ def test_duplicate_choices_with_setting_not_set_to_yes(self): | | Duplicates | Bob | """, errored=True, - error__contains=[ - "The name column for the 'list' choice list contains these duplicates: 'b'" - ], + error__contains=[vc.INVALID_DUPLICATE.format(row=4)], ) def test_duplicate_choices_with_allow_choice_duplicates_setting(self): diff --git a/tests/test_sheet_columns.py b/tests/test_sheet_columns.py index a5b85a42..3a7e708d 100644 --- a/tests/test_sheet_columns.py +++ b/tests/test_sheet_columns.py @@ -2,6 +2,8 @@ Test XLSForm sheet names. """ +from pyxform.validators.pyxform import choices as vc + from tests.pyxform_test_case import PyxformTestCase from tests.utils import prep_for_xml_contains from tests.xpath_helpers.choices import xpc @@ -156,7 +158,7 @@ def test_invalid_choices_sheet_fails(self): ] ), errored=True, - error__contains=["option with no name"], + error__contains=[vc.INVALID_NAME.format(row=2)], ) def test_missing_list_name(self): diff --git a/tests/test_xlsform_spec.py b/tests/test_xlsform_spec.py index c3442fae..891c591b 100644 --- a/tests/test_xlsform_spec.py +++ b/tests/test_xlsform_spec.py @@ -1,3 +1,5 @@ +from pyxform.validators.pyxform import choices as vc + from tests.pyxform_test_case import PyxformTestCase @@ -61,10 +63,10 @@ def test_warnings__count(self): ) self.maxDiff = 2000 expected = [ - "On the choices sheet there is a option with no label. [list_name : a_b]", - "On the choices sheet there is a option with no label. [list_name : a_b]", - "On the choices sheet there is a option with no label. [list_name : animals]", - "On the choices sheet there is a option with no label. [list_name : animals]", + vc.INVALID_LABEL.format(row=4), + vc.INVALID_LABEL.format(row=5), + vc.INVALID_LABEL.format(row=6), + vc.INVALID_LABEL.format(row=7), "[row : 9] Repeat has no label: {'name': 'repeat_test', 'type': 'begin repeat'}", "[row : 10] Group has no label: {'name': 'group_test', 'type': 'begin group'}", "[row : 17] Group has no label: {'name': 'name', 'type': 'begin group'}", diff --git a/tests/xform_test_case/test_bugs.py b/tests/xform_test_case/test_bugs.py index c022a812..c5db20d1 100644 --- a/tests/xform_test_case/test_bugs.py +++ b/tests/xform_test_case/test_bugs.py @@ -10,6 +10,7 @@ from pyxform.errors import PyXFormError from pyxform.utils import has_external_choices from pyxform.validators.odk_validate import ODKValidateError, check_xform +from pyxform.validators.pyxform import choices as vc from pyxform.xls2json import SurveyReader, parse_file_to_workbook_dict from pyxform.xls2json_backends import xlsx_to_dict from pyxform.xls2xform import convert @@ -79,7 +80,11 @@ def test_conversion(self): ) # The "column with no header" warning is probably not reachable since XLS/X # pre-processing ignores any columns without a header. - observed = [w for w in warnings if "Headers cannot include spaces" in w] + observed = [ + w + for w in warnings + if w == vc.INVALID_HEADER.format(column="header with spaces") + ] self.assertEqual(1, len(observed), warnings) def test_values_with_spaces_are_cleaned(self):