From 33780b6d3a958fc8185958692399ec1d7dc84a18 Mon Sep 17 00:00:00 2001 From: Marco Borgeaud Date: Mon, 14 Oct 2024 16:48:37 +0200 Subject: [PATCH] Detect title as subsections (#4407) --- rspec-tools/README.adoc | 2 +- .../rspec_tools/validation/description.py | 25 +- .../invalid-rules/S100/php/metadata.json | 6 + .../invalid-rules/S100/php/rule.adoc | 5 + .../invalid-rules/S100/php/rule.html | 457 ++++++++++++++++++ .../validation/test_description_validation.py | 5 + 6 files changed, 492 insertions(+), 8 deletions(-) create mode 100644 rspec-tools/tests/resources/invalid-rules/S100/php/metadata.json create mode 100644 rspec-tools/tests/resources/invalid-rules/S100/php/rule.adoc create mode 100644 rspec-tools/tests/resources/invalid-rules/S100/php/rule.html diff --git a/rspec-tools/README.adoc b/rspec-tools/README.adoc index ea46a5793bf..3e961e4d404 100644 --- a/rspec-tools/README.adoc +++ b/rspec-tools/README.adoc @@ -55,5 +55,5 @@ In order to generate an HTML file from the ASCIIdoc, you can use [asciidoctor](h [source,sh] ---- -$ asciidoctor -e rule.adoc +$ asciidoctor rule.adoc ---- diff --git a/rspec-tools/rspec_tools/validation/description.py b/rspec-tools/rspec_tools/validation/description.py index cf343f9e656..96271728293 100644 --- a/rspec-tools/rspec_tools/validation/description.py +++ b/rspec-tools/rspec_tools/validation/description.py @@ -1,6 +1,6 @@ import re from pathlib import Path -from typing import Final, Dict, List +from typing import Dict, Final, List, Union from bs4 import BeautifulSoup from rspec_tools.errors import RuleValidationError @@ -78,15 +78,26 @@ def intersection(list1, list2): def difference(list1, list2): return list(set(list1) - set(list2)) +def validate_titles_are_not_misclassified_as_subtitles(rule_language: LanguageSpecificRule, subtitles: list[str], allowed_h2_sections: list[str]): + # TODO This does not validate "How to fix it" section for frameworks as the section names are a bit special. + misclassified = intersection(subtitles, allowed_h2_sections) + if misclassified: + misclassified.sort() + misclassified_str = ', '.join(misclassified) + raise RuleValidationError(f'Rule {rule_language.id} has some sections misclassified. Ensure there are not too many `=` in the asciidoc file for: {misclassified_str}') + def validate_section_names(rule_language: LanguageSpecificRule): """Validates all h2-level section names""" + def get_titles(level: Union[str, list[str]]) -> list[str]: + return list(map(lambda x: x.text.strip(), rule_language.description.find_all(level))) - descr = rule_language.description - h2_titles = list(map(lambda x: x.text.strip(), descr.find_all('h2'))) - + h2_titles = get_titles('h2') + subtitles = get_titles(['h3', 'h4', 'h5', 'h6']) + allowed_h2_sections = list(MANDATORY_SECTIONS) + list(OPTIONAL_SECTIONS.keys()) + validate_titles_are_not_misclassified_as_subtitles(rule_language, subtitles, allowed_h2_sections) validate_duplications(h2_titles, rule_language) - education_titles = intersection(h2_titles, list(MANDATORY_SECTIONS) + list(OPTIONAL_SECTIONS.keys())) + education_titles = intersection(h2_titles, allowed_h2_sections) if education_titles: # Using the education format. validate_how_to_fix_it_sections_names(rule_language, h2_titles) @@ -234,7 +245,7 @@ def validate_security_standard_links(rule_language: LanguageSpecificRule): # Avoid raising mismatch issues on deprecated or closed rules if metadata.get('status') != 'ready': return - + security_standards_metadata = metadata.get('securityStandards', {}) for standard in SECURITY_STANDARD_URL.keys(): @@ -244,7 +255,7 @@ def validate_security_standard_links(rule_language: LanguageSpecificRule): extra_links = difference(links_mapping, metadata_mapping) if len(extra_links) > 0: raise RuleValidationError(f'Rule {rule_language.id} has a mismatch for the {standard} security standards. Remove links from the Resources/See section ({extra_links}) or fix the rule metadata') - + missing_links = difference(metadata_mapping, links_mapping) if len(missing_links) > 0: raise RuleValidationError(f'Rule {rule_language.id} has a mismatch for the {standard} security standards. Add links to the Resources/See section ({missing_links}) or fix the rule metadata') diff --git a/rspec-tools/tests/resources/invalid-rules/S100/php/metadata.json b/rspec-tools/tests/resources/invalid-rules/S100/php/metadata.json new file mode 100644 index 00000000000..68702a9782c --- /dev/null +++ b/rspec-tools/tests/resources/invalid-rules/S100/php/metadata.json @@ -0,0 +1,6 @@ +{ + "title": "Function names should comply with a naming convention", + "defaultQualityProfiles": [ + + ] +} diff --git a/rspec-tools/tests/resources/invalid-rules/S100/php/rule.adoc b/rspec-tools/tests/resources/invalid-rules/S100/php/rule.adoc new file mode 100644 index 00000000000..63f64743211 --- /dev/null +++ b/rspec-tools/tests/resources/invalid-rules/S100/php/rule.adoc @@ -0,0 +1,5 @@ +== Why is this an issue? + +=== How to fix it + +=== Resources \ No newline at end of file diff --git a/rspec-tools/tests/resources/invalid-rules/S100/php/rule.html b/rspec-tools/tests/resources/invalid-rules/S100/php/rule.html new file mode 100644 index 00000000000..d7afbf83140 --- /dev/null +++ b/rspec-tools/tests/resources/invalid-rules/S100/php/rule.html @@ -0,0 +1,457 @@ + + + + + + + +Why is this an issue? + + + + + +
+
+

Why is this an issue?

+
+
+

How to fix it

+ +
+
+

Resources

+ +
+
+
+
+ + + \ No newline at end of file diff --git a/rspec-tools/tests/validation/test_description_validation.py b/rspec-tools/tests/validation/test_description_validation.py index b15a6b0422f..f92772f84ce 100644 --- a/rspec-tools/tests/validation/test_description_validation.py +++ b/rspec-tools/tests/validation/test_description_validation.py @@ -39,6 +39,11 @@ def test_unexpected_section_fails_validation(invalid_rule): with pytest.raises(RuleValidationError, match=fr'^Rule {rule.id} has an unconventional header "Invalid header"'): validate_section_names(rule) +def test_sections_with_wrong_level_fails_validation(invalid_rule): + rule = invalid_rule('S100', 'php') + with pytest.raises(RuleValidationError, match=fr'^Rule {rule.id} has some sections misclassified. Ensure there are not too many `=` in the asciidoc file for: How to fix it, Resources'): + validate_section_names(rule) + def test_valid_section_levels_passes_validation(rule_language): '''Check that description with correct formatting is considered valid.''' validate_section_levels(rule_language('S100', 'cfamily'))