Add condition parsing to the script
ocelotl committed Oct 12, 2021
1 parent f87311f commit 62db85c
Showing 8 changed files with 348 additions and 137 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ bin

# Misspell binary
internal/tools/bin
.tools

# Pytest cache
__pycache__
Expand Down
1 change: 0 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
"MD029": {"style": "ordered"},
"MD033": false,
"MD040": false,
"MD001": false,
},
"yaml.schemas": {
"https://raw.githubusercontent.com/open-telemetry/build-tools/v0.7.0/semantic-conventions/semconv.schema.json": [
Expand Down
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@ Changes to the [specification](./specification/overview.md) are versioned accord

Changes to the change process itself are not currently versioned but may be independently versioned in the future.

## Generating requirements in JSON

As described in the [conformance clause](./conformance_clause.md), this specification follows the
[W3C Recommendation QA Framework: Specification Guidelines](https://www.w3.org/TR/2005/REC-qaframe-spec-20050817/). The requirements can be
automatically extracted from the specification Markdown files into JSON files for easier listing. These JSON files can later be used to check
the compliance of a particular implementation.

To generate the JSON files, run `python tools/specification_parser/specification_parser.py` from the directory that contains this file. The
corresponding JSON files are written next to the Markdown files that contain the requirements.
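Judging from the parser code in this commit, each generated JSON file holds a list of entries with `id`, `content`, and `RFC 2119 keyword` fields (condition entries additionally carry `children`). A minimal sketch of consuming that shape, using an inline sample rather than a real generated file:

```python
import json

# A tiny sample in the shape the parser appears to emit; the content
# below is illustrative, not taken from an actual specification file.
sample = json.dumps([
    {
        "id": "Requirement 1",
        "content": "The API MUST provide a function to create a Baggage.",
        "RFC 2119 keyword": "MUST",
    },
])

# List each requirement together with its normative keyword.
for entry in json.loads(sample):
    print(f'{entry["id"]}: {entry["RFC 2119 keyword"]}')
```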

## Acronym

The official acronym used by the OpenTelemetry project is "OTel".
Expand Down
6 changes: 3 additions & 3 deletions conformance_clause.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,11 @@ particular implementation.
This is the markdown code for the previous example:

```
##### Condition 1:
##### Condition 1
> The API does not operate directly on the `Context`.
>
> ##### Conditional Requirement 1.1:
> ##### Conditional Requirement 1.1
>
> > The API **MUST** provide an `extract` function to extract the `Baggage`
> > from a `Context` instance.
Expand All @@ -124,7 +124,7 @@ the following words:
- REQUIRED
- SHALL
- SHALL NOT
- MUST NOT:w
- MUST NOT
- SHOULD
- RECOMMENDED
- SHOULD NOT
Expand Down
62 changes: 31 additions & 31 deletions specification/baggage/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,39 +161,39 @@ on the `Context`, then users should not have access to the
> > The API **SHOULD** provide a get `Baggage` functionality to get the currently active
> > `Baggage` from the implicit context and a set `Baggage` functionality to set the
> > currently active `Baggage` into the implicit context.
>
> ##### Condition 2.1
>
> > The API provides a functionality to get the currently active `Baggage` from the
> > implicit context and a functionality to set the currently active `Baggage` into
> > the implicit context.
> >
> > ##### Conditional Requirement 2.1.1
> >
> > > The get `Baggage` functionality behavior **MUST** be equivalent to getting the
> > > implicit context, then extracting the `Baggage` from the context.
> >
> > ##### Conditional Requirement 2.1.2
> >
> > > The set `Baggage` functionality behavior **MUST** be equivalent to getting the
> > > implicit context, then inserting the `Baggage` into the context.
> >
> > ##### Conditional Requirement 2.1.3
> >
> > > The get and set `Baggage` functionalities **MUST** operate solely on the context
> > > API.
> >
> > ##### Conditional Requirement 2.1.4
> >
> > > The get and set `Baggage` functionalities **MAY** be exposed
> > > - as static methods on the baggage module or
> > > - as static methods on a class inside the baggage module or
> > > - on the `Baggage` class
> >
> > ##### Condition 2.1
> > ##### Conditional Requirement 2.1.5
> >
> > > The API provides a functionality to get the currently active `Baggage` from the
> > > implicit context and a functionality to set the currently active `Baggage` into
> > > the implicit context.
> > >
> > > ##### Conditional Requirement 2.1.1
> > >
> > > > The get `Baggage` functionality behavior **MUST** be equivalent to getting the
> > > > implicit context, then extracting the `Baggage` from the context.
> > >
> > > ##### Conditional Requirement 2.1.2
> > >
> > > > The set `Baggage` functionality behavior **MUST** be equivalent to getting the
> > > > implicit context, then inserting the `Baggage` into the context.
> > >
> > > ##### Conditional Requirement 2.1.3
> > >
> > > > The get and set `Baggage` functionalities **MUST** operate solely on the context
> > > > API.
> > >
> > > ##### Conditional Requirement 2.1.4
> > >
> > > > The get and set `Baggage` functionalities **MAY** be exposed
> > > > - as static methods on the baggage module or
> > > > - as static methods on a class inside the baggage module or
> > > > - on the `Baggage` class
> > >
> > > ##### Conditional Requirement 2.1.5
> > >
> > > > The get and set `Baggage` functionalities **SHOULD** be fully implemented in
> > > > the API.
> > > The get and set `Baggage` functionalities **SHOULD** be fully implemented in
> > > the API.
### Clear Baggage in the Context

Expand Down
231 changes: 149 additions & 82 deletions tools/specification_parser/specification_parser.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
from re import finditer, findall
from re import (
finditer, findall, compile as compile_, DOTALL, sub, match, search
)
from json import dumps
from os.path import curdir, abspath, join, splitext
from os import walk

rfc_2119_keywords_regexes = [
r"MUST",
r"REQUIRED",
r"SHALL",
r"MUST NOT",
r"SHALL NOT",
r"SHOULD",
r"RECOMMENDED",
r"SHOULD NOT",
r"NOT RECOMMENDED",
r"MAY",
r"OPTIONAL",
]


def find_markdown_file_paths(root):
markdown_file_paths = []
Expand All @@ -19,103 +35,148 @@ def find_markdown_file_paths(root):
return markdown_file_paths


def parse_requirements(markdown_file_paths):
requirements = {}
def clean_content(content):

for markdown_file_path in markdown_file_paths:
for rfc_2119_keyword_regex in rfc_2119_keywords_regexes:

with open(markdown_file_path, "r") as markdown_file:
content = sub(
f"\\*\\*{rfc_2119_keyword_regex}\\*\\*",
rfc_2119_keyword_regex,
content
)

requirement_matches = [
requirement_match.groupdict() for requirement_match in (
finditer(
r"##### (?P<key>Requirement [0-9]+)\n\n"
r"(?P<description>(>.*\n)+)",
markdown_file.read(),
)
)
]
return sub(r"\n>", "", content)

if not requirement_matches:
continue

md_file_path = "".join([splitext(markdown_file_path)[0], ".md"])
def find_rfc_2119_keyword(content):

requirements[md_file_path] = {}
for rfc_2119_keyword_regex in rfc_2119_keywords_regexes:

for requirement in requirement_matches:
if search(
f"\\*\\*{rfc_2119_keyword_regex}\\*\\*", content
) is not None:
return rfc_2119_keyword_regex

requirement_key = requirement["key"]

assert (
requirement_key not in
requirements[md_file_path].keys()
), "Repeated requirement key {} found in {}".format(
requirement_key, markdown_file_path
)
def parse_requirements(markdown_file_path):

requirement_description = requirement["description"].strip()

rfc_2119_keyword_matches = []

for rfc_2119_keyword_regex in [
# 2. MUST NOT
r"MUST NOT",
r"SHALL NOT",
# 1. MUST
r"MUST(?! NOT)",
r"REQUIRED",
r"SHALL(?! NOT)",
# 4. SHOULD NOT
r"SHOULD NOT",
r"NOT RECOMMENDED",
# 3. SHOULD
r"SHOULD(?! NOT)",
r"(?<!NOT )RECOMMENDED",
# 5. MAY
r"MAY",
r"OPTIONAL",

]:
rfc_2119_keyword_matches.extend(
findall(
rfc_2119_keyword_regex,
requirement_description
)
)
requirements = []

requirement_key_path = "{}:{}".format(
markdown_file_path, requirement_key
)
with open(markdown_file_path, "r") as markdown_file:

assert (
len(rfc_2119_keyword_matches) > 0
), "No RFC 2119 keywords were found in {}".format(
requirement_key_path
for requirement in [
requirement_match.groupdict() for requirement_match in (
finditer(
r"##### (?P<id>Requirement [0-9]+)\n\n"
r"> (?P<content>(.*?))\n\n",
markdown_file.read(),
DOTALL
)
)
]:

assert (
len(rfc_2119_keyword_matches) == 1
), "More than one RFC 2119 keyword was found in {}".format(
requirement_key_path
content = requirement["content"]

requirements.append(
{
"id": requirement["id"],
"content": clean_content(content),
"RFC 2119 keyword": find_rfc_2119_keyword(content)
}
)

requirements[md_file_path][requirement_key] = {}
return requirements


def parse_conditions(markdown_file_path):

conditions = []

requirements[md_file_path][requirement_key]["description"] = (
requirement_description
with open(markdown_file_path, "r") as markdown_file:

for condition in findall(
r"##### Condition [0-9]+\n\n.*?\n\n",
markdown_file.read(),
DOTALL
):

stack = []

regex = compile_(
r"(?P<level>(> ?)*)(?P<pounds>##### )?(?P<content>.*)"
)

rfc_2119_keyword_matches.reverse()
text = ""

requirements[md_file_path][requirement_key][
"RFC 2119 Keywords"
] = rfc_2119_keyword_matches
for line in condition.split("\n"):
regex_dict = regex.match(line).groupdict()

return requirements
level = len(regex_dict["level"].split())
pounds = regex_dict["pounds"]
content = regex_dict["content"]

if not level and not content:
continue

def write_json_specifications(requirements):
if not pounds:
text = "".join([text, content])
continue

if match(
r"(> ?)*##### Condition [\.0-9]+", line
) is not None:

node = {
"id": content,
"content": "",
"children": []
}
else:
node = {
"id": content,
"content": "",
"RFC 2119 keyword": None
}

if not stack:
stack.append(node)
continue

stack[-1]["content"] = clean_content(text)

if level == len(stack) - 1:

stack[-1]["RFC 2119 keyword"] = find_rfc_2119_keyword(
text
)
stack.pop()

elif level < len(stack) - 1:
stack[-1]["RFC 2119 keyword"] = find_rfc_2119_keyword(
text
)
for _ in range(len(stack) - level):
stack.pop()

text = ""
stack[-1]["children"].append(node)
stack.append(node)

stack[-1]["content"] = clean_content(text)
stack[-1]["RFC 2119 keyword"] = find_rfc_2119_keyword(
text
)

conditions.append(stack[0])

return conditions


def write_json_specifications(requirements, conditions):
for md_absolute_file_path, requirement_sections in requirements.items():

with open(
Expand All @@ -126,10 +187,16 @@ def write_json_specifications(requirements):

if __name__ == "__main__":

write_json_specifications(
parse_requirements(
find_markdown_file_paths(
join(abspath(curdir), "..", "..", "specification")
)
)
)
for markdown_file_path in find_markdown_file_paths(
join(abspath(curdir), "specification")
):

result = []
result.extend(parse_requirements(markdown_file_path))
result.extend(parse_conditions(markdown_file_path))

if result:
with open(
"".join([splitext(markdown_file_path)[0], ".json"]), "w"
) as json_file:
json_file.write(dumps(result, indent=4))
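The nesting logic in `parse_conditions` above hinges on a single line-classification regex: blockquote depth, an optional `##### ` heading marker, and the remaining content. A self-contained sketch of how that regex classifies lines (the `classify` helper is illustrative and not part of the script):

```python
import re

# The same line-classification pattern used by parse_conditions.
line_regex = re.compile(r"(?P<level>(> ?)*)(?P<pounds>##### )?(?P<content>.*)")

def classify(line):
    groups = line_regex.match(line).groupdict()
    # Depth = number of ">" markers; split() discards the spaces between them.
    level = len(groups["level"].split())
    return level, bool(groups["pounds"]), groups["content"]

print(classify("> > ##### Conditional Requirement 2.1.1"))
# A heading two blockquote levels deep.
```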
