From 8cf19c649247579f66dc8e7e7e8158da494c5399 Mon Sep 17 00:00:00 2001 From: Andrei Litvin Date: Fri, 20 Oct 2023 18:12:55 -0400 Subject: [PATCH] Add a parser for CSA XML data models (#29888) * Move xml to say zapxml since that is the format * Start defining a data_model_xml parser (no functionality for now) * Start adding some basic support for data model xml parsing * make the DM parser executable * Start having the ability to parse clusters * More updates, we seem to have parsing for features * Hard-code global attributes * Remove some comments * Add enumeration handling * Add bitmap handling * Restyle * Parse structs * Re-organize parsing a bit * Make a linter happy * Another linter fix * Handling of events * More handling and logic on events * Add support for access privilege parsing * XMLs have maybe invalid enum entries. Handle them gracefully * More attribute handling updates * restyle * Support deprecate constraint * Support constraint decoding, apply on attributes for now * Restyle * Restyle * Field handling * Field handling * Some bug fixing * Start adding command handling * Restyle * Name normalization and more parsing updates * Name normalization and more parsing updates * Better messaging and fix constraint types * Restyle * Start creating a IDL codegen so we can self-test parsed output * Start with listing clusters * Enum listing * A lot more things supported * Attribute rendering * Support for string and octet string sizes * Timed command support * Restyle * Add descriptions to clusters * Attempt to fix up alignment of things * Alignment looks slightly better * Better command separation * Align comments * Align and output descriptions including clusters * More work regarding loop structures * Apply hex formatting to bitmaps. output now seems identical except one whitespace change * Identical output for now * Support API maturity. 
Notice that doccomments are lost on maturity :( * Fix doxygen parsing for api maturity at the cluster level * Restyle * Support endpoints, although that is not 1:1 as hex encoding and ordering for events is lost * Restyle * Add todo note that default value does not string escaping * Default rendering and add to files * More updates on file dependencies * Unit test IDL generator * Add the IDL unit test as a standard unit test * Update for python compatibility * Fix unit testing of builds when GSDK root is defined * Added a readme file * Restyle * Make xml parser use the idl codegen * Restyle * look to fix misspell warnings * Undo repo update * Fix linter errors * Codegen as idl for data_model_xml_parser.py * Parsing closer to matter idl content * more normalization and type processing * More mandatory conformance logic * Fix mandatory conditionals * Make unit test pass * Fix tests a bit more and make parsers better * Restyle * Ignore min/max values while parsing xmls, even though raw data internally contains them * Restyle * Fix space after click annotations * Compare support for human reviews * Restyle * Fix slash * Undo submodule change * fix xml to zapxml naming changes * Restyle * Update dates from 2022 to 2023 * Add note about the complex test input * Remove unused imports * Restyle * Add some commends based on code review * Add heuristic for setting enum and bitmap sizes, to make output from XML much more readable * Add support for timed and fabric scoped commands * Add missing import --------- Co-authored-by: Andrei Litvin --- .github/workflows/tests.yaml | 2 +- scripts/py_matter_idl/BUILD.gn | 2 +- scripts/py_matter_idl/files.gni | 11 +- .../matter_idl/data_model_xml/__init__.py | 128 ++++ .../data_model_xml/handlers/__init__.py | 34 ++ .../data_model_xml/handlers/base.py | 63 ++ .../data_model_xml/handlers/context.py | 123 ++++ .../data_model_xml/handlers/handlers.py | 547 ++++++++++++++++++ .../data_model_xml/handlers/parsing.py | 269 +++++++++ 
.../matter_idl/data_model_xml_parser.py | 181 ++++++ .../matter_idl/generators/idl/README.md | 2 +- .../matter_idl/test_data_model_xml.py | 274 +++++++++ .../{test_xml_parser.py => test_zapxml.py} | 0 .../{xml_parser.py => zapxml_parser.py} | 3 +- 14 files changed, 1633 insertions(+), 6 deletions(-) create mode 100644 scripts/py_matter_idl/matter_idl/data_model_xml/__init__.py create mode 100644 scripts/py_matter_idl/matter_idl/data_model_xml/handlers/__init__.py create mode 100644 scripts/py_matter_idl/matter_idl/data_model_xml/handlers/base.py create mode 100644 scripts/py_matter_idl/matter_idl/data_model_xml/handlers/context.py create mode 100644 scripts/py_matter_idl/matter_idl/data_model_xml/handlers/handlers.py create mode 100644 scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py create mode 100755 scripts/py_matter_idl/matter_idl/data_model_xml_parser.py create mode 100755 scripts/py_matter_idl/matter_idl/test_data_model_xml.py rename scripts/py_matter_idl/matter_idl/{test_xml_parser.py => test_zapxml.py} (100%) rename scripts/py_matter_idl/matter_idl/{xml_parser.py => zapxml_parser.py} (96%) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index bb0a781f8b566c..1a2eedcb59c3a0 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -85,7 +85,7 @@ jobs: # run: | ./scripts/run_in_build_env.sh \ - "./scripts/py_matter_idl/matter_idl/xml_parser.py \ + "./scripts/py_matter_idl/matter_idl/zapxml_parser.py \ --no-print \ --log-level info \ src/app/zap-templates/zcl/data-model/chip/global-attributes.xml \ diff --git a/scripts/py_matter_idl/BUILD.gn b/scripts/py_matter_idl/BUILD.gn index c57df9b7d5287d..37b17477aabfbc 100644 --- a/scripts/py_matter_idl/BUILD.gn +++ b/scripts/py_matter_idl/BUILD.gn @@ -67,7 +67,7 @@ pw_python_package("matter_idl") { "matter_idl/test_matter_idl_parser.py", "matter_idl/test_generators.py", "matter_idl/test_idl_generator.py", - "matter_idl/test_xml_parser.py", + 
"matter_idl/test_zapxml.py", ] # TODO: at a future time consider enabling all (* or missing) here to get diff --git a/scripts/py_matter_idl/files.gni b/scripts/py_matter_idl/files.gni index cb54b253ef0615..fb9f3991b87c4c 100644 --- a/scripts/py_matter_idl/files.gni +++ b/scripts/py_matter_idl/files.gni @@ -23,6 +23,12 @@ matter_idl_generator_templates = [ matter_idl_generator_sources = [ "${chip_root}/scripts/py_matter_idl/matter_idl/__init__.py", "${chip_root}/scripts/py_matter_idl/matter_idl/backwards_compatibility.py", + "${chip_root}/scripts/py_matter_idl/matter_idl/data_model_xml/__init__.py", + "${chip_root}/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/__init__.py", + "${chip_root}/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/base.py", + "${chip_root}/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/context.py", + "${chip_root}/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/handlers.py", + "${chip_root}/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py", "${chip_root}/scripts/py_matter_idl/matter_idl/generators/__init__.py", "${chip_root}/scripts/py_matter_idl/matter_idl/generators/cpp/__init__.py", "${chip_root}/scripts/py_matter_idl/matter_idl/generators/cpp/application/__init__.py", @@ -38,16 +44,17 @@ matter_idl_generator_sources = [ "${chip_root}/scripts/py_matter_idl/matter_idl/matter_idl_parser.py", "${chip_root}/scripts/py_matter_idl/matter_idl/matter_idl_types.py", "${chip_root}/scripts/py_matter_idl/matter_idl/test_backwards_compatibility.py", + "${chip_root}/scripts/py_matter_idl/matter_idl/test_data_model_xml.py", "${chip_root}/scripts/py_matter_idl/matter_idl/test_generators.py", "${chip_root}/scripts/py_matter_idl/matter_idl/test_matter_idl_parser.py", - "${chip_root}/scripts/py_matter_idl/matter_idl/test_xml_parser.py", - "${chip_root}/scripts/py_matter_idl/matter_idl/xml_parser.py", + "${chip_root}/scripts/py_matter_idl/matter_idl/test_zapxml.py", 
"${chip_root}/scripts/py_matter_idl/matter_idl/zapxml/__init__.py", "${chip_root}/scripts/py_matter_idl/matter_idl/zapxml/handlers/__init__.py", "${chip_root}/scripts/py_matter_idl/matter_idl/zapxml/handlers/base.py", "${chip_root}/scripts/py_matter_idl/matter_idl/zapxml/handlers/context.py", "${chip_root}/scripts/py_matter_idl/matter_idl/zapxml/handlers/handlers.py", "${chip_root}/scripts/py_matter_idl/matter_idl/zapxml/handlers/parsing.py", + "${chip_root}/scripts/py_matter_idl/matter_idl/zapxml_parser.py", ] # All the files that the matter idl infrastructure will use diff --git a/scripts/py_matter_idl/matter_idl/data_model_xml/__init__.py b/scripts/py_matter_idl/matter_idl/data_model_xml/__init__.py new file mode 100644 index 00000000000000..fb0a0cf8d516a9 --- /dev/null +++ b/scripts/py_matter_idl/matter_idl/data_model_xml/__init__.py @@ -0,0 +1,128 @@ +# Copyright (c) 2022 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import typing +import xml.sax.handler +from dataclasses import dataclass +from typing import List, Optional, Union + +from matter_idl.data_model_xml.handlers import Context, DataModelXmlHandler +from matter_idl.matter_idl_types import Idl + + +class ParseHandler(xml.sax.handler.ContentHandler): + """A parser for data model XML data definitions. 
+ + Defers its processing to DataModelXmlHandler and keeps track of: + - an internal context for all handlers + - the parsed Idl structure that is incrementally built + - sets up parsing location within the context + - keeps track of ParsePath + + Overall converts a python SAX handler into matter_idl.zapxml.handlers + """ + + def __init__(self, include_meta_data=True): + super().__init__() + self._idl = Idl() + self._processing_stack = [] + # Context persists across all + self._context = Context() + self._include_meta_data = include_meta_data + self._locator = None + + def PrepareParsing(self, filename): + # This is a bit ugly: filename keeps changing during parse + # IDL meta is not prepared for this (as source is XML and .matter is + # single file) + if self._include_meta_data: + self._idl.parse_file_name = filename + + self._context.file_name = filename + + def Finish(self) -> Idl: + self._context.PostProcess(self._idl) + return self._idl + + def startDocument(self): + if self._include_meta_data and self._locator: + self._context.locator = self._locator + self._processing_stack = [ + DataModelXmlHandler(self._context, self._idl)] + + def endDocument(self): + if len(self._processing_stack) != 1: + raise Exception("Unexpected nesting!") + + def startElement(self, name: str, attrs): + logging.debug("ELEMENT START: %r / %r" % (name, attrs)) + self._context.path.push(name) + self._processing_stack.append( + self._processing_stack[-1].GetNextProcessor(name, attrs)) + + def endElement(self, name: str): + logging.debug("ELEMENT END: %r" % name) + + last = self._processing_stack.pop() + last.EndProcessing() + + # important to pop AFTER processing end to allow processing + # end to access the current context + self._context.path.pop() + + def characters(self, content): + self._processing_stack[-1].HandleContent(content) + + +@dataclass +class ParseSource: + """Represents an input source for ParseXmls. + + Allows for named data sources to be parsed. 
+ """ + source: Union[str, typing.IO] # filename or stream + # actual filename to use, None if the source is a filename already + name: Optional[str] = None + + @ property + def source_file_name(self): + if self.name: + return self.name + return self.source # assume string + + +def ParseXmls(sources: List[ParseSource], include_meta_data=True) -> Idl: + """Parse one or more XML inputs and return the resulting Idl data. + + Params: + sources - what to parse + include_meta_data - if parsing location data should be included in the Idl + """ + handler = ParseHandler(include_meta_data=include_meta_data) + + for source in sources: + logging.info('Parsing %s...' % source.source_file_name) + handler.PrepareParsing(source.source_file_name) + + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + try: + parser.parse(source.source) + except AssertionError as e: + logging.error("AssertionError %s at %r", e, + handler._context.GetCurrentLocationMeta()) + raise + + return handler.Finish() diff --git a/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/__init__.py b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/__init__.py new file mode 100644 index 00000000000000..a2192ee010c46d --- /dev/null +++ b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) 2023 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from matter_idl.matter_idl_types import Idl + +from .base import BaseHandler +from .context import Context +from .handlers import ClusterHandler + + +class DataModelXmlHandler(BaseHandler): + """Handles the top level (/) of a data model xml file + """ + + def __init__(self, context: Context, idl: Idl): + super().__init__(context) + self._idl = idl + + def GetNextProcessor(self, name, attrs): + if name.lower() == 'cluster': + return ClusterHandler(self.context, self._idl, attrs) + else: + return BaseHandler(self.context) diff --git a/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/base.py b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/base.py new file mode 100644 index 00000000000000..c446a2e4bb43d1 --- /dev/null +++ b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/base.py @@ -0,0 +1,63 @@ +# Copyright (c) 2023 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import enum + +from .context import Context + + +class HandledDepth: + """Defines how deep a XML element has been handled.""" + NOT_HANDLED = enum.auto() # Unknown/parsed element + ENTIRE_TREE = enum.auto() # Entire tree can be ignored + SINGLE_TAG = enum.auto() # Single tag processed, but not sub-items + + +class BaseHandler: + """A generic element handler. 
+ + XML processing is done in the form of depth-first processing: + - Tree is descended into using `GetNextProcessor` + - Processors are expected to extend `BaseHandler` and allow for: + - GetNextProcessor to recurse + - HandleContent in case the text content is relevant + - EndProcessing once the entire tree has been walked (when xml element ends) + + BaseHandler keeps track if it has been handled or not by its `_handled` setting and + init parameter. Non-handled elements will be tagged within the context, resulting + in logs. This is to detect if unknown/new tags appear in XML files. + """ + + def __init__(self, context: Context, handled=HandledDepth.NOT_HANDLED): + self.context = context + self._handled = handled + + def GetNextProcessor(self, name, attrs): + """Get the next processor to use for the given name""" + + if self._handled == HandledDepth.SINGLE_TAG: + handled = HandledDepth.NOT_HANDLED + else: + handled = self._handled + + return BaseHandler(context=self.context, handled=handled) + + def HandleContent(self, content): + """Processes some content""" + pass + + def EndProcessing(self): + """Finalizes the processing of the current element""" + if self._handled == HandledDepth.NOT_HANDLED: + self.context.MarkTagNotHandled() diff --git a/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/context.py b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/context.py new file mode 100644 index 00000000000000..3e3220ee699c87 --- /dev/null +++ b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/context.py @@ -0,0 +1,123 @@ +# Copyright (c) 2023 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import xml.sax.xmlreader +from typing import List, Optional + +from matter_idl.matter_idl_types import Idl, ParseMetaData + + +class IdlPostProcessor: +    """Defines a callback that will apply after an entire parsing + is complete. + """ + + def FinalizeProcessing(self, idl: Idl): + """Update idl with any post-processing directives.""" + pass + + +class ProcessingPath: + """Maintains the current path of tags within xml processing. + + As processing descends into an xml like `....` + paths will have contents like ['configurator', 'cluster', ...]. + + The main purpose for this is to log and keep track of what was visited + and in general to report things like 'this path found but was not handled'. + """ + + def __init__(self, paths: Optional[List[str]] = None): + if paths is None: + paths = [] + self.paths = paths + + def push(self, name: str): + self.paths.append(name) + + def pop(self): + self.paths.pop() + + def __str__(self): + return '::'.join(self.paths) + + def __repr__(self): + return 'ProcessingPath(%r)' % self.paths + + +class Context: + """ + Contains a processing state during XML reading. + + The purpose of this is to allow elements to interact with each other, share + data and defer processing. 
+ + Usage: + - globally shared data: + > locator: parsing location, for error reporting + > path: current ProcessingPath for any logging of where we are located + - post-processing support: + > can register AddIdlPostProcessor to perform some processing once + a full parsing pass has been done + + More data may be added in time if it involves separate XML parse handlers + needing to interact with each other. + """ + + def __init__(self, locator: Optional[xml.sax.xmlreader.Locator] = None): + self.path = ProcessingPath() + self.locator = locator + self.file_name = None + self._not_handled: set[str] = set() + self._idl_post_processors: list[IdlPostProcessor] = [] + + def GetCurrentLocationMeta(self) -> Optional[ParseMetaData]: + if not self.locator: + return None + + return ParseMetaData(line=self.locator.getLineNumber(), column=self.locator.getColumnNumber()) + + def ParseLogLocation(self) -> Optional[str]: + if not self.file_name: + return None + meta = self.GetCurrentLocationMeta() + if not meta: + return None + + return f"{self.file_name}:{meta.line}:{meta.column}" + + def MarkTagNotHandled(self): + path = str(self.path) + if path not in self._not_handled: + msg = "TAG %s was not handled/recognized" % path + + where = self.ParseLogLocation() + if where: + msg = msg + " at " + where + + logging.warning(msg) + self._not_handled.add(path) + + def AddIdlPostProcessor(self, processor: IdlPostProcessor, has_priority: bool = False): + if has_priority: + self._idl_post_processors.insert(0, processor) + else: + self._idl_post_processors.append(processor) + + def PostProcess(self, idl: Idl): + for p in self._idl_post_processors: + p.FinalizeProcessing(idl) + + self._idl_post_processors = [] diff --git a/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/handlers.py b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/handlers.py new file mode 100644 index 00000000000000..955c5e4c06020d --- /dev/null +++ 
b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/handlers.py @@ -0,0 +1,547 @@ +# Copyright (c) 2023 Project CHIP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from matter_idl.matter_idl_types import (Attribute, AttributeQuality, Bitmap, Cluster, ClusterSide, CommandQuality, ConstantEntry, + DataType, Enum, Field, FieldQuality, Idl, Struct, StructTag) + +from .base import BaseHandler, HandledDepth +from .context import Context +from .parsing import (ApplyConstraint, AttributesToAttribute, AttributesToBitFieldConstantEntry, AttributesToCommand, + AttributesToEvent, AttributesToField, NormalizeDataType, NormalizeName, ParseInt, StringToAccessPrivilege) + +LOGGER = logging.getLogger('data-model-xml-parser') + + +class FeaturesHandler(BaseHandler): + + def __init__(self, context: Context, cluster: Cluster): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + self._bitmap = Bitmap(name="Feature", base_type="bitmap32", entries=[]) + + def EndProcessing(self): + if self._bitmap.entries: + self._cluster.bitmaps.append(self._bitmap) + + def GetNextProcessor(self, name: str, attrs): + if name in {"section", "optionalConform"}: + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "feature": + self._bitmap.entries.append( + AttributesToBitFieldConstantEntry(attrs)) + # assume everything handled. 
Sub-item is only section + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + else: + return BaseHandler(self.context) + + +class BitmapHandler(BaseHandler): + def __init__(self, context: Context, cluster: Cluster, attrs): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + + # TODO: base type is GUESSED here because xml does not contain it + self._bitmap = Bitmap(name=NormalizeName( + attrs["name"]), base_type="UNKNOWN", entries=[]) + + def EndProcessing(self): + if not self._bitmap.entries: + return + + # try to find the best size that fits + # TODO: this is a pure heuristic. XML containing this would be better. + acceptable = {8, 16, 32} + for entry in self._bitmap.entries: + if entry.code > 0xFF: + acceptable.remove(8) + if entry.code > 0xFFFF: + acceptable.remove(16) + + if 8 in acceptable: + self._bitmap.base_type = "bitmap8" + elif 16 in acceptable: + self._bitmap.base_type = "bitmap16" + else: + self._bitmap.base_type = "bitmap32" + + self._cluster.bitmaps.append(self._bitmap) + + def GetNextProcessor(self, name: str, attrs): + if name == "section": + # Documentation data, skipped + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "bitfield": + self._bitmap.entries.append( + AttributesToBitFieldConstantEntry(attrs)) + # Assume fully handled. 
We do not parse "mandatoryConform and such" + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + else: + return BaseHandler(self.context) + + +class MandatoryConfirmFieldHandler(BaseHandler): + def __init__(self, context: Context, field: Field): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._field = field + self._hadConditions = False + + def GetNextProcessor(self, name: str, attrs): + self._hadConditions = True + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + + def EndProcessing(self): + # A mandatory conformance with conditions means it is + # optional in some cases + if self._hadConditions: + self._field.qualities |= FieldQuality.OPTIONAL + + +class FieldHandler(BaseHandler): + def __init__(self, context: Context, field: Field): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._field = field + + def GetNextProcessor(self, name: str, attrs): + if name == "constraint": + ApplyConstraint(attrs, self._field) + return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG) + elif name == "mandatoryConform": + return MandatoryConfirmFieldHandler(self.context, self._field) + elif name == "optionalConform": + self._field.qualities |= FieldQuality.OPTIONAL + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "access": + # per-field access is not something we model + return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG) + elif name == "quality": + if "nullable" in attrs and attrs["nullable"] != "false": + self._field.qualities = self._field.qualities | FieldQuality.NULLABLE + return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG) + elif name == "enum": + LOGGER.warning( + f"Anonymous enumeration not supported when handling field {self._field.name}") + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "bitmap": + LOGGER.warning( + f"Anonymous bitmap not supported when handling field {self._field.name}") + 
return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "entry": + # Lists have "type=list" and then the type is inside entry + + if self._field.data_type.name != "list": + LOGGER.warning( + f"Entry type provided for non-list element {self._field.name}") + + assert "type" in attrs + + self._field.is_list = True + self._field.data_type.name = NormalizeDataType(attrs["type"]) + + return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG) + else: + return BaseHandler(self.context) + + +class StructHandler(BaseHandler): + def __init__(self, context: Context, cluster: Cluster, attrs): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + self._struct = Struct(name=NormalizeName(attrs["name"]), fields=[]) + + def EndProcessing(self): + self._cluster.structs.append(self._struct) + + def GetNextProcessor(self, name: str, attrs): + if name == "section": + # Documentation data, skipped + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "field": + field = AttributesToField(attrs) + self._struct.fields.append(field) + return FieldHandler(self.context, field) + else: + return BaseHandler(self.context) + + +class EventHandler(BaseHandler): + def __init__(self, context: Context, cluster: Cluster, attrs): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + self._event = AttributesToEvent(attrs) + + def EndProcessing(self): + self._cluster.events.append(self._event) + + def GetNextProcessor(self, name: str, attrs): + if name == "section": + # Documentation data, skipped + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "field": + field = AttributesToField(attrs) + self._event.fields.append(field) + return FieldHandler(self.context, field) + elif name == "mandatoryConform": + # assume handled (we do not record conformance in IDL) + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == 
"access": + if "readPrivilege" in attrs: + self._event.readacl = StringToAccessPrivilege( + attrs["readPrivilege"]) + return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG) + else: + return BaseHandler(self.context) + + +class EnumHandler(BaseHandler): + def __init__(self, context: Context, cluster: Cluster, attrs): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + + # TODO: base type is GUESSED here because xml does not contain it + self._enum = Enum(name=NormalizeName( + attrs["name"]), base_type="UNKNOWN", entries=[]) + + def EndProcessing(self): + if not self._enum.entries: + return + + # try to find the best enum size that fits out of enum8, enum16 and enum32 + # TODO: this is a pure heuristic. XML containing this would be better. + acceptable = {8, 16, 32} + for entry in self._enum.entries: + if entry.code > 0xFF: + acceptable.remove(8) + if entry.code > 0xFFFF: + acceptable.remove(16) + + if 8 in acceptable: + self._enum.base_type = "enum8" + elif 16 in acceptable: + self._enum.base_type = "enum16" + else: + self._enum.base_type = "enum32" + + self._cluster.enums.append(self._enum) + + def GetNextProcessor(self, name: str, attrs): + if name == "section": + # Documentation data, skipped + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "item": + for key in ["name", "value"]: + if key not in attrs: + logging.error("Enumeration %s entry is missing a '%s' entry (at %r)", + self._enum.name, key, self.context.GetCurrentLocationMeta()) + # bad entry, nothing I can do about it. 
+ return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + + self._enum.entries.append( + ConstantEntry( + name="k" + NormalizeName(attrs["name"]), code=ParseInt(attrs["value"])) + ) + # Assume fully handled + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + else: + return BaseHandler(self.context) + + +class EventsHandler(BaseHandler): + def __init__(self, context: Context, cluster: Cluster): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + + def GetNextProcessor(self, name: str, attrs): + if name == "section": + # Documentation data, skipped + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "event": + return EventHandler(self.context, self._cluster, attrs) + else: + return BaseHandler(self.context) + + +class AttributeHandler(BaseHandler): + def __init__(self, context: Context, cluster: Cluster, attrs): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + self._attribute = AttributesToAttribute(attrs) + self._deprecated = False + + def EndProcessing(self): + if self._deprecated: + # Deprecation skips processing + return + + self._cluster.attributes.append(self._attribute) + + def GetNextProcessor(self, name: str, attrs): + if name == "enum": + LOGGER.warning( + f"Anonymous enumeration not supported when handling attribute {self._cluster.name}::{self._attribute.definition.name}") + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "bitmap": + LOGGER.warning( + f"Anonymous bitmap not supported when handling attribute {self._cluster.name}::{self._attribute.definition.name}") + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "access": + if "readPrivilege" in attrs: + self._attribute.readacl = StringToAccessPrivilege( + attrs["readPrivilege"]) + + if "writePrivilege" in attrs: + self._attribute.writeacl = StringToAccessPrivilege( + attrs["writePrivilege"]) + + if 
"read" in attrs and attrs["read"] != "false": + self._attribute.qualities = self._attribute.qualities | AttributeQuality.READABLE + + if "write" in attrs and attrs["write"] != "false": + self._attribute.qualities = self._attribute.qualities | AttributeQuality.WRITABLE + + if "timed" in attrs and attrs["timed"] != "false": + self._attribute.qualities = self._attribute.qualities | AttributeQuality.TIMED_WRITE + return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG) + elif name == "quality": + # Out of the many interesting bits, only "nullable" seems relevant for codegen + if "nullable" in attrs and attrs["nullable"] != "false": + self._attribute.definition.qualities |= FieldQuality.NULLABLE + return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG) + elif name == "optionalConform": + self._attribute.definition.qualities |= FieldQuality.OPTIONAL + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "mandatoryConform": + return MandatoryConfirmFieldHandler(self.context, self._attribute.definition) + elif name == "deprecateConform": + self._deprecated = True + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "constraint": + ApplyConstraint(attrs, self._attribute.definition) + return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG) + else: + return BaseHandler(self.context) + + +class AttributesHandler(BaseHandler): + def __init__(self, context: Context, cluster: Cluster): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + + def GetNextProcessor(self, name: str, attrs): + if name == "attribute": + return AttributeHandler(self.context, self._cluster, attrs) + else: + return BaseHandler(self.context) + + +class CommandHandler(BaseHandler): + def __init__(self, context: Context, cluster: Cluster, attrs): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + + # Command information layout: + # "response": + # - 
is mandatory for "requests" and contains + # 'Y' for default response and something else + # for non-default responses + # "direction": + # - sometimes missing (seems to just be request to client) + # - "commandToClient" + # - "responseFromServer" + # + + # Heuristic logic of direction: + # - if we have a response, this must be a request + # - if direction is "commandToClient" it should be a request + # - if direction is "responseFromServer" it should be a response + # otherwise guess + + if "response" in attrs: + is_command = True + elif ("direction" in attrs) and attrs["direction"] == "commandToClient": + is_command = True + elif ("direction" in attrs) and attrs["direction"] == "responseFromServer": + is_command = False # response + else: + LOGGER.warn("Could not clearly determine command direction: %s" % + [item for item in attrs.items()]) + # Do a best-guess. However we should NOT need to guess once + # we have a good data set + is_command = not attrs["name"].endswith("Response") + + if is_command: + self._command = AttributesToCommand(attrs) + self._struct = Struct(name=NormalizeName(attrs["name"] + "Request"), + fields=[], + tag=StructTag.REQUEST, + ) + else: + self._command = None + self._struct = Struct( + name=NormalizeName(attrs["name"]), + fields=[], + code=ParseInt(attrs["id"]), + tag=StructTag.RESPONSE, + ) + + def EndProcessing(self): + if self._struct and self._struct.fields: + # A valid structure exists ... 
+ self._cluster.structs.append(self._struct) + + if self._command: + # Input structure is well defined, set it + self._command.input_param = self._struct.name + + if self._command: + self._cluster.commands.append(self._command) + + def GetNextProcessor(self, name: str, attrs): + if name in {"mandatoryConform", "optionalConform", "disallowConform"}: + # Unclear how commands may be optional or mandatory + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "access": + # + if "invokePrivilege" in attrs: + if self._command: + self._command.invokeacl = StringToAccessPrivilege( + attrs["invokePrivilege"]) + else: + LOGGER.warn( + f"Ignoring invoke privilege for {self._struct.name}") + + if "timed" in attrs and attrs["timed"] != "false": + self._command.qualities |= CommandQuality.TIMED_INVOKE + + if "fabricScoped" in attrs and attrs["fabricScoped"] != "false": + self._command.qualities |= CommandQuality.FABRIC_SCOPED + + return BaseHandler(self.context, handled=HandledDepth.SINGLE_TAG) + elif name == "field": + field = AttributesToField(attrs) + self._struct.fields.append(field) + return FieldHandler(self.context, field) + else: + return BaseHandler(self.context) + + +class CommandsHandler(BaseHandler): + def __init__(self, context: Context, cluster: Cluster): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + + def GetNextProcessor(self, name: str, attrs): + if name == "command": + return CommandHandler(self.context, self._cluster, attrs) + elif name in {"mandatoryConform", "optionalConform"}: + # Nothing to tag conformance + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + else: + return BaseHandler(self.context) + + +class DataTypesHandler(BaseHandler): + def __init__(self, context: Context, cluster: Cluster): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._cluster = cluster + + def GetNextProcessor(self, name: str, attrs): + if name == "section": + # 
Documentation data, skipped + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "number": + # Seems like a documentation of a number format + # + # TODO: actually ensure this has no meaning + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "enum": + return EnumHandler(self.context, self._cluster, attrs) + elif name == "bitmap": + return BitmapHandler(self.context, self._cluster, attrs) + elif name == "struct": + return StructHandler(self.context, self._cluster, attrs) + else: + return BaseHandler(self.context) + + +class ClusterHandler(BaseHandler): + """ Handling /cluster elements.""" + + def __init__(self, context: Context, idl: Idl, attrs): + super().__init__(context, handled=HandledDepth.SINGLE_TAG) + self._idl = idl + + assert ("name" in attrs) + assert ("id" in attrs) + + self._cluster = Cluster( + side=ClusterSide.CLIENT, + name=NormalizeName(attrs["name"]), + code=ParseInt(attrs["id"]), + parse_meta=context.GetCurrentLocationMeta() + ) + + def EndProcessing(self): + # Global things MUST be available everywhere + to_add = [ + # type, code, name, is_list + ('attrib_id', 65531, 'attributeList', True), + ('event_id', 65530, 'eventList', True), + ('command_id', 65529, 'acceptedCommandList', True), + ('command_id', 65528, 'generatedCommandList', True), + ('bitmap32', 65532, 'featureMap', False), + ('int16u', 65533, 'clusterRevision', False), + ] + + for data_type, code, name, is_list in to_add: + self._cluster.attributes.append(Attribute(definition=Field( + data_type=DataType(name=data_type), + code=code, + name=name, + is_list=is_list, + ), qualities=AttributeQuality.READABLE)) + self._idl.clusters.append(self._cluster) + + def GetNextProcessor(self, name: str, attrs): + if name == "revisionHistory": + # Revision history COULD be used to find the latest revision of a cluster + # however current IDL files do NOT have a revision info field + # + # NOTE: we COULD set this as a `default` for attribute 
clusterRevision, however this will likely + # not match with what matter IDL would parse into. + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "section": + # Documentation data, skipped + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "classification": + # Not an obvious mapping in the existing data model. + # + # TODO IFF hierarchy == derived, we should use baseCluster + # + # Other elements like role, picsCode, scope and primaryTransaction seem + # to not be used + return BaseHandler(self.context, handled=HandledDepth.ENTIRE_TREE) + elif name == "features": + return FeaturesHandler(self.context, self._cluster) + elif name == "dataTypes": + return DataTypesHandler(self.context, self._cluster) + elif name == "events": + return EventsHandler(self.context, self._cluster) + elif name == "attributes": + return AttributesHandler(self.context, self._cluster) + elif name == "commands": + return CommandsHandler(self.context, self._cluster) + else: + return BaseHandler(self.context) diff --git a/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py new file mode 100644 index 00000000000000..a6b7b4287ef5fa --- /dev/null +++ b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py @@ -0,0 +1,269 @@ +# Copyright (c) 2023 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import logging
import re
from typing import Optional

from matter_idl.generators.types import GetDataTypeSizeInBits, IsSignedDataType
from matter_idl.matter_idl_types import (AccessPrivilege, Attribute, Command,
                                         ConstantEntry, DataType, Event,
                                         EventPriority, Field)

LOGGER = logging.getLogger('data-model-xml-data-parsing')


def ParseInt(value: str, data_type: Optional[DataType] = None) -> int:
    """
    Convert a string that is a known integer into an actual number.

    Supports decimal or hex values prefixed with '0x'. When a signed
    data_type of known bit size is given, hex values are sign-extended
    (e.g. 0xFF as int8s parses to -1).
    """
    if value.startswith('0x'):
        parsed = int(value[2:], 16)
        if data_type and IsSignedDataType(data_type):
            bits = GetDataTypeSizeInBits(data_type)
            assert (bits)  # size MUST be known
            if parsed & (1 << (bits - 1)):
                parsed -= 1 << bits
        return parsed
    else:
        return int(value)


def ParseOptionalInt(value: str) -> Optional[int]:
    """Parses numbers as long as they are in an expected format of numbers.

    "1" parses to 1
    "0x12" parses to 18
    "Min" parses to None
    """
    # FIX: the previous pattern used `*` quantifiers, so "", "-" and "0x"
    # matched and then crashed inside ParseInt with a ValueError. Require
    # at least one digit in either form.
    if re.match("^-?((0x[0-9a-fA-F]+)|([0-9]+))$", value):
        return ParseInt(value)

    return None


# Maps XML data-model scalar type names to matter IDL type names.
_TYPE_REMAP = {
    # unsigned
    "uint8": "int8u",
    "uint16": "int16u",
    "uint24": "int24u",
    "uint32": "int32u",
    "uint48": "int48u",
    # TODO(review): "uint52"/"sint52" look unusual for Matter (which uses
    # 8/16/24/32/40/48/56/64-bit ints) — confirm against the XML inputs.
    "uint52": "int52u",
    "uint64": "int64u",  # FIX: was "int54u" (typo)
    # signed
    "sint8": "int8s",
    "sint16": "int16s",
    "sint24": "int24s",
    "sint32": "int32s",
    "sint48": "int48s",
    "sint52": "int52s",
    "sint64": "int64s",  # FIX: was "int54s" (typo)
    # other
    "bool": "boolean",
    "string": "char_string",
    "octets": "octet_string",
}


def NormalizeDataType(t: str) -> str:
    """Convert data model xml types into matter idl types."""
    return _TYPE_REMAP.get(t.lower(), t.replace("-", "_"))


def NormalizeName(name: str) -> str:
    """Convert a free form name from the spec into a programming language
    name that is appropriate for matter IDL.
    """

    # Trim human name separators
    for separator in " /-":
        name = name.replace(separator, '_')
    while '__' in name:
        name = name.replace('__', '_')

    # NOTE: zapt generators for IDL files use a construct of the form
    #       `{{asUpperCamelCase name preserveAcronyms=true}}`
    #       and it is somewhat unclear what preserveAcronyms will do.
    #
    #       Current assumption is that spec already has acronyms set in
    #       the correct place and at least for some basic tests this method
    #       generates good names
    #
    #       If any acronyms seem off in naming at some point, more logic may
    #       be needed here.

    # At this point, we remove all _ and make sure _ is followed by an uppercase
    while name.endswith('_'):
        name = name[:-1]

    while '_' in name:
        idx = name.find('_')
        name = name[:idx] + name[idx+1].upper() + name[idx+2:]

    return name


def FieldName(name: str) -> str:
    """Normalized name with the first letter lowercase. """
    name = NormalizeName(name)
    if not name:
        # FIX: guard against an empty normalized name (previously IndexError)
        return name
    return name[0].lower() + name[1:]


def AttributesToField(attrs) -> Field:
    """Build a struct/command Field from a `field` element's attributes."""
    assert "name" in attrs
    assert "id" in attrs
    assert "type" in attrs

    return Field(
        name=FieldName(attrs["name"]),
        code=ParseInt(attrs["id"]),
        data_type=DataType(name=NormalizeDataType(attrs["type"]))
    )


def AttributesToBitFieldConstantEntry(attrs) -> ConstantEntry:
    """Creates a constant entry appropriate for bitmaps.
    """
    assert ("name" in attrs)
    assert ("bit" in attrs)

    return ConstantEntry(name="k" + NormalizeName(attrs["name"]), code=1 << ParseInt(attrs["bit"]))


def AttributesToAttribute(attrs) -> Attribute:
    """Build an Attribute (with its field definition) from an `attribute` element."""
    assert "name" in attrs
    assert "id" in attrs

    if "type" in attrs:
        attr_type = NormalizeDataType(attrs["type"])
    else:
        # TODO: we should NOT have this, however we are now lenient
        # to bad input data
        LOGGER.error(f"Attribute {attrs['name']} has no type")
        attr_type = "sint32"

    return Attribute(
        definition=Field(
            code=ParseInt(attrs["id"]),
            name=FieldName(attrs["name"]),
            data_type=DataType(name=attr_type),
        )
    )


def AttributesToEvent(attrs) -> Event:
    """Build an Event from an `event` element, mapping its priority string."""
    assert "name" in attrs
    assert "id" in attrs
    assert "priority" in attrs

    if attrs["priority"] == "critical":
        priority = EventPriority.CRITICAL
    elif attrs["priority"] == "info":
        priority = EventPriority.INFO
    elif attrs["priority"] == "debug":
        priority = EventPriority.DEBUG
    elif attrs["priority"] == "desc":
        LOGGER.warning("Found an event with 'desc' priority: %s",
                       [item for item in attrs.items()])
        priority = EventPriority.CRITICAL
    else:
        raise Exception("UNKNOWN event priority: %r" % attrs["priority"])

    return Event(
        name=NormalizeName(attrs["name"]),
        code=ParseInt(attrs["id"]),
        priority=priority,
        fields=[])


def StringToAccessPrivilege(value: str) -> AccessPrivilege:
    """Map an XML privilege string to the matter IDL access privilege enum."""
    if value == "view":
        return AccessPrivilege.VIEW
    elif value == "operate":
        return AccessPrivilege.OPERATE
    elif value == "manage":
        return AccessPrivilege.MANAGE
    elif value == "admin":
        return AccessPrivilege.ADMINISTER
    else:
        raise Exception("UNKNOWN privilege level: %r" % value)


def AttributesToCommand(attrs) -> Command:
    """Build a Command from a `command` element.

    The output parameter defaults to "DefaultSuccess" when no response is
    declared or when the response is the default ('Y').
    """
    assert "id" in attrs
    assert "name" in attrs

    if "response" not in attrs:
        LOGGER.warning(f"Command {attrs['name']} has no response set.")
        # Matter IDL has no concept of "no response sent"
        # Example is DoorLock::"Operating Event Notification"
        #
        # However that is not in the impl in general
        # it is unclear what to do here (and what "NOT" is as conformance)

        output_param = "DefaultSuccess"
    else:
        output_param = NormalizeName(attrs["response"])
        if output_param == "Y":
            output_param = "DefaultSuccess"  # IDL name for no specific struct

    return Command(
        name=NormalizeName(attrs["name"]),
        code=ParseInt(attrs["id"]),
        input_param=None,  # not specified YET
        output_param=output_param
    )


def ApplyConstraint(attrs, field: Field):
    """
    Handles constraints according to Matter IDL formats.

    Specifically it does NOT handle min/max values as current IDL
    format does not support having such values defined.
    """
    assert "type" in attrs

    constraint_type = attrs["type"]

    if constraint_type == "allowed":
        pass  # unsure what to do with allowed
    elif constraint_type == "desc":
        pass  # free-form description
    elif constraint_type in {"countBetween", "maxCount"}:
        pass  # cannot implement count
    elif constraint_type == "min":
        # field.data_type.min_value = ParseOptionalInt(attrs["value"])
        pass
    elif constraint_type == "max":
        # field.data_type.max_value = ParseOptionalInt(attrs["value"])
        pass
    elif constraint_type == "between":
        # TODO: examples existing in the parsed data which are NOT handled:
        #    - from="-2.5°C" to="2.5°C"
        #    - from="0%" to="100%"
        # field.data_type.min_value = ParseOptionalInt(attrs["from"])
        # field.data_type.max_value = ParseOptionalInt(attrs["to"])
        pass
    elif constraint_type == "maxLength":
        field.data_type.max_length = ParseOptionalInt(attrs["value"])
    elif constraint_type == "minLength":
        field.data_type.min_length = ParseOptionalInt(attrs["value"])
    elif constraint_type == "lengthBetween":
        field.data_type.min_length = ParseOptionalInt(attrs["from"])
        field.data_type.max_length = ParseOptionalInt(attrs["to"])
    else:
        # FIX: use the module logger (was the root `logging` module) for
        # consistency with every other message in this file.
        LOGGER.error(f"UNKNOWN constraint type {constraint_type}")
#!/usr/bin/env python3
# Copyright (c) 2023 Project CHIP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import sys
from typing import Optional

import click

try:
    from matter_idl.data_model_xml import ParseSource, ParseXmls
except ImportError:
    sys.path.append(os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..')))
    from matter_idl.data_model_xml import ParseSource, ParseXmls

from matter_idl.generators import GeneratorStorage
from matter_idl.generators.idl import IdlGenerator
from matter_idl.matter_idl_parser import CreateParser
from matter_idl.matter_idl_types import Idl


class InMemoryStorage(GeneratorStorage):
    """Generator storage that captures a single generated file in memory."""

    def __init__(self):
        super().__init__()
        self.content: Optional[str] = None

    def get_existing_data(self, relative_path: str):
        # Force re-generation each time
        return None

    def write_new_data(self, relative_path: str, content: str):
        if self.content:
            raise Exception(
                "Unexpected extra data: single file generation expected")
        self.content = content


def normalize_order(idl: Idl):
    """Re-sorts contents of things inside a cluster so that
    output is easily diffed by humans
    """

    # This method exists because `zapt` generation of IDL files
    # are generally based on SQL select query ordering, likely
    # with some sort fields to achieve determinism
    #
    # However overall, especially if manual editing, it seems
    # easier to just fix a sort order instead of trying to
    # match another tool ordering that resides in another
    # code location.

    idl.clusters.sort(key=lambda c: c.name)

    for cluster in idl.clusters:
        cluster.enums.sort(key=lambda e: e.name)
        cluster.bitmaps.sort(key=lambda b: b.name)
        cluster.events.sort(key=lambda e: e.code)
        cluster.attributes.sort(key=lambda a: a.definition.code)
        cluster.structs.sort(key=lambda s: s.name)
        cluster.commands.sort(key=lambda c: c.code)


# Supported log levels, mapping string values required for argument
# parsing into logging constants
__LOG_LEVELS__ = {
    'debug': logging.DEBUG,
    'info': logging.INFO,
    'warn': logging.WARN,
    'fatal': logging.FATAL,
}


@click.command()
@click.option(
    '--log-level',
    default='INFO',
    show_default=True,
    type=click.Choice(list(__LOG_LEVELS__.keys()), case_sensitive=False),
    help='Determines the verbosity of script output.')
@click.option(
    '--no-print',
    show_default=True,
    default=False,
    is_flag=True,
    # FIX: typo "pring" -> "print" in user-facing help text
    help='Do not print output data (parsed data)')
@click.option(
    "-o",
    "--output",
    default=None,
    type=click.Path(),
    help="Where to output the parsed IDL."
)
@click.option(
    "--compare",
    default=None,
    type=click.Path(exists=True),
    help="An input .matter IDL to compare with."
)
@click.option(
    "--compare-output",
    default=None,
    type=click.Path(),
    help="Where to output the compare IDL"
)
@click.argument('filenames', nargs=-1)
def main(log_level, no_print, output, compare, compare_output, filenames):
    """
    A program supporting parsing of CSA data model XML files and generating them
    as human readable IDL output.

    Also supports parsing and generating a diff against an existing .matter file,
    such as using:

    \b
    ./scripts/py_matter_idl/matter_idl/data_model_xml_parser.py        \\
         --compare src/controller/data_model/controller-clusters.matter \\
         --compare-output out/orig.matter                               \\
         --output out/from_xml.matter                                   \\
         data_model/clusters/Switch.xml
    """
    logging.basicConfig(
        level=__LOG_LEVELS__[log_level],
        format='%(asctime)s %(levelname)-7s %(message)s',
    )

    if (compare is None) != (compare_output is None):
        logging.error(
            "Either both or none of --compare AND --compare-output must be set")
        sys.exit(1)

    logging.info("Starting to parse ...")

    sources = [ParseSource(source=name) for name in filenames]
    data = ParseXmls(sources)
    logging.info("Parse completed")

    if compare:
        # FIX: read via a context manager (the previous open().read() leaked
        # the file handle) and use an explicit encoding, matching the writes.
        with open(compare, 'rt', encoding="utf8") as f:
            other_idl = CreateParser(skip_meta=True).parse(
                f.read(), file_name=compare)

        # ensure that input file is filtered to only interesting
        # clusters
        loaded_clusters = set([c.code for c in data.clusters])
        other_idl.clusters = [
            c for c in other_idl.clusters if c.code in loaded_clusters]

        # Ensure consistent ordering for compares
        normalize_order(data)
        normalize_order(other_idl)

        storage = InMemoryStorage()
        IdlGenerator(storage=storage, idl=other_idl).render(dry_run=False)
        with open(compare_output, 'wt', encoding="utf8") as o:
            o.write(storage.content)

    storage = InMemoryStorage()
    IdlGenerator(storage=storage, idl=data).render(dry_run=False)

    if output:
        with open(output, 'wt', encoding="utf8") as o:
            o.write(storage.content)
    elif not no_print:
        print(storage.content)


if __name__ == '__main__':
    main(auto_envvar_prefix='CHIP')
@@ -26,7 +26,7 @@ A XML parser will use this code generator to output a human readable view of the parsed data: ``` -./scripts/py_matter_idl/matter_idl/xml_parser.py \ +./scripts/py_matter_idl/matter_idl/zapxml_parser.py \ ./src/app/zap-templates/zcl/data-model/chip/onoff-cluster.xml \ ./src/app/zap-templates/zcl/data-model/chip/global-attributes.xm ``` diff --git a/scripts/py_matter_idl/matter_idl/test_data_model_xml.py b/scripts/py_matter_idl/matter_idl/test_data_model_xml.py new file mode 100755 index 00000000000000..b53a6e3c38fd43 --- /dev/null +++ b/scripts/py_matter_idl/matter_idl/test_data_model_xml.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python3 +# Copyright (c) 2023 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import io +import unittest +from typing import List, Union + +try: + from matter_idl.data_model_xml import ParseSource, ParseXmls +except ImportError: + import os + import sys + + sys.path.append(os.path.abspath( + os.path.join(os.path.dirname(__file__), '..'))) + from matter_idl.data_model_xml import ParseSource, ParseXmls + +from matter_idl.matter_idl_types import Idl +from matter_idl_parser import CreateParser + + +def XmlToIdl(what: Union[str, List[str]]) -> Idl: + if not isinstance(what, list): + what = [what] + + sources = [] + for idx, txt in enumerate(what): + sources.append(ParseSource(source=io.StringIO( + txt), name=("Input %d" % (idx + 1)))) + + return ParseXmls(sources, include_meta_data=False) + + +def IdlTextToIdl(what: str) -> Idl: + return CreateParser(skip_meta=True).parse(what) + + +class TestXmlParser(unittest.TestCase): + + def __init__(self, *args, **kargs): + super().__init__(*args, **kargs) + self.maxDiff = None + + def testBasicInput(self): + + xml_idl = XmlToIdl(''' + + ''') + + expected_idl = IdlTextToIdl(''' + client cluster Test = 123 { + readonly attribute attrib_id attributeList[] = 65531; + readonly attribute event_id eventList[] = 65530; + readonly attribute command_id acceptedCommandList[] = 65529; + readonly attribute command_id generatedCommandList[] = 65528; + readonly attribute bitmap32 featureMap = 65532; + readonly attribute int16u clusterRevision = 65533; + } + ''') + + self.assertEqual(xml_idl, expected_idl) + + def testComplexInput(self): + # This parses a known copy of Switch.xml which happens to be fully + # spec-conformant (so assuming it as a good input) + xml_idl = XmlToIdl(''' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ''') + + expected_idl = IdlTextToIdl(''' + client cluster Switch = 
59 { + bitmap Feature : bitmap32 { + kLatchingSwitch = 0x1; + kMomentarySwitch = 0x2; + kMomentarySwitchRelease = 0x4; + kMomentarySwitchLongPress = 0x8; + kMomentarySwitchMultiPress = 0x10; + } + + info event SwitchLatched = 0 { + int8u newPosition = 0; + } + + info event InitialPress = 1 { + int8u newPosition = 0; + } + + info event LongPress = 2 { + int8u newPosition = 0; + } + + info event ShortRelease = 3 { + int8u previousPosition = 0; + } + + info event LongRelease = 4 { + int8u previousPosition = 0; + } + + info event MultiPressOngoing = 5 { + int8u newPosition = 0; + int8u currentNumberOfPressesCounted = 1; + } + + info event MultiPressComplete = 6 { + int8u previousPosition = 0; + int8u totalNumberOfPressesCounted = 1; + } + + readonly attribute int8u numberOfPositions = 0; + readonly attribute int8u currentPosition = 1; + readonly attribute optional int8u multiPressMax = 2; + readonly attribute attrib_id attributeList[] = 65531; + readonly attribute event_id eventList[] = 65530; + readonly attribute command_id acceptedCommandList[] = 65529; + readonly attribute command_id generatedCommandList[] = 65528; + readonly attribute bitmap32 featureMap = 65532; + readonly attribute int16u clusterRevision = 65533; + } + ''') + + self.assertEqual(xml_idl, expected_idl) + + +if __name__ == '__main__': + unittest.main() diff --git a/scripts/py_matter_idl/matter_idl/test_xml_parser.py b/scripts/py_matter_idl/matter_idl/test_zapxml.py similarity index 100% rename from scripts/py_matter_idl/matter_idl/test_xml_parser.py rename to scripts/py_matter_idl/matter_idl/test_zapxml.py diff --git a/scripts/py_matter_idl/matter_idl/xml_parser.py b/scripts/py_matter_idl/matter_idl/zapxml_parser.py similarity index 96% rename from scripts/py_matter_idl/matter_idl/xml_parser.py rename to scripts/py_matter_idl/matter_idl/zapxml_parser.py index d20bdb9dda67ec..1f9593cd2a74f4 100755 --- a/scripts/py_matter_idl/matter_idl/xml_parser.py +++ 
b/scripts/py_matter_idl/matter_idl/zapxml_parser.py @@ -44,7 +44,8 @@ def get_existing_data(self, relative_path: str): def write_new_data(self, relative_path: str, content: str): if self.content: - raise Exception("Unexpected extra data: single file generation expected") + raise Exception( + "Unexpected extra data: single file generation expected") self.content = content # Supported log levels, mapping string values required for argument