diff --git a/tests/test_local_taxonomy.py b/tests/test_local_taxonomy.py index 64d5b6e..81bd203 100644 --- a/tests/test_local_taxonomy.py +++ b/tests/test_local_taxonomy.py @@ -18,10 +18,10 @@ def test_parse_taxonomy(self): cache_dir: str = './cache/' cache: HttpCache = HttpCache(cache_dir) print(f"Saving to {cache_dir}") - + imported_schema_uris = set() extension_schema_path: str = './tests/data/example.xsd' # extension_schema_path: str = './data/example.xsd' - tax: TaxonomySchema = parse_taxonomy(extension_schema_path, cache) + tax: TaxonomySchema = parse_taxonomy(extension_schema_path, cache, imported_schema_uris = set()) print(tax) srt_tax: TaxonomySchema = tax.get_taxonomy('http://fasb.org/srt/2020-01-31') self.assertTrue(srt_tax) diff --git a/xbrl/instance.py b/xbrl/instance.py index d88d67c..81d01df 100644 --- a/xbrl/instance.py +++ b/xbrl/instance.py @@ -347,6 +347,7 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None = schema_uri: str = schema_ref.attrib[XLINK_NS + 'href'] # check if the schema uri is relative or absolute # submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas + if is_url(schema_uri): # fetch the taxonomy extension schema from remote taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache) @@ -357,7 +358,9 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None = else: # try to find the taxonomy extension schema file locally because no full url can be constructed schema_path = resolve_uri(instance_path, schema_uri) - taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache) + # initialise a set that will store cached taxonomy schema URIs to avoid recursive loops + imported_schema_uris = set() + taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris) # parse contexts and units context_dir = _parse_context_elements(root.findall('xbrli:context', NAME_SPACES), root.attrib['ns_map'], 
taxonomy, @@ -453,13 +456,18 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None schema_uri: str = schema_ref.attrib[XLINK_NS + 'href'] # check if the schema uri is relative or absolute # submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas + + # initialise a set that will store cached taxonomy schema URIs to avoid recursive loops + imported_schema_uris = set() + + if is_url(schema_uri): # fetch the taxonomy extension schema from remote taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache) elif schema_root: # take the given schema_root path as directory for searching for the taxonomy schema schema_path = str(next(Path(schema_root).glob(f'**/{schema_uri}'))) - taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache) + taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris) elif instance_url: # fetch the taxonomy extension schema from remote by reconstructing the url schema_url = resolve_uri(instance_url, schema_uri) @@ -467,7 +475,7 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None else: # try to find the taxonomy extension schema file locally because no full url can be constructed schema_path = resolve_uri(instance_path, schema_uri) - taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache) + taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris) # get all contexts and units xbrl_resources: ET.Element = root.find('.//ix:resources', ns_map) diff --git a/xbrl/taxonomy.py b/xbrl/taxonomy.py index 4150139..f9b078c 100644 --- a/xbrl/taxonomy.py +++ b/xbrl/taxonomy.py @@ -588,26 +588,28 @@ def parse_common_taxonomy(cache: HttpCache, namespace: str) -> TaxonomySchema or @lru_cache(maxsize=60) -def parse_taxonomy_url(schema_url: str, cache: HttpCache) -> TaxonomySchema: +def parse_taxonomy_url(schema_url: str, cache: HttpCache, imported_schema_uris: set = set()) -> 
TaxonomySchema: """ Parses a taxonomy schema file from the internet :param schema_url: full link to the taxonomy schema :param cache: :class:`xbrl.cache.HttpCache` instance + :param imported_schema_uris: set of already imported schema uris :return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object """ if not is_url(schema_url): raise XbrlParseException('This function only parses remotely saved taxonomies. ' 'Please use parse_taxonomy to parse local taxonomy schemas') schema_path: str = cache.cache_file(schema_url) - return parse_taxonomy(schema_path, cache, schema_url) + return parse_taxonomy(schema_path, cache, imported_schema_uris, schema_url) -def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = None) -> TaxonomySchema: +def parse_taxonomy(schema_path: str, cache: HttpCache, imported_schema_uris : set, schema_url: str or None = None) -> TaxonomySchema: """ Parses a taxonomy schema file. :param schema_path: url to the schema (on the internet) :param cache: :class:`xbrl.cache.HttpCache` instance + :param imported_schema_uris: set of already imported schema uris :param schema_url: if this url is set, the script will try to fetch additionally imported files such as linkbases or imported schemas from the remote location. If this url is None, the script will try to find those resources locally. :return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object @@ -633,6 +635,10 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = if import_uri == "": continue + # Skip already imported URIs + if import_uri in imported_schema_uris: + continue + # sometimes the import schema location is relative. 
i.e schemaLocation="xbrl-linkbase-2003-12-31.xsd" if is_url(import_uri): # fetch the schema file from remote @@ -640,11 +646,13 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = elif schema_url: # fetch the schema file from remote by reconstructing the full url import_url = resolve_uri(schema_url, import_uri) + imported_schema_uris.add(import_uri) taxonomy.imports.append(parse_taxonomy_url(import_url, cache)) else: # We have to try to fetch the linkbase locally because no full url can be constructed import_path = resolve_uri(schema_path, import_uri) - taxonomy.imports.append(parse_taxonomy(import_path, cache)) + taxonomy.imports.append(parse_taxonomy(import_path, cache, imported_schema_uris)) + role_type_elements: List[ET.Element] = root.findall('xsd:annotation/xsd:appinfo/link:roleType', NAME_SPACES) # parse ELR's