Skip to content

Commit

Permalink
Merge pull request #133 from Sam-el0/sam-recurfix
Browse files Browse the repository at this point in the history
Fix for Schema with recursive imports
  • Loading branch information
manusimidt authored May 15, 2024
2 parents cbc28fc + e28b5e2 commit 2205e06
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 9 deletions.
4 changes: 2 additions & 2 deletions tests/test_local_taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ def test_parse_taxonomy(self):
cache_dir: str = './cache/'
cache: HttpCache = HttpCache(cache_dir)
print(f"Saving to {cache_dir}")

imported_schema_uris = set()
extension_schema_path: str = './tests/data/example.xsd'
# extension_schema_path: str = './data/example.xsd'
tax: TaxonomySchema = parse_taxonomy(extension_schema_path, cache)
tax: TaxonomySchema = parse_taxonomy(extension_schema_path, cache, imported_schema_uris = set())
print(tax)
srt_tax: TaxonomySchema = tax.get_taxonomy('http://fasb.org/srt/2020-01-31')
self.assertTrue(srt_tax)
Expand Down
14 changes: 11 additions & 3 deletions xbrl/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
# check if the schema uri is relative or absolute
# submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas

if is_url(schema_uri):
# fetch the taxonomy extension schema from remote
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
Expand All @@ -357,7 +358,9 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
else:
# try to find the taxonomy extension schema file locally because no full url can be constructed
schema_path = resolve_uri(instance_path, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
# initialise a set that will store the URIs of cached taxonomy schemas to avoid recursive loops
imported_schema_uris = set()
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris)

# parse contexts and units
context_dir = _parse_context_elements(root.findall('xbrli:context', NAME_SPACES), root.attrib['ns_map'], taxonomy,
Expand Down Expand Up @@ -453,21 +456,26 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
# check if the schema uri is relative or absolute
# submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas

# initialise a set that will store the URIs of cached taxonomy schemas to avoid recursive loops
imported_schema_uris = set()


if is_url(schema_uri):
# fetch the taxonomy extension schema from remote
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
elif schema_root:
# take the given schema_root path as directory for searching for the taxonomy schema
schema_path = str(next(Path(schema_root).glob(f'**/{schema_uri}')))
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris)
elif instance_url:
# fetch the taxonomy extension schema from remote by reconstructing the url
schema_url = resolve_uri(instance_url, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache)
else:
# try to find the taxonomy extension schema file locally because no full url can be constructed
schema_path = resolve_uri(instance_path, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris)

# get all contexts and units
xbrl_resources: ET.Element = root.find('.//ix:resources', ns_map)
Expand Down
16 changes: 12 additions & 4 deletions xbrl/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,26 +588,28 @@ def parse_common_taxonomy(cache: HttpCache, namespace: str) -> TaxonomySchema or


@lru_cache(maxsize=60)
def parse_taxonomy_url(schema_url: str, cache: HttpCache) -> TaxonomySchema:
def parse_taxonomy_url(schema_url: str, cache: HttpCache, imported_schema_uris: set = set()) -> TaxonomySchema:
"""
Parses a taxonomy schema file from the internet
:param schema_url: full link to the taxonomy schema
:param cache: :class:`xbrl.cache.HttpCache` instance
:param imported_schema_uris: set of already imported schema uris
:return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object
"""
if not is_url(schema_url): raise XbrlParseException('This function only parses remotely saved taxonomies. '
'Please use parse_taxonomy to parse local taxonomy schemas')
schema_path: str = cache.cache_file(schema_url)
return parse_taxonomy(schema_path, cache, schema_url)
return parse_taxonomy(schema_path, cache, imported_schema_uris, schema_url)


def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = None) -> TaxonomySchema:
def parse_taxonomy(schema_path: str, cache: HttpCache, imported_schema_uris : set, schema_url: str or None = None) -> TaxonomySchema:
"""
Parses a taxonomy schema file.
:param schema_path: path to the schema file (a local file, or the cached copy of a remote schema)
:param cache: :class:`xbrl.cache.HttpCache` instance
:param imported_schema_uris: set of already imported schema uris
:param schema_url: if this url is set, the script will try to fetch additionally imported files such as linkbases or
imported schemas from the remote location. If this url is None, the script will try to find those resources locally.
:return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object
Expand All @@ -633,18 +635,24 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
if import_uri == "":
continue

# Skip already imported URIs
if import_uri in imported_schema_uris:
continue

# sometimes the import schema location is relative. i.e schemaLocation="xbrl-linkbase-2003-12-31.xsd"
if is_url(import_uri):
# fetch the schema file from remote
taxonomy.imports.append(parse_taxonomy_url(import_uri, cache))
elif schema_url:
# fetch the schema file from remote by reconstructing the full url
import_url = resolve_uri(schema_url, import_uri)
imported_schema_uris.add(import_uri)
taxonomy.imports.append(parse_taxonomy_url(import_url, cache))
else:
# We have to try to fetch the linkbase locally because no full url can be constructed
import_path = resolve_uri(schema_path, import_uri)
taxonomy.imports.append(parse_taxonomy(import_path, cache))
taxonomy.imports.append(parse_taxonomy(import_path, cache, imported_schema_uris))


role_type_elements: List[ET.Element] = root.findall('xsd:annotation/xsd:appinfo/link:roleType', NAME_SPACES)
# parse ELR's
Expand Down

0 comments on commit 2205e06

Please sign in to comment.