Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for Schema with recursive imports #133

Merged
merged 4 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tests/test_local_taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ def test_parse_taxonomy(self):
cache_dir: str = './cache/'
cache: HttpCache = HttpCache(cache_dir)
print(f"Saving to {cache_dir}")

imported_schema_uris = set()
extension_schema_path: str = './tests/data/example.xsd'
# extension_schema_path: str = './data/example.xsd'
tax: TaxonomySchema = parse_taxonomy(extension_schema_path, cache)
tax: TaxonomySchema = parse_taxonomy(extension_schema_path, cache, imported_schema_uris = set())
print(tax)
srt_tax: TaxonomySchema = tax.get_taxonomy('http://fasb.org/srt/2020-01-31')
self.assertTrue(srt_tax)
Expand Down
14 changes: 11 additions & 3 deletions xbrl/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
# check if the schema uri is relative or absolute
# submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas

if is_url(schema_uri):
# fetch the taxonomy extension schema from remote
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
Expand All @@ -357,7 +358,9 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
else:
# try to find the taxonomy extension schema file locally because no full url can be constructed
schema_path = resolve_uri(instance_path, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
# initialise a set that will store the URIs of already imported taxonomy schemas to avoid recursive loops
imported_schema_uris = set()
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris)

# parse contexts and units
context_dir = _parse_context_elements(root.findall('xbrli:context', NAME_SPACES), root.attrib['ns_map'], taxonomy,
Expand Down Expand Up @@ -453,21 +456,26 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
# check if the schema uri is relative or absolute
# submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas

# initialise a set that will store the URIs of already imported taxonomy schemas to avoid recursive loops
imported_schema_uris = set()


if is_url(schema_uri):
# fetch the taxonomy extension schema from remote
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
elif schema_root:
# take the given schema_root path as directory for searching for the taxonomy schema
schema_path = str(next(Path(schema_root).glob(f'**/{schema_uri}')))
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris)
elif instance_url:
# fetch the taxonomy extension schema from remote by reconstructing the url
schema_url = resolve_uri(instance_url, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache)
else:
# try to find the taxonomy extension schema file locally because no full url can be constructed
schema_path = resolve_uri(instance_path, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris)

# get all contexts and units
xbrl_resources: ET.Element = root.find('.//ix:resources', ns_map)
Expand Down
16 changes: 12 additions & 4 deletions xbrl/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,26 +588,28 @@ def parse_common_taxonomy(cache: HttpCache, namespace: str) -> TaxonomySchema or


@lru_cache(maxsize=60)
def parse_taxonomy_url(schema_url: str, cache: HttpCache) -> TaxonomySchema:
def parse_taxonomy_url(schema_url: str, cache: HttpCache, imported_schema_uris: set = set()) -> TaxonomySchema:
"""
Parses a taxonomy schema file from the internet

:param schema_url: full link to the taxonomy schema
:param cache: :class:`xbrl.cache.HttpCache` instance
:param imported_schema_uris: set of already imported schema uris
:return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object
"""
if not is_url(schema_url): raise XbrlParseException('This function only parses remotely saved taxonomies. '
'Please use parse_taxonomy to parse local taxonomy schemas')
schema_path: str = cache.cache_file(schema_url)
return parse_taxonomy(schema_path, cache, schema_url)
return parse_taxonomy(schema_path, cache, imported_schema_uris, schema_url)


def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = None) -> TaxonomySchema:
def parse_taxonomy(schema_path: str, cache: HttpCache, imported_schema_uris : set, schema_url: str or None = None) -> TaxonomySchema:
"""
Parses a taxonomy schema file.

:param schema_path: url to the schema (on the internet)
:param cache: :class:`xbrl.cache.HttpCache` instance
:param imported_schema_uris: set of already imported schema uris
:param schema_url: if this url is set, the script will try to fetch additionally imported files such as linkbases or
imported schemas from the remote location. If this url is None, the script will try to find those resources locally.
:return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object
Expand All @@ -633,18 +635,24 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
if import_uri == "":
continue

# Skip already imported URIs
if import_uri in imported_schema_uris:
continue

# sometimes the import schema location is relative, e.g. schemaLocation="xbrl-linkbase-2003-12-31.xsd"
if is_url(import_uri):
# fetch the schema file from remote
taxonomy.imports.append(parse_taxonomy_url(import_uri, cache))
elif schema_url:
# fetch the schema file from remote by reconstructing the full url
import_url = resolve_uri(schema_url, import_uri)
imported_schema_uris.add(import_uri)
taxonomy.imports.append(parse_taxonomy_url(import_url, cache))
else:
# We have to try to fetch the linkbase locally because no full url can be constructed
import_path = resolve_uri(schema_path, import_uri)
taxonomy.imports.append(parse_taxonomy(import_path, cache))
taxonomy.imports.append(parse_taxonomy(import_path, cache, imported_schema_uris))


role_type_elements: List[ET.Element] = root.findall('xsd:annotation/xsd:appinfo/link:roleType', NAME_SPACES)
# parse ELR's
Expand Down
Loading