From 842ea082b2df390435e7439b9db6a6d67d9fb1df Mon Sep 17 00:00:00 2001 From: Alexander Medeiros Date: Tue, 3 Oct 2023 16:34:52 -0400 Subject: [PATCH] Explicitly match protocol Encountered while processing SEC EDGAR filing 0001010549-18-000409 which contains filenames that start with http, including the custom taxonomy. --- xbrl/helper/uri_helper.py | 8 ++++---- xbrl/instance.py | 6 +++--- xbrl/taxonomy.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/xbrl/helper/uri_helper.py b/xbrl/helper/uri_helper.py index eb3eb9f..572d75e 100644 --- a/xbrl/helper/uri_helper.py +++ b/xbrl/helper/uri_helper.py @@ -18,15 +18,15 @@ def resolve_uri(dir_uri: str, relative_uri: str) -> str: @param relative_uri: @return: """ - if relative_uri.startswith('http'): + if relative_uri.startswith('http://') or relative_uri.startswith('https://'): return relative_uri # remove redundant characters in the relative uri if relative_uri.startswith('/'): relative_uri = relative_uri[1:] if relative_uri.startswith('./'): relative_uri = relative_uri[2:] - + dir_uri = str(dir_uri) - if not dir_uri.startswith('http'): + if not dir_uri.startswith('http://') and not dir_uri.startswith('https://'): # check if the dir_uri was really a path to a directory or a file if '.' in dir_uri.split(os.sep)[-1]: return os.path.normpath(os.path.dirname(dir_uri) + os.sep + relative_uri) @@ -40,7 +40,7 @@ def resolve_uri(dir_uri: str, relative_uri: str) -> str: dir_uri += '/' absolute_uri = dir_uri + relative_uri - if not dir_uri.startswith('http'): + if not dir_uri.startswith('http://') and not dir_uri.startswith('https://'): # make sure the path is correct absolute_uri = os.path.normpath(absolute_uri) diff --git a/xbrl/instance.py b/xbrl/instance.py index 1cca37b..87e37a1 100644 --- a/xbrl/instance.py +++ b/xbrl/instance.py @@ -347,7 +347,7 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None = schema_uri: str = schema_ref.attrib[XLINK_NS + 'href'] # check if the schema uri is relative or absolute # submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas - if schema_uri.startswith('http'): + if schema_uri.startswith('http://') or schema_uri.startswith("https://"): # fetch the taxonomy extension schema from remote taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache) elif instance_url: @@ -727,9 +727,9 @@ def parse_instance(self, uri: str, instance_url: str or None = None, encoding: s :return: """ if uri.split('.')[-1] == 'xml' or uri.split('.')[-1] == 'xbrl': - return parse_xbrl_url(uri, self.cache) if uri.startswith('http') \ + return parse_xbrl_url(uri, self.cache) if uri.startswith('http://') or uri.startswith('https://') \ else parse_xbrl(uri, self.cache, instance_url) - return parse_ixbrl_url(uri, self.cache) if uri.startswith('http') \ + return parse_ixbrl_url(uri, self.cache) if uri.startswith('http://') or uri.startswith('https://') \ else parse_ixbrl(uri, self.cache, instance_url, encoding) def __str__(self) -> str: diff --git a/xbrl/taxonomy.py b/xbrl/taxonomy.py index 1a464b2..f6aafcc 100644 --- a/xbrl/taxonomy.py +++ b/xbrl/taxonomy.py @@ -593,7 +593,7 @@ def parse_taxonomy_url(schema_url: str, cache: HttpCache) -> TaxonomySchema: :param cache: :class:`xbrl.cache.HttpCache` instance :return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object """ - if not schema_url.startswith('http'): raise XbrlParseException( + if not schema_url.startswith('http://') and not schema_url.startswith('https://'): raise XbrlParseException( 'This function only parses remotely saved taxonomies. Please use parse_taxonomy to parse local taxonomy schemas') schema_path: str = cache.cache_file(schema_url) @@ -611,7 +611,7 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = :return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object """ schema_path = str(schema_path) - if schema_path.startswith('http'): raise XbrlParseException( + if schema_path.startswith('http://') or schema_path.startswith('https://'): raise XbrlParseException( 'This function only parses locally saved taxonomies. Please use parse_taxonomy_url to parse remote taxonomy schemas') if not os.path.exists(schema_path): raise TaxonomyNotFound(f"Could not find taxonomy schema at {schema_path}") @@ -632,7 +632,7 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = continue # sometimes the import schema location is relative. i.e schemaLocation="xbrl-linkbase-2003-12-31.xsd" - if import_uri.startswith('http'): + if import_uri.startswith('http://') or import_uri.startswith('https://'): # fetch the schema file from remote taxonomy.imports.append(parse_taxonomy_url(import_uri, cache)) elif schema_url: @@ -683,7 +683,7 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = linkbase_uri) # check if the linkbase url is relative - if linkbase_uri.startswith('http'): + if linkbase_uri.startswith('http://') or linkbase_uri.startswith('https://'): # fetch the linkbase from remote linkbase: Linkbase = parse_linkbase_url(linkbase_uri, linkbase_type, cache) elif schema_url: