diff --git a/xbrl/helper/uri_helper.py b/xbrl/helper/uri_helper.py index 572d75e..3c745ed 100644 --- a/xbrl/helper/uri_helper.py +++ b/xbrl/helper/uri_helper.py @@ -18,15 +18,14 @@ def resolve_uri(dir_uri: str, relative_uri: str) -> str: @param relative_uri: @return: """ - if relative_uri.startswith('http://') or relative_uri.startswith('https://'): - return relative_uri + if is_url(relative_uri): return relative_uri # remove redundant characters in the relative uri if relative_uri.startswith('/'): relative_uri = relative_uri[1:] if relative_uri.startswith('./'): relative_uri = relative_uri[2:] dir_uri = str(dir_uri) - if not dir_uri.startswith('http://') and not dir_uri.startswith('https://'): + if not is_url(dir_uri): # check if the dir_uri was really a path to a directory or a file if '.' in dir_uri.split(os.sep)[-1]: return os.path.normpath(os.path.dirname(dir_uri) + os.sep + relative_uri) @@ -40,7 +39,7 @@ def resolve_uri(dir_uri: str, relative_uri: str) -> str: dir_uri += '/' absolute_uri = dir_uri + relative_uri - if not dir_uri.startswith('http://') and not dir_uri.startswith('https://'): + if not is_url(dir_uri): # make sure the path is correct absolute_uri = os.path.normpath(absolute_uri) @@ -74,3 +73,7 @@ def compare_uri(uri1: str, uri2: str) -> bool: uri1_segments: [str] = re.findall(r"[\w']+", uri1) uri2_segments: [str] = re.findall(r"[\w']+", uri2) return uri1_segments == uri2_segments + + +def is_url(candidate: str) -> bool: + return candidate.lower().startswith('http://') or candidate.lower().startswith('https://') diff --git a/xbrl/instance.py b/xbrl/instance.py index 87e37a1..3b1fba4 100644 --- a/xbrl/instance.py +++ b/xbrl/instance.py @@ -15,7 +15,7 @@ from pathlib import Path from xbrl import TaxonomyNotFound, InstanceParseException from xbrl.cache import HttpCache -from xbrl.helper.uri_helper import resolve_uri +from xbrl.helper.uri_helper import resolve_uri, is_url from xbrl.helper.xml_parser import parse_file from xbrl.taxonomy import Concept, TaxonomySchema, parse_taxonomy, parse_common_taxonomy, parse_taxonomy_url from xbrl.transformations import normalize, TransformationException, TransformationNotImplemented @@ -347,7 +347,7 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None = schema_uri: str = schema_ref.attrib[XLINK_NS + 'href'] # check if the schema uri is relative or absolute # submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas - if schema_uri.startswith('http://') or schema_uri.startswith("https://"): + if is_url(schema_uri): # fetch the taxonomy extension schema from remote taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache) elif instance_url: @@ -423,7 +423,8 @@ def parse_ixbrl_url(instance_url: str, cache: HttpCache, encoding: str or None = return parse_ixbrl(instance_path, cache, instance_url, encoding) -def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None = None, encoding=None, schema_root=None) -> XbrlInstance: +def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None = None, encoding=None, + schema_root=None) -> XbrlInstance: """ Parses a inline XBRL (iXBRL) instance file. @@ -452,7 +453,7 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None schema_uri: str = schema_ref.attrib[XLINK_NS + 'href'] # check if the schema uri is relative or absolute # submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas - if schema_uri.startswith('http'): + if is_url(schema_uri): # fetch the taxonomy extension schema from remote taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache) elif schema_root: @@ -727,10 +728,8 @@ def parse_instance(self, uri: str, instance_url: str or None = None, encoding: s :return: """ if uri.split('.')[-1] == 'xml' or uri.split('.')[-1] == 'xbrl': - return parse_xbrl_url(uri, self.cache) if uri.startswith('http://') or uri.startswith('https://') \ - else parse_xbrl(uri, self.cache, instance_url) - return parse_ixbrl_url(uri, self.cache) if uri.startswith('http://') or uri.startswith('https://') \ - else parse_ixbrl(uri, self.cache, instance_url, encoding) + return parse_xbrl_url(uri, self.cache) if is_url(uri) else parse_xbrl(uri, self.cache, instance_url) + return parse_ixbrl_url(uri, self.cache) if is_url(uri) else parse_ixbrl(uri, self.cache, instance_url, encoding) def __str__(self) -> str: return 'XbrlParser with cache dir at {}'.format(self.cache.cache_dir) diff --git a/xbrl/linkbase.py b/xbrl/linkbase.py index eaf1f4b..d11ed67 100644 --- a/xbrl/linkbase.py +++ b/xbrl/linkbase.py @@ -7,7 +7,7 @@ from xbrl import XbrlParseException, LinkbaseNotFoundException from xbrl.cache import HttpCache -from xbrl.helper.uri_helper import resolve_uri +from xbrl.helper.uri_helper import resolve_uri, is_url LINK_NS: str = "{http://www.xbrl.org/2003/linkbase}" XLINK_NS: str = "{http://www.w3.org/1999/xlink}" @@ -383,7 +383,8 @@ class Linkbase: Represents a complete Linkbase (non-generic). """ - def __init__(self, extended_links: List[ExtendedLink], linkbase_type: LinkbaseType, linkbase_uri: None or str = None) -> None: + def __init__(self, extended_links: List[ExtendedLink], linkbase_type: LinkbaseType, + linkbase_uri: None or str = None) -> None: """ :param extended_links: All standard extended links that are defined in the linkbase :type extended_links: [ExtendedDefinitionLink] or [ExtendedCalculationLink] or [ExtendedPresentationLink] or [ExtendedLabelArc] @@ -418,8 +419,8 @@ def parse_linkbase_url(linkbase_url: str, linkbase_type: LinkbaseType, cache: Ht :param cache: :class:`xbrl.cache.HttpCache` instance :return: parsed :class:`xbrl.linkbase.Linkbase` object """ - if not linkbase_url.startswith('http'): raise XbrlParseException( - 'This function only parses remotely saved linkbases. Please use parse_linkbase to parse local linkbases') + if not is_url(linkbase_url): raise XbrlParseException('This function only parses remotely saved linkbases. ' + 'Please use parse_linkbase to parse local linkbases') linkbase_path: str = cache.cache_file(linkbase_url) return parse_linkbase(linkbase_path, linkbase_type, linkbase_url) @@ -439,8 +440,8 @@ def parse_linkbase(linkbase_path: str, linkbase_type: LinkbaseType, linkbase_url the locator with concept from the taxonomy :return: parsed :class:`xbrl.linkbase.Linkbase` object """ - if linkbase_path.startswith('http'): raise XbrlParseException( - 'This function only parses locally saved linkbases. Please use parse_linkbase_url to parse remote linkbases') + if is_url(linkbase_path): raise XbrlParseException('This function only parses locally saved linkbases. ' + 'Please use parse_linkbase_url to parse remote linkbases') if not os.path.exists(linkbase_path): raise LinkbaseNotFoundException(f"Could not find linkbase at {linkbase_path}") @@ -486,7 +487,7 @@ def parse_linkbase(linkbase_path: str, linkbase_type: LinkbaseType, linkbase_url loc_label: str = loc.attrib[XLINK_NS + 'label'] # check if the locator href is absolute locator_href = loc.attrib[XLINK_NS + 'href'] - if not locator_href.startswith('http'): + if not is_url(locator_href): # resolve the path # todo, try to get the URL here, instead of the path!!! locator_href = resolve_uri(linkbase_url if linkbase_url else linkbase_path, locator_href) diff --git a/xbrl/taxonomy.py b/xbrl/taxonomy.py index f6aafcc..4eceea3 100644 --- a/xbrl/taxonomy.py +++ b/xbrl/taxonomy.py @@ -10,7 +10,7 @@ from xbrl import XbrlParseException, TaxonomyNotFound from xbrl.cache import HttpCache -from xbrl.helper.uri_helper import resolve_uri, compare_uri +from xbrl.helper.uri_helper import resolve_uri, compare_uri, is_url from xbrl.linkbase import Linkbase, ExtendedLink, LinkbaseType, parse_linkbase, parse_linkbase_url, Label logger = logging.getLogger(__name__) @@ -593,9 +593,8 @@ def parse_taxonomy_url(schema_url: str, cache: HttpCache) -> TaxonomySchema: :param cache: :class:`xbrl.cache.HttpCache` instance :return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object """ - if not schema_url.startswith('http://') and not schema_url.startswith('https://'): raise XbrlParseException( - 'This function only parses remotely saved taxonomies. Please use parse_taxonomy to parse local taxonomy schemas') - + if not is_url(schema_url): raise XbrlParseException('This function only parses remotely saved taxonomies. ' + 'Please use parse_taxonomy to parse local taxonomy schemas') schema_path: str = cache.cache_file(schema_url) return parse_taxonomy(schema_path, cache, schema_url) @@ -611,8 +610,8 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = :return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object """ schema_path = str(schema_path) - if schema_path.startswith('http://') or schema_path.startswith('https://'): raise XbrlParseException( - 'This function only parses locally saved taxonomies. Please use parse_taxonomy_url to parse remote taxonomy schemas') + if is_url(schema_path): raise XbrlParseException('This function only parses locally saved taxonomies. ' + 'Please use parse_taxonomy_url to parse remote taxonomy schemas') if not os.path.exists(schema_path): raise TaxonomyNotFound(f"Could not find taxonomy schema at {schema_path}") @@ -632,7 +631,7 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = continue # sometimes the import schema location is relative. i.e schemaLocation="xbrl-linkbase-2003-12-31.xsd" - if import_uri.startswith('http://') or import_uri.startswith('https://'): + if is_url(import_uri): # fetch the schema file from remote taxonomy.imports.append(parse_taxonomy_url(import_uri, cache)) elif schema_url: @@ -683,7 +682,7 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = linkbase_uri) # check if the linkbase url is relative - if linkbase_uri.startswith('http://') or linkbase_uri.startswith('https://'): + if is_url(linkbase_uri): # fetch the linkbase from remote linkbase: Linkbase = parse_linkbase_url(linkbase_uri, linkbase_type, cache) elif schema_url: