def _find_base_tds_url(catalog_url):
    """
    Identify the base URL of the THREDDS server from the catalog URL.

    Will retain URL scheme, host, port and username/password when present.

    Parameters
    ----------
    catalog_url : str
        URL of a catalog document on the server.

    Returns
    -------
    str
        The scheme and network location of ``catalog_url`` with the path,
        query and fragment removed. When the URL has no network location,
        whatever precedes the path is returned (possibly an empty string).
    """
    url_components = urlparse(catalog_url)

    if url_components.netloc:
        # Rebuild from the parsed pieces instead of splitting the raw string
        # on the path: the path text can also occur inside the
        # ``scheme://host`` prefix (a path of ``/`` matches the ``//`` after
        # the scheme; ``/data`` matches inside ``//data.org``), which would
        # truncate the result in the wrong place.
        if url_components.scheme:
            return '{}://{}'.format(url_components.scheme,
                                    url_components.netloc)
        # Scheme-relative URL such as ``//host/thredds/catalog.xml``.
        return '//' + url_components.netloc

    if url_components.path:
        # No authority to rebuild from; keep whatever precedes the path.
        return catalog_url.split(url_components.path)[0]

    return catalog_url
@recorder.use_cassette('cat_non_standard_context_path')
def test_non_standard_context_path():
    """Check access URLs for a TDS deployed under a non-standard context path."""
    # This server is served from '/ereefs/tds/' rather than '/thredds/'.
    catalog = TDSCatalog(
        'http://ereeftds.bom.gov.au/ereefs/tds/catalog/ereef/mwq/P1A/catalog.xml')
    dataset = catalog.datasets['A20020101.P1A.ANN_MIM_RMP.nc']
    opendap_url = dataset.access_urls['OPENDAP']
    assert opendap_url == ('http://ereeftds.bom.gov.au/ereefs/tds/dodsC/ereef/mwq/'
                           'P1A/A20020101.P1A.ANN_MIM_RMP.nc')