Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for accessing THREDDS catalogs hosted at a Context Path other tha… #108

Merged
merged 4 commits into from
Sep 9, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions siphon/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
from .http_util import create_http_session, urlopen

try:
from urlparse import urljoin
from urlparse import urljoin, urlparse
except ImportError:
# Python 3
from urllib.parse import urljoin
from urllib.parse import urljoin, urlparse

log = logging.getLogger(__name__)
log.addHandler(logging.StreamHandler()) # Python 2.7 needs a handler set
Expand Down Expand Up @@ -56,7 +56,7 @@ def __init__(self, catalog_url):
"""
# top level server url
self.catalog_url = catalog_url
self.base_tds_url = catalog_url.split('/thredds/')[0]
self.base_tds_url = _find_base_tds_url(catalog_url)

session = create_http_session()

Expand Down Expand Up @@ -277,7 +277,7 @@ def make_access_urls(self, catalog_url, all_services, metadata=None):
service_name = metadata["serviceName"]

access_urls = {}
server_url = catalog_url.split('/thredds/')[0]
server_url = _find_base_tds_url(catalog_url)

found_service = None
if service_name:
Expand Down Expand Up @@ -369,6 +369,19 @@ def __init__(self, service_node):
self.number_of_subservices = subservices


def _find_base_tds_url(catalog_url):
"""
Identify the base URL of the THREDDS server from the catalog URL.

Will retain URL scheme, host, port and username/password when present.
"""
url_components = urlparse(catalog_url)
if url_components.path:
return catalog_url.split(url_components.path)[0]
else:
return catalog_url


def _get_latest_cat(catalog_url):
r"""
Get the latest dataset catalog from the supplied top level dataset catalog
Expand Down
76 changes: 76 additions & 0 deletions siphon/tests/fixtures/cat_non_standard_context_path
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
interactions:
- request:
body: null
headers:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [Siphon (0.4.0+17.gb19adc3.dirty)]
method: GET
uri: http://ereeftds.bom.gov.au/ereefs/tds/catalog/ereef/mwq/P1A/catalog.xml
response:
body: {string: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<catalog xmlns=\"\
http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0\" xmlns:xlink=\"\
http://www.w3.org/1999/xlink\" version=\"1.0.1\">\r\n <service name=\"nongrid\"\
\ serviceType=\"Compound\" base=\"\">\r\n <service name=\"ncdods\" serviceType=\"\
OPENDAP\" base=\"/ereefs/tds/dodsC/\" />\r\n <service name=\"HTTPServer\"\
\ serviceType=\"HTTPServer\" base=\"/ereefs/tds/fileServer/\" />\r\n </service>\r\
\n <dataset name=\"P1A\" ID=\"mwq/P1A\">\r\n <metadata inherited=\"true\"\
>\r\n <serviceName>nongrid</serviceName>\r\n <authority>au.gov.bom</authority>\r\
\n <dataType>GRID</dataType>\r\n <dataFormat>netCDF</dataFormat>\r\
\n <documentation type=\"rights\">Registered research users only</documentation>\r\
\n </metadata>\r\n <dataset name=\"A20020101.P1A.ANN_MIM_RMP.nc\" ID=\"\
mwq/P1A/A20020101.P1A.ANN_MIM_RMP.nc\" urlPath=\"ereef/mwq/P1A/A20020101.P1A.ANN_MIM_RMP.nc\"\
>\r\n <dataSize units=\"Mbytes\">94.89</dataSize>\r\n <date type=\"\
modified\">2014-03-09T20:51:38Z</date>\r\n </dataset>\r\n <dataset name=\"\
A20030101.P1A.ANN_MIM_RMP.nc\" ID=\"mwq/P1A/A20030101.P1A.ANN_MIM_RMP.nc\"\
\ urlPath=\"ereef/mwq/P1A/A20030101.P1A.ANN_MIM_RMP.nc\">\r\n <dataSize\
\ units=\"Mbytes\">97.71</dataSize>\r\n <date type=\"modified\">2014-03-09T21:12:03Z</date>\r\
\n </dataset>\r\n <dataset name=\"A20040101.P1A.ANN_MIM_RMP.nc\" ID=\"\
mwq/P1A/A20040101.P1A.ANN_MIM_RMP.nc\" urlPath=\"ereef/mwq/P1A/A20040101.P1A.ANN_MIM_RMP.nc\"\
>\r\n <dataSize units=\"Mbytes\">98.36</dataSize>\r\n <date type=\"\
modified\">2014-03-09T12:39:31Z</date>\r\n </dataset>\r\n <dataset name=\"\
A20050101.P1A.ANN_MIM_RMP.nc\" ID=\"mwq/P1A/A20050101.P1A.ANN_MIM_RMP.nc\"\
\ urlPath=\"ereef/mwq/P1A/A20050101.P1A.ANN_MIM_RMP.nc\">\r\n <dataSize\
\ units=\"Mbytes\">97.27</dataSize>\r\n <date type=\"modified\">2014-03-09T12:39:52Z</date>\r\
\n </dataset>\r\n <dataset name=\"A20060101.P1A.ANN_MIM_RMP.nc\" ID=\"\
mwq/P1A/A20060101.P1A.ANN_MIM_RMP.nc\" urlPath=\"ereef/mwq/P1A/A20060101.P1A.ANN_MIM_RMP.nc\"\
>\r\n <dataSize units=\"Mbytes\">98.44</dataSize>\r\n <date type=\"\
modified\">2014-03-09T12:43:41Z</date>\r\n </dataset>\r\n <dataset name=\"\
A20070101.P1A.ANN_MIM_RMP.nc\" ID=\"mwq/P1A/A20070101.P1A.ANN_MIM_RMP.nc\"\
\ urlPath=\"ereef/mwq/P1A/A20070101.P1A.ANN_MIM_RMP.nc\">\r\n <dataSize\
\ units=\"Mbytes\">97.62</dataSize>\r\n <date type=\"modified\">2014-03-09T12:43:09Z</date>\r\
\n </dataset>\r\n <dataset name=\"A20080101.P1A.ANN_MIM_RMP.nc\" ID=\"\
mwq/P1A/A20080101.P1A.ANN_MIM_RMP.nc\" urlPath=\"ereef/mwq/P1A/A20080101.P1A.ANN_MIM_RMP.nc\"\
>\r\n <dataSize units=\"Mbytes\">98.55</dataSize>\r\n <date type=\"\
modified\">2014-03-09T12:44:09Z</date>\r\n </dataset>\r\n <dataset name=\"\
A20090101.P1A.ANN_MIM_RMP.nc\" ID=\"mwq/P1A/A20090101.P1A.ANN_MIM_RMP.nc\"\
\ urlPath=\"ereef/mwq/P1A/A20090101.P1A.ANN_MIM_RMP.nc\">\r\n <dataSize\
\ units=\"Mbytes\">97.67</dataSize>\r\n <date type=\"modified\">2014-03-09T12:44:10Z</date>\r\
\n </dataset>\r\n <dataset name=\"A20100101.P1A.ANN_MIM_RMP.nc\" ID=\"\
mwq/P1A/A20100101.P1A.ANN_MIM_RMP.nc\" urlPath=\"ereef/mwq/P1A/A20100101.P1A.ANN_MIM_RMP.nc\"\
>\r\n <dataSize units=\"Mbytes\">97.26</dataSize>\r\n <date type=\"\
modified\">2014-03-09T12:44:32Z</date>\r\n </dataset>\r\n <dataset name=\"\
A20110101.P1A.ANN_MIM_RMP.nc\" ID=\"mwq/P1A/A20110101.P1A.ANN_MIM_RMP.nc\"\
\ urlPath=\"ereef/mwq/P1A/A20110101.P1A.ANN_MIM_RMP.nc\">\r\n <dataSize\
\ units=\"Mbytes\">98.59</dataSize>\r\n <date type=\"modified\">2014-03-09T12:42:33Z</date>\r\
\n </dataset>\r\n <dataset name=\"A20120101.P1A.ANN_MIM_RMP.nc\" ID=\"\
mwq/P1A/A20120101.P1A.ANN_MIM_RMP.nc\" urlPath=\"ereef/mwq/P1A/A20120101.P1A.ANN_MIM_RMP.nc\"\
>\r\n <dataSize units=\"Mbytes\">98.09</dataSize>\r\n <date type=\"\
modified\">2014-03-09T12:40:15Z</date>\r\n </dataset>\r\n <dataset name=\"\
A20130101.P1A.ANN_MIM_RMP.nc\" ID=\"mwq/P1A/A20130101.P1A.ANN_MIM_RMP.nc\"\
\ urlPath=\"ereef/mwq/P1A/A20130101.P1A.ANN_MIM_RMP.nc\">\r\n <dataSize\
\ units=\"Mbytes\">98.19</dataSize>\r\n <date type=\"modified\">2014-03-09T12:42:53Z</date>\r\
\n </dataset>\r\n <dataset name=\"A20140101.316.P1A.ANN_MIM_RMP.nc\"\
\ ID=\"mwq/P1A/A20140101.316.P1A.ANN_MIM_RMP.nc\" urlPath=\"ereef/mwq/P1A/A20140101.316.P1A.ANN_MIM_RMP.nc\"\
>\r\n <dataSize units=\"Mbytes\">100.6</dataSize>\r\n <date type=\"\
modified\">2015-01-06T01:05:32Z</date>\r\n </dataset>\r\n </dataset>\r\
\n</catalog>\r\n"}
headers:
Connection: [close]
Content-Language: [en-US]
Content-Type: [application/xml;charset=UTF-8]
Date: ['Fri, 09 Sep 2016 00:24:21 GMT']
Server: [Apache-Coyote/1.1]
status: {code: 200, message: OK}
version: 1
11 changes: 11 additions & 0 deletions siphon/tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,14 @@ def test_catalog_ref_order():
assert list(cat.catalog_refs) == ['Forecast Model Data', 'Forecast Products and Analyses',
'Observation Data', 'Radar Data', 'Satellite Data',
'Unidata case studies']


@recorder.use_cassette('cat_non_standard_context_path')
def test_non_standard_context_path():
    """Test accessing TDS with non-standard Context Path."""
    # The catalog lives under /ereefs/tds rather than the usual /thredds.
    catalog_url = 'http://ereeftds.bom.gov.au/ereefs/tds/catalog/ereef/mwq/P1A/catalog.xml'
    dataset = TDSCatalog(catalog_url).datasets['A20020101.P1A.ANN_MIM_RMP.nc']
    opendap_url = dataset.access_urls['OPENDAP']
    # The access URL must be the server root plus the service base and urlPath.
    assert opendap_url == ('http://ereeftds.bom.gov.au/ereefs/tds/dodsC/ereef/mwq/'
                           'P1A/A20020101.P1A.ANN_MIM_RMP.nc')