ENH Prefer requests over urllib2 #3856

Closed · wants to merge 2 commits
70 changes: 56 additions & 14 deletions pandas/io/common.py
@@ -34,23 +34,25 @@ def _is_s3_url(url):
         return False
 
 def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
-    """ if the filepath_or_buffer is a url, translate and return the buffer
-    passthru otherwise
+    """
+    if the filepath_or_buffer is a url, translate and return the buffer
+    passthrough otherwise
 
-    Parameters
-    ----------
-    filepath_or_buffer : a url, filepath, or buffer
-    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
+    Parameters
+    ----------
+    filepath_or_buffer : a url, filepath, or buffer
+    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
 
-    Returns
-    -------
-    a filepath_or_buffer, the encoding
-    """
+    Returns
+    -------
+    a filepath_or_buffer, the encoding
+
+    """
 
     if _is_url(filepath_or_buffer):
-        from urllib2 import urlopen
-        filepath_or_buffer = urlopen(filepath_or_buffer)
+
+        _, filepath_or_buffer = _req_url(filepath_or_buffer)  # raise if not status_code 200?
 
     if py3compat.PY3:  # pragma: no cover
         if encoding:
             errors = 'strict'
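
A usage sketch of the contract the docstring above describes; the URL and local path here are illustrative, not taken from the PR:

# Usage sketch: URLs are translated into a file-like buffer, anything
# else passes through unchanged. Inputs are illustrative.
from pandas.io.common import get_filepath_or_buffer

buf, enc = get_filepath_or_buffer('http://example.com/data.csv')  # buf is file-like

path, enc = get_filepath_or_buffer('/tmp/data.csv')
assert path == '/tmp/data.csv'  # non-URL input passes straight through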
@@ -65,7 +67,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
     if _is_s3_url(filepath_or_buffer):
         try:
             import boto
-        except:
+        except ImportError:
             raise ImportError("boto is required to handle s3 files")
         # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
         # are environment variables
@@ -78,3 +80,43 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
         return filepath_or_buffer, None
 
     return filepath_or_buffer, None
+
+def _req_url(url):
+    '''
+    Retrieves the text output of a request to url;
+    raises on a bad status_code or an invalid url.
+    Prefers the requests module if available.
+
+    Parameters
+    ----------
+    url : string
+
+    Returns
+    -------
+    status_code : int, the HTTP status_code
+    buf_text : the text from the url request
+
+    '''
+    try_requests = True
+    if try_requests:
+        try:
+            import requests
+            resp = requests.get(url)
+            resp.raise_for_status()
+            buf_text = StringIO(resp.text)
+            status_code = resp.status_code
+            return status_code, buf_text
+        except ImportError:
+            pass
+        except (requests.exceptions.InvalidURL,
+                requests.exceptions.InvalidSchema):
+            # requests can't deal with local files
+            pass
+
+    import urllib2
+    resp = urllib2.urlopen(url)
+    # no need to catch urllib2.URLError here just to re-raise it as
+    # ValueError: urlopen already raises on an invalid url
+    status_code = resp.code
+    buf_text = resp  # if status_code == 200 else ''  # If not 200, does it raise?
+    return status_code, buf_text
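
At its core, _req_url is a requests-first fetch with a urllib2 fallback. A minimal standalone sketch of the same pattern (fetch_text is a hypothetical name and the URL is illustrative; this is not the PR's exact code):

# Minimal sketch of the requests-first, urllib2-fallback pattern used
# by _req_url above. (The PR additionally falls back to urllib2 on
# requests' InvalidURL/InvalidSchema so local files still work.)
from StringIO import StringIO

def fetch_text(url):
    try:
        import requests
    except ImportError:
        import urllib2
        resp = urllib2.urlopen(url)  # raises urllib2.URLError on failure
        return resp.code, resp       # file-like; caller handles decoding
    resp = requests.get(url)
    resp.raise_for_status()          # raises on 4xx/5xx status codes
    return resp.status_code, StringIO(resp.text)

status_code, buf = fetch_text('http://example.com/data.csv')  # illustrative URL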
20 changes: 8 additions & 12 deletions pandas/io/data.py
@@ -16,7 +16,7 @@
 
 from pandas import Panel, DataFrame, Series, read_csv, concat
 from pandas.io.parsers import TextParser
-
+from pandas.io.common import _req_url
 
 def DataReader(name, data_source=None, start=None, end=None,
                retry_count=3, pause=0):
@@ -166,10 +166,9 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
           '&ignore=.csv'
 
     for _ in range(retry_count):
-        resp = urllib2.urlopen(url)
-        if resp.code == 200:
-            lines = resp.read()
-            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
+        status_code, buf_text = _req_url(url)
+        if status_code == 200:
+            rs = read_csv(buf_text, index_col=0,
                           parse_dates=True)[::-1]

[Inline review comment from the contributor (author), on the read_csv change:]
As discussed, this could be pulled out into a helper function; also, I seem to have missed out the bytes_to_str, which is presumably there for a reason...

             # Yahoo! Finance sometimes does this awesome thing where they
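
On the missed bytes_to_str: urlopen returns raw bytes (an issue on Python 3), whereas requests' resp.text is already decoded, which is why the new path can skip the conversion. A rough sketch of the difference, with an illustrative URL:

# Rough sketch (not part of the PR) of what the bytes_to_str call was
# covering: urllib2 returns raw bytes, requests decodes for you.
import urllib2
import requests
from StringIO import StringIO

url = 'http://example.com/data.csv'  # illustrative

raw = urllib2.urlopen(url).read()    # bytes; py3 would need decoding
text = raw.decode('utf-8')           # roughly what bytes_to_str does on py3

buf = StringIO(requests.get(url).text)  # .text is already a decoded string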
@@ -206,11 +205,9 @@ def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
               "startdate": start.strftime('%b %d, %Y'), \
               "enddate": end.strftime('%b %d, %Y'), "output": "csv" })
     for _ in range(retry_count):
-        resp = urllib2.urlopen(url)
-        if resp.code == 200:
-            lines = resp.read()
-            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
-                          parse_dates=True)[::-1]
+        status_code, buf_text = _req_url(url)
+        if status_code == 200:
+            rs = read_csv(buf_text, index_col=0, parse_dates=True)[::-1]
 
             return rs
 
@@ -472,8 +469,7 @@ def get_data_fred(name=None, start=dt.datetime(2010, 1, 1),
 
     fred_URL = "http://research.stlouisfed.org/fred2/series/"
 
-    url = fred_URL + '%s' % name + \
-        '/downloaddata/%s' % name + '.csv'
+    url = '%s%s/downloaddata/%s.csv' % (fred_URL, name, name)
     data = read_csv(urllib.urlopen(url), index_col=0, parse_dates=True,
                     header=None, skiprows=1, names=["DATE", name],
                     na_values='.')
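
The FRED change is purely cosmetic; both expressions build the same URL. A quick equivalence check, using a hypothetical series name:

# 'GDP' is a hypothetical series name, for illustration only.
fred_URL = "http://research.stlouisfed.org/fred2/series/"
name = 'GDP'

old_url = fred_URL + '%s' % name + \
    '/downloaddata/%s' % name + '.csv'
new_url = '%s%s/downloaddata/%s.csv' % (fred_URL, name, name)

assert old_url == new_url  # .../fred2/series/GDP/downloaddata/GDP.csv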
4 changes: 4 additions & 0 deletions pandas/io/tests/test_json/test_pandas.py
@@ -350,6 +350,10 @@ def test_url(self):
 
             url = 'http://search.twitter.com/search.json?q=pandas%20python'
             result = read_json(url)
+
+            # gzip compression
+            url = 'https://api.stackexchange.com/2.1/search?page=1&pagesize=10&order=desc&sort=activity&tagged=pandas&site=stackoverflow'
+            result = pd.read_json(url)
 
         except urllib2.URLError:
             raise nose.SkipTest
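
The Stack Exchange case exercises gzip handling: that API serves gzip-compressed bodies, which requests decompresses transparently while urllib2 hands back the raw compressed bytes. A sketch of the difference, assuming the endpoint still serves gzip:

# Sketch (not part of the PR): manual gunzip with urllib2 versus the
# transparent handling in requests.
import gzip
import urllib2
import requests
from cStringIO import StringIO

url = 'https://api.stackexchange.com/2.1/info?site=stackoverflow'  # illustrative

req = urllib2.Request(url, headers={'Accept-Encoding': 'gzip'})
resp = urllib2.urlopen(req)
raw = resp.read()
if resp.info().get('Content-Encoding') == 'gzip':
    raw = gzip.GzipFile(fileobj=StringIO(raw)).read()  # inflate by hand

text = requests.get(url).text  # already decompressed and decoded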