-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Data Sources] Implement Apache Drill (#3188)
* Added support for Apache Drill datasource
* Improvements in `Drill` query runner and minor refactoring:
  1. Drill query runner now inherits from `BaseHTTPQueryRunner`, because they share a lot of common code.
  2. `BaseHTTPQueryRunner.get_response` method now accepts an `http_method` argument (the original implementation was only capable of sending `GET` HTTP requests).
  3. Added `order` to the `BaseHTTPQueryRunner` configuration schema to fix the order of UI elements based on the schema.
  4. Eliminated the duplicate method `_guess_type` in the `GoogleSpreadsheet`, `Results` and `Drill` query runners; moved `guess_type` to `redash.query_runner`.
  5. Removed the tests for `_guess_type` in the `GoogleSpreadsheet`, `Results` and `Drill` query runners, merged them into a single test case and moved it to `tests.query_runner.test_utils`.
  6. Various minor changes (code style, imports, etc).
- Loading branch information
1 parent
445f8e5
commit 0b6f1fc
Showing
11 changed files
with
321 additions
and
116 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import os | ||
import logging | ||
import re | ||
|
||
from dateutil import parser | ||
|
||
from redash.query_runner import ( | ||
BaseHTTPQueryRunner, register, | ||
TYPE_DATETIME, TYPE_INTEGER, TYPE_FLOAT, TYPE_BOOLEAN, | ||
guess_type | ||
) | ||
from redash.utils import json_dumps, json_loads | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def convert_type(string_value, actual_type):
    """Convert a Drill string value to the Python value for ``actual_type``.

    :param string_value: raw cell value taken from a Drill response row.
    :param actual_type: one of the ``TYPE_*`` constants naming the target type.
    :return: ``''`` when the value is ``None`` or empty; an ``int``, ``float``,
        ``bool`` or parsed datetime for the matching type; otherwise the value
        as ``unicode``. A value that cannot be converted to the requested
        numeric/datetime type is returned as ``unicode`` instead of raising.
    """
    if string_value is None or string_value == '':
        return ''

    try:
        if actual_type == TYPE_INTEGER:
            return int(string_value)

        if actual_type == TYPE_FLOAT:
            return float(string_value)

        if actual_type == TYPE_DATETIME:
            return parser.parse(string_value)
    except (TypeError, ValueError, OverflowError):
        # The requested type may have been guessed from a different row
        # (parse_response guesses from the first row only), so a single cell
        # can legitimately not match it. Keep the raw text for that cell
        # rather than failing the whole query.
        return unicode(string_value)

    if actual_type == TYPE_BOOLEAN:
        # Only the literal text "true" (any letter case) counts as True.
        return unicode(string_value).lower() == 'true'

    return unicode(string_value)
|
||
|
||
def parse_response(data):
    """Parse a Drill API response and translate it to the accepted format.

    :param data: decoded response; ``data['columns']`` is read as a list of
        column names and ``data['rows']`` as a list of dicts keyed by column
        name.
    :return: dict with ``columns`` (a list of ``name``/``type``/
        ``friendly_name`` dicts) and ``rows``. The row dicts from ``data``
        are converted in place and returned.
    """
    cols = data['columns']
    rows = data['rows']

    if len(cols) == 0:
        return {'columns': [], 'rows': []}

    # Column types are guessed from the first row. A response may carry
    # column names but no rows, or a first row without some column; guess
    # from None in those cases instead of raising IndexError/KeyError.
    # NOTE(review): relies on guess_type accepting None - confirm.
    first_row = rows[0] if rows else {}

    columns = []
    types = {}
    for name in cols:
        column_type = guess_type(first_row.get(name))
        types[name] = column_type
        columns.append({'name': name, 'type': column_type, 'friendly_name': name})

    for row in rows:
        # Only values of existing keys are replaced, so the dict size does
        # not change while it is being iterated.
        for key, value in row.items():
            row[key] = convert_type(value, types[key])

    return {'columns': columns, 'rows': rows}
|
||
|
||
class Drill(BaseHTTPQueryRunner):
    """Query runner that sends SQL to Apache Drill through its HTTP API."""

    noop_query = 'select version from sys.version'
    response_error = "Drill API returned unexpected status code"
    requires_authentication = False
    requires_url = True
    url_title = 'Drill URL'
    username_title = 'Username'
    password_title = 'Password'

    @classmethod
    def name(cls):
        """Return the display name of this data source."""
        return 'Apache Drill'

    @classmethod
    def configuration_schema(cls):
        """Return the base HTTP schema extended with ``allowed_schemas``."""
        schema = super(Drill, cls).configuration_schema()
        # Since Drill itself can act as aggregator of various datasources,
        # it can contain quite a lot of schemas in `INFORMATION_SCHEMA`
        # We added this to improve user experience and let users focus only on desired schemas.
        schema['properties']['allowed_schemas'] = {
            'type': 'string',
            'title': 'List of schemas to use in schema browser (comma separated)'
        }
        schema['order'] += ['allowed_schemas']
        return schema

    def run_query(self, query, user):
        """POST ``query`` to Drill's ``query.json`` endpoint.

        :param query: SQL text to execute.
        :param user: not used by this runner.
        :return: ``(json_data, error)``; exactly one of the two is ``None``.
        """
        # URLs always use '/', whereas os.path.join uses the separator of the
        # OS the code runs on; build the endpoint with plain string joining.
        drill_url = self.configuration['url'].rstrip('/') + '/query.json'

        try:
            payload = {'queryType': 'SQL', 'query': query}

            response, error = self.get_response(drill_url, http_method='post', json=payload)
            if error is not None:
                return None, error

            results = parse_response(response.json())

            return json_dumps(results), None
        except KeyboardInterrupt:
            return None, 'Query cancelled by user.'

    def get_schema(self, get_stats=False):
        """Return the tables and columns listed in Drill's INFORMATION_SCHEMA.

        When ``allowed_schemas`` is configured, only those schemas are listed.

        :param get_stats: not used by this runner.
        :return: list of ``{'name': 'schema.table', 'columns': [...]}`` dicts.
        :raises Exception: when the schema query returns an error.
        """
        query = """
        SELECT DISTINCT
            TABLE_SCHEMA,
            TABLE_NAME,
            COLUMN_NAME
        FROM
            INFORMATION_SCHEMA.`COLUMNS`
        WHERE
            TABLE_SCHEMA not in ('INFORMATION_SCHEMA', 'information_schema', 'sys')
            and TABLE_SCHEMA not like '%.information_schema'
            and TABLE_SCHEMA not like '%.INFORMATION_SCHEMA'
        """
        allowed_schemas = self.configuration.get('allowed_schemas')
        if allowed_schemas:
            # The names are embedded in the SQL text, so drop every character
            # other than letters, digits, '_', '.' and '`' before quoting.
            quoted_names = [
                "'{}'".format(re.sub('[^a-zA-Z0-9_.`]', '', schema_name))
                for schema_name in allowed_schemas.split(',')
            ]
            query += "and TABLE_SCHEMA in ({})".format(', '.join(quoted_names))

        results, error = self.run_query(query, None)

        if error is not None:
            raise Exception("Failed getting schema.")

        results = json_loads(results)

        schema = {}

        for row in results['rows']:
            table_name = u'{}.{}'.format(row['TABLE_SCHEMA'], row['TABLE_NAME'])

            if table_name not in schema:
                schema[table_name] = {'name': table_name, 'columns': []}

            schema[table_name]['columns'].append(row['COLUMN_NAME'])

        # Return a real list on both Python 2 and 3.
        return list(schema.values())
|
||
|
||
# Pass the Drill runner class to the `register` helper imported from redash.query_runner.
register(Drill)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.