From e0586ec6664657e8d942e1438eebbf32d70e5be8 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Tue, 22 Dec 2015 15:35:06 -0800 Subject: [PATCH] Working state --- panoramix/bin/panoramix | 321 +----------------- panoramix/data/__init__.py | 363 +++++++++++++++++++++ panoramix/static/panoramix.js | 1 + panoramix/static/widgets/viz_world_map.css | 3 + panoramix/static/widgets/viz_world_map.js | 2 +- panoramix/templates/panoramix/explore.html | 48 ++- panoramix/viz.py | 2 +- 7 files changed, 409 insertions(+), 331 deletions(-) diff --git a/panoramix/bin/panoramix b/panoramix/bin/panoramix index a8317241b9c7d..ec508d491cab5 100755 --- a/panoramix/bin/panoramix +++ b/panoramix/bin/panoramix @@ -1,19 +1,12 @@ #!/usr/bin/env python -import csv -from datetime import datetime -import gzip -import json -import os from subprocess import Popen from flask.ext.script import Manager from panoramix import app from flask.ext.migrate import MigrateCommand from panoramix import db -from flask.ext.appbuilder import Base -from sqlalchemy import Column, Integer, String, Table, DateTime -from panoramix import models, utils +from panoramix import data, utils config = app.config @@ -60,314 +53,10 @@ def load_examples(sample): """Loads a set of Slices and Dashboards and a supporting dataset """ print("Loading examples into {}".format(db)) - - BirthNames = Table( - "birth_names", Base.metadata, - Column("id", Integer, primary_key=True), - Column("state", String(10)), - Column("year", Integer), - Column("name", String(128)), - Column("num", Integer), - Column("ds", DateTime), - Column("gender", String(10)), - Column("sum_boys", Integer), - Column("sum_girls", Integer), - ) - try: - BirthNames.drop(db.engine) - except: - pass - - BirthNames.create(db.engine) - session = db.session() - filepath = os.path.join(config.get("BASE_DIR"), 'data/birth_names.csv.gz') - with gzip.open(filepath, mode='rt') as f: - bb_csv = csv.reader(f) - for i, (state, year, name, gender, num) in enumerate(bb_csv): - if i == 0 or year < "1965": # jumpy data before 1965 - continue - if num == "NA": - num = 0 - ds = datetime(int(year), 1, 1) - db.engine.execute( - BirthNames.insert(), - state=state, - year=year, - ds=ds, - name=name, num=num, gender=gender, - sum_boys=num if gender == 'boy' else 0, - sum_girls=num if gender == 'girl' else 0, - ) - if i % 1000 == 0: - print("{} loaded out of 82527 rows".format(i)) - session.commit() - session.commit() - if sample and i>1000: break - print("Done loading table!") - print("-" * 80) - - print("Creating database reference") - DB = models.Database - dbobj = session.query(DB).filter_by(database_name='main').first() - if not dbobj: - dbobj = DB(database_name="main") - print(config.get("SQLALCHEMY_DATABASE_URI")) - dbobj.sqlalchemy_uri = config.get("SQLALCHEMY_DATABASE_URI") - session.add(dbobj) - session.commit() - - print("Creating table reference") - TBL = models.SqlaTable - obj = session.query(TBL).filter_by(table_name='birth_names').first() - if not obj: - obj = TBL(table_name = 'birth_names') - obj.main_dttm_col = 'ds' - obj.default_endpoint = "/panoramix/datasource/table/1/?viz_type=table&granularity=ds&since=100+years&until=now&row_limit=10&where=&flt_col_0=ds&flt_op_0=in&flt_eq_0=&flt_col_1=ds&flt_op_1=in&flt_eq_1=&slice_name=TEST&datasource_name=birth_names&datasource_id=1&datasource_type=table" - obj.database = dbobj - obj.columns = [ - models.TableColumn(column_name="num", sum=True, type="INTEGER"), - models.TableColumn(column_name="sum_boys", sum=True, type="INTEGER"), - models.TableColumn(column_name="sum_girls", sum=True, type="INTEGER"), - models.TableColumn(column_name="ds", is_dttm=True, type="DATETIME"), - ] - models.Table - session.add(obj) - session.commit() - obj.fetch_metadata() - tbl = obj - - print("Creating some slices") - def get_slice_json(slice_name, **kwargs): - defaults = { - "compare_lag": "10", - "compare_suffix": "o10Y", - "datasource_id": "1", - "datasource_name": "birth_names", - "datasource_type": "table", - "limit": "25", - "flt_col_1": "gender", - "flt_eq_1": "", - "flt_op_1": "in", - "granularity": "ds", - "groupby": [], - "metric": 'sum__num', - "metrics": ["sum__num"], - "row_limit": config.get("ROW_LIMIT"), - "since": "100 years", - "slice_name": slice_name, - "until": "now", - "viz_type": "table", - "where": "", - "markup_type": "markdown", - } - d = defaults.copy() - d.update(kwargs) - return json.dumps(d, indent=4, sort_keys=True) - Slice = models.Slice - slices = [] - - slice_name = "Girls" - slc = session.query(Slice).filter_by(slice_name=slice_name).first() - if not slc: - slc = Slice( - slice_name=slice_name, - viz_type='table', - datasource_type='table', - table=tbl, - params=get_slice_json( - slice_name, groupby=['name'], flt_eq_1="girl", row_limit=50)) - session.add(slc) - slices.append(slc) - - slice_name = "Boys" - slc = session.query(Slice).filter_by(slice_name=slice_name).first() - if not slc: - slc = Slice( - slice_name=slice_name, - viz_type='table', - datasource_type='table', - table=tbl, - params=get_slice_json( - slice_name, groupby=['name'], flt_eq_1="boy", row_limit=50)) - session.add(slc) - slices.append(slc) - - slice_name = "Participants" - slc = session.query(Slice).filter_by(slice_name=slice_name).first() - if not slc: - slc = Slice( - slice_name=slice_name, - viz_type='big_number', - datasource_type='table', - table=tbl, - params=get_slice_json( - slice_name, viz_type="big_number", granularity="ds", - compare_lag="5", compare_suffix="over 5Y")) - session.add(slc) - slices.append(slc) - - slice_name = "Genders" - slc = session.query(Slice).filter_by(slice_name=slice_name).first() - if not slc: - slc = Slice( - slice_name=slice_name, - viz_type='pie', - datasource_type='table', - table=tbl, - params=get_slice_json( - slice_name, viz_type="pie", groupby=['gender'])) - session.add(slc) - slices.append(slc) - - slice_name = "Gender by State" - slc = session.query(Slice).filter_by(slice_name=slice_name).first() - if not slc: - slc = Slice( - slice_name=slice_name, - viz_type='dist_bar', - datasource_type='table', - table=tbl, - params=get_slice_json( - slice_name, flt_eq_1="other", viz_type="dist_bar", - metrics=['sum__sum_girls', 'sum__sum_boys'], - groupby=['state'], flt_op_1='not in', flt_col_1='state')) - session.add(slc) - slices.append(slc) - - slice_name = "Trends" - slc = session.query(Slice).filter_by(slice_name=slice_name).first() - if not slc: - slc = Slice( - slice_name=slice_name, - viz_type='line', - datasource_type='table', - table=tbl, - params=get_slice_json( - slice_name, viz_type="line", groupby=['name'], - granularity='ds', rich_tooltip='y', show_legend='y')) - session.add(slc) - slices.append(slc) - - slice_name = "Title" - slc = session.query(Slice).filter_by(slice_name=slice_name).first() - code = """ -### Birth Names Dashboard -The source dataset came from [here](https://github.com/hadley/babynames) - -![img](http://monblog.system-linux.net/image/tux/baby-tux_overlord59-tux.png) - """ - if not slc: - slc = Slice( - slice_name=slice_name, - viz_type='markup', - datasource_type='table', - table=tbl, - params=get_slice_json( - slice_name, viz_type="markup", markup_type="markdown", - code=code)) - session.add(slc) - slices.append(slc) - - slice_name = "Name Cloud" - slc = session.query(Slice).filter_by(slice_name=slice_name).first() - if not slc: - slc = Slice( - slice_name=slice_name, - viz_type='word_cloud', - datasource_type='table', - table=tbl, - params=get_slice_json( - slice_name, viz_type="word_cloud", size_from="10", - groupby=['name'], size_to="70", rotation="square", - limit='100')) - session.add(slc) - slices.append(slc) - - slice_name = "Pivot Table" - slc = session.query(Slice).filter_by(slice_name=slice_name).first() - if not slc: - slc = Slice( - slice_name=slice_name, - viz_type='pivot_table', - datasource_type='table', - table=tbl, - params=get_slice_json( - slice_name, viz_type="pivot_table", metrics=['sum__num'], - groupby=['name'], columns=['state'])) - session.add(slc) - slices.append(slc) - - print("Creating a dashboard") - Dash = models.Dashboard - dash = session.query(Dash).filter_by(dashboard_title="Births").first() - if not dash: - dash = Dash( - dashboard_title="Births", - position_json=""" - [ - { - "size_y": 4, - "size_x": 2, - "col": 3, - "slice_id": "1", - "row": 3 - }, - { - "size_y": 4, - "size_x": 2, - "col": 1, - "slice_id": "2", - "row": 3 - }, - { - "size_y": 2, - "size_x": 2, - "col": 1, - "slice_id": "3", - "row": 1 - }, - { - "size_y": 2, - "size_x": 2, - "col": 3, - "slice_id": "4", - "row": 1 - }, - { - "size_y": 3, - "size_x": 7, - "col": 5, - "slice_id": "5", - "row": 4 - }, - { - "size_y": 5, - "size_x": 11, - "col": 1, - "slice_id": "6", - "row": 7 - }, - { - "size_y": 3, - "size_x": 3, - "col": 9, - "slice_id": "7", - "row": 1 - }, - { - "size_y": 3, - "size_x": 4, - "col": 5, - "slice_id": "8", - "row": 1 - } - ] - """ - ) - session.add(dash) - for s in slices: - dash.slices.append(s) - session.commit() + print("Loading [World Bank's Health Nutrition and Population Stats]") + data.load_world_bank_health_n_pop() + print("Loading [Birth names]") + data.load_birth_names() if __name__ == "__main__": diff --git a/panoramix/data/__init__.py b/panoramix/data/__init__.py index e69de29bb2d1d..e6c2a3ce477d0 100644 --- a/panoramix/data/__init__.py +++ b/panoramix/data/__init__.py @@ -0,0 +1,363 @@ +import pandas as pd +import csv +from datetime import datetime +import gzip +import os +from panoramix import app, db, models +from sqlalchemy import Column, String, DateTime, Table, Integer +from flask.ext.appbuilder import Base + +config = app.config + +DATA_FOLDER = os.path.join(config.get("BASE_DIR"), 'data') + + +def load_world_bank_health_n_pop(): + """ + Details on how the data was loaded from + http://data.worldbank.org/data-catalog/health-nutrition-and-population-statistics + DIR = "" + df_country = pd.read_csv(DIR + '/HNP_Country.csv') + df_country.columns = ['country_code'] + list(df_country.columns[1:]) + df_country = df_country[['country_code', 'Region']] + df_country.columns = ['country_code', 'region'] + + df = pd.read_csv(DIR + '/HNP_Data.csv') + del df['Unnamed: 60'] + df.columns = ['country_name', 'country_code'] + list(df.columns[2:]) + ndf = df.merge(df_country, how='inner') + + dims = ('country_name', 'country_code', 'region') + vv = [str(i) for i in range(1960, 2015)] + mdf = pd.melt(ndf, id_vars=dims + ('Indicator Code',), value_vars=vv) + mdf['year'] = mdf.variable + '-01-01' + dims = dims + ('year',) + + pdf = mdf.pivot_table(values='value', columns='Indicator Code', index=dims) + pdf = pdf.reset_index() + pdf.to_csv(DIR + '/countries.csv') + pdf.to_json(DIR + '/countries.json', orient='records') + """ + with gzip.open(os.path.join(DATA_FOLDER, 'countries.json.gz')) as f: + pdf = pd.read_json(f) + pdf.to_sql( + 'wb_health_population', + db.engine, + if_exists='replace', + chunksize=500, + dtype={ + 'year': DateTime(), + 'country_code': String(3), + 'country_name': String(255), + 'region': String(255), + }, + index=False) + + +def load_birth_names(): + BirthNames = Table( + "birth_names", Base.metadata, + Column("id", Integer, primary_key=True), + Column("state", String(10)), + Column("year", Integer), + Column("name", String(128)), + Column("num", Integer), + Column("ds", DateTime), + Column("gender", String(10)), + Column("sum_boys", Integer), + Column("sum_girls", Integer), + ) + try: + BirthNames.drop(db.engine) + except: + pass + + BirthNames.create(db.engine) + session = db.session() + filepath = os.path.join(DATA_FOLDER, 'birth_names.csv.gz') + with gzip.open(filepath, mode='rt') as f: + bb_csv = csv.reader(f) + for i, (state, year, name, gender, num) in enumerate(bb_csv): + if i == 0 or year < "1965": # jumpy data before 1965 + continue + if num == "NA": + num = 0 + ds = datetime(int(year), 1, 1) + db.engine.execute( + BirthNames.insert(), + state=state, + year=year, + ds=ds, + name=name, num=num, gender=gender, + sum_boys=num if gender == 'boy' else 0, + sum_girls=num if gender == 'girl' else 0, + ) + if i % 1000 == 0: + print("{} loaded out of 82527 rows".format(i)) + session.commit() + session.commit() + print("Done loading table!") + print("-" * 80) + + print("Creating database reference") + DB = models.Database + dbobj = session.query(DB).filter_by(database_name='main').first() + if not dbobj: + dbobj = DB(database_name="main") + print(config.get("SQLALCHEMY_DATABASE_URI")) + dbobj.sqlalchemy_uri = config.get("SQLALCHEMY_DATABASE_URI") + session.add(dbobj) + session.commit() + + print("Creating table reference") + TBL = models.SqlaTable + obj = session.query(TBL).filter_by(table_name='birth_names').first() + if not obj: + obj = TBL(table_name = 'birth_names') + obj.main_dttm_col = 'ds' + obj.default_endpoint = "/panoramix/datasource/table/1/?viz_type=table&granularity=ds&since=100+years&until=now&row_limit=10&where=&flt_col_0=ds&flt_op_0=in&flt_eq_0=&flt_col_1=ds&flt_op_1=in&flt_eq_1=&slice_name=TEST&datasource_name=birth_names&datasource_id=1&datasource_type=table" + obj.database = dbobj + obj.columns = [ + models.TableColumn(column_name="num", sum=True, type="INTEGER"), + models.TableColumn(column_name="sum_boys", sum=True, type="INTEGER"), + models.TableColumn(column_name="sum_girls", sum=True, type="INTEGER"), + models.TableColumn(column_name="ds", is_dttm=True, type="DATETIME"), + ] + models.Table + session.add(obj) + session.commit() + obj.fetch_metadata() + tbl = obj + + print("Creating some slices") + def get_slice_json(slice_name, **kwargs): + defaults = { + "compare_lag": "10", + "compare_suffix": "o10Y", + "datasource_id": "1", + "datasource_name": "birth_names", + "datasource_type": "table", + "limit": "25", + "flt_col_1": "gender", + "flt_eq_1": "", + "flt_op_1": "in", + "granularity": "ds", + "groupby": [], + "metric": 'sum__num', + "metrics": ["sum__num"], + "row_limit": config.get("ROW_LIMIT"), + "since": "100 years", + "slice_name": slice_name, + "until": "now", + "viz_type": "table", + "where": "", + "markup_type": "markdown", + } + d = defaults.copy() + d.update(kwargs) + return json.dumps(d, indent=4, sort_keys=True) + Slice = models.Slice + slices = [] + + slice_name = "Girls" + slc = session.query(Slice).filter_by(slice_name=slice_name).first() + if not slc: + slc = Slice( + slice_name=slice_name, + viz_type='table', + datasource_type='table', + table=tbl, + params=get_slice_json( + slice_name, groupby=['name'], flt_eq_1="girl", row_limit=50)) + session.add(slc) + slices.append(slc) + + slice_name = "Boys" + slc = session.query(Slice).filter_by(slice_name=slice_name).first() + if not slc: + slc = Slice( + slice_name=slice_name, + viz_type='table', + datasource_type='table', + table=tbl, + params=get_slice_json( + slice_name, groupby=['name'], flt_eq_1="boy", row_limit=50)) + session.add(slc) + slices.append(slc) + + slice_name = "Participants" + slc = session.query(Slice).filter_by(slice_name=slice_name).first() + if not slc: + slc = Slice( + slice_name=slice_name, + viz_type='big_number', + datasource_type='table', + table=tbl, + params=get_slice_json( + slice_name, viz_type="big_number", granularity="ds", + compare_lag="5", compare_suffix="over 5Y")) + session.add(slc) + slices.append(slc) + + slice_name = "Genders" + slc = session.query(Slice).filter_by(slice_name=slice_name).first() + if not slc: + slc = Slice( + slice_name=slice_name, + viz_type='pie', + datasource_type='table', + table=tbl, + params=get_slice_json( + slice_name, viz_type="pie", groupby=['gender'])) + session.add(slc) + slices.append(slc) + + slice_name = "Gender by State" + slc = session.query(Slice).filter_by(slice_name=slice_name).first() + if not slc: + slc = Slice( + slice_name=slice_name, + viz_type='dist_bar', + datasource_type='table', + table=tbl, + params=get_slice_json( + slice_name, flt_eq_1="other", viz_type="dist_bar", + metrics=['sum__sum_girls', 'sum__sum_boys'], + groupby=['state'], flt_op_1='not in', flt_col_1='state')) + session.add(slc) + slices.append(slc) + + slice_name = "Trends" + slc = session.query(Slice).filter_by(slice_name=slice_name).first() + if not slc: + slc = Slice( + slice_name=slice_name, + viz_type='line', + datasource_type='table', + table=tbl, + params=get_slice_json( + slice_name, viz_type="line", groupby=['name'], + granularity='ds', rich_tooltip='y', show_legend='y')) + session.add(slc) + slices.append(slc) + + slice_name = "Title" + slc = session.query(Slice).filter_by(slice_name=slice_name).first() + code = """ +### Birth Names Dashboard +The source dataset came from [here](https://github.com/hadley/babynames) + +![img](http://monblog.system-linux.net/image/tux/baby-tux_overlord59-tux.png) + """ + if not slc: + slc = Slice( + slice_name=slice_name, + viz_type='markup', + datasource_type='table', + table=tbl, + params=get_slice_json( + slice_name, viz_type="markup", markup_type="markdown", + code=code)) + session.add(slc) + slices.append(slc) + + slice_name = "Name Cloud" + slc = session.query(Slice).filter_by(slice_name=slice_name).first() + if not slc: + slc = Slice( + slice_name=slice_name, + viz_type='word_cloud', + datasource_type='table', + table=tbl, + params=get_slice_json( + slice_name, viz_type="word_cloud", size_from="10", + groupby=['name'], size_to="70", rotation="square", + limit='100')) + session.add(slc) + slices.append(slc) + + slice_name = "Pivot Table" + slc = session.query(Slice).filter_by(slice_name=slice_name).first() + if not slc: + slc = Slice( + slice_name=slice_name, + viz_type='pivot_table', + datasource_type='table', + table=tbl, + params=get_slice_json( + slice_name, viz_type="pivot_table", metrics=['sum__num'], + groupby=['name'], columns=['state'])) + session.add(slc) + slices.append(slc) + + print("Creating a dashboard") + Dash = models.Dashboard + dash = session.query(Dash).filter_by(dashboard_title="Births").first() + if not dash: + dash = Dash( + dashboard_title="Births", + position_json=""" + [ + { + "size_y": 4, + "size_x": 2, + "col": 3, + "slice_id": "1", + "row": 3 + }, + { + "size_y": 4, + "size_x": 2, + "col": 1, + "slice_id": "2", + "row": 3 + }, + { + "size_y": 2, + "size_x": 2, + "col": 1, + "slice_id": "3", + "row": 1 + }, + { + "size_y": 2, + "size_x": 2, + "col": 3, + "slice_id": "4", + "row": 1 + }, + { + "size_y": 3, + "size_x": 7, + "col": 5, + "slice_id": "5", + "row": 4 + }, + { + "size_y": 5, + "size_x": 11, + "col": 1, + "slice_id": "6", + "row": 7 + }, + { + "size_y": 3, + "size_x": 3, + "col": 9, + "slice_id": "7", + "row": 1 + }, + { + "size_y": 3, + "size_x": 4, + "col": 5, + "slice_id": "8", + "row": 1 + } + ] + """ + ) + session.add(dash) + for s in slices: + dash.slices.append(s) + session.commit() diff --git a/panoramix/static/panoramix.js b/panoramix/static/panoramix.js index 4c905d95b36f1..a556d1e5aaa1d 100644 --- a/panoramix/static/panoramix.js +++ b/panoramix/static/panoramix.js @@ -34,6 +34,7 @@ var px = (function() { form_data['flt_col_1'] = dashboard.filters[f][0]; form_data['flt_op_1'] = 'in'; form_data['flt_eq_1'] = dashboard.filters[f][1][0]; + //form_data['extra_filters'] = JSON.stringify(dashboard.filters) } } } diff --git a/panoramix/static/widgets/viz_world_map.css b/panoramix/static/widgets/viz_world_map.css index e69de29bb2d1d..b4e85c530afdb 100644 --- a/panoramix/static/widgets/viz_world_map.css +++ b/panoramix/static/widgets/viz_world_map.css @@ -0,0 +1,3 @@ +.world_map svg{ + background-color: LightSkyBlue; +} diff --git a/panoramix/static/widgets/viz_world_map.js b/panoramix/static/widgets/viz_world_map.js index 4f9fb6e67b2ee..03bfc1d21283a 100644 --- a/panoramix/static/widgets/viz_world_map.js +++ b/panoramix/static/widgets/viz_world_map.js @@ -36,7 +36,7 @@ function viz_world_map(slice) { element: slice.container.get(0), data: json.data, fills: { - defaultFill: 'white' + defaultFill: 'transparent' }, geographyConfig: { popupOnHover: true, diff --git a/panoramix/templates/panoramix/explore.html b/panoramix/templates/panoramix/explore.html index 842d5ab95e86a..e1181eeaee07c 100644 --- a/panoramix/templates/panoramix/explore.html +++ b/panoramix/templates/panoramix/explore.html @@ -26,26 +26,30 @@ {{ datasource.full_name }} {% if datasource.description %} + + {% endif %} {{ form.get_field("viz_type")(class_="select2") }} - query - 0 sec - - .csv - - - - .json - - - - +
+ + + + + + .json + + + .csv + + 0 sec + query +

@@ -171,6 +175,24 @@
+ {% endblock %} diff --git a/panoramix/viz.py b/panoramix/viz.py index 44886f3665380..5d2266b162e8b 100644 --- a/panoramix/viz.py +++ b/panoramix/viz.py @@ -175,7 +175,7 @@ def query_obj(self): from_dttm = datetime.now() - (from_dttm-datetime.now()) until = form_data.get("until", "now") to_dttm = utils.parse_human_datetime(until) - if from_dttm >= to_dttm: + if from_dttm > to_dttm: flash("The date range doesn't seem right.", "danger") from_dttm = to_dttm # Making them identical to not raise