From 0ccbb7f44a6bd4c2331b264855f0471074a1c020 Mon Sep 17 00:00:00 2001 From: mx Date: Sun, 19 May 2024 15:20:48 +0300 Subject: [PATCH 1/2] data-process - create endpoints vol8 --- superset/dvt_dataprocess/api.py | 45 +++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/superset/dvt_dataprocess/api.py b/superset/dvt_dataprocess/api.py index 521c4848f20d0..e69b1b61d9c77 100644 --- a/superset/dvt_dataprocess/api.py +++ b/superset/dvt_dataprocess/api.py @@ -19,6 +19,7 @@ from flask import jsonify, request, Response from flask_appbuilder import expose from flask_appbuilder.security.decorators import permission_name +from sqlalchemy.exc import NoSuchTableError from sqlalchemy.orm import sessionmaker from superset.extensions import event_logger @@ -49,17 +50,19 @@ def __init__(self): self.connection_string = 'postgresql://examples:examples@db:5432/examples' def handle_request(self, func): - try: - payload = request.json - if "selected_columns" not in payload or "table_name" not in payload: - return jsonify({"error": "Missing required fields in the payload"}), 400 - engine = get_engine(self.connection_string) - session = create_session(engine) - result = func(payload, engine, session) - session.commit() - return result - except Exception as e: - return jsonify({"error": str(e)}), 500 + # try: + payload = request.json + if "selected_columns" not in payload or "table_name" not in payload: + return jsonify({"error": "Missing required fields in the payload"}), 400 + engine = get_engine(self.connection_string) + session = create_session(engine) + result = func(payload, engine, session) + session.commit() + return result + # except Exception as e: + # return jsonify({"error": str(e)}), 500 + # finally: + # session.close() @expose("/outlier-analysis", methods=("POST",)) @event_logger.log_this @@ -76,7 +79,21 @@ def outlier_analysis_impl(self, payload, engine, session): outliers = detect_outliers_boxplot(df[column]) outliers_dict[column] = outliers.tolist() outliers_table_name = f"{table_name}_outliers" - outliers_table = Table(outliers_table_name, MetaData(bind=engine), autoload=True, autoload_with=engine) + metadata = MetaData(bind=engine) + try: + outliers_table = Table(outliers_table_name, metadata, autoload=True, autoload_with=engine) + except NoSuchTableError: + # Define the table structure + outliers_table = Table( + outliers_table_name, + metadata, + Column('id', Integer, primary_key=True, autoincrement=True), + *(Column(column, Float) for column in selected_columns) + ) + # Create the table in the database + metadata.create_all(engine) + outliers_table = Table(outliers_table_name, metadata, autoload=True, autoload_with=engine) + for column, outliers in outliers_dict.items(): for outlier in outliers: session.execute(outliers_table.insert().values({column: outlier})) @@ -154,9 +171,9 @@ def gain_information_impl(self, payload, engine, session): if col1 != col2: session.execute( gain_info_table.insert().values(column_1=col1, column_2=col2, - gain_information=mi_dict[col1][ - col2])) + information_gain=mi_dict[col1][col2])) return jsonify({"success": True, "message": f"Information gain calculated and written to table '{gain_info_table_name}'", "information_gain": mi_dict}), 200 + From 2c99c4d3316ee4d7b19de377f8a819300ed8cf4b Mon Sep 17 00:00:00 2001 From: mx Date: Sun, 19 May 2024 15:21:44 +0300 Subject: [PATCH 2/2] data-process - create endpoints vol9 --- superset/dvt_dataprocess/api.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/superset/dvt_dataprocess/api.py b/superset/dvt_dataprocess/api.py index e69b1b61d9c77..79966294ef49f 100644 --- a/superset/dvt_dataprocess/api.py +++ b/superset/dvt_dataprocess/api.py @@ -50,19 +50,19 @@ def __init__(self): self.connection_string = 'postgresql://examples:examples@db:5432/examples' def handle_request(self, func): - # try: - payload = request.json - if "selected_columns" not in payload or "table_name" not in payload: - return jsonify({"error": "Missing required fields in the payload"}), 400 - engine = get_engine(self.connection_string) - session = create_session(engine) - result = func(payload, engine, session) - session.commit() - return result - # except Exception as e: - # return jsonify({"error": str(e)}), 500 - # finally: - # session.close() + try: + payload = request.json + if "selected_columns" not in payload or "table_name" not in payload: + return jsonify({"error": "Missing required fields in the payload"}), 400 + engine = get_engine(self.connection_string) + session = create_session(engine) + result = func(payload, engine, session) + session.commit() + return result + except Exception as e: + return jsonify({"error": str(e)}), 500 + finally: + session.close() @expose("/outlier-analysis", methods=("POST",)) @event_logger.log_this @@ -83,14 +83,12 @@ def outlier_analysis_impl(self, payload, engine, session): try: outliers_table = Table(outliers_table_name, metadata, autoload=True, autoload_with=engine) except NoSuchTableError: - # Define the table structure outliers_table = Table( outliers_table_name, metadata, Column('id', Integer, primary_key=True, autoincrement=True), *(Column(column, Float) for column in selected_columns) ) - # Create the table in the database metadata.create_all(engine) outliers_table = Table(outliers_table_name, metadata, autoload=True, autoload_with=engine)