Skip to content

Commit

Permalink
♻️ Refactor code to smaller modules (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
connormaglynn authored May 9, 2024
1 parent c07f100 commit e65b580
Show file tree
Hide file tree
Showing 9 changed files with 270 additions and 196 deletions.
16 changes: 15 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ The current solution is composed of two components, the Dashboard UI and a Datab
- UI
- [Dash and Plotly](https://dash.plotly.com/tutorial) - for creating dashboards from data with no JavaScript 😱
- [Pandas](https://pandas.pydata.org/pandas-docs/stable/index.html) - for reading in data from CSV, JSON, SQL etc. and converting it into a readable format by Dash/Plotly 🐼
- [Grafana](https://grafana.com/) - for creating dashboards from data 📊
- [Grafana](https://grafana.com/) UI Alternative - for creating dashboards from data 📊
- Data Sources
- [PostgresDB](https://www.postgresql.org/) - for storing data (that can be auto-populated by systems when/if this goes live 🙈)
- CSV files - example CSV files stored in [example-data](./example-data/) though the application reads from [./data](./data) 💿
Expand All @@ -51,6 +51,20 @@ docker compose up --build

Sometimes this command can fail on the first run - if it does, spin down compose with `docker compose down` and re-launch! 🚀

You can then go to the UI to see graphs for the given data sets:

| Name | URL |
| :---------- | :-------------------: |
| Dash/Plotly | http://localhost:4567 |

#### ✌️ Grafana Locally

As an alternative dashboarding option - you can choose to also spin up a Grafana Dashboard locally using the following command:

```bash
docker compose -f docker-compose-grafana.yaml up --build
```

You can then go to the following UIs to see graphs for the given data sets:

| Name | URL |
Expand Down
199 changes: 16 additions & 183 deletions app/app.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import datetime
import logging
from typing import Any

import pandas as pd
import plotly.express as px
import psycopg2
from dash import Dash, dcc, html
from flask import Flask

from app.config.app_config import app_config
from app.config.logging_config import configure_logging
from app.services.dashboard_service import FigureService
from app.services.database_service import DatabaseService

logger = logging.getLogger(__name__)

Expand All @@ -19,119 +16,38 @@ def create_app() -> Flask:

logger.info("Starting app...")

server = Flask(__name__)

database_service = DatabaseService()
figure_service = FigureService(database_service)

logger.info("Populating stub data...")
create_indicators_table()
clean_indicators_table()
add_data()
database_service.create_indicators_table()
database_service.clean_indicators_table()
database_service.add_data()

logger.info("Retrieving stub data...")
sentry_transaction_quota_consumed = pd.DataFrame(get_indicator("SENTRY_DAILY_TRANSACTION_USAGE"), columns=["timestamp", "count"]).sort_values(
by="timestamp", ascending=True
)
number_of_repositories_archived_by_automation = pd.DataFrame(
get_indicator("REPOSITORIES_ARCHIVED_BY_AUTOMATION"), columns=["timestamp", "count"]
).sort_values(by="timestamp", ascending=True)
number_of_repos_with_standards_label_df = pd.DataFrame(get_indicator("REPOSITORIES_WITH_STANDARDS_LABEL"), columns=["timestamp", "count"]).sort_values(
by="timestamp", ascending=True
)
support_stats_csv = pd.read_csv("data/support-stats.csv")
logging.info(support_stats_csv)
support_stats_csv_pivoted = pd.melt(
support_stats_csv,
value_vars=["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"],
id_vars=["Request Type", "Total"],
value_name="Count",
var_name="Month",
ignore_index=True,
)
logging.info(support_stats_csv_pivoted)
github_usage_csv = pd.read_csv("data/github_actions_private_and_internal.csv").sort_values(by="Date", ascending=True)
logging.info(github_usage_csv)
github_actions = github_usage_csv[github_usage_csv["Product"] == "Actions"]
logging.info(github_actions)
github_actions_summed = github_actions.groupby(by="Date", as_index=False).agg("sum")
github_actions_summed["Date"] = pd.to_datetime(github_actions_summed["Date"])
logging.info(github_actions_summed)
app = Dash(__name__, server=server)

logger.info("Creating app...")
app = Dash(__name__)
app.layout = html.Div(
children=[
dcc.Graph(
figure=px.line(
number_of_repos_with_standards_label_df,
x="timestamp",
y="count",
title="🏷️ Number of Repositories With Standards Label",
markers=True,
template="plotly_dark",
).add_hline(y=0),
figure=figure_service.get_number_of_repositories_with_standards_label_dashboard(),
style={"width": "33%", "height": "500px", "display": "inline-block"},
),
dcc.Graph(
figure=px.line(
number_of_repositories_archived_by_automation,
x="timestamp",
y="count",
title="👴 Number of Repositories Archived By Automation",
markers=True,
template="plotly_dark",
).add_hline(y=0),
figure=figure_service.get_number_of_repositories_archived_by_automation(),
style={"width": "33%", "height": "500px", "display": "inline-block"},
),
dcc.Graph(
figure=px.line(
sentry_transaction_quota_consumed,
x="timestamp",
y="count",
title="👀 Sentry Transactions Used",
markers=True,
template="plotly_dark",
)
.add_hline(y=967741, annotation_text="Max Daily Usage")
.add_hrect(y0=(967741 * 0.8), y1=967741, line_width=0, fillcolor="red", opacity=0.2, annotation_text="Alert Threshold"),
figure=figure_service.get_sentry_transactions_used(),
style={"width": "33%", "height": "500px", "display": "inline-block"},
),
dcc.Graph(
figure=px.line(
support_stats_csv_pivoted,
x="Month",
y="Count",
color="Request Type",
title="🏋️ Support Stats",
markers=True,
template="plotly_dark",
),
figure=figure_service.get_support_stats(),
style={"width": "100%", "height": "500px", "display": "inline-block"},
),
dcc.Graph(
figure=px.line(
github_actions_summed,
x="Date",
y="Quantity",
title="💥 GitHub Quota Usage",
markers=True,
template="plotly_dark",
hover_data=["Price Per Unit ($)"],
)
.add_hline(y=github_actions_summed["Quantity"].mean(), line_dash="dash", annotation_text="Average Daily Usage")
.add_hline(y=(40000 / 31), annotation_text="Max Daily Actions Usage Usage")
.add_hrect(y0=((40000 / 31) * 0.8), y1=(40000 / 31), line_width=0, fillcolor="red", opacity=0.2, annotation_text="Actions Alert Threshold"),
style={"width": "100%", "height": "500px", "display": "inline-block"},
),
dcc.Graph(
figure=px.scatter(
github_actions_summed,
x="Date",
y="Quantity",
title="💥 GitHub Quota Usage - 28 Day Rolling Average",
trendline="rolling",
trendline_options=dict(window=28),
template="plotly_dark",
hover_data=["Price Per Unit ($)"],
)
.add_hline(y=(40000 / 31), annotation_text="Max Daily Actions Usage Usage")
.add_hrect(y0=((40000 / 31) * 0.8), y1=(40000 / 31), line_width=0, fillcolor="red", opacity=0.2, annotation_text="Actions Alert Threshold"),
figure=figure_service.get_github_actions_quota_usage(),
style={"width": "100%", "height": "500px", "display": "inline-block"},
),
],
Expand All @@ -141,86 +57,3 @@ def create_app() -> Flask:
logger.info("Running app...")

return app.server


def execute_query(sql: str, values: "list[Any] | None" = None):
    """Execute a single SQL statement against the configured Postgres database.

    Args:
        sql: The SQL statement to run (may contain %s placeholders).
        values: Parameters bound to the placeholders; defaults to none.

    Returns:
        The fetched rows for statements that produce a result set, else None.
    """
    # Avoid a mutable default argument: a shared [] default persists across
    # calls and can be mutated by callers.
    if values is None:
        values = []
    # NOTE: psycopg2's connection context manager commits/rolls back on exit
    # but does NOT close the connection.
    with psycopg2.connect(
        dbname=app_config.postgres.db,
        user=app_config.postgres.user,
        password=app_config.postgres.password,
        host=app_config.postgres.host,
        port=app_config.postgres.port,
    ) as conn:
        logging.info("Connected to the PostgreSQL server.")
        data = None
        # Close the cursor deterministically instead of leaking it.
        with conn.cursor() as cur:
            cur.execute(sql, values)
            try:
                data = cur.fetchall()
            except psycopg2.ProgrammingError as e:
                # Statements without a result set (DDL/INSERT/DELETE) raise
                # "no results to fetch" — expected, not an error.
                logging.debug(e)
        conn.commit()
    return data


def get_indicator(indicator: str) -> list[tuple[Any, Any]]:
    """Return every (timestamp, count) row recorded for the given indicator."""
    query = "SELECT timestamp, count FROM indicators WHERE indicator = %s;"
    return execute_query(sql=query, values=[indicator])


def create_indicators_table() -> None:
    """Create the indicators table when it is not already present."""
    ddl = """
        CREATE TABLE IF NOT EXISTS indicators (
            id SERIAL PRIMARY KEY,
            indicator varchar,
            timestamp timestamp,
            count integer
        );
    """
    execute_query(sql=ddl)


def clean_indicators_table() -> None:
    """Remove every row from the indicators table."""
    delete_all = "DELETE FROM indicators"
    execute_query(sql=delete_all)


def add_data():
    """Insert the stub data points for each tracked indicator."""
    insert_sql = "INSERT INTO indicators (indicator,timestamp, count) VALUES (%s, %s, %s);"
    stub_data = {
        "SENTRY_DAILY_TRANSACTION_USAGE": [
            (datetime.date(2024, 4, 20), 771761),
            (datetime.date(2024, 4, 21), 796740),
            (datetime.date(2024, 4, 22), 437108),
            (datetime.date(2024, 4, 23), 421906),
            (datetime.date(2024, 4, 24), 853259),
            (datetime.date(2024, 4, 25), 779597),
            (datetime.date(2024, 4, 26), 1249612),
            (datetime.date(2024, 4, 27), 906111),
            (datetime.date(2024, 4, 28), 418087),
            (datetime.date(2024, 4, 29), 413430),
            (datetime.date(2024, 4, 30), 880825),
            # NOTE(review): 2024-05-01 appears twice in the original data;
            # kept as-is — confirm whether the second entry meant 2024-05-02.
            (datetime.date(2024, 5, 1), 792862),
            (datetime.date(2024, 5, 1), 783851),
        ],
        # Ten consecutive days, all with the same count of 11.
        "REPOSITORIES_WITH_STANDARDS_LABEL": [(datetime.date(2024, 4, day), 11) for day in range(20, 30)],
        "REPOSITORIES_ARCHIVED_BY_AUTOMATION": [
            (datetime.date(2024, 4, 20), 1),
            (datetime.date(2024, 4, 21), 0),
            (datetime.date(2024, 4, 22), 0),
            (datetime.date(2024, 4, 23), 4),
            (datetime.date(2024, 4, 24), 0),
            (datetime.date(2024, 4, 25), 0),
            (datetime.date(2024, 4, 26), 1),
            (datetime.date(2024, 4, 27), 0),
            (datetime.date(2024, 4, 28), 0),
            (datetime.date(2024, 4, 29), 0),
        ],
    }
    # Dicts preserve insertion order, so rows are inserted in the same
    # sequence as the original flat list.
    for indicator, points in stub_data.items():
        for day, count in points:
            execute_query(insert_sql, values=(indicator, day, count))
4 changes: 4 additions & 0 deletions app/assets/main.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

/* Remove the browser's default page margin so the dashboard fills the viewport. */
body {
    margin: 0; /* unitless zero is the conventional CSS form */
}
Empty file added app/services/__init__.py
Empty file.
99 changes: 99 additions & 0 deletions app/services/dashboard_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import logging

import pandas as pd
import plotly.express as px

logger = logging.getLogger(__name__)


class FigureService:
    """Builds the Plotly figures shown on the dashboard.

    Indicator-based figures read from the injected database service; the
    support-stats and GitHub-usage figures read CSV files under ./data.
    """

    # Sentry daily transaction quota used for the alert line and band.
    MAX_DAILY_SENTRY_TRANSACTIONS = 967741
    # Monthly GitHub Actions quota (40,000) spread over a 31-day month.
    MAX_DAILY_GITHUB_ACTIONS_USAGE = 40000 / 31
    # Alert when usage crosses 80% of the daily maximum.
    ALERT_THRESHOLD_FRACTION = 0.8

    def __init__(self, database_service) -> None:
        # database_service must expose get_indicator(name) -> [(timestamp, count), ...]
        self.database_service = database_service

    def _indicator_dataframe(self, indicator: str) -> pd.DataFrame:
        """Load one indicator as a (timestamp, count) frame sorted by time."""
        rows = self.database_service.get_indicator(indicator)
        return pd.DataFrame(rows, columns=["timestamp", "count"]).sort_values(by="timestamp", ascending=True)

    def get_number_of_repositories_with_standards_label_dashboard(self):
        """Line chart of repositories carrying the standards label over time."""
        df = self._indicator_dataframe("REPOSITORIES_WITH_STANDARDS_LABEL")
        return px.line(
            df,
            x="timestamp",
            y="count",
            title="🏷️ Number of Repositories With Standards Label",
            markers=True,
            template="plotly_dark",
        ).add_hline(y=0)

    def get_number_of_repositories_archived_by_automation(self):
        """Line chart of repositories archived by automation over time."""
        df = self._indicator_dataframe("REPOSITORIES_ARCHIVED_BY_AUTOMATION")
        return px.line(
            df,
            x="timestamp",
            y="count",
            title="👴 Number of Repositories Archived By Automation",
            markers=True,
            template="plotly_dark",
        ).add_hline(y=0)

    def get_sentry_transactions_used(self):
        """Line chart of daily Sentry transaction usage with quota alert band."""
        df = self._indicator_dataframe("SENTRY_DAILY_TRANSACTION_USAGE")
        max_daily = self.MAX_DAILY_SENTRY_TRANSACTIONS
        return (
            px.line(
                df,
                x="timestamp",
                y="count",
                title="👀 Sentry Transactions Used",
                markers=True,
                template="plotly_dark",
            )
            .add_hline(y=max_daily, annotation_text="Max Daily Usage")
            .add_hrect(
                y0=max_daily * self.ALERT_THRESHOLD_FRACTION,
                y1=max_daily,
                line_width=0,
                fillcolor="red",
                opacity=0.2,
                annotation_text="Alert Threshold",
            )
        )

    def get_support_stats(self):
        """Line chart of monthly support request counts, one line per request type."""
        support_stats_csv = pd.read_csv("data/support-stats.csv")
        # Melt the wide month columns into (Month, Count) rows so each
        # request type becomes one plottable series.
        support_stats_csv_pivoted = pd.melt(
            support_stats_csv,
            value_vars=["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"],
            id_vars=["Request Type", "Total"],
            value_name="Count",
            var_name="Month",
            ignore_index=True,
        )
        return px.line(
            support_stats_csv_pivoted,
            x="Month",
            y="Count",
            color="Request Type",
            title="🏋️ Support Stats",
            markers=True,
            template="plotly_dark",
        )

    def get_github_actions_quota_usage(self):
        """Scatter of daily GitHub Actions usage with a 28-day rolling average."""
        usage = pd.read_csv("data/github_actions_private_and_internal.csv").sort_values(by="Date", ascending=True)
        actions = usage[usage["Product"] == "Actions"]
        # NOTE(review): .agg("sum") aggregates every column, including any
        # text columns — confirm the CSV carries only numeric columns besides Date.
        summed = actions.groupby(by="Date", as_index=False).agg("sum")
        summed["Date"] = pd.to_datetime(summed["Date"])
        max_daily = self.MAX_DAILY_GITHUB_ACTIONS_USAGE
        return (
            px.scatter(
                summed,
                x="Date",
                y="Quantity",
                title="💥 GitHub Quota Usage - 28 Day Rolling Average",
                trendline="rolling",
                trendline_options=dict(window=28),
                template="plotly_dark",
                hover_data=["Price Per Unit ($)"],
            )
            # Fixed duplicated word from the original annotation ("Usage Usage").
            .add_hline(y=max_daily, annotation_text="Max Daily Actions Usage")
            .add_hrect(
                y0=max_daily * self.ALERT_THRESHOLD_FRACTION,
                y1=max_daily,
                line_width=0,
                fillcolor="red",
                opacity=0.2,
                annotation_text="Actions Alert Threshold",
            )
        )
Loading

0 comments on commit e65b580

Please sign in to comment.