From f19586a4e65db7bdef44acfb602f7a4aabc65d42 Mon Sep 17 00:00:00 2001 From: Ahti Kitsik Date: Wed, 14 Oct 2020 05:13:27 -0700 Subject: [PATCH 1/4] System Status page and API (#1812). --- cypress/integration/system_status.js | 7 ++ frontend/src/layout/Sidebar.js | 12 +++- frontend/src/scenes/sceneLogic.js | 2 + .../src/scenes/system_status/SystemStatus.tsx | 47 +++++++++++++ .../scenes/system_status/systemStatusLogic.ts | 46 +++++++++++++ posthog/urls.py | 3 +- posthog/utils.py | 50 ++++++++++++++ posthog/views.py | 69 +++++++++++++++---- 8 files changed, 219 insertions(+), 17 deletions(-) create mode 100644 cypress/integration/system_status.js create mode 100644 frontend/src/scenes/system_status/SystemStatus.tsx create mode 100644 frontend/src/scenes/system_status/systemStatusLogic.ts diff --git a/cypress/integration/system_status.js b/cypress/integration/system_status.js new file mode 100644 index 0000000000000..2908d89d0d3f1 --- /dev/null +++ b/cypress/integration/system_status.js @@ -0,0 +1,7 @@ +describe('System Status', () => { + it('System Status loaded', () => { + cy.get('[data-attr=menu-item-settings]').click() + cy.get('[data-attr=menu-item-system-status]').click() + cy.get('h1').should('contain', 'System Status') + }) +}) diff --git a/frontend/src/layout/Sidebar.js b/frontend/src/layout/Sidebar.js index 0aebe4dd235bd..41071970403cb 100644 --- a/frontend/src/layout/Sidebar.js +++ b/frontend/src/layout/Sidebar.js @@ -20,6 +20,7 @@ import { TeamOutlined, LockOutlined, WalletOutlined, + DatabaseOutlined, } from '@ant-design/icons' import { useActions, useValues } from 'kea' import { Link } from 'lib/components/Link' @@ -45,7 +46,7 @@ function Logo() { ) } -// to show the right page in the sidebar +// to show the right page i n the sidebar const sceneOverride = { action: 'actions', person: 'people', @@ -63,6 +64,7 @@ const submenuOverride = { annotations: 'settings', billing: 'settings', licenses: 'settings', + systemStatus: 'settings', } export function Sidebar({ user, sidebarCollapsed, setSidebarCollapsed }) { @@ -259,6 +261,14 @@ export function Sidebar({ user, sidebarCollapsed, setSidebarCollapsed }) { )} + {(!user.is_multi_tenancy || (user.is_multi_tenancy && user.is_staff)) && ( + + + System Status + + + )} + {!user.is_multi_tenancy && user.ee_available && ( diff --git a/frontend/src/scenes/sceneLogic.js b/frontend/src/scenes/sceneLogic.js index 6cee953c10e38..7766cb4233727 100644 --- a/frontend/src/scenes/sceneLogic.js +++ b/frontend/src/scenes/sceneLogic.js @@ -23,6 +23,7 @@ export const scenes = { annotations: () => import(/* webpackChunkName: 'annotations' */ './annotations/AnnotationsScene'), team: () => import(/* webpackChunkName: 'team' */ './team/Team'), licenses: () => import(/* webpackChunkName: 'setup' */ './setup/Licenses'), + systemStatus: () => import(/* webpackChunkName: 'setup' */ './system_status/SystemStatus'), preflight: () => import(/* webpackChunkName: 'preflightCheck' */ './setup/PreflightCheck'), signup: () => import(/* webpackChunkName: 'signup' */ './team/Signup'), ingestion: () => import(/* webpackChunkName: 'ingestion' */ './ingestion/IngestionWizard'), @@ -56,6 +57,7 @@ export const routes = { '/annotations': 'annotations', '/team': 'team', '/setup/licenses': 'licenses', + '/system_status': 'systemStatus', '/preflight': 'preflight', '/signup': 'signup', '/ingestion': 'ingestion', diff --git a/frontend/src/scenes/system_status/SystemStatus.tsx b/frontend/src/scenes/system_status/SystemStatus.tsx new file mode 100644 index 0000000000000..39bf04014e63d --- /dev/null +++ b/frontend/src/scenes/system_status/SystemStatus.tsx @@ -0,0 +1,47 @@ +import React from 'react' +import { hot } from 'react-hot-loader/root' +import { Alert, Table } from 'antd' +import { systemStatusLogic } from './systemStatusLogic' +import { useValues } from 'kea' + +const columns = [ + { + title: 'Metric', + dataIndex: 'metric', + }, + { + title: 'Value', + dataIndex: 'value', + }, +] + +export const SystemStatus = hot(_Status) +function _Status(): JSX.Element { + const { systemStatus, systemStatusLoading, error } = useValues(systemStatusLogic) + return ( +
+

System Status

+

+ Here you can find all the critical runtime details about your PostHog installation. +

+
+ {error && ( + Something went wrong. Please try again or contact us.} + type="error" + /> + )} +
+ item.metric} + pagination={{ pageSize: 99999, hideOnSinglePage: true }} + rowClassName="cursor-pointer" + dataSource={systemStatus} + columns={columns} + loading={systemStatusLoading} + /> + + ) +} diff --git a/frontend/src/scenes/system_status/systemStatusLogic.ts b/frontend/src/scenes/system_status/systemStatusLogic.ts new file mode 100644 index 0000000000000..dd9228eb26f0f --- /dev/null +++ b/frontend/src/scenes/system_status/systemStatusLogic.ts @@ -0,0 +1,46 @@ +import api from 'lib/api' +import { kea } from 'kea' + +interface Error { + detail: string + code: string +} + +interface SystemStatus { + metric: string + value: string +} + +export const systemStatusLogic = kea({ + actions: { + setError: (error: Error) => ({ error }), + addSystemStatus: (systemStatus: SystemStatus) => ({ systemStatus }), + }, + loaders: { + systemStatus: [ + [], + { + loadSystemStatus: async () => { + return (await api.get('_system_status')).results + }, + }, + ], + }, + reducers: { + systemStatus: { + addSystemStatus: (state: Array, { systemStatus }) => [systemStatus, ...state], + }, + error: [ + false, + { + setError: (_, { error }) => error, + }, + ], + }, + + events: ({ actions }) => ({ + afterMount: () => { + actions.loadSystemStatus() + }, + }), +}) diff --git a/posthog/urls.py b/posthog/urls.py index bc50a7a6e82ef..f06e24b75a33b 100644 --- a/posthog/urls.py +++ b/posthog/urls.py @@ -23,7 +23,7 @@ from .api import api_not_found, capture, dashboard, decide, router, team, user from .models import Event, Team, User from .utils import render_template -from .views import health, preflight_check, stats +from .views import health, preflight_check, stats, system_status def home(request, **kwargs): @@ -207,6 +207,7 @@ def opt_slash_path(route: str, view: Callable, name: Optional[str] = None) -> st opt_slash_path("_health", health), opt_slash_path("_stats", stats), opt_slash_path("_preflight", preflight_check), + opt_slash_path("_system_status", system_status), # admin path("admin/", admin.site.urls), path("admin/", include("loginas.urls")), diff --git a/posthog/utils.py b/posthog/utils.py index 9c5c1d7757153..a8a0b805d2d9f 100644 --- a/posthog/utils.py +++ b/posthog/utils.py @@ -18,6 +18,7 @@ from dateutil import parser from dateutil.relativedelta import relativedelta from django.conf import settings +from django.db.utils import DatabaseError from django.http import HttpRequest, HttpResponse from django.template.loader import get_template from django.utils import timezone @@ -365,3 +366,52 @@ def get_machine_id() -> str: # MAC addresses are 6 bits long, so overflow shouldn't happen # hashing here as we don't care about the actual address, just it being rather consistent return hashlib.md5(uuid.getnode().to_bytes(6, "little")).hexdigest() + + +def get_table_size(table_name): + from django.db import connection + + query = ( + f'SELECT pg_size_pretty(pg_total_relation_size(relid)) AS "size" ' + f"FROM pg_catalog.pg_statio_user_tables " + f"WHERE relname = '{table_name}'" + ) + cursor = connection.cursor() + cursor.execute(query) + return dict_from_cursor_fetchall(cursor) + + +def get_table_approx_count(table_name): + from django.db import connection + + query = f"SELECT reltuples::BIGINT as \"approx_count\" FROM pg_class WHERE relname = '{table_name}'" + cursor = connection.cursor() + cursor.execute(query) + return dict_from_cursor_fetchall(cursor) + + +def is_postgres_alive() -> bool: + from posthog.models import User + + try: + User.objects.count() + return True + except DatabaseError: + return False + + +def is_redis_alive() -> bool: + try: + return get_redis_heartbeat() != "offline" + except BaseException: + return False + + +def get_redis_info() -> dict: + redis_instance = redis.from_url(settings.REDIS_URL, db=0) + return redis_instance.info() + + +def get_redis_queue_depth() -> int: + redis_instance = redis.from_url(settings.REDIS_URL, db=0) + return redis_instance.llen("celery") diff --git a/posthog/views.py b/posthog/views.py index 97872ef44e978..932d8c2b43de9 100644 --- a/posthog/views.py +++ b/posthog/views.py @@ -1,10 +1,20 @@ from typing import Dict, Union -from django.db import DatabaseError +from django.conf import settings +from django.contrib.auth.decorators import login_required from django.http import HttpResponse, JsonResponse from django.views.decorators.cache import never_cache +from rest_framework.exceptions import AuthenticationFailed + +from posthog.utils import ( + get_redis_info, + get_redis_queue_depth, + get_table_approx_count, + get_table_size, + is_postgres_alive, + is_redis_alive, +) -from .models.user import User from .utils import get_redis_heartbeat @@ -19,19 +29,48 @@ def stats(request): @never_cache -def preflight_check(request): - redis: bool = False - db: bool = False +@login_required +def system_status(request): + is_multitenancy: bool = getattr(settings, "MULTI_TENANCY", False) + + if is_multitenancy and not request.user.is_staff: + raise AuthenticationFailed(detail="You're not authorized.") + + from .models import Element, Event + + redis_alive = is_redis_alive() + postgres_alive = is_postgres_alive() + + metrics = list() + + metrics.append({"metric": "Redis alive", "value": str(redis_alive)}) + metrics.append({"metric": "Postgres DB alive", "value": str(postgres_alive)}) - try: - redis = get_redis_heartbeat() != "offline" - except BaseException: - pass + if postgres_alive: + event_table_count = get_table_approx_count(Event._meta.db_table)[0]["approx_count"] + event_table_size = get_table_size(Event._meta.db_table)[0]["size"] - try: - User.objects.count() - db = True - except DatabaseError: - pass + element_table_count = get_table_approx_count(Element._meta.db_table)[0]["approx_count"] + element_table_size = get_table_size(Element._meta.db_table)[0]["size"] - return JsonResponse({"django": True, "redis": redis, "db": db}) + metrics.append( + {"metric": "Postgres Element table", "value": f"ca {element_table_count} rows ({element_table_size})"} + ) + metrics.append({"metric": "Postgres Event table", "value": f"ca {event_table_count} rows ({event_table_size})"}) + + if redis_alive: + redis_info = get_redis_info() + redis_queue_depth = get_redis_queue_depth() + metrics.append({"metric": "Redis current queue depth", "value": f"{redis_queue_depth}"}) + metrics.append({"metric": "Redis memory used", "value": f"{redis_info['used_memory_human']}"}) + metrics.append({"metric": "Redis memory peak", "value": f"{redis_info['used_memory_peak_human']}"}) + metrics.append( + {"metric": "Redis total memory available", "value": f"{redis_info['total_system_memory_human']}"} + ) + + return JsonResponse({"results": metrics}) + + +@never_cache +def preflight_check(request): + return JsonResponse({"django": True, "redis": is_redis_alive(), "db": is_postgres_alive()}) From df18a4ac1403a386416f30557926a3a04096076f Mon Sep 17 00:00:00 2001 From: Ahti Kitsik Date: Wed, 14 Oct 2020 07:39:57 -0700 Subject: [PATCH 2/4] More resilient to Redis outage. Ensure the table gets loaded. --- cypress/integration/system_status.js | 2 ++ posthog/views.py | 23 ++++++++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/cypress/integration/system_status.js b/cypress/integration/system_status.js index 2908d89d0d3f1..7ddf570670b9f 100644 --- a/cypress/integration/system_status.js +++ b/cypress/integration/system_status.js @@ -3,5 +3,7 @@ describe('System Status', () => { cy.get('[data-attr=menu-item-settings]').click() cy.get('[data-attr=menu-item-system-status]').click() cy.get('h1').should('contain', 'System Status') + cy.get('table').should('contain', 'Postgres Event table') + cy.get('table').should('contain', 'Redis current queue depth') }) }) diff --git a/posthog/views.py b/posthog/views.py index 932d8c2b43de9..890b0246bad4b 100644 --- a/posthog/views.py +++ b/posthog/views.py @@ -58,15 +58,20 @@ def system_status(request): ) metrics.append({"metric": "Postgres Event table", "value": f"ca {event_table_count} rows ({event_table_size})"}) - if redis_alive: - redis_info = get_redis_info() - redis_queue_depth = get_redis_queue_depth() - metrics.append({"metric": "Redis current queue depth", "value": f"{redis_queue_depth}"}) - metrics.append({"metric": "Redis memory used", "value": f"{redis_info['used_memory_human']}"}) - metrics.append({"metric": "Redis memory peak", "value": f"{redis_info['used_memory_peak_human']}"}) - metrics.append( - {"metric": "Redis total memory available", "value": f"{redis_info['total_system_memory_human']}"} - ) + if not redis_alive: + import redis + + try: + redis_info = get_redis_info() + redis_queue_depth = get_redis_queue_depth() + metrics.append({"metric": "Redis current queue depth", "value": f"{redis_queue_depth}"}) + metrics.append({"metric": "Redis memory used", "value": f"{redis_info['used_memory_human']}"}) + metrics.append({"metric": "Redis memory peak", "value": f"{redis_info['used_memory_peak_human']}"}) + metrics.append( + {"metric": "Redis total memory available", "value": f"{redis_info['total_system_memory_human']}"} + ) + except redis.exceptions.ConnectionError as e: + metrics.append({"metric": "Redis metrics", "value": f"Redis connected but failed to return metrics: {e}"}) return JsonResponse({"results": metrics}) From 99c37d61ef4d789151644fce8875cdae84fbb661 Mon Sep 17 00:00:00 2001 From: Ahti Kitsik Date: Wed, 14 Oct 2020 07:45:58 -0700 Subject: [PATCH 3/4] Bugfix and reworded. --- posthog/views.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/posthog/views.py b/posthog/views.py index 890b0246bad4b..7c1179bdb2894 100644 --- a/posthog/views.py +++ b/posthog/views.py @@ -58,7 +58,7 @@ def system_status(request): ) metrics.append({"metric": "Postgres Event table", "value": f"ca {event_table_count} rows ({event_table_size})"}) - if not redis_alive: + if redis_alive: import redis try: @@ -71,7 +71,9 @@ def system_status(request): {"metric": "Redis total memory available", "value": f"{redis_info['total_system_memory_human']}"} ) except redis.exceptions.ConnectionError as e: - metrics.append({"metric": "Redis metrics", "value": f"Redis connected but failed to return metrics: {e}"}) + metrics.append( + {"metric": "Redis metrics", "value": f"Redis connected but then failed to return metrics: {e}"} + ) return JsonResponse({"results": metrics}) From 62d9c7994e400308826c213b4967c64ca2b2610d Mon Sep 17 00:00:00 2001 From: Ahti Kitsik Date: Wed, 14 Oct 2020 08:20:36 -0700 Subject: [PATCH 4/4] Using kea-typegen now and removed a " ". --- frontend/src/layout/Sidebar.js | 2 +- frontend/src/scenes/system_status/systemStatusLogic.ts | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/frontend/src/layout/Sidebar.js b/frontend/src/layout/Sidebar.js index 41071970403cb..e2b67754b0505 100644 --- a/frontend/src/layout/Sidebar.js +++ b/frontend/src/layout/Sidebar.js @@ -46,7 +46,7 @@ function Logo() { ) } -// to show the right page i n the sidebar +// to show the right page in the sidebar const sceneOverride = { action: 'actions', person: 'people', diff --git a/frontend/src/scenes/system_status/systemStatusLogic.ts b/frontend/src/scenes/system_status/systemStatusLogic.ts index dd9228eb26f0f..03a1e3aa4d1e4 100644 --- a/frontend/src/scenes/system_status/systemStatusLogic.ts +++ b/frontend/src/scenes/system_status/systemStatusLogic.ts @@ -1,5 +1,6 @@ import api from 'lib/api' import { kea } from 'kea' +import { systemStatusLogicType } from 'types/scenes/system_status/systemStatusLogicType' interface Error { detail: string @@ -11,14 +12,14 @@ interface SystemStatus { value: string } -export const systemStatusLogic = kea({ +export const systemStatusLogic = kea>({ actions: { setError: (error: Error) => ({ error }), addSystemStatus: (systemStatus: SystemStatus) => ({ systemStatus }), }, loaders: { systemStatus: [ - [], + [] as SystemStatus[], { loadSystemStatus: async () => { return (await api.get('_system_status')).results @@ -27,9 +28,6 @@ export const systemStatusLogic = kea({ ], }, reducers: { - systemStatus: { - addSystemStatus: (state: Array, { systemStatus }) => [systemStatus, ...state], - }, error: [ false, {