Hotfix for missing session recording events (#3069)
* Hotfix for missing session recording events

Temporarily solves #2927,
though the error will rear its head again with plugin-based ingestion.

Opened https://github.com/PostHog/posthog/issues/3068 for long-term
solution.

* kwargs everywhere

* use settings over kwargs
macobo authored Jan 27, 2021
1 parent b912f1e commit dba58f4
Showing 2 changed files with 32 additions and 19 deletions.
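
Before the per-file diffs, a condensed sketch of the routing this hotfix introduces may help: each serialized snapshot payload is dispatched by size. The snippet below is an illustration distilled from the diff, not code from the repository; the thresholds mirror the constants added in ee/clickhouse/models/session_recording_event.py, and route_snapshot is a hypothetical helper name.

MAX_KAFKA_MESSAGE_LENGTH = 800_000   # stay under Kafka's ~1 MB default message-size cap
MAX_INSERT_LENGTH = 15_000_000       # upper bound for a direct, synchronous ClickHouse insert


def route_snapshot(snapshot_data_json: str) -> str:
    """Return the persistence path a serialized snapshot payload takes."""
    if len(snapshot_data_json) <= MAX_KAFKA_MESSAGE_LENGTH:
        return "kafka"          # normal path: produce to Kafka for asynchronous ingestion
    if len(snapshot_data_json) <= MAX_INSERT_LENGTH:
        return "direct insert"  # skip Kafka; INSERT synchronously with a raised max_query_size
    return "dropped"            # too large even for a direct insert; only reported to Sentry


print(route_snapshot("x" * 1_000_000))  # -> "direct insert"
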
26 changes: 13 additions & 13 deletions ee/clickhouse/client.py
@@ -9,7 +9,7 @@
from asgiref.sync import async_to_sync
from clickhouse_driver import Client as SyncClient
from clickhouse_pool import ChPool
-from django.conf import settings
+from django.conf import settings as app_settings
from django.core.cache import cache
from django.utils.timezone import now
from sentry_sdk.api import capture_exception
@@ -37,13 +37,13 @@
ch_client = None # type: Client
ch_sync_pool = None # type: ChPool

-def async_execute(query, args=None):
+def async_execute(query, args=None, settings=None):
return

-def sync_execute(query, args=None):
+def sync_execute(query, args=None, settings=None):
return

-def cache_sync_execute(query, args=None, redis_client=None, ttl=None):
+def cache_sync_execute(query, args=None, redis_client=None, ttl=None, settings=None):
return


@@ -60,9 +60,9 @@ def cache_sync_execute(query, args=None, redis_client=None, ttl=None):
)

@async_to_sync
-async def async_execute(query, args=None):
+async def async_execute(query, args=None, settings=None):
loop = asyncio.get_event_loop()
-task = loop.create_task(ch_client.execute(query, args))
+task = loop.create_task(ch_client.execute(query, args, settings=settings))
return task

else:
@@ -77,8 +77,8 @@ async def async_execute(query, args=None):
verify=CLICKHOUSE_VERIFY,
)

-def async_execute(query, args=None):
-    return sync_execute(query, args)
+def async_execute(query, args=None, settings=None):
+    return sync_execute(query, args, settings=settings)

ch_sync_pool = ChPool(
host=CLICKHOUSE_HOST,
@@ -92,26 +92,26 @@ def async_execute(query, args=None):
connections_max=100,
)

-def cache_sync_execute(query, args=None, redis_client=None, ttl=CACHE_TTL):
+def cache_sync_execute(query, args=None, redis_client=None, ttl=CACHE_TTL, settings=None):
if not redis_client:
redis_client = redis.get_client()
key = _key_hash(query, args)
if redis_client.exists(key):
result = _deserialize(redis_client.get(key))
return result
else:
-result = sync_execute(query, args)
+result = sync_execute(query, args, settings=settings)
redis_client.set(key, _serialize(result), ex=ttl)
return result

-def sync_execute(query, args=None):
+def sync_execute(query, args=None, settings=None):
start_time = time()
try:
with ch_sync_pool.get_client() as client:
-result = client.execute(query, args)
+result = client.execute(query, args, settings=settings)
finally:
execution_time = time() - start_time
-if settings.SHELL_PLUS_PRINT_SQL:
+if app_settings.SHELL_PLUS_PRINT_SQL:
print(format_sql(query, args))
print("Execution time: %.6fs" % (execution_time,))
if _save_query_user_id:
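
A note on what the new settings argument enables (an aside, not part of the commit): clickhouse_driver's Client.execute accepts a per-query settings dict, and max_query_size is the ClickHouse limit on how large the query text may grow. Because the driver substitutes %(...)s parameters into the query text on the client side, a multi-megabyte snapshot can exceed the default limit of roughly 256 KiB unless it is raised, which is what sync_execute(query, args, settings=...) now allows. A minimal standalone sketch, assuming a local ClickHouse server and a throwaway demo_events table:

from clickhouse_driver import Client

client = Client(host="localhost")  # assumed local ClickHouse; adjust host/credentials as needed

client.execute(
    "CREATE TABLE IF NOT EXISTS demo_events (id UInt64, payload String) ENGINE = Memory"
)

big_payload = "x" * 1_000_000  # ~1 MB literal, larger than the default max_query_size

# The %(...)s parameters are substituted into the query text, so the payload inflates
# the query itself; raising max_query_size per query lets ClickHouse parse it.
client.execute(
    "INSERT INTO demo_events (id, payload) SELECT %(id)s, %(payload)s",
    {"id": 1, "payload": big_payload},
    settings={"max_query_size": 15_000_000},
)

Passing the limit per query leaves the server-wide default untouched, which fits the commit's choice of an explicit settings dict over forwarding arbitrary kwargs.
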
25 changes: 19 additions & 6 deletions ee/clickhouse/models/session_recording_event.py
@@ -1,16 +1,22 @@
import datetime
import json
import logging
import uuid
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Union

from dateutil.parser import isoparse
from django.utils import timezone
+from sentry_sdk import capture_exception

+from ee.clickhouse.client import sync_execute
from ee.clickhouse.models.util import cast_timestamp_or_now
from ee.clickhouse.sql.session_recording_events import INSERT_SESSION_RECORDING_EVENT_SQL
from ee.kafka_client.client import ClickhouseProducer
from ee.kafka_client.topics import KAFKA_SESSION_RECORDING_EVENTS

logger = logging.getLogger(__name__)

+MAX_KAFKA_MESSAGE_LENGTH = 800_000
+MAX_INSERT_LENGTH = 15_000_000


def create_session_recording_event(
uuid: uuid.UUID,
@@ -22,15 +28,22 @@ def create_session_recording_event(
) -> str:
timestamp = cast_timestamp_or_now(timestamp)

+snapshot_data_json = json.dumps(snapshot_data)
data = {
"uuid": str(uuid),
"team_id": team_id,
"distinct_id": distinct_id,
"session_id": session_id,
"snapshot_data": json.dumps(snapshot_data),
"snapshot_data": snapshot_data_json,
"timestamp": timestamp,
"created_at": timestamp,
}
-p = ClickhouseProducer()
-p.produce(sql=INSERT_SESSION_RECORDING_EVENT_SQL, topic=KAFKA_SESSION_RECORDING_EVENTS, data=data)
+if len(snapshot_data_json) <= MAX_KAFKA_MESSAGE_LENGTH:
+    p = ClickhouseProducer()
+    p.produce(sql=INSERT_SESSION_RECORDING_EVENT_SQL, topic=KAFKA_SESSION_RECORDING_EVENTS, data=data)
+elif len(snapshot_data_json) <= MAX_INSERT_LENGTH:
+    sync_execute(INSERT_SESSION_RECORDING_EVENT_SQL, data, settings={"max_query_size": MAX_INSERT_LENGTH})
+else:
+    capture_exception(Exception(f"Session recording event data too large - {len(snapshot_data_json)}"))

return str(uuid)
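
One behavioural consequence worth spelling out (again an aside, not part of the commit): when the serialized payload exceeds MAX_INSERT_LENGTH, the recording chunk is not stored at all; sentry_sdk.capture_exception reports a constructed exception without raising it, so ingestion continues and only the Sentry event records the loss. A tiny standalone illustration of that reporting pattern, with an assumed payload length:

import sentry_sdk

sentry_sdk.init(dsn="")  # an empty DSN disables sending; supply a real DSN in production

payload_length = 20_000_000  # assumed length of an oversized serialized snapshot
if payload_length > 15_000_000:
    # Nothing is raised and nothing is stored; the oversized payload is only reported.
    sentry_sdk.capture_exception(
        Exception(f"Session recording event data too large - {payload_length}")
    )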
