From 1fead305202d9be96f5aac6603a3129f723e9549 Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 24 Jul 2024 12:22:01 -0700 Subject: [PATCH 001/100] Adds migration for tracking vector store indexing status --- .../20240724103100_v0.9.2_indexing_status.sql | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql diff --git a/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql b/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql new file mode 100644 index 000000000..32cfc47d5 --- /dev/null +++ b/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql @@ -0,0 +1,51 @@ +-- Create a table to store the status of the vector store file indexing +CREATE TABLE vector_store_file_status ( + id VARCHAR PRIMARY KEY, + user_id uuid references auth.users not null, + status VARCHAR CHECK (status IN ('processing', 'complete', 'error')), + created_at bigint default extract(epoch from now()) not null, + updated_at bigint default extract(epoch from now()) not null +); + +-- Add an index on user_id for faster queries +CREATE INDEX idx_vector_store_file_status_user_id ON vector_store_file_status(user_id); + +-- Turn on security +alter table vector_store_file_status enable row level security; + +-- Allow users to CRUD their own vector_store_file_status via API key. +create policy "Individuals can CRUD their own vector_store_file_status via API key." 
+ on vector_store_file_status for all + to anon + using + ( + exists ( + select 1 + from api_keys + where api_keys.api_key_hash = crypt(current_setting('request.headers')::json->>'x-custom-api-key', api_keys.api_key_hash) + and api_keys.user_id = vector_store_file_status.user_id + ) + ); + +-- Allow users to CRUD their own vector_store_file_status +create policy "Individuals can CRUD their own vector_store_file_status" on vector_store_file_status for + all to anon using (auth.uid() = user_id); + +-- Create a function to update the updated_at column +CREATE OR REPLACE FUNCTION update_modified_column() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = extract(epoch from now()); + RETURN NEW; +END; +$$ language 'plpgsql'; + +-- Create a trigger to automatically update the updated_at column +CREATE TRIGGER update_vector_store_file_status_modtime +BEFORE UPDATE ON vector_store_file_status +FOR EACH ROW +EXECUTE FUNCTION update_modified_column(); + +-- Enable Supabase realtime for the vector_store_file_status table +alter publication supabase_realtime +add table vector_store_file_status; \ No newline at end of file From 85045850dfb8bd08ff13d98cb91e67131f045e4e Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 24 Jul 2024 13:38:16 -0700 Subject: [PATCH 002/100] Removes enforcement of status --- .../migrations/20240724103100_v0.9.2_indexing_status.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql b/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql index 32cfc47d5..7b880de4d 100644 --- a/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql +++ b/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql @@ -2,7 +2,7 @@ CREATE TABLE vector_store_file_status ( id VARCHAR PRIMARY KEY, user_id uuid references auth.users not null, - status VARCHAR CHECK (status IN ('processing', 'complete', 'error')), + status varchar 
default 'in_progress' not null, created_at bigint default extract(epoch from now()) not null, updated_at bigint default extract(epoch from now()) not null ); From 7c876046b4c6d241b16d14f89172cade520d46c7 Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 24 Jul 2024 13:54:17 -0700 Subject: [PATCH 003/100] Simplifies sql query to focus on vector_store_file --- .../20240724103100_v0.9.2_indexing_status.sql | 41 ++++--------------- 1 file changed, 7 insertions(+), 34 deletions(-) diff --git a/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql b/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql index 7b880de4d..d829b87d4 100644 --- a/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql +++ b/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql @@ -1,35 +1,8 @@ --- Create a table to store the status of the vector store file indexing -CREATE TABLE vector_store_file_status ( - id VARCHAR PRIMARY KEY, - user_id uuid references auth.users not null, - status varchar default 'in_progress' not null, - created_at bigint default extract(epoch from now()) not null, - updated_at bigint default extract(epoch from now()) not null -); +-- Update the vector_store_file table to add an updated_at column +alter table vector_store_file add column updated_at bigint default extract(epoch from now()) not null; -- Add an index on user_id for faster queries -CREATE INDEX idx_vector_store_file_status_user_id ON vector_store_file_status(user_id); - --- Turn on security -alter table vector_store_file_status enable row level security; - --- Allow users to CRUD their own vector_store_file_status via API key. -create policy "Individuals can CRUD their own vector_store_file_status via API key." 
- on vector_store_file_status for all - to anon - using - ( - exists ( - select 1 - from api_keys - where api_keys.api_key_hash = crypt(current_setting('request.headers')::json->>'x-custom-api-key', api_keys.api_key_hash) - and api_keys.user_id = vector_store_file_status.user_id - ) - ); - --- Allow users to CRUD their own vector_store_file_status -create policy "Individuals can CRUD their own vector_store_file_status" on vector_store_file_status for - all to anon using (auth.uid() = user_id); +CREATE INDEX idx_vector_store_file_user_id ON vector_store_file(user_id); -- Create a function to update the updated_at column CREATE OR REPLACE FUNCTION update_modified_column() @@ -41,11 +14,11 @@ END; $$ language 'plpgsql'; -- Create a trigger to automatically update the updated_at column -CREATE TRIGGER update_vector_store_file_status_modtime -BEFORE UPDATE ON vector_store_file_status +CREATE TRIGGER update_vector_store_file_modtime +BEFORE UPDATE ON vector_store_file FOR EACH ROW EXECUTE FUNCTION update_modified_column(); --- Enable Supabase realtime for the vector_store_file_status table +-- Enable Supabase realtime for the vector_store_file table alter publication supabase_realtime -add table vector_store_file_status; \ No newline at end of file +add table vector_store_file; \ No newline at end of file From 0f4125a312c661f457add1f6c59a28b1c1907996 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 25 Jul 2024 10:05:54 -0700 Subject: [PATCH 004/100] Pulls supabase-realtime image from official instead of bitnami --- packages/supabase/bitnami-values.yaml | 3 +++ packages/supabase/zarf.yaml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 3f9dceb8b..e5e8c885b 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -75,6 +75,9 @@ realtime: resourcesPreset: "none" podLabels: sidecar.istio.io/inject: "false" + image: + repository: 
supabase/realtime + tag: latest rest: enabled: ###ZARF_VAR_ENABLE_REST### diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 4ad6c0fde..8336d9cde 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -92,7 +92,7 @@ components: - docker.io/bitnami/postgrest:11.2.2-debian-12-r15 - docker.io/bitnami/supabase-postgres:15.1.1-debian-12-r69 - docker.io/bitnami/supabase-postgres-meta:0.80.0-debian-12-r3 - - docker.io/bitnami/supabase-realtime:2.28.32-debian-12-r2 + - docker.io/supabase/realtime:latest - docker.io/bitnami/supabase-storage:0.48.4-debian-12-r2 - docker.io/bitnami/supabase-studio:0.24.3-debian-12-r3 - docker.io/bitnami/kong:3.6.1-debian-12-r18 From 53e44a5daac839adfbd4020c9682339e083127a0 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 25 Jul 2024 14:51:41 -0700 Subject: [PATCH 005/100] Reverts supabase realtime image replacement --- packages/supabase/bitnami-values.yaml | 3 --- packages/supabase/zarf.yaml | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index e5e8c885b..3f9dceb8b 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -75,9 +75,6 @@ realtime: resourcesPreset: "none" podLabels: sidecar.istio.io/inject: "false" - image: - repository: supabase/realtime - tag: latest rest: enabled: ###ZARF_VAR_ENABLE_REST### diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 8336d9cde..4ad6c0fde 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -92,7 +92,7 @@ components: - docker.io/bitnami/postgrest:11.2.2-debian-12-r15 - docker.io/bitnami/supabase-postgres:15.1.1-debian-12-r69 - docker.io/bitnami/supabase-postgres-meta:0.80.0-debian-12-r3 - - docker.io/supabase/realtime:latest + - docker.io/bitnami/supabase-realtime:2.28.32-debian-12-r2 - docker.io/bitnami/supabase-storage:0.48.4-debian-12-r2 - 
docker.io/bitnami/supabase-studio:0.24.3-debian-12-r3 - docker.io/bitnami/kong:3.6.1-debian-12-r18 From f652a162645c3dbab15ccc9490a645913341a3ed Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 10:42:29 -0700 Subject: [PATCH 006/100] Adds separate supabase realtime config for websocket/api --- .../manifests/declarative-conf-configmap.yaml | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/packages/supabase/manifests/declarative-conf-configmap.yaml b/packages/supabase/manifests/declarative-conf-configmap.yaml index 8a4620f2c..344ec2f48 100644 --- a/packages/supabase/manifests/declarative-conf-configmap.yaml +++ b/packages/supabase/manifests/declarative-conf-configmap.yaml @@ -95,11 +95,31 @@ data: - admin - anon - - name: realtime-v1 - _comment: "Realtime: /realtime/v1/* -> ws://supabase-realtime:80/socket/*" - url: http://supabase-realtime:80/socket + - name: realtime-v1-ws + _comment: "Realtime: /realtime/v1/* -> ws://supabase-realtime:80/socket/websocket/*" + url: http://supabase-realtime:80/socket/websocket routes: - - name: realtime-v1-all + - name: realtime-v1-ws + strip_path: true + paths: + - /realtime/v1/websocket + plugins: + - name: cors + - name: key-auth + config: + hide_credentials: false + - name: acl + config: + hide_groups_header: true + allow: + - admin + - anon + + - name: realtime-v1-api + _comment: "Realtime: /realtime/v1/* -> http://supabase-realtime:80/*" + url: http://supabase-realtime:80 + routes: + - name: realtime-v1-api strip_path: true paths: - /realtime/v1/ From 4b3fd69568b0f43562cfdc41143fd7c9ca99901f Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 10:49:51 -0700 Subject: [PATCH 007/100] Updates the default config with necessary vars --- packages/supabase/bitnami-values.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index af26cb2eb..79c7410c1 100644 --- 
a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -75,6 +75,21 @@ realtime: resourcesPreset: "none" podLabels: sidecar.istio.io/inject: "false" + defaultConfig: | + DB_HOST: {{ include "supabase.database.host" . | quote }} + DB_PORT: {{ include "supabase.database.port" . | quote }} + DB_NAME: {{ include "supabase.database.name" . | quote }} + DB_SSL: {{ .Values.dbSSL | quote }} + PORT: {{ .Values.realtime.containerPorts.http | quote }} + APP_NAME: "supabase-realtime" + ERL_AFLAGS: "-proto_dist inet_tcp" + REPLICATION_MODE: "RLS" + REPLICATION_POLL_INTERVAL: "100" + SECURE_CHANNELS: "true" + SLOT_NAME: "supabase_realtime_rls" + TEMPORARY_SLOT: "true" + DB_ENC_KEY: "testenckeychaangethisforsuresure" + DNS_NODES: "''" rest: enabled: ###ZARF_VAR_ENABLE_REST### From f6d4166298baac263a91a382278ce6d6dc1a3998 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 10:57:28 -0700 Subject: [PATCH 008/100] Adds init container placeholder for seeding tables via curl --- packages/supabase/bitnami-values.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 79c7410c1..e989cf3f0 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -90,6 +90,11 @@ realtime: TEMPORARY_SLOT: "true" DB_ENC_KEY: "testenckeychaangethisforsuresure" DNS_NODES: "''" + initContainers: + - name: realtime-init + image: docker.io/bitnami/os-shell:12-debian-12-r19 + imagePullPolicy: Always + command: ['sh', '-c', 'echo "hello world"'] rest: enabled: ###ZARF_VAR_ENABLE_REST### From 775339b40d7490ed8563fba8e7e1802029412c97 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 11:05:41 -0700 Subject: [PATCH 009/100] Moves configmap changes to extra configs --- packages/supabase/bitnami-values.yaml | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml 
b/packages/supabase/bitnami-values.yaml index e989cf3f0..c801bbbb1 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -75,19 +75,8 @@ realtime: resourcesPreset: "none" podLabels: sidecar.istio.io/inject: "false" - defaultConfig: | - DB_HOST: {{ include "supabase.database.host" . | quote }} - DB_PORT: {{ include "supabase.database.port" . | quote }} - DB_NAME: {{ include "supabase.database.name" . | quote }} - DB_SSL: {{ .Values.dbSSL | quote }} - PORT: {{ .Values.realtime.containerPorts.http | quote }} + extraConfig: | APP_NAME: "supabase-realtime" - ERL_AFLAGS: "-proto_dist inet_tcp" - REPLICATION_MODE: "RLS" - REPLICATION_POLL_INTERVAL: "100" - SECURE_CHANNELS: "true" - SLOT_NAME: "supabase_realtime_rls" - TEMPORARY_SLOT: "true" DB_ENC_KEY: "testenckeychaangethisforsuresure" DNS_NODES: "''" initContainers: From e6d5394c706868fbb5bdb24fba0b1f2c7a5d1eb1 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 11:14:35 -0700 Subject: [PATCH 010/100] Renames variable for compatbility with supabase version --- packages/supabase/bitnami-values.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index c801bbbb1..4f85548bc 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -76,7 +76,8 @@ realtime: podLabels: sidecar.istio.io/inject: "false" extraConfig: | - APP_NAME: "supabase-realtime" + ALLOC_ID: "supabase-realtime" + FLY_APP_NAME: "supabase-realtime" DB_ENC_KEY: "testenckeychaangethisforsuresure" DNS_NODES: "''" initContainers: From 427ffce6493c2c5fda067d45a6b9b984796e8006 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 11:20:08 -0700 Subject: [PATCH 011/100] Ensures configmap is mounted in container --- packages/supabase/bitnami-values.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/supabase/bitnami-values.yaml 
b/packages/supabase/bitnami-values.yaml index 4f85548bc..b99171453 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -80,6 +80,7 @@ realtime: FLY_APP_NAME: "supabase-realtime" DB_ENC_KEY: "testenckeychaangethisforsuresure" DNS_NODES: "''" + extraConfigExistingConfigmap: "supabase-realtime-extra" initContainers: - name: realtime-init image: docker.io/bitnami/os-shell:12-debian-12-r19 From 7ba856691048bd4d8eb95eaccdfbe213e5c00eaf Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 11:25:23 -0700 Subject: [PATCH 012/100] Specify the variables manuallY --- packages/supabase/bitnami-values.yaml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index b99171453..9991f6a7d 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -75,12 +75,15 @@ realtime: resourcesPreset: "none" podLabels: sidecar.istio.io/inject: "false" - extraConfig: | - ALLOC_ID: "supabase-realtime" - FLY_APP_NAME: "supabase-realtime" - DB_ENC_KEY: "testenckeychaangethisforsuresure" - DNS_NODES: "''" - extraConfigExistingConfigmap: "supabase-realtime-extra" + extraEnvVars: + - name: ALLOC_ID + value: "supabase-realtime" + - name: FLY_APP_NAME + value: "supabase-realtime" + - name: DB_ENC_KEY + value: "testenckeychaangethisforsuresure" + - name: DNS_NODES + value: "''" initContainers: - name: realtime-init image: docker.io/bitnami/os-shell:12-debian-12-r19 From 62a2793c9bf5ec2502fb82d34e6e8da48480216f Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 11:27:55 -0700 Subject: [PATCH 013/100] Fixes env var typo --- packages/supabase/bitnami-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 9991f6a7d..f63344b64 100644 --- a/packages/supabase/bitnami-values.yaml +++ 
b/packages/supabase/bitnami-values.yaml @@ -76,7 +76,7 @@ realtime: podLabels: sidecar.istio.io/inject: "false" extraEnvVars: - - name: ALLOC_ID + - name: FLY_ALLOC_ID value: "supabase-realtime" - name: FLY_APP_NAME value: "supabase-realtime" From 86f0b4ba9067db976f777eedd46a84725a64a8b4 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 12:26:57 -0700 Subject: [PATCH 014/100] Adds script to seed the realtime instance --- packages/supabase/bitnami-values.yaml | 13 ++++++++++++- packages/supabase/scripts/setup-db.sh | 28 +++++++++++++++++++++++++++ packages/supabase/zarf.yaml | 3 +++ 3 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 packages/supabase/scripts/setup-db.sh diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index f63344b64..6fe5fef5c 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -88,7 +88,18 @@ realtime: - name: realtime-init image: docker.io/bitnami/os-shell:12-debian-12-r19 imagePullPolicy: Always - command: ['sh', '-c', 'echo "hello world"'] + env: + - name: SUPABASE_SERVICE_KEY + valueFrom: + secretKeyRef: + name: '{{ include "supabase.jwt.secretName" . }}' + key: '{{ include "supabase.jwt.serviceSecretKey" . 
}}' + - name: POSTGRESQL_PASSWORD + valueFrom: + secretKeyRef: + name: supabase-postgresql + key: postgres-password + command: ['sh', '-c', 'scripts/setup-db.sh'] rest: enabled: ###ZARF_VAR_ENABLE_REST### diff --git a/packages/supabase/scripts/setup-db.sh b/packages/supabase/scripts/setup-db.sh new file mode 100644 index 000000000..7397a9b0a --- /dev/null +++ b/packages/supabase/scripts/setup-db.sh @@ -0,0 +1,28 @@ +curl --insecure -L -X POST \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" \ + -H "apiKey: $SUPABASE_SERVICE_KEY" \ + -d '{ + "tenant" : { + "name": "supabase-realtime", + "external_id": "supabase-realtime", + "jwt_secret": "testenckeychaangethisforsuresure", + "extensions": [ + { + "type": "postgres_cdc_rls", + "settings": { + "db_name": "postgres", + "db_host": "supabase-postgresql", + "db_user": "supabase_admin", + "db_password": "'$POSTGRESQL_PASSWORD'", + "db_port": "5432", + "region": "us-east-1", + "publication": "supabase_realtime", + "ssl_enforced": "false", + "poll_interval_ms": "100", + "poll_max_record_bytes": "1048576" + } + } + ] + } + }' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants \ No newline at end of file diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 74d04ac9a..390503bf4 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -56,6 +56,9 @@ variables: components: - name: supabase required: true + files: + - source: scripts/setup-db.sh + target: /scripts/setup-db.sh charts: # This exists because the jwt token job fails to run in the main helm chart at the proper time due to its reliance on `helm.sh/hook: post-install`. # This annotation causes it to run at the end of the Supabase Zarf component. 
From 4d9bbd939383d0135b53f7b33474798ee78333d7 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 12:30:11 -0700 Subject: [PATCH 015/100] Moves script somewhere with permissions --- packages/supabase/bitnami-values.yaml | 2 +- packages/supabase/zarf.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 6fe5fef5c..0e54d00ae 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -99,7 +99,7 @@ realtime: secretKeyRef: name: supabase-postgresql key: postgres-password - command: ['sh', '-c', 'scripts/setup-db.sh'] + command: ['sh', '-c', 'setup-db.sh'] rest: enabled: ###ZARF_VAR_ENABLE_REST### diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 390503bf4..2fe586c23 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -58,7 +58,7 @@ components: required: true files: - source: scripts/setup-db.sh - target: /scripts/setup-db.sh + target: setup-db.sh charts: # This exists because the jwt token job fails to run in the main helm chart at the proper time due to its reliance on `helm.sh/hook: post-install`. # This annotation causes it to run at the end of the Supabase Zarf component. 
From 39cc4d5d46c3b4e8712a15b9446f2141e82cefb0 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 12:35:05 -0700 Subject: [PATCH 016/100] Moves script to values file --- packages/supabase/bitnami-values.yaml | 34 +++++++++++++++++++++++++-- packages/supabase/scripts/setup-db.sh | 28 ---------------------- packages/supabase/zarf.yaml | 3 --- 3 files changed, 32 insertions(+), 33 deletions(-) delete mode 100644 packages/supabase/scripts/setup-db.sh diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 0e54d00ae..f8db12217 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -99,7 +99,37 @@ realtime: secretKeyRef: name: supabase-postgresql key: postgres-password - command: ['sh', '-c', 'setup-db.sh'] + command: + - sh + - -c + - >- + curl --insecure -L -X POST + -H "Content-Type: application/json" + -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" + -H "apiKey: $SUPABASE_SERVICE_KEY" + -d '{ + "tenant": { + "name": "supabase-realtime", + "external_id": "supabase-realtime", + "jwt_secret": "testenckeychaangethisforsuresure", + "extensions": [{ + "type": "postgres_cdc_rls", + "settings": { + "db_name": "postgres", + "db_host": "supabase-postgresql", + "db_user": "supabase_admin", + "db_password": "'"$POSTGRESQL_PASSWORD"'", + "db_port": "5432", + "region": "us-east-1", + "publication": "supabase_realtime", + "ssl_enforced": "false", + "poll_interval_ms": "100", + "poll_max_record_bytes": "1048576" + } + }] + } + }' + https://supabase-kong.###ZARF_VAR_DOMAIN###/realtime/v1/api/tenants rest: enabled: ###ZARF_VAR_ENABLE_REST### @@ -209,4 +239,4 @@ postgresql: ## @param postgresql.postgresqlSharedPreloadLibraries Set the shared_preload_libraries parameter in postgresql.conf ## Setting an empty value in order to force the default extensions of supabase-postgres ## - postgresqlSharedPreloadLibraries: "pg_stat_statements, pg_stat_monitor, pgaudit, plpgsql, plpgsql_check, pg_cron, 
pg_net, pgsodium, timescaledb, auto_explain, vector" + postgresqlSharedPreloadLibraries: "pg_stat_statements, pg_stat_monitor, pgaudit, plpgsql, plpgsql_check, pg_cron, pg_net, pgsodium, timescaledb, auto_explain, vector" \ No newline at end of file diff --git a/packages/supabase/scripts/setup-db.sh b/packages/supabase/scripts/setup-db.sh deleted file mode 100644 index 7397a9b0a..000000000 --- a/packages/supabase/scripts/setup-db.sh +++ /dev/null @@ -1,28 +0,0 @@ -curl --insecure -L -X POST \ - -H 'Content-Type: application/json' \ - -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" \ - -H "apiKey: $SUPABASE_SERVICE_KEY" \ - -d '{ - "tenant" : { - "name": "supabase-realtime", - "external_id": "supabase-realtime", - "jwt_secret": "testenckeychaangethisforsuresure", - "extensions": [ - { - "type": "postgres_cdc_rls", - "settings": { - "db_name": "postgres", - "db_host": "supabase-postgresql", - "db_user": "supabase_admin", - "db_password": "'$POSTGRESQL_PASSWORD'", - "db_port": "5432", - "region": "us-east-1", - "publication": "supabase_realtime", - "ssl_enforced": "false", - "poll_interval_ms": "100", - "poll_max_record_bytes": "1048576" - } - } - ] - } - }' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants \ No newline at end of file diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 2fe586c23..74d04ac9a 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -56,9 +56,6 @@ variables: components: - name: supabase required: true - files: - - source: scripts/setup-db.sh - target: setup-db.sh charts: # This exists because the jwt token job fails to run in the main helm chart at the proper time due to its reliance on `helm.sh/hook: post-install`. # This annotation causes it to run at the end of the Supabase Zarf component. 
From dca8c6562cb7b5f96fbd8653be37dd17fffd5beb Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 12:39:37 -0700 Subject: [PATCH 017/100] Updates url and var for realtime init container --- packages/supabase/bitnami-values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index f8db12217..0b21ded23 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -111,7 +111,7 @@ realtime: "tenant": { "name": "supabase-realtime", "external_id": "supabase-realtime", - "jwt_secret": "testenckeychaangethisforsuresure", + "jwt_secret": "'"$DB_ENC_KEY"'", "extensions": [{ "type": "postgres_cdc_rls", "settings": { @@ -129,7 +129,7 @@ realtime: }] } }' - https://supabase-kong.###ZARF_VAR_DOMAIN###/realtime/v1/api/tenants + https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants rest: enabled: ###ZARF_VAR_ENABLE_REST### From c254ee3e5159b0a43750c151b5e71b2aca8b406a Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 13:33:25 -0700 Subject: [PATCH 018/100] Replace initContainer with sidecars --- packages/supabase/bitnami-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 0b21ded23..9e90a201f 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -84,7 +84,7 @@ realtime: value: "testenckeychaangethisforsuresure" - name: DNS_NODES value: "''" - initContainers: + sidecars: - name: realtime-init image: docker.io/bitnami/os-shell:12-debian-12-r19 imagePullPolicy: Always From 811cefb2924b31ec2a3d3789e13d580ea3722eb6 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 13:49:56 -0700 Subject: [PATCH 019/100] Adds wait for realtime container --- packages/supabase/bitnami-values.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 9e90a201f..6149cb9b6 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -103,6 +103,9 @@ realtime: - sh - -c - >- + echo "Waiting for supabase-realtime to be ready..."; + until nc -z localhost 4000; do sleep 1; done; + echo "supabase-realtime is ready. Sending curl request..."; curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" From 1ec9ad869d464c244ab01003e880ee9e6293fa28 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 13:55:08 -0700 Subject: [PATCH 020/100] Replaces os-shell with busybox --- packages/supabase/bitnami-values.yaml | 4 ++-- packages/supabase/zarf.yaml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 6149cb9b6..5c4816b0e 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -86,8 +86,8 @@ realtime: value: "''" sidecars: - name: realtime-init - image: docker.io/bitnami/os-shell:12-debian-12-r19 - imagePullPolicy: Always + image: docker.io/busybox:stable + imagePullPolicy: IfNotPresent env: - name: SUPABASE_SERVICE_KEY valueFrom: diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 74d04ac9a..dd14a519c 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -96,6 +96,7 @@ components: - docker.io/bitnami/supabase-storage:0.48.4-debian-12-r2 - docker.io/bitnami/supabase-studio:0.24.3-debian-12-r3 - docker.io/bitnami/kong:3.6.1-debian-12-r18 + - docker.io/busybox:stable - name: supabase-post-process description: "Perform necessary post processing here" required: true From 2829b415b5503ec6b1baac9d2eed54568891bedc Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 14:08:49 -0700 Subject: [PATCH 021/100] Replaces busybox container with a curl container --- 
packages/supabase/bitnami-values.yaml | 4 ++-- packages/supabase/zarf.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 5c4816b0e..dd8b90a7d 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -86,7 +86,7 @@ realtime: value: "''" sidecars: - name: realtime-init - image: docker.io/busybox:stable + image: docker.io/curlimages/curl:latest imagePullPolicy: IfNotPresent env: - name: SUPABASE_SERVICE_KEY @@ -104,7 +104,7 @@ realtime: - -c - >- echo "Waiting for supabase-realtime to be ready..."; - until nc -z localhost 4000; do sleep 1; done; + until nc -z localhost 9999; do sleep 1; done; echo "supabase-realtime is ready. Sending curl request..."; curl --insecure -L -X POST -H "Content-Type: application/json" diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index dd14a519c..07e37fabd 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -96,7 +96,7 @@ components: - docker.io/bitnami/supabase-storage:0.48.4-debian-12-r2 - docker.io/bitnami/supabase-studio:0.24.3-debian-12-r3 - docker.io/bitnami/kong:3.6.1-debian-12-r18 - - docker.io/busybox:stable + - docker.io/curlimages/curl:latest - name: supabase-post-process description: "Perform necessary post processing here" required: true From 4a30a4b6e7b6298236b15baada7a90feb579d177 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 2 Aug 2024 14:24:23 -0700 Subject: [PATCH 022/100] Replaces nc wait with curl wait --- packages/supabase/bitnami-values.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index dd8b90a7d..4c7a6b360 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -104,9 +104,7 @@ realtime: - -c - >- echo "Waiting for supabase-realtime to be ready..."; - until nc -z 
localhost 9999; do sleep 1; done; - echo "supabase-realtime is ready. Sending curl request..."; - curl --insecure -L -X POST + curl --max-time 60 --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" From ffa7ffa475018277b99d88d151c07025a3989d67 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 5 Aug 2024 09:40:36 -0700 Subject: [PATCH 023/100] Try to replace how the realtimme seed is happening --- packages/supabase/bitnami-values.yaml | 44 ++++++++++----------------- packages/supabase/zarf.yaml | 1 - 2 files changed, 16 insertions(+), 29 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 4c7a6b360..e6b315ad5 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -86,7 +86,7 @@ realtime: value: "''" sidecars: - name: realtime-init - image: docker.io/curlimages/curl:latest + image: docker.io/bitnami/kubectl:1.30.0-debian-12-r0 imagePullPolicy: IfNotPresent env: - name: SUPABASE_SERVICE_KEY @@ -99,38 +99,26 @@ realtime: secretKeyRef: name: supabase-postgresql key: postgres-password + - name: DB_ENC_KEY + value: "testenckeychaangethisforsuresure" command: - sh - -c - >- echo "Waiting for supabase-realtime to be ready..."; - curl --max-time 60 --insecure -L -X POST - -H "Content-Type: application/json" - -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" - -H "apiKey: $SUPABASE_SERVICE_KEY" - -d '{ - "tenant": { - "name": "supabase-realtime", - "external_id": "supabase-realtime", - "jwt_secret": "'"$DB_ENC_KEY"'", - "extensions": [{ - "type": "postgres_cdc_rls", - "settings": { - "db_name": "postgres", - "db_host": "supabase-postgresql", - "db_user": "supabase_admin", - "db_password": "'"$POSTGRESQL_PASSWORD"'", - "db_port": "5432", - "region": "us-east-1", - "publication": "supabase_realtime", - "ssl_enforced": "false", - "poll_interval_ms": "100", - "poll_max_record_bytes": 
"1048576" - } - }] - } - }' - https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants + until kubectl get pod supabase-realtime -n leapfrogai -o jsonpath='{.status.containerStatuses[?(@.name=="supabase-realtime")].ready}' | grep -q "true"; do + echo "Waiting for supabase-realtime container to be ready..."; + sleep 5; + done; + echo "supabase-realtime container is ready. Creating job to run curl command..."; + kubectl create job --from=cronjob/curl-job curl-job-$(date +%s) -n leapfrogai; + volumeMounts: + - name: curl-job-config + mountPath: /etc/config + volumes: + - name: curl-job-config + configMap: + name: curl-job-config rest: enabled: ###ZARF_VAR_ENABLE_REST### diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 07e37fabd..74d04ac9a 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -96,7 +96,6 @@ components: - docker.io/bitnami/supabase-storage:0.48.4-debian-12-r2 - docker.io/bitnami/supabase-studio:0.24.3-debian-12-r3 - docker.io/bitnami/kong:3.6.1-debian-12-r18 - - docker.io/curlimages/curl:latest - name: supabase-post-process description: "Perform necessary post processing here" required: true From 367738316d0920ff2bd6c37b0d9731ad3158c352 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 5 Aug 2024 11:31:11 -0700 Subject: [PATCH 024/100] Replaces job with inline curl call --- packages/supabase/bitnami-values.yaml | 47 +++++++++++++++++++-------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index e6b315ad5..945c460d0 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -105,20 +105,39 @@ realtime: - sh - -c - >- - echo "Waiting for supabase-realtime to be ready..."; - until kubectl get pod supabase-realtime -n leapfrogai -o jsonpath='{.status.containerStatuses[?(@.name=="supabase-realtime")].ready}' | grep -q "true"; do - echo "Waiting for 
supabase-realtime container to be ready..."; - sleep 5; - done; - echo "supabase-realtime container is ready. Creating job to run curl command..."; - kubectl create job --from=cronjob/curl-job curl-job-$(date +%s) -n leapfrogai; - volumeMounts: - - name: curl-job-config - mountPath: /etc/config - volumes: - - name: curl-job-config - configMap: - name: curl-job-config + echo "Waiting for supabase-realtime to be ready..."; + until kubectl get pod supabase-realtime -n leapfrogai -o jsonpath='{.status.containerStatuses[?(@.name=="supabase-realtime")].ready}' | grep -q "true"; do + echo "Waiting for supabase-realtime container to be ready..."; + sleep 5; + done; + echo "supabase-realtime container is ready. Running curl command..."; + curl --insecure -L -X POST + -H "Content-Type: application/json" + -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" + -H "apiKey: $SUPABASE_SERVICE_KEY" + -d '{ + "tenant": { + "name": "supabase-realtime", + "external_id": "supabase-realtime", + "jwt_secret": "'"$DB_ENC_KEY"'", + "extensions": [{ + "type": "postgres_cdc_rls", + "settings": { + "db_name": "postgres", + "db_host": "supabase-postgresql", + "db_user": "supabase_admin", + "db_password": "'"$POSTGRESQL_PASSWORD"'", + "db_port": "5432", + "region": "us-east-1", + "publication": "supabase_realtime", + "ssl_enforced": "false", + "poll_interval_ms": "100", + "poll_max_record_bytes": "1048576" + } + }] + } + }' + https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants rest: enabled: ###ZARF_VAR_ENABLE_REST### From 395454cc6b5952b9e02a191d87280dd720d40bd0 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 5 Aug 2024 11:39:57 -0700 Subject: [PATCH 025/100] Replaces wget commands with curl --- packages/supabase/bitnami-values.yaml | 68 +++++++++++++-------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 945c460d0..72b9dbd90 100644 --- 
a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -86,7 +86,7 @@ realtime: value: "''" sidecars: - name: realtime-init - image: docker.io/bitnami/kubectl:1.30.0-debian-12-r0 + image: docker.io/bitnami/os-shell:12-debian-12-r19 imagePullPolicy: IfNotPresent env: - name: SUPABASE_SERVICE_KEY @@ -105,39 +105,39 @@ realtime: - sh - -c - >- - echo "Waiting for supabase-realtime to be ready..."; - until kubectl get pod supabase-realtime -n leapfrogai -o jsonpath='{.status.containerStatuses[?(@.name=="supabase-realtime")].ready}' | grep -q "true"; do - echo "Waiting for supabase-realtime container to be ready..."; - sleep 5; - done; - echo "supabase-realtime container is ready. Running curl command..."; - curl --insecure -L -X POST - -H "Content-Type: application/json" - -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" - -H "apiKey: $SUPABASE_SERVICE_KEY" - -d '{ - "tenant": { - "name": "supabase-realtime", - "external_id": "supabase-realtime", - "jwt_secret": "'"$DB_ENC_KEY"'", - "extensions": [{ - "type": "postgres_cdc_rls", - "settings": { - "db_name": "postgres", - "db_host": "supabase-postgresql", - "db_user": "supabase_admin", - "db_password": "'"$POSTGRESQL_PASSWORD"'", - "db_port": "5432", - "region": "us-east-1", - "publication": "supabase_realtime", - "ssl_enforced": "false", - "poll_interval_ms": "100", - "poll_max_record_bytes": "1048576" - } - }] - } - }' - https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants + echo "Waiting for supabase-realtime to be ready..."; + until curl -s -o /dev/null -w "%{http_code}" http://supabase-realtime.leapfrogai.svc.cluster.local:80/health | grep -q "200"; do + echo "Waiting for supabase-realtime to be ready..."; + sleep 5; + done; + echo "supabase-realtime is ready. 
Running curl command..."; + curl --insecure -L -X POST + -H "Content-Type: application/json" + -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" + -H "apiKey: $SUPABASE_SERVICE_KEY" + -d '{ + "tenant": { + "name": "supabase-realtime", + "external_id": "supabase-realtime", + "jwt_secret": "'"$DB_ENC_KEY"'", + "extensions": [{ + "type": "postgres_cdc_rls", + "settings": { + "db_name": "postgres", + "db_host": "supabase-postgresql", + "db_user": "supabase_admin", + "db_password": "'"$POSTGRESQL_PASSWORD"'", + "db_port": "5432", + "region": "us-east-1", + "publication": "supabase_realtime", + "ssl_enforced": "false", + "poll_interval_ms": "100", + "poll_max_record_bytes": "1048576" + } + }] + } + }' + https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants rest: enabled: ###ZARF_VAR_ENABLE_REST### From f99eda0f0660b51c7c047aaa15057eb7d193c489 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 5 Aug 2024 11:45:31 -0700 Subject: [PATCH 026/100] Replaces invalid health endpoint on realtime --- packages/supabase/bitnami-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 72b9dbd90..03a277531 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -106,7 +106,7 @@ realtime: - -c - >- echo "Waiting for supabase-realtime to be ready..."; - until curl -s -o /dev/null -w "%{http_code}" http://supabase-realtime.leapfrogai.svc.cluster.local:80/health | grep -q "200"; do + until curl -s -o /dev/null -w "%{http_code}" http://supabase-realtime.leapfrogai.svc.cluster.local:80/ | grep -q "200"; do echo "Waiting for supabase-realtime to be ready..."; sleep 5; done; From 032709528a1fe6111f295b03a9b1a8730907f1b8 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 5 Aug 2024 11:49:31 -0700 Subject: [PATCH 027/100] Moves the curl command to one line --- packages/supabase/bitnami-values.yaml | 28 +-------------------------- 1 
file changed, 1 insertion(+), 27 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 03a277531..4fdd6b670 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -111,33 +111,7 @@ realtime: sleep 5; done; echo "supabase-realtime is ready. Running curl command..."; - curl --insecure -L -X POST - -H "Content-Type: application/json" - -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" - -H "apiKey: $SUPABASE_SERVICE_KEY" - -d '{ - "tenant": { - "name": "supabase-realtime", - "external_id": "supabase-realtime", - "jwt_secret": "'"$DB_ENC_KEY"'", - "extensions": [{ - "type": "postgres_cdc_rls", - "settings": { - "db_name": "postgres", - "db_host": "supabase-postgresql", - "db_user": "supabase_admin", - "db_password": "'"$POSTGRESQL_PASSWORD"'", - "db_port": "5432", - "region": "us-east-1", - "publication": "supabase_realtime", - "ssl_enforced": "false", - "poll_interval_ms": "100", - "poll_max_record_bytes": "1048576" - } - }] - } - }' - https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants + curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" -d '{"tenant":{"name":"supabase-realtime","external_id":"supabase-realtime","jwt_secret":"'"$DB_ENC_KEY"'","extensions":[{"type":"postgres_cdc_rls","settings":{"db_name":"postgres","db_host":"supabase-postgresql","db_user":"supabase_admin","db_password":"'"$POSTGRESQL_PASSWORD"'","db_port":"5432","region":"us-east-1","publication":"supabase_realtime","ssl_enforced":"false","poll_interval_ms":"100","poll_max_record_bytes":"1048576"}}]}}' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants rest: enabled: ###ZARF_VAR_ENABLE_REST### From 18fccace9e56b703db86115c22e6a66cd768ac9a Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 5 Aug 2024 14:33:06 -0700 Subject: [PATCH 028/100] Updates versions of 
supabase-realtime --- packages/supabase/bitnami-values.yaml | 12 +++++------- packages/supabase/zarf.yaml | 26 +++++++++++++------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 4fdd6b670..9db2a745c 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -76,17 +76,15 @@ realtime: podLabels: sidecar.istio.io/inject: "false" extraEnvVars: - - name: FLY_ALLOC_ID - value: "supabase-realtime" - - name: FLY_APP_NAME + - name: APP_NAME value: "supabase-realtime" - name: DB_ENC_KEY value: "testenckeychaangethisforsuresure" - name: DNS_NODES - value: "''" + value: "supabase-realtime" sidecars: - name: realtime-init - image: docker.io/bitnami/os-shell:12-debian-12-r19 + image: docker.io/bitnami/os-shell:12-debian-12-r27 imagePullPolicy: IfNotPresent env: - name: SUPABASE_SERVICE_KEY @@ -144,7 +142,7 @@ volumePermissions: resourcesPreset: "none" psqlImage: - tag: 15.1.1-debian-12-r69 + tag: 15.6.1-debian-12-r2 kong: enabled: ###ZARF_VAR_ENABLE_KONG### @@ -209,7 +207,7 @@ kong: postgresql: enabled: ###ZARF_VAR_ENABLE_POSTGRES### image: - tag: 15.1.1-debian-12-r69 + tag: 15.6.1-debian-12-r2 debug: true primary: resourcesPreset: "none" diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 74d04ac9a..094219fcd 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -66,7 +66,7 @@ components: namespace: leapfrogai url: oci://registry-1.docker.io/bitnamicharts/supabase releaseName: supabase-bootstrap - version: 4.0.5 + version: 5.3.3 valuesFiles: - "bitnami-values.yaml" - "bitnami-values-bootstrap.yaml" @@ -81,21 +81,21 @@ components: namespace: leapfrogai url: oci://registry-1.docker.io/bitnamicharts/supabase releaseName: supabase - version: 4.0.5 + version: 5.3.3 valuesFiles: - "bitnami-values.yaml" images: - - docker.io/bitnami/gotrue:2.150.1-debian-12-r1 - - 
docker.io/bitnami/jwt-cli:6.0.0-debian-12-r20 - - docker.io/bitnami/kubectl:1.30.0-debian-12-r0 - - docker.io/bitnami/os-shell:12-debian-12-r19 - - docker.io/bitnami/postgrest:11.2.2-debian-12-r15 - - docker.io/bitnami/supabase-postgres:15.1.1-debian-12-r69 - - docker.io/bitnami/supabase-postgres-meta:0.80.0-debian-12-r3 - - docker.io/bitnami/supabase-realtime:2.28.32-debian-12-r2 - - docker.io/bitnami/supabase-storage:0.48.4-debian-12-r2 - - docker.io/bitnami/supabase-studio:0.24.3-debian-12-r3 - - docker.io/bitnami/kong:3.6.1-debian-12-r18 + - docker.io/bitnami/gotrue:2.155.6-debian-12-r3 + - docker.io/bitnami/jwt-cli:6.1.0-debian-12-r5 + - docker.io/bitnami/kubectl:1.30.3-debian-12-r4 + - docker.io/bitnami/os-shell:12-debian-12-r27 + - docker.io/bitnami/postgrest:11.2.2-debian-12-r31 + - docker.io/bitnami/supabase-postgres:15.6.1-debian-12-r2 + - docker.io/bitnami/supabase-postgres-meta:0.83.2-debian-12-r3 + - docker.io/bitnami/supabase-realtime:2.30.14-debian-12-r2 + - docker.io/bitnami/supabase-storage:1.8.2-debian-12-r2 + - docker.io/bitnami/supabase-studio:1.24.5-debian-12-r4 + - docker.io/bitnami/kong:3.7.1-debian-12-r5 - name: supabase-post-process description: "Perform necessary post processing here" required: true From 57765329fd4304289d545c0e6737215a20486be4 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 5 Aug 2024 15:11:03 -0700 Subject: [PATCH 029/100] Sets the ENC_KEY to a default value of 16 chars --- packages/supabase/bitnami-values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 9db2a745c..6621a5451 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -79,7 +79,7 @@ realtime: - name: APP_NAME value: "supabase-realtime" - name: DB_ENC_KEY - value: "testenckeychaangethisforsuresure" + value: "testenckeychange" # This needs to be exactly 16 characters - name: DNS_NODES value: 
"supabase-realtime" sidecars: @@ -98,7 +98,7 @@ realtime: name: supabase-postgresql key: postgres-password - name: DB_ENC_KEY - value: "testenckeychaangethisforsuresure" + value: "testenckeychange" # This needs to be exactly 16 characters command: - sh - -c From 294ec624851e092b18371cfeef8e7cdc13cc0724 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 5 Aug 2024 15:17:40 -0700 Subject: [PATCH 030/100] Makes the sidecar sleep after a successful run to prevent crashbackloops --- packages/supabase/bitnami-values.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 6621a5451..0ecc90457 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -109,7 +109,16 @@ realtime: sleep 5; done; echo "supabase-realtime is ready. Running curl command..."; - curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" -d '{"tenant":{"name":"supabase-realtime","external_id":"supabase-realtime","jwt_secret":"'"$DB_ENC_KEY"'","extensions":[{"type":"postgres_cdc_rls","settings":{"db_name":"postgres","db_host":"supabase-postgresql","db_user":"supabase_admin","db_password":"'"$POSTGRESQL_PASSWORD"'","db_port":"5432","region":"us-east-1","publication":"supabase_realtime","ssl_enforced":"false","poll_interval_ms":"100","poll_max_record_bytes":"1048576"}}]}}' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants + if curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" -d 
'{"tenant":{"name":"supabase-realtime","external_id":"supabase-realtime","jwt_secret":"'"$DB_ENC_KEY"'","extensions":[{"type":"postgres_cdc_rls","settings":{"db_name":"postgres","db_host":"supabase-postgresql","db_user":"supabase_admin","db_password":"'"$POSTGRESQL_PASSWORD"'","db_port":"5432","region":"us-east-1","publication":"supabase_realtime","ssl_enforced":"false","poll_interval_ms":"100","poll_max_record_bytes":"1048576"}}]}}' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants; then + echo "Curl command completed successfully. Entering sleep loop..." + while true; do + sleep 3600 + echo "Sidecar still alive" + done + else + echo "Curl command failed. Exiting..." + exit 1 + fi rest: enabled: ###ZARF_VAR_ENABLE_REST### From b7cb54c0426bbad917e5b3534f6b15ef3cb1d605 Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 7 Aug 2024 15:25:58 -0700 Subject: [PATCH 031/100] Adds notify private alpha --- packages/supabase/bitnami-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 0ecc90457..5f814a0d8 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -109,7 +109,7 @@ realtime: sleep 5; done; echo "supabase-realtime is ready. 
Running curl command..."; - if curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" -d '{"tenant":{"name":"supabase-realtime","external_id":"supabase-realtime","jwt_secret":"'"$DB_ENC_KEY"'","extensions":[{"type":"postgres_cdc_rls","settings":{"db_name":"postgres","db_host":"supabase-postgresql","db_user":"supabase_admin","db_password":"'"$POSTGRESQL_PASSWORD"'","db_port":"5432","region":"us-east-1","publication":"supabase_realtime","ssl_enforced":"false","poll_interval_ms":"100","poll_max_record_bytes":"1048576"}}]}}' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants; then + if curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" -d '{"tenant":{"name":"supabase-realtime","external_id":"supabase-realtime","jwt_secret":"'"$DB_ENC_KEY"'","extensions":[{"type":"postgres_cdc_rls","settings":{"db_name":"postgres","db_host":"supabase-postgresql","db_user":"supabase_admin","db_password":"'"$POSTGRESQL_PASSWORD"'","db_port":"5432","region":"us-east-1","publication":"supabase_realtime","ssl_enforced":"false","poll_interval_ms":"100","poll_max_record_bytes":"1048576"}}],"notify_private_alpha":"true"}}' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants; then echo "Curl command completed successfully. Entering sleep loop..." 
while true; do sleep 3600 From 27b96e3529d06fb155ae6c4ec433cacdc4274b55 Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 7 Aug 2024 15:30:10 -0700 Subject: [PATCH 032/100] Replaces DB_ENC_KEY with JWT_SECRET --- packages/supabase/bitnami-values.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 5f814a0d8..0bb308868 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -97,8 +97,11 @@ realtime: secretKeyRef: name: supabase-postgresql key: postgres-password - - name: DB_ENC_KEY - value: "testenckeychange" # This needs to be exactly 16 characters + - name: JWT_SECRET + valueFrom: + secretKeyRef: + name: '{{ include "supabase.jwt.secretName" . }}' + key: '{{ include "supabase.realtime.secretKey" . }}' command: - sh - -c @@ -109,7 +112,7 @@ realtime: sleep 5; done; echo "supabase-realtime is ready. Running curl command..."; - if curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" -d '{"tenant":{"name":"supabase-realtime","external_id":"supabase-realtime","jwt_secret":"'"$DB_ENC_KEY"'","extensions":[{"type":"postgres_cdc_rls","settings":{"db_name":"postgres","db_host":"supabase-postgresql","db_user":"supabase_admin","db_password":"'"$POSTGRESQL_PASSWORD"'","db_port":"5432","region":"us-east-1","publication":"supabase_realtime","ssl_enforced":"false","poll_interval_ms":"100","poll_max_record_bytes":"1048576"}}],"notify_private_alpha":"true"}}' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants; then + if curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" -d 
'{"tenant":{"name":"supabase-realtime","external_id":"supabase-realtime","jwt_secret":"'"$JWT_SECRET"'","extensions":[{"type":"postgres_cdc_rls","settings":{"db_name":"postgres","db_host":"supabase-postgresql","db_user":"supabase_admin","db_password":"'"$POSTGRESQL_PASSWORD"'","db_port":"5432","region":"us-east-1","publication":"supabase_realtime","ssl_enforced":"false","poll_interval_ms":"100","poll_max_record_bytes":"1048576"}}],"notify_private_alpha":"true"}}' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants; then echo "Curl command completed successfully. Entering sleep loop..." while true; do sleep 3600 From a2932a4c6f0e730842f3f821cb6d9cfee1205152 Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 7 Aug 2024 16:02:18 -0700 Subject: [PATCH 033/100] Updates the JWT_SECRET to be correct --- packages/supabase/bitnami-values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 0bb308868..460347a63 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -100,8 +100,8 @@ realtime: - name: JWT_SECRET valueFrom: secretKeyRef: - name: '{{ include "supabase.jwt.secretName" . }}' - key: '{{ include "supabase.realtime.secretKey" . }}' + name: supabase-bootstrap-jwt + key: secret command: - sh - -c From cc85394c2c81b21b2f1170f556a960df5fe8fef5 Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 7 Aug 2024 16:11:35 -0700 Subject: [PATCH 034/100] Removes unnecessary quotes that interfere with SSL configs --- packages/supabase/bitnami-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 460347a63..63be8dc61 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -112,7 +112,7 @@ realtime: sleep 5; done; echo "supabase-realtime is ready. 
Running curl command..."; - if curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" -d '{"tenant":{"name":"supabase-realtime","external_id":"supabase-realtime","jwt_secret":"'"$JWT_SECRET"'","extensions":[{"type":"postgres_cdc_rls","settings":{"db_name":"postgres","db_host":"supabase-postgresql","db_user":"supabase_admin","db_password":"'"$POSTGRESQL_PASSWORD"'","db_port":"5432","region":"us-east-1","publication":"supabase_realtime","ssl_enforced":"false","poll_interval_ms":"100","poll_max_record_bytes":"1048576"}}],"notify_private_alpha":"true"}}' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants; then + if curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" -d '{"tenant":{"name":"supabase-realtime","external_id":"supabase-realtime","jwt_secret":"'"$JWT_SECRET"'","extensions":[{"type":"postgres_cdc_rls","settings":{"db_name":"postgres","db_host":"supabase-postgresql","db_user":"supabase_admin","db_password":"'"$POSTGRESQL_PASSWORD"'","db_port":"5432","region":"us-east-1","publication":"supabase_realtime","ssl_enforced":false,"poll_interval_ms":100,"poll_max_record_bytes":1048576}}],"notify_private_alpha":true}}' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants; then echo "Curl command completed successfully. Entering sleep loop..." 
while true; do sleep 3600 From a33a3bab756ec8c30dec8d111638ac9673c097a1 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 09:27:44 -0700 Subject: [PATCH 035/100] Replaces sidecar based realtime tenant initialization with init args --- packages/supabase/bitnami-values.yaml | 44 +++------------------------ 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 63be8dc61..37501f7ab 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -82,46 +82,10 @@ realtime: value: "testenckeychange" # This needs to be exactly 16 characters - name: DNS_NODES value: "supabase-realtime" - sidecars: - - name: realtime-init - image: docker.io/bitnami/os-shell:12-debian-12-r27 - imagePullPolicy: IfNotPresent - env: - - name: SUPABASE_SERVICE_KEY - valueFrom: - secretKeyRef: - name: '{{ include "supabase.jwt.secretName" . }}' - key: '{{ include "supabase.jwt.serviceSecretKey" . }}' - - name: POSTGRESQL_PASSWORD - valueFrom: - secretKeyRef: - name: supabase-postgresql - key: postgres-password - - name: JWT_SECRET - valueFrom: - secretKeyRef: - name: supabase-bootstrap-jwt - key: secret - command: - - sh - - -c - - >- - echo "Waiting for supabase-realtime to be ready..."; - until curl -s -o /dev/null -w "%{http_code}" http://supabase-realtime.leapfrogai.svc.cluster.local:80/ | grep -q "200"; do - echo "Waiting for supabase-realtime to be ready..."; - sleep 5; - done; - echo "supabase-realtime is ready. 
Running curl command..."; - if curl --insecure -L -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" -H "apiKey: $SUPABASE_SERVICE_KEY" -d '{"tenant":{"name":"supabase-realtime","external_id":"supabase-realtime","jwt_secret":"'"$JWT_SECRET"'","extensions":[{"type":"postgres_cdc_rls","settings":{"db_name":"postgres","db_host":"supabase-postgresql","db_user":"supabase_admin","db_password":"'"$POSTGRESQL_PASSWORD"'","db_port":"5432","region":"us-east-1","publication":"supabase_realtime","ssl_enforced":false,"poll_interval_ms":100,"poll_max_record_bytes":1048576}}],"notify_private_alpha":true}}' https://supabase-kong.leapfrogai.svc.cluster.local/realtime/v1/api/tenants; then - echo "Curl command completed successfully. Entering sleep loop..." - while true; do - sleep 3600 - echo "Sidecar still alive" - done - else - echo "Curl command failed. Exiting..." - exit 1 - fi + args: + - -ec + - | + realtime eval Realtime.Release.migrate && realtime eval Realtime.Release.seeds(Realtime.Repo) && realtime start rest: enabled: ###ZARF_VAR_ENABLE_REST### From 3882e156cd652cb7ac1105552357e6ab1d61ef11 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 09:32:46 -0700 Subject: [PATCH 036/100] Adds back necessary quotes --- packages/supabase/bitnami-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 37501f7ab..5ef6dfd8d 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -85,7 +85,7 @@ realtime: args: - -ec - | - realtime eval Realtime.Release.migrate && realtime eval Realtime.Release.seeds(Realtime.Repo) && realtime start + realtime eval Realtime.Release.migrate && realtime eval 'Realtime.Release.seeds(Realtime.Repo)' && realtime start rest: enabled: ###ZARF_VAR_ENABLE_REST### From 5296c66d04fd11421eff4e01b2dab7cf07dc86e0 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 
2024 09:47:32 -0700 Subject: [PATCH 037/100] Adds migration to update the freshly seeded db --- ...20240808093300_v0.10.0_realtime_tenant.sql | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 packages/supabase/migrations/20240808093300_v0.10.0_realtime_tenant.sql diff --git a/packages/supabase/migrations/20240808093300_v0.10.0_realtime_tenant.sql b/packages/supabase/migrations/20240808093300_v0.10.0_realtime_tenant.sql new file mode 100644 index 000000000..680b55661 --- /dev/null +++ b/packages/supabase/migrations/20240808093300_v0.10.0_realtime_tenant.sql @@ -0,0 +1,22 @@ +-- Disable the foreign key constraint +ALTER TABLE extensions +DROP CONSTRAINT extensions_tenant_external_id_fkey; + +-- Update the external_id and name for the realtime tenant +UPDATE tenants +SET external_id = 'supabase-realtime', + name = 'supabase-realtime' +WHERE external_id = 'realtime-dev' + AND name = 'realtime-dev'; + +-- Update the tenant_external_id for the realtime extension +UPDATE extensions +SET tenant_external_id = 'supabase-realtime' +WHERE tenant_external_id = 'realtime-dev'; + +-- Re-enable the foreign key constraint +ALTER TABLE extensions +ADD CONSTRAINT extensions_tenant_external_id_fkey +FOREIGN KEY (tenant_external_id) +REFERENCES tenants(external_id) +ON DELETE CASCADE; \ No newline at end of file From 673c07a3352c3d123050ce5c47302478dad2ed11 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 10:04:37 -0700 Subject: [PATCH 038/100] Randomly generates enc_secret instead of hard coding it --- packages/supabase/bitnami-values.yaml | 5 ++++- .../templates/suapbase-realtime-secret.yaml | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 packages/supabase/chart/templates/suapbase-realtime-secret.yaml diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 5ef6dfd8d..a1c1e1103 100644 --- a/packages/supabase/bitnami-values.yaml +++ 
b/packages/supabase/bitnami-values.yaml @@ -79,7 +79,10 @@ realtime: - name: APP_NAME value: "supabase-realtime" - name: DB_ENC_KEY - value: "testenckeychange" # This needs to be exactly 16 characters + valueFrom: + secretKeyRef: + name: supabase-realtime-extra + key: db-enc-key - name: DNS_NODES value: "supabase-realtime" args: diff --git a/packages/supabase/chart/templates/suapbase-realtime-secret.yaml b/packages/supabase/chart/templates/suapbase-realtime-secret.yaml new file mode 100644 index 000000000..613deb20a --- /dev/null +++ b/packages/supabase/chart/templates/suapbase-realtime-secret.yaml @@ -0,0 +1,18 @@ +{{- $dbEncKey := randAlphaNum 16 }} # This needs to be exactly 16 characters +{{- $existingSecret := (lookup "v1" "Secret" .Release.Namespace "supabase-realtime-extra") }} +apiVersion: v1 +kind: Secret +metadata: + name: supabase-realtime-extra + namespace: {{ .Release.Namespace }} + {{- if $existingSecret }} + annotations: + "helm.sh/resource-policy": keep + {{- end }} +type: Opaque +data: + {{- if $existingSecret }} + db-enc-key: {{ $existingSecret.data.db-enc-key }} + {{- else }} + db-enc-key: {{ $dbEncKey | b64enc | quote }} + {{- end }} \ No newline at end of file From 537239e63f23b6f235955a99086e9b84371f86eb Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 10:11:57 -0700 Subject: [PATCH 039/100] Replaces secret name to prevent deployment errors --- packages/supabase/bitnami-values.yaml | 2 +- .../templates/suapbase-realtime-secret.yaml | 22 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index a1c1e1103..6a7446c33 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -82,7 +82,7 @@ realtime: valueFrom: secretKeyRef: name: supabase-realtime-extra - key: db-enc-key + key: dbEncKey - name: DNS_NODES value: "supabase-realtime" args: diff --git 
a/packages/supabase/chart/templates/suapbase-realtime-secret.yaml b/packages/supabase/chart/templates/suapbase-realtime-secret.yaml index 613deb20a..a762df845 100644 --- a/packages/supabase/chart/templates/suapbase-realtime-secret.yaml +++ b/packages/supabase/chart/templates/suapbase-realtime-secret.yaml @@ -3,16 +3,16 @@ apiVersion: v1 kind: Secret metadata: - name: supabase-realtime-extra - namespace: {{ .Release.Namespace }} - {{- if $existingSecret }} - annotations: - "helm.sh/resource-policy": keep - {{- end }} + name: supabase-realtime-extra + namespace: {{ .Release.Namespace }} + {{- if $existingSecret }} + annotations: + "helm.sh/resource-policy": keep + {{- end }} type: Opaque data: - {{- if $existingSecret }} - db-enc-key: {{ $existingSecret.data.db-enc-key }} - {{- else }} - db-enc-key: {{ $dbEncKey | b64enc | quote }} - {{- end }} \ No newline at end of file + {{- if $existingSecret }} + dbEncKey: {{ $existingSecret.data.dbEncKey }} + {{- else }} + dbEncKey: {{ $dbEncKey | b64enc | quote }} + {{- end }} \ No newline at end of file From 92c314fd39c892ae577f2fec3ad47c4911efa91f Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 14:28:34 -0700 Subject: [PATCH 040/100] Creates _realtime schema and sets up deployment to use it --- packages/supabase/bitnami-values.yaml | 2 ++ .../migrations/20240808083300_v0.10.0_realtime_schema.sql | 3 +++ 2 files changed, 5 insertions(+) create mode 100644 packages/supabase/migrations/20240808083300_v0.10.0_realtime_schema.sql diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 6a7446c33..78f2d7bd0 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -78,6 +78,8 @@ realtime: extraEnvVars: - name: APP_NAME value: "supabase-realtime" + - name: DB_AFTER_CONNECT_QUERY + value: "SET search_path TO _realtime" - name: DB_ENC_KEY valueFrom: secretKeyRef: diff --git 
a/packages/supabase/migrations/20240808083300_v0.10.0_realtime_schema.sql b/packages/supabase/migrations/20240808083300_v0.10.0_realtime_schema.sql new file mode 100644 index 000000000..dd646a86c --- /dev/null +++ b/packages/supabase/migrations/20240808083300_v0.10.0_realtime_schema.sql @@ -0,0 +1,3 @@ +-- Create the _realtime schema if it doesn't exist +create schema if not exists _realtime; +alter schema _realtime owner to postgres; \ No newline at end of file From 5b055e575c497290cfcf47d555f03881b6b3efd3 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 14:34:53 -0700 Subject: [PATCH 041/100] Set the search path back to public --- packages/supabase/bitnami-values.yaml | 2 +- .../migrations/20240808083300_v0.10.0_realtime_schema.sql | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 packages/supabase/migrations/20240808083300_v0.10.0_realtime_schema.sql diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 78f2d7bd0..bb2f6ad6b 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -79,7 +79,7 @@ realtime: - name: APP_NAME value: "supabase-realtime" - name: DB_AFTER_CONNECT_QUERY - value: "SET search_path TO _realtime" + value: "SET search_path TO public" - name: DB_ENC_KEY valueFrom: secretKeyRef: diff --git a/packages/supabase/migrations/20240808083300_v0.10.0_realtime_schema.sql b/packages/supabase/migrations/20240808083300_v0.10.0_realtime_schema.sql deleted file mode 100644 index dd646a86c..000000000 --- a/packages/supabase/migrations/20240808083300_v0.10.0_realtime_schema.sql +++ /dev/null @@ -1,3 +0,0 @@ --- Create the _realtime schema if it doesn't exist -create schema if not exists _realtime; -alter schema _realtime owner to postgres; \ No newline at end of file From c04adf683a436371cb5615f22edf9214905ce348 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 14:37:15 -0700 Subject: [PATCH 042/100] Initialize _realtime table in 
after connect query --- packages/supabase/bitnami-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index bb2f6ad6b..5bed2dedb 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -79,7 +79,7 @@ realtime: - name: APP_NAME value: "supabase-realtime" - name: DB_AFTER_CONNECT_QUERY - value: "SET search_path TO public" + value: "create schema if not exists _realtime; alter schema _realtime owner to postgres; SET search_path TO _realtime;" - name: DB_ENC_KEY valueFrom: secretKeyRef: From 87e8a522434bb27f8d90f8403229e53d58a78c24 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 14:50:54 -0700 Subject: [PATCH 043/100] Replaces multiple queries with a single query --- packages/supabase/bitnami-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 5bed2dedb..7a3428c66 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -79,7 +79,7 @@ realtime: - name: APP_NAME value: "supabase-realtime" - name: DB_AFTER_CONNECT_QUERY - value: "create schema if not exists _realtime; alter schema _realtime owner to postgres; SET search_path TO _realtime;" + value: "DO $body$ BEGIN CREATE SCHEMA IF NOT EXISTS _realtime; ALTER SCHEMA _realtime OWNER TO postgres; SET search_path TO _realtime; END $body$;" - name: DB_ENC_KEY valueFrom: secretKeyRef: From a5cbb4b79e5caa48e065ba8363f02d83ccfd0325 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 14:53:27 -0700 Subject: [PATCH 044/100] Updates the migration to represent the new location --- .../20240808093300_v0.10.0_realtime_tenant.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/supabase/migrations/20240808093300_v0.10.0_realtime_tenant.sql 
b/packages/supabase/migrations/20240808093300_v0.10.0_realtime_tenant.sql index 680b55661..27f58c83d 100644 --- a/packages/supabase/migrations/20240808093300_v0.10.0_realtime_tenant.sql +++ b/packages/supabase/migrations/20240808093300_v0.10.0_realtime_tenant.sql @@ -1,22 +1,22 @@ -- Disable the foreign key constraint -ALTER TABLE extensions +ALTER TABLE _realtime.extensions DROP CONSTRAINT extensions_tenant_external_id_fkey; -- Update the external_id and name for the realtime tenant -UPDATE tenants +UPDATE _realtime.tenants SET external_id = 'supabase-realtime', name = 'supabase-realtime' WHERE external_id = 'realtime-dev' AND name = 'realtime-dev'; -- Update the tenant_external_id for the realtime extension -UPDATE extensions +UPDATE _realtime.extensions SET tenant_external_id = 'supabase-realtime' WHERE tenant_external_id = 'realtime-dev'; -- Re-enable the foreign key constraint -ALTER TABLE extensions +ALTER TABLE _realtime.extensions ADD CONSTRAINT extensions_tenant_external_id_fkey FOREIGN KEY (tenant_external_id) -REFERENCES tenants(external_id) +REFERENCES _realtime.tenants(external_id) ON DELETE CASCADE; \ No newline at end of file From bcc000d73c941245793cd779968d44270fa0c538 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 15:28:36 -0700 Subject: [PATCH 045/100] Swaps out postgres image for one with wal2json --- packages/supabase/bitnami-values.yaml | 6 ++++-- packages/supabase/zarf.yaml | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 7a3428c66..77b849b24 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -123,7 +123,8 @@ volumePermissions: resourcesPreset: "none" psqlImage: - tag: 15.6.1-debian-12-r2 + repository: betaboon/bitnami-postgresql-wal2json + tag: 15.7.0-debian-12-r19 kong: enabled: ###ZARF_VAR_ENABLE_KONG### @@ -188,7 +189,8 @@ kong: postgresql: enabled: 
###ZARF_VAR_ENABLE_POSTGRES### image: - tag: 15.6.1-debian-12-r2 + repository: betaboon/bitnami-postgresql-wal2json + tag: 15.7.0-debian-12-r19 debug: true primary: resourcesPreset: "none" diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 094219fcd..6a5a1d48e 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -96,6 +96,7 @@ components: - docker.io/bitnami/supabase-storage:1.8.2-debian-12-r2 - docker.io/bitnami/supabase-studio:1.24.5-debian-12-r4 - docker.io/bitnami/kong:3.7.1-debian-12-r5 + - betaboon/bitnami-postgresql-wal2json:15.7.0-debian-12-r19 - name: supabase-post-process description: "Perform necessary post processing here" required: true From 3cc1bc11d83fa0c88a59273bb752d201e99cd793 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 15:35:02 -0700 Subject: [PATCH 046/100] Moves back to older postgres image --- packages/supabase/bitnami-values.yaml | 4 ++-- packages/supabase/zarf.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 77b849b24..05c843819 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -124,7 +124,7 @@ volumePermissions: psqlImage: repository: betaboon/bitnami-postgresql-wal2json - tag: 15.7.0-debian-12-r19 + tag: 15.6.0-debian-12-r20 kong: enabled: ###ZARF_VAR_ENABLE_KONG### @@ -190,7 +190,7 @@ postgresql: enabled: ###ZARF_VAR_ENABLE_POSTGRES### image: repository: betaboon/bitnami-postgresql-wal2json - tag: 15.7.0-debian-12-r19 + tag: 15.6.0-debian-12-r20 debug: true primary: resourcesPreset: "none" diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 6a5a1d48e..cfefd91d6 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -96,7 +96,7 @@ components: - docker.io/bitnami/supabase-storage:1.8.2-debian-12-r2 - docker.io/bitnami/supabase-studio:1.24.5-debian-12-r4 - 
docker.io/bitnami/kong:3.7.1-debian-12-r5 - - betaboon/bitnami-postgresql-wal2json:15.7.0-debian-12-r19 + - betaboon/bitnami-postgresql-wal2json:15.6.0-debian-12-r20 - name: supabase-post-process description: "Perform necessary post processing here" required: true From c57facf741d5d50ecc36b97127dcbcc8bf5ad0d9 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 15:42:38 -0700 Subject: [PATCH 047/100] Switch to official supabase postgres image --- packages/supabase/bitnami-values.yaml | 8 ++++---- packages/supabase/zarf.yaml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 05c843819..d67ed97a1 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -123,8 +123,8 @@ volumePermissions: resourcesPreset: "none" psqlImage: - repository: betaboon/bitnami-postgresql-wal2json - tag: 15.6.0-debian-12-r20 + repository: supabase/postgres + tag: 15.6.1.106 kong: enabled: ###ZARF_VAR_ENABLE_KONG### @@ -189,8 +189,8 @@ kong: postgresql: enabled: ###ZARF_VAR_ENABLE_POSTGRES### image: - repository: betaboon/bitnami-postgresql-wal2json - tag: 15.6.0-debian-12-r20 + repository: supabase/postgres + tag: 15.6.1.106 debug: true primary: resourcesPreset: "none" diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index cfefd91d6..5439dbd39 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -96,7 +96,7 @@ components: - docker.io/bitnami/supabase-storage:1.8.2-debian-12-r2 - docker.io/bitnami/supabase-studio:1.24.5-debian-12-r4 - docker.io/bitnami/kong:3.7.1-debian-12-r5 - - betaboon/bitnami-postgresql-wal2json:15.6.0-debian-12-r20 + - supabase/postgres:15.6.1.106 - name: supabase-post-process description: "Perform necessary post processing here" required: true From 1c083e8446a74cd5edfb5b2f7bcdcc4c46f9ab3b Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 15:47:43 -0700 Subject: 
[PATCH 048/100] Returns to using a base postgres image --- packages/supabase/bitnami-values.yaml | 6 ++---- packages/supabase/zarf.yaml | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index d67ed97a1..7a3428c66 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -123,8 +123,7 @@ volumePermissions: resourcesPreset: "none" psqlImage: - repository: supabase/postgres - tag: 15.6.1.106 + tag: 15.6.1-debian-12-r2 kong: enabled: ###ZARF_VAR_ENABLE_KONG### @@ -189,8 +188,7 @@ kong: postgresql: enabled: ###ZARF_VAR_ENABLE_POSTGRES### image: - repository: supabase/postgres - tag: 15.6.1.106 + tag: 15.6.1-debian-12-r2 debug: true primary: resourcesPreset: "none" diff --git a/packages/supabase/zarf.yaml b/packages/supabase/zarf.yaml index 5439dbd39..094219fcd 100644 --- a/packages/supabase/zarf.yaml +++ b/packages/supabase/zarf.yaml @@ -96,7 +96,6 @@ components: - docker.io/bitnami/supabase-storage:1.8.2-debian-12-r2 - docker.io/bitnami/supabase-studio:1.24.5-debian-12-r4 - docker.io/bitnami/kong:3.7.1-debian-12-r5 - - supabase/postgres:15.6.1.106 - name: supabase-post-process description: "Perform necessary post processing here" required: true From 11888cbd7e4005210661e8b06df195954ab639d3 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 8 Aug 2024 16:18:43 -0700 Subject: [PATCH 049/100] Updates the config to set the wal_level to logical --- packages/supabase/bitnami-values.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index 7a3428c66..2ae319903 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -191,6 +191,8 @@ postgresql: tag: 15.6.1-debian-12-r2 debug: true primary: + extendedConfiguration: | + wal_level = logical resourcesPreset: "none" podLabels: sidecar.istio.io/inject: "false" From 
79e09a28961ac95a285723131a93a5f976c1e9c8 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 13 Aug 2024 11:37:44 -0700 Subject: [PATCH 050/100] Removes explicit install of requests dep and adds new realtime dep --- .github/workflows/e2e-playwright.yaml | 2 -- pyproject.toml | 3 ++- tests/e2e/test_supabase.py | 14 ++++++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e-playwright.yaml b/.github/workflows/e2e-playwright.yaml index 671e4d910..0eea84eb0 100644 --- a/.github/workflows/e2e-playwright.yaml +++ b/.github/workflows/e2e-playwright.yaml @@ -83,12 +83,10 @@ jobs: - name: Test Supabase run: | - python -m pip install requests python -m pytest ./tests/e2e/test_supabase.py -v - name: Test API run: | - python -m pip install requests python -m pytest ./tests/e2e/test_api.py -v ########## diff --git a/pyproject.toml b/pyproject.toml index e794fec99..6e1ca65e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,8 @@ dev = [ "requests", "requests-toolbelt", "pytest", - "huggingface_hub[cli,hf_transfer]" + "huggingface_hub[cli,hf_transfer]", + "realtime >= 1.0.6" ] dev-whisper = [ diff --git a/tests/e2e/test_supabase.py b/tests/e2e/test_supabase.py index 467e075a0..cd1b5b934 100644 --- a/tests/e2e/test_supabase.py +++ b/tests/e2e/test_supabase.py @@ -1,4 +1,5 @@ import requests +from realtime.connection import Socket from .utils import ANON_KEY @@ -18,3 +19,16 @@ def test_studio(): print(f"Error: Request failed with status code {response.status_code}") print(e) exit(1) + + +def test_supabase_realtime_vector_store_indexing(): + def callback1(payload): + print("Callback 1: ", payload) + + URL = f"wss://supabase-kong.uds.dev/realtime/v1/?apikey={ANON_KEY}&vsn=1.0.0" + s = Socket(URL) + s.connect() + + channel_1 = s.set_channel("realtime:*") + channel_1.join().on("UPDATE", callback1) + s.listen() \ No newline at end of file From f6829260030a7fefb212317caa02fc5ba1f820c5 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 13 
Aug 2024 11:39:25 -0700 Subject: [PATCH 051/100] Reverts removal of requests dependency in favor of doing it an a separate PR --- .github/workflows/e2e-playwright.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/e2e-playwright.yaml b/.github/workflows/e2e-playwright.yaml index 0eea84eb0..671e4d910 100644 --- a/.github/workflows/e2e-playwright.yaml +++ b/.github/workflows/e2e-playwright.yaml @@ -83,10 +83,12 @@ jobs: - name: Test Supabase run: | + python -m pip install requests python -m pytest ./tests/e2e/test_supabase.py -v - name: Test API run: | + python -m pip install requests python -m pytest ./tests/e2e/test_api.py -v ########## From 6a713ad60da509216826fa5852f2aa8cfba5d3b6 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 13 Aug 2024 12:21:22 -0700 Subject: [PATCH 052/100] Installs specific realtime version and switches how the tests listens for changes --- pyproject.toml | 2 +- tests/e2e/test_supabase.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6e1ca65e2..48249366b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dev = [ "requests-toolbelt", "pytest", "huggingface_hub[cli,hf_transfer]", - "realtime >= 1.0.6" + "realtime@git+https://github.com/supabase/realtime-py.git@7485e7ca6abc39bfabdc735daf7a8d6e18160121" # There has not yet been a full release with the v2 postgres functionality ] dev-whisper = [ diff --git a/tests/e2e/test_supabase.py b/tests/e2e/test_supabase.py index cd1b5b934..5d56d2452 100644 --- a/tests/e2e/test_supabase.py +++ b/tests/e2e/test_supabase.py @@ -1,5 +1,6 @@ import requests from realtime.connection import Socket +from realtime.channel import Channel from .utils import ANON_KEY @@ -22,13 +23,19 @@ def test_studio(): def test_supabase_realtime_vector_store_indexing(): - def callback1(payload): - print("Callback 1: ", payload) + def postgres_changes_callback(payload): + print("postgres_changes: ", payload) - 
URL = f"wss://supabase-kong.uds.dev/realtime/v1/?apikey={ANON_KEY}&vsn=1.0.0" - s = Socket(URL) + URL = f"https://supabase-kong.uds.dev/realtime/v1" + JWT = ANON_KEY + s = Socket(URL, JWT, auto_reconnect=True) s.connect() - channel_1 = s.set_channel("realtime:*") - channel_1.join().on("UPDATE", callback1) + channel_1: Channel = Channel(s, "postgres-vector-store-indexing-test") + channel_1.on_postgres_changes( + table="vector_store_file", + schema="public", + event="*", + callback=postgres_changes_callback, + ).subscribe() s.listen() \ No newline at end of file From 351488a244f8df5531433170fa3d8fd48ddf4729 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 13 Aug 2024 18:56:41 -0700 Subject: [PATCH 053/100] Adds complete test to check whether realtime is working --- .github/actions/lfai-core/action.yaml | 1 + pyproject.toml | 1 - tests/e2e/test_supabase.py | 166 ++++++++++++++++++++++---- tests/e2e/utils.py | 1 + 4 files changed, 146 insertions(+), 23 deletions(-) diff --git a/.github/actions/lfai-core/action.yaml b/.github/actions/lfai-core/action.yaml index 40807f8c3..b8695787b 100644 --- a/.github/actions/lfai-core/action.yaml +++ b/.github/actions/lfai-core/action.yaml @@ -18,6 +18,7 @@ runs: id: set-env-var run: | echo "ANON_KEY=$(uds zarf tools kubectl get secret supabase-bootstrap-jwt -n leapfrogai -o jsonpath='{.data.anon-key}' | base64 -d)" >> "$GITHUB_ENV" + echo "SERVICE_KEY=$(uds zarf tools kubectl get secret supabase-bootstrap-jwt -n leapfrogai -o jsonpath='{.data.service-key}' | base64 -d)" >> "$GITHUB_ENV" - name: Deploy LFAI-API shell: bash diff --git a/pyproject.toml b/pyproject.toml index 48249366b..cfcf3fc14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,6 @@ dev = [ "requests-toolbelt", "pytest", "huggingface_hub[cli,hf_transfer]", - "realtime@git+https://github.com/supabase/realtime-py.git@7485e7ca6abc39bfabdc735daf7a8d6e18160121" # There has not yet been a full release with the v2 postgres functionality ] dev-whisper = [ diff 
--git a/tests/e2e/test_supabase.py b/tests/e2e/test_supabase.py index 5d56d2452..3a8eebad5 100644 --- a/tests/e2e/test_supabase.py +++ b/tests/e2e/test_supabase.py @@ -1,8 +1,26 @@ +import asyncio +import io +import threading +import uuid +from fastapi import UploadFile import requests -from realtime.connection import Socket -from realtime.channel import Channel +import time +from openai.types.beta.vector_stores import VectorStoreFile +from openai.types.beta import VectorStore +from openai.types.beta.vector_store import FileCounts +import _thread -from .utils import ANON_KEY +from supabase import AClient as AsyncClient, acreate_client +from realtime import Socket +from leapfrogai_api.data.crud_file_bucket import CRUDFileBucket +from leapfrogai_api.data.crud_file_object import CRUDFileObject +from leapfrogai_api.data.crud_vector_store import CRUDVectorStore + +from leapfrogai_api.data.crud_vector_store_file import CRUDVectorStoreFile + +from .utils import ANON_KEY, create_test_user, SERVICE_KEY +from supabase._sync.client import SyncClient +from openai.types import FileObject health_urls = { "auth_health_url": "http://supabase-kong.uds.dev/auth/v1/health", @@ -14,28 +32,132 @@ def test_studio(): try: for url_name in health_urls: - response = requests.get(health_urls[url_name], headers={"apikey": ANON_KEY}) - response.raise_for_status() + resp = requests.get(health_urls[url_name], headers={"apikey": ANON_KEY}) + resp.raise_for_status() except requests.exceptions.RequestException as e: - print(f"Error: Request failed with status code {response.status_code}") + print(f"Error: Request failed with status code {resp.status_code}") print(e) exit(1) - def test_supabase_realtime_vector_store_indexing(): + class TestCompleteException(Exception): + pass + + def timeout_handler(): + print("Test timed out after 10 seconds") + # This is necessary to stop the thread from hanging forever + _thread.interrupt_main() + + async def postgres_db_changes(): + client: AsyncClient = await 
acreate_client( + supabase_key=ANON_KEY, + supabase_url="https://supabase-kong.uds.dev", + ) + await client.auth.set_session( + access_token=access_token, refresh_token="dummy" + ) + + upload_file_id = await upload_file(client) + assert upload_file_id is not None, "Failed to upload file" + + vector_store = VectorStore( + id=str(uuid.uuid4()), + created_at=int(time.time()), + file_counts=FileCounts( + cancelled=0, + completed=0, + failed=0, + in_progress=0, + total=0, + ), + name="test_vector_store", + object="vector_store", + status="completed", + usage_bytes=0 + ) + + await CRUDVectorStore(client).create(vector_store) + + vector_store_file = VectorStoreFile( + id=upload_file_id, + vector_store_id=vector_store.id, + created_at=int(time.time()), + object="vector_store.file", + status="completed", + usage_bytes=0 + ) + + await CRUDVectorStoreFile(client).create(vector_store_file) + + def postgres_changes_callback(payload): - print("postgres_changes: ", payload) - - URL = f"https://supabase-kong.uds.dev/realtime/v1" - JWT = ANON_KEY - s = Socket(URL, JWT, auto_reconnect=True) - s.connect() - - channel_1: Channel = Channel(s, "postgres-vector-store-indexing-test") - channel_1.on_postgres_changes( - table="vector_store_file", - schema="public", - event="*", - callback=postgres_changes_callback, - ).subscribe() - s.listen() \ No newline at end of file + expected_record = { + 'object': 'vector_store.file', + 'status': 'completed', + 'usage_bytes': 0, + } + + all_records_match = all(payload.get('record', {}).get(key) == value + for key, value in expected_record.items()) + event_information_match = (payload.get('table') == 'vector_store_file' and payload.get('type') == 'INSERT') + + if (event_information_match and all_records_match): + raise TestCompleteException("Test completed successfully") + + async def upload_file(client: AsyncClient) -> str: + id_ = str(uuid.uuid4()) + + empty_file_object = FileObject( + id=id_, + bytes=0, + created_at=0, + filename="", + 
object="file", + purpose="assistants", + status="uploaded", + status_details=None, + ) + + crud_file_object = CRUDFileObject(client) + + file_object = await crud_file_object.create(object_=empty_file_object) + assert file_object is not None, "Failed to create file object" + + crud_file_bucket = CRUDFileBucket(db=client, model=UploadFile) + await crud_file_bucket.upload(file=UploadFile(filename="", file=io.BytesIO(b"")), id_=file_object.id) + return id_ + + def run_postgres_db_changes(): + asyncio.run(postgres_db_changes()) + + timeout_timer = None + try: + random_name = str(uuid.uuid4()) + access_token = create_test_user(email=f"{random_name}@fake.com") + + # Schedule postgres_db_changes to run after 5 seconds + threading.Timer(5.0, run_postgres_db_changes).start() + + # Set a timeout of 10 seconds + timeout_timer = threading.Timer(10.0, timeout_handler) + timeout_timer.start() + + # Listening socket + URL = f"wss://supabase-kong.uds.dev/realtime/v1/websocket?apikey={SERVICE_KEY}&vsn=1.0.0" + s = Socket(URL) + s.connect() + + # Set channel to listen for changes to the vector_store_file table + channel_1 = s.set_channel("realtime:public:vector_store_file") + # Listen for all events on the channel ex: INSERT, UPDATE, DELETE + channel_1.join().on("*", postgres_changes_callback) + + # Start listening + s.listen() + except TestCompleteException: + if timeout_timer is not None: + timeout_timer.cancel() # Cancel the timeout timer if test completes successfully + + assert True + except Exception: + assert False \ No newline at end of file diff --git a/tests/e2e/utils.py b/tests/e2e/utils.py index e43002cd7..a51c398d4 100644 --- a/tests/e2e/utils.py +++ b/tests/e2e/utils.py @@ -6,6 +6,7 @@ # This is the anon_key for supabase, it provides access to the endpoints that would otherwise be inaccessible ANON_KEY = os.environ["ANON_KEY"] +SERVICE_KEY = os.environ["SERVICE_KEY"] DEFAULT_TEST_EMAIL = "fakeuser1@test.com" DEFAULT_TEST_PASSWORD = "password" From 
d084c72d0507d2ef0acf4e4de38fd7b52a30a5c2 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 13 Aug 2024 18:57:47 -0700 Subject: [PATCH 054/100] Ruff linting --- tests/e2e/test_supabase.py | 39 +++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/tests/e2e/test_supabase.py b/tests/e2e/test_supabase.py index 3a8eebad5..b1990ac97 100644 --- a/tests/e2e/test_supabase.py +++ b/tests/e2e/test_supabase.py @@ -39,6 +39,7 @@ def test_studio(): print(e) exit(1) + def test_supabase_realtime_vector_store_indexing(): class TestCompleteException(Exception): pass @@ -53,9 +54,7 @@ async def postgres_db_changes(): supabase_key=ANON_KEY, supabase_url="https://supabase-kong.uds.dev", ) - await client.auth.set_session( - access_token=access_token, refresh_token="dummy" - ) + await client.auth.set_session(access_token=access_token, refresh_token="dummy") upload_file_id = await upload_file(client) assert upload_file_id is not None, "Failed to upload file" @@ -73,35 +72,39 @@ async def postgres_db_changes(): name="test_vector_store", object="vector_store", status="completed", - usage_bytes=0 + usage_bytes=0, ) await CRUDVectorStore(client).create(vector_store) vector_store_file = VectorStoreFile( - id=upload_file_id, + id=upload_file_id, vector_store_id=vector_store.id, created_at=int(time.time()), object="vector_store.file", status="completed", - usage_bytes=0 + usage_bytes=0, ) await CRUDVectorStoreFile(client).create(vector_store_file) - def postgres_changes_callback(payload): expected_record = { - 'object': 'vector_store.file', - 'status': 'completed', - 'usage_bytes': 0, + "object": "vector_store.file", + "status": "completed", + "usage_bytes": 0, } - all_records_match = all(payload.get('record', {}).get(key) == value - for key, value in expected_record.items()) - event_information_match = (payload.get('table') == 'vector_store_file' and payload.get('type') == 'INSERT') + all_records_match = all( + payload.get("record", {}).get(key) 
== value + for key, value in expected_record.items() + ) + event_information_match = ( + payload.get("table") == "vector_store_file" + and payload.get("type") == "INSERT" + ) - if (event_information_match and all_records_match): + if event_information_match and all_records_match: raise TestCompleteException("Test completed successfully") async def upload_file(client: AsyncClient) -> str: @@ -124,7 +127,9 @@ async def upload_file(client: AsyncClient) -> str: assert file_object is not None, "Failed to create file object" crud_file_bucket = CRUDFileBucket(db=client, model=UploadFile) - await crud_file_bucket.upload(file=UploadFile(filename="", file=io.BytesIO(b"")), id_=file_object.id) + await crud_file_bucket.upload( + file=UploadFile(filename="", file=io.BytesIO(b"")), id_=file_object.id + ) return id_ def run_postgres_db_changes(): @@ -137,7 +142,7 @@ def run_postgres_db_changes(): # Schedule postgres_db_changes to run after 5 seconds threading.Timer(5.0, run_postgres_db_changes).start() - + # Set a timeout of 10 seconds timeout_timer = threading.Timer(10.0, timeout_handler) timeout_timer.start() @@ -160,4 +165,4 @@ def run_postgres_db_changes(): assert True except Exception: - assert False \ No newline at end of file + assert False From 3d57f6d1ef04cd73ac4d2ac80240bd629f1a2b91 Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 14 Aug 2024 09:10:30 -0700 Subject: [PATCH 055/100] Adds fastapi to dev deps list --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index cfcf3fc14..a7e3a6fee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dev = [ "requests-toolbelt", "pytest", "huggingface_hub[cli,hf_transfer]", + "fastapi" ] dev-whisper = [ From 8532be52ac245ac8ce69a6428f48377011a974df Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 14 Aug 2024 10:25:45 -0700 Subject: [PATCH 056/100] Fixes linting issue --- tests/e2e/test_supabase.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/tests/e2e/test_supabase.py b/tests/e2e/test_supabase.py index b1990ac97..f8274b018 100644 --- a/tests/e2e/test_supabase.py +++ b/tests/e2e/test_supabase.py @@ -19,7 +19,6 @@ from leapfrogai_api.data.crud_vector_store_file import CRUDVectorStoreFile from .utils import ANON_KEY, create_test_user, SERVICE_KEY -from supabase._sync.client import SyncClient from openai.types import FileObject health_urls = { From cfba262d646ca723a3f3fc6beca4f595512ea13f Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 14 Aug 2024 10:58:22 -0700 Subject: [PATCH 057/100] Updates comments --- tests/e2e/test_supabase.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/e2e/test_supabase.py b/tests/e2e/test_supabase.py index f8274b018..db1ac84ad 100644 --- a/tests/e2e/test_supabase.py +++ b/tests/e2e/test_supabase.py @@ -49,6 +49,9 @@ def timeout_handler(): _thread.interrupt_main() async def postgres_db_changes(): + """ + This function is responsible for creating a vector store and uploading a file to it. + """ client: AsyncClient = await acreate_client( supabase_key=ANON_KEY, supabase_url="https://supabase-kong.uds.dev", @@ -88,6 +91,9 @@ async def postgres_db_changes(): await CRUDVectorStoreFile(client).create(vector_store_file) def postgres_changes_callback(payload): + """ + This function is responsible for listening for changes to the vector store file and signaling success if the file triggers realtime successfully. + """ expected_record = { "object": "vector_store.file", "status": "completed", @@ -107,6 +113,9 @@ def postgres_changes_callback(payload): raise TestCompleteException("Test completed successfully") async def upload_file(client: AsyncClient) -> str: + """ + This function is responsible for uploading a file to the file bucket. 
+ """ id_ = str(uuid.uuid4()) empty_file_object = FileObject( @@ -132,6 +141,9 @@ async def upload_file(client: AsyncClient) -> str: return id_ def run_postgres_db_changes(): + """ + This function is responsible for running the postgres_db_changes function. + """ asyncio.run(postgres_db_changes()) timeout_timer = None From 33ef8facb18042ec97d4c1dafa8f83adc1e7e628 Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 14 Aug 2024 11:16:09 -0700 Subject: [PATCH 058/100] Adds missing dependency --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a7e3a6fee..a5c29cc1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,8 @@ dev = [ "requests-toolbelt", "pytest", "huggingface_hub[cli,hf_transfer]", - "fastapi" + "fastapi", + "supabase" ] dev-whisper = [ From b1955cd058e52f76e60b75c2d7c8d4bda8b5a249 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 22 Aug 2024 10:08:52 -0700 Subject: [PATCH 059/100] Pin supabase version at root to 2.6.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6a9160b92..75837bcad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ dev = [ "pytest", "huggingface_hub[cli,hf_transfer]", "fastapi", - "supabase" + "supabase == 2.6.0" ] dev-whisper = ["ctranslate2 == 4.1.0", "transformers[torch] == 4.39.3"] From 568b4c936b1c7e2330062fb6fbedd4dfe8ca63f5 Mon Sep 17 00:00:00 2001 From: gharvey Date: Thu, 22 Aug 2024 14:03:51 -0700 Subject: [PATCH 060/100] Add background task for processing vectors --- .../routers/openai/vector_stores.py | 38 +++++++++++++++++-- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/leapfrogai_api/routers/openai/vector_stores.py b/src/leapfrogai_api/routers/openai/vector_stores.py index 765d07429..5ba2e7147 100644 --- a/src/leapfrogai_api/routers/openai/vector_stores.py +++ b/src/leapfrogai_api/routers/openai/vector_stores.py @@ -2,11 +2,13 @@ 
import logging import traceback +import time -from fastapi import APIRouter, HTTPException, status +from fastapi import APIRouter, HTTPException, status, BackgroundTasks from openai.pagination import SyncCursorPage from openai.types.beta import VectorStore, VectorStoreDeleted from openai.types.beta.vector_stores import VectorStoreFile, VectorStoreFileDeleted +from openai.types.beta.vector_store import FileCounts from leapfrogai_api.backend.rag.index import IndexingService from leapfrogai_api.backend.types import ( CreateVectorStoreFileRequest, @@ -44,19 +46,47 @@ async def list_vector_stores( async def create_vector_store( request: CreateVectorStoreRequest, session: Session, + background_tasks: BackgroundTasks, ) -> VectorStore: """Create a vector store.""" indexing_service = IndexingService(db=session) try: - new_vector_store = await indexing_service.create_new_vector_store(request) + current_time = int(time.time()) + # Create a placeholder vector store + placeholder_vector_store = VectorStore( + id="placeholder_id", + name=request.name or "", + status="in_progress", + object="vector_store", + created_at=current_time, + last_active_at=current_time, + file_counts=FileCounts( + cancelled=0, + completed=0, + failed=0, + in_progress=0, + total=0 + ), + usage_bytes=0, + metadata=request.metadata if hasattr(request, 'metadata') else None, + expires_after=None, + expires_at=None + ) + + # Add the actual creation task to background tasks + background_tasks.add_task( + indexing_service.create_new_vector_store, + request + ) + + return placeholder_vector_store except Exception as exc: traceback.print_exc() raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Unable to create vector store", ) from exc - return new_vector_store @router.post("/{vector_store_id}") @@ -198,4 +228,4 @@ async def delete_vector_store_file( id=file_id, object="vector_store.file.deleted", deleted=deleted, - ) + ) \ No newline at end of file From 
2d6c566ccc089505981b04db5c0638ac9a7c9fc8 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 23 Aug 2024 14:02:44 -0700 Subject: [PATCH 061/100] Create usable placeholder and refactor creation logic into indexing file --- src/leapfrogai_api/backend/rag/index.py | 78 ++++++++++++------- .../routers/openai/vector_stores.py | 32 +------- 2 files changed, 53 insertions(+), 57 deletions(-) diff --git a/src/leapfrogai_api/backend/rag/index.py b/src/leapfrogai_api/backend/rag/index.py index ce4ade400..de9b0ff8b 100644 --- a/src/leapfrogai_api/backend/rag/index.py +++ b/src/leapfrogai_api/backend/rag/index.py @@ -3,7 +3,7 @@ import logging import tempfile import time -from fastapi import HTTPException, UploadFile, status +from fastapi import HTTPException, UploadFile, status, BackgroundTasks from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from openai.types.beta.vector_store import FileCounts, VectorStore @@ -154,55 +154,77 @@ async def index_files( return responses async def create_new_vector_store( - self, request: CreateVectorStoreRequest + self, request: CreateVectorStoreRequest, background_tasks: BackgroundTasks ) -> VectorStore: """Create a new vector store given a set of file ids""" crud_vector_store = CRUDVectorStore(db=self.db) - last_active_at = int(time.time()) - - expires_after, expires_at = request.get_expiry(last_active_at) + current_time = int(time.time()) + expires_after, expires_at = request.get_expiry(current_time) try: - vector_store = VectorStore( + # Create a placeholder vector store + placeholder_vector_store = VectorStore( id="", # Leave blank to have Postgres generate a UUID - usage_bytes=0, # Automatically calculated by DB + name=request.name or "", + status=VectorStoreStatus.IN_PROGRESS.value, + object="vector_store", created_at=0, # Leave blank to have Postgres generate a timestamp + last_active_at=current_time, file_counts=FileCounts( - cancelled=0, completed=0, failed=0, in_progress=0, total=0 + 
cancelled=0, + completed=0, + failed=0, + in_progress=0, + total=0 ), - last_active_at=last_active_at, # Set to current time - metadata=request.metadata, - name=request.name or "", - object="vector_store", - status=VectorStoreStatus.IN_PROGRESS.value, + usage_bytes=0, + metadata=request.metadata if hasattr(request, 'metadata') else None, expires_after=expires_after, - expires_at=expires_at, + expires_at=expires_at ) - new_vector_store = await crud_vector_store.create(object_=vector_store) - if request.file_ids != []: - responses = await self.index_files( - new_vector_store.id, request.file_ids - ) - - for response in responses: - await self._increment_vector_store_file_status( - new_vector_store, response - ) + # Save the placeholder to the database + saved_placeholder = await crud_vector_store.create(object_=placeholder_vector_store) - new_vector_store.status = VectorStoreStatus.COMPLETED.value + if saved_placeholder is None: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Unable to create vector store", + ) - return await crud_vector_store.update( - id_=new_vector_store.id, - object_=new_vector_store, + # Add the actual creation task to background tasks + background_tasks.add_task( + self._complete_vector_store_creation, + saved_placeholder.id, + request ) + + return saved_placeholder except Exception as exc: + logging.error(exc) raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="Unable to parse vector store request", ) from exc + async def _complete_vector_store_creation( + self, vector_store_id: str, request: CreateVectorStoreRequest + ): + """Complete the vector store creation process in the background.""" + crud_vector_store = CRUDVectorStore(db=self.db) + vector_store = await crud_vector_store.get(filters=FilterVectorStore(id=vector_store_id)) + + if request.file_ids: + responses = await self.index_files(vector_store_id, request.file_ids) + for response in responses: + await 
self._increment_vector_store_file_status(vector_store, response) + + vector_store.status = VectorStoreStatus.COMPLETED.value + vector_store.last_active_at = int(time.time()) + + await crud_vector_store.update(id_=vector_store_id, object_=vector_store) + async def modify_existing_vector_store( self, vector_store_id: str, diff --git a/src/leapfrogai_api/routers/openai/vector_stores.py b/src/leapfrogai_api/routers/openai/vector_stores.py index 5ba2e7147..8348d9e6e 100644 --- a/src/leapfrogai_api/routers/openai/vector_stores.py +++ b/src/leapfrogai_api/routers/openai/vector_stores.py @@ -15,6 +15,7 @@ CreateVectorStoreRequest, ListVectorStoresResponse, ModifyVectorStoreRequest, + VectorStoreStatus, ) from leapfrogai_api.data.crud_vector_content import CRUDVectorContent from leapfrogai_api.data.crud_vector_store import CRUDVectorStore, FilterVectorStore @@ -52,35 +53,8 @@ async def create_vector_store( indexing_service = IndexingService(db=session) try: - current_time = int(time.time()) - # Create a placeholder vector store - placeholder_vector_store = VectorStore( - id="placeholder_id", - name=request.name or "", - status="in_progress", - object="vector_store", - created_at=current_time, - last_active_at=current_time, - file_counts=FileCounts( - cancelled=0, - completed=0, - failed=0, - in_progress=0, - total=0 - ), - usage_bytes=0, - metadata=request.metadata if hasattr(request, 'metadata') else None, - expires_after=None, - expires_at=None - ) - - # Add the actual creation task to background tasks - background_tasks.add_task( - indexing_service.create_new_vector_store, - request - ) - - return placeholder_vector_store + vector_store = await indexing_service.create_new_vector_store(request, background_tasks) + return vector_store except Exception as exc: traceback.print_exc() raise HTTPException( From 58e81931563e6c1b1a0fa3eab18a45c939efd16f Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 23 Aug 2024 14:50:05 -0700 Subject: [PATCH 062/100] Ruff linting --- 
src/leapfrogai_api/backend/rag/index.py | 22 +++++++++---------- .../routers/openai/vector_stores.py | 6 +++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/leapfrogai_api/backend/rag/index.py b/src/leapfrogai_api/backend/rag/index.py index de9b0ff8b..1105905d7 100644 --- a/src/leapfrogai_api/backend/rag/index.py +++ b/src/leapfrogai_api/backend/rag/index.py @@ -172,20 +172,18 @@ async def create_new_vector_store( created_at=0, # Leave blank to have Postgres generate a timestamp last_active_at=current_time, file_counts=FileCounts( - cancelled=0, - completed=0, - failed=0, - in_progress=0, - total=0 + cancelled=0, completed=0, failed=0, in_progress=0, total=0 ), usage_bytes=0, - metadata=request.metadata if hasattr(request, 'metadata') else None, + metadata=request.metadata if hasattr(request, "metadata") else None, expires_after=expires_after, - expires_at=expires_at + expires_at=expires_at, ) # Save the placeholder to the database - saved_placeholder = await crud_vector_store.create(object_=placeholder_vector_store) + saved_placeholder = await crud_vector_store.create( + object_=placeholder_vector_store + ) if saved_placeholder is None: raise HTTPException( @@ -195,9 +193,7 @@ async def create_new_vector_store( # Add the actual creation task to background tasks background_tasks.add_task( - self._complete_vector_store_creation, - saved_placeholder.id, - request + self._complete_vector_store_creation, saved_placeholder.id, request ) return saved_placeholder @@ -213,7 +209,9 @@ async def _complete_vector_store_creation( ): """Complete the vector store creation process in the background.""" crud_vector_store = CRUDVectorStore(db=self.db) - vector_store = await crud_vector_store.get(filters=FilterVectorStore(id=vector_store_id)) + vector_store = await crud_vector_store.get( + filters=FilterVectorStore(id=vector_store_id) + ) if request.file_ids: responses = await self.index_files(vector_store_id, request.file_ids) diff --git 
a/src/leapfrogai_api/routers/openai/vector_stores.py b/src/leapfrogai_api/routers/openai/vector_stores.py index 8348d9e6e..9f6a0a3ec 100644 --- a/src/leapfrogai_api/routers/openai/vector_stores.py +++ b/src/leapfrogai_api/routers/openai/vector_stores.py @@ -53,7 +53,9 @@ async def create_vector_store( indexing_service = IndexingService(db=session) try: - vector_store = await indexing_service.create_new_vector_store(request, background_tasks) + vector_store = await indexing_service.create_new_vector_store( + request, background_tasks + ) return vector_store except Exception as exc: traceback.print_exc() @@ -202,4 +204,4 @@ async def delete_vector_store_file( id=file_id, object="vector_store.file.deleted", deleted=deleted, - ) \ No newline at end of file + ) From b1d91c43e3dc7fdfbb9d6b97e123bc79f4910f37 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 23 Aug 2024 14:54:09 -0700 Subject: [PATCH 063/100] Updates modify vector store to use background task when files are provided --- src/leapfrogai_api/backend/rag/index.py | 65 ++++++++++++------- .../routers/openai/vector_stores.py | 11 ++-- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/src/leapfrogai_api/backend/rag/index.py b/src/leapfrogai_api/backend/rag/index.py index 1105905d7..299409163 100644 --- a/src/leapfrogai_api/backend/rag/index.py +++ b/src/leapfrogai_api/backend/rag/index.py @@ -227,6 +227,7 @@ async def modify_existing_vector_store( self, vector_store_id: str, request: ModifyVectorStoreRequest, + background_tasks: BackgroundTasks, ) -> VectorStore: """Modify an existing vector store given its id.""" crud_vector_store = CRUDVectorStore(db=self.db) @@ -256,36 +257,27 @@ async def modify_existing_vector_store( expires_at=old_vector_store.expires_at, ) - await crud_vector_store.update( + # Update the vector store with the new information + updated_vector_store = await crud_vector_store.update( id_=vector_store_id, object_=new_vector_store, - ) # Sets status to in_progress for the duration 
of this function + ) - if request.file_ids: - responses = await self.index_files( - new_vector_store.id, request.file_ids + if updated_vector_store is None: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Unable to modify vector store", ) - for response in responses: - await self._increment_vector_store_file_status( - new_vector_store, response - ) - - new_vector_store.status = VectorStoreStatus.COMPLETED.value - - last_active_at = int(time.time()) - new_vector_store.last_active_at = ( - last_active_at # Update after indexing files - ) - expires_after, expires_at = request.get_expiry(last_active_at) - if expires_at and expires_at: - new_vector_store.expires_after = expires_after - new_vector_store.expires_at = expires_at + # Add the file indexing task to background tasks + if request.file_ids: + background_tasks.add_task( + self._complete_vector_store_modification, + vector_store_id, + request, + ) - return await crud_vector_store.update( - id_=vector_store_id, - object_=new_vector_store, - ) + return updated_vector_store except Exception as exc: logging.error(exc) raise HTTPException( @@ -293,6 +285,31 @@ async def modify_existing_vector_store( detail="Unable to parse vector store request", ) from exc + async def _complete_vector_store_modification( + self, vector_store_id: str, request: ModifyVectorStoreRequest + ): + """Complete the vector store modification process in the background.""" + crud_vector_store = CRUDVectorStore(db=self.db) + vector_store = await crud_vector_store.get( + filters=FilterVectorStore(id=vector_store_id) + ) + + if request.file_ids: + responses = await self.index_files(vector_store_id, request.file_ids) + for response in responses: + await self._increment_vector_store_file_status(vector_store, response) + + vector_store.status = VectorStoreStatus.COMPLETED.value + last_active_at = int(time.time()) + vector_store.last_active_at = last_active_at + + expires_after, expires_at = 
request.get_expiry(last_active_at) + if expires_after and expires_at: + vector_store.expires_after = expires_after + vector_store.expires_at = expires_at + + await crud_vector_store.update(id_=vector_store_id, object_=vector_store) + async def file_ids_are_valid(self, file_ids: str | list[str]) -> bool: """Check if the provided file ids exist""" crud_file_object = CRUDFileObject(db=self.db) diff --git a/src/leapfrogai_api/routers/openai/vector_stores.py b/src/leapfrogai_api/routers/openai/vector_stores.py index 9f6a0a3ec..a9e732702 100644 --- a/src/leapfrogai_api/routers/openai/vector_stores.py +++ b/src/leapfrogai_api/routers/openai/vector_stores.py @@ -53,9 +53,7 @@ async def create_vector_store( indexing_service = IndexingService(db=session) try: - vector_store = await indexing_service.create_new_vector_store( - request, background_tasks - ) + vector_store = await indexing_service.create_new_vector_store(request, background_tasks) return vector_store except Exception as exc: traceback.print_exc() @@ -70,13 +68,16 @@ async def modify_vector_store( vector_store_id: str, request: ModifyVectorStoreRequest, session: Session, + background_tasks: BackgroundTasks, ) -> VectorStore: """Modify a vector store.""" indexing_service = IndexingService(db=session) try: modified_vector_store = await indexing_service.modify_existing_vector_store( - vector_store_id=vector_store_id, request=request + vector_store_id=vector_store_id, + request=request, + background_tasks=background_tasks, ) except HTTPException as exc: raise exc @@ -204,4 +205,4 @@ async def delete_vector_store_file( id=file_id, object="vector_store.file.deleted", deleted=deleted, - ) + ) \ No newline at end of file From 20dca4a4781115fc551ba85faf7cf9a77069fb44 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 23 Aug 2024 14:54:21 -0700 Subject: [PATCH 064/100] Ruff linting --- src/leapfrogai_api/routers/openai/vector_stores.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/src/leapfrogai_api/routers/openai/vector_stores.py b/src/leapfrogai_api/routers/openai/vector_stores.py index a9e732702..c5ea9998c 100644 --- a/src/leapfrogai_api/routers/openai/vector_stores.py +++ b/src/leapfrogai_api/routers/openai/vector_stores.py @@ -53,7 +53,9 @@ async def create_vector_store( indexing_service = IndexingService(db=session) try: - vector_store = await indexing_service.create_new_vector_store(request, background_tasks) + vector_store = await indexing_service.create_new_vector_store( + request, background_tasks + ) return vector_store except Exception as exc: traceback.print_exc() @@ -205,4 +207,4 @@ async def delete_vector_store_file( id=file_id, object="vector_store.file.deleted", deleted=deleted, - ) \ No newline at end of file + ) From e0c472adaa50b83fd842d0688a9ae3ba9470ff7c Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 23 Aug 2024 14:56:39 -0700 Subject: [PATCH 065/100] Ruff linting --- src/leapfrogai_api/routers/openai/vector_stores.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/leapfrogai_api/routers/openai/vector_stores.py b/src/leapfrogai_api/routers/openai/vector_stores.py index c5ea9998c..d5f1bb5e9 100644 --- a/src/leapfrogai_api/routers/openai/vector_stores.py +++ b/src/leapfrogai_api/routers/openai/vector_stores.py @@ -2,20 +2,17 @@ import logging import traceback -import time from fastapi import APIRouter, HTTPException, status, BackgroundTasks from openai.pagination import SyncCursorPage from openai.types.beta import VectorStore, VectorStoreDeleted from openai.types.beta.vector_stores import VectorStoreFile, VectorStoreFileDeleted -from openai.types.beta.vector_store import FileCounts from leapfrogai_api.backend.rag.index import IndexingService from leapfrogai_api.backend.types import ( CreateVectorStoreFileRequest, CreateVectorStoreRequest, ListVectorStoresResponse, ModifyVectorStoreRequest, - VectorStoreStatus, ) from leapfrogai_api.data.crud_vector_content import CRUDVectorContent from 
leapfrogai_api.data.crud_vector_store import CRUDVectorStore, FilterVectorStore From 00e6e61ec0d7a12d6578565f7b585eba81286818 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 10:29:59 -0700 Subject: [PATCH 066/100] Removes hard dependency on FastAPI for indexing, cleans up comments --- src/leapfrogai_api/backend/rag/index.py | 54 ++++++++++++------- .../routers/openai/vector_stores.py | 8 ++- 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/src/leapfrogai_api/backend/rag/index.py b/src/leapfrogai_api/backend/rag/index.py index 299409163..87a67851a 100644 --- a/src/leapfrogai_api/backend/rag/index.py +++ b/src/leapfrogai_api/backend/rag/index.py @@ -89,7 +89,7 @@ async def index_file(self, vector_store_id: str, file_id: str) -> VectorStoreFil ), object="vector_store.file", status=VectorStoreFileStatus.FAILED.value, - usage_bytes=0, + usage_bytes=0, # Leave blank to have Postgres generate a UUID vector_store_id=vector_store_id, ) return await crud_vector_store_file.create(object_=vector_store_file) @@ -100,7 +100,7 @@ async def index_file(self, vector_store_id: str, file_id: str) -> VectorStoreFil last_error=None, object="vector_store.file", status=VectorStoreFileStatus.IN_PROGRESS.value, - usage_bytes=0, + usage_bytes=0, # Leave blank to have Postgres generate a UUID vector_store_id=vector_store_id, ) @@ -154,7 +154,9 @@ async def index_files( return responses async def create_new_vector_store( - self, request: CreateVectorStoreRequest, background_tasks: BackgroundTasks + self, + request: CreateVectorStoreRequest, + background_tasks: BackgroundTasks | None = None, ) -> VectorStore: """Create a new vector store given a set of file ids""" crud_vector_store = CRUDVectorStore(db=self.db) @@ -174,8 +176,8 @@ async def create_new_vector_store( file_counts=FileCounts( cancelled=0, completed=0, failed=0, in_progress=0, total=0 ), - usage_bytes=0, - metadata=request.metadata if hasattr(request, "metadata") else None, + usage_bytes=0, # Leave blank 
to have Postgres generate a UUID + metadata=request.metadata, expires_after=expires_after, expires_at=expires_at, ) @@ -191,10 +193,18 @@ async def create_new_vector_store( detail="Unable to create vector store", ) - # Add the actual creation task to background tasks - background_tasks.add_task( - self._complete_vector_store_creation, saved_placeholder.id, request - ) + # Split the files, convert the chunks into vectors, and insert them into the db + if background_tasks: + # Perform the indexing in the background + background_tasks.add_task( + self._complete_vector_store_creation, + saved_placeholder.id, + request, + ) + else: + await self._complete_vector_store_creation( + saved_placeholder.id, request + ) return saved_placeholder except Exception as exc: @@ -213,7 +223,7 @@ async def _complete_vector_store_creation( filters=FilterVectorStore(id=vector_store_id) ) - if request.file_ids: + if request.file_ids != []: responses = await self.index_files(vector_store_id, request.file_ids) for response in responses: await self._increment_vector_store_file_status(vector_store, response) @@ -227,7 +237,7 @@ async def modify_existing_vector_store( self, vector_store_id: str, request: ModifyVectorStoreRequest, - background_tasks: BackgroundTasks, + background_tasks: BackgroundTasks | None = None, ) -> VectorStore: """Modify an existing vector store given its id.""" crud_vector_store = CRUDVectorStore(db=self.db) @@ -257,7 +267,7 @@ async def modify_existing_vector_store( expires_at=old_vector_store.expires_at, ) - # Update the vector store with the new information + # Update the vector store with the new information and set status to in_progress for the duration of this function updated_vector_store = await crud_vector_store.update( id_=vector_store_id, object_=new_vector_store, @@ -269,13 +279,19 @@ async def modify_existing_vector_store( detail="Unable to modify vector store", ) - # Add the file indexing task to background tasks + # Split the files, convert the chunks 
into vectors, and insert them into the db if request.file_ids: - background_tasks.add_task( - self._complete_vector_store_modification, - vector_store_id, - request, - ) + if background_tasks: + # Perform the indexing in the background + background_tasks.add_task( + self._complete_vector_store_modification, + vector_store_id, + request, + ) + else: + await self._complete_vector_store_modification( + vector_store_id, request + ) return updated_vector_store except Exception as exc: @@ -301,7 +317,7 @@ async def _complete_vector_store_modification( vector_store.status = VectorStoreStatus.COMPLETED.value last_active_at = int(time.time()) - vector_store.last_active_at = last_active_at + vector_store.last_active_at = last_active_at # Update after indexing files expires_after, expires_at = request.get_expiry(last_active_at) if expires_after and expires_at: diff --git a/src/leapfrogai_api/routers/openai/vector_stores.py b/src/leapfrogai_api/routers/openai/vector_stores.py index d5f1bb5e9..6a2590153 100644 --- a/src/leapfrogai_api/routers/openai/vector_stores.py +++ b/src/leapfrogai_api/routers/openai/vector_stores.py @@ -50,10 +50,9 @@ async def create_vector_store( indexing_service = IndexingService(db=session) try: - vector_store = await indexing_service.create_new_vector_store( - request, background_tasks + return await indexing_service.create_new_vector_store( + request, background_tasks=background_tasks ) - return vector_store except Exception as exc: traceback.print_exc() raise HTTPException( @@ -73,7 +72,7 @@ async def modify_vector_store( indexing_service = IndexingService(db=session) try: - modified_vector_store = await indexing_service.modify_existing_vector_store( + return await indexing_service.modify_existing_vector_store( vector_store_id=vector_store_id, request=request, background_tasks=background_tasks, @@ -86,7 +85,6 @@ async def modify_vector_store( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Unable to modify vector store", ) from exc - 
return modified_vector_store @router.get("/{vector_store_id}") From d8b949c2d106b2f952008f2c60597fee1a7742b6 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 11:27:30 -0700 Subject: [PATCH 067/100] Adds initial test --- tests/integration/api/test_vector_stores.py | 81 ++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/tests/integration/api/test_vector_stores.py b/tests/integration/api/test_vector_stores.py index 4a939cb21..4b8bd6cb8 100644 --- a/tests/integration/api/test_vector_stores.py +++ b/tests/integration/api/test_vector_stores.py @@ -11,7 +11,10 @@ from openai.types.beta import VectorStore, VectorStoreDeleted from openai.types.beta.vector_store import ExpiresAfter from langchain_core.embeddings.fake import FakeEmbeddings - +from leapfrogai_api.backend.types import CreateVectorStoreRequest +from leapfrogai_api.routers.openai.requests.create_modify_assistant_request import CreateAssistantRequest +from leapfrogai_api.routers.openai.threads import router as threads_router +from leapfrogai_api.routers.openai.assistants import router as assistants_router import leapfrogai_api.backend.rag.index from leapfrogai_api.backend.types import ( CreateVectorStoreRequest, @@ -45,6 +48,8 @@ class MissingEnvironmentVariable(Exception): vector_store_client = TestClient(vector_store_router, headers=headers) files_client = TestClient(files_router, headers=headers) +assistants_client = TestClient(assistants_router, headers=headers) +threads_client = TestClient(threads_router, headers=headers) # Read in file for use with vector store files @@ -290,3 +295,77 @@ def test_cleanup_file(create_file): assert FileDeleted.model_validate( cleanup_response.json() ), "Should return a FileDeleted object." 
+ + +def test_run_with_background_task(create_file): + """Test creating a run while the vector store is still being processed in the background.""" + # Create a vector store with files + request = CreateVectorStoreRequest( + file_ids=[create_file["id"]], + name="test_background", + expires_after=ExpiresAfter(anchor="last_active_at", days=10), + metadata={}, + ) + + vector_store_response = vector_store_client.post( + "/openai/v1/vector_stores", json=request.model_dump() + ) + assert vector_store_response.status_code == status.HTTP_200_OK + vector_store_id = vector_store_response.json()["id"] + + # Create an assistant with the vector store + assistant_request = CreateAssistantRequest( + model="llama-cpp-python", + name="Test Assistant", + instructions="You are a helpful assistant with access to a knowledge base.", + tools=[{"type": "file_search"}], + tool_resources={"file_search": {"vector_store_ids": [vector_store_id]}}, + ) + + assistant_response = assistants_client.post( + "/openai/v1/assistants", json=assistant_request.model_dump() + ) + assert assistant_response.status_code == status.HTTP_200_OK + assistant_id = assistant_response.json()["id"] + + # Create a thread + thread_response = threads_client.post("/openai/v1/threads", json={}) + assert thread_response.status_code == status.HTTP_200_OK + thread_id = thread_response.json()["id"] + + # Create a message in the thread + message_request = { + "role": "user", + "content": "What information can you provide about the content in the vector store?", + } + message_response = threads_client.post( + f"/openai/v1/threads/{thread_id}/messages", json=message_request + ) + assert message_response.status_code == status.HTTP_200_OK + + # Create a run + run_request = { + "assistant_id": assistant_id, + "instructions": "Please use the file_search tool to find relevant information.", + } + run_response = threads_client.post( + f"/openai/v1/threads/{thread_id}/runs", json=run_request + ) + assert run_response.status_code == 
status.HTTP_200_OK + + # Retrieve the assistant's message + messages_response = threads_client.get(f"/openai/v1/threads/{thread_id}/messages") + assert messages_response.status_code == status.HTTP_200_OK + messages = messages_response.json()["data"] + assert len(messages) > 1, "No response message from the assistant" + assistant_message = messages[0]["content"][0]["text"]["value"] + + # Check that the assistant's response contains relevant information + assert len(assistant_message) > 0, "Assistant's response is empty" + assert "vector store" in assistant_message.lower(), "Assistant's response doesn't mention the vector store" + + # Clean up + delete_assistant_response = assistants_client.delete(f"/openai/v1/assistants/{assistant_id}") + assert delete_assistant_response.status_code == status.HTTP_200_OK + delete_vector_store_response = vector_store_client.delete(f"/openai/v1/vector_stores/{vector_store_id}") + assert delete_vector_store_response.status_code == status.HTTP_200_OK \ No newline at end of file From bfb71edab6e78fc74c8f894328dcee1929dab699 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 14:32:38 -0700 Subject: [PATCH 068/100] Removes unnecessary import --- tests/integration/api/test_vector_stores.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/integration/api/test_vector_stores.py b/tests/integration/api/test_vector_stores.py index 4b8bd6cb8..d25977c7e 100644 --- a/tests/integration/api/test_vector_stores.py +++ b/tests/integration/api/test_vector_stores.py @@ -11,15 +11,11 @@ from openai.types.beta import VectorStore, VectorStoreDeleted from openai.types.beta.vector_store import ExpiresAfter from langchain_core.embeddings.fake import FakeEmbeddings -from leapfrogai_api.backend.types import CreateVectorStoreRequest +from leapfrogai_api.backend.types import (CreateVectorStoreRequest, ModifyVectorStoreRequest) from leapfrogai_api.routers.openai.requests.create_modify_assistant_request import 
CreateAssistantRequest from leapfrogai_api.routers.openai.threads import router as threads_router from leapfrogai_api.routers.openai.assistants import router as assistants_router import leapfrogai_api.backend.rag.index -from leapfrogai_api.backend.types import ( - CreateVectorStoreRequest, - ModifyVectorStoreRequest, -) from leapfrogai_api.routers.openai.vector_stores import router as vector_store_router from leapfrogai_api.routers.openai.files import router as files_router From fd2a2c38ef75fe35e14deefdba1c61ea0bad6fab Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 14:33:06 -0700 Subject: [PATCH 069/100] Ruff linting --- tests/integration/api/test_vector_stores.py | 23 +++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/integration/api/test_vector_stores.py b/tests/integration/api/test_vector_stores.py index d25977c7e..f6da81287 100644 --- a/tests/integration/api/test_vector_stores.py +++ b/tests/integration/api/test_vector_stores.py @@ -11,8 +11,13 @@ from openai.types.beta import VectorStore, VectorStoreDeleted from openai.types.beta.vector_store import ExpiresAfter from langchain_core.embeddings.fake import FakeEmbeddings -from leapfrogai_api.backend.types import (CreateVectorStoreRequest, ModifyVectorStoreRequest) -from leapfrogai_api.routers.openai.requests.create_modify_assistant_request import CreateAssistantRequest +from leapfrogai_api.backend.types import ( + CreateVectorStoreRequest, + ModifyVectorStoreRequest, +) +from leapfrogai_api.routers.openai.requests.create_modify_assistant_request import ( + CreateAssistantRequest, +) from leapfrogai_api.routers.openai.threads import router as threads_router from leapfrogai_api.routers.openai.assistants import router as assistants_router import leapfrogai_api.backend.rag.index @@ -358,10 +363,16 @@ def test_run_with_background_task(create_file): # Check that the assistant's response contains relevant information assert len(assistant_message) > 0, "Assistant's 
response is empty" - assert "vector store" in assistant_message.lower(), "Assistant's response doesn't mention the vector store" + assert ( + "vector store" in assistant_message.lower() + ), "Assistant's response doesn't mention the vector store" # Clean up - delete_assistant_response = assistants_client.delete(f"/openai/v1/assistants/{assistant_id}") + delete_assistant_response = assistants_client.delete( + f"/openai/v1/assistants/{assistant_id}" + ) assert delete_assistant_response.status_code == status.HTTP_200_OK - delete_vector_store_response = vector_store_client.delete(f"/openai/v1/vector_stores/{vector_store_id}") - assert delete_vector_store_response.status_code == status.HTTP_200_OK \ No newline at end of file + delete_vector_store_response = vector_store_client.delete( + f"/openai/v1/vector_stores/{vector_store_id}" + ) + assert delete_vector_store_response.status_code == status.HTTP_200_OK From 32fa43e7101ce129bb2262c25af85d6a07d878fb Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 15:08:13 -0700 Subject: [PATCH 070/100] Moves deletion test to end of test_vector_stores --- tests/integration/api/test_vector_stores.py | 99 ++++++++++----------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/tests/integration/api/test_vector_stores.py b/tests/integration/api/test_vector_stores.py index f6da81287..a544c3461 100644 --- a/tests/integration/api/test_vector_stores.py +++ b/tests/integration/api/test_vector_stores.py @@ -248,56 +248,6 @@ def test_get_modified_expired(): assert get_modified_response.json() is None -def test_delete(): - """Test deleting a vector store.
Requires a running Supabase instance.""" - vector_store_id = vector_store_response.json()["id"] - delete_response = vector_store_client.delete( - f"/openai/v1/vector_stores/{vector_store_id}" - ) - assert delete_response.status_code == status.HTTP_200_OK - assert VectorStoreDeleted.model_validate( - delete_response.json() - ), "Should return a VectorStoreDeleted object." - assert delete_response.json()["deleted"] is True, "Should be able to delete." - - -def test_delete_twice(): - """Test deleting a vector store twice. Requires a running Supabase instance.""" - vector_store_id = vector_store_response.json()["id"] - delete_response = vector_store_client.delete( - f"/openai/v1/vector_stores/{vector_store_id}" - ) - assert delete_response.status_code == status.HTTP_200_OK - assert VectorStoreDeleted.model_validate( - delete_response.json() - ), "Should return a VectorStoreDeleted object." - assert ( - delete_response.json()["deleted"] is False - ), "Should not be able to delete twice." - - -def test_get_nonexistent(): - """Test getting a nonexistent vector store. Requires a running Supabase instance.""" - vector_store_id = vector_store_response.json()["id"] - get_response = vector_store_client.get( - f"/openai/v1/vector_stores/{vector_store_id}" - ) - assert get_response.status_code == status.HTTP_200_OK - assert ( - get_response.json() is None - ), f"Get should not return deleted VectorStore {vector_store_id}." - - -def test_cleanup_file(create_file): - """Test cleaning up the file created for the vector store. Requires a running Supabase instance.""" - file_id = create_file["id"] - cleanup_response = files_client.delete(f"/openai/v1/files/{file_id}") - assert cleanup_response.status_code == status.HTTP_200_OK - assert FileDeleted.model_validate( - cleanup_response.json() - ), "Should return a FileDeleted object." 
- - def test_run_with_background_task(create_file): """Test creating a run while the vector store is still being processed in the background.""" # Create a vector store with files @@ -376,3 +326,52 @@ def test_run_with_background_task(create_file): f"/openai/v1/vector_stores/{vector_store_id}" ) assert delete_vector_store_response.status_code == status.HTTP_200_OK + +def test_delete(): + """Test deleting a vector store. Requires a running Supabase instance.""" + vector_store_id = vector_store_response.json()["id"] + delete_response = vector_store_client.delete( + f"/openai/v1/vector_stores/{vector_store_id}" + ) + assert delete_response.status_code == status.HTTP_200_OK + assert VectorStoreDeleted.model_validate( + delete_response.json() + ), "Should return a VectorStoreDeleted object." + assert delete_response.json()["deleted"] is True, "Should be able to delete." + + +def test_delete_twice(): + """Test deleting a vector store twice. Requires a running Supabase instance.""" + vector_store_id = vector_store_response.json()["id"] + delete_response = vector_store_client.delete( + f"/openai/v1/vector_stores/{vector_store_id}" + ) + assert delete_response.status_code == status.HTTP_200_OK + assert VectorStoreDeleted.model_validate( + delete_response.json() + ), "Should return a VectorStoreDeleted object." + assert ( + delete_response.json()["deleted"] is False + ), "Should not be able to delete twice." + + +def test_get_nonexistent(): + """Test getting a nonexistent vector store. Requires a running Supabase instance.""" + vector_store_id = vector_store_response.json()["id"] + get_response = vector_store_client.get( + f"/openai/v1/vector_stores/{vector_store_id}" + ) + assert get_response.status_code == status.HTTP_200_OK + assert ( + get_response.json() is None + ), f"Get should not return deleted VectorStore {vector_store_id}." + + +def test_cleanup_file(create_file): + """Test cleaning up the file created for the vector store. 
Requires a running Supabase instance.""" + file_id = create_file["id"] + cleanup_response = files_client.delete(f"/openai/v1/files/{file_id}") + assert cleanup_response.status_code == status.HTTP_200_OK + assert FileDeleted.model_validate( + cleanup_response.json() + ), "Should return a FileDeleted object." \ No newline at end of file From 8e6e403d54bbf293994c5a1d8314cb319fb45666 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 15:15:37 -0700 Subject: [PATCH 071/100] Updates content payload --- tests/integration/api/test_vector_stores.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/integration/api/test_vector_stores.py b/tests/integration/api/test_vector_stores.py index a544c3461..9acd46be5 100644 --- a/tests/integration/api/test_vector_stores.py +++ b/tests/integration/api/test_vector_stores.py @@ -287,8 +287,17 @@ def test_run_with_background_task(create_file): # Create a message in the thread message_request = { "role": "user", - "content": "What information can you provide about the content in the vector store?", + "content": [ + { + "text": { + "annotations": [], + "value": "What information can you provide about the content in the vector store?", + }, + "type": "text", + }, + ], } + message_response = threads_client.post( f"/openai/v1/threads/{thread_id}/messages", json=message_request ) From a0d80445f196142ebb2573c48f7af8ce6956ec5d Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 15:18:15 -0700 Subject: [PATCH 072/100] Add messages client to test --- tests/integration/api/test_vector_stores.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/integration/api/test_vector_stores.py b/tests/integration/api/test_vector_stores.py index 9acd46be5..6e97cc273 100644 --- a/tests/integration/api/test_vector_stores.py +++ b/tests/integration/api/test_vector_stores.py @@ -19,6 +19,7 @@ CreateAssistantRequest, ) from leapfrogai_api.routers.openai.threads import router as 
threads_router +from leapfrogai_api.routers.openai.messages import router as messages_router from leapfrogai_api.routers.openai.assistants import router as assistants_router import leapfrogai_api.backend.rag.index from leapfrogai_api.routers.openai.vector_stores import router as vector_store_router @@ -51,7 +52,7 @@ class MissingEnvironmentVariable(Exception): files_client = TestClient(files_router, headers=headers) assistants_client = TestClient(assistants_router, headers=headers) threads_client = TestClient(threads_router, headers=headers) - +messages_client = TestClient(messages_router, headers=headers) # Read in file for use with vector store files @pytest.fixture(scope="session", autouse=True) @@ -298,7 +299,7 @@ def test_run_with_background_task(create_file): ], } - message_response = threads_client.post( + message_response = messages_client.post( f"/openai/v1/threads/{thread_id}/messages", json=message_request ) assert message_response.status_code == status.HTTP_200_OK From ae554b2d3874b1a1223495e6cd7fda0e9429d7e0 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 15:20:59 -0700 Subject: [PATCH 073/100] Replaces more incorrect clients --- tests/integration/api/test_vector_stores.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/api/test_vector_stores.py b/tests/integration/api/test_vector_stores.py index 6e97cc273..5f4ccd474 100644 --- a/tests/integration/api/test_vector_stores.py +++ b/tests/integration/api/test_vector_stores.py @@ -21,6 +21,7 @@ from leapfrogai_api.routers.openai.threads import router as threads_router from leapfrogai_api.routers.openai.messages import router as messages_router from leapfrogai_api.routers.openai.assistants import router as assistants_router +from leapfrogai_api.routers.openai.runs import router as runs_router import leapfrogai_api.backend.rag.index from leapfrogai_api.routers.openai.vector_stores import router as vector_store_router from 
leapfrogai_api.routers.openai.files import router as files_router @@ -53,6 +54,7 @@ class MissingEnvironmentVariable(Exception): assistants_client = TestClient(assistants_router, headers=headers) threads_client = TestClient(threads_router, headers=headers) messages_client = TestClient(messages_router, headers=headers) +runs_client = TestClient(runs_router, headers=headers) # Read in file for use with vector store files @pytest.fixture(scope="session", autouse=True) @@ -309,13 +311,13 @@ def test_run_with_background_task(create_file): "assistant_id": assistant_id, "instructions": "Please use the file_search tool to find relevant information.", } - run_response = threads_client.post( + run_response = runs_client.post( f"/openai/v1/threads/{thread_id}/runs", json=run_request ) assert run_response.status_code == status.HTTP_200_OK # Retrieve the assistant's message - messages_response = threads_client.get(f"/openai/v1/threads/{thread_id}/messages") + messages_response = messages_client.get(f"/openai/v1/threads/{thread_id}/messages") assert messages_response.status_code == status.HTTP_200_OK messages = messages_response.json()["data"] assert len(messages) > 1, "No response message from the assistant" From f38f2cfe354761ac26d04fe09d5e15655f081f5a Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 15:26:05 -0700 Subject: [PATCH 074/100] Switch from llama-cpp-python to test-chat --- tests/integration/api/test_vector_stores.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/api/test_vector_stores.py b/tests/integration/api/test_vector_stores.py index 5f4ccd474..2db627779 100644 --- a/tests/integration/api/test_vector_stores.py +++ b/tests/integration/api/test_vector_stores.py @@ -269,7 +269,7 @@ def test_run_with_background_task(create_file): # Create an assistant with the vector store assistant_request = CreateAssistantRequest( - model="llama-cpp-python", + model="test-chat", name="Test Assistant", instructions="You are a 
helpful assistant with access to a knowledge base.", tools=[{"type": "file_search"}], From 605a06c49e24ecbe87ef004a848fa402d76502da Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 15:43:34 -0700 Subject: [PATCH 075/100] Moves vector stores e2e test into test_api.py --- tests/e2e/test_api.py | 60 ++++++++++++- tests/integration/api/test_vector_stores.py | 96 +-------------------- 2 files changed, 62 insertions(+), 94 deletions(-) diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index b556954e0..616e0a4a4 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -4,9 +4,13 @@ import pytest as pytest import requests - +from openai import OpenAI from .utils import create_test_user +client = OpenAI( + base_url="https://leapfrogai-api.uds.dev/openai/v1", api_key=create_test_user() +) + logger = logging.getLogger(__name__) test_id = str(uuid.uuid4()) @@ -111,3 +115,57 @@ def test_api_row_level_security(): verify_request(get_urls, "get", jwt_token, False) verify_request(post_urls, "post", jwt_token, False) verify_request(delete_urls, "delete", jwt_token, False) + + +def test_run_with_background_task(): + # Create a vector store + vector_store = client.beta.vector_stores.create( + name="test_background", + file_ids=["file-id"], # Replace with actual file ID + ) + assert vector_store.id is not None + + # Create an assistant + assistant = client.beta.assistants.create( + model="test-chat", + name="Test Assistant", + instructions="You are a helpful assistant with access to a knowledge base.", + tools=[{"type": "file_search"}], + tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}}, + ) + assert assistant.id is not None + + # Create a thread + thread = client.beta.threads.create() + assert thread.id is not None + + # Add a message to the thread + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content="What information can you provide about the content in the vector store?", + ) + assert 
message.id is not None + + # Create a run + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please use the file_search tool to find relevant information.", + ) + assert run.id is not None + + # Retrieve the assistant's message + messages = client.beta.threads.messages.list(thread_id=thread.id) + assert len(messages.data) > 1, "No response message from the assistant" + assistant_message = messages.data[0].content[0].text.value + + # Check that the assistant's response contains relevant information + assert len(assistant_message) > 0, "Assistant's response is empty" + assert ( + "vector store" in assistant_message.lower() + ), "Assistant's response doesn't mention the vector store" + + # Clean up + client.beta.assistants.delete(assistant_id=assistant.id) + client.beta.vector_stores.delete(vector_store_id=vector_store.id) diff --git a/tests/integration/api/test_vector_stores.py b/tests/integration/api/test_vector_stores.py index 2db627779..b95d30142 100644 --- a/tests/integration/api/test_vector_stores.py +++ b/tests/integration/api/test_vector_stores.py @@ -15,9 +15,6 @@ CreateVectorStoreRequest, ModifyVectorStoreRequest, ) -from leapfrogai_api.routers.openai.requests.create_modify_assistant_request import ( - CreateAssistantRequest, -) from leapfrogai_api.routers.openai.threads import router as threads_router from leapfrogai_api.routers.openai.messages import router as messages_router from leapfrogai_api.routers.openai.assistants import router as assistants_router @@ -56,6 +53,7 @@ class MissingEnvironmentVariable(Exception): messages_client = TestClient(messages_router, headers=headers) runs_client = TestClient(runs_router, headers=headers) + # Read in file for use with vector store files @pytest.fixture(scope="session", autouse=True) def read_testfile(): @@ -76,7 +74,7 @@ def create_file(read_testfile): # pylint: disable=redefined-outer-name, unused- file_response = files_client.post( "/openai/v1/files", - 
files={"file": ("test.txt", read_testfile, "text/plain")}, + files={"file": ("test.txt", read_testfile, "text/`pla`in")}, data={"purpose": "assistants"}, ) @@ -251,94 +249,6 @@ def test_get_modified_expired(): assert get_modified_response.json() is None -def test_run_with_background_task(create_file): - """Test creating a run while the vector store is still being processed in the background.""" - # Create a vector store with files - request = CreateVectorStoreRequest( - file_ids=[create_file["id"]], - name="test_background", - expires_after=ExpiresAfter(anchor="last_active_at", days=10), - metadata={}, - ) - - vector_store_response = vector_store_client.post( - "/openai/v1/vector_stores", json=request.model_dump() - ) - assert vector_store_response.status_code == status.HTTP_200_OK - vector_store_id = vector_store_response.json()["id"] - - # Create an assistant with the vector store - assistant_request = CreateAssistantRequest( - model="test-chat", - name="Test Assistant", - instructions="You are a helpful assistant with access to a knowledge base.", - tools=[{"type": "file_search"}], - tool_resources={"file_search": {"vector_store_ids": [vector_store_id]}}, - ) - - assistant_response = assistants_client.post( - "/openai/v1/assistants", json=assistant_request.model_dump() - ) - assert assistant_response.status_code == status.HTTP_200_OK - assistant_id = assistant_response.json()["id"] - - # Create a thread - thread_response = threads_client.post("/openai/v1/threads", json={}) - assert thread_response.status_code == status.HTTP_200_OK - thread_id = thread_response.json()["id"] - - # Create a message in the thread - message_request = { - "role": "user", - "content": [ - { - "text": { - "annotations": [], - "value": "What information can you provide about the content in the vector store?", - }, - "type": "text", - }, - ], - } - - message_response = messages_client.post( - f"/openai/v1/threads/{thread_id}/messages", json=message_request - ) - assert 
message_response.status_code == status.HTTP_200_OK - - # Create a run - run_request = { - "assistant_id": assistant_id, - "instructions": "Please use the file_search tool to find relevant information.", - } - run_response = runs_client.post( - f"/openai/v1/threads/{thread_id}/runs", json=run_request - ) - assert run_response.status_code == status.HTTP_200_OK - - # Retrieve the assistant's message - messages_response = messages_client.get(f"/openai/v1/threads/{thread_id}/messages") - assert messages_response.status_code == status.HTTP_200_OK - messages = messages_response.json()["data"] - assert len(messages) > 1, "No response message from the assistant" - assistant_message = messages[0]["content"][0]["text"]["value"] - - # Check that the assistant's response contains relevant information - assert len(assistant_message) > 0, "Assistant's response is empty" - assert ( - "vector store" in assistant_message.lower() - ), "Assistant's response doesn't mention the vector store" - - # Clean up - delete_assistant_response = assistants_client.delete( - f"/openai/v1/assistants/{assistant_id}" - ) - assert delete_assistant_response.status_code == status.HTTP_200_OK - delete_vector_store_response = vector_store_client.delete( - f"/openai/v1/vector_stores/{vector_store_id}" - ) - assert delete_vector_store_response.status_code == status.HTTP_200_OK - def test_delete(): """Test deleting a vector store. Requires a running Supabase instance.""" vector_store_id = vector_store_response.json()["id"] @@ -386,4 +296,4 @@ def test_cleanup_file(create_file): assert cleanup_response.status_code == status.HTTP_200_OK assert FileDeleted.model_validate( cleanup_response.json() - ), "Should return a FileDeleted object." \ No newline at end of file + ), "Should return a FileDeleted object." 
From 8011c229a985653a7bb034c22546f50765977075 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 15:53:10 -0700 Subject: [PATCH 076/100] Reverts test_vector_stores.py to match main --- tests/integration/api/test_vector_stores.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tests/integration/api/test_vector_stores.py b/tests/integration/api/test_vector_stores.py index b95d30142..4a939cb21 100644 --- a/tests/integration/api/test_vector_stores.py +++ b/tests/integration/api/test_vector_stores.py @@ -11,15 +11,12 @@ from openai.types.beta import VectorStore, VectorStoreDeleted from openai.types.beta.vector_store import ExpiresAfter from langchain_core.embeddings.fake import FakeEmbeddings + +import leapfrogai_api.backend.rag.index from leapfrogai_api.backend.types import ( CreateVectorStoreRequest, ModifyVectorStoreRequest, ) -from leapfrogai_api.routers.openai.threads import router as threads_router -from leapfrogai_api.routers.openai.messages import router as messages_router -from leapfrogai_api.routers.openai.assistants import router as assistants_router -from leapfrogai_api.routers.openai.runs import router as runs_router -import leapfrogai_api.backend.rag.index from leapfrogai_api.routers.openai.vector_stores import router as vector_store_router from leapfrogai_api.routers.openai.files import router as files_router @@ -48,10 +45,6 @@ class MissingEnvironmentVariable(Exception): vector_store_client = TestClient(vector_store_router, headers=headers) files_client = TestClient(files_router, headers=headers) -assistants_client = TestClient(assistants_router, headers=headers) -threads_client = TestClient(threads_router, headers=headers) -messages_client = TestClient(messages_router, headers=headers) -runs_client = TestClient(runs_router, headers=headers) # Read in file for use with vector store files @@ -74,7 +67,7 @@ def create_file(read_testfile): # pylint: disable=redefined-outer-name, unused- file_response = 
files_client.post( "/openai/v1/files", - files={"file": ("test.txt", read_testfile, "text/`pla`in")}, + files={"file": ("test.txt", read_testfile, "text/plain")}, data={"purpose": "assistants"}, ) From c2b1c05c0f07f69829e9598f152aadba9d0e41ea Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 17:09:39 -0700 Subject: [PATCH 077/100] Makes test handle concurrency better, allow for re-using creds --- tests/e2e/test_api.py | 160 ++++++++++++++++++++++++++++++++++-------- tests/e2e/utils.py | 22 +++--- 2 files changed, 144 insertions(+), 38 deletions(-) diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index 616e0a4a4..685609882 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -1,10 +1,14 @@ import io import logging +import time import uuid - -import pytest as pytest +import tempfile import requests +import pytest from openai import OpenAI +from openai.types.beta.vector_store import VectorStore + +from leapfrogai_api.backend.types import VectorStoreStatus from .utils import create_test_user client = OpenAI( @@ -117,55 +121,153 @@ def test_api_row_level_security(): verify_request(delete_urls, "delete", jwt_token, False) +def download_arxiv_pdf(): + url = "https://arxiv.org/pdf/2305.16291.pdf" + response = requests.get(url) + if response.status_code == 200: + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: + temp_file.write(response.content) + return temp_file.name + else: + raise Exception( + f"Failed to download PDF from ArXiv. Status code: {response.status_code}" + ) + + def test_run_with_background_task(): + """ + This test confirms whether a vector store for an assistant can index files + while chatting at the same time. 
+ """ + print("Starting test_run_with_background_task") + + # Download the ArXiv PDF + pdf_path = download_arxiv_pdf() + print(f"Downloaded ArXiv PDF to: {pdf_path}") + + # Upload the PDF file + with open(pdf_path, "rb") as file: + file_upload = client.files.create(file=file, purpose="assistants") + assert file_upload.id is not None + print(f"Uploaded PDF file with ID: {file_upload.id}") + # Create a vector store - vector_store = client.beta.vector_stores.create( + vector_store: VectorStore = client.beta.vector_stores.create( name="test_background", - file_ids=["file-id"], # Replace with actual file ID + file_ids=[file_upload.id], ) assert vector_store.id is not None + print(f"Created vector store with ID: {vector_store.id}") + + # Check initial status + assert vector_store.status == VectorStoreStatus.IN_PROGRESS.value + print(f"Initial vector store status: {vector_store.status}") # Create an assistant assistant = client.beta.assistants.create( - model="test-chat", + model="vllm", name="Test Assistant", - instructions="You are a helpful assistant with access to a knowledge base.", + instructions="You are a helpful assistant with access to a knowledge base about AI and machine learning.", tools=[{"type": "file_search"}], tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}}, ) assert assistant.id is not None + print(f"Created assistant with ID: {assistant.id}") # Create a thread thread = client.beta.threads.create() assert thread.id is not None + print(f"Created thread with ID: {thread.id}") - # Add a message to the thread - message = client.beta.threads.messages.create( - thread_id=thread.id, - role="user", - content="What information can you provide about the content in the vector store?", - ) - assert message.id is not None + # Function to check vector store status + def check_vector_store_status(): + nonlocal vector_store + vector_store = client.beta.vector_stores.retrieve( + vector_store_id=vector_store.id + ) + return vector_store.status - # 
Create a run - run = client.beta.threads.runs.create( - thread_id=thread.id, - assistant_id=assistant.id, - instructions="Please use the file_search tool to find relevant information.", - ) - assert run.id is not None + # Perform multiple runs while indexing is in progress + num_runs = 5 + responses = [] + + for i in range(num_runs): + # Check if indexing is still in progress + current_status = check_vector_store_status() + print(f"Run {i + 1}: Current vector store status: {current_status}") + if current_status == VectorStoreStatus.COMPLETED.value: + pytest.fail( + f"Vector store indexing completed without concurrently running multiple runs and ended after only {i} run(s)" + ) + + # Add a message to the thread + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=f"Run {i + 1}: What is the main topic of the paper in the vector store?", + ) + assert message.id is not None + print(f"Run {i + 1}: Added message to thread") + + # Create a run + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please use the file_search tool to find relevant information from the uploaded ArXiv paper.", + ) + assert run.id is not None + print(f"Run {i + 1}: Created run with ID: {run.id}") + + # Retrieve the assistant's message + messages = client.beta.threads.messages.list(thread_id=thread.id) + assert ( + len(messages.data) > i + 1 + ), f"No response message from the assistant for run {i + 1}" + assistant_message = messages.data[0].content[0].text.value + responses.append(assistant_message) + print(f"Run {i + 1}: Received assistant's response") - # Retrieve the assistant's message - messages = client.beta.threads.messages.list(thread_id=thread.id) - assert len(messages.data) > 1, "No response message from the assistant" - assistant_message = messages.data[0].content[0].text.value + print(f"Completed run {i + 1}") - # Check that the assistant's response contains relevant information - 
assert len(assistant_message) > 0, "Assistant's response is empty" - assert ( - "vector store" in assistant_message.lower() - ), "Assistant's response doesn't mention the vector store" + # Check if indexing is still in progress + current_status = check_vector_store_status() + if current_status == VectorStoreStatus.COMPLETED.value: + print(f"Vector store indexing completed after {i + 1} run(s)") + pytest.fail( + f"Vector store indexing completed without concurrently running multiple runs and ended after only {i} run(s)" + ) + + # Wait for indexing to complete if it hasn't already + max_wait_time = 60 # seconds + start_time = time.time() + while check_vector_store_status() != VectorStoreStatus.COMPLETED.value: + if time.time() - start_time > max_wait_time: + pytest.fail( + "Vector store indexing did not complete within the expected time" + ) + time.sleep(2) + print( + f"Waiting for indexing to complete... Current status: {check_vector_store_status()}" + ) + + # Verify final vector store status + assert vector_store.status == VectorStoreStatus.COMPLETED.value + print(f"Final vector store status: {vector_store.status}") + + # Check that at least one of the assistant's responses contains relevant information + assert any( + len(response) > 0 for response in responses + ), "All assistant responses are empty" + assert any( + "arxiv" in response.lower() or "paper" in response.lower() + for response in responses + ), "None of the assistant's responses mention the ArXiv paper" + print("Verified assistant responses") # Clean up client.beta.assistants.delete(assistant_id=assistant.id) client.beta.vector_stores.delete(vector_store_id=vector_store.id) + client.files.delete(file_id=file_upload.id) + print("Cleaned up resources") + + print("test_run_with_background_task completed successfully") diff --git a/tests/e2e/utils.py b/tests/e2e/utils.py index e43002cd7..a344c9e4a 100644 --- a/tests/e2e/utils.py +++ b/tests/e2e/utils.py @@ -22,15 +22,19 @@ def create_test_user( 
"Content-Type": "application/json", } - requests.post( - url="https://supabase-kong.uds.dev/auth/v1/signup", - headers=headers, - json={ - "email": email, - "password": password, - "confirmPassword": password, - }, - ) + try: + requests.post( + url="https://supabase-kong.uds.dev/auth/v1/signup", + headers=headers, + json={ + "email": email, + "password": password, + "confirmPassword": password, + }, + ) + except Exception: + # If the user already exists, we can ignore the error + pass return get_jwt_token(anon_key, email, password) From fc38a8ef40337ac540b1ce9348986b0ff04f08ee Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 17:29:25 -0700 Subject: [PATCH 078/100] Updates text-embeddings to run on GPU --- packages/text-embeddings/main.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/text-embeddings/main.py b/packages/text-embeddings/main.py index 0ad8ce824..3fc699c1a 100644 --- a/packages/text-embeddings/main.py +++ b/packages/text-embeddings/main.py @@ -11,13 +11,17 @@ serve, ) +GPU_ENABLED = ( + False if os.environ.get("GPU_ENABLED", "False").lower() != "true" else True +) + model_dir = os.environ.get("LFAI_MODEL_PATH", ".model") -model = INSTRUCTOR(model_dir) +model = INSTRUCTOR(model_dir, device="gpu" if GPU_ENABLED else "cpu") class InstructorEmbedding: async def CreateEmbedding(self, request: EmbeddingRequest, context: GrpcContext): - embeddings = model.encode(request.inputs) + embeddings = model.encode(sentences=request.inputs, show_progress_bar=True) embeddings = [Embedding(embedding=inner_list) for inner_list in embeddings] return EmbeddingResponse(embeddings=embeddings) From c5c580fd95fe29072c40a9b5044e38e28dd044c3 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 17:45:08 -0700 Subject: [PATCH 079/100] Updates text-embeddings to run on GPU --- packages/text-embeddings/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/text-embeddings/main.py 
b/packages/text-embeddings/main.py index 3fc699c1a..5937ad21e 100644 --- a/packages/text-embeddings/main.py +++ b/packages/text-embeddings/main.py @@ -21,7 +21,10 @@ class InstructorEmbedding: async def CreateEmbedding(self, request: EmbeddingRequest, context: GrpcContext): - embeddings = model.encode(sentences=request.inputs, show_progress_bar=True) + # Run the CPU-intensive encoding in a separate thread + embeddings = await asyncio.to_thread( + model.encode, sentences=request.inputs, show_progress_bar=True + ) embeddings = [Embedding(embedding=inner_list) for inner_list in embeddings] return EmbeddingResponse(embeddings=embeddings) From d949f748642edecfb34b049c54ade99dae69e753 Mon Sep 17 00:00:00 2001 From: gharvey Date: Mon, 26 Aug 2024 17:48:07 -0700 Subject: [PATCH 080/100] Replaces gpu with cuda --- packages/text-embeddings/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/text-embeddings/main.py b/packages/text-embeddings/main.py index 5937ad21e..db9044036 100644 --- a/packages/text-embeddings/main.py +++ b/packages/text-embeddings/main.py @@ -16,7 +16,7 @@ ) model_dir = os.environ.get("LFAI_MODEL_PATH", ".model") -model = INSTRUCTOR(model_dir, device="gpu" if GPU_ENABLED else "cpu") +model = INSTRUCTOR(model_dir, device="cuda" if GPU_ENABLED else "cpu") class InstructorEmbedding: From 03186f1a6ac570a734cc042b3a1b6894bb32a568 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 27 Aug 2024 11:14:00 -0700 Subject: [PATCH 081/100] Removes code specifying the gpu device --- packages/text-embeddings/main.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/packages/text-embeddings/main.py b/packages/text-embeddings/main.py index db9044036..f135f60b6 100644 --- a/packages/text-embeddings/main.py +++ b/packages/text-embeddings/main.py @@ -11,12 +11,8 @@ serve, ) -GPU_ENABLED = ( - False if os.environ.get("GPU_ENABLED", "False").lower() != "true" else True -) - model_dir = os.environ.get("LFAI_MODEL_PATH", 
".model") -model = INSTRUCTOR(model_dir, device="cuda" if GPU_ENABLED else "cpu") +model = INSTRUCTOR(model_dir) class InstructorEmbedding: From 44f5578898a67564d5e64bc438aa48fb7fd01849 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 27 Aug 2024 11:22:57 -0700 Subject: [PATCH 082/100] Bumps wait time up to 10 min --- tests/e2e/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index 685609882..0685c921b 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -238,7 +238,7 @@ def check_vector_store_status(): ) # Wait for indexing to complete if it hasn't already - max_wait_time = 60 # seconds + max_wait_time = 60 * 10 # 10 minutes in seconds start_time = time.time() while check_vector_store_status() != VectorStoreStatus.COMPLETED.value: if time.time() - start_time > max_wait_time: From e7cbc2daf7041053de08be4625eec4ffede80d1f Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 27 Aug 2024 11:47:17 -0700 Subject: [PATCH 083/100] Reduces wait time down to 3 min and swaps out pdf --- tests/e2e/test_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index 0685c921b..f95e9d2d0 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -122,7 +122,7 @@ def test_api_row_level_security(): def download_arxiv_pdf(): - url = "https://arxiv.org/pdf/2305.16291.pdf" + url = "https://arxiv.org/pdf/1706.03762.pdf" response = requests.get(url) if response.status_code == 200: with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: @@ -238,10 +238,10 @@ def check_vector_store_status(): ) # Wait for indexing to complete if it hasn't already - max_wait_time = 60 * 10 # 10 minutes in seconds + max_wait_time_in_seconds = 60 * 3 # 3 minutes start_time = time.time() while check_vector_store_status() != VectorStoreStatus.COMPLETED.value: - if time.time() - start_time > max_wait_time: + if time.time() - 
start_time > max_wait_time_in_seconds: pytest.fail( "Vector store indexing did not complete within the expected time" ) From 143933c7706c26cf32754c5ac851e326bec105cc Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 27 Aug 2024 12:03:32 -0700 Subject: [PATCH 084/100] Adds fastapi to project root for test --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index ca78057fb..b70c1cbb7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dev = [ "requests-toolbelt", "pytest", "huggingface_hub[cli,hf_transfer]", + "fastapi", ] dev-whisper = ["ctranslate2 == 4.1.0", "transformers[torch] == 4.39.3"] From dbeadc6e08627e4baa81fc5f508952eeb899feff Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 27 Aug 2024 12:45:04 -0700 Subject: [PATCH 085/100] Moves test into new e2e that has multiple backends --- .github/workflows/e2e-playwright.yaml | 2 +- .../workflows/e2e-text-backend-full-cpu.yaml | 101 +++++++++++ tests/e2e/test_api.py | 156 ----------------- tests/e2e/test_text_backend_full.py | 165 ++++++++++++++++++ 4 files changed, 267 insertions(+), 157 deletions(-) create mode 100644 .github/workflows/e2e-text-backend-full-cpu.yaml create mode 100644 tests/e2e/test_text_backend_full.py diff --git a/.github/workflows/e2e-playwright.yaml b/.github/workflows/e2e-playwright.yaml index 7d9eb3234..f7370d4a9 100644 --- a/.github/workflows/e2e-playwright.yaml +++ b/.github/workflows/e2e-playwright.yaml @@ -1,4 +1,4 @@ -# End-to-end testing that deploys and tests Supabase, API, UI, and VLLM +# End-to-end testing that deploys and tests Supabase, API, and UI name: e2e-playwright on: diff --git a/.github/workflows/e2e-text-backend-full-cpu.yaml b/.github/workflows/e2e-text-backend-full-cpu.yaml new file mode 100644 index 000000000..6e8507ae3 --- /dev/null +++ b/.github/workflows/e2e-text-backend-full-cpu.yaml @@ -0,0 +1,101 @@ +# End-to-end testing that deploys and tests Supabase, API, llama-cpp-python, and text-embeddings + 
+name: e2e-text-backend-full-cpu +on: + pull_request: + types: + - opened # default trigger + - reopened # default trigger + - synchronize # default trigger + - ready_for_review # don't run on draft PRs + - milestoned # allows us to trigger on bot PRs + paths: + # Catch-all + - "**" + + # Ignore updates to the .github directory, unless it's this current file + - "!.github/**" + - ".github/workflows/e2e-text-backend-full-cpu.yaml" + - ".github/actions/uds-cluster/action.yaml" + + # Ignore docs and website things + - "!**.md" + - "!docs/**" + - "!adr/**" + - "!website/**" + - "!netlify.toml" + + # Ignore updates to generic github metadata files + - "!CODEOWNERS" + - "!.gitignore" + - "!LICENSE" + + # Ignore local development files + - "!.pre-commit-config.yaml" + + # Ignore non e2e tests changes + - "!tests/pytest/**" + + # Ignore LFAI-UI source code changes + - "!src/leapfrogai_ui/**" + + # Ignore changes to unrelated packages + - "!packages/k3d-gpu/**" + - "!packages/repeater/**" + - "!packages/ui/**" + - "!packages/vllm/**" + - "!packages/whisper/**" + + + +concurrency: + group: e2e-text-backend-full-cpu-${{ github.ref }} + cancel-in-progress: true + +jobs: + e2e_text_backend_full_cpu: + runs-on: ai-ubuntu-big-boy-8-core + if: ${{ !github.event.pull_request.draft }} + + steps: + - name: Checkout Repo + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Setup Python + uses: ./.github/actions/python + + - name: Setup UDS Cluster + uses: ./.github/actions/uds-cluster + with: + registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} + registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + + - name: Setup LFAI-API and Supabase + uses: ./.github/actions/lfai-core + + ########## + # text-embeddings + ########## + - name: Deploy text-embeddings + run: | + make build-text-embeddings LOCAL_VERSION=e2e-test + docker image prune -af + uds zarf package deploy packages/text-embeddings/zarf-package-text-embeddings-amd64-e2e-test.tar.zst 
-l=trace --confirm + rm packages/text-embeddings/zarf-package-text-embeddings-amd64-e2e-test.tar.zst + + ########## + # llama + ########## + - name: Deploy llama-cpp-python + run: | + make build-llama-cpp-python LOCAL_VERSION=e2e-test + docker image prune -af + uds zarf package deploy packages/llama-cpp-python/zarf-package-llama-cpp-python-amd64-e2e-test.tar.zst -l=trace --confirm + rm packages/llama-cpp-python/zarf-package-llama-cpp-python-amd64-e2e-test.tar.zst + + ########## + # Test + ########## + - name: Test Text Backend + run: | + python -m pytest ./tests/e2e/test_text_backend_full.py -v diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index f95e9d2d0..6c92c5acc 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -1,14 +1,10 @@ import io import logging -import time import uuid -import tempfile import requests import pytest from openai import OpenAI -from openai.types.beta.vector_store import VectorStore -from leapfrogai_api.backend.types import VectorStoreStatus from .utils import create_test_user client = OpenAI( @@ -119,155 +115,3 @@ def test_api_row_level_security(): verify_request(get_urls, "get", jwt_token, False) verify_request(post_urls, "post", jwt_token, False) verify_request(delete_urls, "delete", jwt_token, False) - - -def download_arxiv_pdf(): - url = "https://arxiv.org/pdf/1706.03762.pdf" - response = requests.get(url) - if response.status_code == 200: - with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: - temp_file.write(response.content) - return temp_file.name - else: - raise Exception( - f"Failed to download PDF from ArXiv. Status code: {response.status_code}" - ) - - -def test_run_with_background_task(): - """ - This test confirms whether a vector store for an assistant can index files - while chatting at the same time. 
- """ - print("Starting test_run_with_background_task") - - # Download the ArXiv PDF - pdf_path = download_arxiv_pdf() - print(f"Downloaded ArXiv PDF to: {pdf_path}") - - # Upload the PDF file - with open(pdf_path, "rb") as file: - file_upload = client.files.create(file=file, purpose="assistants") - assert file_upload.id is not None - print(f"Uploaded PDF file with ID: {file_upload.id}") - - # Create a vector store - vector_store: VectorStore = client.beta.vector_stores.create( - name="test_background", - file_ids=[file_upload.id], - ) - assert vector_store.id is not None - print(f"Created vector store with ID: {vector_store.id}") - - # Check initial status - assert vector_store.status == VectorStoreStatus.IN_PROGRESS.value - print(f"Initial vector store status: {vector_store.status}") - - # Create an assistant - assistant = client.beta.assistants.create( - model="vllm", - name="Test Assistant", - instructions="You are a helpful assistant with access to a knowledge base about AI and machine learning.", - tools=[{"type": "file_search"}], - tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}}, - ) - assert assistant.id is not None - print(f"Created assistant with ID: {assistant.id}") - - # Create a thread - thread = client.beta.threads.create() - assert thread.id is not None - print(f"Created thread with ID: {thread.id}") - - # Function to check vector store status - def check_vector_store_status(): - nonlocal vector_store - vector_store = client.beta.vector_stores.retrieve( - vector_store_id=vector_store.id - ) - return vector_store.status - - # Perform multiple runs while indexing is in progress - num_runs = 5 - responses = [] - - for i in range(num_runs): - # Check if indexing is still in progress - current_status = check_vector_store_status() - print(f"Run {i + 1}: Current vector store status: {current_status}") - if current_status == VectorStoreStatus.COMPLETED.value: - pytest.fail( - f"Vector store indexing completed without concurrently 
running multiple runs and ended after only {i} run(s)" - ) - - # Add a message to the thread - message = client.beta.threads.messages.create( - thread_id=thread.id, - role="user", - content=f"Run {i + 1}: What is the main topic of the paper in the vector store?", - ) - assert message.id is not None - print(f"Run {i + 1}: Added message to thread") - - # Create a run - run = client.beta.threads.runs.create( - thread_id=thread.id, - assistant_id=assistant.id, - instructions="Please use the file_search tool to find relevant information from the uploaded ArXiv paper.", - ) - assert run.id is not None - print(f"Run {i + 1}: Created run with ID: {run.id}") - - # Retrieve the assistant's message - messages = client.beta.threads.messages.list(thread_id=thread.id) - assert ( - len(messages.data) > i + 1 - ), f"No response message from the assistant for run {i + 1}" - assistant_message = messages.data[0].content[0].text.value - responses.append(assistant_message) - print(f"Run {i + 1}: Received assistant's response") - - print(f"Completed run {i + 1}") - - # Check if indexing is still in progress - current_status = check_vector_store_status() - if current_status == VectorStoreStatus.COMPLETED.value: - print(f"Vector store indexing completed after {i + 1} run(s)") - pytest.fail( - f"Vector store indexing completed without concurrently running multiple runs and ended after only {i} run(s)" - ) - - # Wait for indexing to complete if it hasn't already - max_wait_time_in_seconds = 60 * 3 # 3 minutes - start_time = time.time() - while check_vector_store_status() != VectorStoreStatus.COMPLETED.value: - if time.time() - start_time > max_wait_time_in_seconds: - pytest.fail( - "Vector store indexing did not complete within the expected time" - ) - time.sleep(2) - print( - f"Waiting for indexing to complete... 
Current status: {check_vector_store_status()}" - ) - - # Verify final vector store status - assert vector_store.status == VectorStoreStatus.COMPLETED.value - print(f"Final vector store status: {vector_store.status}") - - # Check that at least one of the assistant's responses contains relevant information - assert any( - len(response) > 0 for response in responses - ), "All assistant responses are empty" - assert any( - "arxiv" in response.lower() or "paper" in response.lower() - for response in responses - ), "None of the assistant's responses mention the ArXiv paper" - print("Verified assistant responses") - - # Clean up - client.beta.assistants.delete(assistant_id=assistant.id) - client.beta.vector_stores.delete(vector_store_id=vector_store.id) - client.files.delete(file_id=file_upload.id) - print("Cleaned up resources") - - print("test_run_with_background_task completed successfully") diff --git a/tests/e2e/test_text_backend_full.py b/tests/e2e/test_text_backend_full.py new file mode 100644 index 000000000..3997bd631 --- /dev/null +++ b/tests/e2e/test_text_backend_full.py @@ -0,0 +1,165 @@ +import time +import tempfile +import requests +import pytest +from openai import OpenAI +from openai.types.beta.vector_store import VectorStore + +from leapfrogai_api.backend.types import VectorStoreStatus +from .utils import create_test_user + +client = OpenAI( + base_url="https://leapfrogai-api.uds.dev/openai/v1", api_key=create_test_user() +) + + +def download_arxiv_pdf(): + url = "https://arxiv.org/pdf/1706.03762.pdf" + response = requests.get(url) + if response.status_code == 200: + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: + temp_file.write(response.content) + return temp_file.name + else: + raise Exception( + f"Failed to download PDF from ArXiv. 
Status code: {response.status_code}" + ) + + +def test_run_with_background_task(): + """ + This test confirms whether a vector store for an assistant can index files + while chatting at the same time. + """ + print("Starting test_run_with_background_task") + + # Download the ArXiv PDF + pdf_path = download_arxiv_pdf() + print(f"Downloaded ArXiv PDF to: {pdf_path}") + + # Upload the PDF file + with open(pdf_path, "rb") as file: + file_upload = client.files.create(file=file, purpose="assistants") + assert file_upload.id is not None + print(f"Uploaded PDF file with ID: {file_upload.id}") + + # Create a vector store + vector_store: VectorStore = client.beta.vector_stores.create( + name="test_background", + file_ids=[file_upload.id], + ) + assert vector_store.id is not None + print(f"Created vector store with ID: {vector_store.id}") + + # Check initial status + assert vector_store.status == VectorStoreStatus.IN_PROGRESS.value + print(f"Initial vector store status: {vector_store.status}") + + # Create an assistant + assistant = client.beta.assistants.create( + model="llama-cpp-python", + name="Test Assistant", + instructions="You are a helpful assistant with access to a knowledge base about AI and machine learning.", + tools=[{"type": "file_search"}], + tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}}, + ) + assert assistant.id is not None + print(f"Created assistant with ID: {assistant.id}") + + # Create a thread + thread = client.beta.threads.create() + assert thread.id is not None + print(f"Created thread with ID: {thread.id}") + + # Function to check vector store status + def check_vector_store_status(): + nonlocal vector_store + vector_store = client.beta.vector_stores.retrieve( + vector_store_id=vector_store.id + ) + return vector_store.status + + # Perform multiple runs while indexing is in progress + num_runs = 5 + responses = [] + + for i in range(num_runs): + # Check if indexing is still in progress + current_status = 
check_vector_store_status() + print(f"Run {i + 1}: Current vector store status: {current_status}") + if current_status == VectorStoreStatus.COMPLETED.value: + pytest.fail( + f"Vector store indexing completed without concurrently running multiple runs and ended after only {i} run(s)" + ) + + # Add a message to the thread + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=f"Run {i + 1}: What is the main topic of the paper in the vector store?", + ) + assert message.id is not None + print(f"Run {i + 1}: Added message to thread") + + # Create a run + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please use the file_search tool to find relevant information from the uploaded ArXiv paper.", + ) + assert run.id is not None + print(f"Run {i + 1}: Created run with ID: {run.id}") + + # Retrieve the assistant's message + messages = client.beta.threads.messages.list(thread_id=thread.id) + assert ( + len(messages.data) > i + 1 + ), f"No response message from the assistant for run {i + 1}" + assistant_message = messages.data[0].content[0].text.value + responses.append(assistant_message) + print(f"Run {i + 1}: Received assistant's response") + + print(f"Completed run {i + 1}") + + # Check if indexing is still in progress + current_status = check_vector_store_status() + if current_status == VectorStoreStatus.COMPLETED.value: + print(f"Vector store indexing completed after {i + 1} run(s)") + pytest.fail( + f"Vector store indexing completed without concurrently running multiple runs and ended after only {i} run(s)" + ) + + # Wait for indexing to complete if it hasn't already + max_wait_time_in_seconds = 60 * 3 # 3 minutes + start_time = time.time() + while check_vector_store_status() != VectorStoreStatus.COMPLETED.value: + if time.time() - start_time > max_wait_time_in_seconds: + pytest.fail( + "Vector store indexing did not complete within the expected time" + ) + time.sleep(2) 
+ print( + f"Waiting for indexing to complete... Current status: {check_vector_store_status()}" + ) + + # Verify final vector store status + assert vector_store.status == VectorStoreStatus.COMPLETED.value + print(f"Final vector store status: {vector_store.status}") + + # Check that at least one of the assistant's responses contains relevant information + assert any( + len(response) > 0 for response in responses + ), "All assistant responses are empty" + assert any( + "arxiv" in response.lower() or "paper" in response.lower() + for response in responses + ), "None of the assistant's responses mention the ArXiv paper" + print("Verified assistant responses") + + # Clean up + client.beta.assistants.delete(assistant_id=assistant.id) + client.beta.vector_stores.delete(vector_store_id=vector_store.id) + client.files.delete(file_id=file_upload.id) + print("Cleaned up resources") + + print("test_run_with_background_task completed successfully") From 0324f0235e20ada16ab06d36dd202fe19b47f7e4 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 27 Aug 2024 13:14:09 -0700 Subject: [PATCH 086/100] Bumps migration name to latest --- ...ng_status.sql => 20240827103100_v0.11.0_indexing_status.sql} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename packages/api/supabase/migrations/{20240724103100_v0.9.2_indexing_status.sql => 20240827103100_v0.11.0_indexing_status.sql} (96%) diff --git a/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql b/packages/api/supabase/migrations/20240827103100_v0.11.0_indexing_status.sql similarity index 96% rename from packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql rename to packages/api/supabase/migrations/20240827103100_v0.11.0_indexing_status.sql index d829b87d4..4875b070e 100644 --- a/packages/api/supabase/migrations/20240724103100_v0.9.2_indexing_status.sql +++ b/packages/api/supabase/migrations/20240827103100_v0.11.0_indexing_status.sql @@ -21,4 +21,4 @@ EXECUTE FUNCTION 
update_modified_column(); -- Enable Supabase realtime for the vector_store_file table alter publication supabase_realtime -add table vector_store_file; \ No newline at end of file +add table vector_store_file; From 8bb0166b6cbf6bdc0a99c2e99ac34cf5165390ec Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 27 Aug 2024 13:17:54 -0700 Subject: [PATCH 087/100] Fixes typo --- .../templates/suapbase-realtime-secret.yaml | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 packages/supabase/chart/templates/suapbase-realtime-secret.yaml diff --git a/packages/supabase/chart/templates/suapbase-realtime-secret.yaml b/packages/supabase/chart/templates/suapbase-realtime-secret.yaml deleted file mode 100644 index a762df845..000000000 --- a/packages/supabase/chart/templates/suapbase-realtime-secret.yaml +++ /dev/null @@ -1,18 +0,0 @@ -{{- $dbEncKey := randAlphaNum 16 }} # This needs to be exactly 16 characters -{{- $existingSecret := (lookup "v1" "Secret" .Release.Namespace "supabase-realtime-extra") }} -apiVersion: v1 -kind: Secret -metadata: - name: supabase-realtime-extra - namespace: {{ .Release.Namespace }} - {{- if $existingSecret }} - annotations: - "helm.sh/resource-policy": keep - {{- end }} -type: Opaque -data: - {{- if $existingSecret }} - dbEncKey: {{ $existingSecret.data.dbEncKey }} - {{- else }} - dbEncKey: {{ $dbEncKey | b64enc | quote }} - {{- end }} \ No newline at end of file From d952c6a29b865f508a2265dae937a388ba623d92 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 27 Aug 2024 13:20:45 -0700 Subject: [PATCH 088/100] Adds comment for service key --- tests/e2e/test_supabase.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/e2e/test_supabase.py b/tests/e2e/test_supabase.py index db1ac84ad..fa8c4f374 100644 --- a/tests/e2e/test_supabase.py +++ b/tests/e2e/test_supabase.py @@ -159,6 +159,8 @@ def run_postgres_db_changes(): timeout_timer.start() # Listening socket + # The service key is needed for proper permission to listen to 
realtime events + # At the time of writing this, the Supabase realtime library does not support RLS URL = f"wss://supabase-kong.uds.dev/realtime/v1/websocket?apikey={SERVICE_KEY}&vsn=1.0.0" s = Socket(URL) s.connect() From 18dc5e7707075ee06feeed90cf866e0dd4fd6176 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 27 Aug 2024 13:58:41 -0700 Subject: [PATCH 089/100] Changes updated_at precision to ms --- .../migrations/20240827103100_v0.11.0_indexing_status.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/api/supabase/migrations/20240827103100_v0.11.0_indexing_status.sql b/packages/api/supabase/migrations/20240827103100_v0.11.0_indexing_status.sql index 4875b070e..4716aec18 100644 --- a/packages/api/supabase/migrations/20240827103100_v0.11.0_indexing_status.sql +++ b/packages/api/supabase/migrations/20240827103100_v0.11.0_indexing_status.sql @@ -1,5 +1,5 @@ -- Update the vector_store_file table to add an updated_at column -alter table vector_store_file add column updated_at bigint default extract(epoch from now()) not null; +ALTER TABLE vector_store_file ADD COLUMN updated_at timestamp(3) DEFAULT CURRENT_TIMESTAMP NOT NULL; -- Add an index on user_id for faster queries CREATE INDEX idx_vector_store_file_user_id ON vector_store_file(user_id); @@ -8,7 +8,7 @@ CREATE INDEX idx_vector_store_file_user_id ON vector_store_file(user_id); CREATE OR REPLACE FUNCTION update_modified_column() RETURNS TRIGGER AS $$ BEGIN - NEW.updated_at = extract(epoch from now()); + NEW.updated_at = CURRENT_TIMESTAMP; RETURN NEW; END; $$ language 'plpgsql'; From 68a29c259bef2c909c7f63f083860eef33eea51c Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 28 Aug 2024 09:10:56 -0700 Subject: [PATCH 090/100] Bumps version to 0.11.1 --- ...xing_status.sql => 20240827103100_v0.11.1_indexing_status.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename packages/api/supabase/migrations/{20240827103100_v0.11.0_indexing_status.sql => 
20240827103100_v0.11.1_indexing_status.sql} (100%) diff --git a/packages/api/supabase/migrations/20240827103100_v0.11.0_indexing_status.sql b/packages/api/supabase/migrations/20240827103100_v0.11.1_indexing_status.sql similarity index 100% rename from packages/api/supabase/migrations/20240827103100_v0.11.0_indexing_status.sql rename to packages/api/supabase/migrations/20240827103100_v0.11.1_indexing_status.sql From 517bb751e469fa8321ec6ae7bbe69efb30fac9f0 Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 28 Aug 2024 11:34:57 -0700 Subject: [PATCH 091/100] Resolves incorrect usage_bytes comment --- src/leapfrogai_api/backend/rag/index.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/leapfrogai_api/backend/rag/index.py b/src/leapfrogai_api/backend/rag/index.py index 87a67851a..a6c3bb56e 100644 --- a/src/leapfrogai_api/backend/rag/index.py +++ b/src/leapfrogai_api/backend/rag/index.py @@ -89,7 +89,7 @@ async def index_file(self, vector_store_id: str, file_id: str) -> VectorStoreFil ), object="vector_store.file", status=VectorStoreFileStatus.FAILED.value, - usage_bytes=0, # Leave blank to have Postgres generate a UUID + usage_bytes=0, # Leave blank to have Postgres calculate the document bytes vector_store_id=vector_store_id, ) return await crud_vector_store_file.create(object_=vector_store_file) @@ -100,7 +100,7 @@ async def index_file(self, vector_store_id: str, file_id: str) -> VectorStoreFil last_error=None, object="vector_store.file", status=VectorStoreFileStatus.IN_PROGRESS.value, - usage_bytes=0, # Leave blank to have Postgres generate a UUID + usage_bytes=0, # Leave blank to have Postgres calculate the document bytes vector_store_id=vector_store_id, ) @@ -176,7 +176,7 @@ async def create_new_vector_store( file_counts=FileCounts( cancelled=0, completed=0, failed=0, in_progress=0, total=0 ), - usage_bytes=0, # Leave blank to have Postgres generate a UUID + usage_bytes=0, # Leave blank to have Postgres calculate the document bytes 
metadata=request.metadata, expires_after=expires_after, expires_at=expires_at, From 17f1a3a62c094afa5cefc145bafb91a50d20b648 Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 28 Aug 2024 11:37:42 -0700 Subject: [PATCH 092/100] Reverts test back to match main --- tests/e2e/test_api.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index 6c92c5acc..b556954e0 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -1,16 +1,12 @@ import io import logging import uuid + +import pytest as pytest import requests -import pytest -from openai import OpenAI from .utils import create_test_user -client = OpenAI( - base_url="https://leapfrogai-api.uds.dev/openai/v1", api_key=create_test_user() -) - logger = logging.getLogger(__name__) test_id = str(uuid.uuid4()) From 50fc526999684a2c14df86b6364c4eed6aa75c4f Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 28 Aug 2024 11:57:46 -0700 Subject: [PATCH 093/100] Change how the timestamp is generated --- .../migrations/20240827103100_v0.11.1_indexing_status.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/api/supabase/migrations/20240827103100_v0.11.1_indexing_status.sql b/packages/api/supabase/migrations/20240827103100_v0.11.1_indexing_status.sql index 4716aec18..d5daf9231 100644 --- a/packages/api/supabase/migrations/20240827103100_v0.11.1_indexing_status.sql +++ b/packages/api/supabase/migrations/20240827103100_v0.11.1_indexing_status.sql @@ -1,5 +1,5 @@ -- Update the vector_store_file table to add an updated_at column -ALTER TABLE vector_store_file ADD COLUMN updated_at timestamp(3) DEFAULT CURRENT_TIMESTAMP NOT NULL; +ALTER TABLE vector_store_file ADD COLUMN updated_at timestamp DEFAULT timezone('utc', now()) NOT NULL; -- Add an index on user_id for faster queries CREATE INDEX idx_vector_store_file_user_id ON vector_store_file(user_id); @@ -8,7 +8,7 @@ CREATE INDEX idx_vector_store_file_user_id ON 
vector_store_file(user_id); CREATE OR REPLACE FUNCTION update_modified_column() RETURNS TRIGGER AS $$ BEGIN - NEW.updated_at = CURRENT_TIMESTAMP; + NEW.updated_at = timezone('utc', now()); RETURN NEW; END; $$ language 'plpgsql'; From 1a60c9a271828013314e329a0efb6a87b167462a Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 28 Aug 2024 14:41:47 -0700 Subject: [PATCH 094/100] Lets the db set the created_at time for the test --- tests/e2e/test_supabase.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/e2e/test_supabase.py b/tests/e2e/test_supabase.py index fa8c4f374..9e7c6aa40 100644 --- a/tests/e2e/test_supabase.py +++ b/tests/e2e/test_supabase.py @@ -4,7 +4,6 @@ import uuid from fastapi import UploadFile import requests -import time from openai.types.beta.vector_stores import VectorStoreFile from openai.types.beta import VectorStore from openai.types.beta.vector_store import FileCounts @@ -62,8 +61,8 @@ async def postgres_db_changes(): assert upload_file_id is not None, "Failed to upload file" vector_store = VectorStore( - id=str(uuid.uuid4()), - created_at=int(time.time()), + id="", + created_at=0, file_counts=FileCounts( cancelled=0, completed=0, @@ -82,7 +81,7 @@ async def postgres_db_changes(): vector_store_file = VectorStoreFile( id=upload_file_id, vector_store_id=vector_store.id, - created_at=int(time.time()), + created_at=0, object="vector_store.file", status="completed", usage_bytes=0, From 5ad9563564fdc0cdb1d0d0c66ae789f9fd1a98aa Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 28 Aug 2024 14:58:44 -0700 Subject: [PATCH 095/100] Returns manual uuid creation --- tests/e2e/test_supabase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/test_supabase.py b/tests/e2e/test_supabase.py index 9e7c6aa40..4a673f26b 100644 --- a/tests/e2e/test_supabase.py +++ b/tests/e2e/test_supabase.py @@ -61,7 +61,7 @@ async def postgres_db_changes(): assert upload_file_id is not None, "Failed to upload file" 
vector_store = VectorStore( - id="", + id=str(uuid.uuid4()), created_at=0, file_counts=FileCounts( cancelled=0, From 059ab1aaa6f8e9094f904eafc31001745694e031 Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 30 Aug 2024 19:33:24 -0700 Subject: [PATCH 096/100] Updates ids to be blank and allow db to generate --- tests/e2e/test_supabase.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/tests/e2e/test_supabase.py b/tests/e2e/test_supabase.py index 4a673f26b..da4ccfd2b 100644 --- a/tests/e2e/test_supabase.py +++ b/tests/e2e/test_supabase.py @@ -61,7 +61,7 @@ async def postgres_db_changes(): assert upload_file_id is not None, "Failed to upload file" vector_store = VectorStore( - id=str(uuid.uuid4()), + id="", created_at=0, file_counts=FileCounts( cancelled=0, @@ -76,11 +76,12 @@ async def postgres_db_changes(): usage_bytes=0, ) - await CRUDVectorStore(client).create(vector_store) + new_vector_store = await CRUDVectorStore(client).create(vector_store) + assert new_vector_store is not None, "Failed to create vector store" vector_store_file = VectorStoreFile( id=upload_file_id, - vector_store_id=vector_store.id, + vector_store_id=new_vector_store.id, created_at=0, object="vector_store.file", status="completed", @@ -115,10 +116,8 @@ async def upload_file(client: AsyncClient) -> str: """ This function is responsible for uploading a file to the file bucket. 
""" - id_ = str(uuid.uuid4()) - empty_file_object = FileObject( - id=id_, + id="", bytes=0, created_at=0, filename="", @@ -128,16 +127,14 @@ async def upload_file(client: AsyncClient) -> str: status_details=None, ) - crud_file_object = CRUDFileObject(client) - - file_object = await crud_file_object.create(object_=empty_file_object) + file_object = await CRUDFileObject(client).create(object_=empty_file_object) assert file_object is not None, "Failed to create file object" crud_file_bucket = CRUDFileBucket(db=client, model=UploadFile) await crud_file_bucket.upload( file=UploadFile(filename="", file=io.BytesIO(b"")), id_=file_object.id ) - return id_ + return file_object.id def run_postgres_db_changes(): """ From 9f6ece2c632d9d2a7d3f6a447db56efef3b8342c Mon Sep 17 00:00:00 2001 From: gharvey Date: Fri, 30 Aug 2024 19:43:05 -0700 Subject: [PATCH 097/100] Switch to test fixture for openai client --- tests/e2e/test_text_backend_full.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/e2e/test_text_backend_full.py b/tests/e2e/test_text_backend_full.py index 3997bd631..3da7cdd0a 100644 --- a/tests/e2e/test_text_backend_full.py +++ b/tests/e2e/test_text_backend_full.py @@ -6,11 +6,6 @@ from openai.types.beta.vector_store import VectorStore from leapfrogai_api.backend.types import VectorStoreStatus -from .utils import create_test_user - -client = OpenAI( - base_url="https://leapfrogai-api.uds.dev/openai/v1", api_key=create_test_user() -) def download_arxiv_pdf(): @@ -26,7 +21,7 @@ def download_arxiv_pdf(): ) -def test_run_with_background_task(): +def test_run_with_background_task(client: OpenAI): """ This test confirms whether a vector store for an assistant can index files while chatting at the same time. 
From ebb2ed2a29c4e53c95dda4fa869a84e46e518fc7 Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 3 Sep 2024 13:03:06 -0700 Subject: [PATCH 098/100] Adds logging and cleanup operations for failed indexing --- src/leapfrogai_api/backend/rag/index.py | 4 ++++ tests/e2e/utils.py | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/leapfrogai_api/backend/rag/index.py b/src/leapfrogai_api/backend/rag/index.py index c72f13a6a..babbca1c9 100644 --- a/src/leapfrogai_api/backend/rag/index.py +++ b/src/leapfrogai_api/backend/rag/index.py @@ -165,6 +165,7 @@ async def create_new_vector_store( current_time = int(time.time()) expires_after, expires_at = request.get_expiry(current_time) + saved_placeholder = None try: # Create a placeholder vector store @@ -211,6 +212,9 @@ async def create_new_vector_store( return saved_placeholder except Exception as exc: logging.error(exc) + # Clean up the placeholder vector store if it was created + if saved_placeholder: + await crud_vector_store.delete(id_=saved_placeholder.id) raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="Unable to parse vector store request", diff --git a/tests/e2e/utils.py b/tests/e2e/utils.py index feb3d5362..32fb05bfb 100644 --- a/tests/e2e/utils.py +++ b/tests/e2e/utils.py @@ -1,5 +1,7 @@ import json +import logging import os +import traceback import pytest import requests @@ -31,8 +33,10 @@ def create_test_user( }, ) except Exception: - # If the user already exists, we can ignore the error - pass + logging.error( + "Error creating user (likely because the user already exists): %s", + traceback.format_exc(), + ) return get_jwt_token(anon_key, email, password) From 7eea9d8a6e6604b078d531e6b30159f40d1f69fb Mon Sep 17 00:00:00 2001 From: gharvey Date: Tue, 3 Sep 2024 14:16:56 -0700 Subject: [PATCH 099/100] Fixes typo --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2ff6a9635..8dca0e69b 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dev = [ "requests", "requests-toolbelt", "pytest", - "supabase == 2.6.0" + "supabase == 2.6.0", "huggingface_hub[cli,hf_transfer] == 0.24.5", "fastapi == 0.109.1", ] From b30efd8ea621d04d9d6fb95f83f72aafe94c1f9b Mon Sep 17 00:00:00 2001 From: gharvey Date: Wed, 4 Sep 2024 12:32:07 -0700 Subject: [PATCH 100/100] Trigger Build