Skip to content

Commit

Permalink
feat: finish ChatGPT connector (vana-com#15)
Browse files Browse the repository at this point in the history
Also include .prettierrc for formatting front-end files
  • Loading branch information
Kahtaf authored Feb 27, 2024
1 parent 7cf9a98 commit 69f2355
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 12 deletions.
6 changes: 6 additions & 0 deletions selfie-ui/.prettierrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"semi": true,
"singleQuote": false,
"printWidth": 80,
"trailingComma": "all"
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { ChangeEvent, useEffect, useState } from 'react';
import { apiBaseUrl } from "@/app/config";

type DocumentSourceSelectorProps = {
onSelect: (selectedId: string) => void;
Expand All @@ -13,7 +14,7 @@ const DocumentSourceSelector = ({ onSelect }: DocumentSourceSelectorProps) => {
const [sources, setSources] = useState<OptionType[]>([]);

useEffect(() => {
fetch('http://localhost:8181/v1/connectors')
fetch(`${apiBaseUrl}/v1/connectors`)
.then((response) => response.json())
.then((data) => {
const options: OptionType[] = data.connectors.map(
Expand Down
26 changes: 15 additions & 11 deletions selfie/parsers/chat/chatgpt.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# TODO: This parser currently fails pydantic validation but otherwise works
from pydantic import BaseModel
from typing import Any, List
from typing import List, Any, Optional

from pydantic import BaseModel, RootModel

from selfie.parsers.chat.base import JsonBasedChatParser
from selfie.types.share_gpt import ShareGPTConversation
from selfie.utils import check_nested


class Author(BaseModel):
Expand All @@ -13,13 +15,13 @@ class Author(BaseModel):

class Content(BaseModel):
content_type: str
parts: List[str]
parts: Optional[List[Any]] = None


class Message(BaseModel):
id: str
author: Author
create_time: float
create_time: float | None
update_time: float | None
content: Content
status: str
Expand All @@ -36,7 +38,7 @@ class Node(BaseModel):
children: List[str]


class ChatGPTData(BaseModel):
class ChatGPTConversation(BaseModel):
title: str
create_time: float
update_time: float
Expand All @@ -53,26 +55,28 @@ class ChatGPTData(BaseModel):


class ChatGPTParser(JsonBasedChatParser):
SUPPORTED_SCHEMAS = [ChatGPTData]
SUPPORTED_SCHEMAS = [RootModel[List[ChatGPTConversation]]]

def extract_conversations(self, data: ChatGPTData) -> ShareGPTConversation:
def extract_conversations(self, data: List[ChatGPTConversation]) -> ShareGPTConversation:
"""
Extract conversations from a list of parsed ChatGPT JSON data.
Args:
data (List[ChatGPTData]): The list of parsed JSON data
data (List[ChatGPTConversation]): The list of parsed JSON data
Returns:
List[dict]: A list of conversation dictionaries
"""
conversations = []
for conversation in data:
for node_id, node in conversation["mapping"].items():
if node["message"]:
if (check_nested(node, "message", "content", "parts") and
isinstance(node["message"]["content"]["parts"], list) and
all(isinstance(elem, str) for elem in node["message"]["content"]["parts"])):
message = node["message"]
author_name = message["author"]["name"] if message["author"]["name"] else message["author"]["role"]
message_content = ' '.join(message["content"]["parts"])
message_timestamp = message["create_time"]
message_timestamp = message["create_time"] or conversation["create_time"]

conversations.append({
"from": author_name,
Expand Down
12 changes: 12 additions & 0 deletions selfie/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,15 @@ def data_uri_to_string(data_uri):
with BytesIO(data) as buffer:
content = buffer.read()
return content.decode('utf-8')


def check_nested(obj, *keys):
    """
    Check whether a chain of nested keys (or indices) can be resolved on ``obj``.

    The keys are applied in order: ``obj[keys[0]][keys[1]]...``. The walk is
    iterative and stops at the first lookup that fails.

    Args:
        obj: The outermost container, typically a dict.
        *keys: Keys or sequence indices to look up, outermost first.

    Returns:
        bool: True if every lookup succeeds (including when no keys are
        given), False otherwise. Only existence is checked: a key whose
        value is ``None`` still counts as present.
    """
    for key in keys:
        try:
            obj = obj[key]
        except (LookupError, TypeError):
            # LookupError covers both KeyError (missing dict key) and
            # IndexError (out-of-range sequence index); TypeError covers
            # values that cannot be subscripted with this key (e.g. None).
            return False
    return True

0 comments on commit 69f2355

Please sign in to comment.