Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Internal Image support #6902

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions lib/shared/src/chat/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,14 @@ export class ChatClient {

// We only want to send up the speaker, prompt text, cache flag, and content, regardless of
// whatever other fields might be on the messages objects (`file`, `displayText`, `contextFiles`, etc.).
const messagesToSend = augmentedMessages.map(({ speaker, text, cache_enabled }) => ({
const messagesToSend = augmentedMessages.map(({ speaker, text, cache_enabled, content }) => ({
text,
speaker,
cache_enabled,
content,
}))

const completionParams = {
const completionParams: CompletionParameters = {
...DEFAULT_CHAT_COMPLETION_PARAMETERS,
...params,
messages: messagesToSend,
Expand Down Expand Up @@ -108,8 +109,8 @@ export function sanitizeMessages(messages: Message[]): Message[] {
// the next one
const nextMessage = sanitizedMessages[index + 1]
if (
(nextMessage.speaker === 'assistant' && !nextMessage.text?.length) ||
(message.speaker === 'assistant' && !message.text?.length)
(nextMessage.speaker === 'assistant' && !nextMessage.text?.length && !nextMessage.content) ||
(message.speaker === 'assistant' && !message.text?.length && !message.content)
) {
return false
}
Expand Down
1 change: 1 addition & 0 deletions lib/shared/src/chat/transcript/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export interface SubMessage {

export interface ChatMessage extends Message {
contextFiles?: ContextItem[]
base64Image?: string

contextAlternatives?: RankedContext[]

Expand Down
1 change: 1 addition & 0 deletions lib/shared/src/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ interface RawClientConfiguration {
experimentalMinionAnthropicKey: string | undefined
experimentalNoxideEnabled: boolean
experimentalGuardrailsTimeoutSeconds: number | undefined
experimentalImageUpload: boolean

//#region Unstable
internalUnstable: boolean
Expand Down
6 changes: 6 additions & 0 deletions lib/shared/src/sourcegraph-api/completions/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ export interface Message {
// mirrors what OpenAI and Anthropic expect
text?: PromptString
cache_enabled?: boolean | null
content?: string | MessagePart[]
base64Image?: string
}

export interface CompletionUsage {
Expand All @@ -43,6 +45,10 @@ export interface CompletionResponse {
stopReason?: string
}

/** A normal text fragment of a multi-part message. */
type TextMessagePart = { type: 'text'; text: string }

/**
 * An image fragment of a multi-part message, shaped per
 * https://platform.openai.com/docs/guides/vision
 */
type ImageUrlMessagePart = { type: 'image_url'; image_url: { url: string } }

/** One element of a multi-part message, discriminated by its `type` tag. */
type MessagePart = TextMessagePart | ImageUrlMessagePart

export interface CompletionParameters {
fast?: boolean
messages: Message[]
Expand Down
6 changes: 6 additions & 0 deletions vscode/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -1084,6 +1084,12 @@
"markdownDescription": "Enable OpenTelemetry tracing",
"default": false
},
"cody.experimental.imageUpload": {
"order": 99,
"type": "boolean",
"markdownDescription": "Enable image support",
"default": false
},
"cody.experimental.commitMessage": {
"order": 99,
"type": "boolean",
Expand Down
27 changes: 26 additions & 1 deletion vscode/src/chat/chat-view/ChatBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ export class ChatBuilder {
if (this.messages.at(-1)?.speaker === 'human') {
throw new Error('Cannot add a user message after a user message')
}
this.messages.push({ ...message, speaker: 'human' })
this.messages.push({ ...message, speaker: 'human', base64Image: this.getAndResetImage() })
this.changeNotifications.next()
}

Expand Down Expand Up @@ -322,6 +322,31 @@ export class ChatBuilder {
}
return result
}

/**
 * Base64-encoded image uploaded by the user, held here until it is picked up.
 * Written by `setImage` and cleared by `getAndResetImage`; `undefined` when
 * no image is pending.
 *
 * Sending the image requires vision support in the model, added in the PR
 * https://github.com/sourcegraph/sourcegraph/pull/546
 */
private image: string | undefined = undefined

/**
 * Stores a base64-encoded image on this builder.
 * There is a single slot, so a later call replaces any image that was set
 * earlier and not yet retrieved.
 * @param base64Image - The base64-encoded image data to store.
 */
public setImage(base64Image: string): void {
this.image = base64Image
}

/**
 * Hands back the stored base64-encoded image and clears the slot, so the same
 * image is not returned by a subsequent call.
 * @returns The stored base64 image, or undefined when none has been set.
 */
public getAndResetImage(): string | undefined {
    try {
        // The return value is captured before the finally clause clears the slot.
        return this.image
    } finally {
        this.image = undefined
    }
}
}

function messageToSerializedChatInteraction(
Expand Down
5 changes: 5 additions & 0 deletions vscode/src/chat/chat-view/ChatController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,11 @@ export class ChatController implements vscode.Disposable, vscode.WebviewViewProv
}
break
}

case 'chat/upload-file': {
this.chatBuilder.setImage(message.base64)
break
}
case 'log': {
const logger = message.level === 'debug' ? logDebug : logError
logger(message.filterLabel, message.message)
Expand Down
1 change: 1 addition & 0 deletions vscode/src/chat/protocol.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ export type WebviewMessage =
selectedFilters: NLSSearchDynamicFilter[]
}
| { command: 'action/confirmation'; id: string; response: boolean }
| { command: 'chat/upload-file'; base64: string }

export interface SmartApplyResult {
taskId: FixupTaskID
Expand Down
3 changes: 3 additions & 0 deletions vscode/src/configuration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ describe('getConfiguration', () => {
return false
case 'cody.agentic.context.experimentalOptions':
return { shell: { allow: ['git'] } }
case 'cody.experimental.imageUpload':
return false
default:
assert(false, `unexpected key: ${key}`)
}
Expand Down Expand Up @@ -206,6 +208,7 @@ describe('getConfiguration', () => {

overrideAuthToken: undefined,
overrideServerEndpoint: undefined,
experimentalImageUpload: false,
} satisfies ClientConfiguration)
})
})
1 change: 1 addition & 0 deletions vscode/src/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ export function getConfiguration(
>('autocomplete.experimental.graphContext', null),
experimentalCommitMessage: getHiddenSetting('experimental.commitMessage', true),
experimentalNoodle: getHiddenSetting('experimental.noodle', false),
experimentalImageUpload: getHiddenSetting('experimental.imageUpload', false),

experimentalTracing: getHiddenSetting('experimental.tracing', false),

Expand Down
10 changes: 10 additions & 0 deletions vscode/src/edit/prompt/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ const getContextFromIntent = async ({
uri,
PROMPT_TOPICS.OUTPUT
),
content: '',
base64Image: '',
file: {
type: 'file',
uri,
Expand All @@ -96,13 +98,17 @@ const getContextFromIntent = async ({
speaker: 'human',
text: populateCodeContextTemplate(truncatedPrecedingText, uri, undefined, 'edit'),
file: { type: 'file', uri, source: ContextItemSource.Editor, range: prefix.range },
content: '',
base64Image: '',
})
}
if (truncatedFollowingText.trim().length > 0) {
contextMessages.push({
speaker: 'human',
text: populateCodeContextTemplate(truncatedFollowingText, uri, undefined, 'edit'),
file: { type: 'file', uri, source: ContextItemSource.Editor, range: suffix.range },
content: '',
base64Image: '',
})
}
return contextMessages
Expand All @@ -125,6 +131,8 @@ const getContextFromIntent = async ({
speaker: 'human' as const,
text: populateCurrentEditorDiagnosticsTemplate(diagnostic, uri),
file: { type: 'file', uri, source: ContextItemSource.Editor },
content: '',
base64Image: '',
}) satisfies ContextMessage
),
...[truncatedPrecedingText, truncatedFollowingText]
Expand All @@ -135,6 +143,8 @@ const getContextFromIntent = async ({
speaker: 'human' as const,
text: populateCodeContextTemplate(text, uri, undefined, 'edit'),
file: { type: 'file', uri, source: ContextItemSource.Editor },
content: '',
base64Image: '',
}) satisfies ContextMessage
),
]
Expand Down
43 changes: 41 additions & 2 deletions vscode/src/prompt-builder/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ export class PromptBuilder {
* A list of context items that are used to build context messages.
*/
public contextItems: ContextItem[] = []

/**
* Convenience constructor because loading the tokenizer is async due to its large size.
*/
Expand All @@ -47,7 +46,6 @@ export class PromptBuilder {
if (this.contextItems.length > 0) {
this.buildContextMessages()
}

return this.prefixMessages.concat([...this.reverseMessages].reverse())
}

Expand Down Expand Up @@ -92,6 +90,7 @@ export class PromptBuilder {
])
if (!withinLimit) {
// Throw error if the limit was exceeded and no message was added.

if (!this.reverseMessages.length) {
throw new Error(
'The chat input has exceeded the token limit. If you are copying and pasting a file into the chat, try using the @-mention feature to attach the file instead.'
Expand All @@ -103,6 +102,12 @@ export class PromptBuilder {
this.reverseMessages.push(assistantMsg)
}
this.reverseMessages.push(humanMsg)

// Immediately inject an image message if there is a base64 image on the human message.
if (humanMsg.base64Image) {
const imageMessage = this.createImageMessage(humanMsg.base64Image)
this.reverseMessages.push(imageMessage)
}
}
// All messages were added successfully.
return undefined
Expand Down Expand Up @@ -188,4 +193,38 @@ export class PromptBuilder {
)
return result
}

/**
 * Builds a human-speaker Message whose content is a single `image_url` part.
 * The MIME type reported by detectImageType is combined with the base64
 * payload into a `data:` URL.
 */
private createImageMessage(base64Image: string): Message {
    const mimeType = detectImageType(base64Image)
    const dataUrl = `data:${mimeType};base64,${base64Image}`
    return {
        speaker: 'human',
        content: [{ type: 'image_url', image_url: { url: dataUrl } }],
    }
}
}

/**
 * Guesses an image MIME type from the leading characters of its base64 encoding.
 * Recognizes JPEG, PNG, GIF, WebP and SVG prefixes; any other input is reported
 * as JPEG.
 */
function detectImageType(base64String: string): string {
    // Only the first eight characters take part in the comparison.
    const leading = base64String.slice(0, 8)

    // Base64 prefix -> MIME type, tried in order.
    const knownSignatures: ReadonlyArray<readonly [string, string]> = [
        ['/9j/', 'image/jpeg'],
        ['iVBORw0', 'image/png'],
        ['R0lGOD', 'image/gif'],
        ['UklGR', 'image/webp'],
        ['PHN2Z', 'image/svg+xml'],
    ]

    const match = knownSignatures.find(([signature]) => leading.startsWith(signature))

    // Unknown signatures fall back to JPEG.
    return match ? match[1] : 'image/jpeg'
}
8 changes: 7 additions & 1 deletion vscode/src/prompt-builder/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,13 @@ export function renderContextItem(contextItem: ContextItem): ContextMessage | nu
}
}

return { speaker: 'human', text: messageText, file: contextItem }
return {
speaker: 'human',
text: messageText,
file: contextItem,
content: '',
base64Image: '',
}
}

export function getContextItemTokenUsageType(item: ContextItem): ContextTokenUsageType {
Expand Down
1 change: 1 addition & 0 deletions vscode/src/testutils/mocks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -948,4 +948,5 @@ export const DEFAULT_VSCODE_SETTINGS = {
experimentalGuardrailsTimeoutSeconds: undefined,
overrideAuthToken: undefined,
overrideServerEndpoint: undefined,
experimentalImageUpload: false,
} satisfies ClientConfiguration
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
import type { UserAccountInfo } from '../../../../../Chat'
import { type ClientActionListener, useClientActionListener } from '../../../../../client/clientState'
import { promptModeToIntent } from '../../../../../prompts/PromptsTab'
import { getVSCodeAPI } from '../../../../../utils/VSCodeApi'
import { useTelemetryRecorder } from '../../../../../utils/telemetry'
import { useFeatureFlag } from '../../../../../utils/useFeatureFlags'
import { useLinkOpener } from '../../../../../utils/useLinkOpener'
Expand Down Expand Up @@ -99,6 +100,8 @@ export const HumanMessageEditor: FunctionComponent<{
}) => {
const telemetryRecorder = useTelemetryRecorder()

const [imageFile, setImageFile] = useState<File | undefined>(undefined)

const editorRef = useRef<PromptEditorRefAPI>(null)
useImperativeHandle(parentEditorRef, (): PromptEditorRefAPI | null => editorRef.current, [])

Expand Down Expand Up @@ -126,7 +129,7 @@ export const HumanMessageEditor: FunctionComponent<{
const experimentalPromptEditorEnabled = useFeatureFlag(FeatureFlag.CodyExperimentalPromptEditor)

const onSubmitClick = useCallback(
(intent?: ChatMessage['intent'], forceSubmit?: boolean): void => {
async (intent?: ChatMessage['intent'], forceSubmit?: boolean): Promise<void> => {
if (!forceSubmit && submitState === 'emptyEditorValue') {
return
}
Expand All @@ -142,6 +145,28 @@ export const HumanMessageEditor: FunctionComponent<{

const value = editorRef.current.getSerializedValue()
parentOnSubmit(intent)
if (imageFile) {
const readFileGetBase64String = (file: File): Promise<string> => {
return new Promise((resolve, reject) => {
const reader = new FileReader()
reader.onload = () => {
const base64 = reader.result
if (base64 && typeof base64 === 'string') {
resolve(base64.split(',')[1])
} else {
reject(new Error('Failed to read file'))
}
}
reader.onerror = () => reject(new Error('Failed to read file'))
reader.readAsDataURL(file)
})
}

const base64 = await readFileGetBase64String(imageFile)
getVSCodeAPI().postMessage({ command: 'chat/upload-file', base64 })
setImageFile(undefined)
}
parentOnSubmit(intent)

telemetryRecorder.recordEvent('cody.humanMessageEditor', 'submit', {
metadata: {
Expand All @@ -157,7 +182,15 @@ export const HumanMessageEditor: FunctionComponent<{
},
})
},
[submitState, parentOnSubmit, onStop, telemetryRecorder.recordEvent, isFirstMessage, isSent]
[
submitState,
parentOnSubmit,
onStop,
telemetryRecorder.recordEvent,
isFirstMessage,
isSent,
imageFile,
]
)

const onEditorEnterKey = useCallback(
Expand Down Expand Up @@ -470,6 +503,8 @@ export const HumanMessageEditor: FunctionComponent<{
hidden={!focused && isSent}
className={styles.toolbar}
intent={intent}
imageFile={imageFile}
setImageFile={setImageFile}
/>
)}
</div>
Expand Down
Loading
Loading