Skip to content

Commit

Permalink
feat: image input and filter hook.js warning
Browse files Browse the repository at this point in the history
compatible with current backend
  • Loading branch information
ylxmf2005 committed Jan 27, 2025
1 parent bfd896b commit c7e47db
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 23 deletions.
10 changes: 10 additions & 0 deletions electron.vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,15 @@ export default defineConfig({
},
},
plugins: [react()],
build: {
rollupOptions: {
onwarn(warning, warn) {
if (warning.message.includes('onnxruntime')) {
return;
}
warn(warning);
},
},
},
},
});
8 changes: 7 additions & 1 deletion src/renderer/src/hooks/footer/use-text-input.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { useAiState } from '@/context/ai-state-context';
import { useInterrupt } from '@/components/canvas/live2d';
import { useChatHistory } from '@/context/chat-history-context';
import { useVAD } from '@/context/vad-context';
import { useMediaCapture } from '@/hooks/utils/use-media-capture';

export function useTextInput() {
const [inputText, setInputText] = useState('');
Expand All @@ -13,20 +14,25 @@ export function useTextInput() {
const { interrupt } = useInterrupt();
const { appendHumanMessage } = useChatHistory();
const { stopMic, autoStopMic } = useVAD();
const { captureAllMedia } = useMediaCapture();

const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
setInputText(e.target.value);
};

const handleSend = () => {
const handleSend = async () => {
if (!inputText.trim() || !wsContext) return;
if (aiState === 'thinking-speaking') {
interrupt();
}

const images = await captureAllMedia();

appendHumanMessage(inputText.trim());
wsContext.sendMessage({
type: 'text-input',
text: inputText.trim(),
images,
});

setAiState('thinking-speaking');
Expand Down
80 changes: 80 additions & 0 deletions src/renderer/src/hooks/utils/use-media-capture.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { useCallback } from 'react';
import { useCamera } from '@/context/camera-context';
import { useScreenCaptureContext } from '@/context/screen-capture-context';

// Add type definition for ImageCapture — the ImageCapture API is not part of
// TypeScript's bundled DOM lib typings, so declare the minimal surface used
// below (constructor + grabFrame).
declare class ImageCapture {
  constructor(track: MediaStreamTrack);

  grabFrame(): Promise<ImageBitmap>;
}

// One captured frame in the wire format sent to the backend.
// NOTE(review): this local interface shadows the DOM's built-in `ImageData`
// type within this module — consider renaming (e.g. CapturedImage) to avoid
// confusion; confirm nothing depends on the name before doing so.
interface ImageData {
  source: 'camera' | 'screen'; // which device produced the frame
  data: string; // JPEG frame encoded as a data URL (see captureFrame)
  mime_type: string; // snake_case key matches the websocket message schema
}

/**
 * Hook that snapshots the currently-active media streams (camera and/or
 * screen share) as JPEG data URLs, in the shape the backend expects.
 *
 * Fixes vs. previous revision: the ImageCapture constructor is now inside
 * the try (a throwing constructor was previously uncaught), the two sources
 * are captured in parallel, and the leftover debug console.log is removed.
 */
export function useMediaCapture() {
  const { stream: cameraStream } = useCamera();
  const { stream: screenStream } = useScreenCaptureContext();

  // Grab a single frame from a live video stream and encode it as a JPEG
  // data URL. Returns null when the stream is absent, has no video track,
  // or the frame cannot be captured.
  const captureFrame = useCallback(async (stream: MediaStream | null) => {
    if (!stream) return null;

    const videoTrack = stream.getVideoTracks()[0];
    if (!videoTrack) return null;

    try {
      // Constructor kept inside the try: it can throw for ended/invalid tracks.
      const bitmap = await new ImageCapture(videoTrack).grabFrame();
      const canvas = document.createElement('canvas');
      canvas.width = bitmap.width;
      canvas.height = bitmap.height;
      const ctx = canvas.getContext('2d');
      if (!ctx) return null;

      ctx.drawImage(bitmap, 0, 0);
      // 0.8 quality keeps the payload reasonably small for the websocket.
      return canvas.toDataURL('image/jpeg', 0.8);
    } catch (error) {
      console.error('Error capturing frame:', error);
      return null;
    }
  }, []);

  // Capture one frame from each active source concurrently; sources that are
  // inactive or fail to capture are simply omitted from the result.
  const captureAllMedia = useCallback(async () => {
    const sources = [
      { source: 'camera' as const, stream: cameraStream },
      { source: 'screen' as const, stream: screenStream },
    ];

    const frames = await Promise.all(
      sources.map(async ({ source, stream }) => ({
        source,
        data: await captureFrame(stream),
      })),
    );

    const images: ImageData[] = [];
    for (const { source, data } of frames) {
      if (data) {
        images.push({ source, data, mime_type: 'image/jpeg' });
      }
    }

    return images;
  }, [cameraStream, screenStream, captureFrame]);

  return {
    captureAllMedia,
  };
}
41 changes: 25 additions & 16 deletions src/renderer/src/hooks/utils/use-send-audio.tsx
Original file line number Diff line number Diff line change
@@ -1,23 +1,32 @@
import { useCallback } from 'react';
import { useWebSocket } from '@/context/websocket-context';
import { useCallback } from "react";
import { useWebSocket } from "@/context/websocket-context";
import { useMediaCapture } from "@/hooks/utils/use-media-capture";

export function useSendAudio() {
const { sendMessage } = useWebSocket();
const { captureAllMedia } = useMediaCapture();

const sendAudioPartition = useCallback((audio: Float32Array) => {
const chunkSize = 4096;
// Send the audio data in chunks
for (let index = 0; index < audio.length; index += chunkSize) {
const endIndex = Math.min(index + chunkSize, audio.length);
const chunk = audio.slice(index, endIndex);
sendMessage({
type: 'mic-audio-data',
audio: Array.from(chunk),
});
}
// Send end signal after all chunks
sendMessage({ type: 'mic-audio-end' });
}, [sendMessage]);
const sendAudioPartition = useCallback(
async (audio: Float32Array) => {
const chunkSize = 4096;

// Send the audio data in chunks
for (let index = 0; index < audio.length; index += chunkSize) {
const endIndex = Math.min(index + chunkSize, audio.length);
const chunk = audio.slice(index, endIndex);
sendMessage({
type: "mic-audio-data",
audio: Array.from(chunk),
// Only send images with first chunk
});
}

// Send end signal after all chunks
const images = await captureAllMedia();
sendMessage({ type: "mic-audio-end", images });
},
[sendMessage, captureAllMedia],
);

return {
sendAudioPartition,
Expand Down
19 changes: 13 additions & 6 deletions src/renderer/src/hooks/utils/use-trigger-speak.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
import { useCallback } from 'react';
import { useWebSocket } from '@/context/websocket-context';
import { useMediaCapture } from './use-media-capture';

export function useTriggerSpeak() {
const { sendMessage } = useWebSocket();
const { captureAllMedia } = useMediaCapture();

const sendTriggerSignal = useCallback((actualIdleTime: number) => {
sendMessage({
type: 'ai-speak-signal',
idle_time: actualIdleTime,
});
}, [sendMessage]);
const sendTriggerSignal = useCallback(
async (actualIdleTime: number) => {
const images = await captureAllMedia();
sendMessage({
type: "ai-speak-signal",
idle_time: actualIdleTime,
images,
});
},
[sendMessage, captureAllMedia],
);

return {
sendTriggerSignal,
Expand Down
8 changes: 8 additions & 0 deletions src/renderer/src/main.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@ import { createRoot } from 'react-dom/client';
import './index.css';
import App from './App';

// Suppress the noisy onnxruntime warnings emitted at startup; every other
// warning is forwarded untouched to the real console.warn.
const realWarn = console.warn.bind(console);
console.warn = (...args: unknown[]) => {
  const [first] = args;
  if (typeof first === 'string' && first.includes('onnxruntime')) {
    return;
  }
  realWarn(...args);
};

if (typeof window !== 'undefined') {
createRoot(document.getElementById('root')!).render(
<App />,
Expand Down

0 comments on commit c7e47db

Please sign in to comment.