Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance: fix memory leak #5460

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion agent/src/TestClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ export class TestClient extends MessageHandler {
bin === 'node'
? [
'--enable-source-maps',
// '--expose-gc', // Uncoment when running memory.test.ts
'--expose-gc', // Used by `memory.test.ts` to reproduce memory leaks.
agentScript,
'api',
'jsonrpc-stdio',
Expand Down
134 changes: 132 additions & 2 deletions agent/src/memory.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import fs from 'node:fs'
import path from 'node:path'
import { applyPatch } from 'fast-myers-diff'
import { afterAll, beforeAll, describe, it } from 'vitest'
import * as vscode from 'vscode'
import { TESTING_CREDENTIALS } from '../../vscode/src/testutils/testing-credentials'
import { TestClient } from './TestClient'
import { TestWorkspace } from './TestWorkspace'
Expand All @@ -23,9 +26,91 @@ describe.skip('Memory Usage', () => {
afterAll(async () => {
await workspace.afterAll()
await client.afterAll()
})
}, 20_000)

async function applyEvent(event: EditorEvent): Promise<void> {
if (event.eventType === 'initialize') {
return
}
if (!event.uri) {
return
}
const uri = vscode.Uri.parse(event.uri)

if (event.eventType === 'document/didOpen' || event.eventType === 'document/wasOpen') {
const content: string = JSON.parse(event.json ?? '{}')?.content ?? ''
await client.openFile(uri, { text: content })
return
}

if (event.eventType === 'document/didClose') {
client.notify('textDocument/didClose', { uri: event.uri })
return
}

it('selection', async () => {
if (event.eventType === 'document/didChange') {
const document = client.workspace.getDocument(uri)
if (!document) {
throw new Error(`Document ${uri} not found`)
}
const contentChanges: [number, number, string][] =
JSON.parse(event.json ?? '{}')?.changes ?? []
const newText = [...applyPatch(document.content, contentChanges)].join('')
await client.changeFile(uri, {
text: newText,
})
}
}

// The test case below was used to fix CODY-3616, a memory leak that
// happened in our LRU cache for tree-sitter trees. The fix was to
// call `tree.delete()` when it got evicted from the LRU cache.
// Requires the following setup, which is not automated at the moment:
// - Download a ZIP file from Cody NES recordings. For example, https://console.cloud.google.com/storage/browser/_details/cody-nes/recordings-v2/olafurpg-cody-5e3355b3-2024-08-28T10-18-11.zip;tab=live_object
// - Unzip and you'll have a directory of CSV files
// - Convert those CSV files into JSON with the Cody NES CLI tool. In the sourcegraph/cody-nes repo, run:
//
// tsx src/cli/command-root.ts convert-to-json --dir ~/dev/sourcegraph/cody/agent/dist/replays
//
// - Make sure the converted JSON files are in the `agent/dist/replays` directory
it.skip('replay', async () => {
//
const dir = path.join(__dirname, '..', 'dist', 'replays')
const memory1 = await client.request('testing/memoryUsage', null)
let remainingReplays = 2
for (const file of fs.readdirSync(dir)) {
if (!file.endsWith('.json')) {
continue
}
remainingReplays--
if (remainingReplays < 0) {
break
}
const absolutePath = path.join(dir, file)
const events = parseEditorEvents(absolutePath)
if (events.length !== 5000) {
continue
}
for (const [index, event] of events.entries()) {
await applyEvent(event)
if (index % 50 === 0) {
const memory2 = await client.request('testing/memoryUsage', null)
console.log({
totalEvents: index,
heapUsed: prettyPrintBytes(memory2.usage.heapUsed - memory1.usage.heapUsed),
external: prettyPrintBytes(memory2.usage.external - memory1.usage.external),
})
}
}
}
}, 40_000)

// This test case was used to fix a memory leak in the agent, which happened
// on every text selection. The root cause was that we fired "visible text
// documents changed" event on selection changes. After fixing the issue, we
// still had a memory leak in the "visible text documents" event handler,
// but it fired less frequently so the memory leak wasn't as bad.
it.skip('selection', async () => {
const uri = workspace.file('src', 'animal.ts')
await client.openFile(uri)
const document = client.workspace.getDocument(uri)!
Expand Down Expand Up @@ -72,3 +157,48 @@ function prettyPrintBytes(bytes: number): string {

return `${bytes.toFixed(2)} ${units[unitIndex]}`
}

interface EditorEvent {
readonly timestamp: string
readonly eventType:
| 'initialize'
| 'document/wasOpen'
| 'document/didOpen'
| 'document/didClose'
| 'document/didSave'
| 'document/didFocus'
| 'document/didChange'
| 'selection/didChange'
| 'visibleRanges/didChange'
| 'diagnostics/didChange'
| 'unknown'
readonly uri?: string
readonly languageId?: string

/** String-encoded JSON object of the relevant metadata.
* For example, see SelectionInfos. */
readonly json?: string
recordName?: string // Intentionally mutable
}

function parseEditorEvents(file: string): EditorEvent[] {
// Parses the output of `cody-nes convert-to-json`.
const json: string[][] = JSON.parse(fs.readFileSync(file, 'utf8'))
const result: EditorEvent[] = []
for (const row of json) {
const [timestamp, eventType, uri, languageId, json] = row
if (timestamp === 'TIMESTAMP') {
// header row
continue
}
const event: EditorEvent = {
timestamp,
eventType: eventType as EditorEvent['eventType'],
uri,
languageId,
json,
}
result.push(event)
}
return result
}
3 changes: 3 additions & 0 deletions vscode/src/tree-sitter/parse-tree-cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import { type WrappedParser, createParser, getParser } from './parser'

const parseTreesPerFile = new LRUCache<string, Tree>({
max: 10,
// Important: we need to call `Tree.delete()` to free up memory. Without
// this, we leak memory. See CODY-3616.
disposeAfter: tree => tree.delete(),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@abeatrix the short explanation of this fix is that tree-sitter trees are not garbage collected by default, they are stored on the Node.js "external" heap and we have to explicitly free them with .delete(). FYI @valerybugakov

})

interface ParseTreeCache {
Expand Down
Loading