fix(core): use ripgrep --json output for robust cross-platform parsing (#12853)

This commit is contained in:
Abhi
2025-11-10 19:01:01 -05:00
committed by GitHub
parent 4af4f8644d
commit 51f952e700
3 changed files with 354 additions and 59 deletions
+91
View File
@@ -0,0 +1,91 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import * as path from 'node:path';
import * as fs from 'node:fs/promises';
import * as os from 'node:os';
import { RipGrepTool } from '../packages/core/src/tools/ripGrep.js';
import { Config } from '../packages/core/src/config/config.js';
import { WorkspaceContext } from '../packages/core/src/utils/workspaceContext.js';
// Mock Config to provide necessary context
class MockConfig {
constructor(private targetDir: string) {}
getTargetDir() {
return this.targetDir;
}
getWorkspaceContext() {
return new WorkspaceContext(this.targetDir, [this.targetDir]);
}
getDebugMode() {
return true;
}
}
describe('ripgrep-real-direct', () => {
let tempDir: string;
let tool: RipGrepTool;
beforeAll(async () => {
tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'ripgrep-real-test-'));
// Create test files
await fs.writeFile(path.join(tempDir, 'file1.txt'), 'hello world\n');
await fs.mkdir(path.join(tempDir, 'subdir'));
await fs.writeFile(
path.join(tempDir, 'subdir', 'file2.txt'),
'hello universe\n',
);
await fs.writeFile(path.join(tempDir, 'file3.txt'), 'goodbye moon\n');
const config = new MockConfig(tempDir) as unknown as Config;
tool = new RipGrepTool(config);
});
afterAll(async () => {
await fs.rm(tempDir, { recursive: true, force: true });
});
it('should find matches using the real ripgrep binary', async () => {
const invocation = tool.build({ pattern: 'hello' });
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toContain('Found 2 matches');
expect(result.llmContent).toContain('file1.txt');
expect(result.llmContent).toContain('L1: hello world');
expect(result.llmContent).toContain('subdir'); // Should show path
expect(result.llmContent).toContain('file2.txt');
expect(result.llmContent).toContain('L1: hello universe');
expect(result.llmContent).not.toContain('goodbye moon');
});
it('should handle no matches correctly', async () => {
const invocation = tool.build({ pattern: 'nonexistent_pattern_123' });
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toContain('No matches found');
});
it('should respect include filters', async () => {
// Create a .js file
await fs.writeFile(
path.join(tempDir, 'script.js'),
'console.log("hello");\n',
);
const invocation = tool.build({ pattern: 'hello', include: '*.js' });
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toContain('Found 1 match');
expect(result.llmContent).toContain('script.js');
expect(result.llmContent).not.toContain('file1.txt');
});
});
+239 -24
View File
@@ -17,7 +17,7 @@ import type { RipGrepToolParams } from './ripGrep.js';
import { canUseRipgrep, RipGrepTool, ensureRgPath } from './ripGrep.js'; import { canUseRipgrep, RipGrepTool, ensureRgPath } from './ripGrep.js';
import path from 'node:path'; import path from 'node:path';
import fs from 'node:fs/promises'; import fs from 'node:fs/promises';
import os, { EOL } from 'node:os'; import os from 'node:os';
import type { Config } from '../config/config.js'; import type { Config } from '../config/config.js';
import { Storage } from '../config/storage.js'; import { Storage } from '../config/storage.js';
import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js'; import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js';
@@ -360,7 +360,34 @@ describe('RipGrepTool', () => {
it('should find matches for a simple pattern in all files', async () => { it('should find matches for a simple pattern in all files', async () => {
mockSpawn.mockImplementationOnce( mockSpawn.mockImplementationOnce(
createMockSpawn({ createMockSpawn({
outputData: `fileA.txt:1:hello world${EOL}fileA.txt:2:second line with world${EOL}sub/fileC.txt:1:another world in sub dir${EOL}`, outputData:
JSON.stringify({
type: 'match',
data: {
path: { text: 'fileA.txt' },
line_number: 1,
lines: { text: 'hello world\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: 'fileA.txt' },
line_number: 2,
lines: { text: 'second line with world\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: 'sub/fileC.txt' },
line_number: 1,
lines: { text: 'another world in sub dir\n' },
},
}) +
'\n',
exitCode: 0, exitCode: 0,
}), }),
); );
@@ -385,7 +412,15 @@ describe('RipGrepTool', () => {
// Setup specific mock for this test - searching in 'sub' should only return matches from that directory // Setup specific mock for this test - searching in 'sub' should only return matches from that directory
mockSpawn.mockImplementationOnce( mockSpawn.mockImplementationOnce(
createMockSpawn({ createMockSpawn({
outputData: `fileC.txt:1:another world in sub dir${EOL}`, outputData:
JSON.stringify({
type: 'match',
data: {
path: { text: 'fileC.txt' },
line_number: 1,
lines: { text: 'another world in sub dir\n' },
},
}) + '\n',
exitCode: 0, exitCode: 0,
}), }),
); );
@@ -405,7 +440,15 @@ describe('RipGrepTool', () => {
// Setup specific mock for this test // Setup specific mock for this test
mockSpawn.mockImplementationOnce( mockSpawn.mockImplementationOnce(
createMockSpawn({ createMockSpawn({
outputData: `fileB.js:2:function baz() { return "hello"; }${EOL}`, outputData:
JSON.stringify({
type: 'match',
data: {
path: { text: 'fileB.js' },
line_number: 2,
lines: { text: 'function baz() { return "hello"; }\n' },
},
}) + '\n',
exitCode: 0, exitCode: 0,
}), }),
); );
@@ -455,7 +498,18 @@ describe('RipGrepTool', () => {
if (onData) { if (onData) {
// Only return match from the .js file in sub directory // Only return match from the .js file in sub directory
onData(Buffer.from(`another.js:1:const greeting = "hello";${EOL}`)); onData(
Buffer.from(
JSON.stringify({
type: 'match',
data: {
path: { text: 'another.js' },
line_number: 1,
lines: { text: 'const greeting = "hello";\n' },
},
}) + '\n',
),
);
} }
if (onClose) { if (onClose) {
onClose(0); onClose(0);
@@ -540,7 +594,18 @@ describe('RipGrepTool', () => {
if (onData) { if (onData) {
// Return match for the regex pattern // Return match for the regex pattern
onData(Buffer.from(`fileB.js:1:const foo = "bar";${EOL}`)); onData(
Buffer.from(
JSON.stringify({
type: 'match',
data: {
path: { text: 'fileB.js' },
line_number: 1,
lines: { text: 'const foo = "bar";\n' },
},
}) + '\n',
),
);
} }
if (onClose) { if (onClose) {
onClose(0); onClose(0);
@@ -589,7 +654,24 @@ describe('RipGrepTool', () => {
// Return case-insensitive matches for 'HELLO' // Return case-insensitive matches for 'HELLO'
onData( onData(
Buffer.from( Buffer.from(
`fileA.txt:1:hello world${EOL}fileB.js:2:function baz() { return "hello"; }${EOL}`, JSON.stringify({
type: 'match',
data: {
path: { text: 'fileA.txt' },
line_number: 1,
lines: { text: 'hello world\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: 'fileB.js' },
line_number: 2,
lines: { text: 'function baz() { return "hello"; }\n' },
},
}) +
'\n',
), ),
); );
} }
@@ -691,18 +773,54 @@ describe('RipGrepTool', () => {
if (callCount === 1) { if (callCount === 1) {
// First directory (tempRootDir) // First directory (tempRootDir)
outputData = outputData =
[ JSON.stringify({
'fileA.txt:1:hello world', type: 'match',
'fileA.txt:2:second line with world', data: {
'sub/fileC.txt:1:another world in sub dir', path: { text: 'fileA.txt' },
].join(EOL) + EOL; line_number: 1,
lines: { text: 'hello world\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: 'fileA.txt' },
line_number: 2,
lines: { text: 'second line with world\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: 'sub/fileC.txt' },
line_number: 1,
lines: { text: 'another world in sub dir\n' },
},
}) +
'\n';
} else if (callCount === 2) { } else if (callCount === 2) {
// Second directory (secondDir) // Second directory (secondDir)
outputData = outputData =
[ JSON.stringify({
'other.txt:2:world in second', type: 'match',
'another.js:1:function world() { return "test"; }', data: {
].join(EOL) + EOL; path: { text: 'other.txt' },
line_number: 2,
lines: { text: 'world in second\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: 'another.js' },
line_number: 1,
lines: { text: 'function world() { return "test"; }\n' },
},
}) +
'\n';
} }
if (stdoutDataHandler && outputData) { if (stdoutDataHandler && outputData) {
@@ -789,7 +907,18 @@ describe('RipGrepTool', () => {
)?.[1]; )?.[1];
if (onData) { if (onData) {
onData(Buffer.from(`fileC.txt:1:another world in sub dir${EOL}`)); onData(
Buffer.from(
JSON.stringify({
type: 'match',
data: {
path: { text: 'fileC.txt' },
line_number: 1,
lines: { text: 'another world in sub dir\n' },
},
}) + '\n',
),
);
} }
if (onClose) { if (onClose) {
onClose(0); onClose(0);
@@ -1005,7 +1134,14 @@ describe('RipGrepTool', () => {
if (onData) { if (onData) {
onData( onData(
Buffer.from( Buffer.from(
`${specialFileName}:1:hello world with special chars${EOL}`, JSON.stringify({
type: 'match',
data: {
path: { text: specialFileName },
line_number: 1,
lines: { text: 'hello world with special chars\n' },
},
}) + '\n',
), ),
); );
} }
@@ -1060,7 +1196,14 @@ describe('RipGrepTool', () => {
if (onData) { if (onData) {
onData( onData(
Buffer.from( Buffer.from(
`a/b/c/d/e/deep.txt:1:content in deep directory${EOL}`, JSON.stringify({
type: 'match',
data: {
path: { text: 'a/b/c/d/e/deep.txt' },
line_number: 1,
lines: { text: 'content in deep directory\n' },
},
}) + '\n',
), ),
); );
} }
@@ -1115,7 +1258,14 @@ describe('RipGrepTool', () => {
if (onData) { if (onData) {
onData( onData(
Buffer.from( Buffer.from(
`code.js:1:function getName() { return "test"; }${EOL}`, JSON.stringify({
type: 'match',
data: {
path: { text: 'code.js' },
line_number: 1,
lines: { text: 'function getName() { return "test"; }\n' },
},
}) + '\n',
), ),
); );
} }
@@ -1168,7 +1318,33 @@ describe('RipGrepTool', () => {
if (onData) { if (onData) {
onData( onData(
Buffer.from( Buffer.from(
`case.txt:1:Hello World${EOL}case.txt:2:hello world${EOL}case.txt:3:HELLO WORLD${EOL}`, JSON.stringify({
type: 'match',
data: {
path: { text: 'case.txt' },
line_number: 1,
lines: { text: 'Hello World\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: 'case.txt' },
line_number: 2,
lines: { text: 'hello world\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: 'case.txt' },
line_number: 3,
lines: { text: 'HELLO WORLD\n' },
},
}) +
'\n',
), ),
); );
} }
@@ -1220,7 +1396,18 @@ describe('RipGrepTool', () => {
)?.[1]; )?.[1];
if (onData) { if (onData) {
onData(Buffer.from(`special.txt:1:Price: $19.99${EOL}`)); onData(
Buffer.from(
JSON.stringify({
type: 'match',
data: {
path: { text: 'special.txt' },
line_number: 1,
lines: { text: 'Price: $19.99\n' },
},
}) + '\n',
),
);
} }
if (onClose) { if (onClose) {
onClose(0); onClose(0);
@@ -1279,7 +1466,24 @@ describe('RipGrepTool', () => {
if (onData) { if (onData) {
onData( onData(
Buffer.from( Buffer.from(
`test.ts:1:typescript content${EOL}test.tsx:1:tsx content${EOL}`, JSON.stringify({
type: 'match',
data: {
path: { text: 'test.ts' },
line_number: 1,
lines: { text: 'typescript content\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: 'test.tsx' },
line_number: 1,
lines: { text: 'tsx content\n' },
},
}) +
'\n',
), ),
); );
} }
@@ -1337,7 +1541,18 @@ describe('RipGrepTool', () => {
)?.[1]; )?.[1];
if (onData) { if (onData) {
onData(Buffer.from(`src/main.ts:1:source code${EOL}`)); onData(
Buffer.from(
JSON.stringify({
type: 'match',
data: {
path: { text: 'src/main.ts' },
line_number: 1,
lines: { text: 'source code\n' },
},
}) + '\n',
),
);
} }
if (onClose) { if (onClose) {
onClose(0); onClose(0);
+20 -31
View File
@@ -7,7 +7,6 @@
import type { MessageBus } from '../confirmation-bus/message-bus.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js';
import fs from 'node:fs'; import fs from 'node:fs';
import path from 'node:path'; import path from 'node:path';
import { EOL } from 'node:os';
import { spawn } from 'node:child_process'; import { spawn } from 'node:child_process';
import { downloadRipGrep } from '@joshua.litt/get-ripgrep'; import { downloadRipGrep } from '@joshua.litt/get-ripgrep';
import type { ToolInvocation, ToolResult } from './tools.js'; import type { ToolInvocation, ToolResult } from './tools.js';
@@ -276,41 +275,38 @@ class GrepToolInvocation extends BaseToolInvocation<
} }
} }
private parseRipgrepOutput(output: string, basePath: string): GrepMatch[] { private parseRipgrepJsonOutput(
output: string,
basePath: string,
): GrepMatch[] {
const results: GrepMatch[] = []; const results: GrepMatch[] = [];
if (!output) return results; if (!output) return results;
const lines = output.split(EOL); const lines = output.trim().split('\n');
for (const line of lines) { for (const line of lines) {
if (!line.trim()) continue; if (!line.trim()) continue;
const firstColonIndex = line.indexOf(':'); try {
if (firstColonIndex === -1) continue; const json = JSON.parse(line);
if (json.type === 'match') {
const secondColonIndex = line.indexOf(':', firstColonIndex + 1); const match = json.data;
if (secondColonIndex === -1) continue; // Defensive check: ensure text properties exist (skips binary/invalid encoding)
if (match.path?.text && match.lines?.text) {
const filePathRaw = line.substring(0, firstColonIndex); const absoluteFilePath = path.resolve(basePath, match.path.text);
const lineNumberStr = line.substring(
firstColonIndex + 1,
secondColonIndex,
);
const lineContent = line.substring(secondColonIndex + 1);
const lineNumber = parseInt(lineNumberStr, 10);
if (!isNaN(lineNumber)) {
const absoluteFilePath = path.resolve(basePath, filePathRaw);
const relativeFilePath = path.relative(basePath, absoluteFilePath); const relativeFilePath = path.relative(basePath, absoluteFilePath);
results.push({ results.push({
filePath: relativeFilePath || path.basename(absoluteFilePath), filePath: relativeFilePath || path.basename(absoluteFilePath),
lineNumber, lineNumber: match.line_number,
line: lineContent, line: match.lines.text.trimEnd(),
}); });
} }
} }
} catch (error) {
debugLogger.warn(`Failed to parse ripgrep JSON line: ${line}`, error);
}
}
return results; return results;
} }
@@ -322,14 +318,7 @@ class GrepToolInvocation extends BaseToolInvocation<
}): Promise<GrepMatch[]> { }): Promise<GrepMatch[]> {
const { pattern, path: absolutePath, include } = options; const { pattern, path: absolutePath, include } = options;
const rgArgs = [ const rgArgs = ['--json', '--ignore-case', '--regexp', pattern];
'--line-number',
'--no-heading',
'--with-filename',
'--ignore-case',
'--regexp',
pattern,
];
if (include) { if (include) {
rgArgs.push('--glob', include); rgArgs.push('--glob', include);
@@ -399,7 +388,7 @@ class GrepToolInvocation extends BaseToolInvocation<
}); });
}); });
return this.parseRipgrepOutput(output, absolutePath); return this.parseRipgrepJsonOutput(output, absolutePath);
} catch (error: unknown) { } catch (error: unknown) {
console.error(`GrepLogic: ripgrep failed: ${getErrorMessage(error)}`); console.error(`GrepLogic: ripgrep failed: ${getErrorMessage(error)}`);
throw error; throw error;