From 51f952e700f4b74f2ee4c80f08816a60d490c1e8 Mon Sep 17 00:00:00 2001 From: Abhi <43648792+abhipatel12@users.noreply.github.com> Date: Mon, 10 Nov 2025 19:01:01 -0500 Subject: [PATCH] fix(core): use ripgrep --json output for robust cross-platform parsing (#12853) --- integration-tests/ripgrep-real.test.ts | 91 ++++++++ packages/core/src/tools/ripGrep.test.ts | 263 +++++++++++++++++++++--- packages/core/src/tools/ripGrep.ts | 59 +++--- 3 files changed, 354 insertions(+), 59 deletions(-) create mode 100644 integration-tests/ripgrep-real.test.ts diff --git a/integration-tests/ripgrep-real.test.ts b/integration-tests/ripgrep-real.test.ts new file mode 100644 index 0000000000..bc106466b2 --- /dev/null +++ b/integration-tests/ripgrep-real.test.ts @@ -0,0 +1,91 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import * as path from 'node:path'; +import * as fs from 'node:fs/promises'; +import * as os from 'node:os'; +import { RipGrepTool } from '../packages/core/src/tools/ripGrep.js'; +import { Config } from '../packages/core/src/config/config.js'; +import { WorkspaceContext } from '../packages/core/src/utils/workspaceContext.js'; + +// Mock Config to provide necessary context +class MockConfig { + constructor(private targetDir: string) {} + + getTargetDir() { + return this.targetDir; + } + + getWorkspaceContext() { + return new WorkspaceContext(this.targetDir, [this.targetDir]); + } + + getDebugMode() { + return true; + } +} + +describe('ripgrep-real-direct', () => { + let tempDir: string; + let tool: RipGrepTool; + + beforeAll(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'ripgrep-real-test-')); + + // Create test files + await fs.writeFile(path.join(tempDir, 'file1.txt'), 'hello world\n'); + await fs.mkdir(path.join(tempDir, 'subdir')); + await fs.writeFile( + path.join(tempDir, 'subdir', 'file2.txt'), + 'hello universe\n', + ); + await fs.writeFile(path.join(tempDir, 'file3.txt'), 'goodbye moon\n'); + + const config = new MockConfig(tempDir) as unknown as Config; + tool = new RipGrepTool(config); + }); + + afterAll(async () => { + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + it('should find matches using the real ripgrep binary', async () => { + const invocation = tool.build({ pattern: 'hello' }); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('Found 2 matches'); + expect(result.llmContent).toContain('file1.txt'); + expect(result.llmContent).toContain('L1: hello world'); + expect(result.llmContent).toContain('subdir'); // Should show path + expect(result.llmContent).toContain('file2.txt'); + expect(result.llmContent).toContain('L1: hello universe'); + + expect(result.llmContent).not.toContain('goodbye moon'); + }); + + it('should handle no matches correctly', async () => { + const invocation = tool.build({ pattern: 'nonexistent_pattern_123' }); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('No matches found'); + }); + + it('should respect include filters', async () => { + // Create a .js file + await fs.writeFile( + path.join(tempDir, 'script.js'), + 'console.log("hello");\n', + ); + + const invocation = tool.build({ pattern: 'hello', include: '*.js' }); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('Found 1 match'); + expect(result.llmContent).toContain('script.js'); + expect(result.llmContent).not.toContain('file1.txt'); + }); +}); diff --git a/packages/core/src/tools/ripGrep.test.ts b/packages/core/src/tools/ripGrep.test.ts index 8f70bcd570..2f3174c4f9 100644 --- a/packages/core/src/tools/ripGrep.test.ts +++ b/packages/core/src/tools/ripGrep.test.ts @@ -17,7 +17,7 @@ import type { RipGrepToolParams } from './ripGrep.js'; import { canUseRipgrep, RipGrepTool, ensureRgPath } from './ripGrep.js'; import path from 'node:path'; import fs from 'node:fs/promises'; -import os, { EOL } from 'node:os'; +import os from 'node:os'; import type { Config } from '../config/config.js'; import { Storage } from '../config/storage.js'; import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js'; @@ -360,7 +360,34 @@ describe('RipGrepTool', () => { it('should find matches for a simple pattern in all files', async () => { mockSpawn.mockImplementationOnce( createMockSpawn({ - outputData: `fileA.txt:1:hello world${EOL}fileA.txt:2:second line with world${EOL}sub/fileC.txt:1:another world in sub dir${EOL}`, + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 1, + lines: { text: 'hello world\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 2, + lines: { text: 'second line with world\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'sub/fileC.txt' }, + line_number: 1, + lines: { text: 'another world in sub dir\n' }, + }, + }) + + '\n', exitCode: 0, }), ); @@ -385,7 +412,15 @@ describe('RipGrepTool', () => { // Setup specific mock for this test - searching in 'sub' should only return matches from that directory mockSpawn.mockImplementationOnce( createMockSpawn({ - outputData: `fileC.txt:1:another world in sub dir${EOL}`, + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileC.txt' }, + line_number: 1, + lines: { text: 'another world in sub dir\n' }, + }, + }) + '\n', exitCode: 0, }), ); @@ -405,7 +440,15 @@ describe('RipGrepTool', () => { // Setup specific mock for this test mockSpawn.mockImplementationOnce( createMockSpawn({ - outputData: `fileB.js:2:function baz() { return "hello"; }${EOL}`, + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileB.js' }, + line_number: 2, + lines: { text: 'function baz() { return "hello"; }\n' }, + }, + }) + '\n', exitCode: 0, }), ); @@ -455,7 +498,18 @@ describe('RipGrepTool', () => { if (onData) { // Only return match from the .js file in sub directory - onData(Buffer.from(`another.js:1:const greeting = "hello";${EOL}`)); + onData( + Buffer.from( + JSON.stringify({ + type: 'match', + data: { + path: { text: 'another.js' }, + line_number: 1, + lines: { text: 'const greeting = "hello";\n' }, + }, + }) + '\n', + ), + ); } if (onClose) { onClose(0); @@ -540,7 +594,18 @@ describe('RipGrepTool', () => { if (onData) { // Return match for the regex pattern - onData(Buffer.from(`fileB.js:1:const foo = "bar";${EOL}`)); + onData( + Buffer.from( + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileB.js' }, + line_number: 1, + lines: { text: 'const foo = "bar";\n' }, + }, + }) + '\n', + ), + ); } if (onClose) { onClose(0); @@ -589,7 +654,24 @@ describe('RipGrepTool', () => { // Return case-insensitive matches for 'HELLO' onData( Buffer.from( - `fileA.txt:1:hello world${EOL}fileB.js:2:function baz() { return "hello"; }${EOL}`, + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 1, + lines: { text: 'hello world\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileB.js' }, + line_number: 2, + lines: { text: 'function baz() { return "hello"; }\n' }, + }, + }) + + '\n', ), ); } @@ -691,18 +773,54 @@ describe('RipGrepTool', () => { if (callCount === 1) { // First directory (tempRootDir) outputData = - [ - 'fileA.txt:1:hello world', - 'fileA.txt:2:second line with world', - 'sub/fileC.txt:1:another world in sub dir', - ].join(EOL) + EOL; + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 1, + lines: { text: 'hello world\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 2, + lines: { text: 'second line with world\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'sub/fileC.txt' }, + line_number: 1, + lines: { text: 'another world in sub dir\n' }, + }, + }) + + '\n'; } else if (callCount === 2) { // Second directory (secondDir) outputData = - [ - 'other.txt:2:world in second', - 'another.js:1:function world() { return "test"; }', - ].join(EOL) + EOL; + JSON.stringify({ + type: 'match', + data: { + path: { text: 'other.txt' }, + line_number: 2, + lines: { text: 'world in second\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'another.js' }, + line_number: 1, + lines: { text: 'function world() { return "test"; }\n' }, + }, + }) + + '\n'; } if (stdoutDataHandler && outputData) { @@ -789,7 +907,18 @@ describe('RipGrepTool', () => { )?.[1]; if (onData) { - onData(Buffer.from(`fileC.txt:1:another world in sub dir${EOL}`)); + onData( + Buffer.from( + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileC.txt' }, + line_number: 1, + lines: { text: 'another world in sub dir\n' }, + }, + }) + '\n', + ), + ); } if (onClose) { onClose(0); @@ -1005,7 +1134,14 @@ describe('RipGrepTool', () => { if (onData) { onData( Buffer.from( - `${specialFileName}:1:hello world with special chars${EOL}`, + JSON.stringify({ + type: 'match', + data: { + path: { text: specialFileName }, + line_number: 1, + lines: { text: 'hello world with special chars\n' }, + }, + }) + '\n', ), ); } @@ -1060,7 +1196,14 @@ describe('RipGrepTool', () => { if (onData) { onData( Buffer.from( - `a/b/c/d/e/deep.txt:1:content in deep directory${EOL}`, + JSON.stringify({ + type: 'match', + data: { + path: { text: 'a/b/c/d/e/deep.txt' }, + line_number: 1, + lines: { text: 'content in deep directory\n' }, + }, + }) + '\n', ), ); } @@ -1115,7 +1258,14 @@ describe('RipGrepTool', () => { if (onData) { onData( Buffer.from( - `code.js:1:function getName() { return "test"; }${EOL}`, + JSON.stringify({ + type: 'match', + data: { + path: { text: 'code.js' }, + line_number: 1, + lines: { text: 'function getName() { return "test"; }\n' }, + }, + }) + '\n', ), ); } @@ -1168,7 +1318,33 @@ describe('RipGrepTool', () => { if (onData) { onData( Buffer.from( - `case.txt:1:Hello World${EOL}case.txt:2:hello world${EOL}case.txt:3:HELLO WORLD${EOL}`, + JSON.stringify({ + type: 'match', + data: { + path: { text: 'case.txt' }, + line_number: 1, + lines: { text: 'Hello World\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'case.txt' }, + line_number: 2, + lines: { text: 'hello world\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'case.txt' }, + line_number: 3, + lines: { text: 'HELLO WORLD\n' }, + }, + }) + + '\n', ), ); } @@ -1220,7 +1396,18 @@ describe('RipGrepTool', () => { )?.[1]; if (onData) { - onData(Buffer.from(`special.txt:1:Price: $19.99${EOL}`)); + onData( + Buffer.from( + JSON.stringify({ + type: 'match', + data: { + path: { text: 'special.txt' }, + line_number: 1, + lines: { text: 'Price: $19.99\n' }, + }, + }) + '\n', + ), + ); } if (onClose) { onClose(0); @@ -1279,7 +1466,24 @@ describe('RipGrepTool', () => { if (onData) { onData( Buffer.from( - `test.ts:1:typescript content${EOL}test.tsx:1:tsx content${EOL}`, + JSON.stringify({ + type: 'match', + data: { + path: { text: 'test.ts' }, + line_number: 1, + lines: { text: 'typescript content\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'test.tsx' }, + line_number: 1, + lines: { text: 'tsx content\n' }, + }, + }) + + '\n', ), ); } @@ -1337,7 +1541,18 @@ describe('RipGrepTool', () => { )?.[1]; if (onData) { - onData(Buffer.from(`src/main.ts:1:source code${EOL}`)); + onData( + Buffer.from( + JSON.stringify({ + type: 'match', + data: { + path: { text: 'src/main.ts' }, + line_number: 1, + lines: { text: 'source code\n' }, + }, + }) + '\n', + ), + ); } if (onClose) { onClose(0); diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index 12c97a4779..b28a14b4ce 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -7,7 +7,6 @@ import type { MessageBus } from '../confirmation-bus/message-bus.js'; import fs from 'node:fs'; import path from 'node:path'; -import { EOL } from 'node:os'; import { spawn } from 'node:child_process'; import { downloadRipGrep } from '@joshua.litt/get-ripgrep'; import type { ToolInvocation, ToolResult } from './tools.js'; @@ -276,39 +275,36 @@ class GrepToolInvocation extends BaseToolInvocation< } } - private parseRipgrepOutput(output: string, basePath: string): GrepMatch[] { + private parseRipgrepJsonOutput( + output: string, + basePath: string, + ): GrepMatch[] { const results: GrepMatch[] = []; if (!output) return results; - const lines = output.split(EOL); + const lines = output.trim().split('\n'); for (const line of lines) { if (!line.trim()) continue; - const firstColonIndex = line.indexOf(':'); - if (firstColonIndex === -1) continue; + try { + const json = JSON.parse(line); + if (json.type === 'match') { + const match = json.data; + // Defensive check: ensure text properties exist (skips binary/invalid encoding) + if (match.path?.text && match.lines?.text) { + const absoluteFilePath = path.resolve(basePath, match.path.text); + const relativeFilePath = path.relative(basePath, absoluteFilePath); - const secondColonIndex = line.indexOf(':', firstColonIndex + 1); - if (secondColonIndex === -1) continue; - - const filePathRaw = line.substring(0, firstColonIndex); - const lineNumberStr = line.substring( - firstColonIndex + 1, - secondColonIndex, - ); - const lineContent = line.substring(secondColonIndex + 1); - - const lineNumber = parseInt(lineNumberStr, 10); - - if (!isNaN(lineNumber)) { - const absoluteFilePath = path.resolve(basePath, filePathRaw); - const relativeFilePath = path.relative(basePath, absoluteFilePath); - - results.push({ - filePath: relativeFilePath || path.basename(absoluteFilePath), - lineNumber, - line: lineContent, - }); + results.push({ + filePath: relativeFilePath || path.basename(absoluteFilePath), + lineNumber: match.line_number, + line: match.lines.text.trimEnd(), + }); + } + } + } catch (error) { + debugLogger.warn(`Failed to parse ripgrep JSON line: ${line}`, error); } } return results; @@ -322,14 +318,7 @@ class GrepToolInvocation extends BaseToolInvocation< }): Promise { const { pattern, path: absolutePath, include } = options; - const rgArgs = [ - '--line-number', - '--no-heading', - '--with-filename', - '--ignore-case', - '--regexp', - pattern, - ]; + const rgArgs = ['--json', '--ignore-case', '--regexp', pattern]; if (include) { rgArgs.push('--glob', include); @@ -399,7 +388,7 @@ class GrepToolInvocation extends BaseToolInvocation< }); }); - return this.parseRipgrepOutput(output, absolutePath); + return this.parseRipgrepJsonOutput(output, absolutePath); } catch (error: unknown) { console.error(`GrepLogic: ripgrep failed: ${getErrorMessage(error)}`); throw error;