fix(core): use ripgrep --json output for robust cross-platform parsing (#12853)

2026-05-12 12:54:07 -07:00 · 2025-11-10 19:01:01 -05:00
parent 4af4f8644d
commit 51f952e700
3 changed files with 354 additions and 59 deletions
@@ -0,0 +1,91 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import * as path from 'node:path';
+import * as fs from 'node:fs/promises';
+import * as os from 'node:os';
+import { RipGrepTool } from '../packages/core/src/tools/ripGrep.js';
+import { Config } from '../packages/core/src/config/config.js';
+import { WorkspaceContext } from '../packages/core/src/utils/workspaceContext.js';
+
+// Mock Config to provide necessary context
+class MockConfig {
+  constructor(private targetDir: string) {}
+
+  getTargetDir() {
+    return this.targetDir;
+  }
+
+  getWorkspaceContext() {
+    return new WorkspaceContext(this.targetDir, [this.targetDir]);
+  }
+
+  getDebugMode() {
+    return true;
+  }
+}
+
+describe('ripgrep-real-direct', () => {
+  let tempDir: string;
+  let tool: RipGrepTool;
+
+  beforeAll(async () => {
+    tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'ripgrep-real-test-'));
+
+    // Create test files
+    await fs.writeFile(path.join(tempDir, 'file1.txt'), 'hello world\n');
+    await fs.mkdir(path.join(tempDir, 'subdir'));
+    await fs.writeFile(
+      path.join(tempDir, 'subdir', 'file2.txt'),
+      'hello universe\n',
+    );
+    await fs.writeFile(path.join(tempDir, 'file3.txt'), 'goodbye moon\n');
+
+    const config = new MockConfig(tempDir) as unknown as Config;
+    tool = new RipGrepTool(config);
+  });
+
+  afterAll(async () => {
+    await fs.rm(tempDir, { recursive: true, force: true });
+  });
+
+  it('should find matches using the real ripgrep binary', async () => {
+    const invocation = tool.build({ pattern: 'hello' });
+    const result = await invocation.execute(new AbortController().signal);
+
+    expect(result.llmContent).toContain('Found 2 matches');
+    expect(result.llmContent).toContain('file1.txt');
+    expect(result.llmContent).toContain('L1: hello world');
+    expect(result.llmContent).toContain('subdir'); // Should show path
+    expect(result.llmContent).toContain('file2.txt');
+    expect(result.llmContent).toContain('L1: hello universe');
+
+    expect(result.llmContent).not.toContain('goodbye moon');
+  });
+
+  it('should handle no matches correctly', async () => {
+    const invocation = tool.build({ pattern: 'nonexistent_pattern_123' });
+    const result = await invocation.execute(new AbortController().signal);
+
+    expect(result.llmContent).toContain('No matches found');
+  });
+
+  it('should respect include filters', async () => {
+    // Create a .js file
+    await fs.writeFile(
+      path.join(tempDir, 'script.js'),
+      'console.log("hello");\n',
+    );
+
+    const invocation = tool.build({ pattern: 'hello', include: '*.js' });
+    const result = await invocation.execute(new AbortController().signal);
+
+    expect(result.llmContent).toContain('Found 1 match');
+    expect(result.llmContent).toContain('script.js');
+    expect(result.llmContent).not.toContain('file1.txt');
+  });
+});
@@ -17,7 +17,7 @@ import type { RipGrepToolParams } from './ripGrep.js';
 import { canUseRipgrep, RipGrepTool, ensureRgPath } from './ripGrep.js';
 import path from 'node:path';
 import fs from 'node:fs/promises';
-import os, { EOL } from 'node:os';
+import os from 'node:os';
 import type { Config } from '../config/config.js';
 import { Storage } from '../config/storage.js';
 import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js';
@@ -360,7 +360,34 @@ describe('RipGrepTool', () => {
    it('should find matches for a simple pattern in all files', async () => {
      mockSpawn.mockImplementationOnce(
        createMockSpawn({
-          outputData: `fileA.txt:1:hello world${EOL}fileA.txt:2:second line with world${EOL}sub/fileC.txt:1:another world in sub dir${EOL}`,
+          outputData:
+            JSON.stringify({
+              type: 'match',
+              data: {
+                path: { text: 'fileA.txt' },
+                line_number: 1,
+                lines: { text: 'hello world\n' },
+              },
+            }) +
+            '\n' +
+            JSON.stringify({
+              type: 'match',
+              data: {
+                path: { text: 'fileA.txt' },
+                line_number: 2,
+                lines: { text: 'second line with world\n' },
+              },
+            }) +
+            '\n' +
+            JSON.stringify({
+              type: 'match',
+              data: {
+                path: { text: 'sub/fileC.txt' },
+                line_number: 1,
+                lines: { text: 'another world in sub dir\n' },
+              },
+            }) +
+            '\n',
          exitCode: 0,
        }),
      );
@@ -385,7 +412,15 @@ describe('RipGrepTool', () => {
      // Setup specific mock for this test - searching in 'sub' should only return matches from that directory
      mockSpawn.mockImplementationOnce(
        createMockSpawn({
-          outputData: `fileC.txt:1:another world in sub dir${EOL}`,
+          outputData:
+            JSON.stringify({
+              type: 'match',
+              data: {
+                path: { text: 'fileC.txt' },
+                line_number: 1,
+                lines: { text: 'another world in sub dir\n' },
+              },
+            }) + '\n',
          exitCode: 0,
        }),
      );
@@ -405,7 +440,15 @@ describe('RipGrepTool', () => {
      // Setup specific mock for this test
      mockSpawn.mockImplementationOnce(
        createMockSpawn({
-          outputData: `fileB.js:2:function baz() { return "hello"; }${EOL}`,
+          outputData:
+            JSON.stringify({
+              type: 'match',
+              data: {
+                path: { text: 'fileB.js' },
+                line_number: 2,
+                lines: { text: 'function baz() { return "hello"; }\n' },
+              },
+            }) + '\n',
          exitCode: 0,
        }),
      );
@@ -455,7 +498,18 @@ describe('RipGrepTool', () => {

          if (onData) {
            // Only return match from the .js file in sub directory
-            onData(Buffer.from(`another.js:1:const greeting = "hello";${EOL}`));
+            onData(
+              Buffer.from(
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: 'another.js' },
+                    line_number: 1,
+                    lines: { text: 'const greeting = "hello";\n' },
+                  },
+                }) + '\n',
+              ),
+            );
          }
          if (onClose) {
            onClose(0);
@@ -540,7 +594,18 @@ describe('RipGrepTool', () => {

          if (onData) {
            // Return match for the regex pattern
-            onData(Buffer.from(`fileB.js:1:const foo = "bar";${EOL}`));
+            onData(
+              Buffer.from(
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: 'fileB.js' },
+                    line_number: 1,
+                    lines: { text: 'const foo = "bar";\n' },
+                  },
+                }) + '\n',
+              ),
+            );
          }
          if (onClose) {
            onClose(0);
@@ -589,7 +654,24 @@ describe('RipGrepTool', () => {
            // Return case-insensitive matches for 'HELLO'
            onData(
              Buffer.from(
-                `fileA.txt:1:hello world${EOL}fileB.js:2:function baz() { return "hello"; }${EOL}`,
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: 'fileA.txt' },
+                    line_number: 1,
+                    lines: { text: 'hello world\n' },
+                  },
+                }) +
+                  '\n' +
+                  JSON.stringify({
+                    type: 'match',
+                    data: {
+                      path: { text: 'fileB.js' },
+                      line_number: 2,
+                      lines: { text: 'function baz() { return "hello"; }\n' },
+                    },
+                  }) +
+                  '\n',
              ),
            );
          }
@@ -691,18 +773,54 @@ describe('RipGrepTool', () => {
          if (callCount === 1) {
            // First directory (tempRootDir)
            outputData =
-              [
-                'fileA.txt:1:hello world',
-                'fileA.txt:2:second line with world',
-                'sub/fileC.txt:1:another world in sub dir',
-              ].join(EOL) + EOL;
+              JSON.stringify({
+                type: 'match',
+                data: {
+                  path: { text: 'fileA.txt' },
+                  line_number: 1,
+                  lines: { text: 'hello world\n' },
+                },
+              }) +
+              '\n' +
+              JSON.stringify({
+                type: 'match',
+                data: {
+                  path: { text: 'fileA.txt' },
+                  line_number: 2,
+                  lines: { text: 'second line with world\n' },
+                },
+              }) +
+              '\n' +
+              JSON.stringify({
+                type: 'match',
+                data: {
+                  path: { text: 'sub/fileC.txt' },
+                  line_number: 1,
+                  lines: { text: 'another world in sub dir\n' },
+                },
+              }) +
+              '\n';
          } else if (callCount === 2) {
            // Second directory (secondDir)
            outputData =
-              [
-                'other.txt:2:world in second',
-                'another.js:1:function world() { return "test"; }',
-              ].join(EOL) + EOL;
+              JSON.stringify({
+                type: 'match',
+                data: {
+                  path: { text: 'other.txt' },
+                  line_number: 2,
+                  lines: { text: 'world in second\n' },
+                },
+              }) +
+              '\n' +
+              JSON.stringify({
+                type: 'match',
+                data: {
+                  path: { text: 'another.js' },
+                  line_number: 1,
+                  lines: { text: 'function world() { return "test"; }\n' },
+                },
+              }) +
+              '\n';
          }

          if (stdoutDataHandler && outputData) {
@@ -789,7 +907,18 @@ describe('RipGrepTool', () => {
          )?.[1];

          if (onData) {
-            onData(Buffer.from(`fileC.txt:1:another world in sub dir${EOL}`));
+            onData(
+              Buffer.from(
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: 'fileC.txt' },
+                    line_number: 1,
+                    lines: { text: 'another world in sub dir\n' },
+                  },
+                }) + '\n',
+              ),
+            );
          }
          if (onClose) {
            onClose(0);
@@ -1005,7 +1134,14 @@ describe('RipGrepTool', () => {
          if (onData) {
            onData(
              Buffer.from(
-                `${specialFileName}:1:hello world with special chars${EOL}`,
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: specialFileName },
+                    line_number: 1,
+                    lines: { text: 'hello world with special chars\n' },
+                  },
+                }) + '\n',
              ),
            );
          }
@@ -1060,7 +1196,14 @@ describe('RipGrepTool', () => {
          if (onData) {
            onData(
              Buffer.from(
-                `a/b/c/d/e/deep.txt:1:content in deep directory${EOL}`,
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: 'a/b/c/d/e/deep.txt' },
+                    line_number: 1,
+                    lines: { text: 'content in deep directory\n' },
+                  },
+                }) + '\n',
              ),
            );
          }
@@ -1115,7 +1258,14 @@ describe('RipGrepTool', () => {
          if (onData) {
            onData(
              Buffer.from(
-                `code.js:1:function getName() { return "test"; }${EOL}`,
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: 'code.js' },
+                    line_number: 1,
+                    lines: { text: 'function getName() { return "test"; }\n' },
+                  },
+                }) + '\n',
              ),
            );
          }
@@ -1168,7 +1318,33 @@ describe('RipGrepTool', () => {
          if (onData) {
            onData(
              Buffer.from(
-                `case.txt:1:Hello World${EOL}case.txt:2:hello world${EOL}case.txt:3:HELLO WORLD${EOL}`,
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: 'case.txt' },
+                    line_number: 1,
+                    lines: { text: 'Hello World\n' },
+                  },
+                }) +
+                  '\n' +
+                  JSON.stringify({
+                    type: 'match',
+                    data: {
+                      path: { text: 'case.txt' },
+                      line_number: 2,
+                      lines: { text: 'hello world\n' },
+                    },
+                  }) +
+                  '\n' +
+                  JSON.stringify({
+                    type: 'match',
+                    data: {
+                      path: { text: 'case.txt' },
+                      line_number: 3,
+                      lines: { text: 'HELLO WORLD\n' },
+                    },
+                  }) +
+                  '\n',
              ),
            );
          }
@@ -1220,7 +1396,18 @@ describe('RipGrepTool', () => {
          )?.[1];

          if (onData) {
-            onData(Buffer.from(`special.txt:1:Price: $19.99${EOL}`));
+            onData(
+              Buffer.from(
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: 'special.txt' },
+                    line_number: 1,
+                    lines: { text: 'Price: $19.99\n' },
+                  },
+                }) + '\n',
+              ),
+            );
          }
          if (onClose) {
            onClose(0);
@@ -1279,7 +1466,24 @@ describe('RipGrepTool', () => {
          if (onData) {
            onData(
              Buffer.from(
-                `test.ts:1:typescript content${EOL}test.tsx:1:tsx content${EOL}`,
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: 'test.ts' },
+                    line_number: 1,
+                    lines: { text: 'typescript content\n' },
+                  },
+                }) +
+                  '\n' +
+                  JSON.stringify({
+                    type: 'match',
+                    data: {
+                      path: { text: 'test.tsx' },
+                      line_number: 1,
+                      lines: { text: 'tsx content\n' },
+                    },
+                  }) +
+                  '\n',
              ),
            );
          }
@@ -1337,7 +1541,18 @@ describe('RipGrepTool', () => {
          )?.[1];

          if (onData) {
-            onData(Buffer.from(`src/main.ts:1:source code${EOL}`));
+            onData(
+              Buffer.from(
+                JSON.stringify({
+                  type: 'match',
+                  data: {
+                    path: { text: 'src/main.ts' },
+                    line_number: 1,
+                    lines: { text: 'source code\n' },
+                  },
+                }) + '\n',
+              ),
+            );
          }
          if (onClose) {
            onClose(0);
@@ -7,7 +7,6 @@
 import type { MessageBus } from '../confirmation-bus/message-bus.js';
 import fs from 'node:fs';
 import path from 'node:path';
-import { EOL } from 'node:os';
 import { spawn } from 'node:child_process';
 import { downloadRipGrep } from '@joshua.litt/get-ripgrep';
 import type { ToolInvocation, ToolResult } from './tools.js';
@@ -276,39 +275,36 @@ class GrepToolInvocation extends BaseToolInvocation<
    }
  }

-  private parseRipgrepOutput(output: string, basePath: string): GrepMatch[] {
+  private parseRipgrepJsonOutput(
+    output: string,
+    basePath: string,
+  ): GrepMatch[] {
    const results: GrepMatch[] = [];
    if (!output) return results;

-    const lines = output.split(EOL);
+    const lines = output.trim().split('\n');

    for (const line of lines) {
      if (!line.trim()) continue;

-      const firstColonIndex = line.indexOf(':');
-      if (firstColonIndex === -1) continue;
+      try {
+        const json = JSON.parse(line);
+        if (json.type === 'match') {
+          const match = json.data;
+          // Defensive check: ensure text properties exist (skips binary/invalid encoding)
+          if (match.path?.text && match.lines?.text) {
+            const absoluteFilePath = path.resolve(basePath, match.path.text);
+            const relativeFilePath = path.relative(basePath, absoluteFilePath);

-      const secondColonIndex = line.indexOf(':', firstColonIndex + 1);
-      if (secondColonIndex === -1) continue;
-
-      const filePathRaw = line.substring(0, firstColonIndex);
-      const lineNumberStr = line.substring(
-        firstColonIndex + 1,
-        secondColonIndex,
-      );
-      const lineContent = line.substring(secondColonIndex + 1);
-
-      const lineNumber = parseInt(lineNumberStr, 10);
-
-      if (!isNaN(lineNumber)) {
-        const absoluteFilePath = path.resolve(basePath, filePathRaw);
-        const relativeFilePath = path.relative(basePath, absoluteFilePath);
-
-        results.push({
-          filePath: relativeFilePath || path.basename(absoluteFilePath),
-          lineNumber,
-          line: lineContent,
-        });
+            results.push({
+              filePath: relativeFilePath || path.basename(absoluteFilePath),
+              lineNumber: match.line_number,
+              line: match.lines.text.trimEnd(),
+            });
+          }
+        }
+      } catch (error) {
+        debugLogger.warn(`Failed to parse ripgrep JSON line: ${line}`, error);
      }
    }
    return results;
@@ -322,14 +318,7 @@ class GrepToolInvocation extends BaseToolInvocation<
  }): Promise<GrepMatch[]> {
    const { pattern, path: absolutePath, include } = options;

-    const rgArgs = [
-      '--line-number',
-      '--no-heading',
-      '--with-filename',
-      '--ignore-case',
-      '--regexp',
-      pattern,
-    ];
+    const rgArgs = ['--json', '--ignore-case', '--regexp', pattern];

    if (include) {
      rgArgs.push('--glob', include);
@@ -399,7 +388,7 @@ class GrepToolInvocation extends BaseToolInvocation<
        });
      });

-      return this.parseRipgrepOutput(output, absolutePath);
+      return this.parseRipgrepJsonOutput(output, absolutePath);
    } catch (error: unknown) {
      console.error(`GrepLogic: ripgrep failed: ${getErrorMessage(error)}`);
      throw error;