feat(filesearch): Introduce non-recursive file search strategy (#6087)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
Co-authored-by: Bryant Chandler <bryantchandler@chromium.org>
This commit is contained in:
Bryant Chandler
2025-08-18 13:43:24 -07:00
committed by GitHub
parent d66ddcd82e
commit 465ac9f547
8 changed files with 1033 additions and 430 deletions
@@ -4,17 +4,10 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import * as fs from 'fs/promises';
import * as path from 'path';
import * as cache from './crawlCache.js';
import { FileSearch, AbortError, filter } from './fileSearch.js';
import { describe, it, expect, afterEach, vi } from 'vitest';
import { FileSearchFactory, AbortError, filter } from './fileSearch.js';
import { createTmpDir, cleanupTmpDir } from '@google/gemini-cli-test-utils';
type FileSearchWithPrivateMethods = FileSearch & {
performCrawl: () => Promise<void>;
};
describe('FileSearch', () => {
let tmpDir: string;
afterEach(async () => {
@@ -31,13 +24,14 @@ describe('FileSearch', () => {
src: ['not-ignored.js'],
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: true,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -55,13 +49,14 @@ describe('FileSearch', () => {
src: ['not-ignored.js'],
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: true,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -81,13 +76,14 @@ describe('FileSearch', () => {
src: ['main.js'],
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: ['logs'],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -108,13 +104,14 @@ describe('FileSearch', () => {
src: ['main.js'],
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -139,13 +136,14 @@ describe('FileSearch', () => {
},
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -161,13 +159,14 @@ describe('FileSearch', () => {
'Foo.mk': '',
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -194,13 +193,14 @@ describe('FileSearch', () => {
},
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -222,13 +222,14 @@ describe('FileSearch', () => {
src: ['main.js'],
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -250,13 +251,14 @@ describe('FileSearch', () => {
src: ['file1.js'],
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: true,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
// Expect no errors to be thrown during initialization
@@ -275,13 +277,14 @@ describe('FileSearch', () => {
},
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -299,13 +302,14 @@ describe('FileSearch', () => {
},
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -319,13 +323,14 @@ describe('FileSearch', () => {
src: ['file1.js'],
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -346,170 +351,21 @@ describe('FileSearch', () => {
await expect(filterPromise).rejects.toThrow(AbortError);
});
describe('with in-memory cache', () => {
beforeEach(() => {
cache.clear();
it('should throw an error if search is called before initialization', async () => {
tmpDir = await createTmpDir({});
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
afterEach(() => {
vi.useRealTimers();
});
it('should throw an error if search is called before initialization', async () => {
tmpDir = await createTmpDir({});
const fileSearch = new FileSearch({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
});
await expect(fileSearch.search('')).rejects.toThrow(
'Engine not initialized. Call initialize() first.',
);
});
it('should hit the cache for subsequent searches', async () => {
tmpDir = await createTmpDir({ 'file1.js': '' });
const getOptions = () => ({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: true,
cacheTtl: 10,
});
const fs1 = new FileSearch(getOptions());
const crawlSpy1 = vi.spyOn(
fs1 as FileSearchWithPrivateMethods,
'performCrawl',
);
await fs1.initialize();
expect(crawlSpy1).toHaveBeenCalledTimes(1);
// Second search should hit the cache because the options are identical
const fs2 = new FileSearch(getOptions());
const crawlSpy2 = vi.spyOn(
fs2 as FileSearchWithPrivateMethods,
'performCrawl',
);
await fs2.initialize();
expect(crawlSpy2).not.toHaveBeenCalled();
});
it('should miss the cache when ignore rules change', async () => {
tmpDir = await createTmpDir({
'.gitignore': 'a.txt',
'a.txt': '',
'b.txt': '',
});
const options = {
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: true,
cacheTtl: 10000,
};
// Initial search to populate the cache
const fs1 = new FileSearch(options);
const crawlSpy1 = vi.spyOn(
fs1 as FileSearchWithPrivateMethods,
'performCrawl',
);
await fs1.initialize();
const results1 = await fs1.search('');
expect(crawlSpy1).toHaveBeenCalledTimes(1);
expect(results1).toEqual(['.gitignore', 'b.txt']);
// Modify the ignore file
await fs.writeFile(path.join(tmpDir, '.gitignore'), 'b.txt');
// Second search should miss the cache and trigger a recrawl
const fs2 = new FileSearch(options);
const crawlSpy2 = vi.spyOn(
fs2 as FileSearchWithPrivateMethods,
'performCrawl',
);
await fs2.initialize();
const results2 = await fs2.search('');
expect(crawlSpy2).toHaveBeenCalledTimes(1);
expect(results2).toEqual(['.gitignore', 'a.txt']);
});
it('should miss the cache after TTL expires', async () => {
vi.useFakeTimers();
tmpDir = await createTmpDir({ 'file1.js': '' });
const options = {
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: true,
cacheTtl: 10, // 10 seconds
};
// Initial search to populate the cache
const fs1 = new FileSearch(options);
await fs1.initialize();
// Advance time past the TTL
await vi.advanceTimersByTimeAsync(11000);
// Second search should miss the cache and trigger a recrawl
const fs2 = new FileSearch(options);
const crawlSpy = vi.spyOn(
fs2 as FileSearchWithPrivateMethods,
'performCrawl',
);
await fs2.initialize();
expect(crawlSpy).toHaveBeenCalledTimes(1);
});
it('should miss the cache when maxDepth changes', async () => {
tmpDir = await createTmpDir({ 'file1.js': '' });
const getOptions = (maxDepth?: number) => ({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: true,
cacheTtl: 10000,
maxDepth,
});
// 1. First search with maxDepth: 1, should trigger a crawl.
const fs1 = new FileSearch(getOptions(1));
const crawlSpy1 = vi.spyOn(
fs1 as FileSearchWithPrivateMethods,
'performCrawl',
);
await fs1.initialize();
expect(crawlSpy1).toHaveBeenCalledTimes(1);
// 2. Second search with maxDepth: 2, should be a cache miss and trigger a crawl.
const fs2 = new FileSearch(getOptions(2));
const crawlSpy2 = vi.spyOn(
fs2 as FileSearchWithPrivateMethods,
'performCrawl',
);
await fs2.initialize();
expect(crawlSpy2).toHaveBeenCalledTimes(1);
// 3. Third search with maxDepth: 1 again, should be a cache hit.
const fs3 = new FileSearch(getOptions(1));
const crawlSpy3 = vi.spyOn(
fs3 as FileSearchWithPrivateMethods,
'performCrawl',
);
await fs3.initialize();
expect(crawlSpy3).not.toHaveBeenCalled();
});
await expect(fileSearch.search('')).rejects.toThrow(
'Engine not initialized. Call initialize() first.',
);
});
it('should handle empty or commented-only ignore files', async () => {
@@ -518,13 +374,14 @@ describe('FileSearch', () => {
src: ['main.js'],
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -539,13 +396,14 @@ describe('FileSearch', () => {
src: ['main.js'],
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false, // Explicitly disable .gitignore to isolate this rule
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -561,13 +419,14 @@ describe('FileSearch', () => {
}
tmpDir = await createTmpDir(largeDir);
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -596,13 +455,14 @@ describe('FileSearch', () => {
},
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: true, // Enable caching for this test
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -634,13 +494,14 @@ describe('FileSearch', () => {
'other.txt': '',
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -676,13 +537,14 @@ describe('FileSearch', () => {
'file5.js': '',
});
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: true, // Ensure caching is enabled
cacheTtl: 10000,
enableRecursiveFileSearch: true,
});
await fileSearch.initialize();
@@ -704,108 +566,97 @@ describe('FileSearch', () => {
expect(limitedResults).toEqual(['file1.js', 'file2.js']);
});
describe('with maxDepth', () => {
beforeEach(async () => {
describe('DirectoryFileSearch', () => {
it('should search for files in the current directory', async () => {
tmpDir = await createTmpDir({
'file-root.txt': '',
level1: {
'file-level1.txt': '',
level2: {
'file-level2.txt': '',
level3: {
'file-level3.txt': '',
},
},
'file1.js': '',
'file2.ts': '',
'file3.js': '',
});
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
enableRecursiveFileSearch: false,
});
await fileSearch.initialize();
const results = await fileSearch.search('*.js');
expect(results).toEqual(['file1.js', 'file3.js']);
});
it('should search for files in a subdirectory', async () => {
tmpDir = await createTmpDir({
'file1.js': '',
src: {
'file2.js': '',
'file3.ts': '',
},
});
});
it('should only search top-level files when maxDepth is 0', async () => {
const fileSearch = new FileSearch({
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
maxDepth: 0,
enableRecursiveFileSearch: false,
});
await fileSearch.initialize();
const results = await fileSearch.search('');
expect(results).toEqual(['level1/', 'file-root.txt']);
const results = await fileSearch.search('src/*.js');
expect(results).toEqual(['src/file2.js']);
});
it('should search one level deep when maxDepth is 1', async () => {
const fileSearch = new FileSearch({
it('should list all files in a directory', async () => {
tmpDir = await createTmpDir({
'file1.js': '',
src: {
'file2.js': '',
'file3.ts': '',
},
});
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
maxDepth: 1,
enableRecursiveFileSearch: false,
});
await fileSearch.initialize();
const results = await fileSearch.search('');
expect(results).toEqual([
'level1/',
'level1/level2/',
'file-root.txt',
'level1/file-level1.txt',
]);
const results = await fileSearch.search('src/');
expect(results).toEqual(['src/file2.js', 'src/file3.ts']);
});
it('should search two levels deep when maxDepth is 2', async () => {
const fileSearch = new FileSearch({
it('should respect ignore rules', async () => {
tmpDir = await createTmpDir({
'.gitignore': '*.js',
'file1.js': '',
'file2.ts': '',
});
const fileSearch = FileSearchFactory.create({
projectRoot: tmpDir,
useGitignore: false,
useGitignore: true,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
maxDepth: 2,
enableRecursiveFileSearch: false,
});
await fileSearch.initialize();
const results = await fileSearch.search('');
expect(results).toEqual([
'level1/',
'level1/level2/',
'level1/level2/level3/',
'file-root.txt',
'level1/file-level1.txt',
'level1/level2/file-level2.txt',
]);
});
it('should perform a full recursive search when maxDepth is undefined', async () => {
const fileSearch = new FileSearch({
projectRoot: tmpDir,
useGitignore: false,
useGeminiignore: false,
ignoreDirs: [],
cache: false,
cacheTtl: 0,
maxDepth: undefined, // Explicitly undefined
});
await fileSearch.initialize();
const results = await fileSearch.search('');
expect(results).toEqual([
'level1/',
'level1/level2/',
'level1/level2/level3/',
'file-root.txt',
'level1/file-level1.txt',
'level1/level2/file-level2.txt',
'level1/level2/level3/file-level3.txt',
]);
const results = await fileSearch.search('*');
expect(results).toEqual(['.gitignore', 'file2.ts']);
});
});
});