feat(context): add configurable memoryBoundaryMarkers setting (#24020)

This commit is contained in:
Sandy Tao
2026-03-27 14:51:32 -07:00
committed by GitHub
parent 765fb67011
commit 4034c030e7
13 changed files with 265 additions and 55 deletions
@@ -1269,6 +1269,96 @@ included directory memory
expect(result.files[0].path).toBe(subDirMemory);
expect(result.files[0].content).toBe('Content without git');
});
it('should stop at a custom boundary marker instead of .git', async () => {
  // Layout under test:
  //   <project>/.monorepo-root        <- custom boundary marker (no .git here)
  //   <project>/<context file>        <- project-level memory
  //   <project>/packages/app/<context file>
  //   <project>/packages/app/file.ts  <- path being accessed
  const markerName = '.monorepo-root';
  const projectDir = await createEmptyDir(
    path.join(testRootDir, 'custom_marker'),
  );
  await createTestFile(path.join(projectDir, markerName), '');
  const appDir = await createEmptyDir(path.join(projectDir, 'packages/app'));
  const accessedFile = path.join(appDir, 'file.ts');

  const projectMemoryPath = await createTestFile(
    path.join(projectDir, DEFAULT_CONTEXT_FILENAME),
    'Root rules',
  );
  const appMemoryPath = await createTestFile(
    path.join(appDir, DEFAULT_CONTEXT_FILENAME),
    'App rules',
  );

  const { files } = await loadJitSubdirectoryMemory(
    accessedFile,
    [projectDir],
    new Set(),
    undefined,
    [markerName],
  );

  // Both the project-level and the app-level memory files must be loaded.
  const loadedPaths = files.map((f) => f.path);
  expect(files).toHaveLength(2);
  expect(loadedPaths).toContain(projectMemoryPath);
  expect(loadedPaths).toContain(appMemoryPath);
});
it('should support multiple boundary markers', async () => {
  // Only package.json exists in the project directory; '.git' is listed as a
  // marker but is deliberately absent, so the second marker has to match.
  const candidateMarkers = ['.git', 'package.json'];
  const projectDir = await createEmptyDir(
    path.join(testRootDir, 'multi_marker'),
  );
  await createTestFile(path.join(projectDir, 'package.json'), '{}');
  const sourceDir = await createEmptyDir(path.join(projectDir, 'src'));
  const accessedFile = path.join(sourceDir, 'index.ts');

  const projectMemoryPath = await createTestFile(
    path.join(projectDir, DEFAULT_CONTEXT_FILENAME),
    'Root content',
  );

  const { files } = await loadJitSubdirectoryMemory(
    accessedFile,
    [projectDir],
    new Set(),
    undefined,
    candidateMarkers,
  );

  // The project-level memory file is the only one that exists, and it must
  // be discovered because package.json counts as a boundary marker.
  expect(files).toHaveLength(1);
  const [onlyFile] = files;
  expect(onlyFile.path).toBe(projectMemoryPath);
});
it('should fall back to the trusted root as the ceiling when boundary markers array is empty', async () => {
  const rootDir = await createEmptyDir(
    path.join(testRootDir, 'empty_markers'),
  );
  // A .git directory is present on purpose: with an empty marker list it
  // must NOT be treated as a project-root boundary.
  await createEmptyDir(path.join(rootDir, '.git'));
  const subDir = await createEmptyDir(path.join(rootDir, 'subdir'));
  const targetFile = path.join(subDir, 'target.txt');
  const rootMemory = await createTestFile(
    path.join(rootDir, DEFAULT_CONTEXT_FILENAME),
    'Root content',
  );
  const subDirMemory = await createTestFile(
    path.join(subDir, DEFAULT_CONTEXT_FILENAME),
    'Subdir content',
  );
  const result = await loadJitSubdirectoryMemory(
    targetFile,
    [rootDir],
    new Set(),
    undefined,
    [],
  );
  // With empty markers, no project root is found so the trusted root
  // is used as the ceiling. Traversal still finds files between the
  // target path and the trusted root — assert both of them explicitly so
  // the test pins which two files were loaded, not just how many.
  expect(result.files).toHaveLength(2);
  expect(result.files.find((f) => f.path === rootMemory)).toBeDefined();
  expect(result.files.find((f) => f.path === subDirMemory)).toBeDefined();
});
});
it('refreshServerHierarchicalMemory should refresh memory and update config', async () => {
@@ -1341,6 +1431,7 @@ included directory memory
getImportFormat: vi.fn().mockReturnValue('tree'),
getFileFilteringOptions: vi.fn().mockReturnValue(undefined),
getDiscoveryMaxDirs: vi.fn().mockReturnValue(200),
getMemoryBoundaryMarkers: vi.fn().mockReturnValue(['.git']),
setUserMemory: vi.fn(),
setGeminiMdFileCount: vi.fn(),
setGeminiMdFilePaths: vi.fn(),
+64 -37
View File
@@ -146,41 +146,54 @@ export async function deduplicatePathsByFileIdentity(
};
}
async function findProjectRoot(startDir: string): Promise<string | null> {
async function findProjectRoot(
startDir: string,
boundaryMarkers: readonly string[] = ['.git'],
): Promise<string | null> {
if (boundaryMarkers.length === 0) {
return null;
}
let currentDir = normalizePath(startDir);
while (true) {
const gitPath = path.join(currentDir, '.git');
try {
// Check for existence only — .git can be a directory (normal repos)
// or a file (submodules / worktrees).
await fs.access(gitPath);
return currentDir;
} catch (error: unknown) {
// Don't log ENOENT errors as they're expected when .git doesn't exist
// Also don't log errors in test environments, which often have mocked fs
const isENOENT =
typeof error === 'object' &&
error !== null &&
'code' in error &&
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
(error as { code: string }).code === 'ENOENT';
// Only log unexpected errors in non-test environments
// process.env['NODE_ENV'] === 'test' or VITEST are common test indicators
const isTestEnv =
process.env['NODE_ENV'] === 'test' || process.env['VITEST'];
if (!isENOENT && !isTestEnv) {
if (typeof error === 'object' && error !== null && 'code' in error) {
for (const marker of boundaryMarkers) {
// Sanitize: skip markers with path traversal or absolute paths
if (path.isAbsolute(marker) || marker.includes('..')) {
continue;
}
const markerPath = path.join(currentDir, marker);
try {
// Check for existence only — a marker can be a directory (e.g. .git in
// normal repos) or a file (e.g. .git in submodules / worktrees).
await fs.access(markerPath);
return currentDir;
} catch (error: unknown) {
// Don't log ENOENT errors as they're expected when marker doesn't exist
// Also don't log errors in test environments, which often have mocked fs
const isENOENT =
typeof error === 'object' &&
error !== null &&
'code' in error &&
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const fsError = error as { code: string; message: string };
logger.warn(
`Error checking for .git at ${gitPath}: ${fsError.message}`,
);
} else {
logger.warn(
`Non-standard error checking for .git at ${gitPath}: ${String(error)}`,
);
(error as { code: string }).code === 'ENOENT';
// Only log unexpected errors in non-test environments
// process.env['NODE_ENV'] === 'test' or VITEST are common test indicators
const isTestEnv =
process.env['NODE_ENV'] === 'test' || process.env['VITEST'];
if (!isENOENT && !isTestEnv) {
if (typeof error === 'object' && error !== null && 'code' in error) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const fsError = error as { code: string; message: string };
logger.warn(
`Error checking for ${marker} at ${markerPath}: ${fsError.message}`,
);
} else {
logger.warn(
`Non-standard error checking for ${marker} at ${markerPath}: ${String(error)}`,
);
}
}
}
}
@@ -200,6 +213,7 @@ async function getGeminiMdFilePathsInternal(
folderTrust: boolean,
fileFilteringOptions: FileFilteringOptions,
maxDirs: number,
boundaryMarkers: readonly string[] = ['.git'],
): Promise<{ global: string[]; project: string[] }> {
const dirs = new Set<string>([
...includeDirectoriesToReadGemini,
@@ -222,6 +236,7 @@ async function getGeminiMdFilePathsInternal(
folderTrust,
fileFilteringOptions,
maxDirs,
boundaryMarkers,
),
);
@@ -253,6 +268,7 @@ async function getGeminiMdFilePathsInternalForEachDir(
folderTrust: boolean,
fileFilteringOptions: FileFilteringOptions,
maxDirs: number,
boundaryMarkers: readonly string[] = ['.git'],
): Promise<{ global: string[]; project: string[] }> {
const globalPaths = new Set<string>();
const projectPaths = new Set<string>();
@@ -289,7 +305,7 @@ async function getGeminiMdFilePathsInternalForEachDir(
resolvedCwd,
);
const projectRoot = await findProjectRoot(resolvedCwd);
const projectRoot = await findProjectRoot(resolvedCwd, boundaryMarkers);
debugLogger.debug(
'[DEBUG] [MemoryDiscovery] Determined project root:',
projectRoot ?? 'None',
@@ -356,6 +372,7 @@ async function getGeminiMdFilePathsInternalForEachDir(
export async function readGeminiMdFiles(
filePaths: string[],
importFormat: 'flat' | 'tree' = 'tree',
boundaryMarkers: readonly string[] = ['.git'],
): Promise<GeminiFileContent[]> {
// Process files in parallel with concurrency limit to prevent EMFILE errors
const CONCURRENT_LIMIT = 20; // Higher limit for file reads as they're typically faster
@@ -376,6 +393,7 @@ export async function readGeminiMdFiles(
undefined,
undefined,
importFormat,
boundaryMarkers,
);
debugLogger.debug(
'[DEBUG] [MemoryDiscovery] Successfully read and processed imports:',
@@ -481,13 +499,14 @@ export function getExtensionMemoryPaths(
export async function getEnvironmentMemoryPaths(
trustedRoots: string[],
boundaryMarkers: readonly string[] = ['.git'],
): Promise<string[]> {
const allPaths = new Set<string>();
// Trusted Roots Upward Traversal (Parallelized)
const traversalPromises = trustedRoots.map(async (root) => {
const resolvedRoot = normalizePath(root);
const gitRoot = await findProjectRoot(resolvedRoot);
const gitRoot = await findProjectRoot(resolvedRoot, boundaryMarkers);
const ceiling = gitRoot ? normalizePath(gitRoot) : resolvedRoot;
debugLogger.debug(
'[DEBUG] [MemoryDiscovery] Loading environment memory for trusted root:',
@@ -597,6 +616,7 @@ export async function loadServerHierarchicalMemory(
importFormat: 'flat' | 'tree' = 'tree',
fileFilteringOptions?: FileFilteringOptions,
maxDirs: number = 200,
boundaryMarkers: readonly string[] = ['.git'],
): Promise<LoadServerHierarchicalMemoryResponse> {
// FIX: Use real, canonical paths for a reliable comparison to handle symlinks.
const realCwd = normalizePath(
@@ -629,6 +649,7 @@ export async function loadServerHierarchicalMemory(
folderTrust,
fileFilteringOptions || DEFAULT_MEMORY_FILE_FILTERING_OPTIONS,
maxDirs,
boundaryMarkers,
),
Promise.resolve(getExtensionMemoryPaths(extensionLoader)),
]);
@@ -669,7 +690,11 @@ export async function loadServerHierarchicalMemory(
}
// 2. GATHER: Read all files in parallel
const allContents = await readGeminiMdFiles(allFilePaths, importFormat);
const allContents = await readGeminiMdFiles(
allFilePaths,
importFormat,
boundaryMarkers,
);
const contentsMap = new Map(allContents.map((c) => [c.filePath, c]));
// 3. CATEGORIZE: Back into Global, Project, Extension
@@ -707,6 +732,7 @@ export async function refreshServerHierarchicalMemory(config: Config) {
config.getImportFormat(),
config.getFileFilteringOptions(),
config.getDiscoveryMaxDirs(),
config.getMemoryBoundaryMarkers(),
);
const mcpInstructions =
config.getMcpClientManager()?.getMcpInstructions() || '';
@@ -728,6 +754,7 @@ export async function loadJitSubdirectoryMemory(
trustedRoots: string[],
alreadyLoadedPaths: Set<string>,
alreadyLoadedIdentities?: Set<string>,
boundaryMarkers: readonly string[] = ['.git'],
): Promise<MemoryLoadResult> {
const resolvedTarget = normalizePath(targetPath);
let bestRoot: string | null = null;
@@ -760,7 +787,7 @@ export async function loadJitSubdirectoryMemory(
// Find the project root (located via the configured boundary markers,
// '.git' by default) to use as the traversal ceiling.
// If no project root is found, fall back to the trusted root as the ceiling.
const gitRoot = await findProjectRoot(bestRoot);
const gitRoot = await findProjectRoot(bestRoot, boundaryMarkers);
const resolvedCeiling = gitRoot ? normalizePath(gitRoot) : bestRoot;
debugLogger.debug(
@@ -850,7 +877,7 @@ export async function loadJitSubdirectoryMemory(
JSON.stringify(newPaths),
);
const contents = await readGeminiMdFiles(newPaths, 'tree');
const contents = await readGeminiMdFiles(newPaths, 'tree', boundaryMarkers);
return {
files: contents
@@ -48,18 +48,31 @@ export interface ProcessImportsResult {
importTree: MemoryFile;
}
/**
 * Finds the project root by walking upward from `startDir` until a directory
 * containing one of `boundaryMarkers` is found.
 *
 * A marker only has to exist; it may be a directory or a file (for example
 * `.git` is a directory in normal repos and a file in submodules/worktrees).
 *
 * Markers are validated once, before the walk. A marker is ignored when it:
 *  - is absolute,
 *  - contains a `..` path segment (names that merely contain two dots, such
 *    as `root..marker`, are accepted), or
 *  - is empty or consists only of `.` segments. Such a marker would resolve
 *    to the directory being probed, which always exists, and would therefore
 *    turn every directory into a "project root".
 *
 * @param startDir Directory to start searching from.
 * @param boundaryMarkers File or directory names that mark a project root.
 * @returns The nearest directory (starting at `startDir`) that contains a
 *   usable marker, or the resolved `startDir` when no usable marker is
 *   configured or none is found before reaching the filesystem root.
 */
async function findProjectRoot(
  startDir: string,
  boundaryMarkers: readonly string[] = ['.git'],
): Promise<string> {
  const fallbackRoot = path.resolve(startDir);

  // Validation does not depend on the directory being probed, so do it once
  // instead of on every step of the upward walk.
  const usableMarkers = boundaryMarkers.filter((marker) => {
    if (path.isAbsolute(marker)) {
      return false;
    }
    const segments = marker
      .split(/[\\/]/)
      .filter((segment) => segment !== '' && segment !== '.');
    return segments.length > 0 && !segments.includes('..');
  });
  if (usableMarkers.length === 0) {
    return fallbackRoot;
  }

  let currentDir = fallbackRoot;
  while (true) {
    for (const marker of usableMarkers) {
      try {
        // Check for existence only; the kind of entry does not matter.
        await fs.access(path.join(currentDir, marker));
        return currentDir;
      } catch {
        // Marker not present in this directory; try the next marker.
      }
    }
    const parentDir = path.dirname(currentDir);
    if (parentDir === currentDir) {
      // Reached the filesystem root without finding any marker.
      break;
    }
    currentDir = parentDir;
  }
  // Fallback to startDir if no marker found
  return fallbackRoot;
}
@@ -185,9 +198,10 @@ export async function processImports(
},
projectRoot?: string,
importFormat: 'flat' | 'tree' = 'tree',
boundaryMarkers: readonly string[] = ['.git'],
): Promise<ProcessImportsResult> {
if (!projectRoot) {
projectRoot = await findProjectRoot(basePath);
projectRoot = await findProjectRoot(basePath, boundaryMarkers);
}
if (importState.currentDepth >= importState.maxDepth) {
@@ -346,6 +360,7 @@ export async function processImports(
newImportState,
projectRoot,
importFormat,
boundaryMarkers,
);
result += `<!-- Imported from: ${importPath} -->\n${imported.content}\n<!-- End of import from: ${importPath} -->`;
imports.push(imported.importTree);