fix(core): Prevent loop detection false positives on lists with long shared prefixes (#18975)

This commit is contained in:
Sandy Tao
2026-02-13 10:58:46 -08:00
committed by GitHub
parent c0e7da42b2
commit 9c285eaf15
2 changed files with 43 additions and 1 deletions

View File

@@ -210,6 +210,25 @@ describe('LoopDetectionService', () => {
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
});
// Regression guard: list entries that are all distinct but share a long
// common prefix must not be reported as a content loop.
it('should not detect a loop for a list with a long shared prefix', () => {
  service.reset('');

  const sharedPrefix =
    'projects/my-google-cloud-project-12345/locations/us-central1/services/';

  // Fifteen bullet lines that differ only in their trailing index.
  const listContent = Array.from(
    { length: 15 },
    (_, index) => `- ${sharedPrefix}${index}\n`,
  ).join('');

  // Deliver the whole list to the service as one content event.
  const isLoop = service.addAndCheck(createContentEvent(listContent));

  expect(isLoop).toBe(false);
  expect(loggers.logLoopDetected).not.toHaveBeenCalled();
});
it('should not detect a loop if repetitions are very far apart', () => {
service.reset('');
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);

View File

@@ -379,7 +379,30 @@ export class LoopDetectionService {
const averageDistance = totalDistance / (CONTENT_LOOP_THRESHOLD - 1);
const maxAllowedDistance = CONTENT_CHUNK_SIZE * 5;
return averageDistance <= maxAllowedDistance;
if (averageDistance > maxAllowedDistance) {
return false;
}
// Verify that the sequence is actually repeating, not just sharing a common prefix.
// For a true loop, the text between occurrences of the chunk (the period) should be highly repetitive.
const periods = new Set<string>();
for (let i = 0; i < recentIndices.length - 1; i++) {
periods.add(
this.streamContentHistory.substring(
recentIndices[i],
recentIndices[i + 1],
),
);
}
// If the periods are mostly unique, it's a list of distinct items with a shared prefix.
// A true loop will have a small number of unique periods (usually 1, sometimes 2 or 3).
// We use Math.floor(CONTENT_LOOP_THRESHOLD / 2) as a safe threshold.
if (periods.size > Math.floor(CONTENT_LOOP_THRESHOLD / 2)) {
return false;
}
return true;
}
/**