feat(core): stabilize agent harness, fix result extraction and turn limits

This commit is contained in:
mkorwel
2026-02-19 10:34:49 -06:00
parent 456f8be568
commit 102881c27f
6 changed files with 113 additions and 1257 deletions
-1131
View File
File diff suppressed because it is too large Load Diff
+5 -1
View File
@@ -801,7 +801,11 @@ export async function loadCliConfig(
argv.experimentalEnableAgents ?? settings.experimental?.enableAgents,
enableAgentHarness:
argv.experimentalAgentHarness ??
settings.experimental?.enableAgentHarness,
(process.env['GEMINI_ENABLE_AGENT_HARNESS'] === 'true'
? true
: process.env['GEMINI_ENABLE_AGENT_HARNESS'] === 'false'
? false
: settings.experimental?.enableAgentHarness),
plan: settings.experimental?.plan,
enableEventDrivenScheduler: true,
+34 -24
View File
@@ -95,14 +95,14 @@ export class HarnessSubagentInvocation extends BaseToolInvocation<
updateOutput(`🤖💭 ${lastThought}\n`);
// Also publish to message bus so UI hooks can see it regardless of where they listen
this.messageBus.publish({
void this.messageBus.publish({
type: 'subagent-activity',
activity: {
agentName: this.definition.name,
type: 'THOUGHT',
data: { subject: lastThought },
},
} as any);
} as never);
} else if (
event.type === GeminiEventType.SubagentActivity &&
'value' in event
@@ -113,10 +113,10 @@ export class HarnessSubagentInvocation extends BaseToolInvocation<
}
// Forward the core activity to the global bus
this.messageBus.publish({
void this.messageBus.publish({
type: 'subagent-activity',
activity: event.value,
} as any);
} as never);
}
}
}
@@ -126,7 +126,7 @@ export class HarnessSubagentInvocation extends BaseToolInvocation<
}
// 1. Initialize result with the explicit submitted output if available
let finalResultRaw: any = turn.submittedOutput;
let finalResultRaw: unknown = turn.submittedOutput;
let finalResultString: string | undefined;
// 2. Fallback: If no explicit output, try textual response
@@ -140,6 +140,8 @@ export class HarnessSubagentInvocation extends BaseToolInvocation<
}
}
const outputName = this.definition.outputConfig?.outputName || 'result';
// 3. Fallback: If still no result, extract from 'complete_task' tool call arguments (Directly from the turn)
if (finalResultRaw === undefined) {
const completeCall = turn.pendingToolCalls?.find(
@@ -150,8 +152,6 @@ export class HarnessSubagentInvocation extends BaseToolInvocation<
debugLogger.debug(
`[AgentHarness] [Invocation:${this.definition.name}] Found 'complete_task' call in pending tool calls.`,
);
const outputName =
this.definition.outputConfig?.outputName || 'result';
finalResultRaw =
completeCall.args[outputName] || completeCall.args['result'];
@@ -210,11 +210,13 @@ export class HarnessSubagentInvocation extends BaseToolInvocation<
'functionCall' in callPart &&
callPart.functionCall
) {
const outputName =
this.definition.outputConfig?.outputName || 'result';
finalResultRaw =
(callPart.functionCall.args as any)?.[outputName] ||
(callPart.functionCall.args as any)?.['result'];
(callPart.functionCall.args as Record<string, unknown>)?.[
outputName
] ||
(callPart.functionCall.args as Record<string, unknown>)?.[
'result'
];
if (finalResultRaw !== undefined) {
debugLogger.debug(
`[AgentHarness] [Invocation:${this.definition.name}] Extracted result from history function call.`,
@@ -225,9 +227,10 @@ export class HarnessSubagentInvocation extends BaseToolInvocation<
}
}
finalResultString = typeof finalResultRaw === 'object'
? JSON.stringify(finalResultRaw, null, 2)
: String(finalResultRaw ?? 'Task completed.');
finalResultString =
typeof finalResultRaw === 'object'
? JSON.stringify(finalResultRaw, null, 2)
: String(finalResultRaw ?? 'Task completed.');
const displayContent = `
Subagent ${this.definition.name} Finished (Harness Mode)
@@ -240,20 +243,27 @@ ${finalResultString}
updateOutput(displayContent);
}
const outputName = this.definition.outputConfig?.outputName || 'result';
// Parse as JSON if it's a string that looks like an object, to satisfy schema requirements
let finalResultData = finalResultRaw ?? 'Task completed.';
if (typeof finalResultData === 'string' && finalResultData.trim().startsWith('{')) {
try {
finalResultData = JSON.parse(finalResultData);
debugLogger.debug(`[AgentHarness] [Invocation:${this.definition.name}] Parsed string result into JSON object.`);
} catch (e) {
// Not valid JSON, keep as string
}
if (
typeof finalResultData === 'string' &&
finalResultData.trim().startsWith('{')
) {
try {
finalResultData = JSON.parse(finalResultData);
debugLogger.debug(
`[AgentHarness] [Invocation:${this.definition.name}] Parsed string result into JSON object.`,
);
} catch (_e) {
// Not valid JSON, keep as string
}
}
const outputName = this.definition.outputConfig?.outputName || 'result';
debugLogger.debug(
`[AgentHarness] [Invocation:${this.definition.name}] Returning data to parent: ${JSON.stringify(
finalResultData,
).slice(0, 500)}...`,
);
return {
llmContent: [{ text: finalResultString }],
+61 -101
View File
@@ -170,8 +170,9 @@ export class AgentHarness {
try {
while (this.turnCounter < maxTurnsLimit) {
const promptId = `${this.behavior.agentId}#${this.turnCounter}`;
const historySize = this.chat?.getHistory().length || 0;
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Starting turn ${this.turnCounter} (promptId: ${promptId})`,
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Starting turn ${this.turnCounter} (promptId: ${promptId}). History size: ${historySize} messages.`,
);
if (combinedSignal.aborted) {
@@ -265,9 +266,9 @@ export class AgentHarness {
// Subagent activity reporting
if (this.behavior.name !== 'main') {
const behaviorWithDef = this.behavior as SubagentBehavior;
const displayName =
(this.behavior as any).definition?.displayName ||
this.behavior.name;
behaviorWithDef.definition.displayName || this.behavior.name;
if (event.type === GeminiEventType.Thought) {
yield {
@@ -387,108 +388,67 @@ export class AgentHarness {
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] isGoalReached check: ${goalReached}`,
);
if (goalReached) {
terminateReason = AgentTerminateMode.GOAL;
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Goal reached. Processing findings for ${toolResults.length} tool results.`,
);
// Extract results from the 'complete_task' tool call arguments
for (const r of toolResults) {
const completeCall = turn.pendingToolCalls.find(
(c) => c.name === TASK_COMPLETE_TOOL_NAME,
);
let findingsText: string | undefined;
if (r.name === TASK_COMPLETE_TOOL_NAME && completeCall) {
const outputName =
(this.behavior as SubagentBehavior).definition?.outputConfig
?.outputName || 'result';
const rawFindings =
completeCall.args[outputName] || completeCall.args['result'];
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Extracting from complete_task args (${outputName}). Found: ${!!rawFindings}`,
);
if (rawFindings !== undefined) {
// CAPTURE RAW DATA: Don't stringify if it's an object/array,
// we need to preserve structure for the parent model.
turn.submittedOutput = rawFindings as any;
findingsText =
typeof rawFindings === 'object'
? JSON.stringify(rawFindings, null, 2)
: String(rawFindings);
}
} else {
const findings =
(r.result?.data as any)?.result || r.result?.resultDisplay;
if (findings !== undefined) {
findingsText = String(findings);
// Also capture as raw if not already set
if (turn.submittedOutput === undefined) {
turn.submittedOutput = findings;
}
}
}
if (findingsText) {
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Captured findings text. Length: ${findingsText.length}`,
);
if (this.chat) {
// Ensure the chat session records the final text result so future turns or getResponseText() can see it
this.chat.addHistory({
role: 'model',
parts: [{ text: findingsText }],
});
}
}
}
return turn;
if (goalReached) {
terminateReason = AgentTerminateMode.GOAL;
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Goal reached. Processing findings for ${toolResults.length} tool results.`,
);
// Extract results from the 'complete_task' tool call arguments
for (const r of toolResults) {
const completeCall = turn.pendingToolCalls.find(
(c) => c.name === TASK_COMPLETE_TOOL_NAME,
);
let findingsText: string | undefined;
if (r.name === TASK_COMPLETE_TOOL_NAME && completeCall) {
const behaviorWithDef = this.behavior as SubagentBehavior;
const outputName =
behaviorWithDef.definition.outputConfig.outputName ||
'result';
const args = completeCall.args as Record<string, unknown>;
const rawFindings = args[outputName] || args['result'];
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Extracting from complete_task args (${outputName}). Found: ${!!rawFindings}`,
);
if (rawFindings !== undefined) {
// CAPTURE RAW DATA: Don't stringify if it's an object/array,
// we need to preserve structure for the parent model.
turn.submittedOutput = rawFindings as string;
findingsText =
typeof rawFindings === 'object'
? JSON.stringify(rawFindings, null, 2)
: String(rawFindings);
}
currentRequest = toolResults.map((r) => {
// For subagents, we want to return the raw result to the LLM, not the human-friendly display.
const tool = this.toolRegistry.getTool(r.name);
if (tool instanceof SubagentTool) {
const outputName =
(tool as any).definition?.outputConfig?.outputName || 'result';
const findings = (r.result?.data as any)?.[outputName] || (r.result?.data as any)?.['result'];
debugLogger.debug(`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Subagent tool ${r.name} findings type: ${typeof findings}. Using outputName: ${outputName}`);
if (findings !== undefined && 'functionResponse' in r.part && r.part.functionResponse) {
const responsePayload = { [outputName]: findings };
debugLogger.debug(`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Sending tool response keys: ${Object.keys(responsePayload).join(', ')}`);
return {
functionResponse: {
...r.part.functionResponse,
response: responsePayload,
},
};
} else {
const findings =
(r.result?.data as Record<string, unknown> | undefined)?.[
'result'
] || r.result?.resultDisplay;
if (findings !== undefined) {
findingsText = String(findings);
// Also capture as raw if not already set
if (turn.submittedOutput === undefined) {
turn.submittedOutput = findings as string;
}
}
}
if (findingsText) {
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Captured findings text. Length: ${findingsText.length}`,
);
}
}
// Fallback for other tools: Ensure the LLM "sees" the rich result display if it's available.
if (
r.result?.resultDisplay &&
'functionResponse' in r.part &&
r.part.functionResponse
) {
return {
functionResponse: {
...r.part.functionResponse,
response: { result: String(r.result.resultDisplay) },
},
};
}
return r.part;
});
return turn;
}
currentRequest = toolResults.map((r) => r.part);
this.turnCounter++;
if (this.turnCounter >= maxTurnsLimit) {
terminateReason = AgentTerminateMode.MAX_TURNS;
@@ -235,6 +235,9 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
onWaitingForConfirmation?: (waiting: boolean) => void,
): Promise<AgentTurnResult> {
const promptId = `${this.agentId}#${turnCounter}`;
debugLogger.debug(
`[LegacySubagent] [${this.definition.name}:${this.agentId}] Starting turn ${turnCounter} (promptId: ${promptId})`,
);
await this.tryCompressChat(chat, promptId);
@@ -242,6 +245,14 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
this.callModel(chat, currentMessage, combinedSignal, promptId),
);
if (functionCalls.length > 0) {
debugLogger.debug(
`[LegacySubagent] [${this.definition.name}:${this.agentId}] Model made ${
functionCalls.length
} function calls: ${functionCalls.map((fc) => fc.name).join(', ')}`,
);
}
if (combinedSignal.aborted) {
const terminateReason = timeoutSignal.aborted
? AgentTerminateMode.TIMEOUT
+2
View File
@@ -560,6 +560,7 @@ export class GeminiClient {
let turn = new Turn(this.getChat(), prompt_id);
this.sessionTurnCount++;
debugLogger.debug(`[LegacyLoop] processTurn started. sessionTurnCount: ${this.sessionTurnCount}, prompt_id: ${prompt_id}`);
if (
this.config.getMaxSessionTurns() > 0 &&
this.sessionTurnCount > this.config.getMaxSessionTurns()
@@ -792,6 +793,7 @@ export class GeminiClient {
isInvalidStreamRetry: boolean = false,
displayContent?: PartListUnion,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
debugLogger.debug(`[LegacyLoop] sendMessageStream started. prompt_id: ${prompt_id}, turns left: ${turns}`);
if (!isInvalidStreamRetry) {
this.config.resetTurn();
}