|
|
|
@@ -0,0 +1,113 @@
|
|
|
|
|
{"id":"read_file-01","metadata":{"tags":["tool:read_file","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z","platform":"darwin"},"input":{"user_query":"What are the contents of package.json?"},"expected":{"tool_calls":[{"name":"read_file","arguments":{"file_path":"package.json"}}],"rationale":"Directly use read_file for reading file contents instead of shell 'cat'."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"cat package.json"}}],"reason":"Generic shell command 'cat' is used instead of the specialized read_file tool.","severity":"high"},{"tool_calls":[{"name":"read_file","arguments":{"file_path":"package.json"}}],"output_text":"Certainly! I can help you read that file. Here are the contents of package.json:","reason":"Correct tool but excessive conversational filler.","severity":"low"}]}
|
|
|
|
|
{"id":"read_file-02","metadata":{"tags":["tool:read_file","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z","platform":"win32"},"input":{"user_query":"Show me the content of the README.md file"},"expected":{"tool_calls":[{"name":"read_file","arguments":{"file_path":"README.md"}}],"rationale":"Directly use read_file for reading file contents instead of shell 'type'."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"type README.md"}}],"reason":"Generic shell command 'type' is used on Windows instead of the specialized read_file tool.","severity":"high"}]}
|
|
|
|
|
{"id":"read_file-03","metadata":{"tags":["tool:read_file","range-efficiency"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Read the first 50 lines of src/index.ts"},"expected":{"tool_calls":[{"name":"read_file","arguments":{"file_path":"src/index.ts","start_line":1,"end_line":50}}],"rationale":"Use line range parameters to read only the requested portion of a file."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"head -n 50 src/index.ts"}}],"reason":"Used shell 'head' instead of read_file range parameters.","severity":"high"}]}
|
|
|
|
|
{"id":"read_file-04","metadata":{"tags":["tool:read_file","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Get the contents of the .env file"},"expected":{"tool_calls":[{"name":"read_file","arguments":{"file_path":".env"}}],"rationale":"Directly use read_file for reading file contents."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"cat .env"}}],"reason":"Used shell 'cat' for a file read.","severity":"high"}]}
|
|
|
|
|
{"id":"read_file-05","metadata":{"tags":["tool:read_file","precision"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Check the version in the VERSION file"},"expected":{"tool_calls":[{"name":"read_file","arguments":{"file_path":"VERSION"}}],"rationale":"Use read_file for quick file inspection."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"cat VERSION"}}],"reason":"Used shell 'cat' instead of read_file.","severity":"high"}]}
|
|
|
|
|
{"id":"list_directory-01","metadata":{"tags":["tool:list_directory","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z","platform":"darwin"},"input":{"user_query":"List the files in the src directory"},"expected":{"tool_calls":[{"name":"list_directory","arguments":{"dir_path":"src"}}],"rationale":"Use list_directory for viewing folder contents instead of shell 'ls'."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"ls src"}}],"reason":"Used shell 'ls' instead of list_directory.","severity":"high"}]}
|
|
|
|
|
{"id":"list_directory-02","metadata":{"tags":["tool:list_directory","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z","platform":"win32"},"input":{"user_query":"Show me what's in the docs folder"},"expected":{"tool_calls":[{"name":"list_directory","arguments":{"dir_path":"docs"}}],"rationale":"Use list_directory for viewing folder contents instead of shell 'dir'."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"dir docs"}}],"reason":"Used shell 'dir' on Windows instead of list_directory.","severity":"high"}]}
|
|
|
|
|
{"id":"list_directory-03","metadata":{"tags":["tool:list_directory","recursive-failure"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What files are in the project root?"},"expected":{"tool_calls":[{"name":"list_directory","arguments":{"dir_path":"."}}],"rationale":"Use list_directory for top-level directory listing."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"ls -F"}}],"reason":"Used shell 'ls' instead of list_directory.","severity":"high"}]}
|
|
|
|
|
{"id":"list_directory-04","metadata":{"tags":["tool:list_directory","filter-efficiency"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"List files in 'tests' but ignore '.tmp' files"},"expected":{"tool_calls":[{"name":"list_directory","arguments":{"dir_path":"tests","ignore":["*.tmp"]}}],"rationale":"Use the 'ignore' parameter of list_directory to filter results efficiently."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"ls tests | grep -v '.tmp'"}}],"reason":"Used a shell pipeline (ls | grep) instead of built-in list_directory filtering.","severity":"high"}]}
|
|
|
|
|
{"id":"list_directory-05","metadata":{"tags":["tool:list_directory","precision"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What is in the current folder?"},"expected":{"tool_calls":[{"name":"list_directory","arguments":{"dir_path":"."}}],"rationale":"Use list_directory for exploring the current working directory."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"ls"}}],"reason":"Used shell 'ls' instead of list_directory.","severity":"high"}]}
|
|
|
|
|
{"id":"write_file-01","metadata":{"tags":["tool:write_file","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Create a file named hello.txt with the content 'Hello World'"},"expected":{"tool_calls":[{"name":"write_file","arguments":{"file_path":"hello.txt","content":"Hello World"}}],"rationale":"Use write_file to create new files instead of shell 'echo'."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"echo 'Hello World' > hello.txt"}}],"reason":"Used shell redirection 'echo >' to create a file.","severity":"high"}]}
|
|
|
|
|
{"id":"write_file-02","metadata":{"tags":["tool:write_file","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Save this config to config.json: { \"active\": true }"},"expected":{"tool_calls":[{"name":"write_file","arguments":{"file_path":"config.json","content":"{ \"active\": true }"}}],"rationale":"Use write_file for creating structured data files."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"echo '{ \"active\": true }' > config.json"}}],"reason":"Used shell 'echo' for file creation.","severity":"high"}]}
|
|
|
|
|
{"id":"write_file-03","metadata":{"tags":["tool:write_file","precision"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Create a new empty file called .gitignore"},"expected":{"tool_calls":[{"name":"write_file","arguments":{"file_path":".gitignore","content":""}}],"rationale":"Use write_file with empty content to create empty files."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"touch .gitignore"}}],"reason":"Used shell 'touch' instead of write_file.","severity":"high"}]}
|
|
|
|
|
{"id":"write_file-04","metadata":{"tags":["tool:write_file","boilerplate"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Initialize a new README.md with some basic info"},"expected":{"tool_calls":[{"name":"write_file","arguments":{"file_path":"README.md","content":"# Project\n\nThis is a new project."}}],"rationale":"Use write_file for initial file creation."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"cat <<EOF > README.md\n# Project\n\nThis is a new project.\nEOF"}}],"reason":"Used shell heredoc instead of write_file.","severity":"high"}]}
|
|
|
|
|
{"id":"write_file-05","metadata":{"tags":["tool:write_file","overwrite"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Overwrite the current LICENSE file with the Apache-2.0 header"},"expected":{"tool_calls":[{"name":"write_file","arguments":{"file_path":"LICENSE","content":"Apache-2.0 Header..."}}],"rationale":"Use write_file to overwrite existing file content entirely."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"cp apache_header.txt LICENSE"}}],"reason":"Used shell 'cp' to overwrite a file instead of write_file.","severity":"high"}]}
|
|
|
|
|
{"id":"write_file-06","metadata":{"tags":["tool:write_file","large-content"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Create a new utility file in src/utils/logger.ts with a full logger implementation"},"expected":{"tool_calls":[{"name":"write_file","arguments":{"file_path":"src/utils/logger.ts","content":"export class Logger { ... }"}}],"rationale":"Use write_file for creating new source files with complete implementations."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"mkdir -p src/utils && echo 'export class Logger { ... }' > src/utils/logger.ts"}}],"reason":"Used shell 'mkdir' and 'echo' for file creation. write_file handles parent directory creation automatically.","severity":"high"}]}
|
|
|
|
|
{"id":"write_file-07","metadata":{"tags":["tool:write_file","data-integrity"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Write the tool_alignment schema to data/schema.ts"},"expected":{"tool_calls":[{"name":"write_file","arguments":{"file_path":"data/schema.ts","content":"export interface Scenario { ... }"}}],"rationale":"Use write_file for adding new code files to the project."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"cat > data/schema.ts <<EOF\n...\nEOF"}}],"reason":"Used shell redirection instead of write_file tool.","severity":"high"}]}
|
|
|
|
|
{"id":"write_file-08","metadata":{"tags":["tool:write_file","new-app"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Create a new index.html for the landing page."},"expected":{"tool_calls":[{"name":"write_file","arguments":{"file_path":"index.html","content":"<!DOCTYPE html>..."}}],"rationale":"Use write_file for creating web assets."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"echo '<!DOCTYPE html>...' > index.html"}}],"reason":"Used shell echo for file creation.","severity":"high"}]}
|
|
|
|
|
{"id":"write_file-09","metadata":{"tags":["tool:write_file","json-creation"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Create a manifest.json file for the app."},"expected":{"tool_calls":[{"name":"write_file","arguments":{"file_path":"manifest.json","content":"{ \"name\": \"My App\" }"}}],"rationale":"Use write_file for structured data files."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"echo '{ \"name\": \"My App\" }' > manifest.json"}}],"reason":"Used shell echo instead of write_file.","severity":"high"}]}
|
|
|
|
|
{"id":"write_file-10","metadata":{"tags":["tool:write_file","overwrite-safety"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Update the entire CONTRIBUTING.md with new instructions."},"expected":{"tool_calls":[{"name":"write_file","arguments":{"file_path":"CONTRIBUTING.md","content":"# Contributing..."}}],"rationale":"Use write_file for full-file updates when the entire content changes."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"cat > CONTRIBUTING.md <<EOF\n...\nEOF"}}],"reason":"Used shell heredoc for full-file update.","severity":"high"}]}
|
|
|
|
|
{"id":"replace-01","metadata":{"tags":["tool:replace","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Change the port from 3000 to 8080 in server.ts"},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"server.ts","old_string":"port: 3000","new_string":"port: 8080","instruction":"Change the server port to 8080."}}],"rationale":"Use replace for surgical text updates instead of shell 'sed'."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"sed -i 's/port: 3000/port: 8080/g' server.ts"}}],"reason":"Used shell 'sed' for text replacement.","severity":"high"}]}
|
|
|
|
|
{"id":"replace-02","metadata":{"tags":["tool:replace","inefficiency"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Fix a typo in index.ts: change 'recieve' to 'receive'"},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"index.ts","old_string":"recieve","new_string":"receive","instruction":"Fix typo: 'recieve' to 'receive'."}}],"rationale":"Use replace for small fixes instead of rewriting the entire file with write_file."},"negatives":[{"tool_calls":[{"name":"write_file","arguments":{"file_path":"index.ts","content":"(full content with fix)"}}],"reason":"Rewrote entire file for a single word fix. replace is more efficient and safer for large files.","severity":"medium"}]}
|
|
|
|
|
{"id":"replace-03","metadata":{"tags":["tool:replace","multi-occurrence"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Rename the variable 'userCount' to 'totalUsers' throughout app.ts"},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"app.ts","old_string":"userCount","new_string":"totalUsers","allow_multiple":true,"instruction":"Rename variable 'userCount' to 'totalUsers' globally in the file."}}],"rationale":"Use replace with allow_multiple=true for global renaming within a single file."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"sed -i 's/userCount/totalUsers/g' app.ts"}}],"reason":"Used shell 'sed' instead of replace.","severity":"high"}]}
|
|
|
|
|
{"id":"replace-04","metadata":{"tags":["tool:replace","context-safety"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Update the deprecated API call in src/client.ts"},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"src/client.ts","old_string":"api.oldMethod()","new_string":"api.newMethod()","instruction":"Update deprecated API call from oldMethod to newMethod."}}],"rationale":"Use replace to target specific code blocks safely."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"grep -l 'api.oldMethod()' src/client.ts && sed -i 's/api.oldMethod()/api.newMethod()/g' src/client.ts"}}],"reason":"Used a combination of shell commands instead of replace.","severity":"high"}]}
|
|
|
|
|
{"id":"replace-05","metadata":{"tags":["tool:replace","multiline"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Replace the existing try/catch block in main.ts with a more robust version"},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"main.ts","old_string":"try {\n doWork();\n} catch (e) {\n console.log(e);\n}","new_string":"try {\n await doWork();\n} catch (e) {\n logger.error('Failed to do work', e);\n throw e;\n}","instruction":"Improve error handling in the main work loop."}}],"rationale":"Use replace for multiline code block updates. It's safer than complex sed commands."},"negatives":[{"tool_calls":[{"name":"write_file","arguments":{"file_path":"main.ts","content":"(entire file)"}}],"reason":"Rewriting the whole file for a small block change is inefficient.","severity":"medium"}]}
|
|
|
|
|
{"id":"replace-06","metadata":{"tags":["tool:replace","steering-example"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Add the following log line to server.ts: console.log('started');"},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"server.ts","old_string":"app.listen(3000, () => {","new_string":"app.listen(3000, () => {\n console.log('started');","instruction":"Add a start log to the server listen callback."}}],"rationale":"Prefer using replace to add content to existing files instead of shell redirection (>>)."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"echo \"console.log('started');\" >> server.ts"}}],"reason":"Used shell redirection to append content. This is risky for source code as it doesn't guarantee correct placement or formatting.","severity":"high"}]}
|
|
|
|
|
{"id":"replace-07","metadata":{"tags":["tool:replace","version-bump"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Bump the version in package.json from 1.0.0 to 1.1.0."},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"package.json","old_string":"\"version\": \"1.0.0\"","new_string":"\"version\": \"1.1.0\"","instruction":"Bump project version to 1.1.0."}}],"rationale":"Use replace for surgical updates to configuration files."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm version 1.1.0 --no-git-tag-version"}}],"reason":"While the command works, the optimizer wants to see the model use the built-in replace tool for precise control when the user asks for a specific edit.","severity":"low"}]}
|
|
|
|
|
{"id":"replace-08","metadata":{"tags":["tool:replace","comment-update"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Update the TODO comment in database.ts to 'Fixed in v2'."},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"database.ts","old_string":"// TODO: optimize query","new_string":"// Fixed in v2","instruction":"Mark TODO as fixed."}}],"rationale":"Use replace for updating documentation and comments."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"sed -i 's|// TODO: optimize query|// Fixed in v2|' database.ts"}}],"reason":"Used shell sed for comment update.","severity":"high"}]}
|
|
|
|
|
{"id":"replace-09","metadata":{"tags":["tool:replace","import-fix"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Fix the import in main.ts: change '../utils' to '@utils'."},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"main.ts","old_string":"import { log } from '../utils'","new_string":"import { log } from '@utils'","instruction":"Fix relative import path."}}],"rationale":"Use replace for fixing import paths."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"sed -i \"s|'../utils'|'@utils'|\" main.ts"}}],"reason":"Used shell sed for import fix.","severity":"high"}]}
|
|
|
|
|
{"id":"replace-10","metadata":{"tags":["tool:replace","logic-patch"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Fix the off-by-one error in the loop in utils.ts."},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"utils.ts","old_string":"for (let i = 0; i <= arr.length; i++)","new_string":"for (let i = 0; i < arr.length; i++)","instruction":"Fix off-by-one error in loop condition."}}],"rationale":"Use replace for surgical logic fixes."},"negatives":[{"tool_calls":[{"name":"write_file","arguments":{"file_path":"utils.ts","content":"..."}}],"reason":"Rewrote entire file for a one-character fix.","severity":"medium"}]}
|
|
|
|
|
{"id":"replace-11","metadata":{"tags":["tool:replace","css-update"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Change the background color to #fff in styles.css."},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"styles.css","old_string":"background: #000;","new_string":"background: #fff;","instruction":"Update background color."}}],"rationale":"Use replace for styling updates."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"sed -i 's/#000/#fff/' styles.css"}}],"reason":"Used shell sed for CSS update.","severity":"high"}]}
|
|
|
|
|
{"id":"replace-12","metadata":{"tags":["tool:replace","md-update"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Update the title in README.md."},"expected":{"tool_calls":[{"name":"replace","arguments":{"file_path":"README.md","old_string":"# Old Title","new_string":"# New Title","instruction":"Update project title."}}],"rationale":"Use replace for documentation updates."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"sed -i 's/# Old Title/# New Title/' README.md"}}],"reason":"Used shell sed for Markdown update.","severity":"high"}]}
|
|
|
|
|
{"id":"grep_search-01","metadata":{"tags":["tool:grep_search","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Search for all occurrences of 'TODO' in the src directory"},"expected":{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"TODO","dir_path":"src"}}],"rationale":"Use grep_search for recursive text searching instead of shell 'grep'."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"grep -r 'TODO' src"}}],"reason":"Used shell 'grep' instead of grep_search.","severity":"high"}]}
|
|
|
|
|
{"id":"grep_search-02","metadata":{"tags":["tool:grep_search","precision"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Find where 'AuthService' is defined in the codebase"},"expected":{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"class AuthService","include_pattern":"**/*.ts"}}],"rationale":"Use grep_search with include_pattern for precise code discovery."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"grep -r 'class AuthService' ."}}],"reason":"Used shell 'grep' instead of grep_search.","severity":"high"}]}
|
|
|
|
|
{"id":"grep_search-03","metadata":{"tags":["tool:grep_search","case-sensitive"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Search for 'ERROR' (case-sensitive) in logs/"},"expected":{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"ERROR","dir_path":"logs","case_sensitive":true}}],"rationale":"Use grep_search with case_sensitive parameter."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"grep 'ERROR' logs/*"}}],"reason":"Used shell 'grep' instead of grep_search.","severity":"high"}]}
|
|
|
|
|
{"id":"grep_search-04","metadata":{"tags":["tool:grep_search","fixed-strings"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Find the exact string 'api/v1/users' in the config files"},"expected":{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"api/v1/users","fixed_strings":true,"include_pattern":"config/**"}}],"rationale":"Use fixed_strings:true for literal matches to avoid regex escaping issues."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"grep -F 'api/v1/users' config/*"}}],"reason":"Used shell 'grep -F' instead of grep_search.","severity":"high"}]}
|
|
|
|
|
{"id":"grep_search-05","metadata":{"tags":["tool:grep_search","context"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Search for 'db.connect' and show 3 lines of context"},"expected":{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"db.connect","context":3}}],"rationale":"Use context parameter to get surrounding lines."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"grep -C 3 'db.connect' ."}}],"reason":"Used shell 'grep -C' instead of grep_search.","severity":"high"}]}
|
|
|
|
|
{"id":"glob-01","metadata":{"tags":["tool:glob","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Find all .test.ts files in the integration-tests folder"},"expected":{"tool_calls":[{"name":"glob","arguments":{"pattern":"integration-tests/**/*.test.ts"}}],"rationale":"Use glob for recursive file discovery instead of shell 'find'."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"find integration-tests -name '*.test.ts'"}}],"reason":"Used shell 'find' instead of glob.","severity":"high"}]}
|
|
|
|
|
{"id":"glob-02","metadata":{"tags":["tool:glob","precision"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Where are the CSS files located in the project?"},"expected":{"tool_calls":[{"name":"glob","arguments":{"pattern":"**/*.css"}}],"rationale":"Use glob with double asterisks for project-wide file discovery."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"find . -name '*.css'"}}],"reason":"Used shell 'find' instead of glob.","severity":"high"}]}
|
|
|
|
|
{"id":"glob-03","metadata":{"tags":["tool:glob","shell-avoidance"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"List all markdown files in the docs directory"},"expected":{"tool_calls":[{"name":"glob","arguments":{"pattern":"docs/**/*.md"}}],"rationale":"Use glob for finding files matching a pattern."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"ls docs/**/*.md"}}],"reason":"Used shell 'ls' with globs instead of the specialized glob tool.","severity":"high"}]}
|
|
|
|
|
{"id":"glob-04","metadata":{"tags":["tool:glob","ignore-patterns"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Find all .js files, ignoring node_modules"},"expected":{"tool_calls":[{"name":"glob","arguments":{"pattern":"**/*.js","respect_git_ignore":true}}],"rationale":"glob respects .gitignore by default (passed explicitly here to make the intent clear), and .gitignore usually lists node_modules."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"find . -name '*.js' -not -path '*/node_modules/*'"}}],"reason":"Used complex shell find instead of glob.","severity":"high"}]}
|
|
|
|
|
{"id":"glob-05","metadata":{"tags":["tool:glob","sorting"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Find the most recently modified log files"},"expected":{"tool_calls":[{"name":"glob","arguments":{"pattern":"logs/*.log"}}],"rationale":"glob returns results sorted by modification time (newest first)."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"ls -t logs/*.log"}}],"reason":"Used shell 'ls -t' instead of glob.","severity":"high"}]}
|
|
|
|
|
{"id":"codebase_investigator-01","metadata":{"tags":["tool:codebase_investigator","architecture"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"How does the authentication flow work in this project?"},"expected":{"tool_calls":[{"name":"codebase_investigator","arguments":{"objective":"Investigate and explain the authentication flow, including key files and logic."}}],"rationale":"Use codebase_investigator for high-level architectural understanding."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"grep -r 'auth' ."}}],"reason":"Used raw grep for a complex architectural question. codebase_investigator is better for mapping dependencies and structure.","severity":"high"},{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"auth"}}],"reason":"Used grep_search when a higher-level investigator is more appropriate for broad understanding.","severity":"medium"}]}
|
|
|
|
|
{"id":"codebase_investigator-02","metadata":{"tags":["tool:codebase_investigator","refactoring"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I want to refactor the tool execution logic. Where should I start?"},"expected":{"tool_calls":[{"name":"codebase_investigator","arguments":{"objective":"Identify the core components and dependencies of the tool execution logic to prepare for a refactor."}}],"rationale":"Use codebase_investigator to map out dependencies before a major refactor."},"negatives":[{"tool_calls":[{"name":"list_directory","arguments":{"dir_path":"packages/core/src/tools"}}],"reason":"Used list_directory for a broad refactoring question. codebase_investigator provides more strategic insight.","severity":"medium"}]}
|
|
|
|
|
{"id":"codebase_investigator-03","metadata":{"tags":["tool:codebase_investigator","onboarding"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What is the overall structure of this monorepo?"},"expected":{"tool_calls":[{"name":"codebase_investigator","arguments":{"objective":"Map out the monorepo structure, identifying the purpose of each package."}}],"rationale":"Use codebase_investigator for quick onboarding to a new codebase."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"ls -R"}}],"reason":"Used shell 'ls -R' which produces too much noise for structural understanding.","severity":"high"}]}
|
|
|
|
|
{"id":"codebase_investigator-04","metadata":{"tags":["tool:codebase_investigator","debugging"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Investigate why the telemetry logs are not being sent to the server."},"expected":{"tool_calls":[{"name":"codebase_investigator","arguments":{"objective":"Find the telemetry logging logic and identify why it might be failing to send data."}}],"rationale":"Use codebase_investigator for root-cause analysis of system-wide issues."},"negatives":[{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"telemetry"}}],"reason":"Used simple grep when a systemic investigation is needed.","severity":"medium"}]}
|
|
|
|
|
{"id":"codebase_investigator-05","metadata":{"tags":["tool:codebase_investigator","feature-planning"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I want to add a new tool for database access. Where do the existing tools live?"},"expected":{"tool_calls":[{"name":"codebase_investigator","arguments":{"objective":"Locate the tool definition directory and understand the pattern for adding new tools."}}],"rationale":"Use codebase_investigator to find patterns and locations for new features."},"negatives":[{"tool_calls":[{"name":"glob","arguments":{"pattern":"**/*tool*"}}],"reason":"Used glob for a discovery task that codebase_investigator can handle more intelligently.","severity":"medium"}]}
|
|
|
|
|
{"id":"generalist-01","metadata":{"tags":["tool:generalist","delegation"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Fix all the linting errors in the 'packages/cli' directory."},"expected":{"tool_calls":[{"name":"generalist","arguments":{"request":"Fix all lint errors in the packages/cli directory using available tools like 'eslint --fix'."}}],"rationale":"Use the generalist agent for repetitive batch tasks across multiple files."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm run lint -- --fix"}}],"reason":"While the shell command works, the generalist agent is preferred for autonomous fixing, because complex errors may need manual edits beyond what '--fix' can apply automatically.","severity":"low"},{"tool_calls":[{"name":"read_file","arguments":{"file_path":"packages/cli/src/main.tsx"}}],"reason":"Attempted to manually read and fix files one by one instead of delegating the batch task.","severity":"medium"}]}
|
|
|
|
|
{"id":"generalist-02","metadata":{"tags":["tool:generalist","high-volume"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Add a license header to every .ts file in the project."},"expected":{"tool_calls":[{"name":"generalist","arguments":{"request":"Add Apache-2.0 license headers to all .ts files in the project."}}],"rationale":"Use generalist for high-volume, repetitive file modifications."},"negatives":[{"tool_calls":[{"name":"replace","arguments":{"file_path":"src/index.ts","old_string":"","new_string":"// License..."}}],"reason":"Attempted to manually add headers file-by-file.","severity":"high"}]}
|
|
|
|
|
{"id":"generalist-03","metadata":{"tags":["tool:generalist","speculative-research"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Try various ways to optimize the bundling process for the VS Code extension."},"expected":{"tool_calls":[{"name":"generalist","arguments":{"request":"Research and experiment with various bundling optimizations for the VS Code extension."}}],"rationale":"Use generalist for speculative research and trial-and-error investigations."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm run build"}}],"reason":"Only ran the build once without systematic research.","severity":"medium"}]}
|
|
|
|
|
{"id":"generalist-04","metadata":{"tags":["tool:generalist","batch-refactor"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Migrate all 'interface' definitions to 'type' definitions in 'packages/core'."},"expected":{"tool_calls":[{"name":"generalist","arguments":{"request":"Convert all interfaces to types in the packages/core directory."}}],"rationale":"Use generalist for systematic, codebase-wide refactoring tasks."},"negatives":[{"tool_calls":[{"name":"replace","arguments":{"file_path":"packages/core/src/types.ts","old_string":"interface","new_string":"type"}}],"reason":"Attempted to manually migrate file-by-file.","severity":"high"}]}
|
|
|
|
|
{"id":"generalist-05","metadata":{"tags":["tool:generalist","delegation"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Clean up all temporary files and log files in the project."},"expected":{"tool_calls":[{"name":"generalist","arguments":{"request":"Find and delete all temporary files (.tmp, .log) across the workspace."}}],"rationale":"Use generalist for maintenance tasks that span multiple directories."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"rm -rf **/*.log"}}],"reason":"Used a potentially dangerous recursive shell command when a generalist can do it safely and verify.","severity":"medium"}]}
|
|
|
|
|
{"id":"cli_help-01","metadata":{"tags":["tool:cli_help","meta"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"How do I enable the devtools in Gemini CLI?"},"expected":{"tool_calls":[{"name":"cli_help","arguments":{"question":"How to enable devtools in settings."}}],"rationale":"Use cli_help for questions about using the Gemini CLI itself."},"negatives":[{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"devtools","dir_path":"docs"}}],"reason":"Searched documentation manually when cli_help is the specialized assistant for this.","severity":"medium"}]}
|
|
|
|
|
{"id":"cli_help-02","metadata":{"tags":["tool:cli_help","config"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What are the available settings in config.yaml?"},"expected":{"tool_calls":[{"name":"cli_help","arguments":{"question":"Available settings in config.yaml"}}],"rationale":"Use cli_help to understand CLI configuration options."},"negatives":[{"tool_calls":[{"name":"read_file","arguments":{"file_path":".gemini/config.yaml"}}],"reason":"Read the config file directly without understanding the possible options and their meanings.","severity":"medium"}]}
|
|
|
|
|
{"id":"cli_help-03","metadata":{"tags":["tool:cli_help","features"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Does Gemini CLI support Model Context Protocol (MCP)?"},"expected":{"tool_calls":[{"name":"cli_help","arguments":{"question":"Support for Model Context Protocol (MCP)"}}],"rationale":"Use cli_help for feature-related inquiries."},"negatives":[{"tool_calls":[{"name":"google_web_search","arguments":{"query":"gemini cli mcp support"}}],"reason":"Used external search for a question about the internal features of the CLI.","severity":"medium"}]}
|
|
|
|
|
{"id":"cli_help-04","metadata":{"tags":["tool:cli_help","shortcuts"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What is the keyboard shortcut to focus the shell?"},"expected":{"tool_calls":[{"name":"cli_help","arguments":{"question":"Keyboard shortcut for focusing the shell"}}],"rationale":"Use cli_help for questions about CLI interaction and shortcuts."},"negatives":[{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"keybinding"}}],"reason":"Used grep to search for keybindings instead of asking cli_help, the specialized assistant for CLI usage questions.","severity":"medium"}]}
|
|
|
|
|
{"id":"cli_help-05","metadata":{"tags":["tool:cli_help","troubleshooting"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I'm getting an 'invalid_request' error when using Google Search tool."},"expected":{"tool_calls":[{"name":"cli_help","arguments":{"question":"Troubleshooting 'invalid_request' error in Google Search tool"}}],"rationale":"Use cli_help for troubleshooting errors related to the CLI's tools and extensions."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm run debug"}}],"reason":"Started debugging without checking the help system first for known issues.","severity":"low"}]}
|
|
|
|
|
{"id":"activate_skill-01","metadata":{"tags":["tool:activate_skill","workflow"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I want to create a new pull request for my changes."},"expected":{"tool_calls":[{"name":"activate_skill","arguments":{"name":"pr-creator"}}],"rationale":"Use activate_skill to get specialized instructions for common workflows like PR creation."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"gh pr create"}}],"reason":"Using the CLI directly is fine, but activate_skill provides repository-specific guidance and templates.","severity":"low"},{"tool_calls":[{"name":"generalist","arguments":{"request":"Create a PR"}}],"reason":"Delegated to a generalist when a specialized skill exists.","severity":"medium"}]}
|
|
|
|
|
{"id":"activate_skill-02","metadata":{"tags":["tool:activate_skill","docs"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Help me update the documentation in the docs/ folder."},"expected":{"tool_calls":[{"name":"activate_skill","arguments":{"name":"docs-writer"}}],"rationale":"Use docs-writer skill for any tasks involving Markdown or documentation files."},"negatives":[{"tool_calls":[{"name":"write_file","arguments":{"file_path":"docs/index.md","content":"..."}}],"reason":"Attempted to write documentation directly without activating the specialized documentation skill.","severity":"medium"}]}
|
|
|
|
|
{"id":"activate_skill-03","metadata":{"tags":["tool:activate_skill","review"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Can you review my local changes?"},"expected":{"tool_calls":[{"name":"activate_skill","arguments":{"name":"code-reviewer"}}],"rationale":"Use code-reviewer skill for high-quality code analysis."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"git diff"}}],"reason":"Just looking at the diff is not a proper review. activate_skill provides a specialized agent for reviews.","severity":"medium"}]}
|
|
|
|
|
{"id":"activate_skill-04","metadata":{"tags":["tool:activate_skill","issues"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I found a bug. Create a GitHub issue for it."},"expected":{"tool_calls":[{"name":"activate_skill","arguments":{"name":"github-issue-creator"}}],"rationale":"Use github-issue-creator for standardized issue reporting."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"gh issue create"}}],"reason":"Direct CLI usage misses the benefit of project-specific templates provided by the skill.","severity":"low"}]}
|
|
|
|
|
{"id":"activate_skill-05","metadata":{"tags":["tool:activate_skill","new-skill"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I want to create a new skill to automate my deployments."},"expected":{"tool_calls":[{"name":"activate_skill","arguments":{"name":"skill-creator"}}],"rationale":"Use skill-creator to extend the CLI's capabilities."},"negatives":[{"tool_calls":[{"name":"write_file","arguments":{"file_path":".gemini/skills/deploy/SKILL.md","content":"..."}}],"reason":"Attempted to create a skill manually without the expert guidance of the skill-creator.","severity":"medium"}]}
|
|
|
|
|
{"id":"ask_user-01","metadata":{"tags":["tool:ask_user","confirmation"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I'm ready to delete the old build artifacts. Should I proceed?"},"expected":{"tool_calls":[{"name":"ask_user","arguments":{"questions":[{"question":"Are you sure you want to delete the old build artifacts?","header":"Confirm Delete","type":"yesno"}]}}],"rationale":"Use ask_user with type:'yesno' for critical confirmations."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"rm -rf dist/*"}}],"reason":"Proceeded with a destructive action without explicit user confirmation.","severity":"high"}]}
|
|
|
|
|
{"id":"ask_user-02","metadata":{"tags":["tool:ask_user","selection"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I need to know which database you want to use for the new service."},"expected":{"tool_calls":[{"name":"ask_user","arguments":{"questions":[{"question":"Which database should be used for the new service?","header":"Database Select","type":"choice","options":[{"label":"PostgreSQL","description":"Relational database"},{"label":"MongoDB","description":"NoSQL document store"}]}]}}],"rationale":"Use ask_user with type:'choice' for structured options."},"negatives":[{"tool_calls":[{"name":"ask_user","arguments":{"questions":[{"question":"What database do you want?","header":"DB","type":"text"}]}}],"reason":"Used a free-form text input when specific choices were available, leading to potential ambiguity.","severity":"low"}]}
|
|
|
|
|
{"id":"ask_user-03","metadata":{"tags":["tool:ask_user","multi-question"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I need your name and email to set up the git config."},"expected":{"tool_calls":[{"name":"ask_user","arguments":{"questions":[{"question":"What is your name?","header":"Git Name","type":"text"},{"question":"What is your email?","header":"Git Email","type":"text"}]}}],"rationale":"Use ask_user to gather multiple pieces of information in one turn."},"negatives":[{"tool_calls":[{"name":"ask_user","arguments":{"questions":[{"question":"What is your name?","header":"Name","type":"text"}]}}],"reason":"Only asked one question when two were needed, requiring an extra turn.","severity":"medium"}]}
|
|
|
|
|
{"id":"ask_user-04","metadata":{"tags":["tool:ask_user","clarification"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Update the API."},"expected":{"tool_calls":[{"name":"ask_user","arguments":{"questions":[{"question":"Which API do you want to update (Internal or External)?","header":"Clarification","type":"choice","options":[{"label":"Internal","description":"packages/core/src/api"},{"label":"External","description":"Public facing API"}]}]}}],"rationale":"Use ask_user to clarify ambiguous user requests before acting."},"negatives":[{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"API"}}],"reason":"Attempted to search for 'API' without clarifying the user's vague intent.","severity":"medium"}]}
|
|
|
|
|
{"id":"ask_user-05","metadata":{"tags":["tool:ask_user","multiselect"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Which packages should I run the tests for?"},"expected":{"tool_calls":[{"name":"ask_user","arguments":{"questions":[{"question":"Select the packages to run tests for:","header":"Package Select","type":"choice","multiSelect":true,"options":[{"label":"cli","description":"Terminal UI"},{"label":"core","description":"Backend logic"},{"label":"devtools","description":"Inspector UI"}]}]}}],"rationale":"Use multiSelect:true for allowing multiple choices."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm test"}}],"reason":"Ran all tests without asking the user which specific packages they were interested in.","severity":"medium"}]}
|
|
|
|
|
{"id":"enter_plan_mode-01","metadata":{"tags":["tool:enter_plan_mode","complex-task"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I want to implement a new Model Context Protocol (MCP) server."},"expected":{"tool_calls":[{"name":"enter_plan_mode","arguments":{"reason":"Design and plan the implementation of a new MCP server, including architecture and tool definitions."}}],"rationale":"Use enter_plan_mode for complex, multi-stage implementation tasks."},"negatives":[{"tool_calls":[{"name":"write_file","arguments":{"file_path":"packages/mcp-server/src/index.ts","content":"..."}}],"reason":"Started coding a complex feature without a design phase.","severity":"high"}]}
|
|
|
|
|
{"id":"enter_plan_mode-02","metadata":{"tags":["tool:enter_plan_mode","refactoring"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Refactor the entire telemetry system to use a new backend."},"expected":{"tool_calls":[{"name":"enter_plan_mode","arguments":{"reason":"Plan the refactoring of the telemetry system, ensuring all components are updated and tested."}}],"rationale":"Use enter_plan_mode for large-scale refactoring."},"negatives":[{"tool_calls":[{"name":"codebase_investigator","arguments":{"objective":"Refactor telemetry"}}],"reason":"Delegated to an investigator without entering plan mode first to manage the complex transition.","severity":"medium"}]}
|
|
|
|
|
{"id":"enter_plan_mode-03","metadata":{"tags":["tool:enter_plan_mode","architecture"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"How should we handle secrets in our CI/CD pipeline?"},"expected":{"tool_calls":[{"name":"enter_plan_mode","arguments":{"reason":"Research and design a secure secret management strategy for the CI/CD pipeline."}}],"rationale":"Use enter_plan_mode for architectural design discussions."},"negatives":[{"tool_calls":[{"name":"google_web_search","arguments":{"query":"github actions secrets best practices"}}],"reason":"Started research without entering a formal planning state for the architectural decision.","severity":"low"}]}
|
|
|
|
|
{"id":"enter_plan_mode-04","metadata":{"tags":["tool:enter_plan_mode","integration"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Integrate the new DevTools UI with the main CLI application."},"expected":{"tool_calls":[{"name":"enter_plan_mode","arguments":{"reason":"Plan the integration of DevTools UI, focusing on the connection between React/Ink and the DevTools server."}}],"rationale":"Use enter_plan_mode for complex integration tasks."},"negatives":[{"tool_calls":[{"name":"replace","arguments":{"file_path":"packages/cli/src/main.tsx","old_string":"...","new_string":"..."}}],"reason":"Attempted to integrate a major feature via a single file edit without planning.","severity":"high"}]}
|
|
|
|
|
{"id":"enter_plan_mode-05","metadata":{"tags":["tool:enter_plan_mode","system-change"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Change the build system from esbuild to Rollup."},"expected":{"tool_calls":[{"name":"enter_plan_mode","arguments":{"reason":"Evaluate and plan the migration of the build system from esbuild to Rollup."}}],"rationale":"Use enter_plan_mode for fundamental system changes."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm install rollup"}}],"reason":"Started installing dependencies before planning the migration.","severity":"medium"}]}
|
|
|
|
|
{"id":"exit_plan_mode-01","metadata":{"tags":["tool:exit_plan_mode","transition"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I've finished the design doc. Let's start implementing."},"expected":{"tool_calls":[{"name":"exit_plan_mode","arguments":{"plan_path":"plans/my-feature-plan.md"}}],"rationale":"Use exit_plan_mode to transition from planning to implementation."},"negatives":[{"tool_calls":[{"name":"write_file","arguments":{"file_path":"src/feature.ts","content":"..."}}],"reason":"Started implementation without formally exiting plan mode.","severity":"medium"}]}
|
|
|
|
|
{"id":"get_internal_docs-01","metadata":{"tags":["tool:get_internal_docs","discovery"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What are the coding standards for this project?"},"expected":{"tool_calls":[{"name":"get_internal_docs","arguments":{"path":"reference/coding-standards.md"}}],"rationale":"Use get_internal_docs for project-specific standards."},"negatives":[{"tool_calls":[{"name":"read_file","arguments":{"file_path":"docs/reference/coding-standards.md"}}],"reason":"Read the doc file directly instead of using the specialized internal docs tool.","severity":"low"}]}
|
|
|
|
|
{"id":"get_internal_docs-02","metadata":{"tags":["tool:get_internal_docs","onboarding"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Show me all the internal documentation available."},"expected":{"tool_calls":[{"name":"get_internal_docs","arguments":{}}],"rationale":"Call get_internal_docs without arguments to list all available docs."},"negatives":[{"tool_calls":[{"name":"list_directory","arguments":{"dir_path":"docs"}}],"reason":"Listed the directory instead of using the docs tool to see organized documentation.","severity":"low"}]}
|
|
|
|
|
{"id":"exit_plan_mode-02","metadata":{"tags":["tool:exit_plan_mode","rejection"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I don't like this plan. Can we try a different approach?"},"expected":{"tool_calls":[{"name":"exit_plan_mode","arguments":{"plan_path":"plans/rejected-plan.md"}}],"rationale":"Use exit_plan_mode when a plan is rejected and you need to restart or pivot."},"negatives":[{"tool_calls":[{"name":"enter_plan_mode","arguments":{"reason":"Try again"}}],"reason":"Attempted to re-enter plan mode without exiting the current one first.","severity":"medium"}]}
|
|
|
|
|
{"id":"exit_plan_mode-03","metadata":{"tags":["tool:exit_plan_mode","completion"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"The plan looks perfect. Go ahead."},"expected":{"tool_calls":[{"name":"exit_plan_mode","arguments":{"plan_path":"plans/approved-plan.md"}}],"rationale":"Use exit_plan_mode to signal that planning is over and implementation can begin."},"negatives":[{"tool_calls":[{"name":"generalist","arguments":{"request":"Start implementing"}}],"reason":"Delegated implementation while still in plan mode.","severity":"medium"}]}
|
|
|
|
|
{"id":"exit_plan_mode-04","metadata":{"tags":["tool:exit_plan_mode","cancel"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Nevermind about this feature, let's stop planning."},"expected":{"tool_calls":[{"name":"exit_plan_mode","arguments":{"plan_path":"plans/cancelled-feature.md"}}],"rationale":"Use exit_plan_mode to gracefully stop a planning session."},"negatives":[{"tool_calls":[{"name":"ask_user","arguments":{"questions":[{"question":"Should I stop?","header":"Stop","type":"yesno"}]}}],"reason":"Asked for confirmation to stop when the intent to stop was clear.","severity":"low"}]}
|
|
|
|
|
{"id":"exit_plan_mode-05","metadata":{"tags":["tool:exit_plan_mode","handoff"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I'll take it from here, thanks for the plan."},"expected":{"tool_calls":[{"name":"exit_plan_mode","arguments":{"plan_path":"plans/handoff-plan.md"}}],"rationale":"Use exit_plan_mode to finalize the plan for the user."},"negatives":[{"tool_calls":[{"name":"save_memory","arguments":{"fact":"The user liked the plan"}}],"reason":"Saved a memory instead of closing the planning session.","severity":"low"}]}
|
|
|
|
|
{"id":"get_internal_docs-03","metadata":{"tags":["tool:get_internal_docs","troubleshooting"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"How do I fix common authentication errors?"},"expected":{"tool_calls":[{"name":"get_internal_docs","arguments":{"path":"admin/auth-troubleshooting.md"}}],"rationale":"Use internal docs for specialized troubleshooting guides."},"negatives":[{"tool_calls":[{"name":"google_web_search","arguments":{"query":"gemini cli auth error"}}],"reason":"Searched the web for internal documentation.","severity":"medium"}]}
|
|
|
|
|
{"id":"get_internal_docs-04","metadata":{"tags":["tool:get_internal_docs","tools"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What are the parameters for the 'replace' tool?"},"expected":{"tool_calls":[{"name":"get_internal_docs","arguments":{"path":"tools/replace.md"}}],"rationale":"Use internal docs to find detailed tool information."},"negatives":[{"tool_calls":[{"name":"cli_help","arguments":{"question":"replace tool params"}}],"reason":"While cli_help works, get_internal_docs provides direct access to the documentation source.","severity":"low"}]}
|
|
|
|
|
{"id":"get_internal_docs-05","metadata":{"tags":["tool:get_internal_docs","contribution"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"How can I contribute to this project?"},"expected":{"tool_calls":[{"name":"get_internal_docs","arguments":{"path":"CONTRIBUTING.md"}}],"rationale":"Use get_internal_docs to access project meta-documentation."},"negatives":[{"tool_calls":[{"name":"read_file","arguments":{"file_path":"CONTRIBUTING.md"}}],"reason":"Read the file directly instead of using the docs tool.","severity":"low"}]}
|
|
|
|
|
{"id":"google_web_search-01","metadata":{"tags":["tool:google_web_search","research"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What's the latest stable version of React?"},"expected":{"tool_calls":[{"name":"google_web_search","arguments":{"query":"latest stable version of React"}}],"rationale":"Use web search for up-to-date external information."},"negatives":[{"tool_calls":[{"name":"read_file","arguments":{"file_path":"package.json"}}],"reason":"Checked local file for global external information.","severity":"medium"}]}
|
|
|
|
|
{"id":"google_web_search-02","metadata":{"tags":["tool:google_web_search","documentation"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"How do I use the 'Suspense' component in React 18?"},"expected":{"tool_calls":[{"name":"google_web_search","arguments":{"query":"React 18 Suspense documentation"}}],"rationale":"Use web search for library-specific documentation."},"negatives":[{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"Suspense"}}],"reason":"Searched local code for external library documentation.","severity":"medium"}]}
|
|
|
|
|
{"id":"google_web_search-03","metadata":{"tags":["tool:google_web_search","troubleshooting"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Error: TS2322: Type 'string' is not assignable to type 'number'."},"expected":{"tool_calls":[{"name":"google_web_search","arguments":{"query":"TypeScript error TS2322 string not assignable to number"}}],"rationale":"Use web search to research common programming errors."},"negatives":[{"tool_calls":[{"name":"generalist","arguments":{"request":"Fix TS2322"}}],"reason":"Delegated to an agent before researching the error.","severity":"low"}]}
|
|
|
|
|
{"id":"google_web_search-04","metadata":{"tags":["tool:google_web_search","comparison"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Compare Vitest vs Jest for a new TypeScript project."},"expected":{"tool_calls":[{"name":"google_web_search","arguments":{"query":"Vitest vs Jest comparison 2026"}}],"rationale":"Use web search for architectural comparisons."},"negatives":[{"tool_calls":[{"name":"cli_help","arguments":{"question":"Vitest vs Jest"}}],"reason":"Asked the internal help system for external library comparisons.","severity":"medium"}]}
|
|
|
|
|
{"id":"google_web_search-05","metadata":{"tags":["tool:google_web_search","news"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What are the new features in the latest Node.js release?"},"expected":{"tool_calls":[{"name":"google_web_search","arguments":{"query":"latest Node.js release features"}}],"rationale":"Use web search for recent technical news."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"node -v"}}],"reason":"Checked local version instead of researching new features.","severity":"medium"}]}
|
|
|
|
|
{"id":"read_many_files-01","metadata":{"tags":["tool:read_many_files","codebase-overview"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Show me all the CSS files in the project to check the styling."},"expected":{"tool_calls":[{"name":"read_many_files","arguments":{"include":["**/*.css"]}}],"rationale":"Use read_many_files to get the content of multiple files matching a pattern."},"negatives":[{"tool_calls":[{"name":"glob","arguments":{"pattern":"**/*.css"}}],"reason":"Only listed the files instead of reading their content as requested.","severity":"medium"},{"tool_calls":[{"name":"read_file","arguments":{"file_path":"src/app.css"}}],"reason":"Read only one file when all CSS files were requested.","severity":"medium"}]}
|
|
|
|
|
{"id":"read_many_files-02","metadata":{"tags":["tool:read_many_files","batch-reading"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Read all the README files in the packages directory."},"expected":{"tool_calls":[{"name":"read_many_files","arguments":{"include":["packages/**/README.md"]}}],"rationale":"Use read_many_files for batch content retrieval."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"cat packages/**/README.md"}}],"reason":"Used shell 'cat' with globs instead of the specialized read_many_files tool.","severity":"high"}]}
|
|
|
|
|
{"id":"read_many_files-03","metadata":{"tags":["tool:read_many_files","precision"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Show me the content of all .test.ts files in src/utils."},"expected":{"tool_calls":[{"name":"read_many_files","arguments":{"include":["src/utils/*.test.ts"]}}],"rationale":"Use read_many_files for targeted batch reading."},"negatives":[{"tool_calls":[{"name":"read_many_files","arguments":{"include":["**/*.test.ts"]}}],"reason":"Read too many files (all tests) instead of only those in src/utils.","severity":"low"}]}
|
|
|
|
|
{"id":"read_many_files-04","metadata":{"tags":["tool:read_many_files","context-gathering"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I need to see all the configuration files (json and yaml)."},"expected":{"tool_calls":[{"name":"read_many_files","arguments":{"include":["**/*.json","**/*.yaml"]}}],"rationale":"Use read_many_files with multiple patterns to gather context."},"negatives":[{"tool_calls":[{"name":"list_directory","arguments":{"dir_path":"."}}],"reason":"Only listed files instead of reading their content.","severity":"medium"}]}
|
|
|
|
|
{"id":"read_many_files-05","metadata":{"tags":["tool:read_many_files","efficiency"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Read all source files in the current folder."},"expected":{"tool_calls":[{"name":"read_many_files","arguments":{"include":["*.ts","*.tsx","*.js"]}}],"rationale":"Use read_many_files to read specific source extensions."},"negatives":[{"tool_calls":[{"name":"read_file","arguments":{"file_path":"index.ts"}}],"reason":"Read only one file instead of all source files.","severity":"medium"}]}
|
|
|
|
|
{"id":"save_memory-01","metadata":{"tags":["tool:save_memory","preference"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I prefer using tabs instead of spaces for indentation."},"expected":{"tool_calls":[{"name":"save_memory","arguments":{"fact":"The user prefers using tabs instead of spaces for indentation."}}],"rationale":"Use save_memory to persist global user preferences."},"negatives":[{"tool_calls":[{"name":"write_file","arguments":{"file_path":".editorconfig","content":"..."}}],"reason":"Attempted to change project-specific config instead of saving a global user preference.","severity":"medium"}]}
|
|
|
|
|
{"id":"save_memory-02","metadata":{"tags":["tool:save_memory","style"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Always use double quotes for strings in TypeScript."},"expected":{"tool_calls":[{"name":"save_memory","arguments":{"fact":"The user prefers double quotes for strings in TypeScript."}}],"rationale":"Use save_memory for coding style preferences."},"negatives":[{"tool_calls":[{"name":"ask_user","arguments":{"questions":[{"question":"Should I use double quotes?","header":"Style","type":"yesno"}]}}],"reason":"Asked for confirmation instead of remembering the clear preference.","severity":"low"}]}
|
|
|
|
|
{"id":"save_memory-03","metadata":{"tags":["tool:save_memory","project-context-failure"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Remember that the main server file is src/server.ts."},"expected":{"tool_calls":[{"name":"ask_user","arguments":{"questions":[{"question":"This seems specific to this project. Should I remember it globally or is it only for this workspace?","header":"Memory Scope","type":"choice","options":[{"label":"Global","description":"Save to global memory"},{"label":"Project","description":"Only for this project (do not save)"}]}]}}],"rationale":"Ask before saving project-specific facts to global memory to avoid cluttering cross-session context."},"negatives":[{"tool_calls":[{"name":"save_memory","arguments":{"fact":"The main server file is src/server.ts"}}],"reason":"Saved a project-specific fact to global memory without confirmation.","severity":"medium"}]}
|
|
|
|
|
{"id":"save_memory-04","metadata":{"tags":["tool:save_memory","persona"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I am a senior backend engineer."},"expected":{"tool_calls":[{"name":"save_memory","arguments":{"fact":"The user is a senior backend engineer."}}],"rationale":"Use save_memory to remember user background/persona info."},"negatives":[{"tool_calls":[{"name":"generalist","arguments":{"request":"Remember user profile"}}],"reason":"Delegated a simple memory task to an agent.","severity":"low"}]}
|
|
|
|
|
{"id":"save_memory-05","metadata":{"tags":["tool:save_memory","workflow"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I like to run tests before every commit."},"expected":{"tool_calls":[{"name":"save_memory","arguments":{"fact":"The user prefers to run tests before every commit."}}],"rationale":"Use save_memory for workflow preferences."},"negatives":[{"tool_calls":[{"name":"write_todos","arguments":{"todos":[{"description":"Run tests","status":"pending"}]}}],"reason":"Added to current todos instead of saving a permanent preference.","severity":"low"}]}
|
|
|
|
|
{"id":"run_shell_command-01","metadata":{"tags":["tool:run_shell_command","test-execution"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Run the unit tests for the core package."},"expected":{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm run test -w @google/gemini-cli-core"}}],"rationale":"Use run_shell_command for executing ecosystem tools like test runners."},"negatives":[{"tool_calls":[{"name":"generalist","arguments":{"request":"Run tests"}}],"reason":"Delegated a simple command execution to an agent.","severity":"low"}]}
|
|
|
|
|
{"id":"run_shell_command-02","metadata":{"tags":["tool:run_shell_command","git-ops"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Check the git status of the project."},"expected":{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"git status"}}],"rationale":"Use run_shell_command for git operations."},"negatives":[{"tool_calls":[{"name":"list_directory","arguments":{"dir_path":".git"}}],"reason":"Attempted to check git status by listing the .git directory.","severity":"medium"}]}
|
|
|
|
|
{"id":"run_shell_command-03","metadata":{"tags":["tool:run_shell_command","background-process"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Start the DevTools server in the background."},"expected":{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm run start --workspace @google/gemini-cli-devtools","is_background":true}}],"rationale":"Use is_background:true for long-running servers."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm run start --workspace @google/gemini-cli-devtools"}}],"reason":"Started a server in the foreground, which would block the CLI.","severity":"medium"}]}
|
|
|
|
|
{"id":"run_shell_command-04","metadata":{"tags":["tool:run_shell_command","env-check"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What version of Node.js am I running?"},"expected":{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"node -v"}}],"rationale":"Use run_shell_command for environment introspection."},"negatives":[{"tool_calls":[{"name":"google_web_search","arguments":{"query":"latest node version"}}],"reason":"Searched web instead of checking local environment.","severity":"medium"}]}
|
|
|
|
|
{"id":"run_shell_command-05","metadata":{"tags":["tool:run_shell_command","build-system"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Build the whole project."},"expected":{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm run build:all"}}],"rationale":"Use run_shell_command for build commands."},"negatives":[{"tool_calls":[{"name":"generalist","arguments":{"request":"Build project"}}],"reason":"Delegated a standard build command.","severity":"low"}]}
|
|
|
|
|
{"id":"web_fetch-01","metadata":{"tags":["tool:web_fetch","documentation"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Summarize the breaking changes in this URL: https://example.com/changelog"},"expected":{"tool_calls":[{"name":"web_fetch","arguments":{"prompt":"Summarize the breaking changes from https://example.com/changelog"}}],"rationale":"Use web_fetch to extract and summarize content from a specific URL."},"negatives":[{"tool_calls":[{"name":"google_web_search","arguments":{"query":"breaking changes example.com"}}],"reason":"Used search instead of fetching the provided URL directly.","severity":"medium"}]}
|
|
|
|
|
{"id":"web_fetch-02","metadata":{"tags":["tool:web_fetch","github-code"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Read the implementation of this function: https://github.com/user/repo/blob/main/src/lib.ts#L10"},"expected":{"tool_calls":[{"name":"web_fetch","arguments":{"prompt":"Read and explain the implementation at https://github.com/user/repo/blob/main/src/lib.ts#L10"}}],"rationale":"Use web_fetch for reading remote code snippets."},"negatives":[{"tool_calls":[{"name":"google_web_search","arguments":{"query":"implementation of src/lib.ts"}}],"reason":"Used search for a specific GitHub link.","severity":"medium"}]}
|
|
|
|
|
{"id":"web_fetch-03","metadata":{"tags":["tool:web_fetch","multi-url"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Compare the feature sets of these two libraries: https://lib-a.com and https://lib-b.com"},"expected":{"tool_calls":[{"name":"web_fetch","arguments":{"prompt":"Compare the feature sets of https://lib-a.com and https://lib-b.com"}}],"rationale":"Use web_fetch to analyze and compare multiple URLs."},"negatives":[{"tool_calls":[{"name":"google_web_search","arguments":{"query":"compare lib-a vs lib-b"}}],"reason":"Used search instead of directly analyzing the provided URLs.","severity":"low"}]}
|
|
|
|
|
{"id":"web_fetch-04","metadata":{"tags":["tool:web_fetch","research-depth"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"What is the main argument in this research paper? https://arxiv.org/pdf/1234.5678"},"expected":{"tool_calls":[{"name":"web_fetch","arguments":{"prompt":"Identify the main argument in the research paper at https://arxiv.org/pdf/1234.5678"}}],"rationale":"Use web_fetch for analyzing PDF content from a URL."},"negatives":[{"tool_calls":[{"name":"google_web_search","arguments":{"query":"main argument of arxiv 1234.5678"}}],"reason":"Used search instead of fetching and analyzing the directly linked PDF.","severity":"medium"}]}
|
|
|
|
|
{"id":"web_fetch-05","metadata":{"tags":["tool:web_fetch","api-docs"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"How do I authenticate with this API? https://api-docs.com/auth"},"expected":{"tool_calls":[{"name":"web_fetch","arguments":{"prompt":"Find the authentication instructions at https://api-docs.com/auth"}}],"rationale":"Use web_fetch for extracting specific information from documentation URLs."},"negatives":[{"tool_calls":[{"name":"google_web_search","arguments":{"query":"how to authenticate with api-docs.com"}}],"reason":"Used search for a provided documentation link.","severity":"medium"}]}
|
|
|
|
|
{"id":"write_todos-01","metadata":{"tags":["tool:write_todos","planning"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Implement a new feature to handle user profiles."},"expected":{"tool_calls":[{"name":"write_todos","arguments":{"todos":[{"description":"Design user profile schema","status":"pending"},{"description":"Implement database migrations","status":"pending"},{"description":"Create API endpoints for profiles","status":"pending"},{"description":"Add unit tests","status":"pending"}]}}],"rationale":"Use write_todos to break down a complex task into manageable subtasks."},"negatives":[{"tool_calls":[{"name":"write_file","arguments":{"file_path":"src/profiles.ts","content":"..."}}],"reason":"Started implementation without defining subtasks for a complex feature.","severity":"medium"}]}
|
|
|
|
|
{"id":"write_todos-02","metadata":{"tags":["tool:write_todos","progress-tracking"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I've finished the schema design. What's next?"},"expected":{"tool_calls":[{"name":"write_todos","arguments":{"todos":[{"description":"Design user profile schema","status":"completed"},{"description":"Implement database migrations","status":"in_progress"},{"description":"Create API endpoints for profiles","status":"pending"},{"description":"Add unit tests","status":"pending"}]}}],"rationale":"Use write_todos to update progress as tasks are completed."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm run build"}}],"reason":"Ran a build instead of updating the task list to reflect the completed step.","severity":"low"}]}
|
|
|
|
|
{"id":"write_todos-03","metadata":{"tags":["tool:write_todos","complex-debugging"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Debug the memory leak issue in the production server."},"expected":{"tool_calls":[{"name":"write_todos","arguments":{"todos":[{"description":"Analyze heap dumps","status":"in_progress"},{"description":"Identify leaking objects","status":"pending"},{"description":"Locate source of leak in code","status":"pending"},{"description":"Implement fix and verify","status":"pending"}]}}],"rationale":"Use write_todos for managing complex, non-linear debugging tasks."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"top"}}],"reason":"Only ran a monitoring command instead of planning the debugging process.","severity":"medium"}]}
|
|
|
|
|
{"id":"write_todos-04","metadata":{"tags":["tool:write_todos","multi-stage-fix"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Fix the authentication bug and then update the docs."},"expected":{"tool_calls":[{"name":"write_todos","arguments":{"todos":[{"description":"Reproduce authentication bug","status":"in_progress"},{"description":"Implement fix","status":"pending"},{"description":"Verify fix with tests","status":"pending"},{"description":"Update admin/auth-troubleshooting.md","status":"pending"}]}}],"rationale":"Use write_todos to sequence multi-part fixes."},"negatives":[{"tool_calls":[{"name":"grep_search","arguments":{"pattern":"auth"}}],"reason":"Started searching without defining the multi-stage task.","severity":"low"}]}
|
|
|
|
|
{"id":"write_todos-05","metadata":{"tags":["tool:write_todos","refactor-planning"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"Migrate the database to PostgreSQL."},"expected":{"tool_calls":[{"name":"write_todos","arguments":{"todos":[{"description":"Install PostgreSQL drivers","status":"pending"},{"description":"Update database configuration","status":"pending"},{"description":"Migrate existing data","status":"pending"},{"description":"Verify with integration tests","status":"pending"}]}}],"rationale":"Use write_todos for planning migrations."},"negatives":[{"tool_calls":[{"name":"run_shell_command","arguments":{"command":"npm install pg"}}],"reason":"Started installation before defining the full migration path.","severity":"medium"}]}
|
|
|
|
|
{"id":"save_memory-06","metadata":{"tags":["tool:save_memory","persistence"],"created_at":"2026-03-02T13:00:00Z"},"input":{"user_query":"I want you to remember that my name is Abhijit."},"expected":{"tool_calls":[{"name":"save_memory","arguments":{"fact":"The user's name is Abhijit."}}],"rationale":"Use save_memory to persist personal user information across sessions."},"negatives":[{"tool_calls":[{"name":"generalist","arguments":{"request":"Remember user's name"}}],"reason":"Delegated a simple memory task to an agent.","severity":"low"}]}
|