mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-17 00:31:44 -07:00
fix: improve audio MIME normalization and validation in file reads (#21636)
Co-authored-by: Coco Sheng <cocosheng@google.com>
This commit is contained in:
@@ -38,6 +38,7 @@ import {
|
||||
isEmpty,
|
||||
} from './fileUtils.js';
|
||||
import { StandardFileSystemService } from '../services/fileSystemService.js';
|
||||
import { ToolErrorType } from '../tools/tool-error.js';
|
||||
|
||||
vi.mock('mime/lite', () => ({
|
||||
default: { getType: vi.fn() },
|
||||
@@ -54,6 +55,7 @@ describe('fileUtils', () => {
|
||||
let testImageFilePath: string;
|
||||
let testPdfFilePath: string;
|
||||
let testAudioFilePath: string;
|
||||
let testVideoFilePath: string;
|
||||
let testBinaryFilePath: string;
|
||||
let nonexistentFilePath: string;
|
||||
let directoryPath: string;
|
||||
@@ -70,6 +72,7 @@ describe('fileUtils', () => {
|
||||
testImageFilePath = path.join(tempRootDir, 'image.png');
|
||||
testPdfFilePath = path.join(tempRootDir, 'document.pdf');
|
||||
testAudioFilePath = path.join(tempRootDir, 'audio.mp3');
|
||||
testVideoFilePath = path.join(tempRootDir, 'video.mp4');
|
||||
testBinaryFilePath = path.join(tempRootDir, 'app.exe');
|
||||
nonexistentFilePath = path.join(tempRootDir, 'nonexistent.txt');
|
||||
directoryPath = path.join(tempRootDir, 'subdir');
|
||||
@@ -704,6 +707,19 @@ describe('fileUtils', () => {
|
||||
},
|
||||
);
|
||||
|
||||
it('should detect supported audio files by extension when mime lookup is missing', async () => {
|
||||
const filePath = path.join(tempRootDir, 'fallback.flac');
|
||||
actualNodeFs.writeFileSync(
|
||||
filePath,
|
||||
Buffer.from([0x66, 0x4c, 0x61, 0x43, 0x00, 0x00, 0x00, 0x22]),
|
||||
);
|
||||
mockMimeGetType.mockReturnValueOnce(false);
|
||||
|
||||
expect(await detectFileType(filePath)).toBe('audio');
|
||||
|
||||
actualNodeFs.unlinkSync(filePath);
|
||||
});
|
||||
|
||||
it('should detect svg type by extension', async () => {
|
||||
expect(await detectFileType('image.svg')).toBe('svg');
|
||||
expect(await detectFileType('image.icon.svg')).toBe('svg');
|
||||
@@ -755,6 +771,8 @@ describe('fileUtils', () => {
|
||||
actualNodeFs.unlinkSync(testPdfFilePath);
|
||||
if (actualNodeFs.existsSync(testAudioFilePath))
|
||||
actualNodeFs.unlinkSync(testAudioFilePath);
|
||||
if (actualNodeFs.existsSync(testVideoFilePath))
|
||||
actualNodeFs.unlinkSync(testVideoFilePath);
|
||||
if (actualNodeFs.existsSync(testBinaryFilePath))
|
||||
actualNodeFs.unlinkSync(testBinaryFilePath);
|
||||
});
|
||||
@@ -880,6 +898,70 @@ describe('fileUtils', () => {
|
||||
expect(result.returnDisplay).toContain('Read audio file: audio.mp3');
|
||||
});
|
||||
|
||||
it('should normalize supported audio mime types before returning inline data', async () => {
|
||||
const fakeWavData = Buffer.from([
|
||||
0x52, 0x49, 0x46, 0x46, 0x24, 0x00, 0x00, 0x00,
|
||||
]);
|
||||
const wavFilePath = path.join(tempRootDir, 'voice.wav');
|
||||
actualNodeFs.writeFileSync(wavFilePath, fakeWavData);
|
||||
mockMimeGetType.mockReturnValue('audio/x-wav');
|
||||
|
||||
const result = await processSingleFileContent(
|
||||
wavFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
|
||||
expect(
|
||||
(result.llmContent as { inlineData: { mimeType: string } }).inlineData
|
||||
.mimeType,
|
||||
).toBe('audio/wav');
|
||||
});
|
||||
|
||||
it('should reject unsupported audio mime types with a clear error', async () => {
|
||||
const unsupportedAudioPath = path.join(tempRootDir, 'legacy.adp');
|
||||
actualNodeFs.writeFileSync(
|
||||
unsupportedAudioPath,
|
||||
Buffer.from([0x00, 0x01, 0x02, 0x03]),
|
||||
);
|
||||
mockMimeGetType.mockReturnValue('audio/adpcm');
|
||||
|
||||
const result = await processSingleFileContent(
|
||||
unsupportedAudioPath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
|
||||
expect(result.errorType).toBe(ToolErrorType.READ_CONTENT_FAILURE);
|
||||
expect(result.error).toContain('Unsupported audio file format');
|
||||
expect(result.returnDisplay).toContain('Unsupported audio file format');
|
||||
});
|
||||
|
||||
it('should process a video file', async () => {
|
||||
const fakeMp4Data = Buffer.from([
|
||||
0x00, 0x00, 0x00, 0x1c, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6f, 0x6d,
|
||||
0x00, 0x00, 0x02, 0x00,
|
||||
]);
|
||||
actualNodeFs.writeFileSync(testVideoFilePath, fakeMp4Data);
|
||||
mockMimeGetType.mockReturnValue('video/mp4');
|
||||
const result = await processSingleFileContent(
|
||||
testVideoFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
expect(
|
||||
(result.llmContent as { inlineData: unknown }).inlineData,
|
||||
).toBeDefined();
|
||||
expect(
|
||||
(result.llmContent as { inlineData: { mimeType: string } }).inlineData
|
||||
.mimeType,
|
||||
).toBe('video/mp4');
|
||||
expect(
|
||||
(result.llmContent as { inlineData: { data: string } }).inlineData.data,
|
||||
).toBe(fakeMp4Data.toString('base64'));
|
||||
expect(result.returnDisplay).toContain('Read video file: video.mp4');
|
||||
});
|
||||
|
||||
it('should read an SVG file as text when under 1MB', async () => {
|
||||
const svgContent = `
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100">
|
||||
|
||||
@@ -201,6 +201,72 @@ export function getSpecificMimeType(filePath: string): string | undefined {
|
||||
return typeof lookedUpMime === 'string' ? lookedUpMime : undefined;
|
||||
}
|
||||
|
||||
const SUPPORTED_AUDIO_MIME_TYPES_BY_EXTENSION = new Map<string, string>([
|
||||
['.mp3', 'audio/mpeg'],
|
||||
['.wav', 'audio/wav'],
|
||||
['.aiff', 'audio/aiff'],
|
||||
['.aif', 'audio/aiff'],
|
||||
['.aac', 'audio/aac'],
|
||||
['.ogg', 'audio/ogg'],
|
||||
['.flac', 'audio/flac'],
|
||||
]);
|
||||
|
||||
const AUDIO_MIME_TYPE_NORMALIZATION: Record<string, string> = {
|
||||
'audio/mp3': 'audio/mpeg',
|
||||
'audio/x-mp3': 'audio/mpeg',
|
||||
'audio/wave': 'audio/wav',
|
||||
'audio/x-wav': 'audio/wav',
|
||||
'audio/vnd.wave': 'audio/wav',
|
||||
'audio/x-pn-wav': 'audio/wav',
|
||||
'audio/x-aiff': 'audio/aiff',
|
||||
'audio/aif': 'audio/aiff',
|
||||
'audio/x-aac': 'audio/aac',
|
||||
};
|
||||
|
||||
function formatSupportedAudioFormats(): string {
|
||||
const displayNames = Array.from(
|
||||
new Set(
|
||||
Array.from(SUPPORTED_AUDIO_MIME_TYPES_BY_EXTENSION.keys()).map((ext) => {
|
||||
if (ext === '.aif' || ext === '.aiff') {
|
||||
return 'AIFF';
|
||||
}
|
||||
return ext.slice(1).toUpperCase();
|
||||
}),
|
||||
),
|
||||
);
|
||||
|
||||
if (displayNames.length <= 1) {
|
||||
return displayNames[0] ?? '';
|
||||
}
|
||||
|
||||
return `${displayNames.slice(0, -1).join(', ')}, and ${displayNames.at(-1)}`;
|
||||
}
|
||||
|
||||
const SUPPORTED_AUDIO_FORMATS_DISPLAY = formatSupportedAudioFormats();
|
||||
|
||||
function getSupportedAudioMimeTypeForFile(
|
||||
filePath: string,
|
||||
): string | undefined {
|
||||
const extension = path.extname(filePath).toLowerCase();
|
||||
const extensionMimeType =
|
||||
SUPPORTED_AUDIO_MIME_TYPES_BY_EXTENSION.get(extension);
|
||||
const lookedUpMimeType = getSpecificMimeType(filePath)?.toLowerCase();
|
||||
const normalizedMimeType = lookedUpMimeType
|
||||
? (AUDIO_MIME_TYPE_NORMALIZATION[lookedUpMimeType] ?? lookedUpMimeType)
|
||||
: undefined;
|
||||
|
||||
if (
|
||||
normalizedMimeType &&
|
||||
[...SUPPORTED_AUDIO_MIME_TYPES_BY_EXTENSION.values()].includes(
|
||||
normalizedMimeType,
|
||||
)
|
||||
) {
|
||||
return normalizedMimeType;
|
||||
}
|
||||
|
||||
return extensionMimeType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a path is within a given root directory.
|
||||
* @param pathToCheck The absolute path to check.
|
||||
@@ -370,6 +436,14 @@ export async function detectFileType(
|
||||
}
|
||||
}
|
||||
|
||||
const supportedAudioMimeType = getSupportedAudioMimeTypeForFile(filePath);
|
||||
if (supportedAudioMimeType) {
|
||||
if (!(await isBinaryFile(filePath))) {
|
||||
return 'text';
|
||||
}
|
||||
return 'audio';
|
||||
}
|
||||
|
||||
// Stricter binary check for common non-text extensions before content check
|
||||
// These are often not well-covered by mime-types or might be misidentified.
|
||||
if (BINARY_EXTENSIONS.includes(ext)) {
|
||||
@@ -532,17 +606,40 @@ export async function processSingleFileContent(
|
||||
linesShown: [actualStart + 1, sliceEnd],
|
||||
};
|
||||
}
|
||||
case 'image':
|
||||
case 'pdf':
|
||||
case 'audio':
|
||||
case 'video': {
|
||||
case 'audio': {
|
||||
const mimeType = getSupportedAudioMimeTypeForFile(filePath);
|
||||
if (!mimeType) {
|
||||
return {
|
||||
llmContent: `Could not read audio file because its format is not supported. Supported audio formats are ${SUPPORTED_AUDIO_FORMATS_DISPLAY}.`,
|
||||
returnDisplay: `Unsupported audio file format: ${relativePathForDisplay}`,
|
||||
error: `Unsupported audio file format for ${filePath}. Supported audio formats are ${SUPPORTED_AUDIO_FORMATS_DISPLAY}.`,
|
||||
errorType: ToolErrorType.READ_CONTENT_FAILURE,
|
||||
};
|
||||
}
|
||||
const contentBuffer = await fs.promises.readFile(filePath);
|
||||
const base64Data = contentBuffer.toString('base64');
|
||||
return {
|
||||
llmContent: {
|
||||
inlineData: {
|
||||
data: base64Data,
|
||||
mimeType: mime.getType(filePath) || 'application/octet-stream',
|
||||
mimeType,
|
||||
},
|
||||
},
|
||||
returnDisplay: `Read audio file: ${relativePathForDisplay}`,
|
||||
};
|
||||
}
|
||||
case 'image':
|
||||
case 'pdf':
|
||||
case 'video': {
|
||||
const mimeType =
|
||||
getSpecificMimeType(filePath) ?? 'application/octet-stream';
|
||||
const contentBuffer = await fs.promises.readFile(filePath);
|
||||
const base64Data = contentBuffer.toString('base64');
|
||||
return {
|
||||
llmContent: {
|
||||
inlineData: {
|
||||
data: base64Data,
|
||||
mimeType,
|
||||
},
|
||||
},
|
||||
returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
|
||||
|
||||
Reference in New Issue
Block a user