fix(core): update language detection to use LSP 3.18 identifiers (#21931)

This commit is contained in:
Yuna Seol
2026-03-11 12:05:52 -04:00
committed by GitHub
parent eaf6e8bbb1
commit 50384ab3c9
4 changed files with 147 additions and 94 deletions

View File

@@ -375,7 +375,7 @@ describe('telemetry', () => {
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith(
expect.objectContaining({
language: 'TypeScript',
language: 'typescript',
}),
);
});
@@ -408,7 +408,7 @@ describe('telemetry', () => {
expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith(
expect.objectContaining({
language: 'Python',
language: 'python',
}),
);
});

View File

@@ -18,14 +18,14 @@ describe('getProgrammingLanguage', () => {
{
name: 'file_path is present',
args: { file_path: 'src/test.ts' },
expected: 'TypeScript',
expected: 'typescript',
},
{
name: 'absolute_path is present',
args: { absolute_path: 'src/test.py' },
expected: 'Python',
expected: 'python',
},
{ name: 'path is present', args: { path: 'src/test.go' }, expected: 'Go' },
{ name: 'path is present', args: { path: 'src/test.go' }, expected: 'go' },
{
name: 'no file path is present',
args: {},

View File

@@ -0,0 +1,44 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { getLanguageFromFilePath } from './language-detection.js';
describe('language-detection', () => {
  /**
   * Asserts, in order, that every sample path resolves to the paired
   * language identifier.
   */
  const expectLanguages = (
    samples: ReadonlyArray<readonly [string, string]>,
  ): void => {
    for (const [filePath, languageId] of samples) {
      expect(getLanguageFromFilePath(filePath)).toBe(languageId);
    }
  };

  /** Asserts that the given path resolves to no language at all. */
  const expectNoLanguage = (filePath: string): void => {
    expect(getLanguageFromFilePath(filePath)).toBeUndefined();
  };

  it('should return correct LSP identifiers for various extensions', () => {
    expectLanguages([
      ['test.ts', 'typescript'],
      ['test.js', 'javascript'],
      ['test.py', 'python'],
      ['test.java', 'java'],
      ['test.go', 'go'],
      ['test.cs', 'csharp'],
      ['test.cpp', 'cpp'],
      ['test.sh', 'shellscript'],
      ['test.bat', 'bat'],
      ['test.json', 'json'],
      ['test.md', 'markdown'],
      ['test.tsx', 'typescriptreact'],
      ['test.jsx', 'javascriptreact'],
    ]);
  });

  it('should handle uppercase extensions', () => {
    expectLanguages([['TEST.TS', 'typescript']]);
  });

  it('should handle filenames without extensions but in map', () => {
    expectLanguages([
      ['.gitignore', 'ignore'],
      ['.dockerfile', 'dockerfile'],
      ['Dockerfile', 'dockerfile'],
    ]);
  });

  it('should return undefined for unknown extensions', () => {
    expectNoLanguage('test.unknown');
  });

  it('should return undefined for files without extension or known filename', () => {
    expectNoLanguage('just_a_file');
  });
});

View File

@@ -6,98 +6,107 @@
import * as path from 'node:path';
/**
 * Maps file extensions or filenames to LSP 3.18 language identifiers.
 * See: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.18/specification/#textDocumentItem
 *
 * Keys are lowercase and start with a dot. Most are file extensions
 * (e.g. `.ts`); some are complete dotfile names (e.g. `.gitignore`).
 * Each key must appear exactly once: duplicate property names in an object
 * literal are rejected by TypeScript, and at runtime only the last one wins.
 */
const extensionToLanguageMap: { [key: string]: string } = {
  '.ts': 'typescript',
  '.js': 'javascript',
  '.mjs': 'javascript',
  '.cjs': 'javascript',
  '.jsx': 'javascriptreact',
  '.tsx': 'typescriptreact',
  '.py': 'python',
  '.java': 'java',
  '.go': 'go',
  '.rb': 'ruby',
  '.php': 'php',
  '.phtml': 'php',
  '.cs': 'csharp',
  '.cpp': 'cpp',
  '.cxx': 'cpp',
  '.cc': 'cpp',
  '.c': 'c',
  '.h': 'c',
  '.hpp': 'cpp',
  '.swift': 'swift',
  '.kt': 'kotlin',
  '.rs': 'rust',
  '.m': 'objective-c',
  '.mm': 'objective-cpp',
  '.pl': 'perl',
  '.pm': 'perl',
  '.lua': 'lua',
  '.r': 'r',
  '.scala': 'scala',
  '.sc': 'scala',
  '.sh': 'shellscript',
  '.ps1': 'powershell',
  '.bat': 'bat',
  '.cmd': 'bat',
  '.sql': 'sql',
  '.html': 'html',
  '.htm': 'html',
  '.css': 'css',
  '.less': 'less',
  '.sass': 'sass',
  '.scss': 'scss',
  '.json': 'json',
  '.xml': 'xml',
  '.yaml': 'yaml',
  '.yml': 'yaml',
  '.md': 'markdown',
  '.markdown': 'markdown',
  '.dockerfile': 'dockerfile',
  '.vim': 'vim',
  '.vb': 'vb',
  '.fs': 'fsharp',
  '.clj': 'clojure',
  '.cljs': 'clojure',
  '.dart': 'dart',
  '.ex': 'elixir',
  '.erl': 'erlang',
  '.hs': 'haskell',
  '.lisp': 'lisp',
  '.rkt': 'racket',
  '.groovy': 'groovy',
  '.jl': 'julia',
  '.tex': 'latex',
  '.ino': 'arduino',
  '.asm': 'assembly',
  '.s': 'assembly',
  '.toml': 'toml',
  '.vue': 'vue',
  '.svelte': 'svelte',
  '.gohtml': 'gohtml', // Not in standard LSP well-known list but kept for compatibility
  '.hbs': 'handlebars',
  '.ejs': 'ejs',
  '.erb': 'erb',
  '.jsp': 'jsp',
  '.dockerignore': 'ignore',
  '.gitignore': 'ignore',
  '.npmignore': 'ignore',
  '.editorconfig': 'properties',
  '.prettierrc': 'json',
  '.eslintrc': 'json',
  '.babelrc': 'json',
  '.tsconfig': 'json',
  '.flow': 'javascript',
  '.graphql': 'graphql',
  '.proto': 'proto',
};
/**
 * Resolves the language identifier for a file path.
 *
 * Lookup keys are tried in this order, all lowercased:
 *   1. the extension reported by `path.extname` (e.g. `.js`);
 *   2. the base name as-is (e.g. `.gitignore`);
 *   3. the base name with a leading dot, so an extension-less name such as
 *      `Dockerfile` matches the `.dockerfile` entry.
 *
 * @param filePath - Path or bare file name to classify.
 * @returns The mapped identifier, or `undefined` when no key matches.
 */
export function getLanguageFromFilePath(filePath: string): string | undefined {
  const filename = path.basename(filePath).toLowerCase();
  const extension = path.extname(filePath).toLowerCase();
  const candidates = [extension, filename, `.${filename}`];

  // Own-property check on purpose: the `in` operator also sees members
  // inherited from Object.prototype, so a file literally named `constructor`
  // or `__proto__` would otherwise "match" and return a non-string value.
  const match = candidates.find((key) =>
    Object.prototype.hasOwnProperty.call(extensionToLanguageMap, key),
  );
  return match === undefined ? undefined : extensionToLanguageMap[match];
}