From 50384ab3c9d63e6cbdc68298e305b2e32cac5877 Mon Sep 17 00:00:00 2001
From: Yuna Seol
Date: Wed, 11 Mar 2026 12:05:52 -0400
Subject: [PATCH] fix(core): update language detection to use LSP 3.18 identifiers (#21931)

---
Review note: getLanguageFromFilePath checks own properties of the map instead
of using the `in` operator, so filenames such as 'constructor' or '__proto__'
no longer resolve to values inherited from Object.prototype. A regression test
covers this. Hunk counts and the diffstat reflect these changes; the blob ids
on the `index` lines were not recomputed.

 .../core/src/code_assist/telemetry.test.ts    |   4 +-
 .../src/telemetry/telemetry-utils.test.ts     |   6 +-
 .../core/src/utils/language-detection.test.ts |  49 +++++
 packages/core/src/utils/language-detection.ts | 198 +++++++++---------
 4 files changed, 163 insertions(+), 94 deletions(-)
 create mode 100644 packages/core/src/utils/language-detection.test.ts

diff --git a/packages/core/src/code_assist/telemetry.test.ts b/packages/core/src/code_assist/telemetry.test.ts
index b9452f9e6c..e2260ba788 100644
--- a/packages/core/src/code_assist/telemetry.test.ts
+++ b/packages/core/src/code_assist/telemetry.test.ts
@@ -375,7 +375,7 @@ describe('telemetry', () => {
 
       expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith(
         expect.objectContaining({
-          language: 'TypeScript',
+          language: 'typescript',
         }),
       );
     });
@@ -408,7 +408,7 @@ describe('telemetry', () => {
 
       expect(mockServer.recordConversationInteraction).toHaveBeenCalledWith(
         expect.objectContaining({
-          language: 'Python',
+          language: 'python',
         }),
       );
     });
diff --git a/packages/core/src/telemetry/telemetry-utils.test.ts b/packages/core/src/telemetry/telemetry-utils.test.ts
index 4240ae6666..8b1b173a1d 100644
--- a/packages/core/src/telemetry/telemetry-utils.test.ts
+++ b/packages/core/src/telemetry/telemetry-utils.test.ts
@@ -18,14 +18,14 @@ describe('getProgrammingLanguage', () => {
     {
       name: 'file_path is present',
       args: { file_path: 'src/test.ts' },
-      expected: 'TypeScript',
+      expected: 'typescript',
     },
     {
       name: 'absolute_path is present',
       args: { absolute_path: 'src/test.py' },
-      expected: 'Python',
+      expected: 'python',
     },
-    { name: 'path is present', args: { path: 'src/test.go' }, expected: 'Go' },
+    { name: 'path is present', args: { path: 'src/test.go' }, expected: 'go' },
     {
       name: 'no file path is present',
       args: {},
diff --git a/packages/core/src/utils/language-detection.test.ts b/packages/core/src/utils/language-detection.test.ts
new file mode 100644
index 0000000000..e6c8d3f72b
--- /dev/null
+++ b/packages/core/src/utils/language-detection.test.ts
@@ -0,0 +1,49 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { getLanguageFromFilePath } from './language-detection.js';
+
+describe('language-detection', () => {
+  it('should return correct LSP identifiers for various extensions', () => {
+    expect(getLanguageFromFilePath('test.ts')).toBe('typescript');
+    expect(getLanguageFromFilePath('test.js')).toBe('javascript');
+    expect(getLanguageFromFilePath('test.py')).toBe('python');
+    expect(getLanguageFromFilePath('test.java')).toBe('java');
+    expect(getLanguageFromFilePath('test.go')).toBe('go');
+    expect(getLanguageFromFilePath('test.cs')).toBe('csharp');
+    expect(getLanguageFromFilePath('test.cpp')).toBe('cpp');
+    expect(getLanguageFromFilePath('test.sh')).toBe('shellscript');
+    expect(getLanguageFromFilePath('test.bat')).toBe('bat');
+    expect(getLanguageFromFilePath('test.json')).toBe('json');
+    expect(getLanguageFromFilePath('test.md')).toBe('markdown');
+    expect(getLanguageFromFilePath('test.tsx')).toBe('typescriptreact');
+    expect(getLanguageFromFilePath('test.jsx')).toBe('javascriptreact');
+  });
+
+  it('should handle uppercase extensions', () => {
+    expect(getLanguageFromFilePath('TEST.TS')).toBe('typescript');
+  });
+
+  it('should handle filenames without extensions but in map', () => {
+    expect(getLanguageFromFilePath('.gitignore')).toBe('ignore');
+    expect(getLanguageFromFilePath('.dockerfile')).toBe('dockerfile');
+    expect(getLanguageFromFilePath('Dockerfile')).toBe('dockerfile');
+  });
+
+  it('should return undefined for unknown extensions', () => {
+    expect(getLanguageFromFilePath('test.unknown')).toBeUndefined();
+  });
+
+  it('should return undefined for files without extension or known filename', () => {
+    expect(getLanguageFromFilePath('just_a_file')).toBeUndefined();
+  });
+
+  it('should ignore keys inherited from Object.prototype', () => {
+    expect(getLanguageFromFilePath('constructor')).toBeUndefined();
+    expect(getLanguageFromFilePath('__proto__')).toBeUndefined();
+  });
+});
diff --git a/packages/core/src/utils/language-detection.ts b/packages/core/src/utils/language-detection.ts
index ebbefe8b31..c0debcbaea 100644
--- a/packages/core/src/utils/language-detection.ts
+++ b/packages/core/src/utils/language-detection.ts
@@ -6,98 +6,118 @@
 
 import * as path from 'node:path';
 
+/**
+ * Maps file extensions or filenames to LSP 3.18 language identifiers.
+ * See: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.18/specification/#textDocumentItem
+ */
 const extensionToLanguageMap: { [key: string]: string } = {
-  '.ts': 'TypeScript',
-  '.js': 'JavaScript',
-  '.mjs': 'JavaScript',
-  '.cjs': 'JavaScript',
-  '.jsx': 'JavaScript',
-  '.tsx': 'TypeScript',
-  '.py': 'Python',
-  '.java': 'Java',
-  '.go': 'Go',
-  '.rb': 'Ruby',
-  '.php': 'PHP',
-  '.phtml': 'PHP',
-  '.cs': 'C#',
-  '.cpp': 'C++',
-  '.cxx': 'C++',
-  '.cc': 'C++',
-  '.c': 'C',
-  '.h': 'C/C++',
-  '.hpp': 'C++',
-  '.swift': 'Swift',
-  '.kt': 'Kotlin',
-  '.rs': 'Rust',
-  '.m': 'Objective-C',
-  '.mm': 'Objective-C',
-  '.pl': 'Perl',
-  '.pm': 'Perl',
-  '.lua': 'Lua',
-  '.r': 'R',
-  '.scala': 'Scala',
-  '.sc': 'Scala',
-  '.sh': 'Shell',
-  '.ps1': 'PowerShell',
-  '.bat': 'Batch',
-  '.cmd': 'Batch',
-  '.sql': 'SQL',
-  '.html': 'HTML',
-  '.htm': 'HTML',
-  '.css': 'CSS',
-  '.less': 'Less',
-  '.sass': 'Sass',
-  '.scss': 'Sass',
-  '.json': 'JSON',
-  '.xml': 'XML',
-  '.yaml': 'YAML',
-  '.yml': 'YAML',
-  '.md': 'Markdown',
-  '.markdown': 'Markdown',
-  '.dockerfile': 'Dockerfile',
-  '.vim': 'Vim script',
-  '.vb': 'Visual Basic',
-  '.fs': 'F#',
-  '.clj': 'Clojure',
-  '.cljs': 'Clojure',
-  '.dart': 'Dart',
-  '.ex': 'Elixir',
-  '.erl': 'Erlang',
-  '.hs': 'Haskell',
-  '.lisp': 'Lisp',
-  '.rkt': 'Racket',
-  '.groovy': 'Groovy',
-  '.jl': 'Julia',
-  '.tex': 'LaTeX',
-  '.ino': 'Arduino',
-  '.asm': 'Assembly',
-  '.s': 'Assembly',
-  '.toml': 'TOML',
-  '.vue': 'Vue',
-  '.svelte': 'Svelte',
-  '.gohtml': 'Go Template',
-  '.hbs': 'Handlebars',
-  '.ejs': 'EJS',
-  '.erb': 'ERB',
-  '.jsp': 'JSP',
-  '.dockerignore': 'Docker',
-  '.gitignore': 'Git',
-  '.npmignore': 'npm',
-  '.editorconfig': 'EditorConfig',
-  '.prettierrc': 'Prettier',
-  '.eslintrc': 'ESLint',
-  '.babelrc': 'Babel',
-  '.tsconfig': 'TypeScript',
-  '.flow': 'Flow',
-  '.graphql': 'GraphQL',
-  '.proto': 'Protocol Buffers',
+  '.ts': 'typescript',
+  '.js': 'javascript',
+  '.mjs': 'javascript',
+  '.cjs': 'javascript',
+  '.jsx': 'javascriptreact',
+  '.tsx': 'typescriptreact',
+  '.py': 'python',
+  '.java': 'java',
+  '.go': 'go',
+  '.rb': 'ruby',
+  '.php': 'php',
+  '.phtml': 'php',
+  '.cs': 'csharp',
+  '.cpp': 'cpp',
+  '.cxx': 'cpp',
+  '.cc': 'cpp',
+  '.c': 'c',
+  '.h': 'c',
+  '.hpp': 'cpp',
+  '.swift': 'swift',
+  '.kt': 'kotlin',
+  '.rs': 'rust',
+  '.m': 'objective-c',
+  '.mm': 'objective-cpp',
+  '.pl': 'perl',
+  '.pm': 'perl',
+  '.lua': 'lua',
+  '.r': 'r',
+  '.scala': 'scala',
+  '.sc': 'scala',
+  '.sh': 'shellscript',
+  '.ps1': 'powershell',
+  '.bat': 'bat',
+  '.cmd': 'bat',
+  '.sql': 'sql',
+  '.html': 'html',
+  '.htm': 'html',
+  '.css': 'css',
+  '.less': 'less',
+  '.sass': 'sass',
+  '.scss': 'scss',
+  '.json': 'json',
+  '.xml': 'xml',
+  '.yaml': 'yaml',
+  '.yml': 'yaml',
+  '.md': 'markdown',
+  '.markdown': 'markdown',
+  '.dockerfile': 'dockerfile',
+  '.vim': 'vim',
+  '.vb': 'vb',
+  '.fs': 'fsharp',
+  '.clj': 'clojure',
+  '.cljs': 'clojure',
+  '.dart': 'dart',
+  '.ex': 'elixir',
+  '.erl': 'erlang',
+  '.hs': 'haskell',
+  '.lisp': 'lisp',
+  '.rkt': 'racket',
+  '.groovy': 'groovy',
+  '.jl': 'julia',
+  '.tex': 'latex',
+  '.ino': 'arduino',
+  '.asm': 'assembly',
+  '.s': 'assembly',
+  '.toml': 'toml',
+  '.vue': 'vue',
+  '.svelte': 'svelte',
+  '.gohtml': 'gohtml', // Not in standard LSP well-known list but kept for compatibility
+  '.hbs': 'handlebars',
+  '.ejs': 'ejs',
+  '.erb': 'erb',
+  '.jsp': 'jsp',
+  '.dockerignore': 'ignore',
+  '.gitignore': 'ignore',
+  '.npmignore': 'ignore',
+  '.editorconfig': 'properties',
+  '.prettierrc': 'json',
+  '.eslintrc': 'json',
+  '.babelrc': 'json',
+  '.tsconfig': 'json',
+  '.flow': 'javascript',
+  '.graphql': 'graphql',
+  '.proto': 'proto',
 };
 
+/**
+ * Returns the language identifier mapped to a file path, or `undefined` when
+ * neither its extension nor its filename is a key of `extensionToLanguageMap`.
+ *
+ * Matching is case-insensitive. Keys are tried in order: the extension, the
+ * exact filename, then the dot-prefixed filename.
+ */
 export function getLanguageFromFilePath(filePath: string): string | undefined {
-  const extension = path.extname(filePath).toLowerCase();
-  if (extension) {
-    return extensionToLanguageMap[extension];
-  }
   const filename = path.basename(filePath).toLowerCase();
-  return extensionToLanguageMap[`.${filename}`];
+  const extension = path.extname(filePath).toLowerCase();
+
+  const candidates = [
+    extension, // 1. Standard extension (e.g., '.js')
+    filename, // 2. Exact filename (e.g., '.gitignore')
+    `.${filename}`, // 3. Dot-prefixed filename ('dockerfile' -> '.dockerfile')
+  ];
+  // Own-property check: `in` would also match keys inherited from
+  // Object.prototype (e.g. a file named 'constructor') and return a non-string.
+  const match = candidates.find((key) =>
+    Object.prototype.hasOwnProperty.call(extensionToLanguageMap, key),
+  );
+
+  return match ? extensionToLanguageMap[match] : undefined;
 }