Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow looking up language by filename #22

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion build.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import json
import os
import glob
import pprint
import re
import subprocess
import sys
from tree_sitter import Language
Expand Down Expand Up @@ -32,7 +36,40 @@
subprocess.check_call(["git", "fetch", "--depth=1", "origin", commit], cwd=clone_directory)
subprocess.check_call(["git", "checkout", commit], cwd=clone_directory)

print()

# Build the language metadata table: each compiled grammar name (the
# ``tree_sitter_<name>`` symbol declared in a parser.c) maps to the matching
# "tree-sitter" entry of its repository's package.json, or to {} when the
# repository provides no usable metadata for that grammar.
langs = {}
for _, _, clone_directory in repos:
    # Collect every grammar name exported by the parsers in this repository.
    keys = []
    parser_glob = os.path.join(clone_directory, "**/parser.c")
    for parser_path in glob.glob(parser_glob, recursive=True):
        # Only the ASCII ``extern`` declaration is needed, so undecodable bytes
        # elsewhere in the C source are replaced instead of aborting the build.
        with open(parser_path, 'r', encoding='utf-8', errors='replace') as parser:
            for line in parser:
                if line.startswith("extern const TSLanguage *tree_sitter_"):
                    key = re.search(r"tree_sitter_(.+?)\(", line).group(1)
                    keys.append(key)

    # Load the optional "tree-sitter" entries from package.json.
    entries = []
    package_json_path = os.path.join(clone_directory, 'package.json')
    if os.path.isfile(package_json_path):
        with open(package_json_path, 'r', encoding='utf-8') as file:
            entries = json.load(file).get('tree-sitter', [])

    # Pair entries with grammar names. The first matching entry wins so that a
    # secondary configuration sharing the same grammar path cannot overwrite
    # the primary one.
    found = {}
    for entry in entries:
        if len(keys) == 1:
            found.setdefault(keys[0], entry)
            continue
        for key in keys:
            if entry.get('scope', '').endswith(key) or entry.get('path') == key:
                found.setdefault(key, entry)
                break

    # Every discovered grammar gets an entry, even when nothing matched, so
    # lookups by grammar name never raise KeyError.
    for key in keys:
        langs[key] = found.get(key, {})

# Emit the table as an importable module consumed by tree_sitter_languages.
with open('tree_sitter_languages/generated.pyx', 'w', encoding='utf-8') as file:
    file.write('compiled_languages = ')
    pprint.pprint(langs, stream=file)


if sys.platform == "win32":
languages_filename = "tree_sitter_languages\\languages.dll"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
author_email='[email protected]',
url='https://github.com/grantjenks/py-tree-sitter-languages',
license='Apache 2.0',
ext_modules=cythonize('tree_sitter_languages/core.pyx', language_level='3'),
ext_modules=cythonize('tree_sitter_languages/*.pyx', language_level='3'),
packages=['tree_sitter_languages'],
package_data={'tree_sitter_languages': ['languages.so', 'languages.dll']},
install_requires=['tree-sitter'],
Expand Down
15 changes: 14 additions & 1 deletion tests/test_tree_sitter_languages.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from tree_sitter_languages import get_language, get_parser
from tree_sitter_languages import get_language, get_parser, get_language_by_filename
from tree_sitter_languages.generated import compiled_languages

LANGUAGES = [
'bash',
Expand Down Expand Up @@ -45,6 +46,7 @@
'sqlite',
'toml',
'tsq',
'tsx',
'typescript',
'yaml',
]
Expand Down Expand Up @@ -87,3 +89,14 @@ def test_get_language():
for language in LANGUAGES:
language = get_language(language)
assert language

def test_generated():
    """Every name in LANGUAGES must have a non-None entry in the generated table."""
    for name in LANGUAGES:
        entry = compiled_languages[name]
        assert entry is not None

def test_get_language_by_filename():
    """Sample filenames must resolve to the same language as a lookup by name."""
    expected_by_filename = {
        'file.sh': 'bash',
        'test.go': 'go',
    }
    for filename in expected_by_filename:
        found_name = get_language_by_filename(filename).name
        wanted_name = get_language(expected_by_filename[filename]).name
        assert found_name == wanted_name
2 changes: 1 addition & 1 deletion tree_sitter_languages/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Tree Sitter with Languages
"""

from .core import get_language, get_parser
from .core import get_language, get_parser, get_language_by_filename

__version__ = '1.7.0'
__title__ = 'tree_sitter_languages'
Expand Down
27 changes: 27 additions & 0 deletions tree_sitter_languages/core.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pathlib
import re
import sys

from .generated import compiled_languages
from tree_sitter import Language, Parser


Expand All @@ -14,6 +16,31 @@ def get_language(language):
language = Language(binary_path, language)
return language

def get_language_by_filename(name, contents=None):
    """Return the language whose metadata claims the file *name*, or None.

    A language matches when one of its ``file-types`` equals the file's base
    name (e.g. ``Makefile``) or is its dot-delimited extension (e.g. ``go``
    for ``main.go``). A bare suffix test is not used because it would match
    unrelated names: ``main.cc`` ends with ``c`` and ``query.sql`` ends with
    ``ql``.

    Args:
        name: File name or path as a string; only the last path component
            (after ``/`` or ``\\``) is considered.
        contents: Optional file text. When several languages claim the file,
            the one whose ``content-regex`` yields the longest match wins.

    Returns:
        The result of ``get_language`` for the chosen language key, or None
        when no language claims the file. Without a content-based winner the
        first matching language in table order is used.
    """
    basename = name.replace('\\', '/').rsplit('/', 1)[-1]

    matching_keys = []
    for key, entry in compiled_languages.items():
        for file_type in entry.get('file-types', ()):
            if basename == file_type or basename.endswith('.' + file_type):
                matching_keys.append(key)
                break  # one hit per language is enough

    if not matching_keys:
        return None
    if contents is None:
        return get_language(matching_keys[0])

    # Disambiguate by content: the longest content-regex match wins.
    best_score = -1
    best_key = None
    for key in matching_keys:
        pattern = compiled_languages[key].get('content-regex')
        if pattern is None:
            continue
        match = re.search(pattern, contents)
        if match is None:
            continue
        score = match.end() - match.start()
        if score > best_score:
            best_score = score
            best_key = key

    if best_key is None:
        best_key = matching_keys[0]
    return get_language(best_key)

def get_parser(language):
language = get_language(language)
Expand Down
117 changes: 117 additions & 0 deletions tree_sitter_languages/generated.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Language metadata table keyed by compiled grammar name.
# build.py writes this assignment with pprint, taking each value from the
# "tree-sitter" entries of a grammar repository's package.json; grammars
# without that metadata map to an empty dict. build.py opens this file in
# write mode, so manual edits are overwritten on the next build.
compiled_languages = {'bash': {'file-types': ['sh', 'bash', 'zsh'], 'scope': 'source.bash'},
'c': {'file-types': ['c', 'h'], 'scope': 'source.c'},
'c_sharp': {'file-types': ['cs'], 'scope': 'source.cs'},
'commonlisp': {},
'cpp': {'file-types': ['cc', 'cpp', 'hpp', 'h'],
'highlights': ['queries/highlights.scm',
'node_modules/tree-sitter-c/queries/highlights.scm'],
'scope': 'source.cpp'},
'css': {'file-types': ['css'],
'injection-regex': '^css$',
'scope': 'source.css'},
'dockerfile': {},
'dot': {'file-types': ['dot', 'gv'], 'scope': 'source.dot'},
'elisp': {'file-types': ['el'], 'scope': 'source.emacs.lisp'},
'elixir': {'file-types': ['ex', 'exs'],
'injection-regex': '^(ex|elixir)$',
'scope': 'source.elixir'},
'elm': {'file-types': ['elm'], 'scope': 'source.elm'},
'embedded_template': {'file-types': ['erb'],
'injection-regex': 'erb',
'injections': 'queries/injections-erb.scm',
'scope': 'text.html.erb'},
'erlang': {},
'go': {'file-types': ['go'], 'scope': 'source.go'},
'gomod': {},
'hack': {'file-types': ['hack'],
'first-line-regex': '^((<\\?hh.*)|(#!.+ hhvm))',
'scope': 'source.hack'},
'haskell': {'file-types': ['hs'],
'highlights': ['queries/highlights.scm'],
'injection-regex': '^(hs|haskell)$',
'scope': 'source.haskell'},
'hcl': {'file-types': ['hcl'], 'scope': 'source.hcl'},
'html': {'file-types': ['html'],
'injection-regex': 'html',
'scope': 'text.html.basic'},
'java': {'file-types': ['java'], 'scope': 'source.java'},
'javascript': {'file-types': ['js'],
'highlights': ['queries/highlights-jsx.scm',
'queries/highlights-params.scm',
'queries/highlights.scm'],
'injection-regex': '^(js|javascript)$',
'scope': 'source.js'},
'jsdoc': {'injection-regex': 'jsdoc', 'scope': 'text.jsdoc'},
'json': {'file-types': ['json'], 'scope': 'source.json'},
'julia': {'file-types': ['jl'], 'scope': 'source.julia'},
'kotlin': {},
'lua': {'file-types': ['lua'], 'scope': 'source.lua'},
'make': {'file-types': ['makefile',
'Makefile',
'MAKEFILE',
'GNUmakefile',
'mk',
'mak',
'dsp'],
'scope': 'source.mk'},
'markdown': {},
'objc': {'file-types': ['h', 'm'],
'highlights': ['queries/highlights.scm',
'node_modules/tree-sitter-c/queries/highlights.scm'],
'scope': 'source.objc'},
'ocaml': {'file-types': ['ml'],
'first-line-regex': '',
'injection-regex': '^(ocaml|ml)$',
'path': 'ocaml',
'scope': 'source.ocaml'},
'perl': {'file-types': ['pl'], 'scope': 'source.perl'},
'php': {'file-types': ['php'],
'highlights': 'queries/highlights.scm',
'scope': 'source.php'},
'python': {'file-types': ['py'], 'scope': 'source.python'},
'ql': {'file-types': ['ql', 'qll'], 'scope': 'source.ql'},
'r': {'file-types': ['R', 'r'],
'first-line-regex': '#!.*\\bRscript$',
'scope': 'source.R'},
'regex': {'injection-regex': '^regex$', 'scope': 'source.regex'},
'rst': {'file-types': ['rst'], 'injection-regex': 'rst', 'scope': 'text.rst'},
'ruby': {'file-types': ['rb'],
'injection-regex': 'ruby',
'scope': 'source.ruby'},
'rust': {'file-types': ['rs'],
'injection-regex': 'rust',
'scope': 'source.rust'},
'scala': {'file-types': ['scala'], 'scope': 'source.scala'},
'sql': {'file-types': ['sql'], 'scope': 'source.sql'},
'sqlite': {'file-types': ['sql'],
'highlights': 'queries/highlights.scm',
'injection-regex': '^(sql)$',
'scope': 'source.sql'},
'toml': {'file-types': ['toml'],
'highlights': ['queries/highlights.scm'],
'injection-regex': '^toml$',
'scope': 'source.toml'},
'tsq': {'file-types': ['tsq', 'scm'], 'scope': 'scope.tsq'},
'tsx': {'content-regex': '@flow',
'file-types': ['js'],
'highlights': ['queries/highlights.scm',
'node_modules/tree-sitter-javascript/queries/highlights-jsx.scm',
'node_modules/tree-sitter-javascript/queries/highlights.scm'],
'injections': 'node_modules/tree-sitter-javascript/queries/injections.scm',
'locals': 'node_modules/tree-sitter-javascript/queries/locals.scm',
'path': 'tsx',
'scope': 'source.js.flow',
'tags': ['queries/tags.scm',
'node_modules/tree-sitter-javascript/queries/tags.scm']},
'typescript': {'file-types': ['ts'],
'highlights': ['queries/highlights.scm',
'node_modules/tree-sitter-javascript/queries/highlights.scm'],
'injection-regex': '^(ts|typescript)$',
'injections': 'node_modules/tree-sitter-javascript/queries/injections.scm',
'locals': ['queries/locals.scm',
'node_modules/tree-sitter-javascript/queries/locals.scm'],
'path': 'typescript',
'scope': 'source.ts',
'tags': ['queries/tags.scm',
'node_modules/tree-sitter-javascript/queries/tags.scm']},
'yaml': {}}