-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rework setup grammar script to better handle more targets.
- Loading branch information
Showing
1 changed file
with
90 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,70 +1,125 @@ | ||
import datetime | ||
import pathlib | ||
import subprocess | ||
from enum import Enum | ||
|
||
import requests | ||
|
||
GRAMMAR = { | ||
'lexer': { | ||
'url': 'https://raw.githubusercontent.com/crate/crate/master/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseLexer.g4', | ||
'filename': 'SqlBaseLexer.g4' | ||
|
||
class Antlr4Target(Enum): | ||
js = 'JavaScript' | ||
python = 'Python3' | ||
|
||
|
||
build_options = { | ||
'antlr4_compiled_target_output': { | ||
Antlr4Target.js: 'cratedb_sqlparse_js', | ||
Antlr4Target.python: 'cratedb_sqlparse_py' | ||
}, | ||
'parser': { | ||
'url': 'https://raw.githubusercontent.com/crate/crate/master/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseParser.g4', | ||
'filename': 'SqlBaseParser.g4' | ||
} | ||
|
||
'antlr4_compiled_target_subdir': 'parser/generated_parser', | ||
|
||
# List of '.g4' files that will be built | ||
'files': [ | ||
{ | ||
'url': 'https://github.com/crate/crate/raw/{version}/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseLexer.g4', | ||
'filename': 'SqlBaseLexer.g4' | ||
}, | ||
{ | ||
'url': 'https://github.com/crate/crate/raw/{version}/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseParser.g4', | ||
'filename': 'SqlBaseParser.g4' | ||
} | ||
] | ||
} | ||
|
||
PARSER_COMPILE_PATH = pathlib.Path(__file__).parent | ||
|
||
|
||
class Target(Enum): | ||
js = 'JavaScript' | ||
python = 'Python3' | ||
|
||
|
||
def download_cratedb_grammar(): | ||
def download_cratedb_grammar(version='master'): | ||
""" | ||
Downloads CrateDB's grammar files. | ||
Downloads the CrateDB grammar files for the given `version`. | ||
`version` should match a tag or branch name (the default is 'master'); for a list of tags run: | ||
$ curl https://api.github.com/repos/crate/crate/tags | jq -r '.[] | .name' | ||
""" | ||
for file in GRAMMAR.values(): | ||
response = requests.get(file['url']) | ||
with open(str(pathlib.Path(__file__).parent / file['filename']), | ||
'w') as f: | ||
f.write(response.text) | ||
for file in build_options['files']: | ||
response = requests.get(file['url'].format(version=version)) | ||
|
||
# Prepend the CrateDB version and the download timestamp to each grammar file, for reference. | ||
text = f'/* crate_branch={version}, at={datetime.datetime.now()}, annotatedby=cratedb_sqlparse */\n' + response.text | ||
|
||
with open(str(PARSER_COMPILE_PATH.parent / file['filename']), 'w') as f: | ||
f.write(text) | ||
|
||
def compile_grammar(target: Target, path: str): | ||
|
||
def compile_grammar(target: Antlr4Target): | ||
""" | ||
Compiles antlr4 files into Python code. | ||
Compiles antlr4 files into `target` code. | ||
""" | ||
for file in GRAMMAR.values(): | ||
|
||
base_dir = build_options['antlr4_compiled_target_output'][target] | ||
sub_dir = build_options['antlr4_compiled_target_subdir'] | ||
|
||
for file in build_options['files']: | ||
subprocess.run( | ||
[ | ||
'antlr4', f'-Dlanguage={target.value}', | ||
'-o', str(PARSER_COMPILE_PATH / path), | ||
'-o', | ||
str(PARSER_COMPILE_PATH / base_dir / sub_dir), | ||
file['filename'] | ||
] | ||
) | ||
|
||
|
||
def patch_lexer(): | ||
def patch_lexer(target: Antlr4Target): | ||
""" | ||
Patches the lexer file, removing bad syntax generated by Antlr4. | ||
""" | ||
|
||
REMOVE_LINES = [ | ||
'import io.crate.sql.AbstractSqlBaseLexer;', | ||
] | ||
sqlbaselexer_pyfile = PARSER_COMPILE_PATH / GRAMMAR['lexer']['filename'].replace('g4', 'py') | ||
text = pathlib.Path(sqlbaselexer_pyfile).read_text() | ||
|
||
# We remove lines that do not work properly. | ||
# If more targets are added, this needs to be improved. | ||
extension = 'py' if target == Antlr4Target.python else 'js' | ||
|
||
base_dir = build_options['antlr4_compiled_target_output'][target] | ||
sub_dir = build_options['antlr4_compiled_target_subdir'] | ||
file_name = build_options['files'][0]['filename'].replace('g4', extension) | ||
|
||
lexer_file = PARSER_COMPILE_PATH / base_dir / sub_dir / file_name | ||
|
||
text = pathlib.Path(lexer_file).read_text() | ||
|
||
for text_to_remove in REMOVE_LINES: | ||
text = text.replace(text_to_remove, | ||
'# Code removed by cratedb_sqlparse.setup_grammar.patch_lexer') | ||
text = text.replace(text_to_remove, '') | ||
|
||
pathlib.Path(lexer_file).write_text(text) | ||
|
||
|
||
def set_version(target: Antlr4Target, version: str): | ||
""" | ||
Writes the compiled CrateDB version into the target package; | ||
the strategy differs depending on the target. | ||
""" | ||
base_dir = build_options['antlr4_compiled_target_output'][target] | ||
sub_dir = build_options['antlr4_compiled_target_subdir'] | ||
|
||
target_path = (PARSER_COMPILE_PATH / base_dir / sub_dir).parent | ||
|
||
version = f'"{version}"' # Escape quotes on echo command. | ||
echo_command = '' | ||
|
||
if target == Antlr4Target.python: | ||
echo_command = f"\necho __cratedb_version__ = {repr(version)} >> {target_path / '__init__.py'}" | ||
|
||
pathlib.Path(sqlbaselexer_pyfile).write_text(text) | ||
if target == Antlr4Target.js: | ||
echo_command = f"\necho export const __cratedb_version__ = {repr(version)} >> {target_path / 'index.js'}" | ||
|
||
subprocess.run(echo_command, shell=True) | ||
|
||
if __name__ == '__main__': | ||
download_cratedb_grammar() | ||
compile_grammar(Target.js, 'cratedb_sqlparse_js/parser/generated_parser') | ||
patch_lexer() | ||
# if __name__ == '__main__': | ||
# download_cratedb_grammar('5.6.4') | ||
# compile_grammar(Antlr4Target.js) | ||
# patch_lexer(Antlr4Target.js) | ||
# set_version(Antlr4Target.js, '5.63.4') |