Skip to content

Commit

Permalink
Rework setup grammar script to better handle more targets.
Browse files Browse the repository at this point in the history
  • Loading branch information
surister committed Apr 16, 2024
1 parent a002e68 commit fb3b2de
Showing 1 changed file with 90 additions and 35 deletions.
125 changes: 90 additions & 35 deletions setup_grammar.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,125 @@
import datetime
import pathlib
import subprocess
from enum import Enum

import requests

class Antlr4Target(Enum):
    """
    Code-generation targets this script can build.

    The value of each member is the language name handed to the ``antlr4``
    command line as ``-Dlanguage=<value>``.
    """

    js = 'JavaScript'
    python = 'Python3'


# Central build configuration shared by the download, compile, patch and
# version steps of this script.
build_options = {
    # Directory (relative to this script) that holds each target's package.
    'antlr4_compiled_target_output': {
        Antlr4Target.js: 'cratedb_sqlparse_js',
        Antlr4Target.python: 'cratedb_sqlparse_py',
    },

    # Sub-directory, inside the target's package directory, that receives the
    # code generated by antlr4.
    'antlr4_compiled_target_subdir': 'parser/generated_parser',

    # List of '.g4' files that will be built.
    # `{version}` in the url is substituted with a CrateDB branch or tag.
    # The lexer must stay first: `patch_lexer` reads entry 0.
    'files': [
        {
            'url': 'https://github.com/crate/crate/raw/{version}/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseLexer.g4',
            'filename': 'SqlBaseLexer.g4',
        },
        {
            'url': 'https://github.com/crate/crate/raw/{version}/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseParser.g4',
            'filename': 'SqlBaseParser.g4',
        },
    ],
}

PARSER_COMPILE_PATH = pathlib.Path(__file__).parent


# `Target` is kept as an alias of `Antlr4Target` rather than as a second enum
# with the same members: members of two distinct Enum classes never compare
# equal, so a member of a separate `Target` enum could not be used as a key
# into build_options['antlr4_compiled_target_output'].
# `Target.js` and `Target.python` keep working through the alias.
Target = Antlr4Target


def download_cratedb_grammar(version='master'):
    """
    Downloads CrateDB's `version` grammar files.

    Every entry of `build_options['files']` is fetched from its url (with
    `{version}` substituted) and written to disk, prefixed with a comment
    recording the requested version and the time of download.

    Version should match a branch or a tag; for a list of tags run:
    $ curl https://api.github.com/repos/crate/crate/tags | jq -r '.[] | .name'

    :param version: CrateDB branch or tag to download, defaults to 'master'.
    :raises requests.HTTPError: if a grammar file cannot be fetched, e.g.
        because `version` does not exist.
    """
    for file in build_options['files']:
        response = requests.get(file['url'].format(version=version), timeout=30)

        # Fail loudly on 4xx/5xx; otherwise the body of the error page would
        # be written out as if it were a grammar file.
        response.raise_for_status()

        # We annotate the CrateDB branch and date of download to the Grammar files for reference.
        text = f'/* crate_branch={version}, at={datetime.datetime.now()}, annotatedby=cratedb_sqlparse */\n' + response.text

        # NOTE(review): the files land in the parent of this script's
        # directory, while compile_grammar hands the bare filename to antlr4;
        # confirm the working directory used for the build makes both agree.
        with open(str(PARSER_COMPILE_PATH.parent / file['filename']), 'w') as f:
            f.write(text)

def compile_grammar(target: Antlr4Target):
    """
    Compiles antlr4 files into `target` code.

    Runs the `antlr4` command once per entry of `build_options['files']`.
    The generated code is written to
    PARSER_COMPILE_PATH / <target package directory> / <generated parser sub-directory>,
    both taken from `build_options`.

    :param target: language to generate code for.
    :raises subprocess.CalledProcessError: if `antlr4` exits with a non-zero
        status.
    """
    base_dir = build_options['antlr4_compiled_target_output'][target]
    sub_dir = build_options['antlr4_compiled_target_subdir']

    # The output directory does not depend on the grammar file; build it once.
    output_dir = str(PARSER_COMPILE_PATH / base_dir / sub_dir)

    for file in build_options['files']:
        subprocess.run(
            [
                'antlr4', f'-Dlanguage={target.value}',
                '-o',
                output_dir,
                file['filename'],
            ],
            # Surface a failed compilation here instead of letting a later
            # step trip over missing generated files.
            check=True,
        )


def patch_lexer(target: Antlr4Target):
    """
    Patches the lexer file, removing bad syntax generated by Antlr4.

    The generated lexer for `target` is read, every snippet listed in
    `remove_lines` is deleted from it, and the file is written back in place.

    :param target: language whose generated lexer should be patched.
    :raises KeyError: if `target` has no known file extension.
    """
    # We remove lines that do not properly work.
    remove_lines = [
        'import io.crate.sql.AbstractSqlBaseLexer;',
    ]

    # File extension of the generated sources per target; add an entry here
    # when a new target is introduced.
    extensions = {
        Antlr4Target.python: '.py',
        Antlr4Target.js: '.js',
    }

    base_dir = build_options['antlr4_compiled_target_output'][target]
    sub_dir = build_options['antlr4_compiled_target_subdir']

    # The lexer grammar is the first entry of build_options['files'].
    # Only the suffix is swapped, so a 'g4' elsewhere in the name is untouched.
    grammar_name = build_options['files'][0]['filename']
    file_name = pathlib.Path(grammar_name).with_suffix(extensions[target]).name

    lexer_file = PARSER_COMPILE_PATH / base_dir / sub_dir / file_name

    text = lexer_file.read_text()

    # The offending snippets are removed outright (not replaced by a comment),
    # so the result does not depend on the target language's comment syntax.
    for text_to_remove in remove_lines:
        text = text.replace(text_to_remove, '')

    lexer_file.write_text(text)


def set_version(target: Antlr4Target, version: str):
    """
    Specifies the compiled version to the target package,
    depending on the package the strategy differs.

    A single line declaring `__cratedb_version__` is appended to a file in
    the parent of the generated parser directory: `__init__.py` for the
    Python target, `index.js` for the JavaScript target.

    :param target: package that receives the version declaration.
    :param version: version string to record, e.g. '5.6.4'.
    :raises KeyError: if `target` is not a configured target.
    """
    base_dir = build_options['antlr4_compiled_target_output'][target]
    sub_dir = build_options['antlr4_compiled_target_subdir']

    target_path = (PARSER_COMPILE_PATH / base_dir / sub_dir).parent

    # (file to append to, declaration written to it) for every target.
    strategies = {
        Antlr4Target.python: ('__init__.py', '__cratedb_version__ = "{version}"'),
        Antlr4Target.js: ('index.js', 'export const __cratedb_version__ = "{version}"'),
    }
    file_name, declaration = strategies[target]

    # Append directly instead of shelling out to `echo ... >> file`: this
    # works regardless of the shell in use and of spaces or quotes in the
    # path, while producing the same line.
    with open(target_path / file_name, 'a') as f:
        f.write(declaration.format(version=version) + '\n')

if __name__ == '__main__':
download_cratedb_grammar()
compile_grammar(Target.js, 'cratedb_sqlparse_js/parser/generated_parser')
patch_lexer()
# if __name__ == '__main__':
# download_cratedb_grammar('5.6.4')
# compile_grammar(Antlr4Target.js)
# patch_lexer(Antlr4Target.js)
# set_version(Antlr4Target.js, '5.6.4')

0 comments on commit fb3b2de

Please sign in to comment.