-
Notifications
You must be signed in to change notification settings - Fork 581
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add support for analysis of source code/scripted languages #1080
base: master
Are you sure you want to change the base?
Changes from 1 commit
bbd3f70
8173397
428f6bc
a6d7ba2
80bf78b
cf3dc7e
9d7f575
3d4b4ec
eca7ead
5fd953f
1f79db9
a58bc0b
5ddb8ba
31e2fb9
5bf3f18
a4529fc
d5de9a1
6c10458
9bd9824
2594849
619ed94
5e23802
5d83e8d
9570523
7c5e6e3
1e0326a
ca1939f
d7ab2db
5cfbecc
26cc1bc
2a9e76f
672ca71
ca426ca
fd80277
d0c4acb
ad31d83
e52a9b3
b27713b
b2df2b0
a0379a6
eeecb63
cebc5e1
d7dcc94
32dc5ff
5e85a6e
614900f
bb08181
1fd9d4a
7ba978f
25cf09b
e693573
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,10 @@ | ||
from tree_sitter import Language | ||
|
||
build_dir = "build/my-languages.so" | ||
languages = [ | ||
"vendor/tree-sitter-c-sharp", | ||
] | ||
|
||
|
||
def ts_build(): | ||
Language.build_library(build_dir, languages) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
from typing import List, Tuple | ||
from typing import List, Tuple, Iterator | ||
|
||
from tree_sitter import Node, Tree, Parser | ||
|
||
import capa.features.extractors.ts.sig | ||
import capa.features.extractors.ts.build | ||
import capa.features.extractors.ts.query | ||
from capa.features.address import FileOffsetRangeAddress | ||
from capa.features.extractors.ts.query import QueryBinding | ||
|
@@ -16,6 +17,7 @@ class TreeSitterExtractorEngine: | |
import_signatures: set | ||
|
||
def __init__(self, language: str, path: str): | ||
capa.features.extractors.ts.build.ts_build() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hm, let's find a better place for this initialization. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A global in this file is a good place to start. |
||
self.language = language | ||
self.query = capa.features.extractors.ts.query.QueryBinding(language) | ||
self.import_signatures = capa.features.extractors.ts.sig.load_import_signatures(language) | ||
|
@@ -27,6 +29,9 @@ def __init__(self, language: str, path: str): | |
def get_language(self): | ||
return self.language | ||
|
||
def get_ts_language(self): | ||
return self.query.language | ||
|
||
def parse(self): | ||
self.parser = Parser() | ||
self.parser.set_language(self.get_ts_language()) | ||
|
@@ -38,37 +43,46 @@ def get_new_objects(self, node: Node) -> List[Tuple[Node, str]]: | |
def get_object_id(self, node: Node) -> Node: | ||
return node.child_by_field_name(self.query.new_object_field_name) | ||
|
||
def get_new_object_ids(self, node: Node) -> Iterator[Node]: | ||
for obj_node, _ in self.get_new_objects(node): | ||
yield self.get_object_id(obj_node) | ||
|
||
def get_import_names(self, node: Node) -> List[Tuple[Node, str]]: | ||
join_names = capa.features.extractors.ts.sig.get_name_joiner(self.language) | ||
import_names = [] | ||
namespaces = set([self.get_range(node) for node, _ in self.get_all_namespaces()]) | ||
for node, _ in self.get_new_objects(node): | ||
for node, _ in self.get_new_object_ids(node): | ||
for namespace in namespaces: | ||
name = join_names(namespace, self.get_range(node)) | ||
if name in self.import_signatures: | ||
import_names.append(name) | ||
return import_names | ||
|
||
def get_functions(self, node: Node) -> List[Tuple[Node, str]]: | ||
return self.query.function_def.captures(node) | ||
|
||
def get_all_functions(self) -> List[Tuple[Node, str]]: | ||
return self.get_functions(self.tree.root_node) | ||
def get_function_definitions(self, node: Node = None) -> List[Tuple[Node, str]]: | ||
return self.query.function_definition.captures(node if node is not None else self.tree.root_node) | ||
|
||
def get_function_definition_id(self, node: Node) -> Node: | ||
return node.child_by_field_name(self.query.function_def_field_name) | ||
return node.child_by_field_name(self.query.function_definition_field_name) | ||
|
||
def get_function_definition_ids(self, node: Node) -> Iterator[Node]: | ||
for fn_node, _ in self.get_function_definitions(node): | ||
yield self.get_function_definition_id(fn_node) | ||
|
||
def get_function_calls(self, node: Node) -> List[Tuple[Node, str]]: | ||
return self.query.function_call.captures(node) | ||
|
||
def get_function_call_id(self, node: Node) -> Node: | ||
return node.child_by_field_name(self.query.function_call_field_name) | ||
|
||
def get_function_call_ids(self, node: Node) -> Iterator[Node]: | ||
for fn_node, _ in self.get_function_calls(node): | ||
yield self.get_function_call_id(fn_node) | ||
|
||
def get_function_names(self, node: Node) -> List[Tuple[Node, str]]: | ||
join_names = capa.features.extractors.ts.sig.get_name_joiner(self.language) | ||
function_names = [] | ||
namespaces = set([self.get_range(node) for node, _ in self.get_all_namespaces()]) | ||
for node, _ in self.get_function_calls(node): | ||
for node, _ in self.get_function_call_ids(node): | ||
for namespace in namespaces: | ||
name = join_names(namespace, self.get_range(node)) | ||
if name in self.import_signatures: | ||
|
@@ -97,4 +111,4 @@ def get_address(self, node: Node): | |
return FileOffsetRangeAddress(node.start_byte, node.end_byte) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I love this. |
||
|
||
def get_default_address(self): | ||
return self.get_addr(self.tree.root_node) | ||
return self.get_address(self.tree.root_node) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this mean we only support Linux?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tree-sitter needs to compile its (C) language bindings. Although I have limited knowledge of package management, I've suggested to Moritz that we should precompile and package the supported tree-sitter bindings for each platform we support. The current state is a temporary measure.