From e29afe681dd7ef06dcbd6d02d4f039bbed165bf5 Mon Sep 17 00:00:00 2001 From: Benjamin Bannier Date: Mon, 15 Jul 2024 10:03:01 +0200 Subject: [PATCH 1/6] Add test for snapshot testing We are currently using a huge file `tests/data/test1.zeek` for manual "snapshot testing". The idea is that one would add samples to the file and then manually update the baseline `tests/data/test1.zeek.out`. As this file grows this manual process becomes very cumbersome. Additionally when the test fails it can be very hard to identify which parts of the baseline differed. This patch introduces a dedicated test `test_samples` which automates this. The idea is that one would add new compact samples to `test/samples` which the test discovers automatically. When the test is run equality assertions are automatically routed to checks against managed snapshots which can be updated with `pytest --snapshot-update`. This should reduce overhead from adding/updating snapshots which should make it easier to add coverage. We will migrate the existing tests over in follow-up commits. --- .pre-commit-config.yaml | 6 +- pyproject.toml | 7 +- ...mples[sample0][testssamplestest1.zeek].raw | 208 ++++++++++++++++++ tests/test_samples.py | 65 ++++++ 4 files changed, 281 insertions(+), 5 deletions(-) create mode 100644 tests/__snapshots__/test_samples/test_samples[sample0][testssamplestest1.zeek].raw create mode 100644 tests/test_samples.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3dc22f3..a8c3391 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,9 +14,11 @@ repos: hooks: - id: pylint additional_dependencies: + - "pytest==8.2.2" + - "syrupy==4.6.1" - "setuptools" - - "tree-sitter>=0.21.3" - - "tree-sitter-zeek" + - "tree-sitter==0.22.3" + - "tree-sitter-zeek==0.1.1" - repo: https://github.com/psf/black rev: 23.10.1 diff --git a/pyproject.toml b/pyproject.toml index 06d4c99..460fa67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,13 +27,14 @@ keywords = [ requires-python = ">=3.9" dependencies = [ - "tree-sitter == 0.22.3", - "tree-sitter-zeek == 0.1.1", + "tree-sitter==0.22.3", + "tree-sitter-zeek==0.1.1", ] [project.optional-dependencies] dev = [ - "pytest>=8.1.1", + "pytest==8.2.2", + "syrupy==4.6.1", ] [[project.maintainers]] diff --git a/tests/__snapshots__/test_samples/test_samples[sample0][testssamplestest1.zeek].raw b/tests/__snapshots__/test_samples/test_samples[sample0][testssamplestest1.zeek].raw new file mode 100644 index 0000000..4cb187a --- /dev/null +++ b/tests/__snapshots__/test_samples/test_samples[sample0][testssamplestest1.zeek].raw @@ -0,0 +1,208 @@ +##! A test script with all kinds of formatting errors. +##! +##! This Zeekygen head comment has multiple lines with more detail +##! about this module. It spans two lines. + +@load foo/bar/baz.zeek # A "preprocessor" line with comment +@load blum/frub + +@if ( getenv("ZEEK_PORT") != "" ) +redef Broker::default_port = to_port(getenv("ZEEK_PORT")); +@endif + +module Test; + +export { + # A regular comment + type An::ID: enum { + ## A Zeekygen comment + ENUM_VAL1, ##< A Zeekygen post-comment + ##< that continues on the next line + ## Anoter Zeekygen comment + PRINTLOG + }; + + # An enum, with assignments, on multiple lines, which should remain. + # Also, we separate via an empty minor comment, which used to + # trigger zeek/tree-sitter-zeek#9: + # + type AssigedEnum: enum { FOO = 1, BAR = 10, }; + + # Another one that we put on one line. That should also remain + type SingeLineEnum: enum { FOO, BAR }; + + ## A constant. + const a_constant = T &redef; + + ## An option. + option an_option: table[string, count] of string = table() &redef; + + ## A function. + global a_function: function(foo: BAR): bool; + + ## A lambda. + const a_lambda: function(foo: string) = function(foo: string) + { } &redef; +} + +# Another type of sequence where zeek-format considers existing linebreaks. +# This one should stay as-is... +const deltas1: vector of double = { 0.01, 0.01, 0.01, 0.01, 0.01, 0.01 }; +# ... while this one gets fully line-broken: +const deltas2: vector of double = { + 0.01, + 0.02, # WHOA! + 0.01, + 0.03, # DOUBLE whoa! + 0.01, + 0.01 +}; + +function a_function(a: int, b: count, another_argument_for_linewrapping: string) + : a_long_boolean + { + if ( foo in bar ) + return somthing[foo$bar](bar); + else + # A comment + return T; + + # Mixed {}-blocks in these if-else statements: + if ( foo ) + { + bar(); + } + else + baz(); + + if ( ! foo ) + bar(); + else + { + baz(); + } + + if ( a_long_var_a in a_long_var_b + && ( c in d || e in f ) + && a_long_var_g in a_long_var_h ) + { + return somthing[foo$bar](bar); + } + else + { + # A comment + return T; + } + + # This shouldn't break the closing ")" onto a new line + if ( another_very_long_access_to_some_member[foo] !in Some::other$nestedthing ) + { + # Directives should neither get indented nor wrapped: +@if ( VERY_LOOOOOOOOOOOOONG_FOO && VERY_LOOOOOOOOOOOOONG_BAR && VERY_LOOOOOOOOOOOOONG_BAZ ) + return T; +@endif + } + + # This shouldn't break the 0 index number onto a new line + another_very_long_access_to_some_member += + yetanotherveryveryveryverylongthing[0]; + + # Normal arithmetic should not always break around "+", in contrast to strings (see below): + local length = bytestring_to_count(data[mres$off - 1 + 8 : mres$off - 1 + 10 + 23 / 15 + - 3]); + + if ( |foo| > 0 ) + print "foo"; + else if ( bar && baz ) + { + print "bar"; + } + else if ( baz ) + # This comment should not move. Also, the following should + # _not_ wrap because the long string alone is too long for + # the line limit. + print fmt("%s", "Lovely patio around the fountain. Spent a lovely lunch on the patio."); + else + print "Lovely patio around the fountain. " + + "Spent a lovely lunch on the patio. " + + "The menu was inviting and lots of things I wanted to order. " + + "Ordered the Eutropia pizza thin crust-YUM! " + + "Will go back the next time I'm in Berkeley."; + + when [x] ( ( local x = foo() ) && x == 42 ) + { + print x; + } + timeout 5sec + { + print "timeout"; + } + } + +function b_function(a: int, b: count, + another_argument_for_longer_linewrapping: string): string + { + # This should stay on one line: + local r1 = SomeRecord($foo=some_foo(), $bar=some$long_bar()); + # This should not: + local t2 = SomeRecord($foo=some_foo(), $bar=some$long_bar()); + + call( # with an interrupting comment + arg1, arg2); + + switch ( foo ) + { + case A: + bar(); + fallthrough; + case B: + bar(); + break; + default: + bar(); + baz(); + } + + while ( T ) + do_one_thing(); + + while ( ! F ) + { + do_another_thing(); + and_another(); + } + + for ( i in some_set ) + do_one_thing(); + for ( i in some_set ) + { + do_one_thing(); + and_another(); + } + + { + a_block_for_the_sake_of_it(); + } + } + +function blanklines() + { + foo(); + bar(); + + # With one comment + baz(); # and another comment + + # String-like directives: + print @DIR, @FILENAME; + } + +# This verifies handling of comment-only content in a block +function comments() + { + # TODO: fix this + # and this too + } + +function no_comments() + { } diff --git a/tests/test_samples.py b/tests/test_samples.py new file mode 100644 index 0000000..6ecd1b2 --- /dev/null +++ b/tests/test_samples.py @@ -0,0 +1,65 @@ +"""Snapshot test of formatting of sample files. + +Code samples are located in tests/samples. To add a new sample create a new +file and create a snapshot of the formatting result with + + $ pytest --snapshot-update + +Commit both the sample as well as the updated snapshot. +""" + +import io +from pathlib import Path + +import pytest +from syrupy.assertion import SnapshotAssertion +from syrupy.extensions.single_file import SingleFileSnapshotExtension + +from testutils import zeekscript + +SAMPLES_DIR = Path(__file__).parent / "samples" + + +# Use a custom snapshot fixture so we emit one file per generated test case +# instead of one per module. +@pytest.fixture +def snapshot(snapshot: SnapshotAssertion): # pylint: disable=redefined-outer-name + return snapshot.use_extension(SingleFileSnapshotExtension) + + +def _format(script: zeekscript.Script): + """Formats a given `Script`""" + buf = io.BytesIO() + script.format(buf) + return buf.getvalue() + + +def _get_samples(): + """Helper to enumerate samples""" + + # We exclude directories since we store snapshots along with the samples. + # This assumes that there are no tests in subdirectories of `SAMPLES_DIR`. + try: + return [sample for sample in SAMPLES_DIR.iterdir() if sample.is_file()] + except FileNotFoundError: + return [] + + +# For each file in `SAMPLES_DIR` test formatting of the file. +@pytest.mark.parametrize("sample", _get_samples()) +# pylint: disable-next=redefined-outer-name +def test_samples(sample: Path, snapshot: SnapshotAssertion): + input_ = zeekscript.Script(sample) + + assert input_.parse(), f"failed to parse input {sample}" + assert not input_.has_error(), f"parse result for {sample} has parse errors" + + name = str(sample.relative_to(SAMPLES_DIR.parent.parent)) + + output = _format(input_) + assert output == snapshot( + name=name + ), f"formatted {sample} inconsistent with snapshot" + + output2 = _format(input_) + assert output2 == output, f"idempotency violation for {sample}" From 1bc1a82de11dd8a464f3a2ec3c5b946d2a554bb6 Mon Sep 17 00:00:00 2001 From: Benjamin Bannier Date: Mon, 15 Jul 2024 10:20:23 +0200 Subject: [PATCH 2/6] Seed snapshot tests with `test1.zeek` --- .pre-commit-config.yaml | 2 +- tests/samples/test1.zeek | 176 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 tests/samples/test1.zeek diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a8c3391..4a9a00b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: tests/data +exclude: tests/(data|__snapshots__|samples) repos: - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/tests/samples/test1.zeek b/tests/samples/test1.zeek new file mode 100644 index 0000000..f32711a --- /dev/null +++ b/tests/samples/test1.zeek @@ -0,0 +1,176 @@ +##! A test script with all kinds of formatting errors. +##! +##! This Zeekygen head comment has multiple lines with more detail +##! about this module. It spans two lines. + +@load foo/bar/baz.zeek # A "preprocessor" line with comment +@load blum/frub + +@if(getenv("ZEEK_PORT") != "") +redef Broker::default_port = to_port(getenv( "ZEEK_PORT")); +@endif + +module Test; + + export { + # A regular comment + type An::ID: enum { + ## A Zeekygen comment + ENUM_VAL1, ##< A Zeekygen post-comment + ##< that continues on the next line + ## Anoter Zeekygen comment + PRINTLOG + }; + + # An enum, with assignments, on multiple lines, which should remain. + # Also, we separate via an empty minor comment, which used to + # trigger zeek/tree-sitter-zeek#9: + # + type AssigedEnum: enum { + FOO=1, + BAR=10, + }; + + # Another one that we put on one line. That should also remain + type SingeLineEnum: enum { FOO, BAR }; + + ## A constant. + const a_constant=T &redef ; + + ## An option. + option an_option: table[ string,count ] of string=table() &redef; + + ## A function. + global a_function : function(foo: BAR) :bool; + + ## A lambda. + const a_lambda: function( foo: string ) = function (foo: string) { + } &redef; + +} + +# Another type of sequence where zeek-format considers existing linebreaks. +# This one should stay as-is... +const deltas1: vector of double = { 0.01, 0.01, 0.01, 0.01, 0.01, 0.01 }; +# ... while this one gets fully line-broken: +const deltas2: vector of double = { 0.01, 0.02, # WHOA! + 0.01, 0.03, # DOUBLE whoa! + 0.01, 0.01 }; + +function a_function ( a: int, b: count, another_argument_for_linewrapping: string ) : a_long_boolean + { + if ( foo in bar ) + return somthing [ foo$bar ] (bar) ; + else + # A comment + return T; + + # Mixed {}-blocks in these if-else statements: + if ( foo ) { + bar(); + } else baz(); + + if ( !foo ) bar(); + else { baz(); } + + if ( a_long_var_a in a_long_var_b && ( c in d || e in f ) && + a_long_var_g in a_long_var_h ) + { + return somthing [ foo$bar ] (bar) ; + } + else + { + # A comment + return T; + } + + # This shouldn't break the closing ")" onto a new line + if ( another_very_long_access_to_some_member[foo] !in Some::other$nestedthing ) + { + # Directives should neither get indented nor wrapped: + @if ( VERY_LOOOOOOOOOOOOONG_FOO && VERY_LOOOOOOOOOOOOONG_BAR && VERY_LOOOOOOOOOOOOONG_BAZ ) + return T; + @endif + } + + # This shouldn't break the 0 index number onto a new line + another_very_long_access_to_some_member += yetanotherveryveryveryverylongthing[0]; + + # Normal arithmetic should not always break around "+", in contrast to strings (see below): + local length = bytestring_to_count(data[ mres$off - 1 + 8 : mres$off - 1 + 10 + 23 / 15 - 3]); + + if ( | foo | > 0 ) + print "foo"; + else if (bar && baz) { + print "bar"; + } else if ( baz) + # This comment should not move. Also, the following should + # _not_ wrap because the long string alone is too long for + # the line limit. + print fmt("%s", "Lovely patio around the fountain. Spent a lovely lunch on the patio."); + else + print "Lovely patio around the fountain. " + "Spent a lovely lunch on the patio. " + "The menu was inviting and lots of things I wanted to order. " + "Ordered the Eutropia pizza thin crust-YUM! " + "Will go back the next time I'm in Berkeley."; + + when [x]((local x=foo()) && x == 42) + { print x; } timeout 5sec + { + print "timeout"; + } + } + +function b_function ( a: int, b: count, another_argument_for_longer_linewrapping: string ) : string + { + # This should stay on one line: + local r1 = SomeRecord($foo=some_foo(), $bar=some$long_bar()); + # This should not: + local t2 = SomeRecord($foo=some_foo(), + $bar=some$long_bar() + ); + + call( # with an interrupting comment + arg1, arg2); + + switch ( foo ) { + case A: bar(); fallthrough; + case B: bar(); break; + default: bar(); baz(); + } + + while ( T ) + do_one_thing(); + + while ( ! F ) { do_another_thing(); and_another(); } + + for ( i in some_set ) + do_one_thing(); + for ( i in some_set ) + { do_one_thing(); and_another(); } + + { a_block_for_the_sake_of_it(); } + } + +function blanklines() { + + foo(); + bar(); + + # With one comment + baz(); # and another comment + + # String-like directives: + print @DIR, @FILENAME; + +} + +# This verifies handling of comment-only content in a block +function comments() + { + # TODO: fix this + # and this too + } + +function no_comments() + { + } + + From 35aa31b534e770ceb7d2a23e0d69a897b91964a1 Mon Sep 17 00:00:00 2001 From: Benjamin Bannier Date: Mon, 15 Jul 2024 10:25:41 +0200 Subject: [PATCH 3/6] Simplify general test data This test file is used for e.g., CLI tests and we want to keep it even with snapshot tests. This patch simplifies it so it is easier to maintain while still performing some formatting. --- tests/data/test1.zeek | 176 +------------------------------- tests/data/test1.zeek.out | 209 +------------------------------------- 2 files changed, 6 insertions(+), 379 deletions(-) diff --git a/tests/data/test1.zeek b/tests/data/test1.zeek index f32711a..da67f26 100644 --- a/tests/data/test1.zeek +++ b/tests/data/test1.zeek @@ -1,176 +1,6 @@ -##! A test script with all kinds of formatting errors. -##! -##! This Zeekygen head comment has multiple lines with more detail -##! about this module. It spans two lines. -@load foo/bar/baz.zeek # A "preprocessor" line with comment -@load blum/frub - -@if(getenv("ZEEK_PORT") != "") -redef Broker::default_port = to_port(getenv( "ZEEK_PORT")); -@endif - -module Test; - - export { - # A regular comment - type An::ID: enum { - ## A Zeekygen comment - ENUM_VAL1, ##< A Zeekygen post-comment - ##< that continues on the next line - ## Anoter Zeekygen comment - PRINTLOG - }; - - # An enum, with assignments, on multiple lines, which should remain. - # Also, we separate via an empty minor comment, which used to - # trigger zeek/tree-sitter-zeek#9: - # - type AssigedEnum: enum { - FOO=1, - BAR=10, - }; - - # Another one that we put on one line. That should also remain - type SingeLineEnum: enum { FOO, BAR }; - - ## A constant. - const a_constant=T &redef ; - - ## An option. - option an_option: table[ string,count ] of string=table() &redef; - - ## A function. - global a_function : function(foo: BAR) :bool; - - ## A lambda. - const a_lambda: function( foo: string ) = function (foo: string) { - } &redef; - -} - -# Another type of sequence where zeek-format considers existing linebreaks. -# This one should stay as-is... -const deltas1: vector of double = { 0.01, 0.01, 0.01, 0.01, 0.01, 0.01 }; -# ... while this one gets fully line-broken: -const deltas2: vector of double = { 0.01, 0.02, # WHOA! - 0.01, 0.03, # DOUBLE whoa! - 0.01, 0.01 }; - -function a_function ( a: int, b: count, another_argument_for_linewrapping: string ) : a_long_boolean - { - if ( foo in bar ) - return somthing [ foo$bar ] (bar) ; - else - # A comment - return T; - - # Mixed {}-blocks in these if-else statements: - if ( foo ) { - bar(); - } else baz(); - - if ( !foo ) bar(); - else { baz(); } - - if ( a_long_var_a in a_long_var_b && ( c in d || e in f ) && - a_long_var_g in a_long_var_h ) - { - return somthing [ foo$bar ] (bar) ; - } - else - { - # A comment - return T; - } - - # This shouldn't break the closing ")" onto a new line - if ( another_very_long_access_to_some_member[foo] !in Some::other$nestedthing ) - { - # Directives should neither get indented nor wrapped: - @if ( VERY_LOOOOOOOOOOOOONG_FOO && VERY_LOOOOOOOOOOOOONG_BAR && VERY_LOOOOOOOOOOOOONG_BAZ ) - return T; - @endif - } - - # This shouldn't break the 0 index number onto a new line - another_very_long_access_to_some_member += yetanotherveryveryveryverylongthing[0]; - - # Normal arithmetic should not always break around "+", in contrast to strings (see below): - local length = bytestring_to_count(data[ mres$off - 1 + 8 : mres$off - 1 + 10 + 23 / 15 - 3]); - - if ( | foo | > 0 ) - print "foo"; - else if (bar && baz) { - print "bar"; - } else if ( baz) - # This comment should not move. Also, the following should - # _not_ wrap because the long string alone is too long for - # the line limit. - print fmt("%s", "Lovely patio around the fountain. Spent a lovely lunch on the patio."); - else - print "Lovely patio around the fountain. " + "Spent a lovely lunch on the patio. " + "The menu was inviting and lots of things I wanted to order. " + "Ordered the Eutropia pizza thin crust-YUM! " + "Will go back the next time I'm in Berkeley."; - - when [x]((local x=foo()) && x == 42) - { print x; } timeout 5sec - { - print "timeout"; - } - } - -function b_function ( a: int, b: count, another_argument_for_longer_linewrapping: string ) : string - { - # This should stay on one line: - local r1 = SomeRecord($foo=some_foo(), $bar=some$long_bar()); - # This should not: - local t2 = SomeRecord($foo=some_foo(), - $bar=some$long_bar() - ); - - call( # with an interrupting comment - arg1, arg2); - - switch ( foo ) { - case A: bar(); fallthrough; - case B: bar(); break; - default: bar(); baz(); - } - - while ( T ) - do_one_thing(); - - while ( ! F ) { do_another_thing(); and_another(); } - - for ( i in some_set ) - do_one_thing(); - for ( i in some_set ) - { do_one_thing(); and_another(); } - - { a_block_for_the_sake_of_it(); } - } - -function blanklines() { - - foo(); - bar(); - - # With one comment - baz(); # and another comment - - # String-like directives: - print @DIR, @FILENAME; - -} - -# This verifies handling of comment-only content in a block -function comments() - { - # TODO: fix this - # and this too - } - -function no_comments() - { - } +# A sample file which needs some formatting. +# NOTE: More extensive tests should go into unit tests or separate files in `tests/samples`. +global foo=1 +2 ; diff --git a/tests/data/test1.zeek.out b/tests/data/test1.zeek.out index 4cb187a..0636435 100644 --- a/tests/data/test1.zeek.out +++ b/tests/data/test1.zeek.out @@ -1,208 +1,5 @@ -##! A test script with all kinds of formatting errors. -##! -##! This Zeekygen head comment has multiple lines with more detail -##! about this module. It spans two lines. +# A sample file which needs some formatting. -@load foo/bar/baz.zeek # A "preprocessor" line with comment -@load blum/frub +# NOTE: More extensive tests should go into unit tests or separate files in `tests/samples`. -@if ( getenv("ZEEK_PORT") != "" ) -redef Broker::default_port = to_port(getenv("ZEEK_PORT")); -@endif - -module Test; - -export { - # A regular comment - type An::ID: enum { - ## A Zeekygen comment - ENUM_VAL1, ##< A Zeekygen post-comment - ##< that continues on the next line - ## Anoter Zeekygen comment - PRINTLOG - }; - - # An enum, with assignments, on multiple lines, which should remain. - # Also, we separate via an empty minor comment, which used to - # trigger zeek/tree-sitter-zeek#9: - # - type AssigedEnum: enum { FOO = 1, BAR = 10, }; - - # Another one that we put on one line. That should also remain - type SingeLineEnum: enum { FOO, BAR }; - - ## A constant. - const a_constant = T &redef; - - ## An option. - option an_option: table[string, count] of string = table() &redef; - - ## A function. - global a_function: function(foo: BAR): bool; - - ## A lambda. - const a_lambda: function(foo: string) = function(foo: string) - { } &redef; -} - -# Another type of sequence where zeek-format considers existing linebreaks. -# This one should stay as-is... -const deltas1: vector of double = { 0.01, 0.01, 0.01, 0.01, 0.01, 0.01 }; -# ... while this one gets fully line-broken: -const deltas2: vector of double = { - 0.01, - 0.02, # WHOA! - 0.01, - 0.03, # DOUBLE whoa! - 0.01, - 0.01 -}; - -function a_function(a: int, b: count, another_argument_for_linewrapping: string) - : a_long_boolean - { - if ( foo in bar ) - return somthing[foo$bar](bar); - else - # A comment - return T; - - # Mixed {}-blocks in these if-else statements: - if ( foo ) - { - bar(); - } - else - baz(); - - if ( ! foo ) - bar(); - else - { - baz(); - } - - if ( a_long_var_a in a_long_var_b - && ( c in d || e in f ) - && a_long_var_g in a_long_var_h ) - { - return somthing[foo$bar](bar); - } - else - { - # A comment - return T; - } - - # This shouldn't break the closing ")" onto a new line - if ( another_very_long_access_to_some_member[foo] !in Some::other$nestedthing ) - { - # Directives should neither get indented nor wrapped: -@if ( VERY_LOOOOOOOOOOOOONG_FOO && VERY_LOOOOOOOOOOOOONG_BAR && VERY_LOOOOOOOOOOOOONG_BAZ ) - return T; -@endif - } - - # This shouldn't break the 0 index number onto a new line - another_very_long_access_to_some_member += - yetanotherveryveryveryverylongthing[0]; - - # Normal arithmetic should not always break around "+", in contrast to strings (see below): - local length = bytestring_to_count(data[mres$off - 1 + 8 : mres$off - 1 + 10 + 23 / 15 - - 3]); - - if ( |foo| > 0 ) - print "foo"; - else if ( bar && baz ) - { - print "bar"; - } - else if ( baz ) - # This comment should not move. Also, the following should - # _not_ wrap because the long string alone is too long for - # the line limit. - print fmt("%s", "Lovely patio around the fountain. Spent a lovely lunch on the patio."); - else - print "Lovely patio around the fountain. " - + "Spent a lovely lunch on the patio. " - + "The menu was inviting and lots of things I wanted to order. " - + "Ordered the Eutropia pizza thin crust-YUM! " - + "Will go back the next time I'm in Berkeley."; - - when [x] ( ( local x = foo() ) && x == 42 ) - { - print x; - } - timeout 5sec - { - print "timeout"; - } - } - -function b_function(a: int, b: count, - another_argument_for_longer_linewrapping: string): string - { - # This should stay on one line: - local r1 = SomeRecord($foo=some_foo(), $bar=some$long_bar()); - # This should not: - local t2 = SomeRecord($foo=some_foo(), $bar=some$long_bar()); - - call( # with an interrupting comment - arg1, arg2); - - switch ( foo ) - { - case A: - bar(); - fallthrough; - case B: - bar(); - break; - default: - bar(); - baz(); - } - - while ( T ) - do_one_thing(); - - while ( ! F ) - { - do_another_thing(); - and_another(); - } - - for ( i in some_set ) - do_one_thing(); - for ( i in some_set ) - { - do_one_thing(); - and_another(); - } - - { - a_block_for_the_sake_of_it(); - } - } - -function blanklines() - { - foo(); - bar(); - - # With one comment - baz(); # and another comment - - # String-like directives: - print @DIR, @FILENAME; - } - -# This verifies handling of comment-only content in a block -function comments() - { - # TODO: fix this - # and this too - } - -function no_comments() - { } +global foo = 1 + 2; From 4805c4282d7bdd26952e26b0ff28bbc79629dfb0 Mon Sep 17 00:00:00 2001 From: Benjamin Bannier Date: Mon, 15 Jul 2024 10:36:17 +0200 Subject: [PATCH 4/6] Bump pre-commit hooks. All source changes in this patch are created automatically from `black`. --- .pre-commit-config.yaml | 8 +++---- setup.py | 1 + tests/test_dir_recursion.py | 42 +++++++++++++++++++++---------------- tests/testutils.py | 1 + zeekscript/__init__.py | 1 + zeekscript/cli.py | 1 + zeekscript/formatter.py | 1 + zeekscript/output.py | 1 + zeekscript/script.py | 6 +++--- 9 files changed, 37 insertions(+), 25 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4a9a00b..5093186 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ exclude: tests/(data|__snapshots__|samples) repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -21,17 +21,17 @@ repos: - "tree-sitter-zeek==0.1.1" - repo: https://github.com/psf/black - rev: 23.10.1 + rev: 24.4.2 hooks: - id: black - repo: https://github.com/asottile/pyupgrade - rev: v3.15.0 + rev: v3.16.0 hooks: - id: pyupgrade args: ["--py37-plus"] - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.37.0 + rev: v0.41.0 hooks: - id: markdownlint-fix diff --git a/setup.py b/setup.py index a478f3e..73699a2 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ """Installation setup.""" + from setuptools import setup diff --git a/tests/test_dir_recursion.py b/tests/test_dir_recursion.py index 1bfe978..c35689e 100755 --- a/tests/test_dir_recursion.py +++ b/tests/test_dir_recursion.py @@ -33,16 +33,18 @@ def tearDown(self): # pylint: disable-next=invalid-name def assertEqualContent(self, file1, file2): - with open(file1, encoding="utf-8") as hdl1, open( - file2, encoding="utf-8" - ) as hdl2: + with ( + open(file1, encoding="utf-8") as hdl1, + open(file2, encoding="utf-8") as hdl2, + ): self.assertEqual(hdl1.read(), hdl2.read()) # pylint: disable-next=invalid-name def assertNotEqualContent(self, file1, file2): - with open(file1, encoding="utf-8") as hdl1, open( - file2, encoding="utf-8" - ) as hdl2: + with ( + open(file1, encoding="utf-8") as hdl1, + open(file2, encoding="utf-8") as hdl2, + ): self.assertNotEqual(hdl1.read(), hdl2.read()) def test_recursive_formatting(self): @@ -51,9 +53,10 @@ def test_recursive_formatting(self): args = parser.parse_args(["-i", "-r", "a"]) # Python < 3.10 does not yet support parenthesized context managers: - with unittest.mock.patch( - "sys.stdout", new=io.StringIO() - ) as out, unittest.mock.patch("sys.stderr", new=io.StringIO()): + with ( + unittest.mock.patch("sys.stdout", new=io.StringIO()) as out, + unittest.mock.patch("sys.stderr", new=io.StringIO()), + ): ret = args.run_cmd(args) self.assertEqual(ret, 0) self.assertEqual(out.getvalue(), "4 files processed, 0 errors\n") @@ -80,9 +83,10 @@ def test_recurse_inplace(self): zeekscript.add_format_cmd(parser) args = parser.parse_args(["-ir"]) - with unittest.mock.patch("sys.stdout", new=io.StringIO()), unittest.mock.patch( - "sys.stderr", new=io.StringIO() - ) as err: + with ( + unittest.mock.patch("sys.stdout", new=io.StringIO()), + unittest.mock.patch("sys.stderr", new=io.StringIO()) as err, + ): ret = args.run_cmd(args) self.assertEqual(ret, 0) self.assertEqual( @@ -95,9 +99,10 @@ def test_dir_without_recurse(self): zeekscript.add_format_cmd(parser) args = parser.parse_args(["-i", "a"]) - with unittest.mock.patch("sys.stdout", new=io.StringIO()), unittest.mock.patch( - "sys.stderr", new=io.StringIO() - ) as err: + with ( + unittest.mock.patch("sys.stdout", new=io.StringIO()), + unittest.mock.patch("sys.stderr", new=io.StringIO()) as err, + ): ret = args.run_cmd(args) self.assertEqual(ret, 0) self.assertEqual( @@ -110,9 +115,10 @@ def test_recurse_without_inplace(self): zeekscript.add_format_cmd(parser) args = parser.parse_args(["-r", "a"]) - with unittest.mock.patch("sys.stdout", new=io.StringIO()), unittest.mock.patch( - "sys.stderr", new=io.StringIO() - ) as err: + with ( + unittest.mock.patch("sys.stdout", new=io.StringIO()), + unittest.mock.patch("sys.stderr", new=io.StringIO()) as err, + ): ret = args.run_cmd(args) self.assertEqual(ret, 1) self.assertEqual( diff --git a/tests/testutils.py b/tests/testutils.py index 4d1e202..f541fdc 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -1,4 +1,5 @@ """Helpers for the various test_*.py files.""" + import os import sys diff --git a/zeekscript/__init__.py b/zeekscript/__init__.py index d2cd2c5..f873c7d 100644 --- a/zeekscript/__init__.py +++ b/zeekscript/__init__.py @@ -1,4 +1,5 @@ """Wrapper around more low-level tests.""" + __version__ = "1.2.9-15" __all__ = ["cli", "error", "formatter", "node", "output", "script"] diff --git a/zeekscript/cli.py b/zeekscript/cli.py index 0dc5601..7bd8f4d 100644 --- a/zeekscript/cli.py +++ b/zeekscript/cli.py @@ -1,4 +1,5 @@ """This module provides reusable command line parsers and tooling.""" + import argparse import io import os diff --git a/zeekscript/formatter.py b/zeekscript/formatter.py index 9363c9d..48416f1 100644 --- a/zeekscript/formatter.py +++ b/zeekscript/formatter.py @@ -14,6 +14,7 @@ examine the difference by playing with `zeek-script parse ...` vs `zeek-script parse --concrete`. """ + import enum import inspect import os diff --git a/zeekscript/output.py b/zeekscript/output.py index 9231883..2a46d22 100644 --- a/zeekscript/output.py +++ b/zeekscript/output.py @@ -1,4 +1,5 @@ """Functionality related to output.""" + import os import sys diff --git a/zeekscript/script.py b/zeekscript/script.py index fb5bad4..5365207 100644 --- a/zeekscript/script.py +++ b/zeekscript/script.py @@ -470,9 +470,9 @@ def _patch_tree(self): node.next_cst_sibling = None if node.children[-1].next_cst_siblings: - node.children[-1].next_cst_siblings[ - -1 - ].next_cst_sibling = node.next_cst_siblings[0] + node.children[-1].next_cst_siblings[-1].next_cst_sibling = ( + node.next_cst_siblings[0] + ) node.next_cst_siblings[0].prev_cst_sibling = node.children[ -1 ].next_cst_siblings[-1] From 48740d8c2e43c5f6a170391d2b04e511497f5985 Mon Sep 17 00:00:00 2001 From: Benjamin Bannier Date: Mon, 15 Jul 2024 10:38:14 +0200 Subject: [PATCH 5/6] Bump pylint --- .pre-commit-config.yaml | 2 +- zeekscript/node.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5093186..b5a4faf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: - id: check-added-large-files - repo: https://github.com/PyCQA/pylint - rev: v3.0.1 + rev: v3.2.5 hooks: - id: pylint additional_dependencies: diff --git a/zeekscript/node.py b/zeekscript/node.py index 17b6646..ea0bbf2 100644 --- a/zeekscript/node.py +++ b/zeekscript/node.py @@ -143,8 +143,7 @@ def script_range(self, with_cst=False): node = self while node: if node.prev_cst_siblings: - if node.prev_cst_siblings[0].start_byte < start: - start = node.prev_cst_siblings[0].start_byte + start = max(start, node.prev_cst_siblings[0].start_byte) # No need to dig into child nodes, they won't have # any earlier content. break @@ -153,8 +152,7 @@ def script_range(self, with_cst=False): node = self while node: if node.next_cst_siblings: - if node.next_cst_siblings[-1].end_byte > end: - end = node.next_cst_siblings[-1].end_byte + end = max(end, node.next_cst_siblings[-1].end_byte) # No need to dig into child nodes, they won't have # any later content. break From fbb835c5bb62fca4d5ded5f13a744378dea020d7 Mon Sep 17 00:00:00 2001 From: Benjamin Bannier Date: Tue, 16 Jul 2024 13:27:57 +0200 Subject: [PATCH 6/6] Completely remove old `test/data` These test files were used in a couple of tests but didn't need to be standalone. This patch moves them into `testutils`. --- tests/data/test1.zeek | 6 ---- tests/data/test1.zeek.out | 5 ---- tests/test_dir_recursion.py | 47 ++++++++++++++++--------------- tests/test_formatting.py | 56 ++++++++++--------------------------- tests/testutils.py | 13 ++++++++- 5 files changed, 52 insertions(+), 75 deletions(-) delete mode 100644 tests/data/test1.zeek delete mode 100644 tests/data/test1.zeek.out diff --git a/tests/data/test1.zeek b/tests/data/test1.zeek deleted file mode 100644 index da67f26..0000000 --- a/tests/data/test1.zeek +++ /dev/null @@ -1,6 +0,0 @@ - -# A sample file which needs some formatting. - -# NOTE: More extensive tests should go into unit tests or separate files in `tests/samples`. - -global foo=1 +2 ; diff --git a/tests/data/test1.zeek.out b/tests/data/test1.zeek.out deleted file mode 100644 index 0636435..0000000 --- a/tests/data/test1.zeek.out +++ /dev/null @@ -1,5 +0,0 @@ -# A sample file which needs some formatting. - -# NOTE: More extensive tests should go into unit tests or separate files in `tests/samples`. - -global foo = 1 + 2; diff --git a/tests/test_dir_recursion.py b/tests/test_dir_recursion.py index c35689e..7455b5e 100755 --- a/tests/test_dir_recursion.py +++ b/tests/test_dir_recursion.py @@ -22,30 +22,28 @@ def setUp(self): shutil.rmtree("a", ignore_errors=True) os.makedirs(join("a", "b", "c")) - shutil.copy(join(tu.DATA, "test1.zeek"), join("a", "test1.zeek")) - shutil.copy(join(tu.DATA, "test1.zeek"), join("a", "test2.zeek")) - shutil.copy(join(tu.DATA, "test1.zeek"), join("a", "b", "test3.txt")) - shutil.copy(join(tu.DATA, "test1.zeek"), join("a", "b", "test4.zeek")) - shutil.copy(join(tu.DATA, "test1.zeek"), join("a", "b", "c", "test5.zeek")) + for f in ( + join("a", "test1.zeek"), + join("a", "test2.zeek"), + join("a", "b", "test3.txt"), + join("a", "b", "test4.zeek"), + join("a", "b", "c", "test5.zeek"), + ): + with open(f, "w", encoding="utf-8") as h: + h.write(tu.SAMPLE_UNFORMATTED) def tearDown(self): shutil.rmtree("a", ignore_errors=True) # pylint: disable-next=invalid-name - def assertEqualContent(self, file1, file2): - with ( - open(file1, encoding="utf-8") as hdl1, - open(file2, encoding="utf-8") as hdl2, - ): - self.assertEqual(hdl1.read(), hdl2.read()) + def assertEqualContent(self, file1, content_expected): + with open(file1, encoding="utf-8") as hdl1: + self.assertEqual(hdl1.read(), content_expected) # pylint: disable-next=invalid-name - def assertNotEqualContent(self, file1, file2): - with ( - open(file1, encoding="utf-8") as hdl1, - open(file2, encoding="utf-8") as hdl2, - ): - self.assertNotEqual(hdl1.read(), hdl2.read()) + def assertNotEqualContent(self, file1, content_expected): + with open(file1, encoding="utf-8") as hdl1: + self.assertNotEqual(hdl1.read(), content_expected) def test_recursive_formatting(self): parser = argparse.ArgumentParser() @@ -62,20 +60,25 @@ def test_recursive_formatting(self): self.assertEqual(out.getvalue(), "4 files processed, 0 errors\n") self.assertEqualContent( - join(tu.DATA, "test1.zeek.out"), join("a", "test1.zeek") + join("a", "test1.zeek"), + tu.SAMPLE_FORMATTED, ) self.assertEqualContent( - join(tu.DATA, "test1.zeek.out"), join("a", "test2.zeek") + join("a", "test2.zeek"), + tu.SAMPLE_FORMATTED, ) self.assertEqualContent( - join(tu.DATA, "test1.zeek.out"), join("a", "b", "test4.zeek") + join("a", "b", "test4.zeek"), + tu.SAMPLE_FORMATTED, ) self.assertEqualContent( - join(tu.DATA, "test1.zeek.out"), join("a", "b", "c", "test5.zeek") + join("a", "b", "c", "test5.zeek"), + tu.SAMPLE_FORMATTED, ) self.assertNotEqualContent( - join(tu.DATA, "test1.zeek.out"), join("a", "b", "test3.txt") + join("a", "b", "test3.txt"), + tu.SAMPLE_FORMATTED, ) def test_recurse_inplace(self): diff --git a/tests/test_formatting.py b/tests/test_formatting.py index f7cc290..c43bf9c 100755 --- a/tests/test_formatting.py +++ b/tests/test_formatting.py @@ -15,15 +15,6 @@ class TestFormatting(unittest.TestCase): - def _get_input_and_baseline(self, filename): - with open(os.path.join(tu.DATA, filename), "rb") as hdl: - input_ = hdl.read() - - with open(os.path.join(tu.DATA, filename + ".out"), "rb") as hdl: - output = hdl.read() - - return input_, output - def _format(self, content): script = zeekscript.Script(io.BytesIO(content)) @@ -35,17 +26,6 @@ def _format(self, content): return buf.getvalue() - def test_file_formatting(self): - input_, baseline = self._get_input_and_baseline("test1.zeek") - - # Format the input data and compare to baseline: - result1 = self._format(input_) - self.assertEqual(baseline, result1) - - # Format the result again. There should be no change. - result2 = self._format(result1) - self.assertEqual(baseline, result2) - def test_interval(self): self.assertEqual(self._format(b"1 sec;").rstrip(), b"1sec;") self.assertEqual(self._format(b"1min;").rstrip(), b"1min;") @@ -94,7 +74,8 @@ def test_format_comment_separator(self): # We split out lines here to work around different line endings on Windows. self.assertEqual( - self._format(code.encode()).decode().splitlines(), expected.splitlines() + self._format(code.encode("UTF-8")).decode().splitlines(), + expected.splitlines(), ) @@ -213,34 +194,27 @@ class TestNewlineFormatting(unittest.TestCase): # This test verifies correct processing when line endings in the input # differ from that normally used by the platform. - def _get_formatted_and_baseline(self, filename): + def test_file_formatting(self): + given = tu.SAMPLE_UNFORMATTED.encode("utf-8") # Swap line endings for something not native to the platform: - with open(os.path.join(tu.DATA, filename), "rb") as hdl: - data = hdl.read() - if zeekscript.Formatter.NL == b"\n": - # Turn everything to \r\n, even if mixed - data = data.replace(b"\r\n", b"\n") - data = data.replace(b"\n", b"\r\n") - else: - data = data.replace(b"\r\n", b"\n") - - buf = io.BytesIO(data) - + if zeekscript.Formatter.NL == b"\n": + # Turn everything to \r\n, even if mixed + given = given.replace(b"\r\n", b"\n") + given = given.replace(b"\n", b"\r\n") + else: + given = given.replace(b"\r\n", b"\n") + + buf = io.BytesIO(given) script = zeekscript.Script(buf) script.parse() buf = io.BytesIO() script.format(buf) - with open(os.path.join(tu.DATA, filename + ".out"), "rb") as hdl: - result_wanted = hdl.read() - - result_is = buf.getvalue() - return result_wanted, result_is + given = buf.getvalue() - def test_file_formatting(self): - result_wanted, result_is = self._get_formatted_and_baseline("test1.zeek") - self.assertEqual(result_wanted, result_is) + expected = tu.SAMPLE_FORMATTED.encode("utf-8") + self.assertEqual(expected, given) class TestScriptConstruction(unittest.TestCase): diff --git a/tests/testutils.py b/tests/testutils.py index f541fdc..be6b3f2 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -5,7 +5,6 @@ TESTS = os.path.dirname(os.path.realpath(__file__)) ROOT = os.path.normpath(os.path.join(TESTS, "..")) -DATA = os.path.normpath(os.path.join(TESTS, "data")) # Prepend the tree's root folder to the module searchpath so we find zeekscript # via it. This allows tests to run without package installation. (We do need a @@ -37,3 +36,15 @@ def normalize(content): out = content return fix_lineseps(out) + + +# A small unformatted source sample for general testing. +SAMPLE_UNFORMATTED = """\ +global foo=1 +2 ; +""" + + +# A small formatted source sample for general testing. +SAMPLE_FORMATTED = """\ +global foo = 1 + 2; +"""