Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lint rule formatting and improved rule dump #401

Merged
merged 7 commits into from
Jan 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions capa/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import re
import uuid
import codecs
import logging
Expand Down Expand Up @@ -600,6 +601,9 @@ def _get_ruamel_yaml_parser():
# use block mode, not inline json-like mode
y.default_flow_style = False

# leave quotes unchanged
y.preserve_quotes = True

# indent lists by two spaces below their parent
#
# features:
Expand All @@ -614,16 +618,20 @@ def _get_ruamel_yaml_parser():
return y

@classmethod
def from_yaml(cls, s):
# use pyyaml because it can be much faster than ruamel (pure python)
doc = yaml.load(s, Loader=cls._get_yaml_loader())
def from_yaml(cls, s, use_ruamel=False):
    """
    parse the YAML text `s` and build a rule from it via `cls.from_dict`.

    args:
      s: the YAML document, as a string.
      use_ruamel: when true, parse with the ruamel parser from
        `cls._get_ruamel_yaml_parser()`; otherwise parse with pyyaml using
        the loader from `cls._get_yaml_loader()`.

    returns: whatever `cls.from_dict(doc, s)` returns for the parsed document.
    """
    if not use_ruamel:
        # use pyyaml because it can be much faster than ruamel (pure python)
        # NOTE(review): safety of yaml.load depends on the loader class returned by
        # _get_yaml_loader() - confirm it is a safe loader if `s` can be untrusted.
        doc = yaml.load(s, Loader=cls._get_yaml_loader())
    else:
        # ruamel enables nice formatting and doc roundtripping with comments
        doc = cls._get_ruamel_yaml_parser().load(s)

    # the original text is handed along together with the parsed document
    return cls.from_dict(doc, s)

@classmethod
def from_yaml_file(cls, path):
def from_yaml_file(cls, path, use_ruamel=False):
with open(path, "rb") as f:
try:
return cls.from_yaml(f.read().decode("utf-8"))
return cls.from_yaml(f.read().decode("utf-8"), use_ruamel=use_ruamel)
except InvalidRule as e:
raise InvalidRuleWithPath(path, str(e))

Expand Down Expand Up @@ -716,7 +724,18 @@ def move_to_end(m, k):
# tweaking `ruamel.indent()` doesn't quite give us the control we want.
# so, add the two extra spaces that we've determined we need through experimentation.
# see #263
doc = doc.replace(" description:", " description:")
# only do this for the features section, so the meta description doesn't get reformatted
# assumes features section always exists
features_offset = doc.find("features")
doc = doc[:features_offset] + doc[features_offset:].replace(" description:", " description:")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch


# for negative hex numbers, yaml dump outputs:
# - offset: !!int '0x-30'
# we prefer:
# - offset: -0x30
# the regex below makes these adjustments; while ugly, it saves us from having to explore the ruamel.yaml internals
doc = re.sub(r"!!int '0x-([0-9a-fA-F]+)'", r"-0x\1", doc)

return doc


Expand Down
22 changes: 19 additions & 3 deletions scripts/capafmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ def main(argv=None):
)
parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
parser.add_argument(
"-c",
"--check",
action="store_true",
help="Don't output (reformatted) rule, only return status. 0 = no changes, 1 = would reformat",
)
args = parser.parse_args(args=argv)

if args.verbose:
Expand All @@ -50,12 +56,22 @@ def main(argv=None):
logging.basicConfig(level=level)
logging.getLogger("capafmt").setLevel(level)

rule = capa.rules.Rule.from_yaml_file(args.path)
rule = capa.rules.Rule.from_yaml_file(args.path, use_ruamel=True)
reformatted_rule = rule.to_yaml()

if args.check:
if rule.definition == reformatted_rule:
logger.info("rule is formatted correctly, nice! (%s)", rule.name)
return 0
else:
logger.info("rule requires reformatting (%s)", rule.name)
return 1

if args.in_place:
with open(args.path, "wb") as f:
f.write(rule.to_yaml().encode("utf-8"))
f.write(reformatted_rule.encode("utf-8"))
else:
print(rule.to_yaml().rstrip("\n"))
print(reformatted_rule)

return 0

Expand Down
52 changes: 46 additions & 6 deletions scripts/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import sys
import time
import string
import difflib
import hashlib
import logging
import os.path
Expand All @@ -25,6 +26,7 @@
import posixpath

import capa.main
import capa.rules
import capa.engine
import capa.features
import capa.features.insn
Expand Down Expand Up @@ -277,6 +279,32 @@ def check_features(self, ctx, features):
return False


class FormatSingleEmptyLineEOF(Lint):
    """lint that checks how the rule text ends: exactly one trailing newline is expected."""

    name = "EOF format"
    recommendation = "end file with a single empty line"

    def check_rule(self, ctx, rule):
        """
        return False when `rule.definition` ends with exactly one newline,
        and True otherwise (no trailing newline, or two or more).
        """
        text = rule.definition
        ends_with_single_newline = text.endswith("\n") and not text.endswith("\n\n")
        return not ends_with_single_newline


class FormatIncorrect(Lint):
    """lint that compares the rule text against its re-serialized (ruamel round-tripped) form."""

    name = "rule format incorrect"
    recommendation_template = "use scripts/capafmt.py or adjust as follows\n{:s}"

    def check_rule(self, ctx, rule):
        """
        return True when `rule.definition` differs from the text produced by
        parsing it with ruamel and dumping it again via `to_yaml()`; False when
        they are equal.

        on a mismatch, `self.recommendation` is set to the template filled in
        with a line-by-line ndiff of the actual vs. expected text.
        """
        actual = rule.definition
        expected = capa.rules.Rule.from_yaml(actual, use_ruamel=True).to_yaml()

        if actual == expected:
            return False

        # keep line endings so the joined diff reproduces the original line structure
        delta = difflib.ndiff(actual.splitlines(True), expected.splitlines(True))
        self.recommendation = self.recommendation_template.format("".join(delta))
        return True


def run_lints(lints, ctx, rule):
for lint in lints:
if lint.check_rule(ctx, rule):
Expand Down Expand Up @@ -332,15 +360,25 @@ def lint_meta(ctx, rule):
)


def get_normpath(path):
    """
    return `path` normalized and spelled with forward slashes.

    args:
      path: a path string using either the platform separator or "/".

    returns: the path with `os.sep` replaced by "/" and then collapsed by
      `posixpath.normpath` (redundant separators, "." and ".." components).
    """
    # convert separators *before* normalizing: posixpath.normpath only treats "/"
    # as a separator, so normalizing first would leave backslash-separated
    # components (e.g. on Windows) untouched.
    return posixpath.normpath(path.replace(os.sep, "/"))


def lint_features(ctx, rule):
    """
    run every lint in FEATURE_LINTS against the features collected for `rule`.

    returns: the result of `run_feature_lints` for those features.
    """
    return run_feature_lints(FEATURE_LINTS, ctx, get_features(ctx, rule))


# lint instances that check a rule's textual formatting; passed to run_lints() by lint_format()
FORMAT_LINTS = (
FormatSingleEmptyLineEOF(),
FormatIncorrect(),
)


def lint_format(ctx, rule):
    """
    run every lint in FORMAT_LINTS against `rule`.

    returns: the result of `run_lints` for the format lints.
    """
    return run_lints(lints=FORMAT_LINTS, ctx=ctx, rule=rule)


def get_normpath(path):
    """
    return `path` normalized and spelled with forward slashes.

    args:
      path: a path string using either the platform separator or "/".

    returns: the path with `os.sep` replaced by "/" and then collapsed by
      `posixpath.normpath` (redundant separators, "." and ".." components).
    """
    # convert separators *before* normalizing: posixpath.normpath only treats "/"
    # as a separator, so normalizing first would leave backslash-separated
    # components (e.g. on Windows) untouched.
    return posixpath.normpath(path.replace(os.sep, "/"))


def get_features(ctx, rule):
# get features from rule and all dependencies including subscopes and matched rules
features = []
Expand Down Expand Up @@ -391,6 +429,7 @@ def lint_rule(ctx, rule):
lint_meta(ctx, rule),
lint_logic(ctx, rule),
lint_features(ctx, rule),
lint_format(ctx, rule),
)
)

Expand Down Expand Up @@ -518,6 +557,7 @@ def main(argv=None):

capa.main.set_vivisect_log_level(logging.CRITICAL)
logging.getLogger("capa").setLevel(logging.CRITICAL)
logging.getLogger("viv_utils").setLevel(logging.CRITICAL)

time0 = time.time()

Expand Down Expand Up @@ -549,8 +589,8 @@ def main(argv=None):

did_violate = lint(ctx, rules)

diff = time.time() - time0
logger.debug("lint ran for ~ %02d:%02d", (diff // 60), diff)
min, sec = divmod(time.time() - time0, 60)
logger.debug("lints ran for ~ %02d:%02dm", min, sec)

if not did_violate:
logger.info("no suggestions, nice!")
Expand Down