Skip to content

Commit

Permalink
[FR] Re-factor Build Integrations Manifest (#2274)
Browse files Browse the repository at this point in the history
* adjusted how integrations list is created

* removed unused import and addressed linting errors

* adjusted integration_manifest dictionary to only load latest major

* adjusted manifests sourcing from GH to EPR CDN

* addressed flake errors

* added some additional comments and formatting

* updating integration-manifests file

* adjusted test_integration testing

* addressed flake errors

* Update detection_rules/integrations.py

Co-authored-by: Justin Ibarra <[email protected]>

* Update detection_rules/integrations.py

Co-authored-by: Justin Ibarra <[email protected]>

* added folder unit tests

* updated unit test to remove network calls

* Update tests/test_all_rules.py

Co-authored-by: Mika Ayenson <[email protected]>

Co-authored-by: Justin Ibarra <[email protected]>
Co-authored-by: Mika Ayenson <[email protected]>

(cherry picked from commit b31a1b7)
  • Loading branch information
terrancedejesus authored and github-actions[bot] committed Sep 28, 2022
1 parent 74235ce commit f910583
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 55 deletions.
10 changes: 6 additions & 4 deletions detection_rules/devtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1131,8 +1131,10 @@ def integrations_group():

@integrations_group.command('build-manifests')
@click.option('--overwrite', '-o', is_flag=True, help="Overwrite the existing integrations-manifest.json.gz file")
@click.option("--token", required=True, prompt=get_github_token() is None, default=get_github_token(),
help="GitHub token to use for the PR", hide_input=True)
def build_integration_manifests(overwrite: bool, token: str):
def build_integration_manifests(overwrite: bool):
"""Builds consolidated integrations manifests file."""
build_integrations_manifest(token, overwrite)
click.echo("loading rules to determine all integration tags")
rules = RuleCollection.default()
integration_tags = list(set([r.contents.metadata.integration for r in rules if r.contents.metadata.integration]))
click.echo(f"integration tags identified: {integration_tags}")
build_integrations_manifest(overwrite, integration_tags)
Binary file modified detection_rules/etc/integration-manifests.json.gz
Binary file not shown.
64 changes: 26 additions & 38 deletions detection_rules/integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@
from pathlib import Path
from typing import Union

import yaml
import requests
from marshmallow import EXCLUDE, Schema, fields, post_load

from .ghwrap import GithubClient
from .semver import Version
from .utils import INTEGRATION_RULE_DIR, cached, get_etc_path, read_gzip
from .utils import cached, get_etc_path, read_gzip

MANIFEST_FILE_PATH = Path(get_etc_path('integration-manifests.json.gz'))

Expand All @@ -34,36 +33,24 @@ class IntegrationManifestSchema(Schema):
description = fields.Str(required=True)
conditions = fields.Dict(required=True)
policy_templates = fields.List(fields.Dict, required=True)
owner = fields.Dict(required=True)
owner = fields.Dict(required=False)

@post_load
def transform_policy_template(self, data, **kwargs):
data["policy_templates"] = [policy["name"] for policy in data["policy_templates"]]
return data


def build_integrations_manifest(token: str, overwrite: bool) -> None:
def build_integrations_manifest(overwrite: bool, rule_integrations: list) -> None:
"""Builds a new local copy of manifest.yaml from integrations Github."""
if overwrite:
if os.path.exists(MANIFEST_FILE_PATH):
os.remove(MANIFEST_FILE_PATH)
rule_integrations = [d.name for d in Path(INTEGRATION_RULE_DIR).glob('*') if d.is_dir()]
if "endpoint" in rule_integrations:
rule_integrations.remove("endpoint")

final_integration_manifests = {integration: {} for integration in rule_integrations}

# initialize github client and point to package-storage prod
github = GithubClient(token)
client = github.authenticated_client
organization = client.get_organization("elastic")
repository = organization.get_repo("package-storage")
pkg_storage_prod_branch = repository.get_branch("production")
pkg_storage_branch_sha = pkg_storage_prod_branch.commit.sha

for integration in rule_integrations:
integration_manifests = get_integration_manifests(repository, pkg_storage_branch_sha,
pkg_path=f"packages/{integration}")
integration_manifests = get_integration_manifests(integration)
for manifest in integration_manifests:
validated_manifest = IntegrationManifestSchema(unknown=EXCLUDE).load(manifest)
package_version = validated_manifest.pop("version")
Expand All @@ -72,13 +59,19 @@ def build_integrations_manifest(token: str, overwrite: bool) -> None:
manifest_file = gzip.open(MANIFEST_FILE_PATH, "w+")
manifest_file_bytes = json.dumps(final_integration_manifests).encode("utf-8")
manifest_file.write(manifest_file_bytes)
print(f"final integrations manifests dumped: {MANIFEST_FILE_PATH}")


def find_least_compatible_version(package: str, integration: str,
current_stack_version: str, packages_manifest: dict) -> Union[str, None]:
"""Finds least compatible version for specified integration based on stack version supplied."""
integration_manifests = {k: v for k, v in sorted(packages_manifest[package].items(), key=Version)}

# trim integration_manifests to only the latest major entries
max_major, *_ = max([Version(manifest_version) for manifest_version in integration_manifests])
latest_major_integration_manifests = \
{k: v for k, v in integration_manifests.items() if Version(k)[0] == max_major}

def compare_versions(int_ver: str, pkg_ver: str) -> bool:
"""Compares integration and package version"""
pkg_major, pkg_minor = Version(pkg_ver)
Expand All @@ -90,33 +83,28 @@ def compare_versions(int_ver: str, pkg_ver: str) -> bool:
compatible = Version(int_ver) <= Version(pkg_ver)
return compatible

for version, manifest in integration_manifests.items():
for kibana_compat_vers in re.sub(r"\>|\<|\=|\^", "", manifest["conditions"]["kibana.version"]).split(" || "):
for version, manifest in latest_major_integration_manifests.items():
for kibana_compat_vers in re.sub(r"\>|\<|\=|\^", "", manifest["conditions"]["kibana"]["version"]).split(" || "):
if compare_versions(kibana_compat_vers, current_stack_version):
return version
return f"^{version}"
print(f"no compatible version for integration {package}:{integration}")
return None


def get_integration_manifests(repository, sha: str, pkg_path: str) -> list:
def get_integration_manifests(integration: str) -> list:
"""Iterates over specified integrations from package-storage and combines manifests per version."""
integration = pkg_path.split("/")[-1]
versioned_packages = repository.get_dir_contents(pkg_path, ref=sha)
versions = [p.path.split("/")[-1] for p in versioned_packages]

manifests = []
for version in versions:
contents = repository.get_dir_contents(f"{pkg_path}/{version}", ref=sha)
print(f"Processing {integration} - Version: {version}")

processing_version = contents[0].path.split("/")[2]
manifest_content = [c for c in contents if "manifest" in c.path]
epr_search_url = "https://epr.elastic.co/search"

if len(manifest_content) < 1:
raise Exception(f"manifest file does not exist for {integration}:{processing_version}")
# link for search parameters - https://github.com/elastic/package-registry
epr_search_parameters = {"package": f"{integration}", "prerelease": "true",
"all": "true", "include_policy_templates": "true"}
epr_search_response = requests.get(epr_search_url, params=epr_search_parameters)
epr_search_response.raise_for_status()
manifests = epr_search_response.json()

path = manifest_content[0].path
manifest_content = yaml.safe_load(repository.get_contents(path, ref=sha).decoded_content.decode())
manifests.append(manifest_content)
if not manifests:
raise ValueError(f"EPR search for {integration} integration package returned empty list")

print(f"loaded {integration} manifests from the following package versions: "
f"{[manifest['version'] for manifest in manifests]}")
return manifests
2 changes: 1 addition & 1 deletion detection_rules/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ def _add_related_integrations(self, obj: dict) -> None:
# if integration is not a policy template remove
if package["version"]:
policy_templates = packages_manifest[
package["package"]][package["version"]]["policy_templates"]
package["package"]][package["version"].strip("^")]["policy_templates"]
if package["integration"] not in policy_templates:
del package["integration"]

Expand Down
42 changes: 30 additions & 12 deletions tests/test_all_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,18 @@
from pathlib import Path

import kql

from detection_rules import attack
from detection_rules.beats import parse_beats_from_index
from detection_rules.packaging import current_stack_version
from detection_rules.rule import QueryRuleData
from detection_rules.rule import (QueryRuleData, TOMLRuleContents,
load_integrations_manifests)
from detection_rules.rule_loader import FILE_PATTERN
from detection_rules.schemas import definitions
from detection_rules.semver import Version
from detection_rules.utils import get_path, load_etc_dump
from detection_rules.utils import INTEGRATION_RULE_DIR, get_path, load_etc_dump
from detection_rules.version_lock import default_version_lock
from rta import get_available_tests

from .base import BaseRuleTest


Expand Down Expand Up @@ -440,19 +441,36 @@ def test_integration(self):
"""Test that rules in integrations folders have matching integration defined."""
failures = []

packages_manifest = load_integrations_manifests()

for rule in self.production_rules:
rules_path = get_path('rules')
*_, grandparent, parent, _ = rule.path.parts
in_integrations = grandparent == 'integrations'
integration = rule.contents.metadata.get('integration')
has_integration = integration is not None

if (in_integrations or has_integration) and (parent != integration):
err_msg = f'{self.rule_str(rule)}\nintegration: {integration}\npath: {rule.path.relative_to(rules_path)}' # noqa: E501
rule_integration = rule.contents.metadata.get('integration')

# checks if metadata tag matches from a list of integrations in EPR
if rule_integration and rule_integration not in packages_manifest.keys():
err_msg = f"{self.rule_str(rule)} integration '{rule_integration}' unknown"
failures.append(err_msg)

# checks if the rule path matches the intended integration
valid_integration_folders = [p.name for p in list(Path(INTEGRATION_RULE_DIR).glob("*"))]
if rule_integration and rule_integration in valid_integration_folders:
if rule_integration != rule.path.parent.name:
err_msg = f'{self.rule_str(rule)} {rule_integration} tag, but path is {rule.path.parent.name}'
failures.append(err_msg)

# checks if event.dataset exists in query object and a tag exists in metadata
if isinstance(rule.contents.data, QueryRuleData) and rule.contents.data.language != 'lucene':
trc = TOMLRuleContents(rule.contents.metadata, rule.contents.data)
package_integrations = trc._get_packaged_integrations(packages_manifest)
if package_integrations and not rule_integration:
err_msg = f'{self.rule_str(rule)} integration tag should exist: '

if failures:
err_msg = 'The following rules have missing/incorrect integrations or are not in an integrations folder:\n'
err_msg = """
The following rules have missing or invalid integrations tags.
Try updating the integrations manifest file:
- `python -m detection_rules dev integrations build-manifests`\n
"""
self.fail(err_msg + '\n'.join(failures))

def test_rule_demotions(self):
Expand Down

0 comments on commit f910583

Please sign in to comment.