Skip to content

Commit

Permalink
Add validations for removed dependencies (#14556)
Browse files Browse the repository at this point in the history
* Map out new licenses validation

* Implement validations for extra licenses

* Add constants to config.toml

* Implement license validation

* Uncomment legacy licenses validation

* Keep license command addition in same place

* Small style change

* Update config.toml override values

* Refactor

* Fix style

* Apply suggestions from code review

Co-authored-by: Ofek Lev <[email protected]>

* Update suggestions

* Require CI for license validation tests and update to use empty envvars

* Fix permission for file

* Add windows version of setting github env vars

* Fix windows file

* Change to powershell script

* Output GITHUB_ENV on windows CI

* Convert entirely to powershell

* Change back to bat file

* Test DD_GITHUB_USER value

* Print github user in license test

* Manually set Github user and token in test

* Fix config_file

* Print github user

* Check if tokens are the same

* Remove additional space in bat script

* Fix style and remove test code

* Change order of scripts

* Try commenting out model.github override

* Revert previous commit

* Change to threads instead of async

* Switch out async request to requests

* Clean up

* Fix style

---------

Co-authored-by: Ofek Lev <[email protected]>
  • Loading branch information
yzhan289 and ofek authored Jun 15, 2023
1 parent 19d4658 commit f9d372b
Show file tree
Hide file tree
Showing 12 changed files with 223 additions and 17 deletions.
9 changes: 9 additions & 0 deletions .ddev/ci/scripts/ddev/linux/60_set_github_env_vars.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Append DD_GITHUB_USER and DD_GITHUB_TOKEN as KEY=value lines to the file
# named by $GITHUB_ENV.
# NOTE(review): on GitHub Actions this file is how a step hands environment
# variables to later steps -- confirm that is the intent here.
set -euo pipefail

# Tracing is switched off around the writes -- presumably so the token value
# never shows up in `set -x` output.
set +x

printf 'DD_GITHUB_USER=%s\n' "$DD_GITHUB_USER" >> "$GITHUB_ENV"
printf 'DD_GITHUB_TOKEN=%s\n' "$DD_GITHUB_TOKEN" >> "$GITHUB_ENV"

# Tracing is turned (back) on unconditionally once the values are written.
set -x
2 changes: 2 additions & 0 deletions .ddev/ci/scripts/ddev/windows/60_set_github_env_vars.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
REM Append DD_GITHUB_USER and DD_GITHUB_TOKEN as KEY=value lines to the file named by %GITHUB_ENV%.
REM NOTE(review): there is no space between the value and the redirect operator, presumably so that
REM no trailing space is written after the value; confirm before reformatting these lines.
echo DD_GITHUB_USER=%DD_GITHUB_USER%>> %GITHUB_ENV%
echo DD_GITHUB_TOKEN=%DD_GITHUB_TOKEN%>> %GITHUB_ENV%
64 changes: 64 additions & 0 deletions .ddev/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,67 @@ platforms = ["linux", "windows"]

[overrides.ci.tokumx]
only-py2 = true

[overrides.dependencies.licenses]
# https://github.com/aerospike/aerospike-client-python/blob/master/LICENSE
aerospike = ['Apache-2.0']
# https://github.com/baztian/jaydebeapi/blob/master/COPYING
JayDeBeApi = ['LGPL-3.0-only']
# https://github.com/pyca/cryptography/blob/main/LICENSE
cryptography = ['Apache-2.0', 'BSD-3-Clause', 'PSF']
# https://github.com/rthalley/dnspython/blob/master/LICENSE
dnspython = ['ISC']
# https://github.com/cannatag/ldap3/blob/dev/COPYING.txt
ldap3 = ['LGPL-3.0-only']
# https://cloudera.github.io/cm_api/
cm-client = ['Apache-2.0']
# https://github.com/oauthlib/oauthlib/blob/master/LICENSE
oauthlib = ['BSD-3-Clause']
# https://github.com/hajimes/mmh3/blob/master/LICENSE
mmh3 = ['CC0-1.0']
# https://github.com/paramiko/paramiko/blob/master/LICENSE
paramiko = ['LGPL-2.1-only']
# https://github.com/oracle/python-oracledb/blob/main/LICENSE.txt
oracledb = ['Apache-2.0']
# https://github.com/psycopg/psycopg2/blob/master/LICENSE
# https://github.com/psycopg/psycopg2/blob/master/doc/COPYING.LESSER
psycopg2-binary = ['LGPL-3.0-only', 'BSD-3-Clause']
# https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst
pycryptodomex = ['Unlicense', 'BSD-2-Clause']
# https://github.com/requests/requests-kerberos/pull/123
requests-kerberos = ['ISC']
# https://github.com/requests/requests-ntlm/blob/master/LICENSE
requests-ntlm = ['ISC']
# https://github.com/rethinkdb/rethinkdb-python/blob/master/LICENSE
rethinkdb = ['Apache-2.0']
# https://github.com/simplejson/simplejson/blob/master/LICENSE.txt
simplejson = ['MIT']
# https://github.com/Supervisor/supervisor/blob/master/LICENSES.txt
supervisor = ['BSD-3-Clause-Modification']
# https://github.com/Cairnarvon/uptime/blob/master/COPYING.txt
uptime = ['BSD-2-Clause']
# https://github.com/hickeroar/win_inet_pton/blob/master/LICENSE
win-inet-pton = ['Unlicense']

[overrides.dependencies.repo]
PyYAML = 'https://github.com/yaml/pyyaml'
Pyro4 = 'https://github.com/irmen/Pyro4'
contextlib2 = 'https://github.com/jazzband/contextlib2'
dnspython = 'https://github.com/rthalley/dnspython'
foundationdb = 'https://github.com/apple/foundationdb'
in-toto = 'https://github.com/in-toto/in-toto'
lxml = 'https://github.com/lxml/lxml'
oracledb = 'https://github.com/oracle/python-oracledb'
packaging = 'https://github.com/pypa/packaging'
paramiko = 'https://github.com/paramiko/paramiko'
protobuf = 'https://github.com/protocolbuffers/protobuf'
psycopg2-binary = 'https://github.com/psycopg/psycopg2'
pycryptodomex = 'https://github.com/Legrandin/pycryptodome'
redis = 'https://github.com/redis/redis-py'
requests = 'https://github.com/psf/requests'
requests-toolbelt = 'https://github.com/requests/toolbelt'
service-identity = 'https://github.com/pyca/service-identity'
snowflake-connector-python = 'https://github.com/snowflakedb/snowflake-connector-python'
supervisor = 'https://github.com/Supervisor/supervisor'
tuf = 'https://github.com/theupdateframework/python-tuf'
typing = 'https://github.com/python/typing'
8 changes: 6 additions & 2 deletions .github/workflows/test-target.yml
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,17 @@ jobs:
"DOCKER_ACCESS_TOKEN": "{1}",
"ORACLE_DOCKER_USERNAME": "{2}",
"ORACLE_DOCKER_PASSWORD": "{3}",
"SINGLESTORE_LICENSE": "{4}"
"SINGLESTORE_LICENSE": "{4}",
"DD_GITHUB_USER": "{5}",
"DD_GITHUB_TOKEN": "{6}"
}}',
secrets.DOCKER_USERNAME,
secrets.DOCKER_ACCESS_TOKEN,
secrets.ORACLE_DOCKER_USERNAME,
secrets.ORACLE_DOCKER_PASSWORD,
secrets.SINGLESTORE_LICENSE
secrets.SINGLESTORE_LICENSE,
github.actor,
secrets.GITHUB_TOKEN
))}}
run: ddev ci setup ${{ inputs.target }}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# (C) Datadog, Inc. 2021-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
import asyncio
import concurrent.futures
import difflib
import io
import os
Expand All @@ -14,8 +14,6 @@
import click
import orjson
import requests
from aiohttp import request
from aiomultiprocess import Pool
from packaging.requirements import Requirement

from ....fs import file_exists, read_file_lines, write_file_lines
Expand All @@ -35,8 +33,6 @@
'aerospike': ['Apache-2.0'],
# https://github.com/baztian/jaydebeapi/blob/master/COPYING
'JayDeBeApi': ['LGPL-3.0-only'],
# https://github.com/mhammond/pywin32/blob/master/adodbapi/license.txt
'adodbapi': ['LGPL-2.1-only'],
# https://github.com/pyca/cryptography/blob/main/LICENSE
'cryptography': ['Apache-2.0', 'BSD-3-Clause', 'PSF'],
# https://github.com/rthalley/dnspython/blob/master/LICENSE
Expand Down Expand Up @@ -206,18 +202,20 @@ def get_known_spdx_licenses():
return {data['licenseId'] for data in license_list}


async def get_data(url):
async with request('GET', url) as response:
return orjson.loads(await response.read())
def get_data(url, timeout=30):
    """Fetch ``url`` with a GET request and return its body parsed as JSON.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``.

    Returns:
        The decoded JSON document.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    # Requests applies no timeout unless one is given, so a single stalled
    # connection would otherwise block the calling worker indefinitely.
    with requests.get(url, timeout=timeout) as response:
        return orjson.loads(response.content)


async def scrape_license_data(urls):
def scrape_license_data(urls):
package_data = defaultdict(
lambda: {'copyright': {}, 'licenses': set(), 'classifiers': set(), 'home_page': None, 'author': None}
)

async with Pool() as pool:
async for resp in pool.map(get_data, urls):
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
future_to_url = {executor.submit(get_data, url): url for url in urls}
for future in concurrent.futures.as_completed(future_to_url):
resp = future.result()
info = resp['info']
data = package_data[(info['name'], info['version'])]
data['urls'] = resp['urls']
Expand Down Expand Up @@ -453,7 +451,7 @@ def licenses(ctx, sync):
for version in versions:
api_urls.append(f'https://pypi.org/pypi/{package}/{version}/json')

package_data = asyncio.run(scrape_license_data(api_urls))
package_data = scrape_license_data(api_urls)
known_spdx_licenses = {license_id.lower(): license_id for license_id in get_known_spdx_licenses()}

package_license_errors = defaultdict(list)
Expand Down
2 changes: 1 addition & 1 deletion ddev/src/ddev/cli/validate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from datadog_checks.dev.tooling.commands.validate.integration_style import integration_style
from datadog_checks.dev.tooling.commands.validate.jmx_metrics import jmx_metrics
from datadog_checks.dev.tooling.commands.validate.license_headers import license_headers
from datadog_checks.dev.tooling.commands.validate.licenses import licenses
from datadog_checks.dev.tooling.commands.validate.metadata import metadata
from datadog_checks.dev.tooling.commands.validate.models import models
from datadog_checks.dev.tooling.commands.validate.package import package
Expand All @@ -26,6 +25,7 @@
from datadog_checks.dev.tooling.commands.validate.typos import typos

from ddev.cli.validate.ci import ci
from ddev.cli.validate.licenses import licenses
from ddev.cli.validate.manifest import manifest
from ddev.cli.validate.openmetrics import openmetrics

Expand Down
55 changes: 55 additions & 0 deletions ddev/src/ddev/cli/validate/licenses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# (C) Datadog, Inc. 2023-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
from __future__ import annotations

from typing import TYPE_CHECKING

import click

if TYPE_CHECKING:
from ddev.cli.application import Application


@click.command(short_help='Validate third-party license list')
@click.option('--sync', '-s', is_flag=True, help='Generate the `LICENSE-3rdparty.csv` file')
@click.pass_context
def licenses(ctx: click.Context, sync):
    # NOTE: documented with comments rather than a docstring on purpose; click
    # uses a command's docstring as its `--help` text, which would change the
    # user-facing output.
    #
    # Steps:
    #   1. Abort right away when the active repo is not `core`.
    #   2. Check that every package named in the `licenses` and `repo` tables
    #      under `/overrides/dependencies` of the repo config is still listed
    #      in the agent requirements file; report and abort on any leftovers.
    #   3. Invoke the legacy license validation, forwarding `sync`.
    app: Application = ctx.obj

    if app.repo.name != 'core':
        app.display_info(f"License validation is only available for repo `core`, skipping for repo `{app.repo.name}`")
        app.abort()

    from packaging.requirements import Requirement
    from packaging.utils import canonicalize_name

    validation_tracker = app.create_validation_tracker('Licenses')

    # Validate that all values in the constants (EXPLICIT_LICENSES and
    # PACKAGE_REPO_OVERRIDES) appear in agent_requirements.in file

    agent_requirements_path = app.repo.agent_requirements

    # Names are normalized on BOTH sides of the comparison. Previously only the
    # override name was lowercased, so a requirement spelled with capitals (or
    # with `_`/`.` instead of `-`) could never match its override.
    packages_set = set()
    with open(agent_requirements_path, 'r', encoding='utf-8') as f:
        for line in f:
            requirement_text = line.strip()
            # Blank lines and full-line comments are not requirements and
            # would make `Requirement` raise.
            if not requirement_text or requirement_text.startswith('#'):
                continue

            packages_set.add(canonicalize_name(Requirement(requirement_text).name))

    for dependency_override, constant_name in [('licenses', 'EXPLICIT_LICENSES'), ('repo', 'PACKAGE_REPO_OVERRIDES')]:
        for name in app.repo.config.get(f'/overrides/dependencies/{dependency_override}', {}):
            if canonicalize_name(name) not in packages_set:
                validation_tracker.error(
                    (constant_name, name),
                    message=f"{constant_name} contains additional package not in agent requirements: {name}",
                )

    if validation_tracker.errors:
        validation_tracker.display()
        app.abort()

    # Call legacy licenses validation
    from datadog_checks.dev.tooling.commands.validate.licenses import licenses as legacy_licenses_validation

    ctx.invoke(legacy_licenses_validation, sync=sync)
    validation_tracker.display()
4 changes: 4 additions & 0 deletions ddev/src/ddev/repo/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ def config(self) -> RepositoryConfig:

return RepositoryConfig(self.path / CONFIG_DIRECTORY / 'config.toml')

@cached_property
def agent_requirements(self) -> Path:
    """Location of ``agent_requirements.in`` below this repository's root path."""
    relative_parts = ('datadog_checks_base', 'datadog_checks', 'base', 'data', 'agent_requirements.in')
    return self.path.joinpath(*relative_parts)


class IntegrationRegistry:
def __init__(self, repo: Repository):
Expand Down
7 changes: 7 additions & 0 deletions ddev/src/ddev/utils/toml.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,10 @@ def load_toml_data(data):
def load_toml_file(path):
with open(path, encoding='utf-8') as f:
return tomllib.loads(f.read())


def dump_toml_data(data, path):
    """Serialize ``data`` as TOML into the file at ``path``.

    The file is opened in binary write mode, so any existing content is
    replaced.
    """
    # Imported inside the function, so `tomli_w` is only needed when this
    # function is actually called.
    from tomli_w import dump as write_toml

    with open(path, "wb") as toml_file:
        write_toml(data, toml_file)
2 changes: 2 additions & 0 deletions ddev/tests/cli/config/test_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
def test_default_scrubbed(ddev, config_file, helpers):
config_file.model.orgs['default']['api_key'] = 'foo'
config_file.model.orgs['default']['app_key'] = 'bar'
config_file.model.github = {'user': '', 'token': ''}
config_file.save()

result = ddev('config', 'show')
Expand Down Expand Up @@ -68,6 +69,7 @@ def test_default_scrubbed(ddev, config_file, helpers):
def test_reveal(ddev, config_file, helpers):
config_file.model.orgs['default']['api_key'] = 'foo'
config_file.model.orgs['default']['app_key'] = 'bar'
config_file.model.github = {'user': '', 'token': ''}
config_file.save()

result = ddev('config', 'show', '-a')
Expand Down
63 changes: 63 additions & 0 deletions ddev/tests/cli/validate/test_licenses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# (C) Datadog, Inc. 2023-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)


import pytest
from ddev.utils.toml import dump_toml_data, load_toml_file


@pytest.mark.parametrize(
    "name, contents, expected_error_output",
    [
        pytest.param(
            "licenses",
            {'dummy_package': 'dummy_license'},
            "EXPLICIT_LICENSES contains additional package not in agent",
            id="explicit licenses",
        ),
        pytest.param(
            "repo",
            {'dummy_package': 'https://github.com/dummy_package'},
            "PACKAGE_REPO_OVERRIDES contains additional package not in agent",
            id="package repo overrides",
        ),
    ],
)
def test_error_extra_dependency(name, contents, expected_error_output, ddev, repository, network_replay, helpers):
    """An override naming a package that is not an agent requirement must fail validation."""
    network_replay('fixtures/network/license/extra_dependency.yaml', record_mode='none')

    # Replace the whole `overrides.dependencies` table of the repo config with
    # the single table under test.
    config_path = repository.path / '.ddev' / 'config.toml'
    config = load_toml_file(config_path)
    config['overrides']['dependencies'] = {name: contents}
    dump_toml_data(config, config_path)

    outcome = ddev('validate', 'licenses')

    assert outcome.exit_code == 1, outcome.output

    # The expected validation error message must appear in the command output
    cleaned_output = helpers.remove_trailing_spaces(outcome.output)
    assert expected_error_output in cleaned_output


@pytest.mark.parametrize(
    "repo, expected_message",
    [
        pytest.param("core", "Licenses file is valid!", id="Core integrations"),
        pytest.param(
            "extras",
            "License validation is only available for repo `core`, skipping for repo `extras`",
            id="Extras integrations",
        ),
    ],
)
@pytest.mark.requires_ci
def test_validate_repo(repo, repository, expected_message, ddev, helpers, config_file):
    """The command output must contain the message expected for the configured repo."""
    # Select the repo under test through the saved ddev configuration.
    config_file.model.repo = repo
    config_file.save()

    outcome = ddev("validate", "licenses")
    cleaned_output = helpers.remove_trailing_spaces(outcome.output)

    assert expected_message in cleaned_output
2 changes: 0 additions & 2 deletions ddev/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ def valid_integration(valid_integrations) -> str:
@pytest.fixture(autouse=True)
def config_file(tmp_path, monkeypatch) -> ConfigFile:
for env_var in (
'DD_GITHUB_USER',
'DD_GITHUB_TOKEN',
'DD_SITE',
'DD_LOGS_CONFIG_DD_URL',
'DD_DD_URL',
Expand Down

0 comments on commit f9d372b

Please sign in to comment.