Skip to content

Commit

Permalink
Improve back-reference generation (#33149)
Browse files Browse the repository at this point in the history
* better command line support with better auto-complete
* allows running back-reference generation for individual provider packages
* better typing including using Pathlib everywhere
* handling of redirection between providers - when an operator is
  moved from one provider to another - instead of generating an invalid
  "stable" link, we generate back references for all versions of the
  provider.
  • Loading branch information
potiuk authored Aug 6, 2023
1 parent 3dd0c99 commit 14faf19
Show file tree
Hide file tree
Showing 10 changed files with 242 additions and 149 deletions.
4 changes: 2 additions & 2 deletions dev/breeze/src/airflow_breeze/commands/developer_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
DEFAULT_PYTHON_MAJOR_MINOR_VERSION,
DOCKER_DEFAULT_PLATFORM,
MOUNT_SELECTED,
get_available_documentation_packages,
get_available_documentation_provider_packages,
)
from airflow_breeze.params.build_ci_params import BuildCiParams
from airflow_breeze.params.doc_build_params import DocBuildParams
Expand Down Expand Up @@ -331,7 +331,7 @@ def start_airflow(
@click.option(
"--package-filter",
help="List of packages to consider.",
type=NotVerifiedBetterChoice(get_available_documentation_packages()),
type=NotVerifiedBetterChoice(get_available_documentation_provider_packages()),
multiple=True,
)
@click.option(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
MOUNT_ALL,
MOUNT_SELECTED,
MULTI_PLATFORM,
get_available_documentation_packages,
get_available_documentation_provider_packages,
)
from airflow_breeze.params.shell_params import ShellParams
from airflow_breeze.utils.add_back_references import (
Expand Down Expand Up @@ -783,7 +783,7 @@ def alias_image(image_from: str, image_to: str):
@click.option(
"--package-filter",
help="List of packages to consider.",
type=NotVerifiedBetterChoice(get_available_documentation_packages()),
type=NotVerifiedBetterChoice(get_available_documentation_provider_packages()),
multiple=True,
)
@option_verbose
Expand Down Expand Up @@ -821,38 +821,52 @@ def publish_docs(
"-a",
"--airflow-site-directory",
envvar="AIRFLOW_SITE_DIRECTORY",
type=click.Path(exists=True, file_okay=False, dir_okay=True, resolve_path=True),
help="Local directory path of cloned airflow-site repo.",
required=True,
)
@click.option(
"-g",
"--gen-type",
help="Type of back references to generate, supports: [airflow | providers | helm]",
type=str,
required=True,
show_default=True,
help="Type of back references to generate. Forced to providers if providers specified as arguments.",
type=BetterChoice(
[e.name for e in GenerationType],
),
default=GenerationType.airflow.name,
)
@argument_packages
@option_verbose
@option_dry_run
def add_back_references(
airflow_site_directory: bool,
airflow_site_directory: str,
gen_type: str,
packages: list[str],
):
"""Adds back references for documentation generated by build-docs and publish-docs"""
if not os.path.isdir(airflow_site_directory):
site_path = Path(airflow_site_directory)
if not site_path.is_dir():
get_console().print(
"\n[error]location pointed by airflow_site_dir is not valid. "
"Provide the path of cloned airflow-site repo\n"
)
sys.exit(1)

if len(packages) != 0 and gen_type != GenerationType.providers.name:
get_console().print(
[
f"[warning]Forcing gen type to "
f"{GenerationType.providers} as some provider_packages are selected."
]
)
gen_type = GenerationType.providers.name
gen = GenerationType[gen_type]
if gen not in GenerationType:
get_console().print(
"\n[error]invalid type of doc generation required. Pass one of [airflow | providers | helm]\n"
)
sys.exit(1)

start_generating_back_references(gen, airflow_site_directory)
start_generating_back_references(gen, site_path, packages)


@release_management.command(
Expand Down
2 changes: 1 addition & 1 deletion dev/breeze/src/airflow_breeze/global_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def all_helm_test_packages() -> list[str]:
ALL_HISTORICAL_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"]


def get_available_documentation_packages(short_version=False) -> list[str]:
def get_available_documentation_provider_packages(short_version=False) -> list[str]:
provider_names: list[str] = list(json.loads(PROVIDER_DEPENDENCIES_JSON_FILE_PATH.read_text()).keys())
doc_provider_names = [provider_name.replace(".", "-") for provider_name in provider_names]
available_packages = [f"apache-airflow-providers-{doc_provider}" for doc_provider in doc_provider_names]
Expand Down
71 changes: 41 additions & 30 deletions dev/breeze/src/airflow_breeze/utils/add_back_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@

import enum
import os
import re
import tempfile
from pathlib import Path
from urllib.error import URLError
from urllib.request import urlopen

from rich import print

from airflow_breeze.global_constants import get_available_documentation_provider_packages

airflow_redirects_link = (
"https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/redirects.txt"
)
Expand Down Expand Up @@ -89,24 +92,31 @@ def get_github_redirects_url(provider_name: str):
return f"https://raw.githubusercontent.com/apache/airflow/main/docs/{provider_name}/redirects.txt"


def get_provider_docs_path(docs_archive_path, provider_name: str):
return docs_archive_path + "/" + provider_name
# Write `content` to `path` unless something already exists at `path`.
# Missing parent directories are created before the file is written.
# Either outcome (file created, or skipped because it exists) is reported via print().
def crete_redirect_html_if_not_exist(path: Path, content: str):
if not path.exists():
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(content)
print(f"[green]Created back reference redirect: {path}")
else:
print(f"Skipping file:{path}, redirects already exist")


def create_back_reference_html(back_ref_url, path):
def create_back_reference_html(back_ref_url: str, target_path: Path):
content = get_redirect_content(back_ref_url)

if Path(path).exists():
print(f"Skipping file:{path}, redirects already exist")
return

# creating a back reference html file
with open(path, "w") as f:
f.write(content)
print(f"[green]Created back reference redirect: {path}")
version_match = re.compile(r"[0-9]+.[0-9]+.[0-9]+")
target_path_as_posix = target_path.as_posix()
if "/stable/" in target_path_as_posix:
prefix, postfix = target_path_as_posix.split("/stable/")
base_folder = Path(prefix)
for folder in base_folder.iterdir():
if folder.is_dir() and version_match.match(folder.name):
crete_redirect_html_if_not_exist(folder / postfix, content)
else:
crete_redirect_html_if_not_exist(Path(target_path), content)


def generate_back_references(link: str, base_path: str):
def generate_back_references(link: str, base_path: Path):
is_downloaded, file_name = download_file(link)
if not is_downloaded:
old_to_new: list[tuple[str, str]] = []
Expand All @@ -118,46 +128,47 @@ def generate_back_references(link: str, base_path: str):
old_to_new.append(("security.html", "security/security-model.html"))

versions = [f.path.split("/")[-1] for f in os.scandir(base_path) if f.is_dir()]

for version in versions:
print(f"Processing {base_path}, version: {version}")
versioned_provider_path = base_path + "/" + version
versioned_provider_path = base_path / version

for old, new in old_to_new:
# only if old file exists, add the back reference
if os.path.exists(versioned_provider_path + "/" + old):
if os.path.exists(versioned_provider_path / old):
split_new_path = new.split("/")
file_name = new.split("/")[-1]
dest_dir = versioned_provider_path + "/" + "/".join(split_new_path[: len(split_new_path) - 1])
dest_dir = versioned_provider_path.joinpath(*split_new_path[: len(split_new_path) - 1])

# finds relative path of old file with respect to new and handles case of different file
# names also
relative_path = os.path.relpath(old, new)
# remove one directory level because file path was used above
relative_path = relative_path.replace("../", "", 1)

os.makedirs(dest_dir, exist_ok=True)
dest_file_path = dest_dir + "/" + file_name
dest_file_path = dest_dir / file_name
create_back_reference_html(relative_path, dest_file_path)


def start_generating_back_references(gen_type, airflow_site_directory):
docs_archive_path = airflow_site_directory + "/docs-archive"
airflow_docs_path = docs_archive_path + "/apache-airflow"
helm_docs_path = docs_archive_path + "/helm-chart"
def start_generating_back_references(
gen_type: GenerationType, airflow_site_directory: Path, short_provider_package_ids: list[str]
):
# Either packages or gen_type should be provided
docs_archive_path = airflow_site_directory / "docs-archive"
airflow_docs_path = docs_archive_path / "apache-airflow"
helm_docs_path = docs_archive_path / "helm-chart"

if gen_type == GenerationType.airflow:
generate_back_references(airflow_redirects_link, airflow_docs_path)
elif gen_type == GenerationType.helm:
generate_back_references(helm_redirects_link, helm_docs_path)
elif gen_type == GenerationType.providers:
all_providers = [
f.path.split("/")[-1]
for f in os.scandir(docs_archive_path)
if f.is_dir() and "providers" in f.name
]
if short_provider_package_ids:
all_providers = [
f"apache-airflow-providers-{package.replace('.','-')}"
for package in short_provider_package_ids
]
else:
all_providers = get_available_documentation_provider_packages()
for p in all_providers:
print(f"Processing airflow provider: {p}")
generate_back_references(
get_github_redirects_url(p), get_provider_docs_path(docs_archive_path, p)
)
generate_back_references(get_github_redirects_url(p), docs_archive_path / p)
4 changes: 2 additions & 2 deletions dev/breeze/src/airflow_breeze/utils/common_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
SINGLE_PLATFORMS,
START_AIRFLOW_ALLOWED_EXECUTORS,
START_AIRFLOW_DEFAULT_ALLOWED_EXECUTORS,
get_available_documentation_packages,
get_available_documentation_provider_packages,
)
from airflow_breeze.utils.custom_param_types import (
AnswerChoice,
Expand Down Expand Up @@ -448,7 +448,7 @@ def _set_default_from_parent(ctx: click.core.Context, option: click.core.Option,
"packages",
nargs=-1,
required=False,
type=BetterChoice(get_available_documentation_packages(short_version=True)),
type=BetterChoice(get_available_documentation_provider_packages(short_version=True)),
)
option_airflow_constraints_reference = click.option(
"--airflow-constraints-reference",
Expand Down
6 changes: 3 additions & 3 deletions dev/breeze/tests/test_global_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
# under the License.
from __future__ import annotations

from airflow_breeze.global_constants import get_available_documentation_packages
from airflow_breeze.global_constants import get_available_documentation_provider_packages

AVAILABLE_PACKAGES_STARTING_LIST = ("apache-airflow", "helm-chart", "docker-stack")


def test_get_available_packages():
assert len(get_available_documentation_packages()) > 70
for package in get_available_documentation_packages():
assert len(get_available_documentation_provider_packages()) > 70
for package in get_available_documentation_provider_packages():
assert package.startswith(AVAILABLE_PACKAGES_STARTING_LIST)
4 changes: 2 additions & 2 deletions images/breeze/output-commands-hash.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ prod-image:build:85bb45de92e86a55474238c3f04def92
prod-image:pull:76f1f27e6119928412abecf153fce4bb
prod-image:verify:bd2b78738a7c388dbad6076c41a9f906
prod-image:7b3369e182724fc155b3399c95d0fd73
release-management:add-back-references:8dab6a30076a55f2d31c6d22a94e0ccb
release-management:add-back-references:0d4eb5ed82e5381bc630b343ba605a72
release-management:create-minor-branch:a3834afc4aa5d1e98002c9e9e7a9931d
release-management:generate-constraints:b8fcaf8f0acd35ed5dbd48659bdb6485
release-management:generate-issue-content-providers:6b0d954cb6dbdec0da0a7988feec58f0
Expand All @@ -49,7 +49,7 @@ release-management:release-prod-images:cfbfe8b19fee91fd90718f98ef2fd078
release-management:start-rc-process:b27bd524dd3c89f50a747b60a7e892c1
release-management:start-release:419f48f6a4ff4457cb9de7ff496aebbe
release-management:verify-provider-packages:96dce5644aad6b37080acf77b3d8de3a
release-management:b6ee5d92b636083c7b127821afb71ea3
release-management:dc4897917210deefb4338f2038f8cb33
sbom:generate-provider-requirements:9abe53200ea5f40e0bf7c27f6087f27f
sbom:update-sbom-information:0ce56884e5f842e3e80d6619df1ccc64
sbom:935d041028e847d3faf763a95b51063e
Expand Down
Loading

0 comments on commit 14faf19

Please sign in to comment.