diff --git a/dev/breeze/src/airflow_breeze/commands/developer_commands.py b/dev/breeze/src/airflow_breeze/commands/developer_commands.py index f17d8bd0d8bea..4f0d14a174045 100644 --- a/dev/breeze/src/airflow_breeze/commands/developer_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/developer_commands.py @@ -32,7 +32,7 @@ DEFAULT_PYTHON_MAJOR_MINOR_VERSION, DOCKER_DEFAULT_PLATFORM, MOUNT_SELECTED, - get_available_documentation_packages, + get_available_documentation_provider_packages, ) from airflow_breeze.params.build_ci_params import BuildCiParams from airflow_breeze.params.doc_build_params import DocBuildParams @@ -331,7 +331,7 @@ def start_airflow( @click.option( "--package-filter", help="List of packages to consider.", - type=NotVerifiedBetterChoice(get_available_documentation_packages()), + type=NotVerifiedBetterChoice(get_available_documentation_provider_packages()), multiple=True, ) @click.option( diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index 201f51752456e..aca5f66395bb1 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -43,7 +43,7 @@ MOUNT_ALL, MOUNT_SELECTED, MULTI_PLATFORM, - get_available_documentation_packages, + get_available_documentation_provider_packages, ) from airflow_breeze.params.shell_params import ShellParams from airflow_breeze.utils.add_back_references import ( @@ -783,7 +783,7 @@ def alias_image(image_from: str, image_to: str): @click.option( "--package-filter", help="List of packages to consider.", - type=NotVerifiedBetterChoice(get_available_documentation_packages()), + type=NotVerifiedBetterChoice(get_available_documentation_provider_packages()), multiple=True, ) @option_verbose @@ -821,30 +821,44 @@ def publish_docs( "-a", "--airflow-site-directory", envvar="AIRFLOW_SITE_DIRECTORY", + type=click.Path(exists=True, file_okay=False, dir_okay=True, resolve_path=True), help="Local directory path of cloned airflow-site repo.", required=True, ) @click.option( "-g", "--gen-type", - help="Type of back references to generate, supports: [airflow | providers | helm]", - type=str, - required=True, + show_default=True, + help="Type of back references to generate. Forced to providers if providers specified as arguments.", + type=BetterChoice( + [e.name for e in GenerationType], + ), + default=GenerationType.airflow.name, ) +@argument_packages @option_verbose @option_dry_run def add_back_references( - airflow_site_directory: bool, + airflow_site_directory: str, gen_type: str, + packages: list[str], ): """Adds back references for documentation generated by build-docs and publish-docs""" - if not os.path.isdir(airflow_site_directory): + site_path = Path(airflow_site_directory) + if not site_path.is_dir(): get_console().print( "\n[error]location pointed by airflow_site_dir is not valid. " "Provide the path of cloned airflow-site repo\n" ) sys.exit(1) - + if len(packages) != 0 and gen_type != GenerationType.providers.name: + get_console().print( + [ + f"[warning]Forcing gen type to " + f"{GenerationType.providers} as some provider_packages are selected." + ] + ) + gen_type = GenerationType.providers.name gen = GenerationType[gen_type] if gen not in GenerationType: get_console().print( @@ -852,7 +866,7 @@ def add_back_references( ) sys.exit(1) - start_generating_back_references(gen, airflow_site_directory) + start_generating_back_references(gen, site_path, packages) @release_management.command( diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index e16b6874bf523..dadae7a2e4db5 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -145,7 +145,7 @@ def all_helm_test_packages() -> list[str]: ALL_HISTORICAL_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"] -def get_available_documentation_packages(short_version=False) -> list[str]: +def get_available_documentation_provider_packages(short_version=False) -> list[str]: provider_names: list[str] = list(json.loads(PROVIDER_DEPENDENCIES_JSON_FILE_PATH.read_text()).keys()) doc_provider_names = [provider_name.replace(".", "-") for provider_name in provider_names] available_packages = [f"apache-airflow-providers-{doc_provider}" for doc_provider in doc_provider_names] diff --git a/dev/breeze/src/airflow_breeze/utils/add_back_references.py b/dev/breeze/src/airflow_breeze/utils/add_back_references.py index e27ddcfa6985a..0b7c2abfe8907 100644 --- a/dev/breeze/src/airflow_breeze/utils/add_back_references.py +++ b/dev/breeze/src/airflow_breeze/utils/add_back_references.py @@ -18,6 +18,7 @@ import enum import os +import re import tempfile from pathlib import Path from urllib.error import URLError @@ -25,6 +26,8 @@ from rich import print +from airflow_breeze.global_constants import get_available_documentation_provider_packages + airflow_redirects_link = ( "https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/redirects.txt" ) @@ -89,24 +92,31 @@ def get_github_redirects_url(provider_name: str): return f"https://raw.githubusercontent.com/apache/airflow/main/docs/{provider_name}/redirects.txt" -def get_provider_docs_path(docs_archive_path, provider_name: str): - return docs_archive_path + "/" + provider_name +def crete_redirect_html_if_not_exist(path: Path, content: str): + if not path.exists(): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content) + print(f"[green]Created back reference redirect: {path}") + else: + print(f"Skipping file:{path}, redirects already exist") -def create_back_reference_html(back_ref_url, path): +def create_back_reference_html(back_ref_url: str, target_path: Path): content = get_redirect_content(back_ref_url) - if Path(path).exists(): - print(f"Skipping file:{path}, redirects already exist") - return - - # creating a back reference html file - with open(path, "w") as f: - f.write(content) - print(f"[green]Created back reference redirect: {path}") + version_match = re.compile(r"[0-9]+.[0-9]+.[0-9]+") + target_path_as_posix = target_path.as_posix() + if "/stable/" in target_path_as_posix: + prefix, postfix = target_path_as_posix.split("/stable/") + base_folder = Path(prefix) + for folder in base_folder.iterdir(): + if folder.is_dir() and version_match.match(folder.name): + crete_redirect_html_if_not_exist(folder / postfix, content) + else: + crete_redirect_html_if_not_exist(Path(target_path), content) -def generate_back_references(link: str, base_path: str): +def generate_back_references(link: str, base_path: Path): is_downloaded, file_name = download_file(link) if not is_downloaded: old_to_new: list[tuple[str, str]] = [] @@ -118,46 +128,47 @@ def generate_back_references(link: str, base_path: str): old_to_new.append(("security.html", "security/security-model.html")) versions = [f.path.split("/")[-1] for f in os.scandir(base_path) if f.is_dir()] - for version in versions: print(f"Processing {base_path}, version: {version}") - versioned_provider_path = base_path + "/" + version + versioned_provider_path = base_path / version for old, new in old_to_new: # only if old file exists, add the back reference - if os.path.exists(versioned_provider_path + "/" + old): + if os.path.exists(versioned_provider_path / old): split_new_path = new.split("/") file_name = new.split("/")[-1] - dest_dir = versioned_provider_path + "/" + "/".join(split_new_path[: len(split_new_path) - 1]) + dest_dir = versioned_provider_path.joinpath(*split_new_path[: len(split_new_path) - 1]) # finds relative path of old file with respect to new and handles case of different file # names also relative_path = os.path.relpath(old, new) # remove one directory level because file path was used above relative_path = relative_path.replace("../", "", 1) - os.makedirs(dest_dir, exist_ok=True) - dest_file_path = dest_dir + "/" + file_name + dest_file_path = dest_dir / file_name create_back_reference_html(relative_path, dest_file_path) -def start_generating_back_references(gen_type, airflow_site_directory): - docs_archive_path = airflow_site_directory + "/docs-archive" - airflow_docs_path = docs_archive_path + "/apache-airflow" - helm_docs_path = docs_archive_path + "/helm-chart" +def start_generating_back_references( + gen_type: GenerationType, airflow_site_directory: Path, short_provider_package_ids: list[str] +): + # Either packages or gen_type should be provided + docs_archive_path = airflow_site_directory / "docs-archive" + airflow_docs_path = docs_archive_path / "apache-airflow" + helm_docs_path = docs_archive_path / "helm-chart" if gen_type == GenerationType.airflow: generate_back_references(airflow_redirects_link, airflow_docs_path) elif gen_type == GenerationType.helm: generate_back_references(helm_redirects_link, helm_docs_path) elif gen_type == GenerationType.providers: - all_providers = [ - f.path.split("/")[-1] - for f in os.scandir(docs_archive_path) - if f.is_dir() and "providers" in f.name - ] + if short_provider_package_ids: + all_providers = [ + f"apache-airflow-providers-{package.replace('.','-')}" + for package in short_provider_package_ids + ] + else: + all_providers = get_available_documentation_provider_packages() for p in all_providers: print(f"Processing airflow provider: {p}") - generate_back_references( - get_github_redirects_url(p), get_provider_docs_path(docs_archive_path, p) - ) + generate_back_references(get_github_redirects_url(p), docs_archive_path / p) diff --git a/dev/breeze/src/airflow_breeze/utils/common_options.py b/dev/breeze/src/airflow_breeze/utils/common_options.py index f21e08032385b..bd039593a51b1 100644 --- a/dev/breeze/src/airflow_breeze/utils/common_options.py +++ b/dev/breeze/src/airflow_breeze/utils/common_options.py @@ -43,7 +43,7 @@ SINGLE_PLATFORMS, START_AIRFLOW_ALLOWED_EXECUTORS, START_AIRFLOW_DEFAULT_ALLOWED_EXECUTORS, - get_available_documentation_packages, + get_available_documentation_provider_packages, ) from airflow_breeze.utils.custom_param_types import ( AnswerChoice, @@ -448,7 +448,7 @@ def _set_default_from_parent(ctx: click.core.Context, option: click.core.Option, "packages", nargs=-1, required=False, - type=BetterChoice(get_available_documentation_packages(short_version=True)), + type=BetterChoice(get_available_documentation_provider_packages(short_version=True)), ) option_airflow_constraints_reference = click.option( "--airflow-constraints-reference", diff --git a/dev/breeze/tests/test_global_constants.py b/dev/breeze/tests/test_global_constants.py index ee0f72888ef1f..f9878f7914423 100644 --- a/dev/breeze/tests/test_global_constants.py +++ b/dev/breeze/tests/test_global_constants.py @@ -16,12 +16,12 @@ # under the License. from __future__ import annotations -from airflow_breeze.global_constants import get_available_documentation_packages +from airflow_breeze.global_constants import get_available_documentation_provider_packages AVAILABLE_PACKAGES_STARTING_LIST = ("apache-airflow", "helm-chart", "docker-stack") def test_get_available_packages(): - assert len(get_available_documentation_packages()) > 70 - for package in get_available_documentation_packages(): + assert len(get_available_documentation_provider_packages()) > 70 + for package in get_available_documentation_provider_packages(): assert package.startswith(AVAILABLE_PACKAGES_STARTING_LIST) diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt index d6acdd924933a..2416ad7db0cfd 100644 --- a/images/breeze/output-commands-hash.txt +++ b/images/breeze/output-commands-hash.txt @@ -35,7 +35,7 @@ prod-image:build:2a8217f4e61c405530d487b60abc614e prod-image:pull:76f1f27e6119928412abecf153fce4bb prod-image:verify:bd2b78738a7c388dbad6076c41a9f906 prod-image:1598bf6964c2e31c387edbd3aacb6fbb -release-management:add-back-references:8dab6a30076a55f2d31c6d22a94e0ccb +release-management:add-back-references:0d4eb5ed82e5381bc630b343ba605a72 release-management:create-minor-branch:a3834afc4aa5d1e98002c9e9e7a9931d release-management:generate-constraints:b8fcaf8f0acd35ed5dbd48659bdb6485 release-management:generate-issue-content-providers:6b0d954cb6dbdec0da0a7988feec58f0 @@ -49,7 +49,7 @@ release-management:release-prod-images:cfbfe8b19fee91fd90718f98ef2fd078 release-management:start-rc-process:b27bd524dd3c89f50a747b60a7e892c1 release-management:start-release:419f48f6a4ff4457cb9de7ff496aebbe release-management:verify-provider-packages:2bfa1015b2a4682f7b098587e09026d4 -release-management:ccbc384d728dd606ab2727e59fc52c19 +release-management:3a02b790089e161f2addef69ea0ce08a sbom:generate-provider-requirements:9abe53200ea5f40e0bf7c27f6087f27f sbom:update-sbom-information:0ce56884e5f842e3e80d6619df1ccc64 sbom:935d041028e847d3faf763a95b51063e diff --git a/images/breeze/output_release-management_add-back-references.svg b/images/breeze/output_release-management_add-back-references.svg index 87839c0041b0f..b8ead97cde426 100644 --- a/images/breeze/output_release-management_add-back-references.svg +++ b/images/breeze/output_release-management_add-back-references.svg @@ -1,4 +1,4 @@ - +