Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a structured logging script #437

Merged
merged 2 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions pipeline/bicleaner/download_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
import tempfile
from typing import Optional

from pipeline.common.logging import get_logger

logger = get_logger(__file__)


# bicleaner-ai-download downloads the latest models from Hugging Face / GitHub
# If a new model is released and you want to invalidate Taskcluster caches,
Expand All @@ -31,7 +35,7 @@ def _run_download(src: str, trg: str, dir: str) -> subprocess.CompletedProcess:


def _compress_dir(dir_path: str, compression_cmd: str) -> str:
print(f"Compressing {dir_path}")
logger.info(f"Compressing {dir_path}")
if compression_cmd not in ["gzip", "zstd", "zstdmt", "pigz"]:
raise ValueError(f"Unsupported compression tool {compression_cmd}.")

Expand Down Expand Up @@ -70,16 +74,16 @@ def download(src: str, trg: str, output_path: str, compression_cmd: str) -> None
# 1: src-trg
# 2: trg-src
# 3: multilingual model
print(f"Attempt 1 of 3: Downloading a model for {src}-{trg}")
logger.info(f"Attempt 1 of 3: Downloading a model for {src}-{trg}")
result = _run_download(src, trg, tmp_dir)

pack_path = os.path.join(tmp_dir, f"{src}-{trg}")
if os.path.exists(pack_path):
check_result(result)
print(f"The model for {src}-{trg} existed")
logger.info(f"The model for {src}-{trg} existed")
else:
src, trg = trg, src
print(f"Attempt 2 of 3. Downloading a model for {src}-{trg}")
logger.info(f"Attempt 2 of 3. Downloading a model for {src}-{trg}")
result = _run_download(src, trg, tmp_dir)

pack_path = os.path.join(tmp_dir, f"{src}-{trg}")
Expand All @@ -88,7 +92,7 @@ def download(src: str, trg: str, output_path: str, compression_cmd: str) -> None
check_result(result)
print(f"The model for {src}-{trg} existed")
else:
print("Attempt 3 of 3. Downloading the multilingual model en-xx")
logger.info("Attempt 3 of 3. Downloading the multilingual model en-xx")
src = "en"
trg = "xx"
result = _run_download(src, trg, tmp_dir)
Expand All @@ -98,13 +102,13 @@ def download(src: str, trg: str, output_path: str, compression_cmd: str) -> None
check_result(result)
raise Exception("Could not download the multilingual model.")

print("Compress the downloaded pack.")
logger.info("Compress the downloaded pack.")
new_name = os.path.join(tmp_dir, f"bicleaner-ai-{original_src}-{original_trg}")
print("pack_path: ", pack_path)
print("new_name: ", new_name)
logger.info(f'pack_path: "{pack_path}"')
logger.info(f'new_name: "{new_name}"')

if os.path.isdir(new_name):
print("rmtree", new_name)
logger.info(f"rmtree {new_name}")
shutil.rmtree(new_name)

shutil.move(pack_path, new_name)
Expand All @@ -113,10 +117,10 @@ def download(src: str, trg: str, output_path: str, compression_cmd: str) -> None
pack_path = _compress_dir(pack_path, compression_cmd)

# Move to the expected path
print(f"Moving {pack_path} to {output_path}")
logger.info(f"Moving {pack_path} to {output_path}")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
shutil.move(pack_path, output_path)
print("Done")
logger.info("Done")


def main(args: Optional[list[str]] = None) -> None:
Expand Down
Empty file added pipeline/common/__init__.py
Empty file.
23 changes: 23 additions & 0 deletions pipeline/common/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import logging
from pathlib import Path

# Configure logging once, when this module is first imported: INFO level,
# with each message prefixed by the emitting logger's name in brackets.
logging.basicConfig(level=logging.INFO, format="[%(name)s] %(message)s")


def get_logger(name: str) -> logging.Logger:
    """
    Get a logger named after a source file, typically the caller's __file__.

    Only the stem of the path (its final component without the last suffix)
    is used as the logger name, and the logger's level is set to INFO.

    For example in pipeline/bicleaner/download_pack.py

        logger = get_logger(__file__)
        logger.info("This is a log.")

    Will log:

        > [download_pack] This is a log.

    Args:
        name: A file path, usually ``__file__``.

    Returns:
        The ``logging.Logger`` registered under the stem of ``name``.
    """
    # Path(...).stem strips both the directories and the final extension, so
    # two files with the same base name share a single logger.
    logger = logging.getLogger(Path(name).stem)
    logger.setLevel(logging.INFO)
    return logger
1 change: 1 addition & 0 deletions taskcluster/kinds/bicleaner-model/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ tasks:
pip install $MOZ_FETCHES_DIR/kenlm-0.0.0-cp310-cp310-linux_x86_64.whl &&
pip install -r $VCS_PATH/pipeline/bicleaner/requirements/bicleaner-ai.txt &&
export PATH=$PATH:~/.local/bin &&
export PYTHONPATH=$PYTHONPATH:$VCS_PATH &&
python3 $VCS_PATH/pipeline/bicleaner/download_pack.py
--src={src_locale}
--trg={trg_locale}
Expand Down