-
Notifications
You must be signed in to change notification settings - Fork 27.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Save other CI jobs' result (torch/tf pipeline, example, deepspeed etc) #30699
Changes from 8 commits
44cc3ab
2ee86c4
4f100c4
4819d64
5516c22
e9aa51f
597a67b
4b2812c
d10c145
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,12 +60,10 @@ jobs: | |
|
||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack. | ||
- name: Failure table artifacts | ||
# Only the model testing job is concerned for this step | ||
if: ${{ inputs.job == 'run_models_gpu' }} | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: ci_results | ||
path: ci_results | ||
name: ci_results_${{ inputs.job }} | ||
path: ci_results_${{ inputs.job }} | ||
|
||
- uses: actions/checkout@v4 | ||
- uses: actions/download-artifact@v4 | ||
|
@@ -77,6 +75,7 @@ jobs: | |
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }} | ||
CI_EVENT: scheduled | ||
CI_SHA: ${{ github.sha }} | ||
CI_TEST_JOB: ${{ inputs.job }} | ||
SETUP_STATUS: ${{ inputs.setup_status }} | ||
# We pass `needs.setup.outputs.quantization_matrix` as the argument. `notification_service_quantization.py` must then change | ||
# `quantization/bnb` to `quantization_bnb`, as the artifact names use `_` instead of `/`. | ||
|
@@ -85,3 +84,11 @@ jobs: | |
pip install slack_sdk | ||
pip show slack_sdk | ||
python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}" | ||
|
||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack. | ||
- name: Failure table artifacts | ||
if: ${{ inputs.job == 'run_quantization_torch_gpu' }} | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: ci_results_${{ inputs.job }} | ||
path: ci_results_${{ inputs.job }} | ||
Comment on lines
+87
to
+94
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This step is for the quantization job, and only for it. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -416,7 +416,7 @@ def per_model_sum(model_category_dict): | |
reports=sorted_model_reports, | ||
to_truncate=False, | ||
) | ||
file_path = os.path.join(os.getcwd(), "ci_results/model_failures_report.txt") | ||
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/model_failures_report.txt") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same reason as above (the same change is applied in all the places below too).
ydshieh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
with open(file_path, "w", encoding="UTF-8") as fp: | ||
fp.write(model_failures_report) | ||
|
||
|
@@ -426,18 +426,18 @@ def per_model_sum(model_category_dict): | |
reports=sorted_module_reports, | ||
to_truncate=False, | ||
) | ||
file_path = os.path.join(os.getcwd(), "ci_results/module_failures_report.txt") | ||
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/module_failures_report.txt") | ||
with open(file_path, "w", encoding="UTF-8") as fp: | ||
fp.write(module_failures_report) | ||
|
||
if self.prev_ci_artifacts is not None: | ||
# if the last run produces artifact named `ci_results_{job_name}` | ||
ydshieh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if ( | ||
"ci_results" in self.prev_ci_artifacts | ||
and "model_failures_report.txt" in self.prev_ci_artifacts["ci_results"] | ||
f"ci_results_{job_name}" in self.prev_ci_artifacts | ||
and "model_failures_report.txt" in self.prev_ci_artifacts[f"ci_results_{job_name}"] | ||
): | ||
# Compute the difference of the previous/current (model failure) table | ||
prev_model_failures = self.prev_ci_artifacts["ci_results"]["model_failures_report.txt"] | ||
prev_model_failures = self.prev_ci_artifacts[f"ci_results_{job_name}"]["model_failures_report.txt"] | ||
entries_changed = self.compute_diff_for_failure_reports(model_failures_report, prev_model_failures) | ||
if len(entries_changed) > 0: | ||
# Save the complete difference | ||
|
@@ -447,7 +447,7 @@ def per_model_sum(model_category_dict): | |
reports=entries_changed, | ||
to_truncate=False, | ||
) | ||
file_path = os.path.join(os.getcwd(), "ci_results/changed_model_failures_report.txt") | ||
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/changed_model_failures_report.txt") | ||
with open(file_path, "w", encoding="UTF-8") as fp: | ||
fp.write(diff_report) | ||
|
||
|
@@ -643,8 +643,11 @@ def get_new_model_failure_blocks(self, with_header=True): | |
sorted_dict = sorted(self.model_results.items(), key=lambda t: t[0]) | ||
|
||
prev_model_results = {} | ||
if "ci_results" in self.prev_ci_artifacts and "model_results.json" in self.prev_ci_artifacts["ci_results"]: | ||
prev_model_results = json.loads(self.prev_ci_artifacts["ci_results"]["model_results.json"]) | ||
if ( | ||
f"ci_results_{job_name}" in self.prev_ci_artifacts | ||
and "model_results.json" in self.prev_ci_artifacts[f"ci_results_{job_name}"] | ||
): | ||
prev_model_results = json.loads(self.prev_ci_artifacts[f"ci_results_{job_name}"]["model_results.json"]) | ||
|
||
all_failure_lines = {} | ||
for job, job_result in sorted_dict: | ||
|
@@ -1139,20 +1142,32 @@ def prepare_reports(title, header, reports, to_truncate=True): | |
with open(os.path.join(directory, "selected_warnings.json")) as fp: | ||
selected_warnings = json.load(fp) | ||
|
||
if not os.path.isdir(os.path.join(os.getcwd(), "ci_results")): | ||
os.makedirs(os.path.join(os.getcwd(), "ci_results")) | ||
if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")): | ||
os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}")) | ||
|
||
# Only the model testing job is concerned: this condition prevents other jobs from uploading an empty list as | ||
# results. | ||
if job_name == "run_models_gpu": | ||
with open("ci_results/model_results.json", "w", encoding="UTF-8") as fp: | ||
with open(f"ci_results_{job_name}/model_results.json", "w", encoding="UTF-8") as fp: | ||
json.dump(model_results, fp, indent=4, ensure_ascii=False) | ||
|
||
# Must have the same keys as in `additional_results`. | ||
# The values are used as the file names under which the corresponding CI job results are saved. | ||
test_to_result_name = { | ||
"PyTorch pipelines": "torch_pipeline", | ||
"TensorFlow pipelines": "tf_pipeline", | ||
"Examples directory": "example", | ||
"Torch CUDA extension tests": "deepspeed", | ||
} | ||
for job, job_result in additional_results.items(): | ||
with open(f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", "w", encoding="UTF-8") as fp: | ||
json.dump(job_result, fp, indent=4, ensure_ascii=False) | ||
|
||
Comment on lines
+1154
to
+1165
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are the main changes in this PR. |
||
prev_ci_artifacts = None | ||
target_workflow = "huggingface/transformers/.github/workflows/self-scheduled.yml@refs/heads/main" | ||
if os.environ.get("CI_WORKFLOW_REF") == target_workflow: | ||
# Get the last previously completed CI's failure tables | ||
artifact_names = ["ci_results"] | ||
artifact_names = [f"ci_results_{job_name}"] | ||
output_dir = os.path.join(os.getcwd(), "previous_reports") | ||
os.makedirs(output_dir, exist_ok=True) | ||
prev_ci_artifacts = get_last_daily_ci_reports( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -242,6 +242,13 @@ def post_reply(self): | |
{"line": line, "trace": stacktraces.pop(0)} | ||
) | ||
|
||
job_name = os.getenv("CI_TEST_JOB") | ||
if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")): | ||
os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}")) | ||
|
||
with open(f"ci_results_{job_name}/quantization_results.json", "w", encoding="UTF-8") as fp: | ||
json.dump(quantization_results, fp, indent=4, ensure_ascii=False) | ||
Comment on lines
+245
to
+250
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above but for quantization |
||
|
||
message = QuantizationMessage( | ||
title, | ||
results=quantization_results, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried not to change this again, but there doesn't seem to be a better solution.
The problem is that we can't upload different artifacts under the same
name
(the workflow run will fail).