Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix buildah retry mechanism #681

Merged
merged 1 commit into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions iib/workers/tasks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,10 +766,14 @@ def run_cmd(
raise IIBError(f'{exc_msg.rstrip(".")}: {match.groups()[0]}')
elif cmd[0] == 'buildah':
# Check for HTTP 50X errors on buildah
regex = r'.*(error creating build container).*((?:50[0-9]|125)\s.*$)'
match = _regex_reverse_search(regex, response)
if match:
raise ExternalServiceError(f'{exc_msg}: {": ".join(match.groups()).strip()}')
network_regexes = [
r'.*([e,E]rror:? creating build container).*(:?(50[0-9]|125)\s.*$)',
r'.*(read\/write on closed pipe.*$)',
]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you considered reading the regular expressions from a config file?
That way, one wouldn't need to deploy a new version of IIB just to retry on a new network issue reported by buildah.

Copy link
Contributor Author

@xDaile xDaile Jun 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lipoja what do you think?

IMO the error messages for those network issues are not changing that fast, so we would not need to have them in the config.

for regex in network_regexes:
match = _regex_reverse_search(regex, response)
if match:
raise ExternalServiceError(f'{exc_msg}: {": ".join(match.groups()).strip()}')

raise IIBError(exc_msg)

Expand Down
54 changes: 31 additions & 23 deletions tests/test_workers/test_tasks/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,34 +271,42 @@ def test_run_cmd_failed_opm(mock_sub_run):
mock_sub_run.assert_called_once()


@pytest.mark.parametrize(
'expected_exc, subprocess_stderr',
[
(
r'Failed build the index image:.*error creating build container: 503 \(Service Unavailable\)', # noqa: E501
textwrap.dedent(
'''
2021-12-14 08:52:39,144 iib.workers.tasks.utils DEBUG utils.run_cmd Running the command "buildah bud --no-cache --override-arch s390x --arch s390x -t iib-build:56056-s390x -f /tmp/iib-ozo81z6o/index.Dockerfile"
2021-12-14 08:55:10,212 iib.workers.tasks.utils ERROR utils.run_cmd The command "buildah bud --no-cache --override-arch s390x --arch s390x -t iib-build:56056-s390x -f /tmp/iib-ozo81z6o/index.Dockerfile" failed with: Trying to pull registry.redhat.io/openshift4/ose-operator-registry@sha256:72498731bbea4307178f9d0d237bf2a8439bfa8f580f87c35e5a73cb1c854bd6...
Copying blob sha256:27cb39a08c6eb46426e92622c4edea9b9b8495b2401d02c773e239dd40d99a22
error creating build container: reading blob sha256:3224b0f72681ebcfaec3c51b3d7efe187a5cab0355b4bbe6cffadde0d17d2292: Error fetching blob: invalid status code from registry 503 (Service Unavailable)
time="2021-12-14T08:55:10-05:00" level=error msg="exit status 125"
''' # noqa: E501
),
),
(
r'Failed build the index image:.*read/write on closed pipe',
textwrap.dedent(
'''
2024-04-25 15:46:56,754 iib.workers.tasks.utils ForkPoolWorker-1 request-715681 ERROR utils.run_cmd The command "buildah bud --no-cache --format docker --override-arch arm64 --arch arm64 -t iib-build:715681-arm64 -f /tmp/iib-715681-h8oqqbe6/index.Dockerfile" failed with: Trying to pull registry.redhat.io/openshift4/ose-operator-registry@sha256:26ebec42ba8d632ac9e2b7af92eba447c90f1d864d93481ac44d092e003600db...
time="2024-04-25T15:46:56Z" level=error msg="Can't add file /home/iib-worker-cvp-parallel-2/.local/share/containers/storage/overlay/5a5a673222a5bde8d43d28bcab6665b4921a62ec34de97a2f170a1a46774169e/diff/tmp/cache/cache/advanced-cluster-management_release-2.7_advanced-cluster-management.v2.7.2.json to tar: io: read/write on closed pipe"
time="2024-04-25T15:46:56Z" level=error msg="io: read/write on closed pipe"
time="2024-04-25T15:46:56Z" level=error msg="Can't close tar writer: io: read/write on closed pipe"
Error: committing container for step to file: io: read/write on closed pipe
''' # noqa: E501
),
),
],
)
@mock.patch('iib.workers.tasks.utils.subprocess.run')
def test_run_cmd_failed_buildah(mock_sub_run):
def test_run_cmd_failed_buildah(mock_sub_run, expected_exc, subprocess_stderr):
mock_rv = mock.Mock()
mock_rv.returncode = 1
mock_rv.stderr = textwrap.dedent(
'''
2021-12-14 08:52:39,144 iib.workers.tasks.utils DEBUG utils.run_cmd Running the command "buildah bud --no-cache --override-arch s390x --arch s390x -t iib-build:56056-s390x -f /tmp/iib-ozo81z6o/index.Dockerfile"
2021-12-14 08:55:10,212 iib.workers.tasks.utils ERROR utils.run_cmd The command "buildah bud --no-cache --override-arch s390x --arch s390x -t iib-build:56056-s390x -f /tmp/iib-ozo81z6o/index.Dockerfile" failed with: Trying to pull registry.redhat.io/openshift4/ose-operator-registry@sha256:72498731bbea4307178f9d0d237bf2a8439bfa8f580f87c35e5a73cb1c854bd6...
Getting image source signatures
Checking if image destination supports signatures
Copying blob sha256:b381d16488eb8afbbaed78ff48e8b4702b04c236400524dfd2ae759127422edf
Copying blob sha256:27cb39a08c6eb46426e92622c4edea9b9b8495b2401d02c773e239dd40d99a22
Copying blob sha256:3eabe22a2aec9181c0849b1a23a6104a81bcf00bea55a52a45dba613f0afd896
Copying blob sha256:3224b0f72681ebcfaec3c51b3d7efe187a5cab0355b4bbe6cffadde0d17d2292
Copying blob sha256:45ac5acd44f7a277e412330b36e908278d979fa0de30ca0628ef0729f61d825e
Copying blob sha256:45ac5acd44f7a277e412330b36e908278d979fa0de30ca0628ef0729f61d825e
Copying blob sha256:3eabe22a2aec9181c0849b1a23a6104a81bcf00bea55a52a45dba613f0afd896
Copying blob sha256:27cb39a08c6eb46426e92622c4edea9b9b8495b2401d02c773e239dd40d99a22
error creating build container: reading blob sha256:3224b0f72681ebcfaec3c51b3d7efe187a5cab0355b4bbe6cffadde0d17d2292: Error fetching blob: invalid status code from registry 503 (Service Unavailable)
time="2021-12-14T08:55:10-05:00" level=error msg="exit status 125"
''' # noqa: E501
)
mock_rv.stderr = subprocess_stderr
mock_sub_run.return_value = mock_rv

expected_exc = (
r'Failed build the index image: error creating build container: 503 \(Service Unavailable\)'
)

with pytest.raises(ExternalServiceError, match=expected_exc):
utils.run_cmd(
[
Expand Down