Skip to content

Commit

Permalink
CA-395560 Improve logging and error checking on sg_readcap
Browse files Browse the repository at this point in the history
When reading information using sg_readcap, it would be useful in the
event of failure to log what the return code was so that we know what
failure we're dealing with.

Also, per the manual pages for these commands, there are quite a few
more return codes which should be considered retryable, plus a non-zero
success return code which is theoretically possible.

Add a function to gather up the checks and logs for these things and
then wrap a call to that function in a short retry loop.

Signed-off-by: Tim Smith <[email protected]>
  • Loading branch information
Tim Smith authored and MarkSymsCtx committed Feb 19, 2025
1 parent d87b53a commit 25ceda3
Showing 1 changed file with 52 additions and 5 deletions.
57 changes: 52 additions & 5 deletions drivers/scsiutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,15 +681,62 @@ def remove_stale_luns(hostids, lunid, expectedPath, mpath):
" up properly! Error: %s" % str(e))


def sg_return_check(rc, logmsg, stderr):
    """
    Classify the exit status of an sg command as success, retryable or fatal.

    rc is the command's return code, logmsg is the prefix used for every
    message this function logs or raises, and stderr is the command's error
    output (only included in the recovered-error log and in the exception).

    Returns True when rc indicates success (0, or 21 "recovered error") and
    False when the failure is worth retrying. Any pause wanted before the
    retry happens in here, so callers can simply loop.

    Raises util.SMException, built from logmsg, rc and stderr, for every
    return code that is neither a success nor retryable.
    """
    if rc == 0:
        return True

    if rc == 21:
        # A recovered error still counts as success, but it is unusual
        # enough to be worth a log line including the command's stderr.
        util.SMlog(f"{logmsg}: recovered error: {stderr}")
        return True

    # (return code, text appended to the log line, seconds to wait first)
    # Only "device not ready" benefits from a pause; the rest are retried
    # immediately.
    retryable = (
        (2, "not ready", 1),
        (6, "unit attention", 0),
        (11, "command aborted", 0),
        (14, "sense miscompare", 0),
        (33, "timed out", 0),
    )
    for code, reason, pause in retryable:
        if rc != code:
            continue
        util.SMlog(f"{logmsg}: {reason}")
        if pause:
            time.sleep(pause)
        return False

    # Anything else is not known to be retryable: surface the details.
    raise util.SMException(f"{logmsg}: RC={rc}, STDERR={stderr}")


def sg_readcap(device):
device = os.path.join('/dev', getdev(device))
readcapcommand = ['/usr/bin/sg_readcap', '-b', device]
(rc, stdout, stderr) = util.doexec(readcapcommand)
if rc == 6:
# retry one time for "Capacity data has changed"
attempts = 3
succeeded = False
while attempts > 0:
attempts -= 1
(rc, stdout, stderr) = util.doexec(readcapcommand)
if rc != 0:
raise util.SMException("scsiutil.sg_readcap(%s) failed" % (device))
if sg_return_check(rc, f"scsiutil.sg_readcap({device})", stderr):
succeeded = True
break

if not succeeded:
raise util.SMException(f"scsiutil.sg_readcap({device}): too many failures")

match = re.search('(^|.*\n)(0x[0-9a-fA-F]+) (0x[0-9a-fA-F]+)\n$', stdout)
if not match:
raise util.SMException("scsiutil.sg_readcap(%s) failed to parse: %s"
Expand Down

0 comments on commit 25ceda3

Please sign in to comment.