Skip to content

Commit

Permalink
Updates readline logic for azure to match s3
Browse files: browse the repository at this point in the history
Loosely copies the readline buffer management from s3 to azure,
improving performance.
  • Loading branch information
quantumfusion committed Jul 2, 2024
1 parent 5a82613 commit c34aa5c
Showing 1 changed file with 12 additions and 14 deletions.
26 changes: 12 additions & 14 deletions smart_open/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,24 +325,22 @@ def readline(self, limit=-1):
"""Read up to and including the next newline. Returns the bytes read."""
if limit != -1:
raise NotImplementedError('limits other than -1 not implemented yet')
the_line = io.BytesIO()

#
# A single line may span multiple buffers.
#
line = io.BytesIO()
while not (self._position == self._size and len(self._current_part) == 0):
#
# In the worst case, we're reading the unread part of self._current_part
# twice here, once in the if condition and once when calling index.
#
# This is sub-optimal, but better than the alternative: wrapping
# .index in a try..except, because that is slower.
#
remaining_buffer = self._current_part.peek()
if self._line_terminator in remaining_buffer:
next_newline = remaining_buffer.index(self._line_terminator)
the_line.write(self._read_from_buffer(next_newline + 1))
line_part = self._current_part.readline(self._line_terminator)
line.write(line_part)
self._position += len(line_part)

if line_part.endswith(self._line_terminator):
break
else:
the_line.write(self._read_from_buffer())
self._fill_buffer()
return the_line.getvalue()

return line.getvalue()

#
# Internal methods.
Expand Down

0 comments on commit c34aa5c

Please sign in to comment.