Skip to content

Commit

Permalink
bpo-43785: Improve BZ2File performance by removing RLock (pythonGH-25299
Browse files Browse the repository at this point in the history
)

Remove `RLock` from `BZ2File`. It makes `BZ2File` to thread unsafe, but
gzip and lzma don't use it too.

Co-authored-by: Gregory P. Smith <[email protected]>
  • Loading branch information
methane and gpshead authored Apr 12, 2021
1 parent 553ee27 commit cc2ffcd
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 59 deletions.
103 changes: 44 additions & 59 deletions Lib/bz2.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import io
import os
import _compression
from threading import RLock

from _bz2 import BZ2Compressor, BZ2Decompressor

Expand Down Expand Up @@ -53,9 +52,6 @@ def __init__(self, filename, mode="r", *, compresslevel=9):
If mode is 'r', the input file may be the concatenation of
multiple compressed streams.
"""
# This lock must be recursive, so that BufferedIOBase's
# writelines() does not deadlock.
self._lock = RLock()
self._fp = None
self._closefp = False
self._mode = _MODE_CLOSED
Expand Down Expand Up @@ -104,24 +100,23 @@ def close(self):
May be called more than once without error. Once the file is
closed, any other operation on it will raise a ValueError.
"""
with self._lock:
if self._mode == _MODE_CLOSED:
return
if self._mode == _MODE_CLOSED:
return
try:
if self._mode == _MODE_READ:
self._buffer.close()
elif self._mode == _MODE_WRITE:
self._fp.write(self._compressor.flush())
self._compressor = None
finally:
try:
if self._mode == _MODE_READ:
self._buffer.close()
elif self._mode == _MODE_WRITE:
self._fp.write(self._compressor.flush())
self._compressor = None
if self._closefp:
self._fp.close()
finally:
try:
if self._closefp:
self._fp.close()
finally:
self._fp = None
self._closefp = False
self._mode = _MODE_CLOSED
self._buffer = None
self._fp = None
self._closefp = False
self._mode = _MODE_CLOSED
self._buffer = None

@property
def closed(self):
Expand Down Expand Up @@ -153,22 +148,20 @@ def peek(self, n=0):
Always returns at least one byte of data, unless at EOF.
The exact number of bytes returned is unspecified.
"""
with self._lock:
self._check_can_read()
# Relies on the undocumented fact that BufferedReader.peek()
# always returns at least one byte (except at EOF), independent
# of the value of n
return self._buffer.peek(n)
self._check_can_read()
# Relies on the undocumented fact that BufferedReader.peek()
# always returns at least one byte (except at EOF), independent
# of the value of n
return self._buffer.peek(n)

def read(self, size=-1):
"""Read up to size uncompressed bytes from the file.
If size is negative or omitted, read until EOF is reached.
Returns b'' if the file is already at EOF.
"""
with self._lock:
self._check_can_read()
return self._buffer.read(size)
self._check_can_read()
return self._buffer.read(size)

def read1(self, size=-1):
"""Read up to size uncompressed bytes, while trying to avoid
Expand All @@ -177,20 +170,18 @@ def read1(self, size=-1):
Returns b'' if the file is at EOF.
"""
with self._lock:
self._check_can_read()
if size < 0:
size = io.DEFAULT_BUFFER_SIZE
return self._buffer.read1(size)
self._check_can_read()
if size < 0:
size = io.DEFAULT_BUFFER_SIZE
return self._buffer.read1(size)

def readinto(self, b):
"""Read bytes into b.
Returns the number of bytes read (0 for EOF).
"""
with self._lock:
self._check_can_read()
return self._buffer.readinto(b)
self._check_can_read()
return self._buffer.readinto(b)

def readline(self, size=-1):
"""Read a line of uncompressed bytes from the file.
Expand All @@ -203,9 +194,8 @@ def readline(self, size=-1):
if not hasattr(size, "__index__"):
raise TypeError("Integer argument expected")
size = size.__index__()
with self._lock:
self._check_can_read()
return self._buffer.readline(size)
self._check_can_read()
return self._buffer.readline(size)

def readlines(self, size=-1):
"""Read a list of lines of uncompressed bytes from the file.
Expand All @@ -218,9 +208,8 @@ def readlines(self, size=-1):
if not hasattr(size, "__index__"):
raise TypeError("Integer argument expected")
size = size.__index__()
with self._lock:
self._check_can_read()
return self._buffer.readlines(size)
self._check_can_read()
return self._buffer.readlines(size)

def write(self, data):
"""Write a byte string to the file.
Expand All @@ -229,12 +218,11 @@ def write(self, data):
always len(data). Note that due to buffering, the file on disk
may not reflect the data written until close() is called.
"""
with self._lock:
self._check_can_write()
compressed = self._compressor.compress(data)
self._fp.write(compressed)
self._pos += len(data)
return len(data)
self._check_can_write()
compressed = self._compressor.compress(data)
self._fp.write(compressed)
self._pos += len(data)
return len(data)

def writelines(self, seq):
"""Write a sequence of byte strings to the file.
Expand All @@ -244,8 +232,7 @@ def writelines(self, seq):
Line separators are not added between the written byte strings.
"""
with self._lock:
return _compression.BaseStream.writelines(self, seq)
return _compression.BaseStream.writelines(self, seq)

def seek(self, offset, whence=io.SEEK_SET):
"""Change the file position.
Expand All @@ -262,17 +249,15 @@ def seek(self, offset, whence=io.SEEK_SET):
Note that seeking is emulated, so depending on the parameters,
this operation may be extremely slow.
"""
with self._lock:
self._check_can_seek()
return self._buffer.seek(offset, whence)
self._check_can_seek()
return self._buffer.seek(offset, whence)

def tell(self):
"""Return the current file position."""
with self._lock:
self._check_not_closed()
if self._mode == _MODE_READ:
return self._buffer.tell()
return self._pos
self._check_not_closed()
if self._mode == _MODE_READ:
return self._buffer.tell()
return self._pos


def open(filename, mode="rb", compresslevel=9,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Improve ``bz2.BZ2File`` performance by removing the RLock from BZ2File.
This makes BZ2File thread unsafe in the face of multiple simultaneous
readers or writers, just like its equivalent classes in :mod:`gzip` and
:mod:`lzma` have always been. Patch by Inada Naoki.

0 comments on commit cc2ffcd

Please sign in to comment.