Python API: ramp up read buffer size
This will reduce the number of iterations of the read loop and thus
save CPU time (less time spent in per-iteration boilerplate code).
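The diff below makes the reader pull its per-read size from exp_ramp_gen(8192, 1048576) when no explicit buffer size is given. That helper is not part of this diff, so the following is only a minimal sketch of what such a generator could look like, assuming it doubles the read size from the start value up to the cap and then keeps yielding the cap:

def exp_ramp_gen(start, cap):
    # Hypothetical sketch, not the actual implementation: start small,
    # double the size on each iteration, then stick to the cap.
    size = start
    while size < cap:
        yield size
        size *= 2
    while True:
        yield cap

With these arguments the successive read sizes would be 8 KiB, 16 KiB, 32 KiB, ... up to 1 MiB, so small requests keep a small buffer while long downloads quickly reach the large one.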
fvennetier committed Aug 20, 2021
1 parent 0a7e51b commit 3236495
Showing 3 changed files with 11 additions and 5 deletions.
7 changes: 6 additions & 1 deletion oio/api/io.py
@@ -1,4 +1,5 @@
 # Copyright (C) 2015-2020 OpenIO SAS, as part of OpenIO SDS
+# Copyright (C) 2021 OVH SAS
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -267,6 +268,10 @@ def __init__(self, chunk_iter, buf_size, headers,
         self.status = None
         # buf size indicates the amount of data we yield
         self.buf_size = buf_size
+        if self.buf_size:
+            self.read_size = itertools.repeat(self.buf_size)
+        else:
+            self.read_size = exp_ramp_gen(8192, 1048576)
         self.discard_bytes = 0
         self.align = align
         self.connection_timeout = connection_timeout or CONNECTION_TIMEOUT
@@ -472,7 +477,7 @@ def iter_from_resp(self, source, parts_iter, part, chunk):
         while True:
             try:
                 with green.ChunkReadTimeout(self.read_timeout):
-                    data = part.read(self.buf_size or READ_CHUNK_SIZE)
+                    data = part.read(next(self.read_size))
                     count += 1
                     buf += data
             except (green.ChunkReadTimeout, IOError) as crto:
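With the change above, the read loop asks a generator for the size of each read instead of using a fixed constant. A small illustration of the branch added to __init__, assuming the hypothetical exp_ramp_gen sketch from the commit message and noting that itertools is presumably already imported by the module (the diff does not add the import):

import itertools

buf_size = None  # what the call sites in the next two files now pass
if buf_size:
    read_size = itertools.repeat(buf_size)    # constant-size reads
else:
    read_size = exp_ramp_gen(8192, 1048576)   # ramp from 8 KiB up to 1 MiB

# Each loop iteration then calls part.read(next(read_size)), so the first
# reads are small and later ones grow: 8192, 16384, 32768, 65536, ...

Passing an explicit buf_size therefore keeps the old fixed-size behaviour, while a falsy value enables the ramp.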
4 changes: 2 additions & 2 deletions oio/blob/client.py
@@ -1,4 +1,5 @@
 # Copyright (C) 2015-2020 OpenIO SAS, as part of OpenIO SDS
+# Copyright (C) 2021 OVH SAS
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -36,7 +37,6 @@
 CONNECTION_TIMEOUT = 10.0
 # chunk operations timeout
 CHUNK_TIMEOUT = 60.0
-READ_BUFFER_SIZE = 65535
 PARALLEL_CHUNKS_DELETE = 3


@@ -172,7 +172,7 @@ def chunk_get(self, url, check_headers=True, **kwargs):
         to the chunk's data.
         """
         url = self.resolve_url(url)
-        reader = ChunkReader([{'url': url}], READ_BUFFER_SIZE,
+        reader = ChunkReader([{'url': url}], None,
                              **kwargs)
         # This must be done now if we want to access headers
         stream = reader.stream()
5 changes: 3 additions & 2 deletions oio/common/storage_functions.py
@@ -1,4 +1,5 @@
 # Copyright (C) 2017-2019 OpenIO SAS, as part of OpenIO SDS
+# Copyright (C) 2021 OVH SAS
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -16,7 +17,7 @@

 import random
 
-from oio.api.io import ChunkReader, READ_CHUNK_SIZE
+from oio.api.io import ChunkReader
 from oio.api.ec import ECChunkDownloadHandler
 from oio.common import exceptions as exc
 from oio.common.constants import OBJECT_METADATA_PREFIX
@@ -212,7 +213,7 @@ def fetch_stream(chunks, ranges, storage_method, headers=None,
             headers['Range'] = http_header_from_ranges(
                 (meta_range_dict[pos], ))
             reader = ChunkReader(
-                iter(chunks[pos]), READ_CHUNK_SIZE, headers=headers,
+                iter(chunks[pos]), None, headers=headers,
                 **kwargs)
             try:
                 it = reader.get_iter()
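Both call sites above now pass None as the buffer size, so they get the ramp-up behaviour by default. A caller that prefers fixed-size reads can still request them by passing an explicit value; a hypothetical example, with the positional parameter order taken from the hunks above:

# Hypothetical usage: keep fixed 64 KiB reads instead of the ramp-up.
reader = ChunkReader([{'url': url}], 65536, headers=headers, **kwargs)
stream = reader.stream()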
