From 323649586a71b0a1f91a91823c00b2475d3b3305 Mon Sep 17 00:00:00 2001
From: Florent Vennetier
Date: Fri, 20 Aug 2021 18:33:43 +0200
Subject: [PATCH] Python API: ramp up read buffer size

This will reduce the number of iterations of the read loop
and thus save CPU time (because of the boilerplate code).
---
 oio/api/io.py                   | 7 ++++++-
 oio/blob/client.py              | 4 ++--
 oio/common/storage_functions.py | 5 +++--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/oio/api/io.py b/oio/api/io.py
index bdb5a75afd..5b62d8eb17 100644
--- a/oio/api/io.py
+++ b/oio/api/io.py
@@ -1,4 +1,5 @@
 # Copyright (C) 2015-2020 OpenIO SAS, as part of OpenIO SDS
+# Copyright (C) 2021 OVH SAS
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -267,6 +268,10 @@ def __init__(self, chunk_iter, buf_size, headers,
         self.status = None
         # buf size indicates the amount we data we yield
         self.buf_size = buf_size
+        if self.buf_size:
+            self.read_size = itertools.repeat(self.buf_size)
+        else:
+            self.read_size = exp_ramp_gen(8192, 1048576)
         self.discard_bytes = 0
         self.align = align
         self.connection_timeout = connection_timeout or CONNECTION_TIMEOUT
@@ -472,7 +477,7 @@ def iter_from_resp(self, source, parts_iter, part, chunk):
             while True:
                 try:
                     with green.ChunkReadTimeout(self.read_timeout):
-                        data = part.read(self.buf_size or READ_CHUNK_SIZE)
+                        data = part.read(next(self.read_size))
                         count += 1
                         buf += data
                 except (green.ChunkReadTimeout, IOError) as crto:
diff --git a/oio/blob/client.py b/oio/blob/client.py
index ce5e02a13f..27de1df550 100644
--- a/oio/blob/client.py
+++ b/oio/blob/client.py
@@ -1,4 +1,5 @@
 # Copyright (C) 2015-2020 OpenIO SAS, as part of OpenIO SDS
+# Copyright (C) 2021 OVH SAS
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -36,7 +37,6 @@
 
 CONNECTION_TIMEOUT = 10.0
 # chunk operations timeout
 CHUNK_TIMEOUT = 60.0
-READ_BUFFER_SIZE = 65535
 
 PARALLEL_CHUNKS_DELETE = 3
@@ -172,7 +172,7 @@ def chunk_get(self, url, check_headers=True, **kwargs):
         to the chunk's data.
         """
         url = self.resolve_url(url)
-        reader = ChunkReader([{'url': url}], READ_BUFFER_SIZE,
+        reader = ChunkReader([{'url': url}], None,
                              **kwargs)
         # This must be done now if we want to access headers
         stream = reader.stream()
diff --git a/oio/common/storage_functions.py b/oio/common/storage_functions.py
index b700ffbcf2..77ebb300be 100644
--- a/oio/common/storage_functions.py
+++ b/oio/common/storage_functions.py
@@ -1,4 +1,5 @@
 # Copyright (C) 2017-2019 OpenIO SAS, as part of OpenIO SDS
+# Copyright (C) 2021 OVH SAS
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -16,7 +17,7 @@
 
 import random
 
-from oio.api.io import ChunkReader, READ_CHUNK_SIZE
+from oio.api.io import ChunkReader
 from oio.api.ec import ECChunkDownloadHandler
 from oio.common import exceptions as exc
 from oio.common.constants import OBJECT_METADATA_PREFIX
@@ -212,7 +213,7 @@ def fetch_stream(chunks, ranges, storage_method, headers=None,
             headers['Range'] = http_header_from_ranges(
                 (meta_range_dict[pos], ))
         reader = ChunkReader(
-            iter(chunks[pos]), READ_CHUNK_SIZE, headers=headers,
+            iter(chunks[pos]), None, headers=headers,
             **kwargs)
         try:
             it = reader.get_iter()