diff --git a/sdks/python/apache_beam/io/aws/s3io.py b/sdks/python/apache_beam/io/aws/s3io.py index 172a81d2171e..ac0c92f0d372 100644 --- a/sdks/python/apache_beam/io/aws/s3io.py +++ b/sdks/python/apache_beam/io/aws/s3io.py @@ -604,8 +604,13 @@ def _write_to_s3(self, data): raise def finish(self): - - self._write_to_s3(self.buffer) + if len(self.buffer) > 0: + # writes with zero length or mid size files between + # MIN_WRITE_SIZE = 5 * 1024 * 1024 + # MAX_WRITE_SIZE = 5 * 1024 * 1024 * 1024 + # as we will reach this finish() with len(self.buffer) == 0 + # which will fail + self._write_to_s3(self.buffer) if self.last_error is not None: raise self.last_error # pylint: disable=raising-bad-type diff --git a/sdks/python/apache_beam/io/aws/s3io_test.py b/sdks/python/apache_beam/io/aws/s3io_test.py index 20db01614c3a..cbc05ca25d3c 100644 --- a/sdks/python/apache_beam/io/aws/s3io_test.py +++ b/sdks/python/apache_beam/io/aws/s3io_test.py @@ -777,6 +777,22 @@ def test_list_prefix(self): for (object_name, size) in objects: self.aws.delete(self.TEST_DATA_PATH + object_name) + def test_midsize_file(self): + file_name = self.TEST_DATA_PATH + 'midsized' + file_size = 6 * 1024 * 1024 + self._insert_random_file(self.aws.client, file_name, file_size) + with self.aws.open(file_name, 'r') as f: + self.assertEqual(len(f.read()), file_size) + self.aws.delete(file_name) + + def test_zerosize_file(self): + file_name = self.TEST_DATA_PATH + 'zerosized' + file_size = 0 + self._insert_random_file(self.aws.client, file_name, file_size) + with self.aws.open(file_name, 'r') as f: + self.assertEqual(len(f.read()), file_size) + self.aws.delete(file_name) + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO)