Skip to content

Commit

Permalink
Merge pull request #19150 from ClickHouse/backport/21.1/19101
Browse files Browse the repository at this point in the history
Backport #19101 to 21.1: Fix compression codec read for empty files
  • Loading branch information
alesapin authored Jan 16, 2021
2 parents fc3ba24 + 1250bbd commit 5979d6b
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 4 deletions.
20 changes: 16 additions & 4 deletions src/Storages/MergeTree/IMergeTreeDataPart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,14 +549,26 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const
auto column_size = getColumnSize(part_column.name, *part_column.type);
if (column_size.data_compressed != 0 && !storage_columns.hasCompressionCodec(part_column.name))
{
String path_to_data_file = getFullRelativePath() + getFileNameForColumn(part_column) + ".bin";
if (!volume->getDisk()->exists(path_to_data_file))
String path_to_data_file;
part_column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
LOG_WARNING(storage.log, "Part's {} column {} has non zero data compressed size, but data file {} doesn't exists", name, backQuoteIfNeed(part_column.name), path_to_data_file);
if (path_to_data_file.empty())
{
String candidate_path = getFullRelativePath() + IDataType::getFileNameForStream(part_column.name, substream_path) + ".bin";

/// We can have existing, but empty .bin files. Example: LowCardinality(Nullable(...)) columns and column_name.dict.null.bin file.
if (volume->getDisk()->exists(candidate_path) && volume->getDisk()->getFileSize(candidate_path) != 0)
path_to_data_file = candidate_path;
}
});

if (path_to_data_file.empty())
{
LOG_WARNING(storage.log, "Part's {} column {} has non zero data compressed size, but all data files don't exist or empty", name, backQuoteIfNeed(part_column.name));
continue;
}

result = getCompressionCodecForFile(volume->getDisk(), getFullRelativePath() + getFileNameForColumn(part_column) + ".bin");
result = getCompressionCodecForFile(volume->getDisk(), path_to_data_file);
break;
}
}
Expand Down
1 change: 1 addition & 0 deletions tests/integration/test_compression_codec_read/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/usr/bin/env python3
46 changes: 46 additions & 0 deletions tests/integration/test_compression_codec_read/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pytest

from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry

# Cluster object for this test module, constructed from this file's path.
cluster = ClickHouseCluster(__file__)

# The only instance in the cluster, pinned to an explicit server image and tag.
# NOTE(review): with_installed_binary/stay_alive are presumably what allows
# restart_with_latest_version() to be called on this node later -- confirm
# against helpers.cluster.
node1 = cluster.add_instance(
    'node1',
    image='yandex/clickhouse-server',
    tag='20.8.11.17',
    with_installed_binary=True,
    stay_alive=True,
)

@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture that starts the cluster and yields it to tests.

    The cluster is shut down in the ``finally`` clause, so shutdown runs
    whether startup, or the tests using the fixture, succeed or raise.
    """
    try:
        cluster.start()
        # Hand the running cluster to the tests; execution resumes here at teardown.
        yield cluster
    finally:
        cluster.shutdown()

def test_default_codec_read(start_cluster):
    """Check that a table written by node1 is still readable after a restart.

    Steps: create a MergeTree table with a mix of plain, Nullable, and
    LowCardinality(Nullable(...)) columns, insert one row, verify the row
    count, call ``restart_with_latest_version()`` on the node, and verify the
    row count again.
    """
    create_table_sql = """
CREATE TABLE test_18340
(
`lns` LowCardinality(Nullable(String)),
`ns` Nullable(String),
`s` String,
`ni64` Nullable(Int64),
`ui64` UInt64,
`alns` Array(LowCardinality(Nullable(String))),
`ans` Array(Nullable(String)),
`dt` DateTime,
`i32` Int32
)
ENGINE = MergeTree()
PARTITION BY i32
ORDER BY (s, farmHash64(s))
SAMPLE BY farmHash64(s)
"""
    insert_sql = "insert into test_18340 values ('test', 'test', 'test', 0, 0, ['a'], ['a'], now(), 0)"
    count_sql = "SELECT COUNT() FROM test_18340"
    single_row = "1\n"

    node1.query(create_table_sql)
    node1.query(insert_sql)

    # Baseline: the row is visible before the restart.
    rows_before = node1.query(count_sql)
    assert rows_before == single_row

    node1.restart_with_latest_version()

    # The same part must still be readable after the restart.
    rows_after = node1.query(count_sql)
    assert rows_after == single_row

0 comments on commit 5979d6b

Please sign in to comment.