Skip to content

Commit

Permalink
GH-2976: Parquet CLI compression commands should accept lowercase compression name (#2977)
Browse files Browse the repository at this point in the history
  • Loading branch information
pan3793 authored Aug 7, 2024
1 parent 26febde commit fd82995
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.cli.BaseCommand;
import org.apache.parquet.cli.util.Codecs;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.hadoop.rewrite.MaskMode;
import org.apache.parquet.hadoop.rewrite.ParquetRewriter;
Expand Down Expand Up @@ -114,7 +115,7 @@ private RewriteOptions buildOptionsOrFail() throws IOException {
}

if (codec != null) {
CompressionCodecName codecName = CompressionCodecName.valueOf(codec);
CompressionCodecName codecName = Codecs.parquetCodec(codec);
builder.transform(codecName);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.parquet.HadoopReadOptions;
import org.apache.parquet.cli.BaseCommand;
import org.apache.parquet.cli.util.Codecs;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
Expand Down Expand Up @@ -76,7 +77,7 @@ public int run() throws IOException {

Path inPath = new Path(input);
Path outPath = new Path(output);
CompressionCodecName codecName = CompressionCodecName.valueOf(codec);
CompressionCodecName codecName = Codecs.parquetCodec(codec);

ParquetMetadata metaData = ParquetFileReader.readFooter(getConf(), inPath, NO_FILTER);
MessageType schema = metaData.getFileMetaData().getSchema();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,32 @@ public void testRewriteCommandWithOverwrite() throws IOException {
Assert.assertEquals(0, command.run());
Assert.assertTrue(output.exists());
}

@Test
public void testRewriteCommandWithCompression_GZIP() throws IOException {
  // Rewrite the fixture file with the codec name spelled in upper case.
  final File sourceFile = parquetFile();
  final RewriteCommand rewrite = new RewriteCommand(createLogger());
  final String sourcePath = sourceFile.getAbsolutePath();
  rewrite.inputs = Arrays.asList(sourcePath);

  final File rewritten = new File(getTempFolder(), "converted-1.GZIP.parquet");
  rewrite.output = rewritten.getAbsolutePath();
  rewrite.codec = "GZIP";
  rewrite.setConf(new Configuration());

  // The command must report success (exit code 0) and leave the output file on disk.
  Assert.assertEquals(0, rewrite.run());
  Assert.assertTrue(rewritten.exists());
}

@Test
public void testRewriteCommandWithCompression_gzip() throws IOException {
  // Same scenario as the upper-case variant, but the codec name is given in lower case.
  final File sourceFile = parquetFile();
  final RewriteCommand rewrite = new RewriteCommand(createLogger());
  final String sourcePath = sourceFile.getAbsolutePath();
  rewrite.inputs = Arrays.asList(sourcePath);

  final File rewritten = new File(getTempFolder(), "converted-2.gzip.parquet");
  rewrite.output = rewritten.getAbsolutePath();
  rewrite.codec = "gzip";
  rewrite.setConf(new Configuration());

  // The command must report success (exit code 0) and leave the output file on disk.
  Assert.assertEquals(0, rewrite.run());
  Assert.assertTrue(rewritten.exists());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,32 @@
public class TransCompressionCommandTest extends ParquetFileTest {

@Test
public void testTransCompressionCommand() throws IOException {
public void testTransCompressionCommand_ZSTD() throws IOException {
TransCompressionCommand command = new TransCompressionCommand(createLogger());

command.input = parquetFile().getAbsolutePath();

File output = new File(getTempFolder(), getClass().getSimpleName() + ".converted.parquet");
File output = new File(getTempFolder(), getClass().getSimpleName() + ".converted-1.ZSTD.parquet");
command.output = output.getAbsolutePath();
command.codec = "ZSTD";
command.setConf(new Configuration());

Assert.assertEquals(0, command.run());
Assert.assertTrue(output.exists());
}

@Test
public void testTransCompressionCommand_zstd() throws IOException {
  // Trans-compress the fixture file with the codec name given in lower case.
  final TransCompressionCommand transCompression = new TransCompressionCommand(createLogger());

  final File source = parquetFile();
  transCompression.input = source.getAbsolutePath();

  // Output file name is prefixed with this test class's simple name.
  final String targetName = getClass().getSimpleName() + ".converted-2.zstd.parquet";
  final File target = new File(getTempFolder(), targetName);
  transCompression.output = target.getAbsolutePath();
  transCompression.codec = "zstd";
  transCompression.setConf(new Configuration());

  // The command must report success (exit code 0) and leave the output file on disk.
  final int exitCode = transCompression.run();
  Assert.assertEquals(0, exitCode);
  Assert.assertTrue(target.exists());
}
}

0 comments on commit fd82995

Please sign in to comment.