Skip to content

Commit

Permalink
Make encryption an optional feature
Browse files Browse the repository at this point in the history
  • Loading branch information
adamreeve committed Dec 20, 2024
1 parent 31405dd commit 9030f3a
Show file tree
Hide file tree
Showing 10 changed files with 114 additions and 43 deletions.
5 changes: 3 additions & 2 deletions parquet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ paste = { version = "1.0" }
half = { version = "2.1", default-features = false, features = ["num-traits"] }
sysinfo = { version = "0.32.0", optional = true, default-features = false, features = ["system"] }
crc32fast = { version = "1.4.2", optional = true, default-features = false }
ring = { version = "0.17", default-features = false, features = ["std"]}
ring = { version = "0.17", default-features = false, features = ["std"], optional = true }

[dev-dependencies]
base64 = { version = "0.22", default-features = false, features = ["std"] }
Expand Down Expand Up @@ -121,7 +121,8 @@ zstd = ["dep:zstd", "zstd-sys"]
sysinfo = ["dep:sysinfo"]
# Verify 32-bit CRC checksum when decoding parquet pages
crc = ["dep:crc32fast"]
#encryption = ["aes-gcm", "base64"]
# Enable Parquet modular encryption support
encryption = ["dep:ring"]


[[example]]
Expand Down
44 changes: 33 additions & 11 deletions parquet/src/arrow/arrow_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ mod filter;
mod selection;
pub mod statistics;

#[cfg(feature = "encryption")]
use crate::encryption::ciphers::{CryptoContext, FileDecryptionProperties};

/// Builder for constructing parquet readers into arrow.
Expand Down Expand Up @@ -383,12 +384,14 @@ impl ArrowReaderMetadata {
pub fn load<T: ChunkReader>(
reader: &T,
options: ArrowReaderOptions,
file_decryption_properties: Option<&FileDecryptionProperties>,
#[cfg(feature = "encryption")] file_decryption_properties: Option<
&FileDecryptionProperties,
>,
) -> Result<Self> {
let metadata = ParquetMetaDataReader::new()
.with_page_indexes(options.page_index)
.with_encryption_properties(file_decryption_properties)
.parse_and_finish(reader)?;
let metadata = ParquetMetaDataReader::new().with_page_indexes(options.page_index);
#[cfg(feature = "encryption")]
let metadata = metadata.with_encryption_properties(file_decryption_properties);
let metadata = metadata.parse_and_finish(reader)?;
Self::try_new(Arc::new(metadata), options)
}

Expand Down Expand Up @@ -534,11 +537,17 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {

/// Create a new [`ParquetRecordBatchReaderBuilder`] with [`ArrowReaderOptions`]
pub fn try_new_with_options(reader: T, options: ArrowReaderOptions) -> Result<Self> {
let metadata = ArrowReaderMetadata::load(&reader, options, None)?;
let metadata = ArrowReaderMetadata::load(
&reader,
options,
#[cfg(feature = "encryption")]
None,
)?;
Ok(Self::new_with_metadata(reader, metadata))
}

/// Create a new [`ParquetRecordBatchReaderBuilder`] with [`ArrowReaderOptions`] and [`FileDecryptionProperties`]
#[cfg(feature = "encryption")]
pub fn try_new_with_decryption(
reader: T,
options: ArrowReaderOptions,
Expand Down Expand Up @@ -694,6 +703,7 @@ impl<T: ChunkReader + 'static> Iterator for ReaderPageIterator<T> {
let total_rows = rg.num_rows() as usize;
let reader = self.reader.clone();

#[cfg(feature = "encryption")]
let crypto_context = if self.metadata.file_decryptor().is_some() {
let file_decryptor = Arc::new(self.metadata.file_decryptor().clone().unwrap());

Expand All @@ -708,8 +718,14 @@ impl<T: ChunkReader + 'static> Iterator for ReaderPageIterator<T> {
None
};

let ret =
SerializedPageReader::new(reader, meta, total_rows, page_locations, crypto_context);
let ret = SerializedPageReader::new(
reader,
meta,
total_rows,
page_locations,
#[cfg(feature = "encryption")]
crypto_context,
);
Some(ret.map(|x| Box::new(x) as _))
}
}
Expand Down Expand Up @@ -824,6 +840,7 @@ impl ParquetRecordBatchReader {
///
/// Note: this is needed when the parquet file is encrypted
// todo: add options or put file_decryption_properties into options
#[cfg(feature = "encryption")]
pub fn try_new_with_decryption<T: ChunkReader + 'static>(
reader: T,
batch_size: usize,
Expand Down Expand Up @@ -993,10 +1010,11 @@ mod tests {
};
use arrow_select::concat::concat_batches;

#[cfg(feature = "encryption")]
use crate::arrow::arrow_reader::ArrowReaderMetadata;
use crate::arrow::arrow_reader::{
ArrowPredicateFn, ArrowReaderBuilder, ArrowReaderMetadata, ArrowReaderOptions,
ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder, RowFilter, RowSelection,
RowSelector,
ArrowPredicateFn, ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader,
ParquetRecordBatchReaderBuilder, RowFilter, RowSelection, RowSelector,
};
use crate::arrow::schema::add_encoded_arrow_schema_to_metadata;
use crate::arrow::{ArrowWriter, ProjectionMask};
Expand All @@ -1006,6 +1024,7 @@ mod tests {
BoolType, ByteArray, ByteArrayType, DataType, FixedLenByteArray, FixedLenByteArrayType,
FloatType, Int32Type, Int64Type, Int96Type,
};
#[cfg(feature = "encryption")]
use crate::encryption::ciphers;
use crate::errors::Result;
use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion};
Expand Down Expand Up @@ -1716,6 +1735,7 @@ mod tests {
}

#[test]
#[cfg(feature = "encryption")]
fn test_non_uniform_encryption_plaintext_footer() {
let testdata = arrow::util::test_util::parquet_test_data();
let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
Expand Down Expand Up @@ -1766,6 +1786,7 @@ mod tests {
}

#[test]
#[cfg(feature = "encryption")]
fn test_non_uniform_encryption() {
let testdata = arrow::util::test_util::parquet_test_data();
let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
Expand Down Expand Up @@ -1797,6 +1818,7 @@ mod tests {
}

#[test]
#[cfg(feature = "encryption")]
fn test_uniform_encryption() {
let testdata = arrow::util::test_util::parquet_test_data();
let path = format!("{testdata}/uniform_encryption.parquet.encrypted");
Expand Down
4 changes: 4 additions & 0 deletions parquet/src/column/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2027,6 +2027,7 @@ mod tests {
r.rows_written as usize,
None,
Arc::new(props),
#[cfg(feature = "encryption")]
None,
)
.unwrap();
Expand Down Expand Up @@ -2080,6 +2081,7 @@ mod tests {
r.rows_written as usize,
None,
Arc::new(props),
#[cfg(feature = "encryption")]
None,
)
.unwrap();
Expand Down Expand Up @@ -2216,6 +2218,7 @@ mod tests {
r.rows_written as usize,
None,
Arc::new(props),
#[cfg(feature = "encryption")]
None,
)
.unwrap(),
Expand Down Expand Up @@ -3487,6 +3490,7 @@ mod tests {
result.rows_written as usize,
None,
Arc::new(props),
#[cfg(feature = "encryption")]
None,
)
.unwrap(),
Expand Down
2 changes: 1 addition & 1 deletion parquet/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ impl From<object_store::Error> for ParquetError {
}
}

//#[cfg(feature = "encryption")]
#[cfg(feature = "encryption")]
impl From<ring::error::Unspecified> for ParquetError {
fn from(e: ring::error::Unspecified) -> ParquetError {
ParquetError::External(Box::new(e))
Expand Down
9 changes: 7 additions & 2 deletions parquet/src/file/footer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

//! Module for working with Parquet file footers.
#[cfg(feature = "encryption")]
use crate::encryption::ciphers::FileDecryptionProperties;
use crate::errors::Result;
use crate::file::{metadata::*, reader::ChunkReader, FOOTER_SIZE};
Expand Down Expand Up @@ -60,9 +61,13 @@ pub fn parse_metadata<R: ChunkReader>(chunk_reader: &R) -> Result<ParquetMetaDat
#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader::decode_metadata")]
pub fn decode_metadata(
buf: &[u8],
file_decryption_properties: Option<&FileDecryptionProperties>,
#[cfg(feature = "encryption")] file_decryption_properties: Option<&FileDecryptionProperties>,
) -> Result<ParquetMetaData> {
ParquetMetaDataReader::decode_metadata(buf, file_decryption_properties)
ParquetMetaDataReader::decode_metadata(
buf,
#[cfg(feature = "encryption")]
file_decryption_properties,
)
}

/// Decodes the Parquet footer returning the metadata length in bytes
Expand Down
13 changes: 11 additions & 2 deletions parquet/src/file/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ use crate::format::{
};

use crate::basic::{ColumnOrder, Compression, Encoding, Type};
#[cfg(feature = "encryption")]
use crate::encryption::ciphers::FileDecryptor;
use crate::errors::{ParquetError, Result};
pub(crate) use crate::file::metadata::memory::HeapSize;
Expand Down Expand Up @@ -176,6 +177,7 @@ pub struct ParquetMetaData {
/// Offset index for each page in each column chunk
offset_index: Option<ParquetOffsetIndex>,
/// Optional file decryptor
#[cfg(feature = "encryption")]
file_decryptor: Option<FileDecryptor>,
}

Expand All @@ -185,11 +187,12 @@ impl ParquetMetaData {
pub fn new(
file_metadata: FileMetaData,
row_groups: Vec<RowGroupMetaData>,
file_decryptor: Option<FileDecryptor>,
#[cfg(feature = "encryption")] file_decryptor: Option<FileDecryptor>,
) -> Self {
ParquetMetaData {
file_metadata,
row_groups,
#[cfg(feature = "encryption")]
file_decryptor,
column_index: None,
offset_index: None,
Expand Down Expand Up @@ -223,6 +226,7 @@ impl ParquetMetaData {
}

/// Returns a reference to the optional file decryptor held by this metadata.
///
/// The result is `&None` when this metadata holds no decryptor.
/// This accessor only exists when the `encryption` feature is enabled.
#[cfg(feature = "encryption")]
pub fn file_decryptor(&self) -> &Option<FileDecryptor> {
&self.file_decryptor
}
Expand Down Expand Up @@ -350,7 +354,12 @@ pub struct ParquetMetaDataBuilder(ParquetMetaData);
impl ParquetMetaDataBuilder {
/// Create a new builder from a file metadata, with no row groups
pub fn new(file_meta_data: FileMetaData) -> Self {
Self(ParquetMetaData::new(file_meta_data, vec![], None))
Self(ParquetMetaData::new(
file_meta_data,
vec![],
#[cfg(feature = "encryption")]
None,
))
}

/// Create a new builder from an existing ParquetMetaData
Expand Down
20 changes: 15 additions & 5 deletions parquet/src/file/metadata/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use std::{io::Read, ops::Range, sync::Arc};
use bytes::Bytes;

use crate::basic::ColumnOrder;
#[cfg(feature = "encryption")]
use crate::encryption::ciphers::{
create_footer_aad, BlockDecryptor, FileDecryptionProperties, FileDecryptor,
};
Expand All @@ -29,10 +30,9 @@ use crate::file::page_index::index::Index;
use crate::file::page_index::index_reader::{acc_range, decode_column_index, decode_offset_index};
use crate::file::reader::ChunkReader;
use crate::file::{FOOTER_SIZE, PARQUET_MAGIC, PARQUET_MAGIC_ENCR_FOOTER};
use crate::format::{
ColumnOrder as TColumnOrder, EncryptionAlgorithm, FileCryptoMetaData as TFileCryptoMetaData,
FileMetaData as TFileMetaData,
};
use crate::format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData};
#[cfg(feature = "encryption")]
use crate::format::{EncryptionAlgorithm, FileCryptoMetaData as TFileCryptoMetaData};
use crate::schema::types;
use crate::schema::types::SchemaDescriptor;
use crate::thrift::{TCompactSliceInputProtocol, TSerializable};
Expand Down Expand Up @@ -74,6 +74,7 @@ pub struct ParquetMetaDataReader {
// Size of the serialized thrift metadata plus the 8 byte footer. Only set if
// `self.parse_metadata` is called.
metadata_size: Option<usize>,
#[cfg(feature = "encryption")]
file_decryption_properties: Option<FileDecryptionProperties>,
}

Expand Down Expand Up @@ -136,6 +137,7 @@ impl ParquetMetaDataReader {
/// Provide the [`FileDecryptionProperties`] to use when decrypting the file.
///
/// This is only necessary when the file is encrypted.
#[cfg(feature = "encryption")]
pub fn with_encryption_properties(
mut self,
properties: Option<&FileDecryptionProperties>,
Expand Down Expand Up @@ -532,6 +534,7 @@ impl ParquetMetaDataReader {
let start = file_size - footer_metadata_len as u64;
Self::decode_metadata(
chunk_reader.get_bytes(start, metadata_len)?.as_ref(),
#[cfg(feature = "encryption")]
self.file_decryption_properties.as_ref(),
)
}
Expand Down Expand Up @@ -639,12 +642,18 @@ impl ParquetMetaDataReader {
/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
pub fn decode_metadata(
buf: &[u8],
file_decryption_properties: Option<&FileDecryptionProperties>,
#[cfg(feature = "encryption")] file_decryption_properties: Option<
&FileDecryptionProperties,
>,
) -> Result<ParquetMetaData> {
let mut prot = TCompactSliceInputProtocol::new(buf);

#[cfg(feature = "encryption")]
let mut file_decryptor = None;
#[cfg(feature = "encryption")]
let decrypted_fmd_buf;

#[cfg(feature = "encryption")]
if file_decryption_properties.is_some()
&& file_decryption_properties.unwrap().has_footer_key()
{
Expand Down Expand Up @@ -708,6 +717,7 @@ impl ParquetMetaDataReader {
Ok(ParquetMetaData::new(
file_metadata,
row_groups,
#[cfg(feature = "encryption")]
file_decryptor,
))
}
Expand Down
Loading

0 comments on commit 9030f3a

Please sign in to comment.