Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
rok committed Jan 21, 2025
1 parent 6c9f5c5 commit 782cd85
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 50 deletions.
3 changes: 2 additions & 1 deletion parquet/src/arrow/async_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ impl<T: AsyncRead + AsyncSeek + Unpin + Send> AsyncFileReader for T {
let mut buf = Vec::with_capacity(metadata_len);
self.take(metadata_len as _).read_to_end(&mut buf).await?;

// todo: use file_decryption_properties
// todo: provide file_decryption_properties
Ok(Arc::new(ParquetMetaDataReader::decode_metadata(
&buf,
footer.encrypted_footer(),
Expand Down Expand Up @@ -971,6 +971,7 @@ impl RowGroups for InMemoryRowGroup<'_> {
// filter out empty offset indexes (old versions specified Some(vec![]) when none were present)
.filter(|index| !index.is_empty())
.map(|index| index[i].page_locations.clone());
// todo: provide crypto_context
let page_reader: Box<dyn PageReader> = Box::new(SerializedPageReader::new(
data.clone(),
self.metadata.column(i),
Expand Down
78 changes: 35 additions & 43 deletions parquet/src/file/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,6 @@ pub struct RowGroupMetaData {
ordinal: Option<i16>,
}

// todo:rok

impl RowGroupMetaData {
/// Returns builder for row group metadata.
pub fn builder(schema_descr: SchemaDescPtr) -> RowGroupMetaDataBuilder {
Expand Down Expand Up @@ -646,49 +644,43 @@ impl RowGroupMetaData {
.zip(schema_descr.columns())
.enumerate()
{
let cc;
#[cfg(feature = "encryption")]
if let Some(ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(crypto_metadata)) =
c.crypto_metadata.clone()
{
if c.encrypted_column_metadata.is_none() {
cc = ColumnChunkMetaData::from_thrift(d.clone(), c)?;
} else {
let decryptor = decryptor.unwrap();
let column_name = crypto_metadata.path_in_schema.join(".");
let column_decryptor = decryptor
.get_column_decryptor(column_name.as_bytes())
.footer_decryptor()
.unwrap();

let aad_file_unique = decryptor.aad_file_unique();
let aad_prefix: Vec<u8> = decryptor
.decryption_properties()
.aad_prefix()
.unwrap_or_default();
let column_aad = create_module_aad(
[aad_prefix.as_slice(), aad_file_unique.as_slice()]
.concat()
.as_slice(),
ModuleType::ColumnMetaData,
rg.ordinal.unwrap() as usize,
i as usize,
None,
)?;

let buf = c.encrypted_column_metadata.clone().unwrap();
let decrypted_cc_buf =
column_decryptor.decrypt(buf.as_slice().as_ref(), column_aad.as_ref())?;

let mut prot = TCompactSliceInputProtocol::new(decrypted_cc_buf.as_slice());
c.meta_data = Some(ColumnMetaData::read_from_in_protocol(&mut prot)?);
cc = ColumnChunkMetaData::from_thrift(d.clone(), c)?;
}
} else {
cc = ColumnChunkMetaData::from_thrift(d.clone(), c)?;
if c.encrypted_column_metadata.is_some() {
let decryptor = decryptor.unwrap();
let Some(ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(crypto_metadata)) =
c.crypto_metadata.clone()
else {
todo!()
};
let column_name = crypto_metadata.path_in_schema.join(".");
let column_decryptor = decryptor
.get_column_decryptor(column_name.as_bytes())
.footer_decryptor()
.unwrap();

let aad_file_unique = decryptor.aad_file_unique();
let aad_prefix: Vec<u8> = decryptor
.decryption_properties()
.aad_prefix()
.unwrap_or_default();
let column_aad = create_module_aad(
[aad_prefix.as_slice(), aad_file_unique.as_slice()]
.concat()
.as_slice(),
ModuleType::ColumnMetaData,
rg.ordinal.unwrap() as usize,
i,
None,
)?;

let buf = c.encrypted_column_metadata.clone().unwrap();
let decrypted_cc_buf =
column_decryptor.decrypt(buf.as_slice(), column_aad.as_ref())?;

let mut prot = TCompactSliceInputProtocol::new(decrypted_cc_buf.as_slice());
c.meta_data = Some(ColumnMetaData::read_from_in_protocol(&mut prot)?);
}

columns.push(cc);
columns.push(ColumnChunkMetaData::from_thrift(d.clone(), c)?);
}
let sorting_columns = rg.sorting_columns;
Ok(RowGroupMetaData {
Expand Down
6 changes: 0 additions & 6 deletions parquet/src/file/metadata/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -736,11 +736,6 @@ impl ParquetMetaDataReader {
}; // todo decr: add support for GCMCTRV1

// todo decr: get key_metadata

// remaining buffer contains encrypted FileMetaData

// todo decr: get aad_prefix
// todo decr: set both aad_prefix and aad_file_unique in file_decryptor
let aad_file_unique = aes_gcm_algo.aad_file_unique.unwrap();
let aad_footer = create_footer_aad(aad_file_unique.as_ref())?;
let aad_prefix: Vec<u8> = aes_gcm_algo.aad_prefix.unwrap_or_default();
Expand All @@ -762,7 +757,6 @@ impl ParquetMetaDataReader {
let schema = types::from_thrift(&t_file_metadata.schema)?;
let schema_descr = Arc::new(SchemaDescriptor::new(schema));

// todo add file decryptor
#[cfg(feature = "encryption")]
if t_file_metadata.encryption_algorithm.is_some() {
let algo = t_file_metadata.encryption_algorithm;
Expand Down

0 comments on commit 782cd85

Please sign in to comment.