Skip to content

Commit

Permalink
Datalakestg82 quick query parquet format (#3942)
Browse files Browse the repository at this point in the history
  • Loading branch information
microzchang authored Sep 21, 2022
1 parent 44270c0 commit ca567ee
Show file tree
Hide file tree
Showing 9 changed files with 559 additions and 3 deletions.
12 changes: 9 additions & 3 deletions sdk/storage/azure-storage-blobs/src/rest_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5573,9 +5573,15 @@ namespace Azure { namespace Storage { namespace Blobs {
response.LeaseDuration
= Models::LeaseDurationType(pRawResponse->GetHeaders().at("x-ms-lease-duration"));
}
response.LeaseState = Models::LeaseState(pRawResponse->GetHeaders().at("x-ms-lease-state"));
response.LeaseStatus
= Models::LeaseStatus(pRawResponse->GetHeaders().at("x-ms-lease-status"));
if (pRawResponse->GetHeaders().count("x-ms-lease-state") != 0)
{
response.LeaseState = Models::LeaseState(pRawResponse->GetHeaders().at("x-ms-lease-state"));
}
if (pRawResponse->GetHeaders().count("x-ms-lease-status") != 0)
{
response.LeaseStatus
= Models::LeaseStatus(pRawResponse->GetHeaders().at("x-ms-lease-status"));
}
response.IsServerEncrypted
= pRawResponse->GetHeaders().at("x-ms-server-encrypted") == std::string("true");
return Response<Models::QueryBlobResult>(std::move(response), std::move(pRawResponse));
Expand Down
4 changes: 4 additions & 0 deletions sdk/storage/azure-storage-blobs/swagger/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1292,6 +1292,10 @@ directive:
delete $[status_code].headers["x-ms-blob-content-md5"];
delete $[status_code].headers["x-ms-content-crc64"];
$[status_code].headers["x-ms-lease-duration"]["x-nullable"] = true;
$[status_code].headers["x-ms-lease-state"]["x-ms-client-default"] = "";
$[status_code].headers["x-ms-lease-state"]["x-nullable"] = true;
$[status_code].headers["x-ms-lease-status"]["x-ms-client-default"] = "";
$[status_code].headers["x-ms-lease-status"]["x-nullable"] = true;
}
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,19 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
const ScheduleFileDeletionOptions& options = ScheduleFileDeletionOptions(),
const Azure::Core::Context& context = Azure::Core::Context()) const;

/**
* @brief Runs a SQL query against the contents of the file and returns the result.
*
* @param querySqlExpression The query expression in SQL.
* @param options Optional parameters to execute this function: input/output text
* configuration, access conditions, and progress/error callbacks.
* @param context Context for cancelling long running operations.
* @return A QueryFileResult describing the query result: the result body stream together with
* the file's ETag, last-modified time, lease information and server-encryption flag.
*/
Azure::Response<Models::QueryFileResult> Query(
const std::string& querySqlExpression,
const QueryFileOptions& options = QueryFileOptions(),
const Azure::Core::Context& context = Azure::Core::Context()) const;

private:
explicit DataLakeFileClient(
Azure::Core::Url fileUrl,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
using PathHttpHeaders = Blobs::Models::BlobHttpHeaders;
using ListFileSystemsIncludeFlags = Blobs::Models::ListBlobContainersIncludeFlags;
using SignedIdentifier = Blobs::Models::SignedIdentifier;
// Arrow field description used by file queries; alias of the corresponding blob model type.
using FileQueryArrowField = Blobs::Models::BlobQueryArrowField;
// Arrow field type used by file queries; alias of the corresponding blob model type.
using FileQueryArrowFieldType = Blobs::Models::BlobQueryArrowFieldType;
} // namespace Models

using DownloadFileToOptions = Blobs::DownloadBlobToOptions;
Expand Down Expand Up @@ -638,4 +640,35 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
using ReleaseLeaseOptions = Blobs::ReleaseLeaseOptions;
using ChangeLeaseOptions = Blobs::ChangeLeaseOptions;

// File-query counterparts of the blob query types. These are plain aliases, so values are
// interchangeable with the Blobs:: types they name.
using FileQueryInputTextOptions = Blobs::BlobQueryInputTextOptions;
using FileQueryOutputTextOptions = Blobs::BlobQueryOutputTextOptions;
using FileQueryError = Blobs::BlobQueryError;

/**
* @brief Optional parameters for #Azure::Storage::Files::DataLake::DataLakeFileClient::Query.
*
* Every member is optional; a default-constructed instance is what
* DataLakeFileClient::Query uses when no options are passed.
*/
struct QueryFileOptions final
{
/**
* @brief Input text configuration. Describes how the file content being queried is
* formatted.
*/
FileQueryInputTextOptions InputTextConfiguration;
/**
* @brief Output text configuration. Describes how the query result should be formatted.
*/
FileQueryOutputTextOptions OutputTextConfiguration;
/**
* @brief Optional conditions that must be met to perform this operation.
*/
PathAccessConditions AccessConditions;
/**
* @brief Callback for progress handling.
*
* NOTE(review): the meaning of the two int64_t arguments is not visible here; presumably
* they mirror the progress callback of Blobs::QueryBlobOptions. Confirm against that type.
*/
std::function<void(int64_t, int64_t)> ProgressHandler;
/**
* @brief Callback for error handling. If you don't specify one, the default will be used, which
* will ignore all non-fatal errors and throw for fatal errors.
*/
std::function<void(FileQueryError)> ErrorHandler;
};
}}}} // namespace Azure::Storage::Files::DataLake
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,44 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
using ScheduleFileDeletionResult = Blobs::Models::SetBlobExpiryResult;
using CopyStatus = Blobs::Models::CopyStatus;

/**
* @brief Response type for #Azure::Storage::Files::DataLake::DataLakeFileClient::Query.
*/
struct QueryFileResult final
{
/**
* Stream from which the query result is read. Owned by this result object.
*/
std::unique_ptr<Core::IO::BodyStream> BodyStream;
/**
* Returns the date and time the file was last modified. Any operation that modifies the
* file, including an update of the file's metadata or properties, changes the last-modified
* time of the file.
*/
DateTime LastModified;
/**
* The ETag contains a value that you can use to perform operations conditionally. If the
* request version is 2011-08-18 or newer, the ETag value will be in quotes.
*/
Azure::ETag ETag;
/**
* When a file is leased, specifies whether the lease is of infinite or fixed duration.
*/
Nullable<LeaseDurationType> LeaseDuration;
/**
* Lease state of the file.
*/
Models::LeaseState LeaseState;
/**
* The current lease status of the file.
*/
Models::LeaseStatus LeaseStatus;
/**
* The value of this header is set to true if the file data and application metadata are
* completely encrypted using the specified algorithm. Otherwise, the value is set to false
* (when the file is unencrypted, or if only parts of the file/application metadata are
* encrypted).
*/
bool IsServerEncrypted = bool();
};

/**
* @brief The detailed information returned when downloading a file.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,4 +327,33 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
*m_pipeline, m_blobClient.m_blobUrl, protocolLayerOptions, context);
}

// Runs a SQL query against this file by delegating to the underlying block blob client.
// The Data Lake options are translated field-by-field into blob query options, and the blob
// result is moved field-by-field into a QueryFileResult.
Azure::Response<Models::QueryFileResult> DataLakeFileClient::Query(
    const std::string& querySqlExpression,
    const QueryFileOptions& options,
    const Azure::Core::Context& context) const
{
  Blobs::QueryBlobOptions queryOptions;

  // Access conditions are copied member by member because the Data Lake and blob condition
  // types are distinct.
  {
    const auto& pathConditions = options.AccessConditions;
    auto& blobConditions = queryOptions.AccessConditions;
    blobConditions.LeaseId = pathConditions.LeaseId;
    blobConditions.IfModifiedSince = pathConditions.IfModifiedSince;
    blobConditions.IfUnmodifiedSince = pathConditions.IfUnmodifiedSince;
    blobConditions.IfMatch = pathConditions.IfMatch;
    blobConditions.IfNoneMatch = pathConditions.IfNoneMatch;
  }

  // Callbacks and text configurations use aliased blob types, so they copy across directly.
  queryOptions.ProgressHandler = options.ProgressHandler;
  queryOptions.ErrorHandler = options.ErrorHandler;
  queryOptions.OutputTextConfiguration = options.OutputTextConfiguration;
  queryOptions.InputTextConfiguration = options.InputTextConfiguration;

  auto blobResponse
      = m_blobClient.AsBlockBlobClient().Query(querySqlExpression, queryOptions, context);
  auto& blobResult = blobResponse.Value;

  Models::QueryFileResult fileResult;
  fileResult.IsServerEncrypted = blobResult.IsServerEncrypted;
  fileResult.LeaseStatus = std::move(blobResult.LeaseStatus);
  fileResult.LeaseState = std::move(blobResult.LeaseState);
  fileResult.LeaseDuration = std::move(blobResult.LeaseDuration);
  fileResult.LastModified = std::move(blobResult.LastModified);
  fileResult.ETag = std::move(blobResult.ETag);
  fileResult.BodyStream = std::move(blobResult.BodyStream);

  return Azure::Response<Models::QueryFileResult>(
      std::move(fileResult), std::move(blobResponse.RawResponse));
}

}}}} // namespace Azure::Storage::Files::DataLake
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ add_executable (
datalake_directory_client_test.hpp
datalake_file_client_test.cpp
datalake_file_client_test.hpp
datalake_file_query_test.cpp
datalake_file_system_client_test.cpp
datalake_file_system_client_test.hpp
datalake_path_client_test.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ namespace Azure { namespace Storage { namespace Test {
void DataLakeFileClientTest::SetUp()
{
DataLakeFileSystemClientTest::SetUp();
CHECK_SKIP_TEST();
m_fileName = GetFileSystemValidName();
m_fileClient = std::make_shared<Files::DataLake::DataLakeFileClient>(
m_fileSystemClient->GetFileClient(m_fileName));
Expand All @@ -37,6 +38,7 @@ namespace Azure { namespace Storage { namespace Test {

// Removes the file created for the test, then runs the file-system fixture's teardown.
void DataLakeFileClientTest::TearDown()
{
  // NOTE(review): presumably leaves the function early when the test is skipped, so no
  // cleanup is attempted against resources that were never created. Confirm against the
  // macro definition.
  CHECK_SKIP_TEST();

  auto fileToRemove = m_fileSystemClient->GetFileClient(m_fileName);
  fileToRemove.Delete();

  DataLakeFileSystemClientTest::TearDown();
}
Expand Down
Loading

0 comments on commit ca567ee

Please sign in to comment.