Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datalakestg82 quick query parquet format #3942

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions sdk/storage/azure-storage-blobs/src/rest_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5573,9 +5573,15 @@ namespace Azure { namespace Storage { namespace Blobs {
response.LeaseDuration
= Models::LeaseDurationType(pRawResponse->GetHeaders().at("x-ms-lease-duration"));
}
response.LeaseState = Models::LeaseState(pRawResponse->GetHeaders().at("x-ms-lease-state"));
response.LeaseStatus
= Models::LeaseStatus(pRawResponse->GetHeaders().at("x-ms-lease-status"));
if (pRawResponse->GetHeaders().count("x-ms-lease-state") != 0)
{
response.LeaseState = Models::LeaseState(pRawResponse->GetHeaders().at("x-ms-lease-state"));
}
if (pRawResponse->GetHeaders().count("x-ms-lease-status") != 0)
{
response.LeaseStatus
= Models::LeaseStatus(pRawResponse->GetHeaders().at("x-ms-lease-status"));
}
response.IsServerEncrypted
= pRawResponse->GetHeaders().at("x-ms-server-encrypted") == std::string("true");
return Response<Models::QueryBlobResult>(std::move(response), std::move(pRawResponse));
Expand Down
4 changes: 4 additions & 0 deletions sdk/storage/azure-storage-blobs/swagger/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1292,6 +1292,10 @@ directive:
delete $[status_code].headers["x-ms-blob-content-md5"];
delete $[status_code].headers["x-ms-content-crc64"];
$[status_code].headers["x-ms-lease-duration"]["x-nullable"] = true;
$[status_code].headers["x-ms-lease-state"]["x-ms-client-default"] = "";
$[status_code].headers["x-ms-lease-state"]["x-nullable"] = true;
$[status_code].headers["x-ms-lease-status"]["x-ms-client-default"] = "";
$[status_code].headers["x-ms-lease-status"]["x-nullable"] = true;
}
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,19 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
const ScheduleFileDeletionOptions& options = ScheduleFileDeletionOptions(),
const Azure::Core::Context& context = Azure::Core::Context()) const;

/**
* @brief Returns the result of a query against the file.
*
* @param querySqlExpression The query expression in SQL.
* @param options Optional parameters to execute this function. When omitted, a
* default-constructed QueryFileOptions is used.
* @param context Context for cancelling long running operations. When omitted, a
* default-constructed Azure::Core::Context is used.
* @return A QueryFileResult describing the query result.
*/
Azure::Response<Models::QueryFileResult> Query(
const std::string& querySqlExpression,
const QueryFileOptions& options = QueryFileOptions(),
const Azure::Core::Context& context = Azure::Core::Context()) const;

private:
explicit DataLakeFileClient(
Azure::Core::Url fileUrl,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
// DataLake model names that are plain aliases of the corresponding Blobs models.
using PathHttpHeaders = Blobs::Models::BlobHttpHeaders;
using ListFileSystemsIncludeFlags = Blobs::Models::ListBlobContainersIncludeFlags;
using SignedIdentifier = Blobs::Models::SignedIdentifier;
// Aliases of the Blobs query Arrow field models, exposed under File-prefixed names.
using FileQueryArrowField = Blobs::Models::BlobQueryArrowField;
using FileQueryArrowFieldType = Blobs::Models::BlobQueryArrowFieldType;
} // namespace Models

using DownloadFileToOptions = Blobs::DownloadBlobToOptions;
Expand Down Expand Up @@ -638,4 +640,35 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
using ReleaseLeaseOptions = Blobs::ReleaseLeaseOptions;
using ChangeLeaseOptions = Blobs::ChangeLeaseOptions;

// File query types, aliased from their Blobs counterparts. QueryFileOptions uses them for its
// input/output text configuration and for the argument of its error callback.
using FileQueryInputTextOptions = Blobs::BlobQueryInputTextOptions;
using FileQueryOutputTextOptions = Blobs::BlobQueryOutputTextOptions;
using FileQueryError = Blobs::BlobQueryError;

/**
* @brief Optional parameters for #Azure::Storage::Files::DataLake::DataLakeFileClient::Query.
*
* DataLakeFileClient::Query copies these values into a Blobs::QueryBlobOptions and forwards the
* call to the underlying block blob client.
*/
struct QueryFileOptions final
{
/**
* @brief Input text configuration.
*/
FileQueryInputTextOptions InputTextConfiguration;
/**
* @brief Output text configuration.
*/
FileQueryOutputTextOptions OutputTextConfiguration;
/**
* @brief Optional conditions that must be met to perform this operation.
*
* DataLakeFileClient::Query forwards IfMatch, IfNoneMatch, IfModifiedSince, IfUnmodifiedSince
* and LeaseId to the blob access conditions.
*/
PathAccessConditions AccessConditions;
/**
* @brief Callback for progress handling.
*
* NOTE(review): the meaning of the two int64_t arguments is not stated here; presumably
* (amount processed so far, total amount) - confirm against
* Blobs::QueryBlobOptions::ProgressHandler.
*/
std::function<void(int64_t, int64_t)> ProgressHandler;
/**
* @brief Callback for error handling. If you don't specify one, the default will be used, which
* will ignore all non-fatal errors and throw for fatal errors.
*/
std::function<void(FileQueryError)> ErrorHandler;
};
}}}} // namespace Azure::Storage::Files::DataLake
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,44 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
using ScheduleFileDeletionResult = Blobs::Models::SetBlobExpiryResult;
using CopyStatus = Blobs::Models::CopyStatus;

/**
* @brief Response type for #Azure::Storage::Files::DataLake::DataLakeFileClient::Query.
*/
struct QueryFileResult final
{
/**
* The body stream of the response.
* NOTE(review): presumably yields the query output; confirm against
* Blobs::Models::QueryBlobResult::BodyStream, which this value is moved from.
*/
std::unique_ptr<Core::IO::BodyStream> BodyStream;
/**
* Returns the date and time the file was last modified. Any operation that modifies the
* file, including an update of the file's metadata or properties, changes the last-modified
* time of the file.
*/
DateTime LastModified;
/**
* The ETag contains a value that you can use to perform operations conditionally. If the
* request version is 2011-08-18 or newer, the ETag value will be in quotes.
*/
Azure::ETag ETag;
/**
* When a file is leased, specifies whether the lease is of infinite or fixed duration.
*/
Nullable<LeaseDurationType> LeaseDuration;
/**
* Lease state of the file.
* NOTE(review): the blob REST layer only assigns this when the response carries the
* x-ms-lease-state header, so it may be left at its default-constructed value.
*/
Models::LeaseState LeaseState;
/**
* The current lease status of the file.
* NOTE(review): the blob REST layer only assigns this when the response carries the
* x-ms-lease-status header, so it may be left at its default-constructed value.
*/
Models::LeaseStatus LeaseStatus;
/**
* The value of this header is set to true if the file data and application metadata are
* completely encrypted using the specified algorithm. Otherwise, the value is set to false
* (when the file is unencrypted, or if only parts of the file/application metadata are
* encrypted).
*/
bool IsServerEncrypted = bool();
};

/**
* @brief The detailed information returned when downloading a file.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,4 +327,33 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
*m_pipeline, m_blobClient.m_blobUrl, protocolLayerOptions, context);
}

/**
 * Runs a query against the file by delegating to the block blob client.
 *
 * The DataLake options are translated field by field into Blobs::QueryBlobOptions, the blob
 * query is executed, and the blob result is repackaged as a Models::QueryFileResult together
 * with the raw response.
 *
 * @param querySqlExpression The query expression in SQL.
 * @param options Text configuration, access conditions and callbacks for the query.
 * @param context Context passed through to the blob query call.
 * @return The query result wrapped with the raw HTTP response.
 */
Azure::Response<Models::QueryFileResult> DataLakeFileClient::Query(
    const std::string& querySqlExpression,
    const QueryFileOptions& options,
    const Azure::Core::Context& context) const
{
  // Build the blob-level options from the DataLake-level ones.
  Blobs::QueryBlobOptions queryBlobOptions;
  {
    // The two access-condition types differ, so the fields are copied one by one.
    const auto& pathConditions = options.AccessConditions;
    auto& blobConditions = queryBlobOptions.AccessConditions;
    blobConditions.LeaseId = pathConditions.LeaseId;
    blobConditions.IfMatch = pathConditions.IfMatch;
    blobConditions.IfNoneMatch = pathConditions.IfNoneMatch;
    blobConditions.IfModifiedSince = pathConditions.IfModifiedSince;
    blobConditions.IfUnmodifiedSince = pathConditions.IfUnmodifiedSince;
  }
  queryBlobOptions.ProgressHandler = options.ProgressHandler;
  queryBlobOptions.ErrorHandler = options.ErrorHandler;
  queryBlobOptions.OutputTextConfiguration = options.OutputTextConfiguration;
  queryBlobOptions.InputTextConfiguration = options.InputTextConfiguration;

  auto blobResponse
      = m_blobClient.AsBlockBlobClient().Query(querySqlExpression, queryBlobOptions, context);
  auto& blobResult = blobResponse.Value;

  // Repackage the blob result under the DataLake model type.
  Models::QueryFileResult fileResult;
  fileResult.IsServerEncrypted = blobResult.IsServerEncrypted;
  fileResult.LeaseStatus = std::move(blobResult.LeaseStatus);
  fileResult.LeaseState = std::move(blobResult.LeaseState);
  fileResult.LeaseDuration = std::move(blobResult.LeaseDuration);
  fileResult.LastModified = std::move(blobResult.LastModified);
  fileResult.ETag = std::move(blobResult.ETag);
  fileResult.BodyStream = std::move(blobResult.BodyStream);

  return Azure::Response<Models::QueryFileResult>(
      std::move(fileResult), std::move(blobResponse.RawResponse));
}

}}}} // namespace Azure::Storage::Files::DataLake
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ add_executable (
datalake_directory_client_test.hpp
datalake_file_client_test.cpp
datalake_file_client_test.hpp
datalake_file_query_test.cpp
datalake_file_system_client_test.cpp
datalake_file_system_client_test.hpp
datalake_path_client_test.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ namespace Azure { namespace Storage { namespace Test {
void DataLakeFileClientTest::SetUp()
{
DataLakeFileSystemClientTest::SetUp();
CHECK_SKIP_TEST();
m_fileName = GetFileSystemValidName();
m_fileClient = std::make_shared<Files::DataLake::DataLakeFileClient>(
m_fileSystemClient->GetFileClient(m_fileName));
Expand All @@ -37,6 +38,7 @@ namespace Azure { namespace Storage { namespace Test {

// Per-test cleanup: deletes the file named m_fileName, then runs the file-system fixture's
// TearDown. CHECK_SKIP_TEST() is invoked first, before any cleanup call is made.
void DataLakeFileClientTest::TearDown()
{
  CHECK_SKIP_TEST();

  auto testFileClient = m_fileSystemClient->GetFileClient(m_fileName);
  testFileClient.Delete();

  DataLakeFileSystemClientTest::TearDown();
}
Expand Down
Loading