Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for POST + other improvements of the server #657

Merged
merged 8 commits into from
Jul 4, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions e2e/e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ if [ ${REBUILD_THE_INDEX} == "YES" ] || ! [ -f "${INDEX}.vocabulary" ]; then
popd
fi

# Launch the Server using the freshly baked index. Can't simply use a subshell here because
# then we can't easily get the SERVER_PID out of that subshell
# Launch the Server using the freshly baked index. Can't simply use a subshell
# here because then we can't easily get the SERVER_PID out of that subshell
pushd "$BINARY_DIR"
echo "Launching server from path $(pwd)"
./ServerMain -i "$INDEX" -p 9099 -m 1 -t &> server_log.txt &
Expand Down
7 changes: 5 additions & 2 deletions src/ServerMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ int main(int argc, char** argv) {
// filled / set depending on the options.
using ad_utility::NonNegative;

string indexBasename;
std::string indexBasename;
std::string accessToken;
bool text = false;
int port;
NonNegative numSimultaneousQueries = 1;
Expand All @@ -62,6 +63,8 @@ int main(int argc, char** argv) {
"The basename of the index files (required).");
add("port,p", po::value<int>(&port)->required(),
"The port on which HTTP requests are served (required).");
add("access-token,a", po::value<std::string>(&accessToken)->default_value(""),
"Access token for restricted API calls (default: no access).");
add("num-simultaneous-queries,j",
po::value<NonNegative>(&numSimultaneousQueries)->default_value(1),
"The number of queries that can be processed simultaneously.");
Expand Down Expand Up @@ -123,7 +126,7 @@ int main(int argc, char** argv) {

try {
Server server(port, static_cast<int>(numSimultaneousQueries),
memoryMaxSizeGb);
memoryMaxSizeGb, std::move(accessToken));
server.run(indexBasename, text, !noPatterns, !noPatternTrick,
!onlyPsoAndPosPermutations);
} catch (const std::exception& e) {
Expand Down
18 changes: 9 additions & 9 deletions src/engine/Operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,25 +119,25 @@ shared_ptr<const ResultTable> Operation::getResult(bool isRoot) {
// A child Operation was aborted, do not print the information again.
throw;
} catch (const ad_utility::WaitedForResultWhichThenFailedException& e) {
// Here and in the following, show the detailed information (it's the
// runtime info) only in the DEBUG log. Note that the exception will be
// caught by the `processQuery` method, where the error message will be
// printed *and* included in an error response sent to the client.
LOG(ERROR) << "Waited for a result from another thread which then failed"
<< endl;
LOG(ERROR) << asString();
LOG(DEBUG) << asString();
throw ad_semsearch::AbortException(e);
} catch (const std::exception& e) {
// We are in the innermost level of the exception, so print
LOG(ERROR) << "Aborted Operation:" << endl;
LOG(ERROR) << asString() << endl;
LOG(ERROR) << e.what() << endl;
LOG(ERROR) << "Aborted Operation" << endl;
LOG(DEBUG) << asString() << endl;
// Rethrow as QUERY_ABORTED allowing us to print the Operation
// only at innermost failure of a recursive call
throw ad_semsearch::AbortException(e);
} catch (...) {
// We are in the innermost level of the exception, so print
LOG(ERROR) << "Aborted Operation:" << endl;
LOG(ERROR) << asString() << endl;
LOG(ERROR)
<< "Unexpected exception that is not a subclass of std::exception"
<< endl;
LOG(ERROR) << "Aborted Operation" << endl;
LOG(DEBUG) << asString() << endl;
// Rethrow as QUERY_ABORTED allowing us to print the Operation
// only at innermost failure of a recursive call
throw ad_semsearch::AbortException(
Expand Down
303 changes: 216 additions & 87 deletions src/engine/Server.cpp

Large diffs are not rendered by default.

19 changes: 14 additions & 5 deletions src/engine/Server.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ using ad_utility::Socket;
//! The HTTP Server used.
class Server {
public:
explicit Server(const int port, const int numThreads, size_t maxMemGB)
explicit Server(const int port, const int numThreads, size_t maxMemGB,
std::string accessToken)
: _numThreads(numThreads),
_port(port),
accessToken_(accessToken),
_allocator{ad_utility::makeAllocationMemoryLeftThreadsafeObject(
maxMemGB * (1ull << 30u)),
[this](size_t numBytesToAllocate) {
Expand Down Expand Up @@ -90,6 +92,7 @@ class Server {
private:
const int _numThreads;
int _port;
std::string accessToken_;
QueryResultCache _cache;
ad_utility::AllocatorWithLimit<Id> _allocator;
SortPerformanceEstimator _sortPerformanceEstimator;
Expand All @@ -106,14 +109,19 @@ class Server {
template <typename T>
using Awaitable = boost::asio::awaitable<T>;

/// Parse the path and URL parameters from the given request. Supports both
/// GET and POST requests according to the SPARQL 1.1 standard.
ad_utility::UrlParser::UrlPathAndParameters getUrlPathAndParameters(
const ad_utility::httpUtils::HttpRequest auto& request);

/// Handle a single HTTP request. Check whether a file request or a query was
/// sent, and dispatch to functions handling these cases. This function
/// requires the constraints for the `HttpHandler` in `HttpServer.h`.
/// \param req The HTTP request.
/// \param send The action that sends a http:response. (see the
/// `HttpServer.h` for documentation).
Awaitable<void> process(const ad_utility::httpUtils::HttpRequest auto& req,
auto&& send);
Awaitable<void> process(
const ad_utility::httpUtils::HttpRequest auto& request, auto&& send);

/// Handle a http request that asks for the processing of a query.
/// \param params The key-value-pairs sent in the HTTP GET request. When this
Expand Down Expand Up @@ -141,8 +149,9 @@ class Server {
Awaitable<ad_utility::streams::stream_generator> composeResponseSepValues(
const ParsedQuery& query, const QueryExecutionTree& qet) const;

static json composeExceptionJson(const string& query, const std::exception& e,
ad_utility::Timer& requestTimer);
static json composeErrorResponseJson(const string& query,
const std::string& errorMsg,
ad_utility::Timer& requestTimer);

static ad_utility::streams::stream_generator composeTurtleResponse(
const ParsedQuery& query, const QueryExecutionTree& qet);
Expand Down
34 changes: 17 additions & 17 deletions src/util/HttpServer/HttpUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,10 @@ static auto createJsonResponse(const json& j, const auto& request,
/// Create a HttpResponse with status 404 Not Found. The string body will be a
/// default message including the name of the file that was not found, which can
/// be read from the request directly.
static auto createNotFoundResponse(const HttpRequest auto& request) {
return createHttpResponseFromString("Resource \"" +
std::string(request.target()) +
"\" was not found on this server",
http::status::not_found, request);
static auto createNotFoundResponse(const std::string& errorMsg,
const HttpRequest auto& request) {
return createHttpResponseFromString(errorMsg, http::status::not_found,
request);
}

/// Create a HttpResponse with status 400 Bad Request.
Expand Down Expand Up @@ -206,27 +205,25 @@ boost::asio::awaitable<void> makeFileServerImpl(
// Make sure we can handle the method
if (request.method() != http::verb::get &&
request.method() != http::verb::head) {
co_await send(createBadRequestResponse(
"Unknown HTTP-method, only GET and HEAD requests are supported",
request));
co_return;
throw std::runtime_error(
"When serving files, only GET and HEAD requests are supported");
}

// Decode the path and check that it is absolute and contains no "..".
auto urlPath =
ad_utility::UrlParser::getDecodedPathAndCheck(request.target());
if (!urlPath.has_value()) {
co_await send(createBadRequestResponse(
"Invalid url path \"" + std::string{request.target()} + '"', request));
co_return;
throw std::runtime_error(
absl::StrCat("Invalid URL path \"", request.target(), "\""));
}

// Check if the target is in the whitelist. The `target()` starts with a
// slash, entries in the whitelist don't.
auto urlPathWithFirstCharRemoved = urlPath.value().substr(1);
if (whitelist.has_value() &&
!whitelist.value().contains(urlPath.value().substr(1))) {
co_await send(createNotFoundResponse(request));
co_return;
!whitelist.value().contains(urlPathWithFirstCharRemoved)) {
throw std::runtime_error(absl::StrCat(
"Resource \"", urlPathWithFirstCharRemoved, "\" not in whitelist"));
}

// Build the path to the requested file on the file system.
Expand All @@ -240,12 +237,15 @@ boost::asio::awaitable<void> makeFileServerImpl(

// Handle the case where the file doesn't exist.
if (errorCode == beast::errc::no_such_file_or_directory) {
co_await send(createNotFoundResponse(request));
co_return;
std::string errorMsg =
absl::StrCat("Resource \"", request.target(), "\" not found");
LOG(ERROR) << errorMsg << std::endl;
co_return co_await send(createNotFoundResponse(errorMsg, request));
}

// Handle an unknown error.
if (errorCode) {
LOG(ERROR) << errorCode.message() << std::endl;
co_return co_await send(
createServerErrorResponse(errorCode.message(), request));
}
Expand Down
60 changes: 40 additions & 20 deletions src/util/HttpServer/UrlParser.cpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@


// Copyright 2021, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>
// Copyright 2022, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Authors: Johannes Kalmbach <[email protected]>
// Hannah Bast <[email protected]>

#include "UrlParser.h"

#include "../Exception.h"

using namespace ad_utility;

using std::string;
// _____________________________________________________________________________
string UrlParser::applyPercentDecoding(std::string_view url) {
string decoded;
std::string UrlParser::applyPercentDecoding(std::string_view url,
bool urlDecode) {
// If no decoding is wanted, just convert to `std::string`.
if (urlDecode == false) {
return std::string{url};
}
// Otherwise resolve all %XX.
std::string decoded;
for (size_t i = 0; i < url.size(); ++i) {
if (url[i] == '+') {
decoded += ' ';
Expand Down Expand Up @@ -45,27 +50,42 @@ string UrlParser::applyPercentDecoding(std::string_view url) {
}

// ___________________________________________________________________________
UrlParser::UrlTarget UrlParser::parseTarget(std::string_view target) {
static constexpr auto npos = std::string_view::npos;
UrlTarget result;
UrlParser::UrlPathAndParameters UrlParser::parseGetRequestTarget(
std::string_view target, bool urlDecode) {
UrlPathAndParameters result;

target = target.substr(0, target.find('#'));
// Remove everything after # (including it). Does nothing if there is no #.
// Don't do this if `urlDecode == false` because in that case, the given
// string contains an unencoded SPARQL query, which frequently contains a # as
// a regular character.
if (urlDecode == true) {
target = target.substr(0, target.find('#'));
}

// Set `_path` and remove it from `target`. If there is no query string (part
// starting with "?"), we are done at this point.
size_t index = target.find('?');
result._target = target.substr(0, index);
if (index == npos) {
result._path = target.substr(0, index);
if (index == std::string::npos) {
return result;
}
target.remove_prefix(index + 1);

// Parse the query string and store the result in a hash map. Throw an error
// if the same key appears twice in the query string. Note that this excludes
// having two "cmd=..." parameters, although that would be meaningful (though
// not necessary) to support.
while (true) {
auto next = target.find('&');
auto paramAndValue = parseSingleKeyValuePair(target.substr(0, next));
auto paramAndValue =
parseSingleKeyValuePair(target.substr(0, next), urlDecode);
auto [iterator, isNewElement] =
result._parameters.insert(std::move(paramAndValue));
if (!isNewElement) {
AD_THROW(ad_semsearch::Exception::BAD_REQUEST,
"Duplicate HTTP parameter: " + iterator->first);
}
if (next == npos) {
if (next == std::string::npos) {
break;
}
target.remove_prefix(next + 1);
Expand All @@ -75,22 +95,22 @@ UrlParser::UrlTarget UrlParser::parseTarget(std::string_view target) {

// ____________________________________________________________________________
std::pair<std::string, std::string> UrlParser::parseSingleKeyValuePair(
std::string_view input) {
std::string_view input, bool urlDecode) {
size_t posOfEq = input.find('=');
if (posOfEq == std::string_view::npos) {
AD_THROW(ad_semsearch::Exception::BAD_REQUEST,
"Parameter without \"=\" in HTTP Request. " + std::string{input});
}
std::string param{applyPercentDecoding(input.substr(0, posOfEq))};
std::string value{applyPercentDecoding(input.substr(posOfEq + 1))};
std::string param{applyPercentDecoding(input.substr(0, posOfEq), urlDecode)};
std::string value{applyPercentDecoding(input.substr(posOfEq + 1), urlDecode)};
return {std::move(param), std::move(value)};
}

// _________________________________________________________________________
std::optional<std::string> UrlParser::getDecodedPathAndCheck(
std::string_view target) noexcept {
try {
auto filename = parseTarget(target)._target;
auto filename = parseGetRequestTarget(target)._path;
AD_CHECK(filename.starts_with('/'));
AD_CHECK(filename.find("..") == string::npos);
return filename;
Expand Down
35 changes: 24 additions & 11 deletions src/util/HttpServer/UrlParser.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright 2021, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>
// Copyright 2022, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Authors: Johannes Kalmbach <[email protected]>
// Hannah Bast <[email protected]>

#ifndef QLEVER_URLPARSER_H
#define QLEVER_URLPARSER_H
Expand All @@ -21,17 +22,26 @@ namespace ad_utility {
*/
class UrlParser {
public:
struct UrlTarget {
std::string _target;
/// Representation of the "path" and "query" of a URL. For a GET request, the
/// "path" is the part before the "?" (or everything if there is no "?"), and
/// the "query" is the part after the "?" (empty if there is no "?"). The
/// key-value pairs of the "query" are stored in a hash map.
struct UrlPathAndParameters {
std::string _path;
ad_utility::HashMap<std::string, std::string> _parameters;
};

// ___________________________________________________________________________
static std::string applyPercentDecoding(std::string_view url);
// URL-decode the given (part of a) URL. If the second argument is false, do
// nothing except converting the given `std::string_view` to `std::string`.
static std::string applyPercentDecoding(std::string_view url,
bool urlDecode = true);

/// Parse the `target` part of an HTTP GET Request,
/// for example, `/api.html?someKey=some+val%0Fue`.
static UrlTarget parseTarget(std::string_view target);
/// Parse the `target` part of an HTTP GET Request, for example,
/// `/api.html?someKey=some+val%0Fue`. The second argument specifies whether
/// the key-value pairs of the query string should be URL-decoded (default:
/// yes).
static UrlPathAndParameters parseGetRequestTarget(std::string_view target,
bool urlDecode = true);

/// From the `target` part of an HTTP GET request, only extract the path,
/// with percent decoding applied. E.g. `/target.html?key=value` will become
Expand All @@ -43,8 +53,11 @@ class UrlParser {
std::string_view target) noexcept;

private:
// Helper function that parses a single key-value pair from a URL query
// string. The second argument specifies whether the key and value should be
// URL-decoded (default: yes).
static std::pair<std::string, std::string> parseSingleKeyValuePair(
std::string_view input);
std::string_view input, bool urlDecode = true);
};
} // namespace ad_utility

Expand Down
10 changes: 5 additions & 5 deletions src/util/Timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ class Timer {
inline void setMsecs(off_t msecs) { _usecs = msecs * (off_t)(1000); }
inline void setSecs(off_t secs) { _usecs = secs * (off_t)(1000000); }

//! Time at last stop (initially zero)
off_t value() const { return _usecs; } /* in microseconds */
off_t usecs() const { return _usecs; } /* in microseconds */
off_t msecs() const { return _usecs / 1000; } /* in milliseconds */
float secs() const { return _usecs / 1000000.0; } /* in seconds */
//! Time at last stop (initially zero).
off_t value() const { return _usecs; }
off_t usecs() const { return _usecs; }
off_t msecs() const { return (_usecs + 500) / 1000; }
float secs() const { return _usecs / 1000000.0; }

// is the timer currently running
bool isRunning() const { return _running; }
Expand Down