diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c9fb36d30f..b738ad12db7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Unreleased - Changes from 5.27.0 - Misc: + - FIXED: Revert back to using custom HTTP parser instead of Boost.Beast. [#6407](https://github.com/Project-OSRM/osrm-backend/pull/6407) - FIXED: Fix bug with large HTTP requests leading to Bad Request in osrm-routed. [#6403](https://github.com/Project-OSRM/osrm-backend/pull/6403) - Routing: - CHANGED: Add support for surface=metal,grass_paver,woodchips in bicyle profile. [#6395](https://github.com/Project-OSRM/osrm-backend/pull/6395) diff --git a/features/step_definitions/requests.js b/features/step_definitions/requests.js index 14817a11f09..a16e24a4efc 100644 --- a/features/step_definitions/requests.js +++ b/features/step_definitions/requests.js @@ -55,4 +55,4 @@ module.exports = function () { assert.equal(this.processError.process, binary); assert.equal(parseInt(this.processError.code), parseInt(code)); }); -}; +}; \ No newline at end of file diff --git a/fuzz/CMakeLists.txt b/fuzz/CMakeLists.txt index 67792e07ae7..17b26b6e005 100644 --- a/fuzz/CMakeLists.txt +++ b/fuzz/CMakeLists.txt @@ -38,7 +38,8 @@ if (ENABLE_FUZZING) "table_parameters" "tile_parameters" "trip_parameters" - "url_parser") + "url_parser" + "request_parser") foreach (target ${ServerTargets}) add_fuzz_target(${target}) diff --git a/fuzz/request_parser.cc b/fuzz/request_parser.cc new file mode 100644 index 00000000000..e292ab27ee7 --- /dev/null +++ b/fuzz/request_parser.cc @@ -0,0 +1,28 @@ +#include "server/request_parser.hpp" +#include "server/http/request.hpp" + +#include "util.hpp" + +#include +#include + +using osrm::server::RequestParser; +using osrm::server::http::request; + +extern "C" int LLVMFuzzerTestOneInput(const unsigned char *data, unsigned long size) +{ + std::string in(reinterpret_cast(data), size); + + auto first = begin(in); + auto last = end(in); + + RequestParser parser; + request req; + + // &(*it) is needed to go from iterator to underlying item to pointer to underlying item + parser.parse(req, &(*first), &(*last)); + + escape(&req); + + return 0; +} diff --git a/include/server/connection.hpp b/include/server/connection.hpp index f90c577b961..df5b47d2681 100644 --- a/include/server/connection.hpp +++ b/include/server/connection.hpp @@ -4,15 +4,14 @@ #include "server/http/compression_type.hpp" #include "server/http/reply.hpp" #include "server/http/request.hpp" +#include "server/request_parser.hpp" + #include #include -#include -#include #include #include #include -#include #include // workaround for incomplete std::shared_ptr compatibility in old boost versions @@ -48,7 +47,6 @@ class Connection : public std::enable_shared_from_this void start(); private: - using RequestParser = boost::beast::http::request_parser; void handle_read(const boost::system::error_code &e, std::size_t bytes_transferred); /// Handle completion of a write operation. @@ -62,14 +60,12 @@ class Connection : public std::enable_shared_from_this std::vector compress_buffers(const std::vector &uncompressed_data, const http::compression_type compression_type); - void fill_request(const RequestParser::value_type &httpMessage, http::request &request); - boost::asio::strand strand; boost::asio::ip::tcp::socket TCP_socket; boost::asio::deadline_timer timer; RequestHandler &request_handler; - std::optional http_request_parser; - std::vector incoming_data_buffer; + RequestParser request_parser; + boost::array incoming_data_buffer; http::request current_request; http::reply current_reply; std::vector compressed_output; diff --git a/include/server/request_parser.hpp b/include/server/request_parser.hpp new file mode 100644 index 00000000000..5af1911c9f1 --- /dev/null +++ b/include/server/request_parser.hpp @@ -0,0 +1,75 @@ +#ifndef REQUEST_PARSER_HPP +#define REQUEST_PARSER_HPP + +#include "server/http/compression_type.hpp" +#include "server/http/header.hpp" + +#include + +namespace osrm +{ +namespace server +{ + +namespace http +{ +struct request; +} + +class RequestParser +{ + public: + RequestParser(); + + enum class RequestStatus : char + { + valid, + invalid, + indeterminate + }; + + std::tuple + parse(http::request ¤t_request, char *begin, char *end); + + private: + RequestStatus consume(http::request ¤t_request, const char input); + + bool is_char(const int character) const; + + bool is_CTL(const int character) const; + + bool is_special(const int character) const; + + bool is_digit(const int character) const; + + enum class internal_state : unsigned char + { + method_start, + method, + uri_start, + uri, + http_version_h, + http_version_t_1, + http_version_t_2, + http_version_p, + http_version_slash, + http_version_major_start, + http_version_major, + http_version_minor_start, + http_version_minor, + expecting_newline_1, + header_line_start, + header_lws, + header_name, + header_value, + expecting_newline_2, + expecting_newline_3 + } state; + + http::header current_header; + http::compression_type selected_compression; +}; +} // namespace server +} // namespace osrm + +#endif // REQUEST_PARSER_HPP diff --git a/src/server/connection.cpp b/src/server/connection.cpp index 980d2fcdf77..68e24c36006 100644 --- a/src/server/connection.cpp +++ b/src/server/connection.cpp @@ -1,10 +1,12 @@ #include "server/connection.hpp" #include "server/request_handler.hpp" +#include "server/request_parser.hpp" #include #include #include #include + #include namespace osrm @@ -12,40 +14,14 @@ namespace osrm namespace server { -namespace -{ -const size_t CHUNK_SIZE = 8192; - -} // namespace Connection::Connection(boost::asio::io_context &io_context, RequestHandler &handler) : strand(boost::asio::make_strand(io_context)), TCP_socket(strand), timer(strand), - request_handler(handler), http_request_parser(std::make_optional()) + request_handler(handler) { - http_request_parser->header_limit(std::numeric_limits::max()); } boost::asio::ip::tcp::socket &Connection::socket() { return TCP_socket; } -namespace -{ - -http::compression_type select_compression(const boost::beast::http::fields &fields) -{ - const auto header_value = fields[boost::beast::http::field::accept_encoding]; - /* giving gzip precedence over deflate */ - if (boost::icontains(header_value, "gzip")) - { - return http::gzip_rfc1952; - } - if (boost::icontains(header_value, "deflate")) - { - return http::deflate_rfc1951; - } - return http::no_compression; -} - -} // namespace - /// Start the first asynchronous operation for the connection. void Connection::start() { @@ -65,8 +41,7 @@ void Connection::start() } } -void Connection::handle_read(const boost::system::error_code &error, - std::size_t /*bytes_transferred*/) +void Connection::handle_read(const boost::system::error_code &error, std::size_t bytes_transferred) { if (error) { @@ -85,48 +60,20 @@ void Connection::handle_read(const boost::system::error_code &error, timer.expires_from_now(boost::posix_time::seconds(0)); } - boost::beast::error_code ec; - http_request_parser->put(boost::asio::buffer(incoming_data_buffer), ec); // no error detected, let's parse the request http::compression_type compression_type(http::no_compression); - - if (ec) - { - if (ec == boost::beast::http::error::need_more) - { - const auto current_size = incoming_data_buffer.size(); - incoming_data_buffer.resize(incoming_data_buffer.size() + CHUNK_SIZE, 0); - // we don't have a result yet, so continue reading - TCP_socket.async_read_some( - boost::asio::buffer(incoming_data_buffer.data() + current_size, CHUNK_SIZE), - boost::bind(&Connection::handle_read, - this->shared_from_this(), - boost::asio::placeholders::error, - boost::asio::placeholders::bytes_transferred)); - } - else - { - // request is not parseable - current_reply = http::reply::stock_reply(http::reply::bad_request); - - boost::asio::async_write(TCP_socket, - current_reply.to_buffers(), - boost::bind(&Connection::handle_write, - this->shared_from_this(), - boost::asio::placeholders::error)); - } - } - else + RequestParser::RequestStatus result; + std::tie(result, compression_type) = + request_parser.parse(current_request, + incoming_data_buffer.data(), + incoming_data_buffer.data() + bytes_transferred); + + // the request has been parsed + if (result == RequestParser::RequestStatus::valid) { - // the request has been parsed - const auto &message = http_request_parser->get(); - compression_type = select_compression(message); - - fill_request(message, current_request); boost::system::error_code ec; current_request.endpoint = TCP_socket.remote_endpoint(ec).address(); - if (ec) { util::Log(logDEBUG) << "Socket remote endpoint error: " << ec.message(); @@ -180,6 +127,25 @@ void Connection::handle_read(const boost::system::error_code &error, this->shared_from_this(), boost::asio::placeholders::error)); } + else if (result == RequestParser::RequestStatus::invalid) + { // request is not parseable + current_reply = http::reply::stock_reply(http::reply::bad_request); + + boost::asio::async_write(TCP_socket, + current_reply.to_buffers(), + boost::bind(&Connection::handle_write, + this->shared_from_this(), + boost::asio::placeholders::error)); + } + else + { + // we don't have a result yet, so continue reading + TCP_socket.async_read_some(boost::asio::buffer(incoming_data_buffer), + boost::bind(&Connection::handle_read, + this->shared_from_this(), + boost::asio::placeholders::error, + boost::asio::placeholders::bytes_transferred)); + } } /// Handle completion of a write operation. @@ -192,9 +158,8 @@ void Connection::handle_write(const boost::system::error_code &error) --processed_requests; current_request = http::request(); current_reply = http::reply(); - http_request_parser.emplace(); - http_request_parser->header_limit(std::numeric_limits::max()); - incoming_data_buffer.resize(CHUNK_SIZE, 0); + request_parser = RequestParser(); + incoming_data_buffer = boost::array(); output_buffer.clear(); this->start(); } @@ -255,15 +220,5 @@ std::vector Connection::compress_buffers(const std::vector &uncompre return compressed_data; } - -void Connection::fill_request(const RequestParser::value_type &http_message, - http::request ¤t_request) -{ - current_request.uri = http_message.target().to_string(); - current_request.agent = http_message[boost::beast::http::field::user_agent].to_string(); - current_request.referrer = http_message[boost::beast::http::field::referer].to_string(); - current_request.connection = http_message[boost::beast::http::field::connection].to_string(); -} - } // namespace server } // namespace osrm diff --git a/src/server/request_parser.cpp b/src/server/request_parser.cpp new file mode 100644 index 00000000000..3224bec3110 --- /dev/null +++ b/src/server/request_parser.cpp @@ -0,0 +1,302 @@ +#include "server/request_parser.hpp" + +#include "server/http/compression_type.hpp" +#include "server/http/header.hpp" +#include "server/http/request.hpp" + +#include + +#include + +namespace osrm +{ +namespace server +{ + +RequestParser::RequestParser() + : state(internal_state::method_start), current_header({"", ""}), + selected_compression(http::no_compression) +{ +} + +std::tuple +RequestParser::parse(http::request ¤t_request, char *begin, char *end) +{ + while (begin != end) + { + RequestStatus result = consume(current_request, *begin++); + if (result != RequestStatus::indeterminate) + { + return std::make_tuple(result, selected_compression); + } + } + RequestStatus result = RequestStatus::indeterminate; + + return std::make_tuple(result, selected_compression); +} + +RequestParser::RequestStatus RequestParser::consume(http::request ¤t_request, + const char input) +{ + switch (state) + { + case internal_state::method_start: + if (!is_char(input) || is_CTL(input) || is_special(input)) + { + return RequestStatus::invalid; + } + state = internal_state::method; + return RequestStatus::indeterminate; + case internal_state::method: + if (input == ' ') + { + state = internal_state::uri; + return RequestStatus::indeterminate; + } + if (!is_char(input) || is_CTL(input) || is_special(input)) + { + return RequestStatus::invalid; + } + return RequestStatus::indeterminate; + case internal_state::uri_start: + if (is_CTL(input)) + { + return RequestStatus::invalid; + } + state = internal_state::uri; + current_request.uri.push_back(input); + return RequestStatus::indeterminate; + case internal_state::uri: + if (input == ' ') + { + state = internal_state::http_version_h; + return RequestStatus::indeterminate; + } + if (is_CTL(input)) + { + return RequestStatus::invalid; + } + current_request.uri.push_back(input); + return RequestStatus::indeterminate; + case internal_state::http_version_h: + if (input == 'H') + { + state = internal_state::http_version_t_1; + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + case internal_state::http_version_t_1: + if (input == 'T') + { + state = internal_state::http_version_t_2; + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + case internal_state::http_version_t_2: + if (input == 'T') + { + state = internal_state::http_version_p; + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + case internal_state::http_version_p: + if (input == 'P') + { + state = internal_state::http_version_slash; + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + case internal_state::http_version_slash: + if (input == '/') + { + state = internal_state::http_version_major_start; + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + case internal_state::http_version_major_start: + if (is_digit(input)) + { + state = internal_state::http_version_major; + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + case internal_state::http_version_major: + if (input == '.') + { + state = internal_state::http_version_minor_start; + return RequestStatus::indeterminate; + } + if (is_digit(input)) + { + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + case internal_state::http_version_minor_start: + if (is_digit(input)) + { + state = internal_state::http_version_minor; + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + case internal_state::http_version_minor: + if (input == '\r') + { + state = internal_state::expecting_newline_1; + return RequestStatus::indeterminate; + } + if (is_digit(input)) + { + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + case internal_state::expecting_newline_1: + if (input == '\n') + { + state = internal_state::header_line_start; + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + case internal_state::header_line_start: + if (boost::iequals(current_header.name, "Accept-Encoding")) + { + /* giving gzip precedence over deflate */ + if (boost::icontains(current_header.value, "deflate")) + { + selected_compression = http::deflate_rfc1951; + } + if (boost::icontains(current_header.value, "gzip")) + { + selected_compression = http::gzip_rfc1952; + } + } + + if (boost::iequals(current_header.name, "Referer")) + { + current_request.referrer = current_header.value; + } + + if (boost::iequals(current_header.name, "User-Agent")) + { + current_request.agent = current_header.value; + } + + if (boost::iequals(current_header.name, "Connection")) + { + current_request.connection = current_header.value; + } + + if (input == '\r') + { + state = internal_state::expecting_newline_3; + return RequestStatus::indeterminate; + } + if (!is_char(input) || is_CTL(input) || is_special(input)) + { + return RequestStatus::invalid; + } + state = internal_state::header_name; + current_header.clear(); + current_header.name.push_back(input); + return RequestStatus::indeterminate; + case internal_state::header_lws: + if (input == '\r') + { + state = internal_state::expecting_newline_2; + return RequestStatus::indeterminate; + } + if (input == ' ' || input == '\t') + { + return RequestStatus::indeterminate; + } + if (is_CTL(input)) + { + return RequestStatus::invalid; + } + state = internal_state::header_value; + return RequestStatus::indeterminate; + case internal_state::header_name: + if (input == ':') + { + state = internal_state::header_value; + return RequestStatus::indeterminate; + } + if (!is_char(input) || is_CTL(input) || is_special(input)) + { + return RequestStatus::invalid; + } + current_header.name.push_back(input); + return RequestStatus::indeterminate; + case internal_state::header_value: + if (input == ' ') + { + state = internal_state::header_value; + return RequestStatus::indeterminate; + } + if (input == '\r') + { + state = internal_state::expecting_newline_2; + return RequestStatus::indeterminate; + } + if (is_CTL(input)) + { + return RequestStatus::invalid; + } + current_header.value.push_back(input); + return RequestStatus::indeterminate; + case internal_state::expecting_newline_2: + if (input == '\n') + { + state = internal_state::header_line_start; + return RequestStatus::indeterminate; + } + return RequestStatus::invalid; + default: // expecting_newline_3 + return input == '\n' ? RequestStatus::valid : RequestStatus::invalid; + } +} + +bool RequestParser::is_char(const int character) const +{ + return character >= 0 && character <= 127; +} + +bool RequestParser::is_CTL(const int character) const +{ + return (character >= 0 && character <= 31) || (character == 127); +} + +bool RequestParser::is_special(const int character) const +{ + switch (character) + { + case '(': + case ')': + case '<': + case '>': + case '@': + case ',': + case ';': + case ':': + case '\\': + case '"': + case '/': + case '[': + case ']': + case '?': + case '=': + case '{': + case '}': + case ' ': + case '\t': + return true; + default: + return false; + } +} + +bool RequestParser::is_digit(const int character) const +{ + return character >= '0' && character <= '9'; +} +} // namespace server +} // namespace osrm