diff --git a/api/envoy/extensions/network/socket_interface/v3/default_socket_interface.proto b/api/envoy/extensions/network/socket_interface/v3/default_socket_interface.proto index ae2693871f72..75e6ce88a282 100644 --- a/api/envoy/extensions/network/socket_interface/v3/default_socket_interface.proto +++ b/api/envoy/extensions/network/socket_interface/v3/default_socket_interface.proto @@ -2,7 +2,10 @@ syntax = "proto3"; package envoy.extensions.network.socket_interface.v3; +import "google/protobuf/wrappers.proto"; + import "udpa/annotations/status.proto"; +import "validate/validate.proto"; option java_package = "io.envoyproxy.envoy.extensions.network.socket_interface.v3"; option java_outer_classname = "DefaultSocketInterfaceProto"; @@ -15,4 +18,38 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE; // Configuration for default socket interface that relies on OS dependent syscall to create // sockets. message DefaultSocketInterface { + // Enable io_uring for socket operations if the kernel supports it. io_uring is only valid on + // Linux with minimum kernel version 5.6. Otherwise, Envoy will fall back to the default + // socket operations. Defaults to false. + bool enable_io_uring = 1; + + // The size for io_uring submission queues (SQ) and completion queues (CQ). io_uring is built + // up during configuration with a fixed size in each thread, and each io_uring operation will + // create a submission queue entry (SQE). Once the SQ is used up, more operations will not be + // added to the io_uring. Defaults to 1000. + google.protobuf.UInt32Value io_uring_size = 2; + + // Enable io_uring submission queue polling (SQPOLL). io_uring SQPOLL mode polls all SQEs in SQ + // in the kernel thread. io_uring SQPOLL mode may reduce latency and increase CPU usage as a + // cost. Defaults to false. + bool enable_io_uring_submission_queue_polling = 3; + + // The size an io_uring TCP accept socket's pending connections queue can grow to. 
The value is + // different from :ref:`tcp_backlog_size <envoy_v3_api_field_config.listener.v3.Listener.tcp_backlog_size>`. + // Connections in TCP listener's queue are not being accepted, while connections in io_uring TCP + // accept socket's queue are accepted but not handled. io_uring accepts sockets asynchronously, + // and a large backlog value performs better in situations with a large number of + // non-keep-alive connections. Defaults to 5. + google.protobuf.UInt32Value io_uring_accept_backlog = 4; + + // The size of an io_uring socket's read buffer. Each io_uring read operation allocates a buffer + // with the given size, and if the buffer provided is too small, the socket may read multiple + // times to read all the data. Defaults to 8192. + google.protobuf.UInt32Value io_uring_read_buffer_size = 5; + + // The timeout of an io_uring socket's write on closing in ms. io_uring writes and closes + // asynchronously. If the remote stops reading, the io_uring write operation may never complete. + // Connections may have a timeout like per_try_timeout before closing, and io_uring adds another + // timeout period on top of it. Defaults to 1000. 
+ google.protobuf.UInt32Value io_uring_write_timeout_ms = 6; } diff --git a/source/common/network/socket_interface_impl.cc b/source/common/network/socket_interface_impl.cc index 3a182f9a4036..b3b570503036 100644 --- a/source/common/network/socket_interface_impl.cc +++ b/source/common/network/socket_interface_impl.cc @@ -2,6 +2,7 @@ #include "envoy/common/exception.h" #include "envoy/extensions/network/socket_interface/v3/default_socket_interface.pb.h" +#include "envoy/extensions/network/socket_interface/v3/default_socket_interface.pb.validate.h" #include "source/common/api/os_sys_calls_impl.h" #include "source/common/common/assert.h" @@ -162,15 +163,21 @@ bool SocketInterfaceImpl::ipFamilySupported(int domain) { } Server::BootstrapExtensionPtr SocketInterfaceImpl::createBootstrapExtension( - const Protobuf::Message&, + const Protobuf::Message& config, [[maybe_unused]] Server::Configuration::ServerFactoryContext& context) { + const auto& message = MessageUtil::downcastAndValidate< + const envoy::extensions::network::socket_interface::v3::DefaultSocketInterface&>( + config, context.messageValidationVisitor()); #ifdef __linux__ - // TODO (soulxu): Add runtime flag here. 
- if (Io::isIoUringSupported()) { + if (message.enable_io_uring() && Io::isIoUringSupported()) { std::shared_ptr io_uring_factory = - std::make_shared(DefaultIoUringSize, UseSubmissionQueuePolling, - DefaultAcceptSize, DefaultReadBufferSize, - DefaultWriteTimeoutMs, context.threadLocal()); + std::make_shared( + PROTOBUF_GET_WRAPPED_OR_DEFAULT(message, io_uring_size, 1000), + message.enable_io_uring_submission_queue_polling(), + PROTOBUF_GET_WRAPPED_OR_DEFAULT(message, io_uring_accept_backlog, 5), + PROTOBUF_GET_WRAPPED_OR_DEFAULT(message, io_uring_read_buffer_size, 8192), + PROTOBUF_GET_WRAPPED_OR_DEFAULT(message, io_uring_write_timeout_ms, 1000), + context.threadLocal()); io_uring_factory_ = io_uring_factory; return std::make_unique(*this, io_uring_factory); diff --git a/source/common/network/socket_interface_impl.h b/source/common/network/socket_interface_impl.h index 4aa313356824..fff69ad4b621 100644 --- a/source/common/network/socket_interface_impl.h +++ b/source/common/network/socket_interface_impl.h @@ -44,13 +44,6 @@ class SocketInterfaceImpl : public SocketInterfaceBase { absl::optional domain, Io::IoUringFactory* io_uring_factory = nullptr); - // TODO (soulxu): making those configurable - static constexpr uint32_t DefaultIoUringSize = 300; - static constexpr uint32_t DefaultAcceptSize = 5; - static constexpr uint32_t DefaultReadBufferSize = 8192; - static constexpr uint32_t DefaultWriteTimeoutMs = 1000; - static constexpr bool UseSubmissionQueuePolling = false; - protected: virtual IoHandlePtr makeSocket(int socket_fd, bool socket_v6only, Socket::Type socket_type, absl::optional domain) const; diff --git a/tools/spelling/spelling_dictionary.txt b/tools/spelling/spelling_dictionary.txt index 39cc6d820c11..acdcd4700ed2 100644 --- a/tools/spelling/spelling_dictionary.txt +++ b/tools/spelling/spelling_dictionary.txt @@ -391,7 +391,9 @@ SPD SPDY SPIFFE SPKI +SQE SQL +SQPOLL SR SRCDIR SRDS