fluent · edsiper · Aug 31, 2023 · Aug 3, 2023 · Aug 3, 2023 · Aug 3, 2023
@@ -174,6 +174,23 @@ int flb_io_net_connect(struct flb_connection *connection,
     return 0;
 }
 
+static void net_io_propagate_critical_error(
+                struct flb_connection *connection)
+{
+    switch (errno) {
+    case EBADF:
 /*
  * Switch a socket to blocking mode.
  *
  * On Windows this issues ioctlsocket(FIONBIO) with a zero argument; on
  * other platforms it reads the current file status flags with F_GETFL and
  * writes them back through F_SETFL with O_NONBLOCK cleared.
  *
  * fd: socket descriptor to modify.
  *
  * Returns 0 on success. On failure flb_errno() is invoked (presumably to
  * report the current errno - confirm against its definition) and -1 is
  * returned.
  */
 int flb_net_socket_blocking(flb_sockfd_t fd)
 {
 #ifdef _WIN32
     unsigned long off = 0;
 
     if (ioctlsocket(fd, FIONBIO, &off) != 0) {
         flb_errno();
         return -1;
     }
 #else
     int flags;
 
     /*
      * F_GETFL returns -1 on error. Using that value directly would hand
      * F_SETFL a mask with every bit except O_NONBLOCK set, so the lookup
      * has to be validated before the flags are rewritten.
      */
     flags = fcntl(fd, F_GETFL, 0);
     if (flags == -1) {
         flb_errno();
         return -1;
     }
 
     if (fcntl(fd, F_SETFL, flags & ~O_NONBLOCK) == -1) {
         flb_errno();
         return -1;
     }
 #endif
 
     return 0;
 }
 /*
  * Switch a socket to blocking mode.
  *
  * On Windows this issues ioctlsocket(FIONBIO) with a zero argument; on
  * other platforms it reads the current file status flags with F_GETFL and
  * writes them back through F_SETFL with O_NONBLOCK cleared.
  *
  * fd: socket descriptor to modify.
  *
  * Returns 0 on success. On failure flb_errno() is invoked (presumably to
  * report the current errno - confirm against its definition) and -1 is
  * returned.
  */
 int flb_net_socket_blocking(flb_sockfd_t fd)
 {
 #ifdef _WIN32
     unsigned long off = 0;
 
     if (ioctlsocket(fd, FIONBIO, &off) != 0) {
         flb_errno();
         return -1;
     }
 #else
     int flags;
 
     /*
      * F_GETFL returns -1 on error. Using that value directly would hand
      * F_SETFL a mask with every bit except O_NONBLOCK set, so the lookup
      * has to be validated before the flags are rewritten.
      */
     flags = fcntl(fd, F_GETFL, 0);
     if (flags == -1) {
         flb_errno();
         return -1;
     }
 
     if (fcntl(fd, F_SETFL, flags & ~O_NONBLOCK) == -1) {
         flb_errno();
         return -1;
     }
 #endif
 
     return 0;
 }
+    case ECONNRESET:
+    case EDESTADDRREQ:
+    case ENOTCONN:
+    case EPIPE:
+    case EACCES:
+    case EIO:
+    case ENETDOWN:
+    case ENETUNREACH:
+        connection->net_error = errno;
+    }
+}
+
 static int fd_io_write(int fd, struct sockaddr_storage *address,
                        const void *data, size_t len, size_t *out_len);
 static int net_io_write(struct flb_connection *connection,
@@ -204,7 +221,13 @@ static int net_io_write(struct flb_connection *connection,
         }
     }
 
-    return fd_io_write(connection->fd, address, data, len, out_len);
+    ret = fd_io_write(connection->fd, address, data, len, out_len);
+
+    if (ret == -1) {
+        net_io_propagate_critical_error(connection);
+    }
+
+    return ret;
 }
 
 static int fd_io_write(int fd, struct sockaddr_storage *address,
@@ -430,6 +453,7 @@ static FLB_INLINE int net_io_write_async(struct flb_coro *co,
             *out_len = total;
 
             net_io_restore_event(connection, &event_backup);
+            net_io_propagate_critical_error(connection);
 
             return -1;
         }
@@ -519,6 +543,9 @@ static ssize_t net_io_read(struct flb_connection *connection,
                      connection->net->io_timeout,
                      flb_connection_get_remote_address(connection));
         }
+        else {
+            net_io_propagate_critical_error(connection);
+        }
 
         return -1;
     }
@@ -597,6 +624,9 @@ static FLB_INLINE ssize_t net_io_read_async(struct flb_coro *co,
 
             goto retry_read;
         }
+        else {
+            net_io_propagate_critical_error(connection);
+        }
 
         ret = -1;
     }

@@ -726,9 +726,6 @@ struct flb_connection *flb_upstream_conn_get(struct flb_upstream *u)
 
             flb_stream_release_lock(&u->base);
 
-            /* Reset errno */
-            conn->net_error = -1;
-
             err = flb_socket_error(conn->fd);
 
             if (!FLB_EINPROGRESS(err) && err != 0) {
@@ -740,6 +737,9 @@ struct flb_connection *flb_upstream_conn_get(struct flb_upstream *u)
                 continue;
             }
 
+            /* Reset errno */
+            conn->net_error = -1;
+
             /* Connect timeout */
             conn->ts_assigned = time(NULL);
             flb_debug("[upstream] KA connection #%i to %s:%i has been assigned (recycled)",
@@ -803,7 +803,8 @@ int flb_upstream_conn_release(struct flb_connection *conn)
     /* If this is a valid KA connection just recycle */
     if (u->base.net.keepalive == FLB_TRUE &&
         conn->recycle == FLB_TRUE &&
-        conn->fd > -1) {
+        conn->fd > -1 &&
+        conn->net_error == -1) {
         /*
          * This connection is still useful, move it to the 'available' queue and
          * initialize variables.

@@ -434,6 +434,13 @@ static int tls_net_read(struct flb_tls_session *session,
             ERR_error_string_n(ret, err_buf, sizeof(err_buf)-1);
             flb_error("[tls] syscall error: %s", err_buf);
 
+            /* According to the documentation these are non-recoverable
 static int tls_net_handshake(struct flb_tls *tls, 
 static int tls_net_handshake(struct flb_tls *tls, 
+             * errors so we don't need to screen them before saving them
+             * to the net_error field.
+             */
+
+            session->connection->net_error = errno;
 ret = flb_tls_net_write_async(coro, connection->tls_session, data, len, out_len); 
 ret = flb_tls_net_write_async(coro, connection->tls_session, data, len, out_len); 
+
             ret = -1;
         }
         else if (ret < 0) {
@@ -489,6 +496,13 @@ static int tls_net_write(struct flb_tls_session *session,
             ERR_error_string_n(ret, err_buf, sizeof(err_buf)-1);
             flb_error("[tls] syscall error: %s", err_buf);
 
+            /* According to the documentation these are non-recoverable
+             * errors so we don't need to screen them before saving them
+             * to the net_error field.
+             */
+
+            session->connection->net_error = errno;
+
             ret = -1;
         }
         else {