Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reference counting changes #1951

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c109115
tests: ipc test valgrind fix for uninitialized stack data
gdamore Dec 2, 2024
dd6eced
aio: make sure aio is initialized before certain operations
gdamore Nov 30, 2024
9d1f671
ctx: Simplify handling for closed contexts.
gdamore Nov 28, 2024
f3a76d6
Dialer and listener reference count refactor.
gdamore Nov 29, 2024
78de5eb
performance: reference counters can use relaxed order when incrementing
gdamore Nov 29, 2024
f46f16a
Fix failure reference count leak for dialer/listener
gdamore Nov 29, 2024
f46f49b
Eliminate s_cv unused condvar.
gdamore Nov 29, 2024
b5d9329
Context reference counts
gdamore Nov 29, 2024
3ccd057
socket: convert to using reference counts for shutdown
gdamore Nov 29, 2024
f9910a8
nng_fini: Simpler clean up of sockets, add a test.
gdamore Nov 30, 2024
27fdb7f
listener: fix leaking listener after nng_listener_close
gdamore Dec 1, 2024
019f20b
websocket transport: inline the aios
gdamore Dec 1, 2024
d3ed1bb
websocket: more aio inlining (generic websocket layer)
gdamore Dec 1, 2024
cac1485
pipe: allocate and destroy pipe transport data with common pipe data
gdamore Dec 2, 2024
10431db
aio: make nni_aio_fini safer by not implicitly stoppping
gdamore Dec 2, 2024
48371f3
socket transport: No need for a cool down for this transport.
gdamore Dec 2, 2024
66e207e
sockfd: constify ops vectors
gdamore Dec 3, 2024
d584373
pair1: stop the pipe sooner
gdamore Dec 3, 2024
c050e2c
pipe: simplify because we always have tran and proto data
gdamore Dec 3, 2024
c1cb69e
dialer/listener/socket: ensure teardown order prevents use-after-free
gdamore Dec 4, 2024
4823401
aio: significant rework
gdamore Dec 4, 2024
0e81279
pipes: make separate dialer/listener pipe allocators.
gdamore Dec 4, 2024
e5f6632
aio: wrong test for nni_aio_abort
gdamore Dec 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 41 additions & 57 deletions src/core/aio.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
//

#include "core/nng_impl.h"
#include "core/taskq.h"
#include <string.h>

struct nni_aio_expire_q {
Expand Down Expand Up @@ -89,38 +88,16 @@
aio->a_timeout = NNG_DURATION_INFINITE;
aio->a_expire_q =
nni_aio_expire_q_list[nni_random() % nni_aio_expire_q_cnt];
aio->a_init = true;
}

void
nni_aio_fini(nni_aio *aio)
{
nni_aio_cancel_fn fn;
void *arg;
nni_aio_expire_q *eq = aio->a_expire_q;

// This is like aio_close, but we don't want to dispatch
// the task. And unlike aio_stop, we don't want to wait
// for the task. (Because we implicitly do task_fini.)
// We also wait if the aio is being expired.
nni_mtx_lock(&eq->eq_mtx);
aio->a_stop = true;
while (aio->a_expiring) {
nni_cv_wait(&eq->eq_cv);
}
nni_aio_expire_rm(aio);
fn = aio->a_cancel_fn;
arg = aio->a_cancel_arg;
aio->a_cancel_fn = NULL;
aio->a_cancel_arg = NULL;
nni_mtx_unlock(&eq->eq_mtx);

if (fn != NULL) {
fn(aio, arg, NNG_ECLOSED);
} else {
nni_task_abort(&aio->a_task);
if (aio != NULL && aio->a_init) {
NNI_ASSERT(!nni_aio_busy(aio));
nni_task_fini(&aio->a_task);
}

nni_task_fini(&aio->a_task);
}

int
Expand All @@ -140,6 +117,7 @@
nni_aio_free(nni_aio *aio)
{
if (aio != NULL) {
nni_aio_stop(aio);
nni_aio_fini(aio);
NNI_FREE_STRUCT(aio);
}
Expand All @@ -148,7 +126,7 @@
void
nni_aio_reap(nni_aio *aio)
{
if (aio != NULL) {
if (aio != NULL && aio->a_init) {
nni_reap(&aio_reap_list, aio);
}
}
Expand Down Expand Up @@ -181,13 +159,13 @@
void
nni_aio_stop(nni_aio *aio)
{
if (aio != NULL) {
if (aio != NULL && aio->a_init) {
nni_aio_cancel_fn fn;
void *arg;
nni_aio_expire_q *eq = aio->a_expire_q;

nni_mtx_lock(&eq->eq_mtx);
aio->a_stop = true;
aio->a_stop = true;
while (aio->a_expiring) {
nni_cv_wait(&eq->eq_cv);
}
Expand All @@ -200,18 +178,17 @@

if (fn != NULL) {
fn(aio, arg, NNG_ECANCELED);
} else {
nni_task_abort(&aio->a_task);
}

nni_aio_wait(aio);
NNI_ASSERT(!nni_aio_busy(aio));
}
}

void
nni_aio_close(nni_aio *aio)
{
if (aio != NULL) {
if (aio != NULL && aio->a_init) {
nni_aio_cancel_fn fn;
void *arg;
nni_aio_expire_q *eq = aio->a_expire_q;
Expand All @@ -227,8 +204,6 @@

if (fn != NULL) {
fn(aio, arg, NNG_ECLOSED);
} else {
nni_task_abort(&aio->a_task);
}
}
}
Expand Down Expand Up @@ -314,7 +289,9 @@
void
nni_aio_wait(nni_aio *aio)
{
nni_task_wait(&aio->a_task);
if (aio != NULL && aio->a_init) {
nni_task_wait(&aio->a_task);
}
}

bool
Expand All @@ -326,26 +303,21 @@
int
nni_aio_begin(nni_aio *aio)
{
// If any of these triggers then the caller has a defect because
// it means that the aio is already in use. This is always
// a bug in the caller. These checks are not technically thread
// safe in the event that they are false. Users of race detectors
// checks may wish ignore or suppress these checks.
nni_aio_expire_q *eq = aio->a_expire_q;

nni_mtx_lock(&eq->eq_mtx);
NNI_ASSERT(!nni_aio_list_active(aio));
NNI_ASSERT(aio->a_cancel_fn == NULL);
NNI_ASSERT(!nni_list_node_active(&aio->a_expire_node));

// Some initialization can be done outside the lock, because
// we must have exclusive access to the aio.
for (unsigned i = 0; i < NNI_NUM_ELEMENTS(aio->a_outputs); i++) {
aio->a_outputs[i] = NULL;
}
aio->a_result = 0;
aio->a_count = 0;
aio->a_cancel_fn = NULL;
aio->a_result = 0;
aio->a_count = 0;
aio->a_cancel_fn = NULL;
aio->a_abort = false;
aio->a_abort_result = 0;

// We should not reschedule anything at this point.
if (aio->a_stop || eq->eq_stop) {
Expand All @@ -372,7 +344,6 @@
// Convert the relative timeout to an absolute timeout.
switch (aio->a_timeout) {
case NNG_DURATION_ZERO:
nni_task_abort(&aio->a_task);
return (NNG_ETIMEDOUT);
case NNG_DURATION_INFINITE:
case NNG_DURATION_DEFAULT:
Expand All @@ -386,10 +357,16 @@

nni_mtx_lock(&eq->eq_mtx);
if (aio->a_stop || eq->eq_stop) {
nni_task_abort(&aio->a_task);
nni_mtx_unlock(&eq->eq_mtx);
return (NNG_ECLOSED);
}
if (aio->a_abort) {
int rv = aio->a_abort_result;
aio->a_abort = false;
aio->a_abort_result = 0;
nni_mtx_unlock(&eq->eq_mtx);
return (rv);

Check warning on line 368 in src/core/aio.c

View check run for this annotation

Codecov / codecov/patch

src/core/aio.c#L364-L368

Added lines #L364 - L368 were not covered by tests
}

NNI_ASSERT(aio->a_cancel_fn == NULL);
aio->a_cancel_fn = cancel;
Expand All @@ -413,19 +390,24 @@
void *arg;
nni_aio_expire_q *eq = aio->a_expire_q;

NNI_ASSERT(rv > 0);
nni_mtx_lock(&eq->eq_mtx);
nni_aio_expire_rm(aio);
fn = aio->a_cancel_fn;
arg = aio->a_cancel_arg;
aio->a_cancel_fn = NULL;
aio->a_cancel_arg = NULL;
fn = aio->a_cancel_fn;
arg = aio->a_cancel_arg;
aio->a_cancel_fn = NULL;
aio->a_cancel_arg = NULL;
aio->a_abort = true;
aio->a_abort_result = rv;
nni_mtx_unlock(&eq->eq_mtx);

// Stop any I/O at the provider level.
// If this doesn't catch it, it will be reported
// at nni_aio_schedule (if this is to be scheduled),
// or else we have proceeded to far to cancel this operation.
// (In which case it should complete shortly.)
if (fn != NULL) {
fn(aio, arg, rv);
} else {
nni_task_abort(&aio->a_task);
}
}

Expand All @@ -448,9 +430,11 @@
aio->a_msg = msg;
}

aio->a_expire = NNI_TIME_NEVER;
aio->a_sleep = false;
aio->a_use_expire = false;
aio->a_expire = NNI_TIME_NEVER;
aio->a_sleep = false;
aio->a_use_expire = false;
aio->a_abort = false;
aio->a_abort_result = 0;
nni_mtx_unlock(&eq->eq_mtx);

if (sync) {
Expand Down
55 changes: 33 additions & 22 deletions src/core/aio.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright 2023 Staysail Systems, Inc. <[email protected]>
// Copyright 2024 Staysail Systems, Inc. <[email protected]>
// Copyright 2018 Capitar IT Group BV <[email protected]>
//
// This software is supplied under the terms of the MIT License, a
Expand All @@ -13,6 +13,7 @@

#include "core/defs.h"
#include "core/list.h"
#include "core/platform.h"
#include "core/reap.h"
#include "core/taskq.h"
#include "core/thread.h"
Expand All @@ -26,11 +27,14 @@ typedef void (*nni_aio_cancel_fn)(nni_aio *, void *, int);
extern void nni_aio_init(nni_aio *, nni_cb, void *arg);

// nni_aio_fini finalizes an aio object, releasing associated resources.
// It waits for the callback to complete.
// It does NOT wait for the callback to complete, so it is mandatory
// that nni_aio_stop be called first.
extern void nni_aio_fini(nni_aio *);

// nni_aio_reap is used to asynchronously reap the aio. It can
// be called even from the callback of the aio itself.
// Note that this only works with aio objects created
// with nni_aio_alloc.
extern void nni_aio_reap(nni_aio *);

// nni_aio_alloc allocates an aio object and initializes it. The callback
Expand All @@ -42,17 +46,19 @@ extern int nni_aio_alloc(nni_aio **, nni_cb, void *arg);
// nni_aio_free frees the aio, releasing resources (locks)
// associated with it. This is safe to call on zeroed memory.
// This must only be called on an object that was allocated
// with nni_aio_allocate.
// with nni_aio_alloc.
extern void nni_aio_free(nni_aio *aio);

// nni_aio_stop cancels any unfinished I/O, running completion callbacks,
// but also prevents any new operations from starting (nni_aio_start will
// return NNG_ESTATE). This should be called before nni_aio_free(). The
// best pattern is to call nni_aio_stop on all linked aio objects, before
// calling nni_aio_free on any of them. This function will block until any
// callbacks are executed, and therefore it should never be executed
// from a callback itself. (To abort operations without blocking
// use nni_aio_cancel instead.)
// return NNG_ESTATE). This should be called before nni_aio_free(), and MUST
// be called before nni_aio_fini().
//
// The best pattern is to call nni_aio_stop on all linked aio objects, before
// calling nni_aio_free or nni_aio_fini on any of them. This function will
// block until any callbacks are executed, and therefore it should never be
// executed from a callback itself. (To abort operations without blocking use
// nni_aio_cancel instead.)
extern void nni_aio_stop(nni_aio *);

// nni_aio_close closes the aio for further activity. It aborts any in-progress
Expand Down Expand Up @@ -205,16 +211,20 @@ typedef struct nni_aio_expire_q nni_aio_expire_q;
// any of these members -- the definition is provided here to facilitate
// inlining, but that should be the only use.
struct nng_aio {
size_t a_count; // Bytes transferred (I/O only)
nni_time a_expire; // Absolute timeout
nni_duration a_timeout; // Relative timeout
int a_result; // Result code (nng_errno)
bool a_stop; // Shutting down (no new operations)
bool a_sleep; // Sleeping with no action
bool a_expire_ok; // Expire from sleep is ok
bool a_expiring; // Expiration in progress
bool a_use_expire; // Use expire instead of timeout
nni_task a_task;
size_t a_count; // Bytes transferred (I/O only)
nni_time a_expire; // Absolute timeout
nni_duration a_timeout; // Relative timeout
int a_result; // Result code (nng_errno)
int a_abort_result; // Reason for abort (result code)
bool a_stop; // Shutting down (no new operations)
bool a_abort; // Aborted (after begin, before schedule)
bool a_sleep; // Sleeping with no action
bool a_expire_ok; // Expire from sleep is ok
bool a_expiring; // Expiration in progress
bool a_use_expire; // Use expire instead of timeout
bool a_init; // This is initialized

nni_task a_task;

// Read/write operations.
nni_iov a_iov[8];
Expand All @@ -229,14 +239,15 @@ struct nng_aio {
void *a_inputs[4];
void *a_outputs[4];

nni_aio_expire_q *a_expire_q;
nni_list_node a_expire_node; // Expiration node
nni_reap_node a_reap_node;

// Provider-use fields.
nni_aio_cancel_fn a_cancel_fn;
void *a_cancel_arg;
void *a_prov_data;
nni_list_node a_prov_node; // Linkage on provider list.
nni_aio_expire_q *a_expire_q;
nni_list_node a_expire_node; // Expiration node
nni_reap_node a_reap_node;
};

#endif // CORE_AIO_H
4 changes: 2 additions & 2 deletions src/core/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ device_fini(void *arg)
for (int i = 0; i < d->num_paths; i++) {
nni_aio_stop(&d->paths[i].aio);
}
nni_sock_rele(d->paths[0].src);
nni_sock_rele(d->paths[0].dst);
NNI_FREE_STRUCT(d);
}

Expand Down Expand Up @@ -97,8 +99,6 @@ device_cb(void *arg)
nni_aio_finish_error(d->user, d->rv);
d->user = NULL;
}
nni_sock_rele(d->paths[0].src);
nni_sock_rele(d->paths[0].dst);

nni_reap(&device_reap, d);
}
Expand Down
Loading
Loading