Skip to content

Commit

Permalink
DAOS-15540 cart: Change proto query default timeout (#14382)
Browse files Browse the repository at this point in the history
Set it to 3 seconds initially and increase it as we try other targets so we
can get going more quickly when a rank is down.

Signed-off-by: Jeff Olivier <[email protected]>
  • Loading branch information
jolivier23 committed May 17, 2024
1 parent 5da76e9 commit 592e76e
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 21 deletions.
30 changes: 23 additions & 7 deletions src/cart/crt_register.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2016-2022 Intel Corporation.
* (C) Copyright 2016-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -572,13 +572,14 @@ proto_query_cb(const struct crt_cb_info *cb_info)

int
crt_proto_query_int(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc, uint32_t *ver, int count,
crt_proto_query_cb_t cb, void *arg, crt_context_t ctx)
uint32_t timeout, crt_proto_query_cb_t cb, void *arg, crt_context_t ctx)
{
crt_rpc_t *rpc_req = NULL;
crt_context_t crt_ctx;
struct crt_proto_query_in *rpc_req_input;
struct proto_query_t *proto_query = NULL;
uint32_t *tmp_array = NULL;
uint32_t default_timeout;
int rc = DER_SUCCESS;

if (ver == NULL) {
Expand Down Expand Up @@ -629,6 +630,21 @@ crt_proto_query_int(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc, uint32_t *ver
proto_query->pq_user_arg = arg;
proto_query->pq_coq->coq_base = base_opc;

if (timeout != 0) {
/** The global timeout may be overwritten by the per context timeout
* so let's use the API to get the actual setting.
*/
rc = crt_req_get_timeout(rpc_req, &default_timeout);
/** Should only fail if invalid parameter */
D_ASSERT(rc == 0);

if (timeout < default_timeout) {
rc = crt_req_set_timeout(rpc_req, timeout);
/** Should only fail if invalid parameter */
D_ASSERT(rc == 0);
}
}

rc = crt_req_send(rpc_req, proto_query_cb, proto_query);
if (rc != 0)
D_ERROR("crt_req_send() failed: "DF_RC"\n", DP_RC(rc));
Expand All @@ -650,17 +666,17 @@ crt_proto_query_int(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc, uint32_t *ver
}

int
crt_proto_query(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc,
uint32_t *ver, int count, crt_proto_query_cb_t cb, void *arg)
crt_proto_query(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc, uint32_t *ver, int count,
uint32_t timeout, crt_proto_query_cb_t cb, void *arg)
{
return crt_proto_query_int(tgt_ep, base_opc, ver, count, cb, arg, NULL);
return crt_proto_query_int(tgt_ep, base_opc, ver, count, timeout, cb, arg, NULL);
}

int
crt_proto_query_with_ctx(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc, uint32_t *ver, int count,
crt_proto_query_cb_t cb, void *arg, crt_context_t ctx)
uint32_t timeout, crt_proto_query_cb_t cb, void *arg, crt_context_t ctx)
{
return crt_proto_query_int(tgt_ep, base_opc, ver, count, cb, arg, ctx);
return crt_proto_query_int(tgt_ep, base_opc, ver, count, timeout, cb, arg, ctx);
}

/* local operation, query if base_opc with version number ver is registered. */
Expand Down
8 changes: 3 additions & 5 deletions src/cart/crt_rpc.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2016-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -346,8 +346,7 @@ crt_register_proto_fi(crt_endpoint_t *ep)
if (rc != 0)
return -DER_MISC;

rc = crt_proto_query(ep, cpf.cpf_base, &cpf.cpf_ver,
1, crt_pfi_cb, &pfi);
rc = crt_proto_query(ep, cpf.cpf_base, &cpf.cpf_ver, 1, 0, crt_pfi_cb, &pfi);
if (rc != -DER_SUCCESS)
D_GOTO(out, rc);

Expand Down Expand Up @@ -386,8 +385,7 @@ crt_register_proto_ctl(crt_endpoint_t *ep)
if (rc != 0)
return -DER_MISC;

rc = crt_proto_query(ep, cpf.cpf_base, &cpf.cpf_ver,
1, crt_pfi_cb, &pfi);
rc = crt_proto_query(ep, cpf.cpf_base, &cpf.cpf_ver, 1, 0, crt_pfi_cb, &pfi);
if (rc != -DER_SUCCESS)
D_GOTO(out, rc);

Expand Down
9 changes: 6 additions & 3 deletions src/client/api/rpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ struct rpc_proto {
crt_opcode_t base_opc;
uint32_t *ver_array;
uint32_t array_size;
uint32_t timeout;
};

static void
Expand All @@ -113,8 +114,9 @@ query_cb(struct crt_proto_query_cb_info *cb_info)

if (daos_rpc_retryable_rc(cb_info->pq_rc)) {
rproto->ep.ep_rank = (rproto->ep.ep_rank + 1) % rproto->nr_ranks;
rproto->timeout += 3;
rc = crt_proto_query_with_ctx(&rproto->ep, rproto->base_opc, rproto->ver_array,
rproto->array_size, query_cb, rproto,
rproto->array_size, rproto->timeout, query_cb, rproto,
daos_get_crt_ctx());
if (rc) {
D_ERROR("crt_proto_query_with_ctx() failed: "DF_RC"\n", DP_RC(rc));
Expand Down Expand Up @@ -155,9 +157,10 @@ daos_rpc_proto_query(crt_opcode_t base_opc, uint32_t *ver_array, int count, int
rproto->array_size = count;
rproto->ep.ep_grp = sys->sy_group;
rproto->base_opc = base_opc;
rproto->timeout = 3;

rc = crt_proto_query_with_ctx(&rproto->ep, base_opc,
ver_array, count, query_cb, rproto, ctx);
rc = crt_proto_query_with_ctx(&rproto->ep, base_opc, ver_array, count, rproto->timeout,
query_cb, rproto, ctx);
if (rc) {
D_ERROR("crt_proto_query_with_ctx() failed: "DF_RC"\n", DP_RC(rc));
D_GOTO(out_free, rc);
Expand Down
10 changes: 7 additions & 3 deletions src/include/cart/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1875,6 +1875,8 @@ crt_proto_register(struct crt_proto_format *cpf);
* \param[in] base_opc the base opcode for the protocol
* \param[in] ver array of protocol version
* \param[in] count number of elements in ver
* \param[in] timeout Timeout in seconds, ignored if 0 or greater than
* default timeout
* \param[in] cb completion callback. crt_proto_query() internally
* sends an RPC to \a tgt_ep. \a cb will be called
* upon completion of that RPC. The highest protocol
Expand All @@ -1888,8 +1890,8 @@ crt_proto_register(struct crt_proto_format *cpf);
* failure.
*/
int
crt_proto_query(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc,
uint32_t *ver, int count, crt_proto_query_cb_t cb, void *arg);
crt_proto_query(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc, uint32_t *ver, int count,
uint32_t timeout, crt_proto_query_cb_t cb, void *arg);

/**
* query tgt_ep if it has registered base_opc with version using a user provided cart context.
Expand All @@ -1898,6 +1900,8 @@ crt_proto_query(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc,
* \param[in] base_opc the base opcode for the protocol
* \param[in] ver array of protocol version
* \param[in] count number of elements in ver
* \param[in] timeout Timeout in seconds, ignored if 0 or greater than
* default timeout
* \param[in] cb completion callback. crt_proto_query() internally
* sends an RPC to \a tgt_ep. \a cb will be called
* upon completion of that RPC. The highest protocol
Expand All @@ -1912,7 +1916,7 @@ crt_proto_query(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc,
*/
int
crt_proto_query_with_ctx(crt_endpoint_t *tgt_ep, crt_opcode_t base_opc, uint32_t *ver, int count,
crt_proto_query_cb_t cb, void *arg, crt_context_t ctx);
uint32_t timeout, crt_proto_query_cb_t cb, void *arg, crt_context_t ctx);
/**
* Set self rank.
*
Expand Down
10 changes: 7 additions & 3 deletions src/tests/ftest/cart/test_proto_client.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2018-2022 Intel Corporation.
* (C) Copyright 2018-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -65,6 +65,7 @@ test_run()
uint32_t s_high_ver = 0xFFFFFFFF;
uint32_t c_high_ver = test.tg_num_proto - 1;
int rc;
uint32_t timeout;

fprintf(stderr, "local group: %s remote group: %s\n",
test.tg_local_group_name, test.tg_remote_group_name);
Expand Down Expand Up @@ -119,8 +120,11 @@ test_run()
server_ep.ep_rank = 0;

DBG_PRINT("proto query\n");
rc = crt_proto_query(&server_ep, OPC_MY_PROTO, my_ver_array, 7,
query_cb, &s_high_ver);
timeout = 1;
do {
rc = crt_proto_query(&server_ep, OPC_MY_PROTO, my_ver_array, 7, timeout++, query_cb,
&s_high_ver);
} while (rc == -DER_TIMEDOUT);
D_ASSERT(rc == 0);

while (s_high_ver == 0xFFFFFFFF)
Expand Down

0 comments on commit 592e76e

Please sign in to comment.