Skip to content

Commit

Permalink
Merge pull request #15427 from daos-stack/jvolivie/merge
Browse files Browse the repository at this point in the history
Restore patches after upstream fixes
  • Loading branch information
jolivier23 authored Nov 1, 2024
2 parents bee6866 + b7ea05b commit 46b5d32
Show file tree
Hide file tree
Showing 22 changed files with 404 additions and 24 deletions.
1 change: 1 addition & 0 deletions .github/workflows/landing-builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ on:
- ci/**
- requirements-build.txt
- requirements-utest.txt
- utils/build.config

permissions: {}

Expand Down
3 changes: 2 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,7 @@ pipeline {
unitTestPost artifacts: ['nlt_logs/'],
testResults: 'nlt-junit.xml',
always_script: 'ci/unit/test_nlt_post.sh',
referenceJobName: 'daos-stack/daos/release%252F2.6',
valgrind_stash: 'el8-gcc-nlt-memcheck'
recordIssues enabledForFailure: true,
failOnError: false,
Expand Down Expand Up @@ -1037,7 +1038,7 @@ pipeline {
}
post {
always {
discoverGitReferenceBuild referenceJob: 'daos-stack/daos/master',
discoverGitReferenceBuild referenceJob: 'daos-stack/daos/release%252F2.6',
scm: 'daos-stack/daos',
requiredResult: hudson.model.Result.UNSTABLE
recordIssues enabledForFailure: true,
Expand Down
16 changes: 16 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
daos (2.6.1-4) unstable; urgency=medium
[ Tomasz Gromadzki ]
* Add support for the PMDK package 2.1.0 with NDCTL enabled.
* Increase the default ULT stack size to 20KiB if the engine uses
the DCPM storage class.
* Prevent using the RAM storage class (simulated PMem) when
the shutdown state (SDS) is active.
* Automatically disable SDS for the RAM storage class on engine startup.
* Require PMEMOBJ_CONF='sds.at_create=0' to be set explicitly in the
environment to deactivate SDS for the DAOS tools
(ddb, daos_perf, vos_perf, etc.) when used WITHOUT DCPM.
Otherwise, the tools stop with an error
like: "Unsafe shutdown count is not supported for this source".

-- Tomasz Gromadzki <[email protected]> Wed, 02 Oct 2024 12:00:00 +0200

daos (2.6.1-3) unstable; urgency=medium
[ Phillip Henderson ]
* Third release candidate for 2.6.1
Expand Down
8 changes: 5 additions & 3 deletions debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ Build-Depends: debhelper (>= 10),
libopenmpi-dev,
libssl-dev,
libyaml-dev,
libmercury-dev (>= 2.3.1.1),
libmercury-dev (>= 2.4),
scons,
uuid-dev,
pkg-config,
python3-dev,
python3-distro,
libabt-dev,
libucx-dev,
libpmemobj-dev (>= 2.0.0),
libpmemobj-dev (>= 2.1.0),
libfuse3-dev,
libprotobuf-c-dev,
libjson-c-dev,
Expand Down Expand Up @@ -118,7 +118,9 @@ Depends: python (>=3.8), python3, python-yaml, python3-yaml,
daos-client (= ${binary:Version}),
daos-admin (= ${binary:Version}),
golang-go (>= 2:1.21),
libcapstone-dev
libcapstone-dev,
libndctl-dev,
libdaxctl-dev
Description: The Distributed Asynchronous Object Storage (DAOS) is an open-source
software-defined object store designed from the ground up for
massively distributed Non Volatile Memory (NVM). DAOS takes advantage
Expand Down
1 change: 0 additions & 1 deletion site_scons/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ def define_components(reqs):
retriever=GitRepoRetriever(),
commands=[['make',
'all',
'NDCTL_ENABLE=n',
'BUILD_EXAMPLES=n',
'BUILD_BENCHMARKS=n',
'DOC=n',
Expand Down
19 changes: 19 additions & 0 deletions src/cart/README.env
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,22 @@ This file lists the environment variables used in CaRT.
Set parent directory for client side metrics. Each client will write its metrics to
a file with the pattern <D_CLIENT_METRICS_DUMP_DIR>/<DAOS_JOBID>-<pid>.csv. As a
convenience, setting this variable automatically sets D_CLIENT_METRICS_ENABLE=1.

D_POST_INIT
(server only) Controls the initial number of requests that are posted on context creation.
When using a transport that supports multi-recv, also controls the maximum size
of buffers (DAOS_RPC_SIZE x D_POST_INIT x D_MRECV_BUF).

D_POST_INCR
(server only) Controls the number of RPC handles that are incrementally posted when the
initial number of requests (D_POST_INIT) is exhausted.

D_MRECV_BUF
(server only) When using a transport that supports multi-recv, controls the total number
of multi-recv buffers that are posted.

D_MRECV_BUF_COPY
(server only) When using a transport that supports multi-recv, controls when we should
start copying data in an effort to release multi-recv buffers. Copy will occur when at
most D_MRECV_BUF_COPY buffers remain.

8 changes: 5 additions & 3 deletions src/cart/crt_hg.c
Original file line number Diff line number Diff line change
Expand Up @@ -859,10 +859,12 @@ crt_hg_class_init(crt_provider_t provider, int ctx_idx, bool primary, int iface_
if (prov_data->cpg_max_unexp_size > 0)
init_info.na_init_info.max_unexpected_size = prov_data->cpg_max_unexp_size;

init_info.request_post_init = crt_gdata.cg_post_init;
init_info.request_post_incr = crt_gdata.cg_post_incr;
init_info.request_post_init = crt_gdata.cg_post_init;
init_info.request_post_incr = crt_gdata.cg_post_incr;
init_info.multi_recv_op_max = crt_gdata.cg_mrecv_buf;
init_info.multi_recv_copy_threshold = crt_gdata.cg_mrecv_buf_copy;

hg_class = HG_Init_opt(info_string, crt_is_service(), &init_info);
hg_class = HG_Init_opt2(info_string, crt_is_service(), HG_VERSION(2, 4), &init_info);
if (hg_class == NULL) {
D_ERROR("Could not initialize HG class.\n");
D_GOTO(out, rc = -DER_HG);
Expand Down
3 changes: 2 additions & 1 deletion src/cart/crt_hg.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2016-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -30,6 +30,7 @@
/** default values for init / incr to prepost handles */
#define CRT_HG_POST_INIT (512)
#define CRT_HG_POST_INCR (512)
#define CRT_HG_MRECV_BUF (16)

#define CRT_UCX_STR "ucx"

Expand Down
8 changes: 7 additions & 1 deletion src/cart/crt_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,9 @@ static int data_init(int server, crt_init_options_t *opt)
uint32_t mem_pin_enable = 0;
uint32_t is_secondary;
uint32_t post_init = CRT_HG_POST_INIT, post_incr = CRT_HG_POST_INCR;
int rc = 0;
unsigned int mrecv_buf = CRT_HG_MRECV_BUF;
unsigned int mrecv_buf_copy = 0; /* buf copy disabled by default */
int rc = 0;

crt_env_dump();

Expand All @@ -255,6 +257,10 @@ static int data_init(int server, crt_init_options_t *opt)
crt_gdata.cg_post_init = post_init;
crt_env_get(D_POST_INCR, &post_incr);
crt_gdata.cg_post_incr = post_incr;
crt_env_get(D_MRECV_BUF, &mrecv_buf);
crt_gdata.cg_mrecv_buf = mrecv_buf;
crt_env_get(D_MRECV_BUF_COPY, &mrecv_buf_copy);
crt_gdata.cg_mrecv_buf_copy = mrecv_buf_copy;

is_secondary = 0;
/* Apply CART-890 workaround for server side only */
Expand Down
4 changes: 4 additions & 0 deletions src/cart/crt_internal_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ struct crt_gdata {
/** Hints to mercury for request post init (ignored for clients) */
uint32_t cg_post_init;
uint32_t cg_post_incr;
unsigned int cg_mrecv_buf;
unsigned int cg_mrecv_buf_copy;

/** global timeout value (second) for all RPCs */
uint32_t cg_timeout;
Expand Down Expand Up @@ -208,6 +210,8 @@ struct crt_event_cb_priv {
ENV(D_PORT_AUTO_ADJUST) \
ENV(D_POST_INCR) \
ENV(D_POST_INIT) \
ENV(D_MRECV_BUF) \
ENV(D_MRECV_BUF_COPY) \
ENV_STR(D_PROVIDER) \
ENV_STR_NO_PRINT(D_PROVIDER_AUTH_KEY) \
ENV(D_QUOTA_RPCS) \
Expand Down
86 changes: 86 additions & 0 deletions src/control/server/engine/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package engine
import (
"fmt"
"os"
"strconv"
"strings"

"github.com/pkg/errors"
Expand All @@ -28,6 +29,8 @@ const (
envLogMasks = "D_LOG_MASK"
envLogDbgStreams = "DD_MASK"
envLogSubsystems = "DD_SUBSYS"

minABTThreadStackSizeDCPM = 20480
)

// FabricConfig encapsulates networking fabric configuration.
Expand Down Expand Up @@ -342,7 +345,80 @@ func (c *Config) Validate() error {
if err := ValidateLogSubsystems(subsystems); err != nil {
return errors.Wrap(err, "validate engine log subsystems")
}
return nil
}

// UpdatePMDKEnvarsStackSizeDCPM makes sure ABT_THREAD_STACKSIZE is present in
// the engine environment and is not smaller than minABTThreadStackSizeDCPM.
//
// When the variable cannot be looked up it is appended to c.EnvVars with the
// minimum value. When it is present it must parse as an integer that is at
// least the minimum; otherwise an error is returned and c.EnvVars is left
// untouched.
func (c *Config) UpdatePMDKEnvarsStackSizeDCPM() error {
	const envName = "ABT_THREAD_STACKSIZE"

	current, lookupErr := c.GetEnvVar(envName)
	if lookupErr != nil {
		// Any lookup failure is treated as "not set": inject the minimum.
		minimum := fmt.Sprintf("ABT_THREAD_STACKSIZE=%d", minABTThreadStackSizeDCPM)
		c.EnvVars = append(c.EnvVars, minimum)
		return nil
	}

	size, parseErr := strconv.Atoi(current)
	switch {
	case parseErr != nil:
		return errors.Errorf("env_var ABT_THREAD_STACKSIZE has invalid value: %s", current)
	case size < minABTThreadStackSizeDCPM:
		return errors.Errorf("env_var ABT_THREAD_STACKSIZE should be >= %d "+
			"for DCPM storage class, found %d", minABTThreadStackSizeDCPM, size)
	default:
		return nil
	}
}

// UpdatePMDKEnvarsPMemobjConf checks and, where needed, adjusts the
// PMEMOBJ_CONF environment variable so that the shutdown state (SDS) setting
// matches the storage class in use.
//
// For DCPM (isDCPM == true) PMEMOBJ_CONF must not mention "sds.at_create" at
// all: an error is returned if it does, and nothing is modified otherwise.
//
// For any other storage class:
//   - if PMEMOBJ_CONF cannot be looked up, "PMEMOBJ_CONF=sds.at_create=0" is
//     appended to c.EnvVars;
//   - if it is set but has no "sds.at_create" entry, ";sds.at_create=0" is
//     appended to the existing value;
//   - if it contains "sds.at_create=1", an error is returned;
//   - otherwise the existing value is kept as is.
func (c *Config) UpdatePMDKEnvarsPMemobjConf(isDCPM bool) error {
	pmemobjConfStr, pmemobjConfErr := c.GetEnvVar("PMEMOBJ_CONF")
	// Also works for an empty string (presumably what GetEnvVar yields when
	// the lookup fails - TODO confirm): Contains simply reports false.
	hasSdsAtCreate := strings.Contains(pmemobjConfStr, "sds.at_create")

	if isDCPM {
		if !hasSdsAtCreate {
			return nil
		}
		// DCPM must keep the default handling of the shutdown state (SDS).
		return errors.New("env_var PMEMOBJ_CONF should NOT contain 'sds.at_create=?' " +
			"for DCPM storage class, found '" + pmemobjConfStr + "'")
	}

	// Disable shutdown state (SDS) (part of RAS) for RAM-based simulated SCM.
	if pmemobjConfErr != nil {
		c.EnvVars = append(c.EnvVars, "PMEMOBJ_CONF=sds.at_create=0")
		return nil
	}
	if !hasSdsAtCreate {
		// Replace the existing entry with one that also disables SDS. The
		// delete error is propagated rather than dropped so that a failed
		// removal can never cause c.EnvVars to be overwritten with whatever
		// slice DeleteKeyValue returns alongside the error.
		envVars, err := common.DeleteKeyValue(c.EnvVars, "PMEMOBJ_CONF")
		if err != nil {
			return errors.Wrap(err, "remove existing PMEMOBJ_CONF env_var")
		}
		c.EnvVars = append(envVars, "PMEMOBJ_CONF="+pmemobjConfStr+
			";sds.at_create=0")
		return nil
	}
	if strings.Contains(pmemobjConfStr, "sds.at_create=1") {
		return errors.New("env_var PMEMOBJ_CONF should contain 'sds.at_create=0' " +
			"for non-DCPM storage class, found '" + pmemobjConfStr + "'")
	}
	return nil
}

// UpdatePMDKEnvars adjusts the engine's PMDK-related environment variables
// (PMDK built with NDCTL enabled) according to the storage class of the first
// storage tier.
//
// PMEMOBJ_CONF is validated/updated for every storage class; the ABT thread
// stack size is additionally enforced when tier 0 is DCPM. An error is
// returned when no tiers are configured or when either step fails.
func (c *Config) UpdatePMDKEnvars() error {
	tiers := c.Storage.Tiers
	if len(tiers) == 0 {
		return errors.New("Invalid config - no tier 0 defined")
	}

	dcpm := tiers[0].Class == storage.ClassDcpm
	if err := c.UpdatePMDKEnvarsPMemobjConf(dcpm); err != nil {
		return err
	}
	if !dcpm {
		return nil
	}
	// Only DCPM-backed engines get the stack size requirement.
	return c.UpdatePMDKEnvarsStackSizeDCPM()
}

Expand Down Expand Up @@ -690,3 +766,13 @@ func (c *Config) WithStorageIndex(i uint32) *Config {
c.Storage.EngineIdx = uint(i)
return c
}

// WithEnvVarAbtThreadStackSize passes "ABT_THREAD_STACKSIZE=<stackSize>" to
// WithEnvVars and returns the resulting *Config so calls can be chained.
func (c *Config) WithEnvVarAbtThreadStackSize(stackSize uint16) *Config {
	return c.WithEnvVars(fmt.Sprintf("ABT_THREAD_STACKSIZE=%d", stackSize))
}

// WithEnvVarPMemObjSdsAtCreate passes "PMEMOBJ_CONF=sds.at_create=<value>" to
// WithEnvVars and returns the resulting *Config so calls can be chained.
// The intended values are 0 and 1; the number is formatted as given.
func (c *Config) WithEnvVarPMemObjSdsAtCreate(value uint8) *Config {
	sdsSetting := fmt.Sprintf("PMEMOBJ_CONF=sds.at_create=%d", value)
	return c.WithEnvVars(sdsSetting)
}
Loading

0 comments on commit 46b5d32

Please sign in to comment.