Merge remote-tracking branch 'origin/master' into DAOS-4441
PetFet committed Apr 22, 2020
2 parents 1919e92 + ab5f529 commit 1f1edc7
Showing 40 changed files with 1,646 additions and 1,572 deletions.
11 changes: 10 additions & 1 deletion src/cart/src/cart/crt_init.c
@@ -55,7 +55,8 @@ dump_envariables(void)
"D_LOG_FILE_APPEND_PID", "D_LOG_MASK", "DD_MASK",
"DD_STDERR", "DD_SUBSYS", "CRT_TIMEOUT", "CRT_ATTACH_INFO_PATH",
"OFI_PORT", "OFI_INTERFACE", "OFI_DOMAIN", "CRT_CREDIT_EP_CTX",
"CRT_CTX_SHARE_ADDR", "CRT_CTX_NUM", "D_FI_CONFIG"};
"CRT_CTX_SHARE_ADDR", "CRT_CTX_NUM", "D_FI_CONFIG",
"FI_UNIVERSE_SIZE"};

D_DEBUG(DB_ALL, "-- ENVARS: --\n");
for (i = 0; i < ARRAY_SIZE(envars); i++) {
@@ -71,6 +72,7 @@ static int data_init(crt_init_options_t *opt)
uint32_t credits;
bool share_addr = false;
uint32_t ctx_num = 1;
uint32_t fi_univ_size = 0;
int rc = 0;

D_DEBUG(DB_ALL, "initializing crt_gdata...\n");
@@ -121,6 +123,13 @@ static int data_init(crt_init_options_t *opt)
d_getenv_int("CRT_CREDIT_EP_CTX", &credits);
}

/* This is a workaround for CART-871 if universe size is not set */
d_getenv_int("FI_UNIVERSE_SIZE", &fi_univ_size);
if (fi_univ_size == 0) {
D_WARN("FI_UNIVERSE_SIZE was not set; setting to 2048\n");
setenv("FI_UNIVERSE_SIZE", "2048", 1);
}

if (credits == 0) {
D_DEBUG(DB_ALL, "CRT_CREDIT_EP_CTX set as 0, flow control "
"disabled.\n");
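The crt_init.c hunk above applies a simple environment-variable fallback: read FI_UNIVERSE_SIZE and, if it is unset or zero, export a default of 2048 before the fabric provider is initialized. Below is a minimal standalone sketch of the same pattern, using plain getenv/setenv instead of CaRT's d_getenv_int helper; the 2048 default and warning text mirror the hunk, everything else is illustrative.

#include <stdio.h>
#include <stdlib.h>

/*
 * Same fallback shape as the crt_init.c change: if FI_UNIVERSE_SIZE is
 * unset (or parses to 0), export a default of 2048 so the fabric layer
 * sees a sane value. Plain getenv/setenv stand in for d_getenv_int here.
 */
static void
ensure_fi_universe_size(void)
{
	const char *val  = getenv("FI_UNIVERSE_SIZE");
	long        size = (val != NULL) ? strtol(val, NULL, 10) : 0;

	if (size <= 0) {
		fprintf(stderr, "FI_UNIVERSE_SIZE not set; defaulting to 2048\n");
		setenv("FI_UNIVERSE_SIZE", "2048", 1 /* overwrite existing value */);
	}
}
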
1 change: 1 addition & 0 deletions src/client/api/init.c
@@ -96,6 +96,7 @@ const struct daos_task_api dc_funcs[] = {
{dc_tx_abort, sizeof(daos_tx_abort_t)},
{dc_tx_open_snap, sizeof(daos_tx_open_snap_t)},
{dc_tx_close, sizeof(daos_tx_close_t)},
{dc_tx_restart, sizeof(daos_tx_restart_t)},

/** Object */
{dc_obj_register_class, sizeof(daos_obj_register_class_t)},
3 changes: 2 additions & 1 deletion src/client/api/task_internal.h
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2017-2019 Intel Corporation.
* (C) Copyright 2017-2020 Intel Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -80,6 +80,7 @@ struct daos_task_args {
daos_tx_commit_t tx_commit;
daos_tx_abort_t tx_abort;
daos_tx_close_t tx_close;
daos_tx_restart_t tx_restart;

/** Object */
daos_obj_register_class_t obj_reg_class;
24 changes: 22 additions & 2 deletions src/client/api/tx.c
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2018 Intel Corporation.
* (C) Copyright 2018-2020 Intel Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,7 +28,8 @@
#include "task_internal.h"

int
daos_tx_open(daos_handle_t coh, daos_handle_t *th, daos_event_t *ev)
daos_tx_open(daos_handle_t coh, daos_handle_t *th, uint64_t flags,
daos_event_t *ev)
{
daos_tx_open_t *args;
tse_task_t *task;
@@ -42,6 +43,7 @@ daos_tx_open(daos_handle_t coh, daos_handle_t *th, daos_event_t *ev)
args = dc_task_get_args(task);
args->coh = coh;
args->th = th;
args->flags = flags;

return dc_task_schedule(task, true);
}
@@ -121,6 +123,24 @@ daos_tx_open_snap(daos_handle_t coh, daos_epoch_t epoch, daos_handle_t *th,
return dc_task_schedule(task, true);
}

int
daos_tx_restart(daos_handle_t th, daos_event_t *ev)
{
daos_tx_restart_t *args;
tse_task_t *task;
int rc;

DAOS_API_ARG_ASSERT(*args, TX_RESTART);
rc = dc_task_create(dc_tx_restart, NULL, ev, &task);
if (rc)
return rc;

args = dc_task_get_args(task);
args->th = th;

return dc_task_schedule(task, true);
}

int
daos_tx_local2global(daos_handle_t th, d_iov_t *glob)
{
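Taken together, the tx.c changes extend daos_tx_open with a flags argument and add a daos_tx_restart entry point (whose dc_tx_restart backend is still a stub in this commit). The sketch below is a hedged usage example of the updated client API: coh is assumed to be an already-open container handle, do_updates() is a hypothetical application I/O step, and daos_tx_commit/daos_tx_close are used with their pre-existing (handle, event) signatures.

#include <daos.h>

static int do_updates(daos_handle_t th);	/* hypothetical application I/O */

/*
 * Illustrative only: open a transaction with the new flags argument,
 * run some application updates, and fall back to daos_tx_restart on a
 * failed commit. The restart path is shown for shape only, since
 * dc_tx_restart is still a no-op ("TBD") at this point in the tree.
 */
static int
update_in_tx(daos_handle_t coh)
{
	daos_handle_t	th;
	int		rc;

	rc = daos_tx_open(coh, &th, 0 /* flags, new in this change */, NULL);
	if (rc != 0)
		return rc;

	rc = do_updates(th);
	if (rc == 0)
		rc = daos_tx_commit(th, NULL);
	if (rc != 0)
		daos_tx_restart(th, NULL);	/* retry point once implemented */

	daos_tx_close(th, NULL);
	return rc;
}
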
17 changes: 16 additions & 1 deletion src/client/pydaos/raw/daos_api.py
@@ -1669,7 +1669,7 @@ def get_new_tx(self):
c_tx = ctypes.c_uint64(txn)

func = self.context.get_function('open-tx')
ret = func(self.coh, ctypes.byref(c_tx), None)
ret = func(self.coh, ctypes.byref(c_tx), 0, None)
if ret != 0:
raise DaosApiError("tx open returned non-zero. RC: {0}"
.format(ret))
@@ -1716,6 +1716,20 @@ def abort_tx(self, txn):
raise DaosApiError("TX abort returned non-zero. RC: {0}"
.format(ret))

def restart_tx(self, txn):
"""Restart a transaction that is being modified."""
# container should be in open state
if self.coh == 0:
raise DaosApiError("Container needs to be opened.")

c_tx = ctypes.c_uint64(txn)

func = self.context.get_function('restart-tx')
ret = func(c_tx, None)
if ret != 0:
raise DaosApiError("TX restart returned non-zero. RC: {0}"
.format(ret))

def write_an_array_value(self, datalist, dkey, akey, obj=None, rank=None,
obj_cls=None, txn=daos_cref.DAOS_TX_NONE):
"""Write an array of data to an object.
@@ -2303,6 +2317,7 @@ def __init__(self, path):
'query-obj': self.libdaos.daos_obj_query,
'query-pool': self.libdaos.daos_pool_query,
'query-target': self.libdaos.daos_pool_query_target,
'restart-tx': self.libdaos.daos_tx_restart,
'set-cont-attr': self.libdaos.daos_cont_set_attr,
'set-pool-attr': self.libdaos.daos_pool_set_attr,
'stop-service': self.libdaos.daos_pool_stop_svc,
7 changes: 7 additions & 0 deletions src/container/cli_tx.c
@@ -412,6 +412,13 @@ dc_tx_close(tse_task_t *task)
return rc;
}

int
dc_tx_restart(tse_task_t *task)
{
/* TBD */
return 0;
}

/*
* MSC - this is a temporary special TX for rebuild that needs to use the client
* stack with a specific epoch.
55 changes: 28 additions & 27 deletions src/control/server/ctl_storage_rpc.go
@@ -40,7 +40,7 @@ import (
)

const (
msgFormatErr = "failure formatting storage, check RPC response for details"
msgFormatErr = "instance %d: failure formatting storage, check RPC response for details"
msgNvmeFormatSkip = "NVMe format skipped on instance %d as SCM format did not complete"
)

@@ -209,7 +209,7 @@ func newCret(log logging.Logger, op, pciAddr string, status ctlpb.ResponseStatus
}
}

func (c *ControlService) scmFormat(scmCfg storage.ScmConfig, reformat bool) (*ctlpb.ScmMountResult, error) {
func (c *ControlService) scmFormat(srvIdx uint32, scmCfg storage.ScmConfig, reformat bool) (*ctlpb.ScmMountResult, error) {
var eMsg, iMsg string
status := ctlpb.ResponseStatus_CTL_SUCCESS

@@ -219,7 +219,7 @@ func (c *ControlService) scmFormat(scmCfg storage.ScmConfig, reformat bool) (*ct
}

scmStr := fmt.Sprintf("SCM (%s:%s)", scmCfg.Class, scmCfg.MountPoint)
c.log.Infof("Starting format of %s", scmStr)
c.log.Infof("Instance %d: starting format of %s", srvIdx, scmStr)
res, err := c.scm.Format(*req)
if err != nil {
eMsg = err.Error()
@@ -235,28 +235,34 @@ func (c *ControlService) scmFormat(scmCfg storage.ScmConfig, reformat bool) (*ct
if err != nil {
c.log.Errorf(" format of %s failed: %s", scmStr, err)
}
c.log.Infof("Finished format of %s", scmStr)
c.log.Infof("Instance %d: finished format of %s", srvIdx, scmStr)

return newMntRet(c.log, "format", scmCfg.MountPoint, status, eMsg, iMsg), nil
}

// doFormat performs format on storage subsystems, populates response results
// in storage subsystem routines and broadcasts (closes channel) if successful.
func (c *ControlService) doFormat(i *IOServerInstance, reformat bool, resp *ctlpb.StorageFormatResp) error {
func (c *ControlService) doFormat(srv *IOServerInstance, reformat bool, resp *ctlpb.StorageFormatResp) error {
srvIdx := srv.Index()
needsSuperblock := true
needsScmFormat := reformat
skipNvmeResult := newCret(c.log, "format", "", ctlpb.ResponseStatus_CTL_SUCCESS, "",
fmt.Sprintf(msgNvmeFormatSkip, i.Index()))
fmt.Sprintf(msgNvmeFormatSkip, srvIdx))

c.log.Infof("formatting storage for %s instance %d (reformat: %t)",
DataPlaneName, i.Index(), reformat)
c.log.Infof("Formatting storage for %s instance %d (reformat: %t)",
DataPlaneName, srvIdx, reformat)

scmConfig := i.scmConfig()
scmConfig := srv.scmConfig()

if srv.isStarted() {
return errors.Errorf("instance %d: can't format storage of running instance",
srvIdx)
}

// If not reformatting, check if SCM is already formatted.
if !reformat {
var err error
needsScmFormat, err = i.NeedsScmFormat()
needsScmFormat, err = srv.NeedsScmFormat()
if err != nil {
return errors.Wrap(err, "unable to check storage formatting")
}
@@ -267,7 +273,7 @@ func (c *ControlService) doFormat(i *IOServerInstance, reformat bool, resp *ctlp
ctlpb.ResponseStatus_CTL_ERR_SCM, err.Error(),
fault.ShowResolutionFor(err)))

if len(i.bdevConfig().DeviceList) > 0 {
if len(srv.bdevConfig().DeviceList) > 0 {
resp.Crets = append(resp.Crets, skipNvmeResult)
}

@@ -277,15 +283,15 @@ func (c *ControlService) doFormat(i *IOServerInstance, reformat bool, resp *ctlp

// When SCM format is required, format and append to response results.
if needsScmFormat {
result, err := c.scmFormat(scmConfig, true)
result, err := c.scmFormat(srvIdx, scmConfig, true)
if err != nil {
return errors.Wrap(err, "scm format") // return unexpected errors
}
resp.Mrets = append(resp.Mrets, result)

if result.State.Status != ctlpb.ResponseStatus_CTL_SUCCESS {
c.log.Error(msgFormatErr)
if len(i.bdevConfig().DeviceList) > 0 {
c.log.Errorf(msgFormatErr, srvIdx)
if len(srv.bdevConfig().DeviceList) > 0 {
resp.Crets = append(resp.Crets, skipNvmeResult)
}

@@ -294,7 +300,7 @@ func (c *ControlService) doFormat(i *IOServerInstance, reformat bool, resp *ctlp
} else {
var err error
// If SCM was already formatted, verify if superblock exists.
needsSuperblock, err = i.NeedsSuperblock()
needsSuperblock, err = srv.NeedsSuperblock()
if err != nil {
return errors.Wrap(err, "unable to check instance superblock")
}
@@ -303,12 +309,13 @@ func (c *ControlService) doFormat(i *IOServerInstance, reformat bool, resp *ctlp
// If no superblock exists, format NVMe and populate response with results.
if needsSuperblock {
nvmeResults := proto.NvmeControllerResults{}
bdevConfig := i.bdevConfig()
bdevConfig := srv.bdevConfig()

// A config with SCM and no block devices is valid.
if len(bdevConfig.DeviceList) > 0 {
bdevListStr := strings.Join(bdevConfig.DeviceList, ",")
c.log.Infof("Starting format of %s block devices (%s)", bdevConfig.Class, bdevListStr)
c.log.Infof("Instance %d: starting format of %s block devices (%s)",
srvIdx, bdevConfig.Class, bdevListStr)

res, err := c.bdev.Format(bdev.FormatRequest{
Class: bdevConfig.Class,
Expand All @@ -333,18 +340,19 @@ func (c *ControlService) doFormat(i *IOServerInstance, reformat bool, resp *ctlp
newCret(c.log, "format", dev, ctlpbStatus, errMsg, infoMsg))
}

c.log.Infof("Finished format of %s block devices (%s)", bdevConfig.Class, bdevListStr)
c.log.Infof("Instance %d: finished format of %s block devices (%s)",
srvIdx, bdevConfig.Class, bdevListStr)
}

resp.Crets = append(resp.Crets, nvmeResults...) // append this instance's results

if nvmeResults.HasErrors() {
c.log.Error(msgFormatErr)
c.log.Errorf(msgFormatErr, srvIdx)
return nil // don't continue if we can't format NVMe
}
}

i.NotifyStorageReady()
srv.NotifyStorageReady()

return nil
}
Expand All @@ -364,13 +372,6 @@ func (c *ControlService) StorageFormat(ctx context.Context, req *ctlpb.StorageFo

c.log.Debugf("received StorageFormat RPC %v; proceeding to instance storage format", req)

// TODO: We may want to ease this restriction at some point, but having this
// here for now should help to cut down on shenanigans which might result
// in data loss.
if c.harness.IsStarted() {
return nil, errors.New("cannot format storage with running I/O server instances")
}

// temporary scaffolding
for _, i := range c.harness.Instances() {
if err := c.doFormat(i, req.Reformat, resp); err != nil {