Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[teamd] Add Warm-reboot startup and shutdown mode for teamd #2173

Merged
merged 3 commits into from
Nov 6, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
354 changes: 354 additions & 0 deletions src/libteam/0005-libteam-Add-warm_reboot-mode.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,354 @@
diff --git a/libteam/ifinfo.c b/libteam/ifinfo.c
index 72155ae..266cc21 100644
--- a/libteam/ifinfo.c
+++ b/libteam/ifinfo.c
@@ -58,6 +58,7 @@ struct team_ifinfo {
char ifname[IFNAMSIZ];
uint32_t master_ifindex;
bool admin_state;
+ bool orig_addr_updated; /* FIXME: Check this. I think we don't need this flag */
Copy link
Collaborator

@qiluo-msft qiluo-msft Oct 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

    [](start = 1, length = 8)

Better follow existing code's indentation style. #Closed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this chunk was removed. It was needed only if netdev wasn't removed

#define MAX_PHYS_PORT_ID_LEN 32
char phys_port_id[MAX_PHYS_PORT_ID_LEN];
size_t phys_port_id_len;
@@ -105,15 +106,17 @@ static void update_hwaddr(struct team_ifinfo *ifinfo, struct rtnl_link *link)
hwaddr_len = nl_addr_get_len(nl_addr);
if (ifinfo->hwaddr_len != hwaddr_len) {
ifinfo->hwaddr_len = hwaddr_len;
- if (!ifinfo->master_ifindex)
+ if (!ifinfo->orig_addr_updated)
ifinfo->orig_hwaddr_len = hwaddr_len;
set_changed(ifinfo, CHANGED_HWADDR_LEN);
}
hwaddr = nl_addr_get_binary_addr(nl_addr);
if (memcmp(ifinfo->hwaddr, hwaddr, hwaddr_len)) {
memcpy(ifinfo->hwaddr, hwaddr, hwaddr_len);
- if (!ifinfo->master_ifindex)
+ if (!ifinfo->orig_addr_updated) {
memcpy(ifinfo->orig_hwaddr, hwaddr, hwaddr_len);
+ ifinfo->orig_addr_updated = true;
+ }
set_changed(ifinfo, CHANGED_HWADDR);
}
}
diff --git a/teamd/teamd.c b/teamd/teamd.c
index c987333..37c2fb3 100644
--- a/teamd/teamd.c
+++ b/teamd/teamd.c
@@ -116,7 +116,9 @@ static void print_help(const struct teamd_context *ctx) {
" -D --dbus-enable Enable D-Bus interface\n"
" -Z --zmq-enable=ADDRESS Enable ZeroMQ interface\n"
" -U --usock-enable Enable UNIX domain socket interface\n"
- " -u --usock-disable Disable UNIX domain socket interface\n",
+ " -u --usock-disable Disable UNIX domain socket interface\n"
+ " -w --warm-reboot Warm-reboot startup mode\n"
Copy link
Collaborator

@qiluo-msft qiluo-msft Oct 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warm-reboot [](start = 23, length = 11)

I believe these was discussion on the terminology. Use 'warm-start'? @lguohan #Closed

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agree, warm-start is better

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

+ " -L --lacp-directory Directory for saving lacp pdu dumps\n",
Copy link
Collaborator

@qiluo-msft qiluo-msft Oct 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lacp-directory [](start = 23, length = 14)

Suggest users specify the lacp-filename directly. #Closed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not that easy. We have not just one name, but list of names, one filename per interface from LAG.

ctx->argv0);
printf("Available runners: ");
for (i = 0; i < TEAMD_RUNNER_LIST_SIZE; i++) {
@@ -149,10 +151,12 @@ static int parse_command_line(struct teamd_context *ctx,
{ "zmq-enable", required_argument, NULL, 'Z' },
{ "usock-enable", no_argument, NULL, 'U' },
{ "usock-disable", no_argument, NULL, 'u' },
+ { "warm-reboot", no_argument, NULL, 'w' },
+ { "lacp-directory", required_argument, NULL, 'L' },
{ NULL, 0, NULL, 0 }
};

- while ((opt = getopt_long(argc, argv, "hdkevf:c:p:groNt:nDZ:Uu",
+ while ((opt = getopt_long(argc, argv, "hdkevf:c:p:groNt:nDZ:UuwL:",
long_options, NULL)) >= 0) {

switch(opt) {
@@ -230,6 +234,12 @@ static int parse_command_line(struct teamd_context *ctx,
case 'u':
ctx->usock.enabled = false;
break;
+ case 'w':
+ ctx->warm_reboot = true;
+ break;
+ case 'L':
+ ctx->lacp_directory = strdup(optarg);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you make sure the directory exist here? we do not want to get error msg when we try to dump the lacp pdu to disk, if directory does not exist, we'd like to know the error msg early.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What to do after the error message? exit?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Just error message, no exit

+ break;
default:
return -1;
}
@@ -384,8 +394,14 @@ static int teamd_run_loop_run(struct teamd_context *ctx)
if (err != -1) {
switch(ctrl_byte) {
case 'q':
+ case 'w':
if (quit_in_progress)
return -EBUSY;
+ if (ctrl_byte == 'w') {
+ ctx->keep_ports = true;
+ ctx->no_quit_destroy = true;
+ teamd_ports_flush_data(ctx);
+ }
teamd_refresh_ports(ctx);
err = teamd_flush_ports(ctx);
if (err)
@@ -428,6 +444,12 @@ void teamd_run_loop_quit(struct teamd_context *ctx, int err)
teamd_run_loop_sent_ctrl_byte(ctx, 'q');
}

+static void teamd_run_loop_quit_w_boot(struct teamd_context *ctx, int err)
+{
+ ctx->run_loop.err = err;
+ teamd_run_loop_sent_ctrl_byte(ctx, 'w');
+}
+
void teamd_run_loop_restart(struct teamd_context *ctx)
{
teamd_run_loop_sent_ctrl_byte(ctx, 'r');
@@ -694,6 +716,10 @@ static int callback_daemon_signal(struct teamd_context *ctx, int events,
teamd_log_warn("Got SIGINT, SIGQUIT or SIGTERM.");
Copy link
Collaborator

@qiluo-msft qiluo-msft Oct 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SIGQUIT [](start = 31, length = 7)

We may reuse sigterm instead of sigusr1.
If lacp-directory is provided, we can save state, otherwise just normal quit.

It's no harm to save for cold system reboot or cold docker restart. #Closed

Copy link
Contributor Author

@pavel-shirshov pavel-shirshov Nov 1, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warm stop is not about file saving only. It includes other activities, like remove portchannel interface, put down physical ports. Also I think explicit signal is better than changed behavior as side effect of some setting.

teamd_run_loop_quit(ctx, 0);
break;
+ case SIGUSR1:
+ teamd_log_warn("Got SIGUSR1.");
+ teamd_run_loop_quit_w_boot(ctx, 0);
+ break;
}
return 0;
}
@@ -1507,7 +1533,7 @@ static int teamd_start(struct teamd_context *ctx, enum teamd_exit_code *p_ret)
return -errno;
}

- if (daemon_signal_init(SIGINT, SIGTERM, SIGQUIT, SIGHUP, 0) < 0) {
+ if (daemon_signal_init(SIGINT, SIGTERM, SIGQUIT, SIGHUP, SIGUSR1, 0) < 0) {
teamd_log_err("Could not register signal handlers.");
daemon_retval_send(errno);
err = -errno;
diff --git a/teamd/teamd.h b/teamd/teamd.h
index ef0fb1c..503d9db 100644
--- a/teamd/teamd.h
+++ b/teamd/teamd.h
@@ -125,6 +125,9 @@ struct teamd_context {
char * hwaddr;
uint32_t hwaddr_len;
bool hwaddr_explicit;
+ bool warm_reboot;
+ bool keep_ports;
+ char * lacp_directory;
struct {
struct list_item callback_list;
int ctrl_pipe_r;
@@ -191,12 +194,15 @@ struct teamd_event_watch_ops {
struct teamd_port *tdport, void *priv);
void (*refresh)(struct teamd_context *ctx,
struct teamd_port *tdport, void *priv);
+ void (*port_flush_data)(struct teamd_context *ctx,
+ struct teamd_port *tdport, void *priv);
int (*option_changed)(struct teamd_context *ctx,
struct team_option *option, void *priv);
char *option_changed_match_name;
};

void teamd_refresh_ports(struct teamd_context *ctx);
+void teamd_ports_flush_data(struct teamd_context *ctx);
int teamd_event_port_added(struct teamd_context *ctx,
struct teamd_port *tdport);
void teamd_event_port_removed(struct teamd_context *ctx,
diff --git a/teamd/teamd_events.c b/teamd/teamd_events.c
index 5c2ef56..50e5a08 100644
--- a/teamd/teamd_events.c
+++ b/teamd/teamd_events.c
@@ -47,6 +47,19 @@ void teamd_refresh_ports(struct teamd_context *ctx)
}
}

+void teamd_ports_flush_data(struct teamd_context *ctx)
+{
+ struct teamd_port *tdport;
+ struct event_watch_item *watch;
+
+ teamd_for_each_tdport(tdport, ctx) {
+ list_for_each_node_entry(watch, &ctx->event_watch_list, list) {
+ if (!watch->ops->port_flush_data) continue;
+ watch->ops->port_flush_data(ctx, tdport, watch->priv);
+ }
+ }
+}
+
int teamd_event_port_added(struct teamd_context *ctx,
struct teamd_port *tdport)
{
diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c
index 81324de..45d667d 100644
--- a/teamd/teamd_runner_lacp.c
+++ b/teamd/teamd_runner_lacp.c
@@ -174,6 +174,9 @@ struct lacp_port {
struct lacp_port *agg_lead; /* leading port of aggregator.
* NULL in case this port is not selected */
enum lacp_port_state state;
+ bool lacpdu_received;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't see it being used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. You're absolutely right. I've found this issue too, but didn't have a chance to fix it. I'm going to fix it on the next iteration.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

+ bool lacpdu_saved;
+ struct lacpdu last_pdu;
struct {
uint32_t speed;
uint8_t duplex;
@@ -1084,26 +1087,23 @@ static int lacpdu_send(struct lacp_port *lacp_port)
return err;
}

-static int lacpdu_recv(struct lacp_port *lacp_port)
+static int lacpdu_process(struct lacp_port *lacp_port, struct lacpdu* lacpdu)
{
- struct lacpdu lacpdu;
- struct sockaddr_ll ll_from;
int err;

- err = teamd_recvfrom(lacp_port->sock, &lacpdu, sizeof(lacpdu), 0,
- (struct sockaddr *) &ll_from, sizeof(ll_from));
- if (err <= 0)
- return err;
-
- if (!lacpdu_check(&lacpdu)) {
+ if (!lacpdu_check(lacpdu)) {
teamd_log_warn("malformed LACP PDU came.");
return 0;
}

+ /* save received lacp pdu frame */
+ (void)memcpy(&lacp_port->last_pdu, lacpdu, sizeof(struct lacpdu));
+ lacp_port->lacpdu_saved = true;
+
/* Check if we have correct info about the other side */
- if (memcmp(&lacpdu.actor, &lacp_port->partner,
+ if (memcmp(&lacpdu->actor, &lacp_port->partner,
sizeof(struct lacpdu_info))) {
- lacp_port->partner = lacpdu.actor;
+ lacp_port->partner = lacpdu->actor;
err = lacp_port_partner_update(lacp_port);
if (err)
return err;
@@ -1118,7 +1118,7 @@ static int lacpdu_recv(struct lacp_port *lacp_port)

/* Check if the other side has correct info about us */
if (!lacp_port->periodic_on &&
- memcmp(&lacpdu.partner, &lacp_port->actor,
+ memcmp(&lacpdu->partner, &lacp_port->actor,
sizeof(struct lacpdu_info))) {
err = lacpdu_send(lacp_port);
if (err)
@@ -1133,6 +1133,47 @@ static int lacpdu_recv(struct lacp_port *lacp_port)
return 0;
}

+static int lacpdu_recv(struct lacp_port *lacp_port)
+{
+ struct lacpdu lacpdu;
+ struct sockaddr_ll ll_from;
+ int err;
+
+ err = teamd_recvfrom(lacp_port->sock, &lacpdu, sizeof(lacpdu), 0,
+ (struct sockaddr *) &ll_from, sizeof(ll_from));
+ if (err <= 0)
+ return err;
+
+ return lacpdu_process(lacp_port, &lacpdu);
+}
+
+static int lacpdu_read(struct lacp_port *lacp_port)
+{
+ FILE* fp;
+ char path[PATH_MAX];
+ struct lacpdu lacpdu;
+ int err;
+
+ strcpy(path, lacp_port->ctx->lacp_directory);
+ if (path[strlen(path) - 1] != '/')
+ strcat(path, "/"); /* Add trailing slash if we don't have one in the path */
+ strcat(path, lacp_port->tdport->ifname);
+ fp = fopen(path, "r");
+ if (!fp) {
+ teamd_log_err("Can't open lacp-saved dump from file %s: %s", path, strerror(errno));
+ return errno;
+ }
+
+ err = fread(&lacpdu, sizeof(struct lacpdu), 1, fp);
+ (void)fclose(fp);
+ if (err <= 0) {
+ teamd_log_err("Can't read lacp-saved dump from file %s: %s", path, strerror(errno));
+ return err;
+ }
+
+ return lacpdu_process(lacp_port, &lacpdu);
Copy link
Collaborator

@qiluo-msft qiluo-msft Oct 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

    [](start = 1, length = 8)

less indentation? #Closed

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually you use 8 blanks, and original style is tab.


In reply to: 229898243 [](ancestors = 229898243)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be fixed now

+}
+
static int lacp_callback_timeout(struct teamd_context *ctx, int events,
void *priv)
{
@@ -1299,6 +1340,11 @@ static int lacp_port_added(struct teamd_context *ctx,
lacp_port_actor_init(lacp_port);
lacp_port_link_update(lacp_port);

+ /* Read data from file and process it */
+ if (ctx->warm_reboot && ctx->lacp_directory) {
+ (void)lacpdu_read(lacp_port);
+ }
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When will the dumped data file for each port be removed? erase after read or upon port remove, or outside of the teamd process? Leaving the file permanently there might cause confusion if there are multiple reboot/restart (Mixed warm/cold).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it should be outside program to remove the file, teamd already close it and read it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My design is to remove the files right after the read. The information in the file is outdated right after the read.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in the latest patch

+
teamd_loop_callback_enable(ctx, LACP_SOCKET_CB_NAME, lacp_port);
return 0;

@@ -1321,7 +1367,11 @@ static void lacp_port_removed(struct teamd_context *ctx,
{
struct lacp_port *lacp_port = priv;

- lacp_port_set_state(lacp_port, PORT_STATE_DISABLED);
+ if (!lacp_port->ctx->keep_ports) {
+ /* Don't transition into DISABLED state,
+ which sends EXPIRED LACP PDU update */
+ lacp_port_set_state(lacp_port, PORT_STATE_DISABLED);
+ }
teamd_loop_callback_del(ctx, LACP_TIMEOUT_CB_NAME, lacp_port);
teamd_loop_callback_del(ctx, LACP_PERIODIC_CB_NAME, lacp_port);
teamd_loop_callback_del(ctx, LACP_SOCKET_CB_NAME, lacp_port);
@@ -1413,6 +1463,29 @@ static void lacp_event_watch_refresh(struct teamd_context *ctx, struct teamd_por
(void) lacpdu_send(lacp_port);
}

+static void lacp_event_watch_port_flush_data(struct teamd_context *ctx, struct teamd_port *tdport, void *priv)
+{
+ struct lacp *lacp = priv;
+
+ struct lacp_port *lacp_port = lacp_port_get(lacp, tdport);
+ if(lacp_port->lacpdu_saved && lacp_port->ctx->lacp_directory) {
+ char filename[PATH_MAX];
+ strcpy(filename, lacp_port->ctx->lacp_directory);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in case the directory is not ended with '/', you may want to add here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed

+ strcat(filename, lacp_port->tdport->ifname);
+ FILE *fp = fopen(filename, "wb");
+ if (fp != NULL) {
+ (void)fwrite(&lacp_port->last_pdu, sizeof(struct lacpdu), 1, fp);
+ (void)fclose(fp);
+ } else {
+ teamd_log_err("Can't open file %s for writing %s", filename, strerror(errno));
+ }
+ } else {
+ teamd_log_err("Can't dump received lacp pdu for port %s. "
+ "Either it wasn't received, or directory to save wasn't configured",
+ lacp_port->tdport->ifname);
+ }
+}
+
static const struct teamd_event_watch_ops lacp_event_watch_ops = {
.hwaddr_changed = lacp_event_watch_hwaddr_changed,
.port_added = lacp_event_watch_port_added,
@@ -1420,6 +1493,7 @@ static const struct teamd_event_watch_ops lacp_event_watch_ops = {
.port_changed = lacp_event_watch_port_changed,
.admin_state_changed = lacp_event_watch_admin_state_changed,
.refresh = lacp_event_watch_refresh,
+ .port_flush_data = lacp_event_watch_port_flush_data,
};

static int lacp_carrier_init(struct teamd_context *ctx, struct lacp *lacp)
@@ -1946,7 +2020,7 @@ static void lacp_fini(struct teamd_context *ctx, void *priv)
teamd_state_val_unregister(ctx, &lacp_state_vg, lacp);
teamd_balancer_fini(lacp->tb);
teamd_event_watch_unregister(ctx, &lacp_event_watch_ops, lacp);
- lacp_carrier_fini(ctx, lacp);
+ if (!ctx->keep_ports) lacp_carrier_fini(ctx, lacp);
}

const struct teamd_runner teamd_runner_lacp = {
1 change: 1 addition & 0 deletions src/libteam/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ $(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% :
git apply ../0002-libteam-Temporarily-remove-redundant-debug-mes.patch
git apply ../0003-teamd-lacp-runner-will-send-lacp-update-right-after-.patch
git apply ../0004-libteam-Add-lacp-fallback-support-for-single-member-.patch
git apply ../0005-libteam-Add-warm_reboot-mode.patch
popd

# Obtain debian packaging
Expand Down