From 90395743d30fdde9e1e76c18d8ab5c2f0435c7b8 Mon Sep 17 00:00:00 2001 From: Marco Varlese Date: Wed, 10 Oct 2018 09:38:07 +0200 Subject: [PATCH 01/70] Update .gitreview for stable 18.10 branch Change-Id: I9f3d551acad6fd2fdd733f7f49e8c75ef43ceebc Signed-off-by: Marco Varlese --- .gitreview | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitreview b/.gitreview index 1db08df202d3..98f6b2d8d67d 100644 --- a/.gitreview +++ b/.gitreview @@ -2,3 +2,4 @@ host=gerrit.fd.io port=29418 project=vpp +defaultbranch=stable/1810 From 5551e41f78249b72715203ecd93586f48acccbf4 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 11 Oct 2018 09:18:26 -0700 Subject: [PATCH 02/70] Fix vpp-ext-deps package version in stable branch Change-Id: Ifb33622b50113501f1d23ab94ba9da708678d6be Signed-off-by: Damjan Marion --- build/external/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/external/Makefile b/build/external/Makefile index 8ac764caf25a..1082cfc1c397 100644 --- a/build/external/Makefile +++ b/build/external/Makefile @@ -20,7 +20,7 @@ MAKE_ARGS ?= -j BUILD_DIR ?= $(CURDIR)/_build INSTALL_DIR ?= $(CURDIR)/_install PKG_VERSION ?= $(shell git describe --abbrev=0 | cut -d- -f1 | cut -dv -f2) -PKG_SUFFIX ?= $(shell git log --oneline $$(git describe --abbrev=0).. . | wc -l) +PKG_SUFFIX ?= $(shell git log --oneline v$(PKG_VERSION)-rc0.. . | wc -l) JOBS := $(if $(shell [ -f /proc/cpuinfo ] && head /proc/cpuinfo),\ $(shell grep -c ^processor /proc/cpuinfo), 2) From 376414f4c3f53af44da4e82ee5d0b1843b291f8e Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Wed, 10 Oct 2018 16:15:55 +0200 Subject: [PATCH 03/70] vnet: complete the fix for l3_hdr_offset calculation for single loop fastpath case (VPP-1444) 20e6d36b has moved the calculation of the l3_hdr_offset into the determine_next_node() function, with the assumption that the current_data in the buffer is at the L3 header. 
This is not the case for the single loop fastpath, where the vlib_buffer_advance() call is made after the call to determine_next_node(), as a day1 behavior. As a result - that path incorrectly sets the l3_hdr_offset. Solution: move the vlib_buffer_advance() call to before determine_next_node() Change-Id: Id5eaa084c43fb6564f8239df4a0b3dc0412b15de Signed-off-by: Andrew Yourtchenko --- src/vnet/ethernet/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index 0034577694c7..53d5b4eb02d9 100755 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -657,9 +657,9 @@ ethernet_input_inline (vlib_main_t * vm, (hi->hw_address != 0) && !eth_mac_equal ((u8 *) e0, hi->hw_address)) error0 = ETHERNET_ERROR_L3_MAC_MISMATCH; + vlib_buffer_advance (b0, sizeof (ethernet_header_t)); determine_next_node (em, variant, 0, type0, b0, &error0, &next0); - vlib_buffer_advance (b0, sizeof (ethernet_header_t)); } goto ship_it0; } From 7212e61d925f1025dbefa2811e0e8a704b24108a Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Wed, 10 Oct 2018 10:39:36 +0200 Subject: [PATCH 04/70] acl-plugin: reduce the syslog level for debug messages (VPP-1443) Change-Id: Ie8380cb39424548bf64cb19aee59ec20e29d1e39 Signed-off-by: Andrew Yourtchenko --- src/plugins/acl/sess_mgmt_node.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/plugins/acl/sess_mgmt_node.c b/src/plugins/acl/sess_mgmt_node.c index f38677f8d5a6..a6c5e8049d6a 100644 --- a/src/plugins/acl/sess_mgmt_node.c +++ b/src/plugins/acl/sess_mgmt_node.c @@ -689,7 +689,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, } } } - acl_log_err + acl_log_info ("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U, clear_all: %u", format_bitmap_hex, clear_sw_if_index_bitmap, clear_all); vec_foreach (pw0, am->per_worker_data) @@ -727,7 +727,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, 
pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup (clear_sw_if_index_bitmap); } - acl_log_err + acl_log_info ("ACL_FA_CLEANER: thread %u, pending clear bitmap: %U", (am->per_worker_data - pw0), format_bitmap_hex, pw0->pending_clear_sw_if_index_bitmap); @@ -738,8 +738,9 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, send_interrupts_to_workers (vm, am); /* now wait till they all complete */ - acl_log_err ("CLEANER mains len: %u per-worker len: %d", - vec_len (vlib_mains), vec_len (am->per_worker_data)); + acl_log_info ("CLEANER mains len: %u per-worker len: %d", + vec_len (vlib_mains), + vec_len (am->per_worker_data)); vec_foreach (pw0, am->per_worker_data) { CLIB_MEMORY_BARRIER (); @@ -758,7 +759,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, } } } - acl_log_err ("ACL_FA_NODE_CLEAN: cleaning done"); + acl_log_info ("ACL_FA_NODE_CLEAN: cleaning done"); clib_bitmap_free (clear_sw_if_index_bitmap); } am->fa_cleaner_cnt_delete_by_sw_index_ok++; From 33f276e0af41212ae3894101f7182ab4772a71f5 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Thu, 11 Oct 2018 04:28:48 -0700 Subject: [PATCH 05/70] NAT44: identity NAT fix (VPP-1441) Change-Id: Ic4affc54d15d08b9b730f6ec6146ee053b28b4b6 Signed-off-by: Matus Fabian --- src/plugins/nat/nat.c | 105 +++++++++++++++++++++++++++-------- src/plugins/nat/nat.h | 7 +++ src/plugins/nat/nat_api.c | 19 ++++--- src/plugins/nat/nat_format.c | 19 ++++++- test/test_nat.py | 4 ++ 5 files changed, 122 insertions(+), 32 deletions(-) diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index ae2e64e1f82e..2ebd6834d927 100755 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -631,7 +631,6 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, clib_bihash_kv_8_8_t kv, value; snat_address_t *a = 0; u32 fib_index = ~0; - uword *p; snat_interface_t *interface; int i; snat_main_per_thread_data_t *tsm; @@ -643,6 +642,8 @@ snat_add_static_mapping 
(ip4_address_t l_addr, ip4_address_t e_addr, u64 user_index; snat_session_t *s; snat_static_map_resolve_t *rp, *rp_match = 0; + nat44_lb_addr_port_t *local; + u8 find = 0; if (!sm->endpoint_dependent) { @@ -732,19 +733,42 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, if (is_add) { if (m) - return VNET_API_ERROR_VALUE_EXIST; + { + if (is_identity_static_mapping (m)) + { + /* *INDENT-OFF* */ + vec_foreach (local, m->locals) + { + if (local->vrf_id == vrf_id) + return VNET_API_ERROR_VALUE_EXIST; + } + /* *INDENT-ON* */ + vec_add2 (m->locals, local, 1); + local->vrf_id = vrf_id; + local->fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + FIB_SOURCE_PLUGIN_LOW); + m_key.addr = m->local_addr; + m_key.port = m->local_port; + m_key.protocol = m->proto; + m_key.fib_index = local->fib_index; + kv.key = m_key.as_u64; + kv.value = m - sm->static_mappings; + clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1); + return 0; + } + else + return VNET_API_ERROR_VALUE_EXIST; + } if (twice_nat && addr_only) return VNET_API_ERROR_UNSUPPORTED; /* Convert VRF id to FIB index */ if (vrf_id != ~0) - { - p = hash_get (sm->ip4_main->fib_index_by_table_id, vrf_id); - if (!p) - return VNET_API_ERROR_NO_SUCH_FIB; - fib_index = p[0]; - } + fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + FIB_SOURCE_PLUGIN_LOW); /* If not specified use inside VRF id from SNAT plugin startup config */ else { @@ -752,7 +776,7 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, vrf_id = sm->inside_vrf_id; } - if (!out2in_only) + if (!(out2in_only || identity_nat)) { m_key.addr = l_addr; m_key.port = addr_only ? 
0 : l_port; @@ -825,15 +849,23 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, m->tag = vec_dup (tag); m->local_addr = l_addr; m->external_addr = e_addr; - m->vrf_id = vrf_id; - m->fib_index = fib_index; m->twice_nat = twice_nat; if (out2in_only) m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY; if (addr_only) m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY; if (identity_nat) - m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT; + { + m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT; + vec_add2 (m->locals, local, 1); + local->vrf_id = vrf_id; + local->fib_index = fib_index; + } + else + { + m->vrf_id = vrf_id; + m->fib_index = fib_index; + } if (!addr_only) { m->local_port = l_port; @@ -855,7 +887,7 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, m_key.addr = m->local_addr; m_key.port = m->local_port; m_key.protocol = m->proto; - m_key.fib_index = m->fib_index; + m_key.fib_index = fib_index; kv.key = m_key.as_u64; kv.value = m - sm->static_mappings; if (!out2in_only) @@ -920,6 +952,25 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, return VNET_API_ERROR_NO_SUCH_ENTRY; } + if (identity_nat) + { + for (i = 0; i < vec_len (m->locals); i++) + { + if (m->locals[i].vrf_id == vrf_id) + { + find = 1; + break; + } + } + if (!find) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + fib_index = m->locals[i].fib_index; + vec_del1 (m->locals, i); + } + else + fib_index = m->fib_index; + /* Free external address port */ if (!(addr_only || sm->static_mapping_only || out2in_only)) { @@ -958,23 +1009,17 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, m_key.addr = m->local_addr; m_key.port = m->local_port; m_key.protocol = m->proto; - m_key.fib_index = m->fib_index; + m_key.fib_index = fib_index; kv.key = m_key.as_u64; if (!out2in_only) clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0); - m_key.addr = m->external_addr; - m_key.port = m->external_port; - m_key.fib_index = 0; - kv.key = 
m_key.as_u64; - clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0); - /* Delete session(s) for static mapping if exist */ if (!(sm->static_mapping_only) || (sm->static_mapping_only && sm->static_mapping_connection_tracking)) { u_key.addr = m->local_addr; - u_key.fib_index = m->fib_index; + u_key.fib_index = fib_index; kv.key = u_key.as_u64; if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { @@ -1018,6 +1063,16 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, } } + fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_PLUGIN_LOW); + if (vec_len (m->locals)) + return 0; + + m_key.addr = m->external_addr; + m_key.port = m->external_port; + m_key.fib_index = 0; + kv.key = m_key.as_u64; + clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0); + vec_free (m->tag); vec_free (m->workers); /* Delete static mapping from pool */ @@ -1137,6 +1192,7 @@ nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, m->external_port = e_port; m->proto = proto; m->twice_nat = twice_nat; + m->flags |= NAT_STATIC_MAPPING_FLAG_LB; if (out2in_only) m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY; m->affinity = affinity; @@ -1205,6 +1261,9 @@ nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, if (!m) return VNET_API_ERROR_NO_SUCH_ENTRY; + if (!is_lb_static_mapping (m)) + return VNET_API_ERROR_INVALID_VALUE; + /* Free external address port */ if (!(sm->static_mapping_only || out2in_only)) { @@ -2041,7 +2100,7 @@ snat_static_mapping_match (snat_main_t * sm, if (by_external) { - if (vec_len (m->locals)) + if (is_lb_static_mapping (m)) { if (PREDICT_FALSE (lb != 0)) *lb = m->affinity ? 
AFFINITY_LB_NAT : LB_NAT; @@ -2612,7 +2671,7 @@ nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index) (&sm->static_mapping_by_external, &kv, &value)) { m = pool_elt_at_index (sm->static_mappings, value.value); - if (!vec_len (m->locals)) + if (!is_lb_static_mapping (m)) return m->workers[0]; hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) + diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 0549acdba3af..134672039172 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -183,6 +183,7 @@ typedef enum #define NAT_STATIC_MAPPING_FLAG_ADDR_ONLY 1 #define NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY 2 #define NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT 4 +#define NAT_STATIC_MAPPING_FLAG_LB 8 /* *INDENT-OFF* */ typedef CLIB_PACKED(struct @@ -666,6 +667,12 @@ unformat_function_t unformat_snat_protocol; */ #define is_identity_static_mapping(sm) (sm->flags & NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT) +/** \brief Check if NAT static mapping is load-balancing. + @param sm NAT static mapping + @return 1 if load-balancing +*/ +#define is_lb_static_mapping(sm) (sm->flags & NAT_STATIC_MAPPING_FLAG_LB) + /* logging */ #define nat_log_err(...) 
\ vlib_log(VLIB_LOG_LEVEL_ERR, snat_main.log_class, __VA_ARGS__) diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index 8ad5c6652cad..4727826049bf 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -1100,7 +1100,7 @@ vl_api_nat44_static_mapping_dump_t_handler (vl_api_nat44_static_mapping_dump_t /* *INDENT-OFF* */ pool_foreach (m, sm->static_mappings, ({ - if (!is_identity_static_mapping(m) && !vec_len (m->locals)) + if (!is_identity_static_mapping(m) && !is_lb_static_mapping (m)) send_nat44_static_mapping_details (m, reg, mp->context); })); /* *INDENT-ON* */ @@ -1181,17 +1181,17 @@ static void *vl_api_nat44_add_del_identity_mapping_t_print if (mp->addr_only == 0) s = - format (s, "protocol %d port %d", mp->protocol, + format (s, " protocol %d port %d", mp->protocol, clib_net_to_host_u16 (mp->port)); if (mp->vrf_id != ~0) - s = format (s, "vrf %d", clib_net_to_host_u32 (mp->vrf_id)); + s = format (s, " vrf %d", clib_net_to_host_u32 (mp->vrf_id)); FINISH; } static void -send_nat44_identity_mapping_details (snat_static_mapping_t * m, +send_nat44_identity_mapping_details (snat_static_mapping_t * m, int index, vl_api_registration_t * reg, u32 context) { vl_api_nat44_identity_mapping_details_t *rmp; @@ -1205,7 +1205,7 @@ send_nat44_identity_mapping_details (snat_static_mapping_t * m, clib_memcpy (rmp->ip_address, &(m->local_addr), 4); rmp->port = htons (m->local_port); rmp->sw_if_index = ~0; - rmp->vrf_id = htonl (m->vrf_id); + rmp->vrf_id = htonl (m->locals[index].vrf_id); rmp->protocol = snat_proto_to_ip_proto (m->proto); rmp->context = context; if (m->tag) @@ -1258,8 +1258,11 @@ static void /* *INDENT-OFF* */ pool_foreach (m, sm->static_mappings, ({ - if (is_identity_static_mapping(m) && !vec_len (m->locals)) - send_nat44_identity_mapping_details (m, reg, mp->context); + if (is_identity_static_mapping(m) && !is_lb_static_mapping (m)) + { + for (j = 0; j < vec_len (m->locals); j++) + send_nat44_identity_mapping_details (m, j, 
reg, mp->context); + } })); /* *INDENT-ON* */ @@ -1689,7 +1692,7 @@ static void /* *INDENT-OFF* */ pool_foreach (m, sm->static_mappings, ({ - if (vec_len(m->locals)) + if (is_lb_static_mapping(m)) send_nat44_lb_static_mapping_details (m, reg, mp->context); })); /* *INDENT-ON* */ diff --git a/src/plugins/nat/nat_format.c b/src/plugins/nat/nat_format.c index a4b62b6e9773..5ce00dcb1d04 100644 --- a/src/plugins/nat/nat_format.c +++ b/src/plugins/nat/nat_format.c @@ -220,6 +220,23 @@ format_snat_static_mapping (u8 * s, va_list * args) snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *); nat44_lb_addr_port_t *local; + if (is_identity_static_mapping (m)) + { + if (is_addr_only_static_mapping (m)) + s = format (s, "identity mapping %U", + format_ip4_address, &m->local_addr); + else + s = format (s, "identity mapping %U:%d", + format_ip4_address, &m->local_addr, m->local_port); + + /* *INDENT-OFF* */ + vec_foreach (local, m->locals) + s = format (s, " vrf %d", local->vrf_id); + /* *INDENT-ON* */ + + return s; + } + if (is_addr_only_static_mapping (m)) s = format (s, "local %U external %U vrf %d %s %s", format_ip4_address, &m->local_addr, @@ -230,7 +247,7 @@ format_snat_static_mapping (u8 * s, va_list * args) is_out2in_only_static_mapping (m) ? 
"out2in-only" : ""); else { - if (vec_len (m->locals)) + if (is_lb_static_mapping (m)) { s = format (s, "%U external %U:%d %s %s", format_snat_protocol, m->proto, diff --git a/test/test_nat.py b/test/test_nat.py index 3b9007f6e93e..e9e7dfa3068b 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -1937,6 +1937,10 @@ def test_identity_nat(self): sessions = self.vapi.nat44_user_session_dump(self.pg0.remote_ip4n, 0) self.assertEqual(len(sessions), 0) + self.vapi.nat44_add_del_identity_mapping(ip=self.pg0.remote_ip4n, + vrf_id=1) + identity_mappings = self.vapi.nat44_identity_mapping_dump() + self.assertEqual(len(identity_mappings), 2) def test_multiple_inside_interfaces(self): """ NAT44 multiple non-overlapping address space inside interfaces """ From 713322bd32a07135a5d16c55bcd909f2d073b8cb Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 10 Oct 2018 13:27:00 +0000 Subject: [PATCH 06/70] Integer underflow and out-of-bounds read (VPP-1442) Change-Id: Ife2a83b9d7f733f36e0e786ef79edcd394d7c0f9 Signed-off-by: Neale Ranns --- src/vlib/buffer_node.h | 13 +++++++++---- src/vppinfra/string.h | 8 ++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/vlib/buffer_node.h b/src/vlib/buffer_node.h index 93ffb1e9dce8..35e15a5d9196 100644 --- a/src/vlib/buffer_node.h +++ b/src/vlib/buffer_node.h @@ -366,10 +366,15 @@ vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node, n_enqueued = count_trailing_zeros (~bitmap) / 2; #else u16 x = 0; - x |= next_index ^ nexts[1]; - x |= next_index ^ nexts[2]; - x |= next_index ^ nexts[3]; - n_enqueued = (x == 0) ? 4 : 1; + if (count + 3 < max) + { + x |= next_index ^ nexts[1]; + x |= next_index ^ nexts[2]; + x |= next_index ^ nexts[3]; + n_enqueued = (x == 0) ? 
4 : 1; + } + else + n_enqueued = 1; #endif if (PREDICT_FALSE (n_enqueued > max)) diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h index 8f165dfa18e2..2c794baf71f0 100644 --- a/src/vppinfra/string.h +++ b/src/vppinfra/string.h @@ -356,7 +356,7 @@ clib_count_equal_u64 (u64 * data, uword max_count) #endif count += 2; data += 2; - while (count < max_count - 3 && + while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) | (data[2] ^ first) | (data[3] ^ first)) == 0) { @@ -424,7 +424,7 @@ clib_count_equal_u32 (u32 * data, uword max_count) #endif count += 2; data += 2; - while (count < max_count - 3 && + while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) | (data[2] ^ first) | (data[3] ^ first)) == 0) { @@ -492,7 +492,7 @@ clib_count_equal_u16 (u16 * data, uword max_count) #endif count += 2; data += 2; - while (count < max_count - 3 && + while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) | (data[2] ^ first) | (data[3] ^ first)) == 0) { @@ -560,7 +560,7 @@ clib_count_equal_u8 (u8 * data, uword max_count) #endif count += 2; data += 2; - while (count < max_count - 3 && + while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) | (data[2] ^ first) | (data[3] ^ first)) == 0) { From 0d222f88edb0ba7011f0d717f075e579beef3570 Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Thu, 11 Oct 2018 22:08:50 +0200 Subject: [PATCH 07/70] Stats: Include stat_segment.h in packages. 
Change-Id: I976c0aba8397badf64763c4dbddce67009a4fb23 Signed-off-by: Ole Troan --- src/vpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vpp/CMakeLists.txt b/src/vpp/CMakeLists.txt index 601bc0397d62..16843f73e8a3 100644 --- a/src/vpp/CMakeLists.txt +++ b/src/vpp/CMakeLists.txt @@ -82,6 +82,7 @@ add_vpp_executable(vpp add_vpp_headers(vpp api/vpe_msg_enum.h api/vpe_all_api_h.h + stats/stat_segment.h ) ############################################################################## From 125760947a642f0cd5a016deb899a78d83340379 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Thu, 11 Oct 2018 14:09:58 +0300 Subject: [PATCH 08/70] bfd:fix handling session creation batch when a script creating multiple sessions is run (via exec), only the first one actually starts Change-Id: I0fc36f65795c8921cf180e0b555c446e5a80be45 Signed-off-by: Eyal Bari (cherry picked from commit 0db9b04cf0f9c892a00988e7a61ae703aa83b721) --- src/vnet/bfd/bfd_main.c | 61 +++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 55ea23dea410..bd2addf3b0f3 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -1165,6 +1165,7 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) } } now = clib_cpu_time_now (); + uword *session_index; switch (event_type) { case ~0: /* no events => timeout */ @@ -1180,35 +1181,41 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) * each event or timeout */ break; case BFD_EVENT_NEW_SESSION: - bfd_lock (bm); - if (!pool_is_free_index (bm->sessions, *event_data)) - { - bfd_session_t *bs = - pool_elt_at_index (bm->sessions, *event_data); - bfd_send_periodic (vm, rt, bm, bs, now); - bfd_set_timer (bm, bs, now, 1); - } - else - { - BFD_DBG ("Ignoring event for non-existent session index %u", - (u32) * event_data); - } - bfd_unlock (bm); + vec_foreach (session_index, event_data) + { + bfd_lock 
(bm); + if (!pool_is_free_index (bm->sessions, *session_index)) + { + bfd_session_t *bs = + pool_elt_at_index (bm->sessions, *session_index); + bfd_send_periodic (vm, rt, bm, bs, now); + bfd_set_timer (bm, bs, now, 1); + } + else + { + BFD_DBG ("Ignoring event for non-existent session index %u", + (u32) * session_index); + } + bfd_unlock (bm); + } break; case BFD_EVENT_CONFIG_CHANGED: - bfd_lock (bm); - if (!pool_is_free_index (bm->sessions, *event_data)) - { - bfd_session_t *bs = - pool_elt_at_index (bm->sessions, *event_data); - bfd_on_config_change (vm, rt, bm, bs, now); - } - else - { - BFD_DBG ("Ignoring event for non-existent session index %u", - (u32) * event_data); - } - bfd_unlock (bm); + vec_foreach (session_index, event_data) + { + bfd_lock (bm); + if (!pool_is_free_index (bm->sessions, *session_index)) + { + bfd_session_t *bs = + pool_elt_at_index (bm->sessions, *session_index); + bfd_on_config_change (vm, rt, bm, bs, now); + } + else + { + BFD_DBG ("Ignoring event for non-existent session index %u", + (u32) * session_index); + } + bfd_unlock (bm); + } break; default: vlib_log_err (bm->log_class, "BUG: event type 0x%wx", event_type); From 9864f87b1bd410a6ef533f34b571e28500ee80f7 Mon Sep 17 00:00:00 2001 From: Steven Date: Tue, 9 Oct 2018 21:12:25 -0700 Subject: [PATCH 09/70] vmxnet3: better error handling [VPP-1449] try harder on output - if there is no descriptor space available, try to free up some and check again. make sure we free the buffer if error is encountered on input. 
Change-Id: I41a45213e29de71935afe707889e515037cd081f Signed-off-by: Steven (cherry picked from commit 8b0995366110ff8c97d1d10aaa8291ad465b0b2f) --- src/plugins/vmxnet3/input.c | 37 +++++++++++++++++++++++++----------- src/plugins/vmxnet3/output.c | 25 +++++++++++++++--------- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/src/plugins/vmxnet3/input.c b/src/plugins/vmxnet3/input.c index 4ff459a066ac..9392d57747d8 100644 --- a/src/plugins/vmxnet3/input.c +++ b/src/plugins/vmxnet3/input.c @@ -27,6 +27,7 @@ _(BUFFER_ALLOC, "buffer alloc error") \ _(RX_PACKET_NO_SOP, "Rx packet error - no SOP") \ _(RX_PACKET, "Rx packet error") \ + _(RX_PACKET_EOP, "Rx packet error found on EOP") \ _(NO_BUFFER, "Rx no buffer error") typedef enum @@ -79,7 +80,6 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_trace = vlib_get_trace_count (vm, node); u32 n_rx_packets = 0, n_rx_bytes = 0; vmxnet3_rx_comp *rx_comp; - u32 comp_idx; u32 desc_idx; vmxnet3_rxq_t *rxq; u32 thread_index = vm->thread_index; @@ -98,16 +98,14 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, comp_ring = &rxq->rx_comp_ring; bi = buffer_indices; next = nexts; + rx_comp = &rxq->rx_comp[comp_ring->next]; + while (PREDICT_TRUE (n_rx_packets < VLIB_FRAME_SIZE) && - (comp_ring->gen == - (rxq->rx_comp[comp_ring->next].flags & VMXNET3_RXCF_GEN))) + (comp_ring->gen == (rx_comp->flags & VMXNET3_RXCF_GEN))) { vlib_buffer_t *b0; u32 bi0; - comp_idx = comp_ring->next; - rx_comp = &rxq->rx_comp[comp_idx]; - rid = vmxnet3_find_rid (vd, rx_comp); ring = &rxq->rx_ring[rid]; @@ -117,10 +115,15 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { vlib_error_count (vm, node->node_index, VMXNET3_INPUT_ERROR_NO_BUFFER, 1); + if (hb) + { + vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, hb)); + hb = 0; + } + prev_b0 = 0; break; } - vmxnet3_rx_comp_ring_advance_next (rxq); desc_idx = rx_comp->index & VMXNET3_RXC_INDEX; 
ring->consume = desc_idx; rxd = &rxq->rx_desc[rid][desc_idx]; @@ -146,14 +149,14 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { vlib_buffer_free_one (vm, bi0); vlib_error_count (vm, node->node_index, - VMXNET3_INPUT_ERROR_RX_PACKET, 1); + VMXNET3_INPUT_ERROR_RX_PACKET_EOP, 1); if (hb && vlib_get_buffer_index (vm, hb) != bi0) { vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, hb)); hb = 0; } prev_b0 = 0; - continue; + goto next; } if (rx_comp->index & VMXNET3_RXCI_SOP) @@ -199,7 +202,7 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, hb)); hb = 0; } - continue; + goto next; } } else if (prev_b0) // !sop && !eop @@ -213,7 +216,15 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - ASSERT (0); + vlib_error_count (vm, node->node_index, + VMXNET3_INPUT_ERROR_RX_PACKET, 1); + vlib_buffer_free_one (vm, bi0); + if (hb && vlib_get_buffer_index (vm, hb) != bi0) + { + vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, hb)); + hb = 0; + } + goto next; } n_rx_bytes += b0->current_length; @@ -275,6 +286,10 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, hb = 0; got_packet = 0; } + + next: + vmxnet3_rx_comp_ring_advance_next (rxq); + rx_comp = &rxq->rx_comp[comp_ring->next]; } if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)))) diff --git a/src/plugins/vmxnet3/output.c b/src/plugins/vmxnet3/output.c index bcb02949184e..2a8494ed447c 100644 --- a/src/plugins/vmxnet3/output.c +++ b/src/plugins/vmxnet3/output.c @@ -143,15 +143,22 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm, } if (PREDICT_FALSE (space_left < space_needed)) { - vlib_buffer_free_one (vm, bi0); - vlib_error_count (vm, node->node_index, - VMXNET3_TX_ERROR_NO_FREE_SLOTS, 1); - buffers++; - n_left--; - /* - * Drop this packet. 
But we may have enough room for the next packet - */ - continue; + vmxnet3_txq_release (vm, vd, txq); + space_left = vmxnet3_tx_ring_space_left (txq); + + if (PREDICT_FALSE (space_left < space_needed)) + { + vlib_buffer_free_one (vm, bi0); + vlib_error_count (vm, node->node_index, + VMXNET3_TX_ERROR_NO_FREE_SLOTS, 1); + buffers++; + n_left--; + /* + * Drop this packet. But we may have enough room for the next + * packet + */ + continue; + } } /* From 9a5f9c9a43ae6057fe760a59d40443a12af37f90 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 2 Oct 2018 07:27:02 -0700 Subject: [PATCH 10/70] L2-flood: no clone for 1 replication Change-Id: If178dd38e7920f35588f5d821ff097168b078026 Signed-off-by: Neale Ranns (cherry picked from commit b9fa29d513bfad0d9f18e8ed8c2da3feaa6d3bf0) --- src/vnet/l2/l2_flood.c | 128 ++++++++++++++++++++++------------------- test/test_l2_flood.py | 33 +++++++++++ 2 files changed, 102 insertions(+), 59 deletions(-) diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c index 97a4ff59da7b..8908c4312d39 100644 --- a/src/vnet/l2/l2_flood.c +++ b/src/vnet/l2/l2_flood.c @@ -209,77 +209,87 @@ l2flood_node_fn (vlib_main_t * vm, bi0, L2FLOOD_NEXT_DROP); continue; } - - vec_validate (msm->clones[thread_index], n_clones); - vec_reset_length (msm->clones[thread_index]); - - /* - * the header offset needs to be large enough to incorporate - * all the L3 headers that could be touched when doing BVI - * processing. 
So take the current l2 length plus 2 * IPv6 - * headers (for tunnel encap) - */ - n_cloned = vlib_buffer_clone (vm, bi0, - msm->clones[thread_index], - n_clones, - (vnet_buffer (b0)->l2.l2_len + - sizeof (udp_header_t) + - 2 * sizeof (ip6_header_t))); - - if (PREDICT_FALSE (n_cloned != n_clones)) + else if (n_clones > 1) { - b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL]; - } + vec_validate (msm->clones[thread_index], n_clones); + vec_reset_length (msm->clones[thread_index]); + + /* + * the header offset needs to be large enough to incorporate + * all the L3 headers that could be touched when doing BVI + * processing. So take the current l2 length plus 2 * IPv6 + * headers (for tunnel encap) + */ + n_cloned = vlib_buffer_clone (vm, bi0, + msm->clones[thread_index], + n_clones, + (vnet_buffer (b0)->l2.l2_len + + sizeof (udp_header_t) + + 2 * sizeof (ip6_header_t))); + + if (PREDICT_FALSE (n_cloned != n_clones)) + { + b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL]; + } - /* - * for all but the last clone, these are not BVI bound - */ - for (clone0 = 0; clone0 < n_cloned - 1; clone0++) - { - member = msm->members[thread_index][clone0]; - ci0 = msm->clones[thread_index][clone0]; - c0 = vlib_get_buffer (vm, ci0); + /* + * for all but the last clone, these are not BVI bound + */ + for (clone0 = 0; clone0 < n_cloned - 1; clone0++) + { + member = msm->members[thread_index][clone0]; + ci0 = msm->clones[thread_index][clone0]; + c0 = vlib_get_buffer (vm, ci0); - to_next[0] = ci0; - to_next += 1; - n_left_to_next -= 1; + to_next[0] = ci0; + to_next += 1; + n_left_to_next -= 1; - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && - (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - ethernet_header_t *h0; - l2flood_trace_t *t; - - if (c0 != b0) - vlib_buffer_copy_trace_flag (vm, b0, ci0); - - t = vlib_add_trace (vm, node, c0, sizeof (*t)); - h0 = vlib_buffer_get_current (c0); - t->sw_if_index = sw_if_index0; - t->bd_index = vnet_buffer (c0)->l2.bd_index; - clib_memcpy 
(t->src, h0->src_address, 6); - clib_memcpy (t->dst, h0->dst_address, 6); - } + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ethernet_header_t *h0; + l2flood_trace_t *t; + + if (c0 != b0) + vlib_buffer_copy_trace_flag (vm, b0, ci0); + + t = vlib_add_trace (vm, node, c0, sizeof (*t)); + h0 = vlib_buffer_get_current (c0); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer (c0)->l2.bd_index; + clib_memcpy (t->src, h0->src_address, 6); + clib_memcpy (t->dst, h0->dst_address, 6); + } - /* Do normal L2 forwarding */ - vnet_buffer (c0)->sw_if_index[VLIB_TX] = member->sw_if_index; + /* Do normal L2 forwarding */ + vnet_buffer (c0)->sw_if_index[VLIB_TX] = + member->sw_if_index; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - ci0, next0); - if (PREDICT_FALSE (0 == n_left_to_next)) - { - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + ci0, next0); + if (PREDICT_FALSE (0 == n_left_to_next)) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } } + member = msm->members[thread_index][clone0]; + ci0 = msm->clones[thread_index][clone0]; + } + else + { + /* one clone */ + ci0 = bi0; + member = msm->members[thread_index][0]; } /* * the last clone that might go to a BVI */ - member = msm->members[thread_index][clone0]; - ci0 = msm->clones[thread_index][clone0]; c0 = vlib_get_buffer (vm, ci0); to_next[0] = ci0; diff --git a/test/test_l2_flood.py b/test/test_l2_flood.py index 50a692e57e8d..8b8a3f0f960c 100644 --- a/test/test_l2_flood.py +++ b/test/test_l2_flood.py @@ -144,6 +144,39 @@ def test_flood(self): self.vapi.bridge_domain_add_del(1, is_add=0) + def test_flood_one(self): + """ L2 no-Flood Test """ + + # + # Create 
a single bridge Domain + # + self.vapi.bridge_domain_add_del(1) + + # + # add 2 interfaces to the BD. this means a flood goes to only + # one member + # + for i in self.pg_interfaces[:2]: + self.vapi.sw_interface_set_l2_bridge(i.sw_if_index, 1, 0) + + p = (Ether(dst="ff:ff:ff:ff:ff:ff", + src="00:00:de:ad:be:ef") / + IP(src="10.10.10.10", dst="1.1.1.1") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + + # + # input on pg0 expect copies on pg1 + # + self.send_and_expect(self.pg0, p*65, self.pg1) + + # + # cleanup + # + for i in self.pg_interfaces[:2]: + self.vapi.sw_interface_set_l2_bridge(i.sw_if_index, 1, enable=0) + self.vapi.bridge_domain_add_del(1, is_add=0) + def test_uu_fwd(self): """ UU Flood """ From 02a60e01a71660d17a6578e8f35f468d5b6532cb Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Fri, 12 Oct 2018 16:55:14 -0700 Subject: [PATCH 11/70] session: don't wait indefinitely for apps to consume evts (VPP-1454) Change-Id: I544b24d2b2c4a09829773cf180d1747f4b087d4c Signed-off-by: Florin Coras --- src/vnet/session/session_api.c | 38 +++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index fc63428277ee..e82be560b1fe 100755 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -421,6 +421,23 @@ static session_cb_vft_t session_cb_vft = { .del_segment_callback = send_del_segment_callback, }; +static int +mq_try_lock_and_alloc_msg (svm_msg_q_t * app_mq, svm_msg_q_msg_t * msg) +{ + int rv; + u8 try = 0; + while (try < 100) + { + rv = svm_msg_q_lock_and_alloc_msg_w_ring (app_mq, + SESSION_MQ_CTRL_EVT_RING, + SVM_Q_NOWAIT, msg); + if (!rv) + return 0; + try++; + } + return -1; +} + static int mq_send_session_accepted_cb (stream_session_t * s) { @@ -436,8 +453,8 @@ mq_send_session_accepted_cb (stream_session_t * s) app = application_get (app_wrk->app_index); app_mq = app_wrk->event_queue; - svm_msg_q_lock_and_alloc_msg_w_ring 
(app_mq, SESSION_MQ_CTRL_EVT_RING, - SVM_Q_WAIT, msg); + if (mq_try_lock_and_alloc_msg (app_mq, msg)) + return -1; evt = svm_msg_q_msg_data (app_mq, msg); memset (evt, 0, sizeof (*evt)); @@ -523,8 +540,8 @@ mq_send_session_disconnected_cb (stream_session_t * s) app = application_get (app_wrk->app_index); app_mq = app_wrk->event_queue; - svm_msg_q_lock_and_alloc_msg_w_ring (app_mq, SESSION_MQ_CTRL_EVT_RING, - SVM_Q_WAIT, msg); + if (mq_try_lock_and_alloc_msg (app_mq, msg)) + return; evt = svm_msg_q_msg_data (app_mq, msg); memset (evt, 0, sizeof (*evt)); evt->event_type = SESSION_CTRL_EVT_DISCONNECTED; @@ -544,8 +561,8 @@ mq_send_session_reset_cb (stream_session_t * s) session_event_t *evt; app_mq = app->event_queue; - svm_msg_q_lock_and_alloc_msg_w_ring (app_mq, SESSION_MQ_CTRL_EVT_RING, - SVM_Q_WAIT, msg); + if (mq_try_lock_and_alloc_msg (app_mq, msg)) + return; evt = svm_msg_q_msg_data (app_mq, msg); memset (evt, 0, sizeof (*evt)); evt->event_type = SESSION_CTRL_EVT_RESET; @@ -576,8 +593,8 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, return -1; } - svm_msg_q_lock_and_alloc_msg_w_ring (app_mq, SESSION_MQ_CTRL_EVT_RING, - SVM_Q_WAIT, msg); + if (mq_try_lock_and_alloc_msg (app_mq, msg)) + return -1; evt = svm_msg_q_msg_data (app_mq, msg); memset (evt, 0, sizeof (*evt)); evt->event_type = SESSION_CTRL_EVT_CONNECTED; @@ -656,8 +673,9 @@ mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context, return -1; } - svm_msg_q_lock_and_alloc_msg_w_ring (app_mq, SESSION_MQ_CTRL_EVT_RING, - SVM_Q_WAIT, msg); + if (mq_try_lock_and_alloc_msg (app_mq, msg)) + return -1; + evt = svm_msg_q_msg_data (app_mq, msg); memset (evt, 0, sizeof (*evt)); evt->event_type = SESSION_CTRL_EVT_BOUND; From 795539326b2224e80940ad381a865ea103c777d1 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Fri, 12 Oct 2018 13:09:36 -0700 Subject: [PATCH 12/70] vcl: fix empty epoll returns (VPP-1453) Change-Id: I0b191ddb749b1aa132c2d33b8359c146b36d27af Signed-off-by: Florin Coras --- 
src/vcl/vppcom.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index 3f12b86df429..df4ebde72eba 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -2508,6 +2508,7 @@ vppcom_epoll_wait_eventfd (vcl_worker_t * wrk, struct epoll_event *events, u64 buf; vec_validate (wrk->mq_events, pool_elts (wrk->mq_evt_conns)); +again: n_mq_evts = epoll_wait (wrk->mqs_epfd, wrk->mq_events, vec_len (wrk->mq_events), wait_for_time); for (i = 0; i < n_mq_evts; i++) @@ -2516,6 +2517,8 @@ vppcom_epoll_wait_eventfd (vcl_worker_t * wrk, struct epoll_event *events, n_read = read (mqc->mq_fd, &buf, sizeof (buf)); vcl_epoll_wait_handle_mq (wrk, mqc->mq, events, maxevents, 0, &n_evts); } + if (!n_evts && n_mq_evts > 0) + goto again; return (int) n_evts; } From 6a86ca9627e4064e9689b3890d180cb078ec6726 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Sun, 14 Oct 2018 10:54:32 +0300 Subject: [PATCH 13/70] vxlan:fix ip6 tunnel deletion Change-Id: I70fb7394f85b26f7e632d74fc31ef83597efdd16 Signed-off-by: Eyal Bari (cherry picked from commit f8d5e214687c17fba000607336295e054672459d) --- src/vnet/vxlan/vxlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c index 93a4e2635792..4276d6689acf 100644 --- a/src/vnet/vxlan/vxlan.c +++ b/src/vnet/vxlan/vxlan.c @@ -599,7 +599,8 @@ int vnet_vxlan_add_del_tunnel if (!p) return VNET_API_ERROR_NO_SUCH_ENTRY; - u32 instance = vxm->tunnel_index_by_sw_if_index[p->sw_if_index]; + u32 instance = is_ip6 ? key6.value : + vxm->tunnel_index_by_sw_if_index[p->sw_if_index]; vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, instance); sw_if_index = t->sw_if_index; From 051984c6a132517de325576a0847cc4bdc9b2897 Mon Sep 17 00:00:00 2001 From: "mu.duojiao" Date: Thu, 11 Oct 2018 14:27:30 +0800 Subject: [PATCH 14/70] VPP-1448: Fix error when recurse on down the trie. 
Change-Id: Idfed8243643780d3f52dfe6e6ec621c440daa6ae Signed-off-by: mu.duojiao (cherry picked from commit 59a829533c1345945dc1b6decc3afe29494e85cd) --- src/vnet/ip/ip4_mtrie.c | 40 ++++++++++++++++----------------- test/test_ip4.py | 49 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 21 deletions(-) diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c index 97c250746393..6cd199a971e2 100644 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -369,10 +369,10 @@ set_leaf (ip4_fib_mtrie_t * m, old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); - new_leaf = ply_create (m, old_leaf, - clib_max (old_ply->dst_address_bits_of_leaves - [dst_byte], ply_base_len), - ply_base_len); + new_leaf = + ply_create (m, old_leaf, + old_ply->dst_address_bits_of_leaves[dst_byte], + ply_base_len); new_ply = get_next_ply_for_leaf (m, new_leaf); /* Refetch since ply_create may move pool. */ @@ -492,10 +492,10 @@ set_root_leaf (ip4_fib_mtrie_t * m, if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) { /* There is a leaf occupying the slot. 
Replace it with a new ply */ - new_leaf = ply_create (m, old_leaf, - clib_max (old_ply->dst_address_bits_of_leaves - [dst_byte], ply_base_len), - ply_base_len); + new_leaf = + ply_create (m, old_leaf, + old_ply->dst_address_bits_of_leaves[dst_byte], + ply_base_len); new_ply = get_next_ply_for_leaf (m, new_leaf); __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, @@ -714,24 +714,23 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va) return s; } -#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent) \ +#define FORMAT_PLY(s, _p, _a, _i, _base_address, _ply_max_len, _indent) \ ({ \ u32 a, ia_length; \ ip4_address_t ia; \ ip4_fib_mtrie_leaf_t _l = p->leaves[(_i)]; \ \ - a = (_base_address) + ((_i) << (32 - (_ply_max_len))); \ + a = (_base_address) + ((_a) << (32 - (_ply_max_len))); \ ia.as_u32 = clib_host_to_net_u32 (a); \ ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \ - s = format (s, "\n%U%20U %U", \ - format_white_space, (_indent) + 2, \ + s = format (s, "\n%U%U %U", \ + format_white_space, (_indent) + 4, \ format_ip4_address_and_length, &ia, ia_length, \ format_ip4_fib_mtrie_leaf, _l); \ \ if (ip4_fib_mtrie_leaf_is_next_ply (_l)) \ - s = format (s, "\n%U%U", \ - format_white_space, (_indent) + 2, \ - format_ip4_fib_mtrie_ply, m, a, \ + s = format (s, "\n%U", \ + format_ip4_fib_mtrie_ply, m, a, (_indent) + 8, \ ip4_fib_mtrie_leaf_get_next_ply_index (_l)); \ s; \ }) @@ -741,21 +740,20 @@ format_ip4_fib_mtrie_ply (u8 * s, va_list * va) { ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); u32 base_address = va_arg (*va, u32); + u32 indent = va_arg (*va, u32); u32 ply_index = va_arg (*va, u32); ip4_fib_mtrie_8_ply_t *p; - u32 indent; int i; p = pool_elt_at_index (ip4_ply_pool, ply_index); - indent = format_get_indent (s); - s = format (s, "ply index %d, %d non-empty leaves", ply_index, - p->n_non_empty_leafs); + s = format (s, "%Uply index %d, %d non-empty leaves", + format_white_space, indent, ply_index, p->n_non_empty_leafs); for 
(i = 0; i < ARRAY_LEN (p->leaves); i++) { if (ip4_fib_mtrie_leaf_is_non_empty (p, i)) { - s = FORMAT_PLY (s, p, i, base_address, + s = FORMAT_PLY (s, p, i, i, base_address, p->dst_address_bits_base + 8, indent); } } @@ -791,7 +789,7 @@ format_ip4_fib_mtrie (u8 * s, va_list * va) if (p->dst_address_bits_of_leaves[slot] > 0) { - s = FORMAT_PLY (s, p, slot, base_address, 16, 2); + s = FORMAT_PLY (s, p, i, slot, base_address, 16, 0); } } } diff --git a/test/test_ip4.py b/test/test_ip4.py index 02a31be830a4..e9ec71a2830e 100644 --- a/test/test_ip4.py +++ b/test/test_ip4.py @@ -1505,5 +1505,54 @@ def test_ip_input(self): self.pg1.unconfig_ip4() +class TestIPLPM(VppTestCase): + """ IPv4 longest Prefix Match """ + + def setUp(self): + super(TestIPLPM, self).setUp() + + self.create_pg_interfaces(range(4)) + + for i in self.pg_interfaces: + i.admin_up() + i.config_ip4() + i.resolve_arp() + + def tearDown(self): + super(TestIPLPM, self).tearDown() + for i in self.pg_interfaces: + i.admin_down() + i.unconfig_ip4() + + def test_ip_lpm(self): + """ IP longest Prefix Match """ + + s_24 = VppIpRoute(self, "10.1.2.0", 24, + [VppRoutePath(self.pg1.remote_ip4, + self.pg1.sw_if_index)]) + s_24.add_vpp_config() + s_8 = VppIpRoute(self, "10.0.0.0", 8, + [VppRoutePath(self.pg2.remote_ip4, + self.pg2.sw_if_index)]) + s_8.add_vpp_config() + + p_8 = (Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IP(src="1.1.1.1", + dst="10.1.1.1") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 2000)) + p_24 = (Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IP(src="1.1.1.1", + dst="10.1.2.1") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 2000)) + + self.logger.info(self.vapi.cli("sh ip fib mtrie")) + rx = self.send_and_expect(self.pg0, p_8 * 65, self.pg2) + rx = self.send_and_expect(self.pg0, p_24 * 65, self.pg1) + + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) From 3d29e83112a349b7d27ef792463f246b18115d3e Mon Sep 17 00:00:00 2001 From: Steven Date: 
Mon, 15 Oct 2018 10:22:53 -0700 Subject: [PATCH 15/70] vmxnet3: vmxnet3_test_plugin.so: undefined symbol: format_vlib_pci_addr [VPP-1456] When using vpp_api_test, there is an undefined symbol error for format_vlib_pci_addr when vmxnet3_test_plugin.so is loaded. The cause is due to vlib not included in vpp_api_test. Remove the reference for vlib.so in vmxnet3_test. Change-Id: I37c00dfe2f843d99ad6c4fc7af6ed10bac4c2df8 Signed-off-by: Steven --- src/plugins/vmxnet3/vmxnet3_test.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/plugins/vmxnet3/vmxnet3_test.c b/src/plugins/vmxnet3/vmxnet3_test.c index b08f61b0bd99..1b5fd5b7c9a8 100644 --- a/src/plugins/vmxnet3/vmxnet3_test.c +++ b/src/plugins/vmxnet3/vmxnet3_test.c @@ -227,6 +227,14 @@ api_vmxnet3_dump (vat_main_t * vam) return ret; } +static u8 * +format_pci_addr (u8 * s, va_list * va) +{ + vlib_pci_addr_t *addr = va_arg (*va, vlib_pci_addr_t *); + return format (s, "%04x:%02x:%02x.%x", addr->domain, addr->bus, + addr->slot, addr->function); +} + static void vl_api_vmxnet3_details_t_handler (vl_api_vmxnet3_details_t * mp) { @@ -246,7 +254,7 @@ vl_api_vmxnet3_details_t_handler (vl_api_vmxnet3_details_t * mp) " state %s\n", mp->if_name, ntohl (mp->sw_if_index), format_ethernet_address, mp->hw_addr, mp->version, - format_vlib_pci_addr, &pci_addr, + format_pci_addr, &pci_addr, ntohs (mp->rx_next), ntohs (mp->rx_qid), ntohs (mp->rx_qsize), ntohs (mp->rx_fill[0]), From 347c523c23277a6061a21dbca43be7498ebb41af Mon Sep 17 00:00:00 2001 From: Steven Date: Fri, 12 Oct 2018 10:21:32 -0700 Subject: [PATCH 16/70] vmxnet3: show vmxnet3 with filtering capability [VPP-1452] show vmxnet3 desc may display 5000 lines of output since it has 5 tables. Each table may have 1000 entries. It would not be very useful to debug problem. We need filtering capability for the subject show command. 
We need to be able to display the descriptor table per interface, per interface per table, and per interface per table per slot. The latter is the most useful. tested the following valid combinations show vmxnet3 show vmxnet3 desc show vmxnet3 vmxnet3-0/13/0/0 show vmxnet3 vmxnet3-0/13/0/0 desc show vmxnet3 vmxnet3-0/13/0/0 rx-comp show vmxnet3 vmxnet3-0/13/0/0 rx-comp 1 show vmxnet3 vmxnet3-0/13/0/0 tx-comp show vmxnet3 vmxnet3-0/13/0/0 tx-comp 1 show vmxnet3 vmxnet3-0/13/0/0 rx-desc-0 show vmxnet3 vmxnet3-0/13/0/0 rx-desc-0 1 show vmxnet3 vmxnet3-0/13/0/0 rx-desc-1 show vmxnet3 vmxnet3-0/13/0/0 rx-desc-1 1 show vmxnet3 vmxnet3-0/13/0/0 tx-desc show vmxnet3 vmxnet3-0/13/0/0 tx-desc 1 negative tests and command is rejected show vmxnet3 abc show vmxnet3 desc abc show vmxnet3 vmxnet3-0/13/0/0 abc show vmxnet3 vmxnet3-0/13/0/0 desc abc show vmxnet3 vmxnet3-0/13/0/0 rx-comp abc show vmxnet3 vmxnet3-0/13/0/0 rx-comp 1 abc Change-Id: I0ff233413496e58236f8fb4a94e493494c20c5cb Signed-off-by: Steven --- src/plugins/vmxnet3/README.md | 3 +- src/plugins/vmxnet3/cli.c | 225 ++++++++++++++++++++++++++++++++-- src/plugins/vmxnet3/vmxnet3.h | 18 ++- 3 files changed, 234 insertions(+), 12 deletions(-) diff --git a/src/plugins/vmxnet3/README.md b/src/plugins/vmxnet3/README.md index 4f03c1575f9a..031c5962ee9b 100644 --- a/src/plugins/vmxnet3/README.md +++ b/src/plugins/vmxnet3/README.md @@ -16,7 +16,8 @@ vfio driver can still be used with recent kernels which support no-iommu mode. 
##Known issues * NUMA support -* TSO +* TSO/LRO +* RSS/multiple queues * VLAN filter ## Usage diff --git a/src/plugins/vmxnet3/cli.c b/src/plugins/vmxnet3/cli.c index 40d379bb21a7..170f9ad7f5b1 100644 --- a/src/plugins/vmxnet3/cli.c +++ b/src/plugins/vmxnet3/cli.c @@ -184,7 +184,8 @@ VLIB_CLI_COMMAND (vmxnet3_test_command, static) = { /* *INDENT-ON* */ static void -show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr) +show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, + u8 show_one_table, u32 which, u8 show_one_slot, u32 slot) { u32 i, desc_idx; vmxnet3_device_t *vd; @@ -228,6 +229,8 @@ show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr) rxq->rx_comp_ring.next); vlib_cli_output (vm, " RX completion generation flag 0x%x", rxq->rx_comp_ring.gen); + + /* RX descriptors tables */ for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++) { vmxnet3_rx_ring *ring = &rxq->rx_ring[rid]; @@ -248,16 +251,70 @@ show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr) vlib_cli_output (vm, " %5u 0x%016llx 0x%08x", desc_idx, rxd->address, rxd->flags); } + } + else if (show_one_table) + { + if (((which == VMXNET3_SHOW_RX_DESC0) && (rid == 0)) || + ((which == VMXNET3_SHOW_RX_DESC1) && (rid == 1))) + { + vlib_cli_output (vm, "RX descriptors table"); + vlib_cli_output (vm, " %5s %18s %10s", + "slot", "address", "flags"); + if (show_one_slot) + { + rxd = &rxq->rx_desc[rid][slot]; + vlib_cli_output (vm, " %5u 0x%016llx 0x%08x", + slot, rxd->address, rxd->flags); + } + else + for (desc_idx = 0; desc_idx < rxq->size; desc_idx++) + { + rxd = &rxq->rx_desc[rid][desc_idx]; + vlib_cli_output (vm, " %5u 0x%016llx 0x%08x", + desc_idx, rxd->address, + rxd->flags); + } + } + } + } + + /* RX completion table */ + if (show_descr) + { + vlib_cli_output (vm, "RX completion descriptors table"); + vlib_cli_output (vm, " %5s %10s %10s %10s %10s", + "slot", "index", "rss", "len", "flags"); + for (desc_idx = 0; desc_idx < rxq->size; desc_idx++) + { + 
rx_comp = &rxq->rx_comp[desc_idx]; + vlib_cli_output (vm, " %5u 0x%08x %10u %10u 0x%08x", + desc_idx, rx_comp->index, rx_comp->rss, + rx_comp->len, rx_comp->flags); + } + } + else if (show_one_table) + { + if (which == VMXNET3_SHOW_RX_COMP) + { vlib_cli_output (vm, "RX completion descriptors table"); vlib_cli_output (vm, " %5s %10s %10s %10s %10s", "slot", "index", "rss", "len", "flags"); - for (desc_idx = 0; desc_idx < rxq->size; desc_idx++) + if (show_one_slot) { - rx_comp = &rxq->rx_comp[desc_idx]; + rx_comp = &rxq->rx_comp[slot]; vlib_cli_output (vm, " %5u 0x%08x %10u %10u 0x%08x", - desc_idx, rx_comp->index, rx_comp->rss, + slot, rx_comp->index, rx_comp->rss, rx_comp->len, rx_comp->flags); } + else + for (desc_idx = 0; desc_idx < rxq->size; desc_idx++) + { + rx_comp = &rxq->rx_comp[desc_idx]; + vlib_cli_output (vm, + " %5u 0x%08x %10u %10u 0x%08x", + desc_idx, rx_comp->index, rx_comp->rss, + rx_comp->len, rx_comp->flags); + } } } } @@ -285,6 +342,7 @@ show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr) desc_idx, txd->address, txd->flags[0], txd->flags[1]); } + vlib_cli_output (vm, "TX completion descriptors table"); vlib_cli_output (vm, " %5s %10s %10s", "slot", "index", "flags"); @@ -295,6 +353,50 @@ show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr) desc_idx, tx_comp->index, tx_comp->flags); } } + else if (show_one_table) + { + if (which == VMXNET3_SHOW_TX_DESC) + { + vlib_cli_output (vm, "TX descriptors table"); + vlib_cli_output (vm, " %5s %18s %10s %10s", + "slot", "address", "flags0", "flags1"); + if (show_one_slot) + { + txd = &txq->tx_desc[slot]; + vlib_cli_output (vm, " %5u 0x%016llx 0x%08x 0x%08x", + slot, txd->address, txd->flags[0], + txd->flags[1]); + } + else + for (desc_idx = 0; desc_idx < txq->size; desc_idx++) + { + txd = &txq->tx_desc[desc_idx]; + vlib_cli_output (vm, " %5u 0x%016llx 0x%08x 0x%08x", + desc_idx, txd->address, txd->flags[0], + txd->flags[1]); + } + } + else if (which == VMXNET3_SHOW_TX_COMP) + { 
+ vlib_cli_output (vm, "TX completion descriptors table"); + vlib_cli_output (vm, " %5s %10s %10s", + "slot", "index", "flags"); + if (show_one_slot) + { + tx_comp = &txq->tx_comp[slot]; + vlib_cli_output (vm, " %5u 0x%08x 0x%08x", + slot, tx_comp->index, tx_comp->flags); + } + else + for (desc_idx = 0; desc_idx < txq->size; desc_idx++) + { + tx_comp = &txq->tx_comp[desc_idx]; + vlib_cli_output (vm, " %5u 0x%08x 0x%08x", + desc_idx, tx_comp->index, + tx_comp->flags); + } + } + } } } } @@ -308,8 +410,9 @@ show_vmxnet3_fn (vlib_main_t * vm, unformat_input_t * input, vmxnet3_device_t *vd; clib_error_t *error = 0; u32 hw_if_index, *hw_if_indices = 0; - vnet_hw_interface_t *hi; - u8 show_descr = 0; + vnet_hw_interface_t *hi = 0; + u8 show_descr = 0, show_one_table = 0, show_one_slot = 0; + u32 which = ~0, slot; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -325,8 +428,110 @@ show_vmxnet3_fn (vlib_main_t * vm, unformat_input_t * input, } vec_add1 (hw_if_indices, hw_if_index); } - else if (unformat (input, "descriptors") || unformat (input, "desc")) + else if (unformat (input, "desc")) show_descr = 1; + else if (hi) + { + vmxnet3_device_t *vd = + vec_elt_at_index (vmxm->devices, hi->dev_instance); + + if (unformat (input, "rx-comp")) + { + show_one_table = 1; + which = VMXNET3_SHOW_RX_COMP; + if (unformat (input, "%u", &slot)) + { + vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, 0); + + if (slot >= rxq->size) + { + error = clib_error_return (0, + "slot size must be < rx queue " + "size %u", rxq->size); + goto done; + } + show_one_slot = 1; + } + } + else if (unformat (input, "rx-desc-0")) + { + show_one_table = 1; + which = VMXNET3_SHOW_RX_DESC0; + if (unformat (input, "%u", &slot)) + { + vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, 0); + + if (slot >= rxq->size) + { + error = clib_error_return (0, + "slot size must be < rx queue " + "size %u", rxq->size); + goto done; + } + show_one_slot = 1; + } + } + else if (unformat (input, "rx-desc-1")) + 
{ + show_one_table = 1; + which = VMXNET3_SHOW_RX_DESC1; + if (unformat (input, "%u", &slot)) + { + vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, 0); + + if (slot >= rxq->size) + { + error = clib_error_return (0, + "slot size must be < rx queue " + "size %u", rxq->size); + goto done; + } + show_one_slot = 1; + } + } + else if (unformat (input, "tx-comp")) + { + show_one_table = 1; + which = VMXNET3_SHOW_TX_COMP; + if (unformat (input, "%u", &slot)) + { + vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, 0); + + if (slot >= txq->size) + { + error = clib_error_return (0, + "slot size must be < tx queue " + "size %u", txq->size); + goto done; + } + show_one_slot = 1; + } + } + else if (unformat (input, "tx-desc")) + { + show_one_table = 1; + which = VMXNET3_SHOW_TX_DESC; + if (unformat (input, "%u", &slot)) + { + vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, 0); + + if (slot >= txq->size) + { + error = clib_error_return (0, + "slot size must be < tx queue " + "size %u", txq->size); + goto done; + } + show_one_slot = 1; + } + } + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } else { error = clib_error_return (0, "unknown input `%U'", @@ -342,7 +547,8 @@ show_vmxnet3_fn (vlib_main_t * vm, unformat_input_t * input, ); } - show_vmxnet3 (vm, hw_if_indices, show_descr); + show_vmxnet3 (vm, hw_if_indices, show_descr, show_one_table, which, + show_one_slot, slot); done: vec_free (hw_if_indices); @@ -352,7 +558,8 @@ show_vmxnet3_fn (vlib_main_t * vm, unformat_input_t * input, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_vmxnet3_command, static) = { .path = "show vmxnet3", - .short_help = "show vmxnet3 []", + .short_help = "show vmxnet3 [[] ([desc] | ([rx-comp] | " + "[rx-desc-0] | [rx-desc-1] | [tx-comp] | [tx-desc]) [])]", .function = show_vmxnet3_fn, }; /* *INDENT-ON* */ diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h index f3868a88ae3b..491b8c102463 100644 --- 
a/src/plugins/vmxnet3/vmxnet3.h +++ b/src/plugins/vmxnet3/vmxnet3.h @@ -43,6 +43,20 @@ enum #undef _ }; +#define foreach_vmxnet3_show_entry \ + _(RX_COMP, "rx comp") \ + _(RX_DESC0, "rx desc 0") \ + _(RX_DESC1, "rx desc 1") \ + _(TX_COMP, "tx comp") \ + _(TX_DESC, "tx desc") + +enum +{ +#define _(a, b) VMXNET3_SHOW_##a, + foreach_vmxnet3_show_entry +#undef _ +}; + /* BAR 0 */ #define VMXNET3_REG_IMR 0x0000 /* Interrupt Mask Register */ #define VMXNET3_REG_TXPROD 0x0600 /* Tx Producer Index */ @@ -396,8 +410,8 @@ typedef struct typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u64 next; u32 gen; + u16 next; } vmxnet3_rx_comp_ring; typedef struct @@ -423,8 +437,8 @@ typedef struct typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u64 next; u32 gen; + u16 next; } vmxnet3_tx_comp_ring; typedef struct From d6a0d0e2063fcc93eb56fc8a542e89a8e0b53741 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Sat, 13 Oct 2018 12:20:01 -0700 Subject: [PATCH 17/70] vcl: fix bidirectional tests (VPP-1455) - add epoll dequeued events beyond maxevents to unhandled - filter multiple epoll rx events Change-Id: I618f5f02b19581473de891b3b59bb6a0faad10b5 Signed-off-by: Florin Coras (cherry picked from commit aa27eb95b7ee3bb69b62166d5e418e973cbbdcfa) --- src/vcl/vcl_private.h | 1 + src/vcl/vcl_test.h | 2 +- src/vcl/vppcom.c | 27 ++++++++++--------- src/vnet/session/session_api.c | 48 +++++++++++++++++----------------- 4 files changed, 41 insertions(+), 37 deletions(-) diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h index 5975f15ac4f3..d1a40b933a7c 100644 --- a/src/vcl/vcl_private.h +++ b/src/vcl/vcl_private.h @@ -153,6 +153,7 @@ typedef struct /* Socket configuration state */ u8 is_vep; u8 is_vep_session; + u8 has_rx_evt; u32 attr; u32 wait_cont_idx; vppcom_epoll_t vep; diff --git a/src/vcl/vcl_test.h b/src/vcl/vcl_test.h index 927110f55d20..9d28b262e3ab 100644 --- a/src/vcl/vcl_test.h +++ b/src/vcl/vcl_test.h @@ -438,7 +438,7 @@ vcl_test_write (int fd, uint8_t * 
buf, uint32_t nbytes, { if (stats) stats->tx_eagain++; - continue; + break; } else break; diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index df4ebde72eba..fad2ac98538a 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -1293,13 +1293,14 @@ vppcom_session_read_internal (uint32_t session_handle, void *buf, int n, is_ct = vcl_session_is_ct (s); mq = is_ct ? s->our_evt_q : wrk->app_event_queue; rx_fifo = s->rx_fifo; + s->has_rx_evt = 0; if (svm_fifo_is_empty (rx_fifo)) { if (is_nonblocking) { svm_fifo_unset_event (rx_fifo); - return VPPCOM_OK; + return VPPCOM_EWOULDBLOCK; } while (svm_fifo_is_empty (rx_fifo)) { @@ -1385,13 +1386,14 @@ vppcom_session_read_segments (uint32_t session_handle, is_ct = vcl_session_is_ct (s); mq = is_ct ? s->our_evt_q : wrk->app_event_queue; rx_fifo = s->rx_fifo; + s->has_rx_evt = 0; if (svm_fifo_is_empty (rx_fifo)) { if (is_nonblocking) { svm_fifo_unset_event (rx_fifo); - return VPPCOM_OK; + return VPPCOM_EWOULDBLOCK; } while (svm_fifo_is_empty (rx_fifo)) { @@ -1551,7 +1553,8 @@ vppcom_session_write (uint32_t session_handle, void *buf, size_t n) { svm_fifo_set_want_tx_evt (tx_fifo, 1); svm_msg_q_lock (mq); - svm_msg_q_wait (mq); + if (svm_msg_q_is_empty (mq)) + svm_msg_q_wait (mq); svm_msg_q_sub_w_lock (mq, &msg); e = svm_msg_q_msg_data (mq, &msg); @@ -2303,11 +2306,12 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e, sid = e->fifo->client_session_index; session = vcl_session_get (wrk, sid); session_events = session->vep.ev.events; - if (!(EPOLLIN & session->vep.ev.events)) + if (!(EPOLLIN & session->vep.ev.events) || session->has_rx_evt) break; add_event = 1; events[*num_ev].events |= EPOLLIN; session_evt_data = session->vep.ev.data.u64; + session->has_rx_evt = 1; break; case FIFO_EVENT_APP_TX: sid = e->fifo->client_session_index; @@ -2324,11 +2328,12 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e, session = vcl_ct_session_get_from_fifo (wrk, e->fifo, 0); sid = 
session->session_index; session_events = session->vep.ev.events; - if (!(EPOLLIN & session->vep.ev.events)) + if (!(EPOLLIN & session->vep.ev.events) || session->has_rx_evt) break; add_event = 1; events[*num_ev].events |= EPOLLIN; session_evt_data = session->vep.ev.data.u64; + session->has_rx_evt = 1; break; case SESSION_IO_EVT_CT_RX: session = vcl_ct_session_get_from_fifo (wrk, e->fifo, 1); @@ -2452,15 +2457,13 @@ vcl_epoll_wait_handle_mq (vcl_worker_t * wrk, svm_msg_q_t * mq, { msg = vec_elt_at_index (wrk->mq_msg_vector, i); e = svm_msg_q_msg_data (mq, msg); - vcl_epoll_wait_handle_mq_event (wrk, e, events, num_ev); + if (*num_ev < maxevents) + vcl_epoll_wait_handle_mq_event (wrk, e, events, num_ev); + else + vec_add1 (wrk->unhandled_evts_vector, *e); svm_msg_q_free_msg (mq, msg); - if (*num_ev == maxevents) - { - i += 1; - break; - } } - vec_delete (wrk->mq_msg_vector, i, 0); + vec_reset_length (wrk->mq_msg_vector); return *num_ev; } diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index e82be560b1fe..565938653816 100755 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -278,30 +278,6 @@ send_session_accept_callback (stream_session_t * s) return 0; } -void -mq_send_local_session_disconnected_cb (u32 app_wrk_index, - local_session_t * ls) -{ - app_worker_t *app_wrk = app_worker_get (app_wrk_index); - svm_msg_q_msg_t _msg, *msg = &_msg; - session_disconnected_msg_t *mp; - svm_msg_q_t *app_mq; - session_event_t *evt; - application_t *app; - - app = application_get (app_wrk->app_index); - app_mq = app_wrk->event_queue; - svm_msg_q_lock_and_alloc_msg_w_ring (app_mq, SESSION_MQ_CTRL_EVT_RING, - SVM_Q_WAIT, msg); - evt = svm_msg_q_msg_data (app_mq, msg); - memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_DISCONNECTED; - mp = (session_disconnected_msg_t *) evt->data; - mp->handle = application_local_session_handle (ls); - mp->context = app->api_client_index; - svm_msg_q_add_and_unlock (app_mq, 
msg); -} - static void send_session_disconnect_callback (stream_session_t * s) { @@ -551,6 +527,30 @@ mq_send_session_disconnected_cb (stream_session_t * s) svm_msg_q_add_and_unlock (app_mq, msg); } +void +mq_send_local_session_disconnected_cb (u32 app_wrk_index, + local_session_t * ls) +{ + app_worker_t *app_wrk = app_worker_get (app_wrk_index); + svm_msg_q_msg_t _msg, *msg = &_msg; + session_disconnected_msg_t *mp; + svm_msg_q_t *app_mq; + session_event_t *evt; + application_t *app; + + app = application_get (app_wrk->app_index); + app_mq = app_wrk->event_queue; + if (mq_try_lock_and_alloc_msg (app_mq, msg)) + return; + evt = svm_msg_q_msg_data (app_mq, msg); + memset (evt, 0, sizeof (*evt)); + evt->event_type = SESSION_CTRL_EVT_DISCONNECTED; + mp = (session_disconnected_msg_t *) evt->data; + mp->handle = application_local_session_handle (ls); + mp->context = app->api_client_index; + svm_msg_q_add_and_unlock (app_mq, msg); +} + static void mq_send_session_reset_cb (stream_session_t * s) { From 84112dd4f98e5a31a8c7340a741f89e77fd03363 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Tue, 16 Oct 2018 17:18:26 +0200 Subject: [PATCH 18/70] acl-plugin: tuplemerge: refresh the pointer to hash-readied ACL entries per each collision in split_partition() (VPP-1458) A pointer to hash-ready ACL rules is only set once, which might cause a crash if there are colliding entries from more than one ACL applied. Solution: reload the pointer based on the element being processed. 
Change-Id: I7a701c2c3b4236d67293159f2a33c4f967168953 Signed-off-by: Andrew Yourtchenko --- src/plugins/acl/hash_lookup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c index c37aae44a988..0087a5e2ab74 100644 --- a/src/plugins/acl/hash_lookup.c +++ b/src/plugins/acl/hash_lookup.c @@ -1487,6 +1487,8 @@ split_partition(acl_main_t *am, u32 first_index, int i=0; u64 collisions = vec_len(pae->colliding_rules); for(i=0; ihash_acl_infos, pae->acl_index); DBG( "TM-collision: base_ace:%d (ace_mask:%d, first_collision_mask:%d)", pae->ace_index, pae->mask_type_index, coll_mask_type_index); From 7d76878ab3eea9d233d569d9b714a40924fbaa8b Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Mon, 15 Oct 2018 21:35:42 -0700 Subject: [PATCH 19/70] tls: fix multi threaded medium scale test (VPP-1457) - ensure session enqueue epoch does not wrap between two enqueues - use 3 states for echo clients app, to distinguish between starting and closing phases - force tcp fin retransmit if out of buffers while sending a fin Change-Id: I6f2cab46affd1148aba2a33fb6d58bcc54f32805 Signed-off-by: Florin Coras --- src/vnet/session-apps/echo_client.c | 26 +++++++++++++++++++++++--- src/vnet/session-apps/echo_client.h | 6 ++++++ src/vnet/session/session.c | 8 ++++---- src/vnet/session/session.h | 3 ++- src/vnet/session/stream_session.h | 5 ++++- src/vnet/tcp/tcp_output.c | 10 +++++++++- src/vnet/tls/tls.c | 5 ++++- 7 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/vnet/session-apps/echo_client.c b/src/vnet/session-apps/echo_client.c index b47dcf21a4b9..0258c16520a5 100644 --- a/src/vnet/session-apps/echo_client.c +++ b/src/vnet/session-apps/echo_client.c @@ -208,7 +208,7 @@ echo_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, connections_this_batch = ecm->connections_this_batch_by_thread[my_thread_index]; - if ((ecm->run_test == 0) || + if ((ecm->run_test != ECHO_CLIENTS_RUNNING) || ((vec_len 
(connection_indices) == 0) && vec_len (connections_this_batch) == 0)) return 0; @@ -352,6 +352,16 @@ echo_clients_init (vlib_main_t * vm) return 0; } +static void +echo_clients_session_disconnect (stream_session_t * s) +{ + echo_client_main_t *ecm = &echo_client_main; + vnet_disconnect_args_t _a, *a = &_a; + a->handle = session_handle (s); + a->app_index = ecm->app_index; + vnet_disconnect_session (a); +} + static int echo_clients_session_connected_callback (u32 app_index, u32 api_context, stream_session_t * s, u8 is_fail) @@ -361,6 +371,9 @@ echo_clients_session_connected_callback (u32 app_index, u32 api_context, u32 session_index; u8 thread_index; + if (PREDICT_FALSE (ecm->run_test != ECHO_CLIENTS_STARTING)) + return -1; + if (is_fail) { clib_warning ("connection %d failed!", api_context); @@ -407,7 +420,7 @@ echo_clients_session_connected_callback (u32 app_index, u32 api_context, __sync_fetch_and_add (&ecm->ready_connections, 1); if (ecm->ready_connections == ecm->expected_connections) { - ecm->run_test = 1; + ecm->run_test = ECHO_CLIENTS_RUNNING; /* Signal the CLI process that the action is starting... 
*/ signal_evt_to_cli (1); } @@ -447,6 +460,12 @@ echo_clients_rx_callback (stream_session_t * s) echo_client_main_t *ecm = &echo_client_main; eclient_session_t *sp; + if (PREDICT_FALSE (ecm->run_test != ECHO_CLIENTS_RUNNING)) + { + echo_clients_session_disconnect (s); + return -1; + } + sp = pool_elt_at_index (ecm->sessions, s->server_rx_fifo->client_session_index); receive_data_chunk (ecm, sp); @@ -624,6 +643,7 @@ echo_clients_command_fn (vlib_main_t * vm, ecm->vlib_main = vm; ecm->tls_engine = TLS_ENGINE_OPENSSL; ecm->no_copy = 0; + ecm->run_test = ECHO_CLIENTS_STARTING; if (thread_main->n_vlib_mains > 1) clib_spinlock_init (&ecm->sessions_lock); @@ -825,7 +845,7 @@ echo_clients_command_fn (vlib_main_t * vm, error = clib_error_return (0, "failed: test bytes"); cleanup: - ecm->run_test = 0; + ecm->run_test = ECHO_CLIENTS_EXITING; vlib_process_wait_for_event_or_clock (vm, 10e-3); for (i = 0; i < vec_len (ecm->connection_index_by_thread); i++) { diff --git a/src/vnet/session-apps/echo_client.h b/src/vnet/session-apps/echo_client.h index db5ba1636289..2270720dcdad 100644 --- a/src/vnet/session-apps/echo_client.h +++ b/src/vnet/session-apps/echo_client.h @@ -105,6 +105,12 @@ typedef struct vlib_main_t *vlib_main; } echo_client_main_t; +enum +{ + ECHO_CLIENTS_STARTING, + ECHO_CLIENTS_RUNNING, + ECHO_CLIENTS_EXITING +} echo_clients_test_state_e; extern echo_client_main_t echo_client_main; vlib_node_registration_t echo_clients_node; diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 83b96d317dcd..7b50950fe017 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -153,7 +153,7 @@ session_free (stream_session_t * s) memset (s, 0xFA, sizeof (*s)); } -static void +void session_free_w_fifos (stream_session_t * s) { segment_manager_dealloc_fifos (s->svm_segment_index, s->server_rx_fifo, @@ -197,7 +197,7 @@ session_alloc_for_connection (transport_connection_t * tc) s = session_alloc (thread_index); s->session_type = 
session_type_from_proto_and_ip (tc->proto, tc->is_ip4); s->session_state = SESSION_STATE_CONNECTING; - s->enqueue_epoch = ~0; + s->enqueue_epoch = (u64) ~ 0; /* Attach transport to session and vice versa */ s->connection_index = tc->c_index; @@ -393,7 +393,7 @@ session_enqueue_stream_connection (transport_connection_t * tc, * by calling stream_server_flush_enqueue_events () */ session_manager_main_t *smm = vnet_get_session_manager_main (); u32 thread_index = s->thread_index; - u32 enqueue_epoch = smm->current_enqueue_epoch[tc->proto][thread_index]; + u64 enqueue_epoch = smm->current_enqueue_epoch[tc->proto][thread_index]; if (s->enqueue_epoch != enqueue_epoch) { @@ -434,7 +434,7 @@ session_enqueue_dgram_connection (stream_session_t * s, * by calling stream_server_flush_enqueue_events () */ session_manager_main_t *smm = vnet_get_session_manager_main (); u32 thread_index = s->thread_index; - u32 enqueue_epoch = smm->current_enqueue_epoch[proto][thread_index]; + u64 enqueue_epoch = smm->current_enqueue_epoch[proto][thread_index]; if (s->enqueue_epoch != enqueue_epoch) { diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 1e08cccb6f7f..914e0581fecd 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -195,7 +195,7 @@ struct _session_manager_main clib_rwlock_t *peekers_rw_locks; /** Per-proto, per-worker enqueue epoch counters */ - u32 *current_enqueue_epoch[TRANSPORT_N_PROTO]; + u64 *current_enqueue_epoch[TRANSPORT_N_PROTO]; /** Per-proto, per-worker thread vector of sessions to enqueue */ u32 **session_to_enqueue[TRANSPORT_N_PROTO]; @@ -308,6 +308,7 @@ stream_session_is_valid (u32 si, u8 thread_index) stream_session_t *session_alloc (u32 thread_index); int session_alloc_fifos (segment_manager_t * sm, stream_session_t * s); void session_free (stream_session_t * s); +void session_free_w_fifos (stream_session_t * s); always_inline stream_session_t * session_get (u32 si, u32 thread_index) diff --git 
a/src/vnet/session/stream_session.h b/src/vnet/session/stream_session.h index 30178d7a4539..287a8927339a 100644 --- a/src/vnet/session/stream_session.h +++ b/src/vnet/session/stream_session.h @@ -67,7 +67,7 @@ typedef struct _stream_session_t u8 thread_index; /** To avoid n**2 "one event per frame" check */ - u8 enqueue_epoch; + u64 enqueue_epoch; /** svm segment index where fifos were allocated */ u32 svm_segment_index; @@ -120,6 +120,9 @@ typedef struct local_session_ /** Port for connection. Overlaps thread_index/enqueue_epoch */ u16 port; + /** Partly overlaps enqueue_epoch */ + u8 pad_epoch[7]; + /** Segment index where fifos were allocated */ u32 svm_segment_index; diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 0d5feb976f86..79d64cf2da3c 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1078,7 +1078,15 @@ tcp_send_fin (tcp_connection_t * tc) tcp_retransmit_timer_force_update (tc); if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) - return; + { + /* Out of buffers so program fin retransmit ASAP */ + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1); + tc->flags |= TCP_CONN_FINSNT; + tc->snd_una_max += 1; + tc->snd_nxt = tc->snd_una_max; + return; + } + b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); fin_snt = tc->flags & TCP_CONN_FINSNT; diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c index aba7919927c0..9a82360610b8 100644 --- a/src/vnet/tls/tls.c +++ b/src/vnet/tls/tls.c @@ -119,6 +119,7 @@ tls_ctx_half_open_alloc (void) { clib_rwlock_writer_lock (&tm->half_open_rwlock); pool_get (tm->half_open_ctx_pool, ctx); + ctx_index = ctx - tm->half_open_ctx_pool; clib_rwlock_writer_unlock (&tm->half_open_rwlock); } else @@ -126,10 +127,10 @@ tls_ctx_half_open_alloc (void) /* reader lock assumption: only main thread will call pool_get */ clib_rwlock_reader_lock (&tm->half_open_rwlock); pool_get (tm->half_open_ctx_pool, ctx); + ctx_index = ctx - tm->half_open_ctx_pool; clib_rwlock_reader_unlock 
(&tm->half_open_rwlock); } memset (ctx, 0, sizeof (*ctx)); - ctx_index = ctx - tm->half_open_ctx_pool; return ctx_index; } @@ -254,6 +255,8 @@ tls_notify_app_connected (tls_ctx_t * ctx, u8 is_failed) { TLS_DBG (1, "failed to notify app"); tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ()); + session_free_w_fifos (app_session); + return -1; } session_lookup_add_connection (&ctx->connection, From b3aff922ffbddd61b44df50271e4aaee2820a432 Mon Sep 17 00:00:00 2001 From: "mu.duojiao" Date: Wed, 17 Oct 2018 10:59:09 +0800 Subject: [PATCH 20/70] VPP-1459:Ip4 lookup fail when exist prefix cover. Change-Id: I4ba0aeb65219596475345e42b8cd34019f5594c6 Signed-off-by: mu.duojiao (cherry picked from commit 9744e6d0273c0d7d11ab4f271c8694f69d51ccf3) --- src/vnet/ip/ip4_mtrie.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) mode change 100644 => 100755 src/vnet/ip/ip4_mtrie.c diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c old mode 100644 new mode 100755 index 6cd199a971e2..fbb8a7480742 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -551,9 +551,7 @@ unset_leaf (ip4_fib_mtrie_t * m, old_ply->leaves[i] = ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index); - old_ply->dst_address_bits_of_leaves[i] = - clib_max (old_ply->dst_address_bits_base, - a->cover_address_length); + old_ply->dst_address_bits_of_leaves[i] = a->cover_address_length; old_ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); From f1a1a4dbfc20fc7eafbcbf0a1f7ea22a7f0a0860 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Tue, 16 Oct 2018 19:52:10 -0700 Subject: [PATCH 21/70] tcp: avoid sack processing when not needed (VPP-1460) Change-Id: If81ee34e1f1e929de1a5b758ddb9aede4002e858 Signed-off-by: Florin Coras --- src/plugins/unittest/tcp_test.c | 2 +- src/vnet/tcp/tcp.c | 6 ++++-- src/vnet/tcp/tcp_input.c | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c index 
d26532d4596d..33a7fd6af0c6 100644 --- a/src/plugins/unittest/tcp_test.c +++ b/src/plugins/unittest/tcp_test.c @@ -1580,7 +1580,6 @@ tcp_test_lookup (vlib_main_t * vm, unformat_input_t * input) tc->connection.proto = TRANSPORT_PROTO_TCP; tc->connection.is_ip4 = 1; clib_memcpy (tc1, &tc->connection, sizeof (*tc1)); - s1 = s; /* * Allocate fake session and connection 2 @@ -1607,6 +1606,7 @@ tcp_test_lookup (vlib_main_t * vm, unformat_input_t * input) * Confirm that connection lookup works */ + s1 = pool_elt_at_index (smm->sessions[0], 0); session_lookup_add_connection (tc1, session_handle (s1)); tconn = session_lookup_connection_wt4 (0, &tc1->lcl_ip.ip4, &tc1->rmt_ip.ip4, diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 1c8ce34a728a..884602deb6f4 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -939,9 +939,11 @@ format_tcp_scoreboard (u8 * s, va_list * args) s = format (s, "sacked_bytes %u last_sacked_bytes %u lost_bytes %u\n", sb->sacked_bytes, sb->last_sacked_bytes, sb->lost_bytes); s = format (s, " last_bytes_delivered %u high_sacked %u snd_una_adv %u\n", - sb->last_bytes_delivered, sb->high_sacked, sb->snd_una_adv); + sb->last_bytes_delivered, sb->high_sacked - tc->iss, + sb->snd_una_adv); s = format (s, " cur_rxt_hole %u high_rxt %u rescue_rxt %u", - sb->cur_rxt_hole, sb->high_rxt, sb->rescue_rxt); + sb->cur_rxt_hole, sb->high_rxt - tc->iss, + sb->rescue_rxt - tc->iss); hole = scoreboard_first_hole (sb); if (hole) diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 10f50fefc093..85aaa16f245c 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -135,7 +135,8 @@ tcp_options_parse (tcp_header_t * th, tcp_options_t * to) data = (const u8 *) (th + 1); /* Zero out all flags but those set in SYN */ - to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE); + to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE + | TCP_OPTS_FLAG_SACK); for (; opts_len > 0; opts_len -= opt_len, data += 
opt_len) { From 44c6ca6038776787671e78e3dae78e812c72429e Mon Sep 17 00:00:00 2001 From: Marco Varlese Date: Wed, 17 Oct 2018 11:21:32 +0200 Subject: [PATCH 22/70] Update to doxygen documentation for release 18.10 Change-Id: Id8eebd59bad27ac3cc46bf993a5ca1d8410bf84c Signed-off-by: Marco Varlese --- doxygen/test_framework_doc.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doxygen/test_framework_doc.md b/doxygen/test_framework_doc.md index cedd6d31ced3..3da29bbf1050 100644 --- a/doxygen/test_framework_doc.md +++ b/doxygen/test_framework_doc.md @@ -4,6 +4,8 @@ Test Framework Documentation {#test_framework_doc} PyDoc generated documentation for the "make test" framework is available for the following releases: +- [Test framework documentation for VPP 18.10](https://docs.fd.io/vpp/18.10/vpp_make_test/html) +- [Test framework documentation for VPP 18.07](https://docs.fd.io/vpp/18.07/vpp_make_test/html) - [Test framework documentation for VPP 18.04](https://docs.fd.io/vpp/18.04/vpp_make_test/html) - [Test framework documentation for VPP 18.01](https://docs.fd.io/vpp/18.01/vpp_make_test/html) - [Test framework documentation for VPP 17.10](https://docs.fd.io/vpp/17.10/vpp_make_test/html) From 44c6e1d1880513e6e02612509f27b24da04bb5b3 Mon Sep 17 00:00:00 2001 From: Michal Cmarada Date: Wed, 17 Oct 2018 13:40:32 +0200 Subject: [PATCH 23/70] fix ip6 prefix check for tap_v2 Change-Id: Ifbb1393fc3c50c140c09e8baedcd4a92189ba56e Signed-off-by: Michal Cmarada --- src/vat/api_format.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 0e53f56196ad..96cf80269005 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -8315,7 +8315,7 @@ api_tap_create_v2 (vat_main_t * vam) clib_memcpy (mp->host_bridge, host_bridge, vec_len (host_bridge)); if (host_ip4_prefix_len) clib_memcpy (mp->host_ip4_addr, &host_ip4_addr, 4); - if (host_ip4_prefix_len) + if (host_ip6_prefix_len) clib_memcpy (mp->host_ip6_addr, 
&host_ip6_addr, 16); if (host_ip4_gw_set) clib_memcpy (mp->host_ip4_gw, &host_ip4_gw, 4); From 0d87894bf279a4678cfca6cc438583090b166f85 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Wed, 17 Oct 2018 17:13:42 +0300 Subject: [PATCH 24/70] vxlan:decap caching error (VPP-1462) Change-Id: I3ef0725684bcb8ea526abe0ce62562b35a0070f5 Signed-off-by: Eyal Bari --- src/vnet/vxlan/decap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c index 387539d8f4a2..b84a9b6eb56e 100644 --- a/src/vnet/vxlan/decap.c +++ b/src/vnet/vxlan/decap.c @@ -91,7 +91,7 @@ vxlan4_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache4 * cache, }; if (PREDICT_TRUE - (key4.key[0] == cache->key[0] || key4.key[1] == cache->key[1])) + (key4.key[0] == cache->key[0] && key4.key[1] == cache->key[1])) { /* cache hit */ vxlan_decap_info_t di = {.as_u64 = cache->value }; From c10c73ffb3fa45f4f054e9517f1f80bbec489b47 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Tue, 16 Oct 2018 20:30:31 -0700 Subject: [PATCH 25/70] tcp: fix multiple fin retries (VPP-1461) Change-Id: I1be7c59df7b48875f81ebeebf5f39ed15a43d2d8 Signed-off-by: Florin Coras (cherry picked from commit c977e7c100f5727183f9452e2c0d086623a21d15) --- src/vnet/session-apps/echo_client.c | 2 +- src/vnet/tcp/tcp_output.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/vnet/session-apps/echo_client.c b/src/vnet/session-apps/echo_client.c index 0258c16520a5..1fd7ad0cd9fe 100644 --- a/src/vnet/session-apps/echo_client.c +++ b/src/vnet/session-apps/echo_client.c @@ -765,7 +765,7 @@ echo_clients_command_fn (vlib_main_t * vm, /* Fire off connect requests */ time_before_connects = vlib_time_now (vm); if ((error = echo_clients_connect (vm, n_clients))) - return error; + goto cleanup; /* Park until the sessions come up, or ten seconds elapse... 
*/ vlib_process_wait_for_event_or_clock (vm, syn_timeout); diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 79d64cf2da3c..7d7c32ad210a 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1076,24 +1076,25 @@ tcp_send_fin (tcp_connection_t * tc) u32 bi; u8 fin_snt = 0; - tcp_retransmit_timer_force_update (tc); + fin_snt = tc->flags & TCP_CONN_FINSNT; + if (fin_snt) + tc->snd_nxt = tc->snd_una; + if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) { /* Out of buffers so program fin retransmit ASAP */ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1); - tc->flags |= TCP_CONN_FINSNT; - tc->snd_una_max += 1; - tc->snd_nxt = tc->snd_una_max; - return; + goto post_enqueue; } + tcp_retransmit_timer_force_update (tc); b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); - fin_snt = tc->flags & TCP_CONN_FINSNT; - if (fin_snt) - tc->snd_nxt = tc->snd_una; tcp_make_fin (tc, b); tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4); + TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc); + +post_enqueue: if (!fin_snt) { tc->flags |= TCP_CONN_FINSNT; @@ -1106,7 +1107,6 @@ tcp_send_fin (tcp_connection_t * tc) { tc->snd_nxt = tc->snd_una_max; } - TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc); } always_inline u8 From dc532e4ff75af463794fe64764da890427a278d5 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Wed, 17 Oct 2018 10:41:28 -0700 Subject: [PATCH 26/70] tcp: fix cleanup of non established connections (VPP-1463) - fix delete of connection in syn-received - fix delete of half-open connection Change-Id: I72ff4b60406a2762d998328c52f41adea40d2c1b Signed-off-by: Florin Coras --- src/vnet/session/session.c | 1 + src/vnet/tcp/tcp.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 7b50950fe017..23d258ffdea2 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -820,6 +820,7 @@ stream_session_delete_notify (transport_connection_t * tc) session_lookup_del_session 
(s); break; case SESSION_STATE_CLOSED: + case SESSION_STATE_ACCEPTING: stream_session_delete (s); break; } diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 884602deb6f4..5378de1c1daa 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -210,7 +210,8 @@ tcp_connection_cleanup (tcp_connection_t * tc) /* Try to remove the half-open connection. If this is not the owning * thread, tc won't be removed. Retransmit or establish timers will * eventually expire and call again cleanup on the right thread. */ - tcp_half_open_connection_cleanup (tc); + if (tcp_half_open_connection_cleanup (tc)) + tc->flags |= TCP_CONN_HALF_OPEN_DONE; } else { @@ -322,8 +323,10 @@ tcp_connection_close (tcp_connection_t * tc) tc->state = TCP_STATE_CLOSED; break; case TCP_STATE_SYN_RCVD: + tcp_connection_timers_reset (tc); tcp_send_fin (tc); tc->state = TCP_STATE_FIN_WAIT_1; + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); break; case TCP_STATE_ESTABLISHED: if (!session_tx_fifo_max_dequeue (&tc->connection)) From e1fe33d890741eddb1877446e87f9b18bdc1490b Mon Sep 17 00:00:00 2001 From: Marco Varlese Date: Thu, 18 Oct 2018 09:37:17 +0200 Subject: [PATCH 27/70] Update version (18.10) for API changes script Change-Id: I41d22bfc87849e923628de08f922f7a541579fe1 Signed-off-by: Marco Varlese --- extras/scripts/list_api_changes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extras/scripts/list_api_changes.py b/extras/scripts/list_api_changes.py index a25edf2e8ebe..e56da0ace197 100755 --- a/extras/scripts/list_api_changes.py +++ b/extras/scripts/list_api_changes.py @@ -1,8 +1,8 @@ #!/usr/bin/env python import os, fnmatch, subprocess -starttag = 'v18.07-rc0' -endtag = 'v18.07' +starttag = 'v18.10-rc0' +endtag = 'v18.10' emit_md = True apifiles = [] From 703ee73dfbbec8c34ff7106170f5de05d9eae689 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Wed, 17 Oct 2018 23:34:54 -0700 Subject: [PATCH 28/70] tcp: fix sacks lost bytes counting (VPP-1465) Change-Id: 
Ie46b3a81de4ed39b7b40e3879436f7e5a2908d98 Signed-off-by: Florin Coras --- src/plugins/unittest/tcp_test.c | 124 +++++++++++++++++++++++--------- src/vnet/tcp/tcp_input.c | 44 +++++++----- 2 files changed, 118 insertions(+), 50 deletions(-) diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c index 33a7fd6af0c6..92bf0ffd9c41 100644 --- a/src/plugins/unittest/tcp_test.c +++ b/src/plugins/unittest/tcp_test.c @@ -89,6 +89,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tc->snd_una_max = 1000; tc->snd_nxt = 1000; tc->rcv_opts.flags |= TCP_OPTS_FLAG_SACK; + tc->snd_mss = 150; scoreboard_init (&tc->sack_sb); for (i = 0; i < 1000 / 100; i++) @@ -110,8 +111,8 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tcp_rcv_sacks (tc, 0); if (verbose) - vlib_cli_output (vm, "sb after even blocks:\n%U", format_tcp_scoreboard, - sb); + vlib_cli_output (vm, "sb after even blocks (mss %u):\n%U", + tc->snd_mss, format_tcp_scoreboard, sb, tc); TCP_TEST ((pool_elts (sb->holes) == 5), "scoreboard has %d elements", pool_elts (sb->holes)); @@ -127,7 +128,9 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); TCP_TEST ((sb->last_sacked_bytes == 400), "last sacked bytes %d", sb->last_sacked_bytes); - TCP_TEST ((sb->high_sacked == 900), "max byte sacked %u", sb->high_sacked); + TCP_TEST ((sb->high_sacked == 900), "high sacked %u", sb->high_sacked); + TCP_TEST ((sb->lost_bytes == 200), "lost bytes %u", sb->lost_bytes); + /* * Inject odd blocks */ @@ -141,8 +144,8 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tcp_rcv_sacks (tc, 0); if (verbose) - vlib_cli_output (vm, "sb after odd blocks:\n%U", format_tcp_scoreboard, - sb); + vlib_cli_output (vm, "\nsb after odd blocks:\n%U", format_tcp_scoreboard, + sb, tc); hole = scoreboard_first_hole (sb); TCP_TEST ((pool_elts (sb->holes) == 1), @@ -151,17 +154,18 @@ tcp_test_sack_rx (vlib_main_t * 
vm, unformat_input_t * input) "first hole start %u end %u", hole->start, hole->end); TCP_TEST ((sb->sacked_bytes == 900), "sacked bytes %d", sb->sacked_bytes); TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); - TCP_TEST ((sb->high_sacked == 1000), "max sacked byte %u", sb->high_sacked); + TCP_TEST ((sb->high_sacked == 1000), "high sacked %u", sb->high_sacked); TCP_TEST ((sb->last_sacked_bytes == 500), "last sacked bytes %d", sb->last_sacked_bytes); + TCP_TEST ((sb->lost_bytes == 100), "lost bytes %u", sb->lost_bytes); /* * Ack until byte 100, all bytes are now acked + sacked */ tcp_rcv_sacks (tc, 100); if (verbose) - vlib_cli_output (vm, "ack until byte 100:\n%U", format_tcp_scoreboard, - sb); + vlib_cli_output (vm, "\nack until byte 100:\n%U", format_tcp_scoreboard, + sb, tc); TCP_TEST ((pool_elts (sb->holes) == 0), "scoreboard has %d elements", pool_elts (sb->holes)); @@ -171,6 +175,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); TCP_TEST ((sb->last_sacked_bytes == 0), "last sacked bytes %d", sb->last_sacked_bytes); + TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); /* * Add new block @@ -182,16 +187,14 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) block.end = 1300; vec_add1 (tc->rcv_opts.sacks, block); - if (verbose) - vlib_cli_output (vm, "add [1200, 1300]:\n%U", format_tcp_scoreboard, sb); tc->snd_una_max = 1500; tc->snd_una = 1000; tc->snd_nxt = 1500; tcp_rcv_sacks (tc, 1000); if (verbose) - vlib_cli_output (vm, "sb snd_una_max 1500, snd_una 1000:\n%U", - format_tcp_scoreboard, sb); + vlib_cli_output (vm, "\nadd [1200, 1300] snd_una_max 1500, snd_una 1000:" + " \n%U", format_tcp_scoreboard, sb, tc); TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv after ack %u", sb->snd_una_adv); @@ -207,6 +210,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((hole->start == 1300 && hole->end == 1500), 
"last hole start %u end %u", hole->start, hole->end); TCP_TEST ((sb->sacked_bytes == 100), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); /* * Ack first hole @@ -216,19 +220,19 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tcp_rcv_sacks (tc, 1200); if (verbose) - vlib_cli_output (vm, "sb ack up to byte 1200:\n%U", format_tcp_scoreboard, - sb); + vlib_cli_output (vm, "\nsb ack up to byte 1200:\n%U", + format_tcp_scoreboard, sb, tc); TCP_TEST ((sb->snd_una_adv == 100), "snd_una_adv after ack %u", sb->snd_una_adv); TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); - TCP_TEST ((pool_elts (sb->holes) == 1), + TCP_TEST ((pool_elts (sb->holes) == 0), "scoreboard has %d elements", pool_elts (sb->holes)); - hole = scoreboard_first_hole (sb); - TCP_TEST ((hole->prev == TCP_INVALID_SACK_HOLE_INDEX - && hole->next == TCP_INVALID_SACK_HOLE_INDEX), "hole is valid"); TCP_TEST ((sb->last_bytes_delivered == 100), "last bytes delivered %d", sb->last_bytes_delivered); + TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); + TCP_TEST ((sb->head == TCP_INVALID_SACK_HOLE_INDEX), "head %u", sb->head); + TCP_TEST ((sb->tail == TCP_INVALID_SACK_HOLE_INDEX), "tail %u", sb->tail); /* * Add some more blocks and then remove all @@ -246,7 +250,8 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) scoreboard_clear (sb); if (verbose) - vlib_cli_output (vm, "sb cleared all:\n%U", format_tcp_scoreboard, sb); + vlib_cli_output (vm, "\nsb cleared all:\n%U", format_tcp_scoreboard, sb, + tc); TCP_TEST ((pool_elts (sb->holes) == 0), "number of holes %d", pool_elts (sb->holes)); @@ -267,14 +272,17 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks); tcp_rcv_sacks (tc, 0); if (verbose) - vlib_cli_output (vm, "sb added odd blocks and ack [0, 950]:\n%U", - format_tcp_scoreboard, sb); + vlib_cli_output (vm, 
"\nsb added odd blocks snd_una 0 snd_una_max 1500:" + "\n%U", format_tcp_scoreboard, sb, tc); + TCP_TEST ((pool_elts (sb->holes) == 5), + "scoreboard has %d elements", pool_elts (sb->holes)); + TCP_TEST ((sb->lost_bytes == 200), "lost bytes %u", sb->lost_bytes); tcp_rcv_sacks (tc, 950); if (verbose) - vlib_cli_output (vm, "sb added odd blocks and ack [0, 950]:\n%U", - format_tcp_scoreboard, sb); + vlib_cli_output (vm, "\nack [0, 950]:\n%U", format_tcp_scoreboard, sb, + tc); TCP_TEST ((pool_elts (sb->holes) == 0), "scoreboard has %d elements", pool_elts (sb->holes)); @@ -282,6 +290,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); TCP_TEST ((sb->last_sacked_bytes == 0), "last sacked bytes %d", sb->last_sacked_bytes); + TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); /* * Inject one block, ack it and overlap hole @@ -299,22 +308,26 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tcp_rcv_sacks (tc, 0); if (verbose) - vlib_cli_output (vm, "sb added [100, 500]:\n%U", - format_tcp_scoreboard, sb); + vlib_cli_output (vm, "\nsb added [100, 500] snd_una 0 snd_una_max 1000:" + "\n%U", format_tcp_scoreboard, sb, tc); tcp_rcv_sacks (tc, 800); if (verbose) - vlib_cli_output (vm, "sb ack [0, 800]:\n%U", format_tcp_scoreboard, sb); + vlib_cli_output (vm, "\nsb ack [0, 800]:\n%U", format_tcp_scoreboard, sb, + tc); - TCP_TEST ((pool_elts (sb->holes) == 1), + TCP_TEST ((pool_elts (sb->holes) == 0), "scoreboard has %d elements", pool_elts (sb->holes)); TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); - TCP_TEST ((sb->last_sacked_bytes == 0), - "last sacked bytes %d", sb->last_sacked_bytes); + TCP_TEST ((sb->last_sacked_bytes == 0), "last sacked bytes %d", + sb->last_sacked_bytes); TCP_TEST ((sb->last_bytes_delivered == 400), "last bytes delivered %d", 
sb->last_bytes_delivered); + TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); + TCP_TEST ((sb->head == TCP_INVALID_SACK_HOLE_INDEX), "head %u", sb->head); + TCP_TEST ((sb->tail == TCP_INVALID_SACK_HOLE_INDEX), "tail %u", sb->tail); /* * One hole close to head, patch head, split in two and start acking @@ -332,8 +345,12 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tcp_rcv_sacks (tc, 0); if (verbose) - vlib_cli_output (vm, "sb added [500, 1000]:\n%U", - format_tcp_scoreboard, sb); + vlib_cli_output (vm, "\nsb added [500, 1000]:\n%U", + format_tcp_scoreboard, sb, tc); + TCP_TEST ((sb->sacked_bytes == 500), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->last_sacked_bytes == 500), "last sacked bytes %d", + sb->last_sacked_bytes); + TCP_TEST ((sb->lost_bytes == 500), "lost bytes %u", sb->lost_bytes); vec_reset_length (tc->rcv_opts.sacks); block.start = 300; @@ -342,18 +359,57 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks); tcp_rcv_sacks (tc, 100); if (verbose) - vlib_cli_output (vm, "sb added [0, 100] [300, 400]:\n%U", - format_tcp_scoreboard, sb); + vlib_cli_output (vm, "\nsb added [0, 100] [300, 400]:\n%U", + format_tcp_scoreboard, sb, tc); TCP_TEST ((pool_elts (sb->holes) == 2), "scoreboard has %d elements", pool_elts (sb->holes)); + TCP_TEST ((sb->sacked_bytes == 600), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->last_sacked_bytes == 100), "last sacked bytes %d", + sb->last_sacked_bytes); + TCP_TEST ((sb->last_bytes_delivered == 0), "last bytes delivered %d", + sb->last_bytes_delivered); + TCP_TEST ((sb->lost_bytes == 200), "lost bytes %u", sb->lost_bytes); tc->snd_una = 100; tcp_rcv_sacks (tc, 200); + tc->snd_una = 200; tcp_rcv_sacks (tc, 300); if (verbose) - vlib_cli_output (vm, "sb added [0, 300]:\n%U", format_tcp_scoreboard, sb); + vlib_cli_output (vm, "\nacked [0, 300] in two steps:\n%U", + format_tcp_scoreboard, sb, tc); TCP_TEST 
((sb->sacked_bytes == 500), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->lost_bytes == 100), "lost bytes %u", sb->lost_bytes); + TCP_TEST ((sb->last_bytes_delivered == 100), "last bytes delivered %d", + sb->last_bytes_delivered); + tc->snd_una = 400; + tcp_rcv_sacks (tc, 500); + if (verbose) + vlib_cli_output (vm, "\nacked [400, 500]:\n%U", format_tcp_scoreboard, sb, + tc); + TCP_TEST ((pool_elts (sb->holes) == 0), + "scoreboard has %d elements", pool_elts (sb->holes)); + TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->last_sacked_bytes == 0), "last sacked bytes %d", + sb->last_sacked_bytes); + TCP_TEST ((sb->last_bytes_delivered == 500), "last bytes delivered %d", + sb->last_bytes_delivered); + TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); + TCP_TEST ((sb->snd_una_adv == 500), "snd_una_adv %u", sb->snd_una_adv); + TCP_TEST ((sb->head == TCP_INVALID_SACK_HOLE_INDEX), "head %u", sb->head); + TCP_TEST ((sb->tail == TCP_INVALID_SACK_HOLE_INDEX), "tail %u", sb->tail); + + /* + * Re-ack high sacked, to make sure last_bytes_delivered and + * snd_una_adv are 0-ed + */ + tcp_rcv_sacks (tc, 1000); + if (verbose) + vlib_cli_output (vm, "\nAck high sacked:\n%U", format_tcp_scoreboard, sb, + tc); + TCP_TEST ((sb->last_bytes_delivered == 0), "last bytes delivered %d", + sb->last_bytes_delivered); + TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); return 0; } diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 85aaa16f245c..19012578c6bc 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -667,38 +667,41 @@ scoreboard_insert_hole (sack_scoreboard_t * sb, u32 prev_index, static void scoreboard_update_bytes (tcp_connection_t * tc, sack_scoreboard_t * sb) { - sack_scoreboard_hole_t *hole, *prev; + sack_scoreboard_hole_t *left, *right; u32 bytes = 0, blks = 0; sb->lost_bytes = 0; sb->sacked_bytes = 0; - hole = scoreboard_last_hole (sb); - if (!hole) + 
left = scoreboard_last_hole (sb); + if (!left) return; - if (seq_gt (sb->high_sacked, hole->end)) + if (seq_gt (sb->high_sacked, left->end)) { - bytes = sb->high_sacked - hole->end; + bytes = sb->high_sacked - left->end; blks = 1; + if (bytes > (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss + && left->prev == TCP_INVALID_SACK_HOLE_INDEX) + sb->lost_bytes += scoreboard_hole_bytes (left); } - while ((prev = scoreboard_prev_hole (sb, hole)) + right = left; + while ((left = scoreboard_prev_hole (sb, right)) && (bytes < (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss && blks < TCP_DUPACK_THRESHOLD)) { - bytes += hole->start - prev->end; + bytes += right->start - left->end; blks++; - hole = prev; + right = left; } - while (hole) + while (left) { - sb->lost_bytes += scoreboard_hole_bytes (hole); - hole->is_lost = 1; - prev = hole; - hole = scoreboard_prev_hole (sb, hole); - if (hole) - bytes += prev->start - hole->end; + bytes += right->start - left->end; + sb->lost_bytes += scoreboard_hole_bytes (left); + left->is_lost = 1; + right = left; + left = scoreboard_prev_hole (sb, left); } sb->sacked_bytes = bytes; } @@ -815,7 +818,8 @@ tcp_scoreboard_is_sane_post_recovery (tcp_connection_t * tc) { sack_scoreboard_hole_t *hole; hole = scoreboard_first_hole (&tc->sack_sb); - return (!hole || seq_geq (hole->start, tc->snd_una)); + return (!hole || (seq_geq (hole->start, tc->snd_una) + && seq_lt (hole->end, tc->snd_una_max))); } void @@ -974,6 +978,14 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) } } + if (pool_elts (sb->holes) == 1) + { + hole = scoreboard_first_hole (sb); + if (hole->start == ack + sb->snd_una_adv + && hole->end == tc->snd_una_max) + scoreboard_remove_hole (sb, hole); + } + scoreboard_update_bytes (tc, sb); sb->last_sacked_bytes = sb->sacked_bytes - (old_sacked_bytes - sb->last_bytes_delivered); From 9a1e6eafd68b372e7aec53f75ac8f1881de72150 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Wed, 17 Oct 2018 14:53:11 -0700 Subject: [PATCH 29/70] tls: fix connection 
failures/interrupts at scale (VPP-1464) Change-Id: I0bc4062c1fd3202ee201acb36a2bb14fc6ee1543 Signed-off-by: Florin Coras (cherry picked from commit c01d578a625fb136bc33b0eb9c19907769a67989) --- src/plugins/unittest/tcp_test.c | 5 +++-- src/vnet/session-apps/echo_client.c | 21 ++++++++++---------- src/vnet/session/session.c | 8 ++++++-- src/vnet/tcp/tcp_input.c | 10 +++++++--- src/vnet/tls/tls.c | 30 +++++++++++++++++------------ 5 files changed, 45 insertions(+), 29 deletions(-) diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c index 92bf0ffd9c41..608f1efc53d8 100644 --- a/src/plugins/unittest/tcp_test.c +++ b/src/plugins/unittest/tcp_test.c @@ -1615,13 +1615,14 @@ tcp_test_lookup (vlib_main_t * vm, unformat_input_t * input) tcp_connection_t *tc; stream_session_t *s, *s1; u8 cmp = 0, is_filtered = 0; + u32 sidx; /* * Allocate fake session and connection 1 */ pool_get (smm->sessions[0], s); memset (s, 0, sizeof (*s)); - s->session_index = s - smm->sessions[0]; + s->session_index = sidx = s - smm->sessions[0]; pool_get (tm->connections[0], tc); memset (tc, 0, sizeof (*tc)); @@ -1662,7 +1663,7 @@ tcp_test_lookup (vlib_main_t * vm, unformat_input_t * input) * Confirm that connection lookup works */ - s1 = pool_elt_at_index (smm->sessions[0], 0); + s1 = pool_elt_at_index (smm->sessions[0], sidx); session_lookup_add_connection (tc1, session_handle (s1)); tconn = session_lookup_connection_wt4 (0, &tc1->lcl_ip.ip4, &tc1->rmt_ip.ip4, diff --git a/src/vnet/session-apps/echo_client.c b/src/vnet/session-apps/echo_client.c index 1fd7ad0cd9fe..1ece0196dde3 100644 --- a/src/vnet/session-apps/echo_client.c +++ b/src/vnet/session-apps/echo_client.c @@ -352,16 +352,6 @@ echo_clients_init (vlib_main_t * vm) return 0; } -static void -echo_clients_session_disconnect (stream_session_t * s) -{ - echo_client_main_t *ecm = &echo_client_main; - vnet_disconnect_args_t _a, *a = &_a; - a->handle = session_handle (s); - a->app_index = ecm->app_index; - 
vnet_disconnect_session (a); -} - static int echo_clients_session_connected_callback (u32 app_index, u32 api_context, stream_session_t * s, u8 is_fail) @@ -377,6 +367,7 @@ echo_clients_session_connected_callback (u32 app_index, u32 api_context, if (is_fail) { clib_warning ("connection %d failed!", api_context); + ecm->run_test = ECHO_CLIENTS_EXITING; signal_evt_to_cli (-1); return 0; } @@ -454,6 +445,16 @@ echo_clients_session_disconnect_callback (stream_session_t * s) return; } +void +echo_clients_session_disconnect (stream_session_t * s) +{ + echo_client_main_t *ecm = &echo_client_main; + vnet_disconnect_args_t _a, *a = &_a; + a->handle = session_handle (s); + a->app_index = ecm->app_index; + vnet_disconnect_session (a); +} + static int echo_clients_rx_callback (stream_session_t * s) { diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 23d258ffdea2..81c93064d38d 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -814,18 +814,22 @@ stream_session_delete_notify (transport_connection_t * tc) * from the app, do the whole disconnect since we might still * have lingering events */ stream_session_disconnect (s); + s->session_state = SESSION_STATE_CLOSED; break; case SESSION_STATE_CLOSING: /* Cleanup lookup table. 
Transport needs to still be valid */ session_lookup_del_session (s); + s->session_state = SESSION_STATE_CLOSED; break; case SESSION_STATE_CLOSED: case SESSION_STATE_ACCEPTING: stream_session_delete (s); break; + default: + /* Assume connection was not yet added the lookup table */ + session_free_w_fifos (s); + break; } - - s->session_state = SESSION_STATE_CLOSED; } /** diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 19012578c6bc..e75c77d0e2fa 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -2687,9 +2687,12 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tc0->state = TCP_STATE_CLOSED; TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0); - /* Delete the connection/session since the pipes should be - * clear by now */ - tcp_connection_del (tc0); + + /* Don't free the connection from the data path since + * we can't ensure that we have no packets already enqueued + * to output. Rely instead on the waitclose timer */ + tcp_connection_timers_reset (tc0); + tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, 1); goto drop; @@ -3501,6 +3504,7 @@ do { \ _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(LAST_ACK, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(LAST_ACK, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(TIME_WAIT, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(TIME_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c index 9a82360610b8..f4814a3ce217 100644 --- a/src/vnet/tls/tls.c +++ b/src/vnet/tls/tls.c @@ -26,6 +26,18 @@ static tls_engine_vft_t *tls_vfts; void tls_disconnect (u32 ctx_handle, u32 thread_index); +static void +tls_disconnect_transport (tls_ctx_t * ctx) +{ + vnet_disconnect_args_t a = { + .handle = ctx->tls_session_handle, + .app_index = tls_main.app_index, + }; + + if (vnet_disconnect_session (&a)) + clib_warning ("disconnect 
returned"); +} + tls_engine_type_t tls_get_available_engine (void) { @@ -91,6 +103,8 @@ tls_listener_ctx_alloc (void) void tls_listener_ctx_free (tls_ctx_t * ctx) { + if (CLIB_DEBUG) + memset (ctx, 0xfb, sizeof (*ctx)); pool_put (tls_main.listener_ctx_pool, ctx); } @@ -226,7 +240,7 @@ tls_notify_app_connected (tls_ctx_t * ctx, u8 is_failed) app_wrk = app_worker_get_if_valid (ctx->parent_app_index); if (!app_wrk) { - tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ()); + tls_disconnect_transport (ctx); return -1; } @@ -249,16 +263,16 @@ tls_notify_app_connected (tls_ctx_t * ctx, u8 is_failed) ctx->app_session_handle = session_handle (app_session); ctx->c_s_index = app_session->session_index; - app_session->session_state = SESSION_STATE_READY; + app_session->session_state = SESSION_STATE_CONNECTING; if (cb_fn (ctx->parent_app_index, ctx->parent_app_api_context, app_session, 0 /* not failed */ )) { TLS_DBG (1, "failed to notify app"); tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ()); - session_free_w_fifos (app_session); return -1; } + app_session->session_state = SESSION_STATE_READY; session_lookup_add_connection (&ctx->connection, session_handle (app_session)); @@ -553,15 +567,7 @@ tls_disconnect (u32 ctx_handle, u32 thread_index) TLS_DBG (1, "Disconnecting %x", ctx_handle); ctx = tls_ctx_get (ctx_handle); - - vnet_disconnect_args_t a = { - .handle = ctx->tls_session_handle, - .app_index = tls_main.app_index, - }; - - if (vnet_disconnect_session (&a)) - clib_warning ("disconnect returned"); - + tls_disconnect_transport (ctx); stream_session_delete_notify (&ctx->connection); tls_ctx_free (ctx); } From 23064dd22af4d5206ee52c887e6c413f42de6389 Mon Sep 17 00:00:00 2001 From: Jakub Grajciar Date: Tue, 9 Oct 2018 12:28:21 +0200 Subject: [PATCH 30/70] Test FW: Use unittest temp dir as unix runtime dir Change-Id: I5273d5f3f59cc3c43da0a15bb0c4a4056098adcf Signed-off-by: Jakub Grajciar (cherry picked from commit 
997439170aa3bb562c84e882c45331ba476e7c8e) --- test/framework.py | 15 ++++++++------- test/test_memif.py | 45 ++++++++++++++++++++++++--------------------- test/vpp_memif.py | 3 ++- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/test/framework.py b/test/framework.py index da34724befd6..a7f4fc774b44 100644 --- a/test/framework.py +++ b/test/framework.py @@ -284,13 +284,14 @@ def setUpConstants(cls): cls.vpp_cmdline = [cls.vpp_bin, "unix", "{", "nodaemon", debug_cli, "full-coredump", - coredump_size, "}", "api-trace", "{", "on", "}", - "api-segment", "{", "prefix", cls.shm_prefix, "}", - "cpu", "{", "main-core", str(cpu_core_number), "}", - "statseg", "{", "socket-name", cls.stats_sock, "}", - "plugins", "{", "plugin", "dpdk_plugin.so", "{", - "disable", "}", "plugin", "unittest_plugin.so", - "{", "enable", "}", "}", ] + coredump_size, "runtime-dir", cls.tempdir, "}", + "api-trace", "{", "on", "}", "api-segment", "{", + "prefix", cls.shm_prefix, "}", "cpu", "{", + "main-core", str(cpu_core_number), "}", "statseg", + "{", "socket-name", cls.stats_sock, "}", "plugins", + "{", "plugin", "dpdk_plugin.so", "{", "disable", + "}", "plugin", "unittest_plugin.so", "{", "enable", + "}", "}", ] if plugin_path is not None: cls.vpp_cmdline.extend(["plugin_path", plugin_path]) cls.logger.info("vpp_cmdline: %s" % cls.vpp_cmdline) diff --git a/test/test_memif.py b/test/test_memif.py index 8fe229986aa4..6d462bae018b 100644 --- a/test/test_memif.py +++ b/test/test_memif.py @@ -54,14 +54,14 @@ def test_memif_socket_filename_add_del(self): dump = self.vapi.memif_socket_filename_dump() self.assertTrue( self._check_socket_filename( - dump, 0, "/run/vpp/memif.sock")) + dump, 0, self.tempdir + "/memif.sock")) memif_sockets = [] # existing path memif_sockets.append( VppSocketFilename( - self, 1, "/run/vpp/memif1.sock")) - # default path ("/run/vpp") + self, 1, self.tempdir + "/memif1.sock")) + # default path (test tempdir) memif_sockets.append( VppSocketFilename( self, @@ 
-91,7 +91,7 @@ def test_memif_socket_filename_add_del(self): dump = self.vapi.memif_socket_filename_dump() self.assertTrue( self._check_socket_filename( - dump, 0, "/run/vpp/memif.sock")) + dump, 0, self.tempdir + "/memif.sock")) def _create_delete_test_one_interface(self, memif): memif.add_vpp_config() @@ -150,8 +150,8 @@ def test_memif_create_custom_socket(self): # existing path memif_sockets.append( VppSocketFilename( - self, 1, "/run/vpp/memif1.sock")) - # default path ("/run/vpp") + self, 1, self.tempdir + "/memif1.sock")) + # default path (test tempdir) memif_sockets.append( VppSocketFilename( self, @@ -178,18 +178,16 @@ def test_memif_create_custom_socket(self): def test_memif_connect(self): """ Memif connect """ - memif = VppMemif( - self, - MEMIF_ROLE.SLAVE, - MEMIF_MODE.ETHERNET, - ring_size=1024, - buffer_size=2048) - remote_memif = VppMemif( - self.remote_test, - MEMIF_ROLE.MASTER, - MEMIF_MODE.ETHERNET, - ring_size=1024, - buffer_size=2048) + memif = VppMemif(self, MEMIF_ROLE.SLAVE, MEMIF_MODE.ETHERNET, + ring_size=1024, buffer_size=2048) + + remote_socket = VppSocketFilename(self.remote_test, 1, + self.tempdir + "/memif.sock") + remote_socket.add_vpp_config() + + remote_memif = VppMemif(self.remote_test, MEMIF_ROLE.MASTER, + MEMIF_MODE.ETHERNET, socket_id=1, + ring_size=1024, buffer_size=2048) self._connect_test_interface_pair(memif, remote_memif) @@ -219,9 +217,14 @@ def _verify_icmp(self, pg, memif, rx, seq): def test_memif_ping(self): """ Memif ping """ - memif = VppMemif(self, MEMIF_ROLE.MASTER, MEMIF_MODE.ETHERNET) - remote_memif = VppMemif(self.remote_test, MEMIF_ROLE.SLAVE, - MEMIF_MODE.ETHERNET) + memif = VppMemif(self, MEMIF_ROLE.SLAVE, MEMIF_MODE.ETHERNET) + + remote_socket = VppSocketFilename(self.remote_test, 1, + self.tempdir + "/memif.sock") + remote_socket.add_vpp_config() + + remote_memif = VppMemif(self.remote_test, MEMIF_ROLE.MASTER, + MEMIF_MODE.ETHERNET, socket_id=1) memif.add_vpp_config() memif.config_ip4() diff --git 
a/test/vpp_memif.py b/test/vpp_memif.py index 2095480a7c2b..24e8d19b1c71 100644 --- a/test/vpp_memif.py +++ b/test/vpp_memif.py @@ -50,7 +50,8 @@ def add_vpp_config(self): rv = self._test.vapi.memif_socket_filename_add_del( 1, self.socket_id, self.socket_filename) if self.add_default_folder: - self.socket_filename = "/run/vpp/" + self.socket_filename + self.socket_filename = self._test.tempdir + "/" \ + + self.socket_filename return rv def remove_vpp_config(self): From 79e4bc2d1582fb042cfdc4926afd849f71454b75 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Fri, 19 Oct 2018 04:01:19 -0700 Subject: [PATCH 31/70] NAT44: fix ICMP virtual fragmentation reassembly (VPP-1466) Change-Id: I8006bca02948d9121f474a3d14f0576747bb3c51 Signed-off-by: Matus Fabian --- src/plugins/nat/in2out.c | 48 ++++++++-- src/plugins/nat/in2out_ed.c | 7 +- src/plugins/nat/nat44_hairpinning.c | 72 +++++++-------- src/plugins/nat/out2in.c | 58 ++++++++---- test/test_nat.py | 131 +++++----------------------- 5 files changed, 144 insertions(+), 172 deletions(-) diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index b99aef3944d3..cb169814eac9 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -952,8 +952,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } else { - if (PREDICT_FALSE - (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP)) + if (PREDICT_FALSE (proto0 == ~0)) { next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace00; @@ -964,6 +963,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, next0 = SNAT_IN2OUT_NEXT_REASS; goto trace00; } + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace00; + } } key0.addr = ip0->src_address; @@ -1131,8 +1136,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } else { - if (PREDICT_FALSE - (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP)) + if (PREDICT_FALSE (proto1 == ~0)) { next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace01; @@ -1143,6 +1147,12 @@ snat_in2out_node_fn_inline 
(vlib_main_t * vm, next1 = SNAT_IN2OUT_NEXT_REASS; goto trace01; } + + if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) + { + next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace01; + } } key1.addr = ip1->src_address; @@ -1346,8 +1356,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } else { - if (PREDICT_FALSE - (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP)) + if (PREDICT_FALSE (proto0 == ~0)) { next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace0; @@ -1358,6 +1367,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, next0 = SNAT_IN2OUT_NEXT_REASS; goto trace0; } + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace0; + } } key0.addr = ip0->src_address; @@ -1672,6 +1687,7 @@ nat44_in2out_reass_node_fn (vlib_main_t * vm, nat_reass_ip4_t *reass0; udp_header_t *udp0; tcp_header_t *tcp0; + icmp46_header_t *icmp0; snat_session_key_t key0; clib_bihash_kv_8_8_t kv0, value0; snat_session_t *s0 = 0; @@ -1704,6 +1720,7 @@ nat44_in2out_reass_node_fn (vlib_main_t * vm, ip0 = (ip4_header_t *) vlib_buffer_get_current (b0); udp0 = ip4_next_header (ip0); tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; proto0 = ip_proto_to_snat_proto (ip0->protocol); reass0 = nat_ip4_reass_find_or_create (ip0->src_address, @@ -1722,6 +1739,25 @@ nat44_in2out_reass_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) { + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_in2out_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, thread_index, &s0); + + if (PREDICT_TRUE (next0 != SNAT_IN2OUT_NEXT_DROP)) + { + if (s0) + reass0->sess_index = s0 - per_thread_data->sessions; + else + reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; + nat_ip4_reass_get_frags (reass0, + &fragments_to_loopback); + } + + goto trace0; + } + key0.addr = ip0->src_address; key0.port = udp0->src_port; key0.protocol = proto0; diff --git a/src/plugins/nat/in2out_ed.c 
b/src/plugins/nat/in2out_ed.c index 8db53c081428..f9f8d776eb46 100644 --- a/src/plugins/nat/in2out_ed.c +++ b/src/plugins/nat/in2out_ed.c @@ -1960,11 +1960,8 @@ nat44_ed_in2out_reass_node_fn_inline (vlib_main_t * vm, } /* Hairpinning */ - if (PREDICT_TRUE (proto0 != SNAT_PROTOCOL_ICMP)) - nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port, - s0->ext_host_port, proto0, 1); - else - snat_icmp_hairpinning (sm, b0, ip0, icmp0, 1); + nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port, + s0->ext_host_port, proto0, 1); /* Accounting */ nat44_session_update_counters (s0, now, diff --git a/src/plugins/nat/nat44_hairpinning.c b/src/plugins/nat/nat44_hairpinning.c index c07427d6bcb7..09ea419e637c 100644 --- a/src/plugins/nat/nat44_hairpinning.c +++ b/src/plugins/nat/nat44_hairpinning.c @@ -286,39 +286,6 @@ snat_icmp_hairpinning (snat_main_t * sm, } else { - if (!is_ed) - { - icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1); - u16 icmp_id0 = echo0->identifier; - key0.addr = ip0->dst_address; - key0.port = icmp_id0; - key0.protocol = SNAT_PROTOCOL_ICMP; - key0.fib_index = sm->outside_fib_index; - kv0.key = key0.as_u64; - if (sm->num_workers > 1) - ti = - (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread; - else - ti = sm->num_workers; - int rv = - clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, - &value0); - if (!rv) - { - si = value0.value; - s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); - new_dst_addr0 = s0->in2out.addr.as_u32; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; - echo0->identifier = s0->in2out.port; - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port, - icmp_echo_header_t, identifier); - icmp0->checksum = ip_csum_fold (sum0); - goto change_addr; - } - ti = 0; - } - key0.addr = ip0->dst_address; key0.port = 0; key0.protocol = 0; @@ -327,7 +294,44 @@ snat_icmp_hairpinning (snat_main_t * sm, if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, 
&kv0, &value0)) - return 1; + { + if (!is_ed) + { + icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1); + u16 icmp_id0 = echo0->identifier; + key0.addr = ip0->dst_address; + key0.port = icmp_id0; + key0.protocol = SNAT_PROTOCOL_ICMP; + key0.fib_index = sm->outside_fib_index; + kv0.key = key0.as_u64; + if (sm->num_workers > 1) + ti = + (clib_net_to_host_u16 (icmp_id0) - + 1024) / sm->port_per_thread; + else + ti = sm->num_workers; + int rv = + clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, + &value0); + if (!rv) + { + si = value0.value; + s0 = + pool_elt_at_index (sm->per_thread_data[ti].sessions, si); + new_dst_addr0 = s0->in2out.addr.as_u32; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + s0->in2out.fib_index; + echo0->identifier = s0->in2out.port; + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port, + icmp_echo_header_t, identifier); + icmp0->checksum = ip_csum_fold (sum0); + goto change_addr; + } + } + + return 1; + } m0 = pool_elt_at_index (sm->static_mappings, value0.value); diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index eeecf1652647..c4d1fbf69003 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -775,17 +775,17 @@ snat_out2in_node_fn (vlib_main_t * vm, goto trace0; } - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + if (PREDICT_FALSE (ip4_is_fragment (ip0))) { - next0 = icmp_out2in_slow_path - (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, thread_index, &s0); + next0 = SNAT_OUT2IN_NEXT_REASS; goto trace0; } - if (PREDICT_FALSE (ip4_is_fragment (ip0))) + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { - next0 = SNAT_OUT2IN_NEXT_REASS; + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, thread_index, &s0); goto trace0; } @@ -936,17 +936,17 @@ snat_out2in_node_fn (vlib_main_t * vm, goto trace1; } - if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) + if (PREDICT_FALSE 
(ip4_is_fragment (ip1))) { - next1 = icmp_out2in_slow_path - (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, - next1, now, thread_index, &s1); + next1 = SNAT_OUT2IN_NEXT_REASS; goto trace1; } - if (PREDICT_FALSE (ip4_is_fragment (ip1))) + if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) { - next1 = SNAT_OUT2IN_NEXT_REASS; + next1 = icmp_out2in_slow_path + (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, + next1, now, thread_index, &s1); goto trace1; } @@ -1134,17 +1134,17 @@ snat_out2in_node_fn (vlib_main_t * vm, goto trace00; } - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + if (PREDICT_FALSE (ip4_is_fragment (ip0))) { - next0 = icmp_out2in_slow_path - (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, thread_index, &s0); + next0 = SNAT_OUT2IN_NEXT_REASS; goto trace00; } - if (PREDICT_FALSE (ip4_is_fragment (ip0))) + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { - next0 = SNAT_OUT2IN_NEXT_REASS; + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, thread_index, &s0); goto trace00; } @@ -1336,6 +1336,7 @@ nat44_out2in_reass_node_fn (vlib_main_t * vm, nat_reass_ip4_t *reass0; udp_header_t *udp0; tcp_header_t *tcp0; + icmp46_header_t *icmp0; snat_session_key_t key0, sm0; clib_bihash_kv_8_8_t kv0, value0; snat_session_t *s0 = 0; @@ -1369,6 +1370,7 @@ nat44_out2in_reass_node_fn (vlib_main_t * vm, ip0 = (ip4_header_t *) vlib_buffer_get_current (b0); udp0 = ip4_next_header (ip0); tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; proto0 = ip_proto_to_snat_proto (ip0->protocol); reass0 = nat_ip4_reass_find_or_create (ip0->src_address, @@ -1387,6 +1389,26 @@ nat44_out2in_reass_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) { + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, thread_index, &s0); + + if (PREDICT_TRUE (next0 != 
SNAT_OUT2IN_NEXT_DROP)) + { + if (s0) + reass0->sess_index = s0 - per_thread_data->sessions; + else + reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; + reass0->thread_index = thread_index; + nat_ip4_reass_get_frags (reass0, + &fragments_to_loopback); + } + + goto trace0; + } + key0.addr = ip0->dst_address; key0.port = udp0->dst_port; key0.protocol = proto0; diff --git a/test/test_nat.py b/test/test_nat.py index e9e7dfa3068b..e26aa27ddbd9 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -3335,86 +3335,36 @@ def test_frag_in_order(self): self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index, is_inside=0) - data = "A" * 4 + "B" * 16 + "C" * 3 - self.tcp_port_in = random.randint(1025, 65535) - - reass = self.vapi.nat_reass_dump() - reass_n_start = len(reass) - - # in2out - pkts = self.create_stream_frag(self.pg0, - self.pg1.remote_ip4, - self.tcp_port_in, - 20, - data) - self.pg0.add_stream(pkts) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - frags = self.pg1.get_capture(len(pkts)) - p = self.reass_frags_and_verify(frags, - self.nat_addr, - self.pg1.remote_ip4) - self.assertEqual(p[TCP].dport, 20) - self.assertNotEqual(p[TCP].sport, self.tcp_port_in) - self.tcp_port_out = p[TCP].sport - self.assertEqual(data, p[Raw].load) - - # out2in - pkts = self.create_stream_frag(self.pg1, - self.nat_addr, - 20, - self.tcp_port_out, - data) - self.pg1.add_stream(pkts) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - frags = self.pg0.get_capture(len(pkts)) - p = self.reass_frags_and_verify(frags, - self.pg1.remote_ip4, - self.pg0.remote_ip4) - self.assertEqual(p[TCP].sport, 20) - self.assertEqual(p[TCP].dport, self.tcp_port_in) - self.assertEqual(data, p[Raw].load) - - reass = self.vapi.nat_reass_dump() - reass_n_end = len(reass) - - self.assertEqual(reass_n_end - reass_n_start, 2) + self.frag_in_order(proto=IP_PROTOS.tcp) + self.frag_in_order(proto=IP_PROTOS.udp) + self.frag_in_order(proto=IP_PROTOS.icmp) def 
test_reass_hairpinning(self): """ NAT44 fragments hairpinning """ - server = self.pg0.remote_hosts[1] - host_in_port = random.randint(1025, 65535) - server_in_port = random.randint(1025, 65535) - server_out_port = random.randint(1025, 65535) - data = "A" * 4 + "B" * 16 + "C" * 3 + self.server = self.pg0.remote_hosts[1] + self.host_in_port = random.randint(1025, 65535) + self.server_in_port = random.randint(1025, 65535) + self.server_out_port = random.randint(1025, 65535) self.nat44_add_address(self.nat_addr) self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index) self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index, is_inside=0) # add static mapping for server - self.nat44_add_static_mapping(server.ip4, self.nat_addr, - server_in_port, server_out_port, + self.nat44_add_static_mapping(self.server.ip4, self.nat_addr, + self.server_in_port, + self.server_out_port, proto=IP_PROTOS.tcp) + self.nat44_add_static_mapping(self.server.ip4, self.nat_addr, + self.server_in_port, + self.server_out_port, + proto=IP_PROTOS.udp) + self.nat44_add_static_mapping(self.server.ip4, self.nat_addr) - # send packet from host to server - pkts = self.create_stream_frag(self.pg0, - self.nat_addr, - host_in_port, - server_out_port, - data) - self.pg0.add_stream(pkts) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - frags = self.pg0.get_capture(len(pkts)) - p = self.reass_frags_and_verify(frags, - self.nat_addr, - server.ip4) - self.assertNotEqual(p[TCP].sport, host_in_port) - self.assertEqual(p[TCP].dport, server_in_port) - self.assertEqual(data, p[Raw].load) + self.reass_hairpinning(proto=IP_PROTOS.tcp) + self.reass_hairpinning(proto=IP_PROTOS.udp) + self.reass_hairpinning(proto=IP_PROTOS.icmp) def test_frag_out_of_order(self): """ NAT44 translate fragments arriving out of order """ @@ -3424,45 +3374,9 @@ def test_frag_out_of_order(self): self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index, is_inside=0) - data = "A" * 4 + "B" * 16 + "C" * 3 - 
random.randint(1025, 65535) - - # in2out - pkts = self.create_stream_frag(self.pg0, - self.pg1.remote_ip4, - self.tcp_port_in, - 20, - data) - pkts.reverse() - self.pg0.add_stream(pkts) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - frags = self.pg1.get_capture(len(pkts)) - p = self.reass_frags_and_verify(frags, - self.nat_addr, - self.pg1.remote_ip4) - self.assertEqual(p[TCP].dport, 20) - self.assertNotEqual(p[TCP].sport, self.tcp_port_in) - self.tcp_port_out = p[TCP].sport - self.assertEqual(data, p[Raw].load) - - # out2in - pkts = self.create_stream_frag(self.pg1, - self.nat_addr, - 20, - self.tcp_port_out, - data) - pkts.reverse() - self.pg1.add_stream(pkts) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - frags = self.pg0.get_capture(len(pkts)) - p = self.reass_frags_and_verify(frags, - self.pg1.remote_ip4, - self.pg0.remote_ip4) - self.assertEqual(p[TCP].sport, 20) - self.assertEqual(p[TCP].dport, self.tcp_port_in) - self.assertEqual(data, p[Raw].load) + self.frag_out_of_order(proto=IP_PROTOS.tcp) + self.frag_out_of_order(proto=IP_PROTOS.udp) + self.frag_out_of_order(proto=IP_PROTOS.icmp) def test_port_restricted(self): """ Port restricted NAT44 (MAP-E CE) """ @@ -3971,8 +3885,7 @@ def test_reass_hairpinning(self): self.server_in_port, self.server_out_port, proto=IP_PROTOS.udp) - self.nat44_add_static_mapping(self.server.ip4, self.nat_addr, - proto=IP_PROTOS.icmp) + self.nat44_add_static_mapping(self.server.ip4, self.nat_addr) self.reass_hairpinning(proto=IP_PROTOS.tcp) self.reass_hairpinning(proto=IP_PROTOS.udp) From c8f3638beacc0075f5da2af8781783a2a72aef01 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Fri, 19 Oct 2018 17:49:00 -0700 Subject: [PATCH 32/70] tcp: count first lost hole (VPP-1465) Change-Id: I3ac136e2a10796d8fa86ddb6f0d6cabe5fa749f8 Signed-off-by: Florin Coras --- src/plugins/unittest/tcp_test.c | 32 +++++++++++++++++++++++++++++--- src/vnet/tcp/tcp_input.c | 31 +++++++++++++++++-------------- 2 files 
changed, 46 insertions(+), 17 deletions(-) diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c index 608f1efc53d8..d06578771c0e 100644 --- a/src/plugins/unittest/tcp_test.c +++ b/src/plugins/unittest/tcp_test.c @@ -129,7 +129,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((sb->last_sacked_bytes == 400), "last sacked bytes %d", sb->last_sacked_bytes); TCP_TEST ((sb->high_sacked == 900), "high sacked %u", sb->high_sacked); - TCP_TEST ((sb->lost_bytes == 200), "lost bytes %u", sb->lost_bytes); + TCP_TEST ((sb->lost_bytes == 300), "lost bytes %u", sb->lost_bytes); /* * Inject odd blocks @@ -276,7 +276,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) "\n%U", format_tcp_scoreboard, sb, tc); TCP_TEST ((pool_elts (sb->holes) == 5), "scoreboard has %d elements", pool_elts (sb->holes)); - TCP_TEST ((sb->lost_bytes == 200), "lost bytes %u", sb->lost_bytes); + TCP_TEST ((sb->lost_bytes == 300), "lost bytes %u", sb->lost_bytes); tcp_rcv_sacks (tc, 950); @@ -368,7 +368,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) sb->last_sacked_bytes); TCP_TEST ((sb->last_bytes_delivered == 0), "last bytes delivered %d", sb->last_bytes_delivered); - TCP_TEST ((sb->lost_bytes == 200), "lost bytes %u", sb->lost_bytes); + TCP_TEST ((sb->lost_bytes == 300), "lost bytes %u", sb->lost_bytes); tc->snd_una = 100; tcp_rcv_sacks (tc, 200); @@ -410,6 +410,32 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((sb->last_bytes_delivered == 0), "last bytes delivered %d", sb->last_bytes_delivered); TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); + + /* + * Add [1200, 1500] and test that [1000, 1200] is lost (bytes condition) + * snd_una = 1000 and snd_una_max = 1600 + */ + tc->snd_una = 1000; + tc->snd_una_max = 1600; + vec_reset_length (tc->rcv_opts.sacks); + block.start = 1200; + block.end = 1500; + vec_add1 (tc->rcv_opts.sacks, block); + tc->rcv_opts.n_sack_blocks = 
vec_len (tc->rcv_opts.sacks); + tcp_rcv_sacks (tc, 1000); + if (verbose) + vlib_cli_output (vm, "\nacked [1200, 1500] test first hole is lost:\n%U", + format_tcp_scoreboard, sb, tc); + TCP_TEST ((pool_elts (sb->holes) == 2), "scoreboard has %d elements", + pool_elts (sb->holes)); + TCP_TEST ((sb->sacked_bytes == 300), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->last_sacked_bytes == 300), "last sacked bytes %d", + sb->last_sacked_bytes); + TCP_TEST ((sb->last_bytes_delivered == 0), "last bytes delivered %d", + sb->last_bytes_delivered); + TCP_TEST ((sb->lost_bytes == 200), "lost bytes %u", sb->lost_bytes); + TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); + return 0; } diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index e75c77d0e2fa..87bacc243546 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -680,29 +680,32 @@ scoreboard_update_bytes (tcp_connection_t * tc, sack_scoreboard_t * sb) { bytes = sb->high_sacked - left->end; blks = 1; - if (bytes > (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss - && left->prev == TCP_INVALID_SACK_HOLE_INDEX) - sb->lost_bytes += scoreboard_hole_bytes (left); } - right = left; - while ((left = scoreboard_prev_hole (sb, right)) - && (bytes < (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss - && blks < TCP_DUPACK_THRESHOLD)) + while ((right = left) + && bytes < (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss + && blks < TCP_DUPACK_THRESHOLD + /* left not updated if above conditions fail */ + && (left = scoreboard_prev_hole (sb, right))) { bytes += right->start - left->end; blks++; - right = left; } - while (left) + /* left is first lost */ + if (left) { - bytes += right->start - left->end; - sb->lost_bytes += scoreboard_hole_bytes (left); - left->is_lost = 1; - right = left; - left = scoreboard_prev_hole (sb, left); + do + { + sb->lost_bytes += scoreboard_hole_bytes (right); + left->is_lost = 1; + left = scoreboard_prev_hole (sb, right); + if (left) + bytes += right->start - left->end; 
+ } + while ((right = left)); } + sb->sacked_bytes = bytes; } From 6a7103de937ccf3c8efae0357b12db3b6f8f08b4 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Sun, 21 Oct 2018 10:50:48 -0700 Subject: [PATCH 33/70] lisp-gpe: register udp port only if enabled (VPP-1468) Change-Id: I7d0930a19d927bbd7ba3fc879d5a0c8064827629 Signed-off-by: Florin Coras --- src/vnet/lisp-gpe/lisp_gpe.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/vnet/lisp-gpe/lisp_gpe.c b/src/vnet/lisp-gpe/lisp_gpe.c index c7b3d887cfa8..66304ae344aa 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.c +++ b/src/vnet/lisp-gpe/lisp_gpe.c @@ -193,10 +193,15 @@ clib_error_t * vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a) { lisp_gpe_main_t *lgm = &lisp_gpe_main; + vlib_main_t *vm = vlib_get_main (); if (a->is_en) { lgm->is_en = 1; + udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe, + lisp_gpe_ip4_input_node.index, 1 /* is_ip4 */ ); + udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe6, + lisp_gpe_ip6_input_node.index, 0 /* is_ip4 */ ); } else { @@ -206,6 +211,8 @@ vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a) /* disable all l3 ifaces */ lisp_gpe_tenant_flush (); + udp_unregister_dst_port (vm, UDP_DST_PORT_lisp_gpe, 0 /* is_ip4 */ ); + udp_unregister_dst_port (vm, UDP_DST_PORT_lisp_gpe6, 1 /* is_ip4 */ ); lgm->is_en = 0; } @@ -612,11 +619,6 @@ lisp_gpe_init (vlib_main_t * vm) lgm->lisp_gpe_fwd_entries = hash_create_mem (0, sizeof (lisp_gpe_fwd_entry_key_t), sizeof (uword)); - udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe, - lisp_gpe_ip4_input_node.index, 1 /* is_ip4 */ ); - udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe6, - lisp_gpe_ip6_input_node.index, 0 /* is_ip4 */ ); - lgm->lisp_stats_index_by_key = hash_create_mem (0, sizeof (lisp_stats_key_t), sizeof (uword)); memset (&lgm->counters, 0, sizeof (lgm->counters)); From 9734c0a494a0cb62bfb4bd9fff19086bd95ba5fa Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Mon, 22 Oct 2018 
09:41:29 +0200 Subject: [PATCH 34/70] stats: Add wrapper for vec_free The result vector from stat_segment_ls must be freed by the caller. Add wrapper for non-C language bindings. Change-Id: I7eee7f80ec98b41696d354add47b26978e12ef0f Signed-off-by: Ole Troan (cherry picked from commit 8254018c21bbdbbc11225ebc444b1d072606caf7) --- src/vpp-api/client/libvppapiclient.map | 1 + src/vpp-api/client/stat_client.c | 6 ++++++ src/vpp-api/client/stat_client.h | 1 + 3 files changed, 8 insertions(+) diff --git a/src/vpp-api/client/libvppapiclient.map b/src/vpp-api/client/libvppapiclient.map index 00a26fbc90e2..cb3d18b4de0e 100644 --- a/src/vpp-api/client/libvppapiclient.map +++ b/src/vpp-api/client/libvppapiclient.map @@ -21,5 +21,6 @@ VPPAPICLIENT_18.10 { stat_segment_heartbeat; stat_segment_string_vector; stat_segment_vec_len; + stat_segment_vec_free; local: *; }; diff --git a/src/vpp-api/client/stat_client.c b/src/vpp-api/client/stat_client.c index 1c099edc7897..0042a2be4e43 100644 --- a/src/vpp-api/client/stat_client.c +++ b/src/vpp-api/client/stat_client.c @@ -364,6 +364,12 @@ stat_segment_vec_len (void *vec) return vec_len (vec); } +void +stat_segment_vec_free (void *vec) +{ + vec_free (vec); +} + /* Create a vector from a string (or add to existing) */ u8 ** stat_segment_string_vector (u8 ** string_vector, char *string) diff --git a/src/vpp-api/client/stat_client.h b/src/vpp-api/client/stat_client.h index c1a0ecf47537..ef16e4246dcb 100644 --- a/src/vpp-api/client/stat_client.h +++ b/src/vpp-api/client/stat_client.h @@ -38,6 +38,7 @@ int stat_segment_connect (char *socket_name); void stat_segment_disconnect (void); uint8_t **stat_segment_string_vector (uint8_t ** string_vector, char *string); int stat_segment_vec_len (void *vec); +void stat_segment_vec_free (void *vec); uint32_t *stat_segment_ls (uint8_t ** pattern); stat_segment_data_t *stat_segment_dump (uint32_t * counter_vec); stat_segment_data_t *stat_segment_dump_entry (uint32_t index); From 
78d828eff35fc03c143810de35ceb11c4e5224ed Mon Sep 17 00:00:00 2001 From: Juraj Sloboda Date: Tue, 16 Oct 2018 12:18:21 +0200 Subject: [PATCH 35/70] Fix buffer overflow when fragmenting packets (VPP-1383) Change-Id: Idcda9ae55fa2efb0b2e928bac3e8e86ff8d19eba Signed-off-by: Juraj Sloboda --- src/vnet/ip/ip_frag.c | 5 +++-- test/test_ip4.py | 50 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index 628d9d66474c..8de4dfc5d58f 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -101,7 +101,8 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b); rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t); - max = (mtu - sizeof (ip4_header_t)) & ~0x7; + max = + (clib_min (mtu, VLIB_BUFFER_DATA_SIZE) - sizeof (ip4_header_t)) & ~0x7; if (rem > (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t))) @@ -152,7 +153,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, ip4_header_t *to_ip4; u8 *to_data; - len = (rem > (mtu - sizeof (ip4_header_t)) ? max : rem); + len = (rem > max ? 
max : rem); if (len != rem) /* Last fragment does not need to divisible by 8 */ len &= ~0x7; if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0) diff --git a/test/test_ip4.py b/test/test_ip4.py index e9ec71a2830e..ca461f1d2760 100644 --- a/test/test_ip4.py +++ b/test/test_ip4.py @@ -1554,5 +1554,55 @@ def test_ip_lpm(self): rx = self.send_and_expect(self.pg0, p_24 * 65, self.pg1) +class TestIPv4Frag(VppTestCase): + """ IPv4 fragmentation """ + + @classmethod + def setUpClass(cls): + super(TestIPv4Frag, cls).setUpClass() + + cls.create_pg_interfaces([0, 1]) + cls.src_if = cls.pg0 + cls.dst_if = cls.pg1 + + # setup all interfaces + for i in cls.pg_interfaces: + i.admin_up() + i.config_ip4() + i.resolve_arp() + + def test_frag_large_packets(self): + """ Fragmentation of large packets """ + + p = (Ether(dst=self.src_if.local_mac, src=self.src_if.remote_mac) / + IP(src=self.src_if.remote_ip4, dst=self.dst_if.remote_ip4) / + UDP(sport=1234, dport=5678) / Raw()) + self.extend_packet(p, 6000, "abcde") + saved_payload = p[Raw].load + + # Force fragmentation by setting MTU of output interface + # lower than packet size + self.vapi.sw_interface_set_mtu(self.dst_if.sw_if_index, + [5000, 0, 0, 0]) + + self.pg_enable_capture() + self.src_if.add_stream(p) + self.pg_start() + + # Expecting 3 fragments because size of created fragments currently + # cannot be larger than VPP buffer size (which is 2048) + packets = self.dst_if.get_capture(3) + + # Assume VPP sends the fragments in order + payload = '' + for p in packets: + payload_offset = p.frag * 8 + if payload_offset > 0: + payload_offset -= 8 # UDP header is not in payload + self.assert_equal(payload_offset, len(payload)) + payload += p[Raw].load + self.assert_equal(payload, saved_payload, "payload") + + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) From 3a9a6f72d18aa72e4038422a4c882927037441e7 Mon Sep 17 00:00:00 2001 From: Marco Varlese Date: Thu, 18 Oct 2018 09:19:15 +0200 Subject: [PATCH 36/70]
Release Notes for 18.10 Change-Id: I3500113f30d6d98eae69d39b59b90569c796e011 Signed-off-by: Marco Varlese --- RELEASE.md | 316 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 57ff828d13c7..c931d8aa11ea 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,5 +1,7 @@ # Release Notes {#release_notes} +* @subpage release_notes_1810 +* @subpage release_notes_1807 * @subpage release_notes_1804 * @subpage release_notes_18012 * @subpage release_notes_18011 @@ -12,6 +14,320 @@ * @subpage release_notes_1609 * @subpage release_notes_1606 +@page release_notes_1810 Release notes for VPP 18.10 + +More than 632 commits since the 18.07 release. + +## Features + +### Infrastructure +- DPDK 18.08 integration +- New Stats infrastructure (interface, error, node performance counters) +- Add configurable "Doug Lea malloc" support + +### VNET & Plugins +- Load balancing: support per-port VIP and all-port VIP +- Port NSH plugin to VPP +- NAT + - Configurable port range + - Virtual Fragmentation Reassembly for endpoint-dependent mode + - Client-IP based session affinity for load-balancing + - TCP MSS clamping + - Session timeout + - Bug-fixing and performance optimizations + +### Host stack +- Support for applications with multiple workers +- Support for binds from multiple app workers to same ip:port +- Switched to a message queue for io and control event notifications +- Support for eventfd based notifications as alternative to mutex-condvar pair +- VCL refactor to support async event notifications and multiple workers +- TLS async support in client for HW acceleration +- Performance optimizations and bug-fixing +- A number of binary APIs will be deprecated in favor of using the event + message queue. Details in the API section. + +## Known issues + +For the full list of issues please refer to fd.io [JIRA](https://jira.fd.io).
+ +## Issues fixed + +For the full list of fixed issues please refer to: +- fd.io [JIRA](https://jira.fd.io) +- git [commit log](https://git.fd.io/vpp/log/?h=stable/1810) + +## API changes + +Description of results: + +* _Definition changed_: indicates that the API file was modified between releases. +* _Only in image_: indicates the API is new for this release. +* _Only in file_: indicates the API has been removed in this release. + + Message Name Result +api_versions_reply definition changed +app_cut_through_registration_add definition changed +app_worker_add_del definition changed +application_attach_reply definition changed +bd_ip_mac_details only in image +bd_ip_mac_dump only in image +bfd_udp_get_echo_source definition changed +bier_imp_details definition changed +bier_route_details definition changed +bind_sock definition changed +bridge_domain_details definition changed +bridge_flags definition changed +classify_add_del_session definition changed +classify_add_del_table definition changed +connect_sock definition changed +create_vhost_user_if definition changed +get_first_msg_id_reply definition changed +gpe_add_del_fwd_entry_reply definition changed +gpe_fwd_entry_path_details definition changed +ip6_fib_details definition changed +ip6nd_proxy_details definition changed +ip_add_del_route_reply definition changed +ip_address_details definition changed +ip_details definition changed +ip_fib_details definition changed +ip_mfib_details definition changed +ip_mroute_add_del_reply definition changed +ip_neighbor_add_del_reply definition changed +ip_neighbor_details definition changed +ip_reassembly_get_reply definition changed +ip_unnumbered_details definition changed +ipip_6rd_add_tunnel definition changed +ipip_add_tunnel definition changed +ipsec_spds_details only in image +ipsec_spds_dump only in image +l2_interface_efp_filter definition changed +lisp_eid_table_vni_details definition changed +map_another_segment definition changed +mfib_signal_details 
definition changed +mpls_route_add_del_reply definition changed +mpls_tunnel_add_del definition changed +mpls_tunnel_add_del_reply definition changed +mpls_tunnel_details definition changed +mpls_tunnel_dump definition changed +one_eid_table_vni_details definition changed +qos_mark_enable_disable definition changed +qos_record_enable_disable definition changed +reset_session_reply definition changed +rpc_call definition changed +show_threads definition changed +sockclnt_create_reply definition changed +sockclnt_delete definition changed +sockclnt_delete_reply definition changed +sw_interface_rx_placement_details only in image +sw_interface_rx_placement_dump only in image +sw_interface_set_ip_directed_broadcast definition changed +sw_interface_set_l2_bridge definition changed +sw_interface_set_rx_placement definition changed +sw_interface_set_vxlan_gbp_bypass definition changed +udp_encap_add definition changed +udp_encap_add_del_reply only in file +udp_encap_add_reply only in image +udp_encap_del definition changed +udp_encap_details definition changed +unbind_sock definition changed +vxlan_gbp_tunnel_add_del definition changed +vxlan_gbp_tunnel_details only in image +vxlan_gbp_tunnel_dump only in image +Found 68 api message signature differences + +### Patches that changed API definitions + +| @c src/plugins/avf/avf.api || +| ------- | ------- | +| [149d0e28](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=149d0e28) | avf: RSS support | +| [4e6014fc](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=4e6014fc) | avf: api fix | + +| @c src/plugins/gbp/gbp.api || +| ------- | ------- | +| [c0a93143](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=c0a93143) | GBP Endpoint Updates | +| [61b94c6b](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=61b94c6b) | vxlan-gbp: Add support for vxlan gbp | + +| @c src/plugins/igmp/igmp.api || +| ------- | ------- | +| [bdc0e6b7](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=bdc0e6b7) | Trivial: Clean up some typos. 
| + +| @c src/plugins/lb/lb.api || +| ------- | ------- | +| [6a4375e0](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6a4375e0) | LB: fix flush flow table issue | +| [49ca2601](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=49ca2601) | Add flush flag on del as command | +| [219cc90c](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=219cc90c) | Support lb on both vip and per-port-vip case | + +| @c src/plugins/nat/nat.api || +| ------- | ------- | +| [bb4e0225](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=bb4e0225) | NAT: TCP MSS clamping | +| [5d28c7af](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=5d28c7af) | NAT: add support for configurable port range (VPP-1346) | +| [ea5b5be4](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=ea5b5be4) | NAT44: client-IP based session affinity for load-balancing (VPP-1297) | +| [878c646a](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=878c646a) | NAT44: add support for session timeout (VPP-1272) | +| [69ce30d6](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=69ce30d6) | NAT: update nat_show_config_reply API (VPP-1403) | +| [6bd197eb](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6bd197eb) | Remove client_index field from replies in API | +| [c6c0d2a0](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=c6c0d2a0) | NAT44: LB NAT - local backends in multiple VRFs (VPP-1345) | + +| @c src/plugins/vmxnet3/vmxnet3.api || +| ------- | ------- | +| [df7f8e8c](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=df7f8e8c) | vmxnet3 device driver | + +| @c src/plugins/nsh/nsh.api || +| ------- | ------- | +| [d313f9e6](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=d313f9e6) | Port NSH plugin to VPP | + +| @c src/plugins/nsim/nsim.api || +| ------- | ------- | +| [9e3252b5](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=9e3252b5) | Network delay simulator plugin | + +| @c src/plugins/svs/svs.api || +| ------- | ------- | +| [d1e68ab7](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=d1e68ab7) | 
Source VRF Select | + +| @c src/vlibmemory/memclnt.api || +| ------- | ------- | +| [94495f2a](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=94495f2a) | PAPI: Use UNIX domain sockets instead of shared memory | +| [6bd197eb](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6bd197eb) | Remove client_index field from replies in API | +| [75282457](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=75282457) | Fix "Old Style VLA" build warnings | + +| @c src/vnet/interface.api || +| ------- | ------- | +| [f0b42f48](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=f0b42f48) | itf: dump interface rx-placement | +| [bdc0e6b7](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=bdc0e6b7) | Trivial: Clean up some typos. | +| [54f7c51f](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=54f7c51f) | rx-placement: Add API call for interface rx-placement | +| [1855b8e4](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=1855b8e4) | IP directed broadcast | + +| @c src/vnet/bfd/bfd.api || +| ------- | ------- | +| [2d3c7b9c](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=2d3c7b9c) | BFD: add get echo source API (VPP-1367) | + +| @c src/vnet/bier/bier.api || +| ------- | ------- | +| [ef90ed08](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=ef90ed08) | BIER API and load-balancing fixes | +| [6bd197eb](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6bd197eb) | Remove client_index field from replies in API | + +| @c src/vnet/classify/classify.api || +| ------- | ------- | +| [34eb5d42](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=34eb5d42) | classify_add_del_session API: Use more descriptive docstring (VPP-1385) | +| [75282457](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=75282457) | Fix "Old Style VLA" build warnings | + +| @c src/vnet/devices/pipe/pipe.api || +| ------- | ------- | +| [208c29aa](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=208c29aa) | VOM: support for pipes | + +| @c src/vnet/devices/virtio/vhost_user.api || +| ------- | 
------- | +| [ee2e58f6](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=ee2e58f6) | vhost-user: Add disable feature support in api | + +| @c src/vnet/ethernet/ethernet_types.api || +| ------- | ------- | +| [de5b08fb](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=de5b08fb) | Introduce a mac_address_t on the API and in VPP | + +| @c src/vnet/ip/ip_types.api || +| ------- | ------- | +| [d0df49f2](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=d0df49f2) | Use IP address types on UDP encap API | + +| @c src/vnet/ip/ip.api || +| ------- | ------- | +| [412ecd32](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=412ecd32) | Improve ip_mroute_add_del documentation | +| [14260393](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=14260393) | Add adjacency counters to the stats segment | +| [28c142e3](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=28c142e3) | mroute routers in the stats segment | +| [008dbe10](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=008dbe10) | Route counters in the stats segment | +| [de5b08fb](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=de5b08fb) | Introduce a mac_address_t on the API and in VPP | +| [6bd197eb](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6bd197eb) | Remove client_index field from replies in API | +| [b11f903a](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=b11f903a) | Fix context field position in API definition | + +| @c src/vnet/ipip/ipip.api || +| ------- | ------- | +| [61502115](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=61502115) | IPIP and SIXRD tunnels create API needs table-IDs not fib-indexes | + +| @c src/vnet/ipsec/ipsec.api || +| ------- | ------- | +| [a9a0b2ce](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=a9a0b2ce) | IPsec: add API for SPDs dump (VPP-1363) | +| [bdc0e6b7](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=bdc0e6b7) | Trivial: Clean up some typos. 
| + +| @c src/vnet/l2/l2.api || +| ------- | ------- | +| [0a4e0063](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=0a4e0063) | Fix documentation about sw_interface_set_l2_bridge | +| [b474380f](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=b474380f) | L2 BD: introduce a BD interface on which to send UU packets | +| [bdc0e6b7](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=bdc0e6b7) | Trivial: Clean up some typos. | +| [5c7c49d1](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=5c7c49d1) | Fix documentation for SHG in bridge domain | +| [5d82d2f1](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=5d82d2f1) | l2: arp termination dump | +| [6b9b41c8](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6b9b41c8) | L2 EFP: byteswap sw_if_index, enable flag can be u8 on .api | + +| @c src/vnet/lisp-cp/lisp.api || +| ------- | ------- | +| [bdc0e6b7](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=bdc0e6b7) | Trivial: Clean up some typos. | +| [6bd197eb](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6bd197eb) | Remove client_index field from replies in API | + +| @c src/vnet/lisp-cp/one.api || +| ------- | ------- | +| [bdc0e6b7](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=bdc0e6b7) | Trivial: Clean up some typos. 
| +| [6bd197eb](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6bd197eb) | Remove client_index field from replies in API | + +| @c src/vnet/lisp-gpe/lisp_gpe.api || +| ------- | ------- | +| [6bd197eb](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6bd197eb) | Remove client_index field from replies in API | +| [b11f903a](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=b11f903a) | Fix context field position in API definition | + +| @c src/vnet/mpls/mpls.api || +| ------- | ------- | +| [f5fa5ae2](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=f5fa5ae2) | MPLS tunnel dump: use sw_if_index not tunnel_index | +| [6a30b5f9](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6a30b5f9) | MPLS tunnel dump fix | +| [008dbe10](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=008dbe10) | Route counters in the stats segment | +| [7c922dc4](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=7c922dc4) | SR-MPLS: fixes and tests | + +| @c src/vnet/qos/qos.api || +| ------- | ------- | +| [bdc0e6b7](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=bdc0e6b7) | Trivial: Clean up some typos. 
| +| [ed234e7f](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=ed234e7f) | Enum type on the API for QoS sources | + +| @c src/vnet/session/session.api || +| ------- | ------- | +| [ab2f6dbf](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=ab2f6dbf) | session: support multiple worker binds | +| [134a996a](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=134a996a) | vcl: add support for multi-worker apps | +| [1553197f](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=1553197f) | session: add support for multiple app workers | +| [6bd197eb](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6bd197eb) | Remove client_index field from replies in API | +| [99368315](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=99368315) | vcl: support for eventfd mq signaling | + +| @c src/vnet/span/span.api || +| ------- | ------- | +| [bdc0e6b7](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=bdc0e6b7) | Trivial: Clean up some typos. | + +| @c src/vnet/udp/udp.api || +| ------- | ------- | +| [9c0a3c42](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=9c0a3c42) | UDP-Encap: name counters for the stats segment | +| [d0df49f2](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=d0df49f2) | Use IP address types on UDP encap API | + +| @c src/vnet/unix/tap.api || +| ------- | ------- | +| [bdc0e6b7](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=bdc0e6b7) | Trivial: Clean up some typos. | + +| @c src/vnet/vxlan-gbp/vxlan_gbp.api || +| ------- | ------- | +| [79a05f54](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=79a05f54) | VXLAN-GBP: use common types on the API | +| [61b94c6b](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=61b94c6b) | vxlan-gbp: Add support for vxlan gbp | + +| @c src/vpp/api/vpe.api || +| ------- | ------- | +| [5d64c786](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=5d64c786) | thread: Add show threads api | +| [ec11b13a](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=ec11b13a) | Trivial: Cleanup some typos. 
| + +| @c src/vpp/stats/stats.api || +| ------- | ------- | +| [ec11b13a](https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=ec11b13a) | Trivial: Cleanup some typos. | + +### Notice of future API deprecation +- bind_uri_reply +- accept_session +- accept_session_reply +- disconnect_session_reply +- reset_session +- reset_session_reply +- bind_sock_reply +- connect_session_reply + + @page release_notes_1807 Release notes for VPP 18.07 More than 533 commits since the 18.04 release. From 64c5a5c65667ab919f05aff57ddb6cd8048d6bf2 Mon Sep 17 00:00:00 2001 From: Steven Date: Mon, 22 Oct 2018 22:03:34 -0700 Subject: [PATCH 37/70] vmxnet3: add logging support to the control plane [VPP-1470] There are different flavors of vmxnet3 device, esxi server, vm fusion, vmware workstation, and vmware player, that we need to communicate with. Each of them also has different versions. We really need the control plane logging to debug when things don't work as expected. Change-Id: Idab6896e3d8bf841f1cd877c13a21531fa110568 Signed-off-by: Steven --- src/plugins/vmxnet3/cli.c | 3 ++ src/plugins/vmxnet3/output.c | 3 +- src/plugins/vmxnet3/vmxnet3.c | 64 +++++++++++++++++++++++++++++------ src/plugins/vmxnet3/vmxnet3.h | 34 ++++++++++++++++--- 4 files changed, 87 insertions(+), 17 deletions(-) diff --git a/src/plugins/vmxnet3/cli.c b/src/plugins/vmxnet3/cli.c index 170f9ad7f5b1..096791b10037 100644 --- a/src/plugins/vmxnet3/cli.c +++ b/src/plugins/vmxnet3/cli.c @@ -567,9 +567,12 @@ VLIB_CLI_COMMAND (show_vmxnet3_command, static) = { clib_error_t * vmxnet3_cli_init (vlib_main_t * vm) { + vmxnet3_main_t *vmxm = &vmxnet3_main; + /* initialize binary API */ vmxnet3_plugin_api_hookup (vm); + vmxm->log_default = vlib_log_register_class ("vmxnet3", 0); return 0; } diff --git a/src/plugins/vmxnet3/output.c b/src/plugins/vmxnet3/output.c index 2a8494ed447c..1dc394a62ef0 100644 --- a/src/plugins/vmxnet3/output.c +++ b/src/plugins/vmxnet3/output.c @@ -197,7 +197,8 @@ VNET_DEVICE_CLASS_TX_FN 
(vmxnet3_device_class) (vlib_main_t * vm, * Device can start reading the packet */ txq->tx_desc[first_idx].flags[0] ^= VMXNET3_TXF_GEN; - vmxnet3_reg_write (vd, 0, VMXNET3_REG_TXPROD, txq->tx_ring.produce); + vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_TXPROD, + txq->tx_ring.produce); buffers++; n_left--; diff --git a/src/plugins/vmxnet3/vmxnet3.c b/src/plugins/vmxnet3/vmxnet3.c index ac99411a4e21..ccc76dae06f4 100644 --- a/src/plugins/vmxnet3/vmxnet3.c +++ b/src/plugins/vmxnet3/vmxnet3.c @@ -319,7 +319,7 @@ vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, ret = vmxnet3_reg_read (vd, 1, VMXNET3_REG_CMD); if (ret != 0) { - error = clib_error_return (0, "error on quisecing device rc (%u)", ret); + error = clib_error_return (0, "error on quiescing device rc (%u)", ret); return error; } @@ -497,6 +497,9 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) clib_error_return (error, "queue size must be <= 4096, >= 64, " "and multiples of 64"); + vlib_log (VLIB_LOG_LEVEL_ERR, vmxm->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, + "queue size must be <= 4096, >= 64, and multiples of 64"); return; } @@ -507,6 +510,8 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) args->rv = VNET_API_ERROR_INVALID_VALUE; args->error = clib_error_return (error, "PCI address in use"); + vlib_log (VLIB_LOG_LEVEL_ERR, vmxm->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, "pci address in use"); return; } })); @@ -528,37 +533,70 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) args->error = clib_error_return (error, "pci-addr %U", format_vlib_pci_addr, &args->addr); + vlib_log (VLIB_LOG_LEVEL_ERR, vmxm->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, + "error encountered on pci device open"); return; } - vd->pci_dev_handle = h; + /* + * Do not use vmxnet3_log_error prior to this line since the macro + * references vd->pci_dev_handle + */ + vd->pci_dev_handle = h; 
vlib_pci_set_private_data (h, vd->dev_instance); if ((error = vlib_pci_bus_master_enable (h))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on pci bus master enable"); + goto error; + } if ((error = vlib_pci_map_region (h, 0, (void **) &vd->bar[0]))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on pci map region for bar 0"); + goto error; + } if ((error = vlib_pci_map_region (h, 1, (void **) &vd->bar[1]))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on pci map region for bar 1"); + goto error; + } if ((error = vlib_pci_register_msix_handler (h, 0, 1, &vmxnet3_irq_0_handler))) - goto error; + { + vmxnet3_log_error (vd, + "error encountered on pci register msix handler 0"); + goto error; + } if ((error = vlib_pci_register_msix_handler (h, 1, 1, &vmxnet3_irq_1_handler))) - goto error; + { + vmxnet3_log_error (vd, + "error encountered on pci register msix handler 1"); + goto error; + } if ((error = vlib_pci_enable_msix_irq (h, 0, 2))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on pci enable msix irq"); + goto error; + } if ((error = vlib_pci_intr_enable (h))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on pci interrupt enable"); + goto error; + } if ((error = vmxnet3_device_init (vm, vd, args))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on device init"); + goto error; + } /* create interface */ error = ethernet_register_interface (vnm, vmxnet3_device_class.index, @@ -566,7 +604,11 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) &vd->hw_if_index, vmxnet3_flag_change); if (error) - goto error; + { + vmxnet3_log_error (vd, + "error encountered on ethernet register interface"); + goto error; + } vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, vd->hw_if_index); vd->sw_if_index = sw->sw_if_index; diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h index 491b8c102463..daf6275ec0f1 100644 --- 
a/src/plugins/vmxnet3/vmxnet3.h +++ b/src/plugins/vmxnet3/vmxnet3.h @@ -166,7 +166,7 @@ enum _(7, GET_DEV_EXTRA_INFO, "get dev extra info") \ _(8, GET_CONF_INTR, "get conf intr") \ _(9, GET_ADAPTIVE_RING_INFO, "get adaptive ring info") \ - _(10, GET_TXDATA_DESC_SIZE, "gte txdata desc size") \ + _(10, GET_TXDATA_DESC_SIZE, "get txdata desc size") \ _(11, RESERVED5, "reserved5") enum @@ -496,6 +496,7 @@ typedef struct vlib_physmem_region_index_t physmem_region; u32 physmem_region_alloc; u16 msg_id_base; + vlib_log_class_t log_default; } vmxnet3_main_t; extern vmxnet3_main_t vmxnet3_main; @@ -531,16 +532,39 @@ format_function_t format_vmxnet3_device; format_function_t format_vmxnet3_device_name; format_function_t format_vmxnet3_input_trace; +#define vmxnet3_log_debug(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_DEBUG, vmxnet3_main.log_default, "%U: " f, \ + format_vlib_pci_addr, vlib_pci_get_addr(dev->pci_dev_handle), \ + ## __VA_ARGS__) + +#define vmxnet3_log_error(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_ERR, vmxnet3_main.log_default, "%U: " f, \ + format_vlib_pci_addr, vlib_pci_get_addr(dev->pci_dev_handle), \ + ## __VA_ARGS__) + +/* no log version, called by data plane */ static_always_inline void -vmxnet3_reg_write (vmxnet3_device_t * vd, u8 bar, u32 addr, u32 val) +vmxnet3_reg_write_inline (vmxnet3_device_t * vd, u8 bar, u32 addr, u32 val) { *(volatile u32 *) ((u8 *) vd->bar[bar] + addr) = val; } +static_always_inline void +vmxnet3_reg_write (vmxnet3_device_t * vd, u8 bar, u32 addr, u32 val) +{ + vmxnet3_log_debug (vd, "reg wr bar %u addr 0x%x val 0x%x", bar, addr, val); + vmxnet3_reg_write_inline (vd, bar, addr, val); +} + static_always_inline u32 vmxnet3_reg_read (vmxnet3_device_t * vd, u8 bar, u32 addr) { - return *(volatile u32 *) (vd->bar[bar] + addr); + u32 val; + + val = *(volatile u32 *) (vd->bar[bar] + addr); + vmxnet3_log_debug (vd, "reg rd bar %u addr 0x%x val 0x%x", bar, addr, val); + + return val; } static_always_inline uword @@ -600,7 +624,7 @@ 
vmxnet3_rxq_refill_ring0 (vlib_main_t * vm, vmxnet3_device_t * vd, n_alloc--; } - vmxnet3_reg_write (vd, 0, VMXNET3_REG_RXPROD, ring->produce); + vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD, ring->produce); return 0; } @@ -642,7 +666,7 @@ vmxnet3_rxq_refill_ring1 (vlib_main_t * vm, vmxnet3_device_t * vd, n_alloc--; } - vmxnet3_reg_write (vd, 0, VMXNET3_REG_RXPROD2, ring->produce); + vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD2, ring->produce); return 0; } From 975b4b1f7a568ec3d6704232286418c554c04cc1 Mon Sep 17 00:00:00 2001 From: Yichen Wang Date: Tue, 30 Oct 2018 22:52:15 -0700 Subject: [PATCH 38/70] Makefile: support both RHEL and CentOS in install-dep (VPP-1481) Change-Id: I31fd8700bfac462944b7621947edd3710fd247b9 Signed-off-by: Yichen Wang --- Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 36c7a5a741d0..a2d256239db1 100644 --- a/Makefile +++ b/Makefile @@ -296,8 +296,12 @@ endif @sudo -E apt-get update @sudo -E apt-get $(APT_ARGS) $(CONFIRM) $(FORCE) install $(DEB_DEPENDS) else ifneq ("$(wildcard /etc/redhat-release)","") - @sudo -E yum groupinstall $(CONFIRM) $(RPM_DEPENDS_GROUPS) +ifeq ($(OS_ID),rhel) + @sudo -E yum-config-manager --enable rhel-server-rhscl-7-rpms +else ifeq ($(OS_ID),centos) @sudo -E yum install $(CONFIRM) centos-release-scl-rh +endif + @sudo -E yum groupinstall $(CONFIRM) $(RPM_DEPENDS_GROUPS) @sudo -E yum install $(CONFIRM) $(RPM_DEPENDS) @sudo -E debuginfo-install $(CONFIRM) glibc openssl-libs mbedtls-devel zlib else ifeq ($(filter opensuse-tumbleweed,$(OS_ID)),$(OS_ID)) @@ -310,7 +314,7 @@ else ifeq ($(filter opensuse,$(OS_ID)),$(OS_ID)) @sudo -E zypper refresh @sudo -E zypper install -y $(RPM_SUSE_DEPENDS) else - $(error "This option currently works only on Ubuntu, Debian, Centos or openSUSE systems") + $(error "This option currently works only on Ubuntu, Debian, RHEL, CentOS or openSUSE systems") endif define make From 
c92341d5c6ab052d458471e26d6c27f9e303fe31 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Fri, 31 Aug 2018 02:51:45 -0700 Subject: [PATCH 39/70] Use throttle_t for ND throttling (VPP-1480) Change-Id: I93c6b7bccd1a1ab71625ae29c99c974581186c4d Signed-off-by: Neale Ranns --- src/vnet/ip/ip4_forward.c | 8 ++++++-- src/vnet/ip/ip6.h | 6 ++---- src/vnet/ip/ip6_input.c | 9 +-------- src/vnet/ip/ip6_neighbor.c | 37 ++++++++---------------------------- src/vnet/ip/ip6_packet.h | 12 ++++++++++++ src/vnet/util/throttle.h | 12 +++++++----- test/test_neighbor.py | 39 ++++++++++++++++++++++++++++++++++---- 7 files changed, 71 insertions(+), 52 deletions(-) diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 69a8dbad8050..ffb873a96112 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1750,7 +1750,7 @@ ip4_arp_inline (vlib_main_t * vm, u32 *from, *to_next_drop; uword n_left_from, n_left_to_next_drop, next_index; u32 thread_index = vm->thread_index; - u32 seed; + u64 seed; if (node->flags & VLIB_NODE_FLAG_TRACE) ip4_forward_next_trace (vm, node, frame, VLIB_TX); @@ -1770,10 +1770,11 @@ ip4_arp_inline (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next_drop > 0) { - u32 pi0, adj_index0, r0, sw_if_index0, drop0; + u32 pi0, adj_index0, sw_if_index0, drop0; ip_adjacency_t *adj0; vlib_buffer_t *p0; ip4_header_t *ip0; + u64 r0; pi0 = from[0]; @@ -1798,6 +1799,9 @@ ip4_arp_inline (vlib_main_t * vm, { r0 = adj0->sub_type.nbr.next_hop.ip4.data_u32; } + /* combine the address and interface for the hash key */ + r0 = r0 << 32; + r0 |= sw_if_index0; drop0 = throttle_check (&im->arp_throttle, thread_index, r0, seed); diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index bc89a0821ae4..e807886cd2af 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -49,6 +49,7 @@ #include #include #include +#include /* * Default size of the ip6 fib hash table @@ -220,10 +221,7 @@ typedef struct ip6_main_t u8 hbh_enabled; /** ND throttling */ - uword 
**nd_throttle_bitmaps; - u64 *nd_throttle_seeds; - f64 *nd_throttle_last_seed_change_time; - + throttle_t nd_throttle; } ip6_main_t; #define ND_THROTTLE_BITS 512 diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c index 977d2703d191..a01920a7b487 100644 --- a/src/vnet/ip/ip6_input.c +++ b/src/vnet/ip/ip6_input.c @@ -277,16 +277,9 @@ ip6_main_loop_enter (vlib_main_t * vm) { ip6_main_t *im = &ip6_main; vlib_thread_main_t *tm = &vlib_thread_main; - u32 n_vlib_mains = tm->n_vlib_mains; - int i; - vec_validate (im->nd_throttle_bitmaps, n_vlib_mains); - vec_validate (im->nd_throttle_seeds, n_vlib_mains); - vec_validate (im->nd_throttle_last_seed_change_time, n_vlib_mains); + throttle_init (&im->nd_throttle, tm->n_vlib_mains, 1e-3); - for (i = 0; i < n_vlib_mains; i++) - vec_validate (im->nd_throttle_bitmaps[i], - (ND_THROTTLE_BITS / BITS (uword)) - 1); return 0; } diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 8466ba703135..b6889157cab6 100755 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -3163,7 +3163,6 @@ ip6_discover_neighbor_inline (vlib_main_t * vm, ip_lookup_main_t *lm = &im->lookup_main; u32 *from, *to_next_drop; uword n_left_from, n_left_to_next_drop; - f64 time_now; u64 seed; u32 thread_index = vm->thread_index; int bogus_length; @@ -3172,16 +3171,7 @@ ip6_discover_neighbor_inline (vlib_main_t * vm, if (node->flags & VLIB_NODE_FLAG_TRACE) ip6_forward_next_trace (vm, node, frame, VLIB_TX); - time_now = vlib_time_now (vm); - if (time_now - im->nd_throttle_last_seed_change_time[thread_index] > 1e-3) - { - (void) random_u64 (&im->nd_throttle_seeds[thread_index]); - memset (im->nd_throttle_bitmaps[thread_index], 0, - ND_THROTTLE_BITS / BITS (u8)); - - im->nd_throttle_last_seed_change_time[thread_index] = time_now; - } - seed = im->nd_throttle_seeds[thread_index]; + seed = throttle_seed (&im->nd_throttle, thread_index, vlib_time_now (vm)); from = vlib_frame_vector_args (frame); n_left_from = 
frame->n_vectors; @@ -3193,15 +3183,12 @@ ip6_discover_neighbor_inline (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next_drop > 0) { - vlib_buffer_t *p0; - ip6_header_t *ip0; - u32 pi0, adj_index0, w0, sw_if_index0, drop0; - u64 r0; - uword m0; - ip_adjacency_t *adj0; + u32 pi0, adj_index0, sw_if_index0, drop0, r0, next0; vnet_hw_interface_t *hw_if0; ip6_radv_t *radv_info; - u32 next0; + ip_adjacency_t *adj0; + vlib_buffer_t *p0; + ip6_header_t *ip0; pi0 = from[0]; @@ -3224,18 +3211,10 @@ ip6_discover_neighbor_inline (vlib_main_t * vm, sw_if_index0 = adj0->rewrite_header.sw_if_index; vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; - /* Compute the ND throttle bitmap hash */ - r0 = ip0->dst_address.as_u64[0] ^ ip0->dst_address.as_u64[1] ^ seed; - - /* Find the word and bit */ - r0 &= ND_THROTTLE_BITS - 1; - w0 = r0 / BITS (uword); - m0 = (uword) 1 << (r0 % BITS (uword)); + /* combine the address and interface for a hash */ + r0 = ip6_address_hash_to_u64 (&ip0->dst_address) ^ sw_if_index0; - /* If the bit is set, drop the ND request */ - drop0 = (im->nd_throttle_bitmaps[thread_index][w0] & m0) != 0; - /* (unconditionally) mark the bit "inuse" */ - im->nd_throttle_bitmaps[thread_index][w0] |= m0; + drop0 = throttle_check (&im->nd_throttle, thread_index, r0, seed); from += 1; n_left_from -= 1; diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index ea2fa155b5ed..cdcffa59b8b4 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -347,6 +347,18 @@ ip6_is_solicited_node_multicast_address (const ip6_address_t * a) && a->as_u8[12] == 0xff); } +always_inline u32 +ip6_address_hash_to_u32 (const ip6_address_t * a) +{ + return (a->as_u32[0] ^ a->as_u32[1] ^ a->as_u32[2] ^ a->as_u32[3]); +} + +always_inline u64 +ip6_address_hash_to_u64 (const ip6_address_t * a) +{ + return (a->as_u64[0] ^ a->as_u64[1]); +} + typedef struct { /* 4 bit version, 8 bit traffic class and 20 bit flow label. 
*/ diff --git a/src/vnet/util/throttle.h b/src/vnet/util/throttle.h index 97ebb2597b3a..28bf7aa2217d 100644 --- a/src/vnet/util/throttle.h +++ b/src/vnet/util/throttle.h @@ -17,6 +17,7 @@ #define __THROTTLE_H__ #include +#include /** * @brief A throttle @@ -28,7 +29,7 @@ typedef struct throttle_t_ { f64 time; uword **bitmaps; - u32 *seeds; + u64 *seeds; f64 *last_seed_change_time; } throttle_t; @@ -36,12 +37,12 @@ typedef struct throttle_t_ extern void throttle_init (throttle_t * t, u32 n_threads, f64 time); -always_inline u32 +always_inline u64 throttle_seed (throttle_t * t, u32 thread_index, f64 time_now) { if (time_now - t->last_seed_change_time[thread_index] > t->time) { - (void) random_u32 (&t->seeds[thread_index]); + (void) random_u64 (&t->seeds[thread_index]); memset (t->bitmaps[thread_index], 0, THROTTLE_BITS / BITS (u8)); t->last_seed_change_time[thread_index] = time_now; @@ -50,13 +51,14 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now) } always_inline int -throttle_check (throttle_t * t, u32 thread_index, u32 hash, u32 seed) +throttle_check (throttle_t * t, u32 thread_index, u64 hash, u64 seed) { int drop; uword m; u32 w; - hash ^= seed; + hash = clib_xxhash (hash ^ seed); + /* Select bit number */ hash &= THROTTLE_BITS - 1; w = hash / BITS (uword); diff --git a/test/test_neighbor.py b/test/test_neighbor.py index a15106af1f9a..674240487974 100644 --- a/test/test_neighbor.py +++ b/test/test_neighbor.py @@ -6,11 +6,12 @@ from framework import VppTestCase, VppTestRunner from vpp_neighbor import VppNeighbor, find_nbr from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, \ - VppIpTable + VppIpTable, DpoProto from scapy.packet import Raw from scapy.layers.l2 import Ether, ARP, Dot1Q from scapy.layers.inet import IP, UDP +from scapy.layers.inet6 import IPv6 from scapy.contrib.mpls import MPLS from scapy.layers.inet6 import IPv6 @@ -1321,14 +1322,16 @@ def test_arp_incomplete(self): """ Incomplete Entries """ # - # ensure that we 
throttle the ARP requests + # ensure that we throttle the ARP and ND requests # self.pg0.generate_remote_hosts(2) + # + # IPv4/ARP + # ip_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32, [VppRoutePath(self.pg0.remote_hosts[1].ip4, - self.pg0.sw_if_index, - labels=[55])]) + self.pg0.sw_if_index)]) ip_10_0_0_1.add_vpp_config() p1 = (Ether(dst=self.pg1.local_mac, @@ -1349,6 +1352,34 @@ def test_arp_incomplete(self): # self.assertTrue(len(rx) < 64) + # + # IPv6/ND + # + ip_10_1 = VppIpRoute(self, "10::1", 128, + [VppRoutePath(self.pg0.remote_hosts[1].ip6, + self.pg0.sw_if_index, + proto=DpoProto.DPO_PROTO_IP6)], + is_ip6=1) + ip_10_1.add_vpp_config() + + p1 = (Ether(dst=self.pg1.local_mac, + src=self.pg1.remote_mac) / + IPv6(src=self.pg1.remote_ip6, + dst="10::1") / + UDP(sport=1234, dport=1234) / + Raw()) + + self.pg1.add_stream(p1 * 257) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + rx = self.pg0._get_capture(1) + + # + # how many we get is going to be dependent on the time for packet + # processing but it should be small + # + self.assertTrue(len(rx) < 64) + class NeighborStatsTestCase(VppTestCase): """ ARP Test Case """ From d23f37eeafafe4db0e70de8c898dc8f51bc4482c Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 31 Oct 2018 10:59:02 +0100 Subject: [PATCH 40/70] vlib: define minimum chained buffer segment size Change-Id: I9b5f7b264f9978e3dd97b2d1eb103b7d10ac3170 Signed-off-by: Damjan Marion (cherry picked from commit bd0da97e5ac0f84e1ea8e6e7f9549dd4e1a6a4ab) --- src/vlib/buffer.h | 11 +++++++++++ src/vnet/bier/bier_lookup.c | 2 +- src/vnet/dpo/replicate_dpo.c | 3 ++- src/vnet/l2/l2_flood.c | 4 +--- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 651e7f0dc8fb..9254cfb804f4 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -51,6 +51,14 @@ #define VLIB_BUFFER_DATA_SIZE (2048) #define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE +/* Minimum buffer chain segment size. 
Does not apply to last buffer in chain. + Dataplane code can safely assume that specified amount of data is not split + into 2 chained buffers */ +#define VLIB_BUFFER_MIN_CHAIN_SEG_SIZE (128) + +/* Amount of head buffer data copied to each replica head buffer */ +#define VLIB_BUFFER_CLONE_HEAD_SIZE (256) + typedef u8 vlib_buffer_free_list_index_t; /** \file @@ -212,6 +220,9 @@ vlib_buffer_advance (vlib_buffer_t * b, word l) ASSERT (b->current_length >= l); b->current_data += l; b->current_length -= l; + + ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0 || + b->current_length >= VLIB_BUFFER_MIN_CHAIN_SEG_SIZE); } /** \brief Check if there is enough space in buffer to advance diff --git a/src/vnet/bier/bier_lookup.c b/src/vnet/bier/bier_lookup.c index d4500823f13f..51011c980083 100644 --- a/src/vnet/bier/bier_lookup.c +++ b/src/vnet/bier/bier_lookup.c @@ -223,7 +223,7 @@ bier_lookup (vlib_main_t * vm, num_cloned = vlib_buffer_clone(vm, bi0, blm->blm_clones[thread_index], n_clones, - n_bytes + 8); + VLIB_BUFFER_CLONE_HEAD_SIZE); if (num_cloned != vec_len(blm->blm_fmasks[thread_index])) { diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c index 6742bff41aa6..bc4db0cd6966 100644 --- a/src/vnet/dpo/replicate_dpo.c +++ b/src/vnet/dpo/replicate_dpo.c @@ -673,7 +673,8 @@ replicate_inline (vlib_main_t * vm, vec_validate (rm->clones[thread_index], rep0->rep_n_buckets - 1); num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[thread_index], - rep0->rep_n_buckets, 128); + rep0->rep_n_buckets, + VLIB_BUFFER_CLONE_HEAD_SIZE); if (num_cloned != rep0->rep_n_buckets) { diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c index 8908c4312d39..aeac8ff10d84 100644 --- a/src/vnet/l2/l2_flood.c +++ b/src/vnet/l2/l2_flood.c @@ -223,9 +223,7 @@ l2flood_node_fn (vlib_main_t * vm, n_cloned = vlib_buffer_clone (vm, bi0, msm->clones[thread_index], n_clones, - (vnet_buffer (b0)->l2.l2_len + - sizeof (udp_header_t) + - 2 * sizeof (ip6_header_t))); + 
VLIB_BUFFER_CLONE_HEAD_SIZE); if (PREDICT_FALSE (n_cloned != n_clones)) { From 12806a3cf083976286146684afd97213c78ec70a Mon Sep 17 00:00:00 2001 From: Filip Varga Date: Fri, 2 Nov 2018 13:51:44 +0100 Subject: [PATCH 41/70] VPP-1450: binary api call for dumping SPD to interface registration Change-Id: Idd4a5f8bab5d39e5f33f5c130601175af70a20d4 Signed-off-by: Filip Varga Signed-off-by: Dave Barach --- src/vnet/ipsec/ipsec.api | 27 ++++++++++++++++++- src/vnet/ipsec/ipsec_api.c | 55 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api index d6a28017fff1..793422d86fba 100644 --- a/src/vnet/ipsec/ipsec.api +++ b/src/vnet/ipsec/ipsec.api @@ -455,7 +455,7 @@ define ipsec_spds_dump { @param spd_id - SPD instance id (control plane allocated) @param npolicies - number of policies in SPD */ - define ipsec_spds_details { +define ipsec_spds_details { u32 context; u32 spd_id; u32 npolicies; @@ -515,6 +515,31 @@ define ipsec_spd_details { u64 packets; }; +/** \brief IPsec: Get SPD interfaces + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param spd_index - SPD index + @param spd_index_valid - if 1 spd_index is used to filter + spd_index's, if 0 no filtering is done +*/ +define ipsec_spd_interface_dump { + u32 client_index; + u32 context; + u32 spd_index; + u8 spd_index_valid; +}; + +/** \brief IPsec: SPD interface response + @param context - sender context which was passed in the request + @param spd_index - SPD index + @param sw_if_index - index of the interface +*/ +define ipsec_spd_interface_details { + u32 context; + u32 spd_index; + u32 sw_if_index; +}; + /** \brief Add or delete IPsec tunnel interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index 
37daee0b64fb..c0e526c37404 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -56,6 +56,7 @@ _(IPSEC_SA_SET_KEY, ipsec_sa_set_key) \ _(IPSEC_SA_DUMP, ipsec_sa_dump) \ _(IPSEC_SPDS_DUMP, ipsec_spds_dump) \ _(IPSEC_SPD_DUMP, ipsec_spd_dump) \ +_(IPSEC_SPD_INTERFACE_DUMP, ipsec_spd_interface_dump) \ _(IPSEC_TUNNEL_IF_ADD_DEL, ipsec_tunnel_if_add_del) \ _(IPSEC_TUNNEL_IF_SET_KEY, ipsec_tunnel_if_set_key) \ _(IPSEC_TUNNEL_IF_SET_SA, ipsec_tunnel_if_set_sa) \ @@ -366,6 +367,60 @@ vl_api_ipsec_spd_dump_t_handler (vl_api_ipsec_spd_dump_t * mp) #endif } +static void +send_ipsec_spd_interface_details (vl_api_registration_t * reg, u32 spd_index, + u32 sw_if_index, u32 context) +{ + vl_api_ipsec_spd_interface_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IPSEC_SPD_INTERFACE_DETAILS); + mp->context = context; + + mp->spd_index = htonl (spd_index); + mp->sw_if_index = htonl (sw_if_index); + + vl_api_send_msg (reg, (u8 *) mp); +} + +static void +vl_api_ipsec_spd_interface_dump_t_handler (vl_api_ipsec_spd_interface_dump_t * + mp) +{ + ipsec_main_t *im = &ipsec_main; + vl_api_registration_t *reg; + u32 k, v, spd_index; + +#if WITH_LIBSSL > 0 + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + if (mp->spd_index_valid) + { + spd_index = ntohl (mp->spd_index); + /* *INDENT-OFF* */ + hash_foreach(k, v, im->spd_index_by_sw_if_index, ({ + if (v == spd_index) + send_ipsec_spd_interface_details(reg, v, k, mp->context); + })); + /* *INDENT-ON* */ + } + else + { + /* *INDENT-OFF* */ + hash_foreach(k, v, im->spd_index_by_sw_if_index, ({ + send_ipsec_spd_interface_details(reg, v, k, mp->context); + })); + /* *INDENT-ON* */ + } + +#else + clib_warning ("unimplemented"); +#endif +} + static void vl_api_ipsec_sa_set_key_t_handler (vl_api_ipsec_sa_set_key_t * mp) { From 06eaab0ea805e46191acd3aea9423b05ebcbed5c Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: 
Tue, 6 Nov 2018 23:17:31 -0800 Subject: [PATCH 42/70] NAT44: fix undesired dependency between static mapping and address from the pool (VPP-1485) Change-Id: Iaa404361eac2a6612dcdaba3f73bae41a35c5446 Signed-off-by: Matus Fabian --- src/plugins/nat/nat.c | 7 +++++++ src/plugins/nat/nat_format.c | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 2ebd6834d927..73db65e2d78a 100755 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -558,6 +558,10 @@ is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr) /* *INDENT-OFF* */ pool_foreach (m, sm->static_mappings, ({ + if (is_addr_only_static_mapping (m) || + is_out2in_only_static_mapping (m) || + is_identity_static_mapping (m)) + continue; if (m->external_addr.as_u32 == addr.as_u32) return 1; })); @@ -954,6 +958,9 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, if (identity_nat) { + if (vrf_id == ~0) + vrf_id = sm->inside_vrf_id; + for (i = 0; i < vec_len (m->locals); i++) { if (m->locals[i].vrf_id == vrf_id) diff --git a/src/plugins/nat/nat_format.c b/src/plugins/nat/nat_format.c index 5ce00dcb1d04..8e5ac4cade44 100644 --- a/src/plugins/nat/nat_format.c +++ b/src/plugins/nat/nat_format.c @@ -226,7 +226,8 @@ format_snat_static_mapping (u8 * s, va_list * args) s = format (s, "identity mapping %U", format_ip4_address, &m->local_addr); else - s = format (s, "identity mapping %U:%d", + s = format (s, "identity mapping %U %U:%d", + format_snat_protocol, m->proto, format_ip4_address, &m->local_addr, m->local_port); /* *INDENT-OFF* */ From 6ff8790c92e36120d08f7be2052075f25506e16a Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Fri, 16 Nov 2018 04:41:31 -0800 Subject: [PATCH 43/70] NAT44: fix bug in TCP close with output-feature interface (VPP-1493) Change-Id: If8c883d6b1ee58de9a03012d3567ec82211a0225 Signed-off-by: Matus Fabian (cherry picked from commit 6c01dceea5c612373453db7f1ccda589a2cd782e) --- 
src/plugins/nat/in2out_ed.c | 45 ++++++++++++++++++- src/plugins/nat/nat.h | 7 +++ src/plugins/nat/nat_inlines.h | 3 +- src/plugins/nat/out2in_ed.c | 31 ++++++++++++- test/test_nat.py | 82 +++++++++++++++++++++++++++++++++++ 5 files changed, 164 insertions(+), 4 deletions(-) diff --git a/src/plugins/nat/in2out_ed.c b/src/plugins/nat/in2out_ed.c index f9f8d776eb46..8c62949b07d2 100644 --- a/src/plugins/nat/in2out_ed.c +++ b/src/plugins/nat/in2out_ed.c @@ -37,7 +37,8 @@ _(BAD_ICMP_TYPE, "unsupported ICMP type") \ _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded") \ _(DROP_FRAGMENT, "Drop fragment") \ _(MAX_REASS, "Maximum reassemblies exceeded") \ -_(MAX_FRAG, "Maximum fragments per reassembly exceeded") +_(MAX_FRAG, "Maximum fragments per reassembly exceeded")\ +_(NON_SYN, "non-SYN packet try to create session") typedef enum { @@ -513,7 +514,19 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, make_ed_kv (&kv, &ip->src_address, &ip->dst_address, proto, tx_fib_index, src_port, dst_port); if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value)) - return 1; + { + s = pool_elt_at_index (tsm->sessions, value.value); + if (nat44_is_ses_closed (s)) + { + nat_log_debug ("TCP close connection %U", format_snat_session, + &sm->per_thread_data[thread_index], s); + nat_free_session_data (sm, s, thread_index); + nat44_delete_session (sm, s, thread_index); + } + else + s->flags |= SNAT_SESSION_FLAG_OUTPUT_FEATURE; + return 1; + } /* dst NAT check */ make_ed_kv (&kv, &ip->dst_address, &ip->src_address, proto, rx_fib_index, @@ -1021,6 +1034,13 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, goto trace00; } + if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) + { + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; + next0 = NAT_IN2OUT_ED_NEXT_DROP; + goto trace00; + } + next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node, next0, thread_index, now); @@ -1225,6 +1245,13 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, 
goto trace01; } + if ((proto1 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp1)) + { + b1->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; + next1 = NAT_IN2OUT_ED_NEXT_DROP; + goto trace01; + } + next1 = slow_path_ed (sm, b1, rx_fib_index1, &kv1, &s1, node, next1, thread_index, now); @@ -1458,6 +1485,13 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, goto trace0; } + if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) + { + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; + next0 = NAT_IN2OUT_ED_NEXT_DROP; + goto trace0; + } + next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node, next0, thread_index, now); @@ -1858,6 +1892,13 @@ nat44_ed_in2out_reass_node_fn_inline (vlib_main_t * vm, } } + if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) + { + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; + next0 = NAT_IN2OUT_ED_NEXT_DROP; + goto trace0; + } + next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node, next0, thread_index, now); diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 134672039172..02d4aaef9f61 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -174,6 +174,7 @@ typedef enum #define SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT 16 #define SNAT_SESSION_FLAG_FWD_BYPASS 32 #define SNAT_SESSION_FLAG_AFFINITY 64 +#define SNAT_SESSION_FLAG_OUTPUT_FEATURE 128 /* NAT interface flags */ #define NAT_INTERFACE_FLAG_IS_INSIDE 1 @@ -673,6 +674,12 @@ unformat_function_t unformat_snat_protocol; */ #define is_lb_static_mapping(sm) (sm->flags & NAT_STATIC_MAPPING_FLAG_LB) +/** \brief Check if client initiating TCP connection (received SYN from client) + @param t TCP header + @return 1 if client initiating TCP connection +*/ +#define tcp_is_init(t) ((t->flags & TCP_FLAG_SYN) && !(t->flags & TCP_FLAG_ACK)) + /* logging */ #define nat_log_err(...) 
\ vlib_log(VLIB_LOG_LEVEL_ERR, snat_main.log_class, __VA_ARGS__) diff --git a/src/plugins/nat/nat_inlines.h b/src/plugins/nat/nat_inlines.h index 4bdb2cb66d9f..730d4400e6a5 100644 --- a/src/plugins/nat/nat_inlines.h +++ b/src/plugins/nat/nat_inlines.h @@ -215,7 +215,8 @@ nat44_set_tcp_session_state_i2o (snat_main_t * sm, snat_session_t * ses, if (clib_net_to_host_u32 (tcp->ack_number) > ses->o2i_fin_seq) ses->state |= NAT44_SES_O2I_FIN_ACK; } - if (nat44_is_ses_closed (ses)) + if (nat44_is_ses_closed (ses) + && !(ses->flags & SNAT_SESSION_FLAG_OUTPUT_FEATURE)) { nat_log_debug ("TCP close connection %U", format_snat_session, &sm->per_thread_data[thread_index], ses); diff --git a/src/plugins/nat/out2in_ed.c b/src/plugins/nat/out2in_ed.c index b2dbc513df6c..b4ae6502e0d2 100644 --- a/src/plugins/nat/out2in_ed.c +++ b/src/plugins/nat/out2in_ed.c @@ -39,7 +39,8 @@ _(NO_TRANSLATION, "No translation") \ _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded") \ _(DROP_FRAGMENT, "Drop fragment") \ _(MAX_REASS, "Maximum reassemblies exceeded") \ -_(MAX_FRAG, "Maximum fragments per reassembly exceeded") +_(MAX_FRAG, "Maximum fragments per reassembly exceeded")\ +_(NON_SYN, "non-SYN packet try to create session") typedef enum { @@ -875,6 +876,13 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (identity_nat0)) goto trace00; + if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) + { + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN]; + next0 = NAT44_ED_OUT2IN_NEXT_DROP; + goto trace00; + } + /* Create session initiated by host from external network */ s0 = create_session_for_static_mapping_ed (sm, b0, l_key0, e_key0, node, @@ -1097,6 +1105,13 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (identity_nat1)) goto trace01; + if ((proto1 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp1)) + { + b1->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN]; + next1 = NAT44_ED_OUT2IN_NEXT_DROP; + goto trace01; + } + /* Create session initiated by 
host from external network */ s1 = create_session_for_static_mapping_ed (sm, b1, l_key1, e_key1, node, @@ -1353,6 +1368,13 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (identity_nat0)) goto trace0; + if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) + { + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN]; + next0 = NAT44_ED_OUT2IN_NEXT_DROP; + goto trace0; + } + /* Create session initiated by host from external network */ s0 = create_session_for_static_mapping_ed (sm, b0, l_key0, e_key0, node, @@ -1702,6 +1724,13 @@ nat44_ed_out2in_reass_node_fn (vlib_main_t * vm, goto trace0; } + if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) + { + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN]; + next0 = NAT44_ED_OUT2IN_NEXT_DROP; + goto trace0; + } + /* Create session initiated by host from external network */ s0 = create_session_for_static_mapping_ed (sm, b0, l_key0, e_key0, node, diff --git a/test/test_nat.py b/test/test_nat.py index e26aa27ddbd9..22e8903caf5c 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -4838,6 +4838,88 @@ def test_twice_nat_interface_addr(self): adresses = self.vapi.nat44_address_dump() self.assertEqual(0, len(adresses)) + def test_tcp_close(self): + """ Close TCP session from inside network - output feature """ + self.vapi.nat44_forwarding_enable_disable(1) + self.nat44_add_address(self.pg1.local_ip4) + twice_nat_addr = '10.0.1.3' + service_ip = '192.168.16.150' + self.nat44_add_address(twice_nat_addr, twice_nat=1) + self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index) + self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index, + is_inside=0) + self.vapi.nat44_interface_add_del_output_feature(self.pg1.sw_if_index, + is_inside=0) + self.nat44_add_static_mapping(self.pg0.remote_ip4, + service_ip, + 80, + 80, + proto=IP_PROTOS.tcp, + out2in_only=1, + twice_nat=1) + sessions = self.vapi.nat44_user_session_dump(self.pg0.remote_ip4n, 0) + start_sessnum = len(sessions) + + # SYN 
packet out->in + p = (Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) / + IP(src=self.pg1.remote_ip4, dst=service_ip) / + TCP(sport=33898, dport=80, flags="S")) + self.pg1.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg0.get_capture(1) + p = capture[0] + tcp_port = p[TCP].sport + + # SYN + ACK packet in->out + p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / + IP(src=self.pg0.remote_ip4, dst=twice_nat_addr) / + TCP(sport=80, dport=tcp_port, flags="SA")) + self.pg0.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg1.get_capture(1) + + # ACK packet out->in + p = (Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) / + IP(src=self.pg1.remote_ip4, dst=service_ip) / + TCP(sport=33898, dport=80, flags="A")) + self.pg1.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg0.get_capture(1) + + # FIN packet in -> out + p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / + IP(src=self.pg0.remote_ip4, dst=twice_nat_addr) / + TCP(sport=80, dport=tcp_port, flags="FA", seq=100, ack=300)) + self.pg0.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg1.get_capture(1) + + # FIN+ACK packet out -> in + p = (Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) / + IP(src=self.pg1.remote_ip4, dst=service_ip) / + TCP(sport=33898, dport=80, flags="FA", seq=300, ack=101)) + self.pg1.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg0.get_capture(1) + + # ACK packet in -> out + p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / + IP(src=self.pg0.remote_ip4, dst=twice_nat_addr) / + TCP(sport=80, dport=tcp_port, flags="A", seq=101, ack=301)) + self.pg0.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg1.get_capture(1) + + sessions = self.vapi.nat44_user_session_dump(self.pg0.remote_ip4n, + 0) + self.assertEqual(len(sessions) - 
start_sessnum, 0) + def test_tcp_session_close_in(self): """ Close TCP session from inside network """ self.tcp_port_out = 10505 From 45ed202905131c0563fcf16aaf9538d9efcd372d Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Tue, 20 Nov 2018 02:19:05 -0800 Subject: [PATCH 44/70] NAT44: fix virtual fragmentation reassembly in forwarding mode (VPP-1501) Change-Id: Id86d8aa8753b9b2ff4c709b11e3901ba8d552918 Signed-off-by: Matus Fabian (cherry picked from commit 111add7e5d6581bb4eca05cc862a651ff6a09792) --- src/plugins/nat/out2in.c | 8 ++++++++ test/test_nat.py | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index c4d1fbf69003..8c013d9b0749 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -1443,6 +1443,12 @@ nat44_out2in_reass_node_fn (vlib_main_t * vm, node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; next0 = SNAT_OUT2IN_NEXT_DROP; } + else + { + reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; + nat_ip4_reass_get_frags (reass0, + &fragments_to_loopback); + } goto trace0; } @@ -1474,6 +1480,8 @@ nat44_out2in_reass_node_fn (vlib_main_t * vm, } else { + if (reass0->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE) + goto trace0; if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) { if (nat_ip4_reass_add_fragment diff --git a/test/test_nat.py b/test/test_nat.py index 22e8903caf5c..bc476239975b 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -3339,6 +3339,32 @@ def test_frag_in_order(self): self.frag_in_order(proto=IP_PROTOS.udp) self.frag_in_order(proto=IP_PROTOS.icmp) + def test_frag_forwarding(self): + """ NAT44 forwarding fragment test """ + self.vapi.nat44_add_interface_addr(self.pg1.sw_if_index) + self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index) + self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index, + is_inside=0) + self.vapi.nat44_forwarding_enable_disable(1) + + data = "A" * 16 + "B" * 16 + "C" * 3 + pkts = 
self.create_stream_frag(self.pg1, + self.pg0.remote_ip4, + 4789, + 4789, + data, + proto=IP_PROTOS.udp) + self.pg1.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + frags = self.pg0.get_capture(len(pkts)) + p = self.reass_frags_and_verify(frags, + self.pg1.remote_ip4, + self.pg0.remote_ip4) + self.assertEqual(p[UDP].sport, 4789) + self.assertEqual(p[UDP].dport, 4789) + self.assertEqual(data, p[Raw].load) + def test_reass_hairpinning(self): """ NAT44 fragments hairpinning """ From 1d403abe1baa9d058a99e7113236d93d2bbb93d6 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Wed, 21 Nov 2018 04:53:10 -0800 Subject: [PATCH 45/70] NAT44: Apply transitory timeout on TCP RST (VPP-1494) RFC7857 section 2.2. Change-Id: I031af5fe379b72262e83fd8565c34fa1b772f2c8 Signed-off-by: Matus Fabian (cherry picked from commit 15e8e681813a2e88dad107b5fe238bc8abee17d2) --- src/plugins/nat/nat.h | 1 + src/plugins/nat/nat_inlines.h | 8 ++++++ test/test_nat.py | 51 +++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 02d4aaef9f61..3162e41b696e 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -165,6 +165,7 @@ typedef enum #define NAT44_SES_O2I_FIN_ACK 8 #define NAT44_SES_I2O_SYN 16 #define NAT44_SES_O2I_SYN 32 +#define NAT44_SES_RST 64 /* Session flags */ #define SNAT_SESSION_FLAG_STATIC_MAPPING 1 diff --git a/src/plugins/nat/nat_inlines.h b/src/plugins/nat/nat_inlines.h index 730d4400e6a5..9000a3ddcd35 100644 --- a/src/plugins/nat/nat_inlines.h +++ b/src/plugins/nat/nat_inlines.h @@ -200,6 +200,10 @@ always_inline int nat44_set_tcp_session_state_i2o (snat_main_t * sm, snat_session_t * ses, tcp_header_t * tcp, u32 thread_index) { + if ((ses->state == 0) && (tcp->flags & TCP_FLAG_RST)) + ses->state = NAT44_SES_RST; + if ((ses->state == NAT44_SES_RST) && !(tcp->flags & TCP_FLAG_RST)) + ses->state = 0; if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) 
&& (ses->state & NAT44_SES_O2I_SYN)) ses->state = 0; @@ -231,6 +235,10 @@ always_inline int nat44_set_tcp_session_state_o2i (snat_main_t * sm, snat_session_t * ses, tcp_header_t * tcp, u32 thread_index) { + if ((ses->state == 0) && (tcp->flags & TCP_FLAG_RST)) + ses->state = NAT44_SES_RST; + if ((ses->state == NAT44_SES_RST) && !(tcp->flags & TCP_FLAG_RST)) + ses->state = 0; if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && (ses->state & NAT44_SES_O2I_SYN)) ses->state = 0; diff --git a/test/test_nat.py b/test/test_nat.py index bc476239975b..d3849da4321d 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -5674,6 +5674,57 @@ def test_session_timeout(self): nsessions = nsessions + user.nsessions self.assertLess(nsessions, 2 * max_sessions) + @unittest.skipUnless(running_extended_tests(), "part of extended tests") + def test_session_rst_timeout(self): + """ NAT44 session RST timeouts """ + self.nat44_add_address(self.nat_addr) + self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index) + self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index, + is_inside=0) + self.vapi.nat_set_timeouts(tcp_transitory=5) + + nat44_config = self.vapi.nat_show_config() + + self.initiate_tcp_session(self.pg0, self.pg1) + p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / + IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) / + TCP(sport=self.tcp_port_in, dport=self.tcp_external_port, + flags="R")) + self.pg0.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg1.get_capture(1) + + pkts_num = nat44_config.max_translations_per_user - 1 + pkts = [] + for i in range(0, pkts_num): + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) / + UDP(sport=1025 + i, dport=53)) + pkts.append(p) + self.pg0.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg1.get_capture(pkts_num) + + sleep(6) + + p = 
(Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / + IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) / + TCP(sport=self.tcp_port_in + 1, dport=self.tcp_external_port + 1, + flags="S")) + self.pg0.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg1.get_capture(1) + + nsessions = 0 + users = self.vapi.nat44_user_dump() + self.assertEqual(len(users), 1) + self.assertEqual(users[0].ip_address, self.pg0.remote_ip4n) + self.assertEqual(users[0].nsessions, + nat44_config.max_translations_per_user) + @unittest.skipUnless(running_extended_tests(), "part of extended tests") def test_session_limit_per_user(self): """ Maximum sessions per user limit """ From 9e182dcacab5d590a3a1f40ca17cf4a0853ebbb8 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Wed, 21 Nov 2018 08:56:53 +0100 Subject: [PATCH 46/70] acl-plugin: optimize hash memory usage + fix the startup config parsing for memory sizes [VPP-1502] In a couple of places vec_add1()-style was repeatedly called in a loop for smallish vectors where the number of additions was known in advance. With a test with large number of ACEs these numbers contribute to heap fragmentation noticeably. Minimize the number of allocations by preallocating the known size and then resetting the length accordingly, and then calling vec_add1() Also unify the parsing of the memory-related startup config parameters. 
Change-Id: If8fba344eb1dee8f865ffe7b396ca3b6bd9dc1d0 Signed-off-by: Andrew Yourtchenko (cherry picked from commit 94f509615eb97cebc9192e7290c84cf166518039) --- src/plugins/acl/acl.c | 16 ++++++++++------ src/plugins/acl/acl.h | 2 +- src/plugins/acl/hash_lookup.c | 22 ++++++++++++++++++++++ 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 866c6ffc5f09..d28144984165 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -4091,12 +4091,12 @@ acl_plugin_config (vlib_main_t * vm, unformat_input_t * input) { acl_main_t *am = &acl_main; u32 conn_table_hash_buckets; - u32 conn_table_hash_memory_size; + uword conn_table_hash_memory_size; u32 conn_table_max_entries; uword main_heap_size; uword hash_heap_size; u32 hash_lookup_hash_buckets; - u32 hash_lookup_hash_memory; + uword hash_lookup_hash_memory; u32 reclassify_sessions; u32 use_tuple_merge; u32 tuple_merge_split_threshold; @@ -4106,8 +4106,10 @@ acl_plugin_config (vlib_main_t * vm, unformat_input_t * input) if (unformat (input, "connection hash buckets %d", &conn_table_hash_buckets)) am->fa_conn_table_hash_num_buckets = conn_table_hash_buckets; - else if (unformat (input, "connection hash memory %d", - &conn_table_hash_memory_size)) + else + if (unformat + (input, "connection hash memory %U", unformat_memory_size, + &conn_table_hash_memory_size)) am->fa_conn_table_hash_memory_size = conn_table_hash_memory_size; else if (unformat (input, "connection count max %d", &conn_table_max_entries)) @@ -4125,8 +4127,10 @@ acl_plugin_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "hash lookup hash buckets %d", &hash_lookup_hash_buckets)) am->hash_lookup_hash_buckets = hash_lookup_hash_buckets; - else if (unformat (input, "hash lookup hash memory %d", - &hash_lookup_hash_memory)) + else + if (unformat + (input, "hash lookup hash memory %U", unformat_memory_size, + &hash_lookup_hash_memory)) am->hash_lookup_hash_memory = 
hash_lookup_hash_memory; else if (unformat (input, "use tuple merge %d", &use_tuple_merge)) am->use_tuple_merge = use_tuple_merge; diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index c17946a97195..13e15478512d 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -142,7 +142,7 @@ typedef struct { hash_acl_info_t *hash_acl_infos; /* corresponding hash matching housekeeping info */ clib_bihash_48_8_t acl_lookup_hash; /* ACL lookup hash table. */ u32 hash_lookup_hash_buckets; - u32 hash_lookup_hash_memory; + uword hash_lookup_hash_memory; /* mheap to hold all the miscellaneous allocations related to hash-based lookups */ void *hash_lookup_mheap; diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c index 0087a5e2ab74..4c79f970113a 100644 --- a/src/plugins/acl/hash_lookup.c +++ b/src/plugins/acl/hash_lookup.c @@ -603,6 +603,17 @@ hash_acl_set_heap(acl_main_t *am) am->hash_lookup_mheap = mheap_alloc_with_lock (0 /* use VM */ , am->hash_lookup_mheap_size, 1 /* locked */); +#if USE_DLMALLOC != 0 + /* + * DLMALLOC is being "helpful" in that it ignores the heap size parameter + * by default and tries to allocate the larger amount of memory. + * + * Pin the heap so this does not happen and if we run out of memory + * in this heap, we will bail out with "out of memory", rather than + * an obscure error sometime later. 
+ */ + mspace_disable_expand(am->hash_lookup_mheap); +#endif if (0 == am->hash_lookup_mheap) { clib_error("ACL plugin failed to allocate lookup heap of %U bytes", format_memory_size, am->hash_lookup_mheap_size); @@ -736,6 +747,12 @@ hash_acl_apply(acl_main_t *am, u32 lc_index, int acl_index, u32 acl_position) vec_validate(am->hash_applied_mask_info_vec_by_lc_index, lc_index); + + /* since we know (in case of no split) how much we expand, preallocate that space */ + int old_vec_len = vec_len(*applied_hash_aces); + vec_validate((*applied_hash_aces), old_vec_len + vec_len(ha->rules) - 1); + _vec_len((*applied_hash_aces)) = old_vec_len; + /* add the rules from the ACL to the hash table for lookup and append to the vector*/ for(i=0; i < vec_len(ha->rules); i++) { /* @@ -1171,6 +1188,11 @@ void hash_acl_add(acl_main_t *am, int acl_index) /* walk the newly added ACL entries and ensure that for each of them there is a mask type, increment a reference count for that mask type */ + + /* avoid small requests by preallocating the entire vector before running the additions */ + vec_validate(ha->rules, a->count-1); + vec_reset_length(ha->rules); + for(i=0; i < a->count; i++) { hash_ace_info_t ace_info; fa_5tuple_t mask; From 0858497cee6b80c0aaefb0ca8c2ac34111fddfe3 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Thu, 22 Nov 2018 00:12:15 -0800 Subject: [PATCH 47/70] NAT44: improve expired sessions reuse (VPP-1503) Change-Id: Iab506f127136c94a641df31ded108016de26260b Signed-off-by: Matus Fabian --- src/plugins/nat/in2out_ed.c | 51 +++++++------------- src/plugins/nat/nat.c | 93 +++++++++++++++++++------------------ test/test_nat.py | 19 +------- 3 files changed, 67 insertions(+), 96 deletions(-) diff --git a/src/plugins/nat/in2out_ed.c b/src/plugins/nat/in2out_ed.c index 8c62949b07d2..ea30035faf2f 100644 --- a/src/plugins/nat/in2out_ed.c +++ b/src/plugins/nat/in2out_ed.c @@ -255,7 +255,8 @@ slow_path_ed (snat_main_t * sm, u32 rx_fib_index, clib_bihash_kv_16_8_t * kv, 
snat_session_t ** sessionp, - vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now) + vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now, + tcp_header_t * tcp) { snat_session_t *s = 0; snat_user_t *u; @@ -315,6 +316,15 @@ slow_path_ed (snat_main_t * sm, is_sm = 1; } + if (proto == SNAT_PROTOCOL_TCP) + { + if (!tcp_is_init (tcp)) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; + return NAT_IN2OUT_ED_NEXT_DROP; + } + } + u = nat_user_get_or_create (sm, &key->l_addr, rx_fib_index, thread_index); if (!u) { @@ -626,7 +636,7 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, } next = slow_path_ed (sm, b, rx_fib_index, &kv, &s, node, next, - thread_index, vlib_time_now (sm->vlib_main)); + thread_index, vlib_time_now (sm->vlib_main), 0); if (PREDICT_FALSE (next == NAT_IN2OUT_ED_NEXT_DROP)) goto out; @@ -1034,16 +1044,9 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, goto trace00; } - if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) - { - b0->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; - next0 = NAT_IN2OUT_ED_NEXT_DROP; - goto trace00; - } - next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node, - next0, thread_index, now); + next0, thread_index, now, tcp0); if (PREDICT_FALSE (next0 == NAT_IN2OUT_ED_NEXT_DROP)) goto trace00; @@ -1245,16 +1248,9 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, goto trace01; } - if ((proto1 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp1)) - { - b1->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; - next1 = NAT_IN2OUT_ED_NEXT_DROP; - goto trace01; - } - next1 = slow_path_ed (sm, b1, rx_fib_index1, &kv1, &s1, node, - next1, thread_index, now); + next1, thread_index, now, tcp1); if (PREDICT_FALSE (next1 == NAT_IN2OUT_ED_NEXT_DROP)) goto trace01; @@ -1485,16 +1481,9 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, goto trace0; } - if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) - { - b0->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; - next0 = 
NAT_IN2OUT_ED_NEXT_DROP; - goto trace0; - } - next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node, - next0, thread_index, now); + next0, thread_index, now, tcp0); if (PREDICT_FALSE (next0 == NAT_IN2OUT_ED_NEXT_DROP)) goto trace0; @@ -1892,15 +1881,9 @@ nat44_ed_in2out_reass_node_fn_inline (vlib_main_t * vm, } } - if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) - { - b0->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; - next0 = NAT_IN2OUT_ED_NEXT_DROP; - goto trace0; - } - next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, - &s0, node, next0, thread_index, now); + &s0, node, next0, thread_index, now, + tcp0); if (PREDICT_FALSE (next0 == NAT_IN2OUT_ED_NEXT_DROP)) goto trace0; diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 73db65e2d78a..540d3bf8a0be 100755 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -397,64 +397,67 @@ nat_ed_session_alloc (snat_main_t * sm, snat_user_t * u, u32 thread_index, u32 oldest_index; u64 sess_timeout_time; - if ((u->nsessions + u->nstaticsessions) >= sm->max_translations_per_user) + if (PREDICT_FALSE (!(u->nsessions) && !(u->nstaticsessions))) + goto alloc_new; + + oldest_index = + clib_dlist_remove_head (tsm->list_pool, + u->sessions_per_user_list_head_index); + oldest_elt = pool_elt_at_index (tsm->list_pool, oldest_index); + s = pool_elt_at_index (tsm->sessions, oldest_elt->value); + sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); + if (now >= sess_timeout_time) { - oldest_index = - clib_dlist_remove_head (tsm->list_pool, - u->sessions_per_user_list_head_index); - oldest_elt = pool_elt_at_index (tsm->list_pool, oldest_index); - s = pool_elt_at_index (tsm->sessions, oldest_elt->value); - sess_timeout_time = - s->last_heard + (f64) nat44_session_get_timeout (sm, s); - if (now >= sess_timeout_time) - { - clib_dlist_addtail (tsm->list_pool, - u->sessions_per_user_list_head_index, - oldest_index); - nat_free_session_data (sm, s, thread_index); - if 
(snat_is_session_static (s)) - u->nstaticsessions--; - else - u->nsessions--; - s->flags = 0; - s->total_bytes = 0; - s->total_pkts = 0; - s->state = 0; - s->ext_host_addr.as_u32 = 0; - s->ext_host_port = 0; - s->ext_host_nat_addr.as_u32 = 0; - s->ext_host_nat_port = 0; - } + clib_dlist_addtail (tsm->list_pool, + u->sessions_per_user_list_head_index, oldest_index); + nat_free_session_data (sm, s, thread_index); + if (snat_is_session_static (s)) + u->nstaticsessions--; else + u->nsessions--; + s->flags = 0; + s->total_bytes = 0; + s->total_pkts = 0; + s->state = 0; + s->ext_host_addr.as_u32 = 0; + s->ext_host_port = 0; + s->ext_host_nat_addr.as_u32 = 0; + s->ext_host_nat_port = 0; + } + else + { + clib_dlist_addhead (tsm->list_pool, + u->sessions_per_user_list_head_index, oldest_index); + if ((u->nsessions + u->nstaticsessions) >= + sm->max_translations_per_user) { - clib_dlist_addhead (tsm->list_pool, - u->sessions_per_user_list_head_index, - oldest_index); nat_log_warn ("max translations per user %U", format_ip4_address, &u->addr); snat_ipfix_logging_max_entries_per_user (sm->max_translations_per_user, u->addr.as_u32); return 0; } - } - else - { - pool_get (tsm->sessions, s); - memset (s, 0, sizeof (*s)); + else + { + alloc_new: + pool_get (tsm->sessions, s); + memset (s, 0, sizeof (*s)); - /* Create list elts */ - pool_get (tsm->list_pool, per_user_translation_list_elt); - clib_dlist_init (tsm->list_pool, - per_user_translation_list_elt - tsm->list_pool); + /* Create list elts */ + pool_get (tsm->list_pool, per_user_translation_list_elt); + clib_dlist_init (tsm->list_pool, + per_user_translation_list_elt - tsm->list_pool); - per_user_translation_list_elt->value = s - tsm->sessions; - s->per_user_index = per_user_translation_list_elt - tsm->list_pool; - s->per_user_list_head_index = u->sessions_per_user_list_head_index; + per_user_translation_list_elt->value = s - tsm->sessions; + s->per_user_index = per_user_translation_list_elt - tsm->list_pool; + 
s->per_user_list_head_index = u->sessions_per_user_list_head_index; - clib_dlist_addtail (tsm->list_pool, - s->per_user_list_head_index, - per_user_translation_list_elt - tsm->list_pool); + clib_dlist_addtail (tsm->list_pool, + s->per_user_list_head_index, + per_user_translation_list_elt - tsm->list_pool); + } } + return s; } diff --git a/test/test_nat.py b/test/test_nat.py index d3849da4321d..3094dd429e2f 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -5658,7 +5658,7 @@ def test_session_timeout(self): pkts = [] for i in range(0, max_sessions): - src = "10.10.%u.%u" % ((i & 0xFF00) >> 8, i & 0xFF) + src = "10.11.%u.%u" % ((i & 0xFF00) >> 8, i & 0xFF) p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IP(src=src, dst=self.pg1.remote_ip4) / ICMP(id=1026, type='echo-request')) @@ -5683,8 +5683,6 @@ def test_session_rst_timeout(self): is_inside=0) self.vapi.nat_set_timeouts(tcp_transitory=5) - nat44_config = self.vapi.nat_show_config() - self.initiate_tcp_session(self.pg0, self.pg1) p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) / @@ -5695,18 +5693,6 @@ def test_session_rst_timeout(self): self.pg_start() self.pg1.get_capture(1) - pkts_num = nat44_config.max_translations_per_user - 1 - pkts = [] - for i in range(0, pkts_num): - p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / - IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) / - UDP(sport=1025 + i, dport=53)) - pkts.append(p) - self.pg0.add_stream(pkts) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - self.pg1.get_capture(pkts_num) - sleep(6) p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / @@ -5722,8 +5708,7 @@ def test_session_rst_timeout(self): users = self.vapi.nat44_user_dump() self.assertEqual(len(users), 1) self.assertEqual(users[0].ip_address, self.pg0.remote_ip4n) - self.assertEqual(users[0].nsessions, - nat44_config.max_translations_per_user) + 
self.assertEqual(users[0].nsessions, 1) @unittest.skipUnless(running_extended_tests(), "part of extended tests") def test_session_limit_per_user(self): From 277681ebfd5f7a6742490522239eeee575f21c71 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Fri, 23 Nov 2018 09:22:10 +0100 Subject: [PATCH 48/70] acl-plugin: fix coverity error that the fix related for [VPP-1502] has triggered Fix the trivial use-before-check copypaste error. There was a more subtle issue with that patch that Coverity didn't notice: namely, vec_validate(v, len-1) is a terrible idea if len happens to be == 0. Fix that. Change-Id: I0fab8b1750e9e9973eefb5d39f35e4c3a13fc66f Signed-off-by: Andrew Yourtchenko (cherry picked from commit e0152461cbc84d6d4df3f05dddabe992c1c59052) --- src/plugins/acl/hash_lookup.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c index 4c79f970113a..495395443f1c 100644 --- a/src/plugins/acl/hash_lookup.c +++ b/src/plugins/acl/hash_lookup.c @@ -603,6 +603,10 @@ hash_acl_set_heap(acl_main_t *am) am->hash_lookup_mheap = mheap_alloc_with_lock (0 /* use VM */ , am->hash_lookup_mheap_size, 1 /* locked */); + if (0 == am->hash_lookup_mheap) { + clib_error("ACL plugin failed to allocate lookup heap of %U bytes", + format_memory_size, am->hash_lookup_mheap_size); + } #if USE_DLMALLOC != 0 /* * DLMALLOC is being "helpful" in that it ignores the heap size parameter @@ -614,10 +618,6 @@ hash_acl_set_heap(acl_main_t *am) */ mspace_disable_expand(am->hash_lookup_mheap); #endif - if (0 == am->hash_lookup_mheap) { - clib_error("ACL plugin failed to allocate lookup heap of %U bytes", - format_memory_size, am->hash_lookup_mheap_size); - } } void *oldheap = clib_mem_set_heap(am->hash_lookup_mheap); return oldheap; @@ -749,9 +749,11 @@ hash_acl_apply(acl_main_t *am, u32 lc_index, int acl_index, u32 acl_position) vec_validate(am->hash_applied_mask_info_vec_by_lc_index, lc_index); /* 
since we know (in case of no split) how much we expand, preallocate that space */ - int old_vec_len = vec_len(*applied_hash_aces); - vec_validate((*applied_hash_aces), old_vec_len + vec_len(ha->rules) - 1); - _vec_len((*applied_hash_aces)) = old_vec_len; + if (vec_len(ha->rules) > 0) { + int old_vec_len = vec_len(*applied_hash_aces); + vec_validate((*applied_hash_aces), old_vec_len + vec_len(ha->rules) - 1); + _vec_len((*applied_hash_aces)) = old_vec_len; + } /* add the rules from the ACL to the hash table for lookup and append to the vector*/ for(i=0; i < vec_len(ha->rules); i++) { @@ -1190,8 +1192,10 @@ void hash_acl_add(acl_main_t *am, int acl_index) is a mask type, increment a reference count for that mask type */ /* avoid small requests by preallocating the entire vector before running the additions */ - vec_validate(ha->rules, a->count-1); - vec_reset_length(ha->rules); + if (a->count > 0) { + vec_validate(ha->rules, a->count-1); + vec_reset_length(ha->rules); + } for(i=0; i < a->count; i++) { hash_ace_info_t ace_info; From 00adcceaf09db5c94860c2bae8590c3efdd224fd Mon Sep 17 00:00:00 2001 From: Onong Tayeng Date: Fri, 23 Nov 2018 15:33:41 +0530 Subject: [PATCH 49/70] Fix permission for vpp_papi The permission for the top-level vpp_papi dir under /usr/lib/python2.7/site-packages is set to 644 which means that non-root users cannot import vpp_papi. As a result, devstack setup with VPP/networking-vpp fails since it is run as non-root user. 
Change-Id: Id85b468b2dcc92efb3a64c51ffb23ef6d596e4ad Signed-off-by: Onong Tayeng (cherry picked from commit 9b0ce0215b6e699851a3b54fb2a7003800ca53e4) --- extras/rpm/vpp.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extras/rpm/vpp.spec b/extras/rpm/vpp.spec index cd90a27494a2..bf2d83a84c8c 100644 --- a/extras/rpm/vpp.spec +++ b/extras/rpm/vpp.spec @@ -392,7 +392,7 @@ fi /usr/share/java/* %files api-python -%defattr(644,root,root) +%defattr(644,root,root,755) %{python2_sitelib}/vpp_* %files selinux-policy From ad5f2de9041070c007cedb87f94b72193125db17 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Fri, 30 Nov 2018 09:15:11 +0000 Subject: [PATCH 50/70] IPSEC-AH: fix packet drop Change-Id: I45b97cfd0c3785bfbf6d142d362bd3d4d56bae00 Signed-off-by: Neale Ranns --- src/vnet/ipsec/ah_decrypt.c | 5 ----- src/vnet/ipsec/esp_decrypt.c | 2 -- 2 files changed, 7 deletions(-) diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c index abe2e6f5f80f..29c59fdfd248 100644 --- a/src/vnet/ipsec/ah_decrypt.c +++ b/src/vnet/ipsec/ah_decrypt.c @@ -166,11 +166,8 @@ ah_decrypt_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (rv)) { - clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq); vlib_node_increment_counter (vm, ah_decrypt_node.index, AH_DECRYPT_ERROR_REPLAY, 1); - to_next[0] = i_bi0; - to_next += 1; goto trace; } } @@ -220,8 +217,6 @@ ah_decrypt_node_fn (vlib_main_t * vm, vlib_node_increment_counter (vm, ah_decrypt_node.index, AH_DECRYPT_ERROR_INTEG_ERROR, 1); - to_next[0] = i_bi0; - to_next += 1; goto trace; } diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c index a0eeed464da6..7f9be89ee4c0 100644 --- a/src/vnet/ipsec/esp_decrypt.c +++ b/src/vnet/ipsec/esp_decrypt.c @@ -185,7 +185,6 @@ esp_decrypt_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (rv)) { - clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq); vlib_node_increment_counter (vm, esp_decrypt_node.index, ESP_DECRYPT_ERROR_REPLAY, 1); o_bi0 = i_bi0; @@ 
-330,7 +329,6 @@ esp_decrypt_node_fn (vlib_main_t * vm, next0 = ESP_DECRYPT_NEXT_IP6_INPUT; else { - clib_warning ("next header: 0x%x", f0->next_header); vlib_node_increment_counter (vm, esp_decrypt_node.index, ESP_DECRYPT_ERROR_DECRYPTION_FAILED, 1); From c90a2aa0f159401823107ca1715813ec108f793f Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 4 Dec 2018 09:39:40 +0000 Subject: [PATCH 51/70] MPLS: buffer over-run with incorrectly init'd vector. fix VAT dump Change-Id: Ifdbb4c4cffd90c4ec8b39513d284ebf7be39eca5 Signed-off-by: Neale Ranns (cherry picked from commit 44cea225e2238a3c549f17f315cd1fbc6978c277) --- src/vat/api_format.c | 4 ++-- src/vnet/mpls/interface.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 96cf80269005..56ded020bca2 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -20428,14 +20428,14 @@ vl_api_mpls_fib_path_print (vat_main_t * vam, vl_api_fib_path_t * fp) print (vam->ofp, " weight %d, sw_if_index %d, is_local %d, is_drop %d, " "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U", - ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local, + fp->weight, ntohl (fp->sw_if_index), fp->is_local, fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi, format_ip6_address, fp->next_hop); else if (fp->afi == IP46_TYPE_IP4) print (vam->ofp, " weight %d, sw_if_index %d, is_local %d, is_drop %d, " "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U", - ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local, + fp->weight, ntohl (fp->sw_if_index), fp->is_local, fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi, format_ip4_address, fp->next_hop); } diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c index ec541f760de3..46d80f07a387 100644 --- a/src/vnet/mpls/interface.c +++ b/src/vnet/mpls/interface.c @@ -62,7 +62,7 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm, fib_table_lock(lfib_index, FIB_PROTOCOL_MPLS, (is_api? 
FIB_SOURCE_API: FIB_SOURCE_CLI)); - vec_validate(mm->fib_index_by_sw_if_index, 0); + vec_validate(mm->fib_index_by_sw_if_index, sw_if_index); mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index; } else From e351f3501953773fbd3071cd04809c1012ea64b3 Mon Sep 17 00:00:00 2001 From: Filip Tehlar Date: Fri, 30 Nov 2018 07:27:27 -0800 Subject: [PATCH 52/70] Add UDP encap flag Change-Id: Ic6a8b9aaec7e5dee4fb1971168988dbe4f931f86 Signed-off-by: Filip Tehlar --- src/vnet/ipsec/ipsec.api | 2 ++ src/vnet/ipsec/ipsec_api.c | 1 + 2 files changed, 3 insertions(+) diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api index 793422d86fba..523def284c32 100644 --- a/src/vnet/ipsec/ipsec.api +++ b/src/vnet/ipsec/ipsec.api @@ -562,6 +562,7 @@ define ipsec_spd_interface_details { @param remote_integ_key - integrity key for inbound IPsec SA @param renumber - intf display name uses a specified instance if != 0 @param show_instance - instance to display for intf if renumber is set + @param udp_encap - enable UDP encapsulation for NAT traversal */ define ipsec_tunnel_if_add_del { u32 client_index; @@ -585,6 +586,7 @@ define ipsec_tunnel_if_add_del { u8 remote_integ_key[128]; u8 renumber; u32 show_instance; + u8 udp_encap; }; /** \brief Add/delete IPsec tunnel interface response diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index c0e526c37404..a6bccf7a57c5 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -469,6 +469,7 @@ vl_api_ipsec_tunnel_if_add_del_t_handler (vl_api_ipsec_tunnel_if_add_del_t * tun.integ_alg = mp->integ_alg; tun.local_integ_key_len = mp->local_integ_key_len; tun.remote_integ_key_len = mp->remote_integ_key_len; + tun.udp_encap = mp->udp_encap; memcpy (&tun.local_ip, mp->local_ip, 4); memcpy (&tun.remote_ip, mp->remote_ip, 4); memcpy (&tun.local_crypto_key, &mp->local_crypto_key, From 4d1f9564da436314c2b910077a93d99df1b43f71 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Fri, 30 Nov 2018 16:46:29 -0500 Subject: 
[PATCH 53/70] Delete shared memory segment files when vpp starts Should have been done this way years ago. My bad. Change-Id: Ic7bf937fb6c4dc5c1b6ae64f2ecf8608b62e7039 Signed-off-by: Dave Barach (cherry picked from commit b2204671dad112e3195771854b4ef00bb388d4e6) --- build-root/deb/debian/vpp.service | 1 - build-root/deb/debian/vpp.upstart | 3 +-- src/vlibmemory/memory_api.c | 22 ++++++++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/build-root/deb/debian/vpp.service b/build-root/deb/debian/vpp.service index aa1651c41197..2e86941de8b5 100644 --- a/build-root/deb/debian/vpp.service +++ b/build-root/deb/debian/vpp.service @@ -4,7 +4,6 @@ After=network.target [Service] Type=simple -ExecStartPre=-/bin/rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api ExecStartPre=-/sbin/modprobe uio_pci_generic ExecStart=/usr/bin/vpp -c /etc/vpp/startup.conf ExecStopPost=/bin/rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api diff --git a/build-root/deb/debian/vpp.upstart b/build-root/deb/debian/vpp.upstart index 62e1d2780e66..4a451dd45d01 100644 --- a/build-root/deb/debian/vpp.upstart +++ b/build-root/deb/debian/vpp.upstart @@ -1,12 +1,11 @@ description "vector packet processing engine" -author "Cisco Systems, Inc " +author "Cisco Systems, Inc " manual respawn pre-start script - rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api || true # should be there via dkms, but if not, start anyway modprobe uio_pci_generic || true end script diff --git a/src/vlibmemory/memory_api.c b/src/vlibmemory/memory_api.c index 1f5da4c786e9..5849d719faed 100644 --- a/src/vlibmemory/memory_api.c +++ b/src/vlibmemory/memory_api.c @@ -898,6 +898,28 @@ vlibmemory_init (vlib_main_t * vm) api_main_t *am = &api_main; svm_map_region_args_t _a, *a = &_a; clib_error_t *error; + u8 *remove_path1, *remove_path2; + + /* + * By popular request / to avoid support fires, remove any old api segment + * files Right Here. 
+ */ + if (am->root_path == 0) + { + remove_path1 = format (0, "/dev/shm/global_vm%c", 0); + remove_path2 = format (0, "/dev/shm/vpe-api%c", 0); + } + else + { + remove_path1 = format (0, "/dev/shm/%s-global_vm%c", am->root_path, 0); + remove_path2 = format (0, "/dev/shm/%s-vpe-api%c", am->root_path, 0); + } + + (void) unlink ((char *) remove_path1); + (void) unlink ((char *) remove_path2); + + vec_free (remove_path1); + vec_free (remove_path2); memset (a, 0, sizeof (*a)); a->root_path = am->root_path; From 55670421c89637ca16c2abc9539deacb96037723 Mon Sep 17 00:00:00 2001 From: Zhiyong Yang Date: Thu, 13 Dec 2018 14:09:40 +0800 Subject: [PATCH 54/70] ip4_rewrite_inline: fix variable error Change-Id: I0470b4b13095583fe018f565f100342fab45715e Signed-off-by: Zhiyong Yang (cherry picked from commit b2ecc5d4156467c785c28493d614e874bc287cbd) --- src/vnet/ip/ip4_forward.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index ffb873a96112..9dac828a77de 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -2348,7 +2348,7 @@ ip4_rewrite_inline (vlib_main_t * vm, adj0->sub_type.midchain.fixup_func (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data); adj1->sub_type.midchain.fixup_func - (vm, adj1, b[1], adj0->sub_type.midchain.fixup_data); + (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data); } if (is_mcast) @@ -2360,7 +2360,7 @@ ip4_rewrite_inline (vlib_main_t * vm, adj0->rewrite_header.dst_mcast_offset, &ip0->dst_address.as_u32, (u8 *) ip0); vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, - adj0->rewrite_header.dst_mcast_offset, + adj1->rewrite_header.dst_mcast_offset, &ip1->dst_address.as_u32, (u8 *) ip1); } From a8e3001e68d8f5ea6cf526b131c92f5993597f81 Mon Sep 17 00:00:00 2001 From: "juraj.linkes" Date: Tue, 4 Dec 2018 13:03:15 +0100 Subject: [PATCH 55/70] Fix vat dependencies Needed for arm machines in CI. 
Change-Id: Ib16a8b63e145116c7cb22376243e9026d9545c8a Signed-off-by: juraj.linkes (cherry picked from commit a409f2729ac2431aeee5a18889b4d2e5634c713f) --- src/vat/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vat/CMakeLists.txt b/src/vat/CMakeLists.txt index 2ff907026db3..0df3bb384c8a 100644 --- a/src/vat/CMakeLists.txt +++ b/src/vat/CMakeLists.txt @@ -29,6 +29,8 @@ add_vpp_executable(vpp_api_test ENABLE_EXPORTS plugin.c json_format.c + DEPENDS api_headers + LINK_LIBRARIES vlibmemoryclient svm From 69a9fc053d2d2bc11399e0c74d23983298da8187 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Thu, 27 Dec 2018 09:07:32 -0800 Subject: [PATCH 56/70] install-dep: force osleap boost dep install jobs in stable/1810 failed to verify even after many rechecks. This is found in the failure log from https://gerrit.fd.io/r/#/c/16728/ 13:01:56 2 Problems: 13:01:56 Problem: libboost_headers1_68_0-devel-1.68.0-lp150.243.1.x86_64 conflicts with namespace:otherproviders(libboost_headers-devel) provided by libboost_headers-devel-1.69.0-lp150.1.1.noarch 13:01:56 Problem: libboost_thread1_68_0-devel-1.68.0-lp150.243.1.x86_64 conflicts with namespace:otherproviders(libboost_thread-devel) provided by libboost_thread-devel-1.69.0-lp150.1.1.noarch 13:01:56 13:01:56 Problem: libboost_headers1_68_0-devel-1.68.0-lp150.243.1.x86_64 conflicts with namespace:otherproviders(libboost_headers-devel) provided by libboost_headers-devel-1.69.0-lp150.1.1.noarch 13:01:56 Solution 1: Following actions will be done: 13:01:56 deinstallation of libboost_headers1_68_0-devel-1.68.0-lp150.243.1.x86_64 13:01:56 deinstallation of libboost_chrono1_68_0-devel-1.68.0-lp150.243.1.x86_64 13:01:56 deinstallation of libboost_date_time1_68_0-devel-1.68.0-lp150.243.1.x86_64 13:01:56 Solution 2: do not install libboost_headers-devel-1.69.0-lp150.1.1.noarch 13:01:56 13:01:56 Choose from above solutions by number or skip, retry or cancel [1/2/s/r/c] (c): c 13:01:56 make: *** [Makefile:315: install-dep] 
Error 4 A test patch was created to include both 16631 and 16728 as found in https://gerrit.fd.io/r/#/c/16986/ The job was verified successfully. It proves to me that stable/1810 is missing 16631. Change-Id: I4a053f41eef138fc0e6db7e2650860c0ac999552 Signed-off-by: Florin Coras Signed-off-by: Paul Vinciguerra (cherry picked from commit 223548d479c0bde67aa8d05a1f0f13e0afb0aab1) --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index a2d256239db1..1dedf8804f4c 100644 --- a/Makefile +++ b/Makefile @@ -131,14 +131,14 @@ RPM_SUSE_PLATFORM_DEPS = distribution-release shadow rpm-build ifeq ($(OS_ID),opensuse) ifeq ($(SUSE_NAME),Tumbleweed) - RPM_SUSE_DEVEL_DEPS = libboost_headers-devel libboost_thread-devel gcc + RPM_SUSE_DEVEL_DEPS = libboost_headers1_68_0-devel-1.68.0 libboost_thread1_68_0-devel-1.68.0 gcc RPM_SUSE_PYTHON_DEPS += python2-ply python2-virtualenv endif ifeq ($(SUSE_ID),15.0) - RPM_SUSE_DEVEL_DEPS = libboost_headers-devel libboost_thread-devel gcc6 + RPM_SUSE_DEVEL_DEPS = libboost_headers1_68_0-devel-1.68.0 libboost_thread1_68_0-devel-1.68.0 gcc6 RPM_SUSE_PYTHON_DEPS += python2-ply python2-virtualenv else - RPM_SUSE_DEVEL_DEPS += boost_1_61-devel gcc6 + RPM_SUSE_DEVEL_DEPS += libboost_headers1_68_0-devel-1.68.0 gcc6 RPM_SUSE_PYTHON_DEPS += python-virtualenv endif endif @@ -309,7 +309,7 @@ else ifeq ($(filter opensuse-tumbleweed,$(OS_ID)),$(OS_ID)) @sudo -E zypper install -y $(RPM_SUSE_DEPENDS) else ifeq ($(filter opensuse-leap,$(OS_ID)),$(OS_ID)) @sudo -E zypper refresh - @sudo -E zypper install -y $(RPM_SUSE_DEPENDS) + @sudo -E zypper install -y $(RPM_SUSE_DEPENDS) else ifeq ($(filter opensuse,$(OS_ID)),$(OS_ID)) @sudo -E zypper refresh @sudo -E zypper install -y $(RPM_SUSE_DEPENDS) From 9858d374ad1f789c8c860e00e2b8d4d01fdc1e73 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 20 Dec 2018 10:44:47 +0100 Subject: [PATCH 57/70] virtio: fix kick race issue [VPP-1489] Change-Id: 
I25b2a28513821bc5eab9ac6890a3964d412b0399 Signed-off-by: Damjan Marion (cherry picked from commit e40231b1ecf4b49faaa9ce7b615a7d867104825b) --- src/vnet/devices/virtio/device.c | 10 +++++----- src/vnet/devices/virtio/node.c | 18 +++++++++++++----- src/vnet/devices/virtio/virtio.h | 11 +++++++++++ 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c index c7efe6519cd2..d50ef88d3fbc 100644 --- a/src/vnet/devices/virtio/device.c +++ b/src/vnet/devices/virtio/device.c @@ -169,7 +169,6 @@ add_buffer_to_slot (vlib_main_t * vm, virtio_vring_t * vring, u32 bi, return n_added; } - static_always_inline uword virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, virtio_if_t * vif) @@ -184,6 +183,10 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, clib_spinlock_lock_if_init (&vif->lockp); + if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0 && + vring->last_kick_avail_idx != vring->avail->idx) + virtio_kick (vring); + /* free consumed buffers */ virtio_free_used_desc (vm, vring); @@ -209,10 +212,7 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vring->desc_next = next; vring->desc_in_use = used; if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0) - { - u64 x = 1; - CLIB_UNUSED (int r) = write (vring->kick_fd, &x, sizeof (x)); - } + virtio_kick (vring); } diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c index 339c48c93f56..419b025b3ae7 100644 --- a/src/vnet/devices/virtio/node.c +++ b/src/vnet/devices/virtio/node.c @@ -87,17 +87,23 @@ virtio_refill_vring (vlib_main_t * vm, virtio_vring_t * vring) u16 sz = vring->size; u16 mask = sz - 1; +more: used = vring->desc_in_use; if (sz - used < sz / 8) return; - n_slots = sz - used; + /* deliver free buffers in chunks of 64 */ + n_slots = clib_min (sz - used, 64); + next = vring->desc_next; avail = vring->avail->idx; 
n_slots = vlib_buffer_alloc_to_ring (vm, vring->buffers, next, vring->size, n_slots); + if (n_slots == 0) + return; + while (n_slots) { struct vring_desc *d = &vring->desc[next];; @@ -117,10 +123,8 @@ virtio_refill_vring (vlib_main_t * vm, virtio_vring_t * vring) vring->desc_in_use = used; if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0) - { - u64 b = 1; - CLIB_UNUSED (int r) = write (vring->kick_fd, &b, sizeof (b)); - } + virtio_kick (vring); + goto more; } static_always_inline uword @@ -140,6 +144,10 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u16 last = vring->last_used_idx; u16 n_left = vring->used->idx - last; + if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0 && + vring->last_kick_avail_idx != vring->avail->idx) + virtio_kick (vring); + if (n_left == 0) goto refill; diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h index 5fc521672d9a..8ac87c8ccfd6 100644 --- a/src/vnet/devices/virtio/virtio.h +++ b/src/vnet/devices/virtio/virtio.h @@ -86,6 +86,7 @@ typedef struct u32 call_file_index; u32 *buffers; u16 last_used_idx; + u16 last_kick_avail_idx; } virtio_vring_t; typedef struct @@ -136,6 +137,16 @@ extern void virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring); format_function_t format_virtio_device_name; +static_always_inline void +virtio_kick (virtio_vring_t * vring) +{ + u64 x = 1; + int __clib_unused r; + + r = write (vring->kick_fd, &x, sizeof (x)); + vring->last_kick_avail_idx = vring->avail->idx; +} + #endif /* _VNET_DEVICES_VIRTIO_VIRTIO_H_ */ /* From 13f5dcf9152287e06b9b5d67774b9f4b576ebaa7 Mon Sep 17 00:00:00 2001 From: Steven Date: Thu, 17 Jan 2019 15:11:29 -0800 Subject: [PATCH 58/70] bond: packet drops on VPP bond interface [VPP-1544] We register callback for VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION and VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION to add and remove the slave interface from the bond interface accordingly. 
For static bonding without lacp, one would think that it is good enough to put the slave interface into the active slave set as soon as it is configured. Wrong, sometimes the slave interface is configured to be part of the bonding without ever bringing up the hardware carrier or setting the admin state to up. In that case, we send traffic to the "dead" slave interface. The fix is to make sure both the carrier and admin state are up before we put the slave into the active set for forwarding traffic. Change-Id: I93b1c36d5481ca76cc8b87e8ca1b375ca3bd453b Signed-off-by: Steven (cherry picked from commit e43278f75fe3188551580c7d7991958805756e2f) --- src/vnet/bonding/cli.c | 8 +++++--- src/vnet/bonding/node.c | 33 ++++++++++++++++----------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c index 846fbdb38c9e..26d9eefad96e 100644 --- a/src/vnet/bonding/cli.c +++ b/src/vnet/bonding/cli.c @@ -512,11 +512,13 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args) ethernet_set_rx_redirect (vnm, sif_hw, 1); } - if ((bif->mode == BOND_MODE_LACP) && bm->lacp_enable_disable) + if (bif->mode == BOND_MODE_LACP) { - (*bm->lacp_enable_disable) (vm, bif, sif, 1); + if (bm->lacp_enable_disable) + (*bm->lacp_enable_disable) (vm, bif, sif, 1); } - else + else if (sif->port_enabled && + (sif_hw->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) { bond_enable_collecting_distributing (vm, sif); } diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c index 361509c549dd..725fde02e6c7 100644 --- a/src/vnet/bonding/node.c +++ b/src/vnet/bonding/node.c @@ -400,19 +400,21 @@ bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) if (sif) { sif->port_enabled = flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP; + if (sif->lacp_enabled) + return 0; + if (sif->port_enabled == 0) { - if (sif->lacp_enabled == 0) - { - bond_disable_collecting_distributing (vm, sif); - } + bond_disable_collecting_distributing (vm, sif); } 
else { - if (sif->lacp_enabled == 0) - { - bond_enable_collecting_distributing (vm, sif); - } + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hw = + vnet_get_sup_hw_interface (vnm, sw_if_index); + + if (hw->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) + bond_enable_collecting_distributing (vm, sif); } } @@ -433,19 +435,16 @@ bond_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) sif = bond_get_slave_by_sw_if_index (sw->sw_if_index); if (sif) { + if (sif->lacp_enabled) + return 0; + if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) { - if (sif->lacp_enabled == 0) - { - bond_disable_collecting_distributing (vm, sif); - } + bond_disable_collecting_distributing (vm, sif); } - else + else if (sif->port_enabled) { - if (sif->lacp_enabled == 0) - { - bond_enable_collecting_distributing (vm, sif); - } + bond_enable_collecting_distributing (vm, sif); } } From a867edfb6b7b22ab6e65de8b6925927125af89b6 Mon Sep 17 00:00:00 2001 From: Steven Luong Date: Fri, 1 Feb 2019 10:23:56 -0800 Subject: [PATCH 59/70] vhost: VPP stalls with vhost performing control plane actions Symptom ------- With NDR traffic blasting at VPP, bringing up a new VM with vhost connection to VPP causes packet drops. I am able to recreate this problem easily using a simple setup like this. TREX-------------- switch ---- VPP |---------------| |-------| Cause ----- The reason for the packet drops is due to vhost holding onto the worker barrier lock for too long in vhost_user_socket_read(). There are quite a few of system calls inside the routine. At the end of the routine, it unconditionally calls vhost_user_update_iface_state() for all message types. vhost_user_update_iface_state() also unconditionally calls vhost_user_rx_thread_placement() and vhost_user_tx_thread_placement(). vhost_user_rx_thread_placement scraps out all existing cpu/queue mappings for the interface and creates brand new cpu/queue mappings for the interface. This process is very disruptive and very expensive. 
In my opinion, this area of code needs a makeover. Fixes ----- * vhost_user_socket_read() is rewritten so that it does not hold onto the worker barrier lock for system calls, or at least minimizes the need for doing it. * Remove the call to vhost_user_update_iface_state as a default route at the end of vhost_user_socket_read(). There are only a couple of message types which really need to call vhost_user_update_iface_state(). We put the call to those message types which need it. * Remove vhost_user_rx_thread_placement() and vhost_user_tx_thread_placement from vhost_user_update_iface_state(). There is no need to repetitively change the cpu/queue mappings. * vhost_user_rx_thread_placement() is actually quite expensive. It should be called only once per queue for the interface. There is no need to scrap the existing cpu/queue mappings and create new cpu/queue mappings when the additional queues become active/enabled. * Change to create the cpu/queue mappings for the first RX when the interface is created. Don't remove the cpu/queue mapping when the interface is disconnected. Remove the cpu/queue mapping only when the interface is deleted. The create vhost user interface CLI also has some very expensive system calls if the command is entered with the optional keyword "server". As a bonus, this patch makes the create vhost user interface binary-api and CLI thread safe. Do the protection for the small amount of code which is thread unsafe. 
Change-Id: I4a19cbf7e9cc37ea01286169882e5603e6d7eb77 Signed-off-by: Steven Luong --- src/vnet/devices/virtio/vhost_user.c | 459 ++++++++++++----------- src/vnet/devices/virtio/vhost_user.h | 11 +- src/vnet/devices/virtio/vhost_user_api.c | 3 + 3 files changed, 259 insertions(+), 214 deletions(-) diff --git a/src/vnet/devices/virtio/vhost_user.c b/src/vnet/devices/virtio/vhost_user.c index 1342030a6518..48c5d1a5fbae 100644 --- a/src/vnet/devices/virtio/vhost_user.c +++ b/src/vnet/devices/virtio/vhost_user.c @@ -116,12 +116,13 @@ unmap_all_mem_regions (vhost_user_intf_t * vui) } } -static void +static_always_inline void vhost_user_tx_thread_placement (vhost_user_intf_t * vui) { //Let's try to assign one queue to each thread - u32 qid = 0; + u32 qid; u32 thread_index = 0; + vui->use_tx_spinlock = 0; while (1) { @@ -156,67 +157,27 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui) * @brief Unassign existing interface/queue to thread mappings and re-assign * new interface/queue to thread mappings */ -static void -vhost_user_rx_thread_placement () +static_always_inline void +vhost_user_rx_thread_placement (vhost_user_intf_t * vui, u32 qid) { - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - vhost_user_vring_t *txvq; + vhost_user_vring_t *txvq = &vui->vrings[qid]; vnet_main_t *vnm = vnet_get_main (); - u32 qid; int rv; - u16 *queue; - - // Scrap all existing mappings for all interfaces/queues - /* *INDENT-OFF* */ - pool_foreach (vui, vum->vhost_user_interfaces, { - vec_foreach (queue, vui->rx_queues) - { - rv = vnet_hw_interface_unassign_rx_thread (vnm, vui->hw_if_index, - *queue); - if (rv) - vu_log_warn (vui, "unable to unassign interface %d, " - "queue %d: rc=%d", vui->hw_if_index, *queue, rv); - } - vec_reset_length (vui->rx_queues); - }); - /* *INDENT-ON* */ - - // Create the rx_queues for all interfaces - /* *INDENT-OFF* */ - pool_foreach (vui, vum->vhost_user_interfaces, { - for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++) - { - 
txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; - if (txvq->started) - { - if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN) - /* Set polling as the default */ - txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING; - vec_add1 (vui->rx_queues, qid); - } - } - }); - /* *INDENT-ON* */ - - // Assign new mappings for all interfaces/queues - /* *INDENT-OFF* */ - pool_foreach (vui, vum->vhost_user_interfaces, { - vnet_hw_interface_set_input_node (vnm, vui->hw_if_index, - vhost_user_input_node.index); - vec_foreach (queue, vui->rx_queues) - { - vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, *queue, - ~0); - txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; - rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, *queue, - txvq->mode); - if (rv) - vu_log_warn (vui, "unable to set rx mode for interface %d, " - "queue %d: rc=%d", vui->hw_if_index, *queue, rv); - } - }); - /* *INDENT-ON* */ + u32 q = qid >> 1; + + ASSERT ((qid & 1) == 1); // should be odd + // Assign new queue mappings for the interface + vnet_hw_interface_set_input_node (vnm, vui->hw_if_index, + vhost_user_input_node.index); + vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, q, ~0); + if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN) + /* Set polling as the default */ + txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING; + txvq->qid = q; + rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, q, txvq->mode); + if (rv) + vu_log_warn (vui, "unable to set rx mode for interface %d, " + "queue %d: rc=%d", vui->hw_if_index, q, rv); } /** @brief Returns whether at least one TX and one RX vring are enabled */ @@ -232,7 +193,7 @@ vhost_user_intf_ready (vhost_user_intf_t * vui) return found[0] && found[1]; } -static void +static_always_inline void vhost_user_update_iface_state (vhost_user_intf_t * vui) { /* if we have pointers to descriptor table, go up */ @@ -247,8 +208,6 @@ vhost_user_update_iface_state (vhost_user_intf_t * vui) : 0); vui->is_ready = is_ready; } - 
vhost_user_rx_thread_placement (); - vhost_user_tx_thread_placement (vui); } static void @@ -278,6 +237,18 @@ vhost_user_callfd_read_ready (clib_file_t * uf) return 0; } +static_always_inline void +vhost_user_thread_placement (vhost_user_intf_t * vui, u32 qid) +{ + if (qid & 1) // RX is odd, TX is even + { + if (vui->vrings[qid].qid == -1) + vhost_user_rx_thread_placement (vui, qid); + } + else + vhost_user_tx_thread_placement (vui); +} + static clib_error_t * vhost_user_kickfd_read_ready (clib_file_t * uf) { @@ -293,10 +264,12 @@ vhost_user_kickfd_read_ready (clib_file_t * uf) if (!vui->vrings[qid].started || (vhost_user_intf_ready (vui) != vui->is_ready)) { - vlib_worker_thread_barrier_sync (vlib_get_main ()); - vui->vrings[qid].started = 1; - vhost_user_update_iface_state (vui); - vlib_worker_thread_barrier_release (vlib_get_main ()); + if (vui->vrings[qid].started == 0) + { + vui->vrings[qid].started = 1; + vhost_user_thread_placement (vui, qid); + vhost_user_update_iface_state (vui); + } } vhost_user_set_interrupt_pending (vui, uf->private_data); @@ -311,6 +284,7 @@ vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid) vring->kickfd_idx = ~0; vring->callfd_idx = ~0; vring->errfd = -1; + vring->qid = -1; /* * We have a bug with some qemu 2.5, and this may be a fix. @@ -329,6 +303,7 @@ static_always_inline void vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid) { vhost_user_vring_t *vring = &vui->vrings[qid]; + if (vring->kickfd_idx != ~0) { clib_file_t *uf = pool_elt_at_index (file_main.file_pool, @@ -348,7 +323,12 @@ vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid) close (vring->errfd); vring->errfd = -1; } + + // save the qid so that we don't need to unassign and assign_rx_thread + // when the interface comes back up. They are expensive calls. 
+ u16 q = vui->vrings[qid].qid; vhost_user_vring_init (vui, qid); + vui->vrings[qid].qid = q; } static_always_inline void @@ -377,7 +357,7 @@ vhost_user_if_disconnect (vhost_user_intf_t * vui) static clib_error_t * vhost_user_socket_read (clib_file_t * uf) { - int n, i; + int n, i, j; int fd, number_of_fds = 0; int fds[VHOST_MEMORY_MAX_NREGIONS]; vhost_user_msg_t msg; @@ -389,6 +369,7 @@ vhost_user_socket_read (clib_file_t * uf) u8 q; clib_file_t template = { 0 }; vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data); @@ -411,9 +392,6 @@ vhost_user_socket_read (clib_file_t * uf) n = recvmsg (uf->file_descriptor, &mh, 0); - /* Stop workers to avoid end of the world */ - vlib_worker_thread_barrier_sync (vlib_get_main ()); - if (n != VHOST_USER_MSG_HDR_SZ) { if (n == -1) @@ -488,6 +466,13 @@ vhost_user_socket_read (clib_file_t * uf) msg.size = sizeof (msg.u64); vu_log_debug (vui, "if %d msg VHOST_USER_GET_FEATURES - reply " "0x%016llx", vui->hw_if_index, msg.u64); + n = + send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); + if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) + { + vu_log_debug (vui, "could not send message response"); + goto close_socket; + } break; case VHOST_USER_SET_FEATURES: @@ -509,10 +494,6 @@ vhost_user_socket_read (clib_file_t * uf) ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE); vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); vui->is_ready = 0; - - /*for (q = 0; q < VHOST_VRING_MAX_N; q++) - vhost_user_vring_close(&vui->vrings[q]); */ - break; case VHOST_USER_SET_MEM_TABLE: @@ -522,10 +503,8 @@ vhost_user_socket_read (clib_file_t * uf) if ((msg.memory.nregions < 1) || (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS)) { - vu_log_debug (vui, "number of mem regions must be between 1 and %i", VHOST_MEMORY_MAX_NREGIONS); - goto close_socket; } @@ -534,39 +513,50 @@ vhost_user_socket_read (clib_file_t * uf) vu_log_debug 
(vui, "each memory region must have FD"); goto close_socket; } - unmap_all_mem_regions (vui); + + /* Do the mmap without barrier sync */ + void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS]; for (i = 0; i < msg.memory.nregions; i++) { - clib_memcpy (&(vui->regions[i]), &msg.memory.regions[i], - sizeof (vhost_user_memory_region_t)); - long page_sz = get_huge_page_size (fds[i]); /* align size to page */ - ssize_t map_sz = (vui->regions[i].memory_size + - vui->regions[i].mmap_offset + + ssize_t map_sz = (msg.memory.regions[i].memory_size + + msg.memory.regions[i].mmap_offset + page_sz - 1) & ~(page_sz - 1); - vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE, - MAP_SHARED, fds[i], 0); - vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr; - vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr + - vui->regions[i].memory_size; - - vu_log_debug (vui, "map memory region %d addr 0 len 0x%lx fd %d " - "mapped 0x%lx page_sz 0x%x", i, map_sz, fds[i], - vui->region_mmap_addr[i], page_sz); - - if (vui->region_mmap_addr[i] == MAP_FAILED) + region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE, + MAP_SHARED, fds[i], 0); + if (region_mmap_addr[i] == MAP_FAILED) { vu_log_err (vui, "failed to map memory. 
errno is %d", errno); + for (j = 0; j < i; j++) + munmap (region_mmap_addr[j], map_sz); goto close_socket; } + vu_log_debug (vui, "map memory region %d addr 0 len 0x%lx fd %d " + "mapped 0x%lx page_sz 0x%x", i, map_sz, fds[i], + region_mmap_addr[i], page_sz); + } + + vlib_worker_thread_barrier_sync (vm); + unmap_all_mem_regions (vui); + for (i = 0; i < msg.memory.nregions; i++) + { + clib_memcpy (&(vui->regions[i]), &msg.memory.regions[i], + sizeof (vhost_user_memory_region_t)); + + vui->region_mmap_addr[i] = region_mmap_addr[i]; + vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr; + vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr + + vui->regions[i].memory_size; + vui->region_mmap_addr[i] += vui->regions[i].mmap_offset; vui->region_mmap_fd[i] = fds[i]; vui->nregions++; } + vlib_worker_thread_barrier_release (vm); break; case VHOST_USER_SET_VRING_NUM: @@ -598,22 +588,22 @@ vhost_user_socket_read (clib_file_t * uf) goto close_socket; } - vui->vrings[msg.state.index].desc = (vring_desc_t *) - map_user_mem (vui, msg.addr.desc_user_addr); - vui->vrings[msg.state.index].used = (vring_used_t *) - map_user_mem (vui, msg.addr.used_user_addr); - vui->vrings[msg.state.index].avail = (vring_avail_t *) - map_user_mem (vui, msg.addr.avail_user_addr); + vring_desc_t *desc = map_user_mem (vui, msg.addr.desc_user_addr); + vring_used_t *used = map_user_mem (vui, msg.addr.used_user_addr); + vring_avail_t *avail = map_user_mem (vui, msg.addr.avail_user_addr); - if ((vui->vrings[msg.state.index].desc == NULL) || - (vui->vrings[msg.state.index].used == NULL) || - (vui->vrings[msg.state.index].avail == NULL)) + if ((desc == NULL) || (used == NULL) || (avail == NULL)) { vu_log_debug (vui, "failed to map user memory for hw_if_index %d", vui->hw_if_index); goto close_socket; } + vlib_worker_thread_barrier_sync (vm); + vui->vrings[msg.state.index].desc = desc; + vui->vrings[msg.state.index].used = used; + vui->vrings[msg.state.index].avail = avail; + 
vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr; vui->vrings[msg.state.index].log_used = (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0; @@ -621,9 +611,7 @@ vhost_user_socket_read (clib_file_t * uf) /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, the ring is initialized in an enabled state. */ if (!(vui->features & (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES))) - { - vui->vrings[msg.state.index].enabled = 1; - } + vui->vrings[msg.state.index].enabled = 1; vui->vrings[msg.state.index].last_used_idx = vui->vrings[msg.state.index].last_avail_idx = @@ -631,6 +619,8 @@ vhost_user_socket_read (clib_file_t * uf) /* tell driver that we don't want interrupts */ vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY; + vlib_worker_thread_barrier_release (vm); + vhost_user_update_iface_state (vui); break; case VHOST_USER_SET_OWNER: @@ -709,8 +699,9 @@ vhost_user_socket_read (clib_file_t * uf) //When no kickfd is set, the queue is initialized as started vui->vrings[q].kickfd_idx = ~0; vui->vrings[q].started = 1; + vhost_user_thread_placement (vui, q); } - + vhost_user_update_iface_state (vui); break; case VHOST_USER_SET_VRING_ERR: @@ -731,14 +722,14 @@ vhost_user_socket_read (clib_file_t * uf) } else vui->vrings[q].errfd = -1; - break; case VHOST_USER_SET_VRING_BASE: vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d", vui->hw_if_index, msg.state.index, msg.state.num); - + vlib_worker_thread_barrier_sync (vm); vui->vrings[msg.state.index].last_avail_idx = msg.state.num; + vlib_worker_thread_barrier_release (vm); break; case VHOST_USER_GET_VRING_BASE: @@ -749,6 +740,8 @@ vhost_user_socket_read (clib_file_t * uf) goto close_socket; } + /* protection is needed to prevent rx/tx from changing last_avail_idx */ + vlib_worker_thread_barrier_sync (vm); /* * Copy last_avail_idx from the vring before closing it because * closing the vring also initializes the vring last_avail_idx @@ -757,68 +750,84 @@ 
vhost_user_socket_read (clib_file_t * uf) msg.flags |= 4; msg.size = sizeof (msg.state); - /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */ + /* + * Spec says: Client must [...] stop ring upon receiving + * VHOST_USER_GET_VRING_BASE + */ vhost_user_vring_close (vui, msg.state.index); + vlib_worker_thread_barrier_release (vm); vu_log_debug (vui, "if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d", vui->hw_if_index, msg.state.index, msg.state.num); + n = + send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); + if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) + { + vu_log_debug (vui, "could not send message response"); + goto close_socket; + } + vhost_user_update_iface_state (vui); break; case VHOST_USER_NONE: vu_log_debug (vui, "if %d msg VHOST_USER_NONE", vui->hw_if_index); - break; case VHOST_USER_SET_LOG_BASE: - { - vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_BASE", - vui->hw_if_index); - - if (msg.size != sizeof (msg.log)) - { - vu_log_debug (vui, "invalid msg size for VHOST_USER_SET_LOG_BASE:" - " %d instead of %d", msg.size, sizeof (msg.log)); - goto close_socket; - } + vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_BASE", + vui->hw_if_index); - if (! 
- (vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD))) - { - vu_log_debug (vui, "VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but " - "VHOST_USER_SET_LOG_BASE received"); - goto close_socket; - } + if (msg.size != sizeof (msg.log)) + { + vu_log_debug (vui, "invalid msg size for VHOST_USER_SET_LOG_BASE:" + " %d instead of %d", msg.size, sizeof (msg.log)); + goto close_socket; + } - fd = fds[0]; - /* align size to page */ - long page_sz = get_huge_page_size (fd); - ssize_t map_sz = - (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1); + if (!(vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD))) + { + vu_log_debug (vui, "VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but " + "VHOST_USER_SET_LOG_BASE received"); + goto close_socket; + } - vui->log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); + fd = fds[0]; + /* align size to page */ + long page_sz = get_huge_page_size (fd); + ssize_t map_sz = + (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1); - vu_log_debug (vui, "map log region addr 0 len 0x%lx off 0x%lx fd %d " - "mapped 0x%lx", map_sz, msg.log.offset, fd, - vui->log_base_addr); + void *log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); - if (vui->log_base_addr == MAP_FAILED) - { - vu_log_err (vui, "failed to map memory. errno is %d", errno); - goto close_socket; - } + vu_log_debug (vui, "map log region addr 0 len 0x%lx off 0x%lx fd %d " + "mapped 0x%lx", map_sz, msg.log.offset, fd, + log_base_addr); - vui->log_base_addr += msg.log.offset; - vui->log_size = msg.log.size; + if (log_base_addr == MAP_FAILED) + { + vu_log_err (vui, "failed to map memory. 
errno is %d", errno); + goto close_socket; + } - msg.flags |= 4; - msg.size = sizeof (msg.u64); + vlib_worker_thread_barrier_sync (vm); + vui->log_base_addr = log_base_addr; + vui->log_base_addr += msg.log.offset; + vui->log_size = msg.log.size; + vlib_worker_thread_barrier_release (vm); - break; - } + msg.flags |= 4; + msg.size = sizeof (msg.u64); + n = + send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); + if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) + { + vu_log_debug (vui, "could not send message response"); + goto close_socket; + } + break; case VHOST_USER_SET_LOG_FD: vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index); - break; case VHOST_USER_GET_PROTOCOL_FEATURES: @@ -828,14 +837,19 @@ vhost_user_socket_read (clib_file_t * uf) msg.size = sizeof (msg.u64); vu_log_debug (vui, "if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - " "reply 0x%016llx", vui->hw_if_index, msg.u64); + n = + send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); + if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) + { + vu_log_debug (vui, "could not send message response"); + goto close_socket; + } break; case VHOST_USER_SET_PROTOCOL_FEATURES: vu_log_debug (vui, "if %d msg VHOST_USER_SET_PROTOCOL_FEATURES " "features 0x%016llx", vui->hw_if_index, msg.u64); - vui->protocol_features = msg.u64; - break; case VHOST_USER_GET_QUEUE_NUM: @@ -844,6 +858,13 @@ vhost_user_socket_read (clib_file_t * uf) msg.size = sizeof (msg.u64); vu_log_debug (vui, "if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d", vui->hw_if_index, msg.u64); + n = + send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); + if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) + { + vu_log_debug (vui, "could not send message response"); + goto close_socket; + } break; case VHOST_USER_SET_VRING_ENABLE: @@ -858,6 +879,8 @@ vhost_user_socket_read (clib_file_t * uf) } vui->vrings[msg.state.index].enabled = msg.state.num; + vhost_user_thread_placement (vui, msg.state.index); + 
vhost_user_update_iface_state (vui); break; default: @@ -866,26 +889,13 @@ vhost_user_socket_read (clib_file_t * uf) goto close_socket; } - /* if we need to reply */ - if (msg.flags & 4) - { - n = - send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); - if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) - { - vu_log_debug (vui, "could not send message response"); - goto close_socket; - } - } - - vhost_user_update_iface_state (vui); - vlib_worker_thread_barrier_release (vlib_get_main ()); return 0; close_socket: + vlib_worker_thread_barrier_sync (vm); vhost_user_if_disconnect (vui); + vlib_worker_thread_barrier_release (vm); vhost_user_update_iface_state (vui); - vlib_worker_thread_barrier_release (vlib_get_main ()); return 0; } @@ -900,7 +910,6 @@ vhost_user_socket_error (clib_file_t * uf) vu_log_debug (vui, "socket error on if %d", vui->sw_if_index); vlib_worker_thread_barrier_sync (vm); vhost_user_if_disconnect (vui); - vhost_user_rx_thread_placement (); vlib_worker_thread_barrier_release (vm); return 0; } @@ -984,7 +993,7 @@ vhost_user_send_interrupt_process (vlib_main_t * vm, f64 timeout = 3153600000.0 /* 100 years */ ; uword event_type, *event_data = 0; vhost_user_main_t *vum = &vhost_user_main; - u16 *queue; + u16 qid; f64 now, poll_time_remaining; f64 next_timeout; u8 stop_timer = 0; @@ -1022,13 +1031,13 @@ vhost_user_send_interrupt_process (vlib_main_t * vm, /* *INDENT-OFF* */ pool_foreach (vui, vum->vhost_user_interfaces, { next_timeout = timeout; - vec_foreach (queue, vui->rx_queues) + for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid += 2) { - vhost_user_vring_t *rxvq = - &vui->vrings[VHOST_VRING_IDX_RX (*queue)]; - vhost_user_vring_t *txvq = - &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; + vhost_user_vring_t *rxvq = &vui->vrings[qid]; + vhost_user_vring_t *txvq = &vui->vrings[qid + 1]; + if (txvq->qid == -1) + continue; if (txvq->n_since_last_int) { if (now >= txvq->int_deadline) @@ -1196,6 +1205,24 @@ vhost_user_term_if (vhost_user_intf_t * vui) 
for (q = 0; q < VHOST_VRING_MAX_N; q++) { + // Remove existing queue mapping for the interface + if (q & 1) + { + int rv; + vnet_main_t *vnm = vnet_get_main (); + vhost_user_vring_t *txvq = &vui->vrings[q]; + + if (txvq->qid != -1) + { + rv = vnet_hw_interface_unassign_rx_thread (vnm, + vui->hw_if_index, + q >> 1); + if (rv) + vu_log_warn (vui, "unable to unassign interface %d, " + "queue %d: rc=%d", vui->hw_if_index, q >> 1, rv); + } + } + clib_mem_free ((void *) vui->vring_locks[q]); } @@ -1220,7 +1247,7 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) vhost_user_intf_t *vui; int rv = 0; vnet_hw_interface_t *hwif; - u16 *queue; + u16 qid; if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || hwif->dev_class_index != vhost_user_device_class.index) @@ -1231,27 +1258,28 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) vu_log_debug (vui, "Deleting vhost-user interface %s (instance %d)", hwif->name, hwif->dev_instance); - vec_foreach (queue, vui->rx_queues) - { - vhost_user_vring_t *txvq; + for (qid = 1; qid < VHOST_VRING_MAX_N / 2; qid += 2) + { + vhost_user_vring_t *txvq = &vui->vrings[qid]; - txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; - if ((vum->ifq_count > 0) && - ((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || - (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))) - { - vum->ifq_count--; - // Stop the timer if there is no more interrupt interface/queue - if ((vum->ifq_count == 0) && - (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) - { - vlib_process_signal_event (vm, - vhost_user_send_interrupt_node.index, - VHOST_USER_EVENT_STOP_TIMER, 0); - break; - } - } - } + if (txvq->qid == -1) + continue; + if ((vum->ifq_count > 0) && + ((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || + (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))) + { + vum->ifq_count--; + // Stop the timer if there is no more interrupt interface/queue + if ((vum->ifq_count == 0) && + 
(vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) + { + vlib_process_signal_event (vm, + vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_STOP_TIMER, 0); + break; + } + } + } // Disable and reset interface vhost_user_term_if (vui); @@ -1467,12 +1495,16 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, } } + /* Protect the uninitialized vui from being dispatched by rx/tx */ + vlib_worker_thread_barrier_sync (vm); pool_get (vhost_user_main.vhost_user_interfaces, vui); - vhost_user_create_ethernet (vnm, vm, vui, hwaddr); + vlib_worker_thread_barrier_release (vm); + vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename, feature_mask, &sw_if_idx); vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000); + vhost_user_rx_thread_placement (vui, 1); if (renumber) vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); @@ -1711,7 +1743,7 @@ show_vhost_user_command_fn (vlib_main_t * vm, vhost_user_intf_t *vui; u32 hw_if_index, *hw_if_indices = 0; vnet_hw_interface_t *hi; - u16 *queue; + u16 qid; u32 ci; int i, j, q; int show_descr = 0; @@ -1818,20 +1850,24 @@ show_vhost_user_command_fn (vlib_main_t * vm, vlib_cli_output (vm, " rx placement: "); - vec_foreach (queue, vui->rx_queues) - { - vnet_main_t *vnm = vnet_get_main (); - uword thread_index; - vnet_hw_interface_rx_mode mode; - - thread_index = vnet_get_device_input_thread_index (vnm, - vui->hw_if_index, - *queue); - vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, *queue, &mode); - vlib_cli_output (vm, " thread %d on vring %d, %U\n", - thread_index, VHOST_VRING_IDX_TX (*queue), - format_vnet_hw_interface_rx_mode, mode); - } + for (qid = 1; qid < VHOST_VRING_MAX_N / 2; qid += 2) + { + vnet_main_t *vnm = vnet_get_main (); + uword thread_index; + vnet_hw_interface_rx_mode mode; + vhost_user_vring_t *txvq = &vui->vrings[qid]; + + if (txvq->qid == -1) + continue; + thread_index = + vnet_get_device_input_thread_index (vnm, vui->hw_if_index, + qid >> 1); + 
vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, qid >> 1, + &mode); + vlib_cli_output (vm, " thread %d on vring %d, %U\n", + thread_index, qid, + format_vnet_hw_interface_rx_mode, mode); + } vlib_cli_output (vm, " tx placement: %s\n", vui->use_tx_spinlock ? "spin-lock" : "lock-free"); @@ -1986,6 +2022,7 @@ VLIB_CLI_COMMAND (vhost_user_connect_command, static) = { .short_help = "create vhost-user socket [server] " "[feature-mask ] [hwaddr ] [renumber ] ", .function = vhost_user_connect_command_fn, + .is_mp_safe = 1, }; /* *INDENT-ON* */ diff --git a/src/vnet/devices/virtio/vhost_user.h b/src/vnet/devices/virtio/vhost_user.h index f2ed2dffd468..7dadfed23340 100644 --- a/src/vnet/devices/virtio/vhost_user.h +++ b/src/vnet/devices/virtio/vhost_user.h @@ -250,6 +250,14 @@ typedef struct /* The rx queue policy (interrupt/adaptive/polling) for this queue */ u32 mode; + + /* + * It contains the device queue number. -1 if it does not. The idea is + * to not invoke vnet_hw_interface_assign_rx_thread and + * vnet_hw_interface_unassign_rx_thread more than once for the duration of + * the interface even if it is disconnected and reconnected. 
+ */ + i16 qid; } vhost_user_vring_t; #define VHOST_USER_EVENT_START_TIMER 1 @@ -293,9 +301,6 @@ typedef struct /* Whether to use spinlock or per_cpu_tx_qid assignment */ u8 use_tx_spinlock; u16 *per_cpu_tx_qid; - - /* Vector of active rx queues for this interface */ - u16 *rx_queues; } vhost_user_intf_t; typedef struct diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c index 016ccbd26878..bd5f7e5773d9 100644 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -244,6 +244,9 @@ vhost_user_api_hookup (vlib_main_t * vm) foreach_vpe_api_msg; #undef _ + /* Mark CREATE_VHOST_USER_IF as mp safe */ + am->is_mp_safe[VL_API_CREATE_VHOST_USER_IF] = 1; + /* * Set up the (msg_name, crc, message-id) table */ From 1dd1a77cfd210a40deb0f5ed5ea59621ac644320 Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Thu, 28 Feb 2019 16:38:53 +0100 Subject: [PATCH 60/70] reassembly fixes merged into one for stable/18.10. Change-Id: Ic389807abaa10433c4ba6f111c5c27d035731fa8 Signed-off-by: Ole Troan --- src/vlib/buffer_funcs.h | 137 ++++++++++++++++ src/vnet/ip/ip6_reassembly.c | 304 ++++++++++++++--------------------- 2 files changed, 260 insertions(+), 181 deletions(-) diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index d8abdf31d79b..7ab41567c4d7 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -1269,6 +1269,143 @@ vlib_buffer_chain_compress (vlib_main_t * vm, (first->flags & VLIB_BUFFER_NEXT_PRESENT)); } +always_inline u32 +vlib_buffer_space_left_at_end (vlib_main_t * vm, vlib_buffer_t * b) +{ + return b->data + VLIB_BUFFER_DATA_SIZE - + ((u8 *) vlib_buffer_get_current (b) + b->current_length); +} + +always_inline u32 +vlib_buffer_chain_linearize (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_buffer_t *db = b, *sb, *first = b; + int is_cloned = 0; + u32 bytes_left = 0, data_size; + u16 src_left, dst_left, n_buffers = 1; + u8 *dp, *sp; + u32 to_free = 0; + + if 
(PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)) + return 1; + + data_size = VLIB_BUFFER_DATA_SIZE; + + dst_left = vlib_buffer_space_left_at_end (vm, b); + + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, b->next_buffer); + if (b->n_add_refs > 0) + is_cloned = 1; + bytes_left += b->current_length; + n_buffers++; + } + + /* if buffer is cloned, create completely new chain - unless everything fits + * into one buffer */ + if (is_cloned && bytes_left >= dst_left) + { + u32 len = 0; + u32 space_needed = bytes_left - dst_left; + u32 tail; + + if (vlib_buffer_alloc (vm, &tail, 1) == 0) + return 0; + + ++n_buffers; + len += data_size; + b = vlib_get_buffer (vm, tail); + + while (len < space_needed) + { + u32 bi; + if (vlib_buffer_alloc (vm, &bi, 1) == 0) + { + vlib_buffer_free_one (vm, tail); + return 0; + } + b->flags = VLIB_BUFFER_NEXT_PRESENT; + b->next_buffer = bi; + b = vlib_get_buffer (vm, bi); + len += data_size; + n_buffers++; + } + sb = vlib_get_buffer (vm, first->next_buffer); + to_free = first->next_buffer; + first->next_buffer = tail; + } + else + sb = vlib_get_buffer (vm, first->next_buffer); + + src_left = sb->current_length; + sp = vlib_buffer_get_current (sb); + dp = vlib_buffer_get_tail (db); + + while (bytes_left) + { + u16 bytes_to_copy; + + if (dst_left == 0) + { + if (db != first) + db->current_data = 0; + db->current_length = dp - (u8 *) vlib_buffer_get_current (db); + ASSERT (db->flags & VLIB_BUFFER_NEXT_PRESENT); + db = vlib_get_buffer (vm, db->next_buffer); + dst_left = data_size; + dp = db->data; + } + + while (src_left == 0) + { + ASSERT (sb->flags & VLIB_BUFFER_NEXT_PRESENT); + sb = vlib_get_buffer (vm, sb->next_buffer); + src_left = sb->current_length; + sp = vlib_buffer_get_current (sb); + } + + bytes_to_copy = clib_min (dst_left, src_left); + + if (dp != sp) + { + if (sb == db) + bytes_to_copy = clib_min (bytes_to_copy, sp - dp); + + clib_memcpy (dp, sp, bytes_to_copy); + } + + src_left -= 
bytes_to_copy; + dst_left -= bytes_to_copy; + dp += bytes_to_copy; + sp += bytes_to_copy; + bytes_left -= bytes_to_copy; + } + if (db != first) + db->current_data = 0; + db->current_length = dp - (u8 *) vlib_buffer_get_current (db); + + if (is_cloned && to_free) + vlib_buffer_free_one (vm, to_free); + else + { + if (db->flags & VLIB_BUFFER_NEXT_PRESENT) + vlib_buffer_free_one (vm, db->next_buffer); + db->flags &= ~VLIB_BUFFER_NEXT_PRESENT; + b = first; + n_buffers = 1; + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, b->next_buffer); + ++n_buffers; + } + } + + first->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; + + return n_buffers; +} + #endif /* included_vlib_buffer_funcs_h */ /* diff --git a/src/vnet/ip/ip6_reassembly.c b/src/vnet/ip/ip6_reassembly.c index 50445f2a1816..5d6426395516 100644 --- a/src/vnet/ip/ip6_reassembly.c +++ b/src/vnet/ip/ip6_reassembly.c @@ -32,7 +32,12 @@ #define IP6_REASS_MAX_REASSEMBLIES_DEFAULT 1024 #define IP6_REASS_HT_LOAD_FACTOR (0.75) -static vlib_node_registration_t ip6_reass_node; +typedef enum +{ + IP6_REASS_RC_OK, + IP6_REASS_RC_INTERNAL_ERROR, + IP6_REASS_RC_NO_BUF, +} ip6_reass_rc_t; typedef struct { @@ -51,37 +56,21 @@ typedef struct }; } ip6_reass_key_t; -always_inline u32 -ip6_reass_buffer_get_data_offset_no_check (vlib_buffer_t * b) -{ - vnet_buffer_opaque_t *vnb = vnet_buffer (b); - return vnb->ip.reass.range_first - vnb->ip.reass.fragment_first; -} - always_inline u32 ip6_reass_buffer_get_data_offset (vlib_buffer_t * b) { vnet_buffer_opaque_t *vnb = vnet_buffer (b); - ASSERT (vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first); - return ip6_reass_buffer_get_data_offset_no_check (b); + return vnb->ip.reass.range_first - vnb->ip.reass.fragment_first; } always_inline u16 -ip6_reass_buffer_get_data_len_no_check (vlib_buffer_t * b) +ip6_reass_buffer_get_data_len (vlib_buffer_t * b) { vnet_buffer_opaque_t *vnb = vnet_buffer (b); return clib_min (vnb->ip.reass.range_last, 
vnb->ip.reass.fragment_last) - (vnb->ip.reass.fragment_first + ip6_reass_buffer_get_data_offset (b)) + 1; } -always_inline u16 -ip6_reass_buffer_get_data_len (vlib_buffer_t * b) -{ - vnet_buffer_opaque_t *vnb = vnet_buffer (b); - ASSERT (vnb->ip.reass.range_last > vnb->ip.reass.fragment_first); - return ip6_reass_buffer_get_data_len_no_check (b); -} - typedef struct { // hash table key @@ -108,7 +97,6 @@ typedef struct { ip6_reass_t *pool; u32 reass_n; - u32 buffers_n; u32 id_counter; clib_spinlock_t lock; } ip6_reass_per_thread_t; @@ -188,8 +176,8 @@ ip6_reass_trace_details (vlib_main_t * vm, u32 bi, vnet_buffer_opaque_t *vnb = vnet_buffer (b); trace->range_first = vnb->ip.reass.range_first; trace->range_last = vnb->ip.reass.range_last; - trace->data_offset = ip6_reass_buffer_get_data_offset_no_check (b); - trace->data_len = ip6_reass_buffer_get_data_len_no_check (b); + trace->data_offset = ip6_reass_buffer_get_data_offset (b); + trace->data_len = ip6_reass_buffer_get_data_len (b); trace->range_bi = bi; } @@ -297,11 +285,12 @@ ip6_reass_free (ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt, always_inline void ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm, - ip6_reass_t * reass, u32 ** vec_drop_bi) + ip6_reass_t * reass) { u32 range_bi = reass->first_bi; vlib_buffer_t *range_b; vnet_buffer_opaque_t *range_vnb; + u32 *to_free = NULL; while (~0 != range_bi) { range_b = vlib_get_buffer (vm, range_bi); @@ -309,7 +298,7 @@ ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm, u32 bi = range_bi; while (~0 != bi) { - vec_add1 (*vec_drop_bi, bi); + vec_add1 (to_free, bi); vlib_buffer_t *b = vlib_get_buffer (vm, bi); if (b->flags & VLIB_BUFFER_NEXT_PRESENT) { @@ -323,12 +312,14 @@ ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm, } range_bi = range_vnb->ip.reass.next_range_bi; } + vlib_buffer_free (vm, to_free, vec_len (to_free)); + vec_free (to_free); } always_inline void ip6_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node, 
ip6_reass_main_t * rm, ip6_reass_t * reass, - u32 * icmp_bi, u32 ** vec_timeout) + u32 * icmp_bi) { if (~0 == reass->first_bi) { @@ -358,14 +349,13 @@ ip6_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node, ICMP6_time_exceeded_fragment_reassembly_time_exceeded, 0); } - ip6_reass_drop_all (vm, rm, reass, vec_timeout); + ip6_reass_drop_all (vm, rm, reass); } always_inline ip6_reass_t * ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt, - ip6_reass_key_t * k, u32 * icmp_bi, - u32 ** vec_timeout) + ip6_reass_key_t * k, u32 * icmp_bi) { ip6_reass_t *reass = NULL; f64 now = vlib_time_now (rm->vlib_main); @@ -382,7 +372,7 @@ ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node, reass = pool_elt_at_index (rt->pool, value.value); if (now > reass->last_heard + rm->timeout) { - ip6_reass_on_timeout (vm, node, rm, reass, icmp_bi, vec_timeout); + ip6_reass_on_timeout (vm, node, rm, reass, icmp_bi); ip6_reass_free (rm, rt, reass); reass = NULL; } @@ -430,13 +420,12 @@ ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node, return reass; } -always_inline void +always_inline ip6_reass_rc_t ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt, - ip6_reass_t * reass, u32 * bi0, u32 * next0, - u32 * error0, u32 ** vec_drop_compress, bool is_feature) + ip6_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0, + bool is_feature) { - ASSERT (~0 != reass->first_bi); *bi0 = reass->first_bi; *error0 = IP6_ERROR_NONE; ip6_frag_hdr_t *frag_hdr; @@ -445,10 +434,20 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, u32 total_length = 0; u32 buf_cnt = 0; u32 dropped_cnt = 0; + u32 *vec_drop_compress = NULL; + ip6_reass_rc_t rv = IP6_REASS_RC_OK; do { u32 tmp_bi = sub_chain_bi; vlib_buffer_t *tmp = vlib_get_buffer (vm, tmp_bi); + vnet_buffer_opaque_t *vnb = vnet_buffer (tmp); + if 
(!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) && + !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first)) + { + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; + } + u32 data_len = ip6_reass_buffer_get_data_len (tmp); u32 trim_front = vnet_buffer (tmp)->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr) + ip6_reass_buffer_get_data_offset (tmp); @@ -457,12 +456,20 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, if (tmp_bi == reass->first_bi) { /* first buffer - keep ip6 header */ - ASSERT (0 == ip6_reass_buffer_get_data_offset (tmp)); + if (0 != ip6_reass_buffer_get_data_offset (tmp)) + { + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; + } trim_front = 0; trim_end = vlib_buffer_length_in_chain (vm, tmp) - data_len - (vnet_buffer (tmp)->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr)); - ASSERT (vlib_buffer_length_in_chain (vm, tmp) - trim_end > 0); + if (!(vlib_buffer_length_in_chain (vm, tmp) - trim_end > 0)) + { + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; + } } u32 keep_data = vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end; @@ -474,10 +481,13 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, if (trim_front > tmp->current_length) { /* drop whole buffer */ - vec_add1 (*vec_drop_compress, tmp_bi); - ++dropped_cnt; + vec_add1 (vec_drop_compress, tmp_bi); trim_front -= tmp->current_length; - ASSERT (tmp->flags & VLIB_BUFFER_NEXT_PRESENT); + if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; + } tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT; tmp_bi = tmp->next_buffer; tmp = vlib_get_buffer (vm, tmp_bi); @@ -505,14 +515,22 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, else { keep_data -= tmp->current_length; - ASSERT (tmp->flags & VLIB_BUFFER_NEXT_PRESENT); + if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto 
free_buffers_and_return; + } } total_length += tmp->current_length; } else { - vec_add1 (*vec_drop_compress, tmp_bi); - ASSERT (reass->first_bi != tmp_bi); + vec_add1 (vec_drop_compress, tmp_bi); + if (reass->first_bi == tmp_bi) + { + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; + } ++dropped_cnt; } if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT) @@ -531,10 +549,18 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, } while (~0 != sub_chain_bi); - ASSERT (last_b != NULL); + if (!last_b) + { + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; + } last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi); - ASSERT (total_length >= first_b->current_length); + if (total_length < first_b->current_length) + { + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; + } total_length -= first_b->current_length; first_b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; first_b->total_length_not_including_first_buffer = total_length; @@ -553,7 +579,11 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, { ip->protocol = frag_hdr->next_hdr; } - ASSERT ((u8 *) frag_hdr - (u8 *) ip == ip6_frag_hdr_offset); + if (!((u8 *) frag_hdr - (u8 *) ip == ip6_frag_hdr_offset)) + { + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; + } memmove (frag_hdr, (u8 *) frag_hdr + sizeof (*frag_hdr), first_b->current_length - ip6_frag_hdr_offset - sizeof (ip6_frag_hdr_t)); @@ -561,7 +591,11 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, ip->payload_length = clib_host_to_net_u16 (total_length + first_b->current_length - sizeof (*ip)); - vlib_buffer_chain_compress (vm, first_b, vec_drop_compress); + if (!vlib_buffer_chain_linearize (vm, first_b)) + { + rv = IP6_REASS_RC_NO_BUF; + goto free_buffers_and_return; + } if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED)) { ip6_reass_add_trace (vm, node, rm, reass, reass->first_bi, FINALIZE, 0); 
@@ -603,25 +637,10 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length; ip6_reass_free (rm, rt, reass); reass = NULL; -} - -always_inline u32 -ip6_reass_get_buffer_chain_length (vlib_main_t * vm, vlib_buffer_t * b) -{ - u32 len = 0; - while (b) - { - ++len; - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - b = vlib_get_buffer (vm, b->next_buffer); - } - else - { - break; - } - } - return len; +free_buffers_and_return: + vlib_buffer_free (vm, vec_drop_compress, vec_len (vec_drop_compress)); + vec_free (vec_drop_compress); + return rv; } always_inline void @@ -649,16 +668,13 @@ ip6_reass_insert_range_in_chain (vlib_main_t * vm, ip6_reass_main_t * rm, reass->first_bi = new_next_bi; } reass->data_len += ip6_reass_buffer_get_data_len (new_next_b); - rt->buffers_n += ip6_reass_get_buffer_chain_length (vm, new_next_b); } -always_inline void +always_inline ip6_reass_rc_t ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt, - ip6_reass_t * reass, u32 * bi0, u32 * next0, - u32 * error0, ip6_frag_hdr_t * frag_hdr, - u32 ** vec_drop_overlap, u32 ** vec_drop_compress, - bool is_feature) + ip6_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0, + ip6_frag_hdr_t * frag_hdr, bool is_feature) { int consumed = 0; vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0); @@ -667,9 +683,13 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, fvnb->ip.reass.ip6_frag_hdr_offset = (u8 *) frag_hdr - (u8 *) vlib_buffer_get_current (fb); ip6_header_t *fip = vlib_buffer_get_current (fb); - ASSERT (fb->current_length > sizeof (*fip)); - ASSERT (fvnb->ip.reass.ip6_frag_hdr_offset > 0 && - fvnb->ip.reass.ip6_frag_hdr_offset < fb->current_length); + if (fb->current_length < sizeof (*fip) || + fvnb->ip.reass.ip6_frag_hdr_offset == 0 || + fvnb->ip.reass.ip6_frag_hdr_offset >= fb->current_length) + { + return 
IP6_REASS_RC_INTERNAL_ERROR; + } + u32 fragment_first = fvnb->ip.reass.fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr); u32 fragment_length = @@ -698,7 +718,7 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, } reass->min_fragment_length = clib_net_to_host_u16 (fip->payload_length); *bi0 = ~0; - return; + return IP6_REASS_RC_OK; } reass->min_fragment_length = clib_min (clib_net_to_host_u16 (fip->payload_length), @@ -738,7 +758,7 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, else { // overlapping fragment - not allowed by RFC 8200 - ip6_reass_drop_all (vm, rm, reass, vec_drop_overlap); + ip6_reass_drop_all (vm, rm, reass); ip6_reass_free (rm, rt, reass); if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED)) { @@ -760,8 +780,8 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, if (~0 != reass->last_packet_octet && reass->data_len == reass->last_packet_octet + 1) { - ip6_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0, - vec_drop_compress, is_feature); + return ip6_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0, + is_feature); } else { @@ -776,6 +796,7 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, *error0 = IP6_ERROR_REASS_DUPLICATE_FRAGMENT; } } + return IP6_REASS_RC_OK; } always_inline bool @@ -858,64 +879,15 @@ ip6_reassembly_inline (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - static u32 *vec_timeout = NULL; // indexes of buffers which timed out - static u32 *vec_drop_overlap = NULL; // indexes of buffers dropped due to overlap - static u32 *vec_drop_compress = NULL; // indexes of buffers dropped due to buffer compression - while (n_left_from > 0 || vec_len (vec_timeout) > 0 || - vec_len (vec_drop_overlap) > 0 || vec_len (vec_drop_compress) > 0) + while (n_left_from > 0) { vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (vec_len (vec_timeout) > 0 && n_left_to_next > 0) - { - u32 bi = 
vec_pop (vec_timeout); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP6_ERROR_REASS_TIMEOUT]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP6_REASSEMBLY_NEXT_DROP); - ASSERT (rt->buffers_n > 0); - --rt->buffers_n; - } - - while (vec_len (vec_drop_overlap) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_drop_overlap); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP6_ERROR_REASS_OVERLAPPING_FRAGMENT]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP6_REASSEMBLY_NEXT_DROP); - ASSERT (rt->buffers_n > 0); - --rt->buffers_n; - } - - while (vec_len (vec_drop_compress) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_drop_compress); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP6_ERROR_NONE]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP6_REASSEMBLY_NEXT_DROP); - ASSERT (rt->buffers_n > 0); - --rt->buffers_n; - } - while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0; vlib_buffer_t *b0; - u32 next0; + u32 next0 = IP6_REASSEMBLY_NEXT_DROP; u32 error0 = IP6_ERROR_NONE; u32 icmp_bi = ~0; @@ -965,14 +937,25 @@ ip6_reassembly_inline (vlib_main_t * vm, sw_if_index[VLIB_RX] << 32 | frag_hdr->identification; k.as_u64[5] = ip0->protocol; ip6_reass_t *reass = - ip6_reass_find_or_create (vm, node, rm, rt, &k, &icmp_bi, - &vec_timeout); + ip6_reass_find_or_create (vm, node, rm, rt, &k, &icmp_bi); if (reass) { - ip6_reass_update (vm, node, rm, rt, reass, &bi0, &next0, - &error0, frag_hdr, &vec_drop_overlap, - &vec_drop_compress, is_feature); + switch (ip6_reass_update (vm, node, rm, rt, reass, &bi0, &next0, + &error0, frag_hdr, is_feature)) + { + case IP6_REASS_RC_OK: + /* nothing 
to do here */ + break; + case IP6_REASS_RC_NO_BUF: + /* fallthrough */ + case IP6_REASS_RC_INTERNAL_ERROR: + /* drop everything and start with a clean slate */ + ip6_reass_drop_all (vm, rm, reass); + ip6_reass_free (rm, rt, reass); + goto next_packet; + break; + } } else { @@ -1007,6 +990,7 @@ ip6_reassembly_inline (vlib_main_t * vm, n_left_to_next, icmp_bi, next0); } + next_packet: from += 1; n_left_from -= 1; } @@ -1193,7 +1177,7 @@ ip6_reass_init_function (vlib_main_t * vm) rm->vlib_main = vm; rm->vnet_main = vnet_get_main (); - vec_validate (rm->per_thread_data, vlib_num_workers () + 1); + vec_validate (rm->per_thread_data, vlib_num_workers ()); ip6_reass_per_thread_t *rt; vec_foreach (rt, rm->per_thread_data) { @@ -1257,12 +1241,11 @@ ip6_reass_walk_expired (vlib_main_t * vm, f64 now = vlib_time_now (vm); ip6_reass_t *reass; - u32 *vec_timeout = NULL; int *pool_indexes_to_free = NULL; uword thread_index = 0; int index; - const uword nthreads = os_get_nthreads (); + const uword nthreads = vlib_num_workers () + 1; u32 *vec_icmp_bi = NULL; for (thread_index = 0; thread_index < nthreads; ++thread_index) { @@ -1285,7 +1268,6 @@ ip6_reass_walk_expired (vlib_main_t * vm, { ip6_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); u32 icmp_bi = ~0; - u32 before = vec_len (vec_timeout); vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi); if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) { @@ -1296,15 +1278,10 @@ ip6_reass_walk_expired (vlib_main_t * vm, b->flags &= ~VLIB_BUFFER_IS_TRACED; } } - ip6_reass_on_timeout (vm, node, rm, reass, &icmp_bi, &vec_timeout); - u32 after = vec_len (vec_timeout); - ASSERT (rt->buffers_n >= (after - before)); - rt->buffers_n -= (after - before); + ip6_reass_on_timeout (vm, node, rm, reass, &icmp_bi); if (~0 != icmp_bi) { vec_add1 (vec_icmp_bi, icmp_bi); - ASSERT (rt->buffers_n > 0); - --rt->buffers_n; } ip6_reass_free (rm, rt, reass); } @@ -1313,39 +1290,6 @@ ip6_reass_walk_expired (vlib_main_t * vm, clib_spinlock_unlock 
(&rt->lock); } - while (vec_len (vec_timeout) > 0) - { - vlib_frame_t *f = vlib_get_frame_to_node (vm, rm->ip6_drop_idx); - u32 *to_next = vlib_frame_vector_args (f); - u32 n_left_to_next = VLIB_FRAME_SIZE - f->n_vectors; - int trace_frame = 0; - while (vec_len (vec_timeout) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_timeout); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) - { - if (pool_is_free_index (vm->trace_main.trace_buffer_pool, - b->trace_index)) - { - /* the trace is gone, don't trace this buffer anymore */ - b->flags &= ~VLIB_BUFFER_IS_TRACED; - } - else - { - trace_frame = 1; - } - } - b->error = node->errors[IP6_ERROR_REASS_TIMEOUT]; - to_next[0] = bi; - ++f->n_vectors; - to_next += 1; - n_left_to_next -= 1; - } - f->frame_flags |= (trace_frame * VLIB_FRAME_TRACE); - vlib_put_frame_to_node (vm, rm->ip6_drop_idx, f); - } - while (vec_len (vec_icmp_bi) > 0) { vlib_frame_t *f = @@ -1381,7 +1325,6 @@ ip6_reass_walk_expired (vlib_main_t * vm, } vec_free (pool_indexes_to_free); - vec_free (vec_timeout); vec_free (vec_icmp_bi); if (event_data) { @@ -1438,8 +1381,8 @@ format_ip6_reass (u8 * s, va_list * args) "fragment[%u, %u]\n", counter, vnb->ip.reass.range_first, vnb->ip.reass.range_last, bi, - ip6_reass_buffer_get_data_offset_no_check (b), - ip6_reass_buffer_get_data_len_no_check (b), + ip6_reass_buffer_get_data_offset (b), + ip6_reass_buffer_get_data_len (b), vnb->ip.reass.fragment_first, vnb->ip.reass.fragment_last); if (b->flags & VLIB_BUFFER_NEXT_PRESENT) { @@ -1472,7 +1415,7 @@ show_ip6_reass (vlib_main_t * vm, unformat_input_t * input, u64 sum_buffers_n = 0; ip6_reass_t *reass; uword thread_index; - const uword nthreads = os_get_nthreads (); + const uword nthreads = vlib_num_workers () + 1; for (thread_index = 0; thread_index < nthreads; ++thread_index) { ip6_reass_per_thread_t *rt = &rm->per_thread_data[thread_index]; @@ -1486,7 +1429,6 @@ show_ip6_reass (vlib_main_t * vm, 
unformat_input_t * input, /* *INDENT-ON* */ } sum_reass_n += rt->reass_n; - sum_buffers_n += rt->buffers_n; clib_spinlock_unlock (&rt->lock); } vlib_cli_output (vm, "---------------------"); From ca561dffe1888021d48ad8020fdd8c3b13ff17de Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 12 Dec 2018 08:29:41 -0500 Subject: [PATCH 61/70] VPP-1529: check hop-by-hop header length Fix a single packet-of-death case, caught by vlib_buffer_advance() in debug images. Change-Id: I9c107f20d7c053c3e40a0756dd7ca1c3be276a1a Signed-off-by: Dave Barach --- src/plugins/ioam/udp-ping/udp_ping_node.c | 44 +++++++++++++++++++---- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/src/plugins/ioam/udp-ping/udp_ping_node.c b/src/plugins/ioam/udp-ping/udp_ping_node.c index 7a725258d655..59e4511eb7db 100644 --- a/src/plugins/ioam/udp-ping/udp_ping_node.c +++ b/src/plugins/ioam/udp-ping/udp_ping_node.c @@ -38,6 +38,23 @@ typedef enum UDP_PING_N_NEXT, } udp_ping_next_t; +#define foreach_udp_ping_error \ +_(BADHBH, "Malformed hop-by-hop header") + +typedef enum +{ +#define _(sym,str) UDP_PING_ERROR_##sym, + foreach_udp_ping_error +#undef _ + UDP_PING_N_ERROR, +} udp_ping_error_t; + +static char *udp_ping_error_strings[] = { +#define _(sym,string) string, + foreach_udp_ping_error +#undef _ +}; + udp_ping_main_t udp_ping_main; uword @@ -502,15 +519,26 @@ udp_ping_analyse_hbh (vlib_buffer_t * b0, * */ void -udp_ping_local_analyse (vlib_buffer_t * b0, - ip6_header_t * ip0, - ip6_hop_by_hop_header_t * hbh0, u16 * next0) +udp_ping_local_analyse (vlib_node_runtime_t * node, vlib_buffer_t * b0, + ip6_header_t * ip0, ip6_hop_by_hop_header_t * hbh0, + u16 * next0) { ip6_main_t *im = &ip6_main; ip_lookup_main_t *lm = &im->lookup_main; *next0 = UDP_PING_NEXT_IP6_DROP; + /* + * Sanity check: hbh header length must be less than + * b0->current_length. 
+ */ + if (PREDICT_FALSE ((hbh0->length + 1) << 3) >= b0->current_length) + { + *next0 = UDP_PING_NEXT_DROP; + b0->error = node->errors[UDP_PING_ERROR_BADHBH]; + return; + } + if (PREDICT_TRUE (hbh0->protocol == IP_PROTOCOL_UDP)) { ip6_hop_by_hop_option_t *opt0; @@ -600,7 +628,7 @@ udp_ping_local_analyse (vlib_buffer_t * b0, * @par Graph mechanics: buffer, next index usage * * Uses: - * - udp_ping_local_analyse(p0, ip0, hbh0, &next0) + * - udp_ping_local_analyse(node, p0, ip0, hbh0, &next0) * - Checks packet type - request/respnse and process them. * * Next Index: @@ -660,8 +688,8 @@ udp_ping_local_node_fn (vlib_main_t * vm, hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1); - udp_ping_local_analyse (p0, ip0, hbh0, &next0); - udp_ping_local_analyse (p1, ip1, hbh1, &next1); + udp_ping_local_analyse (node, p0, ip0, hbh0, &next0); + udp_ping_local_analyse (node, p1, ip1, hbh1, &next1); if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) { @@ -727,7 +755,7 @@ udp_ping_local_node_fn (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); - udp_ping_local_analyse (p0, ip0, hbh0, &next0); + udp_ping_local_analyse (node, p0, ip0, hbh0, &next0); if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) { @@ -774,6 +802,8 @@ VLIB_REGISTER_NODE (udp_ping_local, static) = .format_trace = format_udp_ping_trace, .type = VLIB_NODE_TYPE_INTERNAL, .n_next_nodes = UDP_PING_N_NEXT, + .n_errors = UDP_PING_N_ERROR, + .error_strings = udp_ping_error_strings, .next_nodes = { [UDP_PING_NEXT_DROP] = "error-drop", From 3d09e9992d198c62dee468e5019bf9dd9c2dffca Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Fri, 1 Mar 2019 10:22:14 +0100 Subject: [PATCH 62/70] 1810 version of VPP-1573 fix crash in ip6 reassembly Change-Id: I20d15ee45957e9fb5a7b3e7edd10cd34b308b664 Signed-off-by: Ole Troan --- src/vnet/ip/ip6_reassembly.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git 
a/src/vnet/ip/ip6_reassembly.c b/src/vnet/ip/ip6_reassembly.c index 5d6426395516..0162ad98e635 100644 --- a/src/vnet/ip/ip6_reassembly.c +++ b/src/vnet/ip/ip6_reassembly.c @@ -712,13 +712,9 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, // starting a new reassembly ip6_reass_insert_range_in_chain (vm, rm, rt, reass, prev_range_bi, *bi0); - if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED)) - { - ip6_reass_add_trace (vm, node, rm, reass, *bi0, RANGE_NEW, 0); - } reass->min_fragment_length = clib_net_to_host_u16 (fip->payload_length); - *bi0 = ~0; - return IP6_REASS_RC_OK; + consumed = 1; + goto check_if_done_maybe; } reass->min_fragment_length = clib_min (clib_net_to_host_u16 (fip->payload_length), @@ -767,9 +763,11 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, } *next0 = IP6_REASSEMBLY_NEXT_DROP; *error0 = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT; + return IP6_REASS_RC_OK; } break; } +check_if_done_maybe: if (consumed) { if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED)) @@ -792,7 +790,6 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, else { *next0 = IP6_REASSEMBLY_NEXT_DROP; - ; *error0 = IP6_ERROR_REASS_DUPLICATE_FRAGMENT; } } From a0005702c9593e3bd4367ca9d58e52c3bb576e75 Mon Sep 17 00:00:00 2001 From: Ed Kern Date: Tue, 19 Feb 2019 10:27:23 -0700 Subject: [PATCH 63/70] Makefile: correct opensuse 15.0 dep naming This covers both naming option for opensuse leap15 dep package naming Change-Id: I2ade004e44e75c08afde0f3af42b33cd97ce0ebc Signed-off-by: Ed Kern --- Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 1dedf8804f4c..692ea7bf8a1c 100644 --- a/Makefile +++ b/Makefile @@ -135,8 +135,7 @@ ifeq ($(SUSE_NAME),Tumbleweed) RPM_SUSE_PYTHON_DEPS += python2-ply python2-virtualenv endif ifeq ($(SUSE_ID),15.0) - RPM_SUSE_DEVEL_DEPS = libboost_headers1_68_0-devel-1.68.0 libboost_thread1_68_0-devel-1.68.0 gcc6 - RPM_SUSE_PYTHON_DEPS += python2-ply 
python2-virtualenv + RPM_SUSE_DEVEL_DEPS = libboost_headers-devel libboost_thread-devel gcc6 else RPM_SUSE_DEVEL_DEPS += libboost_headers1_68_0-devel-1.68.0 gcc6 RPM_SUSE_PYTHON_DEPS += python-virtualenv @@ -145,8 +144,8 @@ endif ifeq ($(OS_ID),opensuse-leap) ifeq ($(SUSE_ID),15.0) - RPM_SUSE_DEVEL_DEPS = libboost_headers-devel libboost_thread-devel gcc6 - RPM_SUSE_PYTHON_DEPS += python2-ply python2-virtualenv + RPM_SUSE_DEVEL_DEPS = libboost_headers-devel libboost_thread-devel gcc + RPM_SUSE_PYTHON_DEPS += python3-ply python2-virtualenv endif endif From 0f867653e4fb7c2cbaf0fa49ff72457f17355e8d Mon Sep 17 00:00:00 2001 From: "Igor Mikhailov (imichail)" Date: Fri, 11 Jan 2019 14:03:53 -0800 Subject: [PATCH 64/70] Fix 'show interface span' field length Allow to display longer interface names, e.g. VirtualEthernet0/0/0.102 The field length (32) is now the same as for 'show interface'. Change-Id: I1cb1efd459acb800bfaeeec40b672c8b17cd8c3d Signed-off-by: Igor Mikhailov (imichail) (cherry picked from commit 0ac827e15c5ee2134a15bf5e023e03967ddcbaa8) --- src/vnet/span/span.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vnet/span/span.c b/src/vnet/span/span.c index b7292cf1e800..fc923e0ce31b 100644 --- a/src/vnet/span/span.c +++ b/src/vnet/span/span.c @@ -206,7 +206,7 @@ show_interfaces_span_command_fn (vlib_main_t * vm, clib_bitmap_t *b = clib_bitmap_dup_or (d, l); if (header) { - vlib_cli_output (vm, "%-20s %-20s %6s %6s", "Source", "Destination", + vlib_cli_output (vm, "%-32s %-32s %6s %6s", "Source", "Destination", "Device", "L2"); header = 0; } @@ -219,7 +219,7 @@ show_interfaces_span_command_fn (vlib_main_t * vm, int l2 = (clib_bitmap_get (lrxm->mirror_ports, i) + clib_bitmap_get (ltxm->mirror_ports, i) * 2); - vlib_cli_output (vm, "%-20v %-20U (%6s) (%6s)", s, + vlib_cli_output (vm, "%-32v %-32U (%6s) (%6s)", s, format_vnet_sw_if_index_name, vnm, i, states[device], states[l2]); vec_reset_length (s); From 
43e7d25d9f91dd44b4779358095a3f54ab4f3029 Mon Sep 17 00:00:00 2001 From: Joe Zhou Date: Wed, 6 Mar 2019 23:05:32 -0800 Subject: [PATCH 65/70] assign flood_class to vnet_sw_interface_t template in subif api handle function Change-Id: I352f4a4adcf8771c21530657efcaecb532416612 Signed-off-by: Joe Zhou (cherry picked from commit 715f94ed94638ea883f919361bff7a3f46fd1d1b) --- src/vnet/interface_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 644babef894b..3d8f47d4cb17 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -1129,6 +1129,7 @@ vl_api_create_vlan_subif_t_handler (vl_api_create_vlan_subif_t * mp) memset (&template, 0, sizeof (template)); template.type = VNET_SW_INTERFACE_TYPE_SUB; + template.flood_class = VNET_FLOOD_CLASS_NORMAL; template.sup_sw_if_index = hi->sw_if_index; template.sub.id = id; template.sub.eth.raw_flags = 0; @@ -1209,6 +1210,7 @@ vl_api_create_subif_t_handler (vl_api_create_subif_t * mp) memset (&template, 0, sizeof (template)); template.type = VNET_SW_INTERFACE_TYPE_SUB; + template.flood_class = VNET_FLOOD_CLASS_NORMAL; template.sup_sw_if_index = sw_if_index; template.sub.id = sub_id; template.sub.eth.flags.no_tags = mp->no_tags; From a6562a22e40688cc97cb1ec97a59706d02c40082 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Mon, 11 Mar 2019 05:34:50 -0700 Subject: [PATCH 66/70] FIB: path parsing, table-id not fib-index (VPP-1586) Change-Id: Ib27952935393163eaabf005c69b1cbc2feca2b98 Signed-off-by: Neale Ranns --- src/vnet/fib/fib_api.c | 12 +++++++++++- test/test_abf.py | 30 +++++++++++++++++++++++++++--- test/test_bier.py | 10 ++++++---- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/vnet/fib/fib_api.c b/src/vnet/fib/fib_api.c index 3c832eb01fbd..1a7d6fde5736 100644 --- a/src/vnet/fib/fib_api.c +++ b/src/vnet/fib/fib_api.c @@ -55,10 +55,20 @@ fib_path_api_parse (const vl_api_fib_path_t *in, out->frp_proto = in->afi; // .frp_addr = (NULL == 
next_hop ? zero_addr : *next_hop), out->frp_sw_if_index = ntohl(in->sw_if_index); - out->frp_fib_index = ntohl(in->table_id); out->frp_weight = in->weight; out->frp_preference = in->preference; + if (DPO_PROTO_IP4 == out->frp_proto || + DPO_PROTO_IP6 == out->frp_proto || + DPO_PROTO_MPLS == out->frp_proto) + { + out->frp_fib_index = fib_table_find (dpo_proto_to_fib(out->frp_proto), + ntohl (in->table_id)); + + if (~0 == out->frp_fib_index) + return (VNET_API_ERROR_NO_SUCH_FIB); + } + /* * the special INVALID label meams we are not recursing via a * label. Exp-null value is never a valid via-label so that diff --git a/test/test_abf.py b/test/test_abf.py index fb30fc3018ce..55eb552803a1 100644 --- a/test/test_abf.py +++ b/test/test_abf.py @@ -3,7 +3,7 @@ from framework import VppTestCase, VppTestRunner from vpp_udp_encap import * from vpp_ip import DpoProto -from vpp_ip_route import VppIpRoute, VppRoutePath, VppIpTable +from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsLabel, VppIpTable from scapy.packet import Raw from scapy.layers.l2 import Ether, ARP @@ -144,9 +144,9 @@ class TestAbf(VppTestCase): def setUp(self): super(TestAbf, self).setUp() - self.create_pg_interfaces(range(4)) + self.create_pg_interfaces(range(5)) - for i in self.pg_interfaces: + for i in self.pg_interfaces[:4]: i.admin_up() i.config_ip4() i.resolve_arp() @@ -266,6 +266,30 @@ def test_abf4(self): self.send_and_assert_no_replies(self.pg1, p_2 * 65, "Detached") + # + # Swap to route via a next-hop in the non-default table + # + table_20 = VppIpTable(self, 20) + table_20.add_vpp_config() + + self.pg4.set_table_ip4(table_20.table_id) + self.pg4.admin_up() + self.pg4.config_ip4() + self.pg4.resolve_arp() + + abf_13 = VppAbfPolicy(self, 13, acl_1, + [VppRoutePath(self.pg4.remote_ip4, + 0xffffffff, + nh_table_id=table_20.table_id)]) + abf_13.add_vpp_config() + attach_5 = VppAbfAttach(self, 13, self.pg0.sw_if_index, 30) + attach_5.add_vpp_config() + + self.send_and_expect(self.pg0, p_1*65, 
self.pg4) + + self.pg4.unconfig_ip4() + self.pg4.set_table_ip4(0) + def test_abf6(self): """ IPv6 ACL Based Forwarding """ diff --git a/test/test_bier.py b/test/test_bier.py index cc4c9b3ea0ca..5d69ec7cbb4d 100644 --- a/test/test_bier.py +++ b/test/test_bier.py @@ -581,10 +581,12 @@ def bier_e2e(self, hdr_len_id, n_bytes, max_bp): proto=DpoProto.DPO_PROTO_BIER, nh_table_id=8)]) bier_route_1.add_vpp_config() - bier_route_max = VppBierRoute(self, bti, max_bp, - [VppRoutePath("0.0.0.0", - 0xffffffff, - nh_table_id=8)]) + bier_route_max = VppBierRoute( + self, bti, max_bp, + [VppRoutePath("0.0.0.0", + 0xffffffff, + nh_table_id=8, + proto=DpoProto.DPO_PROTO_BIER)]) bier_route_max.add_vpp_config() # From fe18c808e2bf82508448046a2e4f89dfce0c0cee Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Thu, 14 Feb 2019 14:58:40 +0000 Subject: [PATCH 67/70] FIB: pass a copy of the walk context since it can realloc when new ctx are added. If not we can get some nasty memory corruption. Change-Id: I617709c3013acbcb8aee07dc147894f0de896555 Signed-off-by: Neale Ranns (cherry picked from commit 58085f2f6757e464196b283cc8335ab5cf34a012) --- src/vnet/fib/fib_walk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vnet/fib/fib_walk.c b/src/vnet/fib/fib_walk.c index d0942401153f..3fe586e37d68 100644 --- a/src/vnet/fib/fib_walk.c +++ b/src/vnet/fib/fib_walk.c @@ -350,7 +350,9 @@ fib_walk_advance (fib_node_index_t fwi) while (ii < n_ctxs) { - wrc = fib_node_back_walk_one(&sibling, &fwalk->fw_ctx[ii]); + fib_node_back_walk_ctx_t ctx = fwalk->fw_ctx[ii]; + + wrc = fib_node_back_walk_one(&sibling, &ctx); ii++; fwalk = fib_walk_get(fwi); From 1161ddaa6f7136cfbc541d4179420308a590d36e Mon Sep 17 00:00:00 2001 From: "Igor Mikhailov (imichail)" Date: Fri, 29 Mar 2019 19:25:15 -0700 Subject: [PATCH 68/70] SPAN: Add pending frame on current thread, not on main Previously, all frames were put for next node on the main thread, even if the execution was happening on a worker thread.
Also, refactor to use API function vnet_get_main() Change-Id: Ibefb1b3871563a78aa30352a37b9216537e15bf7 Signed-off-by: Igor Mikhailov (imichail) (cherry picked from commit 2d6fc6b082c9b4bf9481b58f68def13792822805) --- src/vnet/span/node.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/vnet/span/node.c b/src/vnet/span/node.c index 67f1d6e42bba..f4be2e928955 100644 --- a/src/vnet/span/node.c +++ b/src/vnet/span/node.c @@ -35,7 +35,7 @@ format_span_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); span_trace_t *t = va_arg (*args, span_trace_t *); - vnet_main_t *vnm = &vnet_main; + vnet_main_t *vnm = vnet_get_main (); s = format (s, "SPAN: mirrored %U -> %U", format_vnet_sw_if_index_name, vnm, t->src_sw_if_index, format_vnet_sw_if_index_name, vnm, t->mirror_sw_if_index); @@ -67,7 +67,7 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0, { vlib_buffer_t *c0; span_main_t *sm = &span_main; - vnet_main_t *vnm = &vnet_main; + vnet_main_t *vnm = vnet_get_main (); u32 *to_mirror_next = 0; u32 i; span_interface_t *si0; @@ -92,8 +92,7 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0, if (mirror_frames[i] == 0) { if (sf == SPAN_FEAT_L2) - mirror_frames[i] = vlib_get_frame_to_node (vnm->vlib_main, - l2output_node.index); + mirror_frames[i] = vlib_get_frame_to_node (vm, l2output_node.index); else mirror_frames[i] = vnet_get_frame_to_sw_interface (vnm, i); } @@ -134,7 +133,7 @@ span_node_inline_fn (vlib_main_t * vm, vlib_node_runtime_t * node, span_feat_t sf) { span_main_t *sm = &span_main; - vnet_main_t *vnm = &vnet_main; + vnet_main_t *vnm = vnet_get_main (); u32 n_left_from, *from, *to_next; u32 n_span_packets = 0; u32 next_index; @@ -262,7 +261,7 @@ span_node_inline_fn (vlib_main_t * vm, vlib_node_runtime_t * node, continue; if (sf == SPAN_FEAT_L2) - vlib_put_frame_to_node (vnm->vlib_main, l2output_node.index, f); + vlib_put_frame_to_node 
(vm, l2output_node.index, f); else vnet_put_frame_to_sw_interface (vnm, sw_if_index, f); mirror_frames[sw_if_index] = 0; From c2f8265c1db9daccd3c39e717e55c071e50132c3 Mon Sep 17 00:00:00 2001 From: Michal Cmarada Date: Wed, 14 Nov 2018 14:05:42 +0100 Subject: [PATCH 69/70] VPP-1477: Replace DatatypeConverter.printHexBinary with bytesToHex As of Java 11 javax.xml.bind.DatatypeConverter is no longer part of standard Java distribution, therefore it is replaced by equivalent method. Change-Id: I51726d0d0d02782bd3bb1dbdc54df5bd63bd8f15 Signed-off-by: Michal Cmarada (cherry picked from commit feb7092544a9e49370037b6d90b43e98c65e7a41) --- .../fd/vpp/jvpp/core/examples/L2AclExample.java | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/extras/japi/java/jvpp-core/io/fd/vpp/jvpp/core/examples/L2AclExample.java b/extras/japi/java/jvpp-core/io/fd/vpp/jvpp/core/examples/L2AclExample.java index f89043a3b0a0..9a17136a6d95 100644 --- a/extras/japi/java/jvpp-core/io/fd/vpp/jvpp/core/examples/L2AclExample.java +++ b/extras/japi/java/jvpp-core/io/fd/vpp/jvpp/core/examples/L2AclExample.java @@ -34,7 +34,6 @@ import io.fd.vpp.jvpp.core.dto.InputAclSetInterface; import io.fd.vpp.jvpp.core.dto.InputAclSetInterfaceReply; import io.fd.vpp.jvpp.core.future.FutureJVppCoreFacade; -import javax.xml.bind.DatatypeConverter; /** *

Tests L2 ACL creation and read.
Equivalent to the following vppctl commands:
@@ -50,6 +49,8 @@ public class L2AclExample { private static final int LOCAL0_IFACE_ID = 0; + private static final char[] hexArray = "0123456789ABCDEF".toCharArray(); + private static ClassifyAddDelTable createClassifyTable() { ClassifyAddDelTable request = new ClassifyAddDelTable(); @@ -67,6 +68,16 @@ private static ClassifyAddDelTable createClassifyTable() { return request; } + private static String bytesToHex(byte[] bytes) { + char[] hexChars = new char[bytes.length * 2]; + for ( int j = 0; j < bytes.length; j++ ) { + int v = bytes[j] & 0xFF; + hexChars[j * 2] = hexArray[v >>> 4]; + hexChars[j * 2 + 1] = hexArray[v & 0x0F]; + } + return new String(hexChars); + } + private static ClassifyTableInfo createClassifyTableInfoRequest(final int tableId) { ClassifyTableInfo request = new ClassifyTableInfo(); request.tableId = tableId; @@ -120,7 +131,7 @@ private static void print(ClassifyTableIdsReply reply) { private static void print(final ClassifyTableInfoReply reply) { System.out.println(reply); if (reply != null) { - System.out.println("Mask hex: " + DatatypeConverter.printHexBinary(reply.mask)); + System.out.println("Mask hex: " + bytesToHex(reply.mask)); } } @@ -132,7 +143,7 @@ private static void print(final ClassifySessionDetailsReplyDump reply) { System.out.println(reply); reply.classifySessionDetails.forEach(detail -> { System.out.println(detail); - System.out.println("Match hex: " + DatatypeConverter.printHexBinary(detail.match)); + System.out.println("Match hex: " + bytesToHex(detail.match)); }); } From cce845e371f5c61eef9e2e8b64d2d1088c4e4b55 Mon Sep 17 00:00:00 2001 From: Steven Luong Date: Thu, 25 Apr 2019 11:19:49 -0700 Subject: [PATCH 70/70] mp_safe SW_INTERFACE_DUMP, SW_INTERFACE_DETAILS, SW_INTERFACE_TAG_ADD_DEL, BRIDGE_DOMAIN_DUMP, CONTROL_PING, CONTROL_PING_REPLY, and show interface CLI Change-Id: I2927573b66bb5dd134b37ffb72af0e6676750917 Signed-off-by: Steven Luong (cherry picked from commit 15c31921a628c5500cbed2ebc588d7ddbaa970a3) --- 
src/vnet/interface_api.c | 5 +++++ src/vnet/interface_cli.c | 1 + src/vnet/l2/l2_api.c | 3 +++ src/vpp/api/api.c | 2 ++ 4 files changed, 11 insertions(+) diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 3d8f47d4cb17..8713ae6e48b0 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -1378,6 +1378,11 @@ interface_api_hookup (vlib_main_t * vm) foreach_vpe_api_msg; #undef _ + /* Mark these APIs as mp safe */ + am->is_mp_safe[VL_API_SW_INTERFACE_DUMP] = 1; + am->is_mp_safe[VL_API_SW_INTERFACE_DETAILS] = 1; + am->is_mp_safe[VL_API_SW_INTERFACE_TAG_ADD_DEL] = 1; + /* * Set up the (msg_name, crc, message-id) table */ diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index 360898ea0fb1..fdbc1a82f6fa 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -469,6 +469,7 @@ VLIB_CLI_COMMAND (show_sw_interfaces_command, static) = { .path = "show interface", .short_help = "show interface [address|addr|features|feat] [ [ [..]]] [verbose]", .function = show_sw_interfaces, + .is_mp_safe = 1, }; /* *INDENT-ON* */ diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index eb04459f2347..1e14b1c7a508 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -986,6 +986,9 @@ l2_api_hookup (vlib_main_t * vm) foreach_vpe_api_msg; #undef _ + /* Mark VL_API_BRIDGE_DOMAIN_DUMP as mp safe */ + am->is_mp_safe[VL_API_BRIDGE_DOMAIN_DUMP] = 1; + /* * Set up the (msg_name, crc, message-id) table */ diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 86865099532d..dca33e99e47d 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -525,6 +525,8 @@ vpe_api_hookup (vlib_main_t * vm) /* * Thread-safe API messages */ + am->is_mp_safe[VL_API_CONTROL_PING] = 1; + am->is_mp_safe[VL_API_CONTROL_PING_REPLY] = 1; am->is_mp_safe[VL_API_IP_ADD_DEL_ROUTE] = 1; am->is_mp_safe[VL_API_GET_NODE_GRAPH] = 1;