diff --git a/Documentation/topics/dpdk/bridge.rst b/Documentation/topics/dpdk/bridge.rst index b79d9871b12..ceee9101548 100644 --- a/Documentation/topics/dpdk/bridge.rst +++ b/Documentation/topics/dpdk/bridge.rst @@ -81,6 +81,30 @@ using the following command:: $ ovs-vsctl get Interface statistics +Simple Match Lookup +------------------- + +There are cases where users might want simple forwarding or drop rules for all +packets received from a specific port, e.g. :: + + in_port=1,actions=2 + in_port=2,actions=IN_PORT + in_port=3,vlan_tci=0x1234/0x1fff,actions=drop + in_port=4,actions=push_vlan:0x8100,set_field:4196->vlan_vid,output:3 + +There are also cases where complex OpenFlow rules can be simplified down to +datapath flows with very simple match criteria. + +In theory, for very simple forwarding, OVS doesn't need to parse packets at all +in order to follow these rules. In practice, due to various implementation +constraints, userspace datapath has to match at least on a small set of packet +fields. Some matching criteria (for example, ingress port) are not related to +the packet itself and others (for example, VLAN tag or Ethernet type) can be +extracted without fully parsing the packet. This allows OVS to significantly +speed up packet forwarding for these flows with simple match criteria. +Statistics on the number of packets matched in this way can be found in a +`simple match hits` counter of `ovs-appctl dpif-netdev/pmd-stats-show` command. + EMC Insertion Probability ------------------------- diff --git a/NEWS b/NEWS index 8b983f516b4..aaaa038b897 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,8 @@ Post-v2.16.0 --------------------- + - Userspace datapath: + * Optimized flow lookups for datapath flows with simple match criteria. + See 'Simple Match Lookup' in Documentation/topics/dpdk/bridge.rst. - DPDK: * EAL argument --socket-mem is no longer configured by default upon start-up. 
If dpdk-socket-mem and dpdk-alloc-mem are not specified, diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c index 560f3decb1a..b7131ba3f15 100644 --- a/lib/dpif-netdev-avx512.c +++ b/lib/dpif-netdev-avx512.c @@ -198,7 +198,8 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, if (mfex_hit) { pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(&key->mf); } else { - pkt_meta[i].tcp_flags = parse_tcp_flags(packet); + pkt_meta[i].tcp_flags = parse_tcp_flags(packet, + NULL, NULL, NULL); } pkt_meta[i].bytes = dp_packet_size(packet); diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c index d7676ea2b27..a2a7d8f0b88 100644 --- a/lib/dpif-netdev-perf.c +++ b/lib/dpif-netdev-perf.c @@ -232,10 +232,10 @@ pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s, uint64_t busy_iter = tot_iter >= idle_iter ? tot_iter - idle_iter : 0; ds_put_format(str, - " Iterations: %12"PRIu64" (%.2f us/it)\n" - " - Used TSC cycles: %12"PRIu64" (%5.1f %% of total cycles)\n" - " - idle iterations: %12"PRIu64" (%5.1f %% of used cycles)\n" - " - busy iterations: %12"PRIu64" (%5.1f %% of used cycles)\n", + " Iterations: %12"PRIu64" (%.2f us/it)\n" + " - Used TSC cycles: %12"PRIu64" (%5.1f %% of total cycles)\n" + " - idle iterations: %12"PRIu64" (%5.1f %% of used cycles)\n" + " - busy iterations: %12"PRIu64" (%5.1f %% of used cycles)\n", tot_iter, tot_cycles * us_per_cycle / tot_iter, tot_cycles, 100.0 * (tot_cycles / duration) / tsc_hz, idle_iter, @@ -244,16 +244,17 @@ pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s, 100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles); if (rx_packets > 0) { ds_put_format(str, - " Rx packets: %12"PRIu64" (%.0f Kpps, %.0f cycles/pkt)\n" - " Datapath passes: %12"PRIu64" (%.2f passes/pkt)\n" - " - PHWOL hits: %12"PRIu64" (%5.1f %%)\n" - " - MFEX Opt hits: %12"PRIu64" (%5.1f %%)\n" - " - EMC hits: %12"PRIu64" (%5.1f %%)\n" - " - SMC hits: %12"PRIu64" (%5.1f %%)\n" - " - Megaflow hits: %12"PRIu64" 
(%5.1f %%, %.2f " - "subtbl lookups/hit)\n" - " - Upcalls: %12"PRIu64" (%5.1f %%, %.1f us/upcall)\n" - " - Lost upcalls: %12"PRIu64" (%5.1f %%)\n", + " Rx packets: %12"PRIu64" (%.0f Kpps, %.0f cycles/pkt)\n" + " Datapath passes: %12"PRIu64" (%.2f passes/pkt)\n" + " - PHWOL hits: %12"PRIu64" (%5.1f %%)\n" + " - MFEX Opt hits: %12"PRIu64" (%5.1f %%)\n" + " - Simple Match hits:%12"PRIu64" (%5.1f %%)\n" + " - EMC hits: %12"PRIu64" (%5.1f %%)\n" + " - SMC hits: %12"PRIu64" (%5.1f %%)\n" + " - Megaflow hits: %12"PRIu64" (%5.1f %%, %.2f " + "subtbl lookups/hit)\n" + " - Upcalls: %12"PRIu64" (%5.1f %%, %.1f us/upcall)\n" + " - Lost upcalls: %12"PRIu64" (%5.1f %%)\n", rx_packets, (rx_packets / duration) / 1000, 1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets, passes, rx_packets ? 1.0 * passes / rx_packets : 0, @@ -261,6 +262,8 @@ pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s, 100.0 * stats[PMD_STAT_PHWOL_HIT] / passes, stats[PMD_STAT_MFEX_OPT_HIT], 100.0 * stats[PMD_STAT_MFEX_OPT_HIT] / passes, + stats[PMD_STAT_SIMPLE_HIT], + 100.0 * stats[PMD_STAT_SIMPLE_HIT] / passes, stats[PMD_STAT_EXACT_HIT], 100.0 * stats[PMD_STAT_EXACT_HIT] / passes, stats[PMD_STAT_SMC_HIT], @@ -275,16 +278,18 @@ pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s, stats[PMD_STAT_LOST], 100.0 * stats[PMD_STAT_LOST] / passes); } else { - ds_put_format(str, " Rx packets: %12d\n", 0); + ds_put_format(str, + " Rx packets: %12d\n", 0); } if (tx_packets > 0) { ds_put_format(str, - " Tx packets: %12"PRIu64" (%.0f Kpps)\n" - " Tx batches: %12"PRIu64" (%.2f pkts/batch)\n", + " Tx packets: %12"PRIu64" (%.0f Kpps)\n" + " Tx batches: %12"PRIu64" (%.2f pkts/batch)\n", tx_packets, (tx_packets / duration) / 1000, tx_batches, 1.0 * tx_packets / tx_batches); } else { - ds_put_format(str, " Tx packets: %12d\n\n", 0); + ds_put_format(str, + " Tx packets: %12d\n\n", 0); } } diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h index 834c2626009..9673dddd835 100644 --- 
a/lib/dpif-netdev-perf.h +++ b/lib/dpif-netdev-perf.h @@ -58,6 +58,7 @@ extern "C" { enum pmd_stat_type { PMD_STAT_PHWOL_HIT, /* Packets that had a partial HWOL hit (phwol). */ PMD_STAT_MFEX_OPT_HIT, /* Packets that had miniflow optimized match. */ + PMD_STAT_SIMPLE_HIT, /* Packets that had a simple match hit. */ PMD_STAT_EXACT_HIT, /* Packets that had an exact match (emc). */ PMD_STAT_SMC_HIT, /* Packets that had a sig match hit (SMC). */ PMD_STAT_MASKED_HIT, /* Packets that matched in the flow table. */ diff --git a/lib/dpif-netdev-private-flow.h b/lib/dpif-netdev-private-flow.h index 30306606758..66016eb0995 100644 --- a/lib/dpif-netdev-private-flow.h +++ b/lib/dpif-netdev-private-flow.h @@ -87,6 +87,8 @@ struct dp_netdev_flow { /* Hash table index by unmasked flow. */ const struct cmap_node node; /* In owning dp_netdev_pmd_thread's */ /* 'flow_table'. */ + const struct cmap_node simple_match_node; /* In dp_netdev_pmd_thread's + 'simple_match_table'. */ const struct cmap_node mark_node; /* In owning flow_mark's mark_to_flow */ const ovs_u128 ufid; /* Unique flow identifier. */ const ovs_u128 mega_ufid; /* Unique mega flow identifier. */ @@ -100,7 +102,8 @@ struct dp_netdev_flow { struct ovs_refcount ref_cnt; bool dead; - uint32_t mark; /* Unique flow mark assigned to a flow */ + uint32_t mark; /* Unique flow mark for netdev offloading. */ + uint64_t simple_match_mark; /* Unique flow mark for the simple match. */ /* Statistics. */ struct dp_netdev_flow_stats stats; diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h index ac4885538c4..020047ea68d 100644 --- a/lib/dpif-netdev-private-thread.h +++ b/lib/dpif-netdev-private-thread.h @@ -26,6 +26,7 @@ #include #include +#include "ccmap.h" #include "cmap.h" #include "dpif-netdev-private-dfc.h" @@ -86,12 +87,18 @@ struct dp_netdev_pmd_thread { /* Flow-Table and classifiers * - * Writers of 'flow_table' must take the 'flow_mutex'. 
Corresponding - * changes to 'classifiers' must be made while still holding the - * 'flow_mutex'. + * Writers of 'flow_table'/'simple_match_table' and their n* ccmap's must + * take the 'flow_mutex'. Corresponding changes to 'classifiers' must be + * made while still holding the 'flow_mutex'. */ struct ovs_mutex flow_mutex; struct cmap flow_table OVS_GUARDED; /* Flow table. */ + struct cmap simple_match_table OVS_GUARDED; /* Flow table with simple + match flows only. */ + /* Number of flows in the 'flow_table' per in_port. */ + struct ccmap n_flows OVS_GUARDED; + /* Number of flows in the 'simple_match_table' per in_port. */ + struct ccmap n_simple_flows OVS_GUARDED; /* One classifier per in_port polled by the pmd */ struct cmap classifiers; diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man index 607750badfa..8cd84741693 100644 --- a/lib/dpif-netdev-unixctl.man +++ b/lib/dpif-netdev-unixctl.man @@ -11,10 +11,11 @@ Shows performance statistics for one or all pmd threads of the datapath \fIdp\fR. The special thread "main" sums up the statistics of every non pmd thread. -The sum of "emc hits", "smc hits", "megaflow hits" and "miss" is the number of -packet lookups performed by the datapath. Beware that a recirculated packet -experiences one additional lookup per recirculation, so there may be -more lookups than forwarded packets in the datapath. +The sum of "phwol hits", "simple match hits", "emc hits", "smc hits", +"megaflow hits" and "miss" is the number of packet lookups performed by the +datapath. Beware that a recirculated packet experiences one additional lookup +per recirculation, so there may be more lookups than forwarded packets in the +datapath. The MFEX Opt hits displays the number of packets that are processed by the optimized miniflow extract implementations. 
@@ -140,8 +141,9 @@ pmd thread numa_id 0 core_id 1: Datapath passes: 3599415 (1.50 passes/pkt) - PHWOL hits: 0 ( 0.0 %) - MFEX Opt hits: 3570133 ( 99.2 %) + - Simple Match hits: 0 ( 0.0 %) - EMC hits: 336472 ( 9.3 %) - - SMC hits: 0 ( 0.0 %) + - SMC hits: 0 ( 0.0 %) - Megaflow hits: 3262943 ( 90.7 %, 1.00 subtbl lookups/hit) - Upcalls: 0 ( 0.0 %, 0.0 us/upcall) - Lost upcalls: 0 ( 0.0 %) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index a790df5fd69..6c07ffaf48d 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -35,6 +35,7 @@ #include #include "bitmap.h" +#include "ccmap.h" #include "cmap.h" #include "conntrack.h" #include "conntrack-tp.h" @@ -560,6 +561,20 @@ pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd); static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_flow *flow); +static void dp_netdev_simple_match_insert(struct dp_netdev_pmd_thread *pmd, + struct dp_netdev_flow *flow) + OVS_REQUIRES(pmd->flow_mutex); +static void dp_netdev_simple_match_remove(struct dp_netdev_pmd_thread *pmd, + struct dp_netdev_flow *flow) + OVS_REQUIRES(pmd->flow_mutex); + +static bool dp_netdev_flow_is_simple_match(const struct match *); +static bool dp_netdev_simple_match_enabled(const struct dp_netdev_pmd_thread *, + odp_port_t in_port); +static struct dp_netdev_flow *dp_netdev_simple_match_lookup( + const struct dp_netdev_pmd_thread *, + odp_port_t in_port, ovs_be16 dp_type, uint8_t nw_frag, ovs_be16 vlan_tci); + /* Updates the time in PMD threads context and should be called in three cases: * * 1. PMD structure initialization: @@ -659,6 +674,7 @@ pmd_info_show_stats(struct ds *reply, " avg. datapath passes per packet: %.02f\n" " phwol hits: %"PRIu64"\n" " mfex opt hits: %"PRIu64"\n" + " simple match hits: %"PRIu64"\n" " emc hits: %"PRIu64"\n" " smc hits: %"PRIu64"\n" " megaflow hits: %"PRIu64"\n" @@ -668,8 +684,11 @@ pmd_info_show_stats(struct ds *reply, " avg. 
packets per output batch: %.02f\n", total_packets, stats[PMD_STAT_RECIRC], passes_per_pkt, stats[PMD_STAT_PHWOL_HIT], - stats[PMD_STAT_MFEX_OPT_HIT], stats[PMD_STAT_EXACT_HIT], - stats[PMD_STAT_SMC_HIT], stats[PMD_STAT_MASKED_HIT], + stats[PMD_STAT_MFEX_OPT_HIT], + stats[PMD_STAT_SIMPLE_HIT], + stats[PMD_STAT_EXACT_HIT], + stats[PMD_STAT_SMC_HIT], + stats[PMD_STAT_MASKED_HIT], lookups_per_hit, stats[PMD_STAT_MISS], stats[PMD_STAT_LOST], packets_per_batch); @@ -1956,6 +1975,7 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) stats->n_flows += cmap_count(&pmd->flow_table); pmd_perf_read_counters(&pmd->perf_stats, pmd_stats); stats->n_hit += pmd_stats[PMD_STAT_PHWOL_HIT]; + stats->n_hit += pmd_stats[PMD_STAT_SIMPLE_HIT]; stats->n_hit += pmd_stats[PMD_STAT_EXACT_HIT]; stats->n_hit += pmd_stats[PMD_STAT_SMC_HIT]; stats->n_hit += pmd_stats[PMD_STAT_MASKED_HIT]; @@ -2824,7 +2844,9 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port); ovs_assert(cls != NULL); dpcls_remove(cls, &flow->cr); + dp_netdev_simple_match_remove(pmd, flow); cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid)); + ccmap_dec(&pmd->n_flows, odp_to_u32(in_port)); if (flow->mark != INVALID_FLOW_MARK) { queue_netdev_flow_del(pmd, flow); } @@ -3580,6 +3602,177 @@ dp_netdev_get_mega_ufid(const struct match *match, ovs_u128 *mega_ufid) odp_flow_key_hash(&masked_flow, sizeof masked_flow, mega_ufid); } +static uint64_t +dp_netdev_simple_match_mark(odp_port_t in_port, ovs_be16 dl_type, + uint8_t nw_frag, ovs_be16 vlan_tci) +{ + /* Simple Match Mark: + * + * BE: + * +-----------------+-------------++---------+---+-----------+ + * | in_port | dl_type || nw_frag |CFI| VID(12) | + * +-----------------+-------------++---------+---+-----------+ + * 0 32 47 49 51 52 63 + * + * LE: + * +-----------------+-------------+------++-------+---+------+ + * | in_port | dl_type |VID(8)||nw_frag|CFI|VID(4)| + * 
+-----------------+-------------+------++-------+---+------+ + * 0 32 47 48 55 57 58 59 60 63 + * + * Big Endian Little Endian + * in_port : 32 bits [ 0..31] in_port : 32 bits [ 0..31] + * dl_type : 16 bits [32..47] dl_type : 16 bits [32..47] + * unused : 1 bit [48..48] vlan VID: 8 bits [48..55] + * nw_frag : 2 bits [49..50] unused : 1 bit [56..56] + * vlan CFI: 1 bit [51..51] nw_frag : 2 bits [57..58] + * vlan VID: 12 bits [52..63] vlan CFI: 1 bit [59..59] + * vlan VID: 4 bits [60..63] + * + * Layout is different for LE and BE in order to save a couple of + * network to host translations. + * */ + return ((uint64_t) odp_to_u32(in_port) << 32) + | ((OVS_FORCE uint32_t) dl_type << 16) +#if WORDS_BIGENDIAN + | (((uint16_t) nw_frag & FLOW_NW_FRAG_MASK) << VLAN_PCP_SHIFT) +#else + | ((nw_frag & FLOW_NW_FRAG_MASK) << (VLAN_PCP_SHIFT - 8)) +#endif + | (OVS_FORCE uint16_t) (vlan_tci & htons(VLAN_VID_MASK | VLAN_CFI)); +} + +static struct dp_netdev_flow * +dp_netdev_simple_match_lookup(const struct dp_netdev_pmd_thread *pmd, + odp_port_t in_port, ovs_be16 dl_type, + uint8_t nw_frag, ovs_be16 vlan_tci) +{ + uint64_t mark = dp_netdev_simple_match_mark(in_port, dl_type, + nw_frag, vlan_tci); + uint32_t hash = hash_uint64(mark); + struct dp_netdev_flow *flow; + bool found = false; + + CMAP_FOR_EACH_WITH_HASH (flow, simple_match_node, + hash, &pmd->simple_match_table) { + if (flow->simple_match_mark == mark) { + found = true; + break; + } + } + return found ? 
flow : NULL; +} + +static bool +dp_netdev_simple_match_enabled(const struct dp_netdev_pmd_thread *pmd, + odp_port_t in_port) +{ + return ccmap_find(&pmd->n_flows, odp_to_u32(in_port)) + == ccmap_find(&pmd->n_simple_flows, odp_to_u32(in_port)); +} + +static void +dp_netdev_simple_match_insert(struct dp_netdev_pmd_thread *pmd, + struct dp_netdev_flow *dp_flow) + OVS_REQUIRES(pmd->flow_mutex) +{ + odp_port_t in_port = dp_flow->flow.in_port.odp_port; + ovs_be16 vlan_tci = dp_flow->flow.vlans[0].tci; + ovs_be16 dl_type = dp_flow->flow.dl_type; + uint8_t nw_frag = dp_flow->flow.nw_frag; + + if (!dp_netdev_flow_ref(dp_flow)) { + return; + } + + /* Avoid double insertion. Should not happen in practice. */ + dp_netdev_simple_match_remove(pmd, dp_flow); + + uint64_t mark = dp_netdev_simple_match_mark(in_port, dl_type, + nw_frag, vlan_tci); + uint32_t hash = hash_uint64(mark); + + dp_flow->simple_match_mark = mark; + cmap_insert(&pmd->simple_match_table, + CONST_CAST(struct cmap_node *, &dp_flow->simple_match_node), + hash); + ccmap_inc(&pmd->n_simple_flows, odp_to_u32(in_port)); + + VLOG_DBG("Simple match insert: " + "core_id(%d),in_port(%"PRIu32"),mark(0x%016"PRIx64").", + pmd->core_id, in_port, mark); +} + +static void +dp_netdev_simple_match_remove(struct dp_netdev_pmd_thread *pmd, + struct dp_netdev_flow *dp_flow) + OVS_REQUIRES(pmd->flow_mutex) +{ + odp_port_t in_port = dp_flow->flow.in_port.odp_port; + ovs_be16 vlan_tci = dp_flow->flow.vlans[0].tci; + ovs_be16 dl_type = dp_flow->flow.dl_type; + uint8_t nw_frag = dp_flow->flow.nw_frag; + struct dp_netdev_flow *flow; + uint64_t mark = dp_netdev_simple_match_mark(in_port, dl_type, + nw_frag, vlan_tci); + uint32_t hash = hash_uint64(mark); + + flow = dp_netdev_simple_match_lookup(pmd, in_port, dl_type, + nw_frag, vlan_tci); + if (flow == dp_flow) { + VLOG_DBG("Simple match remove: " + "core_id(%d),in_port(%"PRIu32"),mark(0x%016"PRIx64").", + pmd->core_id, in_port, mark); + cmap_remove(&pmd->simple_match_table, + 
CONST_CAST(struct cmap_node *, &flow->simple_match_node), + hash); + ccmap_dec(&pmd->n_simple_flows, odp_to_u32(in_port)); + dp_netdev_flow_unref(flow); + } +} + +static bool +dp_netdev_flow_is_simple_match(const struct match *match) +{ + const struct flow *flow = &match->flow; + const struct flow_wildcards *wc = &match->wc; + + if (flow->recirc_id || flow->packet_type != htonl(PT_ETH)) { + return false; + } + + /* Check that flow matches only minimal set of fields that always set. + * Also checking that VLAN VID+CFI is an exact match, because these + * are not mandatory and could be masked. */ + struct flow_wildcards *minimal = xmalloc(sizeof *minimal); + ovs_be16 vlan_tci_mask = htons(VLAN_VID_MASK | VLAN_CFI); + + flow_wildcards_init_catchall(minimal); + /* 'dpif-netdev' always has following in exact match: + * - recirc_id <-- recirc_id == 0 checked on input. + * - in_port <-- Will be checked on input. + * - packet_type <-- Assuming all packets are PT_ETH. + * - dl_type <-- Need to match with. + * - vlan_tci <-- Need to match with. + * - and nw_frag for ip packets. <-- Need to match with. 
+ */ + WC_MASK_FIELD(minimal, recirc_id); + WC_MASK_FIELD(minimal, in_port); + WC_MASK_FIELD(minimal, packet_type); + WC_MASK_FIELD(minimal, dl_type); + WC_MASK_FIELD_MASK(minimal, vlans[0].tci, vlan_tci_mask); + WC_MASK_FIELD_MASK(minimal, nw_frag, FLOW_NW_FRAG_MASK); + + if (flow_wildcards_has_extra(minimal, wc) + || wc->masks.vlans[0].tci != vlan_tci_mask) { + free(minimal); + return false; + } + free(minimal); + + return true; +} + static struct dp_netdev_flow * dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, struct match *match, const ovs_u128 *ufid, @@ -3649,6 +3842,11 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node), dp_netdev_flow_hash(&flow->ufid)); + ccmap_inc(&pmd->n_flows, odp_to_u32(in_port)); + + if (dp_netdev_flow_is_simple_match(match)) { + dp_netdev_simple_match_insert(pmd, flow); + } queue_netdev_flow_put(pmd, flow, match, actions, actions_len, orig_in_port, DP_NETDEV_FLOW_OFFLOAD_OP_ADD); @@ -3774,7 +3972,7 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) * Netlink and struct flow representations, we have to do the same * here. This must be in sync with 'match' in handle_packet_upcall(). */ if (!match.wc.masks.vlans[0].tci) { - match.wc.masks.vlans[0].tci = htons(0xffff); + match.wc.masks.vlans[0].tci = htons(VLAN_VID_MASK | VLAN_CFI); } /* Must produce a netdev_flow_key for lookup. 
@@ -6771,6 +6969,9 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, ovs_mutex_init(&pmd->bond_mutex); cmap_init(&pmd->flow_table); cmap_init(&pmd->classifiers); + cmap_init(&pmd->simple_match_table); + ccmap_init(&pmd->n_flows); + ccmap_init(&pmd->n_simple_flows); pmd->ctx.last_rxq = NULL; pmd_thread_ctx_time_update(pmd); pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL; @@ -6824,6 +7025,9 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd) } cmap_destroy(&pmd->classifiers); cmap_destroy(&pmd->flow_table); + cmap_destroy(&pmd->simple_match_table); + ccmap_destroy(&pmd->n_flows); + ccmap_destroy(&pmd->n_simple_flows); ovs_mutex_destroy(&pmd->flow_mutex); seq_destroy(pmd->reload_seq); ovs_mutex_destroy(&pmd->port_mutex); @@ -7351,6 +7555,33 @@ dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd, return 0; } +/* Enqueues already classified packet into per-flow batches or the flow map, + * depending on the fact if batching enabled. */ +static inline void +dfc_processing_enqueue_classified_packet(struct dp_packet *packet, + struct dp_netdev_flow *flow, + uint16_t tcp_flags, + bool batch_enable, + struct packet_batch_per_flow *batches, + size_t *n_batches, + struct dp_packet_flow_map *flow_map, + size_t *map_cnt) + +{ + if (OVS_LIKELY(batch_enable)) { + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, + n_batches); + } else { + /* Flow batching should be performed only after fast-path + * processing is also completed for packets with emc miss + * or else it will result in reordering of packets with + * same datapath flows. */ + packet_enqueue_to_flow_map(packet, flow, tcp_flags, + flow_map, (*map_cnt)++); + } + +} + /* Try to process all ('cnt') the 'packets' using only the datapath flow cache * 'pmd->flow_cache'. 
If a flow is not found for a packet 'packets[i]', the * miniflow is copied into 'keys' and the packet pointer is moved at the @@ -7376,25 +7607,32 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, size_t *n_flows, uint8_t *index_map, bool md_is_valid, odp_port_t port_no) { - struct netdev_flow_key *key = &keys[0]; - size_t n_missed = 0, n_emc_hit = 0, n_phwol_hit = 0, n_mfex_opt_hit = 0; + const bool netdev_flow_api = netdev_is_flow_api_enabled(); + const uint32_t recirc_depth = *recirc_depth_get(); + const size_t cnt = dp_packet_batch_size(packets_); + size_t n_missed = 0, n_emc_hit = 0, n_phwol_hit = 0; + size_t n_mfex_opt_hit = 0, n_simple_hit = 0; struct dfc_cache *cache = &pmd->flow_cache; + struct netdev_flow_key *key = &keys[0]; struct dp_packet *packet; - const size_t cnt = dp_packet_batch_size(packets_); - uint32_t cur_min = pmd->ctx.emc_insert_min; - const uint32_t recirc_depth = *recirc_depth_get(); - const bool netdev_flow_api = netdev_is_flow_api_enabled(); - int i; - uint16_t tcp_flags; size_t map_cnt = 0; bool batch_enable = true; + const bool simple_match_enabled = + !md_is_valid && dp_netdev_simple_match_enabled(pmd, port_no); + /* 'simple_match_table' is a full flow table. If the flow is not there, + * upcall is required, and there is no chance to find a match in caches. */ + const bool smc_enable_db = !simple_match_enabled && pmd->ctx.smc_enable_db; + const uint32_t cur_min = simple_match_enabled + ? 0 : pmd->ctx.emc_insert_min; + pmd_perf_update_counter(&pmd->perf_stats, md_is_valid ? 
PMD_STAT_RECIRC : PMD_STAT_RECV, cnt); - + int i; DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) { - struct dp_netdev_flow *flow; + struct dp_netdev_flow *flow = NULL; + uint16_t tcp_flags; if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { dp_packet_delete(packet); @@ -7421,19 +7659,27 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, continue; } if (OVS_LIKELY(flow)) { - tcp_flags = parse_tcp_flags(packet); + tcp_flags = parse_tcp_flags(packet, NULL, NULL, NULL); n_phwol_hit++; - if (OVS_LIKELY(batch_enable)) { - dp_netdev_queue_batches(packet, flow, tcp_flags, batches, - n_batches); - } else { - /* Flow batching should be performed only after fast-path - * processing is also completed for packets with emc miss - * or else it will result in reordering of packets with - * same datapath flows. */ - packet_enqueue_to_flow_map(packet, flow, tcp_flags, - flow_map, map_cnt++); - } + dfc_processing_enqueue_classified_packet( + packet, flow, tcp_flags, batch_enable, + batches, n_batches, flow_map, &map_cnt); + continue; + } + } + + if (!flow && simple_match_enabled) { + ovs_be16 dl_type = 0, vlan_tci = 0; + uint8_t nw_frag = 0; + + tcp_flags = parse_tcp_flags(packet, &dl_type, &nw_frag, &vlan_tci); + flow = dp_netdev_simple_match_lookup(pmd, port_no, dl_type, + nw_frag, vlan_tci); + if (OVS_LIKELY(flow)) { + n_simple_hit++; + dfc_processing_enqueue_classified_packet( + packet, flow, tcp_flags, batch_enable, + batches, n_batches, flow_map, &map_cnt); continue; } } @@ -7450,17 +7696,9 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, if (OVS_LIKELY(flow)) { tcp_flags = miniflow_get_tcp_flags(&key->mf); n_emc_hit++; - if (OVS_LIKELY(batch_enable)) { - dp_netdev_queue_batches(packet, flow, tcp_flags, batches, - n_batches); - } else { - /* Flow batching should be performed only after fast-path - * processing is also completed for packets with emc miss - * or else it will result in reordering of packets with - * same datapath flows. 
*/ - packet_enqueue_to_flow_map(packet, flow, tcp_flags, - flow_map, map_cnt++); - } + dfc_processing_enqueue_classified_packet( + packet, flow, tcp_flags, batch_enable, + batches, n_batches, flow_map, &map_cnt); } else { /* Exact match cache missed. Group missed packets together at * the beginning of the 'packets' array. */ @@ -7488,9 +7726,11 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_PHWOL_HIT, n_phwol_hit); pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MFEX_OPT_HIT, n_mfex_opt_hit); + pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SIMPLE_HIT, + n_simple_hit); pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, n_emc_hit); - if (!pmd->ctx.smc_enable_db) { + if (!smc_enable_db) { return dp_packet_batch_size(packets_); } @@ -7539,7 +7779,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, * Netlink and struct flow representations, we have to do the same * here. This must be in sync with 'match' in dpif_netdev_flow_put(). */ if (!match.wc.masks.vlans[0].tci) { - match.wc.masks.vlans[0].tci = htons(0xffff); + match.wc.masks.vlans[0].tci = htons(VLAN_VID_MASK | VLAN_CFI); } /* We can't allow the packet batching in the next loop to execute diff --git a/lib/flow.c b/lib/flow.c index a021bc0ebaf..dd523c889b7 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -1114,22 +1114,29 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) } static ovs_be16 -parse_dl_type(const void **datap, size_t *sizep) +parse_dl_type(const void **datap, size_t *sizep, ovs_be16 *first_vlan_tci_p) { union flow_vlan_hdr vlans[FLOW_MAX_VLAN_HEADERS]; - parse_vlan(datap, sizep, vlans); + if (parse_vlan(datap, sizep, vlans) && first_vlan_tci_p) { + *first_vlan_tci_p = vlans[0].tci; + } return parse_ethertype(datap, sizep); } /* Parses and return the TCP flags in 'packet', converted to host byte order. * If 'packet' is not an Ethernet packet embedding TCP, returns 0. 
+ * 'dl_type_p' will be set only if the 'packet' is an Ethernet packet. + * 'nw_frag_p' will be set only if the 'packet' is an IP packet. + * 'first_vlan_tci_p' will be set only if the 'packet' contains a VLAN header. * * The caller must ensure that 'packet' is at least ETH_HEADER_LEN bytes * long.'*/ uint16_t -parse_tcp_flags(struct dp_packet *packet) +parse_tcp_flags(struct dp_packet *packet, + ovs_be16 *dl_type_p, uint8_t *nw_frag_p, + ovs_be16 *first_vlan_tci_p) { const void *data = dp_packet_data(packet); const char *frame = (const char *)data; @@ -1143,7 +1150,10 @@ parse_tcp_flags(struct dp_packet *packet) dp_packet_reset_offsets(packet); - dl_type = parse_dl_type(&data, &size); + dl_type = parse_dl_type(&data, &size, first_vlan_tci_p); + if (dl_type_p) { + *dl_type_p = dl_type; + } if (OVS_UNLIKELY(eth_type_mpls(dl_type))) { packet->l2_5_ofs = (char *)data - frame; } @@ -1190,6 +1200,10 @@ parse_tcp_flags(struct dp_packet *packet) return 0; } + if (nw_frag_p) { + *nw_frag_p = nw_frag; + } + packet->l4_ofs = (uint16_t)((char *)data - frame); if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP && size >= TCP_HEADER_LEN) { diff --git a/lib/flow.h b/lib/flow.h index 467b2801da1..c647ad83c25 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -134,7 +134,8 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto, uint8_t *nw_frag, const struct ovs_16aligned_ip6_frag **frag_hdr); bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key); -uint16_t parse_tcp_flags(struct dp_packet *packet); +uint16_t parse_tcp_flags(struct dp_packet *packet, ovs_be16 *dl_type_p, + uint8_t *nw_frag_p, ovs_be16 *first_vlan_tci_p); static inline uint64_t flow_get_xreg(const struct flow *flow, int idx) diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 4023531256a..26f705c5358 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -2434,7 +2434,7 @@ netdev_offload_dpdk_hw_miss_packet_recover(struct netdev 
*netdev, ret = EOPNOTSUPP; goto close_vport_netdev; } - parse_tcp_flags(packet); + parse_tcp_flags(packet, NULL, NULL, NULL); if (vport_netdev->netdev_class->pop_header(packet) == NULL) { /* If there is an error with popping the header, the packet is * freed. In this case it should not continue SW processing. diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index 53eee185add..b58df036552 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -192,7 +192,7 @@ skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc ovs-appctl revalidator/wait # Dump the datapath flow to see that it goes to p2 ("actions:2"). AT_CHECK([ovs-appctl dpif/dump-flows br0], [0], [dnl -recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type(0x8100),vlan(vid=1000,pcp=5),encap(eth_type(0x0800),ipv4(frag=no)), packets:0, bytes:0, used:never, actions:2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type(0x8100),vlan(vid=1000,pcp=5/0x0),encap(eth_type(0x0800),ipv4(frag=no)), packets:0, bytes:0, used:never, actions:2 ]) # Delete the flows, then add new flows that would not match the same @@ -210,7 +210,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00: ovs-appctl revalidator/wait # Dump the datapath flow to see that it goes to p1 ("actions:IN_PORT"). 
AT_CHECK([ovs-appctl dpif/dump-flows br0 | strip_timers], [0], [dnl -recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type(0x8100),vlan(vid=1000,pcp=5),encap(eth_type(0x0800),ipv4(frag=no)), packets:1, bytes:64, used:0.0s, actions:1 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type(0x8100),vlan(vid=1000,pcp=5/0x0),encap(eth_type(0x0800),ipv4(frag=no)), packets:1, bytes:64, used:0.0s, actions:1 ]) OVS_VSWITCHD_STOP AT_CLEANUP]) @@ -428,7 +428,7 @@ skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc # Check that flow successfully offloaded. OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log]) AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl -p1: flow put[[create]]: flow match: recirc_id=0,eth,ip,in_port=1,vlan_tci=0x0000,nw_frag=no, mark: 1 +p1: flow put[[create]]: flow match: recirc_id=0,eth,ip,in_port=1,vlan_tci=0x0000/0x1fff,nw_frag=no, mark: 1 ]) # Check that datapath flow installed successfully. AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl @@ -439,7 +439,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), a # Check for succesfull packet matching with installed offloaded flow. 
AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl -p1: packet: ip,vlan_tci=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=64 matches with flow: recirc_id=0,eth,ip,vlan_tci=0x0000,nw_frag=no with mark: 1 +p1: packet: ip,vlan_tci=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=64 matches with flow: recirc_id=0,eth,ip,vlan_tci=0x0000/0x1fff,nw_frag=no with mark: 1 ]) ovs-appctl revalidator/wait @@ -495,11 +495,11 @@ packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type # Check that flow successfully offloaded. OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log]) AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl -p1: flow put[[create]]: flow match: recirc_id=0,eth,udp,in_port=1,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82, mark: 1 +p1: flow put[[create]]: flow match: recirc_id=0,eth,udp,in_port=1,dl_vlan=99,nw_src=127.0.0.1,nw_frag=no,tp_dst=82, mark: 1 ]) # Check that datapath flow installed successfully. AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl -recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0800),ipv4(src=127.0.0.1,proto=17,frag=no),udp(dst=82)), actions: +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp=7/0x0),encap(eth_type(0x0800),ipv4(src=127.0.0.1,proto=17,frag=no),udp(dst=82)), actions: ]) # Inject the same packet again. AT_CHECK([ovs-appctl netdev-dummy/receive p1 $packet --len 64], [0]) @@ -507,13 +507,13 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp= # Check for succesfull packet matching with installed offloaded flow. 
AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl p1: packet: udp,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=81,tp_dst=82 dnl -matches with flow: recirc_id=0,eth,udp,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82 with mark: 1 +matches with flow: recirc_id=0,eth,udp,dl_vlan=99,nw_src=127.0.0.1,nw_frag=no,tp_dst=82 with mark: 1 ]) ovs-appctl revalidator/wait # Dump the datapath flow to see that actions was executed for a packet. AT_CHECK([ovs-appctl dpif/dump-flows br0 | strip_timers], [0], [dnl -recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0800),ipv4(src=127.0.0.1,proto=17,frag=no),udp(dst=82)), dnl +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp=7/0x0),encap(eth_type(0x0800),ipv4(src=127.0.0.1,proto=17,frag=no),udp(dst=82)), dnl packets:1, bytes:64, used:0.0s, actions:set(ipv4(src=192.168.0.7)),set(udp(dst=3773)),1 ]) diff --git a/tests/nsh.at b/tests/nsh.at index b958be253a1..4d49f120170 100644 --- a/tests/nsh.at +++ b/tests/nsh.at @@ -173,7 +173,7 @@ AT_CHECK([ ovs-appctl dpctl/dump-flows dummy@ovs-dummy | strip_used | grep -v ipv6 | sort ], [0], [flow-dump from the main thread: recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:1, bytes:98, used:0.0s, actions:push_vlan(vid=100,pcp=0),push_nsh(flags=0,ttl=63,mdtype=1,np=3,spi=0x0,si=255,c1=0x0,c2=0x0,c3=0x0,c4=0x0),pop_nsh(),recirc(0x4) -recirc_id(0x4),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=100,pcp=0),encap(eth_type(0x0800),ipv4(frag=no)), packets:1, bytes:102, used:0.0s, actions:2 +recirc_id(0x4),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=100),encap(eth_type(0x0800),ipv4(frag=no)), packets:1, bytes:102, used:0.0s, actions:2 ]) OVS_VSWITCHD_STOP diff --git a/tests/pmd.at b/tests/pmd.at 
index c875a744f54..a2f9d34a2a0 100644 --- a/tests/pmd.at +++ b/tests/pmd.at @@ -380,13 +380,14 @@ dummy@ovs-dummy: hit:0 missed:0 p0 7/1: (dummy-pmd: configured_rx_queues=4, configured_tx_queues=, requested_rx_queues=4, requested_tx_queues=) ]) -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 11], [0], [dnl +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 12], [0], [dnl pmd thread numa_id core_id : packets received: 0 packet recirculations: 0 avg. datapath passes per packet: 0.00 phwol hits: 0 mfex opt hits: 0 + simple match hits: 0 emc hits: 0 smc hits: 0 megaflow hits: 0 @@ -413,13 +414,14 @@ AT_CHECK([cat ovs-vswitchd.log | filter_flow_install | strip_xout], [0], [dnl recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:77,dst=50:54:00:00:01:78),eth_type(0x0800),ipv4(frag=no), actions: ]) -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 11], [0], [dnl +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 12], [0], [dnl pmd thread numa_id core_id : packets received: 20 packet recirculations: 0 avg. datapath passes per packet: 1.00 phwol hits: 0 mfex opt hits: 0 + simple match hits: 0 emc hits: 19 smc hits: 0 megaflow hits: 0