diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 3bafb5124ac08..3e793ac66baf7 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -424,6 +424,8 @@ enum flow_block_binder_type { FLOW_BLOCK_BINDER_TYPE_UNSPEC, FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS, FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS, + FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP, + FLOW_BLOCK_BINDER_TYPE_RED_MARK, }; struct flow_block { diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ff017e5b3ea27..690a7f49c8f90 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -32,6 +32,12 @@ struct tcf_block_ext_info { u32 block_index; }; +struct tcf_qevent { + struct tcf_block *block; + struct tcf_block_ext_info info; + struct tcf_proto __rcu *filter_chain; +}; + struct tcf_block_cb; bool tcf_queue_work(struct rcu_work *rwork, work_func_t func); @@ -553,6 +559,49 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, void *cb_priv, u32 *flags, unsigned int *in_hw_count); unsigned int tcf_exts_num_actions(struct tcf_exts *exts); +#ifdef CONFIG_NET_CLS_ACT +int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, + enum flow_block_binder_type binder_type, + struct nlattr *block_index_attr, + struct netlink_ext_ack *extack); +void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch); +int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr, + struct netlink_ext_ack *extack); +struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, + spinlock_t *root_lock, struct sk_buff **to_free, int *ret); +int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe); +#else +static inline int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, + enum flow_block_binder_type binder_type, + struct nlattr *block_index_attr, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static inline void tcf_qevent_destroy(struct tcf_qevent *qe, struct 
Qdisc *sch) +{ +} + +static inline int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static inline struct sk_buff * +tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, + spinlock_t *root_lock, struct sk_buff **to_free, int *ret) +{ + return skb; +} + +static inline int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe) +{ + return 0; +} +#endif + struct tc_cls_u32_knode { struct tcf_exts *exts; struct tcf_result *res; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c510b03b97513..fceb3d63c9256 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -57,6 +57,7 @@ struct qdisc_skb_head { struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, + spinlock_t *root_lock, struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *sch); unsigned int flags; @@ -241,6 +242,7 @@ struct Qdisc_ops { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, + spinlock_t *root_lock, struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); @@ -788,11 +790,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, #endif } -static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { qdisc_calculate_pkt_len(skb, sch); - return sch->enqueue(skb, sch, to_free); + return sch->enqueue(skb, sch, root_lock, to_free); } static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index a95f3ae7ab37c..9e7c2c6078456 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -257,6 +257,8 @@ enum { TCA_RED_STAB, TCA_RED_MAX_P, TCA_RED_FLAGS, /* bitfield32 */ + 
TCA_RED_EARLY_DROP_BLOCK, /* u32 */ + TCA_RED_MARK_BLOCK, /* u32 */ __TCA_RED_MAX, }; diff --git a/net/core/dev.c b/net/core/dev.c index 3a46b86cbd67f..c02bae9278122 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3749,7 +3749,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = q->enqueue(skb, q, NULL, &to_free) & NET_XMIT_MASK; qdisc_run(q); if (unlikely(to_free)) @@ -3792,7 +3792,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = q->enqueue(skb, q, root_lock, &to_free) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5bfa6b985bb89..1b14d5f57e7f6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3748,6 +3748,125 @@ unsigned int tcf_exts_num_actions(struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_num_actions); +#ifdef CONFIG_NET_CLS_ACT +static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr, + u32 *p_block_index, + struct netlink_ext_ack *extack) +{ + *p_block_index = nla_get_u32(block_index_attr); + if (!*p_block_index) { + NL_SET_ERR_MSG(extack, "Block number may not be zero"); + return -EINVAL; + } + + return 0; +} + +int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, + enum flow_block_binder_type binder_type, + struct nlattr *block_index_attr, + struct netlink_ext_ack *extack) +{ + u32 block_index; + int err; + + if (!block_index_attr) + return 0; + + err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack); + if (err) + return err; + + if (!block_index) + return 0; + + qe->info.binder_type = binder_type; + qe->info.chain_head_change = tcf_chain_head_change_dflt; + qe->info.chain_head_change_priv = 
&qe->filter_chain; + qe->info.block_index = block_index; + + return tcf_block_get_ext(&qe->block, sch, &qe->info, extack); +} +EXPORT_SYMBOL(tcf_qevent_init); + +void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch) +{ + if (qe->info.block_index) + tcf_block_put_ext(qe->block, sch, &qe->info); +} +EXPORT_SYMBOL(tcf_qevent_destroy); + +int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr, + struct netlink_ext_ack *extack) +{ + u32 block_index; + int err; + + if (!block_index_attr) + return 0; + + err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack); + if (err) + return err; + + /* Bounce newly-configured block or change in block. */ + if (block_index != qe->info.block_index) { + NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(tcf_qevent_validate_change); + +struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, + spinlock_t *root_lock, struct sk_buff **to_free, int *ret) +{ + struct tcf_result cl_res; + struct tcf_proto *fl; + + if (!qe->info.block_index) + return skb; + + fl = rcu_dereference_bh(qe->filter_chain); + + if (root_lock) + spin_unlock(root_lock); + + switch (tcf_classify(skb, fl, &cl_res, false)) { + case TC_ACT_SHOT: + qdisc_qstats_drop(sch); + __qdisc_drop(skb, to_free); + *ret = __NET_XMIT_BYPASS; + return NULL; + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + case TC_ACT_TRAP: + __qdisc_drop(skb, to_free); + *ret = __NET_XMIT_STOLEN; + return NULL; + case TC_ACT_REDIRECT: + skb_do_redirect(skb); + *ret = __NET_XMIT_STOLEN; + return NULL; + } + + if (root_lock) + spin_lock(root_lock); + + return skb; +} +EXPORT_SYMBOL(tcf_qevent_handle); + +int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe) +{ + if (!qe->info.block_index) + return 0; + return nla_put_u32(skb, attr_name, qe->info.block_index); +} +EXPORT_SYMBOL(tcf_qevent_dump); +#endif + static 
__net_init int tcf_net_init(struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ee12ca9f55b4f..fb6b16c4e46d6 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -374,7 +374,7 @@ static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl, /* --------------------------- Qdisc operations ---------------------------- */ -static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct atm_qdisc_data *p = qdisc_priv(sch); @@ -432,7 +432,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, #endif } - ret = qdisc_enqueue(skb, flow->q, to_free); + ret = qdisc_enqueue(skb, flow->q, root_lock, to_free); if (ret != NET_XMIT_SUCCESS) { drop: __maybe_unused if (net_xmit_drop_count(ret)) { diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index a7f7667ae9849..187644657c4f9 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -13,7 +13,7 @@ #include #include -static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { qdisc_drop(skb, sch, to_free); diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 65a95cb094e8b..e9c502dd29a27 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1687,7 +1687,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, static void cake_reconfigure(struct Qdisc *sch); -static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct cake_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 39b427dc75128..052d4a1af69a4 100644 --- 
a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -356,7 +356,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) } static int -cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, +cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct cbq_sched_data *q = qdisc_priv(sch); @@ -373,7 +373,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return ret; } - ret = qdisc_enqueue(skb, cl->q, to_free); + ret = qdisc_enqueue(skb, cl->q, root_lock, to_free); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; cbq_mark_toplevel(q, cl); diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 2eaac2ff380fa..7af15ebe07f78 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -77,7 +77,7 @@ struct cbs_sched_data { s64 sendslope; /* in bytes/s */ s64 idleslope; /* in bytes/s */ struct qdisc_watchdog watchdog; - int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, + int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free); struct sk_buff *(*dequeue)(struct Qdisc *sch); struct Qdisc *qdisc; @@ -85,13 +85,13 @@ struct cbs_sched_data { }; static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct Qdisc *child, + struct Qdisc *child, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); int err; - err = child->ops->enqueue(skb, child, to_free); + err = child->ops->enqueue(skb, child, root_lock, to_free); if (err != NET_XMIT_SUCCESS) return err; @@ -101,16 +101,16 @@ static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } -static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, +static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; - return cbs_child_enqueue(skb, sch, qdisc, to_free); + return 
cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free); } -static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, +static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); @@ -124,15 +124,15 @@ static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, q->last = ktime_get_ns(); } - return cbs_child_enqueue(skb, sch, qdisc, to_free); + return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free); } -static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); - return q->enqueue(skb, sch, to_free); + return q->enqueue(skb, sch, root_lock, to_free); } /* timediff is in ns, slope is in bytes/s */ diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index bd618b00d3193..baf3faee31aac 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -210,7 +210,7 @@ static bool choke_match_random(const struct choke_sched_data *q, return choke_match_flow(oskb, nskb); } -static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct choke_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 30169b3adbbb0..1d94837abdd83 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -108,7 +108,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) return skb; } -static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct codel_sched_data *q; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 07a2b0b354954..0d5c9a8ec61da 100644 --- a/net/sched/sch_drr.c 
+++ b/net/sched/sch_drr.c @@ -337,7 +337,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, return NULL; } -static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); @@ -355,7 +355,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, } first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, to_free); + err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 05605b30bef3a..fbe49fffcdbb3 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -198,7 +198,7 @@ static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, /* --------------------------- Qdisc operations ---------------------------- */ -static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); @@ -267,7 +267,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, } } - err = qdisc_enqueue(skb, p->q, to_free); + err = qdisc_enqueue(skb, p->q, root_lock, to_free); if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index c48f91075b5c6..7a7c50a681154 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -160,7 +160,7 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code) } static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, - struct sk_buff **to_free) + spinlock_t *root_lock, struct sk_buff **to_free) { struct etf_sched_data *q = qdisc_priv(sch); struct rb_node **p = &q->head.rb_root.rb_node, 
*parent = NULL; diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index a87e9159338c4..373dc5855d4e8 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -415,7 +415,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch, return &q->classes[band]; } -static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); @@ -433,7 +433,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, to_free); + err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index a579a4131d22d..b4da5b624ad86 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -16,7 +16,7 @@ /* 1 band FIFO pseudo-"scheduler" */ -static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) @@ -25,7 +25,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_drop(skb, sch, to_free); } -static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { if (likely(sch->q.qlen < sch->limit)) @@ -34,7 +34,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_drop(skb, sch, to_free); } -static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int prev_backlog; diff --git 
a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 2fb76fc0cc31b..a90d745c41e05 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -439,7 +439,7 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb, return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon)); } -static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct fq_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 459a784056c0f..6bf979f955091 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -181,7 +181,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, return idx; } -static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct fq_codel_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index fb760cee824e4..a27a250ab8f9a 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -125,7 +125,7 @@ static inline void flow_queue_add(struct fq_pie_flow *flow, skb->next = NULL; } -static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct fq_pie_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 265a61d011dfa..715cde1df9e46 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -520,7 +520,7 @@ EXPORT_SYMBOL(netif_carrier_off); cheaper. 
*/ -static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, +static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock, struct sk_buff **to_free) { __qdisc_drop(skb, to_free); @@ -614,7 +614,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv, return &priv->q[band]; } -static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, +static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock, struct sk_buff **to_free) { int band = prio2band[skb->priority & TC_PRIO_MAX]; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 8599c6f31b057..7d67c6cd6605b 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -161,7 +161,7 @@ static bool gred_per_vq_red_flags_used(struct gred_sched *table) return false; } -static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct gred_sched_data *q = NULL; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 433f2190960fe..7f6670044f0a9 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1528,8 +1528,8 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) return -1; } -static int -hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) +static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, + struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); struct hfsc_class *cl; @@ -1545,7 +1545,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, to_free); + err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 420ede8753229..ddc6bf1d85d0e 100644 --- 
a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -368,7 +368,7 @@ static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free) return bucket - q->buckets; } -static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct hhf_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 8184c87da8bec..52fc513688b1b 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -576,7 +576,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) cl->prio_activity = 0; } -static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { int uninitialized_var(ret); @@ -599,7 +599,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, __qdisc_drop(skb, to_free); return ret; #endif - } else if ((ret = qdisc_enqueue(skb, cl->leaf.q, + } else if ((ret = qdisc_enqueue(skb, cl->leaf.q, root_lock, to_free)) != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) { qdisc_qstats_drop(sch); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 1330ad2249317..648611f5c1052 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -57,7 +57,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) } static int -multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, +multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct Qdisc *qdisc; @@ -74,7 +74,7 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } #endif - ret = qdisc_enqueue(skb, qdisc, to_free); + ret = qdisc_enqueue(skb, qdisc, root_lock, to_free); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 84f82771cdf5d..8fb17483a34f3 
100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -431,7 +431,7 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, * NET_XMIT_DROP: queue length didn't change. * NET_XMIT_SUCCESS: one skb was queued. */ -static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct netem_sched_data *q = qdisc_priv(sch); @@ -480,7 +480,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; - rootq->enqueue(skb2, rootq, to_free); + rootq->enqueue(skb2, rootq, root_lock, to_free); q->duplicate = dupsave; rc_drop = NET_XMIT_SUCCESS; } @@ -604,7 +604,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; last_len = segs->len; - rc = qdisc_enqueue(segs, sch, to_free); + rc = qdisc_enqueue(segs, sch, root_lock, to_free); if (rc != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(rc)) qdisc_qstats_drop(sch); @@ -720,7 +720,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct sk_buff *to_free = NULL; int err; - err = qdisc_enqueue(skb, q->qdisc, &to_free); + err = qdisc_enqueue(skb, q->qdisc, NULL, &to_free); kfree_skb_list(to_free); if (err != NET_XMIT_SUCCESS && net_xmit_drop_count(err)) { diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index c65077f0c0f39..b305313b64e30 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -82,7 +82,7 @@ bool pie_drop_early(struct Qdisc *sch, struct pie_params *params, } EXPORT_SYMBOL_GPL(pie_drop_early); -static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct pie_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_plug.c 
b/net/sched/sch_plug.c index cbc2ebca4548c..e5f8b4769b4d6 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -84,7 +84,7 @@ struct plug_sched_data { u32 pkts_to_release; }; -static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct plug_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 647941702f9fc..a3e187f2603c0 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -65,8 +65,8 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) return q->queues[band]; } -static int -prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) +static int prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, + struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); struct Qdisc *qdisc; @@ -83,7 +83,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } #endif - ret = qdisc_enqueue(skb, qdisc, to_free); + ret = qdisc_enqueue(skb, qdisc, root_lock, to_free); if (ret == NET_XMIT_SUCCESS) { sch->qstats.backlog += len; sch->q.qlen++; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 0b05ac7c848eb..ede854516825c 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1194,7 +1194,7 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) return agg; } -static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb), gso_segs; @@ -1225,7 +1225,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, gso_segs = skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1; first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, to_free); + err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); if (net_xmit_drop_count(err)) { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 555a1b9e467fc..de2be4d04ed6b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -46,6 +46,8 @@ struct red_sched_data { struct red_vars vars; struct red_stats stats; struct Qdisc *qdisc; + struct tcf_qevent qe_early_drop; + struct tcf_qevent qe_mark; }; #define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP) @@ -65,7 +67,7 @@ static int red_use_nodrop(struct red_sched_data *q) return q->flags & TC_RED_NODROP; } -static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct red_sched_data *q = qdisc_priv(sch); @@ -92,6 +94,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (INET_ECN_set_ce(skb)) { q->stats.prob_mark++; + skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock, to_free, &ret); + if (!skb) + return NET_XMIT_CN | ret; } else if (!red_use_nodrop(q)) { q->stats.prob_drop++; goto congestion_drop; @@ -109,6 +114,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (INET_ECN_set_ce(skb)) { q->stats.forced_mark++; + skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock, to_free, &ret); + if (!skb) + return NET_XMIT_CN | ret; } else if (!red_use_nodrop(q)) { q->stats.forced_drop++; goto congestion_drop; @@ -118,7 +126,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; } - ret = qdisc_enqueue(skb, child, to_free); + ret = qdisc_enqueue(skb, child, root_lock, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; @@ -129,6 +137,10 @@ static int red_enqueue(struct 
sk_buff *skb, struct Qdisc *sch, return ret; congestion_drop: + skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, root_lock, to_free, &ret); + if (!skb) + return NET_XMIT_CN | ret; + qdisc_drop(skb, sch, to_free); return NET_XMIT_CN; } @@ -202,6 +214,8 @@ static void red_destroy(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); + tcf_qevent_destroy(&q->qe_mark, sch); + tcf_qevent_destroy(&q->qe_early_drop, sch); del_timer_sync(&q->adapt_timer); red_offload(sch, false); qdisc_put(q->qdisc); @@ -213,14 +227,15 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, [TCA_RED_MAX_P] = { .type = NLA_U32 }, [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS), + [TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 }, + [TCA_RED_MARK_BLOCK] = { .type = NLA_U32 }, }; -static int red_change(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) +static int __red_change(struct Qdisc *sch, struct nlattr **tb, + struct netlink_ext_ack *extack) { struct Qdisc *old_child = NULL, *child = NULL; struct red_sched_data *q = qdisc_priv(sch); - struct nlattr *tb[TCA_RED_MAX + 1]; struct nla_bitfield32 flags_bf; struct tc_red_qopt *ctl; unsigned char userbits; @@ -228,14 +243,6 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, int err; u32 max_P; - if (opt == NULL) - return -EINVAL; - - err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy, - NULL); - if (err < 0) - return err; - if (tb[TCA_RED_PARMS] == NULL || tb[TCA_RED_STAB] == NULL) return -EINVAL; @@ -323,11 +330,74 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct red_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_RED_MAX + 1]; + int err; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy, + extack); + if (err < 0) + return err; q->qdisc = &noop_qdisc; q->sch = sch; timer_setup(&q->adapt_timer, 
red_adaptative_timer, 0); - return red_change(sch, opt, extack); + + err = __red_change(sch, tb, extack); + if (err) + return err; + + err = tcf_qevent_init(&q->qe_early_drop, sch, + FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP, + tb[TCA_RED_EARLY_DROP_BLOCK], extack); + if (err) + goto err_early_drop_init; + + err = tcf_qevent_init(&q->qe_mark, sch, + FLOW_BLOCK_BINDER_TYPE_RED_MARK, + tb[TCA_RED_MARK_BLOCK], extack); + if (err) + goto err_mark_init; + + return 0; + +err_mark_init: + tcf_qevent_destroy(&q->qe_early_drop, sch); +err_early_drop_init: + del_timer_sync(&q->adapt_timer); + red_offload(sch, false); + qdisc_put(q->qdisc); + return err; +} + +static int red_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct red_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_RED_MAX + 1]; + int err; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy, + extack); + if (err < 0) + return err; + + err = tcf_qevent_validate_change(&q->qe_early_drop, + tb[TCA_RED_EARLY_DROP_BLOCK], extack); + if (err) + return err; + + err = tcf_qevent_validate_change(&q->qe_mark, + tb[TCA_RED_MARK_BLOCK], extack); + if (err) + return err; + + return __red_change(sch, tb, extack); } static int red_dump_offload_stats(struct Qdisc *sch) @@ -371,7 +441,9 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) || nla_put_bitfield32(skb, TCA_RED_FLAGS, - q->flags, TC_RED_SUPPORTED_FLAGS)) + q->flags, TC_RED_SUPPORTED_FLAGS) || + tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) || + tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop)) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 4074c50ac3d73..d2a6e78262bbc 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -276,7 +276,7 @@ static bool 
sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, return false; } -static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { @@ -399,7 +399,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } enqueue: - ret = qdisc_enqueue(skb, child, to_free); + ret = qdisc_enqueue(skb, child, root_lock, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 5a6def5e4e6df..46cdefd69e44d 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -343,7 +343,7 @@ static int sfq_headdrop(const struct sfq_sched_data *q) } static int -sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) +sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct sfq_sched_data *q = qdisc_priv(sch); unsigned int hash, dropped; diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 7a5e4c4547156..f75f237c44369 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -65,7 +65,7 @@ static u16 calc_new_low_prio(const struct skbprio_sched_data *q) return SKBPRIO_MAX_PRIORITY - 1; } -static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index e981992634ddf..daef2ff60a985 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -410,7 +410,7 @@ static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch) return txtime; } -static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff 
**to_free) { struct taprio_sched *q = qdisc_priv(sch); @@ -435,7 +435,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; - return qdisc_enqueue(skb, child, to_free); + return qdisc_enqueue(skb, child, root_lock, to_free); } static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 78e79029dc631..c3eb5cdb83a82 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -187,7 +187,7 @@ static int tbf_offload_dump(struct Qdisc *sch) /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ -static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, +static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -206,7 +206,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; len += segs->len; - ret = qdisc_enqueue(segs, q->qdisc, to_free); + ret = qdisc_enqueue(segs, q->qdisc, root_lock, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); @@ -221,7 +221,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, return nb > 0 ? 
NET_XMIT_SUCCESS : NET_XMIT_DROP; } -static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -231,10 +231,10 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (qdisc_pkt_len(skb) > q->max_size) { if (skb_is_gso(skb) && skb_gso_validate_mac_len(skb, q->max_size)) - return tbf_segment(skb, sch, to_free); + return tbf_segment(skb, sch, root_lock, to_free); return qdisc_drop(skb, sch, to_free); } - ret = qdisc_enqueue(skb, q->qdisc, to_free); + ret = qdisc_enqueue(skb, q->qdisc, root_lock, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 689ef6f3ded80..5119646534761 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -72,8 +72,8 @@ struct teql_sched_data { /* "teql*" qdisc routines */ -static int -teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) +static int teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, + struct sk_buff **to_free) { struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh new file mode 100755 index 0000000000000..e714bae473fb4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -0,0 +1,492 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This test sends one stream of traffic from H1 through a TBF shaper, to a RED +# within TBF shaper on $swp3. The two shapers have the same configuration, and +# thus the resulting stream should fill all available bandwidth on the latter +# shaper. A second stream is sent from H2 also via $swp3, and used to inject +# additional traffic. 
Since all available bandwidth is taken, this traffic has +# to go to backlog. +# +# +--------------------------+ +--------------------------+ +# | H1 | | H2 | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | TBF 10Mbps | | | | +# +-----|--------------------+ +-----|--------------------+ +# | | +# +-----|------------------------------------------------|--------------------+ +# | SW | | | +# | +--|------------------------------------------------|----------------+ | +# | | + $swp1 + $swp2 | | +# | | BR | | +# | | | | +# | | + $swp3 | | +# | | | TBF 10Mbps / RED | | +# | +--------------------------------|-----------------------------------+ | +# | | | +# +-----------------------------------|---------------------------------------+ +# | +# +-----|--------------------+ +# | H3 | | +# | + $h3 | +# | 192.0.2.3/28 | +# | | +# +--------------------------+ + +ALL_TESTS=" + ping_ipv4 + ecn_test + ecn_nodrop_test + red_test + red_qevent_test + ecn_qevent_test +" + +NUM_NETIFS=6 +CHECK_TC="yes" +source lib.sh + +BACKLOG=30000 +PKTSZ=1400 + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + mtu_set $h1 10000 + tc qdisc replace dev $h1 root handle 1: tbf \ + rate 10Mbit burst 10K limit 1M +} + +h1_destroy() +{ + tc qdisc del dev $h1 root + mtu_restore $h1 + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 + mtu_set $h2 10000 +} + +h2_destroy() +{ + mtu_restore $h2 + simple_if_fini $h2 192.0.2.2/28 +} + +h3_create() +{ + simple_if_init $h3 192.0.2.3/28 + mtu_set $h3 10000 +} + +h3_destroy() +{ + mtu_restore $h3 + simple_if_fini $h3 192.0.2.3/28 +} + +switch_create() +{ + ip link add dev br up type bridge + ip link set dev $swp1 up master br + ip link set dev $swp2 up master br + ip link set dev $swp3 up master br + + mtu_set $swp1 10000 + mtu_set $swp2 10000 + mtu_set $swp3 10000 + + tc qdisc replace dev $swp3 root handle 1: tbf \ + rate 10Mbit burst 10K limit 1M + ip link add name _drop_test up type dummy +} + 
+switch_destroy() +{ + ip link del dev _drop_test + tc qdisc del dev $swp3 root + + mtu_restore $swp3 + mtu_restore $swp2 + mtu_restore $swp1 + + ip link set dev $swp3 down nomaster + ip link set dev $swp2 down nomaster + ip link set dev $swp1 down nomaster + ip link del dev br +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + h2=${NETIFS[p3]} + swp2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h3_mac=$(mac_get $h3) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.3 " from host 1" + ping_test $h2 192.0.2.3 " from host 2" +} + +get_qdisc_backlog() +{ + qdisc_stats_get $swp3 11: .backlog +} + +get_nmarked() +{ + qdisc_stats_get $swp3 11: .marked +} + +get_qdisc_npackets() +{ + qdisc_stats_get $swp3 11: .packets +} + +get_nmirrored() +{ + link_stats_get _drop_test tx packets +} + +send_packets() +{ + local proto=$1; shift + local pkts=$1; shift + + $MZ $h2 -p $PKTSZ -a own -b $h3_mac -A 192.0.2.2 -B 192.0.2.3 -t $proto -q -c $pkts "$@" +} + +# This sends traffic in an attempt to build a backlog of $size. Returns 0 on +# success. After 11 failed attempts it bails out and returns 1. It dumps the +# backlog size to stdout. 
+build_backlog() +{ + local size=$1; shift + local proto=$1; shift + + local i=0 + + while :; do + local cur=$(get_qdisc_backlog) + local diff=$((size - cur)) + local pkts=$(((diff + PKTSZ - 1) / PKTSZ)) + + if ((cur >= size)); then + echo $cur + return 0 + elif ((i++ > 10)); then + echo $cur + return 1 + fi + + send_packets $proto $pkts "$@" + sleep 1 + done +} + +check_marking() +{ + local cond=$1; shift + + local npackets_0=$(get_qdisc_npackets) + local nmarked_0=$(get_nmarked) + sleep 5 + local npackets_1=$(get_qdisc_npackets) + local nmarked_1=$(get_nmarked) + + local nmarked_d=$((nmarked_1 - nmarked_0)) + local npackets_d=$((npackets_1 - npackets_0)) + local pct=$((100 * nmarked_d / npackets_d)) + + echo $pct + ((pct $cond)) +} + +check_mirroring() +{ + local cond=$1; shift + + local npackets_0=$(get_qdisc_npackets) + local nmirrored_0=$(get_nmirrored) + sleep 5 + local npackets_1=$(get_qdisc_npackets) + local nmirrored_1=$(get_nmirrored) + + local nmirrored_d=$((nmirrored_1 - nmirrored_0)) + local npackets_d=$((npackets_1 - npackets_0)) + local pct=$((100 * nmirrored_d / npackets_d)) + + echo $pct + ((pct $cond)) +} + +ecn_test_common() +{ + local name=$1; shift + local limit=$1; shift + local backlog + local pct + + # Build the below-the-limit backlog using UDP. We could use TCP just + # fine, but this way we get a proof that UDP is accepted when queue + # length is below the limit. The main stream is using TCP, and if the + # limit is misconfigured, we would see this traffic being ECN marked. + RET=0 + backlog=$(build_backlog $((2 * limit / 3)) udp) + check_err $? "Could not build the requested backlog" + pct=$(check_marking "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "$name backlog < limit" + + # Now push TCP, because non-TCP traffic would be early-dropped after the + # backlog crosses the limit, and we want to make sure that the backlog + # is above the limit. 
+ RET=0 + backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking ">= 95") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." + log_test "$name backlog > limit" +} + +do_ecn_test() +{ + local limit=$1; shift + local name=ECN + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + sleep 1 + + ecn_test_common "$name" $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, it should all get dropped, and backlog + # building should fail. + RET=0 + build_backlog $((2 * limit)) udp >/dev/null + check_fail $? "UDP traffic went into backlog instead of being early-dropped" + log_test "$name backlog > limit: UDP early-dropped" + + stop_traffic + sleep 1 +} + +do_ecn_nodrop_test() +{ + local limit=$1; shift + local name="ECN nodrop" + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + sleep 1 + + ecn_test_common "$name" $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, in nodrop mode, make sure it goes to + # backlog as well. + RET=0 + build_backlog $((2 * limit)) udp >/dev/null + check_err $? "UDP traffic was early-dropped instead of getting into backlog" + log_test "$name backlog > limit: UDP not dropped" + + stop_traffic + sleep 1 +} + +do_red_test() +{ + local limit=$1; shift + local backlog + local pct + + # Use ECN-capable TCP to verify there's no marking even though the queue + # is above limit. + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + + # Pushing below the queue limit should work. + RET=0 + backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking "== 0") + check_err $? 
"backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "RED backlog < limit" + + # Pushing above should not. + RET=0 + backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01) + check_fail $? "Traffic went into backlog instead of being early-dropped" + pct=$(check_marking "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "RED backlog > limit" + + stop_traffic + sleep 1 +} + +do_red_qevent_test() +{ + local limit=$1; shift + local backlog + local base + local now + local pct + + RET=0 + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t udp -q & + sleep 1 + + tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ + action mirred egress mirror dev _drop_test + + # Push to the queue until it's at the limit. The configured limit is + # rounded by the qdisc, so this is the best we can do to get to the real + # limit. + build_backlog $((3 * limit / 2)) udp >/dev/null + + base=$(get_nmirrored) + send_packets udp 100 + sleep 1 + now=$(get_nmirrored) + ((now >= base + 100)) + check_err $? "Dropped packets not observed: 100 expected, $((now - base)) seen" + + tc filter del block 10 pref 1234 handle 102 matchall + + base=$(get_nmirrored) + send_packets udp 100 + sleep 1 + now=$(get_nmirrored) + ((now == base)) + check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen" + + log_test "RED early_dropped packets mirrored" + + stop_traffic + sleep 1 +} + +do_ecn_qevent_test() +{ + local limit=$1; shift + local name=ECN + + RET=0 + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + sleep 1 + + tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ + action mirred egress mirror dev _drop_test + + backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_mirroring "== 0") + check_err $? 
"backlog $backlog / $limit Got $pct% mirrored packets, expected == 0." + + backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_mirroring ">= 95") + check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected >= 95." + + tc filter del block 10 pref 1234 handle 102 matchall + + log_test "ECN marked packets mirrored" + + stop_traffic + sleep 1 +} + +install_qdisc() +{ + local -a args=("$@") + + tc qdisc replace dev $swp3 parent 1:1 handle 11: red \ + limit 1M avpkt $PKTSZ probability 1 \ + min $BACKLOG max $((BACKLOG + 1)) burst 38 "${args[@]}" + sleep 1 +} + +uninstall_qdisc() +{ + tc qdisc del dev $swp3 parent 1:1 +} + +ecn_test() +{ + install_qdisc ecn + do_ecn_test $BACKLOG + uninstall_qdisc +} + +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + do_ecn_nodrop_test $BACKLOG + uninstall_qdisc +} + +red_test() +{ + install_qdisc + do_red_test $BACKLOG + uninstall_qdisc +} + +red_qevent_test() +{ + install_qdisc qevent early_drop block 10 + do_red_qevent_test $BACKLOG + uninstall_qdisc +} + +ecn_qevent_test() +{ + install_qdisc ecn qevent mark block 10 + do_ecn_qevent_test $BACKLOG + uninstall_qdisc +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS