diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 93ca77c129c35..c2642402b7a14 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -176,10 +176,9 @@ struct xenvif_queue { /* Per-queue data for xenvif */ char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */ struct xen_netif_rx_back_ring rx; struct sk_buff_head rx_queue; - RING_IDX rx_last_skb_slots; - unsigned long status; - struct timer_list rx_stalled; + unsigned int rx_queue_max; + unsigned int rx_queue_len; struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS]; @@ -199,18 +198,14 @@ struct xenvif_queue { /* Per-queue data for xenvif */ struct xenvif_stats stats; }; +/* Maximum number of Rx slots a to-guest packet may use, including the + * slot needed for GSO meta-data. + */ +#define XEN_NETBK_RX_SLOTS_MAX (MAX_SKB_FRAGS + 1) + enum state_bit_shift { /* This bit marks that the vif is connected */ VIF_STATUS_CONNECTED, - /* This bit signals the RX thread that queuing was stopped (in - * start_xmit), and either the timer fired or an RX interrupt came - */ - QUEUE_STATUS_RX_PURGE_EVENT, - /* This bit tells the interrupt handler that this queue was the reason - * for the carrier off, so it should kick the thread. Only queues which - * brought it down can turn on the carrier. - */ - QUEUE_STATUS_RX_STALLED }; struct xenvif { @@ -246,6 +241,14 @@ struct xenvif { struct net_device *dev; }; +struct xenvif_rx_cb { + unsigned long expires; + int meta_slots_used; + bool full_coalesce; +}; + +#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb) + static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif) { return to_xenbus_device(vif->dev->dev.parent); @@ -291,6 +294,8 @@ void xenvif_kick_thread(struct xenvif_queue *queue); int xenvif_dealloc_kthread(void *data); +void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); + /* Determine whether the needed number of slots (req) are available, * and set req_event if not. */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index c6759b1ec18d4..a134d52f55b4d 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -43,6 +43,9 @@ #define XENVIF_QUEUE_LENGTH 32 #define XENVIF_NAPI_WEIGHT 64 +/* Number of bytes allowed on the internal guest Rx queue. */ +#define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE) + /* This function is used to set SKBTX_DEV_ZEROCOPY as well as * increasing the inflight counter. We need to increase the inflight * counter because core driver calls into xenvif_zerocopy_callback @@ -63,7 +66,8 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) int xenvif_schedulable(struct xenvif *vif) { return netif_running(vif->dev) && - test_bit(VIF_STATUS_CONNECTED, &vif->status); + test_bit(VIF_STATUS_CONNECTED, &vif->status) && + !vif->disabled; } static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) @@ -104,16 +108,7 @@ int xenvif_poll(struct napi_struct *napi, int budget) static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; - struct netdev_queue *net_queue = - netdev_get_tx_queue(queue->vif->dev, queue->id); - /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR - * the carrier went down and this queue was previously blocked - */ - if (unlikely(netif_tx_queue_stopped(net_queue) || - (!netif_carrier_ok(queue->vif->dev) && - test_bit(QUEUE_STATUS_RX_STALLED, &queue->status)))) - set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status); xenvif_kick_thread(queue); return IRQ_HANDLED; @@ -141,24 +136,13 @@ void xenvif_wake_queue(struct xenvif_queue *queue) netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); } -/* Callback to wake the queue's thread and turn the carrier off on timeout */ -static void xenvif_rx_stalled(unsigned long data) -{ - struct xenvif_queue *queue = (struct xenvif_queue *)data; - - if (xenvif_queue_stopped(queue)) { - set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status); - xenvif_kick_thread(queue); - } -} - static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; unsigned int num_queues = vif->num_queues; u16 index; - int min_slots_needed; + struct xenvif_rx_cb *cb; BUG_ON(skb->dev != dev); @@ -181,30 +165,10 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) !xenvif_schedulable(vif)) goto drop; - /* At best we'll need one slot for the header and one for each - * frag. - */ - min_slots_needed = 1 + skb_shinfo(skb)->nr_frags; + cb = XENVIF_RX_CB(skb); + cb->expires = jiffies + rx_drain_timeout_jiffies; - /* If the skb is GSO then we'll also need an extra slot for the - * metadata. - */ - if (skb_is_gso(skb)) - min_slots_needed++; - - /* If the skb can't possibly fit in the remaining slots - * then turn off the queue to give the ring a chance to - * drain. - */ - if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) { - queue->rx_stalled.function = xenvif_rx_stalled; - queue->rx_stalled.data = (unsigned long)queue; - netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); - mod_timer(&queue->rx_stalled, - jiffies + rx_drain_timeout_jiffies); - } - - skb_queue_tail(&queue->rx_queue, skb); + xenvif_rx_queue_tail(queue, skb); xenvif_kick_thread(queue); return NETDEV_TX_OK; @@ -498,6 +462,8 @@ int xenvif_init_queue(struct xenvif_queue *queue) init_timer(&queue->credit_timeout); queue->credit_window_start = get_jiffies_64(); + queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES; + skb_queue_head_init(&queue->rx_queue); skb_queue_head_init(&queue->tx_queue); @@ -529,8 +495,6 @@ int xenvif_init_queue(struct xenvif_queue *queue) queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE; } - init_timer(&queue->rx_stalled); - return 0; } @@ -664,7 +628,6 @@ void xenvif_disconnect(struct xenvif *vif) netif_napi_del(&queue->napi); if (queue->task) { - del_timer_sync(&queue->rx_stalled); kthread_stop(queue->task); queue->task = NULL; } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 08f65996534cb..57aa3b507d320 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -55,8 +55,8 @@ bool separate_tx_rx_irq = 1; module_param(separate_tx_rx_irq, bool, 0644); -/* When guest ring is filled up, qdisc queues the packets for us, but we have - * to timeout them, otherwise other guests' packets can get stuck there +/* The time that packets can stay on the guest Rx internal queue + * before they are dropped. */ unsigned int rx_drain_timeout_msecs = 10000; module_param(rx_drain_timeout_msecs, uint, 0444); @@ -83,7 +83,6 @@ static void make_tx_response(struct xenvif_queue *queue, s8 st); static inline int tx_work_todo(struct xenvif_queue *queue); -static inline int rx_work_todo(struct xenvif_queue *queue); static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue, u16 id, @@ -163,6 +162,69 @@ bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed) return false; } +void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->rx_queue.lock, flags); + + __skb_queue_tail(&queue->rx_queue, skb); + + queue->rx_queue_len += skb->len; + if (queue->rx_queue_len > queue->rx_queue_max) + netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id)); + + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); +} + +static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + + spin_lock_irq(&queue->rx_queue.lock); + + skb = __skb_dequeue(&queue->rx_queue); + if (skb) + queue->rx_queue_len -= skb->len; + + spin_unlock_irq(&queue->rx_queue.lock); + + return skb; +} + +static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue) +{ + spin_lock_irq(&queue->rx_queue.lock); + + if (queue->rx_queue_len < queue->rx_queue_max) + netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id)); + + spin_unlock_irq(&queue->rx_queue.lock); +} + + +static void xenvif_rx_queue_purge(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + while ((skb = xenvif_rx_dequeue(queue)) != NULL) + kfree_skb(skb); +} + +static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + + for(;;) { + skb = skb_peek(&queue->rx_queue); + if (!skb) + break; + if (time_before(jiffies, XENVIF_RX_CB(skb)->expires)) + break; + xenvif_rx_dequeue(queue); + kfree_skb(skb); + } +} + /* * Returns true if we should start a new receive buffer instead of * adding 'size' bytes to a buffer which currently contains 'offset' @@ -237,13 +299,6 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue, return meta; } -struct xenvif_rx_cb { - int meta_slots_used; - bool full_coalesce; -}; - -#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb) - /* * Set up the grant operations for this fragment. If it's a flipping * interface, we also set up the unmap request from here. @@ -587,7 +642,8 @@ static void xenvif_rx_action(struct xenvif_queue *queue) skb_queue_head_init(&rxq); - while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) { + while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX) + && (skb = xenvif_rx_dequeue(queue)) != NULL) { RING_IDX max_slots_needed; RING_IDX old_req_cons; RING_IDX ring_slots_used; @@ -634,15 +690,6 @@ static void xenvif_rx_action(struct xenvif_queue *queue) skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) max_slots_needed++; - /* If the skb may not fit then bail out now */ - if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) { - skb_queue_head(&queue->rx_queue, skb); - need_to_notify = true; - queue->rx_last_skb_slots = max_slots_needed; - break; - } else - queue->rx_last_skb_slots = 0; - old_req_cons = queue->rx.req_cons; XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue); ring_slots_used = queue->rx.req_cons - old_req_cons; @@ -1869,12 +1916,6 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) } } -static inline int rx_work_todo(struct xenvif_queue *queue) -{ - return (!skb_queue_empty(&queue->rx_queue) && - xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)); -} - static inline int tx_work_todo(struct xenvif_queue *queue) { if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))) @@ -1931,92 +1972,64 @@ int xenvif_map_frontend_rings(struct xenvif_queue *queue, return err; } -static void xenvif_start_queue(struct xenvif_queue *queue) +static bool xenvif_have_rx_work(struct xenvif_queue *queue) { - if (xenvif_schedulable(queue->vif)) - xenvif_wake_queue(queue); + return (!skb_queue_empty(&queue->rx_queue) + && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)) + || kthread_should_stop() + || queue->vif->disabled; } -/* Only called from the queue's thread, it handles the situation when the guest - * doesn't post enough requests on the receiving ring. - * First xenvif_start_xmit disables QDisc and start a timer, and then either the - * timer fires, or the guest send an interrupt after posting new request. If it - * is the timer, the carrier is turned off here. - * */ -static void xenvif_rx_purge_event(struct xenvif_queue *queue) +static long xenvif_rx_queue_timeout(struct xenvif_queue *queue) { - /* Either the last unsuccesful skb or at least 1 slot should fit */ - int needed = queue->rx_last_skb_slots ? - queue->rx_last_skb_slots : 1; + struct sk_buff *skb; + long timeout; - /* It is assumed that if the guest post new slots after this, the RX - * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up - * the thread again - */ - set_bit(QUEUE_STATUS_RX_STALLED, &queue->status); - if (!xenvif_rx_ring_slots_available(queue, needed)) { - rtnl_lock(); - if (netif_carrier_ok(queue->vif->dev)) { - /* Timer fired and there are still no slots. Turn off - * everything except the interrupts - */ - netif_carrier_off(queue->vif->dev); - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; - if (net_ratelimit()) - netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id); - } else { - /* Probably an another queue already turned the carrier - * off, make sure nothing is stucked in the internal - * queue of this queue - */ - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; - } - rtnl_unlock(); - } else if (!netif_carrier_ok(queue->vif->dev)) { - unsigned int num_queues = queue->vif->num_queues; - unsigned int i; - /* The carrier was down, but an interrupt kicked - * the thread again after new requests were - * posted - */ - clear_bit(QUEUE_STATUS_RX_STALLED, - &queue->status); - rtnl_lock(); - netif_carrier_on(queue->vif->dev); - netif_tx_wake_all_queues(queue->vif->dev); - rtnl_unlock(); + skb = skb_peek(&queue->rx_queue); + if (!skb) + return MAX_SCHEDULE_TIMEOUT; - for (i = 0; i < num_queues; i++) { - struct xenvif_queue *temp = &queue->vif->queues[i]; + timeout = XENVIF_RX_CB(skb)->expires - jiffies; + return timeout < 0 ? 0 : timeout; +} - xenvif_napi_schedule_or_enable_events(temp); - } - if (net_ratelimit()) - netdev_err(queue->vif->dev, "Carrier on again\n"); - } else { - /* Queuing were stopped, but the guest posted - * new requests and sent an interrupt - */ - clear_bit(QUEUE_STATUS_RX_STALLED, - &queue->status); - del_timer_sync(&queue->rx_stalled); - xenvif_start_queue(queue); +/* Wait until the guest Rx thread has work. + * + * The timeout needs to be adjusted based on the current head of the + * queue (and not just the head at the beginning). In particular, if + * the queue is initially empty an infinite timeout is used and this + * needs to be reduced when a skb is queued. + * + * This cannot be done with wait_event_timeout() because it only + * calculates the timeout once. + */ +static void xenvif_wait_for_rx_work(struct xenvif_queue *queue) +{ + DEFINE_WAIT(wait); + + if (xenvif_have_rx_work(queue)) + return; + + for (;;) { + long ret; + + prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE); + if (xenvif_have_rx_work(queue)) + break; + ret = schedule_timeout(xenvif_rx_queue_timeout(queue)); + if (!ret) + break; } + finish_wait(&queue->wq, &wait); } int xenvif_kthread_guest_rx(void *data) { struct xenvif_queue *queue = data; - struct sk_buff *skb; + struct xenvif *vif = queue->vif; - while (!kthread_should_stop()) { - wait_event_interruptible(queue->wq, - rx_work_todo(queue) || - queue->vif->disabled || - test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) || - kthread_should_stop()); + for (;;) { + xenvif_wait_for_rx_work(queue); if (kthread_should_stop()) break; @@ -2028,35 +2041,29 @@ int xenvif_kthread_guest_rx(void *data) * context so we defer it here, if this thread is * associated with queue 0. */ - if (unlikely(queue->vif->disabled && queue->id == 0)) { - xenvif_carrier_off(queue->vif); - } else if (unlikely(queue->vif->disabled)) { - /* kthread_stop() would be called upon this thread soon, - * be a bit proactive - */ - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; - } else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT, - &queue->status))) { - xenvif_rx_purge_event(queue); - } else if (!netif_carrier_ok(queue->vif->dev)) { - /* Another queue stalled and turned the carrier off, so - * purge the internal queue of queues which were not - * blocked - */ - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; + if (unlikely(vif->disabled && queue->id == 0)) { + xenvif_carrier_off(vif); + xenvif_rx_queue_purge(queue); + continue; } if (!skb_queue_empty(&queue->rx_queue)) xenvif_rx_action(queue); + /* Queued packets may have foreign pages from other + * domains. These cannot be queued indefinitely as + * this would starve guests of grant refs and transmit + * slots. + */ + xenvif_rx_queue_drop_expired(queue); + + xenvif_rx_queue_maybe_wake(queue); + cond_resched(); } /* Bin any remaining skbs */ - while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) - dev_kfree_skb(skb); + xenvif_rx_queue_purge(queue); return 0; } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 9060857c90228..96a754d8e5175 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -52,6 +52,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v) struct xenvif_queue *queue = m->private; struct xen_netif_tx_back_ring *tx_ring = &queue->tx; struct xen_netif_rx_back_ring *rx_ring = &queue->rx; + struct netdev_queue *dev_queue; if (tx_ring->sring) { struct xen_netif_tx_sring *sring = tx_ring->sring; @@ -112,6 +113,13 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v) queue->credit_timeout.expires, jiffies); + dev_queue = netdev_get_tx_queue(queue->vif->dev, queue->id); + + seq_printf(m, "\nRx internal queue: len %u max %u pkts %u %s\n", + queue->rx_queue_len, queue->rx_queue_max, + skb_queue_len(&queue->rx_queue), + netif_tx_queue_stopped(dev_queue) ? "stopped" : "running"); + return 0; }