[Click] [RFC] [PATCH] PollDevice/ToDevice improvement

springbo@cs.wisc.edu springbo at cs.wisc.edu
Thu Nov 15 13:23:15 EST 2007


Hello Joonwoo,

The new patch compiled without any warnings on x86_64.

After an hour of use with an Intel PRO/1000 PT Dual Port Server Adapter
NIC I haven't seen any major problems with the patch so far. It seems to
be working fine.

I did see a Tx Unit Hang when shutting down kclick:
e1000: eth2: e1000_clean_tx_irq: Detected Tx Unit Hang
  Tx Queue             <0>
  TDH                  <cf>
  TDT                  <cf>
  next_to_use          <cf>
  next_to_clean        <62>
buffer_info[next_to_clean]
  time_stamp           <100258906>
  next_to_watch        <62>
  jiffies              <100284bf5>
  next_to_watch.status <1>

I haven't crunched the numbers for performance yet, but I did notice the
machine seemed to be much more responsive.

Thanks again!
Kevin Springborn



> 2007/11/14, Joonwoo Park <joonwpark81 at gmail.com>:
>> I think polling packet method of the click which is implemented with
>> polldevice and todevice is one of great advantage of it.
>> But IMO it has some disadvantage too because click does old fashioned
>> polling packets which is spinning infinitely.
>> Therefore I think that the spinning eats much of processor time and
>> makes influence to other tasks.
>> Then I managed to interrupt driven polling packet method for click which
>> is using NAPI.
>> It makes limited number of interrupts in a sec and notifications to
>> polldevice and todevice.
>> Especially It makes polldevice to use rx descriptors of nic (e1000) too.
>> Actually because old polldevice does spinning infinitely, it does not
>> give chance to nic for queuing.
>>
>
> Some bugs have been fixed.
> - Actually, my last patch included improvement of removing unnecessary
> writing of RDT in e1000_rx_refill. But there was a bug which
> makes rx hang in my work. It has fixed.
> - Fixed to call poll_intr when a rx overrun interrupted.
> - And some improvements, use defined constant value for dev->polling,
> remove compiler warnings. (Thanks Kevin)
>
> Thanks.
>
> p.s. Kevin, Don't you see compiler warning on x86_64 now?
>
> joonwpark.tistory.com
> Joonwoo
>
>
> [PollDevice/ToDevice]: Interrupt driven polling packet method for click
> which is using NAPI.
> It makes limited number of interrupts in a sec and notifications to
> polldevice and todevice.
> Especially It makes polldevice to use rx descriptors of nic (e1000) too.
> Actually because old polldevice does spinning infinitely, it does not give
> chance to nic for queuing.
>
> Signed-off-by: Joonwoo Park <joonwpark81 at gmail.com>
>
> ---
>  drivers/e1000-7.x/src/e1000.h      |    1 +
>  drivers/e1000-7.x/src/e1000_main.c |  100
> ++++++++++++++++++++++++++++-------
>  elements/linuxmodule/polldevice.cc |   75 ++++++++++++++++++++++++---
>  elements/linuxmodule/polldevice.hh |    5 ++
>  elements/linuxmodule/todevice.cc   |    7 ++-
>  elements/linuxmodule/todevice.hh   |    2 +-
>  6 files changed, 158 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/e1000-7.x/src/e1000.h b/drivers/e1000-7.x/src/e1000.h
> index 0b28ddd..28a7d94 100644
> --- a/drivers/e1000-7.x/src/e1000.h
> +++ b/drivers/e1000-7.x/src/e1000.h
> @@ -392,6 +392,7 @@ struct e1000_adapter {
>          unsigned long rx_quiet_jiffies;      /* jiffies timeout for the
> QUIET state */
>          int prev_rdfh;             /* prev value of Rcv Data Fifo Head
> register */
>          int prev_rdft;             /* prev value of Rcv Data Fifo Tail
> register */
> +	int (*poll_intr)(struct net_device *, void *);
>  };
>
>  enum e1000_state_t {
> diff --git a/drivers/e1000-7.x/src/e1000_main.c
> b/drivers/e1000-7.x/src/e1000_main.c
> index 2a3c5f0..bd1fd9d 100644
> --- a/drivers/e1000-7.x/src/e1000_main.c
> +++ b/drivers/e1000-7.x/src/e1000_main.c
> @@ -237,7 +237,7 @@ static int e1000_rx_refill(struct net_device *dev,
> struct sk_buff **);
>  static int e1000_tx_eob(struct net_device *dev);
>  static struct sk_buff *e1000_tx_clean(struct net_device *dev);
>  static struct sk_buff *e1000_rx_poll(struct net_device *dev, int *want);
> -static int e1000_poll_on(struct net_device *dev);
> +static int e1000_poll_on(struct net_device *dev, int (*poll_intr)(struct
> net_device *, void *));
>  static int e1000_poll_off(struct net_device *dev);
>
>  #ifdef CONFIG_NET_POLL_CONTROLLER
> @@ -438,6 +438,13 @@ e1000_irq_enable(struct e1000_adapter *adapter)
>  		E1000_WRITE_FLUSH(&adapter->hw);
>  	}
>  }
> +
> +static void
> +e1000_poll_irq_enable(struct net_device *netdev)
> +{
> +	struct e1000_adapter *adapter = (struct
> e1000_adapter*)netdev_priv(netdev);
> +	e1000_irq_enable(adapter);
> +}
>  #ifdef NETIF_F_HW_VLAN_TX
>
>  static void
> @@ -745,6 +752,12 @@ e1000_reinit_locked(struct e1000_adapter *adapter)
>  	e1000_down(adapter);
>  	e1000_up(adapter);
>  	clear_bit(__E1000_RESETTING, &adapter->flags);
> +
> +	if (adapter->netdev->polling) {
> +		mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1);
> +		mod_timer(&adapter->watchdog_timer, jiffies + 1);
> +		mod_timer(&adapter->phy_info_timer, jiffies + 1);
> +	}
>  }
>
>  void
> @@ -1045,6 +1058,8 @@ e1000_probe(struct pci_dev *pdev,
>          netdev->tx_clean = e1000_tx_clean;
>          netdev->poll_off = e1000_poll_off;
>          netdev->poll_on = e1000_poll_on;
> +        netdev->poll_irq_enable = e1000_poll_irq_enable;
> +        adapter->poll_intr = NULL;
>
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  	netdev->poll_controller = e1000_netpoll;
> @@ -2684,8 +2699,8 @@ e1000_82547_tx_fifo_stall(unsigned long data)
>  static void
>  e1000_watchdog(unsigned long data)
>  {
> -	struct e1000_adapter *adapter = (struct e1000_adapter *) data;
> -  if(adapter->netdev->polling){
> +  struct e1000_adapter *adapter = (struct e1000_adapter *) data;
> +  if(adapter->netdev->polling == 2){
>      adapter->do_poll_watchdog = 1;
>    } else {
>      e1000_watchdog_1(adapter);
> @@ -4055,7 +4070,21 @@ e1000_intr(int irq, void *data)
>  		E1000_WRITE_REG(hw, IMC, ~0);
>  		E1000_WRITE_FLUSH(hw);
>  	}
> -	if (likely(netif_rx_schedule_prep(netdev))) {
> +	if (netdev->polling == CLICK_POLLING_INTERRUPTS) {
> +		if (likely(adapter->poll_intr)) {
> +			unsigned long flags = 0;
> +			if (icr & E1000_ICR_TXDW)
> +				flags |= CLICK_POLL_INTERRUPT_TX;
> +			if (icr & (E1000_ICR_RXT0 | E1000_ICR_RXO))
> +				flags |= CLICK_POLL_INTERRUPT_RX;
> +			if (likely(flags))
> +				adapter->poll_intr(netdev, (void*)flags);
> +			else
> +				e1000_irq_enable(adapter);
> +		}
> +		else
> +			e1000_irq_enable(adapter);
> +	} else if (likely(netif_rx_schedule_prep(netdev))) {
>  		adapter->total_tx_bytes = 0;
>  		adapter->total_tx_packets = 0;
>  		adapter->total_rx_bytes = 0;
> @@ -5700,7 +5729,7 @@ e1000_rx_refill(struct net_device *dev, struct
> sk_buff **skbs)
>    struct pci_dev *pdev = adapter->pdev;
>    struct e1000_rx_desc *rx_desc;
>    struct sk_buff *skb;
> -  int next;
> +  int i;
>
>    /*
>     * Update statistics counters, check link.
> @@ -5719,11 +5748,9 @@ e1000_rx_refill(struct net_device *dev, struct
> sk_buff **skbs)
>    if(skbs == 0)
>      return E1000_DESC_UNUSED(rx_ring);
>
> -  for( next = (rx_ring->next_to_use + 1) % rx_ring->count;
> -       next != rx_ring->next_to_clean;
> -       rx_ring->next_to_use = next,
> -	 next = (rx_ring->next_to_use + 1) % rx_ring->count ) {
> -    int i = rx_ring->next_to_use;
> +  i = rx_ring->next_to_use;
> +
> +  while (1) {
>      if(rx_ring->buffer_info[i].skb != NULL)
>        break;
>
> @@ -5743,14 +5770,20 @@ e1000_rx_refill(struct net_device *dev, struct
> sk_buff **skbs)
>
>      rx_desc = E1000_RX_DESC(*rx_ring, i);
>      rx_desc->buffer_addr = cpu_to_le64(rx_ring->buffer_info[i].dma);
> -
> -    /* Intel documnetation says: "Software adds receive descriptors by
> -     * writing the tail pointer with the index of the entry beyond the
> -     * last valid descriptor." (ref 11337 p 27) */
>
> -    E1000_WRITE_REG(&adapter->hw, RDT, next);
> +	if (unlikely(++i == rx_ring->count))
> +		i = 0;
> +  }
> +
> +  if (likely(rx_ring->next_to_use != i)) {
> +    rx_ring->next_to_use = i;
> +    if (unlikely(i -- == 0))
> +		i = (rx_ring->count - 1);
> +
> +	wmb();
> +	writel(i, adapter->hw.hw_addr + rx_ring->rdt);
>    }
> -
> +
>    return E1000_DESC_UNUSED(adapter->rx_ring);
>  }
>
> @@ -5780,6 +5813,7 @@ e1000_tx_pqueue(struct net_device *netdev, struct
> sk_buff *skb)
>    if(E1000_DESC_UNUSED(adapter->tx_ring) <= (txd_needed + 1)) {
>      adapter->net_stats.tx_dropped++;
>      netif_stop_queue(netdev);
> +    mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1);
>      return -1;
>    }
>
> @@ -5883,7 +5917,7 @@ e1000_tx_clean(struct net_device *netdev)
>  }
>
>  static int
> -e1000_poll_on(struct net_device *dev)
> +e1000_poll_on(struct net_device *dev, int (*poll_intr)(struct net_device
> *, void *))
>  {
>    struct e1000_adapter *adapter = dev->priv;
>    unsigned long flags;
> @@ -5891,11 +5925,16 @@ e1000_poll_on(struct net_device *dev)
>    if (!dev->polling) {
>      printk("e1000_poll_on\n");
>      local_irq_save(flags);
> -    local_irq_disable();
> +
> +    set_bit(__E1000_DOWN, &adapter->flags);
> +
>      e1000_irq_disable(adapter);
>
>      /* reset the card - start in a clean state */
>
> +#ifdef CONFIG_E1000_NAPI
> +    netif_poll_disable(dev);
> +#endif
>      /* taken from e1000_down() */
>      e1000_reset(adapter);
>      e1000_clean_tx_ring(adapter, adapter->tx_ring);
> @@ -5907,8 +5946,21 @@ e1000_poll_on(struct net_device *dev)
>      e1000_configure_rx(adapter);
>      e1000_alloc_rx_buffers(adapter, adapter->rx_ring,
>                            E1000_DESC_UNUSED(adapter->rx_ring));
> +#ifdef CONFIG_E1000_NAPI
> +    netif_poll_enable(dev);
> +#endif
> +
> +    if (poll_intr) {
> +      dev->polling = CLICK_POLLING_INTERRUPTS;
> +      e1000_irq_enable(adapter);
> +    } else
> +      dev->polling = CLICK_POLLING_BUSY_WAITING;
> +
> +    adapter->poll_intr = poll_intr;
> +    wmb();
> +
> +    clear_bit(__E1000_DOWN, &adapter->flags);
>
> -    dev->polling = 2;
>      local_irq_restore(flags);
>    }
>
> @@ -5921,8 +5973,16 @@ e1000_poll_off(struct net_device *dev)
>    struct e1000_adapter *adapter = dev->priv;
>
>    if(dev->polling > 0){
> +    unsigned long flags;
> +    local_irq_save(flags);
> +
>      dev->polling = 0;
> -    e1000_irq_enable(adapter);
> +    if (!adapter->poll_intr)
> +	e1000_irq_enable(adapter);
> +    adapter->poll_intr = 0;
> +
> +    local_irq_restore(flags);
> +
>      printk("e1000_poll_off\n");
>    }
>
> diff --git a/elements/linuxmodule/polldevice.cc
> b/elements/linuxmodule/polldevice.cc
> index 7b84712..c0ec4ff 100644
> --- a/elements/linuxmodule/polldevice.cc
> +++ b/elements/linuxmodule/polldevice.cc
> @@ -46,6 +46,35 @@ extern "C" {
>  static int device_notifier_hook(struct notifier_block *nb, unsigned long
> val, void *v);
>  }
>
> +extern AnyDeviceMap to_device_map;
> +
> +int
> +PollDevice::poll_intr(net_device* dev, void *v)
> +{
> +    unsigned long flags = (unsigned long)v;
> +    if (flags & CLICK_POLL_INTERRUPT_TX) {
> +	to_device_map.lock(false);
> +	ToDevice *td = (ToDevice *)to_device_map.lookup(dev, 0);
> +	if (td) {
> +	    td->tx_wake_queue(dev);
> +	}
> +	to_device_map.unlock(false);
> +    }
> +
> +    if (flags & CLICK_POLL_INTERRUPT_RX) {
> +	poll_device_map.lock(false);
> +	PollDevice *pd = (PollDevice *)poll_device_map.lookup(dev, 0);
> +	if (pd) {
> +	    pd->_intr = true;
> +	    pd->_task.reschedule();
> +	}
> +	poll_device_map.unlock(false);
> +    } else
> +	dev->poll_irq_enable(dev);
> +
> +    return 0;
> +}
> +
>  void
>  PollDevice::static_initialize()
>  {
> @@ -64,6 +93,7 @@ PollDevice::static_cleanup()
>
>  PollDevice::PollDevice()
>  {
> +    _intr = false;
>  }
>
>  PollDevice::~PollDevice()
> @@ -74,22 +104,32 @@ PollDevice::~PollDevice()
>  int
>  PollDevice::configure(Vector<String> &conf, ErrorHandler *errh)
>  {
> -    _burst = 8;
>      _headroom = 64;
> +    unsigned burst = 0;
>      bool promisc = false, allow_nonexistent = false, quiet = false,
> timestamp = true;
>      if (cp_va_kparse(conf, this, errh,
>  		     "DEVNAME", cpkP+cpkM, cpString, &_devname,
>  		     "PROMISC", cpkP, cpBool, &promisc,
> -		     "BURST", cpkP, cpUnsigned, &_burst,
> +		     "BURST", cpkP, cpUnsigned, &burst,
>  		     "TIMESTAMP", 0, cpBool, &timestamp,
>  		     "QUIET", 0, cpBool, &quiet,
>  		     "ALLOW_NONEXISTENT", 0, cpBool, &allow_nonexistent,
>  		     "HEADROOM", 0, cpUnsigned, &_headroom,
> +		     "INTERRUPT", 0, cpBool, &_useintr,
>  		     cpEnd) < 0)
>  	return -1;
>      set_device_flags(promisc, timestamp, allow_nonexistent, quiet);
>
>  #if HAVE_LINUX_POLLING
> +    if (burst) {
> +	_burst = burst;
> +	if (_useintr && _burst < 32)
> +	    errh->warning("%s too small burst size for interrupting",
> _devname.c_str());
> +    } else {
> +	/** by default, e1000-napi makes up to 8000 interrupts in a sec.
> +	 * for more performance, burst should be large enough */
> +	_burst = _useintr ? 64 : 8;
> +    }
>      if (find_device(&poll_device_map, errh) < 0)
>  	return -1;
>      if (_dev && (!_dev->poll_on || _dev->polling < 0))
> @@ -124,8 +164,9 @@ you include a ToDevice for the same device. Try
> adding\n\
>
>      if (_dev && !_dev->polling) {
>  	/* turn off interrupt if interrupts weren't already off */
> -	_dev->poll_on(_dev);
> -	if (_dev->polling != 2)
> +	_dev->poll_on(_dev, _useintr ? poll_intr : NULL);
> +	if (_dev->polling != CLICK_POLLING_BUSY_WAITING
> +	  && _dev->polling != CLICK_POLLING_INTERRUPTS)
>  	    return errh->error("PollDevice detected wrong version of polling
> patch");
>      }
>
> @@ -182,8 +223,15 @@ PollDevice::cleanup(CleanupStage)
>      clear_device(&poll_device_map);
>
>      poll_device_map.lock(false);
> -    if (had_dev && had_dev->polling > 0 &&
> !poll_device_map.lookup(had_dev, 0))
> +    if (had_dev && had_dev->polling > 0
> +      && !poll_device_map.lookup(had_dev, 0)) {
>  	had_dev->poll_off(had_dev);
> +	if (_useintr && _intr && had_dev->poll_irq_enable) {
> +	   _intr = false;
> +	   had_dev->poll_irq_enable(had_dev);
> +	}
> +    }
> +
>      poll_device_map.unlock(false);
>  #endif
>  }
> @@ -249,6 +297,7 @@ PollDevice::run_task(Task *)
>  	    _buffers_reused++;
>  	skbmgr_recycle_skbs(new_skbs);
>      }
> +
>    }
>
>    for (int i = 0; i < got; i++) {
> @@ -300,7 +349,12 @@ PollDevice::run_task(Task *)
>  # endif
>
>    adjust_tickets(got);
> -  _task.fast_reschedule();
> +  if (!_useintr)
> +    _task.fast_reschedule();
> +  else if (_intr && _dev && _dev->poll_irq_enable) {
> +    _intr = false;
> +    _dev->poll_irq_enable(_dev);
> +  }
>    return got > 0;
>  #else
>    return false;
> @@ -321,13 +375,18 @@ PollDevice::change_device(net_device *dev)
>  	dev = 0;
>      }
>
> -    if (_dev)
> +    if (_dev) {
>  	_dev->poll_off(_dev);
> +	if (_useintr && _intr && _dev->poll_irq_enable) {
> +	   _intr = false;
> +	   _dev->poll_irq_enable(_dev);
> +	}
> +    }
>
>      set_device(dev, &poll_device_map, true);
>
>      if (_dev && !_dev->polling)
> -	_dev->poll_on(_dev);
> +	_dev->poll_on(_dev, _useintr ? poll_intr : 0);
>
>      if (_dev)
>  	_task.strong_reschedule();
> diff --git a/elements/linuxmodule/polldevice.hh
> b/elements/linuxmodule/polldevice.hh
> index 0cc5617..8b749f4 100644
> --- a/elements/linuxmodule/polldevice.hh
> +++ b/elements/linuxmodule/polldevice.hh
> @@ -82,6 +82,7 @@ Resets C<count> counter to zero when written.
>  =a FromDevice, ToDevice, FromHost, ToHost */
>
>  #include "elements/linuxmodule/anydevice.hh"
> +#include <click/atomic.hh>
>
>  class PollDevice : public AnyTaskDevice { public:
>
> @@ -90,6 +91,7 @@ class PollDevice : public AnyTaskDevice { public:
>
>    static void static_initialize();
>    static void static_cleanup();
> +  static int poll_intr(net_device* dev, void *v);
>
>    const char *class_name() const	{ return "PollDevice"; }
>    const char *port_count() const	{ return PORTS_0_1; }
> @@ -137,6 +139,9 @@ class PollDevice : public AnyTaskDevice { public:
>
>      unsigned _burst;
>      unsigned _headroom;
> +    bool _useintr;
> +
> +    volatile bool _intr;
>
>  };
>
> diff --git a/elements/linuxmodule/todevice.cc
> b/elements/linuxmodule/todevice.cc
> index 1c774d0..a5d5851 100644
> --- a/elements/linuxmodule/todevice.cc
> +++ b/elements/linuxmodule/todevice.cc
> @@ -41,7 +41,7 @@ CLICK_CXX_UNPROTECT
>  #include <click/cxxunprotect.h>
>
>  /* for watching when devices go offline */
> -static AnyDeviceMap to_device_map;
> +AnyDeviceMap to_device_map;
>  static struct notifier_block device_notifier;
>  extern "C" {
>  static int device_notifier_hook(struct notifier_block *nb, unsigned long
> val, void *v);
> @@ -78,7 +78,7 @@ ToDevice::static_cleanup()
>  #endif
>  }
>
> -inline void
> +void
>  ToDevice::tx_wake_queue(net_device *dev)
>  {
>      //click_chatter("%{element}::%s for dev %s\n", this, __func__,
> dev->name);
> @@ -364,7 +364,8 @@ ToDevice::run_task(Task *)
>
>  #if HAVE_LINUX_POLLING
>      if (is_polling) {
> -	reschedule = true;
> +	if (_dev->polling == CLICK_POLLING_BUSY_WAITING)
> +	    reschedule = true;
>  	// 9/18/06: Frederic Van Quickenborne reports (1/24/05) that ticket
>  	// adjustments in FromDevice+ToDevice cause odd behavior.  The ticket
>  	// adjustments actually don't feel necessary to me in From/ToDevice
> diff --git a/elements/linuxmodule/todevice.hh
> b/elements/linuxmodule/todevice.hh
> index 01aa264..6eaf305 100644
> --- a/elements/linuxmodule/todevice.hh
> +++ b/elements/linuxmodule/todevice.hh
> @@ -115,7 +115,7 @@ class ToDevice : public AnyTaskDevice { public:
>    bool run_task(Task *);
>
>    void reset_counts();
> -  inline void tx_wake_queue(net_device *);
> +  void tx_wake_queue(net_device *);
>    bool tx_intr();
>    void change_device(net_device *);
>
> ---
>
> ---
> Index: include/linux/netdevice.h
> ===================================================================
> --- include/linux/netdevice.h	(revision 10505)
> +++ include/linux/netdevice.h	(working copy)
> @@ -528,9 +528,14 @@
>  	 * device supports polling but interrupts are on, and > 0 if polling
>  	 * is on.
>  	 */
> +#define CLICK_POLL_INTERRUPT_RX 0x01
> +#define CLICK_POLL_INTERRUPT_TX 0x02
> +#define CLICK_POLLING_BUSY_WAITING 2
> +#define CLICK_POLLING_INTERRUPTS   3
>  	int			polling;
> -	int			(*poll_on)(struct net_device *);
> +	int			(*poll_on)(struct net_device *, int (*poll_intr)(struct net_device
> *, void *));
>  	int			(*poll_off)(struct net_device *);
> +	void		(*poll_irq_enable)(struct net_device*);
>  	/*
>  	 * rx_poll returns to caller a linked list of sk_buff objects received
>  	 * by the device. on call, the want argument specifies the number of
> ---
> _______________________________________________
> click mailing list
> click at amsterdam.lcs.mit.edu
> https://amsterdam.lcs.mit.edu/mailman/listinfo/click
>




More information about the click mailing list