[Click] [RFC] [PATCH] PollDevice/ToDevice improvement

Joonwoo Park joonwpark81 at gmail.com
Thu Nov 15 02:54:02 EST 2007


2007/11/14, Joonwoo Park <joonwpark81 at gmail.com>:
> I think polling packet method of the click which is implemented with polldevice and todevice is one of great advantage of it.
> But IMO it has some disadvantage too because click does old fashioned polling packets which is spinning infinitely.
> Therefore I think that the spinning eats much of processor time and makes influence to other tasks.
> Then I managed to interrupt driven polling packet method for click which is using NAPI.
> It makes limited number of interrupts in a sec and notifications to polldevice and todevice.
> Especially It makes polldevice to use rx descriptors of nic (e1000) too.
> Actually because old polldevice does spinning infinitely, it does not give chance to nic for queuing.
> 

Some bugs have been fixed.
- Actually, my last patch included improvement of removing unnecessary writing of RDT in e1000_rx_refill. But there was a bug which
makes rx hang in my work. It has fixed.
- Fixed to call poll_intr when a rx overrun interrupted.
- And some improvements, use defined constant value for dev->polling, remove compiler warnings. (Thanks Kevin)

Thanks.

p.s. Kevin, Don't you see compiler warning on x86_64 now?

joonwpark.tistory.com
Joonwoo


[PollDevice/ToDevice]: Interrupt driven polling packet method for click which is using NAPI.
It makes limited number of interrupts in a sec and notifications to polldevice and todevice.
Especially It makes polldevice to use rx descriptors of nic (e1000) too.
Actually because old polldevice does spinning infinitely, it does not give chance to nic for queuing.

Signed-off-by: Joonwoo Park <joonwpark81 at gmail.com>

---
 drivers/e1000-7.x/src/e1000.h      |    1 +
 drivers/e1000-7.x/src/e1000_main.c |  100 ++++++++++++++++++++++++++++-------
 elements/linuxmodule/polldevice.cc |   75 ++++++++++++++++++++++++---
 elements/linuxmodule/polldevice.hh |    5 ++
 elements/linuxmodule/todevice.cc   |    7 ++-
 elements/linuxmodule/todevice.hh   |    2 +-
 6 files changed, 158 insertions(+), 32 deletions(-)

diff --git a/drivers/e1000-7.x/src/e1000.h b/drivers/e1000-7.x/src/e1000.h
index 0b28ddd..28a7d94 100644
--- a/drivers/e1000-7.x/src/e1000.h
+++ b/drivers/e1000-7.x/src/e1000.h
@@ -392,6 +392,7 @@ struct e1000_adapter {
         unsigned long rx_quiet_jiffies;      /* jiffies timeout for the QUIET state */
         int prev_rdfh;             /* prev value of Rcv Data Fifo Head register */
         int prev_rdft;             /* prev value of Rcv Data Fifo Tail register */
+	int (*poll_intr)(struct net_device *, void *);
 };
 
 enum e1000_state_t {
diff --git a/drivers/e1000-7.x/src/e1000_main.c b/drivers/e1000-7.x/src/e1000_main.c
index 2a3c5f0..bd1fd9d 100644
--- a/drivers/e1000-7.x/src/e1000_main.c
+++ b/drivers/e1000-7.x/src/e1000_main.c
@@ -237,7 +237,7 @@ static int e1000_rx_refill(struct net_device *dev, struct sk_buff **);
 static int e1000_tx_eob(struct net_device *dev);
 static struct sk_buff *e1000_tx_clean(struct net_device *dev);
 static struct sk_buff *e1000_rx_poll(struct net_device *dev, int *want);
-static int e1000_poll_on(struct net_device *dev);
+static int e1000_poll_on(struct net_device *dev, int (*poll_intr)(struct net_device *, void *));
 static int e1000_poll_off(struct net_device *dev);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -438,6 +438,13 @@ e1000_irq_enable(struct e1000_adapter *adapter)
 		E1000_WRITE_FLUSH(&adapter->hw);
 	}
 }
+
+static void
+e1000_poll_irq_enable(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = (struct e1000_adapter*)netdev_priv(netdev);
+	e1000_irq_enable(adapter);
+}
 #ifdef NETIF_F_HW_VLAN_TX
 
 static void
@@ -745,6 +752,12 @@ e1000_reinit_locked(struct e1000_adapter *adapter)
 	e1000_down(adapter);
 	e1000_up(adapter);
 	clear_bit(__E1000_RESETTING, &adapter->flags);
+
+	if (adapter->netdev->polling) {
+		mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1);
+		mod_timer(&adapter->watchdog_timer, jiffies + 1);
+		mod_timer(&adapter->phy_info_timer, jiffies + 1);
+	}
 }
 
 void
@@ -1045,6 +1058,8 @@ e1000_probe(struct pci_dev *pdev,
         netdev->tx_clean = e1000_tx_clean;
         netdev->poll_off = e1000_poll_off;
         netdev->poll_on = e1000_poll_on;
+        netdev->poll_irq_enable = e1000_poll_irq_enable;
+        adapter->poll_intr = NULL;
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	netdev->poll_controller = e1000_netpoll;
@@ -2684,8 +2699,8 @@ e1000_82547_tx_fifo_stall(unsigned long data)
 static void
 e1000_watchdog(unsigned long data)
 {
-	struct e1000_adapter *adapter = (struct e1000_adapter *) data;
-  if(adapter->netdev->polling){
+  struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+  if(adapter->netdev->polling == 2){
     adapter->do_poll_watchdog = 1;
   } else {
     e1000_watchdog_1(adapter);
@@ -4055,7 +4070,21 @@ e1000_intr(int irq, void *data)
 		E1000_WRITE_REG(hw, IMC, ~0);
 		E1000_WRITE_FLUSH(hw);
 	}
-	if (likely(netif_rx_schedule_prep(netdev))) {
+	if (netdev->polling == CLICK_POLLING_INTERRUPTS) {
+		if (likely(adapter->poll_intr)) {
+			unsigned long flags = 0;
+			if (icr & E1000_ICR_TXDW)
+				flags |= CLICK_POLL_INTERRUPT_TX;
+			if (icr & (E1000_ICR_RXT0 | E1000_ICR_RXO))
+				flags |= CLICK_POLL_INTERRUPT_RX;
+			if (likely(flags))
+				adapter->poll_intr(netdev, (void*)flags);
+			else
+				e1000_irq_enable(adapter);
+		}
+		else
+			e1000_irq_enable(adapter);
+	} else if (likely(netif_rx_schedule_prep(netdev))) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
@@ -5700,7 +5729,7 @@ e1000_rx_refill(struct net_device *dev, struct sk_buff **skbs)
   struct pci_dev *pdev = adapter->pdev;
   struct e1000_rx_desc *rx_desc;
   struct sk_buff *skb;
-  int next;
+  int i;
 
   /*
    * Update statistics counters, check link.
@@ -5719,11 +5748,9 @@ e1000_rx_refill(struct net_device *dev, struct sk_buff **skbs)
   if(skbs == 0)
     return E1000_DESC_UNUSED(rx_ring);
 
-  for( next = (rx_ring->next_to_use + 1) % rx_ring->count;
-       next != rx_ring->next_to_clean;
-       rx_ring->next_to_use = next,
-	 next = (rx_ring->next_to_use + 1) % rx_ring->count ) {
-    int i = rx_ring->next_to_use;
+  i = rx_ring->next_to_use;
+
+  while (1) {
     if(rx_ring->buffer_info[i].skb != NULL)
       break;
     
@@ -5743,14 +5770,20 @@ e1000_rx_refill(struct net_device *dev, struct sk_buff **skbs)
 
     rx_desc = E1000_RX_DESC(*rx_ring, i);
     rx_desc->buffer_addr = cpu_to_le64(rx_ring->buffer_info[i].dma);
-    
-    /* Intel documnetation says: "Software adds receive descriptors by
-     * writing the tail pointer with the index of the entry beyond the
-     * last valid descriptor." (ref 11337 p 27) */
 
-    E1000_WRITE_REG(&adapter->hw, RDT, next);
+	if (unlikely(++i == rx_ring->count))
+		i = 0;
+  }
+
+  if (likely(rx_ring->next_to_use != i)) {
+    rx_ring->next_to_use = i;
+    if (unlikely(i -- == 0))
+		i = (rx_ring->count - 1);
+
+	wmb();
+	writel(i, adapter->hw.hw_addr + rx_ring->rdt);
   }
-  
+
   return E1000_DESC_UNUSED(adapter->rx_ring);
 }
 
@@ -5780,6 +5813,7 @@ e1000_tx_pqueue(struct net_device *netdev, struct sk_buff *skb)
   if(E1000_DESC_UNUSED(adapter->tx_ring) <= (txd_needed + 1)) {
     adapter->net_stats.tx_dropped++;
     netif_stop_queue(netdev);
+    mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1);
     return -1;
   }
 
@@ -5883,7 +5917,7 @@ e1000_tx_clean(struct net_device *netdev)
 }
 
 static int
-e1000_poll_on(struct net_device *dev)
+e1000_poll_on(struct net_device *dev, int (*poll_intr)(struct net_device *, void *))
 {
   struct e1000_adapter *adapter = dev->priv;
   unsigned long flags;
@@ -5891,11 +5925,16 @@ e1000_poll_on(struct net_device *dev)
   if (!dev->polling) {
     printk("e1000_poll_on\n");
     local_irq_save(flags);
-    local_irq_disable();
+
+    set_bit(__E1000_DOWN, &adapter->flags);
+
     e1000_irq_disable(adapter);
 
     /* reset the card - start in a clean state */
 
+#ifdef CONFIG_E1000_NAPI
+    netif_poll_disable(dev);
+#endif
     /* taken from e1000_down() */
     e1000_reset(adapter);
     e1000_clean_tx_ring(adapter, adapter->tx_ring);
@@ -5907,8 +5946,21 @@ e1000_poll_on(struct net_device *dev)
     e1000_configure_rx(adapter);
     e1000_alloc_rx_buffers(adapter, adapter->rx_ring,
                           E1000_DESC_UNUSED(adapter->rx_ring));
+#ifdef CONFIG_E1000_NAPI
+    netif_poll_enable(dev);
+#endif
+
+    if (poll_intr) {
+      dev->polling = CLICK_POLLING_INTERRUPTS;
+      e1000_irq_enable(adapter);
+    } else
+      dev->polling = CLICK_POLLING_BUSY_WAITING;
+
+    adapter->poll_intr = poll_intr;
+    wmb();
+
+    clear_bit(__E1000_DOWN, &adapter->flags);
 
-    dev->polling = 2;
     local_irq_restore(flags);
   }
 
@@ -5921,8 +5973,16 @@ e1000_poll_off(struct net_device *dev)
   struct e1000_adapter *adapter = dev->priv;
 
   if(dev->polling > 0){
+    unsigned long flags;
+    local_irq_save(flags);
+
     dev->polling = 0;
-    e1000_irq_enable(adapter);
+    if (!adapter->poll_intr)
+	e1000_irq_enable(adapter);
+    adapter->poll_intr = 0;
+
+    local_irq_restore(flags);
+
     printk("e1000_poll_off\n");
   }
 
diff --git a/elements/linuxmodule/polldevice.cc b/elements/linuxmodule/polldevice.cc
index 7b84712..c0ec4ff 100644
--- a/elements/linuxmodule/polldevice.cc
+++ b/elements/linuxmodule/polldevice.cc
@@ -46,6 +46,35 @@ extern "C" {
 static int device_notifier_hook(struct notifier_block *nb, unsigned long val, void *v);
 }
 
+extern AnyDeviceMap to_device_map;
+
+int
+PollDevice::poll_intr(net_device* dev, void *v)
+{
+    unsigned long flags = (unsigned long)v;
+    if (flags & CLICK_POLL_INTERRUPT_TX) {
+	to_device_map.lock(false);
+	ToDevice *td = (ToDevice *)to_device_map.lookup(dev, 0);
+	if (td) {
+	    td->tx_wake_queue(dev);
+	}
+	to_device_map.unlock(false);
+    }
+
+    if (flags & CLICK_POLL_INTERRUPT_RX) {
+	poll_device_map.lock(false);
+	PollDevice *pd = (PollDevice *)poll_device_map.lookup(dev, 0);
+	if (pd) {
+	    pd->_intr = true;
+	    pd->_task.reschedule();
+	}
+	poll_device_map.unlock(false);
+    } else
+	dev->poll_irq_enable(dev);
+
+    return 0;
+}
+
 void
 PollDevice::static_initialize()
 {
@@ -64,6 +93,7 @@ PollDevice::static_cleanup()
 
 PollDevice::PollDevice()
 {
+    _intr = false;
 }
 
 PollDevice::~PollDevice()
@@ -74,22 +104,32 @@ PollDevice::~PollDevice()
 int
 PollDevice::configure(Vector<String> &conf, ErrorHandler *errh)
 {
-    _burst = 8;
     _headroom = 64;
+    unsigned burst = 0;
     bool promisc = false, allow_nonexistent = false, quiet = false, timestamp = true;
     if (cp_va_kparse(conf, this, errh,
 		     "DEVNAME", cpkP+cpkM, cpString, &_devname,
 		     "PROMISC", cpkP, cpBool, &promisc,
-		     "BURST", cpkP, cpUnsigned, &_burst,
+		     "BURST", cpkP, cpUnsigned, &burst,
 		     "TIMESTAMP", 0, cpBool, &timestamp,
 		     "QUIET", 0, cpBool, &quiet,
 		     "ALLOW_NONEXISTENT", 0, cpBool, &allow_nonexistent,
 		     "HEADROOM", 0, cpUnsigned, &_headroom,
+		     "INTERRUPT", 0, cpBool, &_useintr,
 		     cpEnd) < 0)
 	return -1;
     set_device_flags(promisc, timestamp, allow_nonexistent, quiet);
     
 #if HAVE_LINUX_POLLING
+    if (burst) {
+	_burst = burst;
+	if (_useintr && _burst < 32)
+	    errh->warning("%s too small burst size for interrupting", _devname.c_str());
+    } else {
+	/** by default, e1000-napi makes up to 8000 interrupts in a sec.
+	 * for more performance, burst should be large enough */
+	_burst = _useintr ? 64 : 8;
+    }
     if (find_device(&poll_device_map, errh) < 0)
 	return -1;
     if (_dev && (!_dev->poll_on || _dev->polling < 0))
@@ -124,8 +164,9 @@ you include a ToDevice for the same device. Try adding\n\
 
     if (_dev && !_dev->polling) {
 	/* turn off interrupt if interrupts weren't already off */
-	_dev->poll_on(_dev);
-	if (_dev->polling != 2)
+	_dev->poll_on(_dev, _useintr ? poll_intr : NULL);
+	if (_dev->polling != CLICK_POLLING_BUSY_WAITING
+	  && _dev->polling != CLICK_POLLING_INTERRUPTS)
 	    return errh->error("PollDevice detected wrong version of polling patch");
     }
 
@@ -182,8 +223,15 @@ PollDevice::cleanup(CleanupStage)
     clear_device(&poll_device_map);
 
     poll_device_map.lock(false);
-    if (had_dev && had_dev->polling > 0 && !poll_device_map.lookup(had_dev, 0))
+    if (had_dev && had_dev->polling > 0
+      && !poll_device_map.lookup(had_dev, 0)) {
 	had_dev->poll_off(had_dev);
+	if (_useintr && _intr && had_dev->poll_irq_enable) {
+	   _intr = false;
+	   had_dev->poll_irq_enable(had_dev);
+	}
+    }
+
     poll_device_map.unlock(false);
 #endif
 }
@@ -249,6 +297,7 @@ PollDevice::run_task(Task *)
 	    _buffers_reused++;
 	skbmgr_recycle_skbs(new_skbs);
     }
+
   }
 
   for (int i = 0; i < got; i++) {
@@ -300,7 +349,12 @@ PollDevice::run_task(Task *)
 # endif
 
   adjust_tickets(got);
-  _task.fast_reschedule();
+  if (!_useintr)
+    _task.fast_reschedule();
+  else if (_intr && _dev && _dev->poll_irq_enable) {
+    _intr = false;
+    _dev->poll_irq_enable(_dev);
+  }
   return got > 0;
 #else
   return false;
@@ -321,13 +375,18 @@ PollDevice::change_device(net_device *dev)
 	dev = 0;
     }
     
-    if (_dev)
+    if (_dev) {
 	_dev->poll_off(_dev);
+	if (_useintr && _intr && _dev->poll_irq_enable) {
+	   _intr = false;
+	   _dev->poll_irq_enable(_dev);
+	}
+    }
 
     set_device(dev, &poll_device_map, true);
     
     if (_dev && !_dev->polling)
-	_dev->poll_on(_dev);
+	_dev->poll_on(_dev, _useintr ? poll_intr : 0);
 
     if (_dev)
 	_task.strong_reschedule();
diff --git a/elements/linuxmodule/polldevice.hh b/elements/linuxmodule/polldevice.hh
index 0cc5617..8b749f4 100644
--- a/elements/linuxmodule/polldevice.hh
+++ b/elements/linuxmodule/polldevice.hh
@@ -82,6 +82,7 @@ Resets C<count> counter to zero when written.
 =a FromDevice, ToDevice, FromHost, ToHost */
 
 #include "elements/linuxmodule/anydevice.hh"
+#include <click/atomic.hh>
 
 class PollDevice : public AnyTaskDevice { public:
   
@@ -90,6 +91,7 @@ class PollDevice : public AnyTaskDevice { public:
 
   static void static_initialize();
   static void static_cleanup();
+  static int poll_intr(net_device* dev, void *v);
   
   const char *class_name() const	{ return "PollDevice"; }
   const char *port_count() const	{ return PORTS_0_1; }
@@ -137,6 +139,9 @@ class PollDevice : public AnyTaskDevice { public:
 
     unsigned _burst;
     unsigned _headroom;
+    bool _useintr;
+
+    volatile bool _intr;
 
 };
 
diff --git a/elements/linuxmodule/todevice.cc b/elements/linuxmodule/todevice.cc
index 1c774d0..a5d5851 100644
--- a/elements/linuxmodule/todevice.cc
+++ b/elements/linuxmodule/todevice.cc
@@ -41,7 +41,7 @@ CLICK_CXX_UNPROTECT
 #include <click/cxxunprotect.h>
 
 /* for watching when devices go offline */
-static AnyDeviceMap to_device_map;
+AnyDeviceMap to_device_map;
 static struct notifier_block device_notifier;
 extern "C" {
 static int device_notifier_hook(struct notifier_block *nb, unsigned long val, void *v);
@@ -78,7 +78,7 @@ ToDevice::static_cleanup()
 #endif
 }
 
-inline void
+void
 ToDevice::tx_wake_queue(net_device *dev) 
 {
     //click_chatter("%{element}::%s for dev %s\n", this, __func__, dev->name);
@@ -364,7 +364,8 @@ ToDevice::run_task(Task *)
     
 #if HAVE_LINUX_POLLING
     if (is_polling) {
-	reschedule = true;
+	if (_dev->polling == CLICK_POLLING_BUSY_WAITING)
+	    reschedule = true;
 	// 9/18/06: Frederic Van Quickenborne reports (1/24/05) that ticket
 	// adjustments in FromDevice+ToDevice cause odd behavior.  The ticket
 	// adjustments actually don't feel necessary to me in From/ToDevice
diff --git a/elements/linuxmodule/todevice.hh b/elements/linuxmodule/todevice.hh
index 01aa264..6eaf305 100644
--- a/elements/linuxmodule/todevice.hh
+++ b/elements/linuxmodule/todevice.hh
@@ -115,7 +115,7 @@ class ToDevice : public AnyTaskDevice { public:
   bool run_task(Task *);
 
   void reset_counts();
-  inline void tx_wake_queue(net_device *);
+  void tx_wake_queue(net_device *);
   bool tx_intr();
   void change_device(net_device *);
 
---

---
Index: include/linux/netdevice.h
===================================================================
--- include/linux/netdevice.h	(revision 10505)
+++ include/linux/netdevice.h	(working copy)
@@ -528,9 +528,14 @@
 	 * device supports polling but interrupts are on, and > 0 if polling
 	 * is on.
 	 */
+#define CLICK_POLL_INTERRUPT_RX 0x01
+#define CLICK_POLL_INTERRUPT_TX 0x02
+#define CLICK_POLLING_BUSY_WAITING 2
+#define CLICK_POLLING_INTERRUPTS   3	
 	int			polling;
-	int			(*poll_on)(struct net_device *);
+	int			(*poll_on)(struct net_device *, int (*poll_intr)(struct net_device *, void *));
 	int			(*poll_off)(struct net_device *);
+	void		(*poll_irq_enable)(struct net_device*);
 	/*
 	 * rx_poll returns to caller a linked list of sk_buff objects received
 	 * by the device. on call, the want argument specifies the number of
---
-------------- next part --------------
A non-text attachment was scrubbed...
Name: polldevice_napi_head.1.patch
Type: application/octet-stream
Size: 14777 bytes
Desc: not available
Url : https://pdos.csail.mit.edu/pipermail/click/attachments/20071115/a3c8a864/polldevice_napi_head.1-0001.obj
-------------- next part --------------
A non-text attachment was scrubbed...
Name: linux_netdevice.h.1.patch
Type: application/octet-stream
Size: 841 bytes
Desc: not available
Url : https://pdos.csail.mit.edu/pipermail/click/attachments/20071115/a3c8a864/linux_netdevice.h.1-0001.obj


More information about the click mailing list