[Click] [RFC] [PATCH] PollDevice/ToDevice improvement

Joonwoo Park joonwpark81 at gmail.com
Wed Nov 14 02:07:05 EST 2007


I think polling packet method of the click which is implemented with polldevice and todevice is one of great advantage of it.
But IMO it has some disadvantage too because click does old fashioned polling packets which is spinning infinitely.
Therefore I think that the spinning eats much of processor time and makes influence to other tasks.
Then I managed to interrupt driven polling packet method for click which is using NAPI.
It makes limited number of interrupts in a sec and notifications to polldevice and todevice.
Especially It makes polldevice to use rx descriptors of nic (e1000) too.
Actually because old polldevice does spinning infinitely, it does not give chance to nic for queuing.

In simple elements test (to maximizing performance for spinning method), my patch had slightly increased or almost same performance
comparing to spinning method and processor usage was greatly decreased. (about 1050000pps receive and 45% processor usage on Intel
core2 1.86 with e1000 82573L gigabit nic)

* I don't have precise network benchmark system like a smartbit, so I'll be so much pleased someone measures the performance of this
patch.
* All test was performed on e1000 52547L, I'm afraid it won't work on the other e1000 nic. 
* Please apply patch for linux after click it selves. (2.6.19.2) 
* Patch contains e1000 link detection fix too
(partially https://pdos.csail.mit.edu/pipermail/click/2007-October/006447.html)
* It does not contain ToDevice multi-threading fix (https://pdos.csail.mit.edu/pipermail/click/2007-October/006436.html)

Eddie, Can you check this please?

Thanks in advance.

joonwpark.tistory.com
Joonwoo


[PollDevice/ToDevice]: Interrupt driven polling packet method for click which is using NAPI.
It makes limited number of interrupts in a sec and notifications to polldevice and todevice.
Especially It makes polldevice to use rx descriptors of nic (e1000) too.
Actually because old polldevice does spinning infinitely, it does not give chance to nic for queuing.

Signed-off-by: Joonwoo Park <joonwpark81 at gmail.com>

---
 drivers/e1000-7.x/src/e1000.h      |    1 +
 drivers/e1000-7.x/src/e1000_main.c |   80 +++++++++++++++++++++++++++++++-----
 elements/linuxmodule/polldevice.cc |   74 +++++++++++++++++++++++++++++----
 elements/linuxmodule/polldevice.hh |    5 ++
 elements/linuxmodule/todevice.cc   |    7 ++-
 elements/linuxmodule/todevice.hh   |    2 +-
 6 files changed, 146 insertions(+), 23 deletions(-)

diff --git a/drivers/e1000-7.x/src/e1000.h b/drivers/e1000-7.x/src/e1000.h
index 0b28ddd..28a7d94 100644
--- a/drivers/e1000-7.x/src/e1000.h
+++ b/drivers/e1000-7.x/src/e1000.h
@@ -392,6 +392,7 @@ struct e1000_adapter {
         unsigned long rx_quiet_jiffies;      /* jiffies timeout for the QUIET state */
         int prev_rdfh;             /* prev value of Rcv Data Fifo Head register */
         int prev_rdft;             /* prev value of Rcv Data Fifo Tail register */
+	int (*poll_intr)(struct net_device *, void *);
 };
 
 enum e1000_state_t {
diff --git a/drivers/e1000-7.x/src/e1000_main.c b/drivers/e1000-7.x/src/e1000_main.c
index 2a3c5f0..374ee10 100644
--- a/drivers/e1000-7.x/src/e1000_main.c
+++ b/drivers/e1000-7.x/src/e1000_main.c
@@ -237,7 +237,7 @@ static int e1000_rx_refill(struct net_device *dev, struct sk_buff **);
 static int e1000_tx_eob(struct net_device *dev);
 static struct sk_buff *e1000_tx_clean(struct net_device *dev);
 static struct sk_buff *e1000_rx_poll(struct net_device *dev, int *want);
-static int e1000_poll_on(struct net_device *dev);
+static int e1000_poll_on(struct net_device *dev, int (*poll_intr)(struct net_device *, void *));
 static int e1000_poll_off(struct net_device *dev);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -438,6 +438,13 @@ e1000_irq_enable(struct e1000_adapter *adapter)
 		E1000_WRITE_FLUSH(&adapter->hw);
 	}
 }
+
+static void
+e1000_poll_irq_enable(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = (struct e1000_adapter*)netdev_priv(netdev);
+	e1000_irq_enable(adapter);
+}
 #ifdef NETIF_F_HW_VLAN_TX
 
 static void
@@ -745,6 +752,12 @@ e1000_reinit_locked(struct e1000_adapter *adapter)
 	e1000_down(adapter);
 	e1000_up(adapter);
 	clear_bit(__E1000_RESETTING, &adapter->flags);
+
+	if (adapter->netdev->polling) {
+		mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1);
+		mod_timer(&adapter->watchdog_timer, jiffies + 1);
+		mod_timer(&adapter->phy_info_timer, jiffies + 1);
+	}
 }
 
 void
@@ -1045,6 +1058,8 @@ e1000_probe(struct pci_dev *pdev,
         netdev->tx_clean = e1000_tx_clean;
         netdev->poll_off = e1000_poll_off;
         netdev->poll_on = e1000_poll_on;
+        netdev->poll_irq_enable = e1000_poll_irq_enable;
+        adapter->poll_intr = NULL;
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	netdev->poll_controller = e1000_netpoll;
@@ -2684,8 +2699,8 @@ e1000_82547_tx_fifo_stall(unsigned long data)
 static void
 e1000_watchdog(unsigned long data)
 {
-	struct e1000_adapter *adapter = (struct e1000_adapter *) data;
-  if(adapter->netdev->polling){
+  struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+  if(adapter->netdev->polling == 2){
     adapter->do_poll_watchdog = 1;
   } else {
     e1000_watchdog_1(adapter);
@@ -4055,7 +4070,21 @@ e1000_intr(int irq, void *data)
 		E1000_WRITE_REG(hw, IMC, ~0);
 		E1000_WRITE_FLUSH(hw);
 	}
-	if (likely(netif_rx_schedule_prep(netdev))) {
+	if (netdev->polling == 3) {
+		if (likely(adapter->poll_intr)) {
+			uint32_t flags = 0;
+			if (icr & E1000_ICR_TXDW)
+				flags |= CLICK_POLL_INTERRUPT_TX;
+			if (icr & E1000_ICR_RXT0)
+				flags |= CLICK_POLL_INTERRUPT_RX;
+			if (likely(flags))
+				adapter->poll_intr(netdev, (void*)flags);
+			else
+				e1000_irq_enable(adapter);
+		}
+		else
+			e1000_irq_enable(adapter);
+	} else if (likely(netif_rx_schedule_prep(netdev))) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
@@ -5700,7 +5729,7 @@ e1000_rx_refill(struct net_device *dev, struct sk_buff **skbs)
   struct pci_dev *pdev = adapter->pdev;
   struct e1000_rx_desc *rx_desc;
   struct sk_buff *skb;
-  int next;
+  int next = -1;
 
   /*
    * Update statistics counters, check link.
@@ -5743,14 +5772,16 @@ e1000_rx_refill(struct net_device *dev, struct sk_buff **skbs)
 
     rx_desc = E1000_RX_DESC(*rx_ring, i);
     rx_desc->buffer_addr = cpu_to_le64(rx_ring->buffer_info[i].dma);
-    
+  }
+
+  if (next > -1) {
     /* Intel documnetation says: "Software adds receive descriptors by
      * writing the tail pointer with the index of the entry beyond the
      * last valid descriptor." (ref 11337 p 27) */
 
     E1000_WRITE_REG(&adapter->hw, RDT, next);
   }
-  
+
   return E1000_DESC_UNUSED(adapter->rx_ring);
 }
 
@@ -5780,6 +5811,7 @@ e1000_tx_pqueue(struct net_device *netdev, struct sk_buff *skb)
   if(E1000_DESC_UNUSED(adapter->tx_ring) <= (txd_needed + 1)) {
     adapter->net_stats.tx_dropped++;
     netif_stop_queue(netdev);
+    mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1);
     return -1;
   }
 
@@ -5883,7 +5915,7 @@ e1000_tx_clean(struct net_device *netdev)
 }
 
 static int
-e1000_poll_on(struct net_device *dev)
+e1000_poll_on(struct net_device *dev, int (*poll_intr)(struct net_device *, void *))
 {
   struct e1000_adapter *adapter = dev->priv;
   unsigned long flags;
@@ -5891,11 +5923,16 @@ e1000_poll_on(struct net_device *dev)
   if (!dev->polling) {
     printk("e1000_poll_on\n");
     local_irq_save(flags);
-    local_irq_disable();
+
+    set_bit(__E1000_DOWN, &adapter->flags);
+
     e1000_irq_disable(adapter);
 
     /* reset the card - start in a clean state */
 
+#ifdef CONFIG_E1000_NAPI
+    netif_poll_disable(dev);
+#endif
     /* taken from e1000_down() */
     e1000_reset(adapter);
     e1000_clean_tx_ring(adapter, adapter->tx_ring);
@@ -5907,8 +5944,21 @@ e1000_poll_on(struct net_device *dev)
     e1000_configure_rx(adapter);
     e1000_alloc_rx_buffers(adapter, adapter->rx_ring,
                           E1000_DESC_UNUSED(adapter->rx_ring));
+#ifdef CONFIG_E1000_NAPI
+    netif_poll_enable(dev);
+#endif
+
+    if (poll_intr) {
+      dev->polling = 3;
+      e1000_irq_enable(adapter);
+    } else
+      dev->polling = 2;
+
+    adapter->poll_intr = poll_intr;
+    wmb();
+
+    clear_bit(__E1000_DOWN, &adapter->flags);
 
-    dev->polling = 2;
     local_irq_restore(flags);
   }
 
@@ -5921,8 +5971,16 @@ e1000_poll_off(struct net_device *dev)
   struct e1000_adapter *adapter = dev->priv;
 
   if(dev->polling > 0){
+    unsigned long flags;
+    local_irq_save(flags);
+
     dev->polling = 0;
-    e1000_irq_enable(adapter);
+    if (!adapter->poll_intr)
+	e1000_irq_enable(adapter);
+    adapter->poll_intr = 0;
+
+    local_irq_restore(flags);
+
     printk("e1000_poll_off\n");
   }
 
diff --git a/elements/linuxmodule/polldevice.cc b/elements/linuxmodule/polldevice.cc
index 7b84712..f22888f 100644
--- a/elements/linuxmodule/polldevice.cc
+++ b/elements/linuxmodule/polldevice.cc
@@ -46,6 +46,35 @@ extern "C" {
 static int device_notifier_hook(struct notifier_block *nb, unsigned long val, void *v);
 }
 
+extern AnyDeviceMap to_device_map;
+
+int
+PollDevice::poll_intr(net_device* dev, void *v)
+{
+    uint32_t flags = (uint32_t)v;
+    if (flags & CLICK_POLL_INTERRUPT_TX) {
+	to_device_map.lock(false);
+	ToDevice *td = (ToDevice *)to_device_map.lookup(dev, 0);
+	if (td) {
+	    td->tx_wake_queue(dev);
+	}
+	to_device_map.unlock(false);
+    }
+
+    if (flags & CLICK_POLL_INTERRUPT_RX) {
+	poll_device_map.lock(false);
+	PollDevice *pd = (PollDevice *)poll_device_map.lookup(dev, 0);
+	if (pd) {
+	    pd->_intr = true;
+	    pd->_task.reschedule();
+	}
+	poll_device_map.unlock(false);
+    } else
+	dev->poll_irq_enable(dev);
+
+    return 0;
+}
+
 void
 PollDevice::static_initialize()
 {
@@ -64,6 +93,7 @@ PollDevice::static_cleanup()
 
 PollDevice::PollDevice()
 {
+    _intr = false;
 }
 
 PollDevice::~PollDevice()
@@ -74,22 +104,32 @@ PollDevice::~PollDevice()
 int
 PollDevice::configure(Vector<String> &conf, ErrorHandler *errh)
 {
-    _burst = 8;
     _headroom = 64;
+    unsigned burst = 0;
     bool promisc = false, allow_nonexistent = false, quiet = false, timestamp = true;
     if (cp_va_kparse(conf, this, errh,
 		     "DEVNAME", cpkP+cpkM, cpString, &_devname,
 		     "PROMISC", cpkP, cpBool, &promisc,
-		     "BURST", cpkP, cpUnsigned, &_burst,
+		     "BURST", cpkP, cpUnsigned, &burst,
 		     "TIMESTAMP", 0, cpBool, &timestamp,
 		     "QUIET", 0, cpBool, &quiet,
 		     "ALLOW_NONEXISTENT", 0, cpBool, &allow_nonexistent,
 		     "HEADROOM", 0, cpUnsigned, &_headroom,
+		     "INTERRUPT", 0, cpBool, &_useintr,
 		     cpEnd) < 0)
 	return -1;
     set_device_flags(promisc, timestamp, allow_nonexistent, quiet);
     
 #if HAVE_LINUX_POLLING
+    if (burst) {
+	_burst = burst;
+	if (_useintr && _burst < 32)
+	    errh->warning("%s too small burst size for interrupting", _devname.c_str());
+    } else {
+	/** by default, e1000-napi makes up to 8000 interrupts in a sec.
+	 * for more performance, burst should be large enough */
+	_burst = _useintr ? 64 : 8;
+    }
     if (find_device(&poll_device_map, errh) < 0)
 	return -1;
     if (_dev && (!_dev->poll_on || _dev->polling < 0))
@@ -124,8 +164,8 @@ you include a ToDevice for the same device. Try adding\n\
 
     if (_dev && !_dev->polling) {
 	/* turn off interrupt if interrupts weren't already off */
-	_dev->poll_on(_dev);
-	if (_dev->polling != 2)
+	_dev->poll_on(_dev, _useintr ? poll_intr : NULL);
+	if (_dev->polling != 2 && _dev->polling != 3)
 	    return errh->error("PollDevice detected wrong version of polling patch");
     }
 
@@ -182,8 +222,15 @@ PollDevice::cleanup(CleanupStage)
     clear_device(&poll_device_map);
 
     poll_device_map.lock(false);
-    if (had_dev && had_dev->polling > 0 && !poll_device_map.lookup(had_dev, 0))
+    if (had_dev && had_dev->polling > 0
+      && !poll_device_map.lookup(had_dev, 0)) {
 	had_dev->poll_off(had_dev);
+	if (_useintr && _intr && had_dev->poll_irq_enable) {
+	   _intr = false;
+	   had_dev->poll_irq_enable(had_dev);
+	}
+    }
+
     poll_device_map.unlock(false);
 #endif
 }
@@ -249,6 +296,7 @@ PollDevice::run_task(Task *)
 	    _buffers_reused++;
 	skbmgr_recycle_skbs(new_skbs);
     }
+
   }
 
   for (int i = 0; i < got; i++) {
@@ -300,7 +348,12 @@ PollDevice::run_task(Task *)
 # endif
 
   adjust_tickets(got);
-  _task.fast_reschedule();
+  if (!_useintr)
+    _task.fast_reschedule();
+  else if (_intr && _dev && _dev->poll_irq_enable) {
+    _intr = false;
+    _dev->poll_irq_enable(_dev);
+  }
   return got > 0;
 #else
   return false;
@@ -321,13 +374,18 @@ PollDevice::change_device(net_device *dev)
 	dev = 0;
     }
     
-    if (_dev)
+    if (_dev) {
 	_dev->poll_off(_dev);
+	if (_useintr && _intr && _dev->poll_irq_enable) {
+	   _intr = false;
+	   _dev->poll_irq_enable(_dev);
+	}
+    }
 
     set_device(dev, &poll_device_map, true);
     
     if (_dev && !_dev->polling)
-	_dev->poll_on(_dev);
+	_dev->poll_on(_dev, _useintr ? poll_intr : 0);
 
     if (_dev)
 	_task.strong_reschedule();
diff --git a/elements/linuxmodule/polldevice.hh b/elements/linuxmodule/polldevice.hh
index 0cc5617..8b749f4 100644
--- a/elements/linuxmodule/polldevice.hh
+++ b/elements/linuxmodule/polldevice.hh
@@ -82,6 +82,7 @@ Resets C<count> counter to zero when written.
 =a FromDevice, ToDevice, FromHost, ToHost */
 
 #include "elements/linuxmodule/anydevice.hh"
+#include <click/atomic.hh>
 
 class PollDevice : public AnyTaskDevice { public:
   
@@ -90,6 +91,7 @@ class PollDevice : public AnyTaskDevice { public:
 
   static void static_initialize();
   static void static_cleanup();
+  static int poll_intr(net_device* dev, void *v);
   
   const char *class_name() const	{ return "PollDevice"; }
   const char *port_count() const	{ return PORTS_0_1; }
@@ -137,6 +139,9 @@ class PollDevice : public AnyTaskDevice { public:
 
     unsigned _burst;
     unsigned _headroom;
+    bool _useintr;
+
+    volatile bool _intr;
 
 };
 
diff --git a/elements/linuxmodule/todevice.cc b/elements/linuxmodule/todevice.cc
index 1c774d0..5ab459f 100644
--- a/elements/linuxmodule/todevice.cc
+++ b/elements/linuxmodule/todevice.cc
@@ -41,7 +41,7 @@ CLICK_CXX_UNPROTECT
 #include <click/cxxunprotect.h>
 
 /* for watching when devices go offline */
-static AnyDeviceMap to_device_map;
+AnyDeviceMap to_device_map;
 static struct notifier_block device_notifier;
 extern "C" {
 static int device_notifier_hook(struct notifier_block *nb, unsigned long val, void *v);
@@ -78,7 +78,7 @@ ToDevice::static_cleanup()
 #endif
 }
 
-inline void
+void
 ToDevice::tx_wake_queue(net_device *dev) 
 {
     //click_chatter("%{element}::%s for dev %s\n", this, __func__, dev->name);
@@ -364,7 +364,8 @@ ToDevice::run_task(Task *)
     
 #if HAVE_LINUX_POLLING
     if (is_polling) {
-	reschedule = true;
+	if (_dev->polling == 2)
+	    reschedule = true;
 	// 9/18/06: Frederic Van Quickenborne reports (1/24/05) that ticket
 	// adjustments in FromDevice+ToDevice cause odd behavior.  The ticket
 	// adjustments actually don't feel necessary to me in From/ToDevice
diff --git a/elements/linuxmodule/todevice.hh b/elements/linuxmodule/todevice.hh
index 01aa264..6eaf305 100644
--- a/elements/linuxmodule/todevice.hh
+++ b/elements/linuxmodule/todevice.hh
@@ -115,7 +115,7 @@ class ToDevice : public AnyTaskDevice { public:
   bool run_task(Task *);
 
   void reset_counts();
-  inline void tx_wake_queue(net_device *);
+  void tx_wake_queue(net_device *);
   bool tx_intr();
   void change_device(net_device *);
 
---

---
Index: include/linux/netdevice.h
===================================================================
--- include/linux/netdevice.h
+++ include/linux/netdevice.h
@@ -528,9 +528,12 @@
 	 * device supports polling but interrupts are on, and > 0 if polling
 	 * is on.
 	 */
+#define CLICK_POLL_INTERRUPT_RX 0x01
+#define CLICK_POLL_INTERRUPT_TX 0x02
 	int			polling;
-	int			(*poll_on)(struct net_device *);
+	int			(*poll_on)(struct net_device *, int (*poll_intr)(struct net_device *, void *));
 	int			(*poll_off)(struct net_device *);
+	void		(*poll_irq_enable)(struct net_device*);
 	/*
 	 * rx_poll returns to caller a linked list of sk_buff objects received
 	 * by the device. on call, the want argument specifies the number of
---



More information about the click mailing list