[Click] [RFC] [PATCH] PollDevice/ToDevice improvement

Joonwoo Park joonwpark81 at gmail.com
Wed Nov 14 20:11:32 EST 2007


Hi Beyers!

I'm attaching as a file.
I'll looking forward your result :)

Thanks.

joonwpark.tistory.com
Joonwoo


2007/11/15, Beyers Cronje <bcronje at gmail.com>:
> Hi Joonwoo,
>
> I'm having problems applying your patch. Can you send me the patch as
> an attachment offlist please?
>
> Cheers
>
> Beyers
>
> On Nov 14, 2007 9:07 AM, Joonwoo Park <joonwpark81 at gmail.com> wrote:
> > I think polling packet method of the click which is implemented with polldevice and todevice is one of great advantage of it.
> > But IMO it has some disadvantage too because click does old fashioned polling packets which is spinning infinitely.
> > Therefore I think that the spinning eats much of processor time and makes influence to other tasks.
> > Then I managed to interrupt driven polling packet method for click which is using NAPI.
> > It makes limited number of interrupts in a sec and notifications to polldevice and todevice.
> > Especially It makes polldevice to use rx descriptors of nic (e1000) too.
> > Actually because old polldevice does spinning infinitely, it does not give chance to nic for queuing.
> >
> > In simple elements test (to maximizing performance for spinning method), my patch had slightly increased or almost same performance
> > comparing to spinning method and processor usage was greatly decreased. (about 1050000pps receive and 45% processor usage on Intel
> > core2 1.86 with e1000 82573L gigabit nic)
> >
> > * I don't have precise network benchmark system like a smartbit, so I'll be so much pleased someone measures the performance of this
> > patch.
> > * All test was performed on e1000 52547L, I'm afraid it won't work on the other e1000 nic.
> > * Please apply patch for linux after click it selves. (2.6.19.2)
> > * Patch contains e1000 link detection fix too
> > (partially https://pdos.csail.mit.edu/pipermail/click/2007-October/006447.html)
> > * It does not contain ToDevice multi-threading fix (https://pdos.csail.mit.edu/pipermail/click/2007-October/006436.html)
> >
> > Eddie, Can you check this please?
> >
> > Thanks in advance.
> >
> > joonwpark.tistory.com
> > Joonwoo
> >
> >
> > [PollDevice/ToDevice]: Interrupt driven polling packet method for click which is using NAPI.
> > It makes limited number of interrupts in a sec and notifications to polldevice and todevice.
> > Especially It makes polldevice to use rx descriptors of nic (e1000) too.
> > Actually because old polldevice does spinning infinitely, it does not give chance to nic for queuing.
> >
> > Signed-off-by: Joonwoo Park <joonwpark81 at gmail.com>
> >
> > ---
> >  drivers/e1000-7.x/src/e1000.h      |    1 +
> >  drivers/e1000-7.x/src/e1000_main.c |   80 +++++++++++++++++++++++++++++++-----
> >  elements/linuxmodule/polldevice.cc |   74 +++++++++++++++++++++++++++++----
> >  elements/linuxmodule/polldevice.hh |    5 ++
> >  elements/linuxmodule/todevice.cc   |    7 ++-
> >  elements/linuxmodule/todevice.hh   |    2 +-
> >  6 files changed, 146 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/e1000-7.x/src/e1000.h b/drivers/e1000-7.x/src/e1000.h
> > index 0b28ddd..28a7d94 100644
> > --- a/drivers/e1000-7.x/src/e1000.h
> > +++ b/drivers/e1000-7.x/src/e1000.h
> > @@ -392,6 +392,7 @@ struct e1000_adapter {
> >          unsigned long rx_quiet_jiffies;      /* jiffies timeout for the QUIET state */
> >          int prev_rdfh;             /* prev value of Rcv Data Fifo Head register */
> >          int prev_rdft;             /* prev value of Rcv Data Fifo Tail register */
> > +       int (*poll_intr)(struct net_device *, void *);
> >  };
> >
> >  enum e1000_state_t {
> > diff --git a/drivers/e1000-7.x/src/e1000_main.c b/drivers/e1000-7.x/src/e1000_main.c
> > index 2a3c5f0..374ee10 100644
> > --- a/drivers/e1000-7.x/src/e1000_main.c
> > +++ b/drivers/e1000-7.x/src/e1000_main.c
> > @@ -237,7 +237,7 @@ static int e1000_rx_refill(struct net_device *dev, struct sk_buff **);
> >  static int e1000_tx_eob(struct net_device *dev);
> >  static struct sk_buff *e1000_tx_clean(struct net_device *dev);
> >  static struct sk_buff *e1000_rx_poll(struct net_device *dev, int *want);
> > -static int e1000_poll_on(struct net_device *dev);
> > +static int e1000_poll_on(struct net_device *dev, int (*poll_intr)(struct net_device *, void *));
> >  static int e1000_poll_off(struct net_device *dev);
> >
> >  #ifdef CONFIG_NET_POLL_CONTROLLER
> > @@ -438,6 +438,13 @@ e1000_irq_enable(struct e1000_adapter *adapter)
> >                 E1000_WRITE_FLUSH(&adapter->hw);
> >         }
> >  }
> > +
> > +static void
> > +e1000_poll_irq_enable(struct net_device *netdev)
> > +{
> > +       struct e1000_adapter *adapter = (struct e1000_adapter*)netdev_priv(netdev);
> > +       e1000_irq_enable(adapter);
> > +}
> >  #ifdef NETIF_F_HW_VLAN_TX
> >
> >  static void
> > @@ -745,6 +752,12 @@ e1000_reinit_locked(struct e1000_adapter *adapter)
> >         e1000_down(adapter);
> >         e1000_up(adapter);
> >         clear_bit(__E1000_RESETTING, &adapter->flags);
> > +
> > +       if (adapter->netdev->polling) {
> > +               mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1);
> > +               mod_timer(&adapter->watchdog_timer, jiffies + 1);
> > +               mod_timer(&adapter->phy_info_timer, jiffies + 1);
> > +       }
> >  }
> >
> >  void
> > @@ -1045,6 +1058,8 @@ e1000_probe(struct pci_dev *pdev,
> >          netdev->tx_clean = e1000_tx_clean;
> >          netdev->poll_off = e1000_poll_off;
> >          netdev->poll_on = e1000_poll_on;
> > +        netdev->poll_irq_enable = e1000_poll_irq_enable;
> > +        adapter->poll_intr = NULL;
> >
> >  #ifdef CONFIG_NET_POLL_CONTROLLER
> >         netdev->poll_controller = e1000_netpoll;
> > @@ -2684,8 +2699,8 @@ e1000_82547_tx_fifo_stall(unsigned long data)
> >  static void
> >  e1000_watchdog(unsigned long data)
> >  {
> > -       struct e1000_adapter *adapter = (struct e1000_adapter *) data;
> > -  if(adapter->netdev->polling){
> > +  struct e1000_adapter *adapter = (struct e1000_adapter *) data;
> > +  if(adapter->netdev->polling == 2){
> >      adapter->do_poll_watchdog = 1;
> >    } else {
> >      e1000_watchdog_1(adapter);
> > @@ -4055,7 +4070,21 @@ e1000_intr(int irq, void *data)
> >                 E1000_WRITE_REG(hw, IMC, ~0);
> >                 E1000_WRITE_FLUSH(hw);
> >         }
> > -       if (likely(netif_rx_schedule_prep(netdev))) {
> > +       if (netdev->polling == 3) {
> > +               if (likely(adapter->poll_intr)) {
> > +                       uint32_t flags = 0;
> > +                       if (icr & E1000_ICR_TXDW)
> > +                               flags |= CLICK_POLL_INTERRUPT_TX;
> > +                       if (icr & E1000_ICR_RXT0)
> > +                               flags |= CLICK_POLL_INTERRUPT_RX;
> > +                       if (likely(flags))
> > +                               adapter->poll_intr(netdev, (void*)flags);
> > +                       else
> > +                               e1000_irq_enable(adapter);
> > +               }
> > +               else
> > +                       e1000_irq_enable(adapter);
> > +       } else if (likely(netif_rx_schedule_prep(netdev))) {
> >                 adapter->total_tx_bytes = 0;
> >                 adapter->total_tx_packets = 0;
> >                 adapter->total_rx_bytes = 0;
> > @@ -5700,7 +5729,7 @@ e1000_rx_refill(struct net_device *dev, struct sk_buff **skbs)
> >    struct pci_dev *pdev = adapter->pdev;
> >    struct e1000_rx_desc *rx_desc;
> >    struct sk_buff *skb;
> > -  int next;
> > +  int next = -1;
> >
> >    /*
> >     * Update statistics counters, check link.
> > @@ -5743,14 +5772,16 @@ e1000_rx_refill(struct net_device *dev, struct sk_buff **skbs)
> >
> >      rx_desc = E1000_RX_DESC(*rx_ring, i);
> >      rx_desc->buffer_addr = cpu_to_le64(rx_ring->buffer_info[i].dma);
> > -
> > +  }
> > +
> > +  if (next > -1) {
> >      /* Intel documnetation says: "Software adds receive descriptors by
> >       * writing the tail pointer with the index of the entry beyond the
> >       * last valid descriptor." (ref 11337 p 27) */
> >
> >      E1000_WRITE_REG(&adapter->hw, RDT, next);
> >    }
> > -
> > +
> >    return E1000_DESC_UNUSED(adapter->rx_ring);
> >  }
> >
> > @@ -5780,6 +5811,7 @@ e1000_tx_pqueue(struct net_device *netdev, struct sk_buff *skb)
> >    if(E1000_DESC_UNUSED(adapter->tx_ring) <= (txd_needed + 1)) {
> >      adapter->net_stats.tx_dropped++;
> >      netif_stop_queue(netdev);
> > +    mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1);
> >      return -1;
> >    }
> >
> > @@ -5883,7 +5915,7 @@ e1000_tx_clean(struct net_device *netdev)
> >  }
> >
> >  static int
> > -e1000_poll_on(struct net_device *dev)
> > +e1000_poll_on(struct net_device *dev, int (*poll_intr)(struct net_device *, void *))
> >  {
> >    struct e1000_adapter *adapter = dev->priv;
> >    unsigned long flags;
> > @@ -5891,11 +5923,16 @@ e1000_poll_on(struct net_device *dev)
> >    if (!dev->polling) {
> >      printk("e1000_poll_on\n");
> >      local_irq_save(flags);
> > -    local_irq_disable();
> > +
> > +    set_bit(__E1000_DOWN, &adapter->flags);
> > +
> >      e1000_irq_disable(adapter);
> >
> >      /* reset the card - start in a clean state */
> >
> > +#ifdef CONFIG_E1000_NAPI
> > +    netif_poll_disable(dev);
> > +#endif
> >      /* taken from e1000_down() */
> >      e1000_reset(adapter);
> >      e1000_clean_tx_ring(adapter, adapter->tx_ring);
> > @@ -5907,8 +5944,21 @@ e1000_poll_on(struct net_device *dev)
> >      e1000_configure_rx(adapter);
> >      e1000_alloc_rx_buffers(adapter, adapter->rx_ring,
> >                            E1000_DESC_UNUSED(adapter->rx_ring));
> > +#ifdef CONFIG_E1000_NAPI
> > +    netif_poll_enable(dev);
> > +#endif
> > +
> > +    if (poll_intr) {
> > +      dev->polling = 3;
> > +      e1000_irq_enable(adapter);
> > +    } else
> > +      dev->polling = 2;
> > +
> > +    adapter->poll_intr = poll_intr;
> > +    wmb();
> > +
> > +    clear_bit(__E1000_DOWN, &adapter->flags);
> >
> > -    dev->polling = 2;
> >      local_irq_restore(flags);
> >    }
> >
> > @@ -5921,8 +5971,16 @@ e1000_poll_off(struct net_device *dev)
> >    struct e1000_adapter *adapter = dev->priv;
> >
> >    if(dev->polling > 0){
> > +    unsigned long flags;
> > +    local_irq_save(flags);
> > +
> >      dev->polling = 0;
> > -    e1000_irq_enable(adapter);
> > +    if (!adapter->poll_intr)
> > +       e1000_irq_enable(adapter);
> > +    adapter->poll_intr = 0;
> > +
> > +    local_irq_restore(flags);
> > +
> >      printk("e1000_poll_off\n");
> >    }
> >
> > diff --git a/elements/linuxmodule/polldevice.cc b/elements/linuxmodule/polldevice.cc
> > index 7b84712..f22888f 100644
> > --- a/elements/linuxmodule/polldevice.cc
> > +++ b/elements/linuxmodule/polldevice.cc
> > @@ -46,6 +46,35 @@ extern "C" {
> >  static int device_notifier_hook(struct notifier_block *nb, unsigned long val, void *v);
> >  }
> >
> > +extern AnyDeviceMap to_device_map;
> > +
> > +int
> > +PollDevice::poll_intr(net_device* dev, void *v)
> > +{
> > +    uint32_t flags = (uint32_t)v;
> > +    if (flags & CLICK_POLL_INTERRUPT_TX) {
> > +       to_device_map.lock(false);
> > +       ToDevice *td = (ToDevice *)to_device_map.lookup(dev, 0);
> > +       if (td) {
> > +           td->tx_wake_queue(dev);
> > +       }
> > +       to_device_map.unlock(false);
> > +    }
> > +
> > +    if (flags & CLICK_POLL_INTERRUPT_RX) {
> > +       poll_device_map.lock(false);
> > +       PollDevice *pd = (PollDevice *)poll_device_map.lookup(dev, 0);
> > +       if (pd) {
> > +           pd->_intr = true;
> > +           pd->_task.reschedule();
> > +       }
> > +       poll_device_map.unlock(false);
> > +    } else
> > +       dev->poll_irq_enable(dev);
> > +
> > +    return 0;
> > +}
> > +
> >  void
> >  PollDevice::static_initialize()
> >  {
> > @@ -64,6 +93,7 @@ PollDevice::static_cleanup()
> >
> >  PollDevice::PollDevice()
> >  {
> > +    _intr = false;
> >  }
> >
> >  PollDevice::~PollDevice()
> > @@ -74,22 +104,32 @@ PollDevice::~PollDevice()
> >  int
> >  PollDevice::configure(Vector<String> &conf, ErrorHandler *errh)
> >  {
> > -    _burst = 8;
> >      _headroom = 64;
> > +    unsigned burst = 0;
> >      bool promisc = false, allow_nonexistent = false, quiet = false, timestamp = true;
> >      if (cp_va_kparse(conf, this, errh,
> >                      "DEVNAME", cpkP+cpkM, cpString, &_devname,
> >                      "PROMISC", cpkP, cpBool, &promisc,
> > -                    "BURST", cpkP, cpUnsigned, &_burst,
> > +                    "BURST", cpkP, cpUnsigned, &burst,
> >                      "TIMESTAMP", 0, cpBool, &timestamp,
> >                      "QUIET", 0, cpBool, &quiet,
> >                      "ALLOW_NONEXISTENT", 0, cpBool, &allow_nonexistent,
> >                      "HEADROOM", 0, cpUnsigned, &_headroom,
> > +                    "INTERRUPT", 0, cpBool, &_useintr,
> >                      cpEnd) < 0)
> >         return -1;
> >      set_device_flags(promisc, timestamp, allow_nonexistent, quiet);
> >
> >  #if HAVE_LINUX_POLLING
> > +    if (burst) {
> > +       _burst = burst;
> > +       if (_useintr && _burst < 32)
> > +           errh->warning("%s too small burst size for interrupting", _devname.c_str());
> > +    } else {
> > +       /** by default, e1000-napi makes up to 8000 interrupts in a sec.
> > +        * for more performance, burst should be large enough */
> > +       _burst = _useintr ? 64 : 8;
> > +    }
> >      if (find_device(&poll_device_map, errh) < 0)
> >         return -1;
> >      if (_dev && (!_dev->poll_on || _dev->polling < 0))
> > @@ -124,8 +164,8 @@ you include a ToDevice for the same device. Try adding\n\
> >
> >      if (_dev && !_dev->polling) {
> >         /* turn off interrupt if interrupts weren't already off */
> > -       _dev->poll_on(_dev);
> > -       if (_dev->polling != 2)
> > +       _dev->poll_on(_dev, _useintr ? poll_intr : NULL);
> > +       if (_dev->polling != 2 && _dev->polling != 3)
> >             return errh->error("PollDevice detected wrong version of polling patch");
> >      }
> >
> > @@ -182,8 +222,15 @@ PollDevice::cleanup(CleanupStage)
> >      clear_device(&poll_device_map);
> >
> >      poll_device_map.lock(false);
> > -    if (had_dev && had_dev->polling > 0 && !poll_device_map.lookup(had_dev, 0))
> > +    if (had_dev && had_dev->polling > 0
> > +      && !poll_device_map.lookup(had_dev, 0)) {
> >         had_dev->poll_off(had_dev);
> > +       if (_useintr && _intr && had_dev->poll_irq_enable) {
> > +          _intr = false;
> > +          had_dev->poll_irq_enable(had_dev);
> > +       }
> > +    }
> > +
> >      poll_device_map.unlock(false);
> >  #endif
> >  }
> > @@ -249,6 +296,7 @@ PollDevice::run_task(Task *)
> >             _buffers_reused++;
> >         skbmgr_recycle_skbs(new_skbs);
> >      }
> > +
> >    }
> >
> >    for (int i = 0; i < got; i++) {
> > @@ -300,7 +348,12 @@ PollDevice::run_task(Task *)
> >  # endif
> >
> >    adjust_tickets(got);
> > -  _task.fast_reschedule();
> > +  if (!_useintr)
> > +    _task.fast_reschedule();
> > +  else if (_intr && _dev && _dev->poll_irq_enable) {
> > +    _intr = false;
> > +    _dev->poll_irq_enable(_dev);
> > +  }
> >    return got > 0;
> >  #else
> >    return false;
> > @@ -321,13 +374,18 @@ PollDevice::change_device(net_device *dev)
> >         dev = 0;
> >      }
> >
> > -    if (_dev)
> > +    if (_dev) {
> >         _dev->poll_off(_dev);
> > +       if (_useintr && _intr && _dev->poll_irq_enable) {
> > +          _intr = false;
> > +          _dev->poll_irq_enable(_dev);
> > +       }
> > +    }
> >
> >      set_device(dev, &poll_device_map, true);
> >
> >      if (_dev && !_dev->polling)
> > -       _dev->poll_on(_dev);
> > +       _dev->poll_on(_dev, _useintr ? poll_intr : 0);
> >
> >      if (_dev)
> >         _task.strong_reschedule();
> > diff --git a/elements/linuxmodule/polldevice.hh b/elements/linuxmodule/polldevice.hh
> > index 0cc5617..8b749f4 100644
> > --- a/elements/linuxmodule/polldevice.hh
> > +++ b/elements/linuxmodule/polldevice.hh
> > @@ -82,6 +82,7 @@ Resets C<count> counter to zero when written.
> >  =a FromDevice, ToDevice, FromHost, ToHost */
> >
> >  #include "elements/linuxmodule/anydevice.hh"
> > +#include <click/atomic.hh>
> >
> >  class PollDevice : public AnyTaskDevice { public:
> >
> > @@ -90,6 +91,7 @@ class PollDevice : public AnyTaskDevice { public:
> >
> >    static void static_initialize();
> >    static void static_cleanup();
> > +  static int poll_intr(net_device* dev, void *v);
> >
> >    const char *class_name() const       { return "PollDevice"; }
> >    const char *port_count() const       { return PORTS_0_1; }
> > @@ -137,6 +139,9 @@ class PollDevice : public AnyTaskDevice { public:
> >
> >      unsigned _burst;
> >      unsigned _headroom;
> > +    bool _useintr;
> > +
> > +    volatile bool _intr;
> >
> >  };
> >
> > diff --git a/elements/linuxmodule/todevice.cc b/elements/linuxmodule/todevice.cc
> > index 1c774d0..5ab459f 100644
> > --- a/elements/linuxmodule/todevice.cc
> > +++ b/elements/linuxmodule/todevice.cc
> > @@ -41,7 +41,7 @@ CLICK_CXX_UNPROTECT
> >  #include <click/cxxunprotect.h>
> >
> >  /* for watching when devices go offline */
> > -static AnyDeviceMap to_device_map;
> > +AnyDeviceMap to_device_map;
> >  static struct notifier_block device_notifier;
> >  extern "C" {
> >  static int device_notifier_hook(struct notifier_block *nb, unsigned long val, void *v);
> > @@ -78,7 +78,7 @@ ToDevice::static_cleanup()
> >  #endif
> >  }
> >
> > -inline void
> > +void
> >  ToDevice::tx_wake_queue(net_device *dev)
> >  {
> >      //click_chatter("%{element}::%s for dev %s\n", this, __func__, dev->name);
> > @@ -364,7 +364,8 @@ ToDevice::run_task(Task *)
> >
> >  #if HAVE_LINUX_POLLING
> >      if (is_polling) {
> > -       reschedule = true;
> > +       if (_dev->polling == 2)
> > +           reschedule = true;
> >         // 9/18/06: Frederic Van Quickenborne reports (1/24/05) that ticket
> >         // adjustments in FromDevice+ToDevice cause odd behavior.  The ticket
> >         // adjustments actually don't feel necessary to me in From/ToDevice
> > diff --git a/elements/linuxmodule/todevice.hh b/elements/linuxmodule/todevice.hh
> > index 01aa264..6eaf305 100644
> > --- a/elements/linuxmodule/todevice.hh
> > +++ b/elements/linuxmodule/todevice.hh
> > @@ -115,7 +115,7 @@ class ToDevice : public AnyTaskDevice { public:
> >    bool run_task(Task *);
> >
> >    void reset_counts();
> > -  inline void tx_wake_queue(net_device *);
> > +  void tx_wake_queue(net_device *);
> >    bool tx_intr();
> >    void change_device(net_device *);
> >
> > ---
> >
> > ---
> > Index: include/linux/netdevice.h
> > ===================================================================
> > --- include/linux/netdevice.h
> > +++ include/linux/netdevice.h
> > @@ -528,9 +528,12 @@
> >          * device supports polling but interrupts are on, and > 0 if polling
> >          * is on.
> >          */
> > +#define CLICK_POLL_INTERRUPT_RX 0x01
> > +#define CLICK_POLL_INTERRUPT_TX 0x02
> >         int                     polling;
> > -       int                     (*poll_on)(struct net_device *);
> > +       int                     (*poll_on)(struct net_device *, int (*poll_intr)(struct net_device *, void *));
> >         int                     (*poll_off)(struct net_device *);
> > +       void            (*poll_irq_enable)(struct net_device*);
> >         /*
> >          * rx_poll returns to caller a linked list of sk_buff objects received
> >          * by the device. on call, the want argument specifies the number of
> > ---
> >
> > _______________________________________________
> > click mailing list
> > click at amsterdam.lcs.mit.edu
> > https://amsterdam.lcs.mit.edu/mailman/listinfo/click
> >
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: polldevice_napi_head.patch
Type: application/octet-stream
Size: 13901 bytes
Desc: not available
Url : https://pdos.csail.mit.edu/pipermail/click/attachments/20071114/b5d4e943/polldevice_napi_head-0001.obj
-------------- next part --------------
A non-text attachment was scrubbed...
Name: linux_netdevice.h.patch
Type: application/octet-stream
Size: 731 bytes
Desc: not available
Url : https://pdos.csail.mit.edu/pipermail/click/attachments/20071114/b5d4e943/linux_netdevice.h-0001.obj


More information about the click mailing list