X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fnet%2Fiseries_veth.c;h=ff015e15f5d1af8ec9e2c11b696ddc20511a76ce;hb=5f8ae5c537d937bab9cfeb83a30a9b670c3cfb35;hp=855f8b2cf13b6f83d1b41011253d514b22e5cc2c;hpb=1da177e4c3f41524e886b7f1b8a0c1fc7321cac2;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index 855f8b2..ff015e1 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -4,6 +4,7 @@ * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp. * Substantially cleaned up by: * Copyright (C) 2003 David Gibson , IBM Corporation. + * Copyright (C) 2004-2005 Michael Ellerman, IBM Corporation. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -55,9 +56,7 @@ * number of packets outstanding to a remote partition at a time. */ -#include #include -#include #include #include #include @@ -69,22 +68,67 @@ #include #include #include -#include -#include -#include +#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include "iseries_veth.h" +#undef DEBUG MODULE_AUTHOR("Kyle Lucke "); MODULE_DESCRIPTION("iSeries Virtual ethernet driver"); MODULE_LICENSE("GPL"); +#define VETH_EVENT_CAP (0) +#define VETH_EVENT_FRAMES (1) +#define VETH_EVENT_MONITOR (2) +#define VETH_EVENT_FRAMES_ACK (3) + +#define VETH_MAX_ACKS_PER_MSG (20) +#define VETH_MAX_FRAMES_PER_MSG (6) + +struct veth_frames_data { + u32 addr[VETH_MAX_FRAMES_PER_MSG]; + u16 len[VETH_MAX_FRAMES_PER_MSG]; + u32 eofmask; +}; +#define VETH_EOF_SHIFT (32-VETH_MAX_FRAMES_PER_MSG) + +struct veth_frames_ack_data { + u16 token[VETH_MAX_ACKS_PER_MSG]; +}; + +struct veth_cap_data { + u8 caps_version; + u8 rsvd1; + u16 num_buffers; + u16 ack_threshold; + u16 rsvd2; + u32 ack_timeout; + u32 rsvd3; + u64 rsvd4[3]; +}; + +struct veth_lpevent { + struct HvLpEvent base_event; + union { + struct veth_cap_data caps_data; + struct veth_frames_data frames_data; + struct veth_frames_ack_data frames_ack_data; + } u; + +}; + +#define DRV_NAME "iseries_veth" +#define DRV_VERSION "2.0" + #define VETH_NUMBUFFERS (120) #define VETH_ACKTIMEOUT (1000000) /* microseconds */ #define VETH_MAX_MCAST (12) @@ -113,53 +157,59 @@ MODULE_LICENSE("GPL"); struct veth_msg { struct veth_msg *next; - struct VethFramesData data; + struct veth_frames_data data; int token; - unsigned long in_use; + int in_use; struct sk_buff *skb; struct device *dev; }; struct veth_lpar_connection { HvLpIndex remote_lp; - struct work_struct statemachine_wq; + struct delayed_work statemachine_wq; struct veth_msg *msgs; int num_events; - struct VethCapData local_caps; + struct veth_cap_data local_caps; + struct kobject kobject; struct timer_list ack_timer; + struct timer_list reset_timer; + unsigned int reset_timeout; + unsigned long last_contact; + int outstanding_tx; + spinlock_t lock; unsigned long state; HvLpInstanceId src_inst; HvLpInstanceId dst_inst; - struct VethLpEvent cap_event, cap_ack_event; + struct veth_lpevent cap_event, cap_ack_event; u16 pending_acks[VETH_MAX_ACKS_PER_MSG]; u32 num_pending_acks; int num_ack_events; - struct VethCapData remote_caps; + struct veth_cap_data remote_caps; u32 ack_timeout; - spinlock_t msg_stack_lock; struct veth_msg *msg_stack_head; }; struct veth_port { struct device *dev; - struct net_device_stats stats; u64 mac_addr; HvLpIndexMap lpar_map; - spinlock_t pending_gate; - struct sk_buff *pending_skb; - HvLpIndexMap pending_lpmask; + /* queue_lock protects the stopped_map and dev's queue. */ + spinlock_t queue_lock; + HvLpIndexMap stopped_map; + /* mcast_gate protects promiscuous, num_mcast & mcast_addr. */ rwlock_t mcast_gate; int promiscuous; - int all_mcast; int num_mcast; u64 mcast_addr[VETH_MAX_MCAST]; + + struct kobject kobject; }; static HvLpIndex this_lp; @@ -168,44 +218,56 @@ static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */ static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev); static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *); -static void veth_flush_pending(struct veth_lpar_connection *cnx); -static void veth_receive(struct veth_lpar_connection *, struct VethLpEvent *); -static void veth_timed_ack(unsigned long connectionPtr); +static void veth_wake_queues(struct veth_lpar_connection *cnx); +static void veth_stop_queues(struct veth_lpar_connection *cnx); +static void veth_receive(struct veth_lpar_connection *, struct veth_lpevent *); +static void veth_release_connection(struct kobject *kobject); +static void veth_timed_ack(unsigned long ptr); +static void veth_timed_reset(unsigned long ptr); /* * Utility functions */ -#define veth_printk(prio, fmt, args...) \ - printk(prio "%s: " fmt, __FILE__, ## args) +#define veth_info(fmt, args...) \ + printk(KERN_INFO DRV_NAME ": " fmt, ## args) #define veth_error(fmt, args...) \ - printk(KERN_ERR "(%s:%3.3d) ERROR: " fmt, __FILE__, __LINE__ , ## args) + printk(KERN_ERR DRV_NAME ": Error: " fmt, ## args) +#ifdef DEBUG +#define veth_debug(fmt, args...) \ + printk(KERN_DEBUG DRV_NAME ": " fmt, ## args) +#else +#define veth_debug(fmt, args...) do {} while (0) +#endif + +/* You must hold the connection's lock when you call this function. */ static inline void veth_stack_push(struct veth_lpar_connection *cnx, struct veth_msg *msg) { - unsigned long flags; - - spin_lock_irqsave(&cnx->msg_stack_lock, flags); msg->next = cnx->msg_stack_head; cnx->msg_stack_head = msg; - spin_unlock_irqrestore(&cnx->msg_stack_lock, flags); } +/* You must hold the connection's lock when you call this function. */ static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx) { - unsigned long flags; struct veth_msg *msg; - spin_lock_irqsave(&cnx->msg_stack_lock, flags); msg = cnx->msg_stack_head; if (msg) cnx->msg_stack_head = cnx->msg_stack_head->next; - spin_unlock_irqrestore(&cnx->msg_stack_lock, flags); + return msg; } +/* You must hold the connection's lock when you call this function. */ +static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx) +{ + return cnx->msg_stack_head == NULL; +} + static inline HvLpEvent_Rc veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype, HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype, @@ -246,10 +308,11 @@ static void veth_complete_allocation(void *parm, int number) static int veth_allocate_events(HvLpIndex rlp, int number) { - struct veth_allocation vc = { COMPLETION_INITIALIZER(vc.c), 0 }; + struct veth_allocation vc = + { COMPLETION_INITIALIZER_ONSTACK(vc.c), 0 }; mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan, - sizeof(struct VethLpEvent), number, + sizeof(struct veth_lpevent), number, &veth_complete_allocation, &vc); wait_for_completion(&vc.c); @@ -257,16 +320,147 @@ static int veth_allocate_events(HvLpIndex rlp, int number) } /* + * sysfs support + */ + +struct veth_cnx_attribute { + struct attribute attr; + ssize_t (*show)(struct veth_lpar_connection *, char *buf); + ssize_t (*store)(struct veth_lpar_connection *, const char *buf); +}; + +static ssize_t veth_cnx_attribute_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct veth_cnx_attribute *cnx_attr; + struct veth_lpar_connection *cnx; + + cnx_attr = container_of(attr, struct veth_cnx_attribute, attr); + cnx = container_of(kobj, struct veth_lpar_connection, kobject); + + if (!cnx_attr->show) + return -EIO; + + return cnx_attr->show(cnx, buf); +} + +#define CUSTOM_CNX_ATTR(_name, _format, _expression) \ +static ssize_t _name##_show(struct veth_lpar_connection *cnx, char *buf)\ +{ \ + return sprintf(buf, _format, _expression); \ +} \ +struct veth_cnx_attribute veth_cnx_attr_##_name = __ATTR_RO(_name) + +#define SIMPLE_CNX_ATTR(_name) \ + CUSTOM_CNX_ATTR(_name, "%lu\n", (unsigned long)cnx->_name) + +SIMPLE_CNX_ATTR(outstanding_tx); +SIMPLE_CNX_ATTR(remote_lp); +SIMPLE_CNX_ATTR(num_events); +SIMPLE_CNX_ATTR(src_inst); +SIMPLE_CNX_ATTR(dst_inst); +SIMPLE_CNX_ATTR(num_pending_acks); +SIMPLE_CNX_ATTR(num_ack_events); +CUSTOM_CNX_ATTR(ack_timeout, "%d\n", jiffies_to_msecs(cnx->ack_timeout)); +CUSTOM_CNX_ATTR(reset_timeout, "%d\n", jiffies_to_msecs(cnx->reset_timeout)); +CUSTOM_CNX_ATTR(state, "0x%.4lX\n", cnx->state); +CUSTOM_CNX_ATTR(last_contact, "%d\n", cnx->last_contact ? + jiffies_to_msecs(jiffies - cnx->last_contact) : 0); + +#define GET_CNX_ATTR(_name) (&veth_cnx_attr_##_name.attr) + +static struct attribute *veth_cnx_default_attrs[] = { + GET_CNX_ATTR(outstanding_tx), + GET_CNX_ATTR(remote_lp), + GET_CNX_ATTR(num_events), + GET_CNX_ATTR(reset_timeout), + GET_CNX_ATTR(last_contact), + GET_CNX_ATTR(state), + GET_CNX_ATTR(src_inst), + GET_CNX_ATTR(dst_inst), + GET_CNX_ATTR(num_pending_acks), + GET_CNX_ATTR(num_ack_events), + GET_CNX_ATTR(ack_timeout), + NULL +}; + +static struct sysfs_ops veth_cnx_sysfs_ops = { + .show = veth_cnx_attribute_show +}; + +static struct kobj_type veth_lpar_connection_ktype = { + .release = veth_release_connection, + .sysfs_ops = &veth_cnx_sysfs_ops, + .default_attrs = veth_cnx_default_attrs +}; + +struct veth_port_attribute { + struct attribute attr; + ssize_t (*show)(struct veth_port *, char *buf); + ssize_t (*store)(struct veth_port *, const char *buf); +}; + +static ssize_t veth_port_attribute_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct veth_port_attribute *port_attr; + struct veth_port *port; + + port_attr = container_of(attr, struct veth_port_attribute, attr); + port = container_of(kobj, struct veth_port, kobject); + + if (!port_attr->show) + return -EIO; + + return port_attr->show(port, buf); +} + +#define CUSTOM_PORT_ATTR(_name, _format, _expression) \ +static ssize_t _name##_show(struct veth_port *port, char *buf) \ +{ \ + return sprintf(buf, _format, _expression); \ +} \ +struct veth_port_attribute veth_port_attr_##_name = __ATTR_RO(_name) + +#define SIMPLE_PORT_ATTR(_name) \ + CUSTOM_PORT_ATTR(_name, "%lu\n", (unsigned long)port->_name) + +SIMPLE_PORT_ATTR(promiscuous); +SIMPLE_PORT_ATTR(num_mcast); +CUSTOM_PORT_ATTR(lpar_map, "0x%X\n", port->lpar_map); +CUSTOM_PORT_ATTR(stopped_map, "0x%X\n", port->stopped_map); +CUSTOM_PORT_ATTR(mac_addr, "0x%llX\n", port->mac_addr); + +#define GET_PORT_ATTR(_name) (&veth_port_attr_##_name.attr) +static struct attribute *veth_port_default_attrs[] = { + GET_PORT_ATTR(mac_addr), + GET_PORT_ATTR(lpar_map), + GET_PORT_ATTR(stopped_map), + GET_PORT_ATTR(promiscuous), + GET_PORT_ATTR(num_mcast), + NULL +}; + +static struct sysfs_ops veth_port_sysfs_ops = { + .show = veth_port_attribute_show +}; + +static struct kobj_type veth_port_ktype = { + .sysfs_ops = &veth_port_sysfs_ops, + .default_attrs = veth_port_default_attrs +}; + +/* * LPAR connection code */ static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx) { - schedule_work(&cnx->statemachine_wq); + schedule_delayed_work(&cnx->statemachine_wq, 0); } static void veth_take_cap(struct veth_lpar_connection *cnx, - struct VethLpEvent *event) + struct veth_lpevent *event) { unsigned long flags; @@ -278,7 +472,7 @@ static void veth_take_cap(struct veth_lpar_connection *cnx, HvLpEvent_Type_VirtualLan); if (cnx->state & VETH_STATE_GOTCAPS) { - veth_error("Received a second capabilities from lpar %d\n", + veth_error("Received a second capabilities from LPAR %d.\n", cnx->remote_lp); event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable; HvCallEvent_ackLpEvent((struct HvLpEvent *) event); @@ -291,17 +485,17 @@ static void veth_take_cap(struct veth_lpar_connection *cnx, } static void veth_take_cap_ack(struct veth_lpar_connection *cnx, - struct VethLpEvent *event) + struct veth_lpevent *event) { unsigned long flags; spin_lock_irqsave(&cnx->lock, flags); if (cnx->state & VETH_STATE_GOTCAPACK) { - veth_error("Received a second capabilities ack from lpar %d\n", + veth_error("Received a second capabilities ack from LPAR %d.\n", cnx->remote_lp); } else { memcpy(&cnx->cap_ack_event, event, - sizeof(&cnx->cap_ack_event)); + sizeof(cnx->cap_ack_event)); cnx->state |= VETH_STATE_GOTCAPACK; veth_kick_statemachine(cnx); } @@ -309,19 +503,24 @@ static void veth_take_cap_ack(struct veth_lpar_connection *cnx, } static void veth_take_monitor_ack(struct veth_lpar_connection *cnx, - struct VethLpEvent *event) + struct veth_lpevent *event) { unsigned long flags; spin_lock_irqsave(&cnx->lock, flags); - veth_printk(KERN_DEBUG, "Monitor ack returned for lpar %d\n", - cnx->remote_lp); - cnx->state |= VETH_STATE_RESET; - veth_kick_statemachine(cnx); + veth_debug("cnx %d: lost connection.\n", cnx->remote_lp); + + /* Avoid kicking the statemachine once we're shutdown. + * It's unnecessary and it could break veth_stop_connection(). */ + + if (! (cnx->state & VETH_STATE_SHUTDOWN)) { + cnx->state |= VETH_STATE_RESET; + veth_kick_statemachine(cnx); + } spin_unlock_irqrestore(&cnx->lock, flags); } -static void veth_handle_ack(struct VethLpEvent *event) +static void veth_handle_ack(struct veth_lpevent *event) { HvLpIndex rlp = event->base_event.xTargetLp; struct veth_lpar_connection *cnx = veth_cnx[rlp]; @@ -329,79 +528,88 @@ static void veth_handle_ack(struct VethLpEvent *event) BUG_ON(! cnx); switch (event->base_event.xSubtype) { - case VethEventTypeCap: + case VETH_EVENT_CAP: veth_take_cap_ack(cnx, event); break; - case VethEventTypeMonitor: + case VETH_EVENT_MONITOR: veth_take_monitor_ack(cnx, event); break; default: - veth_error("Unknown ack type %d from lpar %d\n", - event->base_event.xSubtype, rlp); + veth_error("Unknown ack type %d from LPAR %d.\n", + event->base_event.xSubtype, rlp); }; } -static void veth_handle_int(struct VethLpEvent *event) +static void veth_handle_int(struct veth_lpevent *event) { HvLpIndex rlp = event->base_event.xSourceLp; struct veth_lpar_connection *cnx = veth_cnx[rlp]; unsigned long flags; - int i; + int i, acked = 0; BUG_ON(! cnx); switch (event->base_event.xSubtype) { - case VethEventTypeCap: + case VETH_EVENT_CAP: veth_take_cap(cnx, event); break; - case VethEventTypeMonitor: + case VETH_EVENT_MONITOR: /* do nothing... this'll hang out here til we're dead, * and the hypervisor will return it for us. */ break; - case VethEventTypeFramesAck: + case VETH_EVENT_FRAMES_ACK: spin_lock_irqsave(&cnx->lock, flags); + for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) { u16 msgnum = event->u.frames_ack_data.token[i]; - if (msgnum < VETH_NUMBUFFERS) + if (msgnum < VETH_NUMBUFFERS) { veth_recycle_msg(cnx, cnx->msgs + msgnum); + cnx->outstanding_tx--; + acked++; + } } + + if (acked > 0) { + cnx->last_contact = jiffies; + veth_wake_queues(cnx); + } + spin_unlock_irqrestore(&cnx->lock, flags); - veth_flush_pending(cnx); break; - case VethEventTypeFrames: + case VETH_EVENT_FRAMES: veth_receive(cnx, event); break; default: - veth_error("Unknown interrupt type %d from lpar %d\n", - event->base_event.xSubtype, rlp); + veth_error("Unknown interrupt type %d from LPAR %d.\n", + event->base_event.xSubtype, rlp); }; } -static void veth_handle_event(struct HvLpEvent *event, struct pt_regs *regs) +static void veth_handle_event(struct HvLpEvent *event) { - struct VethLpEvent *veth_event = (struct VethLpEvent *)event; + struct veth_lpevent *veth_event = (struct veth_lpevent *)event; - if (event->xFlags.xFunction == HvLpEvent_Function_Ack) + if (hvlpevent_is_ack(event)) veth_handle_ack(veth_event); - else if (event->xFlags.xFunction == HvLpEvent_Function_Int) + else veth_handle_int(veth_event); } static int veth_process_caps(struct veth_lpar_connection *cnx) { - struct VethCapData *remote_caps = &cnx->remote_caps; + struct veth_cap_data *remote_caps = &cnx->remote_caps; int num_acks_needed; /* Convert timer to jiffies */ cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000; - if ( (remote_caps->num_buffers == 0) - || (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG) - || (remote_caps->ack_threshold == 0) - || (cnx->ack_timeout == 0) ) { - veth_error("Received incompatible capabilities from lpar %d\n", - cnx->remote_lp); + if ( (remote_caps->num_buffers == 0) || + (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG) || + (remote_caps->ack_threshold == 0) || + (cnx->ack_timeout == 0) ) { + veth_error("Received incompatible capabilities from LPAR %d.\n", + cnx->remote_lp); return HvLpEvent_Rc_InvalidSubtypeData; } @@ -418,8 +626,8 @@ static int veth_process_caps(struct veth_lpar_connection *cnx) cnx->num_ack_events += num; if (cnx->num_ack_events < num_acks_needed) { - veth_error("Couldn't allocate enough ack events for lpar %d\n", - cnx->remote_lp); + veth_error("Couldn't allocate enough ack events " + "for LPAR %d.\n", cnx->remote_lp); return HvLpEvent_Rc_BufferNotAvailable; } @@ -430,9 +638,11 @@ static int veth_process_caps(struct veth_lpar_connection *cnx) } /* FIXME: The gotos here are a bit dubious */ -static void veth_statemachine(void *p) +static void veth_statemachine(struct work_struct *work) { - struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)p; + struct veth_lpar_connection *cnx = + container_of(work, struct veth_lpar_connection, + statemachine_wq.work); int rlp = cnx->remote_lp; int rc; @@ -440,15 +650,15 @@ static void veth_statemachine(void *p) restart: if (cnx->state & VETH_STATE_RESET) { - int i; - - del_timer(&cnx->ack_timer); - if (cnx->state & VETH_STATE_OPEN) HvCallEvent_closeLpEventPath(cnx->remote_lp, HvLpEvent_Type_VirtualLan); - /* reset ack data */ + /* + * Reset ack data. This prevents the ack_timer actually + * doing anything, even if it runs one more time when + * we drop the lock below. + */ memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks)); cnx->num_pending_acks = 0; @@ -458,14 +668,32 @@ static void veth_statemachine(void *p) | VETH_STATE_SENTCAPACK | VETH_STATE_READY); /* Clean up any leftover messages */ - if (cnx->msgs) + if (cnx->msgs) { + int i; for (i = 0; i < VETH_NUMBUFFERS; ++i) veth_recycle_msg(cnx, cnx->msgs + i); + } + + cnx->outstanding_tx = 0; + veth_wake_queues(cnx); + + /* Drop the lock so we can do stuff that might sleep or + * take other locks. */ spin_unlock_irq(&cnx->lock); - veth_flush_pending(cnx); + + del_timer_sync(&cnx->ack_timer); + del_timer_sync(&cnx->reset_timer); + spin_lock_irq(&cnx->lock); + if (cnx->state & VETH_STATE_RESET) goto restart; + + /* Hack, wait for the other end to reset itself. */ + if (! (cnx->state & VETH_STATE_SHUTDOWN)) { + schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ); + goto out; + } } if (cnx->state & VETH_STATE_SHUTDOWN) @@ -486,9 +714,9 @@ static void veth_statemachine(void *p) cnx->state |= VETH_STATE_OPEN; } - if ( (cnx->state & VETH_STATE_OPEN) - && !(cnx->state & VETH_STATE_SENTMON) ) { - rc = veth_signalevent(cnx, VethEventTypeMonitor, + if ( (cnx->state & VETH_STATE_OPEN) && + !(cnx->state & VETH_STATE_SENTMON) ) { + rc = veth_signalevent(cnx, VETH_EVENT_MONITOR, HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_DeferredAck, 0, 0, 0, 0, 0, 0); @@ -496,11 +724,10 @@ static void veth_statemachine(void *p) if (rc == HvLpEvent_Rc_Good) { cnx->state |= VETH_STATE_SENTMON; } else { - if ( (rc != HvLpEvent_Rc_PartitionDead) - && (rc != HvLpEvent_Rc_PathClosed) ) - veth_error("Error sending monitor to " - "lpar %d, rc=%x\n", - rlp, (int) rc); + if ( (rc != HvLpEvent_Rc_PartitionDead) && + (rc != HvLpEvent_Rc_PathClosed) ) + veth_error("Error sending monitor to LPAR %d, " + "rc = %d\n", rlp, rc); /* Oh well, hope we get a cap from the other * end and do better when that kicks us */ @@ -508,11 +735,11 @@ static void veth_statemachine(void *p) } } - if ( (cnx->state & VETH_STATE_OPEN) - && !(cnx->state & VETH_STATE_SENTCAPS)) { + if ( (cnx->state & VETH_STATE_OPEN) && + !(cnx->state & VETH_STATE_SENTCAPS)) { u64 *rawcap = (u64 *)&cnx->local_caps; - rc = veth_signalevent(cnx, VethEventTypeCap, + rc = veth_signalevent(cnx, VETH_EVENT_CAP, HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, 0, rawcap[0], rawcap[1], rawcap[2], @@ -521,20 +748,20 @@ static void veth_statemachine(void *p) if (rc == HvLpEvent_Rc_Good) { cnx->state |= VETH_STATE_SENTCAPS; } else { - if ( (rc != HvLpEvent_Rc_PartitionDead) - && (rc != HvLpEvent_Rc_PathClosed) ) - veth_error("Error sending caps to " - "lpar %d, rc=%x\n", - rlp, (int) rc); + if ( (rc != HvLpEvent_Rc_PartitionDead) && + (rc != HvLpEvent_Rc_PathClosed) ) + veth_error("Error sending caps to LPAR %d, " + "rc = %d\n", rlp, rc); + /* Oh well, hope we get a cap from the other * end and do better when that kicks us */ goto out; } } - if ((cnx->state & VETH_STATE_GOTCAPS) - && !(cnx->state & VETH_STATE_SENTCAPACK)) { - struct VethCapData *remote_caps = &cnx->remote_caps; + if ((cnx->state & VETH_STATE_GOTCAPS) && + !(cnx->state & VETH_STATE_SENTCAPACK)) { + struct veth_cap_data *remote_caps = &cnx->remote_caps; memcpy(remote_caps, &cnx->cap_event.u.caps_data, sizeof(*remote_caps)); @@ -556,19 +783,17 @@ static void veth_statemachine(void *p) goto cant_cope; } - if ((cnx->state & VETH_STATE_GOTCAPACK) - && (cnx->state & VETH_STATE_GOTCAPS) - && !(cnx->state & VETH_STATE_READY)) { + if ((cnx->state & VETH_STATE_GOTCAPACK) && + (cnx->state & VETH_STATE_GOTCAPS) && + !(cnx->state & VETH_STATE_READY)) { if (cnx->cap_ack_event.base_event.xRc == HvLpEvent_Rc_Good) { /* Start the ACK timer */ cnx->ack_timer.expires = jiffies + cnx->ack_timeout; add_timer(&cnx->ack_timer); cnx->state |= VETH_STATE_READY; } else { - veth_printk(KERN_ERR, "Caps rejected (rc=%d) by " - "lpar %d\n", - cnx->cap_ack_event.base_event.xRc, - rlp); + veth_error("Caps rejected by LPAR %d, rc = %d\n", + rlp, cnx->cap_ack_event.base_event.xRc); goto cant_cope; } } @@ -581,8 +806,8 @@ static void veth_statemachine(void *p) /* FIXME: we get here if something happens we really can't * cope with. The link will never work once we get here, and * all we can do is not lock the rest of the system up */ - veth_error("Badness on connection to lpar %d (state=%04lx) " - " - shutting down\n", rlp, cnx->state); + veth_error("Unrecoverable error on connection to LPAR %d, shutting down" + " (state = 0x%04lx)\n", rlp, cnx->state); cnx->state |= VETH_STATE_SHUTDOWN; spin_unlock_irq(&cnx->lock); } @@ -593,34 +818,42 @@ static int veth_init_connection(u8 rlp) struct veth_msg *msgs; int i; - if ( (rlp == this_lp) - || ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) ) + if ( (rlp == this_lp) || + ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) ) return 0; - cnx = kmalloc(sizeof(*cnx), GFP_KERNEL); + cnx = kzalloc(sizeof(*cnx), GFP_KERNEL); if (! cnx) return -ENOMEM; - memset(cnx, 0, sizeof(*cnx)); cnx->remote_lp = rlp; spin_lock_init(&cnx->lock); - INIT_WORK(&cnx->statemachine_wq, veth_statemachine, cnx); + INIT_DELAYED_WORK(&cnx->statemachine_wq, veth_statemachine); + init_timer(&cnx->ack_timer); cnx->ack_timer.function = veth_timed_ack; cnx->ack_timer.data = (unsigned long) cnx; + + init_timer(&cnx->reset_timer); + cnx->reset_timer.function = veth_timed_reset; + cnx->reset_timer.data = (unsigned long) cnx; + cnx->reset_timeout = 5 * HZ * (VETH_ACKTIMEOUT / 1000000); + memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks)); veth_cnx[rlp] = cnx; - msgs = kmalloc(VETH_NUMBUFFERS * sizeof(struct veth_msg), GFP_KERNEL); + /* This gets us 1 reference, which is held on behalf of the driver + * infrastructure. It's released at module unload. */ + kobject_init(&cnx->kobject, &veth_lpar_connection_ktype); + + msgs = kcalloc(VETH_NUMBUFFERS, sizeof(struct veth_msg), GFP_KERNEL); if (! msgs) { - veth_error("Can't allocate buffers for lpar %d\n", rlp); + veth_error("Can't allocate buffers for LPAR %d.\n", rlp); return -ENOMEM; } cnx->msgs = msgs; - memset(msgs, 0, VETH_NUMBUFFERS * sizeof(struct veth_msg)); - spin_lock_init(&cnx->msg_stack_lock); for (i = 0; i < VETH_NUMBUFFERS; i++) { msgs[i].token = i; @@ -630,8 +863,7 @@ static int veth_init_connection(u8 rlp) cnx->num_events = veth_allocate_events(rlp, 2 + VETH_NUMBUFFERS); if (cnx->num_events < (2 + VETH_NUMBUFFERS)) { - veth_error("Can't allocate events for lpar %d, only got %d\n", - rlp, cnx->num_events); + veth_error("Can't allocate enough events for LPAR %d.\n", rlp); return -ENOMEM; } @@ -642,11 +874,9 @@ static int veth_init_connection(u8 rlp) return 0; } -static void veth_stop_connection(u8 rlp) +static void veth_stop_connection(struct veth_lpar_connection *cnx) { - struct veth_lpar_connection *cnx = veth_cnx[rlp]; - - if (! cnx) + if (!cnx) return; spin_lock_irq(&cnx->lock); @@ -654,12 +884,23 @@ static void veth_stop_connection(u8 rlp) veth_kick_statemachine(cnx); spin_unlock_irq(&cnx->lock); + /* There's a slim chance the reset code has just queued the + * statemachine to run in five seconds. If so we need to cancel + * that and requeue the work to run now. */ + if (cancel_delayed_work(&cnx->statemachine_wq)) { + spin_lock_irq(&cnx->lock); + veth_kick_statemachine(cnx); + spin_unlock_irq(&cnx->lock); + } + + /* Wait for the state machine to run. */ flush_scheduled_work(); +} - /* FIXME: not sure if this is necessary - will already have - * been deleted by the state machine, just want to make sure - * its not running any more */ - del_timer_sync(&cnx->ack_timer); +static void veth_destroy_connection(struct veth_lpar_connection *cnx) +{ + if (!cnx) + return; if (cnx->num_events > 0) mf_deallocate_lp_events(cnx->remote_lp, @@ -671,18 +912,18 @@ static void veth_stop_connection(u8 rlp) HvLpEvent_Type_VirtualLan, cnx->num_ack_events, NULL, NULL); -} - -static void veth_destroy_connection(u8 rlp) -{ - struct veth_lpar_connection *cnx = veth_cnx[rlp]; - - if (! cnx) - return; kfree(cnx->msgs); + veth_cnx[cnx->remote_lp] = NULL; kfree(cnx); - veth_cnx[rlp] = NULL; +} + +static void veth_release_connection(struct kobject *kobj) +{ + struct veth_lpar_connection *cnx; + cnx = container_of(kobj, struct veth_lpar_connection, kobject); + veth_stop_connection(cnx); + veth_destroy_connection(cnx); } /* @@ -691,9 +932,6 @@ static void veth_destroy_connection(u8 rlp) static int veth_open(struct net_device *dev) { - struct veth_port *port = (struct veth_port *) dev->priv; - - memset(&port->stats, 0, sizeof (port->stats)); netif_start_queue(dev); return 0; } @@ -704,13 +942,6 @@ static int veth_close(struct net_device *dev) return 0; } -static struct net_device_stats *veth_get_stats(struct net_device *dev) -{ - struct veth_port *port = (struct veth_port *) dev->priv; - - return &port->stats; -} - static int veth_change_mtu(struct net_device *dev, int new_mtu) { if ((new_mtu < 68) || (new_mtu > VETH_MAX_MTU)) @@ -721,26 +952,24 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu) static void veth_set_multicast_list(struct net_device *dev) { - struct veth_port *port = (struct veth_port *) dev->priv; + struct veth_port *port = netdev_priv(dev); unsigned long flags; write_lock_irqsave(&port->mcast_gate, flags); - if (dev->flags & IFF_PROMISC) { /* set promiscuous mode */ - printk(KERN_INFO "%s: Promiscuous mode enabled.\n", - dev->name); + if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) || + (netdev_mc_count(dev) > VETH_MAX_MCAST)) { port->promiscuous = 1; - } else if ( (dev->flags & IFF_ALLMULTI) - || (dev->mc_count > VETH_MAX_MCAST) ) { - port->all_mcast = 1; } else { struct dev_mc_list *dmi = dev->mc_list; int i; + port->promiscuous = 0; + /* Update table */ port->num_mcast = 0; - for (i = 0; i < dev->mc_count; i++) { + for (i = 0; i < netdev_mc_count(dev); i++) { u8 *addr = dmi->dmi_addr; u64 xaddr = 0; @@ -758,9 +987,10 @@ static void veth_set_multicast_list(struct net_device *dev) static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strncpy(info->driver, "veth", sizeof(info->driver) - 1); + strncpy(info->driver, DRV_NAME, sizeof(info->driver) - 1); info->driver[sizeof(info->driver) - 1] = '\0'; - strncpy(info->version, "1.0", sizeof(info->version) - 1); + strncpy(info->version, DRV_VERSION, sizeof(info->version) - 1); + info->version[sizeof(info->version) - 1] = '\0'; } static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) @@ -785,59 +1015,38 @@ static u32 veth_get_link(struct net_device *dev) return 1; } -static struct ethtool_ops ops = { +static const struct ethtool_ops ops = { .get_drvinfo = veth_get_drvinfo, .get_settings = veth_get_settings, .get_link = veth_get_link, }; -static void veth_tx_timeout(struct net_device *dev) -{ - struct veth_port *port = (struct veth_port *)dev->priv; - struct net_device_stats *stats = &port->stats; - unsigned long flags; - int i; - - stats->tx_errors++; - - spin_lock_irqsave(&port->pending_gate, flags); - - printk(KERN_WARNING "%s: Tx timeout! Resetting lp connections: %08x\n", - dev->name, port->pending_lpmask); - - /* If we've timed out the queue must be stopped, which should - * only ever happen when there is a pending packet. */ - WARN_ON(! port->pending_lpmask); - - for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { - struct veth_lpar_connection *cnx = veth_cnx[i]; - - if (! (port->pending_lpmask & (1<lock); - cnx->state |= VETH_STATE_RESET; - veth_kick_statemachine(cnx); - spin_unlock(&cnx->lock); - } - - spin_unlock_irqrestore(&port->pending_gate, flags); -} +static const struct net_device_ops veth_netdev_ops = { + .ndo_open = veth_open, + .ndo_stop = veth_close, + .ndo_start_xmit = veth_start_xmit, + .ndo_change_mtu = veth_change_mtu, + .ndo_set_multicast_list = veth_set_multicast_list, + .ndo_set_mac_address = NULL, + .ndo_validate_addr = eth_validate_addr, +}; -static struct net_device * __init veth_probe_one(int vlan, struct device *vdev) +static struct net_device *veth_probe_one(int vlan, + struct vio_dev *vio_dev) { struct net_device *dev; struct veth_port *port; + struct device *vdev = &vio_dev->dev; int i, rc; + const unsigned char *mac_addr; + + mac_addr = vio_get_attribute(vio_dev, "local-mac-address", NULL); + if (mac_addr == NULL) + mac_addr = vio_get_attribute(vio_dev, "mac-address", NULL); + if (mac_addr == NULL) { + veth_error("Unable to fetch MAC address from device tree.\n"); + return NULL; + } dev = alloc_etherdev(sizeof (struct veth_port)); if (! dev) { @@ -845,10 +1054,11 @@ static struct net_device * __init veth_probe_one(int vlan, struct device *vdev) return NULL; } - port = (struct veth_port *) dev->priv; + port = netdev_priv(dev); - spin_lock_init(&port->pending_gate); + spin_lock_init(&port->queue_lock); rwlock_init(&port->mcast_gate); + port->stopped_map = 0; for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { HvLpVirtualLanIndexMap map; @@ -861,42 +1071,30 @@ static struct net_device * __init veth_probe_one(int vlan, struct device *vdev) } port->dev = vdev; - dev->dev_addr[0] = 0x02; - dev->dev_addr[1] = 0x01; - dev->dev_addr[2] = 0xff; - dev->dev_addr[3] = vlan; - dev->dev_addr[4] = 0xff; - dev->dev_addr[5] = this_lp; + memcpy(dev->dev_addr, mac_addr, ETH_ALEN); dev->mtu = VETH_MAX_MTU; - memcpy(&port->mac_addr, dev->dev_addr, 6); + memcpy(&port->mac_addr, mac_addr, ETH_ALEN); - dev->open = veth_open; - dev->hard_start_xmit = veth_start_xmit; - dev->stop = veth_close; - dev->get_stats = veth_get_stats; - dev->change_mtu = veth_change_mtu; - dev->set_mac_address = NULL; - dev->set_multicast_list = veth_set_multicast_list; + dev->netdev_ops = &veth_netdev_ops; SET_ETHTOOL_OPS(dev, &ops); - dev->watchdog_timeo = 2 * (VETH_ACKTIMEOUT * HZ / 1000000); - dev->tx_timeout = veth_tx_timeout; - SET_NETDEV_DEV(dev, vdev); rc = register_netdev(dev); if (rc != 0) { - veth_printk(KERN_ERR, - "Failed to register ethernet device for vlan %d\n", - vlan); + veth_error("Failed registering net device for vlan%d.\n", vlan); free_netdev(dev); return NULL; } - veth_printk(KERN_DEBUG, "%s attached to iSeries vlan %d (lpar_map=0x%04x)\n", - dev->name, vlan, port->lpar_map); + kobject_init(&port->kobject, &veth_port_ktype); + if (0 != kobject_add(&port->kobject, &dev->dev.kobj, "veth_port")) + veth_error("Failed adding port for %s to sysfs.\n", dev->name); + + veth_info("%s attached to iSeries vlan %d (LPAR map = 0x%.4X)\n", + dev->name, vlan, port->lpar_map); return dev; } @@ -909,100 +1107,96 @@ static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp, struct net_device *dev) { struct veth_lpar_connection *cnx = veth_cnx[rlp]; - struct veth_port *port = (struct veth_port *) dev->priv; + struct veth_port *port = netdev_priv(dev); HvLpEvent_Rc rc; - u32 dma_address, dma_length; struct veth_msg *msg = NULL; - int err = 0; unsigned long flags; - if (! cnx) { - port->stats.tx_errors++; - dev_kfree_skb(skb); + if (! cnx) return 0; - } spin_lock_irqsave(&cnx->lock, flags); - if (! cnx->state & VETH_STATE_READY) - goto drop; + if (! (cnx->state & VETH_STATE_READY)) + goto no_error; - if ((skb->len - 14) > VETH_MAX_MTU) + if ((skb->len - ETH_HLEN) > VETH_MAX_MTU) goto drop; msg = veth_stack_pop(cnx); - - if (! msg) { - err = 1; + if (! msg) goto drop; - } - dma_length = skb->len; - dma_address = dma_map_single(port->dev, skb->data, - dma_length, DMA_TO_DEVICE); + msg->in_use = 1; + msg->skb = skb_get(skb); - if (dma_mapping_error(dma_address)) + msg->data.addr[0] = dma_map_single(port->dev, skb->data, + skb->len, DMA_TO_DEVICE); + + if (dma_mapping_error(port->dev, msg->data.addr[0])) goto recycle_and_drop; - /* Is it really necessary to check the length and address - * fields of the first entry here? */ - msg->skb = skb; msg->dev = port->dev; - msg->data.addr[0] = dma_address; - msg->data.len[0] = dma_length; + msg->data.len[0] = skb->len; msg->data.eofmask = 1 << VETH_EOF_SHIFT; - set_bit(0, &(msg->in_use)); - rc = veth_signaldata(cnx, VethEventTypeFrames, msg->token, &msg->data); + + rc = veth_signaldata(cnx, VETH_EVENT_FRAMES, msg->token, &msg->data); if (rc != HvLpEvent_Rc_Good) goto recycle_and_drop; + /* If the timer's not already running, start it now. */ + if (0 == cnx->outstanding_tx) + mod_timer(&cnx->reset_timer, jiffies + cnx->reset_timeout); + + cnx->last_contact = jiffies; + cnx->outstanding_tx++; + + if (veth_stack_is_empty(cnx)) + veth_stop_queues(cnx); + + no_error: spin_unlock_irqrestore(&cnx->lock, flags); return 0; recycle_and_drop: - msg->skb = NULL; - /* need to set in use to make veth_recycle_msg in case this - * was a mapping failure */ - set_bit(0, &msg->in_use); veth_recycle_msg(cnx, msg); drop: - port->stats.tx_errors++; - dev_kfree_skb(skb); spin_unlock_irqrestore(&cnx->lock, flags); - return err; + return 1; } -static HvLpIndexMap veth_transmit_to_many(struct sk_buff *skb, +static void veth_transmit_to_many(struct sk_buff *skb, HvLpIndexMap lpmask, struct net_device *dev) { - struct veth_port *port = (struct veth_port *) dev->priv; - int i; - int rc; + int i, success, error; + + success = error = 0; for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { if ((lpmask & (1 << i)) == 0) continue; - rc = veth_transmit_to_one(skb_get(skb), i, dev); - if (! rc) - lpmask &= ~(1<stats.tx_packets++; - port->stats.tx_bytes += skb->len; - } + if (error) + dev->stats.tx_errors++; - return lpmask; + if (success) { + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + } } static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned char *frame = skb->data; - struct veth_port *port = (struct veth_port *) dev->priv; - unsigned long flags; + struct veth_port *port = netdev_priv(dev); HvLpIndexMap lpmask; if (! (frame[0] & 0x01)) { @@ -1011,7 +1205,7 @@ static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev) if ( ! ((1 << rlp) & port->lpar_map) ) { dev_kfree_skb(skb); - return 0; + return NETDEV_TX_OK; } lpmask = 1 << rlp; @@ -1019,42 +1213,27 @@ static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev) lpmask = port->lpar_map; } - spin_lock_irqsave(&port->pending_gate, flags); - - lpmask = veth_transmit_to_many(skb, lpmask, dev); - - if (! lpmask) { - dev_kfree_skb(skb); - } else { - if (port->pending_skb) { - veth_error("%s: Tx while skb was pending!\n", - dev->name); - dev_kfree_skb(skb); - spin_unlock_irqrestore(&port->pending_gate, flags); - return 1; - } - - port->pending_skb = skb; - port->pending_lpmask = lpmask; - netif_stop_queue(dev); - } + veth_transmit_to_many(skb, lpmask, dev); - spin_unlock_irqrestore(&port->pending_gate, flags); + dev_kfree_skb(skb); - return 0; + return NETDEV_TX_OK; } +/* You must hold the connection's lock when you call this function. */ static void veth_recycle_msg(struct veth_lpar_connection *cnx, struct veth_msg *msg) { u32 dma_address, dma_length; - if (test_and_clear_bit(0, &msg->in_use)) { + if (msg->in_use) { + msg->in_use = 0; dma_address = msg->data.addr[0]; dma_length = msg->data.len[0]; - dma_unmap_single(msg->dev, dma_address, dma_length, - DMA_TO_DEVICE); + if (!dma_mapping_error(msg->dev, dma_address)) + dma_unmap_single(msg->dev, dma_address, dma_length, + DMA_TO_DEVICE); if (msg->skb) { dev_kfree_skb_any(msg->skb); @@ -1063,15 +1242,16 @@ static void veth_recycle_msg(struct veth_lpar_connection *cnx, memset(&msg->data, 0, sizeof(msg->data)); veth_stack_push(cnx, msg); - } else - if (cnx->state & VETH_STATE_OPEN) - veth_error("Bogus frames ack from lpar %d (#%d)\n", - cnx->remote_lp, msg->token); + } else if (cnx->state & VETH_STATE_OPEN) { + veth_error("Non-pending frame (# %d) acked by LPAR %d.\n", + cnx->remote_lp, msg->token); + } } -static void veth_flush_pending(struct veth_lpar_connection *cnx) +static void veth_wake_queues(struct veth_lpar_connection *cnx) { int i; + for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { struct net_device *dev = veth_dev[i]; struct veth_port *port; @@ -1080,25 +1260,82 @@ static void veth_flush_pending(struct veth_lpar_connection *cnx) if (! dev) continue; - port = (struct veth_port *)dev->priv; + port = netdev_priv(dev); if (! (port->lpar_map & (1<remote_lp))) continue; - spin_lock_irqsave(&port->pending_gate, flags); - if (port->pending_skb) { - port->pending_lpmask = - veth_transmit_to_many(port->pending_skb, - port->pending_lpmask, - dev); - if (! port->pending_lpmask) { - dev_kfree_skb_any(port->pending_skb); - port->pending_skb = NULL; - netif_wake_queue(dev); - } + spin_lock_irqsave(&port->queue_lock, flags); + + port->stopped_map &= ~(1 << cnx->remote_lp); + + if (0 == port->stopped_map && netif_queue_stopped(dev)) { + veth_debug("cnx %d: woke queue for %s.\n", + cnx->remote_lp, dev->name); + netif_wake_queue(dev); + } + spin_unlock_irqrestore(&port->queue_lock, flags); + } +} + +static void veth_stop_queues(struct veth_lpar_connection *cnx) +{ + int i; + + for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { + struct net_device *dev = veth_dev[i]; + struct veth_port *port; + + if (! dev) + continue; + + port = netdev_priv(dev); + + /* If this cnx is not on the vlan for this port, continue */ + if (! (port->lpar_map & (1 << cnx->remote_lp))) + continue; + + spin_lock(&port->queue_lock); + + netif_stop_queue(dev); + port->stopped_map |= (1 << cnx->remote_lp); + + veth_debug("cnx %d: stopped queue for %s, map = 0x%x.\n", + cnx->remote_lp, dev->name, port->stopped_map); + + spin_unlock(&port->queue_lock); + } +} + +static void veth_timed_reset(unsigned long ptr) +{ + struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)ptr; + unsigned long trigger_time, flags; + + /* FIXME is it possible this fires after veth_stop_connection()? + * That would reschedule the statemachine for 5 seconds and probably + * execute it after the module's been unloaded. Hmm. */ + + spin_lock_irqsave(&cnx->lock, flags); + + if (cnx->outstanding_tx > 0) { + trigger_time = cnx->last_contact + cnx->reset_timeout; + + if (trigger_time < jiffies) { + cnx->state |= VETH_STATE_RESET; + veth_kick_statemachine(cnx); + veth_error("%d packets not acked by LPAR %d within %d " + "seconds, resetting.\n", + cnx->outstanding_tx, cnx->remote_lp, + cnx->reset_timeout / HZ); + } else { + /* Reschedule the timer */ + trigger_time = jiffies + cnx->reset_timeout; + mod_timer(&cnx->reset_timer, trigger_time); } - spin_unlock_irqrestore(&port->pending_gate, flags); } + + spin_unlock_irqrestore(&cnx->lock, flags); } /* @@ -1114,12 +1351,9 @@ static inline int veth_frame_wanted(struct veth_port *port, u64 mac_addr) if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) ) return 1; - if (! (((char *) &mac_addr)[0] & 0x01)) - return 0; - read_lock_irqsave(&port->mcast_gate, flags); - if (port->promiscuous || port->all_mcast) { + if (port->promiscuous) { wanted = 1; goto out; } @@ -1150,18 +1384,18 @@ static inline void veth_build_dma_list(struct dma_chunk *list, unsigned long done; int i = 1; - /* FIXME: skbs are continguous in real addresses. Do we + /* FIXME: skbs are contiguous in real addresses. Do we * really need to break it into PAGE_SIZE chunks, or can we do * it just at the granularity of iSeries real->absolute * mapping? Indeed, given the way the allocator works, can we * count on them being absolutely contiguous? */ - list[0].addr = ISERIES_HV_ADDR(p); + list[0].addr = iseries_hv_addr(p); list[0].size = min(length, PAGE_SIZE - ((unsigned long)p & ~PAGE_MASK)); done = list[0].size; while (done < length) { - list[i].addr = ISERIES_HV_ADDR(p + done); + list[i].addr = iseries_hv_addr(p + done); list[i].size = min(length-done, PAGE_SIZE); done += list[i].size; i++; @@ -1172,21 +1406,21 @@ static void veth_flush_acks(struct veth_lpar_connection *cnx) { HvLpEvent_Rc rc; - rc = veth_signaldata(cnx, VethEventTypeFramesAck, + rc = veth_signaldata(cnx, VETH_EVENT_FRAMES_ACK, 0, &cnx->pending_acks); if (rc != HvLpEvent_Rc_Good) - veth_error("Error 0x%x acking frames from lpar %d!\n", - (unsigned)rc, cnx->remote_lp); + veth_error("Failed acking frames from LPAR %d, rc = %d\n", + cnx->remote_lp, (int)rc); cnx->num_pending_acks = 0; memset(&cnx->pending_acks, 0xff, sizeof(cnx->pending_acks)); } static void veth_receive(struct veth_lpar_connection *cnx, - struct VethLpEvent *event) + struct veth_lpevent *event) { - struct VethFramesData *senddata = &event->u.frames_data; + struct veth_frames_data *senddata = &event->u.frames_data; int startchunk = 0; int nchunks; unsigned long flags; @@ -1213,9 +1447,10 @@ static void veth_receive(struct veth_lpar_connection *cnx, /* make sure that we have at least 1 EOF entry in the * remaining entries */ if (! (senddata->eofmask >> (startchunk + VETH_EOF_SHIFT))) { - veth_error("missing EOF frag in event " - "eofmask=0x%x startchunk=%d\n", - (unsigned) senddata->eofmask, startchunk); + veth_error("Missing EOF fragment in event " + "eofmask = 0x%x startchunk = %d\n", + (unsigned)senddata->eofmask, + startchunk); break; } @@ -1234,8 +1469,9 @@ static void veth_receive(struct veth_lpar_connection *cnx, /* nchunks == # of chunks in this frame */ if ((length - ETH_HLEN) > VETH_MAX_MTU) { - veth_error("Received oversize frame from lpar %d " - "(length=%d)\n", cnx->remote_lp, length); + veth_error("Received oversize frame from LPAR %d " + "(length = %d)\n", + cnx->remote_lp, length); continue; } @@ -1252,8 +1488,8 @@ static void veth_receive(struct veth_lpar_connection *cnx, cnx->dst_inst, HvLpDma_AddressType_RealAddress, HvLpDma_AddressType_TceIndex, - ISERIES_HV_ADDR(&local_list), - ISERIES_HV_ADDR(&remote_list), + iseries_hv_addr(&local_list), + iseries_hv_addr(&remote_list), length); if (rc != HvLpDma_Rc_Good) { dev_kfree_skb_irq(skb); @@ -1262,15 +1498,20 @@ static void veth_receive(struct veth_lpar_connection *cnx, vlan = skb->data[9]; dev = veth_dev[vlan]; - if (! dev) - /* Some earlier versions of the driver sent - broadcasts down all connections, even to - lpars that weren't on the relevant vlan. - So ignore packets belonging to a vlan we're - not on. */ + if (! dev) { + /* + * Some earlier versions of the driver sent + * broadcasts down all connections, even to lpars + * that weren't on the relevant vlan. So ignore + * packets belonging to a vlan we're not on. + * We can also be here if we receive packets while + * the driver is going down, because then dev is NULL. + */ + dev_kfree_skb_irq(skb); continue; + } - port = (struct veth_port *)dev->priv; + port = netdev_priv(dev); dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000; if ((vlan > HVMAXARCHITECTEDVIRTUALLANS) || !port) { @@ -1283,12 +1524,11 @@ static void veth_receive(struct veth_lpar_connection *cnx, } skb_put(skb, length); - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); skb->ip_summed = CHECKSUM_NONE; netif_rx(skb); /* send it up */ - port->stats.rx_packets++; - port->stats.rx_bytes += length; + dev->stats.rx_packets++; + dev->stats.rx_bytes += length; } while (startchunk += nchunks, startchunk < VETH_MAX_FRAMES_PER_MSG); /* Ack it */ @@ -1298,8 +1538,8 @@ static void veth_receive(struct veth_lpar_connection *cnx, cnx->pending_acks[cnx->num_pending_acks++] = event->base_event.xCorrelationToken; - if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold) - || (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) ) + if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold) || + (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) ) veth_flush_acks(cnx); spin_unlock_irqrestore(&cnx->lock, flags); @@ -1323,15 +1563,33 @@ static void veth_timed_ack(unsigned long ptr) static int veth_remove(struct vio_dev *vdev) { - int i = vdev->unit_address; + struct veth_lpar_connection *cnx; struct net_device *dev; + struct veth_port *port; + int i; - dev = veth_dev[i]; - if (dev != NULL) { - veth_dev[i] = NULL; - unregister_netdev(dev); - free_netdev(dev); + dev = veth_dev[vdev->unit_address]; + + if (! dev) + return 0; + + port = netdev_priv(dev); + + for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { + cnx = veth_cnx[i]; + + if (cnx && (port->lpar_map & (1 << i))) { + /* Drop our reference to connections on our VLAN */ + kobject_put(&cnx->kobject); + } } + + veth_dev[vdev->unit_address] = NULL; + kobject_del(&port->kobject); + kobject_put(&port->kobject); + unregister_netdev(dev); + free_netdev(dev); + return 0; } @@ -1339,19 +1597,32 @@ static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id) { int i = vdev->unit_address; struct net_device *dev; + struct veth_port *port; - dev = veth_probe_one(i, &vdev->dev); + dev = veth_probe_one(i, vdev); if (dev == NULL) { veth_remove(vdev); return 1; } veth_dev[i] = dev; - /* Start the state machine on each connection, to commence - * link negotiation */ - for (i = 0; i < HVMAXARCHITECTEDLPS; i++) - if (veth_cnx[i]) - veth_kick_statemachine(veth_cnx[i]); + port = (struct veth_port*)netdev_priv(dev); + + /* Start the state machine on each connection on this vlan. If we're + * the first dev to do so this will commence link negotiation */ + for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { + struct veth_lpar_connection *cnx; + + if (! (port->lpar_map & (1 << i))) + continue; + + cnx = veth_cnx[i]; + if (!cnx) + continue; + + kobject_get(&cnx->kobject); + veth_kick_statemachine(cnx); + } return 0; } @@ -1361,62 +1632,97 @@ static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id) * support. */ static struct vio_device_id veth_device_table[] __devinitdata = { - { "vlan", "" }, - { NULL, NULL } + { "network", "IBM,iSeries-l-lan" }, + { "", "" } }; MODULE_DEVICE_TABLE(vio, veth_device_table); static struct vio_driver veth_driver = { - .name = "iseries_veth", .id_table = veth_device_table, .probe = veth_probe, - .remove = veth_remove + .remove = veth_remove, + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + } }; /* * Module initialization/cleanup */ -void __exit veth_module_cleanup(void) +static void __exit veth_module_cleanup(void) { int i; + struct veth_lpar_connection *cnx; - vio_unregister_driver(&veth_driver); - - for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) - veth_stop_connection(i); - + /* Disconnect our "irq" to stop events coming from the Hypervisor. */ HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan); - /* Hypervisor callbacks may have scheduled more work while we - * were destroying connections. Now that we've disconnected from - * the hypervisor make sure everything's finished. */ + /* Make sure any work queued from Hypervisor callbacks is finished. */ flush_scheduled_work(); - for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) - veth_destroy_connection(i); + for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { + cnx = veth_cnx[i]; + + if (!cnx) + continue; + /* Remove the connection from sysfs */ + kobject_del(&cnx->kobject); + /* Drop the driver's reference to the connection */ + kobject_put(&cnx->kobject); + } + + /* Unregister the driver, which will close all the netdevs and stop + * the connections when they're no longer referenced. */ + vio_unregister_driver(&veth_driver); } module_exit(veth_module_cleanup); -int __init veth_module_init(void) +static int __init veth_module_init(void) { int i; int rc; + if (!firmware_has_feature(FW_FEATURE_ISERIES)) + return -ENODEV; + this_lp = HvLpConfig_getLpIndex_outline(); for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { rc = veth_init_connection(i); - if (rc != 0) { - veth_module_cleanup(); - return rc; - } + if (rc != 0) + goto error; } HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan, &veth_handle_event); - return vio_register_driver(&veth_driver); + rc = vio_register_driver(&veth_driver); + if (rc != 0) + goto error; + + for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { + struct kobject *kobj; + + if (!veth_cnx[i]) + continue; + + kobj = &veth_cnx[i]->kobject; + /* If the add failes, complain but otherwise continue */ + if (0 != driver_add_kobj(&veth_driver.driver, kobj, + "cnx%.2d", veth_cnx[i]->remote_lp)) + veth_error("cnx %d: Failed adding to sysfs.\n", i); + } + + return 0; + +error: + for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { + veth_destroy_connection(veth_cnx[i]); + } + + return rc; } module_init(veth_module_init);