[PATCH] iseries_veth: Use kobjects to track lifecycle of connection structs
drivers/net/iseries_veth.c
/* File veth.c created by Kyle A. Lucke on Mon Aug  7 2000. */
/*
 * IBM eServer iSeries Virtual Ethernet Device Driver
 * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp.
 * Substantially cleaned up by:
 * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 *
 * This module implements the virtual ethernet device for iSeries LPAR
 * Linux.  It uses hypervisor message passing to implement an
 * ethernet-like network device communicating between partitions on
 * the iSeries.
 *
 * The iSeries LPAR hypervisor currently allows for up to 16 different
 * virtual ethernets.  These are all dynamically configurable on
 * OS/400 partitions, but dynamic configuration is not supported under
 * Linux yet.  An ethXX network device will be created for each
 * virtual ethernet this partition is connected to.
 *
 * - This driver is responsible for routing packets to and from other
 *   partitions.  The MAC addresses used by the virtual ethernets
 *   contain meaning and must not be modified.
 *
 * - Having 2 virtual ethernets to the same remote partition DOES NOT
 *   double the available bandwidth.  The 2 devices will share the
 *   available hypervisor bandwidth.
 *
 * - If you send a packet to your own mac address, it will just be
 *   dropped, you won't get it on the receive side.
 *
 * - Multicast is implemented by sending the frame to every other
 *   partition.  It is the responsibility of the receiving partition
 *   to filter the addresses desired.
 *
 * Tunable parameters:
 *
 * VETH_NUMBUFFERS: This compile time option defaults to 120.  It
 * controls how much memory Linux will allocate per remote partition
 * it is communicating with.  It can be thought of as the maximum
 * number of packets outstanding to a remote partition at a time.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/ethtool.h>
#include <asm/iSeries/mf.h>
#include <asm/iSeries/iSeries_pci.h>
#include <asm/uaccess.h>

#include <asm/iSeries/HvLpConfig.h>
#include <asm/iSeries/HvTypes.h>
#include <asm/iSeries/HvLpEvent.h>
#include <asm/iommu.h>
#include <asm/vio.h>

#undef DEBUG

#include "iseries_veth.h"

MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");
MODULE_DESCRIPTION("iSeries Virtual ethernet driver");
MODULE_LICENSE("GPL");

#define VETH_NUMBUFFERS         (120)
#define VETH_ACKTIMEOUT         (1000000) /* microseconds */
#define VETH_MAX_MCAST          (12)

#define VETH_MAX_MTU            (9000)

#if VETH_NUMBUFFERS < 10
#define ACK_THRESHOLD           (1)
#elif VETH_NUMBUFFERS < 20
#define ACK_THRESHOLD           (4)
#elif VETH_NUMBUFFERS < 40
#define ACK_THRESHOLD           (10)
#else
#define ACK_THRESHOLD           (20)
#endif
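
/*
 * ACK_THRESHOLD is advertised to the other end in our capabilities
 * (local_caps.ack_threshold): the receiver batches that many frame
 * acks before flushing them back to us.  Scaling it with
 * VETH_NUMBUFFERS keeps roughly num_buffers/ack_threshold ack events
 * in flight, so the sender's buffers are recycled before they can
 * all be exhausted (see veth_process_caps() and veth_receive()).
 */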

#define VETH_STATE_SHUTDOWN     (0x0001)
#define VETH_STATE_OPEN         (0x0002)
#define VETH_STATE_RESET        (0x0004)
#define VETH_STATE_SENTMON      (0x0008)
#define VETH_STATE_SENTCAPS     (0x0010)
#define VETH_STATE_GOTCAPACK    (0x0020)
#define VETH_STATE_GOTCAPS      (0x0040)
#define VETH_STATE_SENTCAPACK   (0x0080)
#define VETH_STATE_READY        (0x0100)
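
/*
 * The state flags above are driven by veth_statemachine().  A normal
 * bring-up goes OPEN -> SENTMON -> SENTCAPS, then waits until both
 * GOTCAPS (we've seen and acked their capabilities, setting
 * SENTCAPACK) and GOTCAPACK (they've acked ours) hold before setting
 * READY.  RESET tears the connection down and starts over; SHUTDOWN
 * is terminal.
 */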

struct veth_msg {
        struct veth_msg *next;
        struct VethFramesData data;
        int token;
        int in_use;
        struct sk_buff *skb;
        struct device *dev;
};

struct veth_lpar_connection {
        HvLpIndex remote_lp;
        struct work_struct statemachine_wq;
        struct veth_msg *msgs;
        int num_events;
        struct VethCapData local_caps;

        struct kobject kobject;
        struct timer_list ack_timer;

        spinlock_t lock;
        unsigned long state;
        HvLpInstanceId src_inst;
        HvLpInstanceId dst_inst;
        struct VethLpEvent cap_event, cap_ack_event;
        u16 pending_acks[VETH_MAX_ACKS_PER_MSG];
        u32 num_pending_acks;

        int num_ack_events;
        struct VethCapData remote_caps;
        u32 ack_timeout;

        struct veth_msg *msg_stack_head;
};
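
/*
 * Connection lifecycle is reference counted via cnx->kobject: one
 * reference is held on behalf of the driver itself (dropped at module
 * unload) and one per net_device on a shared vlan (taken in
 * veth_probe(), dropped in veth_remove()).  When the last reference
 * goes away, veth_release_connection() stops and destroys the
 * connection.
 */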

struct veth_port {
        struct device *dev;
        struct net_device_stats stats;
        u64 mac_addr;
        HvLpIndexMap lpar_map;

        spinlock_t pending_gate;
        struct sk_buff *pending_skb;
        HvLpIndexMap pending_lpmask;

        rwlock_t mcast_gate;
        int promiscuous;
        int num_mcast;
        u64 mcast_addr[VETH_MAX_MCAST];
};

static HvLpIndex this_lp;
static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */
static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */

static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);
static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);
static void veth_flush_pending(struct veth_lpar_connection *cnx);
static void veth_receive(struct veth_lpar_connection *, struct VethLpEvent *);
static void veth_timed_ack(unsigned long connectionPtr);
static void veth_release_connection(struct kobject *kobject);

static struct kobj_type veth_lpar_connection_ktype = {
        .release        = veth_release_connection
};

/*
 * Utility functions
 */

#define veth_info(fmt, args...) \
        printk(KERN_INFO "iseries_veth: " fmt, ## args)

#define veth_error(fmt, args...) \
        printk(KERN_ERR "iseries_veth: Error: " fmt, ## args)

#ifdef DEBUG
#define veth_debug(fmt, args...) \
        printk(KERN_DEBUG "iseries_veth: " fmt, ## args)
#else
#define veth_debug(fmt, args...) do {} while (0)
#endif
/* You must hold the connection's lock when you call this function. */
static inline void veth_stack_push(struct veth_lpar_connection *cnx,
                                   struct veth_msg *msg)
{
        msg->next = cnx->msg_stack_head;
        cnx->msg_stack_head = msg;
}

/* You must hold the connection's lock when you call this function. */
static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx)
{
        struct veth_msg *msg;

        msg = cnx->msg_stack_head;
        if (msg)
                cnx->msg_stack_head = cnx->msg_stack_head->next;

        return msg;
}

static inline HvLpEvent_Rc
veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype,
                 HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype,
                 u64 token,
                 u64 data1, u64 data2, u64 data3, u64 data4, u64 data5)
{
        return HvCallEvent_signalLpEventFast(cnx->remote_lp,
                                             HvLpEvent_Type_VirtualLan,
                                             subtype, ackind, acktype,
                                             cnx->src_inst,
                                             cnx->dst_inst,
                                             token, data1, data2, data3,
                                             data4, data5);
}

static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx,
                                           u16 subtype, u64 token, void *data)
{
        u64 *p = (u64 *) data;

        return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck,
                                HvLpEvent_AckType_ImmediateAck,
                                token, p[0], p[1], p[2], p[3], p[4]);
}
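
/*
 * veth_signaldata() marshals a 40-byte payload (capabilities, frame
 * descriptors or pending acks) as five u64s through the fast
 * hypervisor event call, so callers must hand in a structure that is
 * at least five u64s in size and suitably aligned.
 */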

struct veth_allocation {
        struct completion c;
        int num;
};

static void veth_complete_allocation(void *parm, int number)
{
        struct veth_allocation *vc = (struct veth_allocation *)parm;

        vc->num = number;
        complete(&vc->c);
}

static int veth_allocate_events(HvLpIndex rlp, int number)
{
        struct veth_allocation vc = { COMPLETION_INITIALIZER(vc.c), 0 };

        mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan,
                            sizeof(struct VethLpEvent), number,
                            &veth_complete_allocation, &vc);
        wait_for_completion(&vc.c);

        return vc.num;
}
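
/*
 * veth_allocate_events() blocks on a completion until the hypervisor
 * allocation callback has run, so it may sleep: call it from process
 * context only and never with a spinlock held (veth_statemachine()
 * drops cnx->lock around veth_process_caps() for exactly this
 * reason).
 */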

/*
 * LPAR connection code
 */

static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx)
{
        schedule_work(&cnx->statemachine_wq);
}

static void veth_take_cap(struct veth_lpar_connection *cnx,
                          struct VethLpEvent *event)
{
        unsigned long flags;

        spin_lock_irqsave(&cnx->lock, flags);
        /* Receiving caps may mean the other end has just come up, so
         * we need to reload the instance ID of the far end */
        cnx->dst_inst =
                HvCallEvent_getTargetLpInstanceId(cnx->remote_lp,
                                                  HvLpEvent_Type_VirtualLan);

        if (cnx->state & VETH_STATE_GOTCAPS) {
                veth_error("Received a second capabilities event from LPAR %d.\n",
                           cnx->remote_lp);
                event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable;
                HvCallEvent_ackLpEvent((struct HvLpEvent *) event);
        } else {
                memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event));
                cnx->state |= VETH_STATE_GOTCAPS;
                veth_kick_statemachine(cnx);
        }
        spin_unlock_irqrestore(&cnx->lock, flags);
}

static void veth_take_cap_ack(struct veth_lpar_connection *cnx,
                              struct VethLpEvent *event)
{
        unsigned long flags;

        spin_lock_irqsave(&cnx->lock, flags);
        if (cnx->state & VETH_STATE_GOTCAPACK) {
                veth_error("Received a second capabilities ack from LPAR %d.\n",
                           cnx->remote_lp);
        } else {
                /* sizeof(cnx->cap_ack_event), not sizeof(&...): we want
                 * the size of the event, not of a pointer to it. */
                memcpy(&cnx->cap_ack_event, event,
                       sizeof(cnx->cap_ack_event));
                cnx->state |= VETH_STATE_GOTCAPACK;
                veth_kick_statemachine(cnx);
        }
        spin_unlock_irqrestore(&cnx->lock, flags);
}

static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,
                                  struct VethLpEvent *event)
{
        unsigned long flags;

        spin_lock_irqsave(&cnx->lock, flags);
        veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);

        /* Avoid kicking the statemachine once we're shutdown.
         * It's unnecessary and it could break veth_stop_connection(). */

        if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
                cnx->state |= VETH_STATE_RESET;
                veth_kick_statemachine(cnx);
        }
        spin_unlock_irqrestore(&cnx->lock, flags);
}

static void veth_handle_ack(struct VethLpEvent *event)
{
        HvLpIndex rlp = event->base_event.xTargetLp;
        struct veth_lpar_connection *cnx = veth_cnx[rlp];

        BUG_ON(! cnx);

        switch (event->base_event.xSubtype) {
        case VethEventTypeCap:
                veth_take_cap_ack(cnx, event);
                break;
        case VethEventTypeMonitor:
                veth_take_monitor_ack(cnx, event);
                break;
        default:
                veth_error("Unknown ack type %d from LPAR %d.\n",
                                event->base_event.xSubtype, rlp);
        }
}

static void veth_handle_int(struct VethLpEvent *event)
{
        HvLpIndex rlp = event->base_event.xSourceLp;
        struct veth_lpar_connection *cnx = veth_cnx[rlp];
        unsigned long flags;
        int i;

        BUG_ON(! cnx);

        switch (event->base_event.xSubtype) {
        case VethEventTypeCap:
                veth_take_cap(cnx, event);
                break;
        case VethEventTypeMonitor:
                /* do nothing... this'll hang out here until we're dead,
                 * and the hypervisor will return it for us. */
                break;
        case VethEventTypeFramesAck:
                spin_lock_irqsave(&cnx->lock, flags);
                for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) {
                        u16 msgnum = event->u.frames_ack_data.token[i];

                        if (msgnum < VETH_NUMBUFFERS)
                                veth_recycle_msg(cnx, cnx->msgs + msgnum);
                }
                spin_unlock_irqrestore(&cnx->lock, flags);
                veth_flush_pending(cnx);
                break;
        case VethEventTypeFrames:
                veth_receive(cnx, event);
                break;
        default:
                veth_error("Unknown interrupt type %d from LPAR %d.\n",
                                event->base_event.xSubtype, rlp);
        }
}

static void veth_handle_event(struct HvLpEvent *event, struct pt_regs *regs)
{
        struct VethLpEvent *veth_event = (struct VethLpEvent *)event;

        if (event->xFlags.xFunction == HvLpEvent_Function_Ack)
                veth_handle_ack(veth_event);
        else if (event->xFlags.xFunction == HvLpEvent_Function_Int)
                veth_handle_int(veth_event);
}

static int veth_process_caps(struct veth_lpar_connection *cnx)
{
        struct VethCapData *remote_caps = &cnx->remote_caps;
        int num_acks_needed;

        /* Convert timer to jiffies */
        cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000;

        if ( (remote_caps->num_buffers == 0)
             || (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG)
             || (remote_caps->ack_threshold == 0)
             || (cnx->ack_timeout == 0) ) {
                veth_error("Received incompatible capabilities from LPAR %d.\n",
                                cnx->remote_lp);
                return HvLpEvent_Rc_InvalidSubtypeData;
        }

        num_acks_needed = (remote_caps->num_buffers
                           / remote_caps->ack_threshold) + 1;

        /* FIXME: locking on num_ack_events? */
        if (cnx->num_ack_events < num_acks_needed) {
                int num;

                num = veth_allocate_events(cnx->remote_lp,
                                           num_acks_needed-cnx->num_ack_events);
                if (num > 0)
                        cnx->num_ack_events += num;

                if (cnx->num_ack_events < num_acks_needed) {
                        veth_error("Couldn't allocate enough ack events "
                                        "for LPAR %d.\n", cnx->remote_lp);

                        return HvLpEvent_Rc_BufferNotAvailable;
                }
        }

        return HvLpEvent_Rc_Good;
}
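
/*
 * The remote end sends one ack event per ack_threshold received
 * frames, so to be able to ack everything it can have outstanding we
 * need num_buffers / ack_threshold ack events preallocated (+1 for
 * rounding); veth_process_caps() tops the pool up on each
 * capabilities exchange.
 */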

/* FIXME: The gotos here are a bit dubious */
static void veth_statemachine(void *p)
{
        struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)p;
        int rlp = cnx->remote_lp;
        int rc;

        spin_lock_irq(&cnx->lock);

 restart:
        if (cnx->state & VETH_STATE_RESET) {
                int i;

                if (cnx->state & VETH_STATE_OPEN)
                        HvCallEvent_closeLpEventPath(cnx->remote_lp,
                                                     HvLpEvent_Type_VirtualLan);

                /*
                 * Reset ack data. This prevents the ack_timer actually
                 * doing anything, even if it runs one more time when
                 * we drop the lock below.
                 */
                memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
                cnx->num_pending_acks = 0;

                cnx->state &= ~(VETH_STATE_RESET | VETH_STATE_SENTMON
                                | VETH_STATE_OPEN | VETH_STATE_SENTCAPS
                                | VETH_STATE_GOTCAPACK | VETH_STATE_GOTCAPS
                                | VETH_STATE_SENTCAPACK | VETH_STATE_READY);

                /* Clean up any leftover messages */
                if (cnx->msgs)
                        for (i = 0; i < VETH_NUMBUFFERS; ++i)
                                veth_recycle_msg(cnx, cnx->msgs + i);

                /* Drop the lock so we can do stuff that might sleep or
                 * take other locks. */
                spin_unlock_irq(&cnx->lock);

                del_timer_sync(&cnx->ack_timer);
                veth_flush_pending(cnx);

                spin_lock_irq(&cnx->lock);

                if (cnx->state & VETH_STATE_RESET)
                        goto restart;

                /* Hack, wait for the other end to reset itself. */
                if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
                        schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ);
                        goto out;
                }
        }

        if (cnx->state & VETH_STATE_SHUTDOWN)
                /* It's all over, do nothing */
                goto out;

        if ( !(cnx->state & VETH_STATE_OPEN) ) {
                if (! cnx->msgs || (cnx->num_events < (2 + VETH_NUMBUFFERS)) )
                        goto cant_cope;

                HvCallEvent_openLpEventPath(rlp, HvLpEvent_Type_VirtualLan);
                cnx->src_inst =
                        HvCallEvent_getSourceLpInstanceId(rlp,
                                                          HvLpEvent_Type_VirtualLan);
                cnx->dst_inst =
                        HvCallEvent_getTargetLpInstanceId(rlp,
                                                          HvLpEvent_Type_VirtualLan);
                cnx->state |= VETH_STATE_OPEN;
        }

        if ( (cnx->state & VETH_STATE_OPEN)
             && !(cnx->state & VETH_STATE_SENTMON) ) {
                rc = veth_signalevent(cnx, VethEventTypeMonitor,
                                      HvLpEvent_AckInd_DoAck,
                                      HvLpEvent_AckType_DeferredAck,
                                      0, 0, 0, 0, 0, 0);

                if (rc == HvLpEvent_Rc_Good) {
                        cnx->state |= VETH_STATE_SENTMON;
                } else {
                        if ( (rc != HvLpEvent_Rc_PartitionDead)
                             && (rc != HvLpEvent_Rc_PathClosed) )
                                veth_error("Error sending monitor to LPAR %d, "
                                                "rc = %d\n", rlp, rc);

                        /* Oh well, hope we get a cap from the other
                         * end and do better when that kicks us */
                        goto out;
                }
        }

        if ( (cnx->state & VETH_STATE_OPEN)
             && !(cnx->state & VETH_STATE_SENTCAPS)) {
                u64 *rawcap = (u64 *)&cnx->local_caps;

                rc = veth_signalevent(cnx, VethEventTypeCap,
                                      HvLpEvent_AckInd_DoAck,
                                      HvLpEvent_AckType_ImmediateAck,
                                      0, rawcap[0], rawcap[1], rawcap[2],
                                      rawcap[3], rawcap[4]);

                if (rc == HvLpEvent_Rc_Good) {
                        cnx->state |= VETH_STATE_SENTCAPS;
                } else {
                        if ( (rc != HvLpEvent_Rc_PartitionDead)
                             && (rc != HvLpEvent_Rc_PathClosed) )
                                veth_error("Error sending caps to LPAR %d, "
                                                "rc = %d\n", rlp, rc);

                        /* Oh well, hope we get a cap from the other
                         * end and do better when that kicks us */
                        goto out;
                }
        }

        if ((cnx->state & VETH_STATE_GOTCAPS)
            && !(cnx->state & VETH_STATE_SENTCAPACK)) {
                struct VethCapData *remote_caps = &cnx->remote_caps;

                memcpy(remote_caps, &cnx->cap_event.u.caps_data,
                       sizeof(*remote_caps));

                spin_unlock_irq(&cnx->lock);
                rc = veth_process_caps(cnx);
                spin_lock_irq(&cnx->lock);

                /* We dropped the lock, so recheck for anything which
                 * might mess us up */
                if (cnx->state & (VETH_STATE_RESET|VETH_STATE_SHUTDOWN))
                        goto restart;

                cnx->cap_event.base_event.xRc = rc;
                HvCallEvent_ackLpEvent((struct HvLpEvent *)&cnx->cap_event);
                if (rc == HvLpEvent_Rc_Good)
                        cnx->state |= VETH_STATE_SENTCAPACK;
                else
                        goto cant_cope;
        }

        if ((cnx->state & VETH_STATE_GOTCAPACK)
            && (cnx->state & VETH_STATE_GOTCAPS)
            && !(cnx->state & VETH_STATE_READY)) {
                if (cnx->cap_ack_event.base_event.xRc == HvLpEvent_Rc_Good) {
                        /* Start the ACK timer */
                        cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
                        add_timer(&cnx->ack_timer);
                        cnx->state |= VETH_STATE_READY;
                } else {
                        veth_error("Caps rejected by LPAR %d, rc = %d\n",
                                        rlp, cnx->cap_ack_event.base_event.xRc);
                        goto cant_cope;
                }
        }

 out:
        spin_unlock_irq(&cnx->lock);
        return;

 cant_cope:
        /* FIXME: we get here if something happens we really can't
         * cope with.  The link will never work once we get here, and
         * all we can do is not lock the rest of the system up */
        veth_error("Unrecoverable error on connection to LPAR %d, shutting down"
                        " (state = 0x%04lx)\n", rlp, cnx->state);
        cnx->state |= VETH_STATE_SHUTDOWN;
        spin_unlock_irq(&cnx->lock);
}

static int veth_init_connection(u8 rlp)
{
        struct veth_lpar_connection *cnx;
        struct veth_msg *msgs;
        int i, rc;

        if ( (rlp == this_lp)
             || ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) )
                return 0;

        cnx = kmalloc(sizeof(*cnx), GFP_KERNEL);
        if (! cnx)
                return -ENOMEM;
        memset(cnx, 0, sizeof(*cnx));

        cnx->remote_lp = rlp;
        spin_lock_init(&cnx->lock);
        INIT_WORK(&cnx->statemachine_wq, veth_statemachine, cnx);
        init_timer(&cnx->ack_timer);
        cnx->ack_timer.function = veth_timed_ack;
        cnx->ack_timer.data = (unsigned long) cnx;
        memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));

        veth_cnx[rlp] = cnx;

        /* This gets us 1 reference, which is held on behalf of the driver
         * infrastructure. It's released at module unload. */
        kobject_init(&cnx->kobject);
        cnx->kobject.ktype = &veth_lpar_connection_ktype;
        rc = kobject_set_name(&cnx->kobject, "cnx%.2d", rlp);
        if (rc != 0)
                return rc;

        msgs = kmalloc(VETH_NUMBUFFERS * sizeof(struct veth_msg), GFP_KERNEL);
        if (! msgs) {
                veth_error("Can't allocate buffers for LPAR %d.\n", rlp);
                return -ENOMEM;
        }

        cnx->msgs = msgs;
        memset(msgs, 0, VETH_NUMBUFFERS * sizeof(struct veth_msg));

        for (i = 0; i < VETH_NUMBUFFERS; i++) {
                msgs[i].token = i;
                veth_stack_push(cnx, msgs + i);
        }

        cnx->num_events = veth_allocate_events(rlp, 2 + VETH_NUMBUFFERS);

        if (cnx->num_events < (2 + VETH_NUMBUFFERS)) {
                veth_error("Can't allocate enough events for LPAR %d.\n", rlp);
                return -ENOMEM;
        }

        cnx->local_caps.num_buffers = VETH_NUMBUFFERS;
        cnx->local_caps.ack_threshold = ACK_THRESHOLD;
        cnx->local_caps.ack_timeout = VETH_ACKTIMEOUT;

        return 0;
}

static void veth_stop_connection(struct veth_lpar_connection *cnx)
{
        if (!cnx)
                return;

        spin_lock_irq(&cnx->lock);
        cnx->state |= VETH_STATE_RESET | VETH_STATE_SHUTDOWN;
        veth_kick_statemachine(cnx);
        spin_unlock_irq(&cnx->lock);

        /* There's a slim chance the reset code has just queued the
         * statemachine to run in five seconds. If so we need to cancel
         * that and requeue the work to run now. */
        if (cancel_delayed_work(&cnx->statemachine_wq)) {
                spin_lock_irq(&cnx->lock);
                veth_kick_statemachine(cnx);
                spin_unlock_irq(&cnx->lock);
        }

        /* Wait for the state machine to run. */
        flush_scheduled_work();
}
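
/*
 * Shutdown ordering matters here: set RESET|SHUTDOWN under the lock,
 * kick the state machine, then flush_scheduled_work() so that the
 * state machine has actually observed SHUTDOWN (and stopped
 * rescheduling itself) before the caller goes on to free anything.
 */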

static void veth_destroy_connection(struct veth_lpar_connection *cnx)
{
        if (!cnx)
                return;

        if (cnx->num_events > 0)
                mf_deallocate_lp_events(cnx->remote_lp,
                                      HvLpEvent_Type_VirtualLan,
                                      cnx->num_events,
                                      NULL, NULL);
        if (cnx->num_ack_events > 0)
                mf_deallocate_lp_events(cnx->remote_lp,
                                      HvLpEvent_Type_VirtualLan,
                                      cnx->num_ack_events,
                                      NULL, NULL);

        kfree(cnx->msgs);
        veth_cnx[cnx->remote_lp] = NULL;
        kfree(cnx);
}

static void veth_release_connection(struct kobject *kobj)
{
        struct veth_lpar_connection *cnx;
        cnx = container_of(kobj, struct veth_lpar_connection, kobject);
        veth_stop_connection(cnx);
        veth_destroy_connection(cnx);
}
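
/*
 * veth_release_connection() is the kobject release hook: it runs only
 * once the last kobject_put() has dropped the refcount to zero, i.e.
 * no net_device on the vlan still holds a reference, so it is safe to
 * stop the connection and free its resources here.
 */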

/*
 * net_device code
 */

static int veth_open(struct net_device *dev)
{
        struct veth_port *port = (struct veth_port *) dev->priv;

        memset(&port->stats, 0, sizeof (port->stats));
        netif_start_queue(dev);
        return 0;
}

static int veth_close(struct net_device *dev)
{
        netif_stop_queue(dev);
        return 0;
}

static struct net_device_stats *veth_get_stats(struct net_device *dev)
{
        struct veth_port *port = (struct veth_port *) dev->priv;

        return &port->stats;
}

static int veth_change_mtu(struct net_device *dev, int new_mtu)
{
        if ((new_mtu < 68) || (new_mtu > VETH_MAX_MTU))
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}

static void veth_set_multicast_list(struct net_device *dev)
{
        struct veth_port *port = (struct veth_port *) dev->priv;
        unsigned long flags;

        write_lock_irqsave(&port->mcast_gate, flags);

        if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) ||
                        (dev->mc_count > VETH_MAX_MCAST)) {
                port->promiscuous = 1;
        } else {
                struct dev_mc_list *dmi = dev->mc_list;
                int i;

                port->promiscuous = 0;

                /* Update table */
                port->num_mcast = 0;

                for (i = 0; i < dev->mc_count; i++) {
                        u8 *addr = dmi->dmi_addr;
                        u64 xaddr = 0;

                        if (addr[0] & 0x01) {/* multicast address? */
                                memcpy(&xaddr, addr, ETH_ALEN);
                                port->mcast_addr[port->num_mcast] = xaddr;
                                port->num_mcast++;
                        }
                        dmi = dmi->next;
                }
        }

        write_unlock_irqrestore(&port->mcast_gate, flags);
}

static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
        strncpy(info->driver, "veth", sizeof(info->driver) - 1);
        info->driver[sizeof(info->driver) - 1] = '\0';
        strncpy(info->version, "1.0", sizeof(info->version) - 1);
}

static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
{
        ecmd->supported = (SUPPORTED_1000baseT_Full
                          | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
        /* Use the ADVERTISED_* constants for the advertising field;
         * they share values with their SUPPORTED_* counterparts but
         * are the semantically correct set here. */
        ecmd->advertising = (ADVERTISED_1000baseT_Full
                            | ADVERTISED_Autoneg | ADVERTISED_FIBRE);
        ecmd->port = PORT_FIBRE;
        ecmd->transceiver = XCVR_INTERNAL;
        ecmd->phy_address = 0;
        ecmd->speed = SPEED_1000;
        ecmd->duplex = DUPLEX_FULL;
        ecmd->autoneg = AUTONEG_ENABLE;
        ecmd->maxtxpkt = 120;
        ecmd->maxrxpkt = 120;
        return 0;
}

static u32 veth_get_link(struct net_device *dev)
{
        return 1;
}

static struct ethtool_ops ops = {
        .get_drvinfo = veth_get_drvinfo,
        .get_settings = veth_get_settings,
        .get_link = veth_get_link,
};

static void veth_tx_timeout(struct net_device *dev)
{
        struct veth_port *port = (struct veth_port *)dev->priv;
        struct net_device_stats *stats = &port->stats;
        unsigned long flags;
        int i;

        stats->tx_errors++;

        spin_lock_irqsave(&port->pending_gate, flags);

        if (!port->pending_lpmask) {
                spin_unlock_irqrestore(&port->pending_gate, flags);
                return;
        }

        printk(KERN_WARNING "%s: Tx timeout!  Resetting lp connections: %08x\n",
               dev->name, port->pending_lpmask);

        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
                struct veth_lpar_connection *cnx = veth_cnx[i];

                if (! (port->pending_lpmask & (1<<i)))
                        continue;

                /* If we're pending on it, we must be connected to it,
                 * so we should certainly have a structure for it. */
                BUG_ON(! cnx);

                /* Theoretically we could be kicking a connection
                 * which doesn't deserve it, but in practice if we've
                 * had a Tx timeout, the pending_lpmask will have
                 * exactly one bit set - the connection causing the
                 * problem. */
                spin_lock(&cnx->lock);
                cnx->state |= VETH_STATE_RESET;
                veth_kick_statemachine(cnx);
                spin_unlock(&cnx->lock);
        }

        spin_unlock_irqrestore(&port->pending_gate, flags);
}

static struct net_device * __init veth_probe_one(int vlan, struct device *vdev)
{
        struct net_device *dev;
        struct veth_port *port;
        int i, rc;

        dev = alloc_etherdev(sizeof (struct veth_port));
        if (! dev) {
                veth_error("Unable to allocate net_device structure!\n");
                return NULL;
        }

        port = (struct veth_port *) dev->priv;

        spin_lock_init(&port->pending_gate);
        rwlock_init(&port->mcast_gate);

        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
                HvLpVirtualLanIndexMap map;

                if (i == this_lp)
                        continue;
                map = HvLpConfig_getVirtualLanIndexMapForLp(i);
                if (map & (0x8000 >> vlan))
                        port->lpar_map |= (1 << i);
        }
        port->dev = vdev;

        dev->dev_addr[0] = 0x02;
        dev->dev_addr[1] = 0x01;
        dev->dev_addr[2] = 0xff;
        dev->dev_addr[3] = vlan;
        dev->dev_addr[4] = 0xff;
        dev->dev_addr[5] = this_lp;

        dev->mtu = VETH_MAX_MTU;

        memcpy(&port->mac_addr, dev->dev_addr, 6);

        dev->open = veth_open;
        dev->hard_start_xmit = veth_start_xmit;
        dev->stop = veth_close;
        dev->get_stats = veth_get_stats;
        dev->change_mtu = veth_change_mtu;
        dev->set_mac_address = NULL;
        dev->set_multicast_list = veth_set_multicast_list;
        SET_ETHTOOL_OPS(dev, &ops);

        dev->watchdog_timeo = 2 * (VETH_ACKTIMEOUT * HZ / 1000000);
        dev->tx_timeout = veth_tx_timeout;

        SET_NETDEV_DEV(dev, vdev);

        rc = register_netdev(dev);
        if (rc != 0) {
                veth_error("Failed registering net device for vlan%d.\n", vlan);
                free_netdev(dev);
                return NULL;
        }

        veth_info("%s attached to iSeries vlan %d (LPAR map = 0x%.4X)\n",
                        dev->name, vlan, port->lpar_map);

        return dev;
}
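
/*
 * The generated station address encodes the topology - 02:01:ff
 * (locally administered), the vlan number, 0xff, then our own LPAR
 * index - which is why the header comment warns that these MAC
 * addresses carry meaning and must not be changed: the Tx path
 * routes unicast frames by reading the target LPAR index back out
 * of byte 5 of the destination address.
 */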

/*
 * Tx path
 */

static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp,
                                struct net_device *dev)
{
        struct veth_lpar_connection *cnx = veth_cnx[rlp];
        struct veth_port *port = (struct veth_port *) dev->priv;
        HvLpEvent_Rc rc;
        struct veth_msg *msg = NULL;
        int err = 0;
        unsigned long flags;

        if (! cnx) {
                port->stats.tx_errors++;
                dev_kfree_skb(skb);
                return 0;
        }

        spin_lock_irqsave(&cnx->lock, flags);

        if (! (cnx->state & VETH_STATE_READY))
                goto drop;

        if ((skb->len - ETH_HLEN) > VETH_MAX_MTU)
                goto drop;

        msg = veth_stack_pop(cnx);

        if (! msg) {
                err = 1;
                goto drop;
        }

        msg->in_use = 1;

        msg->data.addr[0] = dma_map_single(port->dev, skb->data,
                                skb->len, DMA_TO_DEVICE);

        if (dma_mapping_error(msg->data.addr[0]))
                goto recycle_and_drop;

        /* Is it really necessary to check the length and address
         * fields of the first entry here? */
        msg->skb = skb;
        msg->dev = port->dev;
        msg->data.len[0] = skb->len;
        msg->data.eofmask = 1 << VETH_EOF_SHIFT;

        rc = veth_signaldata(cnx, VethEventTypeFrames, msg->token, &msg->data);

        if (rc != HvLpEvent_Rc_Good)
                goto recycle_and_drop;

        spin_unlock_irqrestore(&cnx->lock, flags);
        return 0;

 recycle_and_drop:
        /* we free the skb below, so tell veth_recycle_msg() not to. */
        msg->skb = NULL;
        veth_recycle_msg(cnx, msg);
 drop:
        port->stats.tx_errors++;
        dev_kfree_skb(skb);
        spin_unlock_irqrestore(&cnx->lock, flags);
        return err;
}

static HvLpIndexMap veth_transmit_to_many(struct sk_buff *skb,
                                          HvLpIndexMap lpmask,
                                          struct net_device *dev)
{
        struct veth_port *port = (struct veth_port *) dev->priv;
        int i;
        int rc;

        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
                if ((lpmask & (1 << i)) == 0)
                        continue;

                rc = veth_transmit_to_one(skb_get(skb), i, dev);
                if (! rc)
                        lpmask &= ~(1<<i);
        }

        if (! lpmask) {
                port->stats.tx_packets++;
                port->stats.tx_bytes += skb->len;
        }

        return lpmask;
}

static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        unsigned char *frame = skb->data;
        struct veth_port *port = (struct veth_port *) dev->priv;
        unsigned long flags;
        HvLpIndexMap lpmask;

        if (! (frame[0] & 0x01)) {
                /* unicast packet */
                HvLpIndex rlp = frame[5];

                if ( ! ((1 << rlp) & port->lpar_map) ) {
                        dev_kfree_skb(skb);
                        return 0;
                }

                lpmask = 1 << rlp;
        } else {
                lpmask = port->lpar_map;
        }

        spin_lock_irqsave(&port->pending_gate, flags);

        lpmask = veth_transmit_to_many(skb, lpmask, dev);

        dev->trans_start = jiffies;

        if (! lpmask) {
                dev_kfree_skb(skb);
        } else {
                if (port->pending_skb) {
                        veth_error("%s: TX while skb was pending!\n",
                                   dev->name);
                        dev_kfree_skb(skb);
                        spin_unlock_irqrestore(&port->pending_gate, flags);
                        return 1;
                }

                port->pending_skb = skb;
                port->pending_lpmask = lpmask;
                netif_stop_queue(dev);
        }

        spin_unlock_irqrestore(&port->pending_gate, flags);

        return 0;
}
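
/*
 * At most one skb may be parked per port: if any target LPAR couldn't
 * take the frame, it is stashed in pending_skb with the remaining
 * targets in pending_lpmask, and the queue is stopped.
 * veth_flush_pending() retries it whenever a frames-ack frees
 * buffers, and wakes the queue once the mask drains to zero.
 */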

/* You must hold the connection's lock when you call this function. */
static void veth_recycle_msg(struct veth_lpar_connection *cnx,
                             struct veth_msg *msg)
{
        u32 dma_address, dma_length;

        if (msg->in_use) {
                msg->in_use = 0;
                dma_address = msg->data.addr[0];
                dma_length = msg->data.len[0];

                if (!dma_mapping_error(dma_address))
                        dma_unmap_single(msg->dev, dma_address, dma_length,
                                        DMA_TO_DEVICE);

                if (msg->skb) {
                        dev_kfree_skb_any(msg->skb);
                        msg->skb = NULL;
                }

                memset(&msg->data, 0, sizeof(msg->data));
                veth_stack_push(cnx, msg);
        } else if (cnx->state & VETH_STATE_OPEN) {
                veth_error("Non-pending frame (# %d) acked by LPAR %d.\n",
                                msg->token, cnx->remote_lp);
        }
}

static void veth_flush_pending(struct veth_lpar_connection *cnx)
{
        int i;
        for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
                struct net_device *dev = veth_dev[i];
                struct veth_port *port;
                unsigned long flags;

                if (! dev)
                        continue;

                port = (struct veth_port *)dev->priv;

                if (! (port->lpar_map & (1<<cnx->remote_lp)))
                        continue;

                spin_lock_irqsave(&port->pending_gate, flags);
                if (port->pending_skb) {
                        port->pending_lpmask =
                                veth_transmit_to_many(port->pending_skb,
                                                      port->pending_lpmask,
                                                      dev);
                        if (! port->pending_lpmask) {
                                dev_kfree_skb_any(port->pending_skb);
                                port->pending_skb = NULL;
                                netif_wake_queue(dev);
                        }
                }
                spin_unlock_irqrestore(&port->pending_gate, flags);
        }
}

/*
 * Rx path
 */

static inline int veth_frame_wanted(struct veth_port *port, u64 mac_addr)
{
        int wanted = 0;
        int i;
        unsigned long flags;

        if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) )
                return 1;

        read_lock_irqsave(&port->mcast_gate, flags);

        if (port->promiscuous) {
                wanted = 1;
                goto out;
        }

        for (i = 0; i < port->num_mcast; ++i) {
                if (port->mcast_addr[i] == mac_addr) {
                        wanted = 1;
                        break;
                }
        }

 out:
        read_unlock_irqrestore(&port->mcast_gate, flags);

        return wanted;
}

struct dma_chunk {
        u64 addr;
        u64 size;
};

#define VETH_MAX_PAGES_PER_FRAME ( (VETH_MAX_MTU+PAGE_SIZE-2)/PAGE_SIZE + 1 )

static inline void veth_build_dma_list(struct dma_chunk *list,
                                       unsigned char *p, unsigned long length)
{
        unsigned long done;
        int i = 1;

        /* FIXME: skbs are contiguous in real addresses.  Do we
         * really need to break it into PAGE_SIZE chunks, or can we do
         * it just at the granularity of iSeries real->absolute
         * mapping?  Indeed, given the way the allocator works, can we
         * count on them being absolutely contiguous? */
        list[0].addr = ISERIES_HV_ADDR(p);
        list[0].size = min(length,
                           PAGE_SIZE - ((unsigned long)p & ~PAGE_MASK));

        done = list[0].size;
        while (done < length) {
                list[i].addr = ISERIES_HV_ADDR(p + done);
                list[i].size = min(length-done, PAGE_SIZE);
                done += list[i].size;
                i++;
        }
}
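
/*
 * This builds the local half of the scatter list handed to
 * HvCallEvent_dmaBufList(): the (virtually contiguous) skb data is
 * split at page boundaries into (real address, size) chunks.
 * VETH_MAX_PAGES_PER_FRAME is sized for a maximal frame starting at
 * the worst possible page offset, hence the +1.
 */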

static void veth_flush_acks(struct veth_lpar_connection *cnx)
{
        HvLpEvent_Rc rc;

        rc = veth_signaldata(cnx, VethEventTypeFramesAck,
                             0, &cnx->pending_acks);

        if (rc != HvLpEvent_Rc_Good)
                veth_error("Failed acking frames from LPAR %d, rc = %d\n",
                                cnx->remote_lp, (int)rc);

        cnx->num_pending_acks = 0;
        memset(&cnx->pending_acks, 0xff, sizeof(cnx->pending_acks));
}

static void veth_receive(struct veth_lpar_connection *cnx,
                         struct VethLpEvent *event)
{
        struct VethFramesData *senddata = &event->u.frames_data;
        int startchunk = 0;
        int nchunks;
        unsigned long flags;
        HvLpDma_Rc rc;

        do {
                u16 length = 0;
                struct sk_buff *skb;
                struct dma_chunk local_list[VETH_MAX_PAGES_PER_FRAME];
                struct dma_chunk remote_list[VETH_MAX_FRAMES_PER_MSG];
                u64 dest;
                HvLpVirtualLanIndex vlan;
                struct net_device *dev;
                struct veth_port *port;

                /* FIXME: do we need this? */
                memset(local_list, 0, sizeof(local_list));
                /* sizeof(remote_list), not sizeof(VETH_MAX_FRAMES_PER_MSG):
                 * the latter is just the size of an int. */
                memset(remote_list, 0, sizeof(remote_list));

                /* a 0 address marks the end of the valid entries */
                if (senddata->addr[startchunk] == 0)
                        break;

                /* make sure that we have at least 1 EOF entry in the
                 * remaining entries */
                if (! (senddata->eofmask >> (startchunk + VETH_EOF_SHIFT))) {
                        veth_error("Missing EOF fragment in event "
                                        "eofmask = 0x%x startchunk = %d\n",
                                        (unsigned)senddata->eofmask,
                                        startchunk);
                        break;
                }

                /* build list of chunks in this frame */
                nchunks = 0;
                do {
                        remote_list[nchunks].addr =
                                (u64) senddata->addr[startchunk+nchunks] << 32;
                        remote_list[nchunks].size =
                                senddata->len[startchunk+nchunks];
                        length += remote_list[nchunks].size;
                } while (! (senddata->eofmask &
                            (1 << (VETH_EOF_SHIFT + startchunk + nchunks++))));

                /* length == total length of all chunks */
                /* nchunks == # of chunks in this frame */

                if ((length - ETH_HLEN) > VETH_MAX_MTU) {
                        veth_error("Received oversize frame from LPAR %d "
                                        "(length = %d)\n",
                                        cnx->remote_lp, length);
                        continue;
                }

                skb = alloc_skb(length, GFP_ATOMIC);
                if (!skb)
                        continue;

                veth_build_dma_list(local_list, skb->data, length);

                rc = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan,
                                            event->base_event.xSourceLp,
                                            HvLpDma_Direction_RemoteToLocal,
                                            cnx->src_inst,
                                            cnx->dst_inst,
                                            HvLpDma_AddressType_RealAddress,
                                            HvLpDma_AddressType_TceIndex,
                                            ISERIES_HV_ADDR(&local_list),
                                            ISERIES_HV_ADDR(&remote_list),
                                            length);
                if (rc != HvLpDma_Rc_Good) {
                        dev_kfree_skb_irq(skb);
                        continue;
                }

                vlan = skb->data[9];

                /* Bounds-check the vlan index *before* using it to
                 * index veth_dev[], and use >=, since the constant is
                 * the array size. */
                if (vlan >= HVMAXARCHITECTEDVIRTUALLANS) {
                        dev_kfree_skb_irq(skb);
                        continue;
                }

                dev = veth_dev[vlan];
                if (! dev) {
                        /*
                         * Some earlier versions of the driver sent
                         * broadcasts down all connections, even to lpars
                         * that weren't on the relevant vlan. So ignore
                         * packets belonging to a vlan we're not on.
                         * We can also be here if we receive packets while
                         * the driver is going down, because then dev is NULL.
                         */
                        dev_kfree_skb_irq(skb);
                        continue;
                }

                port = (struct veth_port *)dev->priv;
                dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000;

                if (! veth_frame_wanted(port, dest)) {
                        dev_kfree_skb_irq(skb);
                        continue;
                }

                skb_put(skb, length);
                skb->dev = dev;
                skb->protocol = eth_type_trans(skb, dev);
                skb->ip_summed = CHECKSUM_NONE;
                netif_rx(skb);  /* send it up */
                port->stats.rx_packets++;
                port->stats.rx_bytes += length;
        } while (startchunk += nchunks, startchunk < VETH_MAX_FRAMES_PER_MSG);

        /* Ack it */
        spin_lock_irqsave(&cnx->lock, flags);
        BUG_ON(cnx->num_pending_acks > VETH_MAX_ACKS_PER_MSG);

        cnx->pending_acks[cnx->num_pending_acks++] =
                event->base_event.xCorrelationToken;

        if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold)
             || (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) )
                veth_flush_acks(cnx);

        spin_unlock_irqrestore(&cnx->lock, flags);
}
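
/*
 * Received frames are acked lazily: the event's correlation token is
 * queued in pending_acks and only flushed once the remote's
 * ack_threshold (or our array size) is reached, or when the periodic
 * ack_timer below fires.  Those acks are what let the sender recycle
 * its veth_msg buffers.
 */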

static void veth_timed_ack(unsigned long ptr)
{
        struct veth_lpar_connection *cnx = (struct veth_lpar_connection *) ptr;
        unsigned long flags;

        /* Ack all the events */
        spin_lock_irqsave(&cnx->lock, flags);
        if (cnx->num_pending_acks > 0)
                veth_flush_acks(cnx);

        /* Reschedule the timer */
        cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
        add_timer(&cnx->ack_timer);
        spin_unlock_irqrestore(&cnx->lock, flags);
}

static int veth_remove(struct vio_dev *vdev)
{
        struct veth_lpar_connection *cnx;
        struct net_device *dev;
        struct veth_port *port;
        int i;

        dev = veth_dev[vdev->unit_address];

        if (! dev)
                return 0;

        port = netdev_priv(dev);

        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
                cnx = veth_cnx[i];

                if (cnx && (port->lpar_map & (1 << i))) {
                        /* Drop our reference to connections on our VLAN */
                        kobject_put(&cnx->kobject);
                }
        }

        veth_dev[vdev->unit_address] = NULL;
        unregister_netdev(dev);
        free_netdev(dev);

        return 0;
}

static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
        int i = vdev->unit_address;
        struct net_device *dev;
        struct veth_port *port;

        dev = veth_probe_one(i, &vdev->dev);
        if (dev == NULL) {
                veth_remove(vdev);
                return 1;
        }
        veth_dev[i] = dev;

        port = (struct veth_port*)netdev_priv(dev);

        /* Start the state machine on each connection on this vlan. If we're
         * the first dev to do so this will commence link negotiation */
        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
                struct veth_lpar_connection *cnx;

                if (! (port->lpar_map & (1 << i)))
                        continue;

                cnx = veth_cnx[i];
                if (!cnx)
                        continue;

                kobject_get(&cnx->kobject);
                veth_kick_statemachine(cnx);
        }

        return 0;
}

/**
 * veth_device_table: Used by vio.c to match devices that we
 * support.
 */
static struct vio_device_id veth_device_table[] __devinitdata = {
        { "vlan", "" },
        { "", "" }
};
MODULE_DEVICE_TABLE(vio, veth_device_table);

static struct vio_driver veth_driver = {
        .name = "iseries_veth",
        .id_table = veth_device_table,
        .probe = veth_probe,
        .remove = veth_remove
};

/*
 * Module initialization/cleanup
 */

void __exit veth_module_cleanup(void)
{
        int i;
        struct veth_lpar_connection *cnx;

        /* Disconnect our "irq" to stop events coming from the Hypervisor. */
        HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan);

        /* Make sure any work queued from Hypervisor callbacks is finished. */
        flush_scheduled_work();

        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
                cnx = veth_cnx[i];

                if (!cnx)
                        continue;

                /* Drop the driver's reference to the connection */
                kobject_put(&cnx->kobject);
        }

        /* Unregister the driver, which will close all the netdevs and stop
         * the connections when they're no longer referenced. */
        vio_unregister_driver(&veth_driver);
}
module_exit(veth_module_cleanup);

int __init veth_module_init(void)
{
        int i;
        int rc;

        this_lp = HvLpConfig_getLpIndex_outline();

        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
                rc = veth_init_connection(i);
                if (rc != 0)
                        goto error;
        }

        HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan,
                                  &veth_handle_event);

        rc = vio_register_driver(&veth_driver);
        if (rc != 0)
                goto error;

        return 0;

error:
        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
                veth_destroy_connection(veth_cnx[i]);
        }

        return rc;
}
module_init(veth_module_init);