e4eb8be3bb0c0c4baab4561d8f6d4927a7db1515
[safe/jmp/linux-2.6] / drivers / infiniband / core / addr.c
1 /*
2  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35
36 #include <linux/mutex.h>
37 #include <linux/inetdevice.h>
38 #include <linux/workqueue.h>
39 #include <linux/if_arp.h>
40 #include <net/arp.h>
41 #include <net/neighbour.h>
42 #include <net/route.h>
43 #include <net/netevent.h>
44 #include <rdma/ib_addr.h>
45
46 MODULE_AUTHOR("Sean Hefty");
47 MODULE_DESCRIPTION("IB Address Translation");
48 MODULE_LICENSE("Dual BSD/GPL");
49
50 struct addr_req {
51         struct list_head list;
52         struct sockaddr src_addr;
53         struct sockaddr dst_addr;
54         struct rdma_dev_addr *addr;
55         struct rdma_addr_client *client;
56         void *context;
57         void (*callback)(int status, struct sockaddr *src_addr,
58                          struct rdma_dev_addr *addr, void *context);
59         unsigned long timeout;
60         int status;
61 };
62
static void process_req(struct work_struct *work);

/* Held around every walk or modification of req_list. */
static DEFINE_MUTEX(lock);
/* Outstanding requests; queue_req() keeps them ordered by timeout. */
static LIST_HEAD(req_list);
/* Delayed work item whose handler is process_req(). */
static DECLARE_DELAYED_WORK(work, process_req);
/* Workqueue the delayed work runs on; created in addr_init(). */
static struct workqueue_struct *addr_wq;
69
70 void rdma_addr_register_client(struct rdma_addr_client *client)
71 {
72         atomic_set(&client->refcount, 1);
73         init_completion(&client->comp);
74 }
75 EXPORT_SYMBOL(rdma_addr_register_client);
76
77 static inline void put_client(struct rdma_addr_client *client)
78 {
79         if (atomic_dec_and_test(&client->refcount))
80                 complete(&client->comp);
81 }
82
83 void rdma_addr_unregister_client(struct rdma_addr_client *client)
84 {
85         put_client(client);
86         wait_for_completion(&client->comp);
87 }
88 EXPORT_SYMBOL(rdma_addr_unregister_client);
89
90 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
91                      const unsigned char *dst_dev_addr)
92 {
93         switch (dev->type) {
94         case ARPHRD_INFINIBAND:
95                 dev_addr->dev_type = RDMA_NODE_IB_CA;
96                 break;
97         case ARPHRD_ETHER:
98                 dev_addr->dev_type = RDMA_NODE_RNIC;
99                 break;
100         default:
101                 return -EADDRNOTAVAIL;
102         }
103
104         memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
105         memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
106         if (dst_dev_addr)
107                 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
108         return 0;
109 }
110 EXPORT_SYMBOL(rdma_copy_addr);
111
112 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
113 {
114         struct net_device *dev;
115         __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
116         int ret;
117
118         dev = ip_dev_find(&init_net, ip);
119         if (!dev)
120                 return -EADDRNOTAVAIL;
121
122         ret = rdma_copy_addr(dev_addr, dev, NULL);
123         dev_put(dev);
124         return ret;
125 }
126 EXPORT_SYMBOL(rdma_translate_ip);
127
128 static void set_timeout(unsigned long time)
129 {
130         unsigned long delay;
131
132         cancel_delayed_work(&work);
133
134         delay = time - jiffies;
135         if ((long)delay <= 0)
136                 delay = 1;
137
138         queue_delayed_work(addr_wq, &work, delay);
139 }
140
141 static void queue_req(struct addr_req *req)
142 {
143         struct addr_req *temp_req;
144
145         mutex_lock(&lock);
146         list_for_each_entry_reverse(temp_req, &req_list, list) {
147                 if (time_after_eq(req->timeout, temp_req->timeout))
148                         break;
149         }
150
151         list_add(&req->list, &temp_req->list);
152
153         if (req_list.next == &req->list)
154                 set_timeout(req->timeout);
155         mutex_unlock(&lock);
156 }
157
158 static void addr_send_arp(struct sockaddr_in *dst_in)
159 {
160         struct rtable *rt;
161         struct flowi fl;
162         __be32 dst_ip = dst_in->sin_addr.s_addr;
163
164         memset(&fl, 0, sizeof fl);
165         fl.nl_u.ip4_u.daddr = dst_ip;
166         if (ip_route_output_key(&init_net, &rt, &fl))
167                 return;
168
169         neigh_event_send(rt->u.dst.neighbour, NULL);
170         ip_rt_put(rt);
171 }
172
173 static int addr_resolve_remote(struct sockaddr_in *src_in,
174                                struct sockaddr_in *dst_in,
175                                struct rdma_dev_addr *addr)
176 {
177         __be32 src_ip = src_in->sin_addr.s_addr;
178         __be32 dst_ip = dst_in->sin_addr.s_addr;
179         struct flowi fl;
180         struct rtable *rt;
181         struct neighbour *neigh;
182         int ret;
183
184         memset(&fl, 0, sizeof fl);
185         fl.nl_u.ip4_u.daddr = dst_ip;
186         fl.nl_u.ip4_u.saddr = src_ip;
187         ret = ip_route_output_key(&init_net, &rt, &fl);
188         if (ret)
189                 goto out;
190
191         /* If the device does ARP internally, return 'done' */
192         if (rt->idev->dev->flags & IFF_NOARP) {
193                 rdma_copy_addr(addr, rt->idev->dev, NULL);
194                 goto put;
195         }
196
197         neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
198         if (!neigh) {
199                 ret = -ENODATA;
200                 goto put;
201         }
202
203         if (!(neigh->nud_state & NUD_VALID)) {
204                 ret = -ENODATA;
205                 goto release;
206         }
207
208         if (!src_ip) {
209                 src_in->sin_family = dst_in->sin_family;
210                 src_in->sin_addr.s_addr = rt->rt_src;
211         }
212
213         ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
214 release:
215         neigh_release(neigh);
216 put:
217         ip_rt_put(rt);
218 out:
219         return ret;
220 }
221
222 static void process_req(struct work_struct *work)
223 {
224         struct addr_req *req, *temp_req;
225         struct sockaddr_in *src_in, *dst_in;
226         struct list_head done_list;
227
228         INIT_LIST_HEAD(&done_list);
229
230         mutex_lock(&lock);
231         list_for_each_entry_safe(req, temp_req, &req_list, list) {
232                 if (req->status == -ENODATA) {
233                         src_in = (struct sockaddr_in *) &req->src_addr;
234                         dst_in = (struct sockaddr_in *) &req->dst_addr;
235                         req->status = addr_resolve_remote(src_in, dst_in,
236                                                           req->addr);
237                         if (req->status && time_after_eq(jiffies, req->timeout))
238                                 req->status = -ETIMEDOUT;
239                         else if (req->status == -ENODATA)
240                                 continue;
241                 }
242                 list_move_tail(&req->list, &done_list);
243         }
244
245         if (!list_empty(&req_list)) {
246                 req = list_entry(req_list.next, struct addr_req, list);
247                 set_timeout(req->timeout);
248         }
249         mutex_unlock(&lock);
250
251         list_for_each_entry_safe(req, temp_req, &done_list, list) {
252                 list_del(&req->list);
253                 req->callback(req->status, &req->src_addr, req->addr,
254                               req->context);
255                 put_client(req->client);
256                 kfree(req);
257         }
258 }
259
260 static int addr_resolve_local(struct sockaddr_in *src_in,
261                               struct sockaddr_in *dst_in,
262                               struct rdma_dev_addr *addr)
263 {
264         struct net_device *dev;
265         __be32 src_ip = src_in->sin_addr.s_addr;
266         __be32 dst_ip = dst_in->sin_addr.s_addr;
267         int ret;
268
269         dev = ip_dev_find(&init_net, dst_ip);
270         if (!dev)
271                 return -EADDRNOTAVAIL;
272
273         if (ipv4_is_zeronet(src_ip)) {
274                 src_in->sin_family = dst_in->sin_family;
275                 src_in->sin_addr.s_addr = dst_ip;
276                 ret = rdma_copy_addr(addr, dev, dev->dev_addr);
277         } else if (ipv4_is_loopback(src_ip)) {
278                 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
279                 if (!ret)
280                         memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
281         } else {
282                 ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
283                 if (!ret)
284                         memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
285         }
286
287         dev_put(dev);
288         return ret;
289 }
290
291 int rdma_resolve_ip(struct rdma_addr_client *client,
292                     struct sockaddr *src_addr, struct sockaddr *dst_addr,
293                     struct rdma_dev_addr *addr, int timeout_ms,
294                     void (*callback)(int status, struct sockaddr *src_addr,
295                                      struct rdma_dev_addr *addr, void *context),
296                     void *context)
297 {
298         struct sockaddr_in *src_in, *dst_in;
299         struct addr_req *req;
300         int ret = 0;
301
302         req = kzalloc(sizeof *req, GFP_KERNEL);
303         if (!req)
304                 return -ENOMEM;
305
306         if (src_addr)
307                 memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
308         memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
309         req->addr = addr;
310         req->callback = callback;
311         req->context = context;
312         req->client = client;
313         atomic_inc(&client->refcount);
314
315         src_in = (struct sockaddr_in *) &req->src_addr;
316         dst_in = (struct sockaddr_in *) &req->dst_addr;
317
318         req->status = addr_resolve_local(src_in, dst_in, addr);
319         if (req->status == -EADDRNOTAVAIL)
320                 req->status = addr_resolve_remote(src_in, dst_in, addr);
321
322         switch (req->status) {
323         case 0:
324                 req->timeout = jiffies;
325                 queue_req(req);
326                 break;
327         case -ENODATA:
328                 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
329                 queue_req(req);
330                 addr_send_arp(dst_in);
331                 break;
332         default:
333                 ret = req->status;
334                 atomic_dec(&client->refcount);
335                 kfree(req);
336                 break;
337         }
338         return ret;
339 }
340 EXPORT_SYMBOL(rdma_resolve_ip);
341
342 void rdma_addr_cancel(struct rdma_dev_addr *addr)
343 {
344         struct addr_req *req, *temp_req;
345
346         mutex_lock(&lock);
347         list_for_each_entry_safe(req, temp_req, &req_list, list) {
348                 if (req->addr == addr) {
349                         req->status = -ECANCELED;
350                         req->timeout = jiffies;
351                         list_move(&req->list, &req_list);
352                         set_timeout(req->timeout);
353                         break;
354                 }
355         }
356         mutex_unlock(&lock);
357 }
358 EXPORT_SYMBOL(rdma_addr_cancel);
359
360 static int netevent_callback(struct notifier_block *self, unsigned long event,
361         void *ctx)
362 {
363         if (event == NETEVENT_NEIGH_UPDATE) {
364                 struct neighbour *neigh = ctx;
365
366                 if (neigh->nud_state & NUD_VALID) {
367                         set_timeout(jiffies);
368                 }
369         }
370         return 0;
371 }
372
373 static struct notifier_block nb = {
374         .notifier_call = netevent_callback
375 };
376
377 static int addr_init(void)
378 {
379         addr_wq = create_singlethread_workqueue("ib_addr");
380         if (!addr_wq)
381                 return -ENOMEM;
382
383         register_netevent_notifier(&nb);
384         return 0;
385 }
386
387 static void addr_cleanup(void)
388 {
389         unregister_netevent_notifier(&nb);
390         destroy_workqueue(addr_wq);
391 }
392
/* Module entry and exit points. */
module_init(addr_init);
module_exit(addr_cleanup);