tunnels: fix netns vs proto registration ordering
[safe/jmp/linux-2.6] / net / ipv6 / ipv6_sockglue.c
index 7516b88..430454e 100644 (file)
@@ -1,14 +1,12 @@
 /*
  *     IPv6 BSD socket options interface
- *     Linux INET6 implementation 
+ *     Linux INET6 implementation
  *
  *     Authors:
- *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *     Pedro Roque             <roque@di.fc.ul.pt>
  *
  *     Based on linux/net/ipv4/ip_sockglue.c
  *
- *     $Id: ipv6_sockglue.c,v 1.41 2002/02/01 22:01:04 davem Exp $
- *
  *     This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
@@ -16,7 +14,6 @@
  *
  *     FIXME: Make the setsockopt code POSIX compliant: That is
  *
- *     o       Return -EINVAL for setsockopt of short lengths
  *     o       Truncate getsockopt returns
  *     o       Return an optlen of the truncated length if need be
  *
  */
 
 #include <linux/module.h>
-#include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
-#include <linux/sched.h>
 #include <linux/net.h>
 #include <linux/in6.h>
+#include <linux/mroute6.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/init.h>
 #include <net/inet_common.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <net/udplite.h>
 #include <net/xfrm.h>
+#include <net/compat.h>
 
 #include <asm/uaccess.h>
 
-DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
-
-static struct packet_type ipv6_packet_type = {
-       .type = __constant_htons(ETH_P_IPV6), 
-       .func = ipv6_rcv,
-};
+DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
 
 struct ip6_ra_chain *ip6_ra_chain;
 DEFINE_RWLOCK(ip6_ra_lock);
 
-int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
+int ip6_ra_control(struct sock *sk, int sel)
 {
        struct ip6_ra_chain *ra, *new_ra, **rap;
 
        /* RA packet may be delivered ONLY to IPPROTO_RAW socket */
-       if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW)
-               return -EINVAL;
+       if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW)
+               return -ENOPROTOOPT;
 
        new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
@@ -80,16 +74,13 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
                if (ra->sk == sk) {
                        if (sel>=0) {
                                write_unlock_bh(&ip6_ra_lock);
-                               if (new_ra)
-                                       kfree(new_ra);
+                               kfree(new_ra);
                                return -EADDRINUSE;
                        }
 
                        *rap = ra->next;
                        write_unlock_bh(&ip6_ra_lock);
 
-                       if (ra->destructor)
-                               ra->destructor(sk);
                        sock_put(sk);
                        kfree(ra);
                        return 0;
@@ -101,7 +92,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
        }
        new_ra->sk = sk;
        new_ra->sel = sel;
-       new_ra->destructor = destructor;
        new_ra->next = ra;
        *rap = new_ra;
        sock_hold(sk);
@@ -109,37 +99,74 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
        return 0;
 }
 
-int ipv6_setsockopt(struct sock *sk, int level, int optname,
-                   char __user *optval, int optlen)
+/*
+ * Install @opt as the socket's IPv6 tx options (inet6_sk(sk)->opt) and
+ * hand back whatever was installed before.
+ *
+ * @sk:  socket whose options are being replaced
+ * @opt: new options; may be NULL (the ext-header/MSS update below is then
+ *       skipped and NULL is stored)
+ *
+ * Returns the previous options pointer obtained from xchg(), which may be
+ * NULL.  The callers visible in this patch release a non-NULL return value
+ * with sock_kfree_s().
+ */
+static
+struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
+                                          struct ipv6_txoptions *opt)
+{
+       if (inet_sk(sk)->is_icsk) {
+               /*
+                * Only when new options are supplied, the socket state is
+                * neither LISTEN nor CLOSE, and inet_daddr differs from
+                * LOOPBACK4_IPV6: record the combined length of the
+                * fragmentable and non-fragmentable option parts and call
+                * the socket's icsk_sync_mss() hook with the stored
+                * icsk_pmtu_cookie.
+                * NOTE(review): the meaning of the LOOPBACK4_IPV6 sentinel
+                * is not visible here -- confirm against its definition.
+                */
+               if (opt &&
+                   !((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
+                   inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) {
+                       struct inet_connection_sock *icsk = inet_csk(sk);
+                       icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
+                       icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
+               }
+               opt = xchg(&inet6_sk(sk)->opt, opt);
+       } else {
+               /* Other sockets: same swap, done with sk_dst_lock write-held. */
+               write_lock(&sk->sk_dst_lock);
+               opt = xchg(&inet6_sk(sk)->opt, opt);
+               write_unlock(&sk->sk_dst_lock);
+       }
+       /*
+        * Runs on both paths after the swap; presumably drops the socket's
+        * cached route so it is looked up again with the new options --
+        * confirm against sk_dst_reset().
+        */
+       sk_dst_reset(sk);
+
+       return opt;
+}
+
+static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
+                   char __user *optval, unsigned int optlen)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
+       struct net *net = sock_net(sk);
        int val, valbool;
        int retv = -ENOPROTOOPT;
 
-       if (level == SOL_IP && sk->sk_type != SOCK_RAW)
-               return udp_prot.setsockopt(sk, level, optname, optval, optlen);
-
-       if(level!=SOL_IPV6)
-               goto out;
-
        if (optval == NULL)
                val=0;
-       else if (get_user(val, (int __user *) optval))
-               return -EFAULT;
+       else {
+               if (optlen >= sizeof(int)) {
+                       if (get_user(val, (int __user *) optval))
+                               return -EFAULT;
+               } else
+                       val = 0;
+       }
 
        valbool = (val!=0);
 
+       if (ip6_mroute_opt(optname))
+               return ip6_mroute_setsockopt(sk, optname, optval, optlen);
+
        lock_sock(sk);
 
        switch (optname) {
 
        case IPV6_ADDRFORM:
+               if (optlen < sizeof(int))
+                       goto e_inval;
                if (val == PF_INET) {
                        struct ipv6_txoptions *opt;
                        struct sk_buff *pktopt;
 
-                       if (sk->sk_protocol != IPPROTO_UDP &&
-                           sk->sk_protocol != IPPROTO_TCP)
+                       if (sk->sk_type == SOCK_RAW)
+                               break;
+
+                       if (sk->sk_protocol == IPPROTO_UDP ||
+                           sk->sk_protocol == IPPROTO_UDPLITE) {
+                               struct udp_sock *up = udp_sk(sk);
+                               if (up->pending == AF_INET6) {
+                                       retv = -EBUSY;
+                                       break;
+                               }
+                       } else if (sk->sk_protocol != IPPROTO_TCP)
                                break;
 
                        if (sk->sk_state != TCP_ESTABLISHED) {
@@ -148,7 +175,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
                        }
 
                        if (ipv6_only_sock(sk) ||
-                           !(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) {
+                           !ipv6_addr_v4mapped(&np->daddr)) {
                                retv = -EADDRNOTAVAIL;
                                break;
                        }
@@ -164,23 +191,26 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
                        sk_refcnt_debug_dec(sk);
 
                        if (sk->sk_protocol == IPPROTO_TCP) {
-                               struct tcp_sock *tp = tcp_sk(sk);
-
+                               struct inet_connection_sock *icsk = inet_csk(sk);
                                local_bh_disable();
-                               sock_prot_dec_use(sk->sk_prot);
-                               sock_prot_inc_use(&tcp_prot);
+                               sock_prot_inuse_add(net, sk->sk_prot, -1);
+                               sock_prot_inuse_add(net, &tcp_prot, 1);
                                local_bh_enable();
                                sk->sk_prot = &tcp_prot;
-                               tp->af_specific = &ipv4_specific;
+                               icsk->icsk_af_ops = &ipv4_specific;
                                sk->sk_socket->ops = &inet_stream_ops;
                                sk->sk_family = PF_INET;
-                               tcp_sync_mss(sk, tp->pmtu_cookie);
+                               tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
                        } else {
+                               struct proto *prot = &udp_prot;
+
+                               if (sk->sk_protocol == IPPROTO_UDPLITE)
+                                       prot = &udplite_prot;
                                local_bh_disable();
-                               sock_prot_dec_use(sk->sk_prot);
-                               sock_prot_inc_use(&udp_prot);
+                               sock_prot_inuse_add(net, sk->sk_prot, -1);
+                               sock_prot_inuse_add(net, prot, 1);
                                local_bh_enable();
-                               sk->sk_prot = &udp_prot;
+                               sk->sk_prot = prot;
                                sk->sk_socket->ops = &inet_dgram_ops;
                                sk->sk_family = PF_INET;
                        }
@@ -188,8 +218,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
                        if (opt)
                                sock_kfree_s(sk, opt, opt->tot_len);
                        pktopt = xchg(&np->pktoptions, NULL);
-                       if (pktopt)
-                               kfree_skb(pktopt);
+                       kfree_skb(pktopt);
 
                        sk->sk_destruct = inet_sock_destruct;
                        /*
@@ -204,45 +233,189 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
                goto e_inval;
 
        case IPV6_V6ONLY:
-               if (inet_sk(sk)->num)
+               if (optlen < sizeof(int) ||
+                   inet_sk(sk)->inet_num)
                        goto e_inval;
                np->ipv6only = valbool;
                retv = 0;
                break;
 
-       case IPV6_PKTINFO:
+       case IPV6_RECVPKTINFO:
+               if (optlen < sizeof(int))
+                       goto e_inval;
                np->rxopt.bits.rxinfo = valbool;
                retv = 0;
                break;
 
-       case IPV6_HOPLIMIT:
+       case IPV6_2292PKTINFO:
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               np->rxopt.bits.rxoinfo = valbool;
+               retv = 0;
+               break;
+
+       case IPV6_RECVHOPLIMIT:
+               if (optlen < sizeof(int))
+                       goto e_inval;
                np->rxopt.bits.rxhlim = valbool;
                retv = 0;
                break;
 
-       case IPV6_RTHDR:
-               if (val < 0 || val > 2)
+       case IPV6_2292HOPLIMIT:
+               if (optlen < sizeof(int))
                        goto e_inval;
-               np->rxopt.bits.srcrt = val;
+               np->rxopt.bits.rxohlim = valbool;
                retv = 0;
                break;
 
-       case IPV6_HOPOPTS:
+       case IPV6_RECVRTHDR:
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               np->rxopt.bits.srcrt = valbool;
+               retv = 0;
+               break;
+
+       case IPV6_2292RTHDR:
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               np->rxopt.bits.osrcrt = valbool;
+               retv = 0;
+               break;
+
+       case IPV6_RECVHOPOPTS:
+               if (optlen < sizeof(int))
+                       goto e_inval;
                np->rxopt.bits.hopopts = valbool;
                retv = 0;
                break;
 
-       case IPV6_DSTOPTS:
+       case IPV6_2292HOPOPTS:
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               np->rxopt.bits.ohopopts = valbool;
+               retv = 0;
+               break;
+
+       case IPV6_RECVDSTOPTS:
+               if (optlen < sizeof(int))
+                       goto e_inval;
                np->rxopt.bits.dstopts = valbool;
                retv = 0;
                break;
 
+       case IPV6_2292DSTOPTS:
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               np->rxopt.bits.odstopts = valbool;
+               retv = 0;
+               break;
+
+       case IPV6_TCLASS:
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               if (val < -1 || val > 0xff)
+                       goto e_inval;
+               /* RFC 3542, 6.5: default traffic class of 0x0 */
+               if (val == -1)
+                       val = 0;
+               np->tclass = val;
+               retv = 0;
+               break;
+
+       case IPV6_RECVTCLASS:
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               np->rxopt.bits.rxtclass = valbool;
+               retv = 0;
+               break;
+
        case IPV6_FLOWINFO:
+               if (optlen < sizeof(int))
+                       goto e_inval;
                np->rxopt.bits.rxflow = valbool;
                retv = 0;
                break;
 
-       case IPV6_PKTOPTIONS:
+       case IPV6_HOPOPTS:
+       case IPV6_RTHDRDSTOPTS:
+       case IPV6_RTHDR:
+       case IPV6_DSTOPTS:
+       {
+               struct ipv6_txoptions *opt;
+
+               /* remove any sticky options header with a zero option
+                * length, per RFC3542.
+                */
+               if (optlen == 0)
+                       optval = NULL;
+               else if (optval == NULL)
+                       goto e_inval;
+               else if (optlen < sizeof(struct ipv6_opt_hdr) ||
+                        optlen & 0x7 || optlen > 8 * 255)
+                       goto e_inval;
+
+               /* hop-by-hop / destination options are privileged option */
+               retv = -EPERM;
+               if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
+                       break;
+
+               opt = ipv6_renew_options(sk, np->opt, optname,
+                                        (struct ipv6_opt_hdr __user *)optval,
+                                        optlen);
+               if (IS_ERR(opt)) {
+                       retv = PTR_ERR(opt);
+                       break;
+               }
+
+               /* routing header option needs extra check */
+               retv = -EINVAL;
+               if (optname == IPV6_RTHDR && opt && opt->srcrt) {
+                       struct ipv6_rt_hdr *rthdr = opt->srcrt;
+                       switch (rthdr->type) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+                       case IPV6_SRCRT_TYPE_2:
+                               if (rthdr->hdrlen != 2 ||
+                                   rthdr->segments_left != 1)
+                                       goto sticky_done;
+
+                               break;
+#endif
+                       default:
+                               goto sticky_done;
+                       }
+               }
+
+               retv = 0;
+               opt = ipv6_update_options(sk, opt);
+sticky_done:
+               if (opt)
+                       sock_kfree_s(sk, opt, opt->tot_len);
+               break;
+       }
+
+       case IPV6_PKTINFO:
+       {
+               struct in6_pktinfo pkt;
+
+               if (optlen == 0)
+                       goto e_inval;
+               else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL)
+                       goto e_inval;
+
+               if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) {
+                               retv = -EFAULT;
+                               break;
+               }
+               if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if)
+                       goto e_inval;
+
+               np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
+               ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr);
+               retv = 0;
+               break;
+       }
+
+       case IPV6_2292PKTOPTIONS:
        {
                struct ipv6_txoptions *opt = NULL;
                struct msghdr msg;
@@ -251,6 +424,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 
                fl.fl6_flowlabel = 0;
                fl.oif = sk->sk_bound_dev_if;
+               fl.mark = sk->sk_mark;
 
                if (optlen == 0)
                        goto update;
@@ -276,36 +450,20 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
                msg.msg_controllen = optlen;
                msg.msg_control = (void*)(opt+1);
 
-               retv = datagram_send_ctl(&msg, &fl, opt, &junk);
+               retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk);
                if (retv)
                        goto done;
 update:
                retv = 0;
-               if (sk->sk_type == SOCK_STREAM) {
-                       if (opt) {
-                               struct tcp_sock *tp = tcp_sk(sk);
-                               if (!((1 << sk->sk_state) &
-                                     (TCPF_LISTEN | TCPF_CLOSE))
-                                   && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
-                                       tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
-                                       tcp_sync_mss(sk, tp->pmtu_cookie);
-                               }
-                       }
-                       opt = xchg(&np->opt, opt);
-                       sk_dst_reset(sk);
-               } else {
-                       write_lock(&sk->sk_dst_lock);
-                       opt = xchg(&np->opt, opt);
-                       write_unlock(&sk->sk_dst_lock);
-                       sk_dst_reset(sk);
-               }
-
+               opt = ipv6_update_options(sk, opt);
 done:
                if (opt)
                        sock_kfree_s(sk, opt, opt->tot_len);
                break;
        }
        case IPV6_UNICAST_HOPS:
+               if (optlen < sizeof(int))
+                       goto e_inval;
                if (val > 255 || val < -1)
                        goto e_inval;
                np->hop_limit = val;
@@ -314,6 +472,8 @@ done:
 
        case IPV6_MULTICAST_HOPS:
                if (sk->sk_type == SOCK_STREAM)
+                       break;
+               if (optlen < sizeof(int))
                        goto e_inval;
                if (val > 255 || val < -1)
                        goto e_inval;
@@ -322,19 +482,32 @@ done:
                break;
 
        case IPV6_MULTICAST_LOOP:
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               if (val != valbool)
+                       goto e_inval;
                np->mc_loop = valbool;
                retv = 0;
                break;
 
        case IPV6_MULTICAST_IF:
                if (sk->sk_type == SOCK_STREAM)
-                       goto e_inval;
-               if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
+                       break;
+               if (optlen < sizeof(int))
                        goto e_inval;
 
-               if (__dev_get_by_index(val) == NULL) {
-                       retv = -ENODEV;
-                       break;
+               if (val) {
+                       struct net_device *dev;
+
+                       if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
+                               goto e_inval;
+
+                       dev = dev_get_by_index(net, val);
+                       if (!dev) {
+                               retv = -ENODEV;
+                               break;
+                       }
+                       dev_put(dev);
                }
                np->mcast_oif = val;
                retv = 0;
@@ -344,6 +517,13 @@ done:
        {
                struct ipv6_mreq mreq;
 
+               if (optlen < sizeof(struct ipv6_mreq))
+                       goto e_inval;
+
+               retv = -EPROTO;
+               if (inet_sk(sk)->is_icsk)
+                       break;
+
                retv = -EFAULT;
                if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
                        break;
@@ -359,7 +539,7 @@ done:
        {
                struct ipv6_mreq mreq;
 
-               if (optlen != sizeof(struct ipv6_mreq))
+               if (optlen < sizeof(struct ipv6_mreq))
                        goto e_inval;
 
                retv = -EFAULT;
@@ -378,6 +558,9 @@ done:
                struct group_req greq;
                struct sockaddr_in6 *psin6;
 
+               if (optlen < sizeof(struct group_req))
+                       goto e_inval;
+
                retv = -EFAULT;
                if (copy_from_user(&greq, optval, sizeof(struct group_req)))
                        break;
@@ -402,7 +585,7 @@ done:
                struct group_source_req greqs;
                int omode, add;
 
-               if (optlen != sizeof(struct group_source_req))
+               if (optlen < sizeof(struct group_source_req))
                        goto e_inval;
                if (copy_from_user(&greqs, optval, sizeof(greqs))) {
                        retv = -EFAULT;
@@ -448,8 +631,8 @@ done:
                        retv = -ENOBUFS;
                        break;
                }
-               gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL);
-               if (gsf == 0) {
+               gsf = kmalloc(optlen,GFP_KERNEL);
+               if (!gsf) {
                        retv = -ENOBUFS;
                        break;
                }
@@ -476,27 +659,37 @@ done:
                break;
        }
        case IPV6_ROUTER_ALERT:
-               retv = ip6_ra_control(sk, val, NULL);
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               retv = ip6_ra_control(sk, val);
                break;
        case IPV6_MTU_DISCOVER:
-               if (val<0 || val>2)
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
                        goto e_inval;
                np->pmtudisc = val;
                retv = 0;
                break;
        case IPV6_MTU:
+               if (optlen < sizeof(int))
+                       goto e_inval;
                if (val && val < IPV6_MIN_MTU)
                        goto e_inval;
                np->frag_size = val;
                retv = 0;
                break;
        case IPV6_RECVERR:
+               if (optlen < sizeof(int))
+                       goto e_inval;
                np->recverr = valbool;
                if (!val)
                        skb_queue_purge(&sk->sk_error_queue);
                retv = 0;
                break;
        case IPV6_FLOWINFO_SEND:
+               if (optlen < sizeof(int))
+                       goto e_inval;
                np->sndflow = valbool;
                retv = 0;
                break;
@@ -511,17 +704,72 @@ done:
                retv = xfrm_user_policy(sk, optname, optval, optlen);
                break;
 
-#ifdef CONFIG_NETFILTER
-       default:
-               retv = nf_setsockopt(sk, PF_INET6, optname, optval, 
-                                           optlen);
-               break;
-#endif
+       case IPV6_ADDR_PREFERENCES:
+           {
+               unsigned int pref = 0;
+               unsigned int prefmask = ~0;
+
+               if (optlen < sizeof(int))
+                       goto e_inval;
+
+               retv = -EINVAL;
 
+               /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
+               switch (val & (IPV6_PREFER_SRC_PUBLIC|
+                              IPV6_PREFER_SRC_TMP|
+                              IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
+               case IPV6_PREFER_SRC_PUBLIC:
+                       pref |= IPV6_PREFER_SRC_PUBLIC;
+                       break;
+               case IPV6_PREFER_SRC_TMP:
+                       pref |= IPV6_PREFER_SRC_TMP;
+                       break;
+               case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
+                       break;
+               case 0:
+                       goto pref_skip_pubtmp;
+               default:
+                       goto e_inval;
+               }
+
+               prefmask &= ~(IPV6_PREFER_SRC_PUBLIC|
+                             IPV6_PREFER_SRC_TMP);
+pref_skip_pubtmp:
+
+               /* check HOME/COA conflicts */
+               switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) {
+               case IPV6_PREFER_SRC_HOME:
+                       break;
+               case IPV6_PREFER_SRC_COA:
+                       pref |= IPV6_PREFER_SRC_COA;
+               case 0:
+                       goto pref_skip_coa;
+               default:
+                       goto e_inval;
+               }
+
+               prefmask &= ~IPV6_PREFER_SRC_COA;
+pref_skip_coa:
+
+               /* check CGA/NONCGA conflicts */
+               switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
+               case IPV6_PREFER_SRC_CGA:
+               case IPV6_PREFER_SRC_NONCGA:
+               case 0:
+                       break;
+               default:
+                       goto e_inval;
+               }
+
+               np->srcprefs = (np->srcprefs & prefmask) | pref;
+               retv = 0;
+
+               break;
+           }
        }
+
        release_sock(sk);
 
-out:
        return retv;
 
 e_inval:
@@ -529,24 +777,122 @@ e_inval:
        return -EINVAL;
 }
 
-int ipv6_getsockopt(struct sock *sk, int level, int optname,
+/*
+ * setsockopt() entry point for IPv6 sockets (exported).
+ *
+ * Dispatch, in order:
+ *  - level SOL_IP on a socket that is not SOCK_RAW is forwarded unchanged
+ *    to udp_prot.setsockopt();
+ *  - any other level except SOL_IPV6 fails with -ENOPROTOOPT;
+ *  - SOL_IPV6 options are handled by do_ipv6_setsockopt().
+ *
+ * With CONFIG_NETFILTER, an -ENOPROTOOPT result from do_ipv6_setsockopt()
+ * is retried through nf_setsockopt() with the socket lock held, except for
+ * IPV6_IPSEC_POLICY and IPV6_XFRM_POLICY.
+ *
+ * Returns the result of whichever handler ran last.
+ */
+int ipv6_setsockopt(struct sock *sk, int level, int optname,
+                   char __user *optval, unsigned int optlen)
+{
+       int err;
+
+       if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+               return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+
+       if (level != SOL_IPV6)
+               return -ENOPROTOOPT;
+
+       err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+       /* we need to exclude all possible ENOPROTOOPTs except default case */
+       /*
+        * i.e. only an -ENOPROTOOPT that means "option not recognised" should
+        * reach netfilter.  The two policy options are skipped explicitly,
+        * presumably because their handler (xfrm_user_policy()) can return
+        * -ENOPROTOOPT itself -- confirm against that function.
+        */
+       if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
+                       optname != IPV6_XFRM_POLICY) {
+               lock_sock(sk);
+               err = nf_setsockopt(sk, PF_INET6, optname, optval,
+                               optlen);
+               release_sock(sk);
+       }
+#endif
+       return err;
+}
+
+EXPORT_SYMBOL(ipv6_setsockopt);
+
+#ifdef CONFIG_COMPAT
+/*
+ * CONFIG_COMPAT counterpart of ipv6_setsockopt() (exported).
+ *
+ * Differences from the native entry point, as visible in this function:
+ *  - for level SOL_IP on a non-SOCK_RAW socket, udp_prot.compat_setsockopt()
+ *    is preferred when that hook is set, falling back to
+ *    udp_prot.setsockopt() otherwise;
+ *  - option names in the range MCAST_JOIN_GROUP..MCAST_MSFILTER are handed
+ *    to compat_mc_setsockopt(), which is given ipv6_setsockopt as the
+ *    native handler to call;
+ *  - the netfilter fallback uses compat_nf_setsockopt().
+ *
+ * Any level other than SOL_IP (as above) or SOL_IPV6 fails with
+ * -ENOPROTOOPT.  Returns the result of whichever handler ran last.
+ */
+int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
+                          char __user *optval, unsigned int optlen)
+{
+       int err;
+
+       if (level == SOL_IP && sk->sk_type != SOCK_RAW) {
+               if (udp_prot.compat_setsockopt != NULL)
+                       return udp_prot.compat_setsockopt(sk, level, optname,
+                                                         optval, optlen);
+               return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+       }
+
+       if (level != SOL_IPV6)
+               return -ENOPROTOOPT;
+
+       /*
+        * Multicast group/filter options: presumably their argument layout
+        * differs for compat callers and needs translating before the native
+        * handler runs -- confirm against compat_mc_setsockopt().
+        */
+       if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
+               return compat_mc_setsockopt(sk, level, optname, optval, optlen,
+                       ipv6_setsockopt);
+
+       err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+       /* we need to exclude all possible ENOPROTOOPTs except default case */
+       /*
+        * Same rule as in ipv6_setsockopt(): the two policy options never
+        * fall through to netfilter, every other -ENOPROTOOPT does.
+        */
+       if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
+           optname != IPV6_XFRM_POLICY) {
+               lock_sock(sk);
+               err = compat_nf_setsockopt(sk, PF_INET6, optname,
+                                          optval, optlen);
+               release_sock(sk);
+       }
+#endif
+       return err;
+}
+
+EXPORT_SYMBOL(compat_ipv6_setsockopt);
+#endif
+
+/*
+ * Copy one sticky extension header out of @opt to user space.
+ *
+ * @sk:      owning socket (not referenced in this function's body)
+ * @opt:     the socket's tx options; may be NULL
+ * @optname: which header to fetch: IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS,
+ *           IPV6_RTHDR or IPV6_DSTOPTS
+ * @optval:  user buffer receiving the header
+ * @len:     size of @optval as supplied by the caller
+ *
+ * Returns the number of bytes copied, 0 when @opt is NULL or the requested
+ * header is not set, -EINVAL for any other @optname, or -EFAULT when the
+ * copy to user space fails.
+ */
+static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
+                                 int optname, char __user *optval, int len)
+{
+       struct ipv6_opt_hdr *hdr;
+
+       if (!opt)
+               return 0;
+
+       /* Map the option name onto the matching header slot in @opt. */
+       switch(optname) {
+       case IPV6_HOPOPTS:
+               hdr = opt->hopopt;
+               break;
+       case IPV6_RTHDRDSTOPTS:
+               hdr = opt->dst0opt;
+               break;
+       case IPV6_RTHDR:
+               hdr = (struct ipv6_opt_hdr *)opt->srcrt;
+               break;
+       case IPV6_DSTOPTS:
+               hdr = opt->dst1opt;
+               break;
+       default:
+               return -EINVAL; /* should not happen */
+       }
+
+       if (!hdr)
+               return 0;
+
+       /*
+        * Truncate to the smaller of the caller's buffer and the length
+        * reported by ipv6_optlen().  The comparison is done as unsigned int,
+        * so a negative @len acts as a very large value and the copy is then
+        * bounded by the header length alone.
+        */
+       len = min_t(unsigned int, len, ipv6_optlen(hdr));
+       if (copy_to_user(optval, hdr, len))
+               return -EFAULT;
+       return len;
+}
+
+static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        int len;
        int val;
 
-       if (level == SOL_IP && sk->sk_type != SOCK_RAW)
-               return udp_prot.getsockopt(sk, level, optname, optval, optlen);
-       if(level!=SOL_IPV6)
-               return -ENOPROTOOPT;
+       if (ip6_mroute_opt(optname))
+               return ip6_mroute_getsockopt(sk, optname, optval, optlen);
+
        if (get_user(len, optlen))
                return -EFAULT;
        switch (optname) {
        case IPV6_ADDRFORM:
                if (sk->sk_protocol != IPPROTO_UDP &&
+                   sk->sk_protocol != IPPROTO_UDPLITE &&
                    sk->sk_protocol != IPPROTO_TCP)
-                       return -EINVAL;
+                       return -ENOPROTOOPT;
                if (sk->sk_state != TCP_ESTABLISHED)
                        return -ENOTCONN;
                val = sk->sk_family;
@@ -560,6 +906,8 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
                        return -EINVAL;
                if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
                        return -EFAULT;
+               if (gsf.gf_group.ss_family != AF_INET6)
+                       return -EADDRNOTAVAIL;
                lock_sock(sk);
                err = ip6_mc_msfget(sk, &gsf,
                        (struct group_filter __user *)optval, optlen);
@@ -567,7 +915,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
                return err;
        }
 
-       case IPV6_PKTOPTIONS:
+       case IPV6_2292PKTOPTIONS:
        {
                struct msghdr msg;
                struct sk_buff *skb;
@@ -593,14 +941,28 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
                } else {
                        if (np->rxopt.bits.rxinfo) {
                                struct in6_pktinfo src_info;
-                               src_info.ipi6_ifindex = np->mcast_oif;
-                               ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
+                               src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+                                       np->sticky_pktinfo.ipi6_ifindex;
+                               np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
+                                       ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
                                put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
                        }
                        if (np->rxopt.bits.rxhlim) {
                                int hlim = np->mcast_hops;
                                put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
                        }
+                       if (np->rxopt.bits.rxoinfo) {
+                               struct in6_pktinfo src_info;
+                               src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+                                       np->sticky_pktinfo.ipi6_ifindex;
+                               np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
+                                       ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
+                               put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
+                       }
+                       if (np->rxopt.bits.rxohlim) {
+                               int hlim = np->mcast_hops;
+                               put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
+                       }
                }
                len -= msg.msg_controllen;
                return put_user(len, optlen);
@@ -608,7 +970,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
        case IPV6_MTU:
        {
                struct dst_entry *dst;
-               val = 0;        
+               val = 0;
                lock_sock(sk);
                dst = sk_dst_get(sk);
                if (dst) {
@@ -625,37 +987,94 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
                val = np->ipv6only;
                break;
 
-       case IPV6_PKTINFO:
+       case IPV6_RECVPKTINFO:
                val = np->rxopt.bits.rxinfo;
                break;
 
-       case IPV6_HOPLIMIT:
+       case IPV6_2292PKTINFO:
+               val = np->rxopt.bits.rxoinfo;
+               break;
+
+       case IPV6_RECVHOPLIMIT:
                val = np->rxopt.bits.rxhlim;
                break;
 
-       case IPV6_RTHDR:
+       case IPV6_2292HOPLIMIT:
+               val = np->rxopt.bits.rxohlim;
+               break;
+
+       case IPV6_RECVRTHDR:
                val = np->rxopt.bits.srcrt;
                break;
 
+       case IPV6_2292RTHDR:
+               val = np->rxopt.bits.osrcrt;
+               break;
+
        case IPV6_HOPOPTS:
+       case IPV6_RTHDRDSTOPTS:
+       case IPV6_RTHDR:
+       case IPV6_DSTOPTS:
+       {
+
+               lock_sock(sk);
+               len = ipv6_getsockopt_sticky(sk, np->opt,
+                                            optname, optval, len);
+               release_sock(sk);
+               /* check if ipv6_getsockopt_sticky() returns err code */
+               if (len < 0)
+                       return len;
+               return put_user(len, optlen);
+       }
+
+       case IPV6_RECVHOPOPTS:
                val = np->rxopt.bits.hopopts;
                break;
 
-       case IPV6_DSTOPTS:
+       case IPV6_2292HOPOPTS:
+               val = np->rxopt.bits.ohopopts;
+               break;
+
+       case IPV6_RECVDSTOPTS:
                val = np->rxopt.bits.dstopts;
                break;
 
+       case IPV6_2292DSTOPTS:
+               val = np->rxopt.bits.odstopts;
+               break;
+
+       case IPV6_TCLASS:
+               val = np->tclass;
+               break;
+
+       case IPV6_RECVTCLASS:
+               val = np->rxopt.bits.rxtclass;
+               break;
+
        case IPV6_FLOWINFO:
                val = np->rxopt.bits.rxflow;
                break;
 
        case IPV6_UNICAST_HOPS:
-               val = np->hop_limit;
-               break;
-
        case IPV6_MULTICAST_HOPS:
-               val = np->mcast_hops;
+       {
+               struct dst_entry *dst;
+
+               if (optname == IPV6_UNICAST_HOPS)
+                       val = np->hop_limit;
+               else
+                       val = np->mcast_hops;
+
+               dst = sk_dst_get(sk);
+               if (dst) {
+                       if (val < 0)
+                               val = ip6_dst_hoplimit(dst);
+                       dst_release(dst);
+               }
+               if (val < 0)
+                       val = sock_net(sk)->ipv6.devconf_all->hop_limit;
                break;
+       }
 
        case IPV6_MULTICAST_LOOP:
                val = np->mc_loop;
@@ -677,18 +1096,26 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
                val = np->sndflow;
                break;
 
+       case IPV6_ADDR_PREFERENCES:
+               val = 0;
+
+               if (np->srcprefs & IPV6_PREFER_SRC_TMP)
+                       val |= IPV6_PREFER_SRC_TMP;
+               else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC)
+                       val |= IPV6_PREFER_SRC_PUBLIC;
+               else {
+                       /* XXX: should we return system default? */
+                       val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT;
+               }
+
+               if (np->srcprefs & IPV6_PREFER_SRC_COA)
+                       val |= IPV6_PREFER_SRC_COA;
+               else
+                       val |= IPV6_PREFER_SRC_HOME;
+               break;
+
        default:
-#ifdef CONFIG_NETFILTER
-               lock_sock(sk);
-               val = nf_getsockopt(sk, PF_INET6, optname, optval, 
-                                   &len);
-               release_sock(sk);
-               if (val >= 0)
-                       val = put_user(len, optlen);
-               return val;
-#else
-               return -EINVAL;
-#endif
+               return -ENOPROTOOPT;
        }
        len = min_t(unsigned int, sizeof(int), len);
        if(put_user(len, optlen))
@@ -698,12 +1125,79 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
        return 0;
 }
 
-void __init ipv6_packet_init(void)
+int ipv6_getsockopt(struct sock *sk, int level, int optname,
+                   char __user *optval, int __user *optlen) /* level dispatch around do_ipv6_getsockopt(), with a netfilter fallback under CONFIG_NETFILTER */
 {
-       dev_add_pack(&ipv6_packet_type);
+       int err; /* status handed back to the caller */
+
+       if (level == SOL_IP && sk->sk_type != SOCK_RAW) /* SOL_IP on a non-raw socket goes to udp_prot's handler */
+               return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+
+       if(level != SOL_IPV6) /* every other level is rejected here */
+               return -ENOPROTOOPT;
+
+       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+       /* Fall back to netfilter only for the -ENOPROTOOPT of do_ipv6_getsockopt()'s default case; IPV6_2292PKTOPTIONS is excluded explicitly (presumably it can return -ENOPROTOOPT itself -- TODO confirm) */
+       if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
+               int len;
+
+               if (get_user(len, optlen)) /* nf_getsockopt() is given &len, so fetch the user's length first */
+                       return -EFAULT;
+
+               lock_sock(sk);
+               err = nf_getsockopt(sk, PF_INET6, optname, optval,
+                               &len);
+               release_sock(sk);
+               if (err >= 0) /* non-negative result: write len back to the user's optlen */
+                       err = put_user(len, optlen);
+       }
+#endif
+       return err;
 }
 
-void ipv6_packet_cleanup(void)
+EXPORT_SYMBOL(ipv6_getsockopt);
+
+#ifdef CONFIG_COMPAT
+int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
+                          char __user *optval, int __user *optlen) /* CONFIG_COMPAT counterpart of ipv6_getsockopt(): same dispatch, but uses the compat_* helpers */
 {
-       dev_remove_pack(&ipv6_packet_type);
+       int err; /* status handed back to the caller */
+
+       if (level == SOL_IP && sk->sk_type != SOCK_RAW) {
+               if (udp_prot.compat_getsockopt != NULL) /* prefer udp_prot's compat handler when one is set */
+                       return udp_prot.compat_getsockopt(sk, level, optname,
+                                                         optval, optlen);
+               return udp_prot.getsockopt(sk, level, optname, optval, optlen); /* otherwise use the native handler */
+       }
+
+       if (level != SOL_IPV6) /* every other level is rejected here */
+               return -ENOPROTOOPT;
+
+       if (optname == MCAST_MSFILTER) /* handed to compat_mc_getsockopt(), which receives ipv6_getsockopt as its last argument */
+               return compat_mc_getsockopt(sk, level, optname, optval, optlen,
+                       ipv6_getsockopt);
+
+       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+       /* Fall back to netfilter only for the -ENOPROTOOPT of do_ipv6_getsockopt()'s default case; IPV6_2292PKTOPTIONS is excluded explicitly (presumably it can return -ENOPROTOOPT itself -- TODO confirm) */
+       if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
+               int len;
+
+               if (get_user(len, optlen)) /* compat_nf_getsockopt() is given &len, so fetch the user's length first */
+                       return -EFAULT;
+
+               lock_sock(sk);
+               err = compat_nf_getsockopt(sk, PF_INET6,
+                                          optname, optval, &len);
+               release_sock(sk);
+               if (err >= 0) /* non-negative result: write len back to the user's optlen */
+                       err = put_user(len, optlen);
+       }
+#endif
+       return err;
 }
+
+EXPORT_SYMBOL(compat_ipv6_getsockopt);
+#endif
+