* 2 of the License, or (at your option) any later version.
*
* Andi Kleen - Fix a few bad bugs and races.
+ * Kris Katterjohn - Added many additional checks in sk_chk_filter()
*/
#include <linux/module.h>
#include <linux/types.h>
-#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
+#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
+#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
+#include <asm/unaligned.h>
#include <linux/filter.h>
/* No hurry in this branch */
-static u8 *load_pointer(struct sk_buff *skb, int k)
+static void *__load_pointer(struct sk_buff *skb, int k)
{
u8 *ptr = NULL;
if (k >= SKF_NET_OFF)
- ptr = skb->nh.raw + k - SKF_NET_OFF;
+ ptr = skb_network_header(skb) + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
- ptr = skb->mac.raw + k - SKF_LL_OFF;
+ ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
- if (ptr >= skb->head && ptr < skb->tail)
+ if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
return ptr;
return NULL;
}
+static inline void *load_pointer(struct sk_buff *skb, int k,
+ unsigned int size, void *buffer)
+{
+ if (k >= 0)
+ return skb_header_pointer(skb, k, size, buffer);
+ else {
+ if (k >= SKF_AD_OFF)
+ return NULL;
+ return __load_pointer(skb, k);
+ }
+}
+
+/**
+ * sk_filter - run a packet through a socket filter
+ * @sk: sock associated with &sk_buff
+ * @skb: buffer to filter
+ *
+ * Run the filter code and then cut skb->data to correct size returned by
+ * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * than pkt_len we keep whole skb->data. This is the socket level
+ * wrapper to sk_run_filter. It returns 0 if the packet should
+ * be accepted or -EPERM if the packet should be tossed.
+ *
+ */
+int sk_filter(struct sock *sk, struct sk_buff *skb)
+{
+ int err;
+ struct sk_filter *filter;
+
+ err = security_sock_rcv_skb(sk, skb);
+ if (err)
+ return err;
+
+ rcu_read_lock_bh();
+ filter = rcu_dereference_bh(sk->sk_filter);
+ if (filter) {
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns,
+ filter->len);
+ err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
+ }
+ rcu_read_unlock_bh();
+
+ return err;
+}
+EXPORT_SYMBOL(sk_filter);
+
/**
- * sk_run_filter - run a filter on a socket
+ * sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
* @flen: length of filter
* filtering, filter is the array of filter instructions, and
* len is the number of filter blocks in the array.
*/
-
-int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
- unsigned char *data = skb->data;
- /* len is UNSIGNED. Byte wide insns relies only on implicit
- type casts to prevent reading arbitrary memory locations.
- */
- unsigned int len = skb->len-skb->data_len;
struct sock_filter *fentry; /* We walk down these */
- u32 A = 0; /* Accumulator */
- u32 X = 0; /* Index Register */
+ void *ptr;
+ u32 A = 0; /* Accumulator */
+ u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
+ u32 tmp;
int k;
int pc;
*/
for (pc = 0; pc < flen; pc++) {
fentry = &filter[pc];
-
+
switch (fentry->code) {
case BPF_ALU|BPF_ADD|BPF_X:
A += X;
A /= X;
continue;
case BPF_ALU|BPF_DIV|BPF_K:
- if (fentry->k == 0)
- return 0;
A /= fentry->k;
continue;
case BPF_ALU|BPF_AND|BPF_X:
continue;
case BPF_LD|BPF_W|BPF_ABS:
k = fentry->k;
- load_w:
- if (k < 0) {
- u8 *ptr;
-
- if (k >= SKF_AD_OFF)
- break;
- ptr = load_pointer(skb, k);
- if (ptr) {
- A = ntohl(*(u32*)ptr);
- continue;
- }
- } else {
- u32 _tmp, *p;
- p = skb_header_pointer(skb, k, 4, &_tmp);
- if (p != NULL) {
- A = ntohl(*p);
- continue;
- }
+load_w:
+ ptr = load_pointer(skb, k, 4, &tmp);
+ if (ptr != NULL) {
+ A = get_unaligned_be32(ptr);
+ continue;
}
- return 0;
+ break;
case BPF_LD|BPF_H|BPF_ABS:
k = fentry->k;
- load_h:
- if (k < 0) {
- u8 *ptr;
-
- if (k >= SKF_AD_OFF)
- break;
- ptr = load_pointer(skb, k);
- if (ptr) {
- A = ntohs(*(u16*)ptr);
- continue;
- }
- } else {
- u16 _tmp, *p;
- p = skb_header_pointer(skb, k, 2, &_tmp);
- if (p != NULL) {
- A = ntohs(*p);
- continue;
- }
+load_h:
+ ptr = load_pointer(skb, k, 2, &tmp);
+ if (ptr != NULL) {
+ A = get_unaligned_be16(ptr);
+ continue;
}
- return 0;
+ break;
case BPF_LD|BPF_B|BPF_ABS:
k = fentry->k;
load_b:
- if (k < 0) {
- u8 *ptr;
-
- if (k >= SKF_AD_OFF)
- break;
- ptr = load_pointer(skb, k);
- if (ptr) {
- A = *ptr;
- continue;
- }
- } else {
- u8 _tmp, *p;
- p = skb_header_pointer(skb, k, 1, &_tmp);
- if (p != NULL) {
- A = *p;
- continue;
- }
+ ptr = load_pointer(skb, k, 1, &tmp);
+ if (ptr != NULL) {
+ A = *(u8 *)ptr;
+ continue;
}
- return 0;
+ break;
case BPF_LD|BPF_W|BPF_LEN:
- A = len;
+ A = skb->len;
continue;
case BPF_LDX|BPF_W|BPF_LEN:
- X = len;
+ X = skb->len;
continue;
case BPF_LD|BPF_W|BPF_IND:
k = X + fentry->k;
k = X + fentry->k;
goto load_b;
case BPF_LDX|BPF_B|BPF_MSH:
- if (fentry->k >= len)
- return 0;
- X = (data[fentry->k] & 0xf) << 2;
- continue;
+ ptr = load_pointer(skb, fentry->k, 1, &tmp);
+ if (ptr != NULL) {
+ X = (*(u8 *)ptr & 0xf) << 2;
+ continue;
+ }
+ return 0;
case BPF_LD|BPF_IMM:
A = fentry->k;
continue;
A = X;
continue;
case BPF_RET|BPF_K:
- return ((unsigned int)fentry->k);
+ return fentry->k;
case BPF_RET|BPF_A:
- return ((unsigned int)A);
+ return A;
case BPF_ST:
mem[fentry->k] = A;
continue;
mem[fentry->k] = X;
continue;
default:
- /* Invalid instruction counts as RET */
+ WARN_ON(1);
return 0;
}
*/
switch (k-SKF_AD_OFF) {
case SKF_AD_PROTOCOL:
- A = htons(skb->protocol);
+ A = ntohs(skb->protocol);
continue;
case SKF_AD_PKTTYPE:
A = skb->pkt_type;
case SKF_AD_IFINDEX:
A = skb->dev->ifindex;
continue;
+ case SKF_AD_MARK:
+ A = skb->mark;
+ continue;
+ case SKF_AD_QUEUE:
+ A = skb->queue_mapping;
+ continue;
+ case SKF_AD_NLATTR: {
+ struct nlattr *nla;
+
+ if (skb_is_nonlinear(skb))
+ return 0;
+ if (A > skb->len - sizeof(struct nlattr))
+ return 0;
+
+ nla = nla_find((struct nlattr *)&skb->data[A],
+ skb->len - A, X);
+ if (nla)
+ A = (void *)nla - (void *)skb->data;
+ else
+ A = 0;
+ continue;
+ }
+ case SKF_AD_NLATTR_NEST: {
+ struct nlattr *nla;
+
+ if (skb_is_nonlinear(skb))
+ return 0;
+ if (A > skb->len - sizeof(struct nlattr))
+ return 0;
+
+ nla = (struct nlattr *)&skb->data[A];
+ if (nla->nla_len > A - skb->len)
+ return 0;
+
+ nla = nla_find_nested(nla, X);
+ if (nla)
+ A = (void *)nla - (void *)skb->data;
+ else
+ A = 0;
+ continue;
+ }
default:
return 0;
}
return 0;
}
+EXPORT_SYMBOL(sk_run_filter);
/**
* sk_chk_filter - verify socket filter code
*
* Check the user's filter code. If we let some ugly
* filter code slip through kaboom! The filter must contain
- * no references or jumps that are out of range, no illegal instructions
- * and no backward jumps. It must end with a RET instruction
+ * no references or jumps that are out of range, no illegal
+ * instructions, and must end with a RET instruction.
*
- * Returns 0 if the rule set is legal or a negative errno code if not.
+ * All jumps are forward as they are not signed.
+ *
+ * Returns 0 if the rule set is legal or -EINVAL if not.
*/
int sk_chk_filter(struct sock_filter *filter, int flen)
{
struct sock_filter *ftest;
int pc;
- if (((unsigned int)flen >= (~0U / sizeof(struct sock_filter))) || flen == 0)
+ if (flen == 0 || flen > BPF_MAXINSNS)
return -EINVAL;
/* check the filter code now */
for (pc = 0; pc < flen; pc++) {
- /* all jumps are forward as they are not signed */
ftest = &filter[pc];
- if (BPF_CLASS(ftest->code) == BPF_JMP) {
- /* but they mustn't jump off the end */
- if (BPF_OP(ftest->code) == BPF_JA) {
- /*
- * Note, the large ftest->k might cause loops.
- * Compare this with conditional jumps below,
- * where offsets are limited. --ANK (981016)
- */
- if (ftest->k >= (unsigned)(flen-pc-1))
- return -EINVAL;
- } else {
- /* for conditionals both must be safe */
- if (pc + ftest->jt +1 >= flen ||
- pc + ftest->jf +1 >= flen)
- return -EINVAL;
- }
- }
- /* check that memory operations use valid addresses. */
- if (ftest->k >= BPF_MEMWORDS) {
- /* but it might not be a memory operation... */
- switch (ftest->code) {
- case BPF_ST:
- case BPF_STX:
- case BPF_LD|BPF_MEM:
- case BPF_LDX|BPF_MEM:
+ /* Only allow valid instructions */
+ switch (ftest->code) {
+ case BPF_ALU|BPF_ADD|BPF_K:
+ case BPF_ALU|BPF_ADD|BPF_X:
+ case BPF_ALU|BPF_SUB|BPF_K:
+ case BPF_ALU|BPF_SUB|BPF_X:
+ case BPF_ALU|BPF_MUL|BPF_K:
+ case BPF_ALU|BPF_MUL|BPF_X:
+ case BPF_ALU|BPF_DIV|BPF_X:
+ case BPF_ALU|BPF_AND|BPF_K:
+ case BPF_ALU|BPF_AND|BPF_X:
+ case BPF_ALU|BPF_OR|BPF_K:
+ case BPF_ALU|BPF_OR|BPF_X:
+ case BPF_ALU|BPF_LSH|BPF_K:
+ case BPF_ALU|BPF_LSH|BPF_X:
+ case BPF_ALU|BPF_RSH|BPF_K:
+ case BPF_ALU|BPF_RSH|BPF_X:
+ case BPF_ALU|BPF_NEG:
+ case BPF_LD|BPF_W|BPF_ABS:
+ case BPF_LD|BPF_H|BPF_ABS:
+ case BPF_LD|BPF_B|BPF_ABS:
+ case BPF_LD|BPF_W|BPF_LEN:
+ case BPF_LD|BPF_W|BPF_IND:
+ case BPF_LD|BPF_H|BPF_IND:
+ case BPF_LD|BPF_B|BPF_IND:
+ case BPF_LD|BPF_IMM:
+ case BPF_LDX|BPF_W|BPF_LEN:
+ case BPF_LDX|BPF_B|BPF_MSH:
+ case BPF_LDX|BPF_IMM:
+ case BPF_MISC|BPF_TAX:
+ case BPF_MISC|BPF_TXA:
+ case BPF_RET|BPF_K:
+ case BPF_RET|BPF_A:
+ break;
+
+ /* Some instructions need special checks */
+
+ case BPF_ALU|BPF_DIV|BPF_K:
+ /* check for division by zero */
+ if (ftest->k == 0)
return -EINVAL;
- }
+ break;
+
+ case BPF_LD|BPF_MEM:
+ case BPF_LDX|BPF_MEM:
+ case BPF_ST:
+ case BPF_STX:
+ /* check for invalid memory addresses */
+ if (ftest->k >= BPF_MEMWORDS)
+ return -EINVAL;
+ break;
+
+ case BPF_JMP|BPF_JA:
+ /*
+ * Note, the large ftest->k might cause loops.
+ * Compare this with conditional jumps below,
+ * where offsets are limited. --ANK (981016)
+ */
+ if (ftest->k >= (unsigned)(flen-pc-1))
+ return -EINVAL;
+ break;
+
+ case BPF_JMP|BPF_JEQ|BPF_K:
+ case BPF_JMP|BPF_JEQ|BPF_X:
+ case BPF_JMP|BPF_JGE|BPF_K:
+ case BPF_JMP|BPF_JGE|BPF_X:
+ case BPF_JMP|BPF_JGT|BPF_K:
+ case BPF_JMP|BPF_JGT|BPF_X:
+ case BPF_JMP|BPF_JSET|BPF_K:
+ case BPF_JMP|BPF_JSET|BPF_X:
+ /* for conditionals both must be safe */
+ if (pc + ftest->jt + 1 >= flen ||
+ pc + ftest->jf + 1 >= flen)
+ return -EINVAL;
+ break;
+
+ default:
+ return -EINVAL;
}
}
- /*
- * The program must end with a return. We don't care where they
- * jumped within the script (its always forwards) but in the end
- * they _will_ hit this.
- */
- return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
+ return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL(sk_chk_filter);
+
+/**
+ * sk_filter_rcu_release: Release a socket filter by rcu_head
+ * @rcu: rcu_head that contains the sk_filter to free
+ */
+static void sk_filter_rcu_release(struct rcu_head *rcu)
+{
+ struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+
+ sk_filter_release(fp);
+}
+
+static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
+{
+ unsigned int size = sk_filter_len(fp);
+
+ atomic_sub(size, &sk->sk_omem_alloc);
+ call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
}
/**
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
- struct sk_filter *fp;
+ struct sk_filter *fp, *old_fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
int err;
/* Make sure new filter is there and in the right amounts. */
- if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
- return -EINVAL;
+ if (fprog->filter == NULL)
+ return -EINVAL;
fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
if (!fp)
return -ENOMEM;
if (copy_from_user(fp->insns, fprog->filter, fsize)) {
- sock_kfree_s(sk, fp, fsize+sizeof(*fp));
+ sock_kfree_s(sk, fp, fsize+sizeof(*fp));
return -EFAULT;
}
fp->len = fprog->len;
err = sk_chk_filter(fp->insns, fp->len);
- if (!err) {
- struct sk_filter *old_fp;
-
- spin_lock_bh(&sk->sk_lock.slock);
- old_fp = sk->sk_filter;
- sk->sk_filter = fp;
- spin_unlock_bh(&sk->sk_lock.slock);
- fp = old_fp;
+ if (err) {
+ sk_filter_uncharge(sk, fp);
+ return err;
}
- if (fp)
- sk_filter_release(sk, fp);
- return err;
+ rcu_read_lock_bh();
+ old_fp = rcu_dereference_bh(sk->sk_filter);
+ rcu_assign_pointer(sk->sk_filter, fp);
+ rcu_read_unlock_bh();
+
+ if (old_fp)
+ sk_filter_delayed_uncharge(sk, old_fp);
+ return 0;
}
+EXPORT_SYMBOL_GPL(sk_attach_filter);
-EXPORT_SYMBOL(sk_chk_filter);
-EXPORT_SYMBOL(sk_run_filter);
+int sk_detach_filter(struct sock *sk)
+{
+ int ret = -ENOENT;
+ struct sk_filter *filter;
+
+ rcu_read_lock_bh();
+ filter = rcu_dereference_bh(sk->sk_filter);
+ if (filter) {
+ rcu_assign_pointer(sk->sk_filter, NULL);
+ sk_filter_delayed_uncharge(sk, filter);
+ ret = 0;
+ }
+ rcu_read_unlock_bh();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sk_detach_filter);