X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Ffib_trie.c;h=d58b49115386f0ca0fd3ebddb2b0684728c117e6;hb=b902e5735272b6a79fe2853180b2ad6658aa9678;hp=d16ae4623be64cbae61f97f0e28edbf7d2ca59b4;hpb=76e6ebfb40a2455c18234dcb0f9df37533215461;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d16ae46..d58b491 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -123,6 +123,7 @@ struct tnode { union { struct rcu_head rcu; struct work_struct work; + struct tnode *tnode_free; }; struct node *child[0]; }; @@ -161,6 +162,16 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, static struct node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); +/* tnodes to free after resize(); protected by RTNL */ +static struct tnode *tnode_free_head; +static size_t tnode_free_size; + +/* + * synchronize_rcu after call_rcu for that many pages; it should be especially + * useful before resizing the root node with PREEMPT_NONE configs; the value was + * obtained experimentally, aiming to avoid visible slowdown. + */ +static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; @@ -313,9 +324,11 @@ static inline void check_tnode(const struct tnode *tn) static const int halve_threshold = 25; static const int inflate_threshold = 50; -static const int halve_threshold_root = 8; -static const int inflate_threshold_root = 15; +static const int halve_threshold_root = 15; +static const int inflate_threshold_root = 25; +static int inflate_threshold_root_fix; +#define INFLATE_FIX_MAX 10 /* a comment in resize() */ static void __alias_free_mem(struct rcu_head *head) { @@ -385,6 +398,31 @@ static inline void tnode_free(struct tnode *tn) call_rcu(&tn->rcu, __tnode_free_rcu); } +static void tnode_free_safe(struct tnode *tn) +{ + BUG_ON(IS_LEAF(tn)); + tn->tnode_free = tnode_free_head; + tnode_free_head = tn; + tnode_free_size += sizeof(struct tnode) + + (sizeof(struct node *) << tn->bits); +} + +static void tnode_free_flush(void) +{ + struct tnode *tn; + + while ((tn = tnode_free_head)) { + tnode_free_head = tn->tnode_free; + tn->tnode_free = NULL; + tnode_free(tn); + } + + if (tnode_free_size >= PAGE_SIZE * sync_pages) { + tnode_free_size = 0; + synchronize_rcu(); + } +} + static struct leaf *leaf_new(void) { struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); @@ -495,7 +533,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* No children */ if (tn->empty_children == tnode_child_length(tn)) { - tnode_free(tn); + tnode_free_safe(tn); return NULL; } /* One child */ @@ -509,7 +547,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* compress one level */ node_set_parent(n, NULL); - tnode_free(tn); + tnode_free_safe(tn); return n; } /* @@ -581,7 +619,8 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Keep root node larger */ if (!tn->parent) - inflate_threshold_use = inflate_threshold_root; + inflate_threshold_use = inflate_threshold_root + + inflate_threshold_root_fix; else inflate_threshold_use = inflate_threshold; @@ -605,15 +644,27 @@ static struct node *resize(struct trie *t, struct tnode *tn) } if (max_resize < 0) { - if (!tn->parent) - pr_warning("Fix inflate_threshold_root." - " Now=%d size=%d bits\n", - inflate_threshold_root, tn->bits); - else + if (!tn->parent) { + /* + * It was observed that during large updates even + * inflate_threshold_root = 35 might be needed to avoid + * this warning; but it should be temporary, so let's + * try to handle this automatically. + */ + if (inflate_threshold_root_fix < INFLATE_FIX_MAX) + inflate_threshold_root_fix++; + else + pr_warning("Fix inflate_threshold_root." + " Now=%d size=%d bits fix=%d\n", + inflate_threshold_root, tn->bits, + inflate_threshold_root_fix); + } else { pr_warning("Fix inflate_threshold." " Now=%d size=%d bits\n", inflate_threshold, tn->bits); - } + } + } else if (max_resize > 3 && !tn->parent && inflate_threshold_root_fix) + inflate_threshold_root_fix--; check_tnode(tn); @@ -670,7 +721,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* compress one level */ node_set_parent(n, NULL); - tnode_free(tn); + tnode_free_safe(tn); return n; } @@ -756,7 +807,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) put_child(t, tn, 2*i, inode->child[0]); put_child(t, tn, 2*i+1, inode->child[1]); - tnode_free(inode); + tnode_free_safe(inode); continue; } @@ -801,9 +852,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) put_child(t, tn, 2*i, resize(t, left)); put_child(t, tn, 2*i+1, resize(t, right)); - tnode_free(inode); + tnode_free_safe(inode); } - tnode_free(oldtnode); + tnode_free_safe(oldtnode); return tn; nomem: { @@ -885,7 +936,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) put_child(t, newBinNode, 1, right); put_child(t, tn, i/2, resize(t, newBinNode)); } - tnode_free(oldtnode); + tnode_free_safe(oldtnode); return tn; nomem: { @@ -983,12 +1034,14 @@ fib_find_node(struct trie *t, u32 key) return NULL; } -static struct node *trie_rebalance(struct trie *t, struct tnode *tn) +static void trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key = tn->key; + t_key cindex, key; struct tnode *tp; + key = tn->key; + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); @@ -999,6 +1052,10 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) tp = node_parent((struct node *) tn); if (!tp) + rcu_assign_pointer(t->trie, (struct node *)tn); + + tnode_free_flush(); + if (!tp) break; tn = tp; } @@ -1007,7 +1064,10 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - return (struct node *)tn; + rcu_assign_pointer(t->trie, (struct node *)tn); + tnode_free_flush(); + + return; } /* only used from updater-side */ @@ -1155,7 +1215,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) /* Rebalance the trie */ - rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); + trie_rebalance(t, tp); done: return fa_head; } @@ -1347,8 +1407,7 @@ static int check_leaf(struct trie *t, struct leaf *l, if (l->key != (key & ntohl(mask))) continue; - err = fib_semantic_match(&li->falh, flp, res, - htonl(l->key), mask, plen); + err = fib_semantic_match(&li->falh, flp, res, plen); #ifdef CONFIG_IP_FIB_TRIE_STATS if (err <= 0) @@ -1357,17 +1416,17 @@ static int check_leaf(struct trie *t, struct leaf *l, t->stats.semantic_match_miss++; #endif if (err <= 0) - return plen; + return err; } - return -1; + return 1; } static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { struct trie *t = (struct trie *) tb->tb_data; - int plen, ret = 0; + int ret; struct node *n; struct tnode *pn; int pos, bits; @@ -1391,10 +1450,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, /* Just a leaf? */ if (IS_LEAF(n)) { - plen = check_leaf(t, (struct leaf *)n, key, flp, res); - if (plen < 0) - goto failed; - ret = 0; + ret = check_leaf(t, (struct leaf *)n, key, flp, res); goto found; } @@ -1409,7 +1465,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), pos, bits); - n = tnode_get_child(pn, cindex); + n = tnode_get_child_rcu(pn, cindex); if (n == NULL) { #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -1419,11 +1475,9 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, } if (IS_LEAF(n)) { - plen = check_leaf(t, (struct leaf *)n, key, flp, res); - if (plen < 0) + ret = check_leaf(t, (struct leaf *)n, key, flp, res); + if (ret > 0) goto backtrace; - - ret = 0; goto found; } @@ -1546,7 +1600,7 @@ backtrace: if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { - struct tnode *parent = node_parent((struct node *) pn); + struct tnode *parent = node_parent_rcu((struct node *) pn); if (!parent) goto failed; @@ -1580,7 +1634,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) if (tp) { t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, NULL); - rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); + trie_rebalance(t, tp); } else rcu_assign_pointer(t->trie, NULL); @@ -1759,7 +1813,7 @@ static struct leaf *trie_firstleaf(struct trie *t) static struct leaf *trie_nextleaf(struct leaf *l) { struct node *c = (struct node *) l; - struct tnode *p = node_parent(c); + struct tnode *p = node_parent_rcu(c); if (!p) return NULL; /* trie with just one leaf */ @@ -2256,25 +2310,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) static int fib_triestat_seq_open(struct inode *inode, struct file *file) { - int err; - struct net *net; - - net = get_proc_net(inode); - if (net == NULL) - return -ENXIO; - err = single_open(file, fib_triestat_seq_show, net); - if (err < 0) { - put_net(net); - return err; - } - return 0; -} - -static int fib_triestat_seq_release(struct inode *ino, struct file *f) -{ - struct seq_file *seq = f->private_data; - put_net(seq->private); - return single_release(ino, f); + return single_open_net(inode, file, fib_triestat_seq_show); } static const struct file_operations fib_triestat_fops = { @@ -2282,7 +2318,7 @@ static const struct file_operations fib_triestat_fops = { .open = fib_triestat_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = fib_triestat_seq_release, + .release = single_release_net, }; static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) @@ -2422,8 +2458,8 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); seq_indent(seq, iter->depth-1); - seq_printf(seq, " +-- " NIPQUAD_FMT "/%d %d %d %d\n", - NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, + seq_printf(seq, " +-- %pI4/%d %d %d %d\n", + &prf, tn->pos, tn->bits, tn->full_children, tn->empty_children); } else { @@ -2433,7 +2469,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) __be32 val = htonl(l->key); seq_indent(seq, iter->depth); - seq_printf(seq, " |-- " NIPQUAD_FMT "\n", NIPQUAD(val)); + seq_printf(seq, " |-- %pI4\n", &val); hlist_for_each_entry_rcu(li, node, &l->list, hlist) { struct fib_alias *fa;