diff --git a/include/linux/unicache.h b/include/linux/unicache.h
new file mode 100644
index 0000000..b40d57e
--- /dev/null
+++ b/include/linux/unicache.h
@@ -0,0 +1,130 @@
+/*
+ * Lookup based on LC-trie/TRASH
+ *
+ * Copyright (C) 2006 Robert Olsson
+ * Uppsala, Sweden
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * References: TRASH. A dynamic LC-trie and hash data structure
+ * Robert Olsson Uppsala University, Stefan Nilsson KTH. ISSN 1653-7092
+ *
+ */
+
+/* unicache leaf state flags */
+
+#define UNICACHE_KEY_INVALID		(1<<0)
+#define UNICACHE_TCP_SYN		(1<<1)
+#define UNICACHE_TCP_ACK		(1<<2)
+#define UNICACHE_TCP_FIN		(1<<3)
+#define UNICACHE_TCP_RST		(1<<4)
+#define UNICACHE_TCP_ESTABLISHED	(1<<10)
+
+/* bits controlling garbage collection */
+#define UNICACHE_GC_PKTGEN	(1<<0)
+#define UNICACHE_GC_TCP		(1<<1)
+
+#ifdef __KERNEL__
+
+#define MAXBITS 19	/* Max node size */
+#define GC_THRESH 100000
+#define GC_GOAL 100
+#define NL_MAX_PAYLOAD 1024
+
+int __init unicache_proc_init(void);
+void __init unicache_proc_exit(void);
+void __init unicache_init(void);
+
+void flow_to_key(const struct flowi *flp, u32 *key);
+void fill_key_ipv4_trash(__u32 *key, u32 a, u32 b, u32 c, u32 d);
+struct leaf *unicache_insert_key(const struct flowi *flp, int *err);
+void rt_del(struct trie *t, struct rtable *rt, struct trie_ops *ops);
+struct rtable *unicache_lookup(struct trie *t, u32 *key);
+struct rtable *rt_next(struct trie *t, struct rtable *last);
+
+int unicache_tcp_establish(struct sk_buff *skb);
+int unicache_timestamp_flow(void);
+int nl_unicache_flows(struct rtable *rt, __u32 group, __u16 type);
+u32 unicache_create_key_ipv4(__u32 *key, struct sk_buff *skb, u32 daddr, u32 saddr);
+int unicache_garbage_collect(struct trie *t);
+int unicache_garbage_collect_active(struct trie *t, struct sk_buff *skb);
+
+#endif
+
+/* Netlink groups for unicache API */
+
+#define UNICACHE_GRP_IPV4	0x1
+#define UNICACHE_GRP_IPV6	0x2
+#define UNICACHE_GRP_DECnet	0x4
+
+
+/* Netlink Types of messages for unicache */
+
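+/*
+ * Each type below is followed by a #define of itself -- the usual
+ * kernel header idiom that lets userspace probe for the presence of
+ * a message type with #ifdef.
+ */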
+enum {
+	UNICACHE_LOG_FLOW_START,
+#define UNICACHE_LOG_FLOW_START UNICACHE_LOG_FLOW_START
+
+	UNICACHE_LOG_FLOW_END,
+#define UNICACHE_LOG_FLOW_END UNICACHE_LOG_FLOW_END
+
+	UNICACHE_SET_GC_THRESH,
+#define UNICACHE_SET_GC_THRESH UNICACHE_SET_GC_THRESH
+
+	UNICACHE_GET_GC_THRESH,
+#define UNICACHE_GET_GC_THRESH UNICACHE_GET_GC_THRESH
+
+	UNICACHE_SET_GC_GOAL,
+#define UNICACHE_SET_GC_GOAL UNICACHE_SET_GC_GOAL
+
+	UNICACHE_GET_GC_GOAL,
+#define UNICACHE_GET_GC_GOAL UNICACHE_GET_GC_GOAL
+
+	UNICACHE_SET_GC_LEVEL,
+#define UNICACHE_SET_GC_LEVEL UNICACHE_SET_GC_LEVEL
+
+	UNICACHE_GET_GC_LEVEL,
+#define UNICACHE_GET_GC_LEVEL UNICACHE_GET_GC_LEVEL
+
+	UNICACHE_SET_LOG_MASK,
+#define UNICACHE_SET_LOG_MASK UNICACHE_SET_LOG_MASK
+
+	UNICACHE_GET_LOG_MASK,
+#define UNICACHE_GET_LOG_MASK UNICACHE_GET_LOG_MASK
+
+	UNICACHE_SET_LOG_SAMPLE,
+#define UNICACHE_SET_LOG_SAMPLE UNICACHE_SET_LOG_SAMPLE
+
+	UNICACHE_SET_TIMESTAMP_FLOW,
+#define UNICACHE_SET_TIMESTAMP_FLOW UNICACHE_SET_TIMESTAMP_FLOW
+
+	UNICACHE_GET_TIMESTAMP_FLOW,
+#define UNICACHE_GET_TIMESTAMP_FLOW UNICACHE_GET_TIMESTAMP_FLOW
+
+	UNICACHE_FLUSH,
+#define UNICACHE_FLUSH UNICACHE_FLUSH
+
+	UNICACHE_DUMP_FLOW,
+#define UNICACHE_DUMP_FLOW UNICACHE_DUMP_FLOW
+
+	UNICACHE_INSERT_FLOW,
+#define UNICACHE_INSERT_FLOW UNICACHE_INSERT_FLOW
+
+	UNICACHE_REMOVE_FLOW,
+#define UNICACHE_REMOVE_FLOW UNICACHE_REMOVE_FLOW
+
+	__UNICACHE_MAX,
+#define UNICACHE_MAX (((__UNICACHE_MAX + 3) & ~3) - 1)
+};
diff --git a/net/core/unicache.c b/net/core/unicache.c
new file mode 100644
index 0000000..bdc6906
--- /dev/null
+++ b/net/core/unicache.c
@@ -0,0 +1,960 @@
+/*
+ * Lookup based on LC-trie/TRASH
+ *
+ * Copyright (C) 2006 Robert Olsson
+ * Uppsala, Sweden
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * References: TRASH. A dynamic LC-trie and hash data structure
+ * Robert Olsson Uppsala University, Stefan Nilsson KTH. ISSN 1653-7092
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+struct sock *nl_unicache_sk = NULL;
+struct trie *t_unicache;
+static unsigned long unicache_flows_seq;
+static u32 unicache_flows_sample;
+static u32 unicache_timestamp_flow_sample;
+unsigned long unicache_flow_cnt;
+u32 unicache_gc_level = 0x1;
+static void nl_unicache_init(void);
+static inline void unicache_dump_flow(struct rtable *rt);
+int unicache_garbage_collect(struct trie *t);
+static void unicache_free(struct leaf *l, struct trie_ops *ops);
+
+extern unsigned int rt_hash_rnd;
+extern spinlock_t trie_write_lock;
+extern void rt_free(struct rtable *rt);
+extern void __rt_free(struct rtable *rt);
+extern void ip_rt_new_size(struct trie *t);
+
+struct trie_ops unicache_ops = {
+	.gc   = unicache_garbage_collect,
+	.dump = unicache_dump_flow,
+	.free = unicache_free,
+};
+
+struct trie_iter {
+	struct tnode *tnode;
+	struct trie *trie;
+	unsigned index;
+	unsigned depth;
+	int nflows;
+};
+
+void unicache_init(void)
+{
+	t_unicache = kmalloc(sizeof(struct trie), GFP_KERNEL);
+	if (!t_unicache)
+		panic("unicache_init: No memory\n");
+
+	trie_init(t_unicache, MAXBITS, GC_THRESH, GC_GOAL,
+		  HALVE_THRESHOLD_DEFAULT,
+		  INFLATE_THRESHOLD_DEFAULT,
+		  HALVE_THRESHOLD_ROOT_DEFAULT,
+		  INFLATE_THRESHOLD_ROOT_DEFAULT);
+	nl_unicache_init();
+}
+
+static unsigned int unicache_hash_code(u32 daddr, u32 saddr, u32 flow)
+{
+	return jhash_3words(daddr, saddr, flow, rt_hash_rnd);
+}
+
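+/*
+ * Build a TRASH key: key[1..4] carry the flow tuple (saddr, daddr,
+ * ports, proto) verbatim, and key[0] carries a Jenkins hash over the
+ * tuple, giving the trie a well-distributed head word to index on.
+ */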
+void fill_key_ipv4_trash(__u32 *key, u32 a, u32 b, u32 c, u32 d)
+{
+	u32 tmp;
+
+	memcpy(&key[1], &a, sizeof(u32));
+	memcpy(&key[2], &b, sizeof(u32));
+	memcpy(&key[3], &c, sizeof(u32));
+	memcpy(&key[4], &d, sizeof(u32));
+
+	tmp = (c ^ d);
+	tmp = unicache_hash_code(a, b, tmp);
+	memcpy(&key[0], &tmp, sizeof(u32));
+}
+
+void flow_to_key(const struct flowi *flp, u32 *key)
+{
+	u32 tmp = (flp->fl_ip_dport << 16) | flp->fl_ip_sport;
+
+	fill_key_ipv4_trash(key, flp->fl4_src, flp->fl4_dst,
+			    tmp, (flp->proto << 16));
+}
+
+struct leaf *unicache_insert_key(const struct flowi *flp, int *err)
+{
+	struct leaf *l = NULL;
+	u32 key[LPK];
+	int pgc;
+
+	flow_to_key(flp, key);
+	l = trie_insert(t_unicache, &pgc, err, key, &unicache_ops);
+
+	if (l && *err != -EEXIST)
+		l->obj = NULL;
+
+	return l;
+}
+
+struct rtable *unicache_lookup(struct trie *t, u32 *key)
+{
+	struct leaf *l = trie_lookup(t, key);
+
+	if (l)
+		return (struct rtable *) l->obj;
+
+	return NULL;
+}
+
+void unicache_free(struct leaf *l, struct trie_ops *ops)
+{
+	struct rtable *next, *rt = (struct rtable *) l->obj;
+
+	/*
+	 * Only calls from flush have rt-entries at this point
+	 */
+
+	if (rt) {
+		rcu_assign_pointer(l->obj, NULL);
+
+		for (; rt; rt = next) {
+			next = rt->u.dst.rt_next;
+			ops->dump(rt);
+			__rt_free(rt);
+		}
+	}
+}
+
+void __rt_del(struct trie *t, struct rtable *rt, struct trie_ops *ops)
+{
+	struct rtable **rthp;
+	struct leaf *l;
+
+	/*
+	 * All leaf rt-entries should be removed
+	 * before the leaf itself is removed
+	 */
+
+	l = rt->parent;
+
+	BUG_ON(!l);
+
+	rthp = (struct rtable **) &l->obj;
+
+	for (; *rthp; rthp = &(*rthp)->u.dst.rt_next) {
+		if (*rthp == rt) {
+
+			if (t->token == rt)
+				t->token = rt_next(t, rt);
+
+			rcu_assign_pointer(*rthp, rt->u.dst.rt_next);
+			rt_free(rt);
+			break;
+		}
+	}
+}
+
+void rt_del(struct trie *t, struct rtable *rt, struct trie_ops *ops)
+{
+	spin_lock_bh(&trie_write_lock);
+	__rt_del(t, rt, ops);
+	spin_unlock_bh(&trie_write_lock);
+}
+
+static inline void unicache_create_key_ipv4_reverse(u32 *odir_key, u32 *key)
+{
+	u32 tmp = (key[3] >> 16) | (key[3] << 16);
+	fill_key_ipv4_trash(odir_key, key[2], key[1], tmp, key[4]);
+}
+
+struct rtable *rt_next(struct trie *t, struct rtable *last)
+{
+	struct leaf *l;
+	struct rtable *next;
+
+	/* Cold start */
+
+	if (!last) {
+		l = trie_nextleaf(t, NULL);
+
+		if (!l)
+			return NULL;	/* Empty */
+		return (struct rtable *) l->obj;
+	}
+
+	next = rcu_dereference(last->u.dst.rt_next);
+	if (next)
+		return next;
+
+	/* Else get next leaf */
+	l = last->parent;
+
+	BUG_ON(!l);
+
+	l = trie_nextleaf(t, l);
+	if (!l)
+		return NULL;	/* End */
+
+	return (struct rtable *) l->obj;
+}
+
+/* trie_write_lock should be held */
+
+/* Temporary pdc monitor */
+int refcnt;
+int ggoal;
+int gc_last;
+int loop1;
+int loop2;
+int agg;
+
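+/*
+ * GC scan: resume at the round-robin token; each of up to 'goal'
+ * rounds inspects a window of eight entries and evicts the one with
+ * the lowest rt_score().  Entries still referenced are skipped on the
+ * first pass; if the goal is not met, a second, aggressive pass
+ * ignores the refcount.
+ */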
+int unicache_garbage_collect(struct trie *t)
+{
+	struct rtable *lrt, *cand, *rth;
+
+	int i, j;
+	int goal;
+	int old_size;
+	int agressive, res;
+	u32 min_score;
+
+	if (t->size < t->gc_thresh)
+		return 0;
+
+	lrt = (struct rtable *) t->token;
+
+	if (!lrt) {
+		lrt = rt_next(t, NULL);
+		if (!lrt)
+			return 0;	/* Empty */
+	}
+
+	refcnt = 0;
+
+	old_size = t->size;
+	goal = max(t->gc_goal, t->size - t->gc_thresh);
+	goal = min(goal, t->size);
+	res = 0;
+	ggoal = goal;
+	loop2 = 0;
+	loop1 = 0;
+	agg = 0;
+	agressive = 0;
+
+restart:
+
+	for (i = 0; i < goal; i++) {
+		min_score = ~(u32) 0;
+		cand = NULL;
+
+		loop1++;
+		for (j = 0; j < 8; j++) {
+			u32 score;
+
+			loop2++;
+
+			rth = rt_next(t, lrt);
+
+			if (!rth) {
+				rth = rt_next(t, NULL);	/* Starting over */
+				if (!rth)
+					goto done;
+			}
+
+			lrt = rth;	/* advance even when we skip, or the window stalls */
+
+			if (!agressive && atomic_read(&rth->u.dst.__refcnt)) {
+				refcnt++;
+				continue;
+			}
+
+			score = rt_score(rth);
+
+			if (score <= min_score) {
+				cand = rth;
+				min_score = score;
+			}
+		}
+
+		if (cand == lrt)
+			lrt = rt_next(t, lrt);
+
+		if (cand)
+			__rt_del(t, cand, &unicache_ops);
+	}
+done:
+	gc_last = res = old_size - t->size;
+
+	/*
+	 * If gc_goal was not achieved, make GC
+	 * more aggressive and rerun the scan
+	 */
+
+	if (!agressive && res < goal) {
+		goal -= res;
+		agressive = 1;
+		agg = 1;
+		goto restart;
+	}
+
+	t->token = (void *) lrt;
+	return res;
+}
+
+static int match_pktgen(struct sk_buff *skb)
+{
+	u8 *data;
+
+	/* IP header (20) + UDP header (8) + end-of-flow magic (4) */
+	if (!pskb_may_pull(skb, 32)) {
+		printk("pktgen match discard\n");
+		goto discard_it;
+	}
+
+	data = (u8 *) skb->data + 20;
+
+	/* src and dst port 9 --> pktgen */
+	if (data[0] == 0 &&
+	    data[1] == 9 &&
+	    data[2] == 0 &&
+	    data[3] == 9) {
+
+		/* pktgen end-of-flow magic */
+		if (data[8] == 0xbe &&
+		    data[9] == 0x9b &&
+		    data[10] == 0xe9 &&
+		    data[11] == 0x54)
+			return 1;
+	}
+	return 0;
+
+discard_it:
+	return 0;
+}
+
+int unicache_tcp_establish(struct sk_buff *skb)
+{
+	int est;
+	struct iphdr *iph;
+	struct tcphdr *th;
+	struct rtable *rt;
+
+	if (!skb || !skb->dst)
+		return 0;
+
+	iph = skb->nh.iph;
+
+	if (iph->protocol != IPPROTO_TCP)
+		return 0;
+
+	if (!pskb_may_pull(skb, iph->ihl*4 + sizeof(struct tcphdr)))
+		return 0;
+
+	rt = (struct rtable *) skb->dst;
+	est = 0;
+
+	th = (struct tcphdr *)(skb->data + iph->ihl*4);
+
+	/*
+	 * 1) A  SYN ------->  B | SYN_RCVD      Flow AB
+	 * 2) A  <--- SYN+ACK  B | ESTABLISHED A Flow BA
+	 * 3) A  ACK ------->  B | ESTABLISHED B Flow AB
+	 */
+
+	if (th->syn)
+		rt->rt_flags |= RTCF_TCP_SYN;
+	else {
+		if (th->ack && rt->rt_flags & RTCF_TCP_SYN) {
+			rt->rt_flags |= RTCF_TCP_EST;
+			est = 1;
+		}
+		rt->rt_flags &= ~RTCF_TCP_SYN;
+	}
+	return est;
+}
+
+static inline int unicache_remove_connection(struct rtable *rt)
+{
+	u32 key[LPK];
+	struct leaf *l;
+
+	l = rt->parent;
+
+	if (!l)
+		return 0;
+
+	/* key for the reverse flow */
+	unicache_create_key_ipv4_reverse(key, l->key);
+
+	trie_remove_by_key(t_unicache, l->key, &unicache_ops);
+//	trie_remove_by_key(t_unicache, key, &unicache_ops);
+	return 2;
+}
+
+static inline int unicache_agc_udp(struct trie *t, struct sk_buff *skb)
+{
+	struct iphdr *iph = skb->nh.iph;
+
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		return 0;
+
+	if (unicache_gc_level & UNICACHE_GC_PKTGEN && match_pktgen(skb)) {
+		rt_del(t, (struct rtable *) skb->dst, &unicache_ops);
+		unicache_timestamp_flow_sample++;
+		return 1;
+	}
+	return 0;
+}
+
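+/*
+ * Active GC for TCP: when UNICACHE_GC_TCP is set, cached flow state
+ * is torn down as soon as the connection itself goes away -- on RST
+ * immediately, and on the final ACK of the FIN exchange, mirroring
+ * the SYN tracking in unicache_tcp_establish().
+ */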
+static inline int unicache_agc_tcp(struct trie *t, struct sk_buff *skb)
+{
+	struct rtable *rt;
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *th;
+
+	if (!pskb_may_pull(skb, iph->ihl*4 + sizeof(struct tcphdr)))
+		return 0;
+
+	rt = (struct rtable *) skb->dst;
+	th = (struct tcphdr *)(skb->data + iph->ihl*4);
+
+	if (unicache_gc_level & UNICACHE_GC_TCP) {
+
+		if (th->rst)
+			return unicache_remove_connection(rt);
+
+		/*
+		 * 1) A  FIN+ACK --->  B | CLOSE_WAIT Flow AB
+		 * 2) A  <--- FIN+ACK  B | LAST_ACK   Flow BA
+		 * 3) A  ACK ------->  B | CLOSED     Flow AB
+		 */
+
+		if (th->fin)
+			rt->rt_flags |= RTCF_TCP_FIN;
+		else {
+			if (th->ack && rt->rt_flags & RTCF_TCP_FIN) {
+
+				/*
+				 * We're at #3 for Flow AB.
+				 * We can remove Flow BA too
+				 */
+
+				return unicache_remove_connection(rt);
+			}
+		}
+	}
+	return 0;
+}
+
+int unicache_garbage_collect_active(struct trie *t, struct sk_buff *skb)
+{
+	int agc;
+	struct iphdr *iph;
+
+	if (!skb || !skb->dst)
+		return 0;
+
+	iph = skb->nh.iph;
+	agc = 0;
+
+	switch (iph->protocol) {
+
+	case IPPROTO_UDP:
+		agc = unicache_agc_udp(t, skb);
+		break;
+
+	case IPPROTO_TCP:
+		agc = unicache_agc_tcp(t, skb);
+		break;
+
+	default:
+		break;
+	}
+	return agc;
+}
+
+/* Log ended flows to netlink; a sample of 0 logs every flow */
+static inline void unicache_dump_flow(struct rtable *rt)
+{
+	if (!unicache_flows_sample || !(unicache_flow_cnt % unicache_flows_sample))
+		nl_unicache_flows(rt, UNICACHE_GRP_IPV4, UNICACHE_LOG_FLOW_END);
+}
+
+inline int unicache_timestamp_flow(void)
+{
+	if (unicache_timestamp_flow_sample && !(unicache_flow_cnt % unicache_timestamp_flow_sample))
+		return 1;
+	return 0;
+}
+
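+/*
+ * /proc/net/unicache_flows: one line per cached flow -- a use counter
+ * followed by the TRASH key words (hash, saddr, daddr, ports, and
+ * proto merged with the input interface).
+ */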
+static int unicache_flows_seq_show(struct seq_file *seq, void *v)
+{
+	int i;
+	struct leaf *l = v;
+	struct rtable *rt;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	rt = (struct rtable *) l->obj;
+
+	seq_printf(seq, "%08x %08x ", 0, rt->u.dst.__use);
+
+	for (i = 0; i < LPK-1; i++)
+		seq_printf(seq, "%08x ", l->key[i]);
+
+	seq_printf(seq, "%08x ", l->key[LPK-1] | rt->fl.iif);
+//	seq_printf(seq, "%08x %08x", (unsigned int) l->start.tv_sec, (unsigned int) l->start.tv_usec);
+	seq_printf(seq, "\n");
+
+	return 0;
+}
+
+struct leaf *unicache_get_idx(struct trie *t, loff_t *pos)
+{
+	struct leaf *l = NULL;
+	int i;
+
+	for (i = 0; (l = trie_nextleaf(t, l)) != NULL; i++)
+		if (i == *pos)
+			return l;
+
+	return NULL;
+}
+
+static void *unicache_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct trie_iter *iter;
+	struct trie *t;
+
+	rcu_read_lock_bh();
+
+	iter = seq->private;
+	t = iter->trie;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	return unicache_get_idx(t, pos);
+}
+
+static void *unicache_flows_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct trie_iter *iter = seq->private;
+	struct trie *t = iter->trie;
+	struct leaf *l = v;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) {
+		iter->nflows = 0;
+		l = trie_nextleaf(t, NULL);
+		if (l)
+			iter->nflows++;
+		return l;
+	}
+
+	v = trie_nextleaf(t, l);
+	if (v)
+		iter->nflows++;
+
+	BUG_ON(v == l);
+
+	return v;
+}
+
+static void unicache_seq_stop(struct seq_file *seq, void *v)
+{
+	rcu_read_unlock_bh();
+}
+
+/*
+ *	This outputs /proc/net/unicache_stats
+ */
+static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
+{
+	unsigned i, max, pointers, bytes, avdepth;
+
+	if (stat->leaves)
+		avdepth = stat->totdepth*100 / stat->leaves;
+	else
+		avdepth = 0;
+
+	seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100);
+	seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
+
+	seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
+
+	bytes = sizeof(struct leaf) * stat->leaves;
+	seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes);
+	bytes += sizeof(struct tnode) * stat->tnodes;
+
+	max = MAX_STAT_DEPTH;
+	while (max > 0 && stat->nodesizes[max-1] == 0)
+		max--;
+
+	pointers = 0;
+	for (i = 1; i <= max; i++)
+		if (stat->nodesizes[i] != 0) {
+			seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
+			pointers += (1<<i) * stat->nodesizes[i];
+		}
+	seq_putc(seq, '\n');
+	seq_printf(seq, "\tPointers: %d\n", pointers);
+
+	bytes += sizeof(struct node *) * pointers;
+	seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
+	seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024);
+	seq_printf(seq, "GC refcnt=%d goal=%d gc_last=%d loop1=%d loop2=%d agg=%d\n",
+		   refcnt, ggoal, gc_last, loop1, loop2, agg);
+}
+
+static int unicache_stat_seq_show(struct seq_file *seq, void *v)
+{
+	struct trie_stat *stat;
+
+	stat = kmalloc(sizeof(*stat), GFP_KERNEL);
+	if (!stat)
+		return -ENOMEM;
+
+	seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
+		   sizeof(struct leaf), sizeof(struct tnode));
+
+	seq_printf(seq, "trie:\n");
+	trie_collect_stats(t_unicache, stat);
+	trie_show_stats(seq, stat);
+
+	kfree(stat);
+
+	return 0;
+}
+
+static int unicache_stat_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, unicache_stat_seq_show, NULL);
+}
+
+static struct seq_operations unicache_flows_seq_ops = {
+	.start = unicache_seq_start,
+	.next  = unicache_flows_seq_next,
+	.stop  = unicache_seq_stop,
+	.show  = unicache_flows_seq_show,
+};
+
+static int unicache_flows_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+
+	memset(s, 0, sizeof(*s));
+
+	rc = seq_open(file, &unicache_flows_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	s->trie = t_unicache;
+
+	seq = file->private_data;
+	seq->private = s;
+
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations unicache_flows_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = unicache_flows_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release_private,
+};
+
+static struct file_operations unicache_stat_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = unicache_stat_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+int __init unicache_proc_init(void)
+{
+	if (!proc_net_fops_create("unicache_stat", S_IRUGO, &unicache_stat_fops))
+		goto out;
+
+	if (!proc_net_fops_create("unicache_flows", S_IRUGO, &unicache_flows_fops))
+		goto out1;
+
+	return 0;
+out1:
+	proc_net_remove("unicache_stat");
+out:
+	return -ENOMEM;
+}
+
+void __init unicache_proc_exit(void)
+{
+	proc_net_remove("unicache_stat");
+	proc_net_remove("unicache_flows");
+}
+
+/* Just for testing */
+static void nl_unicache_insert_flow(struct nlmsghdr *nlh)
+{
+	__u32 key[LPK];
+	struct leaf *l = NULL;
+	int err = 0;
+	int i;
+	size_t size = LPK * 4;
+	int pgc = 0;
+
+	static struct trie_ops ops = {
+		.gc   = NULL,
+		.dump = NULL,
+		.free = NULL,
+	};
+
+	memcpy(&key, NLMSG_DATA(nlh), size);
+
+	for (i = 0; i < size/4; i++)
+		printk("%08x ", key[i]);
+
+	l = trie_insert(t_unicache, &pgc, &err, key, &ops);
+
+	if (err)
+		printk("nl_unicache_insert_flow err=%d\n", err);
+
+	if (l)
+		l->obj = NULL;
+}
+
+static void nl_unicache_flush(void)
+{
+	static struct trie_ops ops = {
+		.gc   = NULL,
+		.dump = NULL,
+		.free = NULL,
+	};
+
+	trie_flush(t_unicache, &ops);
+}
+
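+/*
+ * Netlink control channel: userspace sets and queries GC and logging
+ * parameters over NETLINK_UNICACHE.  For the GET types the request
+ * skb is reused as the reply and broadcast on the IPv4 group.
+ */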
+static void nl_unicache_input(struct sock *sk, int len)
+{
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh = NULL;
+	__u32 *key;
+	int reply = 0;
+
+	sk = nl_unicache_sk;
+
+	skb = skb_dequeue(&sk->sk_receive_queue);
+	nlh = (struct nlmsghdr *) skb->data;
+
+	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
+	    nlh->nlmsg_len < NLMSG_LENGTH(LPK*4)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	switch (nlh->nlmsg_type) {
+
+	case UNICACHE_LOG_FLOW_START:
+	case UNICACHE_LOG_FLOW_END:
+		break;
+
+	case UNICACHE_SET_GC_THRESH:
+		spin_lock_bh(&trie_write_lock);
+		memcpy(&t_unicache->gc_thresh, NLMSG_DATA(nlh), sizeof(int));
+		spin_unlock_bh(&trie_write_lock);
+		ip_rt_new_size(t_unicache);
+		break;
+
+	case UNICACHE_GET_GC_THRESH:
+		memcpy(NLMSG_DATA(nlh), &t_unicache->gc_thresh, sizeof(int));
+		reply = 1;
+		break;
+
+	case UNICACHE_SET_GC_GOAL:
+		spin_lock_bh(&trie_write_lock);
+		memcpy(&t_unicache->gc_goal, NLMSG_DATA(nlh), sizeof(int));
+		spin_unlock_bh(&trie_write_lock);
+		break;
+
+	case UNICACHE_GET_GC_GOAL:
+		memcpy(NLMSG_DATA(nlh), &t_unicache->gc_goal, sizeof(int));
+		reply = 1;
+		break;
+
+	case UNICACHE_SET_LOG_MASK:
+		key = NLMSG_DATA(nlh);
+		break;
+
+	case UNICACHE_GET_LOG_MASK:
+		break;
+
+	case UNICACHE_SET_TIMESTAMP_FLOW:
+		memcpy(&unicache_timestamp_flow_sample, NLMSG_DATA(nlh), sizeof(int));
+		break;
+
+	case UNICACHE_GET_TIMESTAMP_FLOW:
+		memcpy(NLMSG_DATA(nlh), &unicache_timestamp_flow_sample, sizeof(int));
+		reply = 1;
+		break;
+
+	case UNICACHE_SET_GC_LEVEL:
+		memcpy(&unicache_gc_level, NLMSG_DATA(nlh), sizeof(int));
+		break;
+
+	case UNICACHE_GET_GC_LEVEL:
+		memcpy(NLMSG_DATA(nlh), &unicache_gc_level, sizeof(int));
+		reply = 1;
+		break;
+
+	case UNICACHE_FLUSH:
+		nl_unicache_flush();
+		break;
+
+	case UNICACHE_DUMP_FLOW:
+		break;
+
+	case UNICACHE_INSERT_FLOW:
+		local_bh_disable();
+		nl_unicache_insert_flow(nlh);
+		local_bh_enable();
+		break;
+
+	case UNICACHE_REMOVE_FLOW:
+		break;
+
+	default:
+		printk("Unknown unicache NETLINK type=%d\n", nlh->nlmsg_type);
+	}
+
+	if (reply) {
+		NETLINK_CB(skb).pid = 0;		/* from kernel */
+		NETLINK_CB(skb).sid = nlh->nlmsg_pid;	/* receiving process */
+		NETLINK_CB(skb).dst_group = UNICACHE_GRP_IPV4;	/* 0 unicast */
+		netlink_broadcast(nl_unicache_sk, skb, 0, UNICACHE_GRP_IPV4, GFP_ATOMIC);
+		return;
+	}
+
+	kfree_skb(skb);
+}
+
+int nl_unicache_flows(struct rtable *rt, __u32 group, __u16 type)
+{
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh = NULL;
+	struct leaf *l;
+	__u32 tmp;
+	size_t size;
+	char *p;
+	int s;
+
+	if (!netlink_has_listeners(nl_unicache_sk, group))
+		return -ESRCH;
+
+	skb = alloc_skb(NLMSG_SPACE(NL_MAX_PAYLOAD), GFP_ATOMIC);
+
+	if (!skb)
+		return -ENOBUFS;
+
+	s = sizeof(unsigned int);
+
+	l = rt->parent;
+	if (!l) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	/* bytecnt, key, timestamp */
+	size = s + LPK*4 + sizeof(struct skb_timeval);
+
+	nlh = NLMSG_NEW(skb, 0, unicache_flows_seq++, type, size, 0);
+	p = NLMSG_DATA(nlh);
+
+	memcpy(&p[0], &rt->u.dst.__use, s);
+	memcpy(&p[s], &l->key[1], (LPK-2)*4);
+
+	tmp = l->key[4] | rt->fl.iif;	/* For compact dump */
+	memcpy(&p[(LPK-1)*4], &tmp, 4);
+
+//	memcpy(&p[s+LPK*4], &l->start, sizeof(struct skb_timeval));
+
+	NLMSG_END(skb, nlh);
+
+	NETLINK_CB(skb).pid = 0;	/* from kernel */
+	NETLINK_CB(skb).sid = 0;
+	NETLINK_CB(skb).dst_group = group;
+
+	return netlink_broadcast(nl_unicache_sk, skb, 0, group, GFP_ATOMIC);
+
+nlmsg_failure:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+static void nl_unicache_init(void)
+{
+	nl_unicache_sk = netlink_kernel_create(NETLINK_UNICACHE, 0,
+					       nl_unicache_input, THIS_MODULE);
+	if (nl_unicache_sk == NULL)
+		panic("unicache_init: cannot initialize netlink\n");
+
+	netlink_set_nonroot(NETLINK_UNICACHE, NL_NONROOT_RECV);
+}
+