|  | /* | 
|  | * Bloom filter tracking which scores are present in our arenas | 
|  | * and (more importantly) which are not. | 
|  | */ | 
|  |  | 
|  | #include "stdinc.h" | 
|  | #include "dat.h" | 
|  | #include "fns.h" | 
|  |  | 
|  | int ignorebloom; | 
|  |  | 
|  | int | 
|  | bloominit(Bloom *b, vlong vsize, u8int *data) | 
|  | { | 
|  | ulong size; | 
|  |  | 
|  | size = vsize; | 
|  | if(size != vsize){	/* truncation */ | 
|  | werrstr("bloom data too big"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | b->size = size; | 
|  | b->nhash = 32;	/* will be fixed by caller on initialization */ | 
|  | if(data != nil) | 
|  | if(unpackbloomhead(b, data) < 0) | 
|  | return -1; | 
|  |  | 
|  | b->bitmask = (b->size<<3) - 1; | 
|  | b->data = data; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | void | 
|  | wbbloomhead(Bloom *b) | 
|  | { | 
|  | packbloomhead(b, b->data); | 
|  | } | 
|  |  | 
|  | Bloom* | 
|  | readbloom(Part *p) | 
|  | { | 
|  | uchar buf[512]; | 
|  | Bloom *b; | 
|  |  | 
|  | b = vtmallocz(sizeof *b); | 
|  | if(readpart(p, 0, buf, sizeof buf) < 0) | 
|  | return nil; | 
|  | /* | 
|  | * pass buf as b->data so that bloominit | 
|  | * can parse header.  won't be used for | 
|  | * accessing bits (cleared below). | 
|  | */ | 
|  | if(bloominit(b, 0, buf) < 0){ | 
|  | vtfree(b); | 
|  | return nil; | 
|  | }else{ | 
|  | /* | 
|  | * default block size is system page size. | 
|  | * the bloom filter is usually very big. | 
|  | * bump the block size up to speed i/o. | 
|  | */ | 
|  | if(p->blocksize < (1<<20)){ | 
|  | p->blocksize = 1<<20; | 
|  | if(p->blocksize > p->size) | 
|  | p->blocksize = p->size; | 
|  | } | 
|  | } | 
|  | b->part = p; | 
|  | b->data = nil; | 
|  | return b; | 
|  | } | 
|  |  | 
|  | int | 
|  | resetbloom(Bloom *b) | 
|  | { | 
|  | uchar *data; | 
|  |  | 
|  | data = vtmallocz(b->size); | 
|  | b->data = data; | 
|  | if(b->size == MaxBloomSize)	/* 2^32 overflows ulong */ | 
|  | addstat(StatBloomBits, b->size*8-1); | 
|  | else | 
|  | addstat(StatBloomBits, b->size*8); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | loadbloom(Bloom *b) | 
|  | { | 
|  | int i, n; | 
|  | uint ones; | 
|  | uchar *data; | 
|  | u32int *a; | 
|  |  | 
|  | data = vtmallocz(b->size); | 
|  | if(readpart(b->part, 0, data, b->size) < 0){ | 
|  | vtfree(b); | 
|  | vtfree(data); | 
|  | return -1; | 
|  | } | 
|  | b->data = data; | 
|  |  | 
|  | a = (u32int*)b->data; | 
|  | n = b->size/4; | 
|  | ones = 0; | 
|  | for(i=0; i<n; i++) | 
|  | ones += countbits(a[i]); | 
|  | addstat(StatBloomOnes, ones); | 
|  |  | 
|  | if(b->size == MaxBloomSize)	/* 2^32 overflows ulong */ | 
|  | addstat(StatBloomBits, b->size*8-1); | 
|  | else | 
|  | addstat(StatBloomBits, b->size*8); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | writebloom(Bloom *b) | 
|  | { | 
|  | wbbloomhead(b); | 
|  | if(writepart(b->part, 0, b->data, b->size) < 0) | 
|  | return -1; | 
|  | if(flushpart(b->part) < 0) | 
|  | return -1; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Derive two random 32-bit quantities a, b from the score | 
|  | * and then use a+b*i as a sequence of bloom filter indices. | 
|  | * Michael Mitzenmacher has a recent (2005) paper saying this is okay. | 
|  | * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header. | 
|  | */ | 
|  | static void | 
|  | gethashes(u8int *score, ulong *h) | 
|  | { | 
|  | int i; | 
|  | u32int a, b; | 
|  |  | 
|  | a = 0; | 
|  | b = 0; | 
|  | for(i=4; i+8<=VtScoreSize; i+=8){ | 
|  | a ^= *(u32int*)(score+i); | 
|  | b ^= *(u32int*)(score+i+4); | 
|  | } | 
|  | if(i+4 <= VtScoreSize)	/* 20 is not 4-aligned */ | 
|  | a ^= *(u32int*)(score+i); | 
|  | for(i=0; i<BloomMaxHash; i++, a+=b) | 
|  | h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a; | 
|  | } | 
|  |  | 
|  | static void | 
|  | _markbloomfilter(Bloom *b, u8int *score) | 
|  | { | 
|  | int i, nnew; | 
|  | ulong h[BloomMaxHash]; | 
|  | u32int x, *y, z, *tab; | 
|  |  | 
|  | trace("markbloomfilter", "markbloomfilter %V", score); | 
|  | gethashes(score, h); | 
|  | nnew = 0; | 
|  | tab = (u32int*)b->data; | 
|  | for(i=0; i<b->nhash; i++){ | 
|  | x = h[i]; | 
|  | y = &tab[(x&b->bitmask)>>5]; | 
|  | z = 1<<(x&31); | 
|  | if(!(*y&z)){ | 
|  | nnew++; | 
|  | *y |= z; | 
|  | } | 
|  | } | 
|  | if(nnew) | 
|  | addstat(StatBloomOnes, nnew); | 
|  |  | 
|  | trace("markbloomfilter", "markbloomfilter exit"); | 
|  | } | 
|  |  | 
|  | static int | 
|  | _inbloomfilter(Bloom *b, u8int *score) | 
|  | { | 
|  | int i; | 
|  | ulong h[BloomMaxHash], x; | 
|  | u32int *tab; | 
|  |  | 
|  | gethashes(score, h); | 
|  | tab = (u32int*)b->data; | 
|  | for(i=0; i<b->nhash; i++){ | 
|  | x = h[i]; | 
|  | if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31)))) | 
|  | return 0; | 
|  | } | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | int | 
|  | inbloomfilter(Bloom *b, u8int *score) | 
|  | { | 
|  | int r; | 
|  |  | 
|  | if(b == nil || b->data == nil) | 
|  | return 1; | 
|  |  | 
|  | if(ignorebloom) | 
|  | return 1; | 
|  |  | 
|  | rlock(&b->lk); | 
|  | r = _inbloomfilter(b, score); | 
|  | runlock(&b->lk); | 
|  | addstat(StatBloomLookup, 1); | 
|  | if(r) | 
|  | addstat(StatBloomMiss, 1); | 
|  | else | 
|  | addstat(StatBloomHit, 1); | 
|  | return r; | 
|  | } | 
|  |  | 
|  | void | 
|  | markbloomfilter(Bloom *b, u8int *score) | 
|  | { | 
|  | if(b == nil || b->data == nil) | 
|  | return; | 
|  |  | 
|  | rlock(&b->lk); | 
|  | qlock(&b->mod); | 
|  | _markbloomfilter(b, score); | 
|  | qunlock(&b->mod); | 
|  | runlock(&b->lk); | 
|  | } | 
|  |  | 
|  | void | 
|  | markbloomfiltern(Bloom *b, u8int score[][20], int n) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | if(b == nil || b->data == nil) | 
|  | return; | 
|  |  | 
|  | rlock(&b->lk); | 
|  | qlock(&b->mod); | 
|  | for(i=0; i<n; i++) | 
|  | _markbloomfilter(b, score[i]); | 
|  | qunlock(&b->mod); | 
|  | runlock(&b->lk); | 
|  | } | 
|  |  | 
|  | static void | 
|  | bloomwriteproc(void *v) | 
|  | { | 
|  | int ret; | 
|  | Bloom *b; | 
|  |  | 
|  | threadsetname("bloomwriteproc"); | 
|  | b = v; | 
|  | for(;;){ | 
|  | recv(b->writechan, 0); | 
|  | if((ret=writebloom(b)) < 0) | 
|  | fprint(2, "oops! writing bloom: %r\n"); | 
|  | else | 
|  | ret = 0; | 
|  | sendul(b->writedonechan, ret); | 
|  | } | 
|  | } | 
|  |  | 
|  | void | 
|  | startbloomproc(Bloom *b) | 
|  | { | 
|  | b->writechan = chancreate(sizeof(void*), 0); | 
|  | b->writedonechan = chancreate(sizeof(ulong), 0); | 
|  | vtproc(bloomwriteproc, b); | 
|  | } |