blob: 42418d1c93493a1f8b5f369b7ceb71cdf8e2eb4b [file] [log] [blame]
/*
* Bloom filter tracking which scores are present in our arenas
* and (more importantly) which are not.
*/
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
int ignorebloom;
int
bloominit(Bloom *b, vlong vsize, u8int *data)
{
ulong size;
size = vsize;
if(size != vsize){ /* truncation */
werrstr("bloom data too big");
return -1;
}
b->size = size;
b->nhash = 32; /* will be fixed by caller on initialization */
if(data != nil)
if(unpackbloomhead(b, data) < 0)
return -1;
b->bitmask = (b->size<<3) - 1;
b->data = data;
return 0;
}
void
wbbloomhead(Bloom *b)
{
packbloomhead(b, b->data);
}
Bloom*
readbloom(Part *p)
{
uchar buf[512];
Bloom *b;
b = vtmallocz(sizeof *b);
if(readpart(p, 0, buf, sizeof buf) < 0)
return nil;
/*
* pass buf as b->data so that bloominit
* can parse header. won't be used for
* accessing bits (cleared below).
*/
if(bloominit(b, 0, buf) < 0){
vtfree(b);
return nil;
}else{
/*
* default block size is system page size.
* the bloom filter is usually very big.
* bump the block size up to speed i/o.
*/
if(p->blocksize < (1<<20)){
p->blocksize = 1<<20;
if(p->blocksize > p->size)
p->blocksize = p->size;
}
}
b->part = p;
b->data = nil;
return b;
}
int
resetbloom(Bloom *b)
{
uchar *data;
data = vtmallocz(b->size);
b->data = data;
if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
addstat(StatBloomBits, b->size*8-1);
else
addstat(StatBloomBits, b->size*8);
return 0;
}
int
loadbloom(Bloom *b)
{
int i, n;
uint ones;
uchar *data;
u32int *a;
data = vtmallocz(b->size);
if(readpart(b->part, 0, data, b->size) < 0){
vtfree(b);
vtfree(data);
return -1;
}
b->data = data;
a = (u32int*)b->data;
n = b->size/4;
ones = 0;
for(i=0; i<n; i++)
ones += countbits(a[i]);
addstat(StatBloomOnes, ones);
if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
addstat(StatBloomBits, b->size*8-1);
else
addstat(StatBloomBits, b->size*8);
return 0;
}
int
writebloom(Bloom *b)
{
wbbloomhead(b);
if(writepart(b->part, 0, b->data, b->size) < 0)
return -1;
if(flushpart(b->part) < 0)
return -1;
return 0;
}
/*
* Derive two random 32-bit quantities a, b from the score
* and then use a+b*i as a sequence of bloom filter indices.
* Michael Mitzenmacher has a recent (2005) paper saying this is okay.
* We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
*/
static void
gethashes(u8int *score, ulong *h)
{
int i;
u32int a, b;
a = 0;
b = 0;
for(i=4; i+8<=VtScoreSize; i+=8){
a ^= *(u32int*)(score+i);
b ^= *(u32int*)(score+i+4);
}
if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
a ^= *(u32int*)(score+i);
for(i=0; i<BloomMaxHash; i++, a+=b)
h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
}
static void
_markbloomfilter(Bloom *b, u8int *score)
{
int i, nnew;
ulong h[BloomMaxHash];
u32int x, *y, z, *tab;
trace("markbloomfilter", "markbloomfilter %V", score);
gethashes(score, h);
nnew = 0;
tab = (u32int*)b->data;
for(i=0; i<b->nhash; i++){
x = h[i];
y = &tab[(x&b->bitmask)>>5];
z = 1<<(x&31);
if(!(*y&z)){
nnew++;
*y |= z;
}
}
if(nnew)
addstat(StatBloomOnes, nnew);
trace("markbloomfilter", "markbloomfilter exit");
}
static int
_inbloomfilter(Bloom *b, u8int *score)
{
int i;
ulong h[BloomMaxHash], x;
u32int *tab;
gethashes(score, h);
tab = (u32int*)b->data;
for(i=0; i<b->nhash; i++){
x = h[i];
if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31))))
return 0;
}
return 1;
}
int
inbloomfilter(Bloom *b, u8int *score)
{
int r;
if(b == nil || b->data == nil)
return 1;
if(ignorebloom)
return 1;
rlock(&b->lk);
r = _inbloomfilter(b, score);
runlock(&b->lk);
addstat(StatBloomLookup, 1);
if(r)
addstat(StatBloomMiss, 1);
else
addstat(StatBloomHit, 1);
return r;
}
void
markbloomfilter(Bloom *b, u8int *score)
{
if(b == nil || b->data == nil)
return;
rlock(&b->lk);
qlock(&b->mod);
_markbloomfilter(b, score);
qunlock(&b->mod);
runlock(&b->lk);
}
void
markbloomfiltern(Bloom *b, u8int score[][20], int n)
{
int i;
if(b == nil || b->data == nil)
return;
rlock(&b->lk);
qlock(&b->mod);
for(i=0; i<n; i++)
_markbloomfilter(b, score[i]);
qunlock(&b->mod);
runlock(&b->lk);
}
static void
bloomwriteproc(void *v)
{
int ret;
Bloom *b;
threadsetname("bloomwriteproc");
b = v;
for(;;){
recv(b->writechan, 0);
if((ret=writebloom(b)) < 0)
fprint(2, "oops! writing bloom: %r\n");
else
ret = 0;
sendul(b->writedonechan, ret);
}
}
void
startbloomproc(Bloom *b)
{
b->writechan = chancreate(sizeof(void*), 0);
b->writedonechan = chancreate(sizeof(ulong), 0);
vtproc(bloomwriteproc, b);
}