blob: 76677871f4d4bffad8bf89afb6c2757fa39813a8 [file] [log] [blame]
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
Bloom b;
void
usage(void)
{
fprint(2, "usage: fmtbloom [-n nblocks | -N nhash] [-s size] file\n");
threadexitsall(0);
}
void
threadmain(int argc, char *argv[])
{
Part *part;
char *file;
vlong bits, size, size2;
int nhash;
vlong nblocks;
ventifmtinstall();
statsinit();
size = 0;
nhash = 0;
nblocks = 0;
ARGBEGIN{
case 'n':
if(nhash || nblocks)
usage();
nblocks = unittoull(EARGF(usage()));
break;
case 'N':
if(nhash || nblocks)
usage();
nhash = unittoull(EARGF(usage()));
if(nhash > BloomMaxHash){
fprint(2, "maximum possible is -N %d", BloomMaxHash);
usage();
}
break;
case 's':
size = unittoull(ARGF());
if(size == ~0)
usage();
break;
default:
usage();
break;
}ARGEND
if(argc != 1)
usage();
file = argv[0];
part = initpart(file, ORDWR|ODIRECT);
if(part == nil)
sysfatal("can't open partition %s: %r", file);
if(size == 0)
size = part->size;
if(size < 1024*1024)
sysfatal("bloom filter too small");
if(size > MaxBloomSize){
fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
size, (vlong)MaxBloomSize);
size = MaxBloomSize;
}
if(size&(size-1)){
for(size2=1; size2<size; size2*=2)
;
size = size2/2;
fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
}
if(nblocks){
/*
* no use for more than 32 bits per block
* shoot for less than 64 bits per block
*/
size2 = size;
while(size2*8 >= nblocks*64)
size2 >>= 1;
if(size2 != size){
size = size2;
fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
size/1024/1024);
}
/*
* optimal is to use ln 2 times as many hash functions as we have bits per blocks.
*/
bits = (8*size)/nblocks;
nhash = bits*7/10;
if(nhash > BloomMaxHash)
nhash = BloomMaxHash;
}
if(!nhash)
nhash = BloomMaxHash;
if(bloominit(&b, size, nil) < 0)
sysfatal("bloominit: %r");
b.nhash = nhash;
bits = nhash*10/7;
nblocks = (8*size)/bits;
fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
b.data = vtmallocz(size);
b.part = part;
if(writebloom(&b) < 0)
sysfatal("writing %s: %r", file);
threadexitsall(0);
}