1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
| import math import time import redis from hashlib import md5
class SimpleHash(object):
    """Seeded multiplicative hash whose result is masked with ``cap - 1``.

    Args:
        cap: Size of the target bit array; the hash result is masked with
            ``cap - 1``.
        seed: Multiplier folded into the running value for every character.
    """

    def __init__(self, cap, seed):
        self.cap = cap
        self.seed = seed

    def hash(self, value):
        """Return the bit offset derived from *value*.

        Args:
            value: A ``str`` or a bytes-like object. Characters contribute
                their code point, bytes contribute their integer value, so
                an ASCII string and its encoded bytes hash identically.

        Returns:
            A non-negative int equal to ``(cap - 1) & accumulated_value``.
            An empty *value* yields ``0``.
        """
        ret = 0
        for ch in value:
            # Iterating bytes yields ints already; iterating str yields
            # one-character strings that need ord().
            code = ch if isinstance(ch, int) else ord(ch)
            # The ``+=`` form (rather than plain assignment) is kept on
            # purpose: changing it would move every offset that has already
            # been written by earlier versions of this code.
            ret += self.seed * ret + code
        # NOTE(review): the mask can only produce offsets whose set bits are
        # also set in ``cap - 1``; the spread is only uniform when ``cap`` is
        # a power of two. Left as is to stay compatible with stored data.
        return (self.cap - 1) & ret
class BloomFilter(object):
    """Bloom filter whose bits are stored through a Redis-style connection.

    Values are reduced to their MD5 hex digest, the digest is hashed once per
    configured seed with ``SimpleHash``, and each resulting offset is read or
    written with ``redis_con.getbit`` / ``redis_con.setbit``.
    """

    # Pool of hash seeds; the first ``hash_time`` entries are used.
    SEEDS = [
        543, 460, 171, 876, 796, 607, 650, 81, 837, 545,
        591, 946, 846, 521, 913, 636, 878, 735, 414, 372,
        344, 324, 223, 180, 327, 891, 798, 933, 493, 293,
        836, 10, 6, 544, 924, 849, 438, 41, 862, 648,
        338, 465, 562, 693, 979, 52, 763, 103, 387, 374,
        349, 94, 384, 680, 574, 480, 307, 580, 71, 535,
        300, 53, 481, 519, 644, 219, 686, 236, 424, 326,
        244, 212, 909, 202, 951, 56, 812, 901, 926, 250,
        507, 739, 371, 63, 584, 154, 7, 284, 617, 332,
        472, 140, 605, 262, 355, 526, 647, 923, 199, 518,
    ]

    def __init__(self, capacity=100000000, error_rate=0.0000001,
                 redis_con=None, key="bloomfilter"):
        """Size the filter and build one hash function per seed.

        Args:
            capacity: Expected number of distinct values to store.
            error_rate: Target false-positive probability.
            redis_con: Object exposing ``getbit(name, offset)`` and
                ``setbit(name, offset, value)`` (e.g. a redis-py client).
            key: Prefix of the Redis key(s) that hold the bits.
        """
        # Number of bits: capacity * log2(e) * log2(1 / error_rate).
        self.bit_size = math.ceil(
            capacity * math.log2(math.e) * math.log2(1 / error_rate))
        # NOTE(review): math.log1p(2) is ln(3), not ln(2); the textbook
        # optimum is (bits / capacity) * ln 2, so this yields more hash
        # rounds than that formula. Deliberately left unchanged here because
        # the hash count determines which bits existing filters have stored.
        self.hash_time = math.ceil(
            math.log1p(2) * self.bit_size / capacity)
        # Bit array size in MiB, rounded up ("memery" spelling kept because
        # it is a public attribute name).
        self.memery = math.ceil(self.bit_size / 8 / 1024 / 1024)
        # Number of key shards; presumably 512 reflects Redis's 512 MB
        # string limit -- TODO confirm.
        self.block_num = math.ceil(self.memery / 512)
        self.seeds = self.SEEDS[0:self.hash_time]
        self.key = key
        self.N = 2 ** 31 - 1
        self.hash_func = [SimpleHash(self.bit_size, seed)
                          for seed in self.seeds]
        self.redis_con = redis_con

    @staticmethod
    def _digest(value):
        """Return the MD5 hex digest of *value*, encoding ``str`` as UTF-8."""
        if isinstance(value, str):
            # hashlib only accepts bytes-like input on Python 3.
            value = value.encode("utf-8")
        return md5(value).hexdigest()

    def get_key(self, value):
        """Return the Redis key of the shard that *value* belongs to.

        Args:
            value: Hex string; its first two characters select the shard.

        Returns:
            ``self.key`` followed by the shard index
            (``int(value[0:2], 16) % self.block_num``).
        """
        return self.key + str(int(value[0:2], 16) % self.block_num)

    def is_contains(self, str_input):
        """Report whether *str_input* may have been inserted.

        Args:
            str_input: ``str`` or bytes-like value to look up.

        Returns:
            ``False`` for an empty/falsy input or as soon as one of the
            value's bits is unset; ``True`` when every bit is set (which may
            be a false positive, as with any Bloom filter).

        Raises:
            Any exception raised by ``redis_con.getbit`` propagates unchanged.
        """
        if not str_input:
            return False
        digest = self._digest(str_input)
        name = self.get_key(digest)
        # all() stops at the first clear bit, avoiding needless round trips.
        return all(self.redis_con.getbit(name, f.hash(digest))
                   for f in self.hash_func)

    def insert(self, str_input):
        """Set every bit that corresponds to *str_input*.

        Args:
            str_input: ``str`` or bytes-like value to record.

        Raises:
            Any exception raised by ``redis_con.setbit`` propagates unchanged.
        """
        digest = self._digest(str_input)
        name = self.get_key(digest)
        for f in self.hash_func:
            self.redis_con.setbit(name, f.hash(digest), 1)
|