[Pkg-clamav-commits] [SCM] Debian repository for ClamAV branch, debian/unstable, updated. debian/0.95+dfsg-1-6156-g094ec9b
aCaB
acab at clamav.net
Sun Apr 4 01:14:37 UTC 2010
The following commit has been merged in the debian/unstable branch:
commit 402f4b19d991f9f2f32a3941be27041492587cd3
Author: aCaB <acab at clamav.net>
Date: Thu Jan 14 04:38:31 2010 +0100
caching final
diff --git a/countme.pl b/countme.pl
new file mode 100644
index 0000000..71866c5
--- /dev/null
+++ b/countme.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/perl
+# Replays a stream of 17-byte records read from STDIN: a one-character command followed by a 16-byte key.
+use strict;
+use warnings;
+# Command "A" stores the key in %h, command "C" looks it up; the counters are printed once input ends.
+my %h = (); # keys stored by "A" records
+my $added = 0; # number of "A" records processed
+my $found = 0; # "C" records whose key was present
+my $notfound = 0; # "C" records whose key was absent
+binmode(STDIN); # records are raw bytes: disable any newline/encoding translation
+while(1) {
+ my $hash = '';
+ last if((read(STDIN, $hash, 17) || 0) != 17); # stop at EOF, on a short trailing record, or on a read error (undef)
+ my $op = substr($hash, 0, 1);
+ $hash = substr($hash, 1); # drop the command character, keep the key
+ if($op eq "A") {
+ $h{$hash} = 1;
+ $added++;
+ } elsif ($op eq "C") {
+ if(exists($h{$hash})) {
+ $found++;
+ } else {
+ $notfound++;
+ }
+ } else {
+ die "bad command $op\n";
+ }
+}
+
+my $lookups = $found + $notfound;
+print "added: $added\nlooked up: $lookups (found $found, not found $notfound)\n";
+print "items in the hash: ".(scalar keys %h)."\n"; # print, not printf: the text is data, not a format string
+
diff --git a/hashes.lzma b/hashes.lzma
new file mode 100644
index 0000000..8f6fcc9
Binary files /dev/null and b/hashes.lzma differ
diff --git a/libclamav/cache.c b/libclamav/cache.c
index 4fca2d9..8fec98c 100644
--- a/libclamav/cache.c
+++ b/libclamav/cache.c
@@ -46,7 +46,7 @@ static mpool_t *mempool = NULL;
#ifdef USE_LRUHASHCACHE
struct cache_key {
- char digest[16];
+ int64_t digest[2]; /* the 16 digest bytes held as two 64-bit words, so a lookup can compare them with two integer tests */
uint32_t size; /* 0 is used to mark an empty hash slot! */
struct cache_key *lru_next, *lru_prev;
};
@@ -55,7 +55,9 @@ struct cache_set {
struct cache_key *data;
size_t capacity;
size_t maxelements; /* considering load factor */
+ size_t maxdeleted;
size_t elements;
+ size_t deleted;
size_t version;
struct cache_key *lru_head, *lru_tail;
};
@@ -99,28 +101,41 @@ static void cacheset_lru_remove(struct cache_set *map, size_t howmany)
if (old == map->lru_tail)
map->lru_tail = 0;
map->elements--;
+ map->deleted++;
}
}
-int cacheset_lookup_internal(struct cache_set *map, unsigned char *md5, size_t size, uint32_t *insert_pos, int deletedok)
+static inline int cacheset_lookup_internal(struct cache_set *map,
+ const char *md5, size_t size,
+ uint32_t *insert_pos, int deletedok)
{
- uint32_t idx = cli_readint32(md5+8) & (map->capacity -1);
- uint32_t tries = 0;
- struct cache_key *k = &map->data[idx];
- while (k->size != CACHE_KEY_EMPTY && tries < map->capacity) {
- if (k->size == size &&
- !memcmp(k->digest, md5, 16)) {
+ const struct cache_key*data = map->data;
+ uint32_t capmask = map->capacity - 1;
+ const struct cache_key *k;
+ uint32_t idx, tries = 0;
+ uint64_t md5_0, md5_1;
+ uint64_t md5a[2];
+
+ memcpy(&md5a, md5, 16);
+ md5_0 = md5a[0];
+ md5_1 = md5a[1];
+ idx = md5_1 & capmask;
+ k = &data[idx];
+ while (k->size != CACHE_KEY_EMPTY && tries <= capmask) {
+ if (k->digest[0] == md5_0 &&
+ k->digest[1] == md5_1 &&
+ k->size == size) {
/* found key */
*insert_pos = idx;
return 1;
}
- if (deletedok && k->size == CACHE_KEY_DELETED) {
+ if (deletedok && k->size == CACHE_KEY_DELETED) {
/* treat deleted slot as empty */
*insert_pos = idx;
return 0;
- }
- idx = (idx + tries++)&(map->capacity-1);
- k = &map->data[idx];
+ }
+ idx = (idx + tries++) & capmask;
+ k = &data[idx];
}
/* found empty pos */
*insert_pos = idx;
@@ -148,17 +163,52 @@ static inline void lru_addtail(struct cache_set *map, struct cache_key *newkey)
map->lru_tail = newkey;
}
+static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER; /* held by cacheset_rehash() around its cacheset_init() and mpool_free() calls */
+
+static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size); /* forward declaration: cacheset_rehash() calls it before its definition */
+static int cacheset_init(struct cache_set *map, unsigned int entries); /* forward declaration: cacheset_rehash() calls it before its definition */
+
+static void cacheset_rehash(struct cache_set *map) /* Rebuilds map in a freshly allocated table of the same capacity. */
+{
+ unsigned i;
+ int ret;
+ struct cache_set tmp_set; /* replacement set, filled in and then copied over *map */
+ struct cache_key *key;
+ pthread_mutex_lock(&pool_mutex); /* pool_mutex is held around each use of the shared mempool here */
+ ret = cacheset_init(&tmp_set, map->capacity);
+ pthread_mutex_unlock(&pool_mutex);
+ if (ret)
+ return; /* init failed (e.g. CL_EMEM): leave map untouched */
+ tmp_set.deleted = 0; /* cacheset_init() does not initialise this counter and tmp_set is an uninitialised stack object */
+ key = map->lru_head;
+ for (i=0;key && i < tmp_set.maxelements/2;i++) { /* re-add at most maxelements/2 keys, walking the LRU list from its head */
+ cacheset_add(&tmp_set, (unsigned char*)&key->digest, key->size);
+ key = key->lru_next;
+ }
+ pthread_mutex_lock(&pool_mutex);
+ mpool_free(mempool, map->data); /* release the old table; key is not used past this point */
+ pthread_mutex_unlock(&pool_mutex);
+ memcpy(map, &tmp_set, sizeof(tmp_set)); /* NOTE(review): this also overwrites map->version with the value left by cacheset_init() - confirm intended */
+}
+
static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size)
{
int ret;
uint32_t pos;
struct cache_key *newkey;
- if (map->elements >= map->maxelements)
+
+ if (map->elements >= map->maxelements) {
cacheset_lru_remove(map, 1);
+ if (map->deleted >= map->maxdeleted) {
+ cacheset_rehash(map);
+ }
+ }
assert(map->elements < map->maxelements);
ret = cacheset_lookup_internal(map, md5, size, &pos, 1);
newkey = &map->data[pos];
+ if (newkey->size == CACHE_KEY_DELETED)
+ map->deleted--;
if (ret) {
/* was already added, remove from LRU list */
lru_remove(map, newkey);
@@ -178,6 +228,7 @@ static int cacheset_lookup(struct cache_set *map, unsigned char *md5, size_t siz
struct cache_key *newkey;
int ret;
uint32_t pos;
+
ret = cacheset_lookup_internal(map, md5, size, &pos, 0);
if (!ret)
return CACHE_INVALID_VERSION;
@@ -185,17 +236,16 @@ static int cacheset_lookup(struct cache_set *map, unsigned char *md5, size_t siz
/* update LRU position: move to tail */
lru_remove(map, newkey);
lru_addtail(map, newkey);
-
return map->version;
}
-
static int cacheset_init(struct cache_set *map, unsigned int entries) {
map->data = mpool_calloc(mempool, entries, sizeof(*map->data));
if (!map->data)
return CL_EMEM;
map->capacity = entries;
map->maxelements = 80*entries / 100;
+ map->maxdeleted = map->capacity - map->maxelements - 1;
map->elements = 0;
map->version = CACHE_INVALID_VERSION;
map->lru_head = map->lru_tail = NULL;
@@ -241,21 +291,86 @@ static int cacheset_init(struct cache_set *cs, unsigned int entries) {
return 0;
}
+/* static inline int64_t cmp(int64_t *a, int64_t *b) { */
+/* int64_t ret = a[1] - b[1]; */
+/* if(!ret) ret = a[0] - b[0]; */
+/* return ret; */
+/* } */
+
static inline int cmp(int64_t *a, int64_t *b) {
- int64_t ret = a[1] - b[1];
- if(!ret) ret = a[0] - b[0];
- return ret;
+ if(a[1] < b[1]) return -1; /* three-way compare returning -1/0/1; element [1] is the major key */
+ if(a[1] > b[1]) return 1;
+ if(a[0] == b[0]) return 0; /* [1] is equal here, so element [0] decides */
+ if(a[0] < b[0]) return -1;
+ return 1; /* explicit comparisons: a 64-bit difference can overflow and is truncated when returned as int */
}
+
+//#define PRINT_TREE
+#ifdef PRINT_TREE
+#define ptree printf /* tracing enabled: ptree(...) is a plain printf call */
+#else
+#define ptree (void) /* tracing disabled: ptree(a, b) becomes the discarded comma expression (void)(a, b); the arguments are still evaluated */
+#endif
+
+//#define CHECK_TREE
+#ifdef CHECK_TREE
+static int printtree(struct cache_set *cs, struct node *n, int d) { /* Dumps the subtree at n (depth d) via ptree and checks it; returns 1 if any check failed, else 0. */
+ int i;
+ int ab = 0; /* set to 1 as soon as an ordering or parent-link check fails */
+ if (n == NULL) return 0;
+ if(n == cs->root) ptree("--------------------------\n");
+ ab |= printtree(cs, n->right, d+1); /* right subtree is visited first, then the node, then the left subtree */
+ if(n->right) {
+ if(cmp(n->digest, n->right->digest) >= 0) { /* the right child must compare greater than its parent */
+ for (i=0; i<d; i++) ptree(" ");
+ ptree("^^^^ %lld >= %lld - %d\n", (long long)n->digest[1], (long long)n->right->digest[1], cmp(n->digest, n->right->digest)); /* cmp() returns int, hence %d */
+ ab = 1;
+ }
+ }
+ for (i=0; i<d; i++) ptree(" ");
+ ptree("%08x(%02u)\n", (unsigned int)((uint64_t)n->digest[1]>>48), (unsigned int)(n - cs->data)); /* top 16 bits of digest[1] and the node's index in cs->data, cast to match %x/%u */
+ if(n->left) {
+ if(cmp(n->digest, n->left->digest) <= 0) { /* the left child must compare smaller than its parent */
+ for (i=0; i<d; i++) ptree(" ");
+ ptree("vvvv %lld <= %lld - %d\n", (long long)n->digest[1], (long long)n->left->digest[1], cmp(n->digest, n->left->digest)); /* cmp() returns int, hence %d */
+ ab = 1;
+ }
+ }
+ if(d){ /* below the starting depth: the node must be linked from its parent */
+ if(!n->up) {
+ ptree("no parent!\n");
+ ab = 1;
+ } else {
+ if(n->up->left != n && n->up->right != n) {
+ ptree("broken parent\n");
+ ab = 1;
+ }
+ }
+ } else { /* depth 0: the node is treated as the root and must have no parent */
+ if(n->up) {
+ ptree("root with a parent!\n");
+ ab = 1;
+ }
+ }
+ ab |= printtree(cs, n->left, d+1);
+ return ab;
+}
+#else
+static inline int printtree(struct cache_set *cs, struct node *n, int d) { /* CHECK_TREE disabled: no traversal, always reports a consistent tree */
+ return 0;
+}
+#endif
+
static int splay(int64_t *md5, struct cache_set *cs) {
struct node next = {{0, 0}, NULL, NULL, NULL, NULL, NULL, 0}, *right = &next, *left = &next, *temp, *root = cs->root;
- int ret = 0;
+ int comp, found = 0;
if(!root)
return 0;
while(1) {
- int comp = cmp(md5, root->digest);
+ comp = cmp(md5, root->digest);
if(comp < 0) {
if(!root->left) break;
if(cmp(md5, root->left->digest) < 0) {
@@ -287,7 +402,7 @@ static int splay(int64_t *md5, struct cache_set *cs) {
left = root;
root = root->right;
} else {
- ret = 1;
+ found = 1;
break;
}
}
@@ -302,16 +417,78 @@ static int splay(int64_t *md5, struct cache_set *cs) {
if(next.left) next.left->up = root;
root->up = NULL;
cs->root = root;
-
- return ret;
+ return found;
}
-
static int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size) {
int64_t hash[2];
memcpy(hash, md5, 16);
- return splay(hash, cs) * 1337;
+ if(splay(hash, cs)) { /* hit: the matching node is used as cs->root from here on; size is not consulted */
+ struct node *o = cs->root->prev, *p = cs->root, *q = cs->root->next; /* p is the hit node, o and q its neighbours in the first..last chain */
+#ifdef PRINT_CHAINS
+ printf("promoting %02d\n", (int)(p - cs->data)); /* pointer differences are ptrdiff_t: cast to int for %d */
+ {
+ struct node *x = cs->first;
+ printf("before: ");
+ while(x) {
+ printf("%02d,", (int)(x - cs->data));
+ x=x->next;
+ }
+ printf(" --- ");
+ x=cs->last;
+ while(x) {
+ printf("%02d,", (int)(x - cs->data));
+ x=x->prev;
+ }
+ printf("\n");
+ }
+#endif
+#define TO_END_OF_CHAIN /* selects the move-to-end policy; the #else branch only swaps the node with its successor */
+#ifdef TO_END_OF_CHAIN
+ if(q) { /* q == NULL means p has no successor, so there is nothing to move */
+ if(o)
+ o->next = q;
+ else
+ cs->first = q;
+ q->prev = o; /* p is now unlinked from between o and q */
+ cs->last->next = p;
+ p->prev = cs->last;
+ p->next = NULL;
+ cs->last = p; /* p re-attached after the former last node */
+ }
+#else
+ if(cs->last != p) {
+ if(cs->last == q) cs->last = p;
+ if(o) o->next = q;
+ else cs->first = q;
+ p->next = q->next;
+ if(q->next) q->next->prev = p;
+ q->next = p;
+ q->prev = o;
+ p->prev = q; /* chain order is now o, q, p */
+ }
+#endif
+#ifdef PRINT_CHAINS
+ {
+ struct node *x = cs->first;
+ printf("after : ");
+ while(x) {
+ printf("%02d,", (int)(x - cs->data));
+ x=x->next;
+ }
+ printf(" --- ");
+ x=cs->last;
+ while(x) {
+ printf("%02d,", (int)(x - cs->data));
+ x=x->prev;
+ }
+ printf("\n");
+ }
+#endif
+ return 1337; /* same value the removed line produced on a hit (1 * 1337) */
+ }
+ return 0; /* miss */
}
static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size) {
@@ -322,39 +499,90 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size)
if(splay(hash, cs))
return; /* Already there */
+ ptree("1:\n");
+ if(printtree(cs, cs->root, 0)) {
+ abort();
+ }
+
newnode = cs->first;
+ //#define TAKE_FIRST
+#ifdef TAKE_FIRST
+ if((newnode->left || newnode->right || newnode->up)) {
+ if(!splay(newnode->digest, cs)) {
+ cli_errmsg("WTF\n");
+ abort();
+ }
+ if(!newnode->left) {
+ cs->root = newnode->right;
+ newnode->right->up = NULL;
+ } else if(!newnode->right) {
+ cs->root = newnode->left;
+ newnode->left->up = NULL;
+ } else {
+ cs->root = newnode->left;
+ newnode->left->up = NULL;
+ if(splay(newnode->digest, cs)) {
+ cli_errmsg("WTF #2\n");
+ abort();
+ }
+ cs->root->up = NULL;
+ cs->root->right = newnode->right;
+ if(newnode->right) newnode->right->up = cs->root;
+ }
+ newnode->up = NULL;
+ newnode->right = NULL;
+ newnode->left = NULL;
+ if(splay(hash, cs)) {
+ cli_errmsg("WTF #3\n");
+ abort();
+ }
+ }
+ newnode->prev = cs->last;
+ cs->last->next = newnode;
+ cs->last = newnode;
+ newnode->next->prev = NULL;
+ cs->first = newnode->next;
+ newnode->next = NULL;
+
+#else
while(newnode) {
- if(!newnode->right && !newnode->left)
- break;
- newnode = newnode->next;
+ if(!newnode->right && !newnode->left)
+ break;
+ newnode = newnode->next;
}
if(!newnode) {
- cli_errmsg("NO NEWNODE!\n");
- abort();
+ cli_errmsg("NO NEWNODE!\n");
+ abort();
}
if(newnode->up) {
- if(newnode->up->left == newnode)
- newnode->up->left = NULL;
- else
- newnode->up->right = NULL;
+ if(newnode->up->left == newnode)
+ newnode->up->left = NULL;
+ else
+ newnode->up->right = NULL;
}
if(newnode->prev)
- newnode->prev->next = newnode->next;
+ newnode->prev->next = newnode->next;
if(newnode->next)
- newnode->next->prev = newnode->prev;
+ newnode->next->prev = newnode->prev;
if(cs->first == newnode)
- cs->first = newnode->next;
+ cs->first = newnode->next;
newnode->prev = cs->last;
newnode->next = NULL;
cs->last->next = newnode;
cs->last = newnode;
+#endif
+
+ ptree("2:\n");
+ if(printtree(cs, cs->root, 0)) {
+ abort();
+ }
if(!cs->root) {
newnode->left = NULL;
newnode->right = NULL;
} else {
- if(cmp(hash, cs->root->digest)) {
+ if(cmp(hash, cs->root->digest) < 0) {
newnode->left = cs->root->left;
newnode->right = cs->root;
cs->root->left = NULL;
@@ -370,14 +598,19 @@ static void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size)
newnode->digest[1] = hash[1];
newnode->up = NULL;
cs->root = newnode;
+
+ ptree("3: %lld\n", hash[1]);
+ if(printtree(cs, cs->root, 0)) {
+ abort();
+ }
}
#endif /* USE_SPLAY */
-/* #define TREES 1 */
-/* static inline unsigned int getkey(uint8_t *hash) { return 0; } */
+#define TREES 1 /* single-tree configuration; presumably the number of sets getkey() selects among - confirm against the callers */
+static inline unsigned int getkey(uint8_t *hash) { return 0; } /* ignores hash and always yields key 0 */
-#define TREES 256
-static inline unsigned int getkey(uint8_t *hash) { return *hash; }
+/* #define TREES 256 */
+/* static inline unsigned int getkey(uint8_t *hash) { return *hash; } */
/* #define TREES 4096 */
/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */
--
Debian repository for ClamAV
More information about the Pkg-clamav-commits
mailing list