[Forensics-changes] [yara] 120/415: Faster rule compilation by using hash table for rule names lookup
Hilko Bengen
bengen at moszumanska.debian.org
Thu Apr 3 05:42:53 UTC 2014
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to branch debian
in repository yara.
commit f27c49fbcaaf88ee5c999f1343cb1daead7a1537
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Thu Feb 23 11:20:03 2012 +0000
Faster rule compilation by using hash table for rule names lookup
---
REVISION | 2 +-
libyara/Makefile.am | 1 +
libyara/Makefile.in | 5 +++-
libyara/ast.c | 46 ++++++++++++++++++++++++++----
libyara/hash.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++
libyara/hash.h | 23 +++++++++++++++
libyara/libyara.c | 1 +
libyara/yara.h | 15 ++++++++--
8 files changed, 165 insertions(+), 10 deletions(-)
diff --git a/REVISION b/REVISION
index edfbde3..c329d2d 100644
--- a/REVISION
+++ b/REVISION
@@ -1 +1 @@
-#define REVISION "131"
+#define REVISION "136"
diff --git a/libyara/Makefile.am b/libyara/Makefile.am
index 27b1b14..a714fc1 100644
--- a/libyara/Makefile.am
+++ b/libyara/Makefile.am
@@ -18,6 +18,7 @@ libyara_la_SOURCES = \
mem.c \
proc.c \
weight.c \
+ hash.c \
libyara.c \
lex.h \
ast.h \
diff --git a/libyara/Makefile.in b/libyara/Makefile.in
index c29202d..440c94c 100644
--- a/libyara/Makefile.in
+++ b/libyara/Makefile.in
@@ -65,7 +65,8 @@ libLTLIBRARIES_INSTALL = $(INSTALL)
LTLIBRARIES = $(lib_LTLIBRARIES)
libyara_la_DEPENDENCIES = regex/libregex.la
am_libyara_la_OBJECTS = grammar.lo lex.lo ast.lo scan.lo filemap.lo \
- eval.lo exe.lo xtoi.lo mem.lo proc.lo weight.lo libyara.lo
+ eval.lo exe.lo xtoi.lo mem.lo proc.lo weight.lo hash.lo \
+ libyara.lo
libyara_la_OBJECTS = $(am_libyara_la_OBJECTS)
DEFAULT_INCLUDES = -I. at am__isrc@
depcomp = $(SHELL) $(top_srcdir)/../depcomp
@@ -248,6 +249,7 @@ libyara_la_SOURCES = \
mem.c \
proc.c \
weight.c \
+ hash.c \
libyara.c \
lex.h \
ast.h \
@@ -366,6 +368,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/exe.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/filemap.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/grammar.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/hash.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/lex.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libyara.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/mem.Plo at am__quote@
diff --git a/libyara/ast.c b/libyara/ast.c
index 58c6b2a..8a20885 100644
--- a/libyara/ast.c
+++ b/libyara/ast.c
@@ -28,17 +28,29 @@ GNU General Public License for more details.
RULE* lookup_rule(RULE_LIST* rules, const char* identifier, NAMESPACE* ns)
{
- RULE* rule = rules->head;
+ RULE_LIST_ENTRY* entry;
+ RULE* rule;
- while (rule != NULL)
+ unsigned int key;
+
+ key = hash(0, identifier, strlen(identifier));
+ key = hash(key, ns->name, strlen(ns->name));
+ key = key % RULE_LIST_HASH_TABLE_SIZE;
+
+ entry = &rules->hash_table[key];
+
+ while (entry != NULL)
{
- if (strcmp(rule->identifier, identifier) == 0 &&
+ rule = (RULE*) entry->rule;
+
+ if (rule != NULL &&
+ strcmp(rule->identifier, identifier) == 0 &&
strcmp(rule->ns->name, ns->name) == 0)
{
return rule;
}
- rule = rule->next;
+ entry = entry->next;
}
return NULL;
@@ -117,9 +129,11 @@ VARIABLE* lookup_variable(VARIABLE* variable_list_head, const char* identifier)
int new_rule(RULE_LIST* rules, char* identifier, NAMESPACE* ns, int flags, TAG* tag_list_head, META* meta_list_head, STRING* string_list_head, TERM* condition)
{
RULE* new_rule;
-
+ RULE_LIST_ENTRY* entry;
+
+ unsigned int key;
int result = ERROR_SUCCESS;
-
+
if (lookup_rule(rules, identifier, ns) == NULL) /* do not allow rules with the same identifier */
{
new_rule = (RULE*) yr_malloc(sizeof(RULE));
@@ -145,6 +159,26 @@ int new_rule(RULE_LIST* rules, char* identifier, NAMESPACE* ns, int flags, TAG*
rules->tail->next = new_rule;
rules->tail = new_rule;
}
+
+ key = hash(0, identifier, strlen(identifier));
+ key = hash(key, ns->name, strlen(ns->name));
+ key = key % RULE_LIST_HASH_TABLE_SIZE;
+
+ if (rules->hash_table[key].rule == NULL)
+ {
+ rules->hash_table[key].rule = new_rule;
+ }
+ else
+ {
+ entry = (RULE_LIST_ENTRY*) yr_malloc(sizeof(RULE_LIST_ENTRY));
+
+ if (entry == NULL)
+ return ERROR_INSUFICIENT_MEMORY;
+
+ entry->rule = new_rule;
+ entry->next = rules->hash_table[key].next;
+ rules->hash_table[key].next = entry;
+ }
}
else
{
diff --git a/libyara/hash.c b/libyara/hash.c
new file mode 100644
index 0000000..39a445a
--- /dev/null
+++ b/libyara/hash.c
@@ -0,0 +1,82 @@
+/*
+
+Copyright(c) 2012. Victor M. Alvarez [plusvic at gmail.com].
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+*/
+
+#include "hash.h"
+
+#define ROTATE_INT32(x, shift) ((x << shift) | (x >> (32 - shift)))
+
+#ifdef WIN32
+#define inline __inline
+#endif
+
+unsigned int byte_to_int32[] =
+{
+ 0xC3113E7F,0x4C353C5F,0x7423810B,0x258D264E,0xDAD39DED,0x75D0B694,0x98CE1216,0x93334482,
+ 0xC5C48EA5,0xF57E0E8B,0x5D7F3723,0x396B1B24,0xA8883D9F,0xB2A74A00,0xF8E171AE,0x3F01FBAB,
+ 0x5C1840CB,0xDDD833C4,0x8D8CCA34,0x32EF223A,0x1A05B871,0x9A9B6BFC,0x50406A0C,0xE7E1FC04,
+ 0x5E07D7F6,0x80B83660,0x20892A62,0xB2C6FEA6,0x6CEC7CAA,0x182F764B,0x3B0353E7,0x57FC2520,
+ 0x4B6812D4,0xACB654E4,0x23C75C04,0xB1DCD731,0xE3AF0733,0xF2366D39,0xC729671B,0xFF3BE6F2,
+ 0xABA37E34,0x3CDAFA38,0xAAD18D03,0xA8D35345,0x08E9A92C,0xF9324059,0x42D821BE,0x1BC152DD,
+ 0x5588811C,0x874A1F9A,0x6E83E9CD,0xDA6F3AF8,0x965D4670,0xA7A565C0,0x68D8A9AF,0xFC8FD8FD,
+ 0x8FF99FF9,0x4C9B42AE,0x2D066A8D,0x4D1802F7,0x557032B2,0x12BCF371,0xDC29D5AE,0x72EA361F,
+ 0xE2835B0B,0xDFC58966,0x13B0F34D,0x3FA02BCD,0xBF282E3D,0x7DC877F5,0xF4848A32,0x861E35F5,
+ 0x7FFA0D7F,0x515F2E4E,0x6B235D5C,0x55F46E24,0x35AD2C99,0x072654A8,0x05163F0F,0x9317B11A,
+ 0xAED1FC10,0x989444F0,0xDB3E1814,0x446C0CF1,0x660BF511,0x2F227D3A,0xFDBA0539,0xC649E621,
+ 0x5204D7CE,0x5FA386D0,0xE5F22005,0x97B6C8A1,0x4AB69EC2,0x5C7CA70D,0x39A48EC6,0x7BACF378,
+ 0x8D0ED3D1,0xE39DE582,0xC5FBE2AB,0x37E3D2D0,0x06F44724,0x73144144,0xBA57E905,0xB05B4307,
+ 0xAEED8D97,0xA68CCAC4,0xE30DA57E,0xED0F194B,0x8C2B9B7A,0x814575D5,0x79588493,0x81D3712A,
+ 0x3FA892F2,0x80F0BB94,0x44EAF51A,0x4E05F1D4,0xFC69F858,0x775E8D60,0x22B20DD7,0x170A87EA,
+ 0x1077DE52,0x3D5EC9FB,0x0B6EB1E5,0xF2F9CCAF,0xA76C7DEB,0xD8C2D873,0xF438C592,0x6239FEEC,
+ 0x26D3D2A9,0x30F6FADF,0x4B2984CC,0x6257F3DA,0x0E0583E2,0x143E5E61,0xBB2732BF,0x9653217A,
+ 0x027A84EA,0x95C9AE8B,0x89B8B82B,0x9F286485,0x29F622FE,0x52A3196B,0x8392D95F,0x33A79167,
+ 0xF5DEE92A,0x6E397DB9,0x11931C01,0x8DD2CD3B,0xF9E6003D,0xAB955AF4,0xD38725F9,0xDCF6F8AE,
+ 0x7667A958,0xE67AD995,0xB7CF979A,0xD88EBE5B,0x5BA889F0,0x078BDD90,0x447238F9,0x3135F672,
+ 0x187B95A8,0x0B7D5751,0xACD59D2A,0x9C5D1929,0x579E5022,0xEA90499B,0x59901800,0x82237DB5,
+ 0x7A375509,0xACA9A22A,0xEC96E649,0x69339DB0,0x081D0D9B,0xD72FB8B9,0xA4184653,0xC057321D,
+ 0xED19CAB9,0xB48F1E3E,0xB9DAC51E,0xDAED2FC7,0x7598CBBD,0x208DF346,0x044BE6EC,0x1C63E6EB,
+ 0xA15F64C1,0xE024A061,0x68309584,0x0758A68D,0xF274E9AE,0x0ABEA0CC,0xED4FB267,0x63D6EC46,
+ 0x9F28E026,0xF0694A17,0x9D6E9115,0xC4600FAD,0x5B121E99,0xD6B4A13B,0xF5364B8A,0x8514B254,
+ 0x0182F8DD,0xDB09F90B,0x78C70B32,0xD8EC3B02,0x8CD7084D,0xA4439838,0x72F35A3D,0x200B48A5,
+ 0xE2351444,0xA5552F5F,0xD8C1E746,0x0FE5EF3C,0xB6A47063,0x61F4E68B,0x08FED99B,0x7E461445,
+ 0x43CB8380,0x28BA03C8,0x21A7A2E2,0x43437ED6,0x2A9E6670,0x89B4A106,0xC6C2F4EE,0x9C4063CC,
+ 0x2FA0DF6C,0xB54DC409,0xCF01538F,0x616431D7,0x02CB0E4D,0x44FFF425,0xAAD5188E,0x0742E9BC,
+ 0xFFF41353,0x130F0A15,0x787BDC10,0x4A327B72,0x702989F7,0x5F704798,0x8156A1BB,0x2BCA3E74,
+ 0x1911A8C4,0x5E1F27D3,0x07949DC7,0xF24C2056,0xB4299EE6,0x9C7045D9,0xA8BF6307,0x7454AAD2,
+ 0x256425E5,0xD87DEF67,0xCFE95452,0xE7548DF7,0xA84956C7,0xD8402C60,0xCFBD0373,0x6B6CDAFE
+};
+
+
+inline unsigned int hash(unsigned int seed, const unsigned char* buffer, int len)
+{
+ int i;
+ unsigned int result = seed;
+
+ for (i = len - 1; i > 0; i--)
+ {
+ result ^= ROTATE_INT32(byte_to_int32[*buffer], i);
+ buffer++;
+ }
+
+ result ^= byte_to_int32[*buffer];
+ return result;
+}
+
+
+inline unsigned int hash_update(unsigned int hash, unsigned char new, unsigned char old, int len)
+{
+ return ROTATE_INT32(hash, 1) ^ ROTATE_INT32(byte_to_int32[old], len) ^ byte_to_int32[new];
+}
+
diff --git a/libyara/hash.h b/libyara/hash.h
new file mode 100644
index 0000000..b886be7
--- /dev/null
+++ b/libyara/hash.h
@@ -0,0 +1,23 @@
+/*
+
+Copyright(c) 2012. Victor M. Alvarez [plusvic at gmail.com].
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+*/
+
+#ifndef _HASH_H
+#define _HASH_H
+
+inline unsigned int hash(unsigned int seed, const unsigned char* buffer, int len);
+inline unsigned int hash_update(unsigned int hash, unsigned char new, unsigned char old, int len);
+
+#endif
diff --git a/libyara/libyara.c b/libyara/libyara.c
index f6af0e6..4d32ebe 100644
--- a/libyara/libyara.c
+++ b/libyara/libyara.c
@@ -61,6 +61,7 @@ YARA_CONTEXT* yr_create_context()
context->fast_match = FALSE;
context->scanning_process_memory = FALSE;
+ memset(context->rule_list.hash_table, 0, sizeof(context->rule_list.hash_table));
memset(context->hash_table.hashed_strings_2b, 0, sizeof(context->hash_table.hashed_strings_2b));
memset(context->hash_table.hashed_strings_1b, 0, sizeof(context->hash_table.hashed_strings_1b));
diff --git a/libyara/yara.h b/libyara/yara.h
index a7f0c0c..db7ffc3 100644
--- a/libyara/yara.h
+++ b/libyara/yara.h
@@ -228,10 +228,21 @@ typedef struct _STRING_LIST_ENTRY
} STRING_LIST_ENTRY;
+typedef struct _RULE_LIST_ENTRY
+{
+ RULE* rule;
+ struct _RULE_LIST_ENTRY* next;
+
+} RULE_LIST_ENTRY;
+
+
+#define RULE_LIST_HASH_TABLE_SIZE 10007
+
typedef struct _RULE_LIST
{
- RULE* head;
- RULE* tail;
+ RULE* head;
+ RULE* tail;
+ RULE_LIST_ENTRY hash_table[RULE_LIST_HASH_TABLE_SIZE];
} RULE_LIST;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list