[Forensics-changes] [yara] 178/415: Implemented warnings, fast scan mode, and list based nodes in Aho-Corasick automaton.

Hilko Bengen bengen at moszumanska.debian.org
Thu Apr 3 05:43:02 UTC 2014


This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to branch debian
in repository yara.

commit fd4a59d613d9c7cf9067083e48b5f6747647a7db
Author: Victor M. Alvarez <plusvic at gmail.com>
Date:   Wed Jun 26 10:48:30 2013 +0000

    Implemented warnings, fast scan mode, and list based nodes in Aho-Corasick automaton.
---
 Makefile.am               |   2 +-
 libyara/Makefile.am       |   2 +-
 libyara/ahocorasick.c     | 239 ++++++++++++++++++++++++++++++++++------------
 libyara/arena.c           |   4 +-
 libyara/lex.c             |   2 +
 libyara/lex.l             |   2 +
 libyara/parser.c          |  54 ++++++++++-
 libyara/rules.c           |  52 ++++++----
 libyara/yara.h            |  77 +++++++++++----
 yara-python/yara-python.c |  23 +++--
 yara.c                    |  64 ++++++++-----
 yarac.c                   |   8 +-
 12 files changed, 396 insertions(+), 133 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 8e16cfc..9e8466c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,4 +1,4 @@
-AM_CFLAGS=-g -O0
+AM_CFLAGS=-g -O3
 
 # Build the library in the hand subdirectory first.
 SUBDIRS = libyara
diff --git a/libyara/Makefile.am b/libyara/Makefile.am
index 0349a33..d72da7f 100644
--- a/libyara/Makefile.am
+++ b/libyara/Makefile.am
@@ -1,6 +1,6 @@
 AM_YFLAGS = -d
 
-CFLAGS=-g -O0
+CFLAGS=-g -O3
 
 ACLOCAL_AMFLAGS=-I m4
 
diff --git a/libyara/ahocorasick.c b/libyara/ahocorasick.c
index 9a818d7..6038ca8 100644
--- a/libyara/ahocorasick.c
+++ b/libyara/ahocorasick.c
@@ -25,6 +25,7 @@ limitations under the License.
 
 
 #define MAX_TOKEN 4
+#define MAX_TABLE_BASED_STATES_DEPTH 1
 
 
 #ifndef min
@@ -143,8 +144,70 @@ int _yr_ac_queue_is_empty(
 }
 
 
+// Returns the child of state following the one returned last, or NULL when
+// none is left. *iterator carries the position between calls: the next table
+// index to probe (table based states) or the address of the transition
+// returned last (list based states). Call _yr_ac_first_child beforehand.
+AC_STATE* _yr_ac_next_child(
+  AC_STATE* state,
+  int64_t* iterator)
+{
+  int i;
+  AC_TABLE_BASED_STATE* table_based_state;
+  AC_STATE_TRANSITION* transition;
+
+  if (state->depth <= MAX_TABLE_BASED_STATES_DEPTH)
+  {
+    table_based_state = (AC_TABLE_BASED_STATE*) state;
+
+    for (i = (int) *iterator; i < 256; i++)
+    {
+      if (table_based_state->transitions[i].state != NULL)
+      {
+        *iterator = i + 1;
+        return table_based_state->transitions[i].state;
+      }
+    }
+
+    return NULL;
+  }
+
+  transition = ((AC_STATE_TRANSITION*) *iterator)->next;
+  if (transition == NULL)
+    return NULL;
+
+  *iterator = (int64_t) transition;
+  return transition->state;
+}
+
+
+// Starts an iteration over the children of state: primes *iterator and
+// returns the first child, or NULL if state has no children.
+AC_STATE* _yr_ac_first_child(
+  AC_STATE* state,
+  int64_t* iterator)
+{
+  AC_STATE_TRANSITION* head;
+
+  if (state->depth <= MAX_TABLE_BASED_STATES_DEPTH)
+  {
+    // Table based state: scan the transition table from index 0.
+    *iterator = 0;
+    return _yr_ac_next_child(state, iterator);
+  }
+
+  head = ((AC_LIST_BASED_STATE*) state)->transitions;
+
+  if (head == NULL)
+    return NULL;
+
+  // List based state: remember the transition being handed out.
+  *iterator = (int64_t) head;
+  return head->state;
+}
+
 //
-// _yr_ac_next_state
+// yr_ac_next_state
 //
 // Given an automaton state and an input symbol, returns the new state
 // after reading the input symbol.
@@ -157,11 +220,30 @@ int _yr_ac_queue_is_empty(
 //   Pointer to the next automaton state.
 //
 
-AC_STATE* _yr_ac_next_state(
+inline AC_STATE* yr_ac_next_state(
     AC_STATE* state,
     uint8_t input)
 {
-  return state->transitions[input].state;
+  AC_STATE_TRANSITION* transition;
+
+  if (state->depth <= MAX_TABLE_BASED_STATES_DEPTH)
+  {
+    return ((AC_TABLE_BASED_STATE*) state)->transitions[input].state;
+  }
+  else
+  {
+    transition = ((AC_LIST_BASED_STATE*) state)->transitions;
+
+    while (transition != NULL)
+    {
+      if (transition->input == input)
+        return transition->state;
+
+      transition = transition->next;
+    }
+
+    return NULL;
+  }
 }
 
 
@@ -187,33 +269,71 @@ AC_STATE* _yr_ac_create_state(
 {
   int result;
   AC_STATE* new_state;
+  AC_LIST_BASED_STATE* list_based_state;
+  AC_TABLE_BASED_STATE* table_based_state;
+  AC_STATE_TRANSITION* new_transition;
 
-  result = yr_arena_allocate_struct(
-      arena,
-      sizeof(AC_STATE),
-      (void**) &new_state,
-      offsetof(AC_STATE, failure),
-      offsetof(AC_STATE, matches),
-      EOL);
+  if (state->depth < MAX_TABLE_BASED_STATES_DEPTH)
+  {
+    result = yr_arena_allocate_struct(
+        arena,
+        sizeof(AC_TABLE_BASED_STATE),
+        (void**) &new_state,
+        offsetof(AC_TABLE_BASED_STATE, failure),
+        offsetof(AC_TABLE_BASED_STATE, matches),
+        EOL);
+  }
+  else
+  {
+    result = yr_arena_allocate_struct(
+        arena,
+        sizeof(AC_LIST_BASED_STATE),
+        (void**) &new_state,
+        offsetof(AC_LIST_BASED_STATE, failure),
+        offsetof(AC_LIST_BASED_STATE, matches),
+        offsetof(AC_LIST_BASED_STATE, transitions),
+        EOL);
+  }
 
   if (result != ERROR_SUCCESS)
     return NULL;
 
-  result = yr_arena_make_relocatable(
-      arena,
-      state,
-      offsetof(AC_STATE, transitions[input]),
-      EOL);
+  if (state->depth <= MAX_TABLE_BASED_STATES_DEPTH)
+  {
+    result = yr_arena_make_relocatable(
+        arena,
+        state,
+        offsetof(AC_TABLE_BASED_STATE, transitions[input]),
+        EOL);
 
-  if (result != ERROR_SUCCESS)
-    return NULL;
+    if (result != ERROR_SUCCESS)
+      return NULL;
+
+    table_based_state = (AC_TABLE_BASED_STATE*) state;
+    table_based_state->transitions[input].state = new_state;
+  }
+  else
+  {
+    result = yr_arena_allocate_struct(
+        arena,
+        sizeof(AC_STATE_TRANSITION),
+        (void**) &new_transition,
+        offsetof(AC_STATE_TRANSITION, state),
+        offsetof(AC_STATE_TRANSITION, next),
+        EOL);
 
-  state->transitions[input].state = new_state;
+    if (result != ERROR_SUCCESS)
+      return NULL;
 
-  new_state->depth = state->depth + 1;
-  new_state->matches = NULL;
+    list_based_state = (AC_LIST_BASED_STATE*) state;
+
+    new_transition->input = input;
+    new_transition->state = new_state;
+    new_transition->next = list_based_state->transitions;
+    list_based_state->transitions = new_transition;
+  }
 
-  memset(new_state->transitions, 0, sizeof(new_state->transitions));
+  new_state->depth = state->depth + 1;
 
   return new_state;
 }
@@ -596,7 +716,7 @@ void _yr_ac_gen_tokens(
       str = output_buffer;
 
       memcpy(output_buffer, string->string, token_length);
-      ((uint8_t*) output_buffer) += token_length;
+      output_buffer += token_length;
 
       if (STRING_IS_NO_CASE(string))
       {
@@ -663,6 +783,8 @@ void yr_ac_create_failure_links(
 {
   int i;
 
+  int64_t iterator;
+
   AC_STATE* current_state;
   AC_STATE* failure_state;
   AC_STATE* temp_state;
@@ -683,13 +805,13 @@ void yr_ac_create_failure_links(
 
   // Push root's children and set their failure link to root.
 
-  for (i = 0; i < 256; i++)
+  state = _yr_ac_first_child(root_state, &iterator);
+
+  while (state != NULL)
   {
-    if (root_state->transitions[i].state != NULL)
-    {
-      _yr_ac_queue_push(&queue, root_state->transitions[i].state);
-      root_state->transitions[i].state->failure = root_state;
-    }
+    _yr_ac_queue_push(&queue, state);
+    state->failure = root_state;
+    state = _yr_ac_next_child(root_state, &iterator);
   }
 
   // Traverse the trie in BFS order calculating the failure link
@@ -714,19 +836,16 @@ void yr_ac_create_failure_links(
       current_state->matches = root_state->matches;
     }
 
-    for (i = 0; i < 256; i++)
-    {
-      transition_state = current_state->transitions[i].state;
-
-      if (transition_state == NULL)
-        continue;
+    transition_state = _yr_ac_first_child(current_state, &iterator);
 
+    while (transition_state != NULL)
+    {
       _yr_ac_queue_push(&queue, transition_state);
       failure_state = current_state->failure;
 
       while (1)
       {
-        temp_state = _yr_ac_next_state(failure_state, i);
+        temp_state = yr_ac_next_state(failure_state, i);
 
         if (temp_state != NULL)
         {
@@ -761,7 +880,10 @@ void yr_ac_create_failure_links(
           }
         }
       } // while(1)
+
+      transition_state = _yr_ac_next_child(current_state, &iterator);
     }
+
   } // while(!__yr_ac_queue_is_empty(&queue))
 }
 
@@ -791,10 +913,10 @@ int yr_ac_create_automaton(
 
   result = yr_arena_allocate_struct(
       arena,
-      sizeof(AC_STATE),
+      sizeof(AC_TABLE_BASED_STATE),
       (void**) &root_state,
-      offsetof(AC_STATE, failure),
-      offsetof(AC_STATE, matches),
+      offsetof(AC_TABLE_BASED_STATE, failure),
+      offsetof(AC_TABLE_BASED_STATE, matches),
       EOL);
 
   if (result != ERROR_SUCCESS)
@@ -805,8 +927,6 @@ int yr_ac_create_automaton(
   root_state->depth = 0;
   root_state->matches = NULL;
 
-  memset(root_state->transitions, 0, sizeof(root_state->transitions));
-
   return result;
 }
 
@@ -820,7 +940,8 @@ int yr_ac_create_automaton(
 int yr_ac_add_string(
     ARENA* arena,
     AC_AUTOMATON* automaton,
-    STRING* string)
+    STRING* string,
+    int* min_token_length)
 {
   int result;
   int token_length;
@@ -838,7 +959,7 @@ int yr_ac_add_string(
   // for the worst case which is a "ascii wide nocase" text string.
 
   tokens = yr_malloc(
-      2 * MAX_TOKEN * MAX_TOKEN * (2 * sizeof(int) + MAX_TOKEN) + sizeof(int));
+      2 * (1 << MAX_TOKEN) * (2 * sizeof(int) + MAX_TOKEN) + sizeof(int));
 
   if (tokens == NULL)
     return ERROR_INSUFICIENT_MEMORY;
@@ -857,6 +978,8 @@ int yr_ac_add_string(
 
   if (token_length == 0)
   {
+    *min_token_length = 0;
+
     // No token could be extracted from the string, put the string in the
     // automaton's root state. This is far from ideal, because the string will
     // be tried at every data offset during scanning.
@@ -881,31 +1004,21 @@ int yr_ac_add_string(
   {
     // For each token create the states in the automaton.
 
+    *min_token_length = MAX_TOKEN;
+
     while (token_length != 0)
     {
+      if (token_length < *min_token_length)
+        *min_token_length = token_length;
+
       state = automaton->root;
 
       token_backtrack = *((int*) tokens_cursor);
       tokens_cursor += sizeof(int);
 
-      /*if (token_length < 2)
-      {
-        printf("%s\n", string->string);
-        printf("%s\n", string->identifier);
-        for (i = 0; i < token_length; i++)
-          printf("%02X", *(tokens_cursor + i));
-
-        printf("\n");
-
-        tokens_cursor += token_length;
-          token_length = *((int*) tokens_cursor);
-        tokens_cursor += sizeof(int);
-        continue;
-      }*/
-
       for(i = 0; i < token_length; i++)
       {
-        next_state = _yr_ac_next_state(
+        next_state = yr_ac_next_state(
             state,
             *tokens_cursor);
 
@@ -970,8 +1083,10 @@ void _yr_ac_print_automaton_state(
 {
   int i;
   char* identifier;
+  int64_t iterator;
   STRING* string;
   AC_MATCH* match;
+  AC_STATE* child_state;
 
   for (i = 0; i < state->depth; i++)
     printf(" ");
@@ -988,10 +1103,12 @@ void _yr_ac_print_automaton_state(
 
   printf("\n");
 
-  for (i = 0; i < 256; i++)
+  child_state = _yr_ac_first_child(state, &iterator);
+
+  while(child_state != NULL)
   {
-    if (state->transitions[i].state != NULL)
-      _yr_ac_print_automaton_state(state->transitions[i].state);
+    _yr_ac_print_automaton_state(child_state);
+    child_state = _yr_ac_next_child(state, &iterator);
   }
 }
 
diff --git a/libyara/arena.c b/libyara/arena.c
index 313708d..3bae40f 100644
--- a/libyara/arena.c
+++ b/libyara/arena.c
@@ -115,8 +115,8 @@ ARENA_PAGE* _yr_arena_page_for_address(
 
   while (page != NULL)
   {
-    if (address >= page->address &&
-        address < page->address + page->used)
+    if ((uint8_t*) address >= page->address &&
+        (uint8_t*) address < page->address + page->used)
       return page;
     page = page->next;
   }
diff --git a/libyara/lex.c b/libyara/lex.c
index 39069f1..35b2dcc 100644
--- a/libyara/lex.c
+++ b/libyara/lex.c
@@ -2905,6 +2905,7 @@ void yyerror(yyscan_t yyscanner, const char *error_message)
     if (compiler->error_report_function != NULL)
     {
       compiler->error_report_function(
+          YARA_ERROR_LEVEL_ERROR,
           file_name,
           compiler->last_error_line,
           error_message);
@@ -2919,6 +2920,7 @@ void yyerror(yyscan_t yyscanner, const char *error_message)
       yr_compiler_get_error_message(compiler, message, sizeof(message));
 
       compiler->error_report_function(
+        YARA_ERROR_LEVEL_ERROR,
         file_name,
         compiler->last_error_line,
         message);
diff --git a/libyara/lex.l b/libyara/lex.l
index c42ffbb..89f48c7 100644
--- a/libyara/lex.l
+++ b/libyara/lex.l
@@ -523,6 +523,7 @@ void yyerror(yyscan_t yyscanner, const char *error_message)
     if (compiler->error_report_function != NULL)
     {
       compiler->error_report_function(
+          YARA_ERROR_LEVEL_ERROR,
           file_name,
           compiler->last_error_line,
           error_message);
@@ -537,6 +538,7 @@ void yyerror(yyscan_t yyscanner, const char *error_message)
       yr_compiler_get_error_message(compiler, message, sizeof(message));
 
       compiler->error_report_function(
+        YARA_ERROR_LEVEL_ERROR,
         file_name,
         compiler->last_error_line,
         message);
diff --git a/libyara/parser.c b/libyara/parser.c
index ccf6ec0..b836196 100644
--- a/libyara/parser.c
+++ b/libyara/parser.c
@@ -488,6 +488,10 @@ STRING* yr_parser_reduce_string_declaration(
     SIZED_STRING* str)
 {
   int error_offset;
+  int min_token_length;
+  char* file_name;
+  char warning_message[512];
+
   STRING* string;
   YARA_COMPILER* compiler = yyget_extra(yyscanner);
 
@@ -517,6 +521,14 @@ STRING* yr_parser_reduce_string_declaration(
   if (!(flags & STRING_FLAGS_WIDE))
     flags |= STRING_FLAGS_ASCII;
 
+  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
+  // a single match for the string is enough. This is true in
+  // most cases, except when the string count (#) and string offset (@)
+  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
+  // initially, and unmarked later if required.
+
+  flags |= STRING_FLAGS_SINGLE_MATCH;
+
   string->flags = flags;
   string->mask = NULL;
   string->re.regexp = NULL;
@@ -569,7 +581,30 @@ STRING* yr_parser_reduce_string_declaration(
   compiler->last_result = yr_ac_add_string(
       compiler->automaton_arena,
       compiler->automaton,
-      string);
+      string,
+      &min_token_length);
+
+  if (compiler->file_name_stack_ptr > 0)
+    file_name = compiler->file_name_stack[
+        compiler->file_name_stack_ptr - 1];
+  else
+    file_name = NULL;
+
+  if (min_token_length < 2 && compiler->error_report_function != NULL)
+  {
+    snprintf(
+        warning_message,
+        sizeof(warning_message),
+        "%s is slowing down scanning%s",
+        string->identifier,
+        min_token_length == 0 ? " (critical!)" : "");
+
+    compiler->error_report_function(
+        YARA_ERROR_LEVEL_WARNING,
+        file_name,
+        yyget_lineno(yyscanner),
+        warning_message);
+  }
 
   if (compiler->last_result != ERROR_SUCCESS)
     return NULL;
@@ -689,6 +724,20 @@ int yr_parser_reduce_string_identifier(
     {
       yr_parser_emit(yyscanner, PUSH_A, NULL);
       yr_parser_emit(yyscanner, instruction, NULL);
+
+      if (instruction != SFOUND)
+      {
+        string = compiler->current_rule_strings;
+
+        while(!STRING_IS_NULL(string))
+        {
+          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
+          string = yr_arena_next_address(
+              compiler->strings_arena,
+              string,
+              sizeof(STRING));
+        }
+      }
     }
     else
     {
@@ -707,6 +756,9 @@ int yr_parser_reduce_string_identifier(
           PTR_TO_UINT64(string),
           NULL);
 
+      if (instruction != SFOUND)
+        string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
+
       yr_parser_emit(yyscanner, instruction, NULL);
 
       string->flags |= STRING_FLAGS_REFERENCED;
diff --git a/libyara/rules.c b/libyara/rules.c
index 6d25409..526fb0f 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -595,7 +595,8 @@ void yr_rules_free_matches(
 int yr_rules_scan_mem_block(
     YARA_RULES* rules,
     uint8_t* data,
-    size_t data_size)
+    size_t data_size,
+    int fast_scan_mode)
 {
 
   AC_STATE* next_state;
@@ -615,25 +616,30 @@ int yr_rules_scan_mem_block(
     {
       if (i >= ac_match->backtrack)
       {
-        result = _yr_scan_verify_match(
-            ac_match,
-            data,
-            data_size,
-            i - ac_match->backtrack);
-
-        if (result != ERROR_SUCCESS)
-          return result;
+        if (!(fast_scan_mode &&
+              ac_match->string->flags & STRING_FLAGS_FOUND &&
+              ac_match->string->flags & STRING_FLAGS_SINGLE_MATCH))
+        {
+          result = _yr_scan_verify_match(
+              ac_match,
+              data,
+              data_size,
+              i - ac_match->backtrack);
+
+          if (result != ERROR_SUCCESS)
+            return result;
+        }
       }
 
       ac_match = ac_match->next;
     }
 
-    next_state = current_state->transitions[data[i]].state;
+    next_state = yr_ac_next_state(current_state, data[i]);
 
     while (next_state == NULL && current_state->depth > 0)
     {
       current_state = current_state->failure;
-      next_state = current_state->transitions[data[i]].state;
+      next_state = yr_ac_next_state(current_state, data[i]);
     }
 
     if (next_state != NULL)
@@ -667,7 +673,8 @@ int yr_rules_scan_mem_blocks(
     MEMORY_BLOCK* block,
     int scanning_process_memory,
     YARACALLBACK callback,
-    void* user_data)
+    void* user_data,
+    int fast_scan_mode)
 {
   RULE* rule;
   EVALUATION_CONTEXT context;
@@ -699,7 +706,8 @@ int yr_rules_scan_mem_blocks(
     result = yr_rules_scan_mem_block(
         rules,
         block->data,
-        block->size);
+        block->size,
+        fast_scan_mode);
 
     if (result != ERROR_SUCCESS)
       return result;
@@ -754,7 +762,8 @@ int yr_rules_scan_mem(
     uint8_t* buffer,
     size_t buffer_size,
     YARACALLBACK callback,
-    void* user_data)
+    void* user_data,
+    int fast_scan_mode)
 {
   MEMORY_BLOCK block;
 
@@ -768,7 +777,8 @@ int yr_rules_scan_mem(
       &block,
       FALSE,
       callback,
-      user_data);
+      user_data,
+      fast_scan_mode);
 }
 
 
@@ -776,7 +786,8 @@ int yr_rules_scan_file(
     YARA_RULES* rules,
     const char* filename,
     YARACALLBACK callback,
-    void* user_data)
+    void* user_data,
+    int fast_scan_mode)
 {
   MAPPED_FILE mfile;
   int result;
@@ -790,7 +801,8 @@ int yr_rules_scan_file(
         mfile.data,
         mfile.size,
         callback,
-        user_data);
+        user_data,
+        fast_scan_mode);
 
     yr_filemap_unmap(&mfile);
   }
@@ -803,7 +815,8 @@ int yr_rules_scan_proc(
     YARA_RULES* rules,
     int pid,
     YARACALLBACK callback,
-    void* user_data)
+    void* user_data,
+    int fast_scan_mode)
 {
   MEMORY_BLOCK* first_block;
   MEMORY_BLOCK* next_block;
@@ -819,7 +832,8 @@ int yr_rules_scan_proc(
         first_block,
         TRUE,
         callback,
-        user_data);
+        user_data,
+        fast_scan_mode);
 
   block = first_block;
 
diff --git a/libyara/yara.h b/libyara/yara.h
index 2acac0e..5603ade 100644
--- a/libyara/yara.h
+++ b/libyara/yara.h
@@ -130,17 +130,17 @@ limitations under the License.
 #define CALLBACK_ABORT     1
 #define CALLBACK_ERROR     2
 
-#define STRING_FLAGS_FOUND        0x01
-#define STRING_FLAGS_REFERENCED   0x02
-#define STRING_FLAGS_HEXADECIMAL  0x04
-#define STRING_FLAGS_NO_CASE      0x08
-#define STRING_FLAGS_ASCII        0x10
-#define STRING_FLAGS_WIDE         0x20
-#define STRING_FLAGS_REGEXP       0x40
-#define STRING_FLAGS_FULL_WORD    0x80
-#define STRING_FLAGS_ANONYMOUS    0x100
-#define STRING_FLAGS_FAST_MATCH   0x200
-#define STRING_FLAGS_NULL         0x1000
+#define STRING_FLAGS_FOUND          0x01
+#define STRING_FLAGS_REFERENCED     0x02
+#define STRING_FLAGS_HEXADECIMAL    0x04
+#define STRING_FLAGS_NO_CASE        0x08
+#define STRING_FLAGS_ASCII          0x10
+#define STRING_FLAGS_WIDE           0x20
+#define STRING_FLAGS_REGEXP         0x40
+#define STRING_FLAGS_FULL_WORD      0x80
+#define STRING_FLAGS_ANONYMOUS      0x100
+#define STRING_FLAGS_SINGLE_MATCH   0x200
+#define STRING_FLAGS_NULL           0x1000
 
 #define STRING_IS_HEX(x) \
     (((x)->flags) & STRING_FLAGS_HEXADECIMAL)
@@ -319,11 +319,41 @@ typedef struct _AC_STATE
 
   DECLARE_REFERENCE(struct _AC_STATE*, failure);
   DECLARE_REFERENCE(AC_MATCH*, matches);
-  DECLARE_REFERENCE(struct _AC_STATE*, state) transitions[256];
 
 } AC_STATE;
 
 
+typedef struct _AC_STATE_TRANSITION  // node in a list based state's list
+{
+  uint8_t input;  // input byte that selects this transition
+  DECLARE_REFERENCE(AC_STATE*, state);  // state reached on that byte
+  DECLARE_REFERENCE(struct _AC_STATE_TRANSITION*, next);  // next node or NULL
+
+} AC_STATE_TRANSITION;
+
+
+typedef struct _AC_TABLE_BASED_STATE
+{
+  int8_t depth;  // distance from the root state (root is 0)
+  // AC_STATE* is cast to this type when depth <= MAX_TABLE_BASED_STATES_DEPTH.
+  DECLARE_REFERENCE(AC_STATE*, failure);
+  DECLARE_REFERENCE(AC_MATCH*, matches);
+  DECLARE_REFERENCE(AC_STATE*, state) transitions[256];  // one per input byte
+
+} AC_TABLE_BASED_STATE;
+
+
+typedef struct _AC_LIST_BASED_STATE
+{
+  int8_t depth;  // distance from the root state (root is 0)
+  // AC_STATE* is cast to this type when depth > MAX_TABLE_BASED_STATES_DEPTH.
+  DECLARE_REFERENCE(AC_STATE*, failure);
+  DECLARE_REFERENCE(AC_MATCH*, matches);
+  DECLARE_REFERENCE(AC_STATE_TRANSITION*, transitions);  // list head or NULL
+
+} AC_LIST_BASED_STATE;
+
+
 typedef struct _AC_AUTOMATON
 {
   DECLARE_REFERENCE(AC_STATE*, root);
@@ -362,10 +392,14 @@ typedef struct _HASH_TABLE
 } HASH_TABLE;
 
 
+#define YARA_ERROR_LEVEL_ERROR   0
+#define YARA_ERROR_LEVEL_WARNING 1
+
 typedef void (*YARAREPORT)(
+    int error_level,
     const char* file_name,
     int line_number,
-    const char* error_message);
+    const char* message);
 
 
 typedef int (*YARACALLBACK)(
@@ -528,21 +562,24 @@ int yr_rules_scan_mem(
     uint8_t* buffer,
     size_t buffer_size,
     YARACALLBACK callback,
-    void* user_data);
+    void* user_data,
+    int fast_scan_mode);
 
 
 int yr_rules_scan_file(
     YARA_RULES* rules,
     const char* filename,
     YARACALLBACK callback,
-    void* user_data);
+    void* user_data,
+    int fast_scan_mode);
 
 
 int yr_rules_scan_proc(
     YARA_RULES* rules,
     int pid,
     YARACALLBACK callback,
-    void* user_data);
+    void* user_data,
+    int fast_scan_mode);
 
 
 int yr_rules_save(
@@ -585,7 +622,13 @@ int yr_ac_create_automaton(
 int yr_ac_add_string(
     ARENA* arena,
     AC_AUTOMATON* automaton,
-    STRING* string);
+    STRING* string,
+    int* min_token_length);
+
+
+AC_STATE* yr_ac_next_state(
+    AC_STATE* state,
+    uint8_t input);
 
 
 void yr_ac_create_failure_links(
diff --git a/yara-python/yara-python.c b/yara-python/yara-python.c
index 6508676..1335fc5 100644
--- a/yara-python/yara-python.c
+++ b/yara-python/yara-python.c
@@ -743,7 +743,7 @@ static PyObject * Rules_match(
     PyObject *keywords)
 {
   static char *kwlist[] = {
-      "filepath", "pid", "data", "externals", "callback", NULL};
+      "filepath", "pid", "data", "externals", "callback", "fast", NULL};
 
   char* filepath = NULL;
   char* data = NULL;
@@ -751,8 +751,10 @@ static PyObject * Rules_match(
   int pid = 0;
   int length;
   int error;
+  int fast_mode = FALSE;
 
   PyObject *externals = NULL;
+  PyObject *fast = NULL;
   Rules* object = (Rules*) self;
 
   CALLBACK_DATA callback_data;
@@ -763,14 +765,15 @@ static PyObject * Rules_match(
   if (PyArg_ParseTupleAndKeywords(
         args,
         keywords,
-        "|sis#OO",
+        "|sis#OOO",
         kwlist,
         &filepath,
         &pid,
         &data,
         &length,
         &externals,
-        &callback_data.callback))
+        &callback_data.callback,
+        &fast))
   {
     if (externals != NULL)
     {
@@ -801,6 +804,11 @@ static PyObject * Rules_match(
       }
     }
 
+    if (fast != NULL)
+    {
+      fast_mode = (PyObject_IsTrue(fast) == 1);
+    }
+
     if (filepath != NULL)
     {
       callback_data.matches = PyList_New(0);
@@ -811,7 +819,8 @@ static PyObject * Rules_match(
           object->rules,
           filepath,
           yara_callback,
-          &callback_data);
+          &callback_data,
+          fast_mode);
 
       Py_END_ALLOW_THREADS
 
@@ -836,7 +845,8 @@ static PyObject * Rules_match(
           (unsigned char*) data,
           (unsigned int) length,
           yara_callback,
-          &callback_data);
+          &callback_data,
+          fast_mode);
 
       Py_END_ALLOW_THREADS
 
@@ -860,7 +870,8 @@ static PyObject * Rules_match(
           object->rules,
           pid,
           yara_callback,
-          &callback_data);
+          &callback_data,
+          fast_mode);
 
       Py_END_ALLOW_THREADS
 
diff --git a/yara.c b/yara.c
index 4a8903c..5394416 100644
--- a/yara.c
+++ b/yara.c
@@ -51,6 +51,7 @@ int show_specified_tags = FALSE;
 int show_specified_rules = FALSE;
 int show_strings = FALSE;
 int show_meta = FALSE;
+int fast_scan = FALSE;
 int negate = FALSE;
 int count = 0;
 int limit = 0;
@@ -94,29 +95,27 @@ TAG* specified_tags_list = NULL;
 IDENTIFIER* specified_rules_list = NULL;
 EXTERNAL* externals_list = NULL;
 
-
-////////////////////////////////////////////////////////////////////////////////////////////////
+#define USAGE \
+"usage:  yara [OPTION]... RULES_FILE FILE | PID\n"\
+"options:\n"\
+"  -t <tag>                 only print rules tagged as <tag>.\n"\
+"  -i <identifier>          only print rules named <identifier>.\n"\
+"  -n                       only print not satisfied rules (negate).\n"\
+"  -g                       print tags.\n"\
+"  -m                       print metadata.\n"\
+"  -s                       print matching strings.\n"\
+"  -l <number>              abort scanning after matching <number> rules.\n"\
+"  -d <identifier>=<value>  define external variable.\n"\
+"  -r                       recursively search directories.\n"\
+"  -v                       show version information.\n"
 
 void show_help()
 {
-  printf("usage:  yara [OPTION]... RULES_FILE FILE | PID\n");
-  printf("options:\n");
-  printf("  -t <tag>                  print rules tagged as <tag> and ignore the rest. Can be used more than once.\n");
-  printf("  -i <identifier>           print rules named <identifier> and ignore the rest. Can be used more than once.\n");
-  printf("  -n                        print only not satisfied rules (negate).\n");
-  printf("  -g                        print tags.\n");
-  printf("  -m                        print metadata.\n");
-  printf("  -s                        print matching strings.\n");
-  printf("  -l <number>               abort scanning after a <number> of rules matched.\n");
-  printf("  -d <identifier>=<value>   define external variable.\n");
-  printf("  -r                        recursively search directories.\n");
-  printf("  -v                        show version information.\n");
+  printf(USAGE);
   printf("\nReport bugs to: <%s>\n", PACKAGE_BUGREPORT);
 }
 
 
-////////////////////////////////////////////////////////////////////////////////////////////////
-
 int is_numeric(
     const char *str)
 {
@@ -172,8 +171,12 @@ int scan_dir(
 
       if (!(FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
       {
-        //printf("Processing %s...\n", FindFileData.cFileName);
-        result = yr_rules_scan_file(rules, full_path, callback, full_path);
+        result = yr_rules_scan_file(
+            rules,
+            full_path,
+            callback,
+            full_path,
+            fast_scan);
       }
       else if (recursive && FindFileData.cFileName[0] != '.' )
       {
@@ -235,7 +238,12 @@ int scan_dir(
       {
         if(S_ISREG(st.st_mode))
         {
-          result = yr_rules_scan_file(rules, full_path, callback, full_path);
+          result = yr_rules_scan_file(
+              rules,
+              full_path,
+              callback,
+              full_path,
+              fast_scan);
         }
         else if(recursive && S_ISDIR(st.st_mode) && de->d_name[0] != '.')
         {
@@ -500,6 +508,10 @@ int process_cmd_line(
         show_strings = TRUE;
         break;
 
+      case 'f':
+        fast_scan = TRUE;
+        break;
+
       case 'n':
         negate = TRUE;
         break;
@@ -607,11 +619,15 @@ int process_cmd_line(
 }
 
 void report_error(
+    int error_level,
     const char* file_name,
     int line_number,
-    const char* error_message)
+    const char* message)
 {
-  fprintf(stderr, "%s:%d: %s\n", file_name, line_number, error_message);
+  // file_name may be NULL (no file on the compiler's stack); %s needs non-NULL.
+  fprintf(stderr, "%s(%d): %s: %s\n", file_name != NULL ? file_name : "(null)",
+      line_number, error_level == YARA_ERROR_LEVEL_ERROR ? "error" : "warning",
+      message);
 }
 
 
@@ -780,7 +796,8 @@ int main(
         rules,
         pid,
         callback,
-        (void*) argv[argc - 1]);
+        (void*) argv[argc - 1],
+        fast_scan);
   }
   else if (is_directory(argv[argc - 1]))
   {
@@ -796,7 +813,8 @@ int main(
         rules,
         argv[argc - 1],
         callback,
-        (void*) argv[argc - 1]);
+        (void*) argv[argc - 1],
+        fast_scan);
   }
 
   switch (result)
diff --git a/yarac.c b/yarac.c
index 603eca1..0d57c12 100644
--- a/yarac.c
+++ b/yarac.c
@@ -136,11 +136,15 @@ int process_cmd_line(
 
 
 void report_error(
+    int error_level,
     const char* file_name,
     int line_number,
-    const char* error_message)
+    const char* message)
 {
-  fprintf(stderr, "%s:%d: %s\n", file_name, line_number, error_message);
+  // file_name may be NULL (no file on the compiler's stack); %s needs non-NULL.
+  fprintf(stderr, "%s(%d): %s: %s\n", file_name != NULL ? file_name : "(null)",
+      line_number, error_level == YARA_ERROR_LEVEL_ERROR ? "error" : "warning",
+      message);
 }
 
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git



More information about the forensics-changes mailing list