[Forensics-changes] [yara] 116/160: Optimize _yr_rules_clean_matches

Hilko Bengen bengen at moszumanska.debian.org
Sat Jul 1 10:29:24 UTC 2017


This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to annotated tag v3.4.0
in repository yara.

commit 1ed8283d44e6adcc3beb7f2b4e748168dd707db7
Author: Victor M. Alvarez <plusvic at gmail.com>
Date:   Fri May 8 12:07:33 2015 +0200

    Optimize _yr_rules_clean_matches
    
    Store the strings that actually need to be cleaned instead of iterating over all the existing strings.
---
 libyara/arena.c              |   8 +++-
 libyara/include/yara/scan.h  |   5 +--
 libyara/include/yara/types.h |   3 ++
 libyara/rules.c              | 104 +++++++++++++++++++++++--------------------
 libyara/scan.c               |  57 +++++++++++++++++-------
 5 files changed, 107 insertions(+), 70 deletions(-)

diff --git a/libyara/arena.c b/libyara/arena.c
index 3d2a043..738ab15 100644
--- a/libyara/arena.c
+++ b/libyara/arena.c
@@ -98,7 +98,7 @@ YR_ARENA_PAGE* _yr_arena_new_page(
 //
 // _yr_arena_page_for_address
 //
-// Returns the page within he arena where an address reside.
+// Returns the page within the arena where an address resides.
 //
 // Args:
 //    YR_ARENA* arena   - Pointer to the arena
@@ -302,12 +302,16 @@ void yr_arena_destroy(
 //    YR_ARENA* arena  - Pointer to the arena.
 //
 // Returns:
-//    A pointer
+//    A pointer to the arena's data. NULL if no data has been written to
+//    the arena yet.
 //
 
 void* yr_arena_base_address(
   YR_ARENA* arena)
 {
+  if (arena->page_list_head->used == 0)
+    return NULL;
+
   return arena->page_list_head->address;
 }
 
diff --git a/libyara/include/yara/scan.h b/libyara/include/yara/scan.h
index 29f53b2..4edf739 100644
--- a/libyara/include/yara/scan.h
+++ b/libyara/include/yara/scan.h
@@ -26,12 +26,11 @@ limitations under the License.
 
 
 int yr_scan_verify_match(
+    YR_SCAN_CONTEXT* context,
     YR_AC_MATCH* ac_match,
     uint8_t* data,
     size_t data_size,
     size_t data_base,
-    size_t offset,
-    YR_ARENA* matches_arena,
-    int flags);
+    size_t offset);
 
 #endif
diff --git a/libyara/include/yara/types.h b/libyara/include/yara/types.h
index d886265..2b21d04 100644
--- a/libyara/include/yara/types.h
+++ b/libyara/include/yara/types.h
@@ -386,6 +386,9 @@ typedef struct _YR_SCAN_CONTEXT
   YR_HASH_TABLE*  objects_table;
   YR_CALLBACK_FUNC  callback;
 
+  YR_ARENA* matches_arena;
+  YR_ARENA* matching_strings_arena;
+
 } YR_SCAN_CONTEXT;
 
 
diff --git a/libyara/rules.c b/libyara/rules.c
index b51d9cb..4f86bd7 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -168,10 +168,10 @@ YR_API int yr_rules_define_string_variable(
 
 
 void _yr_rules_clean_matches(
-    YR_RULES* rules)
+    YR_RULES* rules,
+    YR_SCAN_CONTEXT* context)
 {
   YR_RULE* rule;
-  YR_STRING* string;
 
   int tidx = yr_get_tidx();
 
@@ -179,16 +179,24 @@ void _yr_rules_clean_matches(
   {
     rule->t_flags[tidx] &= ~RULE_TFLAGS_MATCH;
     rule->ns->t_flags[tidx] &= ~NAMESPACE_TFLAGS_UNSATISFIED_GLOBAL;
+  }
 
-    yr_rule_strings_foreach(rule, string)
-    {
-      string->matches[tidx].count = 0;
-      string->matches[tidx].head = NULL;
-      string->matches[tidx].tail = NULL;
-      string->unconfirmed_matches[tidx].count = 0;
-      string->unconfirmed_matches[tidx].head = NULL;
-      string->unconfirmed_matches[tidx].tail = NULL;
-    }
+  YR_STRING** string = (YR_STRING**) yr_arena_base_address(
+      context->matching_strings_arena);
+
+  while (string != NULL)
+  {
+    (*string)->matches[tidx].count = 0;
+    (*string)->matches[tidx].head = NULL;
+    (*string)->matches[tidx].tail = NULL;
+    (*string)->unconfirmed_matches[tidx].count = 0;
+    (*string)->unconfirmed_matches[tidx].head = NULL;
+    (*string)->unconfirmed_matches[tidx].tail = NULL;
+
+    string = yr_arena_next_address(
+        context->matching_strings_arena,
+        string,
+        sizeof(string));
   }
 }
 
@@ -225,22 +233,17 @@ void yr_rules_print_profiling_info(
 #endif
 
 
-int yr_rules_scan_mem_block(
+int _yr_rules_scan_mem_block(
     YR_RULES* rules,
     YR_MEMORY_BLOCK* block,
-    int flags,
+    YR_SCAN_CONTEXT* context,
     int timeout,
-    time_t start_time,
-    YR_ARENA* matches_arena)
+    time_t start_time)
 {
-  YR_AC_STATE* next_state;
   YR_AC_MATCH* ac_match;
-  YR_AC_STATE* current_state;
-
-  size_t i;
+  YR_AC_STATE* current_state = rules->automaton->root;
 
-  current_state = rules->automaton->root;
-  i = 0;
+  size_t i = 0;
 
   while (i < block->size)
   {
@@ -251,19 +254,18 @@ int yr_rules_scan_mem_block(
       if (ac_match->backtrack <= i)
       {
         FAIL_ON_ERROR(yr_scan_verify_match(
+            context,
             ac_match,
             block->data,
             block->size,
             block->base,
-            i - ac_match->backtrack,
-            matches_arena,
-            flags));
+            i - ac_match->backtrack));
       }
 
       ac_match = ac_match->next;
     }
 
-    next_state = yr_ac_next_state(current_state, block->data[i]);
+    YR_AC_STATE* next_state = yr_ac_next_state(current_state, block->data[i]);
 
     while (next_state == NULL && current_state->depth > 0)
     {
@@ -290,13 +292,12 @@ int yr_rules_scan_mem_block(
     if (ac_match->backtrack <= block->size)
     {
       FAIL_ON_ERROR(yr_scan_verify_match(
+          context,
           ac_match,
           block->data,
           block->size,
           block->base,
-          block->size - ac_match->backtrack,
-          matches_arena,
-          flags));
+          block->size - ac_match->backtrack));
     }
 
     ac_match = ac_match->next;
@@ -315,16 +316,7 @@ YR_API int yr_rules_scan_mem_blocks(
     int timeout)
 {
   YR_SCAN_CONTEXT context;
-  YR_RULE* rule;
-  YR_OBJECT* object;
-  YR_EXTERNAL_VARIABLE* external;
-  YR_ARENA* matches_arena = NULL;
-
-  time_t start_time;
-  tidx_mask_t bit;
 
-  int message;
-  int tidx = 0;
   int result = ERROR_SUCCESS;
 
   if (block == NULL)
@@ -337,10 +329,13 @@ YR_API int yr_rules_scan_mem_blocks(
   context.mem_block = block;
   context.entry_point = UNDEFINED;
   context.objects_table = NULL;
+  context.matches_arena = NULL;
+  context.matching_strings_arena = NULL;
 
   _yr_rules_lock(rules);
 
-  bit = 1;
+  int tidx = 0;
+  tidx_mask_t bit = 1;
 
   while (rules->tidx_mask & bit)
   {
@@ -360,7 +355,12 @@ YR_API int yr_rules_scan_mem_blocks(
 
   yr_set_tidx(tidx);
 
-  result = yr_arena_create(1024, 0, &matches_arena);
+  result = yr_arena_create(1024, 0, &context.matches_arena);
+
+  if (result != ERROR_SUCCESS)
+    goto _exit;
+
+  result = yr_arena_create(8, 0, &context.matching_strings_arena);
 
   if (result != ERROR_SUCCESS)
     goto _exit;
@@ -370,10 +370,12 @@ YR_API int yr_rules_scan_mem_blocks(
   if (result != ERROR_SUCCESS)
     goto _exit;
 
-  external = rules->externals_list_head;
+  YR_EXTERNAL_VARIABLE* external = rules->externals_list_head;
 
   while (!EXTERNAL_VARIABLE_IS_NULL(external))
   {
+    YR_OBJECT* object;
+
     result = yr_object_from_external_variable(
         external,
         &object);
@@ -391,7 +393,7 @@ YR_API int yr_rules_scan_mem_blocks(
     external++;
   }
 
-  start_time = time(NULL);
+  time_t start_time = time(NULL);
 
   while (block != NULL)
   {
@@ -408,13 +410,12 @@ YR_API int yr_rules_scan_mem_blocks(
             block->size);
     }
 
-    result = yr_rules_scan_mem_block(
+    result = _yr_rules_scan_mem_block(
         rules,
         block,
-        flags,
+        &context,
         timeout,
-        start_time,
-        matches_arena);
+        start_time);
 
     if (result != ERROR_SUCCESS)
       goto _exit;
@@ -431,6 +432,8 @@ YR_API int yr_rules_scan_mem_blocks(
   if (result != ERROR_SUCCESS)
     goto _exit;
 
+  YR_RULE* rule;
+
   yr_rules_foreach(rules, rule)
   {
     if (RULE_IS_GLOBAL(rule) && !(rule->t_flags[tidx] & RULE_TFLAGS_MATCH))
@@ -441,6 +444,8 @@ YR_API int yr_rules_scan_mem_blocks(
 
   yr_rules_foreach(rules, rule)
   {
+    int message;
+
     if (rule->t_flags[tidx] & RULE_TFLAGS_MATCH &&
         !(rule->ns->t_flags[tidx] & NAMESPACE_TFLAGS_UNSATISFIED_GLOBAL))
     {
@@ -470,15 +475,18 @@ YR_API int yr_rules_scan_mem_blocks(
 
 _exit:
 
+  _yr_rules_clean_matches(rules, &context);
+
   if (flags & SCAN_FLAGS_SHOW_MODULE_INFO)
     yr_modules_print_data(&context);
 
   yr_modules_unload_all(&context);
 
-  _yr_rules_clean_matches(rules);
+  if (context.matches_arena != NULL)
+    yr_arena_destroy(context.matches_arena);
 
-  if (matches_arena != NULL)
-    yr_arena_destroy(matches_arena);
+  if (context.matching_strings_arena != NULL)
+    yr_arena_destroy(context.matching_strings_arena);
 
   if (context.objects_table != NULL)
     yr_hash_table_destroy(
diff --git a/libyara/scan.c b/libyara/scan.c
index 1fbc46e..c1f2bd0 100644
--- a/libyara/scan.c
+++ b/libyara/scan.c
@@ -31,7 +31,7 @@ limitations under the License.
 typedef struct _CALLBACK_ARGS
 {
   YR_STRING* string;
-  YR_ARENA* matches_arena;
+  YR_SCAN_CONTEXT* context;
 
   uint8_t* data;
   size_t data_size;
@@ -423,8 +423,8 @@ void _yr_scan_remove_match_from_list(
 
 
 int _yr_scan_verify_chained_string_match(
-    YR_ARENA* matches_arena,
     YR_STRING* matching_string,
+    YR_SCAN_CONTEXT* context,
     uint8_t* match_data,
     size_t match_base,
     size_t match_offset,
@@ -526,6 +526,18 @@ int _yr_scan_verify_chained_string_match(
           match->prev = NULL;
           match->next = NULL;
 
+          if (string->matches[tidx].count == 0)
+          {
+            // If this is the first match for the string, put the string in the
+            // list of strings whose flags need to be cleared after the scan.
+
+            FAIL_ON_ERROR(yr_arena_write_data(
+                context->matching_strings_arena,
+                &string,
+                sizeof(string),
+                NULL));
+          }
+
           FAIL_ON_ERROR(_yr_scan_add_match_to_list(
               match, &string->matches[tidx]));
         }
@@ -536,7 +548,7 @@ int _yr_scan_verify_chained_string_match(
     else
     {
       FAIL_ON_ERROR(yr_arena_allocate_memory(
-          matches_arena,
+          context->matches_arena,
           sizeof(YR_MATCH),
           (void**) &new_match));
 
@@ -606,8 +618,8 @@ int _yr_scan_match_callback(
   if (STRING_IS_CHAIN_PART(string))
   {
     result = _yr_scan_verify_chained_string_match(
-        callback_args->matches_arena,
         string,
+        callback_args->context,
         match_data,
         callback_args->data_base,
         match_offset,
@@ -616,8 +628,20 @@ int _yr_scan_match_callback(
   }
   else
   {
+    if (string->matches[tidx].count == 0)
+    {
+      // If this is the first match for the string, put the string in the
+      // list of strings whose flags need to be cleared after the scan.
+
+      FAIL_ON_ERROR(yr_arena_write_data(
+          callback_args->context->matching_strings_arena,
+          &string,
+          sizeof(string),
+          NULL));
+    }
+
     result = yr_arena_allocate_memory(
-        callback_args->matches_arena,
+        callback_args->context->matches_arena,
         sizeof(YR_MATCH),
         (void**) &new_match);
 
@@ -650,12 +674,12 @@ typedef int (*RE_EXEC_FUNC)(
 
 
 int _yr_scan_verify_re_match(
+    YR_SCAN_CONTEXT* context,
     YR_AC_MATCH* ac_match,
     uint8_t* data,
     size_t data_size,
     size_t data_base,
-    size_t offset,
-    YR_ARENA* matches_arena)
+    size_t offset)
 {
   CALLBACK_ARGS callback_args;
   RE_EXEC_FUNC exec;
@@ -708,10 +732,10 @@ int _yr_scan_verify_re_match(
     return ERROR_SUCCESS;
 
   callback_args.string = ac_match->string;
+  callback_args.context = context;
   callback_args.data = data;
   callback_args.data_size = data_size;
   callback_args.data_base = data_base;
-  callback_args.matches_arena = matches_arena;
   callback_args.forward_matches = forward_matches;
   callback_args.full_word = STRING_IS_FULL_WORD(ac_match->string);
   callback_args.tidx = yr_get_tidx();
@@ -747,12 +771,12 @@ int _yr_scan_verify_re_match(
 
 
 int _yr_scan_verify_literal_match(
+    YR_SCAN_CONTEXT* context,
     YR_AC_MATCH* ac_match,
     uint8_t* data,
     size_t data_size,
     size_t data_base,
-    size_t offset,
-    YR_ARENA* matches_arena)
+    size_t offset)
 {
   int flags = 0;
   int forward_matches = 0;
@@ -814,11 +838,11 @@ int _yr_scan_verify_literal_match(
   if (STRING_IS_NO_CASE(string))
     flags |= RE_FLAGS_NO_CASE;
 
+  callback_args.context = context;
   callback_args.string = string;
   callback_args.data = data;
   callback_args.data_size = data_size;
   callback_args.data_base = data_base;
-  callback_args.matches_arena = matches_arena;
   callback_args.forward_matches = forward_matches;
   callback_args.full_word = STRING_IS_FULL_WORD(string);
   callback_args.tidx = yr_get_tidx();
@@ -831,13 +855,12 @@ int _yr_scan_verify_literal_match(
 
 
 int yr_scan_verify_match(
+    YR_SCAN_CONTEXT* context,
     YR_AC_MATCH* ac_match,
     uint8_t* data,
     size_t data_size,
     size_t data_base,
-    size_t offset,
-    YR_ARENA* matches_arena,
-    int flags)
+    size_t offset)
 {
   YR_STRING* string = ac_match->string;
 
@@ -848,7 +871,7 @@ int yr_scan_verify_match(
   if (data_size - offset <= 0)
     return ERROR_SUCCESS;
 
-  if (flags & SCAN_FLAGS_FAST_MODE &&
+  if (context->flags & SCAN_FLAGS_FAST_MODE &&
       STRING_IS_SINGLE_MATCH(string) &&
       STRING_FOUND(string))
     return ERROR_SUCCESS;
@@ -860,12 +883,12 @@ int yr_scan_verify_match(
   if (STRING_IS_LITERAL(string))
   {
     FAIL_ON_ERROR(_yr_scan_verify_literal_match(
-        ac_match, data, data_size, data_base, offset, matches_arena));
+        context, ac_match, data, data_size, data_base, offset));
   }
   else
   {
     FAIL_ON_ERROR(_yr_scan_verify_re_match(
-        ac_match, data, data_size, data_base, offset, matches_arena));
+        context, ac_match, data, data_size, data_base, offset));
   }
 
   #ifdef PROFILING_ENABLED

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git



More information about the forensics-changes mailing list