[Forensics-changes] [yara] 116/160: Optimize _yr_rules_clean_matches
Hilko Bengen
bengen at moszumanska.debian.org
Sat Jul 1 10:29:24 UTC 2017
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to annotated tag v3.4.0
in repository yara.
commit 1ed8283d44e6adcc3beb7f2b4e748168dd707db7
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Fri May 8 12:07:33 2015 +0200
Optimize _yr_rules_clean_matches
Store the strings that actually need to be cleaned instead of iterating over all the existing strings.
---
libyara/arena.c | 8 +++-
libyara/include/yara/scan.h | 5 +--
libyara/include/yara/types.h | 3 ++
libyara/rules.c | 104 +++++++++++++++++++++++--------------------
libyara/scan.c | 57 +++++++++++++++++-------
5 files changed, 107 insertions(+), 70 deletions(-)
diff --git a/libyara/arena.c b/libyara/arena.c
index 3d2a043..738ab15 100644
--- a/libyara/arena.c
+++ b/libyara/arena.c
@@ -98,7 +98,7 @@ YR_ARENA_PAGE* _yr_arena_new_page(
//
// _yr_arena_page_for_address
//
-// Returns the page within he arena where an address reside.
+// Returns the page within the arena where an address resides.
//
// Args:
// YR_ARENA* arena - Pointer to the arena
@@ -302,12 +302,16 @@ void yr_arena_destroy(
// YR_ARENA* arena - Pointer to the arena.
//
// Returns:
-// A pointer
+// A pointer to the arena's data. NULL if no data has been written to
+// the arena yet.
//
void* yr_arena_base_address(
YR_ARENA* arena)
{
+ if (arena->page_list_head->used == 0)
+ return NULL;
+
return arena->page_list_head->address;
}
diff --git a/libyara/include/yara/scan.h b/libyara/include/yara/scan.h
index 29f53b2..4edf739 100644
--- a/libyara/include/yara/scan.h
+++ b/libyara/include/yara/scan.h
@@ -26,12 +26,11 @@ limitations under the License.
int yr_scan_verify_match(
+ YR_SCAN_CONTEXT* context,
YR_AC_MATCH* ac_match,
uint8_t* data,
size_t data_size,
size_t data_base,
- size_t offset,
- YR_ARENA* matches_arena,
- int flags);
+ size_t offset);
#endif
diff --git a/libyara/include/yara/types.h b/libyara/include/yara/types.h
index d886265..2b21d04 100644
--- a/libyara/include/yara/types.h
+++ b/libyara/include/yara/types.h
@@ -386,6 +386,9 @@ typedef struct _YR_SCAN_CONTEXT
YR_HASH_TABLE* objects_table;
YR_CALLBACK_FUNC callback;
+ YR_ARENA* matches_arena;
+ YR_ARENA* matching_strings_arena;
+
} YR_SCAN_CONTEXT;
diff --git a/libyara/rules.c b/libyara/rules.c
index b51d9cb..4f86bd7 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -168,10 +168,10 @@ YR_API int yr_rules_define_string_variable(
void _yr_rules_clean_matches(
- YR_RULES* rules)
+ YR_RULES* rules,
+ YR_SCAN_CONTEXT* context)
{
YR_RULE* rule;
- YR_STRING* string;
int tidx = yr_get_tidx();
@@ -179,16 +179,24 @@ void _yr_rules_clean_matches(
{
rule->t_flags[tidx] &= ~RULE_TFLAGS_MATCH;
rule->ns->t_flags[tidx] &= ~NAMESPACE_TFLAGS_UNSATISFIED_GLOBAL;
+ }
- yr_rule_strings_foreach(rule, string)
- {
- string->matches[tidx].count = 0;
- string->matches[tidx].head = NULL;
- string->matches[tidx].tail = NULL;
- string->unconfirmed_matches[tidx].count = 0;
- string->unconfirmed_matches[tidx].head = NULL;
- string->unconfirmed_matches[tidx].tail = NULL;
- }
+ YR_STRING** string = (YR_STRING**) yr_arena_base_address(
+ context->matching_strings_arena);
+
+ while (string != NULL)
+ {
+ (*string)->matches[tidx].count = 0;
+ (*string)->matches[tidx].head = NULL;
+ (*string)->matches[tidx].tail = NULL;
+ (*string)->unconfirmed_matches[tidx].count = 0;
+ (*string)->unconfirmed_matches[tidx].head = NULL;
+ (*string)->unconfirmed_matches[tidx].tail = NULL;
+
+ string = yr_arena_next_address(
+ context->matching_strings_arena,
+ string,
+ sizeof(string));
}
}
@@ -225,22 +233,17 @@ void yr_rules_print_profiling_info(
#endif
-int yr_rules_scan_mem_block(
+int _yr_rules_scan_mem_block(
YR_RULES* rules,
YR_MEMORY_BLOCK* block,
- int flags,
+ YR_SCAN_CONTEXT* context,
int timeout,
- time_t start_time,
- YR_ARENA* matches_arena)
+ time_t start_time)
{
- YR_AC_STATE* next_state;
YR_AC_MATCH* ac_match;
- YR_AC_STATE* current_state;
-
- size_t i;
+ YR_AC_STATE* current_state = rules->automaton->root;
- current_state = rules->automaton->root;
- i = 0;
+ size_t i = 0;
while (i < block->size)
{
@@ -251,19 +254,18 @@ int yr_rules_scan_mem_block(
if (ac_match->backtrack <= i)
{
FAIL_ON_ERROR(yr_scan_verify_match(
+ context,
ac_match,
block->data,
block->size,
block->base,
- i - ac_match->backtrack,
- matches_arena,
- flags));
+ i - ac_match->backtrack));
}
ac_match = ac_match->next;
}
- next_state = yr_ac_next_state(current_state, block->data[i]);
+ YR_AC_STATE* next_state = yr_ac_next_state(current_state, block->data[i]);
while (next_state == NULL && current_state->depth > 0)
{
@@ -290,13 +292,12 @@ int yr_rules_scan_mem_block(
if (ac_match->backtrack <= block->size)
{
FAIL_ON_ERROR(yr_scan_verify_match(
+ context,
ac_match,
block->data,
block->size,
block->base,
- block->size - ac_match->backtrack,
- matches_arena,
- flags));
+ block->size - ac_match->backtrack));
}
ac_match = ac_match->next;
@@ -315,16 +316,7 @@ YR_API int yr_rules_scan_mem_blocks(
int timeout)
{
YR_SCAN_CONTEXT context;
- YR_RULE* rule;
- YR_OBJECT* object;
- YR_EXTERNAL_VARIABLE* external;
- YR_ARENA* matches_arena = NULL;
-
- time_t start_time;
- tidx_mask_t bit;
- int message;
- int tidx = 0;
int result = ERROR_SUCCESS;
if (block == NULL)
@@ -337,10 +329,13 @@ YR_API int yr_rules_scan_mem_blocks(
context.mem_block = block;
context.entry_point = UNDEFINED;
context.objects_table = NULL;
+ context.matches_arena = NULL;
+ context.matching_strings_arena = NULL;
_yr_rules_lock(rules);
- bit = 1;
+ int tidx = 0;
+ tidx_mask_t bit = 1;
while (rules->tidx_mask & bit)
{
@@ -360,7 +355,12 @@ YR_API int yr_rules_scan_mem_blocks(
yr_set_tidx(tidx);
- result = yr_arena_create(1024, 0, &matches_arena);
+ result = yr_arena_create(1024, 0, &context.matches_arena);
+
+ if (result != ERROR_SUCCESS)
+ goto _exit;
+
+ result = yr_arena_create(8, 0, &context.matching_strings_arena);
if (result != ERROR_SUCCESS)
goto _exit;
@@ -370,10 +370,12 @@ YR_API int yr_rules_scan_mem_blocks(
if (result != ERROR_SUCCESS)
goto _exit;
- external = rules->externals_list_head;
+ YR_EXTERNAL_VARIABLE* external = rules->externals_list_head;
while (!EXTERNAL_VARIABLE_IS_NULL(external))
{
+ YR_OBJECT* object;
+
result = yr_object_from_external_variable(
external,
&object);
@@ -391,7 +393,7 @@ YR_API int yr_rules_scan_mem_blocks(
external++;
}
- start_time = time(NULL);
+ time_t start_time = time(NULL);
while (block != NULL)
{
@@ -408,13 +410,12 @@ YR_API int yr_rules_scan_mem_blocks(
block->size);
}
- result = yr_rules_scan_mem_block(
+ result = _yr_rules_scan_mem_block(
rules,
block,
- flags,
+ &context,
timeout,
- start_time,
- matches_arena);
+ start_time);
if (result != ERROR_SUCCESS)
goto _exit;
@@ -431,6 +432,8 @@ YR_API int yr_rules_scan_mem_blocks(
if (result != ERROR_SUCCESS)
goto _exit;
+ YR_RULE* rule;
+
yr_rules_foreach(rules, rule)
{
if (RULE_IS_GLOBAL(rule) && !(rule->t_flags[tidx] & RULE_TFLAGS_MATCH))
@@ -441,6 +444,8 @@ YR_API int yr_rules_scan_mem_blocks(
yr_rules_foreach(rules, rule)
{
+ int message;
+
if (rule->t_flags[tidx] & RULE_TFLAGS_MATCH &&
!(rule->ns->t_flags[tidx] & NAMESPACE_TFLAGS_UNSATISFIED_GLOBAL))
{
@@ -470,15 +475,18 @@ YR_API int yr_rules_scan_mem_blocks(
_exit:
+ _yr_rules_clean_matches(rules, &context);
+
if (flags & SCAN_FLAGS_SHOW_MODULE_INFO)
yr_modules_print_data(&context);
yr_modules_unload_all(&context);
- _yr_rules_clean_matches(rules);
+ if (context.matches_arena != NULL)
+ yr_arena_destroy(context.matches_arena);
- if (matches_arena != NULL)
- yr_arena_destroy(matches_arena);
+ if (context.matching_strings_arena != NULL)
+ yr_arena_destroy(context.matching_strings_arena);
if (context.objects_table != NULL)
yr_hash_table_destroy(
diff --git a/libyara/scan.c b/libyara/scan.c
index 1fbc46e..c1f2bd0 100644
--- a/libyara/scan.c
+++ b/libyara/scan.c
@@ -31,7 +31,7 @@ limitations under the License.
typedef struct _CALLBACK_ARGS
{
YR_STRING* string;
- YR_ARENA* matches_arena;
+ YR_SCAN_CONTEXT* context;
uint8_t* data;
size_t data_size;
@@ -423,8 +423,8 @@ void _yr_scan_remove_match_from_list(
int _yr_scan_verify_chained_string_match(
- YR_ARENA* matches_arena,
YR_STRING* matching_string,
+ YR_SCAN_CONTEXT* context,
uint8_t* match_data,
size_t match_base,
size_t match_offset,
@@ -526,6 +526,18 @@ int _yr_scan_verify_chained_string_match(
match->prev = NULL;
match->next = NULL;
+ if (string->matches[tidx].count == 0)
+ {
+ // If this is the first match for the string, put the string in the
+ // list of strings whose flags need to be cleared after the scan.
+
+ FAIL_ON_ERROR(yr_arena_write_data(
+ context->matching_strings_arena,
+ &string,
+ sizeof(string),
+ NULL));
+ }
+
FAIL_ON_ERROR(_yr_scan_add_match_to_list(
match, &string->matches[tidx]));
}
@@ -536,7 +548,7 @@ int _yr_scan_verify_chained_string_match(
else
{
FAIL_ON_ERROR(yr_arena_allocate_memory(
- matches_arena,
+ context->matches_arena,
sizeof(YR_MATCH),
(void**) &new_match));
@@ -606,8 +618,8 @@ int _yr_scan_match_callback(
if (STRING_IS_CHAIN_PART(string))
{
result = _yr_scan_verify_chained_string_match(
- callback_args->matches_arena,
string,
+ callback_args->context,
match_data,
callback_args->data_base,
match_offset,
@@ -616,8 +628,20 @@ int _yr_scan_match_callback(
}
else
{
+ if (string->matches[tidx].count == 0)
+ {
+ // If this is the first match for the string, put the string in the
+ // list of strings whose flags need to be cleared after the scan.
+
+ FAIL_ON_ERROR(yr_arena_write_data(
+ callback_args->context->matching_strings_arena,
+ &string,
+ sizeof(string),
+ NULL));
+ }
+
result = yr_arena_allocate_memory(
- callback_args->matches_arena,
+ callback_args->context->matches_arena,
sizeof(YR_MATCH),
(void**) &new_match);
@@ -650,12 +674,12 @@ typedef int (*RE_EXEC_FUNC)(
int _yr_scan_verify_re_match(
+ YR_SCAN_CONTEXT* context,
YR_AC_MATCH* ac_match,
uint8_t* data,
size_t data_size,
size_t data_base,
- size_t offset,
- YR_ARENA* matches_arena)
+ size_t offset)
{
CALLBACK_ARGS callback_args;
RE_EXEC_FUNC exec;
@@ -708,10 +732,10 @@ int _yr_scan_verify_re_match(
return ERROR_SUCCESS;
callback_args.string = ac_match->string;
+ callback_args.context = context;
callback_args.data = data;
callback_args.data_size = data_size;
callback_args.data_base = data_base;
- callback_args.matches_arena = matches_arena;
callback_args.forward_matches = forward_matches;
callback_args.full_word = STRING_IS_FULL_WORD(ac_match->string);
callback_args.tidx = yr_get_tidx();
@@ -747,12 +771,12 @@ int _yr_scan_verify_re_match(
int _yr_scan_verify_literal_match(
+ YR_SCAN_CONTEXT* context,
YR_AC_MATCH* ac_match,
uint8_t* data,
size_t data_size,
size_t data_base,
- size_t offset,
- YR_ARENA* matches_arena)
+ size_t offset)
{
int flags = 0;
int forward_matches = 0;
@@ -814,11 +838,11 @@ int _yr_scan_verify_literal_match(
if (STRING_IS_NO_CASE(string))
flags |= RE_FLAGS_NO_CASE;
+ callback_args.context = context;
callback_args.string = string;
callback_args.data = data;
callback_args.data_size = data_size;
callback_args.data_base = data_base;
- callback_args.matches_arena = matches_arena;
callback_args.forward_matches = forward_matches;
callback_args.full_word = STRING_IS_FULL_WORD(string);
callback_args.tidx = yr_get_tidx();
@@ -831,13 +855,12 @@ int _yr_scan_verify_literal_match(
int yr_scan_verify_match(
+ YR_SCAN_CONTEXT* context,
YR_AC_MATCH* ac_match,
uint8_t* data,
size_t data_size,
size_t data_base,
- size_t offset,
- YR_ARENA* matches_arena,
- int flags)
+ size_t offset)
{
YR_STRING* string = ac_match->string;
@@ -848,7 +871,7 @@ int yr_scan_verify_match(
if (data_size - offset <= 0)
return ERROR_SUCCESS;
- if (flags & SCAN_FLAGS_FAST_MODE &&
+ if (context->flags & SCAN_FLAGS_FAST_MODE &&
STRING_IS_SINGLE_MATCH(string) &&
STRING_FOUND(string))
return ERROR_SUCCESS;
@@ -860,12 +883,12 @@ int yr_scan_verify_match(
if (STRING_IS_LITERAL(string))
{
FAIL_ON_ERROR(_yr_scan_verify_literal_match(
- ac_match, data, data_size, data_base, offset, matches_arena));
+ context, ac_match, data, data_size, data_base, offset));
}
else
{
FAIL_ON_ERROR(_yr_scan_verify_re_match(
- ac_match, data, data_size, data_base, offset, matches_arena));
+ context, ac_match, data, data_size, data_base, offset));
}
#ifdef PROFILING_ENABLED
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list