[Forensics-changes] [yara] 178/415: Implemented warnings, fast scan mode, and list based nodes in Aho-Corasick automaton.
Hilko Bengen
bengen at moszumanska.debian.org
Thu Apr 3 05:43:02 UTC 2014
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to branch debian
in repository yara.
commit fd4a59d613d9c7cf9067083e48b5f6747647a7db
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Wed Jun 26 10:48:30 2013 +0000
Implemented warnings, fast scan mode, and list based nodes in Aho-Corasick automaton.
---
Makefile.am | 2 +-
libyara/Makefile.am | 2 +-
libyara/ahocorasick.c | 239 ++++++++++++++++++++++++++++++++++------------
libyara/arena.c | 4 +-
libyara/lex.c | 2 +
libyara/lex.l | 2 +
libyara/parser.c | 54 ++++++++++-
libyara/rules.c | 52 ++++++----
libyara/yara.h | 77 +++++++++++----
yara-python/yara-python.c | 23 +++--
yara.c | 64 ++++++++-----
yarac.c | 8 +-
12 files changed, 396 insertions(+), 133 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index 8e16cfc..9e8466c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,4 +1,4 @@
-AM_CFLAGS=-g -O0
+AM_CFLAGS=-g -O3
# Build the library in the hand subdirectory first.
SUBDIRS = libyara
diff --git a/libyara/Makefile.am b/libyara/Makefile.am
index 0349a33..d72da7f 100644
--- a/libyara/Makefile.am
+++ b/libyara/Makefile.am
@@ -1,6 +1,6 @@
AM_YFLAGS = -d
-CFLAGS=-g -O0
+CFLAGS=-g -O3
ACLOCAL_AMFLAGS=-I m4
diff --git a/libyara/ahocorasick.c b/libyara/ahocorasick.c
index 9a818d7..6038ca8 100644
--- a/libyara/ahocorasick.c
+++ b/libyara/ahocorasick.c
@@ -25,6 +25,7 @@ limitations under the License.
#define MAX_TOKEN 4
+#define MAX_TABLE_BASED_STATES_DEPTH 1
#ifndef min
@@ -143,8 +144,70 @@ int _yr_ac_queue_is_empty(
}
+AC_STATE* _yr_ac_next_child(
+ AC_STATE* state,
+ int64_t* iterator)
+{
+ int i;
+ AC_TABLE_BASED_STATE* table_based_state;
+ AC_LIST_BASED_STATE* list_based_state;
+ AC_STATE_TRANSITION* transition;
+
+ if (state->depth <= MAX_TABLE_BASED_STATES_DEPTH)
+ {
+ for (i = (int) *iterator; i < 256; i++)
+ {
+ table_based_state = (AC_TABLE_BASED_STATE*) state;
+
+ if (table_based_state->transitions[i].state != NULL)
+ {
+ *iterator = i + 1;
+ return table_based_state->transitions[i].state;
+ }
+ }
+ }
+ else
+ {
+ transition = (AC_STATE_TRANSITION*) *iterator;
+
+ if (transition->next != NULL)
+ {
+ *iterator = (int64_t) transition->next;
+ return transition->next->state;
+ }
+ }
+
+ return NULL;
+}
+
+
+AC_STATE* _yr_ac_first_child(
+ AC_STATE* state,
+ int64_t* iterator)
+{
+ AC_LIST_BASED_STATE* list_based_state;
+
+ if (state->depth <= MAX_TABLE_BASED_STATES_DEPTH)
+ {
+ *iterator = 0;
+ return _yr_ac_next_child(state, iterator);
+ }
+ else
+ {
+ list_based_state = (AC_LIST_BASED_STATE*) state;
+
+ if (list_based_state->transitions != NULL)
+ {
+ *iterator = (int64_t) list_based_state->transitions;
+ return list_based_state->transitions->state;
+ }
+ }
+
+ return NULL;
+}
+
//
-// _yr_ac_next_state
+// yr_ac_next_state
//
// Given an automaton state and an input symbol, returns the new state
// after reading the input symbol.
@@ -157,11 +220,30 @@ int _yr_ac_queue_is_empty(
// Pointer to the next automaton state.
//
-AC_STATE* _yr_ac_next_state(
+inline AC_STATE* yr_ac_next_state(
AC_STATE* state,
uint8_t input)
{
- return state->transitions[input].state;
+ AC_STATE_TRANSITION* transition;
+
+ if (state->depth <= MAX_TABLE_BASED_STATES_DEPTH)
+ {
+ return ((AC_TABLE_BASED_STATE*) state)->transitions[input].state;
+ }
+ else
+ {
+ transition = ((AC_LIST_BASED_STATE*) state)->transitions;
+
+ while (transition != NULL)
+ {
+ if (transition->input == input)
+ return transition->state;
+
+ transition = transition->next;
+ }
+
+ return NULL;
+ }
}
@@ -187,33 +269,71 @@ AC_STATE* _yr_ac_create_state(
{
int result;
AC_STATE* new_state;
+ AC_LIST_BASED_STATE* list_based_state;
+ AC_TABLE_BASED_STATE* table_based_state;
+ AC_STATE_TRANSITION* new_transition;
- result = yr_arena_allocate_struct(
- arena,
- sizeof(AC_STATE),
- (void**) &new_state,
- offsetof(AC_STATE, failure),
- offsetof(AC_STATE, matches),
- EOL);
+ if (state->depth < MAX_TABLE_BASED_STATES_DEPTH)
+ {
+ result = yr_arena_allocate_struct(
+ arena,
+ sizeof(AC_TABLE_BASED_STATE),
+ (void**) &new_state,
+ offsetof(AC_TABLE_BASED_STATE, failure),
+ offsetof(AC_TABLE_BASED_STATE, matches),
+ EOL);
+ }
+ else
+ {
+ result = yr_arena_allocate_struct(
+ arena,
+ sizeof(AC_LIST_BASED_STATE),
+ (void**) &new_state,
+ offsetof(AC_LIST_BASED_STATE, failure),
+ offsetof(AC_LIST_BASED_STATE, matches),
+ offsetof(AC_LIST_BASED_STATE, transitions),
+ EOL);
+ }
if (result != ERROR_SUCCESS)
return NULL;
- result = yr_arena_make_relocatable(
- arena,
- state,
- offsetof(AC_STATE, transitions[input]),
- EOL);
+ if (state->depth <= MAX_TABLE_BASED_STATES_DEPTH)
+ {
+ result = yr_arena_make_relocatable(
+ arena,
+ state,
+ offsetof(AC_TABLE_BASED_STATE, transitions[input]),
+ EOL);
- if (result != ERROR_SUCCESS)
- return NULL;
+ if (result != ERROR_SUCCESS)
+ return NULL;
+
+ table_based_state = (AC_TABLE_BASED_STATE*) state;
+ table_based_state->transitions[input].state = new_state;
+ }
+ else
+ {
+ result = yr_arena_allocate_struct(
+ arena,
+ sizeof(AC_STATE_TRANSITION),
+ (void**) &new_transition,
+ offsetof(AC_STATE_TRANSITION, state),
+ offsetof(AC_STATE_TRANSITION, next),
+ EOL);
- state->transitions[input].state = new_state;
+ if (result != ERROR_SUCCESS)
+ return NULL;
- new_state->depth = state->depth + 1;
- new_state->matches = NULL;
+ list_based_state = (AC_LIST_BASED_STATE*) state;
+
+ new_transition->input = input;
+ new_transition->state = new_state;
+ new_transition->next = list_based_state->transitions;
+ list_based_state->transitions = new_transition;
+ }
- memset(new_state->transitions, 0, sizeof(new_state->transitions));
+ new_state->depth = state->depth + 1;
return new_state;
}
@@ -596,7 +716,7 @@ void _yr_ac_gen_tokens(
str = output_buffer;
memcpy(output_buffer, string->string, token_length);
- ((uint8_t*) output_buffer) += token_length;
+ output_buffer += token_length;
if (STRING_IS_NO_CASE(string))
{
@@ -663,6 +783,8 @@ void yr_ac_create_failure_links(
{
int i;
+ int64_t iterator;
+
AC_STATE* current_state;
AC_STATE* failure_state;
AC_STATE* temp_state;
@@ -683,13 +805,13 @@ void yr_ac_create_failure_links(
// Push root's children and set their failure link to root.
- for (i = 0; i < 256; i++)
+ state = _yr_ac_first_child(root_state, &iterator);
+
+ while (state != NULL)
{
- if (root_state->transitions[i].state != NULL)
- {
- _yr_ac_queue_push(&queue, root_state->transitions[i].state);
- root_state->transitions[i].state->failure = root_state;
- }
+ _yr_ac_queue_push(&queue, state);
+ state->failure = root_state;
+ state = _yr_ac_next_child(root_state, &iterator);
}
// Traverse the trie in BFS order calculating the failure link
@@ -714,19 +836,16 @@ void yr_ac_create_failure_links(
current_state->matches = root_state->matches;
}
- for (i = 0; i < 256; i++)
- {
- transition_state = current_state->transitions[i].state;
-
- if (transition_state == NULL)
- continue;
+ transition_state = _yr_ac_first_child(current_state, &iterator);
+ while (transition_state != NULL)
+ {
_yr_ac_queue_push(&queue, transition_state);
failure_state = current_state->failure;
while (1)
{
- temp_state = _yr_ac_next_state(failure_state, i);
+ temp_state = yr_ac_next_state(failure_state, i);
if (temp_state != NULL)
{
@@ -761,7 +880,10 @@ void yr_ac_create_failure_links(
}
}
} // while(1)
+
+ transition_state = _yr_ac_next_child(current_state, &iterator);
}
+
} // while(!__yr_ac_queue_is_empty(&queue))
}
@@ -791,10 +913,10 @@ int yr_ac_create_automaton(
result = yr_arena_allocate_struct(
arena,
- sizeof(AC_STATE),
+ sizeof(AC_TABLE_BASED_STATE),
(void**) &root_state,
- offsetof(AC_STATE, failure),
- offsetof(AC_STATE, matches),
+ offsetof(AC_TABLE_BASED_STATE, failure),
+ offsetof(AC_TABLE_BASED_STATE, matches),
EOL);
if (result != ERROR_SUCCESS)
@@ -805,8 +927,6 @@ int yr_ac_create_automaton(
root_state->depth = 0;
root_state->matches = NULL;
- memset(root_state->transitions, 0, sizeof(root_state->transitions));
-
return result;
}
@@ -820,7 +940,8 @@ int yr_ac_create_automaton(
int yr_ac_add_string(
ARENA* arena,
AC_AUTOMATON* automaton,
- STRING* string)
+ STRING* string,
+ int* min_token_length)
{
int result;
int token_length;
@@ -838,7 +959,7 @@ int yr_ac_add_string(
// for the worst case which is a "ascii wide nocase" text string.
tokens = yr_malloc(
- 2 * MAX_TOKEN * MAX_TOKEN * (2 * sizeof(int) + MAX_TOKEN) + sizeof(int));
+ 2 * (1 << MAX_TOKEN) * (2 * sizeof(int) + MAX_TOKEN) + sizeof(int));
if (tokens == NULL)
return ERROR_INSUFICIENT_MEMORY;
@@ -857,6 +978,8 @@ int yr_ac_add_string(
if (token_length == 0)
{
+ *min_token_length = 0;
+
// No token could be extracted from the string, put the string in the
// automaton's root state. This is far from ideal, because the string will
// be tried at every data offset during scanning.
@@ -881,31 +1004,21 @@ int yr_ac_add_string(
{
// For each token create the states in the automaton.
+ *min_token_length = MAX_TOKEN;
+
while (token_length != 0)
{
+ if (token_length < *min_token_length)
+ *min_token_length = token_length;
+
state = automaton->root;
token_backtrack = *((int*) tokens_cursor);
tokens_cursor += sizeof(int);
- /*if (token_length < 2)
- {
- printf("%s\n", string->string);
- printf("%s\n", string->identifier);
- for (i = 0; i < token_length; i++)
- printf("%02X", *(tokens_cursor + i));
-
- printf("\n");
-
- tokens_cursor += token_length;
- token_length = *((int*) tokens_cursor);
- tokens_cursor += sizeof(int);
- continue;
- }*/
-
for(i = 0; i < token_length; i++)
{
- next_state = _yr_ac_next_state(
+ next_state = yr_ac_next_state(
state,
*tokens_cursor);
@@ -970,8 +1083,10 @@ void _yr_ac_print_automaton_state(
{
int i;
char* identifier;
+ int64_t iterator;
STRING* string;
AC_MATCH* match;
+ AC_STATE* child_state;
for (i = 0; i < state->depth; i++)
printf(" ");
@@ -988,10 +1103,12 @@ void _yr_ac_print_automaton_state(
printf("\n");
- for (i = 0; i < 256; i++)
+ child_state = _yr_ac_first_child(state, &iterator);
+
+ while(child_state != NULL)
{
- if (state->transitions[i].state != NULL)
- _yr_ac_print_automaton_state(state->transitions[i].state);
+ _yr_ac_print_automaton_state(child_state);
+ child_state = _yr_ac_next_child(state, &iterator);
}
}
diff --git a/libyara/arena.c b/libyara/arena.c
index 313708d..3bae40f 100644
--- a/libyara/arena.c
+++ b/libyara/arena.c
@@ -115,8 +115,8 @@ ARENA_PAGE* _yr_arena_page_for_address(
while (page != NULL)
{
- if (address >= page->address &&
- address < page->address + page->used)
+ if ((uint8_t*) address >= page->address &&
+ (uint8_t*) address < page->address + page->used)
return page;
page = page->next;
}
diff --git a/libyara/lex.c b/libyara/lex.c
index 39069f1..35b2dcc 100644
--- a/libyara/lex.c
+++ b/libyara/lex.c
@@ -2905,6 +2905,7 @@ void yyerror(yyscan_t yyscanner, const char *error_message)
if (compiler->error_report_function != NULL)
{
compiler->error_report_function(
+ YARA_ERROR_LEVEL_ERROR,
file_name,
compiler->last_error_line,
error_message);
@@ -2919,6 +2920,7 @@ void yyerror(yyscan_t yyscanner, const char *error_message)
yr_compiler_get_error_message(compiler, message, sizeof(message));
compiler->error_report_function(
+ YARA_ERROR_LEVEL_ERROR,
file_name,
compiler->last_error_line,
message);
diff --git a/libyara/lex.l b/libyara/lex.l
index c42ffbb..89f48c7 100644
--- a/libyara/lex.l
+++ b/libyara/lex.l
@@ -523,6 +523,7 @@ void yyerror(yyscan_t yyscanner, const char *error_message)
if (compiler->error_report_function != NULL)
{
compiler->error_report_function(
+ YARA_ERROR_LEVEL_ERROR,
file_name,
compiler->last_error_line,
error_message);
@@ -537,6 +538,7 @@ void yyerror(yyscan_t yyscanner, const char *error_message)
yr_compiler_get_error_message(compiler, message, sizeof(message));
compiler->error_report_function(
+ YARA_ERROR_LEVEL_ERROR,
file_name,
compiler->last_error_line,
message);
diff --git a/libyara/parser.c b/libyara/parser.c
index ccf6ec0..b836196 100644
--- a/libyara/parser.c
+++ b/libyara/parser.c
@@ -488,6 +488,10 @@ STRING* yr_parser_reduce_string_declaration(
SIZED_STRING* str)
{
int error_offset;
+ int min_token_length;
+ char* file_name;
+ char warning_message[512];
+
STRING* string;
YARA_COMPILER* compiler = yyget_extra(yyscanner);
@@ -517,6 +521,14 @@ STRING* yr_parser_reduce_string_declaration(
if (!(flags & STRING_FLAGS_WIDE))
flags |= STRING_FLAGS_ASCII;
+ // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
+ // a single match for the string is enough. This is true in
+ // most cases, except when the string count (#) and string offset (@)
+ // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
+ // initially, and unmarked later if required.
+
+ flags |= STRING_FLAGS_SINGLE_MATCH;
+
string->flags = flags;
string->mask = NULL;
string->re.regexp = NULL;
@@ -569,7 +581,30 @@ STRING* yr_parser_reduce_string_declaration(
compiler->last_result = yr_ac_add_string(
compiler->automaton_arena,
compiler->automaton,
- string);
+ string,
+ &min_token_length);
+
+ if (compiler->file_name_stack_ptr > 0)
+ file_name = compiler->file_name_stack[
+ compiler->file_name_stack_ptr - 1];
+ else
+ file_name = NULL;
+
+ if (min_token_length < 2 && compiler->error_report_function != NULL)
+ {
+ snprintf(
+ warning_message,
+ sizeof(warning_message),
+ "%s is slowing down scanning%s",
+ string->identifier,
+ min_token_length == 0 ? " (critical!)" : "");
+
+ compiler->error_report_function(
+ YARA_ERROR_LEVEL_WARNING,
+ file_name,
+ yyget_lineno(yyscanner),
+ warning_message);
+ }
if (compiler->last_result != ERROR_SUCCESS)
return NULL;
@@ -689,6 +724,20 @@ int yr_parser_reduce_string_identifier(
{
yr_parser_emit(yyscanner, PUSH_A, NULL);
yr_parser_emit(yyscanner, instruction, NULL);
+
+ if (instruction != SFOUND)
+ {
+ string = compiler->current_rule_strings;
+
+ while(!STRING_IS_NULL(string))
+ {
+ string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
+ string = yr_arena_next_address(
+ compiler->strings_arena,
+ string,
+ sizeof(STRING));
+ }
+ }
}
else
{
@@ -707,6 +756,9 @@ int yr_parser_reduce_string_identifier(
PTR_TO_UINT64(string),
NULL);
+ if (instruction != SFOUND)
+ string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
+
yr_parser_emit(yyscanner, instruction, NULL);
string->flags |= STRING_FLAGS_REFERENCED;
diff --git a/libyara/rules.c b/libyara/rules.c
index 6d25409..526fb0f 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -595,7 +595,8 @@ void yr_rules_free_matches(
int yr_rules_scan_mem_block(
YARA_RULES* rules,
uint8_t* data,
- size_t data_size)
+ size_t data_size,
+ int fast_scan_mode)
{
AC_STATE* next_state;
@@ -615,25 +616,30 @@ int yr_rules_scan_mem_block(
{
if (i >= ac_match->backtrack)
{
- result = _yr_scan_verify_match(
- ac_match,
- data,
- data_size,
- i - ac_match->backtrack);
-
- if (result != ERROR_SUCCESS)
- return result;
+ if (!(fast_scan_mode &&
+ ac_match->string->flags & STRING_FLAGS_FOUND &&
+ ac_match->string->flags & STRING_FLAGS_SINGLE_MATCH))
+ {
+ result = _yr_scan_verify_match(
+ ac_match,
+ data,
+ data_size,
+ i - ac_match->backtrack);
+
+ if (result != ERROR_SUCCESS)
+ return result;
+ }
}
ac_match = ac_match->next;
}
- next_state = current_state->transitions[data[i]].state;
+ next_state = yr_ac_next_state(current_state, data[i]);
while (next_state == NULL && current_state->depth > 0)
{
current_state = current_state->failure;
- next_state = current_state->transitions[data[i]].state;
+ next_state = yr_ac_next_state(current_state, data[i]);
}
if (next_state != NULL)
@@ -667,7 +673,8 @@ int yr_rules_scan_mem_blocks(
MEMORY_BLOCK* block,
int scanning_process_memory,
YARACALLBACK callback,
- void* user_data)
+ void* user_data,
+ int fast_scan_mode)
{
RULE* rule;
EVALUATION_CONTEXT context;
@@ -699,7 +706,8 @@ int yr_rules_scan_mem_blocks(
result = yr_rules_scan_mem_block(
rules,
block->data,
- block->size);
+ block->size,
+ fast_scan_mode);
if (result != ERROR_SUCCESS)
return result;
@@ -754,7 +762,8 @@ int yr_rules_scan_mem(
uint8_t* buffer,
size_t buffer_size,
YARACALLBACK callback,
- void* user_data)
+ void* user_data,
+ int fast_scan_mode)
{
MEMORY_BLOCK block;
@@ -768,7 +777,8 @@ int yr_rules_scan_mem(
&block,
FALSE,
callback,
- user_data);
+ user_data,
+ fast_scan_mode);
}
@@ -776,7 +786,8 @@ int yr_rules_scan_file(
YARA_RULES* rules,
const char* filename,
YARACALLBACK callback,
- void* user_data)
+ void* user_data,
+ int fast_scan_mode)
{
MAPPED_FILE mfile;
int result;
@@ -790,7 +801,8 @@ int yr_rules_scan_file(
mfile.data,
mfile.size,
callback,
- user_data);
+ user_data,
+ fast_scan_mode);
yr_filemap_unmap(&mfile);
}
@@ -803,7 +815,8 @@ int yr_rules_scan_proc(
YARA_RULES* rules,
int pid,
YARACALLBACK callback,
- void* user_data)
+ void* user_data,
+ int fast_scan_mode)
{
MEMORY_BLOCK* first_block;
MEMORY_BLOCK* next_block;
@@ -819,7 +832,8 @@ int yr_rules_scan_proc(
first_block,
TRUE,
callback,
- user_data);
+ user_data,
+ fast_scan_mode);
block = first_block;
diff --git a/libyara/yara.h b/libyara/yara.h
index 2acac0e..5603ade 100644
--- a/libyara/yara.h
+++ b/libyara/yara.h
@@ -130,17 +130,17 @@ limitations under the License.
#define CALLBACK_ABORT 1
#define CALLBACK_ERROR 2
-#define STRING_FLAGS_FOUND 0x01
-#define STRING_FLAGS_REFERENCED 0x02
-#define STRING_FLAGS_HEXADECIMAL 0x04
-#define STRING_FLAGS_NO_CASE 0x08
-#define STRING_FLAGS_ASCII 0x10
-#define STRING_FLAGS_WIDE 0x20
-#define STRING_FLAGS_REGEXP 0x40
-#define STRING_FLAGS_FULL_WORD 0x80
-#define STRING_FLAGS_ANONYMOUS 0x100
-#define STRING_FLAGS_FAST_MATCH 0x200
-#define STRING_FLAGS_NULL 0x1000
+#define STRING_FLAGS_FOUND 0x01
+#define STRING_FLAGS_REFERENCED 0x02
+#define STRING_FLAGS_HEXADECIMAL 0x04
+#define STRING_FLAGS_NO_CASE 0x08
+#define STRING_FLAGS_ASCII 0x10
+#define STRING_FLAGS_WIDE 0x20
+#define STRING_FLAGS_REGEXP 0x40
+#define STRING_FLAGS_FULL_WORD 0x80
+#define STRING_FLAGS_ANONYMOUS 0x100
+#define STRING_FLAGS_SINGLE_MATCH 0x200
+#define STRING_FLAGS_NULL 0x1000
#define STRING_IS_HEX(x) \
(((x)->flags) & STRING_FLAGS_HEXADECIMAL)
@@ -319,11 +319,41 @@ typedef struct _AC_STATE
DECLARE_REFERENCE(struct _AC_STATE*, failure);
DECLARE_REFERENCE(AC_MATCH*, matches);
- DECLARE_REFERENCE(struct _AC_STATE*, state) transitions[256];
} AC_STATE;
+typedef struct _AC_STATE_TRANSITION
+{
+ uint8_t input;
+ DECLARE_REFERENCE(AC_STATE*, state);
+ DECLARE_REFERENCE(struct _AC_STATE_TRANSITION*, next);
+
+} AC_STATE_TRANSITION;
+
+
+typedef struct _AC_TABLE_BASED_STATE
+{
+ int8_t depth;
+
+ DECLARE_REFERENCE(AC_STATE*, failure);
+ DECLARE_REFERENCE(AC_MATCH*, matches);
+ DECLARE_REFERENCE(AC_STATE*, state) transitions[256];
+
+} AC_TABLE_BASED_STATE;
+
+
+typedef struct _AC_LIST_BASED_STATE
+{
+ int8_t depth;
+
+ DECLARE_REFERENCE(AC_STATE*, failure);
+ DECLARE_REFERENCE(AC_MATCH*, matches);
+ DECLARE_REFERENCE(AC_STATE_TRANSITION*, transitions);
+
+} AC_LIST_BASED_STATE;
+
+
typedef struct _AC_AUTOMATON
{
DECLARE_REFERENCE(AC_STATE*, root);
@@ -362,10 +392,14 @@ typedef struct _HASH_TABLE
} HASH_TABLE;
+#define YARA_ERROR_LEVEL_ERROR 0
+#define YARA_ERROR_LEVEL_WARNING 1
+
typedef void (*YARAREPORT)(
+ int error_level,
const char* file_name,
int line_number,
- const char* error_message);
+ const char* message);
typedef int (*YARACALLBACK)(
@@ -528,21 +562,24 @@ int yr_rules_scan_mem(
uint8_t* buffer,
size_t buffer_size,
YARACALLBACK callback,
- void* user_data);
+ void* user_data,
+ int fast_scan_mode);
int yr_rules_scan_file(
YARA_RULES* rules,
const char* filename,
YARACALLBACK callback,
- void* user_data);
+ void* user_data,
+ int fast_scan_mode);
int yr_rules_scan_proc(
YARA_RULES* rules,
int pid,
YARACALLBACK callback,
- void* user_data);
+ void* user_data,
+ int fast_scan_mode);
int yr_rules_save(
@@ -585,7 +622,13 @@ int yr_ac_create_automaton(
int yr_ac_add_string(
ARENA* arena,
AC_AUTOMATON* automaton,
- STRING* string);
+ STRING* string,
+ int* min_token_length);
+
+
+AC_STATE* yr_ac_next_state(
+ AC_STATE* state,
+ uint8_t input);
void yr_ac_create_failure_links(
diff --git a/yara-python/yara-python.c b/yara-python/yara-python.c
index 6508676..1335fc5 100644
--- a/yara-python/yara-python.c
+++ b/yara-python/yara-python.c
@@ -743,7 +743,7 @@ static PyObject * Rules_match(
PyObject *keywords)
{
static char *kwlist[] = {
- "filepath", "pid", "data", "externals", "callback", NULL};
+ "filepath", "pid", "data", "externals", "callback", "fast", NULL};
char* filepath = NULL;
char* data = NULL;
@@ -751,8 +751,10 @@ static PyObject * Rules_match(
int pid = 0;
int length;
int error;
+ int fast_mode = FALSE;
PyObject *externals = NULL;
+ PyObject *fast = NULL;
Rules* object = (Rules*) self;
CALLBACK_DATA callback_data;
@@ -763,14 +765,15 @@ static PyObject * Rules_match(
if (PyArg_ParseTupleAndKeywords(
args,
keywords,
- "|sis#OO",
+ "|sis#OOO",
kwlist,
&filepath,
&pid,
&data,
&length,
&externals,
- &callback_data.callback))
+ &callback_data.callback,
+ &fast))
{
if (externals != NULL)
{
@@ -801,6 +804,11 @@ static PyObject * Rules_match(
}
}
+ if (fast != NULL)
+ {
+ fast_mode = (PyObject_IsTrue(fast) == 1);
+ }
+
if (filepath != NULL)
{
callback_data.matches = PyList_New(0);
@@ -811,7 +819,8 @@ static PyObject * Rules_match(
object->rules,
filepath,
yara_callback,
- &callback_data);
+ &callback_data,
+ fast_mode);
Py_END_ALLOW_THREADS
@@ -836,7 +845,8 @@ static PyObject * Rules_match(
(unsigned char*) data,
(unsigned int) length,
yara_callback,
- &callback_data);
+ &callback_data,
+ fast_mode);
Py_END_ALLOW_THREADS
@@ -860,7 +870,8 @@ static PyObject * Rules_match(
object->rules,
pid,
yara_callback,
- &callback_data);
+ &callback_data,
+ fast_mode);
Py_END_ALLOW_THREADS
diff --git a/yara.c b/yara.c
index 4a8903c..5394416 100644
--- a/yara.c
+++ b/yara.c
@@ -51,6 +51,7 @@ int show_specified_tags = FALSE;
int show_specified_rules = FALSE;
int show_strings = FALSE;
int show_meta = FALSE;
+int fast_scan = FALSE;
int negate = FALSE;
int count = 0;
int limit = 0;
@@ -94,29 +95,27 @@ TAG* specified_tags_list = NULL;
IDENTIFIER* specified_rules_list = NULL;
EXTERNAL* externals_list = NULL;
-
-////////////////////////////////////////////////////////////////////////////////////////////////
+#define USAGE \
+"usage: yara [OPTION]... RULES_FILE FILE | PID\n"\
+"options:\n"\
+" -t <tag> only print rules tagged as <tag>.\n"\
+" -i <identifier> only print rules named <identifier>.\n"\
+" -n only print not satisfied rules (negate).\n"\
+" -g print tags.\n"\
+" -m print metadata.\n"\
+" -s print matching strings.\n"\
+" -l <number> abort scanning after matching <number> rules.\n"\
+" -d <identifier>=<value> define external variable.\n"\
+" -r recursively search directories.\n"\
+" -v show version information.\n"
void show_help()
{
- printf("usage: yara [OPTION]... RULES_FILE FILE | PID\n");
- printf("options:\n");
- printf(" -t <tag> print rules tagged as <tag> and ignore the rest. Can be used more than once.\n");
- printf(" -i <identifier> print rules named <identifier> and ignore the rest. Can be used more than once.\n");
- printf(" -n print only not satisfied rules (negate).\n");
- printf(" -g print tags.\n");
- printf(" -m print metadata.\n");
- printf(" -s print matching strings.\n");
- printf(" -l <number> abort scanning after a <number> of rules matched.\n");
- printf(" -d <identifier>=<value> define external variable.\n");
- printf(" -r recursively search directories.\n");
- printf(" -v show version information.\n");
+ printf(USAGE);
printf("\nReport bugs to: <%s>\n", PACKAGE_BUGREPORT);
}
-////////////////////////////////////////////////////////////////////////////////////////////////
-
int is_numeric(
const char *str)
{
@@ -172,8 +171,12 @@ int scan_dir(
if (!(FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
{
- //printf("Processing %s...\n", FindFileData.cFileName);
- result = yr_rules_scan_file(rules, full_path, callback, full_path);
+ result = yr_rules_scan_file(
+ rules,
+ full_path,
+ callback,
+ full_path,
+ TRUE);
}
else if (recursive && FindFileData.cFileName[0] != '.' )
{
@@ -235,7 +238,12 @@ int scan_dir(
{
if(S_ISREG(st.st_mode))
{
- result = yr_rules_scan_file(rules, full_path, callback, full_path);
+ result = yr_rules_scan_file(
+ rules,
+ full_path,
+ callback,
+ full_path,
+ fast_scan);
}
else if(recursive && S_ISDIR(st.st_mode) && de->d_name[0] != '.')
{
@@ -500,6 +508,10 @@ int process_cmd_line(
show_strings = TRUE;
break;
+ case 'f':
+ fast_scan = TRUE;
+ break;
+
case 'n':
negate = TRUE;
break;
@@ -607,11 +619,15 @@ int process_cmd_line(
}
void report_error(
+ int error_level,
const char* file_name,
int line_number,
- const char* error_message)
+ const char* message)
{
- fprintf(stderr, "%s:%d: %s\n", file_name, line_number, error_message);
+ if (error_level == YARA_ERROR_LEVEL_ERROR)
+ fprintf(stderr, "%s(%d): error: %s\n", file_name, line_number, message);
+ else
+ fprintf(stderr, "%s(%d): warning: %s\n", file_name, line_number, message);
}
@@ -780,7 +796,8 @@ int main(
rules,
pid,
callback,
- (void*) argv[argc - 1]);
+ (void*) argv[argc - 1],
+ fast_scan);
}
else if (is_directory(argv[argc - 1]))
{
@@ -796,7 +813,8 @@ int main(
rules,
argv[argc - 1],
callback,
- (void*) argv[argc - 1]);
+ (void*) argv[argc - 1],
+ fast_scan);
}
switch (result)
diff --git a/yarac.c b/yarac.c
index 603eca1..0d57c12 100644
--- a/yarac.c
+++ b/yarac.c
@@ -136,11 +136,15 @@ int process_cmd_line(
void report_error(
+ int error_level,
const char* file_name,
int line_number,
- const char* error_message)
+ const char* message)
{
- fprintf(stderr, "%s:%d: %s\n", file_name, line_number, error_message);
+ if (error_level == YARA_ERROR_LEVEL_ERROR)
+ fprintf(stderr, "%s(%d): error: %s\n", file_name, line_number, message);
+ else
+ fprintf(stderr, "%s(%d): warning: %s\n", file_name, line_number, message);
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list