[Forensics-changes] [yara] 260/415: Rename RE_STACK for RE_FIBER_DATA and some readability improvements
Hilko Bengen
bengen at moszumanska.debian.org
Thu Apr 3 05:43:12 UTC 2014
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to branch debian
in repository yara.
commit 7e0932f8214082e0ea5c9257b530b116b6bf5c0b
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Thu Nov 28 18:27:10 2013 +0100
Rename RE_STACK for RE_FIBER_DATA and some readability improvements
---
libyara/re.c | 382 +++++++++++++++++++++++++++++------------------------------
1 file changed, 188 insertions(+), 194 deletions(-)
diff --git a/libyara/re.c b/libyara/re.c
index 713a862..2df9438 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -56,35 +56,35 @@ order to avoid confusion with operating system threads.
// Each fiber has an associated stack, which is used by
// PUSH, POP and JNZ
-typedef struct _RE_STACK
+typedef struct _RE_FIBER_DATA
{
- int top;
- uint16_t items[MAX_RE_STACK];
+ int stack_top;
+ uint16_t stack[MAX_RE_STACK];
- struct _RE_STACK* next;
- struct _RE_STACK* prev;
+ struct _RE_FIBER_DATA* next;
+ struct _RE_FIBER_DATA* prev;
-} RE_STACK;
+} RE_FIBER_DATA;
// Stacks are allocated as needed, and freed stacks are kept in
// a pool for later re-use.
-typedef struct _RE_STACK_POOL
+typedef struct _RE_FIBER_DATA_POOL
{
- RE_STACK* free;
- RE_STACK* used;
+ RE_FIBER_DATA* free;
+ RE_FIBER_DATA* used;
-} RE_STACK_POOL;
+} RE_FIBER_DATA_POOL;
// A fiber is described by its current instruction pointer and
-// its stack.
+// its fiber_data.
typedef struct _RE_FIBER
{
uint8_t* ip;
- RE_STACK* stack;
+ RE_FIBER_DATA* fiber_data;
} RE_FIBER;
@@ -101,7 +101,7 @@ typedef struct _RE_THREAD_STORAGE
{
RE_FIBER_LIST list1;
RE_FIBER_LIST list2;
- RE_STACK_POOL stack_pool;
+ RE_FIBER_DATA_POOL fiber_data_pool;
} RE_THREAD_STORAGE;
@@ -168,8 +168,8 @@ int yr_re_finalize()
int yr_re_finalize_thread()
{
- RE_STACK* stack;
- RE_STACK* next_stack;
+ RE_FIBER_DATA* fiber_data;
+ RE_FIBER_DATA* next_fiber_data;
RE_THREAD_STORAGE* storage;
#ifdef WIN32
@@ -180,13 +180,13 @@ int yr_re_finalize_thread()
if (storage != NULL)
{
- stack = storage->stack_pool.free;
+ fiber_data = storage->fiber_data_pool.free;
- while (stack != NULL)
+ while (fiber_data != NULL)
{
- next_stack = stack->next;
- yr_free(stack);
- stack = next_stack;
+ next_fiber_data = fiber_data->next;
+ yr_free(fiber_data);
+ fiber_data = next_fiber_data;
}
yr_free(storage);
@@ -851,80 +851,81 @@ int yr_re_emit_code(
}
-RE_STACK* _yr_re_alloc_stack(
- RE_STACK_POOL* pool)
+RE_FIBER_DATA* _yr_re_alloc_fiber_data(
+ RE_FIBER_DATA_POOL* pool)
{
- RE_STACK* stack;
+ RE_FIBER_DATA* fiber_data;
if (pool->free != NULL)
{
- stack = pool->free;
- pool->free = stack->next;
+ fiber_data = pool->free;
+ pool->free = fiber_data->next;
if (pool->free != NULL)
pool->free->prev = NULL;
}
else
{
- stack = yr_malloc(sizeof(RE_STACK));
+ fiber_data = yr_malloc(sizeof(RE_FIBER_DATA));
}
- stack->top = -1;
- stack->prev = NULL;
+ fiber_data->pre_matched = 0;
+ fiber_data->stack_top = -1;
+ fiber_data->prev = NULL;
if (pool->used != NULL)
- pool->used->prev = stack;
+ pool->used->prev = fiber_data;
- stack->next = pool->used;
- pool->used = stack;
+ fiber_data->next = pool->used;
+ pool->used = fiber_data;
- return stack;
+ return fiber_data;
}
-RE_STACK* _yr_re_clone_stack(
- RE_STACK* stack,
- RE_STACK_POOL* pool)
+RE_FIBER_DATA* _yr_re_clone_fiber_data(
+ RE_FIBER_DATA* fiber_data,
+ RE_FIBER_DATA_POOL* pool)
{
- RE_STACK* clon;
+ RE_FIBER_DATA* clon;
int i;
- if (stack == NULL)
+ if (fiber_data == NULL)
return NULL;
- clon = _yr_re_alloc_stack(pool);
- clon->top = stack->top;
+ clon = _yr_re_alloc_fiber_data(pool);
+ clon->stack_top = fiber_data->stack_top;
- for (i = 0; i < clon->top; i++)
- clon->items[i] = stack->items[i];
+ for (i = 0; i < clon->stack_top; i++)
+ clon->stack[i] = fiber_data->stack[i];
return clon;
}
-void _yr_re_free_stack(
- RE_STACK* stack,
- RE_STACK_POOL* pool)
+void _yr_re_free_fiber_data(
+ RE_FIBER_DATA* fiber_data,
+ RE_FIBER_DATA_POOL* pool)
{
- if (stack == NULL)
+ if (fiber_data == NULL)
return;
- if (stack->prev != NULL)
- stack->prev->next = stack->next;
+ if (pool->used == fiber_data)
+ pool->used = fiber_data->next;
- if (stack->next != NULL)
- stack->next->prev = stack->prev;
+ if (fiber_data->prev != NULL)
+ fiber_data->prev->next = fiber_data->next;
- stack->next = pool->free;
+ if (fiber_data->next != NULL)
+ fiber_data->next->prev = fiber_data->prev;
- if (pool->free != NULL)
- pool->free->prev = stack;
+ fiber_data->next = pool->free;
- pool->free = stack;
- stack->prev = NULL;
+ if (pool->free != NULL)
+ pool->free->prev = fiber_data;
- if (pool->used == stack)
- pool->used = NULL;
+ pool->free = fiber_data;
+ fiber_data->prev = NULL;
}
@@ -946,16 +947,17 @@ void _yr_re_add_fiber(
RE_FIBER_LIST* fibers,
RE_THREAD_STORAGE* storage,
uint8_t* ip,
- RE_STACK* stack)
+ uint8_t* input,
+ RE_FIBER_DATA* fiber_data)
{
- RE_STACK* new_stack;
+ RE_FIBER_DATA* new_fiber_data;
uint16_t counter_index;
int16_t jmp_offset;
if (_yr_re_fiber_exists(fibers, ip))
{
- _yr_re_free_stack(stack, &storage->stack_pool);
+ _yr_re_free_fiber_data(fiber_data, &storage->fiber_data_pool);
return;
}
@@ -963,56 +965,63 @@ void _yr_re_add_fiber(
{
case RE_OPCODE_JUMP:
jmp_offset = *(int16_t*)(ip + 1);
- _yr_re_add_fiber(fibers, storage, ip + jmp_offset, stack);
+ _yr_re_add_fiber(fibers, storage, ip + jmp_offset, input, fiber_data);
break;
case RE_OPCODE_JNZ:
jmp_offset = *(int16_t*)(ip + 1);
- stack->items[stack->top]--;
+ fiber_data->stack[fiber_data->stack_top]--;
- if (stack->items[stack->top] > 0)
- _yr_re_add_fiber(fibers, storage, ip + jmp_offset, stack);
+ if (fiber_data->stack[fiber_data->stack_top] > 0)
+ _yr_re_add_fiber(fibers, storage, ip + jmp_offset, input, fiber_data);
else
- _yr_re_add_fiber(fibers, storage, ip + 3, stack);
+ _yr_re_add_fiber(fibers, storage, ip + 3, input, fiber_data);
break;
case RE_OPCODE_PUSH:
- if (stack == NULL)
- stack = _yr_re_alloc_stack(&storage->stack_pool);
- stack->items[++stack->top] = *(uint16_t*)(ip + 1);
- _yr_re_add_fiber(fibers, storage, ip + 3, stack);
+ if (fiber_data == NULL)
+ fiber_data = _yr_re_alloc_fiber_data(&storage->fiber_data_pool);
+
+ fiber_data->stack[++fiber_data->stack_top] = *(uint16_t*)(ip + 1);
+ _yr_re_add_fiber(fibers, storage, ip + 3, input, fiber_data);
break;
case RE_OPCODE_POP:
- stack->top--;
- if (stack->top == -1)
+ fiber_data->stack_top--;
+ if (fiber_data->stack_top == -1)
{
- _yr_re_free_stack(stack, &storage->stack_pool);
- stack = NULL;
+ _yr_re_free_fiber_data(fiber_data, &storage->fiber_data_pool);
+ fiber_data = NULL;
}
- _yr_re_add_fiber(fibers, storage, ip + 1, stack);
+ _yr_re_add_fiber(fibers, storage, ip + 1, input, fiber_data);
break;
case RE_OPCODE_SPLIT_A:
jmp_offset = *(int16_t*)(ip + 1);
- new_stack = _yr_re_clone_stack(stack, &storage->stack_pool);
- _yr_re_add_fiber(fibers, storage, ip + 3, stack);
- _yr_re_add_fiber(fibers, storage, ip + jmp_offset, new_stack);
+ new_fiber_data = _yr_re_clone_fiber_data(
+ fiber_data, &storage->fiber_data_pool);
+
+ _yr_re_add_fiber(
+ fibers, storage, ip + 3, input, fiber_data);
+ _yr_re_add_fiber(
+ fibers, storage, ip + jmp_offset, input, new_fiber_data);
break;
case RE_OPCODE_SPLIT_B:
jmp_offset = *(int16_t*)(ip + 1);
- new_stack = _yr_re_clone_stack(stack, &storage->stack_pool);
- _yr_re_add_fiber(fibers, storage, ip + jmp_offset, stack);
- _yr_re_add_fiber(fibers, storage, ip + 3, new_stack);
+ new_fiber_data = _yr_re_clone_fiber_data(
+ fiber_data, &storage->fiber_data_pool);
+
+ _yr_re_add_fiber(fibers, storage, ip + jmp_offset, input, fiber_data);
+ _yr_re_add_fiber(fibers, storage, ip + 3, input, new_fiber_data);
break;
default:
assert(fibers->count < MAX_RE_FIBERS);
fibers->items[fibers->count].ip = ip;
- fibers->items[fibers->count].stack = stack;
+ fibers->items[fibers->count].fiber_data = fiber_data;
fibers->count++;
}
}
@@ -1046,24 +1055,25 @@ void _yr_re_add_fiber(
int yr_re_exec(
uint8_t* code,
- uint8_t* input,
+ uint8_t* input_data,
size_t input_size,
int flags,
RE_MATCH_CALLBACK_FUNC callback,
void* callback_args)
{
- size_t i, t;
+ size_t i;
uint8_t* ip;
- uint8_t* current_input;
+ uint8_t* input;
uint8_t mask;
uint8_t value;
RE_THREAD_STORAGE* storage;
- RE_FIBER_LIST* current_fibers;
+ RE_FIBER_LIST* fibers;
RE_FIBER_LIST* next_fibers;
- RE_STACK* stack;
+ RE_FIBER_DATA* fiber_data;
- int idx;
+ int fiber_idx;
+ int j;
int match;
char character;
int character_size;
@@ -1082,8 +1092,8 @@ int yr_re_exec(
if (storage == NULL)
return ERROR_INSUFICIENT_MEMORY;
- storage->stack_pool.free = NULL;
- storage->stack_pool.used = NULL;
+ storage->fiber_data_pool.free = NULL;
+ storage->fiber_data_pool.used = NULL;
#ifdef WIN32
TlsSetValue(thread_storage_key, storage);
@@ -1092,7 +1102,8 @@ int yr_re_exec(
#endif
}
- current_fibers = &storage->list1;
+ input = input_data;
+ fibers = &storage->list1;
next_fibers = &storage->list2;
if (flags & RE_FLAGS_WIDE)
@@ -1100,42 +1111,37 @@ int yr_re_exec(
else
character_size = 1;
- current_fibers->count = 0;
+ fibers->count = 0;
next_fibers->count = 0;
// Create the initial execution fiber starting at the provided the beginning
- // of the provided code. The stack is initially NULL and will be created
+ // of the provided code. The fiber data is initially NULL and will be created
// dynamically when the first PUSH instruction is found.
- _yr_re_add_fiber(current_fibers, storage, code, NULL);
-
- current_input = input;
+ _yr_re_add_fiber(fibers, storage, code, input, NULL);
for (i = 0; i < min(input_size, RE_SCAN_LIMIT); i += character_size)
{
if ((flags & RE_FLAGS_SCAN) &&
!(flags & RE_FLAGS_START_ANCHORED))
- _yr_re_add_fiber(current_fibers, storage, code, NULL);
+ _yr_re_add_fiber(fibers, storage, code, input, NULL);
- if (current_fibers->count == 0)
+ if (fibers->count == 0)
break;
- for(t = 0; t < current_fibers->count; t++)
+ for(fiber_idx = 0; fiber_idx < fibers->count; fiber_idx++)
{
- ip = current_fibers->items[t].ip;
- stack = current_fibers->items[t].stack;
+ ip = fibers->items[fiber_idx].ip;
+ fiber_data = fibers->items[fiber_idx].fiber_data;
switch(*ip)
{
case RE_OPCODE_LITERAL:
if (flags & RE_FLAGS_NO_CASE)
- match = lowercase[*current_input] == lowercase[*(ip + 1)];
- else
- match = *current_input == *(ip + 1);
- if (match)
- _yr_re_add_fiber(next_fibers, storage, ip + 2, stack);
+ match = lowercase[*input] == lowercase[*(ip + 1)];
else
- _yr_re_free_stack(stack, &storage->stack_pool);
+ match = (*input == *(ip + 1));
+ ip += 2;
break;
case RE_OPCODE_MASKED_LITERAL:
@@ -1146,76 +1152,58 @@ int yr_re_exec(
// case because this opcode is only used with hex strings,
// which can't be case-insensitive.
- if ((*current_input & mask) == value)
- _yr_re_add_fiber(next_fibers, storage, ip + 3, stack);
- else
- _yr_re_free_stack(stack, &storage->stack_pool);
+ match = ((*input & mask) == value);
+ ip += 3;
break;
case RE_OPCODE_CLASS:
if (flags & RE_FLAGS_NO_CASE)
- match = CHAR_IN_CLASS(*current_input, ip + 1) ||
- CHAR_IN_CLASS(altercase[*current_input], ip + 1);
- else
- match = CHAR_IN_CLASS(*current_input, ip + 1);
-
- if (match)
- _yr_re_add_fiber(next_fibers, storage, ip + 33, stack);
+ match = CHAR_IN_CLASS(*input, ip + 1) ||
+ CHAR_IN_CLASS(altercase[*input], ip + 1);
else
- _yr_re_free_stack(stack, &storage->stack_pool);
+ match = CHAR_IN_CLASS(*input, ip + 1);
+ ip += 33;
break;
case RE_OPCODE_WORD_CHAR:
- if (isalnum(*current_input) || *current_input == '_')
- _yr_re_add_fiber(next_fibers, storage, ip + 1, stack);
- else
- _yr_re_free_stack(stack, &storage->stack_pool);
+ match = (isalnum(*input) || *input == '_');
+ ip += 1;
break;
case RE_OPCODE_NON_WORD_CHAR:
- if (!isalnum(*current_input) && *current_input != '_')
- _yr_re_add_fiber(next_fibers, storage, ip + 1, stack);
- else
- _yr_re_free_stack(stack, &storage->stack_pool);
+ match = (!isalnum(*input) && *input != '_');
+ ip += 1;
break;
case RE_OPCODE_SPACE:
- if (*current_input == ' ' || *current_input == '\t')
- _yr_re_add_fiber(next_fibers, storage, ip + 1, stack);
- else
- _yr_re_free_stack(stack, &storage->stack_pool);
+ match = (*input == ' ' || *input == '\t');
+ ip += 1;
break;
case RE_OPCODE_NON_SPACE:
- if (*current_input != ' ' && *current_input != '\t')
- _yr_re_add_fiber(next_fibers, storage, ip + 1, stack);
- else
- _yr_re_free_stack(stack, &storage->stack_pool);
+ match = (*input != ' ' && *input != '\t');
+ ip += 1;
break;
case RE_OPCODE_DIGIT:
- if (isdigit(*current_input))
- _yr_re_add_fiber(next_fibers, storage, ip + 1, stack);
- else
- _yr_re_free_stack(stack, &storage->stack_pool);
+ match = isdigit(*input);
+ ip += 1;
break;
case RE_OPCODE_NON_DIGIT:
- if (!isdigit(*current_input))
- _yr_re_add_fiber(next_fibers, storage, ip + 1, stack);
- else
- _yr_re_free_stack(stack, &storage->stack_pool);
+ match = !isdigit(*input);
+ ip += 1;
break;
case RE_OPCODE_ANY:
- if (*current_input != 0x0A || flags & RE_FLAGS_DOT_ALL)
- _yr_re_add_fiber(next_fibers, storage, ip + 1, stack);
- else
- _yr_re_free_stack(stack, &storage->stack_pool);
+ match = (*input != 0x0A || flags & RE_FLAGS_DOT_ALL);
+ ip += 1;
break;
case RE_OPCODE_MATCH:
- _yr_re_free_stack(stack, &storage->stack_pool);
+
+ match = FALSE;
+ result = i;
if (flags & RE_FLAGS_END_ANCHORED && i < input_size)
break;
@@ -1223,86 +1211,92 @@ int yr_re_exec(
if (flags & RE_FLAGS_EXHAUSTIVE)
{
if (flags & RE_FLAGS_BACKWARDS)
- callback(
- current_input + character_size,
- i,
- flags,
- callback_args);
+ callback(input + character_size, i, flags, callback_args);
else
- callback(
- input,
- i,
- flags,
- callback_args);
-
- result = i;
+ callback(input_data, i, flags, callback_args);
}
else
{
- result = i;
- goto _break;
+ // As we are forcing a jump out of the loop fiber_idx
+ // won't be incremented. Let's do it before exiting.
+
+ //fiber_idx++;
+ goto _exit_loop;
}
+
break;
default:
assert(FALSE);
}
+
+ if (match)
+ _yr_re_add_fiber(
+ next_fibers,
+ storage,
+ ip,
+ input + character_size,
+ fiber_data);
+ else
+ _yr_re_free_fiber_data(
+ fiber_data,
+ &storage->fiber_data_pool);
}
- _break:
+ _exit_loop:
- // Free the stacks for any remaining fiber that didn't
+ // Free the fiber data for any remaining fiber that didn't
// survived for the next step.
- for(; t < current_fibers->count; t++)
- _yr_re_free_stack(
- current_fibers->items[t].stack,
- &storage->stack_pool);
+ for(; fiber_idx < fibers->count; fiber_idx++)
+ _yr_re_free_fiber_data(
+ fibers->items[fiber_idx].fiber_data,
+ &storage->fiber_data_pool);
- swap_fibers(current_fibers, next_fibers);
+ swap_fibers(fibers, next_fibers);
next_fibers->count = 0;
- if (flags & RE_FLAGS_WIDE && *(current_input + 1) != 0)
+ if (flags & RE_FLAGS_WIDE && *(input + 1) != 0)
break;
if (flags & RE_FLAGS_BACKWARDS)
- current_input -= character_size;
+ input -= character_size;
else
- current_input += character_size;
- }
+ input += character_size;
+
+ } //for (i = 0; i < min(input_size, RE_SCAN_LIMIT) ...
if (!(flags & RE_FLAGS_END_ANCHORED) || i == input_size)
{
- for(t = 0; t < current_fibers->count; t++)
+ for(fiber_idx = 0; fiber_idx < fibers->count; fiber_idx++)
{
- if (*current_fibers->items[t].ip == RE_OPCODE_MATCH)
+ if (*fibers->items[fiber_idx].ip != RE_OPCODE_MATCH)
+ continue;
+
+ if (flags & RE_FLAGS_EXHAUSTIVE)
{
- if (flags & RE_FLAGS_EXHAUSTIVE)
- {
- if (flags & RE_FLAGS_BACKWARDS)
- callback(
- current_input + character_size,
- i,
- flags,
- callback_args);
- else
- callback(
- input,
- i,
- flags,
- callback_args);
- }
+ if (flags & RE_FLAGS_BACKWARDS)
+ callback(
+ input + character_size, i, flags, callback_args);
else
- {
- result = i;
- break;
- }
+ callback(
+ input_data, i, flags, callback_args);
+ }
+ else
+ {
+ result = i;
+ break;
}
}
}
- // Ensure that every stack was released
- assert(storage->stack_pool.used == NULL);
+ for(fiber_idx = 0; fiber_idx < fibers->count; fiber_idx++)
+ _yr_re_free_fiber_data(
+ fibers->items[fiber_idx].fiber_data,
+ &storage->fiber_data_pool);
+
+ // Ensure that every fiber data was released
+ assert(storage->fiber_data_pool.used == NULL);
return result;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list