[Forensics-changes] [yara] 178/368: Fix infinite loop with certain regular expressions like (a*)* and (a|)*
Hilko Bengen
bengen at moszumanska.debian.org
Sat Jul 1 10:30:27 UTC 2017
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to annotated tag v3.5.0
in repository yara.
commit a76bfc07fc73160e320abe10ffe73d66f92c183f
Author: plusvic <plusvic at gmail.com>
Date: Mon Feb 22 21:30:06 2016 +0100
Fix infinite loop with certain regular expressions like (a*)* and (a|)*
---
libyara/include/yara/re.h | 1 +
libyara/re.c | 256 ++++++++++++++++++++++++++++++++--------------
libyara/scan.c | 39 +++----
3 files changed, 203 insertions(+), 93 deletions(-)
diff --git a/libyara/include/yara/re.h b/libyara/include/yara/re.h
index d8895ec..d2fe217 100644
--- a/libyara/include/yara/re.h
+++ b/libyara/include/yara/re.h
@@ -88,6 +88,7 @@ typedef struct RE RE;
typedef struct RE_NODE RE_NODE;
typedef struct RE_ERROR RE_ERROR;
+typedef uint8_t RE_SPLIT_ID_TYPE;
typedef uint8_t* RE_CODE;
#define CHAR_IN_CLASS(chr, cls) \
diff --git a/libyara/re.c b/libyara/re.c
index eb8af64..932d1fd 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -45,9 +45,19 @@ order to avoid confusion with operating system threads.
#include <yara/hex_lexer.h>
-#define RE_MAX_STACK 1024 // Maxium stack size for regexp evaluation
-#define RE_MAX_CODE_SIZE 32768 // Maximum code size for a compiled regexp
-#define RE_SCAN_LIMIT 4096 // Maximum input size scanned by yr_re_exec
+// Maximum allowed split ID, also limiting the number of split instructions
+// allowed in a regular expression. This number can't be increased
+// over 255 without changing RE_SPLIT_ID_TYPE.
+#define RE_MAX_SPLIT_ID 255
+
+// Maximum stack size for regexp evaluation
+#define RE_MAX_STACK 1024
+
+// Maximum code size for a compiled regexp
+#define RE_MAX_CODE_SIZE 32768
+
+// Maximum input size scanned by yr_re_exec
+#define RE_SCAN_LIMIT 4096
#define EMIT_BACKWARDS 0x01
@@ -57,6 +67,13 @@ order to avoid confusion with operating system threads.
#define EMIT_DOT_ALL 0x10
+typedef struct _RE_EMIT_CONTEXT {
+
+ YR_ARENA* arena;
+ RE_SPLIT_ID_TYPE next_split_id;
+
+} RE_EMIT_CONTEXT;
+
typedef struct _RE_FIBER
{
RE_CODE ip;
@@ -504,13 +521,13 @@ int yr_re_split_at_chaining_point(
int _yr_emit_inst(
- YR_ARENA* arena,
+ RE_EMIT_CONTEXT* emit_context,
uint8_t opcode,
uint8_t** instruction_addr,
int* code_size)
{
FAIL_ON_ERROR(yr_arena_write_data(
- arena,
+ emit_context->arena,
&opcode,
sizeof(uint8_t),
(void**) instruction_addr));
@@ -522,7 +539,7 @@ int _yr_emit_inst(
int _yr_emit_inst_arg_uint8(
- YR_ARENA* arena,
+ RE_EMIT_CONTEXT* emit_context,
uint8_t opcode,
uint8_t argument,
uint8_t** instruction_addr,
@@ -530,13 +547,13 @@ int _yr_emit_inst_arg_uint8(
int* code_size)
{
FAIL_ON_ERROR(yr_arena_write_data(
- arena,
+ emit_context->arena,
&opcode,
sizeof(uint8_t),
(void**) instruction_addr));
FAIL_ON_ERROR(yr_arena_write_data(
- arena,
+ emit_context->arena,
&argument,
sizeof(uint8_t),
(void**) argument_addr));
@@ -548,7 +565,7 @@ int _yr_emit_inst_arg_uint8(
int _yr_emit_inst_arg_uint16(
- YR_ARENA* arena,
+ RE_EMIT_CONTEXT* emit_context,
uint8_t opcode,
uint16_t argument,
uint8_t** instruction_addr,
@@ -556,13 +573,13 @@ int _yr_emit_inst_arg_uint16(
int* code_size)
{
FAIL_ON_ERROR(yr_arena_write_data(
- arena,
+ emit_context->arena,
&opcode,
sizeof(uint8_t),
(void**) instruction_addr));
FAIL_ON_ERROR(yr_arena_write_data(
- arena,
+ emit_context->arena,
&argument,
sizeof(uint16_t),
(void**) argument_addr));
@@ -574,7 +591,7 @@ int _yr_emit_inst_arg_uint16(
int _yr_emit_inst_arg_uint32(
- YR_ARENA* arena,
+ RE_EMIT_CONTEXT* emit_context,
uint8_t opcode,
uint32_t argument,
uint8_t** instruction_addr,
@@ -582,13 +599,13 @@ int _yr_emit_inst_arg_uint32(
int* code_size)
{
FAIL_ON_ERROR(yr_arena_write_data(
- arena,
+ emit_context->arena,
&opcode,
sizeof(uint8_t),
(void**) instruction_addr));
FAIL_ON_ERROR(yr_arena_write_data(
- arena,
+ emit_context->arena,
&argument,
sizeof(uint32_t),
(void**) argument_addr));
@@ -600,7 +617,7 @@ int _yr_emit_inst_arg_uint32(
int _yr_emit_inst_arg_int16(
- YR_ARENA* arena,
+ RE_EMIT_CONTEXT* emit_context,
uint8_t opcode,
int16_t argument,
uint8_t** instruction_addr,
@@ -608,13 +625,13 @@ int _yr_emit_inst_arg_int16(
int* code_size)
{
FAIL_ON_ERROR(yr_arena_write_data(
- arena,
+ emit_context->arena,
&opcode,
sizeof(uint8_t),
(void**) instruction_addr));
FAIL_ON_ERROR(yr_arena_write_data(
- arena,
+ emit_context->arena,
&argument,
sizeof(int16_t),
(void**) argument_addr));
@@ -625,9 +642,48 @@ int _yr_emit_inst_arg_int16(
}
+int _yr_emit_split(
+ RE_EMIT_CONTEXT* emit_context,
+ uint8_t opcode,
+ int16_t argument,
+ uint8_t** instruction_addr,
+ int16_t** argument_addr,
+ int* code_size)
+{
+ assert(opcode == RE_OPCODE_SPLIT_A || opcode == RE_OPCODE_SPLIT_B);
+
+ if (emit_context->next_split_id == RE_MAX_SPLIT_ID)
+ return ERROR_INTERNAL_FATAL_ERROR;
+
+ FAIL_ON_ERROR(yr_arena_write_data(
+ emit_context->arena,
+ &opcode,
+ sizeof(uint8_t),
+ (void**) instruction_addr));
+
+ FAIL_ON_ERROR(yr_arena_write_data(
+ emit_context->arena,
+ &emit_context->next_split_id,
+ sizeof(RE_SPLIT_ID_TYPE),
+ NULL));
+
+ emit_context->next_split_id++;
+
+ FAIL_ON_ERROR(yr_arena_write_data(
+ emit_context->arena,
+ &argument,
+ sizeof(int16_t),
+ (void**) argument_addr));
+
+ *code_size = sizeof(uint8_t) + sizeof(RE_SPLIT_ID_TYPE) + sizeof(int16_t);
+
+ return ERROR_SUCCESS;
+}
+
+
int _yr_re_emit(
+ RE_EMIT_CONTEXT* emit_context,
RE_NODE* re_node,
- YR_ARENA* arena,
int flags,
uint8_t** code_addr,
int* code_size)
@@ -652,7 +708,7 @@ int _yr_re_emit(
case RE_NODE_LITERAL:
FAIL_ON_ERROR(_yr_emit_inst_arg_uint8(
- arena,
+ emit_context,
flags & EMIT_NO_CASE ?
RE_OPCODE_LITERAL_NO_CASE :
RE_OPCODE_LITERAL,
@@ -665,7 +721,7 @@ int _yr_re_emit(
case RE_NODE_MASKED_LITERAL:
FAIL_ON_ERROR(_yr_emit_inst_arg_uint16(
- arena,
+ emit_context,
RE_OPCODE_MASKED_LITERAL,
re_node->mask << 8 | re_node->value,
&instruction_addr,
@@ -676,7 +732,7 @@ int _yr_re_emit(
case RE_NODE_WORD_CHAR:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_WORD_CHAR,
&instruction_addr,
code_size));
@@ -685,7 +741,7 @@ int _yr_re_emit(
case RE_NODE_NON_WORD_CHAR:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_NON_WORD_CHAR,
&instruction_addr,
code_size));
@@ -694,7 +750,7 @@ int _yr_re_emit(
case RE_NODE_WORD_BOUNDARY:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_WORD_BOUNDARY,
&instruction_addr,
code_size));
@@ -703,7 +759,7 @@ int _yr_re_emit(
case RE_NODE_NON_WORD_BOUNDARY:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_NON_WORD_BOUNDARY,
&instruction_addr,
code_size));
@@ -712,7 +768,7 @@ int _yr_re_emit(
case RE_NODE_SPACE:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_SPACE,
&instruction_addr,
code_size));
@@ -721,7 +777,7 @@ int _yr_re_emit(
case RE_NODE_NON_SPACE:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_NON_SPACE,
&instruction_addr,
code_size));
@@ -730,7 +786,7 @@ int _yr_re_emit(
case RE_NODE_DIGIT:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_DIGIT,
&instruction_addr,
code_size));
@@ -739,7 +795,7 @@ int _yr_re_emit(
case RE_NODE_NON_DIGIT:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_NON_DIGIT,
&instruction_addr,
code_size));
@@ -748,7 +804,7 @@ int _yr_re_emit(
case RE_NODE_ANY:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
flags & EMIT_DOT_ALL ?
RE_OPCODE_ANY :
RE_OPCODE_ANY_EXCEPT_NEW_LINE,
@@ -759,7 +815,7 @@ int _yr_re_emit(
case RE_NODE_CLASS:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
(flags & EMIT_NO_CASE) ?
RE_OPCODE_CLASS_NO_CASE :
RE_OPCODE_CLASS,
@@ -767,7 +823,7 @@ int _yr_re_emit(
code_size));
FAIL_ON_ERROR(yr_arena_write_data(
- arena,
+ emit_context->arena,
re_node->class_vector,
32,
NULL));
@@ -778,7 +834,7 @@ int _yr_re_emit(
case RE_NODE_ANCHOR_START:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_MATCH_AT_START,
&instruction_addr,
code_size));
@@ -787,7 +843,7 @@ int _yr_re_emit(
case RE_NODE_ANCHOR_END:
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_MATCH_AT_END,
&instruction_addr,
code_size));
@@ -807,8 +863,8 @@ int _yr_re_emit(
}
FAIL_ON_ERROR(_yr_re_emit(
+ emit_context,
left,
- arena,
flags,
&instruction_addr,
&branch_size));
@@ -816,8 +872,8 @@ int _yr_re_emit(
*code_size += branch_size;
FAIL_ON_ERROR(_yr_re_emit(
+ emit_context,
right,
- arena,
flags,
NULL,
&branch_size));
@@ -835,16 +891,16 @@ int _yr_re_emit(
// L2:
FAIL_ON_ERROR(_yr_re_emit(
+ emit_context,
re_node->left,
- arena,
flags,
&instruction_addr,
&branch_size));
*code_size += branch_size;
- FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
- arena,
+ FAIL_ON_ERROR(_yr_emit_split(
+ emit_context,
re_node->greedy ? RE_OPCODE_SPLIT_B : RE_OPCODE_SPLIT_A,
-branch_size,
NULL,
@@ -863,8 +919,8 @@ int _yr_re_emit(
// jmp L1
// L2:
- FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
- arena,
+ FAIL_ON_ERROR(_yr_emit_split(
+ emit_context,
re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
0,
&instruction_addr,
@@ -874,8 +930,8 @@ int _yr_re_emit(
*code_size += split_size;
FAIL_ON_ERROR(_yr_re_emit(
+ emit_context,
re_node->left,
- arena,
flags,
NULL,
&branch_size));
@@ -885,7 +941,7 @@ int _yr_re_emit(
// Emit jump with offset set to 0.
FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
- arena,
+ emit_context,
RE_OPCODE_JUMP,
-(branch_size + split_size),
NULL,
@@ -912,8 +968,8 @@ int _yr_re_emit(
// will be updated after we know the size of the code generated for
// the left node (e1).
- FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
- arena,
+ FAIL_ON_ERROR(_yr_emit_split(
+ emit_context,
RE_OPCODE_SPLIT_A,
0,
&instruction_addr,
@@ -923,8 +979,8 @@ int _yr_re_emit(
*code_size += split_size;
FAIL_ON_ERROR(_yr_re_emit(
+ emit_context,
re_node->left,
- arena,
flags,
NULL,
&branch_size));
@@ -934,7 +990,7 @@ int _yr_re_emit(
// Emit jump with offset set to 0.
FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
- arena,
+ emit_context,
RE_OPCODE_JUMP,
0,
NULL,
@@ -947,8 +1003,8 @@ int _yr_re_emit(
*split_offset_addr = split_size + branch_size + jmp_size;
FAIL_ON_ERROR(_yr_re_emit(
+ emit_context,
re_node->right,
- arena,
flags,
NULL,
&branch_size));
@@ -985,8 +1041,8 @@ int _yr_re_emit(
if (re_node->start > 0)
{
FAIL_ON_ERROR(_yr_re_emit(
+ emit_context,
re_node->left,
- arena,
flags,
&instruction_addr,
&branch_size));
@@ -1001,8 +1057,8 @@ int _yr_re_emit(
// being updated.
FAIL_ON_ERROR(_yr_re_emit(
+ emit_context,
re_node->left,
- arena,
flags | EMIT_DONT_SET_FORWARDS_CODE,
NULL,
&branch_size));
@@ -1014,7 +1070,7 @@ int _yr_re_emit(
if (re_node->end > re_node->start + 1)
{
FAIL_ON_ERROR(_yr_emit_inst_arg_uint16(
- arena,
+ emit_context,
RE_OPCODE_PUSH,
re_node->end - re_node->start - 1,
re_node->start == 0 ? &instruction_addr : NULL,
@@ -1023,8 +1079,8 @@ int _yr_re_emit(
*code_size += inst_size;
- FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
- arena,
+ FAIL_ON_ERROR(_yr_emit_split(
+ emit_context,
re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
0,
NULL,
@@ -1034,8 +1090,8 @@ int _yr_re_emit(
*code_size += split_size;
FAIL_ON_ERROR(_yr_re_emit(
+ emit_context,
re_node->left,
- arena,
flags | EMIT_DONT_SET_FORWARDS_CODE | EMIT_DONT_SET_BACKWARDS_CODE,
NULL,
&branch_size));
@@ -1043,7 +1099,7 @@ int _yr_re_emit(
*code_size += branch_size;
FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
- arena,
+ emit_context,
RE_OPCODE_JNZ,
-(branch_size + split_size),
NULL,
@@ -1054,7 +1110,7 @@ int _yr_re_emit(
*split_offset_addr = split_size + branch_size + jmp_size;
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ emit_context,
RE_OPCODE_POP,
NULL,
&inst_size));
@@ -1064,8 +1120,8 @@ int _yr_re_emit(
if (re_node->end > re_node->start)
{
- FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
- arena,
+ FAIL_ON_ERROR(_yr_emit_split(
+ emit_context,
re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
0,
NULL,
@@ -1075,8 +1131,8 @@ int _yr_re_emit(
*code_size += split_size;
FAIL_ON_ERROR(_yr_re_emit(
+ emit_context,
re_node->left,
- arena,
flags | EMIT_DONT_SET_FORWARDS_CODE,
re_node->start == 0 && re_node->end == 1 ? &instruction_addr : NULL,
&branch_size));
@@ -1110,6 +1166,8 @@ int yr_re_emit_code(
RE* re,
YR_ARENA* arena)
{
+ RE_EMIT_CONTEXT emit_context;
+
int code_size;
int total_size;
@@ -1121,6 +1179,9 @@ int yr_re_emit_code(
if (re->flags & RE_FLAGS_DOT_ALL)
emit_flags |= EMIT_DOT_ALL;
+ emit_context.arena = arena;
+ emit_context.next_split_id = 0;
+
// Ensure that we have enough contiguous memory space in the arena to
// contain the regular expression code. The code can't span over multiple
// non-contiguous pages.
@@ -1132,8 +1193,8 @@ int yr_re_emit_code(
total_size = 0;
FAIL_ON_ERROR(_yr_re_emit(
+ &emit_context,
re->root_node,
- arena,
emit_flags,
&re->code,
&code_size));
@@ -1141,7 +1202,7 @@ int yr_re_emit_code(
total_size += code_size;
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ &emit_context,
RE_OPCODE_MATCH,
NULL,
&code_size));
@@ -1158,8 +1219,8 @@ int yr_re_emit_code(
total_size = 0;
FAIL_ON_ERROR(_yr_re_emit(
+ &emit_context,
re->root_node,
- arena,
emit_flags | EMIT_BACKWARDS,
NULL,
&code_size));
@@ -1167,7 +1228,7 @@ int yr_re_emit_code(
total_size += code_size;
FAIL_ON_ERROR(_yr_emit_inst(
- arena,
+ &emit_context,
RE_OPCODE_MATCH,
NULL,
&code_size));
@@ -1469,6 +1530,17 @@ int _yr_re_fiber_sync(
RE_FIBER_LIST* fiber_pool,
RE_FIBER* fiber_to_sync)
{
+ // An array for keeping track of which split instructions have already been
+ // executed. Each split instruction within a regexp has an associated ID
+ // between 0 and RE_MAX_SPLIT_ID. Keeping track of executed splits is
+ // required to avoid infinite loops in regexps like (a*)* or (a|)*
+
+ RE_SPLIT_ID_TYPE splits_executed[RE_MAX_SPLIT_ID];
+ RE_SPLIT_ID_TYPE splits_executed_count = 0;
+ RE_SPLIT_ID_TYPE split_id, splits_executed_idx;
+
+ int split_already_executed;
+
RE_FIBER* fiber;
RE_FIBER* last;
RE_FIBER* prev;
@@ -1483,21 +1555,56 @@ int _yr_re_fiber_sync(
switch(*fiber->ip)
{
case RE_OPCODE_SPLIT_A:
- new_fiber = _yr_re_fiber_split(fiber, fiber_list, fiber_pool);
- if (new_fiber == NULL)
- return ERROR_INSUFICIENT_MEMORY;
+ case RE_OPCODE_SPLIT_B:
- new_fiber->ip += *(int16_t*)(fiber->ip + 1);
- fiber->ip += 3;
- break;
+ split_id = *(RE_SPLIT_ID_TYPE*)(fiber->ip + 1);
+ split_already_executed = FALSE;
- case RE_OPCODE_SPLIT_B:
- new_fiber = _yr_re_fiber_split(fiber, fiber_list, fiber_pool);
- if (new_fiber == NULL)
- return ERROR_INSUFICIENT_MEMORY;
+ for (splits_executed_idx = 0;
+ splits_executed_idx < splits_executed_count;
+ splits_executed_idx++)
+ {
+ if (split_id == splits_executed[splits_executed_idx])
+ {
+ split_already_executed = TRUE;
+ break;
+ }
+ }
+
+ if (split_already_executed)
+ {
+ fiber = _yr_re_fiber_kill(fiber_list, fiber_pool, fiber);
+ }
+ else
+ {
+ new_fiber = _yr_re_fiber_split(fiber, fiber_list, fiber_pool);
+
+ if (new_fiber == NULL)
+ return ERROR_INSUFICIENT_MEMORY;
+
+ if (*fiber->ip == RE_OPCODE_SPLIT_A)
+ {
+ new_fiber->ip += *(int16_t*)(
+ fiber->ip
+ + 1 // opcode size
+ + sizeof(RE_SPLIT_ID_TYPE));
+
+ fiber->ip += (sizeof(RE_SPLIT_ID_TYPE) + 3);
+ }
+ else
+ {
+ fiber->ip += *(int16_t*)(
+ fiber->ip
+ + 1 // opcode size
+ + sizeof(RE_SPLIT_ID_TYPE));
+
+ new_fiber->ip += (sizeof(RE_SPLIT_ID_TYPE) + 3);
+ }
+
+ splits_executed[splits_executed_count] = split_id;
+ splits_executed_count++;
+ }
- new_fiber->ip += 3;
- fiber->ip += *(int16_t*)(fiber->ip + 1);
break;
case RE_OPCODE_JUMP:
@@ -1529,6 +1636,7 @@ int _yr_re_fiber_sync(
fiber = fiber->next;
}
}
+
return ERROR_SUCCESS;
}
diff --git a/libyara/scan.c b/libyara/scan.c
index 3fb3f8c..7d6f06f 100644
--- a/libyara/scan.c
+++ b/libyara/scan.c
@@ -173,6 +173,7 @@ int _yr_scan_fast_hex_re_exec(
uint8_t* ip = code;
uint8_t* current_input = input;
uint8_t* next_input;
+ uint8_t* next_opcode;
uint8_t mask;
uint8_t value;
@@ -287,8 +288,8 @@ int _yr_scan_fast_hex_re_exec(
case RE_OPCODE_SPLIT_B:
// This is how the code looks like after the SPLIT:
- // split L3, L4 (3 bytes long)
- // L3: any (1 byte long)
+ // split L3, L4 (3 + sizeof(RE_SPLIT_ID_TYPE) bytes long)
+ // L3: any (1 byte long)
// L4: ...
//
// The opcode following the ANY is located at ip + 4
@@ -296,11 +297,11 @@ int _yr_scan_fast_hex_re_exec(
if (sp >= MAX_FAST_HEX_RE_STACK)
return -4;
- code_stack[sp] = ip + 4;
+ code_stack[sp] = ip + sizeof(RE_SPLIT_ID_TYPE) + 4;
input_stack[sp] = current_input;
matches_stack[sp] = matches;
sp++;
- ip += 3;
+ ip += (3 + sizeof(RE_SPLIT_ID_TYPE));
break;
@@ -310,15 +311,15 @@ int _yr_scan_fast_hex_re_exec(
// generated for a jump. (example: { 01 02 [n-m] 03 04 }) The
// code sequence looks like this:
//
- // push m-n-1 (3 bytes long)
- // L0: split L1, L2 (3 bytes long)
- // L1: any (1 byte long)
- // jnz L0 (3 bytes long)
- // L2: pop (1 byte long)
- // split L3, L4 (3 bytes long)
- // L3: any (1 byte long)
- // L4: ...
- // 15 bytes in total
+ // push m-n-1 (3 bytes long)
+ // L0: split L1, L2 (3 + sizeof(RE_SPLIT_ID_TYPE) bytes long)
+ // L1: any (1 byte long)
+ // jnz L0 (3 bytes long)
+ // L2: pop (1 byte long)
+ // split L3, L4 (3 + sizeof(RE_SPLIT_ID_TYPE) bytes long)
+ // L3: any (1 byte long)
+ // L4:
+ // 15 + 2 * sizeof(RE_SPLIT_ID_TYPE) bytes in total
for (i = *(uint16_t*)(ip + 1) + 1; i > 0; i--)
{
@@ -335,23 +336,23 @@ int _yr_scan_fast_hex_re_exec(
continue;
}
- // The opcode following the sequence is located at ip + 15
+ next_opcode = ip + 2 * sizeof(RE_SPLIT_ID_TYPE) + 15;
- if ( *(ip + 15) != RE_OPCODE_LITERAL ||
- (*(ip + 15) == RE_OPCODE_LITERAL &&
- *(ip + 16) == *next_input))
+ if ( *(next_opcode) != RE_OPCODE_LITERAL ||
+ (*(next_opcode) == RE_OPCODE_LITERAL &&
+ *(next_opcode + 1) == *next_input))
{
if (sp >= MAX_FAST_HEX_RE_STACK)
return -4;
- code_stack[sp] = ip + 15;
+ code_stack[sp] = next_opcode;
input_stack[sp] = next_input;
matches_stack[sp] = matches + i;
sp++;
}
}
- ip += 15;
+ ip = next_opcode;
break;
default:
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list