[Forensics-changes] [yara] 321/415: Use string chaining for every jump over a given threshold
Hilko Bengen
bengen at moszumanska.debian.org
Thu Apr 3 05:43:19 UTC 2014
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to branch debian
in repository yara.
commit 521d4046a953defd4acef4d095a393da13084488
Author: Victor Manuel Alvarez <vmalvarez at virustotal.com>
Date: Fri Dec 20 13:25:12 2013 +0100
Use string chaining for every jump over a given threshold
---
libyara/hex_grammar.c | 210 +++++++++++++++++++++++-------------
libyara/hex_grammar.h | 2 +-
libyara/hex_grammar.y | 61 ++++++++++-
libyara/hex_lexer.c | 26 ++---
libyara/hex_lexer.l | 7 --
libyara/lexer.c | 11 +-
libyara/lexer.l | 2 +-
libyara/re.c | 16 ++-
libyara/re.h | 5 +-
libyara/rules.c | 292 ++++++++++++++++++++++++++++++++++++++++----------
libyara/yara.h | 13 ++-
11 files changed, 469 insertions(+), 176 deletions(-)
diff --git a/libyara/hex_grammar.c b/libyara/hex_grammar.c
index d57ab40..4850d5d 100644
--- a/libyara/hex_grammar.c
+++ b/libyara/hex_grammar.c
@@ -105,6 +105,9 @@
#include <dmalloc.h>
#endif
+#define STR_EXPAND(tok) #tok
+#define STR(tok) STR_EXPAND(tok)
+
#define YYERROR_VERBOSE
#define YYDEBUG 0
@@ -152,13 +155,13 @@ yydebug = 1;
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
-#line 71 "hex_grammar.y"
+#line 74 "hex_grammar.y"
{
int integer;
RE_NODE *re_node;
}
/* Line 193 of yacc.c. */
-#line 162 "hex_grammar.c"
+#line 165 "hex_grammar.c"
YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
@@ -171,7 +174,7 @@ typedef union YYSTYPE
/* Line 216 of yacc.c. */
-#line 175 "hex_grammar.c"
+#line 178 "hex_grammar.c"
#ifdef short
# undef short
@@ -386,16 +389,16 @@ union yyalloc
/* YYFINAL -- State number of the termination state. */
#define YYFINAL 10
/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 23
+#define YYLAST 25
/* YYNTOKENS -- Number of terminals. */
-#define YYNTOKENS 15
+#define YYNTOKENS 14
/* YYNNTS -- Number of nonterminals. */
#define YYNNTS 8
/* YYNRULES -- Number of rules. */
-#define YYNRULES 15
+#define YYNRULES 16
/* YYNRULES -- Number of states. */
-#define YYNSTATES 26
+#define YYNSTATES 25
/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
#define YYUNDEFTOK 2
@@ -411,7 +414,7 @@ static const yytype_uint8 yytranslate[] =
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 8, 9, 2, 2, 2, 12, 13, 2, 2, 2,
+ 8, 9, 2, 2, 2, 12, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -419,7 +422,7 @@ static const yytype_uint8 yytranslate[] =
2, 10, 2, 11, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 6, 14, 7, 2, 2, 2, 2,
+ 2, 2, 2, 6, 13, 7, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -442,24 +445,24 @@ static const yytype_uint8 yytranslate[] =
static const yytype_uint8 yyprhs[] =
{
0, 0, 3, 7, 9, 12, 14, 15, 20, 24,
- 26, 30, 33, 35, 39, 41
+ 26, 30, 33, 35, 37, 41, 43
};
/* YYRHS -- A `-1'-separated list of the rules' RHS. */
static const yytype_int8 yyrhs[] =
{
- 16, 0, -1, 6, 17, 7, -1, 18, -1, 17,
- 18, -1, 22, -1, -1, 8, 19, 21, 9, -1,
- 10, 20, 11, -1, 5, -1, 5, 12, 5, -1,
- 13, 13, -1, 17, -1, 21, 14, 17, -1, 3,
- -1, 4, -1
+ 15, 0, -1, 6, 16, 7, -1, 17, -1, 16,
+ 17, -1, 21, -1, -1, 8, 18, 20, 9, -1,
+ 10, 19, 11, -1, 5, -1, 5, 12, 5, -1,
+ 5, 12, -1, 12, -1, 16, -1, 20, 13, 16,
+ -1, 3, -1, 4, -1
};
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
-static const yytype_uint8 yyrline[] =
+static const yytype_uint16 yyrline[] =
{
- 0, 90, 90, 98, 102, 113, 118, 117, 126, 134,
- 149, 172, 198, 202, 214, 222
+ 0, 93, 93, 101, 105, 116, 121, 120, 129, 137,
+ 164, 201, 226, 253, 257, 269, 277
};
#endif
@@ -469,7 +472,7 @@ static const yytype_uint8 yyrline[] =
static const char *const yytname[] =
{
"$end", "error", "$undefined", "_BYTE_", "_MASKED_BYTE_", "_NUMBER_",
- "'{'", "'}'", "'('", "')'", "'['", "']'", "'-'", "'.'", "'|'", "$accept",
+ "'{'", "'}'", "'('", "')'", "'['", "']'", "'-'", "'|'", "$accept",
"hex_string", "tokens", "token", "@1", "range", "alternatives", "byte", 0
};
#endif
@@ -480,22 +483,22 @@ static const char *const yytname[] =
static const yytype_uint16 yytoknum[] =
{
0, 256, 257, 258, 259, 260, 123, 125, 40, 41,
- 91, 93, 45, 46, 124
+ 91, 93, 45, 124
};
# endif
/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
static const yytype_uint8 yyr1[] =
{
- 0, 15, 16, 17, 17, 18, 19, 18, 18, 20,
- 20, 20, 21, 21, 22, 22
+ 0, 14, 15, 16, 16, 17, 18, 17, 17, 19,
+ 19, 19, 19, 20, 20, 21, 21
};
/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
static const yytype_uint8 yyr2[] =
{
0, 2, 3, 1, 2, 1, 0, 4, 3, 1,
- 3, 2, 1, 3, 1, 1
+ 3, 2, 1, 1, 3, 1, 1
};
/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
@@ -503,9 +506,9 @@ static const yytype_uint8 yyr2[] =
means the default is an error. */
static const yytype_uint8 yydefact[] =
{
- 0, 0, 0, 14, 15, 6, 0, 0, 3, 5,
- 1, 0, 9, 0, 0, 2, 4, 12, 0, 0,
- 11, 8, 7, 0, 10, 13
+ 0, 0, 0, 15, 16, 6, 0, 0, 3, 5,
+ 1, 0, 9, 12, 0, 2, 4, 13, 0, 11,
+ 8, 7, 0, 10, 14
};
/* YYDEFGOTO[NTERM-NUM]. */
@@ -519,9 +522,9 @@ static const yytype_int8 yydefgoto[] =
#define YYPACT_NINF -11
static const yytype_int8 yypact[] =
{
- -4, 4, 5, -11, -11, -11, -2, 12, -11, -11,
- -11, 4, -6, 8, 6, -11, -11, 4, -5, 18,
- -11, -11, -11, 4, -11, 4
+ -2, 10, 5, -11, -11, -11, 3, -1, -11, -11,
+ -11, 10, 4, -11, 0, -11, -11, 10, 12, 14,
+ -11, -11, 10, -11, 10
};
/* YYPGOTO[NTERM-NUM]. */
@@ -537,25 +540,25 @@ static const yytype_int8 yypgoto[] =
#define YYTABLE_NINF -1
static const yytype_uint8 yytable[] =
{
- 16, 17, 1, 12, 22, 10, 19, 3, 4, 23,
- 16, 13, 5, 25, 6, 3, 4, 21, 16, 15,
- 5, 20, 6, 24
+ 16, 17, 3, 4, 1, 10, 15, 5, 12, 6,
+ 16, 20, 24, 3, 4, 13, 19, 16, 5, 23,
+ 6, 21, 0, 0, 0, 22
};
-static const yytype_uint8 yycheck[] =
+static const yytype_int8 yycheck[] =
{
- 7, 11, 6, 5, 9, 0, 12, 3, 4, 14,
- 17, 13, 8, 23, 10, 3, 4, 11, 25, 7,
- 8, 13, 10, 5
+ 7, 11, 3, 4, 6, 0, 7, 8, 5, 10,
+ 17, 11, 22, 3, 4, 12, 12, 24, 8, 5,
+ 10, 9, -1, -1, -1, 13
};
/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
symbol of state STATE-NUM. */
static const yytype_uint8 yystos[] =
{
- 0, 6, 16, 3, 4, 8, 10, 17, 18, 22,
- 0, 19, 5, 13, 20, 7, 18, 17, 21, 12,
- 13, 11, 9, 14, 5, 17
+ 0, 6, 15, 3, 4, 8, 10, 16, 17, 21,
+ 0, 18, 5, 12, 19, 7, 17, 16, 20, 12,
+ 11, 9, 13, 5, 16
};
#define yyerrok (yyerrstatus = 0)
@@ -1075,30 +1078,30 @@ yydestruct (yymsg, yytype, yyvaluep, yyscanner, lex_env)
switch (yytype)
{
- case 17: /* "tokens" */
-#line 82 "hex_grammar.y"
+ case 16: /* "tokens" */
+#line 85 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1082 "hex_grammar.c"
+#line 1085 "hex_grammar.c"
break;
- case 18: /* "token" */
-#line 83 "hex_grammar.y"
+ case 17: /* "token" */
+#line 86 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1087 "hex_grammar.c"
+#line 1090 "hex_grammar.c"
break;
- case 20: /* "range" */
-#line 86 "hex_grammar.y"
+ case 19: /* "range" */
+#line 89 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1092 "hex_grammar.c"
+#line 1095 "hex_grammar.c"
break;
- case 21: /* "alternatives" */
-#line 85 "hex_grammar.y"
+ case 20: /* "alternatives" */
+#line 88 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1097 "hex_grammar.c"
+#line 1100 "hex_grammar.c"
break;
- case 22: /* "byte" */
-#line 84 "hex_grammar.y"
+ case 21: /* "byte" */
+#line 87 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1102 "hex_grammar.c"
+#line 1105 "hex_grammar.c"
break;
default:
@@ -1408,7 +1411,7 @@ yyreduce:
switch (yyn)
{
case 2:
-#line 91 "hex_grammar.y"
+#line 94 "hex_grammar.y"
{
RE* re = yyget_extra(yyscanner);
re->root_node = (yyvsp[(2) - (3)].re_node);
@@ -1416,14 +1419,14 @@ yyreduce:
break;
case 3:
-#line 99 "hex_grammar.y"
+#line 102 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 4:
-#line 103 "hex_grammar.y"
+#line 106 "hex_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
@@ -1434,21 +1437,21 @@ yyreduce:
break;
case 5:
-#line 114 "hex_grammar.y"
+#line 117 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 6:
-#line 118 "hex_grammar.y"
+#line 121 "hex_grammar.y"
{
lex_env->inside_or++;
}
break;
case 7:
-#line 122 "hex_grammar.y"
+#line 125 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(3) - (4)].re_node);
lex_env->inside_or--;
@@ -1456,7 +1459,7 @@ yyreduce:
break;
case 8:
-#line 127 "hex_grammar.y"
+#line 130 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(2) - (3)].re_node);
(yyval.re_node)->greedy = FALSE;
@@ -1464,10 +1467,22 @@ yyreduce:
break;
case 9:
-#line 135 "hex_grammar.y"
+#line 138 "hex_grammar.y"
{
RE_NODE* re_any;
+ if (lex_env->inside_or && (yyvsp[(1) - (1)].integer) > STRING_CHAINING_THRESHOLD)
+ {
+ RE* re = yyget_extra(yyscanner);
+ re->error_code = ERROR_INVALID_HEX_STRING;
+ re->error_message = yr_strdup(
+ "jumps over "
+ STR(STRING_CHAINING_THRESHOLD)
+ " now allowed inside alternation (|)");
+
+ YYABORT;
+ }
+
re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1482,15 +1497,29 @@ yyreduce:
break;
case 10:
-#line 150 "hex_grammar.y"
+#line 165 "hex_grammar.y"
{
RE_NODE* re_any;
+ if (lex_env->inside_or &&
+ ((yyvsp[(1) - (3)].integer) > STRING_CHAINING_THRESHOLD ||
+ (yyvsp[(3) - (3)].integer) > STRING_CHAINING_THRESHOLD) )
+ {
+ RE* re = yyget_extra(yyscanner);
+ re->error_code = ERROR_INVALID_HEX_STRING;
+ re->error_message = yr_strdup(
+ "jumps over "
+ STR(STRING_CHAINING_THRESHOLD)
+ " now allowed inside alternation (|)");
+
+ YYABORT;
+ }
+
if ((yyvsp[(1) - (3)].integer) > (yyvsp[(3) - (3)].integer))
{
RE* re = yyget_extra(yyscanner);
re->error_code = ERROR_INVALID_HEX_STRING;
- re->error_message = yr_strdup("invalid range");
+ re->error_message = yr_strdup("invalid jump range");
YYABORT;
}
@@ -1508,7 +1537,7 @@ yyreduce:
break;
case 11:
-#line 173 "hex_grammar.y"
+#line 202 "hex_grammar.y"
{
RE_NODE* re_any;
@@ -1516,7 +1545,9 @@ yyreduce:
{
RE* re = yyget_extra(yyscanner);
re->error_code = ERROR_INVALID_HEX_STRING;
- re->error_message = yr_strdup("[..] not allowed inside OR (|)");
+ re->error_message = yr_strdup(
+ "unbounded jumps not allowed inside alternation (|)");
+
YYABORT;
}
@@ -1528,20 +1559,47 @@ yyreduce:
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
- (yyval.re_node)->start = 0;
+ (yyval.re_node)->start = (yyvsp[(1) - (2)].integer);
(yyval.re_node)->end = INT_MAX;
}
break;
case 12:
-#line 199 "hex_grammar.y"
+#line 227 "hex_grammar.y"
+ {
+ RE_NODE* re_any;
+
+ if (lex_env->inside_or)
+ {
+ RE* re = yyget_extra(yyscanner);
+ re->error_code = ERROR_INVALID_HEX_STRING;
+ re->error_message = yr_strdup(
+ "unbounded jumps not allowed inside alternation (|)");
+ YYABORT;
+ }
+
+ re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+
+ ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
+
+ (yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, re_any, NULL);
+
+ ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
+
+ (yyval.re_node)->start = 0;
+ (yyval.re_node)->end = INT_MAX;
+ }
+ break;
+
+ case 13:
+#line 254 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
- case 13:
-#line 203 "hex_grammar.y"
+ case 14:
+#line 258 "hex_grammar.y"
{
mark_as_not_fast_hex_regexp();
@@ -1553,8 +1611,8 @@ yyreduce:
}
break;
- case 14:
-#line 215 "hex_grammar.y"
+ case 15:
+#line 270 "hex_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
@@ -1564,8 +1622,8 @@ yyreduce:
}
break;
- case 15:
-#line 223 "hex_grammar.y"
+ case 16:
+#line 278 "hex_grammar.y"
{
uint8_t mask = (yyvsp[(1) - (1)].integer) >> 8;
@@ -1589,7 +1647,7 @@ yyreduce:
/* Line 1267 of yacc.c. */
-#line 1593 "hex_grammar.c"
+#line 1651 "hex_grammar.c"
default: break;
}
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1803,7 +1861,7 @@ yyreturn:
}
-#line 244 "hex_grammar.y"
+#line 299 "hex_grammar.y"
diff --git a/libyara/hex_grammar.h b/libyara/hex_grammar.h
index bc213c9..1b67276 100644
--- a/libyara/hex_grammar.h
+++ b/libyara/hex_grammar.h
@@ -54,7 +54,7 @@
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
-#line 71 "hex_grammar.y"
+#line 74 "hex_grammar.y"
{
int integer;
RE_NODE *re_node;
diff --git a/libyara/hex_grammar.y b/libyara/hex_grammar.y
index 3050e04..e98073f 100644
--- a/libyara/hex_grammar.y
+++ b/libyara/hex_grammar.y
@@ -30,6 +30,9 @@ limitations under the License.
#include <dmalloc.h>
#endif
+#define STR_EXPAND(tok) #tok
+#define STR(tok) STR_EXPAND(tok)
+
#define YYERROR_VERBOSE
#define YYDEBUG 0
@@ -135,6 +138,18 @@ range : _NUMBER_
{
RE_NODE* re_any;
+ if (lex_env->inside_or && $1 > STRING_CHAINING_THRESHOLD)
+ {
+ RE* re = yyget_extra(yyscanner);
+ re->error_code = ERROR_INVALID_HEX_STRING;
+ re->error_message = yr_strdup(
+ "jumps over "
+ STR(STRING_CHAINING_THRESHOLD)
+ " now allowed inside alternation (|)");
+
+ YYABORT;
+ }
+
re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -150,11 +165,25 @@ range : _NUMBER_
{
RE_NODE* re_any;
+ if (lex_env->inside_or &&
+ ($1 > STRING_CHAINING_THRESHOLD ||
+ $3 > STRING_CHAINING_THRESHOLD) )
+ {
+ RE* re = yyget_extra(yyscanner);
+ re->error_code = ERROR_INVALID_HEX_STRING;
+ re->error_message = yr_strdup(
+ "jumps over "
+ STR(STRING_CHAINING_THRESHOLD)
+ " now allowed inside alternation (|)");
+
+ YYABORT;
+ }
+
if ($1 > $3)
{
RE* re = yyget_extra(yyscanner);
re->error_code = ERROR_INVALID_HEX_STRING;
- re->error_message = yr_strdup("invalid range");
+ re->error_message = yr_strdup("invalid jump range");
YYABORT;
}
@@ -169,7 +198,32 @@ range : _NUMBER_
$$->start = $1;
$$->end = $3;
}
- | '.' '.'
+ | _NUMBER_ '-'
+ {
+ RE_NODE* re_any;
+
+ if (lex_env->inside_or)
+ {
+ RE* re = yyget_extra(yyscanner);
+ re->error_code = ERROR_INVALID_HEX_STRING;
+ re->error_message = yr_strdup(
+ "unbounded jumps not allowed inside alternation (|)");
+
+ YYABORT;
+ }
+
+ re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+
+ ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
+
+ $$ = yr_re_node_create(RE_NODE_RANGE, re_any, NULL);
+
+ ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
+
+ $$->start = $1;
+ $$->end = INT_MAX;
+ }
+ | '-'
{
RE_NODE* re_any;
@@ -177,7 +231,8 @@ range : _NUMBER_
{
RE* re = yyget_extra(yyscanner);
re->error_code = ERROR_INVALID_HEX_STRING;
- re->error_message = yr_strdup("[..] not allowed inside OR (|)");
+ re->error_message = yr_strdup(
+ "unbounded jumps not allowed inside alternation (|)");
YYABORT;
}
diff --git a/libyara/hex_lexer.c b/libyara/hex_lexer.c
index d462d99..271d58d 100644
--- a/libyara/hex_lexer.c
+++ b/libyara/hex_lexer.c
@@ -47,6 +47,7 @@ typedef int16_t flex_int16_t;
typedef uint16_t flex_uint16_t;
typedef int32_t flex_int32_t;
typedef uint32_t flex_uint32_t;
+typedef uint64_t flex_uint64_t;
#else
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
@@ -357,7 +358,7 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
*/
#define YY_DO_BEFORE_ACTION \
yyg->yytext_ptr = yy_bp; \
- yyleng = (size_t) (yy_cp - yy_bp); \
+ yyleng = (yy_size_t) (yy_cp - yy_bp); \
yyg->yy_hold_char = *yy_cp; \
*yy_cp = '\0'; \
yyg->yy_c_buf_p = yy_cp;
@@ -505,7 +506,7 @@ limitations under the License.
#define YY_NO_UNISTD_H 1
#define YY_NO_INPUT 1
-#line 509 "hex_lexer.c"
+#line 510 "hex_lexer.c"
#define INITIAL 0
#define range 1
@@ -741,7 +742,7 @@ YY_DECL
-#line 745 "hex_lexer.c"
+#line 746 "hex_lexer.c"
yylval = yylval_param;
@@ -905,19 +906,12 @@ YY_RULE_SETUP
{
yylval->integer = atoi(yytext);
-
- if (yylval->integer > INT16_MAX)
- {
- yyerror(yyscanner, lex_env, "range value too large");
- yyterminate();
- }
-
return _NUMBER_;
}
YY_BREAK
case 9:
YY_RULE_SETUP
-#line 124 "hex_lexer.l"
+#line 117 "hex_lexer.l"
{
BEGIN(INITIAL);
@@ -927,12 +921,12 @@ YY_RULE_SETUP
case 10:
/* rule 10 can match eol */
YY_RULE_SETUP
-#line 131 "hex_lexer.l"
+#line 124 "hex_lexer.l"
// skip whitespace
YY_BREAK
case 11:
YY_RULE_SETUP
-#line 134 "hex_lexer.l"
+#line 127 "hex_lexer.l"
{
if (yytext[0] >= 32 && yytext[0] < 127)
@@ -948,10 +942,10 @@ YY_RULE_SETUP
YY_BREAK
case 12:
YY_RULE_SETUP
-#line 147 "hex_lexer.l"
+#line 140 "hex_lexer.l"
ECHO;
YY_BREAK
-#line 955 "hex_lexer.c"
+#line 949 "hex_lexer.c"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(range):
yyterminate();
@@ -2087,7 +2081,7 @@ void hex_yyfree (void * ptr , yyscan_t yyscanner)
#define YYTABLES_NAME "yytables"
-#line 147 "hex_lexer.l"
+#line 140 "hex_lexer.l"
diff --git a/libyara/hex_lexer.l b/libyara/hex_lexer.l
index c9e1df0..ea963e1 100644
--- a/libyara/hex_lexer.l
+++ b/libyara/hex_lexer.l
@@ -111,13 +111,6 @@ hexdigit [a-fA-F0-9]
<range>{digit}+ {
yylval->integer = atoi(yytext);
-
- if (yylval->integer > INT16_MAX)
- {
- yyerror(yyscanner, lex_env, "range value too large");
- yyterminate();
- }
-
return _NUMBER_;
}
diff --git a/libyara/lexer.c b/libyara/lexer.c
index cc3ed40..68f6588 100644
--- a/libyara/lexer.c
+++ b/libyara/lexer.c
@@ -47,6 +47,7 @@ typedef int16_t flex_int16_t;
typedef uint16_t flex_uint16_t;
typedef int32_t flex_int32_t;
typedef uint32_t flex_uint32_t;
+typedef uint64_t flex_uint64_t;
#else
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
@@ -357,7 +358,7 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
*/
#define YY_DO_BEFORE_ACTION \
yyg->yytext_ptr = yy_bp; \
- yyleng = (size_t) (yy_cp - yy_bp); \
+ yyleng = (yy_size_t) (yy_cp - yy_bp); \
yyg->yy_hold_char = *yy_cp; \
*yy_cp = '\0'; \
yyg->yy_c_buf_p = yy_cp;
@@ -406,7 +407,7 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 4, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 5, 6, 7, 8, 1, 1, 1, 9,
- 9, 10, 1, 1, 9, 9, 11, 12, 13, 14,
+ 9, 10, 1, 1, 9, 1, 11, 12, 13, 14,
15, 16, 16, 17, 16, 18, 16, 1, 1, 19,
20, 21, 9, 22, 23, 24, 23, 23, 23, 23,
25, 25, 25, 25, 26, 25, 27, 25, 25, 25,
@@ -686,7 +687,7 @@ limitations under the License.
-#line 690 "lexer.c"
+#line 691 "lexer.c"
#define INITIAL 0
#define str 1
@@ -924,7 +925,7 @@ YY_DECL
#line 86 "lexer.l"
-#line 928 "lexer.c"
+#line 929 "lexer.c"
yylval = yylval_param;
@@ -1710,7 +1711,7 @@ YY_RULE_SETUP
#line 512 "lexer.l"
ECHO;
YY_BREAK
-#line 1714 "lexer.c"
+#line 1715 "lexer.c"
case YY_END_OF_BUFFER:
{
diff --git a/libyara/lexer.l b/libyara/lexer.l
index a54982e..2d579ea 100644
--- a/libyara/lexer.l
+++ b/libyara/lexer.l
@@ -479,7 +479,7 @@ $({letter}|{digit}|_)* {
}
-\{({hexdigit}|[ \-|\?\[\]\(\)\.\n\t])+\} {
+\{({hexdigit}|[ \-|\?\[\]\(\)\n\t])+\} {
int len = strlen(yytext);
SIZED_STRING* s = (SIZED_STRING*) yr_malloc(len + sizeof(SIZED_STRING));
diff --git a/libyara/re.c b/libyara/re.c
index ae589f4..611c546 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -322,7 +322,9 @@ SIZED_STRING* yr_re_extract_literal(
int yr_re_split_at_chaining_point(
RE* re,
RE** result_re,
- RE** remainder_re)
+ RE** remainder_re,
+ int32_t* min_gap,
+ int32_t* max_gap)
{
RE_NODE* node = re->root_node;
RE_NODE* child = re->root_node->left;
@@ -332,15 +334,17 @@ int yr_re_split_at_chaining_point(
*result_re = re;
*remainder_re = NULL;
+ *min_gap = 0;
+ *max_gap = 0;
while (child != NULL && child->type == RE_NODE_CONCAT)
{
if (child->right != NULL &&
child->right->type == RE_NODE_RANGE &&
child->right->greedy == FALSE &&
- child->right->start == 0 &&
- child->right->end == INT_MAX &&
- child->right->left->type == RE_NODE_ANY)
+ child->right->left->type == RE_NODE_ANY &&
+ (child->right->start > STRING_CHAINING_THRESHOLD ||
+ child->right->end > STRING_CHAINING_THRESHOLD))
{
result = yr_re_create(remainder_re);
@@ -357,6 +361,10 @@ int yr_re_split_at_chaining_point(
(*result_re)->root_node = node->right;
node->right = NULL;
+
+ *min_gap = child->right->start;
+ *max_gap = child->right->end;
+
yr_re_node_destroy(node);
return ERROR_SUCCESS;
diff --git a/libyara/re.h b/libyara/re.h
index f1a0dae..835c0b8 100644
--- a/libyara/re.h
+++ b/libyara/re.h
@@ -166,10 +166,13 @@ SIZED_STRING* yr_re_extract_literal(
RE* re);
+
int yr_re_split_at_chaining_point(
RE* re,
RE** result_re,
- RE** remainder_re);
+ RE** remainder_re,
+ int32_t* min_gap,
+ int32_t* max_gap);
int yr_re_emit_code(
diff --git a/libyara/rules.c b/libyara/rules.c
index 104db1d..be48741 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -1,4 +1,4 @@
- /*
+ /*
Copyright (c) 2013. Victor M. Alvarez [plusvic at gmail.com].
Licensed under the Apache License, Version 2.0 (the "License");
@@ -304,7 +304,7 @@ void _yr_scan_confirm_matches(
int tidx,
YR_STRING* string,
size_t match_offset,
- int match_length)
+ int32_t match_length)
{
YR_MATCH* match;
YR_MATCH* next_match;
@@ -353,18 +353,220 @@ void _yr_scan_confirm_matches(
}
-void _yr_rules_match_callback(
+void _yr_scan_update_match_chain_length(
+ int tidx,
+ YR_STRING* string,
+ YR_MATCH* match_to_update,
+ int chain_length)
+{
+ YR_MATCH* match;
+ size_t ending_offset;
+
+ match_to_update->chain_length = chain_length;
+
+ if (string->chained_to != NULL)
+ match = string->chained_to->unconfirmed_matches[tidx].head;
+ else
+ match = NULL;
+
+ while (match != NULL)
+ {
+ ending_offset = match->offset + match->length;
+
+ if (ending_offset + string->chain_gap_max >= match_to_update->offset &&
+ ending_offset + string->chain_gap_min <= match_to_update->offset)
+ {
+ _yr_scan_update_match_chain_length(
+ tidx, string->chained_to, match, chain_length + 1);
+ }
+
+ match = match->next;
+ }
+}
+
+
+void _yr_scan_add_match_to_list(
+ YR_MATCH* match,
+ YR_MATCHES* matches_list)
+{
+ YR_MATCH* insertion_point;
+
+ insertion_point = matches_list->tail;
+
+ while (insertion_point != NULL)
+ {
+ if (match->offset == insertion_point->offset)
+ {
+ insertion_point->length = match->length;
+ return;
+ }
+
+ if (match->offset > insertion_point->offset)
+ break;
+
+ insertion_point = insertion_point->prev;
+ }
+
+ match->prev = insertion_point;
+
+ if (insertion_point != NULL)
+ {
+ match->next = insertion_point->next;
+ insertion_point->next = match;
+ }
+ else
+ {
+ match->next = matches_list->head;
+ matches_list->head = match;
+ }
+
+ if (match->next != NULL)
+ match->next->prev = match;
+ else
+ matches_list->tail = match;
+}
+
+
+void _yr_scan_remove_match_from_list(
+ YR_MATCH* match,
+ YR_MATCHES* matches_list)
+{
+ if (match->prev != NULL)
+ match->prev->next = match->next;
+
+ if (match->next != NULL)
+ match->next->prev = match->prev;
+
+ if (matches_list->head == match)
+ matches_list->head = match->next;
+
+ if (matches_list->tail == match)
+ matches_list->tail = match->prev;
+}
+
+void _yr_scan_handle_chained_matches(
+ YR_ARENA* matches_arena,
+ YR_STRING* matching_string,
uint8_t* match_data,
- int match_length,
- int flags,
- void* args)
+ size_t match_offset,
+ int32_t match_length,
+ int tidx)
{
- YR_MATCH* new_match;
+ YR_STRING* string;
YR_MATCH* match;
- YR_MATCHES* matches;
+ YR_MATCH* next_match;
+ YR_MATCH* new_match;
+
+ size_t lower_offset;
+ size_t ending_offset;
+ int32_t full_chain_length;
+
+ int add_match = FALSE;
+
+ if (matching_string->chained_to == NULL)
+ {
+ add_match = TRUE;
+ }
+ else
+ {
+ if (matching_string->unconfirmed_matches[tidx].head != NULL)
+ lower_offset = matching_string->unconfirmed_matches[tidx].head->offset;
+ else
+ lower_offset = match_offset;
+
+ match = matching_string->chained_to->unconfirmed_matches[tidx].head;
+ while (match != NULL)
+ {
+ next_match = match->next;
+ ending_offset = match->offset + match->length;
+
+ if (ending_offset + matching_string->chain_gap_max < lower_offset)
+ {
+ _yr_scan_remove_match_from_list(
+ match, &matching_string->chained_to->unconfirmed_matches[tidx]);
+ }
+ else
+ {
+ if (ending_offset + matching_string->chain_gap_max >= match_offset &&
+ ending_offset + matching_string->chain_gap_min <= match_offset)
+ {
+ _yr_scan_update_match_chain_length(
+ tidx, matching_string->chained_to, match, 1);
+
+ add_match = TRUE;
+ }
+ }
+
+ match = next_match;
+ }
+ }
+
+ if (add_match)
+ {
+ if (STRING_IS_CHAIN_TAIL(matching_string))
+ {
+ full_chain_length = 0;
+ string = matching_string;
+
+ while(string->chained_to != NULL)
+ {
+ full_chain_length++;
+ string = string->chained_to;
+ }
+
+ // "string" points now to the head of the strings chain
+
+ match = string->unconfirmed_matches[tidx].head;
+
+ while (match != NULL)
+ {
+ next_match = match->next;
+
+ if (match->chain_length == full_chain_length)
+ {
+ _yr_scan_remove_match_from_list(
+ match, &string->unconfirmed_matches[tidx]);
+
+ match->length = match_offset - match->offset + match_length;
+ match->data = match_data - match_offset + match->offset;
+
+ _yr_scan_add_match_to_list(
+ match, &string->matches[tidx]);
+ }
+
+ match = next_match;
+ }
+ }
+ else
+ {
+ yr_arena_allocate_memory(
+ matches_arena,
+ sizeof(YR_MATCH),
+ (void**) &new_match);
+
+ new_match->offset = match_offset;
+ new_match->length = match_length;
+ new_match->data = match_data;
+
+ _yr_scan_add_match_to_list(
+ new_match,
+ &matching_string->unconfirmed_matches[tidx]);
+ }
+ }
+}
+
+
+void _yr_scan_match_callback(
+ uint8_t* match_data,
+ int32_t match_length,
+ int flags,
+ void* args)
+{
CALLBACK_ARGS* callback_args = args;
+
YR_STRING* string = callback_args->string;
+ YR_MATCH* new_match;
int character_size;
int tidx = callback_args->tidx;
@@ -384,6 +586,7 @@ void _yr_rules_match_callback(
match_length -= character_size;
// total match length is the sum of backward and forward matches.
+
match_length = match_length + callback_args->forward_matches;
if (callback_args->full_word)
@@ -412,60 +615,31 @@ void _yr_rules_match_callback(
}
}
- if (STRING_IS_CHAIN_TAIL(string))
- {
- _yr_scan_confirm_matches(tidx, string, match_offset, match_length);
- return;
- }
-
if (STRING_IS_CHAIN_PART(string))
- matches = &string->unconfirmed_matches[tidx];
- else
- matches = &string->matches[tidx];
-
- match = matches->tail;
-
- while (match != NULL)
- {
- if (match_length == match->length)
- {
- if (match_offset == match->offset)
- return;
- }
-
- if (match_offset > match->offset)
- break;
-
- match = match->prev;
- }
-
- yr_arena_allocate_memory(
- callback_args->matches_arena,
- sizeof(YR_MATCH),
- (void**) &new_match);
-
- new_match->offset = match_offset;
- new_match->length = match_length;
- new_match->data = match_data;
-
- if (match != NULL)
{
- new_match->next = match->next;
- new_match->prev = match;
- match->next = new_match;
+ _yr_scan_handle_chained_matches(
+ callback_args->matches_arena,
+ string,
+ match_data,
+ match_offset,
+ match_length,
+ tidx);
}
else
{
- new_match->next = matches->head;
- matches->head = new_match;
+ yr_arena_allocate_memory(
+ callback_args->matches_arena,
+ sizeof(YR_MATCH),
+ (void**) &new_match);
+
+ new_match->offset = match_offset;
+ new_match->length = match_length;
+ new_match->data = match_data;
+
+ _yr_scan_add_match_to_list(
+ new_match,
+ &string->matches[tidx]);
}
-
- if (new_match->next != NULL)
- new_match->next->prev = new_match;
- else
- matches->tail = new_match;
-
- new_match->prev = match;
}
@@ -547,12 +721,12 @@ int _yr_scan_verify_re_match(
data + offset,
offset + 1,
flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
- _yr_rules_match_callback,
+ _yr_scan_match_callback,
(void*) &callback_args);
}
else
{
- _yr_rules_match_callback(
+ _yr_scan_match_callback(
data + offset, 0, flags, &callback_args);
}
@@ -661,7 +835,7 @@ int _yr_scan_verify_literal_match(
callback_args.full_word = STRING_IS_FULL_WORD(string);
callback_args.tidx = yr_get_tidx();
- _yr_rules_match_callback(
+ _yr_scan_match_callback(
data + offset, 0, flags, &callback_args);
}
diff --git a/libyara/yara.h b/libyara/yara.h
index aff92f0..6c01e7b 100644
--- a/libyara/yara.h
+++ b/libyara/yara.h
@@ -95,12 +95,12 @@ typedef pthread_mutex_t mutex_t;
#define CALLBACK_ABORT 1
#define CALLBACK_ERROR 2
-
#define MAX_ATOM_LENGTH 4
#define LOOP_LOCAL_VARS 4
#define MAX_LOOP_NESTING 4
#define MAX_INCLUDE_DEPTH 16
#define MAX_THREADS 32
+#define STRING_CHAINING_THRESHOLD 256
#define LEX_BUF_SIZE 1024
@@ -302,9 +302,13 @@ typedef struct _YR_ARENA
typedef struct _YR_MATCH
{
- uint8_t* data;
- uint32_t length;
int64_t offset;
+ int32_t length;
+
+ union {
+ uint8_t* data; // Confirmed matches use "data",
+ int32_t chain_length; // unconfirmed ones use "chain_length"
+ };
struct _YR_MATCH* prev;
struct _YR_MATCH* next;
@@ -348,6 +352,9 @@ typedef struct _YR_STRING
DECLARE_REFERENCE(uint8_t*, string);
DECLARE_REFERENCE(struct _YR_STRING*, chained_to);
+ int32_t chain_gap_min;
+ int32_t chain_gap_max;
+
YR_MATCHES matches[MAX_THREADS];
YR_MATCHES unconfirmed_matches[MAX_THREADS];
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes mailing list