[Forensics-changes] [yara] 16/368: Reject hex strings starting or ending with jumps.
Hilko Bengen
bengen at moszumanska.debian.org
Sat Jul 1 10:30:06 UTC 2017
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to annotated tag v3.5.0
in repository yara.
commit 30fbe81458512c83912131254f5012140c565c6d
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Thu Jul 2 13:29:54 2015 +0200
Reject hex strings starting or ending with jumps.
---
libyara/hex_grammar.c | 313 +++++++++++++++++++++++++++++++++-----------------
libyara/hex_grammar.y | 142 ++++++++++++++++++-----
yara-python/tests.py | 25 +++-
3 files changed, 341 insertions(+), 139 deletions(-)
diff --git a/libyara/hex_grammar.c b/libyara/hex_grammar.c
index 86f48cb..ae10645 100644
--- a/libyara/hex_grammar.c
+++ b/libyara/hex_grammar.c
@@ -377,18 +377,18 @@ union yyalloc
#endif
/* YYFINAL -- State number of the termination state. */
-#define YYFINAL 10
+#define YYFINAL 9
/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 25
+#define YYLAST 30
/* YYNTOKENS -- Number of terminals. */
#define YYNTOKENS 14
/* YYNNTS -- Number of nonterminals. */
-#define YYNNTS 8
+#define YYNNTS 10
/* YYNRULES -- Number of rules. */
-#define YYNRULES 16
+#define YYNRULES 20
/* YYNRULES -- Number of states. */
-#define YYNSTATES 25
+#define YYNSTATES 32
/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
#define YYUNDEFTOK 2
@@ -434,25 +434,29 @@ static const yytype_uint8 yytranslate[] =
YYRHS. */
static const yytype_uint8 yyprhs[] =
{
- 0, 0, 3, 7, 9, 12, 14, 15, 20, 24,
- 26, 30, 33, 35, 37, 41, 43
+ 0, 0, 3, 7, 9, 12, 16, 18, 21, 23,
+ 25, 27, 28, 33, 37, 43, 48, 52, 54, 58,
+ 60
};
/* YYRHS -- A `-1'-separated list of the rules' RHS. */
static const yytype_int8 yyrhs[] =
{
- 15, 0, -1, 6, 16, 7, -1, 17, -1, 16,
- 17, -1, 21, -1, -1, 8, 18, 20, 9, -1,
- 10, 19, 11, -1, 5, -1, 5, 12, 5, -1,
- 5, 12, -1, 12, -1, 16, -1, 20, 13, 16,
- -1, 3, -1, 4, -1
+ 15, 0, -1, 6, 16, 7, -1, 19, -1, 19,
+ 19, -1, 19, 17, 19, -1, 18, -1, 17, 18,
+ -1, 19, -1, 21, -1, 23, -1, -1, 8, 20,
+ 22, 9, -1, 10, 5, 11, -1, 10, 5, 12,
+ 5, 11, -1, 10, 5, 12, 11, -1, 10, 12,
+ 11, -1, 16, -1, 22, 13, 16, -1, 3, -1,
+ 4, -1
};
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
static const yytype_uint16 yyrline[] =
{
- 0, 84, 84, 93, 97, 122, 127, 126, 135, 144,
- 173, 211, 239, 265, 269, 283, 291
+ 0, 91, 91, 100, 104, 113, 172, 176, 189, 193,
+ 202, 216, 215, 228, 257, 295, 323, 349, 353, 367,
+ 375
};
#endif
@@ -463,7 +467,8 @@ static const char *const yytname[] =
{
"$end", "error", "$undefined", "_BYTE_", "_MASKED_BYTE_", "_NUMBER_",
"'{'", "'}'", "'('", "')'", "'['", "']'", "'-'", "'|'", "$accept",
- "hex_string", "tokens", "token", "@1", "range", "alternatives", "byte", 0
+ "hex_string", "tokens", "token_sequence", "token_or_range", "token",
+ "@1", "range", "alternatives", "byte", 0
};
#endif
@@ -480,15 +485,17 @@ static const yytype_uint16 yytoknum[] =
/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
static const yytype_uint8 yyr1[] =
{
- 0, 14, 15, 16, 16, 17, 18, 17, 17, 19,
- 19, 19, 19, 20, 20, 21, 21
+ 0, 14, 15, 16, 16, 16, 17, 17, 18, 18,
+ 19, 20, 19, 21, 21, 21, 21, 22, 22, 23,
+ 23
};
/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
static const yytype_uint8 yyr2[] =
{
- 0, 2, 3, 1, 2, 1, 0, 4, 3, 1,
- 3, 2, 1, 1, 3, 1, 1
+ 0, 2, 3, 1, 2, 3, 1, 2, 1, 1,
+ 1, 0, 4, 3, 5, 4, 3, 1, 3, 1,
+ 1
};
/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
@@ -496,15 +503,16 @@ static const yytype_uint8 yyr2[] =
means the default is an error. */
static const yytype_uint8 yydefact[] =
{
- 0, 0, 0, 15, 16, 6, 0, 0, 3, 5,
- 1, 0, 9, 12, 0, 2, 4, 13, 0, 11,
- 8, 7, 0, 10, 14
+ 0, 0, 0, 19, 20, 11, 0, 3, 10, 1,
+ 0, 2, 0, 0, 6, 8, 9, 17, 0, 0,
+ 0, 7, 8, 12, 0, 13, 0, 16, 18, 0,
+ 15, 14
};
/* YYDEFGOTO[NTERM-NUM]. */
static const yytype_int8 yydefgoto[] =
{
- -1, 2, 7, 8, 11, 14, 18, 9
+ -1, 2, 6, 13, 14, 7, 10, 16, 18, 8
};
/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
@@ -512,43 +520,47 @@ static const yytype_int8 yydefgoto[] =
#define YYPACT_NINF -11
static const yytype_int8 yypact[] =
{
- -2, 10, 5, -11, -11, -11, 3, -1, -11, -11,
- -11, 10, 4, -11, 0, -11, -11, 10, 12, 14,
- -11, -11, 10, -11, 10
+ 20, 14, 27, -11, -11, -11, 21, -2, -11, -11,
+ 14, -11, -1, -2, -11, -4, -11, -11, 10, 13,
+ 9, -11, 3, -11, 14, -11, 2, -11, -11, 18,
+ -11, -11
};
/* YYPGOTO[NTERM-NUM]. */
static const yytype_int8 yypgoto[] =
{
- -11, -11, -10, -7, -11, -11, -11, -11
+ -11, -11, -10, -11, 17, 8, -11, -11, -11, -11
};
/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
positive, shift that token. If negative, reduce the rule which
number is the opposite. If zero, do what YYDEFACT says.
If YYTABLE_NINF, syntax error. */
-#define YYTABLE_NINF -1
-static const yytype_uint8 yytable[] =
+#define YYTABLE_NINF -6
+static const yytype_int8 yytable[] =
{
- 16, 17, 3, 4, 1, 10, 15, 5, 12, 6,
- 16, 20, 24, 3, 4, 13, 19, 16, 5, 23,
- 6, 21, 0, 0, 0, 22
+ 17, 3, 4, -4, 19, -4, 5, 29, 12, -4,
+ -5, 20, -5, 30, 28, 15, -5, 3, 4, 23,
+ 27, 22, 5, 24, 25, 26, 1, 9, 11, 31,
+ 21
};
-static const yytype_int8 yycheck[] =
+static const yytype_uint8 yycheck[] =
{
- 7, 11, 3, 4, 6, 0, 7, 8, 5, 10,
- 17, 11, 22, 3, 4, 12, 12, 24, 8, 5,
- 10, 9, -1, -1, -1, 13
+ 10, 3, 4, 7, 5, 9, 8, 5, 10, 13,
+ 7, 12, 9, 11, 24, 7, 13, 3, 4, 9,
+ 11, 13, 8, 13, 11, 12, 6, 0, 7, 11,
+ 13
};
/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
symbol of state STATE-NUM. */
static const yytype_uint8 yystos[] =
{
- 0, 6, 15, 3, 4, 8, 10, 16, 17, 21,
- 0, 18, 5, 12, 19, 7, 17, 16, 20, 12,
- 11, 9, 13, 5, 16
+ 0, 6, 15, 3, 4, 8, 16, 19, 23, 0,
+ 20, 7, 10, 17, 18, 19, 21, 16, 22, 5,
+ 12, 18, 19, 9, 13, 11, 12, 11, 16, 5,
+ 11, 11
};
#define yyerrok (yyerrstatus = 0)
@@ -1069,29 +1081,39 @@ yydestruct (yymsg, yytype, yyvaluep, yyscanner, lex_env)
switch (yytype)
{
case 16: /* "tokens" */
-#line 75 "hex_grammar.y"
+#line 80 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1075 "hex_grammar.c"
+#line 1087 "hex_grammar.c"
break;
- case 17: /* "token" */
-#line 76 "hex_grammar.y"
+ case 17: /* "token_sequence" */
+#line 81 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1080 "hex_grammar.c"
+#line 1092 "hex_grammar.c"
break;
- case 19: /* "range" */
-#line 79 "hex_grammar.y"
+ case 18: /* "token_or_range" */
+#line 82 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1085 "hex_grammar.c"
+#line 1097 "hex_grammar.c"
break;
- case 20: /* "alternatives" */
-#line 78 "hex_grammar.y"
+ case 19: /* "token" */
+#line 83 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1090 "hex_grammar.c"
+#line 1102 "hex_grammar.c"
break;
- case 21: /* "byte" */
-#line 77 "hex_grammar.y"
+ case 21: /* "range" */
+#line 86 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1095 "hex_grammar.c"
+#line 1107 "hex_grammar.c"
+ break;
+ case 22: /* "alternatives" */
+#line 85 "hex_grammar.y"
+ { yr_re_node_destroy((yyvaluep->re_node)); };
+#line 1112 "hex_grammar.c"
+ break;
+ case 23: /* "byte" */
+#line 84 "hex_grammar.y"
+ { yr_re_node_destroy((yyvaluep->re_node)); };
+#line 1117 "hex_grammar.c"
break;
default:
@@ -1401,7 +1423,7 @@ yyreduce:
switch (yyn)
{
case 2:
-#line 85 "hex_grammar.y"
+#line 92 "hex_grammar.y"
{
RE* re = yyget_extra(yyscanner);
re->root_node = (yyvsp[(2) - (3)].re_node);
@@ -1409,27 +1431,92 @@ yyreduce:
break;
case 3:
-#line 94 "hex_grammar.y"
+#line 101 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 4:
-#line 98 "hex_grammar.y"
+#line 105 "hex_grammar.y"
{
- lex_env->token_count++;
+ (yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
+
+ DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
+ DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(2) - (2)].re_node));
+
+ ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
+ }
+ break;
- if (lex_env->token_count >= MAX_HEX_STRING_TOKENS)
+ case 5:
+#line 114 "hex_grammar.y"
+ {
+ (yyval.re_node) = NULL;
+
+ // Some portions of the code (e.g. yr_re_split_at_chaining_point)
+ // expect a left-unbalanced tree where the right child of a concat node
+ // can't be another concat node. A concat node must always be the left
+ // child of its parent if the parent is also a concat. For this reason
+ // we can't simply create two new concat nodes arranged like this:
+ //
+ // concat
+ // / \
+ // / \
+ // token's \
+ // subtree concat
+ // / \
+ // / \
+ // / \
+ // token_sequence's token's
+ // subtree subtree
+ //
+ // Instead we must insert the subtree for the first token as the
+ // leftmost node of the token_sequence subtree.
+
+ RE_NODE* leftmost_concat = NULL;
+ RE_NODE* leftmost_node = (yyvsp[(2) - (3)].re_node);
+
+ while (leftmost_node->type == RE_NODE_CONCAT)
{
- yr_re_node_destroy((yyvsp[(1) - (2)].re_node));
- yr_re_node_destroy((yyvsp[(2) - (2)].re_node));
+ leftmost_concat = leftmost_node;
+ leftmost_node = leftmost_node->left;
+ }
- yyerror(yyscanner, lex_env, "string too long");
+ RE_NODE* new_concat = yr_re_node_create(
+ RE_NODE_CONCAT, (yyvsp[(1) - (3)].re_node), leftmost_node);
- YYABORT;
+ if (new_concat != NULL)
+ {
+ if (leftmost_concat != NULL)
+ {
+ leftmost_concat->left = new_concat;
+ (yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(2) - (3)].re_node), (yyvsp[(3) - (3)].re_node));
+ }
+ else
+ {
+ (yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, new_concat, (yyvsp[(3) - (3)].re_node));
+ }
}
+ DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (3)].re_node));
+ DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(2) - (3)].re_node));
+ DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(3) - (3)].re_node));
+
+ ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
+ }
+ break;
+
+ case 6:
+#line 173 "hex_grammar.y"
+ {
+ (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
+ }
+ break;
+
+ case 7:
+#line 177 "hex_grammar.y"
+ {
(yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1439,48 +1526,64 @@ yyreduce:
}
break;
- case 5:
-#line 123 "hex_grammar.y"
+ case 8:
+#line 190 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
- case 6:
-#line 127 "hex_grammar.y"
+ case 9:
+#line 194 "hex_grammar.y"
{
- lex_env->inside_or++;
+ (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
+ (yyval.re_node)->greedy = FALSE;
}
break;
- case 7:
-#line 131 "hex_grammar.y"
+ case 10:
+#line 203 "hex_grammar.y"
{
- (yyval.re_node) = (yyvsp[(3) - (4)].re_node);
- lex_env->inside_or--;
+ lex_env->token_count++;
+
+ if (lex_env->token_count > MAX_HEX_STRING_TOKENS)
+ {
+ yr_re_node_destroy((yyvsp[(1) - (1)].re_node));
+ yyerror(yyscanner, lex_env, "string too long");
+ YYABORT;
+ }
+
+ (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
- case 8:
-#line 136 "hex_grammar.y"
+ case 11:
+#line 216 "hex_grammar.y"
{
- (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
- (yyval.re_node)->greedy = FALSE;
+ lex_env->inside_or++;
}
break;
- case 9:
-#line 145 "hex_grammar.y"
+ case 12:
+#line 220 "hex_grammar.y"
+ {
+ (yyval.re_node) = (yyvsp[(3) - (4)].re_node);
+ lex_env->inside_or--;
+ }
+ break;
+
+ case 13:
+#line 229 "hex_grammar.y"
{
RE_NODE* re_any;
- if ((yyvsp[(1) - (1)].integer) < 0)
+ if ((yyvsp[(2) - (3)].integer) < 0)
{
yyerror(yyscanner, lex_env, "invalid negative jump length");
YYABORT;
}
- if (lex_env->inside_or && (yyvsp[(1) - (1)].integer) > STRING_CHAINING_THRESHOLD)
+ if (lex_env->inside_or && (yyvsp[(2) - (3)].integer) > STRING_CHAINING_THRESHOLD)
{
yyerror(yyscanner, lex_env, "jumps over "
STR(STRING_CHAINING_THRESHOLD)
@@ -1496,19 +1599,19 @@ yyreduce:
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
- (yyval.re_node)->start = (yyvsp[(1) - (1)].integer);
- (yyval.re_node)->end = (yyvsp[(1) - (1)].integer);
+ (yyval.re_node)->start = (yyvsp[(2) - (3)].integer);
+ (yyval.re_node)->end = (yyvsp[(2) - (3)].integer);
}
break;
- case 10:
-#line 174 "hex_grammar.y"
+ case 14:
+#line 258 "hex_grammar.y"
{
RE_NODE* re_any;
if (lex_env->inside_or &&
- ((yyvsp[(1) - (3)].integer) > STRING_CHAINING_THRESHOLD ||
- (yyvsp[(3) - (3)].integer) > STRING_CHAINING_THRESHOLD) )
+ ((yyvsp[(2) - (5)].integer) > STRING_CHAINING_THRESHOLD ||
+ (yyvsp[(4) - (5)].integer) > STRING_CHAINING_THRESHOLD) )
{
yyerror(yyscanner, lex_env, "jumps over "
STR(STRING_CHAINING_THRESHOLD)
@@ -1517,13 +1620,13 @@ yyreduce:
YYABORT;
}
- if ((yyvsp[(1) - (3)].integer) < 0 || (yyvsp[(3) - (3)].integer) < 0)
+ if ((yyvsp[(2) - (5)].integer) < 0 || (yyvsp[(4) - (5)].integer) < 0)
{
yyerror(yyscanner, lex_env, "invalid negative jump length");
YYABORT;
}
- if ((yyvsp[(1) - (3)].integer) > (yyvsp[(3) - (3)].integer))
+ if ((yyvsp[(2) - (5)].integer) > (yyvsp[(4) - (5)].integer))
{
yyerror(yyscanner, lex_env, "invalid jump range");
YYABORT;
@@ -1537,13 +1640,13 @@ yyreduce:
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
- (yyval.re_node)->start = (yyvsp[(1) - (3)].integer);
- (yyval.re_node)->end = (yyvsp[(3) - (3)].integer);
+ (yyval.re_node)->start = (yyvsp[(2) - (5)].integer);
+ (yyval.re_node)->end = (yyvsp[(4) - (5)].integer);
}
break;
- case 11:
-#line 212 "hex_grammar.y"
+ case 15:
+#line 296 "hex_grammar.y"
{
RE_NODE* re_any;
@@ -1554,7 +1657,7 @@ yyreduce:
YYABORT;
}
- if ((yyvsp[(1) - (2)].integer) < 0)
+ if ((yyvsp[(2) - (4)].integer) < 0)
{
yyerror(yyscanner, lex_env, "invalid negative jump length");
YYABORT;
@@ -1568,13 +1671,13 @@ yyreduce:
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
- (yyval.re_node)->start = (yyvsp[(1) - (2)].integer);
+ (yyval.re_node)->start = (yyvsp[(2) - (4)].integer);
(yyval.re_node)->end = INT_MAX;
}
break;
- case 12:
-#line 240 "hex_grammar.y"
+ case 16:
+#line 324 "hex_grammar.y"
{
RE_NODE* re_any;
@@ -1598,15 +1701,15 @@ yyreduce:
}
break;
- case 13:
-#line 266 "hex_grammar.y"
+ case 17:
+#line 350 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
- case 14:
-#line 270 "hex_grammar.y"
+ case 18:
+#line 354 "hex_grammar.y"
{
mark_as_not_fast_hex_regexp();
@@ -1619,8 +1722,8 @@ yyreduce:
}
break;
- case 15:
-#line 284 "hex_grammar.y"
+ case 19:
+#line 368 "hex_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
@@ -1630,8 +1733,8 @@ yyreduce:
}
break;
- case 16:
-#line 292 "hex_grammar.y"
+ case 20:
+#line 376 "hex_grammar.y"
{
uint8_t mask = (yyvsp[(1) - (1)].integer) >> 8;
@@ -1655,7 +1758,7 @@ yyreduce:
/* Line 1267 of yacc.c. */
-#line 1659 "hex_grammar.c"
+#line 1762 "hex_grammar.c"
default: break;
}
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1869,6 +1972,6 @@ yyreturn:
}
-#line 313 "hex_grammar.y"
+#line 397 "hex_grammar.y"
diff --git a/libyara/hex_grammar.y b/libyara/hex_grammar.y
index 23d41f8..b740e36 100644
--- a/libyara/hex_grammar.y
+++ b/libyara/hex_grammar.y
@@ -70,9 +70,16 @@ limitations under the License.
%token <integer> _MASKED_BYTE_
%token <integer> _NUMBER_
-%type <re_node> tokens token byte alternatives range
+%type <re_node> tokens
+%type <re_node> token_sequence
+%type <re_node> token_or_range
+%type <re_node> token byte
+%type <re_node> alternatives
+%type <re_node> range
%destructor { yr_re_node_destroy($$); } tokens
+%destructor { yr_re_node_destroy($$); } token_sequence
+%destructor { yr_re_node_destroy($$); } token_or_range
%destructor { yr_re_node_destroy($$); } token
%destructor { yr_re_node_destroy($$); } byte
%destructor { yr_re_node_destroy($$); } alternatives
@@ -94,20 +101,80 @@ tokens
{
$$ = $1;
}
- | tokens token
+ | token token
{
- lex_env->token_count++;
+ $$ = yr_re_node_create(RE_NODE_CONCAT, $1, $2);
+
+ DESTROY_NODE_IF($$ == NULL, $1);
+ DESTROY_NODE_IF($$ == NULL, $2);
- if (lex_env->token_count >= MAX_HEX_STRING_TOKENS)
+ ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
+ }
+ | token token_sequence token
+ {
+ $$ = NULL;
+
+ // Some portions of the code (e.g. yr_re_split_at_chaining_point)
+ // expect a left-unbalanced tree where the right child of a concat node
+ // can't be another concat node. A concat node must always be the left
+ // child of its parent if the parent is also a concat. For this reason
+ // we can't simply create two new concat nodes arranged like this:
+ //
+ // concat
+ // / \
+ // / \
+ // token's \
+ // subtree concat
+ // / \
+ // / \
+ // / \
+ // token_sequence's token's
+ // subtree subtree
+ //
+ // Instead we must insert the subtree for the first token as the
+ // leftmost node of the token_sequence subtree.
+
+ RE_NODE* leftmost_concat = NULL;
+ RE_NODE* leftmost_node = $2;
+
+ while (leftmost_node->type == RE_NODE_CONCAT)
{
- yr_re_node_destroy($1);
- yr_re_node_destroy($2);
+ leftmost_concat = leftmost_node;
+ leftmost_node = leftmost_node->left;
+ }
- yyerror(yyscanner, lex_env, "string too long");
+ RE_NODE* new_concat = yr_re_node_create(
+ RE_NODE_CONCAT, $1, leftmost_node);
- YYABORT;
+ if (new_concat != NULL)
+ {
+ if (leftmost_concat != NULL)
+ {
+ leftmost_concat->left = new_concat;
+ $$ = yr_re_node_create(RE_NODE_CONCAT, $2, $3);
+ }
+ else
+ {
+ $$ = yr_re_node_create(RE_NODE_CONCAT, new_concat, $3);
+ }
}
+ DESTROY_NODE_IF($$ == NULL, $1);
+ DESTROY_NODE_IF($$ == NULL, $2);
+ DESTROY_NODE_IF($$ == NULL, $3);
+
+ ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
+ }
+ ;
+
+
+token_sequence
+ : token_or_range
+ {
+ $$ = $1;
+ }
+ | token_sequence token_or_range
+ {
$$ = yr_re_node_create(RE_NODE_CONCAT, $1, $2);
DESTROY_NODE_IF($$ == NULL, $1);
@@ -118,9 +185,31 @@ tokens
;
+token_or_range
+ : token
+ {
+ $$ = $1;
+ }
+ | range
+ {
+ $$ = $1;
+ $$->greedy = FALSE;
+ }
+ ;
+
+
token
: byte
{
+ lex_env->token_count++;
+
+ if (lex_env->token_count > MAX_HEX_STRING_TOKENS)
+ {
+ yr_re_node_destroy($1);
+ yyerror(yyscanner, lex_env, "string too long");
+ YYABORT;
+ }
+
$$ = $1;
}
| '('
@@ -132,26 +221,21 @@ token
$$ = $3;
lex_env->inside_or--;
}
- | '[' range ']'
- {
- $$ = $2;
- $$->greedy = FALSE;
- }
;
range
- : _NUMBER_
+ : '[' _NUMBER_ ']'
{
RE_NODE* re_any;
- if ($1 < 0)
+ if ($2 < 0)
{
yyerror(yyscanner, lex_env, "invalid negative jump length");
YYABORT;
}
- if (lex_env->inside_or && $1 > STRING_CHAINING_THRESHOLD)
+ if (lex_env->inside_or && $2 > STRING_CHAINING_THRESHOLD)
{
yyerror(yyscanner, lex_env, "jumps over "
STR(STRING_CHAINING_THRESHOLD)
@@ -167,16 +251,16 @@ range
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
- $$->start = $1;
- $$->end = $1;
+ $$->start = $2;
+ $$->end = $2;
}
- | _NUMBER_ '-' _NUMBER_
+ | '[' _NUMBER_ '-' _NUMBER_ ']'
{
RE_NODE* re_any;
if (lex_env->inside_or &&
- ($1 > STRING_CHAINING_THRESHOLD ||
- $3 > STRING_CHAINING_THRESHOLD) )
+ ($2 > STRING_CHAINING_THRESHOLD ||
+ $4 > STRING_CHAINING_THRESHOLD) )
{
yyerror(yyscanner, lex_env, "jumps over "
STR(STRING_CHAINING_THRESHOLD)
@@ -185,13 +269,13 @@ range
YYABORT;
}
- if ($1 < 0 || $3 < 0)
+ if ($2 < 0 || $4 < 0)
{
yyerror(yyscanner, lex_env, "invalid negative jump length");
YYABORT;
}
- if ($1 > $3)
+ if ($2 > $4)
{
yyerror(yyscanner, lex_env, "invalid jump range");
YYABORT;
@@ -205,10 +289,10 @@ range
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
- $$->start = $1;
- $$->end = $3;
+ $$->start = $2;
+ $$->end = $4;
}
- | _NUMBER_ '-'
+ | '[' _NUMBER_ '-' ']'
{
RE_NODE* re_any;
@@ -219,7 +303,7 @@ range
YYABORT;
}
- if ($1 < 0)
+ if ($2 < 0)
{
yyerror(yyscanner, lex_env, "invalid negative jump length");
YYABORT;
@@ -233,10 +317,10 @@ range
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
- $$->start = $1;
+ $$->start = $2;
$$->end = INT_MAX;
}
- | '-'
+ | '[' '-' ']'
{
RE_NODE* re_any;
diff --git a/yara-python/tests.py b/yara-python/tests.py
index ef32836..3769379 100644
--- a/yara-python/tests.py
+++ b/yara-python/tests.py
@@ -268,14 +268,19 @@ class TestYara(unittest.TestCase):
def assertTrueRules(self, rules, data='dummy'):
for r in rules:
- r = yara.compile(source=r)
- self.assertTrue(r.match(data=data))
+ r = yara.compile(source=r)
+ self.assertTrue(r.match(data=data))
def assertFalseRules(self, rules, data='dummy'):
for r in rules:
- r = yara.compile(source=r)
- self.assertFalse(r.match(data=data))
+ r = yara.compile(source=r)
+ self.assertFalse(r.match(data=data))
+
+ def assertSyntaxError(self, rules):
+
+ for r in rules:
+ self.assertRaises(yara.SyntaxError, yara.compile, source=r)
def runReTest(self, test):
@@ -487,10 +492,12 @@ class TestYara(unittest.TestCase):
self.assertTrueRules([
'rule test { strings: $a = { 64 01 00 00 60 01 } condition: $a }',
'rule test { strings: $a = { 64 0? 00 00 ?0 01 } condition: $a }',
+ 'rule test { strings: $a = { 6? 01 00 00 60 0? } condition: $a }',
'rule test { strings: $a = { 64 01 [1-3] 60 01 } condition: $a }',
'rule test { strings: $a = { 64 01 [1-3] (60|61) 01 } condition: $a }',
'rule test { strings: $a = { 4D 5A [-] 6A 2A [-] 58 C3} condition: $a }',
- 'rule test { strings: $a = { 4D 5A [300-] 6A 2A [-] 58 C3} condition: $a }'
+ 'rule test { strings: $a = { 4D 5A [300-] 6A 2A [-] 58 C3} condition: $a }',
+ 'rule test { strings: $a = { 2e 7? (65 | ??) 78 } condition: $a }'
], PE32_FILE)
self.assertFalseRules([
@@ -516,6 +523,14 @@ class TestYara(unittest.TestCase):
'rule test { strings: $a = { 31 32 [0-3] 37 38 } condition: $a }',
], '123456789')
+ self.assertSyntaxError([
+ 'rule test { strings: $a = { [-] 01 02 } condition: $a }',
+ 'rule test { strings: $a = { 01 02 [-] } condition: $a }',
+ 'rule test { strings: $a = { 01 02 ([-] 03 | 04) } condition: $a }',
+ 'rule test { strings: $a = { 01 02 (03 [-] | 04) } condition: $a }',
+ 'rule test { strings: $a = { 01 02 (03 | 04 [-]) } condition: $a }'
+ ])
+
rules = yara.compile(source='rule test { strings: $a = { 61 [0-3] (62|63) } condition: $a }')
matches = rules.match(data='abbb')
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list