[Forensics-changes] [yara] 369/407: Implement \b and \B anchors in regexps
Hilko Bengen
bengen at moszumanska.debian.org
Sat Jul 1 10:28:46 UTC 2017
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to annotated tag v3.3.0
in repository yara.
commit 9198ce67b38c93b6619d9be7baf1fdc74bb93842
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Tue Feb 3 10:27:27 2015 +0100
Implement \b and \B anchors in regexps
---
libyara/atoms.c | 2 +
libyara/include/yara/arena.h | 2 +-
libyara/include/yara/re.h | 27 +++--
libyara/re.c | 58 ++++++++--
libyara/re_grammar.c | 254 ++++++++++++++++++++++++-------------------
libyara/re_grammar.h | 8 +-
libyara/re_grammar.y | 14 +++
libyara/re_lexer.c | 208 +++++++++++++++++++----------------
libyara/re_lexer.l | 9 ++
yara-python/tests.py | 20 ++++
10 files changed, 371 insertions(+), 231 deletions(-)
diff --git a/libyara/atoms.c b/libyara/atoms.c
index 60ea9ae..b6e5357 100644
--- a/libyara/atoms.c
+++ b/libyara/atoms.c
@@ -773,6 +773,8 @@ ATOM_TREE_NODE* _yr_atoms_extract_from_re_node(
case RE_NODE_EMPTY:
case RE_NODE_ANCHOR_START:
case RE_NODE_ANCHOR_END:
+ case RE_NODE_WORD_BOUNDARY:
+ case RE_NODE_NON_WORD_BOUNDARY:
append_current_leaf_to_node(current_node);
return current_node;
diff --git a/libyara/include/yara/arena.h b/libyara/include/yara/arena.h
index ad0d582..a6ede1e 100644
--- a/libyara/include/yara/arena.h
+++ b/libyara/include/yara/arena.h
@@ -23,7 +23,7 @@ limitations under the License.
#define ARENA_FLAGS_FIXED_SIZE 1
#define ARENA_FLAGS_COALESCED 2
-#define ARENA_FILE_VERSION 5
+#define ARENA_FILE_VERSION 6
#define EOL ((size_t) -1)
diff --git a/libyara/include/yara/re.h b/libyara/include/yara/re.h
index 7a3f022..5d1e237 100644
--- a/libyara/include/yara/re.h
+++ b/libyara/include/yara/re.h
@@ -17,6 +17,8 @@ limitations under the License.
#ifndef YR_RE_H
#define YR_RE_H
+#include <ctype.h>
+
#include <yara/arena.h>
#include <yara/sizedstr.h>
@@ -38,6 +40,8 @@ limitations under the License.
#define RE_NODE_EMPTY 16
#define RE_NODE_ANCHOR_START 17
#define RE_NODE_ANCHOR_END 18
+#define RE_NODE_WORD_BOUNDARY 19
+#define RE_NODE_NON_WORD_BOUNDARY 20
#define RE_OPCODE_ANY 0xA0
@@ -54,15 +58,18 @@ limitations under the License.
#define RE_OPCODE_DIGIT 0xAB
#define RE_OPCODE_NON_DIGIT 0xAC
#define RE_OPCODE_MATCH 0xAD
-#define RE_OPCODE_MATCH_AT_END 0xAE
-#define RE_OPCODE_MATCH_AT_START 0xAF
-#define RE_OPCODE_SPLIT_A 0xB0
-#define RE_OPCODE_SPLIT_B 0xB1
-#define RE_OPCODE_PUSH 0xB2
-#define RE_OPCODE_POP 0xB3
-#define RE_OPCODE_JNZ 0xB4
-#define RE_OPCODE_JUMP 0xB5
+#define RE_OPCODE_MATCH_AT_END 0xB0
+#define RE_OPCODE_MATCH_AT_START 0xB1
+#define RE_OPCODE_WORD_BOUNDARY 0xB2
+#define RE_OPCODE_NON_WORD_BOUNDARY 0xB3
+
+#define RE_OPCODE_SPLIT_A 0xC0
+#define RE_OPCODE_SPLIT_B 0xC1
+#define RE_OPCODE_PUSH 0xC2
+#define RE_OPCODE_POP 0xC3
+#define RE_OPCODE_JNZ 0xC4
+#define RE_OPCODE_JUMP 0xC5
#define RE_FLAGS_FAST_HEX_REGEXP 0x02
@@ -85,6 +92,10 @@ typedef uint8_t* RE_CODE;
((cls)[(chr) / 8] & 1 << ((chr) % 8))
+#define IS_WORD_CHAR(chr) \
+ (isalnum(chr) || (chr) == '_')
+
+
struct RE_NODE
{
int type;
diff --git a/libyara/re.c b/libyara/re.c
index f903c04..9084e5c 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -26,7 +26,6 @@ order to avoid confusion with operating system threads.
*/
#include <assert.h>
-#include <ctype.h>
#include <string.h>
#include <limits.h>
@@ -692,6 +691,24 @@ int _yr_re_emit(
code_size));
break;
+ case RE_NODE_WORD_BOUNDARY:
+
+ FAIL_ON_ERROR(_yr_emit_inst(
+ arena,
+ RE_OPCODE_WORD_BOUNDARY,
+ &instruction_addr,
+ code_size));
+ break;
+
+ case RE_NODE_NON_WORD_BOUNDARY:
+
+ FAIL_ON_ERROR(_yr_emit_inst(
+ arena,
+ RE_OPCODE_NON_WORD_BOUNDARY,
+ &instruction_addr,
+ code_size));
+ break;
+
case RE_NODE_SPACE:
FAIL_ON_ERROR(_yr_emit_inst(
@@ -1527,6 +1544,7 @@ int yr_re_exec(
int max_count;
int match;
int character_size;
+ int input_incr;
int kill;
int action;
int result = -1;
@@ -1551,14 +1569,18 @@ int yr_re_exec(
character_size = 1;
input = input_data;
+ input_incr = character_size;
if (flags & RE_FLAGS_BACKWARDS)
+ {
input -= character_size;
+ input_incr = -input_incr;
+ }
max_count = min(input_size, RE_SCAN_LIMIT);
- // round down max_count to a multiple of character size, this way if
- // character_size is 2 and the input size is impair we are ignoring the
+ // Round down max_count to a multiple of character_size; this way, if
+ // character_size is 2 and input_size is odd, we ignore the trailing
+ // extra byte, which can't match anyway.
max_count = max_count - max_count % character_size;
@@ -1645,14 +1667,14 @@ int yr_re_exec(
case RE_OPCODE_WORD_CHAR:
prolog;
- match = (isalnum(*input) || *input == '_');
+ match = IS_WORD_CHAR(*input);
action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
case RE_OPCODE_NON_WORD_CHAR:
prolog;
- match = (!isalnum(*input) && *input != '_');
+ match = !IS_WORD_CHAR(*input);
action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
@@ -1711,6 +1733,26 @@ int yr_re_exec(
fiber->ip += 1;
break;
+ case RE_OPCODE_WORD_BOUNDARY:
+ case RE_OPCODE_NON_WORD_BOUNDARY:
+
+ if (count == 0 &&
+ !(flags & RE_FLAGS_NOT_AT_START) &&
+ !(flags & RE_FLAGS_BACKWARDS))
+ match = TRUE;
+ else if (count >= max_count)
+ match = TRUE;
+ else if (IS_WORD_CHAR(*(input - input_incr)) != IS_WORD_CHAR(*input))
+ match = TRUE;
+ else
+ match = FALSE;
+
+ if (*ip == RE_OPCODE_NON_WORD_BOUNDARY)
+ match = !match;
+
+ action = match ? ACTION_CONTINUE : ACTION_KILL;
+ break;
+
case RE_OPCODE_MATCH_AT_START:
if (flags & RE_FLAGS_BACKWARDS)
kill = input_size > count;
@@ -1781,11 +1823,7 @@ int yr_re_exec(
if (flags & RE_FLAGS_WIDE && *(input + 1) != 0)
_yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);
- if (flags & RE_FLAGS_BACKWARDS)
- input -= character_size;
- else
- input += character_size;
-
+ input += input_incr;
count += character_size;
if (flags & RE_FLAGS_SCAN && count < max_count)
diff --git a/libyara/re_grammar.c b/libyara/re_grammar.c
index de26a47..95b5541 100644
--- a/libyara/re_grammar.c
+++ b/libyara/re_grammar.c
@@ -83,7 +83,9 @@
_SPACE_ = 264,
_NON_SPACE_ = 265,
_DIGIT_ = 266,
- _NON_DIGIT_ = 267
+ _NON_DIGIT_ = 267,
+ _WORD_BOUNDARY_ = 268,
+ _NON_WORD_BOUNDARY_ = 269
};
#endif
/* Tokens. */
@@ -97,6 +99,8 @@
#define _NON_SPACE_ 265
#define _DIGIT_ 266
#define _NON_DIGIT_ 267
+#define _WORD_BOUNDARY_ 268
+#define _NON_WORD_BOUNDARY_ 269
@@ -160,7 +164,7 @@ typedef union YYSTYPE
uint8_t* class_vector;
}
/* Line 193 of yacc.c. */
-#line 164 "re_grammar.c"
+#line 168 "re_grammar.c"
YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
@@ -173,7 +177,7 @@ typedef union YYSTYPE
/* Line 216 of yacc.c. */
-#line 177 "re_grammar.c"
+#line 181 "re_grammar.c"
#ifdef short
# undef short
@@ -386,22 +390,22 @@ union yyalloc
#endif
/* YYFINAL -- State number of the termination state. */
-#define YYFINAL 20
+#define YYFINAL 22
/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 40
+#define YYLAST 43
/* YYNTOKENS -- Number of terminals. */
-#define YYNTOKENS 22
+#define YYNTOKENS 24
/* YYNNTS -- Number of nonterminals. */
#define YYNNTS 6
/* YYNRULES -- Number of rules. */
-#define YYNRULES 28
+#define YYNRULES 30
/* YYNRULES -- Number of states. */
-#define YYNSTATES 32
+#define YYNSTATES 34
/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
#define YYUNDEFTOK 2
-#define YYMAXUTOK 267
+#define YYMAXUTOK 269
#define YYTRANSLATE(YYX) \
((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
@@ -412,16 +416,16 @@ static const yytype_uint8 yytranslate[] =
0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 18, 2, 2, 2,
- 19, 20, 14, 16, 2, 2, 21, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 20, 2, 2, 2,
+ 21, 22, 16, 18, 2, 2, 23, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 15, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 17, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 17, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 19, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 13, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 15, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -435,7 +439,7 @@ static const yytype_uint8 yytranslate[] =
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
- 5, 6, 7, 8, 9, 10, 11, 12
+ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
};
#if YYDEBUG
@@ -445,28 +449,30 @@ static const yytype_uint8 yyprhs[] =
{
0, 0, 3, 5, 7, 9, 13, 16, 18, 21,
24, 28, 31, 35, 38, 42, 45, 47, 49, 51,
- 55, 57, 59, 61, 63, 65, 67, 69, 71
+ 53, 55, 59, 61, 63, 65, 67, 69, 71, 73,
+ 75
};
/* YYRHS -- A `-1'-separated list of the rules' RHS. */
static const yytype_int8 yyrhs[] =
{
- 23, 0, -1, 24, -1, 1, -1, 25, -1, 24,
- 13, 25, -1, 24, 13, -1, 26, -1, 25, 26,
- -1, 27, 14, -1, 27, 14, 15, -1, 27, 16,
- -1, 27, 16, 15, -1, 27, 15, -1, 27, 15,
- 15, -1, 27, 5, -1, 27, -1, 17, -1, 18,
- -1, 19, 24, 20, -1, 21, -1, 3, -1, 7,
- -1, 8, -1, 9, -1, 10, -1, 11, -1, 12,
- -1, 6, -1
+ 25, 0, -1, 26, -1, 1, -1, 27, -1, 26,
+ 15, 27, -1, 26, 15, -1, 28, -1, 27, 28,
+ -1, 29, 16, -1, 29, 16, 17, -1, 29, 18,
+ -1, 29, 18, 17, -1, 29, 17, -1, 29, 17,
+ 17, -1, 29, 5, -1, 29, -1, 13, -1, 14,
+ -1, 19, -1, 20, -1, 21, 26, 22, -1, 23,
+ -1, 3, -1, 7, -1, 8, -1, 9, -1, 10,
+ -1, 11, -1, 12, -1, 6, -1
};
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
static const yytype_uint16 yyrline[] =
{
- 0, 86, 86, 91, 94, 98, 107, 122, 126, 136,
- 143, 152, 159, 168, 178, 189, 199, 203, 209, 217,
- 221, 227, 235, 241, 247, 253, 259, 265, 271
+ 0, 88, 88, 93, 96, 100, 109, 124, 128, 138,
+ 145, 154, 161, 170, 180, 191, 201, 205, 211, 217,
+ 223, 231, 235, 241, 249, 255, 261, 267, 273, 279,
+ 285
};
#endif
@@ -477,9 +483,9 @@ static const char *const yytname[] =
{
"$end", "error", "$undefined", "_CHAR_", "_ANY_", "_RANGE_", "_CLASS_",
"_WORD_CHAR_", "_NON_WORD_CHAR_", "_SPACE_", "_NON_SPACE_", "_DIGIT_",
- "_NON_DIGIT_", "'|'", "'*'", "'?'", "'+'", "'^'", "'$'", "'('", "')'",
- "'.'", "$accept", "re", "alternative", "concatenation", "repeat",
- "single", 0
+ "_NON_DIGIT_", "_WORD_BOUNDARY_", "_NON_WORD_BOUNDARY_", "'|'", "'*'",
+ "'?'", "'+'", "'^'", "'$'", "'('", "')'", "'.'", "$accept", "re",
+ "alternative", "concatenation", "repeat", "single", 0
};
#endif
@@ -489,25 +495,27 @@ static const char *const yytname[] =
static const yytype_uint16 yytoknum[] =
{
0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
- 265, 266, 267, 124, 42, 63, 43, 94, 36, 40,
- 41, 46
+ 265, 266, 267, 268, 269, 124, 42, 63, 43, 94,
+ 36, 40, 41, 46
};
# endif
/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
static const yytype_uint8 yyr1[] =
{
- 0, 22, 23, 23, 24, 24, 24, 25, 25, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27
+ 0, 24, 25, 25, 26, 26, 26, 27, 27, 28,
+ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+ 28, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+ 29
};
/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
static const yytype_uint8 yyr2[] =
{
0, 2, 1, 1, 1, 3, 2, 1, 2, 2,
- 3, 2, 3, 2, 3, 2, 1, 1, 1, 3,
- 1, 1, 1, 1, 1, 1, 1, 1, 1
+ 3, 2, 3, 2, 3, 2, 1, 1, 1, 1,
+ 1, 3, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1
};
/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
@@ -515,33 +523,33 @@ static const yytype_uint8 yyr2[] =
means the default is an error. */
static const yytype_uint8 yydefact[] =
{
- 0, 3, 21, 28, 22, 23, 24, 25, 26, 27,
- 17, 18, 0, 20, 0, 2, 4, 7, 16, 0,
- 1, 6, 8, 15, 9, 13, 11, 19, 5, 10,
- 14, 12
+ 0, 3, 23, 30, 24, 25, 26, 27, 28, 29,
+ 17, 18, 19, 20, 0, 22, 0, 2, 4, 7,
+ 16, 0, 1, 6, 8, 15, 9, 13, 11, 21,
+ 5, 10, 14, 12
};
/* YYDEFGOTO[NTERM-NUM]. */
static const yytype_int8 yydefgoto[] =
{
- -1, 14, 15, 16, 17, 18
+ -1, 16, 17, 18, 19, 20
};
/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
STATE-NUM. */
-#define YYPACT_NINF -16
+#define YYPACT_NINF -12
static const yytype_int8 yypact[] =
{
- -1, -16, -16, -16, -16, -16, -16, -16, -16, -16,
- -16, -16, 16, -16, 3, -9, 16, -16, 24, 1,
- -16, 16, -16, -16, -3, 0, 15, -16, 16, -16,
- -16, -16
+ -1, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, 18, -12, 1, -11, 18, -12,
+ -2, 21, -12, 18, -12, -12, 0, 16, 17, -12,
+ 18, -12, -12, -12
};
/* YYPGOTO[NTERM-NUM]. */
static const yytype_int8 yypgoto[] =
{
- -16, -16, 19, 11, -15, -16
+ -12, -12, 26, 19, 5, -12
};
/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
@@ -551,20 +559,20 @@ static const yytype_int8 yypgoto[] =
#define YYTABLE_NINF -1
static const yytype_uint8 yytable[] =
{
- 1, 22, 2, 20, 21, 3, 4, 5, 6, 7,
- 8, 9, 29, 22, 21, 30, 10, 11, 12, 2,
- 13, 27, 3, 4, 5, 6, 7, 8, 9, 23,
- 31, 19, 28, 10, 11, 12, 0, 13, 24, 25,
- 26
+ 1, 22, 2, 25, 23, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 26, 27, 28, 31, 12, 13,
+ 14, 2, 15, 24, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 32, 33, 24, 23, 12, 13, 14,
+ 21, 15, 30, 29
};
-static const yytype_int8 yycheck[] =
+static const yytype_uint8 yycheck[] =
{
- 1, 16, 3, 0, 13, 6, 7, 8, 9, 10,
- 11, 12, 15, 28, 13, 15, 17, 18, 19, 3,
- 21, 20, 6, 7, 8, 9, 10, 11, 12, 5,
- 15, 12, 21, 17, 18, 19, -1, 21, 14, 15,
- 16
+ 1, 0, 3, 5, 15, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 16, 17, 18, 17, 19, 20,
+ 21, 3, 23, 18, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 17, 17, 30, 15, 19, 20, 21,
+ 14, 23, 23, 22
};
/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
@@ -572,9 +580,9 @@ static const yytype_int8 yycheck[] =
static const yytype_uint8 yystos[] =
{
0, 1, 3, 6, 7, 8, 9, 10, 11, 12,
- 17, 18, 19, 21, 23, 24, 25, 26, 27, 24,
- 0, 13, 26, 5, 14, 15, 16, 20, 25, 15,
- 15, 15
+ 13, 14, 19, 20, 21, 23, 25, 26, 27, 28,
+ 29, 26, 0, 15, 28, 5, 16, 17, 18, 22,
+ 27, 17, 17, 17
};
#define yyerrok (yyerrstatus = 0)
@@ -1095,29 +1103,29 @@ yydestruct (yymsg, yytype, yyvaluep, yyscanner, lex_env)
switch (yytype)
{
case 6: /* "_CLASS_" */
-#line 78 "re_grammar.y"
+#line 80 "re_grammar.y"
{ yr_free((yyvaluep->class_vector)); };
-#line 1101 "re_grammar.c"
+#line 1109 "re_grammar.c"
break;
- case 24: /* "alternative" */
-#line 79 "re_grammar.y"
+ case 26: /* "alternative" */
+#line 81 "re_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1106 "re_grammar.c"
+#line 1114 "re_grammar.c"
break;
- case 25: /* "concatenation" */
-#line 80 "re_grammar.y"
+ case 27: /* "concatenation" */
+#line 82 "re_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1111 "re_grammar.c"
+#line 1119 "re_grammar.c"
break;
- case 26: /* "repeat" */
-#line 81 "re_grammar.y"
+ case 28: /* "repeat" */
+#line 83 "re_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1116 "re_grammar.c"
+#line 1124 "re_grammar.c"
break;
- case 27: /* "single" */
-#line 82 "re_grammar.y"
+ case 29: /* "single" */
+#line 84 "re_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1121 "re_grammar.c"
+#line 1129 "re_grammar.c"
break;
default:
@@ -1427,7 +1435,7 @@ yyreduce:
switch (yyn)
{
case 2:
-#line 87 "re_grammar.y"
+#line 89 "re_grammar.y"
{
RE* re = yyget_extra(yyscanner);
re->root_node = (yyvsp[(1) - (1)].re_node);
@@ -1435,14 +1443,14 @@ yyreduce:
break;
case 4:
-#line 95 "re_grammar.y"
+#line 97 "re_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 5:
-#line 99 "re_grammar.y"
+#line 101 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_ALT, (yyvsp[(1) - (3)].re_node), (yyvsp[(3) - (3)].re_node));
@@ -1454,7 +1462,7 @@ yyreduce:
break;
case 6:
-#line 108 "re_grammar.y"
+#line 110 "re_grammar.y"
{
RE_NODE* node;
@@ -1470,14 +1478,14 @@ yyreduce:
break;
case 7:
-#line 123 "re_grammar.y"
+#line 125 "re_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 8:
-#line 127 "re_grammar.y"
+#line 129 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
@@ -1488,7 +1496,7 @@ yyreduce:
break;
case 9:
-#line 137 "re_grammar.y"
+#line 139 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_STAR, (yyvsp[(1) - (2)].re_node), NULL);
@@ -1498,7 +1506,7 @@ yyreduce:
break;
case 10:
-#line 144 "re_grammar.y"
+#line 146 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_STAR, (yyvsp[(1) - (3)].re_node), NULL);
@@ -1510,7 +1518,7 @@ yyreduce:
break;
case 11:
-#line 153 "re_grammar.y"
+#line 155 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_PLUS, (yyvsp[(1) - (2)].re_node), NULL);
@@ -1520,7 +1528,7 @@ yyreduce:
break;
case 12:
-#line 160 "re_grammar.y"
+#line 162 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_PLUS, (yyvsp[(1) - (3)].re_node), NULL);
@@ -1532,7 +1540,7 @@ yyreduce:
break;
case 13:
-#line 169 "re_grammar.y"
+#line 171 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (2)].re_node), NULL);
@@ -1545,7 +1553,7 @@ yyreduce:
break;
case 14:
-#line 179 "re_grammar.y"
+#line 181 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (3)].re_node), NULL);
@@ -1559,7 +1567,7 @@ yyreduce:
break;
case 15:
-#line 190 "re_grammar.y"
+#line 192 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (2)].re_node), NULL);
@@ -1572,25 +1580,25 @@ yyreduce:
break;
case 16:
-#line 200 "re_grammar.y"
+#line 202 "re_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 17:
-#line 204 "re_grammar.y"
+#line 206 "re_grammar.y"
{
- (yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL);
+ (yyval.re_node) = yr_re_node_create(RE_NODE_WORD_BOUNDARY, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
}
break;
case 18:
-#line 210 "re_grammar.y"
+#line 212 "re_grammar.y"
{
- (yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_END, NULL, NULL);
+ (yyval.re_node) = yr_re_node_create(RE_NODE_NON_WORD_BOUNDARY, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
}
@@ -1599,21 +1607,39 @@ yyreduce:
case 19:
#line 218 "re_grammar.y"
{
- (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
+ (yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL);
+
+ ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
}
break;
case 20:
-#line 222 "re_grammar.y"
+#line 224 "re_grammar.y"
{
- (yyval.re_node) = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+ (yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_END, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
}
break;
case 21:
-#line 228 "re_grammar.y"
+#line 232 "re_grammar.y"
+ {
+ (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
+ }
+ break;
+
+ case 22:
+#line 236 "re_grammar.y"
+ {
+ (yyval.re_node) = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+
+ ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
+ }
+ break;
+
+ case 23:
+#line 242 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
@@ -1623,8 +1649,8 @@ yyreduce:
}
break;
- case 22:
-#line 236 "re_grammar.y"
+ case 24:
+#line 250 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_WORD_CHAR, NULL, NULL);
@@ -1632,8 +1658,8 @@ yyreduce:
}
break;
- case 23:
-#line 242 "re_grammar.y"
+ case 25:
+#line 256 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_NON_WORD_CHAR, NULL, NULL);
@@ -1641,8 +1667,8 @@ yyreduce:
}
break;
- case 24:
-#line 248 "re_grammar.y"
+ case 26:
+#line 262 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_SPACE, NULL, NULL);
@@ -1650,8 +1676,8 @@ yyreduce:
}
break;
- case 25:
-#line 254 "re_grammar.y"
+ case 27:
+#line 268 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_NON_SPACE, NULL, NULL);
@@ -1659,8 +1685,8 @@ yyreduce:
}
break;
- case 26:
-#line 260 "re_grammar.y"
+ case 28:
+#line 274 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_DIGIT, NULL, NULL);
@@ -1668,8 +1694,8 @@ yyreduce:
}
break;
- case 27:
-#line 266 "re_grammar.y"
+ case 29:
+#line 280 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_NON_DIGIT, NULL, NULL);
@@ -1677,8 +1703,8 @@ yyreduce:
}
break;
- case 28:
-#line 272 "re_grammar.y"
+ case 30:
+#line 286 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_CLASS, NULL, NULL);
@@ -1690,7 +1716,7 @@ yyreduce:
/* Line 1267 of yacc.c. */
-#line 1694 "re_grammar.c"
+#line 1720 "re_grammar.c"
default: break;
}
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1904,6 +1930,6 @@ yyreturn:
}
-#line 282 "re_grammar.y"
+#line 296 "re_grammar.y"
diff --git a/libyara/re_grammar.h b/libyara/re_grammar.h
index b9574b0..c55856b 100644
--- a/libyara/re_grammar.h
+++ b/libyara/re_grammar.h
@@ -48,7 +48,9 @@
_SPACE_ = 264,
_NON_SPACE_ = 265,
_DIGIT_ = 266,
- _NON_DIGIT_ = 267
+ _NON_DIGIT_ = 267,
+ _WORD_BOUNDARY_ = 268,
+ _NON_WORD_BOUNDARY_ = 269
};
#endif
/* Tokens. */
@@ -62,6 +64,8 @@
#define _NON_SPACE_ 265
#define _DIGIT_ 266
#define _NON_DIGIT_ 267
+#define _WORD_BOUNDARY_ 268
+#define _NON_WORD_BOUNDARY_ 269
@@ -76,7 +80,7 @@ typedef union YYSTYPE
uint8_t* class_vector;
}
/* Line 1529 of yacc.c. */
-#line 80 "re_grammar.h"
+#line 84 "re_grammar.h"
YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
diff --git a/libyara/re_grammar.y b/libyara/re_grammar.y
index f4e3434..f5ed099 100644
--- a/libyara/re_grammar.y
+++ b/libyara/re_grammar.y
@@ -72,6 +72,8 @@ limitations under the License.
%token _NON_SPACE_
%token _DIGIT_
%token _NON_DIGIT_
+%token _WORD_BOUNDARY_
+%token _NON_WORD_BOUNDARY_
%type <re_node> alternative concatenation repeat single
@@ -200,6 +202,18 @@ repeat : single '*'
{
$$ = $1;
}
+ | _WORD_BOUNDARY_
+ {
+ $$ = yr_re_node_create(RE_NODE_WORD_BOUNDARY, NULL, NULL);
+
+ ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
+ }
+ | _NON_WORD_BOUNDARY_
+ {
+ $$ = yr_re_node_create(RE_NODE_NON_WORD_BOUNDARY, NULL, NULL);
+
+ ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
+ }
| '^'
{
$$ = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL);
diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c
index d89ee38..a66c40b 100644
--- a/libyara/re_lexer.c
+++ b/libyara/re_lexer.c
@@ -47,6 +47,7 @@ typedef int16_t flex_int16_t;
typedef uint16_t flex_uint16_t;
typedef int32_t flex_int32_t;
typedef uint32_t flex_uint32_t;
+typedef uint64_t flex_uint64_t;
#else
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
@@ -357,13 +358,13 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
*/
#define YY_DO_BEFORE_ACTION \
yyg->yytext_ptr = yy_bp; \
- yyleng = (size_t) (yy_cp - yy_bp); \
+ yyleng = (yy_size_t) (yy_cp - yy_bp); \
yyg->yy_hold_char = *yy_cp; \
*yy_cp = '\0'; \
yyg->yy_c_buf_p = yy_cp;
-#define YY_NUM_RULES 27
-#define YY_END_OF_BUFFER 28
+#define YY_NUM_RULES 29
+#define YY_END_OF_BUFFER 30
/* This struct is not used in this scanner,
but its presence is necessary. */
struct yy_trans_info
@@ -371,13 +372,13 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
-static yyconst flex_int16_t yy_accept[43] =
+static yyconst flex_int16_t yy_accept[45] =
{ 0,
- 0, 0, 0, 0, 28, 7, 7, 26, 6, 15,
- 7, 25, 27, 24, 16, 5, 3, 14, 13, 11,
- 9, 12, 10, 8, 0, 0, 0, 0, 23, 21,
- 19, 22, 20, 18, 0, 4, 0, 1, 2, 17,
- 0, 0
+ 0, 0, 0, 0, 30, 7, 7, 28, 6, 17,
+ 7, 27, 29, 26, 18, 5, 3, 16, 15, 13,
+ 11, 9, 14, 12, 10, 8, 0, 0, 0, 0,
+ 25, 23, 21, 24, 22, 20, 0, 4, 0, 1,
+ 2, 19, 0, 0
} ;
static yyconst flex_int32_t yy_ec[256] =
@@ -388,14 +389,14 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1, 3, 1, 1, 1, 3,
3, 3, 3, 4, 5, 3, 1, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 1, 1, 1,
- 1, 1, 3, 1, 7, 7, 7, 8, 7, 7,
+ 1, 1, 3, 1, 7, 8, 7, 9, 7, 7,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 9, 1, 1, 1, 10, 1, 1, 1,
- 11, 12, 13, 14, 1, 1, 7, 7, 7, 15,
+ 1, 1, 10, 1, 1, 1, 11, 1, 1, 1,
+ 12, 13, 14, 15, 1, 1, 7, 16, 7, 17,
7, 7, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 16, 1, 1, 1, 17, 18,
- 1, 1, 19, 3, 20, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 18, 1, 1, 1, 19, 20,
+ 1, 1, 21, 3, 22, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -412,69 +413,70 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1
} ;
-static yyconst flex_int32_t yy_meta[21] =
+static yyconst flex_int32_t yy_meta[23] =
{ 0,
- 1, 2, 1, 1, 3, 4, 4, 4, 1, 1,
- 1, 1, 5, 1, 4, 1, 1, 1, 1, 1
+ 1, 2, 1, 1, 3, 4, 4, 4, 4, 1,
+ 1, 1, 1, 5, 1, 4, 4, 1, 1, 1,
+ 1, 1
} ;
-static yyconst flex_int16_t yy_base[49] =
+static yyconst flex_int16_t yy_base[51] =
{ 0,
- 0, 18, 3, 5, 46, 91, 91, 91, 9, 32,
- 0, 40, 39, 42, 38, 91, 26, 30, 91, 91,
- 91, 91, 91, 91, 4, 49, 0, 30, 29, 28,
- 26, 23, 21, 20, 4, 91, 7, 91, 91, 91,
- 0, 91, 69, 74, 79, 84, 86, 4
+ 0, 20, 3, 5, 50, 89, 89, 89, 10, 36,
+ 0, 44, 43, 47, 38, 89, 26, 33, 89, 89,
+ 89, 89, 89, 89, 89, 89, 4, 5, 0, 33,
+ 32, 31, 29, 26, 24, 23, 15, 89, 8, 89,
+ 89, 89, 0, 89, 67, 72, 77, 82, 84, 4
} ;
-static yyconst flex_int16_t yy_def[49] =
+static yyconst flex_int16_t yy_def[51] =
{ 0,
- 43, 43, 44, 44, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 45, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 46, 42, 42, 42,
- 42, 42, 42, 42, 47, 42, 42, 42, 42, 42,
- 48, 0, 42, 42, 42, 42, 42, 42
+ 45, 45, 46, 46, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 47, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 48, 44,
+ 44, 44, 44, 44, 44, 44, 49, 44, 44, 44,
+ 44, 44, 50, 0, 44, 44, 44, 44, 44, 44
} ;
static yyconst flex_int16_t yy_nxt[112] =
{ 0,
- 42, 7, 8, 25, 13, 26, 13, 28, 27, 37,
- 9, 10, 37, 8, 14, 15, 14, 15, 11, 7,
- 8, 16, 17, 38, 27, 27, 38, 27, 9, 10,
- 27, 8, 27, 27, 27, 18, 11, 18, 36, 19,
- 20, 21, 27, 27, 27, 42, 22, 23, 24, 29,
- 30, 31, 25, 42, 26, 42, 32, 33, 34, 35,
- 42, 42, 42, 42, 42, 42, 42, 42, 39, 6,
- 6, 6, 6, 6, 12, 12, 12, 12, 12, 28,
- 42, 28, 28, 28, 40, 40, 40, 40, 41, 41,
- 5, 42, 42, 42, 42, 42, 42, 42, 42, 42,
-
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42
+ 44, 7, 8, 27, 13, 28, 13, 30, 27, 39,
+ 28, 9, 10, 39, 8, 14, 15, 14, 15, 29,
+ 11, 7, 8, 16, 17, 40, 41, 29, 29, 40,
+ 29, 9, 10, 29, 8, 29, 29, 29, 18, 38,
+ 11, 18, 29, 19, 20, 21, 22, 29, 29, 44,
+ 44, 23, 24, 25, 26, 31, 32, 33, 44, 44,
+ 44, 44, 44, 34, 35, 36, 37, 6, 6, 6,
+ 6, 6, 12, 12, 12, 12, 12, 30, 44, 30,
+ 30, 30, 42, 42, 42, 42, 43, 43, 5, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44
} ;
static yyconst flex_int16_t yy_chk[112] =
{ 0,
- 0, 1, 1, 11, 3, 11, 4, 48, 35, 25,
- 1, 1, 37, 1, 3, 3, 4, 4, 1, 2,
- 2, 9, 9, 25, 34, 33, 37, 32, 2, 2,
- 31, 2, 30, 29, 28, 18, 2, 10, 17, 10,
- 10, 10, 15, 13, 12, 5, 10, 10, 10, 14,
- 14, 14, 26, 0, 26, 0, 14, 14, 14, 14,
- 0, 0, 0, 0, 0, 0, 0, 0, 26, 43,
- 43, 43, 43, 43, 44, 44, 44, 44, 44, 45,
- 0, 45, 45, 45, 46, 46, 46, 46, 47, 47,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
-
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42
+ 0, 1, 1, 11, 3, 11, 4, 50, 28, 27,
+ 28, 1, 1, 39, 1, 3, 3, 4, 4, 37,
+ 1, 2, 2, 9, 9, 27, 28, 36, 35, 39,
+ 34, 2, 2, 33, 2, 32, 31, 30, 18, 17,
+ 2, 10, 15, 10, 10, 10, 10, 13, 12, 5,
+ 0, 10, 10, 10, 10, 14, 14, 14, 0, 0,
+ 0, 0, 0, 14, 14, 14, 14, 45, 45, 45,
+ 45, 45, 46, 46, 46, 46, 46, 47, 0, 47,
+ 47, 47, 48, 48, 48, 48, 49, 49, 44, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44
} ;
/* Table of booleans, true if rule could match eol. */
-static yyconst flex_int32_t yy_rule_can_match_eol[28] =
+static yyconst flex_int32_t yy_rule_can_match_eol[30] =
{ 0,
-0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, };
+0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };
/* The intent behind this definition is that it'll catch
* any uses of REJECT which flex missed.
@@ -525,7 +527,7 @@ uint8_t read_escaped_char(yyscan_t yyscanner);
#define YY_NO_UNISTD_H 1
-#line 529 "re_lexer.c"
+#line 531 "re_lexer.c"
#define INITIAL 0
#define char_class 1
@@ -760,7 +762,7 @@ YY_DECL
#line 62 "re_lexer.l"
-#line 764 "re_lexer.c"
+#line 766 "re_lexer.c"
yylval = yylval_param;
@@ -815,13 +817,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 43 )
+ if ( yy_current_state >= 45 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
++yy_cp;
}
- while ( yy_current_state != 42 );
+ while ( yy_current_state != 44 );
yy_cp = yyg->yy_last_accepting_cpos;
yy_current_state = yyg->yy_last_accepting_state;
@@ -1021,22 +1023,36 @@ case 14:
YY_RULE_SETUP
#line 200 "re_lexer.l"
{
+ return _WORD_BOUNDARY_;
+}
+ YY_BREAK
+case 15:
+YY_RULE_SETUP
+#line 204 "re_lexer.l"
+{
+ return _NON_WORD_BOUNDARY_;
+}
+ YY_BREAK
+case 16:
+YY_RULE_SETUP
+#line 209 "re_lexer.l"
+{
yyerror(yyscanner, lex_env, "backreferences are not allowed");
yyterminate();
}
YY_BREAK
-case 15:
+case 17:
YY_RULE_SETUP
-#line 207 "re_lexer.l"
+#line 216 "re_lexer.l"
{
yylval->integer = read_escaped_char(yyscanner);
return _CHAR_;
}
YY_BREAK
-case 16:
+case 18:
YY_RULE_SETUP
-#line 213 "re_lexer.l"
+#line 222 "re_lexer.l"
{
// End of character class.
@@ -1056,10 +1072,10 @@ YY_RULE_SETUP
return _CLASS_;
}
YY_BREAK
-case 17:
-/* rule 17 can match eol */
+case 19:
+/* rule 19 can match eol */
YY_RULE_SETUP
-#line 234 "re_lexer.l"
+#line 243 "re_lexer.l"
{
// A range inside a character class.
@@ -1096,9 +1112,9 @@ YY_RULE_SETUP
}
}
YY_BREAK
-case 18:
+case 20:
YY_RULE_SETUP
-#line 271 "re_lexer.l"
+#line 280 "re_lexer.l"
{
int i;
@@ -1111,9 +1127,9 @@ YY_RULE_SETUP
LEX_ENV->class_vector[i] |= word_chars[i];
}
YY_BREAK
-case 19:
+case 21:
YY_RULE_SETUP
-#line 284 "re_lexer.l"
+#line 293 "re_lexer.l"
{
int i;
@@ -1126,18 +1142,18 @@ YY_RULE_SETUP
LEX_ENV->class_vector[i] |= ~word_chars[i];
}
YY_BREAK
-case 20:
+case 22:
YY_RULE_SETUP
-#line 297 "re_lexer.l"
+#line 306 "re_lexer.l"
{
LEX_ENV->class_vector[' ' / 8] |= 1 << ' ' % 8;
LEX_ENV->class_vector['\t' / 8] |= 1 << '\t' % 8;
}
YY_BREAK
-case 21:
+case 23:
YY_RULE_SETUP
-#line 304 "re_lexer.l"
+#line 313 "re_lexer.l"
{
int i;
@@ -1149,9 +1165,9 @@ YY_RULE_SETUP
LEX_ENV->class_vector['\t' / 8] &= ~(1 << '\t' % 8);
}
YY_BREAK
-case 22:
+case 24:
YY_RULE_SETUP
-#line 316 "re_lexer.l"
+#line 325 "re_lexer.l"
{
char c;
@@ -1160,9 +1176,9 @@ YY_RULE_SETUP
LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
}
YY_BREAK
-case 23:
+case 25:
YY_RULE_SETUP
-#line 325 "re_lexer.l"
+#line 334 "re_lexer.l"
{
int i;
@@ -1175,18 +1191,18 @@ YY_RULE_SETUP
LEX_ENV->class_vector[c / 8] &= ~(1 << c % 8);
}
YY_BREAK
-case 24:
+case 26:
YY_RULE_SETUP
-#line 338 "re_lexer.l"
+#line 347 "re_lexer.l"
{
uint8_t c = read_escaped_char(yyscanner);
LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
}
YY_BREAK
-case 25:
+case 27:
YY_RULE_SETUP
-#line 345 "re_lexer.l"
+#line 354 "re_lexer.l"
{
if (yytext[0] >= 32 && yytext[0] < 127)
@@ -1204,7 +1220,7 @@ YY_RULE_SETUP
}
YY_BREAK
case YY_STATE_EOF(char_class):
-#line 362 "re_lexer.l"
+#line 371 "re_lexer.l"
{
// End of regexp reached while scanning a character class.
@@ -1213,9 +1229,9 @@ case YY_STATE_EOF(char_class):
yyterminate();
}
YY_BREAK
-case 26:
+case 28:
YY_RULE_SETUP
-#line 371 "re_lexer.l"
+#line 380 "re_lexer.l"
{
if (yytext[0] >= 32 && yytext[0] < 127)
@@ -1230,18 +1246,18 @@ YY_RULE_SETUP
}
YY_BREAK
case YY_STATE_EOF(INITIAL):
-#line 385 "re_lexer.l"
+#line 394 "re_lexer.l"
{
yyterminate();
}
YY_BREAK
-case 27:
+case 29:
YY_RULE_SETUP
-#line 390 "re_lexer.l"
+#line 399 "re_lexer.l"
ECHO;
YY_BREAK
-#line 1245 "re_lexer.c"
+#line 1261 "re_lexer.c"
case YY_END_OF_BUFFER:
{
@@ -1534,7 +1550,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 43 )
+ if ( yy_current_state >= 45 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1563,11 +1579,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 43 )
+ if ( yy_current_state >= 45 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 42);
+ yy_is_jam = (yy_current_state == 44);
return yy_is_jam ? 0 : yy_current_state;
}
@@ -2371,7 +2387,7 @@ void re_yyfree (void * ptr , yyscan_t yyscanner)
#define YYTABLES_NAME "yytables"
-#line 390 "re_lexer.l"
+#line 399 "re_lexer.l"
diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l
index 7c500bf..19f234d 100644
--- a/libyara/re_lexer.l
+++ b/libyara/re_lexer.l
@@ -197,6 +197,15 @@ hex_digit [0-9a-fA-F]
}
+\\b {
+ return _WORD_BOUNDARY_;
+}
+
+\\B {
+ return _NON_WORD_BOUNDARY_;
+}
+
+
\\{digit}+ {
yyerror(yyscanner, lex_env, "backreferences are not allowed");
diff --git a/yara-python/tests.py b/yara-python/tests.py
index 4c719f1..7b4a402 100644
--- a/yara-python/tests.py
+++ b/yara-python/tests.py
@@ -194,6 +194,26 @@ RE_TESTS = [
('^(ab|cd)e', 'abcde', FAIL),
('(abc|)ef', 'abcdef', SUCCEED, 'ef'),
('(abc|)ef', 'abcef', SUCCEED, 'abcef'),
+ (r'\babc', 'abc', SUCCEED, 'abc'),
+ (r'abc\b', 'abc', SUCCEED, 'abc'),
+ (r'\babc', '1abc', FAIL),
+ (r'abc\b', 'abc1', FAIL),
+ (r'abc\s\b', 'abc x', SUCCEED, 'abc '),
+ (r'abc\s\b', 'abc ', FAIL),
+ (r'\babc\b', ' abc ', SUCCEED, 'abc'),
+ (r'\b\w\w\w\b', ' abc ', SUCCEED, 'abc'),
+ (r'\w\w\w\b', 'abcd', SUCCEED, 'bcd'),
+ (r'\b\w\w\w', 'abcd', SUCCEED, 'abc'),
+ (r'\b\w\w\w\b', 'abcd', FAIL),
+ (r'\Babc', 'abc', FAIL),
+ (r'abc\B', 'abc', FAIL),
+ (r'\Babc', '1abc', SUCCEED, 'abc'),
+ (r'abc\B', 'abc1', SUCCEED, 'abc'),
+ (r'abc\s\B', 'abc x', FAIL),
+ (r'abc\s\B', 'abc ', SUCCEED, 'abc '),
+ (r'\w\w\w\B', 'abcd', SUCCEED, 'abc'),
+ (r'\B\w\w\w', 'abcd', SUCCEED, 'bcd'),
+ (r'\B\w\w\w\B', 'abcd', FAIL),
# This is allowed in most regexp engines but in order to keep the
# grammar free of shift/reduce conflicts I've decided not supporting
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list