[Forensics-changes] [yara] 369/407: Implement \b and \B anchors in regexps

Hilko Bengen bengen at moszumanska.debian.org
Sat Jul 1 10:28:46 UTC 2017


This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to annotated tag v3.3.0
in repository yara.

commit 9198ce67b38c93b6619d9be7baf1fdc74bb93842
Author: Victor M. Alvarez <plusvic at gmail.com>
Date:   Tue Feb 3 10:27:27 2015 +0100

    Implement \b and \B anchors in regexps
---
 libyara/atoms.c              |   2 +
 libyara/include/yara/arena.h |   2 +-
 libyara/include/yara/re.h    |  27 +++--
 libyara/re.c                 |  58 ++++++++--
 libyara/re_grammar.c         | 254 ++++++++++++++++++++++++-------------------
 libyara/re_grammar.h         |   8 +-
 libyara/re_grammar.y         |  14 +++
 libyara/re_lexer.c           | 208 +++++++++++++++++++----------------
 libyara/re_lexer.l           |   9 ++
 yara-python/tests.py         |  20 ++++
 10 files changed, 371 insertions(+), 231 deletions(-)

diff --git a/libyara/atoms.c b/libyara/atoms.c
index 60ea9ae..b6e5357 100644
--- a/libyara/atoms.c
+++ b/libyara/atoms.c
@@ -773,6 +773,8 @@ ATOM_TREE_NODE* _yr_atoms_extract_from_re_node(
     case RE_NODE_EMPTY:
     case RE_NODE_ANCHOR_START:
     case RE_NODE_ANCHOR_END:
+    case RE_NODE_WORD_BOUNDARY:
+    case RE_NODE_NON_WORD_BOUNDARY:
 
       append_current_leaf_to_node(current_node);
       return current_node;
diff --git a/libyara/include/yara/arena.h b/libyara/include/yara/arena.h
index ad0d582..a6ede1e 100644
--- a/libyara/include/yara/arena.h
+++ b/libyara/include/yara/arena.h
@@ -23,7 +23,7 @@ limitations under the License.
 
 #define ARENA_FLAGS_FIXED_SIZE   1
 #define ARENA_FLAGS_COALESCED    2
-#define ARENA_FILE_VERSION       5
+#define ARENA_FILE_VERSION       6
 
 #define EOL ((size_t) -1)
 
diff --git a/libyara/include/yara/re.h b/libyara/include/yara/re.h
index 7a3f022..5d1e237 100644
--- a/libyara/include/yara/re.h
+++ b/libyara/include/yara/re.h
@@ -17,6 +17,8 @@ limitations under the License.
 #ifndef YR_RE_H
 #define YR_RE_H
 
+#include <ctype.h>
+
 #include <yara/arena.h>
 #include <yara/sizedstr.h>
 
@@ -38,6 +40,8 @@ limitations under the License.
 #define RE_NODE_EMPTY               16
 #define RE_NODE_ANCHOR_START        17
 #define RE_NODE_ANCHOR_END          18
+#define RE_NODE_WORD_BOUNDARY       19
+#define RE_NODE_NON_WORD_BOUNDARY   20
 
 
 #define RE_OPCODE_ANY                   0xA0
@@ -54,15 +58,18 @@ limitations under the License.
 #define RE_OPCODE_DIGIT                 0xAB
 #define RE_OPCODE_NON_DIGIT             0xAC
 #define RE_OPCODE_MATCH                 0xAD
-#define RE_OPCODE_MATCH_AT_END          0xAE
-#define RE_OPCODE_MATCH_AT_START        0xAF
 
-#define RE_OPCODE_SPLIT_A               0xB0
-#define RE_OPCODE_SPLIT_B               0xB1
-#define RE_OPCODE_PUSH                  0xB2
-#define RE_OPCODE_POP                   0xB3
-#define RE_OPCODE_JNZ                   0xB4
-#define RE_OPCODE_JUMP                  0xB5
+#define RE_OPCODE_MATCH_AT_END          0xB0
+#define RE_OPCODE_MATCH_AT_START        0xB1
+#define RE_OPCODE_WORD_BOUNDARY         0xB2
+#define RE_OPCODE_NON_WORD_BOUNDARY     0xB3
+
+#define RE_OPCODE_SPLIT_A               0xC0
+#define RE_OPCODE_SPLIT_B               0xC1
+#define RE_OPCODE_PUSH                  0xC2
+#define RE_OPCODE_POP                   0xC3
+#define RE_OPCODE_JNZ                   0xC4
+#define RE_OPCODE_JUMP                  0xC5
 
 
 #define RE_FLAGS_FAST_HEX_REGEXP          0x02
@@ -85,6 +92,10 @@ typedef uint8_t* RE_CODE;
     ((cls)[(chr) / 8] & 1 << ((chr) % 8))
 
 
+#define IS_WORD_CHAR(chr) \
+    (isalnum(chr) || (chr) == '_')
+
+
 struct RE_NODE
 {
   int type;
diff --git a/libyara/re.c b/libyara/re.c
index f903c04..9084e5c 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -26,7 +26,6 @@ order to avoid confusion with operating system threads.
 */
 
 #include <assert.h>
-#include <ctype.h>
 #include <string.h>
 #include <limits.h>
 
@@ -692,6 +691,24 @@ int _yr_re_emit(
         code_size));
     break;
 
+  case RE_NODE_WORD_BOUNDARY:
+
+    FAIL_ON_ERROR(_yr_emit_inst(
+        arena,
+        RE_OPCODE_WORD_BOUNDARY,
+        &instruction_addr,
+        code_size));
+    break;
+
+  case RE_NODE_NON_WORD_BOUNDARY:
+
+    FAIL_ON_ERROR(_yr_emit_inst(
+        arena,
+        RE_OPCODE_NON_WORD_BOUNDARY,
+        &instruction_addr,
+        code_size));
+    break;
+
   case RE_NODE_SPACE:
 
     FAIL_ON_ERROR(_yr_emit_inst(
@@ -1527,6 +1544,7 @@ int yr_re_exec(
   int max_count;
   int match;
   int character_size;
+  int input_incr;
   int kill;
   int action;
   int result = -1;
@@ -1551,14 +1569,18 @@ int yr_re_exec(
     character_size = 1;
 
   input = input_data;
+  input_incr = character_size;
 
   if (flags & RE_FLAGS_BACKWARDS)
+  {
     input -= character_size;
+    input_incr = -input_incr;
+  }
 
   max_count = min(input_size, RE_SCAN_LIMIT);
 
-  // round down max_count to a multiple of character size, this way if
-  // character_size is 2 and the input size is impair we are ignoring the
+  // Round down max_count to a multiple of character_size, this way if
+  // character_size is 2 and input_size is odd we are ignoring the
   // extra byte which can't match anyways.
 
   max_count = max_count - max_count % character_size;
@@ -1645,14 +1667,14 @@ int yr_re_exec(
 
         case RE_OPCODE_WORD_CHAR:
           prolog;
-          match = (isalnum(*input) || *input == '_');
+          match = IS_WORD_CHAR(*input);
           action = match ? ACTION_NONE : ACTION_KILL;
           fiber->ip += 1;
           break;
 
         case RE_OPCODE_NON_WORD_CHAR:
           prolog;
-          match = (!isalnum(*input) && *input != '_');
+          match = !IS_WORD_CHAR(*input);
           action = match ? ACTION_NONE : ACTION_KILL;
           fiber->ip += 1;
           break;
@@ -1711,6 +1733,26 @@ int yr_re_exec(
           fiber->ip += 1;
           break;
 
+        case RE_OPCODE_WORD_BOUNDARY:
+        case RE_OPCODE_NON_WORD_BOUNDARY:
+
+          if (count == 0 &&
+              !(flags & RE_FLAGS_NOT_AT_START) &&
+              !(flags & RE_FLAGS_BACKWARDS))
+            match = TRUE;
+          else if (count >= max_count)
+            match = TRUE;
+          else if (IS_WORD_CHAR(*(input - input_incr)) != IS_WORD_CHAR(*input))
+            match = TRUE;
+          else
+            match = FALSE;
+
+          if (*ip == RE_OPCODE_NON_WORD_BOUNDARY)
+            match = !match;
+
+          action = match ? ACTION_CONTINUE : ACTION_KILL;
+          break;
+
         case RE_OPCODE_MATCH_AT_START:
           if (flags & RE_FLAGS_BACKWARDS)
             kill = input_size > count;
@@ -1781,11 +1823,7 @@ int yr_re_exec(
     if (flags & RE_FLAGS_WIDE && *(input + 1) != 0)
       _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);
 
-    if (flags & RE_FLAGS_BACKWARDS)
-      input -= character_size;
-    else
-      input += character_size;
-
+    input += input_incr;
     count += character_size;
 
     if (flags & RE_FLAGS_SCAN && count < max_count)
diff --git a/libyara/re_grammar.c b/libyara/re_grammar.c
index de26a47..95b5541 100644
--- a/libyara/re_grammar.c
+++ b/libyara/re_grammar.c
@@ -83,7 +83,9 @@
      _SPACE_ = 264,
      _NON_SPACE_ = 265,
      _DIGIT_ = 266,
-     _NON_DIGIT_ = 267
+     _NON_DIGIT_ = 267,
+     _WORD_BOUNDARY_ = 268,
+     _NON_WORD_BOUNDARY_ = 269
    };
 #endif
 /* Tokens.  */
@@ -97,6 +99,8 @@
 #define _NON_SPACE_ 265
 #define _DIGIT_ 266
 #define _NON_DIGIT_ 267
+#define _WORD_BOUNDARY_ 268
+#define _NON_WORD_BOUNDARY_ 269
 
 
 
@@ -160,7 +164,7 @@ typedef union YYSTYPE
   uint8_t* class_vector;
 }
 /* Line 193 of yacc.c.  */
-#line 164 "re_grammar.c"
+#line 168 "re_grammar.c"
 	YYSTYPE;
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */
 # define YYSTYPE_IS_DECLARED 1
@@ -173,7 +177,7 @@ typedef union YYSTYPE
 
 
 /* Line 216 of yacc.c.  */
-#line 177 "re_grammar.c"
+#line 181 "re_grammar.c"
 
 #ifdef short
 # undef short
@@ -386,22 +390,22 @@ union yyalloc
 #endif
 
 /* YYFINAL -- State number of the termination state.  */
-#define YYFINAL  20
+#define YYFINAL  22
 /* YYLAST -- Last index in YYTABLE.  */
-#define YYLAST   40
+#define YYLAST   43
 
 /* YYNTOKENS -- Number of terminals.  */
-#define YYNTOKENS  22
+#define YYNTOKENS  24
 /* YYNNTS -- Number of nonterminals.  */
 #define YYNNTS  6
 /* YYNRULES -- Number of rules.  */
-#define YYNRULES  28
+#define YYNRULES  30
 /* YYNRULES -- Number of states.  */
-#define YYNSTATES  32
+#define YYNSTATES  34
 
 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.  */
 #define YYUNDEFTOK  2
-#define YYMAXUTOK   267
+#define YYMAXUTOK   269
 
 #define YYTRANSLATE(YYX)						\
   ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
@@ -412,16 +416,16 @@ static const yytype_uint8 yytranslate[] =
        0,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,    18,     2,     2,     2,
-      19,    20,    14,    16,     2,     2,    21,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,    20,     2,     2,     2,
+      21,    22,    16,    18,     2,     2,    23,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,    15,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,    17,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,    17,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,    19,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,    13,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,    15,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
@@ -435,7 +439,7 @@ static const yytype_uint8 yytranslate[] =
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     1,     2,     3,     4,
-       5,     6,     7,     8,     9,    10,    11,    12
+       5,     6,     7,     8,     9,    10,    11,    12,    13,    14
 };
 
 #if YYDEBUG
@@ -445,28 +449,30 @@ static const yytype_uint8 yyprhs[] =
 {
        0,     0,     3,     5,     7,     9,    13,    16,    18,    21,
       24,    28,    31,    35,    38,    42,    45,    47,    49,    51,
-      55,    57,    59,    61,    63,    65,    67,    69,    71
+      53,    55,    59,    61,    63,    65,    67,    69,    71,    73,
+      75
 };
 
 /* YYRHS -- A `-1'-separated list of the rules' RHS.  */
 static const yytype_int8 yyrhs[] =
 {
-      23,     0,    -1,    24,    -1,     1,    -1,    25,    -1,    24,
-      13,    25,    -1,    24,    13,    -1,    26,    -1,    25,    26,
-      -1,    27,    14,    -1,    27,    14,    15,    -1,    27,    16,
-      -1,    27,    16,    15,    -1,    27,    15,    -1,    27,    15,
-      15,    -1,    27,     5,    -1,    27,    -1,    17,    -1,    18,
-      -1,    19,    24,    20,    -1,    21,    -1,     3,    -1,     7,
-      -1,     8,    -1,     9,    -1,    10,    -1,    11,    -1,    12,
-      -1,     6,    -1
+      25,     0,    -1,    26,    -1,     1,    -1,    27,    -1,    26,
+      15,    27,    -1,    26,    15,    -1,    28,    -1,    27,    28,
+      -1,    29,    16,    -1,    29,    16,    17,    -1,    29,    18,
+      -1,    29,    18,    17,    -1,    29,    17,    -1,    29,    17,
+      17,    -1,    29,     5,    -1,    29,    -1,    13,    -1,    14,
+      -1,    19,    -1,    20,    -1,    21,    26,    22,    -1,    23,
+      -1,     3,    -1,     7,    -1,     8,    -1,     9,    -1,    10,
+      -1,    11,    -1,    12,    -1,     6,    -1
 };
 
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
 static const yytype_uint16 yyrline[] =
 {
-       0,    86,    86,    91,    94,    98,   107,   122,   126,   136,
-     143,   152,   159,   168,   178,   189,   199,   203,   209,   217,
-     221,   227,   235,   241,   247,   253,   259,   265,   271
+       0,    88,    88,    93,    96,   100,   109,   124,   128,   138,
+     145,   154,   161,   170,   180,   191,   201,   205,   211,   217,
+     223,   231,   235,   241,   249,   255,   261,   267,   273,   279,
+     285
 };
 #endif
 
@@ -477,9 +483,9 @@ static const char *const yytname[] =
 {
   "$end", "error", "$undefined", "_CHAR_", "_ANY_", "_RANGE_", "_CLASS_",
   "_WORD_CHAR_", "_NON_WORD_CHAR_", "_SPACE_", "_NON_SPACE_", "_DIGIT_",
-  "_NON_DIGIT_", "'|'", "'*'", "'?'", "'+'", "'^'", "'$'", "'('", "')'",
-  "'.'", "$accept", "re", "alternative", "concatenation", "repeat",
-  "single", 0
+  "_NON_DIGIT_", "_WORD_BOUNDARY_", "_NON_WORD_BOUNDARY_", "'|'", "'*'",
+  "'?'", "'+'", "'^'", "'$'", "'('", "')'", "'.'", "$accept", "re",
+  "alternative", "concatenation", "repeat", "single", 0
 };
 #endif
 
@@ -489,25 +495,27 @@ static const char *const yytname[] =
 static const yytype_uint16 yytoknum[] =
 {
        0,   256,   257,   258,   259,   260,   261,   262,   263,   264,
-     265,   266,   267,   124,    42,    63,    43,    94,    36,    40,
-      41,    46
+     265,   266,   267,   268,   269,   124,    42,    63,    43,    94,
+      36,    40,    41,    46
 };
 # endif
 
 /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.  */
 static const yytype_uint8 yyr1[] =
 {
-       0,    22,    23,    23,    24,    24,    24,    25,    25,    26,
-      26,    26,    26,    26,    26,    26,    26,    26,    26,    27,
-      27,    27,    27,    27,    27,    27,    27,    27,    27
+       0,    24,    25,    25,    26,    26,    26,    27,    27,    28,
+      28,    28,    28,    28,    28,    28,    28,    28,    28,    28,
+      28,    29,    29,    29,    29,    29,    29,    29,    29,    29,
+      29
 };
 
 /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.  */
 static const yytype_uint8 yyr2[] =
 {
        0,     2,     1,     1,     1,     3,     2,     1,     2,     2,
-       3,     2,     3,     2,     3,     2,     1,     1,     1,     3,
-       1,     1,     1,     1,     1,     1,     1,     1,     1
+       3,     2,     3,     2,     3,     2,     1,     1,     1,     1,
+       1,     3,     1,     1,     1,     1,     1,     1,     1,     1,
+       1
 };
 
 /* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
@@ -515,33 +523,33 @@ static const yytype_uint8 yyr2[] =
    means the default is an error.  */
 static const yytype_uint8 yydefact[] =
 {
-       0,     3,    21,    28,    22,    23,    24,    25,    26,    27,
-      17,    18,     0,    20,     0,     2,     4,     7,    16,     0,
-       1,     6,     8,    15,     9,    13,    11,    19,     5,    10,
-      14,    12
+       0,     3,    23,    30,    24,    25,    26,    27,    28,    29,
+      17,    18,    19,    20,     0,    22,     0,     2,     4,     7,
+      16,     0,     1,     6,     8,    15,     9,    13,    11,    21,
+       5,    10,    14,    12
 };
 
 /* YYDEFGOTO[NTERM-NUM].  */
 static const yytype_int8 yydefgoto[] =
 {
-      -1,    14,    15,    16,    17,    18
+      -1,    16,    17,    18,    19,    20
 };
 
 /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
    STATE-NUM.  */
-#define YYPACT_NINF -16
+#define YYPACT_NINF -12
 static const yytype_int8 yypact[] =
 {
-      -1,   -16,   -16,   -16,   -16,   -16,   -16,   -16,   -16,   -16,
-     -16,   -16,    16,   -16,     3,    -9,    16,   -16,    24,     1,
-     -16,    16,   -16,   -16,    -3,     0,    15,   -16,    16,   -16,
-     -16,   -16
+      -1,   -12,   -12,   -12,   -12,   -12,   -12,   -12,   -12,   -12,
+     -12,   -12,   -12,   -12,    18,   -12,     1,   -11,    18,   -12,
+      -2,    21,   -12,    18,   -12,   -12,     0,    16,    17,   -12,
+      18,   -12,   -12,   -12
 };
 
 /* YYPGOTO[NTERM-NUM].  */
 static const yytype_int8 yypgoto[] =
 {
-     -16,   -16,    19,    11,   -15,   -16
+     -12,   -12,    26,    19,     5,   -12
 };
 
 /* YYTABLE[YYPACT[STATE-NUM]].  What to do in state STATE-NUM.  If
@@ -551,20 +559,20 @@ static const yytype_int8 yypgoto[] =
 #define YYTABLE_NINF -1
 static const yytype_uint8 yytable[] =
 {
-       1,    22,     2,    20,    21,     3,     4,     5,     6,     7,
-       8,     9,    29,    22,    21,    30,    10,    11,    12,     2,
-      13,    27,     3,     4,     5,     6,     7,     8,     9,    23,
-      31,    19,    28,    10,    11,    12,     0,    13,    24,    25,
-      26
+       1,    22,     2,    25,    23,     3,     4,     5,     6,     7,
+       8,     9,    10,    11,    26,    27,    28,    31,    12,    13,
+      14,     2,    15,    24,     3,     4,     5,     6,     7,     8,
+       9,    10,    11,    32,    33,    24,    23,    12,    13,    14,
+      21,    15,    30,    29
 };
 
-static const yytype_int8 yycheck[] =
+static const yytype_uint8 yycheck[] =
 {
-       1,    16,     3,     0,    13,     6,     7,     8,     9,    10,
-      11,    12,    15,    28,    13,    15,    17,    18,    19,     3,
-      21,    20,     6,     7,     8,     9,    10,    11,    12,     5,
-      15,    12,    21,    17,    18,    19,    -1,    21,    14,    15,
-      16
+       1,     0,     3,     5,    15,     6,     7,     8,     9,    10,
+      11,    12,    13,    14,    16,    17,    18,    17,    19,    20,
+      21,     3,    23,    18,     6,     7,     8,     9,    10,    11,
+      12,    13,    14,    17,    17,    30,    15,    19,    20,    21,
+      14,    23,    23,    22
 };
 
 /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
@@ -572,9 +580,9 @@ static const yytype_int8 yycheck[] =
 static const yytype_uint8 yystos[] =
 {
        0,     1,     3,     6,     7,     8,     9,    10,    11,    12,
-      17,    18,    19,    21,    23,    24,    25,    26,    27,    24,
-       0,    13,    26,     5,    14,    15,    16,    20,    25,    15,
-      15,    15
+      13,    14,    19,    20,    21,    23,    25,    26,    27,    28,
+      29,    26,     0,    15,    28,     5,    16,    17,    18,    22,
+      27,    17,    17,    17
 };
 
 #define yyerrok		(yyerrstatus = 0)
@@ -1095,29 +1103,29 @@ yydestruct (yymsg, yytype, yyvaluep, yyscanner, lex_env)
   switch (yytype)
     {
       case 6: /* "_CLASS_" */
-#line 78 "re_grammar.y"
+#line 80 "re_grammar.y"
 	{ yr_free((yyvaluep->class_vector)); };
-#line 1101 "re_grammar.c"
+#line 1109 "re_grammar.c"
 	break;
-      case 24: /* "alternative" */
-#line 79 "re_grammar.y"
+      case 26: /* "alternative" */
+#line 81 "re_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1106 "re_grammar.c"
+#line 1114 "re_grammar.c"
 	break;
-      case 25: /* "concatenation" */
-#line 80 "re_grammar.y"
+      case 27: /* "concatenation" */
+#line 82 "re_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1111 "re_grammar.c"
+#line 1119 "re_grammar.c"
 	break;
-      case 26: /* "repeat" */
-#line 81 "re_grammar.y"
+      case 28: /* "repeat" */
+#line 83 "re_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1116 "re_grammar.c"
+#line 1124 "re_grammar.c"
 	break;
-      case 27: /* "single" */
-#line 82 "re_grammar.y"
+      case 29: /* "single" */
+#line 84 "re_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1121 "re_grammar.c"
+#line 1129 "re_grammar.c"
 	break;
 
       default:
@@ -1427,7 +1435,7 @@ yyreduce:
   switch (yyn)
     {
         case 2:
-#line 87 "re_grammar.y"
+#line 89 "re_grammar.y"
     {
         RE* re = yyget_extra(yyscanner);
         re->root_node = (yyvsp[(1) - (1)].re_node);
@@ -1435,14 +1443,14 @@ yyreduce:
     break;
 
   case 4:
-#line 95 "re_grammar.y"
+#line 97 "re_grammar.y"
     {
                 (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
               }
     break;
 
   case 5:
-#line 99 "re_grammar.y"
+#line 101 "re_grammar.y"
     {
                 (yyval.re_node) = yr_re_node_create(RE_NODE_ALT, (yyvsp[(1) - (3)].re_node), (yyvsp[(3) - (3)].re_node));
 
@@ -1454,7 +1462,7 @@ yyreduce:
     break;
 
   case 6:
-#line 108 "re_grammar.y"
+#line 110 "re_grammar.y"
     {
                 RE_NODE* node;
 
@@ -1470,14 +1478,14 @@ yyreduce:
     break;
 
   case 7:
-#line 123 "re_grammar.y"
+#line 125 "re_grammar.y"
     {
                   (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
                 }
     break;
 
   case 8:
-#line 127 "re_grammar.y"
+#line 129 "re_grammar.y"
     {
                   (yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
 
@@ -1488,7 +1496,7 @@ yyreduce:
     break;
 
   case 9:
-#line 137 "re_grammar.y"
+#line 139 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_STAR, (yyvsp[(1) - (2)].re_node), NULL);
 
@@ -1498,7 +1506,7 @@ yyreduce:
     break;
 
   case 10:
-#line 144 "re_grammar.y"
+#line 146 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_STAR, (yyvsp[(1) - (3)].re_node), NULL);
 
@@ -1510,7 +1518,7 @@ yyreduce:
     break;
 
   case 11:
-#line 153 "re_grammar.y"
+#line 155 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_PLUS, (yyvsp[(1) - (2)].re_node), NULL);
 
@@ -1520,7 +1528,7 @@ yyreduce:
     break;
 
   case 12:
-#line 160 "re_grammar.y"
+#line 162 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_PLUS, (yyvsp[(1) - (3)].re_node), NULL);
 
@@ -1532,7 +1540,7 @@ yyreduce:
     break;
 
   case 13:
-#line 169 "re_grammar.y"
+#line 171 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (2)].re_node), NULL);
 
@@ -1545,7 +1553,7 @@ yyreduce:
     break;
 
   case 14:
-#line 179 "re_grammar.y"
+#line 181 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (3)].re_node), NULL);
 
@@ -1559,7 +1567,7 @@ yyreduce:
     break;
 
   case 15:
-#line 190 "re_grammar.y"
+#line 192 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (2)].re_node), NULL);
 
@@ -1572,25 +1580,25 @@ yyreduce:
     break;
 
   case 16:
-#line 200 "re_grammar.y"
+#line 202 "re_grammar.y"
     {
             (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
          }
     break;
 
   case 17:
-#line 204 "re_grammar.y"
+#line 206 "re_grammar.y"
     {
-            (yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL);
+            (yyval.re_node) = yr_re_node_create(RE_NODE_WORD_BOUNDARY, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
          }
     break;
 
   case 18:
-#line 210 "re_grammar.y"
+#line 212 "re_grammar.y"
     {
-            (yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_END, NULL, NULL);
+            (yyval.re_node) = yr_re_node_create(RE_NODE_NON_WORD_BOUNDARY, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
          }
@@ -1599,21 +1607,39 @@ yyreduce:
   case 19:
 #line 218 "re_grammar.y"
     {
-            (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
+            (yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL);
+
+            ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
          }
     break;
 
   case 20:
-#line 222 "re_grammar.y"
+#line 224 "re_grammar.y"
     {
-            (yyval.re_node) = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+            (yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_END, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
          }
     break;
 
   case 21:
-#line 228 "re_grammar.y"
+#line 232 "re_grammar.y"
+    {
+            (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
+         }
+    break;
+
+  case 22:
+#line 236 "re_grammar.y"
+    {
+            (yyval.re_node) = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+
+            ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
+         }
+    break;
+
+  case 23:
+#line 242 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
 
@@ -1623,8 +1649,8 @@ yyreduce:
          }
     break;
 
-  case 22:
-#line 236 "re_grammar.y"
+  case 24:
+#line 250 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_WORD_CHAR, NULL, NULL);
 
@@ -1632,8 +1658,8 @@ yyreduce:
          }
     break;
 
-  case 23:
-#line 242 "re_grammar.y"
+  case 25:
+#line 256 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_NON_WORD_CHAR, NULL, NULL);
 
@@ -1641,8 +1667,8 @@ yyreduce:
          }
     break;
 
-  case 24:
-#line 248 "re_grammar.y"
+  case 26:
+#line 262 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_SPACE, NULL, NULL);
 
@@ -1650,8 +1676,8 @@ yyreduce:
          }
     break;
 
-  case 25:
-#line 254 "re_grammar.y"
+  case 27:
+#line 268 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_NON_SPACE, NULL, NULL);
 
@@ -1659,8 +1685,8 @@ yyreduce:
          }
     break;
 
-  case 26:
-#line 260 "re_grammar.y"
+  case 28:
+#line 274 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_DIGIT, NULL, NULL);
 
@@ -1668,8 +1694,8 @@ yyreduce:
          }
     break;
 
-  case 27:
-#line 266 "re_grammar.y"
+  case 29:
+#line 280 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_NON_DIGIT, NULL, NULL);
 
@@ -1677,8 +1703,8 @@ yyreduce:
          }
     break;
 
-  case 28:
-#line 272 "re_grammar.y"
+  case 30:
+#line 286 "re_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_CLASS, NULL, NULL);
 
@@ -1690,7 +1716,7 @@ yyreduce:
 
 
 /* Line 1267 of yacc.c.  */
-#line 1694 "re_grammar.c"
+#line 1720 "re_grammar.c"
       default: break;
     }
   YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1904,6 +1930,6 @@ yyreturn:
 }
 
 
-#line 282 "re_grammar.y"
+#line 296 "re_grammar.y"
 
 
diff --git a/libyara/re_grammar.h b/libyara/re_grammar.h
index b9574b0..c55856b 100644
--- a/libyara/re_grammar.h
+++ b/libyara/re_grammar.h
@@ -48,7 +48,9 @@
      _SPACE_ = 264,
      _NON_SPACE_ = 265,
      _DIGIT_ = 266,
-     _NON_DIGIT_ = 267
+     _NON_DIGIT_ = 267,
+     _WORD_BOUNDARY_ = 268,
+     _NON_WORD_BOUNDARY_ = 269
    };
 #endif
 /* Tokens.  */
@@ -62,6 +64,8 @@
 #define _NON_SPACE_ 265
 #define _DIGIT_ 266
 #define _NON_DIGIT_ 267
+#define _WORD_BOUNDARY_ 268
+#define _NON_WORD_BOUNDARY_ 269
 
 
 
@@ -76,7 +80,7 @@ typedef union YYSTYPE
   uint8_t* class_vector;
 }
 /* Line 1529 of yacc.c.  */
-#line 80 "re_grammar.h"
+#line 84 "re_grammar.h"
 	YYSTYPE;
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */
 # define YYSTYPE_IS_DECLARED 1
diff --git a/libyara/re_grammar.y b/libyara/re_grammar.y
index f4e3434..f5ed099 100644
--- a/libyara/re_grammar.y
+++ b/libyara/re_grammar.y
@@ -72,6 +72,8 @@ limitations under the License.
 %token _NON_SPACE_
 %token _DIGIT_
 %token _NON_DIGIT_
+%token _WORD_BOUNDARY_
+%token _NON_WORD_BOUNDARY_
 
 %type <re_node>  alternative concatenation repeat single
 
@@ -200,6 +202,18 @@ repeat : single '*'
          {
             $$ = $1;
          }
+       | _WORD_BOUNDARY_
+         {
+            $$ = yr_re_node_create(RE_NODE_WORD_BOUNDARY, NULL, NULL);
+
+            ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
+         }
+       | _NON_WORD_BOUNDARY_
+         {
+            $$ = yr_re_node_create(RE_NODE_NON_WORD_BOUNDARY, NULL, NULL);
+
+            ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
+         }
        | '^'
          {
             $$ = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL);
diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c
index d89ee38..a66c40b 100644
--- a/libyara/re_lexer.c
+++ b/libyara/re_lexer.c
@@ -47,6 +47,7 @@ typedef int16_t flex_int16_t;
 typedef uint16_t flex_uint16_t;
 typedef int32_t flex_int32_t;
 typedef uint32_t flex_uint32_t;
+typedef uint64_t flex_uint64_t;
 #else
 typedef signed char flex_int8_t;
 typedef short int flex_int16_t;
@@ -357,13 +358,13 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
  */
 #define YY_DO_BEFORE_ACTION \
 	yyg->yytext_ptr = yy_bp; \
-	yyleng = (size_t) (yy_cp - yy_bp); \
+	yyleng = (yy_size_t) (yy_cp - yy_bp); \
 	yyg->yy_hold_char = *yy_cp; \
 	*yy_cp = '\0'; \
 	yyg->yy_c_buf_p = yy_cp;
 
-#define YY_NUM_RULES 27
-#define YY_END_OF_BUFFER 28
+#define YY_NUM_RULES 29
+#define YY_END_OF_BUFFER 30
 /* This struct is not used in this scanner,
    but its presence is necessary. */
 struct yy_trans_info
@@ -371,13 +372,13 @@ struct yy_trans_info
 	flex_int32_t yy_verify;
 	flex_int32_t yy_nxt;
 	};
-static yyconst flex_int16_t yy_accept[43] =
+static yyconst flex_int16_t yy_accept[45] =
     {   0,
-        0,    0,    0,    0,   28,    7,    7,   26,    6,   15,
-        7,   25,   27,   24,   16,    5,    3,   14,   13,   11,
-        9,   12,   10,    8,    0,    0,    0,    0,   23,   21,
-       19,   22,   20,   18,    0,    4,    0,    1,    2,   17,
-        0,    0
+        0,    0,    0,    0,   30,    7,    7,   28,    6,   17,
+        7,   27,   29,   26,   18,    5,    3,   16,   15,   13,
+       11,    9,   14,   12,   10,    8,    0,    0,    0,    0,
+       25,   23,   21,   24,   22,   20,    0,    4,    0,    1,
+        2,   19,    0,    0
     } ;
 
 static yyconst flex_int32_t yy_ec[256] =
@@ -388,14 +389,14 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    1,    1,    1,    3,    1,    1,    1,    3,
         3,    3,    3,    4,    5,    3,    1,    6,    6,    6,
         6,    6,    6,    6,    6,    6,    6,    1,    1,    1,
-        1,    1,    3,    1,    7,    7,    7,    8,    7,    7,
+        1,    1,    3,    1,    7,    8,    7,    9,    7,    7,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    9,    1,    1,    1,   10,    1,    1,    1,
-       11,   12,   13,   14,    1,    1,    7,    7,    7,   15,
+        1,    1,   10,    1,    1,    1,   11,    1,    1,    1,
+       12,   13,   14,   15,    1,    1,    7,   16,    7,   17,
 
         7,    7,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,   16,    1,    1,    1,   17,   18,
-        1,    1,   19,    3,   20,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,   18,    1,    1,    1,   19,   20,
+        1,    1,   21,    3,   22,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
@@ -412,69 +413,70 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    1,    1,    1
     } ;
 
-static yyconst flex_int32_t yy_meta[21] =
+static yyconst flex_int32_t yy_meta[23] =
     {   0,
-        1,    2,    1,    1,    3,    4,    4,    4,    1,    1,
-        1,    1,    5,    1,    4,    1,    1,    1,    1,    1
+        1,    2,    1,    1,    3,    4,    4,    4,    4,    1,
+        1,    1,    1,    5,    1,    4,    4,    1,    1,    1,
+        1,    1
     } ;
 
-static yyconst flex_int16_t yy_base[49] =
+static yyconst flex_int16_t yy_base[51] =
     {   0,
-        0,   18,    3,    5,   46,   91,   91,   91,    9,   32,
-        0,   40,   39,   42,   38,   91,   26,   30,   91,   91,
-       91,   91,   91,   91,    4,   49,    0,   30,   29,   28,
-       26,   23,   21,   20,    4,   91,    7,   91,   91,   91,
-        0,   91,   69,   74,   79,   84,   86,    4
+        0,   20,    3,    5,   50,   89,   89,   89,   10,   36,
+        0,   44,   43,   47,   38,   89,   26,   33,   89,   89,
+       89,   89,   89,   89,   89,   89,    4,    5,    0,   33,
+       32,   31,   29,   26,   24,   23,   15,   89,    8,   89,
+       89,   89,    0,   89,   67,   72,   77,   82,   84,    4
     } ;
 
-static yyconst flex_int16_t yy_def[49] =
+static yyconst flex_int16_t yy_def[51] =
     {   0,
-       43,   43,   44,   44,   42,   42,   42,   42,   42,   42,
-       42,   42,   42,   45,   42,   42,   42,   42,   42,   42,
-       42,   42,   42,   42,   42,   42,   46,   42,   42,   42,
-       42,   42,   42,   42,   47,   42,   42,   42,   42,   42,
-       48,    0,   42,   42,   42,   42,   42,   42
+       45,   45,   46,   46,   44,   44,   44,   44,   44,   44,
+       44,   44,   44,   47,   44,   44,   44,   44,   44,   44,
+       44,   44,   44,   44,   44,   44,   44,   44,   48,   44,
+       44,   44,   44,   44,   44,   44,   49,   44,   44,   44,
+       44,   44,   50,    0,   44,   44,   44,   44,   44,   44
     } ;
 
 static yyconst flex_int16_t yy_nxt[112] =
     {   0,
-       42,    7,    8,   25,   13,   26,   13,   28,   27,   37,
-        9,   10,   37,    8,   14,   15,   14,   15,   11,    7,
-        8,   16,   17,   38,   27,   27,   38,   27,    9,   10,
-       27,    8,   27,   27,   27,   18,   11,   18,   36,   19,
-       20,   21,   27,   27,   27,   42,   22,   23,   24,   29,
-       30,   31,   25,   42,   26,   42,   32,   33,   34,   35,
-       42,   42,   42,   42,   42,   42,   42,   42,   39,    6,
-        6,    6,    6,    6,   12,   12,   12,   12,   12,   28,
-       42,   28,   28,   28,   40,   40,   40,   40,   41,   41,
-        5,   42,   42,   42,   42,   42,   42,   42,   42,   42,
-
-       42,   42,   42,   42,   42,   42,   42,   42,   42,   42,
-       42
+       44,    7,    8,   27,   13,   28,   13,   30,   27,   39,
+       28,    9,   10,   39,    8,   14,   15,   14,   15,   29,
+       11,    7,    8,   16,   17,   40,   41,   29,   29,   40,
+       29,    9,   10,   29,    8,   29,   29,   29,   18,   38,
+       11,   18,   29,   19,   20,   21,   22,   29,   29,   44,
+       44,   23,   24,   25,   26,   31,   32,   33,   44,   44,
+       44,   44,   44,   34,   35,   36,   37,    6,    6,    6,
+        6,    6,   12,   12,   12,   12,   12,   30,   44,   30,
+       30,   30,   42,   42,   42,   42,   43,   43,    5,   44,
+       44,   44,   44,   44,   44,   44,   44,   44,   44,   44,
+
+       44,   44,   44,   44,   44,   44,   44,   44,   44,   44,
+       44
     } ;
 
 static yyconst flex_int16_t yy_chk[112] =
     {   0,
-        0,    1,    1,   11,    3,   11,    4,   48,   35,   25,
-        1,    1,   37,    1,    3,    3,    4,    4,    1,    2,
-        2,    9,    9,   25,   34,   33,   37,   32,    2,    2,
-       31,    2,   30,   29,   28,   18,    2,   10,   17,   10,
-       10,   10,   15,   13,   12,    5,   10,   10,   10,   14,
-       14,   14,   26,    0,   26,    0,   14,   14,   14,   14,
-        0,    0,    0,    0,    0,    0,    0,    0,   26,   43,
-       43,   43,   43,   43,   44,   44,   44,   44,   44,   45,
-        0,   45,   45,   45,   46,   46,   46,   46,   47,   47,
-       42,   42,   42,   42,   42,   42,   42,   42,   42,   42,
-
-       42,   42,   42,   42,   42,   42,   42,   42,   42,   42,
-       42
+        0,    1,    1,   11,    3,   11,    4,   50,   28,   27,
+       28,    1,    1,   39,    1,    3,    3,    4,    4,   37,
+        1,    2,    2,    9,    9,   27,   28,   36,   35,   39,
+       34,    2,    2,   33,    2,   32,   31,   30,   18,   17,
+        2,   10,   15,   10,   10,   10,   10,   13,   12,    5,
+        0,   10,   10,   10,   10,   14,   14,   14,    0,    0,
+        0,    0,    0,   14,   14,   14,   14,   45,   45,   45,
+       45,   45,   46,   46,   46,   46,   46,   47,    0,   47,
+       47,   47,   48,   48,   48,   48,   49,   49,   44,   44,
+       44,   44,   44,   44,   44,   44,   44,   44,   44,   44,
+
+       44,   44,   44,   44,   44,   44,   44,   44,   44,   44,
+       44
     } ;
 
 /* Table of booleans, true if rule could match eol. */
-static yyconst flex_int32_t yy_rule_can_match_eol[28] =
+static yyconst flex_int32_t yy_rule_can_match_eol[30] =
     {   0,
-0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0,     };
+0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     };
 
 /* The intent behind this definition is that it'll catch
  * any uses of REJECT which flex missed.
@@ -525,7 +527,7 @@ uint8_t read_escaped_char(yyscan_t yyscanner);
 
 #define YY_NO_UNISTD_H 1
 
-#line 529 "re_lexer.c"
+#line 531 "re_lexer.c"
 
 #define INITIAL 0
 #define char_class 1
@@ -760,7 +762,7 @@ YY_DECL
 #line 62 "re_lexer.l"
 
 
-#line 764 "re_lexer.c"
+#line 766 "re_lexer.c"
 
     yylval = yylval_param;
 
@@ -815,13 +817,13 @@ yy_match:
 			while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 				{
 				yy_current_state = (int) yy_def[yy_current_state];
-				if ( yy_current_state >= 43 )
+				if ( yy_current_state >= 45 )
 					yy_c = yy_meta[(unsigned int) yy_c];
 				}
 			yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
 			++yy_cp;
 			}
-		while ( yy_current_state != 42 );
+		while ( yy_current_state != 44 );
 		yy_cp = yyg->yy_last_accepting_cpos;
 		yy_current_state = yyg->yy_last_accepting_state;
 
@@ -1021,22 +1023,36 @@ case 14:
 YY_RULE_SETUP
 #line 200 "re_lexer.l"
 {
+  return _WORD_BOUNDARY_;
+}
+	YY_BREAK
+case 15:
+YY_RULE_SETUP
+#line 204 "re_lexer.l"
+{
+  return _NON_WORD_BOUNDARY_;
+}
+	YY_BREAK
+case 16:
+YY_RULE_SETUP
+#line 209 "re_lexer.l"
+{
 
   yyerror(yyscanner, lex_env, "backreferences are not allowed");
   yyterminate();
 }
 	YY_BREAK
-case 15:
+case 17:
 YY_RULE_SETUP
-#line 207 "re_lexer.l"
+#line 216 "re_lexer.l"
 {
   yylval->integer = read_escaped_char(yyscanner);
   return _CHAR_;
 }
 	YY_BREAK
-case 16:
+case 18:
 YY_RULE_SETUP
-#line 213 "re_lexer.l"
+#line 222 "re_lexer.l"
 {
 
   // End of character class.
@@ -1056,10 +1072,10 @@ YY_RULE_SETUP
   return _CLASS_;
 }
 	YY_BREAK
-case 17:
-/* rule 17 can match eol */
+case 19:
+/* rule 19 can match eol */
 YY_RULE_SETUP
-#line 234 "re_lexer.l"
+#line 243 "re_lexer.l"
 {
 
   // A range inside a character class.
@@ -1096,9 +1112,9 @@ YY_RULE_SETUP
   }
 }
 	YY_BREAK
-case 18:
+case 20:
 YY_RULE_SETUP
-#line 271 "re_lexer.l"
+#line 280 "re_lexer.l"
 {
 
   int i;
@@ -1111,9 +1127,9 @@ YY_RULE_SETUP
     LEX_ENV->class_vector[i] |= word_chars[i];
 }
 	YY_BREAK
-case 19:
+case 21:
 YY_RULE_SETUP
-#line 284 "re_lexer.l"
+#line 293 "re_lexer.l"
 {
 
   int i;
@@ -1126,18 +1142,18 @@ YY_RULE_SETUP
     LEX_ENV->class_vector[i] |= ~word_chars[i];
 }
 	YY_BREAK
-case 20:
+case 22:
 YY_RULE_SETUP
-#line 297 "re_lexer.l"
+#line 306 "re_lexer.l"
 {
 
   LEX_ENV->class_vector[' ' / 8] |= 1 << ' ' % 8;
   LEX_ENV->class_vector['\t' / 8] |= 1 << '\t' % 8;
 }
 	YY_BREAK
-case 21:
+case 23:
 YY_RULE_SETUP
-#line 304 "re_lexer.l"
+#line 313 "re_lexer.l"
 {
 
   int i;
@@ -1149,9 +1165,9 @@ YY_RULE_SETUP
   LEX_ENV->class_vector['\t' / 8] &= ~(1 << '\t' % 8);
 }
 	YY_BREAK
-case 22:
+case 24:
 YY_RULE_SETUP
-#line 316 "re_lexer.l"
+#line 325 "re_lexer.l"
 {
 
   char c;
@@ -1160,9 +1176,9 @@ YY_RULE_SETUP
     LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
 }
 	YY_BREAK
-case 23:
+case 25:
 YY_RULE_SETUP
-#line 325 "re_lexer.l"
+#line 334 "re_lexer.l"
 {
 
   int i;
@@ -1175,18 +1191,18 @@ YY_RULE_SETUP
     LEX_ENV->class_vector[c / 8] &= ~(1 << c % 8);
 }
 	YY_BREAK
-case 24:
+case 26:
 YY_RULE_SETUP
-#line 338 "re_lexer.l"
+#line 347 "re_lexer.l"
 {
 
   uint8_t c = read_escaped_char(yyscanner);
   LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
 }
 	YY_BREAK
-case 25:
+case 27:
 YY_RULE_SETUP
-#line 345 "re_lexer.l"
+#line 354 "re_lexer.l"
 {
 
   if (yytext[0] >= 32 && yytext[0] < 127)
@@ -1204,7 +1220,7 @@ YY_RULE_SETUP
 }
 	YY_BREAK
 case YY_STATE_EOF(char_class):
-#line 362 "re_lexer.l"
+#line 371 "re_lexer.l"
 {
 
   // End of regexp reached while scanning a character class.
@@ -1213,9 +1229,9 @@ case YY_STATE_EOF(char_class):
   yyterminate();
 }
 	YY_BREAK
-case 26:
+case 28:
 YY_RULE_SETUP
-#line 371 "re_lexer.l"
+#line 380 "re_lexer.l"
 {
 
   if (yytext[0] >= 32 && yytext[0] < 127)
@@ -1230,18 +1246,18 @@ YY_RULE_SETUP
 }
 	YY_BREAK
 case YY_STATE_EOF(INITIAL):
-#line 385 "re_lexer.l"
+#line 394 "re_lexer.l"
 {
 
   yyterminate();
 }
 	YY_BREAK
-case 27:
+case 29:
 YY_RULE_SETUP
-#line 390 "re_lexer.l"
+#line 399 "re_lexer.l"
 ECHO;
 	YY_BREAK
-#line 1245 "re_lexer.c"
+#line 1261 "re_lexer.c"
 
 	case YY_END_OF_BUFFER:
 		{
@@ -1534,7 +1550,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
 		while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 			{
 			yy_current_state = (int) yy_def[yy_current_state];
-			if ( yy_current_state >= 43 )
+			if ( yy_current_state >= 45 )
 				yy_c = yy_meta[(unsigned int) yy_c];
 			}
 		yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1563,11 +1579,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
 	while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 		{
 		yy_current_state = (int) yy_def[yy_current_state];
-		if ( yy_current_state >= 43 )
+		if ( yy_current_state >= 45 )
 			yy_c = yy_meta[(unsigned int) yy_c];
 		}
 	yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
-	yy_is_jam = (yy_current_state == 42);
+	yy_is_jam = (yy_current_state == 44);
 
 	return yy_is_jam ? 0 : yy_current_state;
 }
@@ -2371,7 +2387,7 @@ void re_yyfree (void * ptr , yyscan_t yyscanner)
 
 #define YYTABLES_NAME "yytables"
 
-#line 390 "re_lexer.l"
+#line 399 "re_lexer.l"
 
 
 
diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l
index 7c500bf..19f234d 100644
--- a/libyara/re_lexer.l
+++ b/libyara/re_lexer.l
@@ -197,6 +197,15 @@ hex_digit     [0-9a-fA-F]
 }
 
 
+\\b {
+  return _WORD_BOUNDARY_;
+}
+
+\\B {
+  return _NON_WORD_BOUNDARY_;
+}
+
+
 \\{digit}+ {
 
   yyerror(yyscanner, lex_env, "backreferences are not allowed");
diff --git a/yara-python/tests.py b/yara-python/tests.py
index 4c719f1..7b4a402 100644
--- a/yara-python/tests.py
+++ b/yara-python/tests.py
@@ -194,6 +194,26 @@ RE_TESTS = [
   ('^(ab|cd)e', 'abcde', FAIL),
   ('(abc|)ef', 'abcdef', SUCCEED, 'ef'),
   ('(abc|)ef', 'abcef', SUCCEED, 'abcef'),
+  (r'\babc', 'abc', SUCCEED, 'abc'),
+  (r'abc\b', 'abc', SUCCEED, 'abc'),
+  (r'\babc', '1abc', FAIL),
+  (r'abc\b', 'abc1', FAIL),
+  (r'abc\s\b', 'abc x', SUCCEED, 'abc '),
+  (r'abc\s\b', 'abc  ', FAIL),
+  (r'\babc\b', ' abc ', SUCCEED, 'abc'),
+  (r'\b\w\w\w\b', ' abc ', SUCCEED, 'abc'),
+  (r'\w\w\w\b', 'abcd', SUCCEED, 'bcd'),
+  (r'\b\w\w\w', 'abcd', SUCCEED, 'abc'),
+  (r'\b\w\w\w\b', 'abcd', FAIL),
+  (r'\Babc', 'abc', FAIL),
+  (r'abc\B', 'abc', FAIL),
+  (r'\Babc', '1abc', SUCCEED, 'abc'),
+  (r'abc\B', 'abc1', SUCCEED, 'abc'),
+  (r'abc\s\B', 'abc x', FAIL),
+  (r'abc\s\B', 'abc  ', SUCCEED, 'abc '),
+  (r'\w\w\w\B', 'abcd', SUCCEED, 'abc'),
+  (r'\B\w\w\w', 'abcd', SUCCEED, 'bcd'),
+  (r'\B\w\w\w\B', 'abcd', FAIL),
 
   # This is allowed in most regexp engines but in order to keep the
   # grammar free of shift/reduce conflicts I've decided not supporting

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git



More information about the forensics-changes mailing list