[Forensics-changes] [yara] 07/368: Fix segfault while parsing corrupted regexps

Sat Jul 1 10:30:04 UTC 2017

This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to annotated tag v3.5.0
in repository yara.

commit cf4746dd2228d22aa961e67c8447e90c6917a14d
Author: Victor M. Alvarez <plusvic at gmail.com>
Date:   Tue Jun 30 11:24:17 2015 +0200

    Fix segfault while parsing corrupted regexps
---
 libyara/re_lexer.c | 158 +++++++++++++++++++++++++++++++++--------------------
 libyara/re_lexer.l |  86 +++++++++++++++++++++--------
 2 files changed, 164 insertions(+), 80 deletions(-)

diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c
index 266aed9..8c6fe75 100644
--- a/libyara/re_lexer.c
+++ b/libyara/re_lexer.c
@@ -542,12 +542,16 @@ with noyywrap then we can remove this pragma.
 #endif
 
 
-uint8_t escaped_char_value(char* text);
-uint8_t read_escaped_char(yyscan_t yyscanner);
+uint8_t escaped_char_value(
+    char* text);
+
+int read_escaped_char(
+    yyscan_t yyscanner,
+    uint8_t* escaped_char);
 
 #define YY_NO_UNISTD_H 1
 
-#line 551 "re_lexer.c"
+#line 555 "re_lexer.c"
 
 #define INITIAL 0
 #define char_class 1
@@ -812,10 +816,10 @@ YY_DECL
 		}
 
 	{
-#line 75 "re_lexer.l"
+#line 79 "re_lexer.l"
 
 
-#line 819 "re_lexer.c"
+#line 823 "re_lexer.c"
 
 	while ( 1 )		/* loops until end-of-file is reached */
 		{
@@ -882,7 +886,7 @@ do_action:	/* This label is used only to access EOF actions. */
 
 case 1:
 YY_RULE_SETUP
-#line 77 "re_lexer.l"
+#line 81 "re_lexer.l"
 {
 
   // Examples: {3,8} {0,5} {,5} {7,}
@@ -918,7 +922,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 2:
 YY_RULE_SETUP
-#line 111 "re_lexer.l"
+#line 115 "re_lexer.l"
 {
 
   // Example: {10}
@@ -938,7 +942,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 3:
 YY_RULE_SETUP
-#line 129 "re_lexer.l"
+#line 133 "re_lexer.l"
 {
 
   // Start of a negated character class. Example: [^abcd]
@@ -950,7 +954,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 4:
 YY_RULE_SETUP
-#line 138 "re_lexer.l"
+#line 142 "re_lexer.l"
 {
 
   // Start of character negated class containing a ].
@@ -965,7 +969,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 5:
 YY_RULE_SETUP
-#line 151 "re_lexer.l"
+#line 155 "re_lexer.l"
 {
 
   // Start of character class containing a ].
@@ -980,7 +984,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 6:
 YY_RULE_SETUP
-#line 164 "re_lexer.l"
+#line 168 "re_lexer.l"
 {
 
   // Start of character class. Example: [abcd]
@@ -993,7 +997,7 @@ YY_RULE_SETUP
 case 7:
 /* rule 7 can match eol */
 YY_RULE_SETUP
-#line 174 "re_lexer.l"
+#line 178 "re_lexer.l"
 {
 
   // Any non-special character is passed as a CHAR token to the scanner.
@@ -1004,63 +1008,63 @@ YY_RULE_SETUP
 	YY_BREAK
 case 8:
 YY_RULE_SETUP
-#line 183 "re_lexer.l"
+#line 187 "re_lexer.l"
 {
   return _WORD_CHAR_;
 }
 	YY_BREAK
 case 9:
 YY_RULE_SETUP
-#line 188 "re_lexer.l"
+#line 192 "re_lexer.l"
 {
   return _NON_WORD_CHAR_;
 }
 	YY_BREAK
 case 10:
 YY_RULE_SETUP
-#line 193 "re_lexer.l"
+#line 197 "re_lexer.l"
 {
   return _SPACE_;
 }
 	YY_BREAK
 case 11:
 YY_RULE_SETUP
-#line 198 "re_lexer.l"
+#line 202 "re_lexer.l"
 {
   return _NON_SPACE_;
 }
 	YY_BREAK
 case 12:
 YY_RULE_SETUP
-#line 203 "re_lexer.l"
+#line 207 "re_lexer.l"
 {
   return _DIGIT_;
 }
 	YY_BREAK
 case 13:
 YY_RULE_SETUP
-#line 208 "re_lexer.l"
+#line 212 "re_lexer.l"
 {
   return _NON_DIGIT_;
 }
 	YY_BREAK
 case 14:
 YY_RULE_SETUP
-#line 213 "re_lexer.l"
+#line 217 "re_lexer.l"
 {
   return _WORD_BOUNDARY_;
 }
 	YY_BREAK
 case 15:
 YY_RULE_SETUP
-#line 217 "re_lexer.l"
+#line 221 "re_lexer.l"
 {
   return _NON_WORD_BOUNDARY_;
 }
 	YY_BREAK
 case 16:
 YY_RULE_SETUP
-#line 222 "re_lexer.l"
+#line 226 "re_lexer.l"
 {
 
   yyerror(yyscanner, lex_env, "backreferences are not allowed");
@@ -1069,15 +1073,26 @@ YY_RULE_SETUP
 	YY_BREAK
 case 17:
 YY_RULE_SETUP
-#line 229 "re_lexer.l"
+#line 233 "re_lexer.l"
 {
-  yylval->integer = read_escaped_char(yyscanner);
-  return _CHAR_;
+
+  uint8_t c;
+
+  if (read_escaped_char(yyscanner, &c))
+  {
+    yylval->integer = c;
+    return _CHAR_;
+  }
+  else
+  {
+    yyerror(yyscanner, lex_env, "unexpected end of buffer");
+    yyterminate();
+  }
 }
 	YY_BREAK
 case 18:
 YY_RULE_SETUP
-#line 235 "re_lexer.l"
+#line 250 "re_lexer.l"
 {
 
   // End of character class.
@@ -1098,7 +1113,7 @@ YY_RULE_SETUP
 case 19:
 /* rule 19 can match eol */
 YY_RULE_SETUP
-#line 254 "re_lexer.l"
+#line 269 "re_lexer.l"
 {
 
   // A range inside a character class.
@@ -1119,7 +1134,13 @@ YY_RULE_SETUP
   }
 
   if (end == '\\')
-    end = read_escaped_char(yyscanner);
+  {
+    if (!read_escaped_char(yyscanner, &end))
+    {
+      yyerror(yyscanner, lex_env, "unexpected end of buffer");
+      yyterminate();
+    }
+  }
 
   if (end < start)
   {
@@ -1135,7 +1156,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 20:
 YY_RULE_SETUP
-#line 289 "re_lexer.l"
+#line 310 "re_lexer.l"
 {
 
   char word_chars[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
@@ -1149,7 +1170,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 21:
 YY_RULE_SETUP
-#line 301 "re_lexer.l"
+#line 322 "re_lexer.l"
 {
 
   char word_chars[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
@@ -1163,7 +1184,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 22:
 YY_RULE_SETUP
-#line 313 "re_lexer.l"
+#line 334 "re_lexer.l"
 {
 
   LEX_ENV->class_vector[' ' / 8] |= 1 << ' ' % 8;
@@ -1172,7 +1193,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 23:
 YY_RULE_SETUP
-#line 320 "re_lexer.l"
+#line 341 "re_lexer.l"
 {
 
   for (int i = 0; i < 32; i++)
@@ -1188,7 +1209,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 24:
 YY_RULE_SETUP
-#line 334 "re_lexer.l"
+#line 355 "re_lexer.l"
 {
 
   for (char c = '0'; c <= '9'; c++)
@@ -1197,7 +1218,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 25:
 YY_RULE_SETUP
-#line 341 "re_lexer.l"
+#line 362 "re_lexer.l"
 {
 
   for (int i = 0; i < 32; i++)
@@ -1217,16 +1238,25 @@ YY_RULE_SETUP
 	YY_BREAK
 case 26:
 YY_RULE_SETUP
-#line 359 "re_lexer.l"
+#line 380 "re_lexer.l"
 {
 
-  uint8_t c = read_escaped_char(yyscanner);
-  LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
+  uint8_t c;
+
+  if (read_escaped_char(yyscanner, &c))
+  {
+    LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
+  }
+  else
+  {
+    yyerror(yyscanner, lex_env, "unexpected end of buffer");
+    yyterminate();
+  }
 }
 	YY_BREAK
 case 27:
 YY_RULE_SETUP
-#line 366 "re_lexer.l"
+#line 396 "re_lexer.l"
 {
 
   if (yytext[0] >= 32 && yytext[0] < 127)
@@ -1244,7 +1274,7 @@ YY_RULE_SETUP
 }
 	YY_BREAK
 case YY_STATE_EOF(char_class):
-#line 383 "re_lexer.l"
+#line 413 "re_lexer.l"
 {
 
   // End of regexp reached while scanning a character class.
@@ -1255,7 +1285,7 @@ case YY_STATE_EOF(char_class):
 	YY_BREAK
 case 28:
 YY_RULE_SETUP
-#line 392 "re_lexer.l"
+#line 422 "re_lexer.l"
 {
 
   if (yytext[0] >= 32 && yytext[0] < 127)
@@ -1270,7 +1300,7 @@ YY_RULE_SETUP
 }
 	YY_BREAK
 case YY_STATE_EOF(INITIAL):
-#line 406 "re_lexer.l"
+#line 436 "re_lexer.l"
 {
 
   yyterminate();
@@ -1278,10 +1308,10 @@ case YY_STATE_EOF(INITIAL):
 	YY_BREAK
 case 29:
 YY_RULE_SETUP
-#line 411 "re_lexer.l"
+#line 441 "re_lexer.l"
 ECHO;
 	YY_BREAK
-#line 1285 "re_lexer.c"
+#line 1315 "re_lexer.c"
 
 	case YY_END_OF_BUFFER:
 		{
@@ -2414,11 +2444,12 @@ void re_yyfree (void * ptr , yyscan_t yyscanner)
 
 #define YYTABLES_NAME "yytables"
 
-#line 411 "re_lexer.l"
+#line 441 "re_lexer.l"
 
 
 
-uint8_t escaped_char_value(char* text)
+uint8_t escaped_char_value(
+    char* text)
 {
   char hex[3];
   int result;
@@ -2462,30 +2493,41 @@ uint8_t escaped_char_value(char* text)
 }
 
 
-uint8_t read_escaped_char(yyscan_t yyscanner)
+#ifdef __cplusplus
+#define INPUT yyinput
+#else
+#define INPUT input
+#endif
+
+
+int read_escaped_char(
+    yyscan_t yyscanner,
+    uint8_t* escaped_char)
 {
   char text[4];
 
   text[0] = '\\';
+  text[1] = INPUT(yyscanner);
 
-  #ifdef __cplusplus
-  text[1] = yyinput(yyscanner);
-  #else
-  text[1] = input(yyscanner);
-  #endif
+  if (text[1] == EOF)
+    return 0;
 
   if (text[1] == 'x')
   {
-    #ifdef __cplusplus
-    text[2] = yyinput(yyscanner);
-    text[3] = yyinput(yyscanner);
-    #else
-    text[2] = input(yyscanner);
-    text[3] = input(yyscanner);
-    #endif
+    text[2] = INPUT(yyscanner);
+
+    if (text[2] == EOF)
+      return 0;
+
+    text[3] = INPUT(yyscanner);
+
+    if (text[3] == EOF)
+      return 0;
   }
 
-  return escaped_char_value(text);
+  *escaped_char = escaped_char_value(text);
+
+  return 1;
 }
 
 
diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l
index a6d2260..6be9d54 100644
--- a/libyara/re_lexer.l
+++ b/libyara/re_lexer.l
@@ -49,8 +49,12 @@ with noyywrap then we can remove this pragma.
 #endif
 
 
-uint8_t escaped_char_value(char* text);
-uint8_t read_escaped_char(yyscan_t yyscanner);
+uint8_t escaped_char_value(
+    char* text);
+
+int read_escaped_char(
+    yyscan_t yyscanner,
+    uint8_t* escaped_char);
 
 %}
 
@@ -227,8 +231,19 @@ hex_digit     [0-9a-fA-F]
 
 
 \\ {
-  yylval->integer = read_escaped_char(yyscanner);
-  return _CHAR_;
+
+  uint8_t c;
+
+  if (read_escaped_char(yyscanner, &c))
+  {
+    yylval->integer = c;
+    return _CHAR_;
+  }
+  else
+  {
+    yyerror(yyscanner, lex_env, "unexpected end of buffer");
+    yyterminate();
+  }
 }
 
 
@@ -271,7 +286,13 @@ hex_digit     [0-9a-fA-F]
   }
 
   if (end == '\\')
-    end = read_escaped_char(yyscanner);
+  {
+    if (!read_escaped_char(yyscanner, &end))
+    {
+      yyerror(yyscanner, lex_env, "unexpected end of buffer");
+      yyterminate();
+    }
+  }
 
   if (end < start)
   {
@@ -358,8 +379,17 @@ hex_digit     [0-9a-fA-F]
 
 <char_class>\\ {
 
-  uint8_t c = read_escaped_char(yyscanner);
-  LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
+  uint8_t c;
+
+  if (read_escaped_char(yyscanner, &c))
+  {
+    LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
+  }
+  else
+  {
+    yyerror(yyscanner, lex_env, "unexpected end of buffer");
+    yyterminate();
+  }
 }
 
 
@@ -410,7 +440,8 @@ hex_digit     [0-9a-fA-F]
 
 %%
 
-uint8_t escaped_char_value(char* text)
+uint8_t escaped_char_value(
+    char* text)
 {
   char hex[3];
   int result;
@@ -454,30 +485,41 @@ uint8_t escaped_char_value(char* text)
 }
 
 
-uint8_t read_escaped_char(yyscan_t yyscanner)
+#ifdef __cplusplus
+#define INPUT yyinput
+#else
+#define INPUT input
+#endif
+
+
+int read_escaped_char(
+    yyscan_t yyscanner,
+    uint8_t* escaped_char)
 {
   char text[4];
 
   text[0] = '\\';
+  text[1] = INPUT(yyscanner);
 
-  #ifdef __cplusplus
-  text[1] = yyinput(yyscanner);
-  #else
-  text[1] = input(yyscanner);
-  #endif
+  if (text[1] == EOF)
+    return 0;
 
   if (text[1] == 'x')
   {
-    #ifdef __cplusplus
-    text[2] = yyinput(yyscanner);
-    text[3] = yyinput(yyscanner);
-    #else
-    text[2] = input(yyscanner);
-    text[3] = input(yyscanner);
-    #endif
+    text[2] = INPUT(yyscanner);
+
+    if (text[2] == EOF)
+      return 0;
+
+    text[3] = INPUT(yyscanner);
+
+    if (text[3] == EOF)
+      return 0;
   }
 
-  return escaped_char_value(text);
+  *escaped_char = escaped_char_value(text);
+
+  return 1;
 }
 
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git