[Forensics-changes] [yara] 07/368: Fix segfault while parsing corrupted regexps
Hilko Bengen
bengen at moszumanska.debian.org
Sat Jul 1 10:30:04 UTC 2017
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to annotated tag v3.5.0
in repository yara.
commit cf4746dd2228d22aa961e67c8447e90c6917a14d
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Tue Jun 30 11:24:17 2015 +0200
Fix segfault while parsing corrupted regexps
---
libyara/re_lexer.c | 158 +++++++++++++++++++++++++++++++++--------------------
libyara/re_lexer.l | 86 +++++++++++++++++++++--------
2 files changed, 164 insertions(+), 80 deletions(-)
diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c
index 266aed9..8c6fe75 100644
--- a/libyara/re_lexer.c
+++ b/libyara/re_lexer.c
@@ -542,12 +542,16 @@ with noyywrap then we can remove this pragma.
#endif
-uint8_t escaped_char_value(char* text);
-uint8_t read_escaped_char(yyscan_t yyscanner);
+uint8_t escaped_char_value(
+ char* text);
+
+int read_escaped_char(
+ yyscan_t yyscanner,
+ uint8_t* escaped_char);
#define YY_NO_UNISTD_H 1
-#line 551 "re_lexer.c"
+#line 555 "re_lexer.c"
#define INITIAL 0
#define char_class 1
@@ -812,10 +816,10 @@ YY_DECL
}
{
-#line 75 "re_lexer.l"
+#line 79 "re_lexer.l"
-#line 819 "re_lexer.c"
+#line 823 "re_lexer.c"
while ( 1 ) /* loops until end-of-file is reached */
{
@@ -882,7 +886,7 @@ do_action: /* This label is used only to access EOF actions. */
case 1:
YY_RULE_SETUP
-#line 77 "re_lexer.l"
+#line 81 "re_lexer.l"
{
// Examples: {3,8} {0,5} {,5} {7,}
@@ -918,7 +922,7 @@ YY_RULE_SETUP
YY_BREAK
case 2:
YY_RULE_SETUP
-#line 111 "re_lexer.l"
+#line 115 "re_lexer.l"
{
// Example: {10}
@@ -938,7 +942,7 @@ YY_RULE_SETUP
YY_BREAK
case 3:
YY_RULE_SETUP
-#line 129 "re_lexer.l"
+#line 133 "re_lexer.l"
{
// Start of a negated character class. Example: [^abcd]
@@ -950,7 +954,7 @@ YY_RULE_SETUP
YY_BREAK
case 4:
YY_RULE_SETUP
-#line 138 "re_lexer.l"
+#line 142 "re_lexer.l"
{
// Start of character negated class containing a ].
@@ -965,7 +969,7 @@ YY_RULE_SETUP
YY_BREAK
case 5:
YY_RULE_SETUP
-#line 151 "re_lexer.l"
+#line 155 "re_lexer.l"
{
// Start of character class containing a ].
@@ -980,7 +984,7 @@ YY_RULE_SETUP
YY_BREAK
case 6:
YY_RULE_SETUP
-#line 164 "re_lexer.l"
+#line 168 "re_lexer.l"
{
// Start of character class. Example: [abcd]
@@ -993,7 +997,7 @@ YY_RULE_SETUP
case 7:
/* rule 7 can match eol */
YY_RULE_SETUP
-#line 174 "re_lexer.l"
+#line 178 "re_lexer.l"
{
// Any non-special character is passed as a CHAR token to the scanner.
@@ -1004,63 +1008,63 @@ YY_RULE_SETUP
YY_BREAK
case 8:
YY_RULE_SETUP
-#line 183 "re_lexer.l"
+#line 187 "re_lexer.l"
{
return _WORD_CHAR_;
}
YY_BREAK
case 9:
YY_RULE_SETUP
-#line 188 "re_lexer.l"
+#line 192 "re_lexer.l"
{
return _NON_WORD_CHAR_;
}
YY_BREAK
case 10:
YY_RULE_SETUP
-#line 193 "re_lexer.l"
+#line 197 "re_lexer.l"
{
return _SPACE_;
}
YY_BREAK
case 11:
YY_RULE_SETUP
-#line 198 "re_lexer.l"
+#line 202 "re_lexer.l"
{
return _NON_SPACE_;
}
YY_BREAK
case 12:
YY_RULE_SETUP
-#line 203 "re_lexer.l"
+#line 207 "re_lexer.l"
{
return _DIGIT_;
}
YY_BREAK
case 13:
YY_RULE_SETUP
-#line 208 "re_lexer.l"
+#line 212 "re_lexer.l"
{
return _NON_DIGIT_;
}
YY_BREAK
case 14:
YY_RULE_SETUP
-#line 213 "re_lexer.l"
+#line 217 "re_lexer.l"
{
return _WORD_BOUNDARY_;
}
YY_BREAK
case 15:
YY_RULE_SETUP
-#line 217 "re_lexer.l"
+#line 221 "re_lexer.l"
{
return _NON_WORD_BOUNDARY_;
}
YY_BREAK
case 16:
YY_RULE_SETUP
-#line 222 "re_lexer.l"
+#line 226 "re_lexer.l"
{
yyerror(yyscanner, lex_env, "backreferences are not allowed");
@@ -1069,15 +1073,26 @@ YY_RULE_SETUP
YY_BREAK
case 17:
YY_RULE_SETUP
-#line 229 "re_lexer.l"
+#line 233 "re_lexer.l"
{
- yylval->integer = read_escaped_char(yyscanner);
- return _CHAR_;
+
+ uint8_t c;
+
+ if (read_escaped_char(yyscanner, &c))
+ {
+ yylval->integer = c;
+ return _CHAR_;
+ }
+ else
+ {
+ yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyterminate();
+ }
}
YY_BREAK
case 18:
YY_RULE_SETUP
-#line 235 "re_lexer.l"
+#line 250 "re_lexer.l"
{
// End of character class.
@@ -1098,7 +1113,7 @@ YY_RULE_SETUP
case 19:
/* rule 19 can match eol */
YY_RULE_SETUP
-#line 254 "re_lexer.l"
+#line 269 "re_lexer.l"
{
// A range inside a character class.
@@ -1119,7 +1134,13 @@ YY_RULE_SETUP
}
if (end == '\\')
- end = read_escaped_char(yyscanner);
+ {
+ if (!read_escaped_char(yyscanner, &end))
+ {
+ yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyterminate();
+ }
+ }
if (end < start)
{
@@ -1135,7 +1156,7 @@ YY_RULE_SETUP
YY_BREAK
case 20:
YY_RULE_SETUP
-#line 289 "re_lexer.l"
+#line 310 "re_lexer.l"
{
char word_chars[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
@@ -1149,7 +1170,7 @@ YY_RULE_SETUP
YY_BREAK
case 21:
YY_RULE_SETUP
-#line 301 "re_lexer.l"
+#line 322 "re_lexer.l"
{
char word_chars[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
@@ -1163,7 +1184,7 @@ YY_RULE_SETUP
YY_BREAK
case 22:
YY_RULE_SETUP
-#line 313 "re_lexer.l"
+#line 334 "re_lexer.l"
{
LEX_ENV->class_vector[' ' / 8] |= 1 << ' ' % 8;
@@ -1172,7 +1193,7 @@ YY_RULE_SETUP
YY_BREAK
case 23:
YY_RULE_SETUP
-#line 320 "re_lexer.l"
+#line 341 "re_lexer.l"
{
for (int i = 0; i < 32; i++)
@@ -1188,7 +1209,7 @@ YY_RULE_SETUP
YY_BREAK
case 24:
YY_RULE_SETUP
-#line 334 "re_lexer.l"
+#line 355 "re_lexer.l"
{
for (char c = '0'; c <= '9'; c++)
@@ -1197,7 +1218,7 @@ YY_RULE_SETUP
YY_BREAK
case 25:
YY_RULE_SETUP
-#line 341 "re_lexer.l"
+#line 362 "re_lexer.l"
{
for (int i = 0; i < 32; i++)
@@ -1217,16 +1238,25 @@ YY_RULE_SETUP
YY_BREAK
case 26:
YY_RULE_SETUP
-#line 359 "re_lexer.l"
+#line 380 "re_lexer.l"
{
- uint8_t c = read_escaped_char(yyscanner);
- LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
+ uint8_t c;
+
+ if (read_escaped_char(yyscanner, &c))
+ {
+ LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
+ }
+ else
+ {
+ yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyterminate();
+ }
}
YY_BREAK
case 27:
YY_RULE_SETUP
-#line 366 "re_lexer.l"
+#line 396 "re_lexer.l"
{
if (yytext[0] >= 32 && yytext[0] < 127)
@@ -1244,7 +1274,7 @@ YY_RULE_SETUP
}
YY_BREAK
case YY_STATE_EOF(char_class):
-#line 383 "re_lexer.l"
+#line 413 "re_lexer.l"
{
// End of regexp reached while scanning a character class.
@@ -1255,7 +1285,7 @@ case YY_STATE_EOF(char_class):
YY_BREAK
case 28:
YY_RULE_SETUP
-#line 392 "re_lexer.l"
+#line 422 "re_lexer.l"
{
if (yytext[0] >= 32 && yytext[0] < 127)
@@ -1270,7 +1300,7 @@ YY_RULE_SETUP
}
YY_BREAK
case YY_STATE_EOF(INITIAL):
-#line 406 "re_lexer.l"
+#line 436 "re_lexer.l"
{
yyterminate();
@@ -1278,10 +1308,10 @@ case YY_STATE_EOF(INITIAL):
YY_BREAK
case 29:
YY_RULE_SETUP
-#line 411 "re_lexer.l"
+#line 441 "re_lexer.l"
ECHO;
YY_BREAK
-#line 1285 "re_lexer.c"
+#line 1315 "re_lexer.c"
case YY_END_OF_BUFFER:
{
@@ -2414,11 +2444,12 @@ void re_yyfree (void * ptr , yyscan_t yyscanner)
#define YYTABLES_NAME "yytables"
-#line 411 "re_lexer.l"
+#line 441 "re_lexer.l"
-uint8_t escaped_char_value(char* text)
+uint8_t escaped_char_value(
+ char* text)
{
char hex[3];
int result;
@@ -2462,30 +2493,41 @@ uint8_t escaped_char_value(char* text)
}
-uint8_t read_escaped_char(yyscan_t yyscanner)
+#ifdef __cplusplus
+#define INPUT yyinput
+#else
+#define INPUT input
+#endif
+
+
+int read_escaped_char(
+ yyscan_t yyscanner,
+ uint8_t* escaped_char)
{
char text[4];
text[0] = '\\';
+ text[1] = INPUT(yyscanner);
- #ifdef __cplusplus
- text[1] = yyinput(yyscanner);
- #else
- text[1] = input(yyscanner);
- #endif
+ if (text[1] == EOF)
+ return 0;
if (text[1] == 'x')
{
- #ifdef __cplusplus
- text[2] = yyinput(yyscanner);
- text[3] = yyinput(yyscanner);
- #else
- text[2] = input(yyscanner);
- text[3] = input(yyscanner);
- #endif
+ text[2] = INPUT(yyscanner);
+
+ if (text[2] == EOF)
+ return 0;
+
+ text[3] = INPUT(yyscanner);
+
+ if (text[3] == EOF)
+ return 0;
}
- return escaped_char_value(text);
+ *escaped_char = escaped_char_value(text);
+
+ return 1;
}
diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l
index a6d2260..6be9d54 100644
--- a/libyara/re_lexer.l
+++ b/libyara/re_lexer.l
@@ -49,8 +49,12 @@ with noyywrap then we can remove this pragma.
#endif
-uint8_t escaped_char_value(char* text);
-uint8_t read_escaped_char(yyscan_t yyscanner);
+uint8_t escaped_char_value(
+ char* text);
+
+int read_escaped_char(
+ yyscan_t yyscanner,
+ uint8_t* escaped_char);
%}
@@ -227,8 +231,19 @@ hex_digit [0-9a-fA-F]
\\ {
- yylval->integer = read_escaped_char(yyscanner);
- return _CHAR_;
+
+ uint8_t c;
+
+ if (read_escaped_char(yyscanner, &c))
+ {
+ yylval->integer = c;
+ return _CHAR_;
+ }
+ else
+ {
+ yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyterminate();
+ }
}
@@ -271,7 +286,13 @@ hex_digit [0-9a-fA-F]
}
if (end == '\\')
- end = read_escaped_char(yyscanner);
+ {
+ if (!read_escaped_char(yyscanner, &end))
+ {
+ yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyterminate();
+ }
+ }
if (end < start)
{
@@ -358,8 +379,17 @@ hex_digit [0-9a-fA-F]
<char_class>\\ {
- uint8_t c = read_escaped_char(yyscanner);
- LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
+ uint8_t c;
+
+ if (read_escaped_char(yyscanner, &c))
+ {
+ LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
+ }
+ else
+ {
+ yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyterminate();
+ }
}
@@ -410,7 +440,8 @@ hex_digit [0-9a-fA-F]
%%
-uint8_t escaped_char_value(char* text)
+uint8_t escaped_char_value(
+ char* text)
{
char hex[3];
int result;
@@ -454,30 +485,41 @@ uint8_t escaped_char_value(char* text)
}
-uint8_t read_escaped_char(yyscan_t yyscanner)
+#ifdef __cplusplus
+#define INPUT yyinput
+#else
+#define INPUT input
+#endif
+
+
+int read_escaped_char(
+ yyscan_t yyscanner,
+ uint8_t* escaped_char)
{
char text[4];
text[0] = '\\';
+ text[1] = INPUT(yyscanner);
- #ifdef __cplusplus
- text[1] = yyinput(yyscanner);
- #else
- text[1] = input(yyscanner);
- #endif
+ if (text[1] == EOF)
+ return 0;
if (text[1] == 'x')
{
- #ifdef __cplusplus
- text[2] = yyinput(yyscanner);
- text[3] = yyinput(yyscanner);
- #else
- text[2] = input(yyscanner);
- text[3] = input(yyscanner);
- #endif
+ text[2] = INPUT(yyscanner);
+
+ if (text[2] == EOF)
+ return 0;
+
+ text[3] = INPUT(yyscanner);
+
+ if (text[3] == EOF)
+ return 0;
}
- return escaped_char_value(text);
+ *escaped_char = escaped_char_value(text);
+
+ return 1;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes mailing list