[Forensics-changes] [yara] 92/192: re_lexer: Make reading escape sequences more robust (#586)
Hilko Bengen
bengen at moszumanska.debian.org
Sat Jul 1 10:31:51 UTC 2017
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to annotated tag v3.6.0
in repository yara.
commit 3119b232c9c453c98d8fa8b6ae4e37ba18117cd4
Author: Hilko Bengen <hillu at users.noreply.github.com>
Date: Tue Jan 17 17:07:02 2017 +0100
re_lexer: Make reading escape sequences more robust (#586)
* Add test for issue #503
* re_lexer: Make reading escape sequences more robust
This commit fixes the parsing of incomplete escape sequences at the end of a
regular expression and of sequences like \xxy (invalid hex digits), which
were previously silently turned into (char)255.
Close #503
* Update re_lexer.c
---
libyara/re_lexer.c | 40 ++++++++++++++++++++--------------------
libyara/re_lexer.l | 12 ++++++------
tests/test-rules.c | 6 ++++++
3 files changed, 32 insertions(+), 26 deletions(-)
diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c
index 82d4871..f0940a8 100644
--- a/libyara/re_lexer.c
+++ b/libyara/re_lexer.c
@@ -190,7 +190,7 @@ typedef size_t yy_size_t;
/* Note: We specifically omit the test for yy_rule_can_match_eol because it requires
* access to the local variable yy_act. Since yyless() is a macro, it would break
- * existing scanners that call yyless() from OUTSIDE re_yylex.
+ * existing scanners that call yyless() from OUTSIDE re_yylex.
* One obvious solution it to make yy_act a global. I tried that, and saw
* a 5% performance hit in a non-yylineno scanner, because yy_act is
* normally declared as a register variable-- so it is not worth it.
@@ -266,7 +266,7 @@ struct yy_buffer_state
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
@@ -906,7 +906,7 @@ yy_find_action:
yy_size_t yyl;
for ( yyl = 0; yyl < yyleng; ++yyl )
if ( yytext[yyl] == '\n' )
-
+
do{ yylineno++;
yycolumn=0;
}while(0)
@@ -1125,7 +1125,7 @@ YY_RULE_SETUP
}
else
{
- yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyerror(yyscanner, lex_env, "illegal escape sequence");
yyterminate();
}
}
@@ -1180,7 +1180,7 @@ YY_RULE_SETUP
{
if (!read_escaped_char(yyscanner, &end))
{
- yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyerror(yyscanner, lex_env, "illegal escape sequence");
yyterminate();
}
}
@@ -1292,7 +1292,7 @@ YY_RULE_SETUP
}
else
{
- yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyerror(yyscanner, lex_env, "illegal escape sequence");
yyterminate();
}
}
@@ -1763,7 +1763,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
yyg->yy_hold_char = *++yyg->yy_c_buf_p;
if ( c == '\n' )
-
+
do{ yylineno++;
yycolumn=0;
}while(0)
@@ -2018,9 +2018,9 @@ static void re_yyensure_buffer_stack (yyscan_t yyscanner)
, yyscanner);
if ( ! yyg->yy_buffer_stack )
YY_FATAL_ERROR( "out of dynamic memory in re_yyensure_buffer_stack()" );
-
+
memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
+
yyg->yy_buffer_stack_max = num_to_alloc;
yyg->yy_buffer_stack_top = 0;
return;
@@ -2049,7 +2049,7 @@ static void re_yyensure_buffer_stack (yyscan_t yyscanner)
* @param base the character buffer
* @param size the size in bytes of the character buffer
* @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
+ * @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE re_yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
{
@@ -2177,7 +2177,7 @@ YY_EXTRA_TYPE re_yyget_extra (yyscan_t yyscanner)
int re_yyget_lineno (yyscan_t yyscanner)
{
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
+
if (! YY_CURRENT_BUFFER)
return 0;
@@ -2190,7 +2190,7 @@ int re_yyget_lineno (yyscan_t yyscanner)
int re_yyget_column (yyscan_t yyscanner)
{
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
+
if (! YY_CURRENT_BUFFER)
return 0;
@@ -2365,20 +2365,20 @@ int re_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals )
errno = EINVAL;
return 1;
}
-
+
*ptr_yy_globals = (yyscan_t) re_yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts );
-
+
if (*ptr_yy_globals == NULL){
errno = ENOMEM;
return 1;
}
-
+
/* By setting to 0xAA, we expose bugs in
yy_init_globals. Leave at 0x00 for releases. */
memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
-
+
re_yyset_extra (yy_user_defined, *ptr_yy_globals);
-
+
return yy_init_globals ( *ptr_yy_globals );
}
@@ -2568,19 +2568,19 @@ int read_escaped_char(
text[0] = '\\';
text[1] = RE_YY_INPUT(yyscanner);
- if (text[1] == EOF)
+ if (text[1] == EOF || text[1] == 0)
return 0;
if (text[1] == 'x')
{
text[2] = RE_YY_INPUT(yyscanner);
- if (text[2] == EOF)
+ if (!isxdigit(text[2]))
return 0;
text[3] = RE_YY_INPUT(yyscanner);
- if (text[3] == EOF)
+ if (!isxdigit(text[3]))
return 0;
}
diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l
index 1b3f5aa..9e0b005 100644
--- a/libyara/re_lexer.l
+++ b/libyara/re_lexer.l
@@ -261,7 +261,7 @@ hex_digit [0-9a-fA-F]
}
else
{
- yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyerror(yyscanner, lex_env, "illegal escape sequence");
yyterminate();
}
}
@@ -312,7 +312,7 @@ hex_digit [0-9a-fA-F]
{
if (!read_escaped_char(yyscanner, &end))
{
- yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyerror(yyscanner, lex_env, "illegal escape sequence");
yyterminate();
}
}
@@ -410,7 +410,7 @@ hex_digit [0-9a-fA-F]
}
else
{
- yyerror(yyscanner, lex_env, "unexpected end of buffer");
+ yyerror(yyscanner, lex_env, "illegal escape sequence");
yyterminate();
}
}
@@ -524,19 +524,19 @@ int read_escaped_char(
text[0] = '\\';
text[1] = RE_YY_INPUT(yyscanner);
- if (text[1] == EOF)
+ if (text[1] == EOF || text[1] == 0)
return 0;
if (text[1] == 'x')
{
text[2] = RE_YY_INPUT(yyscanner);
- if (text[2] == EOF)
+ if (!isxdigit(text[2]))
return 0;
text[3] = RE_YY_INPUT(yyscanner);
- if (text[3] == EOF)
+ if (!isxdigit(text[3]))
return 0;
}
diff --git a/tests/test-rules.c b/tests/test-rules.c
index 5570dbc..556e345 100644
--- a/tests/test-rules.c
+++ b/tests/test-rules.c
@@ -1019,6 +1019,12 @@ void test_re()
// Test case for issue #324
assert_true_regexp("whatever| x. x", " xy x", " xy x");
+
+ // test case for issue #503, \x without two following hex-digits
+ assert_regexp_syntax_error("\\x0");
+ assert_regexp_syntax_error("\\x");
+
+ assert_regexp_syntax_error("\\xxy");
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list