[Forensics-changes] [yara] 114/192: Implement dotnet module
Hilko Bengen
bengen at moszumanska.debian.org
Sat Jul 1 10:31:54 UTC 2017
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to annotated tag v3.6.0
in repository yara.
commit b4b63418bd29c7d6ed1b19aca619e8f88680363f
Author: Wesley Shields <wxs at atarininja.org>
Date: Wed Mar 15 11:18:44 2017 -0400
Implement dotnet module
---
docs/modules/dotnet.rst | 33 +++++
libyara/include/yara/dotnet.h | 57 +++++++++
libyara/modules/dotnet.c | 286 +++++++++++++++++++++++++++++++++++++-----
3 files changed, 342 insertions(+), 34 deletions(-)
diff --git a/docs/modules/dotnet.rst b/docs/modules/dotnet.rst
index a525e02..6e63bc9 100644
--- a/docs/modules/dotnet.rst
+++ b/docs/modules/dotnet.rst
@@ -142,3 +142,36 @@ Reference
.. c:type:: typelib
The typelib of the file.
+
+.. c:type:: assembly_refs
+
+ Object for .NET assembly reference information.
+
+ .. c:member:: version
+
+ An object with integer values representing version information for this
+ assembly. Attributes are:
+
+ ``major``
+ ``minor``
+ ``build_number``
+ ``revision_number``
+
+ .. c:member:: name
+
+ String containing the assembly name.
+
+ .. c:member:: public_key_or_token
+
+ String containing the public key or token which identifies the author of
+ this assembly.
+
+.. c:type:: number_of_user_strings
+
+ The number of user strings in the file.
+
+.. c:type:: user_strings
+
+ A zero-based array of user strings, one for each user string contained in
+ the file. Individual strings can be accessed by using the [] operator.
diff --git a/libyara/include/yara/dotnet.h b/libyara/include/yara/dotnet.h
index 75aac08..00171e6 100644
--- a/libyara/include/yara/dotnet.h
+++ b/libyara/include/yara/dotnet.h
@@ -122,6 +122,15 @@ typedef struct _TILDE_HEADER {
//#define BIT_IMPORTSCOPE 0x35
//#define BIT_STATEMACHINEMETHOD 0x36
+
+//
+// Element types. Note this is not a complete list as we aren't parsing all of
+// them. This only includes the ones we care about.
+// ECMA-335 Section II.23.1.16
+//
+#define ELEMENT_TYPE_STRING 0x0E
+
+
// The string length of a typelib attribute is at most 0xFF.
#define MAX_TYPELIB_SIZE 0xFF
@@ -170,6 +179,28 @@ typedef struct _ASSEMBLY_TABLE {
} Name;
} ASSEMBLY_TABLE, *PASSEMBLY_TABLE;
+
+//
+// Assembly Reference Table
+// ECMA-335 Section II.22.5
+//
+// Overlay for the leading columns of one AssemblyRef row. The four version
+// fields and Flags are fixed width. The heap indexes that follow are read as
+// either 2 or 4 bytes depending on the index size in use, so each one is
+// declared as a union of both widths.
+//
+// NOTE: because PublicKeyOrToken is variable width in the row data, the Name
+// member does not reliably line up with the real column. dotnet.c reads the
+// name from a computed row offset instead of through this member.
+//
+typedef struct _ASSEMBLYREF_TABLE {
+ WORD MajorVersion;
+ WORD MinorVersion;
+ WORD BuildNumber;
+ WORD RevisionNumber;
+ DWORD Flags;
+ // Index into the #Blob stream; which member is valid depends on the blob
+ // index size.
+ union {
+ WORD PublicKeyOrToken_Short;
+ DWORD PublicKeyOrToken_Long;
+ } PublicKeyOrToken;
+ // Index into the #Strings stream; see the NOTE above before using it.
+ union {
+ WORD Name_Short;
+ DWORD Name_Long;
+ } Name;
+} ASSEMBLYREF_TABLE, *PASSEMBLYREF_TABLE;
+
+
//
// Manifest Resource Table
// ECMA-335 Section II.22.24
@@ -221,15 +252,41 @@ typedef struct _CUSTOMATTRIBUTE_TABLE {
} CUSTOMATTRIBUTE_TABLE, *PCUSTOMATTRIBUTE_TABLE;
+//
+// Constant Table
+// ECMA-335 Section II.22.9
+//
+// Overlay for one Constant row. dotnet.c compares Type against
+// ELEMENT_TYPE_STRING and counts the type column as 1 + 1 bytes when it
+// computes the row size.
+//
+// NOTE: Parent is a variable-width coded index, so the Value member does not
+// reliably line up with the row data. dotnet.c locates the blob index at
+// offset 2 + (parent index size) instead of reading it through Value.
+//
+typedef struct _CONSTANT_TABLE {
+ WORD Type;
+ // Coded index of the owning row; 2 or 4 bytes wide.
+ union {
+ WORD Parent_Short;
+ DWORD Parent_Long;
+ } Parent;
+ // Index into the #Blob stream; 2 or 4 bytes wide.
+ union {
+ WORD Value_Short;
+ DWORD Value_Long;
+ } Value;
+} CONSTANT_TABLE, *PCONSTANT_TABLE;
+
+
// Used to return offsets to the various headers.
typedef struct _STREAMS {
PSTREAM_HEADER guid;
PSTREAM_HEADER tilde;
PSTREAM_HEADER string;
PSTREAM_HEADER blob;
+ PSTREAM_HEADER us;
} STREAMS, *PSTREAMS;
+// Used to return the value of parsing a #US or #Blob entry.
+// ECMA-335 Section II.24.2.4
+//
+// A size of 0 is used by the parser to signal an error; length is only
+// meaningful when size is non-zero.
+typedef struct _BLOB_PARSE_RESULT {
+ uint8_t size; // Bytes taken by the encoded length prefix; callers add this to the entry pointer to reach the data.
+ DWORD length; // Decoded value of the prefix. This is the blob length in bytes.
+} BLOB_PARSE_RESULT, *PBLOB_PARSE_RESULT;
+
+
// Used to store the number of rows of each table.
typedef struct _ROWS {
uint32_t module;
diff --git a/libyara/modules/dotnet.c b/libyara/modules/dotnet.c
index 0188422..ab8afb3 100644
--- a/libyara/modules/dotnet.c
+++ b/libyara/modules/dotnet.c
@@ -123,6 +123,118 @@ void dotnet_parse_guid(
}
+// Given a pointer to an entry in a #US or #Blob stream, decode the compressed
+// length prefix stored at that position.
+//
+// pe:     PE being parsed; used only to bounds-check the bytes that are read.
+// offset: pointer (inside the PE data buffer) to the first byte of the entry.
+//
+// Returns a BLOB_PARSE_RESULT where `size` is the number of bytes taken by the
+// length prefix (1, 2 or 4) and `length` is the decoded length of the data
+// that follows it. A `size` of 0 signals an error (prefix truncated by the end
+// of the file, or an invalid encoding); `length` is 0 in that case.
+BLOB_PARSE_RESULT dotnet_parse_blob_entry(
+    PE* pe,
+    uint8_t* offset)
+{
+  BLOB_PARSE_RESULT result;
+
+  // Start from the error state so that every early return hands back a fully
+  // initialized structure.
+  result.size = 0;
+  result.length = 0;
+
+  // Blob size is encoded in the first 1, 2 or 4 bytes of the blob.
+  //
+  // If the high bit is not set the length is encoded in one byte.
+  //
+  // If the high 2 bits are 10 (base 2) then the length is encoded in
+  // the rest of the bits and the next byte.
+  //
+  // If the high 3 bits are 110 (base 2) then the length is encoded
+  // in the rest of the bits and the next 3 bytes.
+  //
+  // See ECMA-335 II.24.2.4 for details.
+
+  // Make sure we have at least one byte.
+  if (!fits_in_pe(pe, offset, 1))
+    return result;
+
+  if ((*offset & 0x80) == 0x00)
+  {
+    result.length = (DWORD) *offset;
+    result.size = 1;
+  }
+  else if ((*offset & 0xC0) == 0x80)
+  {
+    // Make sure we have one more byte.
+    if (!fits_in_pe(pe, offset, 2))
+      return result;
+
+    // Shift remaining 6 bits left by 8 and OR in the remaining byte.
+    result.length = ((DWORD) (*offset & 0x3F) << 8) |
+                     (DWORD) *(offset + 1);
+    result.size = 2;
+  }
+  else if ((*offset & 0xE0) == 0xC0)
+  {
+    // Make sure we have 3 more bytes. fits_in_pe is the only bounds check
+    // needed here, matching the 1 and 2 byte cases.
+    if (!fits_in_pe(pe, offset, 4))
+      return result;
+
+    result.length = ((DWORD) (*offset & 0x1F) << 24) |
+                    ((DWORD) *(offset + 1) << 16) |
+                    ((DWORD) *(offset + 2) << 8) |
+                     (DWORD) *(offset + 3);
+
+    // The prefix occupies all 4 bytes that were just decoded, so the data
+    // starts 4 bytes after `offset`.
+    result.size = 4;
+  }
+
+  // Any other bit pattern is invalid; result still holds the error state.
+  return result;
+}
+
+
+// Walk the #US (user strings) stream and expose every non-empty entry as
+// user_strings[i], then store the total in number_of_user_strings.
+//
+// pe:            PE being parsed; results are written to pe->object.
+// metadata_root: offset of the metadata root from the start of the PE data.
+// us_header:     stream header describing the #US stream (Offset is relative
+//                to metadata_root).
+//
+// If the stream is empty, does not fit in the file, or does not start with
+// the mandatory NULL entry, nothing is set. If a later entry is malformed or
+// truncated, parsing stops and the strings found so far are kept.
+void dotnet_parse_us(
+    PE* pe,
+    int64_t metadata_root,
+    PSTREAM_HEADER us_header)
+{
+  BLOB_PARSE_RESULT blob_result;
+  int i = 0;
+  uint8_t* offset = pe->data + metadata_root + us_header->Offset;
+  uint8_t* end_of_header;
+
+  // Make sure the stream holds at least one byte and does not run past the
+  // end of the PE before reading it; the first entry MUST be a single NULL
+  // byte.
+  if (us_header->Size == 0 ||
+      !fits_in_pe(pe, offset, us_header->Size) ||
+      *offset != 0x00)
+    return;
+
+  // Only compute the end pointer once the range is known to be valid.
+  end_of_header = offset + us_header->Size;
+  offset++;
+
+  while (offset < end_of_header)
+  {
+    blob_result = dotnet_parse_blob_entry(pe, offset);
+    if (blob_result.size == 0)
+      break;
+
+    // Step over the length prefix first so that the bounds check below
+    // covers the string data itself rather than the prefix.
+    offset += blob_result.size;
+
+    if (!fits_in_pe(pe, offset, blob_result.length))
+      break;
+
+    // Avoid empty strings, which usually happen as padding at the end of the
+    // stream.
+    if (blob_result.length > 0)
+    {
+      set_sized_string(
+          (char*) offset, blob_result.length, pe->object, "user_strings[%i]", i);
+      i++;
+      offset += blob_result.length;
+    }
+  }
+
+  set_integer(i, pe->object, "number_of_user_strings");
+}
+
+
STREAMS dotnet_parse_stream_headers(
PE* pe,
int64_t offset,
@@ -169,6 +281,8 @@ STREAMS dotnet_parse_stream_headers(
headers.string = stream_header;
else if (strncmp(stream_name, "#Blob", 5) == 0)
headers.blob = stream_header;
+ else if (strncmp(stream_name, "#US", 3) == 0 && headers.us == NULL)
+ headers.us = stream_header;
// Stream name is padded to a multiple of 4.
stream_header = (PSTREAM_HEADER) ((uint8_t*) stream_header +
@@ -203,9 +317,11 @@ void dotnet_parse_tilde_2(
{
PMODULE_TABLE module_table;
PASSEMBLY_TABLE assembly_table;
+ PASSEMBLYREF_TABLE assemblyref_table;
PMANIFESTRESOURCE_TABLE manifestresource_table;
PMODULEREF_TABLE moduleref_table;
PCUSTOMATTRIBUTE_TABLE customattribute_table;
+ PCONSTANT_TABLE constant_table;
DWORD resource_size, implementation;
char *name;
char typelib[MAX_TYPELIB_SIZE + 1];
@@ -246,6 +362,7 @@ void dotnet_parse_tilde_2(
uint8_t* memberref_row = NULL;
DWORD type_index;
DWORD class_index;
+ BLOB_PARSE_RESULT blob_result;
DWORD blob_index;
DWORD blob_length;
// These are used to determine the size of coded indexes, which are the
@@ -305,8 +422,7 @@ void dotnet_parse_tilde_2(
table_offset += (2 + index_sizes.string + (index_sizes.guid * 3)) * num_rows;
break;
case BIT_TYPEREF:
- row_count = max_rows(4, rows.module, rows.moduleref, rows.assemblyref,
- rows.typeref);
+ row_count = max_rows(4, rows.module, rows.moduleref, rows.assemblyref, rows.typeref);
if (row_count > (0xFFFF >> 0x02))
index_size = 4;
@@ -377,7 +493,60 @@ void dotnet_parse_tilde_2(
else
index_size = 2;
- table_offset += (1 + 1 + index_size + index_sizes.blob) * num_rows;
+ // Using 'i' is insufficient since we may skip certain constants and
+ // it would give an inaccurate count in that case.
+ counter = 0;
+ row_size = (1 + 1 + index_size + index_sizes.blob);
+ row_ptr = table_offset;
+ for (i = 0; i < num_rows; i++)
+ {
+ if (!fits_in_pe(pe, row_ptr, row_size))
+ break;
+
+ constant_table = (PCONSTANT_TABLE) row_ptr;
+ // Only look for constants of type string.
+ if (constant_table->Type != ELEMENT_TYPE_STRING)
+ {
+ row_ptr += row_size;
+ continue;
+ }
+
+ // Get the blob offset and pull it out of the blob table.
+ blob_offset = ((uint8_t*) constant_table) + 2 + index_size;
+ if (index_sizes.blob == 4)
+ blob_index = *(DWORD*) blob_offset;
+ else
+ // Cast the value (index into blob table) to a 32bit value.
+ blob_index = (DWORD) (*(WORD*) blob_offset);
+
+ // Everything checks out. Make sure the index into the blob field
+ // is valid (non-null and within range).
+ blob_offset = pe->data + metadata_root + streams->blob->Offset + blob_index;
+ blob_result = dotnet_parse_blob_entry(pe, blob_offset);
+
+ if (blob_result.size == 0)
+ {
+ row_ptr += row_size;
+ continue;
+ }
+
+ blob_length = blob_result.length;
+ blob_offset += blob_result.size;
+
+ // Quick sanity check to make sure the blob entry is within bounds.
+ if (blob_offset + blob_length >= pe->data + pe->data_size)
+ {
+ row_ptr += row_size;
+ continue;
+ }
+
+ set_sized_string((char*) blob_offset, blob_result.length, pe->object, "constants[%i]", i);
+ counter++;
+ row_ptr += row_size;
+ }
+
+ set_integer(counter, pe->object, "number_of_constants");
+ table_offset += row_size * num_rows;
break;
case BIT_CUSTOMATTRIBUTE:
// index_size is size of the parent column.
@@ -529,7 +698,6 @@ void dotnet_parse_tilde_2(
// Cast the value (index into blob table) to a 32bit value.
blob_index = (DWORD) (*(WORD*) customattribute_table);
-
// Everything checks out. Make sure the index into the blob field
// is valid (non-null and within range).
blob_offset = pe->data + metadata_root + streams->blob->Offset + blob_index;
@@ -543,39 +711,14 @@ void dotnet_parse_tilde_2(
continue;
}
- // Blob size is encoded in the first 1, 2 or 4 bytes of the blob.
- //
- // If the high bit is not set the length is encoded in one byte.
- //
- // If the high 2 bits are 10 (base 2) then the length is encoded in
- // the rest of the bits and the next byte.
- //
- // If the high 3 bits are 110 (base 2) then the length is encoded
- // in the rest of the bits and the next 3 bytes.
- //
- // See ECMA-335 II.24.2.4 for details.
- if ((*blob_offset & 0x80) == 0x00)
- {
- blob_length = (DWORD) *blob_offset;
- blob_offset++;
- }
- else if (blob_offset + 1 < pe->data + pe->data_size &&
- (*blob_offset & 0xC0) == 0x80)
- {
- blob_length = (DWORD) ((*(WORD*) blob_offset) & 0x3FFF);
- blob_offset += 2;
- }
- else if (blob_offset + 4 < pe->data + pe->data_size &&
- (*blob_offset & 0xE0) == 0xC0)
- {
- blob_length = (*(DWORD*) blob_offset) & 0x1FFFFFFF;
- blob_offset += 3;
- }
- else
+ blob_result = dotnet_parse_blob_entry(pe, blob_offset);
+ if (blob_result.size == 0)
{
row_ptr += row_size;
continue;
}
+ blob_length = blob_result.length;
+ blob_offset += blob_result.size;
// Quick sanity check to make sure the blob entry is within bounds.
if (blob_offset + blob_length >= pe->data + pe->data_size)
@@ -794,7 +937,64 @@ void dotnet_parse_tilde_2(
table_offset += (4 + 4 + 4) * num_rows;
break;
case BIT_ASSEMBLYREF:
- table_offset += (2 + 2 + 2 + 2 + 4 + (index_sizes.blob * 2) + (index_sizes.string * 2)) * num_rows;
+ row_size = (2 + 2 + 2 + 2 + 4 + (index_sizes.blob * 2) + (index_sizes.string * 2));
+ row_ptr = table_offset;
+ for (i = 0; i < num_rows; i++)
+ {
+ if (!fits_in_pe(pe, table_offset, row_size))
+ break;
+
+ assemblyref_table = (PASSEMBLYREF_TABLE) row_ptr;
+ set_integer(assemblyref_table->MajorVersion,
+ pe->object, "assembly_refs[%i].version.major", i);
+ set_integer(assemblyref_table->MinorVersion,
+ pe->object, "assembly_refs[%i].version.minor", i);
+ set_integer(assemblyref_table->BuildNumber,
+ pe->object, "assembly_refs[%i].version.build_number", i);
+ set_integer(assembly_table->RevisionNumber,
+ pe->object, "assembly_refs[%i].version.revision_number", i);
+
+ blob_offset = pe->data + metadata_root + streams->blob->Offset;
+ if (index_sizes.blob == 4)
+ blob_offset += assemblyref_table->PublicKeyOrToken.PublicKeyOrToken_Long;
+ else
+ blob_offset += assemblyref_table->PublicKeyOrToken.PublicKeyOrToken_Short;
+
+ blob_result = dotnet_parse_blob_entry(pe, blob_offset);
+ if (blob_result.size == 0 || !fits_in_pe(pe, blob_offset, blob_result.length))
+ {
+ row_ptr += row_size;
+ continue;
+ }
+
+ // Avoid empty strings.
+ if (blob_result.length > 0)
+ {
+ blob_offset += blob_result.size;
+ set_sized_string((char*) blob_offset,
+ blob_result.length, pe->object,
+ "assembly_refs[%i].public_key_or_token", i);
+ }
+
+ // Can't use assemblyref_table here because the PublicKey comes before
+ // Name and is a variable length field.
+ if (index_sizes.string == 4)
+ name = pe_get_dotnet_string(pe,
+ string_offset,
+ *(DWORD*) (row_ptr + 2 + 2 + 2 + 2 + 4 + index_sizes.blob));
+ else
+ name = pe_get_dotnet_string(pe,
+ string_offset,
+ *(WORD*) (row_ptr + 2 + 2 + 2 + 2 + 4 + index_sizes.blob));
+
+ if (name != NULL)
+ set_string(name, pe->object, "assembly_refs[%i].name", i);
+
+ row_ptr += row_size;
+ }
+
+ set_integer(i, pe->object, "number_of_assembly_refs");
+ table_offset += row_size * num_rows;
break;
case BIT_ASSEMBLYREFPROCESSOR:
table_offset += (4 + index_sizes.assemblyrefprocessor) * num_rows;
@@ -1154,6 +1354,9 @@ void dotnet_parse_com(
// Parse the #~ stream, which includes various tables of interest.
if (headers.tilde != NULL)
dotnet_parse_tilde(pe, metadata_root, cli_header, &headers);
+
+ if (headers.us != NULL)
+ dotnet_parse_us(pe, metadata_root, headers.us);
}
@@ -1175,6 +1378,17 @@ begin_declarations;
declare_string("name");
end_struct_array("resources");
declare_integer("number_of_resources");
+ begin_struct_array("assembly_refs")
+ begin_struct("version");
+ declare_integer("major");
+ declare_integer("minor");
+ declare_integer("build_number");
+ declare_integer("revision_number");
+ end_struct("version");
+ declare_string("public_key_or_token");
+ declare_string("name");
+ end_struct_array("assembly_refs")
+ declare_integer("number_of_assembly_refs");
begin_struct("assembly");
begin_struct("version");
declare_integer("major");
@@ -1187,7 +1401,11 @@ begin_declarations;
end_struct("assembly");
declare_string_array("modulerefs");
declare_integer("number_of_modulerefs");
+ declare_string_array("user_strings");
+ declare_integer("number_of_user_strings");
declare_string("typelib");
+ declare_string_array("constants");
+ declare_integer("number_of_constants");
end_declarations;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list