[Forensics-changes] [yara] 253/407: Improve resource handling and add entropy.

Hilko Bengen bengen at moszumanska.debian.org
Sat Jul 1 10:28:32 UTC 2017


This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to annotated tag v3.3.0
in repository yara.

commit c8c0925540d29b56e93d4e3e92800d5554a67cd8
Author: Wesley Shields <wxs at atarininja.org>
Date:   Thu Dec 18 00:16:02 2014 -0500

    Improve resource handling and add entropy.
    
    Attributes of resources are now stored as an array of structs. In
    particular these are the attributes:
    
    type
    id
    language
    size
    data
    
    There are also some attributes that are stored outside of the array,
    because they are not specific to any given resource. These are:
    
    resource_timestamp
    resource_major_version
    resource_minor_version
    number_of_resources
    
    The point of doing this is that we can now do things like look for
    resources with specific attributes. The data is stored in an attribute
    so that you can use the hash module or the new entropy module on it too.
    
    I've added an entropy module which will operate over a given
    SIZED_STRING argument or over a range of bytes. It currently truncates
    the result to an integer. I need to add support for double as a
    first-class type in YARA.
---
 libyara/Makefile.am         |   1 +
 libyara/modules/entropy.c   | 180 +++++++++++++++++++++++++++++++++
 libyara/modules/module_list |   1 +
 libyara/modules/pe.c        | 238 +++++++++++++++++++++++---------------------
 4 files changed, 309 insertions(+), 111 deletions(-)

diff --git a/libyara/Makefile.am b/libyara/Makefile.am
index 0fbab0d..ce43038 100644
--- a/libyara/Makefile.am
+++ b/libyara/Makefile.am
@@ -2,6 +2,7 @@
 MODULES =  modules/tests.c
 MODULES += modules/pe.c
 MODULES += modules/elf.c
+MODULES += modules/entropy.c
 
 if CUCKOO
 MODULES += modules/cuckoo.c
diff --git a/libyara/modules/entropy.c b/libyara/modules/entropy.c
new file mode 100644
index 0000000..6b19326
--- /dev/null
+++ b/libyara/modules/entropy.c
@@ -0,0 +1,180 @@
+/*
+Copyright (c) 2014. The YARA Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <stdbool.h>
+#include <math.h>
+
+#include <yara/modules.h>
+#include <yara/mem.h>
+
+#define MODULE_NAME entropy
+
+
+define_function(string_entropy)
+{
+  // entropy(string): Shannon entropy, in bits per byte, of the bytes of the
+  // string argument, truncated to an integer. Yields UNDEFINED when the
+  // argument is undefined or the byte histogram cannot be allocated.
+  SIZED_STRING* str = sized_string_argument(1);
+  uint32_t* histogram;
+  double bits = 0.0;
+  int idx;
+
+  if (IS_UNDEFINED(str))
+    return_integer(UNDEFINED);
+
+  histogram = (uint32_t*) yr_calloc(256, sizeof(uint32_t));
+  if (histogram == NULL)
+    return_integer(UNDEFINED);
+
+  // Count the occurrences of each byte value.
+  for (idx = 0; idx < str->length; idx++)
+    histogram[(uint8_t) str->c_string[idx]] += 1;
+
+  // Accumulate -p * log2(p) for every byte value that occurs.
+  for (idx = 0; idx < 256; idx++)
+  {
+    if (histogram[idx] == 0)
+      continue;
+    double p = (double) (histogram[idx]) / str->length;
+    bits -= p * log2(p);
+  }
+
+  yr_free(histogram);
+  return_integer((int) bits);
+}
+
+
+define_function(data_entropy)
+{
+  // entropy(offset, length): Shannon entropy, in bits per byte, of the
+  // `length` scanned bytes starting at `offset`, truncated to an integer.
+  // Yields UNDEFINED when an argument is undefined, the histogram cannot
+  // be allocated, or the range is not covered by contiguous memory blocks.
+  uint32_t* data;
+  double entropy = 0.0;
+  bool past_first_block = false;
+  uint64_t total_len = 0;
+
+  int64_t offset = integer_argument(1);   // offset where to start
+  int64_t length = integer_argument(2);   // length of bytes we want entropy on
+
+  if (IS_UNDEFINED(offset) || IS_UNDEFINED(length))
+    return_integer(UNDEFINED);
+
+  YR_SCAN_CONTEXT* context = scan_context();
+  YR_MEMORY_BLOCK* block = NULL;
+
+  // Negative arguments or an offset below the current block's base are errors.
+  if (offset < 0 || length < 0 || offset < context->mem_block->base)
+    return ERROR_WRONG_ARGUMENTS;
+
+  // data[b] counts how often byte value b occurs in the requested range.
+  data = (uint32_t*) yr_calloc(256, sizeof(uint32_t));
+  if (data == NULL)
+    return_integer(UNDEFINED);
+
+  foreach_memory_block(context, block)
+  {
+    if (offset >= block->base &&
+        offset < block->base + block->size)
+    {
+      uint64_t data_offset = offset - block->base;
+      uint64_t data_len = min(length, block->size - data_offset);
+      total_len += data_len;
+      // Move the cursor so the next block has to begin where this one ends.
+      offset += data_len;
+      length -= data_len;
+
+      // 64-bit index: data_len is 64 bits wide and may exceed INT_MAX.
+      for (uint64_t i = 0; i < data_len; i++)
+        data[(uint8_t) block->data[data_offset + i]] += 1;
+      past_first_block = true;
+    }
+    else if (past_first_block)
+    {
+      // The cursor is outside this block although counting has already
+      // started, so the requested range spans non-contiguous blocks. The
+      // gap holds undefined data, hence the entropy is undefined as well.
+      yr_free(data);
+      return_integer(UNDEFINED);
+    }
+
+    // Stop when all requested bytes are counted, so a range ending exactly at
+    // a block end is not reported UNDEFINED because of a later distant block.
+    if ((past_first_block && length == 0) ||
+        block->base + block->size > offset + length)
+      break;
+  }
+
+  if (!past_first_block)
+  {
+    yr_free(data);
+    return_integer(UNDEFINED);
+  }
+
+  // H = -sum(p * log2(p)) over the byte values that occur, p = count / total.
+  for (int i = 0; i < 256; i++)
+  {
+    if (data[i] != 0)
+    {
+      double x = (double) (data[i]) / total_len;
+      entropy -= x * log2(x);
+    }
+  }
+  yr_free(data);
+  return_integer((int) entropy);
+}
+
+
+begin_declarations;
+
+  declare_function("entropy", "ii", "i", data_entropy);   // (offset, length)
+  declare_function("entropy", "s", "i", string_entropy);  // (string)
+
+end_declarations;
+
+
+int module_initialize(
+    YR_MODULE* module)
+{
+  return ERROR_SUCCESS;  // nothing to initialize; `module` is unused
+}
+
+
+int module_finalize(
+    YR_MODULE* module)
+{
+  return ERROR_SUCCESS;  // nothing to release; `module` is unused
+}
+
+
+int module_load(
+    YR_SCAN_CONTEXT* context,
+    YR_OBJECT* module_object,
+    void* module_data,
+    size_t module_data_size)
+{
+
+  return ERROR_SUCCESS;  // no work done here; all parameters are unused
+}
+
+
+int module_unload(
+    YR_OBJECT* module_object)
+{
+  return ERROR_SUCCESS;  // nothing to release; `module_object` is unused
+}
diff --git a/libyara/modules/module_list b/libyara/modules/module_list
index 1c28b02..e78b75c 100644
--- a/libyara/modules/module_list
+++ b/libyara/modules/module_list
@@ -1,6 +1,7 @@
 MODULE(tests)
 MODULE(pe)
 MODULE(elf)
+MODULE(entropy)
 
 #ifdef CUCKOO
 MODULE(cuckoo)
diff --git a/libyara/modules/pe.c b/libyara/modules/pe.c
index ff81134..b366ebd 100644
--- a/libyara/modules/pe.c
+++ b/libyara/modules/pe.c
@@ -126,6 +126,7 @@ typedef struct _PE
   PIMAGE_NT_HEADERS32 header;
   YR_OBJECT* object;
   IMPORTED_DLL* imported_dlls;
+  uint32_t resources;
 
 } PE;
 
@@ -465,9 +466,24 @@ int pe_iterate_resources(
     if (offset != 0 &&
         offset < pe->data_size)
     {
+      PIMAGE_RESOURCE_DIRECTORY rsrc_dir =
+        (PIMAGE_RESOURCE_DIRECTORY) (pe->data + offset);
+
+      set_integer(rsrc_dir->TimeDateStamp,
+                  pe->object,
+                  "resource_timestamp",
+                  pe->resources);
+      set_integer(rsrc_dir->MajorVersion,
+                  pe->object,
+                  "resource_major_version",
+                  pe->resources);
+      set_integer(rsrc_dir->MinorVersion,
+                  pe->object,
+                  "resource_minor_version",
+                  pe->resources);
       _pe_iterate_resources(
           pe,
-          (PIMAGE_RESOURCE_DIRECTORY) (pe->data + offset),
+          rsrc_dir,
           pe->data + offset,
           0,
           &type,
@@ -493,11 +509,8 @@ int pe_iterate_resources(
     (typeof(ptr)) ((uint8_t*) (ptr) + ((offset + 3) & ~3))
 
 
-int pe_find_version_info_cb(
+void pe_parse_version_info(
     PIMAGE_RESOURCE_DATA_ENTRY rsrc_data,
-    int rsrc_type,
-    int rsrc_id,
-    int rsrc_language,
     PE* pe)
 {
   PVERSION_INFO version_info;
@@ -508,78 +521,101 @@ int pe_find_version_info_cb(
 
   size_t version_info_offset;
 
-  if (rsrc_type == RESOURCE_TYPE_VERSION)
-  {
-    version_info_offset = pe_rva_to_offset(pe, rsrc_data->OffsetToData);
+  version_info_offset = pe_rva_to_offset(pe, rsrc_data->OffsetToData);
 
-    if (version_info_offset == 0)
-      return RESOURCE_CALLBACK_CONTINUE;
+  if (version_info_offset == 0)
+    return;
 
-    version_info = (PVERSION_INFO) (pe->data + version_info_offset);
+  version_info = (PVERSION_INFO) (pe->data + version_info_offset);
 
-    if (!struct_fits_in_pe(pe, version_info, VERSION_INFO))
-      return RESOURCE_CALLBACK_CONTINUE;
+  if (!struct_fits_in_pe(pe, version_info, VERSION_INFO))
+    return;
 
-    if (!fits_in_pe(pe, version_info, sizeof("VS_VERSION_INFO")))
-      return RESOURCE_CALLBACK_CONTINUE;
+  if (!fits_in_pe(pe, version_info, sizeof("VS_VERSION_INFO")))
+    return;
 
-    if (strcmp_w(version_info->Key, "VS_VERSION_INFO") != 0)
-      return RESOURCE_CALLBACK_CONTINUE;
+  if (strcmp_w(version_info->Key, "VS_VERSION_INFO") != 0)
+    return;
 
-    string_file_info = ADD_OFFSET(version_info, sizeof(VERSION_INFO) + 86);
+  string_file_info = ADD_OFFSET(version_info, sizeof(VERSION_INFO) + 86);
 
-    if (!struct_fits_in_pe(pe, string_file_info, VERSION_INFO))
-      return RESOURCE_CALLBACK_CONTINUE;
+  if (!struct_fits_in_pe(pe, string_file_info, VERSION_INFO))
+    return;
 
-    if (!fits_in_pe(pe, string_file_info, sizeof("StringFileInfo")))
-      return RESOURCE_CALLBACK_CONTINUE;
+  if (!fits_in_pe(pe, string_file_info, sizeof("StringFileInfo")))
+    return;
+
+  while(strcmp_w(string_file_info->Key, "StringFileInfo") == 0)
+  {
+    PVERSION_INFO string_table = ADD_OFFSET(
+        string_file_info,
+        sizeof(VERSION_INFO) + 30);
 
-    while(strcmp_w(string_file_info->Key, "StringFileInfo") == 0)
+    string_file_info = ADD_OFFSET(
+        string_file_info,
+        string_file_info->Length);
+
+    while (string_table < string_file_info)
     {
-      PVERSION_INFO string_table = ADD_OFFSET(
-          string_file_info,
-          sizeof(VERSION_INFO) + 30);
+      PVERSION_INFO string = ADD_OFFSET(
+          string_table,
+          sizeof(VERSION_INFO) + 2 * (strlen_w(string_table->Key) + 1));
 
-      string_file_info = ADD_OFFSET(
-          string_file_info,
-          string_file_info->Length);
+      string_table = ADD_OFFSET(
+          string_table,
+          string_table->Length);
 
-      while (string_table < string_file_info)
+      while (string < string_table)
       {
-        PVERSION_INFO string = ADD_OFFSET(
-            string_table,
-            sizeof(VERSION_INFO) + 2 * (strlen_w(string_table->Key) + 1));
+        char* string_value = (char*) ADD_OFFSET(
+            string,
+            sizeof(VERSION_INFO) + 2 * (strlen_w(string->Key) + 1));
 
-        string_table = ADD_OFFSET(
-            string_table,
-            string_table->Length);
+        strlcpy_w(key, string->Key, sizeof(key));
+        strlcpy_w(value, string_value, sizeof(value));
 
-        while (string < string_table)
-        {
-          char* string_value = (char*) ADD_OFFSET(
-              string,
-              sizeof(VERSION_INFO) + 2 * (strlen_w(string->Key) + 1));
+        set_string(value, pe->object, "version_info[%s]", key);
 
-          strlcpy_w(key, string->Key, sizeof(key));
-          strlcpy_w(value, string_value, sizeof(value));
+        if (string->Length == 0)
+          break;
 
-          set_string(value, pe->object, "version_info[%s]", key);
+        string = ADD_OFFSET(string, string->Length);
+      }
 
-          if (string->Length == 0)
-            break;
+      if (string_table->Length == 0)
+        break;
+    }
+  }
+}
 
-          string = ADD_OFFSET(string, string->Length);
-        }
 
-        if (string_table->Length == 0)
-          break;
-      }
-    }
+// Resource callback: records type, id, language, size and data of one resource
+// as resources[pe->resources] and always asks the iteration to continue.
+int pe_collect_resources(
+    PIMAGE_RESOURCE_DATA_ENTRY rsrc_data,
+    int rsrc_type,
+    int rsrc_id,
+    int rsrc_language,
+    PE* pe)
+{
+  size_t offset = pe_rva_to_offset(pe, rsrc_data->OffsetToData);
+  // Skip unmapped data. fits_in_pe() checks a pointer, not a file offset.
+  if (offset == 0 || !fits_in_pe(pe, pe->data + offset, rsrc_data->Size))
+    return RESOURCE_CALLBACK_CONTINUE;
 
-    return RESOURCE_CALLBACK_ABORT;
-  }
+  set_integer(rsrc_type, pe->object, "resources[%i].type", pe->resources);
+  set_integer(rsrc_id, pe->object, "resources[%i].id", pe->resources);
+  set_integer(
+      rsrc_language, pe->object, "resources[%i].language", pe->resources);
+  set_integer(rsrc_data->Size, pe->object, "resources[%i].size", pe->resources);
+  set_sized_string((char*) (pe->data + offset), rsrc_data->Size, pe->object,
+      "resources[%i].data", pe->resources);
+
+  if (rsrc_type == RESOURCE_TYPE_VERSION)  // resources we do extra parsing on
+    pe_parse_version_info(rsrc_data, pe);
 
-  return RESOURCE_CALLBACK_CONTINUE;
+  pe->resources += 1;  // advance to the next free resources[] slot
+  return RESOURCE_CALLBACK_CONTINUE;
 }
 
 
@@ -1038,9 +1074,11 @@ void pe_parse_header(
 
   pe_iterate_resources(
       pe,
-      (RESOURCE_CALLBACK_FUNC) pe_find_version_info_cb,
+      (RESOURCE_CALLBACK_FUNC) pe_collect_resources,
       (void*) pe);
 
+  set_integer(pe->resources, pe->object, "number_of_resources");
+
   section = IMAGE_FIRST_SECTION(pe->header);
 
   int scount = min(pe->header->FileHeader.NumberOfSections, MAX_PE_SECTIONS);
@@ -1329,88 +1367,53 @@ define_function(imports)
 }
 
 
-typedef struct _FIND_LANGUAGE_CB_DATA
-{
-  uint64_t locale;
-  uint64_t mask;
-
-  int found;
-
-} FIND_LANGUAGE_CB_DATA;
-
-
-int pe_find_language_cb(
-    PIMAGE_RESOURCE_DATA_ENTRY rsrc_data,
-    int rsrc_type,
-    int rsrc_id,
-    int rsrc_language,
-    FIND_LANGUAGE_CB_DATA* cb_data)
-{
-  if ((rsrc_language & cb_data->mask) == cb_data->locale)
-  {
-    cb_data->found = TRUE;
-    return RESOURCE_CALLBACK_ABORT;
-  }
-
-  return RESOURCE_CALLBACK_CONTINUE;
-}
-
-
 define_function(locale)
 {
-  FIND_LANGUAGE_CB_DATA cb_data;
-
-  cb_data.locale = integer_argument(1);
-  cb_data.mask = 0xFFFF;
-  cb_data.found = FALSE;
+  int64_t n;  // value read from "number_of_resources"
+  uint64_t rsrc_language;
+  uint64_t locale = integer_argument(1);  // value to look for
 
   YR_OBJECT* module = module();
   PE* pe = (PE*) module->data;
 
   // If not a PE file, return UNDEFINED
-
   if (pe == NULL)
     return_integer(UNDEFINED);
 
-  if (pe_iterate_resources(pe,
-          (RESOURCE_CALLBACK_FUNC) pe_find_language_cb,
-          (void*) &cb_data))
-  {
-    return_integer(cb_data.found);
-  }
-  else
+  n = get_integer(module, "number_of_resources");
+  for (int i = 0; i < n; i++)  // walk the stored resources[i] entries
   {
-    return_integer(UNDEFINED);
+    rsrc_language = get_integer(module, "resources[%i].language", i);
+    if ((rsrc_language & 0xFFFF) == locale)  // compare the low 16 bits
+      return_integer(1);
   }
+
+  return_integer(0);  // no stored resource language matched
 }
 
 
 define_function(language)
 {
-  FIND_LANGUAGE_CB_DATA cb_data;
-
-  cb_data.locale = integer_argument(1);
-  cb_data.mask = 0xFF;
-  cb_data.found = FALSE;
+  int64_t n;  // value read from "number_of_resources"
+  uint64_t rsrc_language;
+  uint64_t language = integer_argument(1);  // value to look for
 
   YR_OBJECT* module = module();
   PE* pe = (PE*) module->data;
 
   // If not a PE file, return UNDEFINED
-
   if (pe == NULL)
     return_integer(UNDEFINED);
 
-  if (pe_iterate_resources(pe,
-          (RESOURCE_CALLBACK_FUNC) pe_find_language_cb,
-          (void*) &cb_data))
+  n = get_integer(module, "number_of_resources");
+  for (int i = 0; i < n; i++)  // walk the stored resources[i] entries
   {
-    return_integer(cb_data.found);
-  }
-  else
-  {
-    return_integer(UNDEFINED);
+    rsrc_language = get_integer(module, "resources[%i].language", i);
+    if ((rsrc_language & 0xFF) == language)  // compare the low 8 bits
+      return_integer(1);
   }
+
+  return_integer(0);  // no stored resource language matched
 }
 
 
@@ -1502,6 +1505,18 @@ begin_declarations;
   declare_function("locale", "i", "i", locale);
   declare_function("language", "i", "i", language);
 
+  declare_integer("resource_timestamp")
+  declare_integer("resource_major_version")
+  declare_integer("resource_minor_version")
+  begin_struct_array("resources");
+    declare_integer("type")
+    declare_integer("id")
+    declare_integer("language")
+    declare_integer("size")
+    declare_string("data")
+  end_struct_array("resources");
+  declare_integer("number_of_resources");
+
   #if defined(HAVE_LIBCRYPTO)
   begin_struct_array("signatures");
     declare_string("issuer");
@@ -1636,6 +1651,7 @@ int module_load(
         pe->data_size = block->size;
         pe->header = pe_header;
         pe->object = module_object;
+        pe->resources = 0;
 
         module_object->data = pe;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git



More information about the forensics-changes mailing list