[Pkg-sdl-commits] [sdlgfx] 01/06: Imported Upstream version 2.0.25

Gianfranco Costamagna locutusofborg-guest at moszumanska.debian.org
Fri Dec 20 16:14:03 UTC 2013


This is an automated email from the git hooks/post-receive script.

locutusofborg-guest pushed a commit to branch master
in repository sdlgfx.

commit cf3097d8875b0a09bbadaf5c512f4bf02138d4a8
Author: Gianfranco Costamagna <costamagnagianfranco at yahoo.it>
Date:   Sat Nov 9 11:46:16 2013 +0100

    Imported Upstream version 2.0.25
---
 ChangeLog                                          |     9 +
 Docs/html.doxyfile                                 |    10 +-
 Docs/html/_r_e_a_d_m_e.html                        |     6 +-
 Docs/html/_r_e_a_d_m_e_source.html                 |     6 +-
 Docs/html/_s_d_l__framerate_8c.html                |     6 +-
 Docs/html/_s_d_l__framerate_8c_source.html         |     6 +-
 Docs/html/_s_d_l__framerate_8h.html                |     6 +-
 Docs/html/_s_d_l__framerate_8h_source.html         |     6 +-
 Docs/html/_s_d_l__gfx_blit_func_8c.html            |     6 +-
 Docs/html/_s_d_l__gfx_blit_func_8c_source.html     |     6 +-
 Docs/html/_s_d_l__gfx_blit_func_8h.html            |    10 +-
 Docs/html/_s_d_l__gfx_blit_func_8h_source.html     |    10 +-
 Docs/html/_s_d_l__gfx_primitives_8c.html           |     6 +-
 Docs/html/_s_d_l__gfx_primitives_8c_source.html    |     8 +-
 Docs/html/_s_d_l__gfx_primitives_8h.html           |    10 +-
 Docs/html/_s_d_l__gfx_primitives_8h_source.html    |     8 +-
 Docs/html/_s_d_l__gfx_primitives__font_8h.html     |     6 +-
 .../_s_d_l__gfx_primitives__font_8h_source.html    |     6 +-
 Docs/html/_s_d_l__image_filter_8c.html             |  2551 +---
 Docs/html/_s_d_l__image_filter_8c_source.html      | 13467 +++++++++----------
 Docs/html/_s_d_l__image_filter_8h.html             |    90 +-
 Docs/html/_s_d_l__image_filter_8h_source.html      |     6 +-
 Docs/html/_s_d_l__rotozoom_8c.html                 |     6 +-
 Docs/html/_s_d_l__rotozoom_8c_source.html          |     6 +-
 Docs/html/_s_d_l__rotozoom_8h.html                 |     6 +-
 Docs/html/_s_d_l__rotozoom_8h_source.html          |     6 +-
 Docs/html/annotated.html                           |     2 +-
 Docs/html/classes.html                             |     2 +-
 Docs/html/files.html                               |    26 +-
 Docs/html/functions.html                           |     2 +-
 Docs/html/functions_vars.html                      |     2 +-
 Docs/html/globals.html                             |     5 +-
 Docs/html/globals_0x61.html                        |     2 +-
 Docs/html/globals_0x62.html                        |     2 +-
 Docs/html/globals_0x63.html                        |     2 +-
 Docs/html/globals_0x64.html                        |     2 +-
 Docs/html/globals_0x65.html                        |     2 +-
 Docs/html/globals_0x66.html                        |     2 +-
 Docs/html/globals_0x67.html                        |     2 +-
 Docs/html/globals_0x68.html                        |     2 +-
 Docs/html/globals_0x6c.html                        |     2 +-
 Docs/html/globals_0x6d.html                        |     2 +-
 Docs/html/globals_0x70.html                        |     2 +-
 Docs/html/globals_0x72.html                        |     2 +-
 Docs/html/globals_0x73.html                        |   129 +-
 Docs/html/globals_0x74.html                        |     2 +-
 Docs/html/globals_0x76.html                        |     2 +-
 Docs/html/globals_0x7a.html                        |     2 +-
 Docs/html/globals_defs.html                        |     2 +-
 Docs/html/globals_func.html                        |     5 +-
 Docs/html/globals_func_0x61.html                   |     2 +-
 Docs/html/globals_func_0x62.html                   |     2 +-
 Docs/html/globals_func_0x63.html                   |     2 +-
 Docs/html/globals_func_0x65.html                   |     2 +-
 Docs/html/globals_func_0x66.html                   |     2 +-
 Docs/html/globals_func_0x67.html                   |     2 +-
 Docs/html/globals_func_0x68.html                   |     2 +-
 Docs/html/globals_func_0x6c.html                   |     2 +-
 Docs/html/globals_func_0x70.html                   |     2 +-
 Docs/html/globals_func_0x72.html                   |     2 +-
 Docs/html/globals_func_0x73.html                   |   113 +-
 Docs/html/globals_func_0x74.html                   |     2 +-
 Docs/html/globals_func_0x76.html                   |     2 +-
 Docs/html/globals_func_0x7a.html                   |     2 +-
 Docs/html/globals_type.html                        |     2 +-
 Docs/html/globals_vars.html                        |     2 +-
 Docs/html/index.html                               |    25 +-
 Docs/html/struct_f_p_smanager.html                 |     4 +-
 Docs/html/struct_s_d_l__gfx_blit_info.html         |     4 +-
 .../html/struct_s_d_l__gfx_bresenham_iterator.html |     4 +-
 Docs/html/struct_s_d_l__gfx_murphy_iterator.html   |     4 +-
 Docs/html/structt_color_r_g_b_a.html               |     4 +-
 Docs/html/structt_color_y.html                     |     4 +-
 INSTALL                                            |     9 +-
 Makefile.am                                        |     1 +
 Makefile.in                                        |   140 +-
 README                                             |    11 +-
 SDL_gfx.spec                                       |     4 +-
 SDL_gfxBlitFunc.h                                  |     4 +-
 SDL_gfxPrimitives.c                                |     2 +-
 SDL_gfxPrimitives.h                                |     2 +-
 SDL_gfx_VS2008.vcproj                              |     6 +-
 SDL_gfx.sln => SDL_gfx_VS2010.sln                  |    27 +-
 SDL_gfx_VS2010.vcxproj                             |   134 +
 SDL_imageFilter.c                                  |  1656 +--
 Test/INSTALL                                       |     9 +-
 Test/LaplaceRelaxation_VS2008.vcproj               |     8 +-
 ...on.vcxproj => LaplaceRelaxation_VS2010.vcxproj} |    62 +-
 Test/Makefile.am                                   |     2 +
 Test/Makefile.in                                   |   130 +-
 Test/TestABGR_VS2008.vcproj                        |     8 +-
 Test/{TestABGR.vcxproj => TestABGR_VS2010.vcxproj} |    59 +-
 ...{TestFonts.vcxproj => TestFonts_VS2010.vcxproj} |    54 +-
 ...merate.vcxproj => TestFramerate_VS2010.vcxproj} |    55 +-
 ...tGfxBlit.vcxproj => TestGfxBlit_VS2010.vcxproj} |    55 +-
 ...es.vcxproj => TestGfxPrimitives_VS2010.vcxproj} |    55 +-
 ...xture.vcxproj => TestGfxTexture_VS2010.vcxproj} |    55 +-
 Test/TestImageFilter.c                             |   742 +-
 ...lter.vcxproj => TestImageFilter_VS2010.vcxproj} |    55 +-
 ...otozoom.vcxproj => TestRotozoom_VS2010.vcxproj} |    56 +-
 ...estShrink.vcxproj => TestShrink_VS2010.vcxproj} |    56 +-
 Test/aclocal.m4                                    |   127 +-
 Test/config.guess                                  |   259 +-
 Test/config.sub                                    |   213 +-
 Test/configure                                     |   402 +-
 Test/install-sh                                    |    29 +-
 Test/ltmain.sh                                     |    32 +-
 Test/missing                                       |    53 +-
 aclocal.m4                                         |   127 +-
 config.guess                                       |   259 +-
 config.sub                                         |   213 +-
 configure                                          |   436 +-
 configure.in                                       |    20 +-
 depcomp                                            |   190 +-
 install-sh                                         |    29 +-
 ltmain.sh                                          |    32 +-
 missing                                            |    53 +-
 117 files changed, 10473 insertions(+), 12124 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index e2c71d1..1ac6258 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,15 @@
 CHANGES/VERSION
 ===============
 
+Ver 2.0.25 - Sun, Oct 27, 2013  3:08:15 PM
+* Added patch for 32- and 64-bit GCC-compiled MMX support
+  (contributed by Sylvain Beucler, backported from SDL2_gfx)
+* Fixed bug in _aaline when x1>x2 and dy==0 (found by Yannick 
+  Guesnet - thanks!)
+* Updated documentation.
+* Fixed header in SDL_gfxBlitFunc.h (reported by Jaders77 on
+  sourceforge - thanks!)
+
 Ver 2.0.24 - Sun, Jul 22, 2012  9:27:29 AM
 * Removed some missed LGPL references
 * Fixed thick line swap bug (patch contributed by Thien-Thi)
diff --git a/Docs/html.doxyfile b/Docs/html.doxyfile
index 7a7e5c7..dc144fe 100644
--- a/Docs/html.doxyfile
+++ b/Docs/html.doxyfile
@@ -32,7 +32,7 @@ PROJECT_NAME           = SDL_gfx
 # This could be handy for archiving the generated documentation or 
 # if some version control system is used.
 
-PROJECT_NUMBER         = 2.0.24
+PROJECT_NUMBER         = 2.0.25
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description 
 # for a project that appears at the top of each page and should give viewer 
@@ -52,7 +52,7 @@ PROJECT_LOGO           =
 # If a relative path is entered, it will be relative to the location 
 # where doxygen was started. If left blank the current directory will be used.
 
-OUTPUT_DIRECTORY       = "C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/Docs"
+OUTPUT_DIRECTORY       = .
 
 # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 
 # 4096 sub-directories (in 2 levels) under the output directory of each output 
@@ -138,7 +138,7 @@ FULL_PATH_NAMES        = YES
 # If left blank the directory from which doxygen is run is used as the 
 # path to strip.
 
-STRIP_FROM_PATH        = "E:/Users/Andreas Schiffler/Desktop/SVN/sdlgfx"
+STRIP_FROM_PATH        = 
 
 # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of 
 # the path mentioned in the documentation of a class, which tells 
@@ -671,7 +671,7 @@ WARN_LOGFILE           =
 # directories like "/usr/src/myproject". Separate the files or directories 
 # with spaces.
 
-INPUT                  = "C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx"
+INPUT                  = ..
 
 # This tag can be used to specify the character encoding of the source files 
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is 
@@ -762,7 +762,7 @@ EXCLUDE_SYMBOLS        =
 # directories that contain example code fragments that are included (see 
 # the \include command).
 
-EXAMPLE_PATH           = "C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx"
+EXAMPLE_PATH           = ..
 
 # If the value of the EXAMPLE_PATH tag contains directories, you can use the 
 # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
diff --git a/Docs/html/_r_e_a_d_m_e.html b/Docs/html/_r_e_a_d_m_e.html
index ea7001d..d58efad 100644
--- a/Docs/html/_r_e_a_d_m_e.html
+++ b/Docs/html/_r_e_a_d_m_e.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/README File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/README File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/README File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/README File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 
diff --git a/Docs/html/_r_e_a_d_m_e_source.html b/Docs/html/_r_e_a_d_m_e_source.html
index d07fa6c..27d963e 100644
--- a/Docs/html/_r_e_a_d_m_e_source.html
+++ b/Docs/html/_r_e_a_d_m_e_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/README Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/README Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/README</div>  </div>
+<div class="title">I:/Sources/sdlgfx/README</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_r_e_a_d_m_e.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 
diff --git a/Docs/html/_s_d_l__framerate_8c.html b/Docs/html/_s_d_l__framerate_8c.html
index 9999749..351f8d4 100644
--- a/Docs/html/_s_d_l__framerate_8c.html
+++ b/Docs/html/_s_d_l__framerate_8c.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_framerate.c File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_framerate.c File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -54,7 +54,7 @@
   <div class="summary">
 <a href="#func-members">Functions</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_framerate.c File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_framerate.c File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 <div class="textblock"><code>#include "<a class="el" href="_s_d_l__framerate_8h_source.html">SDL_framerate.h</a>"</code><br/>
diff --git a/Docs/html/_s_d_l__framerate_8c_source.html b/Docs/html/_s_d_l__framerate_8c_source.html
index de93365..e337c64 100644
--- a/Docs/html/_s_d_l__framerate_8c_source.html
+++ b/Docs/html/_s_d_l__framerate_8c_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_framerate.c Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_framerate.c Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_framerate.c</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_framerate.c</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__framerate_8c.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/*</span>
diff --git a/Docs/html/_s_d_l__framerate_8h.html b/Docs/html/_s_d_l__framerate_8h.html
index d47d9bc..41d199c 100644
--- a/Docs/html/_s_d_l__framerate_8h.html
+++ b/Docs/html/_s_d_l__framerate_8h.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_framerate.h File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_framerate.h File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -56,7 +56,7 @@
 <a href="#define-members">Defines</a> |
 <a href="#func-members">Functions</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_framerate.h File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_framerate.h File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 <div class="textblock"><code>#include "SDL.h"</code><br/>
diff --git a/Docs/html/_s_d_l__framerate_8h_source.html b/Docs/html/_s_d_l__framerate_8h_source.html
index 99c2c7f..e0d8f4e 100644
--- a/Docs/html/_s_d_l__framerate_8h_source.html
+++ b/Docs/html/_s_d_l__framerate_8h_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_framerate.h Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_framerate.h Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_framerate.h</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_framerate.h</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__framerate_8h.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/*</span>
diff --git a/Docs/html/_s_d_l__gfx_blit_func_8c.html b/Docs/html/_s_d_l__gfx_blit_func_8c.html
index d99fa0f..efb0e0c 100644
--- a/Docs/html/_s_d_l__gfx_blit_func_8c.html
+++ b/Docs/html/_s_d_l__gfx_blit_func_8c.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxBlitFunc.c File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_gfxBlitFunc.c File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -55,7 +55,7 @@
 <a href="#func-members">Functions</a> |
 <a href="#var-members">Variables</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxBlitFunc.c File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_gfxBlitFunc.c File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 <div class="textblock"><code>#include "<a class="el" href="_s_d_l__gfx_blit_func_8h_source.html">SDL_gfxBlitFunc.h</a>"</code><br/>
diff --git a/Docs/html/_s_d_l__gfx_blit_func_8c_source.html b/Docs/html/_s_d_l__gfx_blit_func_8c_source.html
index 0f717e9..7914993 100644
--- a/Docs/html/_s_d_l__gfx_blit_func_8c_source.html
+++ b/Docs/html/_s_d_l__gfx_blit_func_8c_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxBlitFunc.c Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_gfxBlitFunc.c Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxBlitFunc.c</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_gfxBlitFunc.c</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__gfx_blit_func_8c.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* </span>
diff --git a/Docs/html/_s_d_l__gfx_blit_func_8h.html b/Docs/html/_s_d_l__gfx_blit_func_8h.html
index e3571d3..c01f999 100644
--- a/Docs/html/_s_d_l__gfx_blit_func_8h.html
+++ b/Docs/html/_s_d_l__gfx_blit_func_8h.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxBlitFunc.h File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_gfxBlitFunc.h File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -57,13 +57,13 @@
 <a href="#func-members">Functions</a> |
 <a href="#var-members">Variables</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxBlitFunc.h File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_gfxBlitFunc.h File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 <div class="textblock"><code>#include <stdio.h></code><br/>
 <code>#include <stdlib.h></code><br/>
-<code>#include <SDL.h></code><br/>
-<code>#include <SDL_video.h></code><br/>
+<code>#include "SDL.h"</code><br/>
+<code>#include "SDL_video.h"</code><br/>
 </div>
 <p><a href="_s_d_l__gfx_blit_func_8h_source.html">Go to the source code of this file.</a></p>
 <table class="memberdecls">
diff --git a/Docs/html/_s_d_l__gfx_blit_func_8h_source.html b/Docs/html/_s_d_l__gfx_blit_func_8h_source.html
index 326300e..d8bb6aa 100644
--- a/Docs/html/_s_d_l__gfx_blit_func_8h_source.html
+++ b/Docs/html/_s_d_l__gfx_blit_func_8h_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxBlitFunc.h Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_gfxBlitFunc.h Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxBlitFunc.h</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_gfxBlitFunc.h</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__gfx_blit_func_8h.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* </span>
@@ -95,8 +95,8 @@
 <a name="l00038"></a>00038 <span class="preprocessor">#include <stdio.h></span>
 <a name="l00039"></a>00039 <span class="preprocessor">#include <stdlib.h></span>
 <a name="l00040"></a>00040 
-<a name="l00041"></a>00041 <span class="preprocessor">#include <SDL.h></span>
-<a name="l00042"></a>00042 <span class="preprocessor">#include <SDL_video.h></span>
+<a name="l00041"></a>00041 <span class="preprocessor">#include "SDL.h"</span>
+<a name="l00042"></a>00042 <span class="preprocessor">#include "SDL_video.h"</span>
 <a name="l00043"></a>00043 
 <a name="l00044"></a>00044 
 <a name="l00045"></a>00045         <span class="keyword">extern</span> <span class="keyword">const</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__gfx_blit_func_8c.html#a6d0c17342154e14322a281603960691a" title="Alpha adjustment table for custom blitter.">GFX_ALPHA_ADJUST_ARRAY</a>[256];
diff --git a/Docs/html/_s_d_l__gfx_primitives_8c.html b/Docs/html/_s_d_l__gfx_primitives_8c.html
index f5d406f..3cd567a 100644
--- a/Docs/html/_s_d_l__gfx_primitives_8c.html
+++ b/Docs/html/_s_d_l__gfx_primitives_8c.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives.c File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_gfxPrimitives.c File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -56,7 +56,7 @@
 <a href="#define-members">Defines</a> |
 <a href="#func-members">Functions</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives.c File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_gfxPrimitives.c File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 <div class="textblock"><code>#include <stdio.h></code><br/>
diff --git a/Docs/html/_s_d_l__gfx_primitives_8c_source.html b/Docs/html/_s_d_l__gfx_primitives_8c_source.html
index b11e714..92a8af7 100644
--- a/Docs/html/_s_d_l__gfx_primitives_8c_source.html
+++ b/Docs/html/_s_d_l__gfx_primitives_8c_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives.c Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_gfxPrimitives.c Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives.c</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_gfxPrimitives.c</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__gfx_primitives_8c.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* </span>
@@ -2270,7 +2270,7 @@
 <a name="l02659"></a>02659                 {
 <a name="l02660"></a>02660                         <span class="keywordflow">return</span> (<a class="code" href="_s_d_l__gfx_primitives_8c.html#ac211a904dce45093315e15b10c80d8ac" title="Draw horizontal line with blending.">hlineColor</a>(dst, x1, x2, y1, color));
 <a name="l02661"></a>02661                 } <span class="keywordflow">else</span> {
-<a name="l02662"></a>02662                         <span class="keywordflow">if</span> (dx>0) {
+<a name="l02662"></a>02662                         <span class="keywordflow">if</span> (dx!=0) {
 <a name="l02663"></a>02663                                 <span class="keywordflow">return</span> (<a class="code" href="_s_d_l__gfx_primitives_8c.html#ac211a904dce45093315e15b10c80d8ac" title="Draw horizontal line with blending.">hlineColor</a>(dst, xx0, xx0+dx, y1, color));
 <a name="l02664"></a>02664                         } <span class="keywordflow">else</span> {
 <a name="l02665"></a>02665                                 <span class="keywordflow">return</span> (<a class="code" href="_s_d_l__gfx_primitives_8c.html#ae6f8690e5c5a85d3263c8e16727b34ef" title="Pixel draw with blending enabled if a<255.">pixelColor</a>(dst, x1, y1, color));
diff --git a/Docs/html/_s_d_l__gfx_primitives_8h.html b/Docs/html/_s_d_l__gfx_primitives_8h.html
index b6ae297..6460835 100644
--- a/Docs/html/_s_d_l__gfx_primitives_8h.html
+++ b/Docs/html/_s_d_l__gfx_primitives_8h.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives.h File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_gfxPrimitives.h File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -55,7 +55,7 @@
 <a href="#define-members">Defines</a> |
 <a href="#func-members">Functions</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives.h File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_gfxPrimitives.h File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 <div class="textblock"><code>#include <math.h></code><br/>
@@ -68,7 +68,7 @@ Defines</h2></td></tr>
 <tr class="memitem:ae71449b1cc6e6250b91f539153a7a0d3"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__gfx_primitives_8h.html#ae71449b1cc6e6250b91f539153a7a0d3">M_PI</a>   3.1415926535897932384626433832795</td></tr>
 <tr class="memitem:a2a585f5832061010155d87737ef5bf88"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__gfx_primitives_8h.html#a2a585f5832061010155d87737ef5bf88">SDL_GFXPRIMITIVES_MAJOR</a>   2</td></tr>
 <tr class="memitem:abd0939b1856bbb822b37b68755dc9ee5"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__gfx_primitives_8h.html#abd0939b1856bbb822b37b68755dc9ee5">SDL_GFXPRIMITIVES_MINOR</a>   0</td></tr>
-<tr class="memitem:a06ea681a295987b8b8ec3fcdb04713df"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__gfx_primitives_8h.html#a06ea681a295987b8b8ec3fcdb04713df">SDL_GFXPRIMITIVES_MICRO</a>   24</td></tr>
+<tr class="memitem:a06ea681a295987b8b8ec3fcdb04713df"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__gfx_primitives_8h.html#a06ea681a295987b8b8ec3fcdb04713df">SDL_GFXPRIMITIVES_MICRO</a>   25</td></tr>
 <tr class="memitem:a9501419cbbd2b8739ec5dc4e890c165b"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__gfx_primitives_8h.html#a9501419cbbd2b8739ec5dc4e890c165b">SDL_GFXPRIMITIVES_SCOPE</a>   extern</td></tr>
 <tr><td colspan="2"><h2><a name="func-members"></a>
 Functions</h2></td></tr>
@@ -233,7 +233,7 @@ Functions</h2></td></tr>
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">#define <a class="el" href="_s_d_l__gfx_primitives_8h.html#a06ea681a295987b8b8ec3fcdb04713df">SDL_GFXPRIMITIVES_MICRO</a>   24</td>
+          <td class="memname">#define <a class="el" href="_s_d_l__gfx_primitives_8h.html#a06ea681a295987b8b8ec3fcdb04713df">SDL_GFXPRIMITIVES_MICRO</a>   25</td>
         </tr>
       </table>
 </div>
diff --git a/Docs/html/_s_d_l__gfx_primitives_8h_source.html b/Docs/html/_s_d_l__gfx_primitives_8h_source.html
index 7ac6d67..b1425c1 100644
--- a/Docs/html/_s_d_l__gfx_primitives_8h_source.html
+++ b/Docs/html/_s_d_l__gfx_primitives_8h_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives.h Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_gfxPrimitives.h Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives.h</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_gfxPrimitives.h</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__gfx_primitives_8h.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* </span>
@@ -103,7 +103,7 @@
 <a name="l00046"></a>00046 
 <a name="l00047"></a><a class="code" href="_s_d_l__gfx_primitives_8h.html#a2a585f5832061010155d87737ef5bf88">00047</a> <span class="preprocessor">#define SDL_GFXPRIMITIVES_MAJOR 2</span>
 <a name="l00048"></a><a class="code" href="_s_d_l__gfx_primitives_8h.html#abd0939b1856bbb822b37b68755dc9ee5">00048</a> <span class="preprocessor"></span><span class="preprocessor">#define SDL_GFXPRIMITIVES_MINOR 0</span>
-<a name="l00049"></a><a class="code" href="_s_d_l__gfx_primitives_8h.html#a06ea681a295987b8b8ec3fcdb04713df">00049</a> <span class="preprocessor"></span><span class="preprocessor">#define SDL_GFXPRIMITIVES_MICRO 24</span>
+<a name="l00049"></a><a class="code" href="_s_d_l__gfx_primitives_8h.html#a06ea681a295987b8b8ec3fcdb04713df">00049</a> <span class="preprocessor"></span><span class="preprocessor">#define SDL_GFXPRIMITIVES_MICRO 25</span>
 <a name="l00050"></a>00050 <span class="preprocessor"></span>
 <a name="l00051"></a>00051 
 <a name="l00052"></a>00052         <span class="comment">/* ---- Function Prototypes */</span>
diff --git a/Docs/html/_s_d_l__gfx_primitives__font_8h.html b/Docs/html/_s_d_l__gfx_primitives__font_8h.html
index e7af2aa..84d79ae 100644
--- a/Docs/html/_s_d_l__gfx_primitives__font_8h.html
+++ b/Docs/html/_s_d_l__gfx_primitives__font_8h.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives_font.h File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_gfxPrimitives_font.h File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -54,7 +54,7 @@
   <div class="summary">
 <a href="#define-members">Defines</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives_font.h File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_gfxPrimitives_font.h File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 
diff --git a/Docs/html/_s_d_l__gfx_primitives__font_8h_source.html b/Docs/html/_s_d_l__gfx_primitives__font_8h_source.html
index f5f6b1a..ceb570c 100644
--- a/Docs/html/_s_d_l__gfx_primitives__font_8h_source.html
+++ b/Docs/html/_s_d_l__gfx_primitives__font_8h_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives_font.h Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_gfxPrimitives_font.h Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_gfxPrimitives_font.h</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_gfxPrimitives_font.h</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__gfx_primitives__font_8h.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 
diff --git a/Docs/html/_s_d_l__image_filter_8c.html b/Docs/html/_s_d_l__image_filter_8c.html
index 150d958..7d780c9 100644
--- a/Docs/html/_s_d_l__image_filter_8c.html
+++ b/Docs/html/_s_d_l__image_filter_8c.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_imageFilter.c File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_imageFilter.c File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -55,12 +55,13 @@
 <a href="#define-members">Defines</a> |
 <a href="#func-members">Functions</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_imageFilter.c File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_imageFilter.c File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 <div class="textblock"><code>#include <stdio.h></code><br/>
 <code>#include <stdlib.h></code><br/>
 <code>#include <string.h></code><br/>
+<code>#include <SDL_cpuinfo.h></code><br/>
 <code>#include "<a class="el" href="_s_d_l__image_filter_8h_source.html">SDL_imageFilter.h</a>"</code><br/>
 </div>
 <p><a href="_s_d_l__image_filter_8c_source.html">Go to the source code of this file.</a></p>
@@ -71,120 +72,68 @@ Defines</h2></td></tr>
 <tr class="memdesc:a700fb30611761c46a674a45cc28ff561"><td class="mdescLeft"> </td><td class="mdescRight">Swaps the byte order in a 32bit integer (LSB becomes MSB, etc.).  <a href="#a700fb30611761c46a674a45cc28ff561"></a><br/></td></tr>
 <tr><td colspan="2"><h2><a name="func-members"></a>
 Functions</h2></td></tr>
-<tr class="memitem:ade15666303ddc71c543f44cf1536d00e"><td class="memItemLeft" align="right" valign="top">unsigned int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ade15666303ddc71c543f44cf1536d00e">_cpuFlags</a> ()</td></tr>
-<tr class="memdesc:ade15666303ddc71c543f44cf1536d00e"><td class="mdescLeft"> </td><td class="mdescRight">Internal function returning the CPU flags.  <a href="#ade15666303ddc71c543f44cf1536d00e"></a><br/></td></tr>
 <tr class="memitem:a798ce71024ee1a1d1b174fd60fe79917"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917">SDL_imageFilterMMXdetect</a> (void)</td></tr>
 <tr class="memdesc:a798ce71024ee1a1d1b174fd60fe79917"><td class="mdescLeft"> </td><td class="mdescRight">MMX detection routine (with override flag).  <a href="#a798ce71024ee1a1d1b174fd60fe79917"></a><br/></td></tr>
 <tr class="memitem:a5dff661660755161bb4aaf6199cd1384"><td class="memItemLeft" align="right" valign="top">void </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a5dff661660755161bb4aaf6199cd1384">SDL_imageFilterMMXoff</a> ()</td></tr>
 <tr class="memdesc:a5dff661660755161bb4aaf6199cd1384"><td class="mdescLeft"> </td><td class="mdescRight">Disable MMX check for filter functions and and force to use non-MMX C based code.  <a href="#a5dff661660755161bb4aaf6199cd1384"></a><br/></td></tr>
 <tr class="memitem:a353ee234c3b51b33c4c5c4b30db5832d"><td class="memItemLeft" align="right" valign="top">void </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a353ee234c3b51b33c4c5c4b30db5832d">SDL_imageFilterMMXon</a> ()</td></tr>
 <tr class="memdesc:a353ee234c3b51b33c4c5c4b30db5832d"><td class="mdescLeft"> </td><td class="mdescRight">Enable MMX check for filter functions and use MMX code if available.  <a href="#a353ee234c3b51b33c4c5c4b30db5832d"></a><br/></td></tr>
-<tr class="memitem:ace0bf40de8d58bbd8d6ff9c3fc04ec6e"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ace0bf40de8d58bbd8d6ff9c3fc04ec6e">SDL_imageFilterAddMMX</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)</td></tr>
-<tr class="memdesc:ace0bf40de8d58bbd8d6ff9c3fc04ec6e"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using Add: D = saturation255(S1 + S2)  <a href="#ace0bf40de8d58bbd8d6ff9c3fc04ec6e"></a><br/></td></tr>
 <tr class="memitem:a9f06507eb0b63198dbd67495d61c9b20"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a9f06507eb0b63198dbd67495d61c9b20">SDL_imageFilterAdd</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:a9f06507eb0b63198dbd67495d61c9b20"><td class="mdescLeft"> </td><td class="mdescRight">Filter using Add: D = saturation255(S1 + S2)  <a href="#a9f06507eb0b63198dbd67495d61c9b20"></a><br/></td></tr>
-<tr class="memitem:ae3a61d6df0940ef96ccc7b48a0fc8966"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ae3a61d6df0940ef96ccc7b48a0fc8966">SDL_imageFilterMeanMMX</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength, unsigned char *Mask)</td></tr>
-<tr class="memdesc:ae3a61d6df0940ef96ccc7b48a0fc8966"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using Mean: D = S1/2 + S2/2.  <a href="#ae3a61d6df0940ef96ccc7b48a0fc8966"></a><br/></td></tr>
 <tr class="memitem:ace072118fef77973210eb04fb4bfc779"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ace072118fef77973210eb04fb4bfc779">SDL_imageFilterMean</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:ace072118fef77973210eb04fb4bfc779"><td class="mdescLeft"> </td><td class="mdescRight">Filter using Mean: D = S1/2 + S2/2.  <a href="#ace072118fef77973210eb04fb4bfc779"></a><br/></td></tr>
-<tr class="memitem:a45d54d410e677d32ef33ef6226e9ea12"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a45d54d410e677d32ef33ef6226e9ea12">SDL_imageFilterSubMMX</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)</td></tr>
-<tr class="memdesc:a45d54d410e677d32ef33ef6226e9ea12"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using Sub: D = saturation0(S1 - S2)  <a href="#a45d54d410e677d32ef33ef6226e9ea12"></a><br/></td></tr>
 <tr class="memitem:a3c01cf8576ea7a0dfc09dbaa953c9287"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a3c01cf8576ea7a0dfc09dbaa953c9287">SDL_imageFilterSub</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:a3c01cf8576ea7a0dfc09dbaa953c9287"><td class="mdescLeft"> </td><td class="mdescRight">Filter using Sub: D = saturation0(S1 - S2)  <a href="#a3c01cf8576ea7a0dfc09dbaa953c9287"></a><br/></td></tr>
-<tr class="memitem:a601bf863185e51af32c6008ecb0a5095"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a601bf863185e51af32c6008ecb0a5095">SDL_imageFilterAbsDiffMMX</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)</td></tr>
-<tr class="memdesc:a601bf863185e51af32c6008ecb0a5095"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using AbsDiff: D = | S1 - S2 |.  <a href="#a601bf863185e51af32c6008ecb0a5095"></a><br/></td></tr>
 <tr class="memitem:a472909f904274255cd6793c520172e48"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a472909f904274255cd6793c520172e48">SDL_imageFilterAbsDiff</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:a472909f904274255cd6793c520172e48"><td class="mdescLeft"> </td><td class="mdescRight">Filter using AbsDiff: D = | S1 - S2 |.  <a href="#a472909f904274255cd6793c520172e48"></a><br/></td></tr>
-<tr class="memitem:ad565921b533977ad2059d58d3c4a3094"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ad565921b533977ad2059d58d3c4a3094">SDL_imageFilterMultMMX</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)</td></tr>
-<tr class="memdesc:ad565921b533977ad2059d58d3c4a3094"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using Mult: D = saturation255(S1 * S2)  <a href="#ad565921b533977ad2059d58d3c4a3094"></a><br/></td></tr>
 <tr class="memitem:af4633031d40a9ea0956a2f3c6c87a384"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#af4633031d40a9ea0956a2f3c6c87a384">SDL_imageFilterMult</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:af4633031d40a9ea0956a2f3c6c87a384"><td class="mdescLeft"> </td><td class="mdescRight">Filter using Mult: D = saturation255(S1 * S2)  <a href="#af4633031d40a9ea0956a2f3c6c87a384"></a><br/></td></tr>
 <tr class="memitem:a346db972dff9c56e3c45c904eaa3c39a"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a346db972dff9c56e3c45c904eaa3c39a">SDL_imageFilterMultNorASM</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)</td></tr>
 <tr class="memdesc:a346db972dff9c56e3c45c904eaa3c39a"><td class="mdescLeft"> </td><td class="mdescRight">Internal ASM Filter using MultNor: D = S1 * S2.  <a href="#a346db972dff9c56e3c45c904eaa3c39a"></a><br/></td></tr>
 <tr class="memitem:a5f3c9fd40426bb46eba5ac167505dcc5"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a5f3c9fd40426bb46eba5ac167505dcc5">SDL_imageFilterMultNor</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:a5f3c9fd40426bb46eba5ac167505dcc5"><td class="mdescLeft"> </td><td class="mdescRight">Filter using MultNor: D = S1 * S2.  <a href="#a5f3c9fd40426bb46eba5ac167505dcc5"></a><br/></td></tr>
-<tr class="memitem:a12272cd24ce7f09bc2c35c609e025983"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a12272cd24ce7f09bc2c35c609e025983">SDL_imageFilterMultDivby2MMX</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)</td></tr>
-<tr class="memdesc:a12272cd24ce7f09bc2c35c609e025983"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using MultDivby2: D = saturation255(S1/2 * S2)  <a href="#a12272cd24ce7f09bc2c35c609e025983"></a><br/></td></tr>
 <tr class="memitem:a80737f6427c7bdb30d39a92f6524fc14"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a80737f6427c7bdb30d39a92f6524fc14">SDL_imageFilterMultDivby2</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:a80737f6427c7bdb30d39a92f6524fc14"><td class="mdescLeft"> </td><td class="mdescRight">Filter using MultDivby2: D = saturation255(S1/2 * S2)  <a href="#a80737f6427c7bdb30d39a92f6524fc14"></a><br/></td></tr>
-<tr class="memitem:a1f8bf77328e934701c7a9e4ef51d9b41"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a1f8bf77328e934701c7a9e4ef51d9b41">SDL_imageFilterMultDivby4MMX</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)</td></tr>
-<tr class="memdesc:a1f8bf77328e934701c7a9e4ef51d9b41"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using MultDivby4: D = saturation255(S1/2 * S2/2)  <a href="#a1f8bf77328e934701c7a9e4ef51d9b41"></a><br/></td></tr>
 <tr class="memitem:a30e685653eb1050c7d48feaeb8f801a1"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a30e685653eb1050c7d48feaeb8f801a1">SDL_imageFilterMultDivby4</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:a30e685653eb1050c7d48feaeb8f801a1"><td class="mdescLeft"> </td><td class="mdescRight">Filter using MultDivby4: D = saturation255(S1/2 * S2/2)  <a href="#a30e685653eb1050c7d48feaeb8f801a1"></a><br/></td></tr>
-<tr class="memitem:a8a86c969daeb874fb643347592003484"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a8a86c969daeb874fb643347592003484">SDL_imageFilterBitAndMMX</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)</td></tr>
-<tr class="memdesc:a8a86c969daeb874fb643347592003484"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using BitAnd: D = S1 & S2.  <a href="#a8a86c969daeb874fb643347592003484"></a><br/></td></tr>
 <tr class="memitem:a85837ce1b5de1f907b6b9053922b5cbc"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a85837ce1b5de1f907b6b9053922b5cbc">SDL_imageFilterBitAnd</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:a85837ce1b5de1f907b6b9053922b5cbc"><td class="mdescLeft"> </td><td class="mdescRight">Filter using BitAnd: D = S1 & S2.  <a href="#a85837ce1b5de1f907b6b9053922b5cbc"></a><br/></td></tr>
-<tr class="memitem:a2cd7db5de491dce5dfcf292fc241031d"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a2cd7db5de491dce5dfcf292fc241031d">SDL_imageFilterBitOrMMX</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)</td></tr>
-<tr class="memdesc:a2cd7db5de491dce5dfcf292fc241031d"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using BitOr: D = S1 | S2.  <a href="#a2cd7db5de491dce5dfcf292fc241031d"></a><br/></td></tr>
 <tr class="memitem:a5cf1c477f4e32d02f74ee95d9f7b0021"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a5cf1c477f4e32d02f74ee95d9f7b0021">SDL_imageFilterBitOr</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:a5cf1c477f4e32d02f74ee95d9f7b0021"><td class="mdescLeft"> </td><td class="mdescRight">Filter using BitOr: D = S1 | S2.  <a href="#a5cf1c477f4e32d02f74ee95d9f7b0021"></a><br/></td></tr>
-<tr class="memitem:a95791d257c510c597a2ef542f43d6678"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a95791d257c510c597a2ef542f43d6678">SDL_imageFilterDivASM</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)</td></tr>
-<tr class="memdesc:a95791d257c510c597a2ef542f43d6678"><td class="mdescLeft"> </td><td class="mdescRight">Internal ASM Filter using Div: D = S1 / S2.  <a href="#a95791d257c510c597a2ef542f43d6678"></a><br/></td></tr>
 <tr class="memitem:a0ea22f01c6a4724bac307da3e5355f58"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a0ea22f01c6a4724bac307da3e5355f58">SDL_imageFilterDiv</a> (unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:a0ea22f01c6a4724bac307da3e5355f58"><td class="mdescLeft"> </td><td class="mdescRight">Filter using Div: D = S1 / S2.  <a href="#a0ea22f01c6a4724bac307da3e5355f58"></a><br/></td></tr>
-<tr class="memitem:a1b522e196f9647501c6badd1de727b97"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a1b522e196f9647501c6badd1de727b97">SDL_imageFilterBitNegationMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength)</td></tr>
-<tr class="memdesc:a1b522e196f9647501c6badd1de727b97"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using BitNegation: D = !S.  <a href="#a1b522e196f9647501c6badd1de727b97"></a><br/></td></tr>
 <tr class="memitem:ac3abfaa8ec2e88c3c4893588c5555856"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ac3abfaa8ec2e88c3c4893588c5555856">SDL_imageFilterBitNegation</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length)</td></tr>
 <tr class="memdesc:ac3abfaa8ec2e88c3c4893588c5555856"><td class="mdescLeft"> </td><td class="mdescRight">Filter using BitNegation: D = !S.  <a href="#ac3abfaa8ec2e88c3c4893588c5555856"></a><br/></td></tr>
-<tr class="memitem:a032e94beee7b3f7dc9e3bc999f51dfb3"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a032e94beee7b3f7dc9e3bc999f51dfb3">SDL_imageFilterAddByteMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)</td></tr>
-<tr class="memdesc:a032e94beee7b3f7dc9e3bc999f51dfb3"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using AddByte: D = saturation255(S + C)  <a href="#a032e94beee7b3f7dc9e3bc999f51dfb3"></a><br/></td></tr>
 <tr class="memitem:a812cb307cb60ef31f1ffe81a9eee6bb1"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a812cb307cb60ef31f1ffe81a9eee6bb1">SDL_imageFilterAddByte</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)</td></tr>
 <tr class="memdesc:a812cb307cb60ef31f1ffe81a9eee6bb1"><td class="mdescLeft"> </td><td class="mdescRight">Filter using AddByte: D = saturation255(S + C)  <a href="#a812cb307cb60ef31f1ffe81a9eee6bb1"></a><br/></td></tr>
-<tr class="memitem:ac337129ad7f11e7e33d73fa39b8239eb"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ac337129ad7f11e7e33d73fa39b8239eb">SDL_imageFilterAddUintMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)</td></tr>
-<tr class="memdesc:ac337129ad7f11e7e33d73fa39b8239eb"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C)  <a href="#ac337129ad7f11e7e33d73fa39b8239eb"></a><br/></td></tr>
 <tr class="memitem:a660543426c47dfec39a349eb3b8f905b"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a660543426c47dfec39a349eb3b8f905b">SDL_imageFilterAddUint</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)</td></tr>
 <tr class="memdesc:a660543426c47dfec39a349eb3b8f905b"><td class="mdescLeft"> </td><td class="mdescRight">Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C)  <a href="#a660543426c47dfec39a349eb3b8f905b"></a><br/></td></tr>
-<tr class="memitem:ae7c132373eb318713635c4e82f478f9d"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ae7c132373eb318713635c4e82f478f9d">SDL_imageFilterAddByteToHalfMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C, unsigned char *Mask)</td></tr>
-<tr class="memdesc:ae7c132373eb318713635c4e82f478f9d"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using AddByteToHalf: D = saturation255(S/2 + C)  <a href="#ae7c132373eb318713635c4e82f478f9d"></a><br/></td></tr>
 <tr class="memitem:ab82db97d129c8cfc36780bcdc6286fcc"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ab82db97d129c8cfc36780bcdc6286fcc">SDL_imageFilterAddByteToHalf</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)</td></tr>
 <tr class="memdesc:ab82db97d129c8cfc36780bcdc6286fcc"><td class="mdescLeft"> </td><td class="mdescRight">Filter using AddByteToHalf: D = saturation255(S/2 + C)  <a href="#ab82db97d129c8cfc36780bcdc6286fcc"></a><br/></td></tr>
 <tr class="memitem:a657e128016cc448778007d8b6475dd65"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a657e128016cc448778007d8b6475dd65">SDL_imageFilterSubByteMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)</td></tr>
 <tr class="memdesc:a657e128016cc448778007d8b6475dd65"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using SubByte: D = saturation0(S - C)  <a href="#a657e128016cc448778007d8b6475dd65"></a><br/></td></tr>
 <tr class="memitem:a387fb6f0d48cc5d08f37f7f9b92d14b2"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a387fb6f0d48cc5d08f37f7f9b92d14b2">SDL_imageFilterSubByte</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)</td></tr>
 <tr class="memdesc:a387fb6f0d48cc5d08f37f7f9b92d14b2"><td class="mdescLeft"> </td><td class="mdescRight">Filter using SubByte: D = saturation0(S - C)  <a href="#a387fb6f0d48cc5d08f37f7f9b92d14b2"></a><br/></td></tr>
-<tr class="memitem:acfb143905b751680650576e75847f9c1"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#acfb143905b751680650576e75847f9c1">SDL_imageFilterSubUintMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)</td></tr>
-<tr class="memdesc:acfb143905b751680650576e75847f9c1"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C)  <a href="#acfb143905b751680650576e75847f9c1"></a><br/></td></tr>
 <tr class="memitem:abb343ef95e22945e1d4d648b2e176e64"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#abb343ef95e22945e1d4d648b2e176e64">SDL_imageFilterSubUint</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)</td></tr>
 <tr class="memdesc:abb343ef95e22945e1d4d648b2e176e64"><td class="mdescLeft"> </td><td class="mdescRight">Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C)  <a href="#abb343ef95e22945e1d4d648b2e176e64"></a><br/></td></tr>
-<tr class="memitem:a696568e00b153011f0673bdf1297e9fa"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a696568e00b153011f0673bdf1297e9fa">SDL_imageFilterShiftRightMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N, unsigned char *Mask)</td></tr>
-<tr class="memdesc:a696568e00b153011f0673bdf1297e9fa"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using ShiftRight: D = saturation0(S >> N)  <a href="#a696568e00b153011f0673bdf1297e9fa"></a><br/></td></tr>
 <tr class="memitem:a68851aed2dcc5dfd2f3b258236f3b88c"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a68851aed2dcc5dfd2f3b258236f3b88c">SDL_imageFilterShiftRight</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)</td></tr>
 <tr class="memdesc:a68851aed2dcc5dfd2f3b258236f3b88c"><td class="mdescLeft"> </td><td class="mdescRight">Filter using ShiftRight: D = saturation0(S >> N)  <a href="#a68851aed2dcc5dfd2f3b258236f3b88c"></a><br/></td></tr>
-<tr class="memitem:a23430360ee5ce8031158831a44e83d56"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a23430360ee5ce8031158831a44e83d56">SDL_imageFilterShiftRightUintMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)</td></tr>
-<tr class="memdesc:a23430360ee5ce8031158831a44e83d56"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N)  <a href="#a23430360ee5ce8031158831a44e83d56"></a><br/></td></tr>
 <tr class="memitem:a540d4625d76bcd03318c2a59ce650fdb"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a540d4625d76bcd03318c2a59ce650fdb">SDL_imageFilterShiftRightUint</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)</td></tr>
 <tr class="memdesc:a540d4625d76bcd03318c2a59ce650fdb"><td class="mdescLeft"> </td><td class="mdescRight">Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N)  <a href="#a540d4625d76bcd03318c2a59ce650fdb"></a><br/></td></tr>
-<tr class="memitem:ad18d23ec352f7508f89e47cff9c9a4ea"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ad18d23ec352f7508f89e47cff9c9a4ea">SDL_imageFilterMultByByteMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)</td></tr>
-<tr class="memdesc:ad18d23ec352f7508f89e47cff9c9a4ea"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using MultByByte: D = saturation255(S * C)  <a href="#ad18d23ec352f7508f89e47cff9c9a4ea"></a><br/></td></tr>
 <tr class="memitem:a06f7a19d6e2fc89d7b48cc45d715806d"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a06f7a19d6e2fc89d7b48cc45d715806d">SDL_imageFilterMultByByte</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)</td></tr>
 <tr class="memdesc:a06f7a19d6e2fc89d7b48cc45d715806d"><td class="mdescLeft"> </td><td class="mdescRight">Filter using MultByByte: D = saturation255(S * C)  <a href="#a06f7a19d6e2fc89d7b48cc45d715806d"></a><br/></td></tr>
-<tr class="memitem:a80d18182b54de0ec1f8d9a79dc5b879a"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a80d18182b54de0ec1f8d9a79dc5b879a">SDL_imageFilterShiftRightAndMultByByteMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N, unsigned char C)</td></tr>
-<tr class="memdesc:a80d18182b54de0ec1f8d9a79dc5b879a"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using ShiftRightAndMultByByteMMX: D = saturation255((S >> N) * C)  <a href="#a80d18182b54de0ec1f8d9a79dc5b879a"></a><br/></td></tr>
 <tr class="memitem:a0713d6c267fba9756d6beae81e89f9e4"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a0713d6c267fba9756d6beae81e89f9e4">SDL_imageFilterShiftRightAndMultByByte</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N, unsigned char C)</td></tr>
 <tr class="memdesc:a0713d6c267fba9756d6beae81e89f9e4"><td class="mdescLeft"> </td><td class="mdescRight">Filter using ShiftRightAndMultByByte: D = saturation255((S >> N) * C)  <a href="#a0713d6c267fba9756d6beae81e89f9e4"></a><br/></td></tr>
-<tr class="memitem:a0d383d58c9a5262dbac636f6ebe26b62"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a0d383d58c9a5262dbac636f6ebe26b62">SDL_imageFilterShiftLeftByteMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N, unsigned char *Mask)</td></tr>
-<tr class="memdesc:a0d383d58c9a5262dbac636f6ebe26b62"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using ShiftLeftByte: D = (S << N)  <a href="#a0d383d58c9a5262dbac636f6ebe26b62"></a><br/></td></tr>
 <tr class="memitem:a4561a73b249a26babc4c469ffbdae604"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a4561a73b249a26babc4c469ffbdae604">SDL_imageFilterShiftLeftByte</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)</td></tr>
 <tr class="memdesc:a4561a73b249a26babc4c469ffbdae604"><td class="mdescLeft"> </td><td class="mdescRight">Filter using ShiftLeftByte: D = (S << N)  <a href="#a4561a73b249a26babc4c469ffbdae604"></a><br/></td></tr>
-<tr class="memitem:a4a4260369d38e7bbcd9e3690bf57b8d4"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a4a4260369d38e7bbcd9e3690bf57b8d4">SDL_imageFilterShiftLeftUintMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)</td></tr>
-<tr class="memdesc:a4a4260369d38e7bbcd9e3690bf57b8d4"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using ShiftLeftUint: D = ((uint)S << N)  <a href="#a4a4260369d38e7bbcd9e3690bf57b8d4"></a><br/></td></tr>
 <tr class="memitem:a250e796fb2db470da0a78b74b78114e8"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a250e796fb2db470da0a78b74b78114e8">SDL_imageFilterShiftLeftUint</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)</td></tr>
 <tr class="memdesc:a250e796fb2db470da0a78b74b78114e8"><td class="mdescLeft"> </td><td class="mdescRight">Filter using ShiftLeftUint: D = ((uint)S << N)  <a href="#a250e796fb2db470da0a78b74b78114e8"></a><br/></td></tr>
-<tr class="memitem:a3ea84aa8cf313790dc7468f2f4f29497"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a3ea84aa8cf313790dc7468f2f4f29497">SDL_imageFilterShiftLeftMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)</td></tr>
-<tr class="memdesc:a3ea84aa8cf313790dc7468f2f4f29497"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter ShiftLeft: D = saturation255(S << N)  <a href="#a3ea84aa8cf313790dc7468f2f4f29497"></a><br/></td></tr>
 <tr class="memitem:a98372fea76310903abef7808db10d226"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a98372fea76310903abef7808db10d226">SDL_imageFilterShiftLeft</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)</td></tr>
 <tr class="memdesc:a98372fea76310903abef7808db10d226"><td class="mdescLeft"> </td><td class="mdescRight">Filter ShiftLeft: D = saturation255(S << N)  <a href="#a98372fea76310903abef7808db10d226"></a><br/></td></tr>
-<tr class="memitem:a6f06923cb26d510ad72d4b1dd6583284"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a6f06923cb26d510ad72d4b1dd6583284">SDL_imageFilterBinarizeUsingThresholdMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char T)</td></tr>
-<tr class="memdesc:a6f06923cb26d510ad72d4b1dd6583284"><td class="mdescLeft"> </td><td class="mdescRight">MMX BinarizeUsingThreshold: D = (S >= T) ? 255:0.  <a href="#a6f06923cb26d510ad72d4b1dd6583284"></a><br/></td></tr>
 <tr class="memitem:a951a062e15df290a137428e1e0f4d5ce"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a951a062e15df290a137428e1e0f4d5ce">SDL_imageFilterBinarizeUsingThreshold</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char T)</td></tr>
 <tr class="memdesc:a951a062e15df290a137428e1e0f4d5ce"><td class="mdescLeft"> </td><td class="mdescRight">Filter using BinarizeUsingThreshold: D = (S >= T) ? 255:0.  <a href="#a951a062e15df290a137428e1e0f4d5ce"></a><br/></td></tr>
-<tr class="memitem:adc2b0f3e3a32724df1325a2121e9f96d"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#adc2b0f3e3a32724df1325a2121e9f96d">SDL_imageFilterClipToRangeMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char Tmin, unsigned char Tmax)</td></tr>
-<tr class="memdesc:adc2b0f3e3a32724df1325a2121e9f96d"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax.  <a href="#adc2b0f3e3a32724df1325a2121e9f96d"></a><br/></td></tr>
 <tr class="memitem:ab7224abc4ecc1b8a6f4441ef8379515f"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ab7224abc4ecc1b8a6f4441ef8379515f">SDL_imageFilterClipToRange</a> (unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char Tmin, unsigned char Tmax)</td></tr>
 <tr class="memdesc:ab7224abc4ecc1b8a6f4441ef8379515f"><td class="mdescLeft"> </td><td class="mdescRight">Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax.  <a href="#ab7224abc4ecc1b8a6f4441ef8379515f"></a><br/></td></tr>
-<tr class="memitem:a2e7631c748eb46544e7be40fa64bc232"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a2e7631c748eb46544e7be40fa64bc232">SDL_imageFilterNormalizeLinearMMX</a> (unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, int Cmin, int Cmax, int Nmin, int Nmax)</td></tr>
-<tr class="memdesc:a2e7631c748eb46544e7be40fa64bc232"><td class="mdescLeft"> </td><td class="mdescRight">Internal MMX Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin)  <a href="#a2e7631c748eb46544e7be40fa64bc232"></a><br/></td></tr>
 <tr class="memitem:ab018ace4db884cac953b06b09c00828b"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#ab018ace4db884cac953b06b09c00828b">SDL_imageFilterNormalizeLinear</a> (unsigned char *Src, unsigned char *Dest, unsigned int length, int Cmin, int Cmax, int Nmin, int Nmax)</td></tr>
 <tr class="memdesc:ab018ace4db884cac953b06b09c00828b"><td class="mdescLeft"> </td><td class="mdescRight">Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin)  <a href="#ab018ace4db884cac953b06b09c00828b"></a><br/></td></tr>
 <tr class="memitem:a8e7e4138a93e26f1912763189d407770"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="_s_d_l__image_filter_8c.html#a8e7e4138a93e26f1912763189d407770">SDL_imageFilterConvolveKernel3x3Divide</a> (unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char Divisor)</td></tr>
@@ -230,32 +179,11 @@ Functions</h2></td></tr>
 
 <p>Swaps the byte order in a 32bit integer (LSB becomes MSB, etc.). </p>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00049">49</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00058">58</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
 <hr/><h2>Function Documentation</h2>
-<a class="anchor" id="ade15666303ddc71c543f44cf1536d00e"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">unsigned int <a class="el" href="_s_d_l__image_filter_8c.html#ade15666303ddc71c543f44cf1536d00e">_cpuFlags</a> </td>
-          <td>(</td>
-          <td class="paramname"></td><td>)</td>
-          <td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Internal function returning the CPU flags. </p>
-<dl class="section return"><dt>Returns:</dt><dd>Flags of system CPU. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00068">68</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
 <a class="anchor" id="a472909f904274255cd6793c520172e48"></a>
 <div class="memitem">
 <div class="memproto">
@@ -305,60 +233,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00607">607</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a601bf863185e51af32c6008ecb0a5095"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a601bf863185e51af32c6008ecb0a5095">SDL_imageFilterAbsDiffMMX</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Internal MMX Filter using AbsDiff: D = | S1 - S2 |. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00538">538</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00539">539</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -411,7 +286,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00207">207</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00170">170</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -464,16 +339,16 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01808">1808</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01788">1788</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a032e94beee7b3f7dc9e3bc999f51dfb3"></a>
+<a class="anchor" id="ab82db97d129c8cfc36780bcdc6286fcc"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a032e94beee7b3f7dc9e3bc999f51dfb3">SDL_imageFilterAddByteMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a8cbdffd5dbcab3b5dc9207d57af616b3">SDL_imageFilterAddByteToHalf</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -488,7 +363,7 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
+          <td class="paramname"><em>length</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
@@ -505,28 +380,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using AddByte: D = saturation255(S + C) </p>
+<p>Filter using AddByteToHalf: D = saturation255(S/2 + C) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">C</td><td>Constant value to add (C).</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
+    <tr><td class="paramname">C</td><td>Constant to add (C).</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01727">1727</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02065">2065</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="ab82db97d129c8cfc36780bcdc6286fcc"></a>
+<a class="anchor" id="a660543426c47dfec39a349eb3b8f905b"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a8cbdffd5dbcab3b5dc9207d57af616b3">SDL_imageFilterAddByteToHalf</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#af1a17645dea69e52c7bd560521286765">SDL_imageFilterAddUint</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -546,7 +421,7 @@ Functions</h2></td></tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char </td>
+          <td class="paramtype">unsigned int </td>
           <td class="paramname"><em>C</em> </td>
         </tr>
         <tr>
@@ -558,7 +433,7 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using AddByteToHalf: D = saturation255(S/2 + C) </p>
+<p>Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
@@ -570,16 +445,37 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02115">2115</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01916">1916</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+
+</div>
+</div>
+<a class="anchor" id="afbfcc8c03e3d791ac74c955d14a135e4"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">void <a class="el" href="_s_d_l__image_filter_8h.html#a08a45265e9e84bf8beedebba26da947c">SDL_imageFilterAlignStack</a> </td>
+          <td>(</td>
+          <td class="paramtype">void </td>
+          <td class="paramname"></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div>
+<div class="memdoc">
+
+<p>Align stack to 32 byte boundary,. </p>
+
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07323">7323</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="ae7c132373eb318713635c4e82f478f9d"></a>
+<a class="anchor" id="a951a062e15df290a137428e1e0f4d5ce"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#ae7c132373eb318713635c4e82f478f9d">SDL_imageFilterAddByteToHalfMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#ad5bf97d7e39d018d2eeb570e97edf8c0">SDL_imageFilterBinarizeUsingThreshold</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -594,19 +490,13 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
+          <td class="paramname"><em>length</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>C</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Mask</em> </td>
+          <td class="paramname"><em>T</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -617,29 +507,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using AddByteToHalf: D = saturation255(S/2 + C) </p>
+<p>Filter using BinarizeUsingThreshold: D = (S >= T) ? 255:0. </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">C</td><td>Constant to add (C). </td></tr>
-    <tr><td class="paramname">Mask</td><td>Pointer to 8 mask bytes of value 0x7F.</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
+    <tr><td class="paramname">T</td><td>The threshold boundary (inclusive).</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02023">2023</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03531">3531</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="ace0bf40de8d58bbd8d6ff9c3fc04ec6e"></a>
+<a class="anchor" id="a85837ce1b5de1f907b6b9053922b5cbc"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#ace0bf40de8d58bbd8d6ff9c3fc04ec6e">SDL_imageFilterAddMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a5f67460c0b89dadd49d04832608a345b">SDL_imageFilterBitAnd</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -660,7 +549,7 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
+          <td class="paramname"><em>length</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -671,28 +560,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using Add: D = saturation255(S1 + S2) </p>
+<p>Filter using BitAnd: D = S1 & S2. </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
     <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00144">144</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01275">1275</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a660543426c47dfec39a349eb3b8f905b"></a>
+<a class="anchor" id="ac3abfaa8ec2e88c3c4893588c5555856"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#af1a17645dea69e52c7bd560521286765">SDL_imageFilterAddUint</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#abc3c3fc5f018e271f6393921f3964d31">SDL_imageFilterBitNegation</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -707,13 +596,7 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>C</em> </td>
+          <td class="paramname"><em>length</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -724,28 +607,27 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C) </p>
+<p>Filter using BitNegation: D = !S. </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">C</td><td>Constant to add (C).</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source array.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01949">1949</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01668">1668</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="ac337129ad7f11e7e33d73fa39b8239eb"></a>
+<a class="anchor" id="a5cf1c477f4e32d02f74ee95d9f7b0021"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#ac337129ad7f11e7e33d73fa39b8239eb">SDL_imageFilterAddUintMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a0acf0eabba33f8fa7acbc08dc3015cd3">SDL_imageFilterBitOr</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -754,25 +636,19 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
+          <td class="paramname"><em>Src2</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>C</em>, </td>
+          <td class="paramtype">unsigned char * </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>D</em> </td>
+          <td class="paramname"><em>length</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -783,50 +659,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C) </p>
+<p>Filter using BitOr: D = S1 | S2. </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
+    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
+    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">C</td><td>Constant to add (C). </td></tr>
-    <tr><td class="paramname">D</td><td>Byteorder-swapped constant to add (Cs).</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01874">1874</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01389">1389</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="afbfcc8c03e3d791ac74c955d14a135e4"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">void <a class="el" href="_s_d_l__image_filter_8h.html#a08a45265e9e84bf8beedebba26da947c">SDL_imageFilterAlignStack</a> </td>
-          <td>(</td>
-          <td class="paramtype">void </td>
-          <td class="paramname"></td><td>)</td>
-          <td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Align stack to 32 byte boundary,. </p>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07537">7537</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a951a062e15df290a137428e1e0f4d5ce"></a>
+<a class="anchor" id="ab7224abc4ecc1b8a6f4441ef8379515f"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#ad5bf97d7e39d018d2eeb570e97edf8c0">SDL_imageFilterBinarizeUsingThreshold</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#ae9d552de9cf5a4a1716d91ee905eafd7">SDL_imageFilterClipToRange</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -847,7 +701,13 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>T</em> </td>
+          <td class="paramname"><em>Tmin</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char </td>
+          <td class="paramname"><em>Tmax</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -858,31 +718,32 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using BinarizeUsingThreshold: D = (S >= T) ? 255:0. </p>
+<p>Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax. </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
     <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">T</td><td>The threshold boundary (inclusive).</td></tr>
+    <tr><td class="paramname">Tmin</td><td>Lower (inclusive) boundary of the clipping range. </td></tr>
+    <tr><td class="paramname">Tmax</td><td>Upper (inclusive) boundary of the clipping range.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03717">3717</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03688">3688</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a6f06923cb26d510ad72d4b1dd6583284"></a>
+<a class="anchor" id="a8e7e4138a93e26f1912763189d407770"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a6f06923cb26d510ad72d4b1dd6583284">SDL_imageFilterBinarizeUsingThresholdMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a7286cd21fa0a0cfb0606806dacfbe121">SDL_imageFilterConvolveKernel3x3Divide</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
+          <td class="paramname"><em>Src</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
@@ -893,14 +754,26 @@ Functions</h2></td></tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>rows</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>columns</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">signed short * </td>
+          <td class="paramname"><em>Kernel</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>T</em> </td>
+          <td class="paramname"><em>Divisor</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -911,1353 +784,64 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>MMX BinarizeUsingThreshold: D = (S >= T) ? 255:0. </p>
+<p>Filter using ConvolveKernel3x3Divide: Dij = saturation0and255( ... ) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">T</td><td>The threshold boundary (inclusive).</td></tr>
+    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
+    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
+    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >2. </td></tr>
+    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >2. </td></tr>
+    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 3x3. </td></tr>
+    <tr><td class="paramname">Divisor</td><td>The divisor of the convolution sum. Must be >0.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
+<p>Note: Non-MMX implementation not available for this function.</p>
+<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03629">3629</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03977">3977</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a85837ce1b5de1f907b6b9053922b5cbc"></a>
+<a class="anchor" id="ac329e5a3b60351768c96c94db9f9cf97"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a5f67460c0b89dadd49d04832608a345b">SDL_imageFilterBitAnd</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a67929babce179e1e333c5cd2e5fc4091">SDL_imageFilterConvolveKernel3x3ShiftRight</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
+          <td class="paramname"><em>Src</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>rows</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em> </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>columns</em>, </td>
         </tr>
         <tr>
+          <td class="paramkey"></td>
           <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using BitAnd: D = S1 & S2. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01312">1312</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a8a86c969daeb874fb643347592003484"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a8a86c969daeb874fb643347592003484">SDL_imageFilterBitAndMMX</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Internal MMX Filter using BitAnd: D = S1 & S2. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01249">1249</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="ac3abfaa8ec2e88c3c4893588c5555856"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#abc3c3fc5f018e271f6393921f3964d31">SDL_imageFilterBitNegation</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using BitNegation: D = !S. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source array.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01674">1674</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a1b522e196f9647501c6badd1de727b97"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a1b522e196f9647501c6badd1de727b97">SDL_imageFilterBitNegationMMX</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Internal MMX Filter using BitNegation: D = !S. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S1). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01615">1615</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a5cf1c477f4e32d02f74ee95d9f7b0021"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a0acf0eabba33f8fa7acbc08dc3015cd3">SDL_imageFilterBitOr</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using BitOr: D = S1 | S2. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01434">1434</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a2cd7db5de491dce5dfcf292fc241031d"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a2cd7db5de491dce5dfcf292fc241031d">SDL_imageFilterBitOrMMX</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Internal MMX Filter using BitOr: D = S1 | S2. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01371">1371</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="ab7224abc4ecc1b8a6f4441ef8379515f"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#ae9d552de9cf5a4a1716d91ee905eafd7">SDL_imageFilterClipToRange</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>Tmin</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>Tmax</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">Tmin</td><td>Lower (inclusive) boundary of the clipping range. </td></tr>
-    <tr><td class="paramname">Tmax</td><td>Upper (inclusive) boundary of the clipping range.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03890">3890</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="adc2b0f3e3a32724df1325a2121e9f96d"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#adc2b0f3e3a32724df1325a2121e9f96d">SDL_imageFilterClipToRangeMMX</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>Tmin</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>Tmax</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Internal MMX Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">Tmin</td><td>Lower (inclusive) boundary of the clipping range. </td></tr>
-    <tr><td class="paramname">Tmax</td><td>Upper (inclusive) boundary of the clipping range.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03778">3778</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a8e7e4138a93e26f1912763189d407770"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a7286cd21fa0a0cfb0606806dacfbe121">SDL_imageFilterConvolveKernel3x3Divide</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>rows</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>columns</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">signed short * </td>
-          <td class="paramname"><em>Kernel</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>Divisor</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using ConvolveKernel3x3Divide: Dij = saturation0and255( ... ) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
-    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
-    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >2. </td></tr>
-    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >2. </td></tr>
-    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 3x3. </td></tr>
-    <tr><td class="paramname">Divisor</td><td>The divisor of the convolution sum. Must be >0.</td></tr>
-  </table>
-  </dd>
-</dl>
-<p>Note: Non-MMX implementation not available for this function.</p>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04201">4201</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="ac329e5a3b60351768c96c94db9f9cf97"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a67929babce179e1e333c5cd2e5fc4091">SDL_imageFilterConvolveKernel3x3ShiftRight</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>rows</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>columns</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">signed short * </td>
-          <td class="paramname"><em>Kernel</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>NRightShift</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using ConvolveKernel3x3ShiftRight: Dij = saturation0and255( ... ) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
-    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
-    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >2. </td></tr>
-    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >2. </td></tr>
-    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 3x3. </td></tr>
-    <tr><td class="paramname">NRightShift</td><td>The number of right bit shifts to apply to the convolution sum. Must be <7.</td></tr>
-  </table>
-  </dd>
-</dl>
-<p>Note: Non-MMX implementation not available for this function.</p>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05595">5595</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="ac9a556492480ce71f54d456a0ff7e6cb"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a432d7bcc34b6bea42d1a07b4db795e1f">SDL_imageFilterConvolveKernel5x5Divide</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>rows</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>columns</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">signed short * </td>
-          <td class="paramname"><em>Kernel</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>Divisor</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using ConvolveKernel5x5Divide: Dij = saturation0and255( ... ) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
-    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
-    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >4. </td></tr>
-    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >4. </td></tr>
-    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 5x5. </td></tr>
-    <tr><td class="paramname">Divisor</td><td>The divisor of the convolution sum. Must be >0.</td></tr>
-  </table>
-  </dd>
-</dl>
-<p>Note: Non-MMX implementation not available for this function.</p>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04390">4390</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a5253738dc4c892352b078d9a7dec2b20"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a9aaa45452b04f51f52826c2104ea3b85">SDL_imageFilterConvolveKernel5x5ShiftRight</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>rows</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>columns</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">signed short * </td>
-          <td class="paramname"><em>Kernel</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>NRightShift</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using ConvolveKernel5x5ShiftRight: Dij = saturation0and255( ... ) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
-    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
-    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >4. </td></tr>
-    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >4. </td></tr>
-    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 5x5. </td></tr>
-    <tr><td class="paramname">NRightShift</td><td>The number of right bit shifts to apply to the convolution sum. Must be &lt;7.</td></tr>
-  </table>
-  </dd>
-</dl>
-<p>Note: Non-MMX implementation not available for this function.</p>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05771">5771</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a363f48e6843fd3f48da53688b89bca48"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#acc177cf891758fdc4bf7533fb266e339">SDL_imageFilterConvolveKernel7x7Divide</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>rows</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>columns</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">signed short * </td>
-          <td class="paramname"><em>Kernel</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>Divisor</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using ConvolveKernel7x7Divide: Dij = saturation0and255( ... ) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
-    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
-    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >6. </td></tr>
-    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >6. </td></tr>
-    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 7x7. </td></tr>
-    <tr><td class="paramname">Divisor</td><td>The divisor of the convolution sum. Must be >0.</td></tr>
-  </table>
-  </dd>
-</dl>
-<p>Note: Non-MMX implementation not available for this function.</p>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04692">4692</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a48b40065652dda699875f1425b9227a6"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a6dbe52e917c0858bd311e9ce75219587">SDL_imageFilterConvolveKernel7x7ShiftRight</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>rows</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>columns</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">signed short * </td>
-          <td class="paramname"><em>Kernel</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>NRightShift</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using ConvolveKernel7x7ShiftRight: Dij = saturation0and255( ... ) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
-    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
-    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >6. </td></tr>
-    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >6. </td></tr>
-    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 7x7. </td></tr>
-    <tr><td class="paramname">NRightShift</td><td>The number of right bit shifts to apply to the convolution sum. Must be &lt;7.</td></tr>
-  </table>
-  </dd>
-</dl>
-<p>Note: Non-MMX implementation not available for this function.</p>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l06071">6071</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="ae1e91ff193beed110a71119ec901f09d"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#af8a8114acd0509787ae5265990049720">SDL_imageFilterConvolveKernel9x9Divide</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>rows</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>columns</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">signed short * </td>
-          <td class="paramname"><em>Kernel</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>Divisor</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using ConvolveKernel9x9Divide: Dij = saturation0and255( ... ) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
-    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
-    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >8. </td></tr>
-    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >8. </td></tr>
-    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 9x9. </td></tr>
-    <tr><td class="paramname">Divisor</td><td>The divisor of the convolution sum. Must be >0.</td></tr>
-  </table>
-  </dd>
-</dl>
-<p>Note: Non-MMX implementation not available for this function.</p>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05048">5048</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a6aaa30dc51d1e51585d02d123b0f1a7a"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#ad2702d0524a16032118fdf67e3e0f44a">SDL_imageFilterConvolveKernel9x9ShiftRight</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>rows</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>columns</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">signed short * </td>
-          <td class="paramname"><em>Kernel</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>NRightShift</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using ConvolveKernel9x9ShiftRight: Dij = saturation0and255( ... ) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
-    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
-    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >8. </td></tr>
-    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >8. </td></tr>
-    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 9x9. </td></tr>
-    <tr><td class="paramname">NRightShift</td><td>The number of right bit shifts to apply to the convolution sum. Must be &lt;7.</td></tr>
-  </table>
-  </dd>
-</dl>
-<p>Note: Non-MMX implementation not available for this function.</p>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l06433">6433</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a0ea22f01c6a4724bac307da3e5355f58"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#aeb8ed56aa7de3c8b0d0b2aa9163c3e37">SDL_imageFilterDiv</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using Div: D = S1 / S2. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01561">1561</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a95791d257c510c597a2ef542f43d6678"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a95791d257c510c597a2ef542f43d6678">SDL_imageFilterDivASM</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Internal ASM Filter using Div: D = S1 / S2. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01490">1490</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="ace072118fef77973210eb04fb4bfc779"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a69cfa83c5d198c8ae4be4ab86e8d3b8f">SDL_imageFilterMean</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using Mean: D = S1/2 + S2/2. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00356">356</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="ae3a61d6df0940ef96ccc7b48a0fc8966"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#ae3a61d6df0940ef96ccc7b48a0fc8966">SDL_imageFilterMeanMMX</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Mask</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Internal MMX Filter using Mean: D = S1/2 + S2/2. </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays. </td></tr>
-    <tr><td class="paramname">Mask</td><td>Mask array containing 8 bytes with 0x7F value. </td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00269">269</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a798ce71024ee1a1d1b174fd60fe79917"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a5823f6eb23fe8e74764a94f3d78204ef">SDL_imageFilterMMXdetect</a> </td>
-          <td>(</td>
-          <td class="paramtype">void </td>
-          <td class="paramname"></td><td>)</td>
-          <td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>MMX detection routine (with override flag). </p>
-<dl class="section return"><dt>Returns:</dt><dd>1 if MMX was detected, 0 otherwise. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00100">100</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a5dff661660755161bb4aaf6199cd1384"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">void <a class="el" href="_s_d_l__image_filter_8h.html#a403adc470cb1dd34520f18d55804d4ea">SDL_imageFilterMMXoff</a> </td>
-          <td>(</td>
-          <td class="paramtype">void </td>
-          <td class="paramname"></td><td>)</td>
-          <td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Disable MMX check for filter functions and force to use non-MMX C based code. </p>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00119">119</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a353ee234c3b51b33c4c5c4b30db5832d"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">void <a class="el" href="_s_d_l__image_filter_8h.html#a848ce7e9551b25fea19fe1fb739f74fb">SDL_imageFilterMMXon</a> </td>
-          <td>(</td>
-          <td class="paramtype">void </td>
-          <td class="paramname"></td><td>)</td>
-          <td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Enable MMX check for filter functions and use MMX code if available. </p>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00127">127</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="af4633031d40a9ea0956a2f3c6c87a384"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a4657c2a1e1bf55d3241dc737cd618409">SDL_imageFilterMult</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Filter using Mult: D = saturation255(S1 * S2) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00766">766</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a06f7a19d6e2fc89d7b48cc45d715806d"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#add06bb6ea7847fc13a3041ddceb4ac3c">SDL_imageFilterMultByByte</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em>, </td>
+          <td class="paramtype">signed short * </td>
+          <td class="paramname"><em>Kernel</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>C</em> </td>
+          <td class="paramname"><em>NRightShift</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2268,31 +852,34 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using MultByByte: D = saturation255(S * C) </p>
+<p>Filter using ConvolveKernel3x3ShiftRight: Dij = saturation0and255( ... ) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays. </td></tr>
-    <tr><td class="paramname">C</td><td>Constant to multiply with (C).</td></tr>
+    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
+    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
+    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >2. </td></tr>
+    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >2. </td></tr>
+    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 3x3. </td></tr>
+    <tr><td class="paramname">NRightShift</td><td>The number of right bit shifts to apply to the convolution sum. Must be &lt;7.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
+<p>Note: Non-MMX implementation not available for this function.</p>
+<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02903">2903</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05375">5375</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="ad18d23ec352f7508f89e47cff9c9a4ea"></a>
+<a class="anchor" id="ac9a556492480ce71f54d456a0ff7e6cb"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#ad18d23ec352f7508f89e47cff9c9a4ea">SDL_imageFilterMultByByteMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a432d7bcc34b6bea42d1a07b4db795e1f">SDL_imageFilterConvolveKernel5x5Divide</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
+          <td class="paramname"><em>Src</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
@@ -2303,14 +890,26 @@ Functions</h2></td></tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>rows</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>columns</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">signed short * </td>
+          <td class="paramname"><em>Kernel</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>C</em> </td>
+          <td class="paramname"><em>Divisor</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2321,49 +920,64 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using MultByByte: D = saturation255(S * C) </p>
+<p>Filter using ConvolveKernel5x5Divide: Dij = saturation0and255( ... ) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">C</td><td>Constant to multiply with (C).</td></tr>
+    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
+    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
+    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >4. </td></tr>
+    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >4. </td></tr>
+    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 5x5. </td></tr>
+    <tr><td class="paramname">Divisor</td><td>The divisor of the convolution sum. Must be >0.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
+<p>Note: Non-MMX implementation not available for this function.</p>
+<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02762">2762</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04167">4167</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a80737f6427c7bdb30d39a92f6524fc14"></a>
+<a class="anchor" id="a5253738dc4c892352b078d9a7dec2b20"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#aa19248767b1fd9ffdea4ba69b9f00175">SDL_imageFilterMultDivby2</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a9aaa45452b04f51f52826c2104ea3b85">SDL_imageFilterConvolveKernel5x5ShiftRight</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
+          <td class="paramname"><em>Src</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>rows</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em> </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>columns</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">signed short * </td>
+          <td class="paramname"><em>Kernel</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char </td>
+          <td class="paramname"><em>NRightShift</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2374,49 +988,64 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using MultDivby2: D = saturation255(S1/2 * S2) </p>
+<p>Filter using ConvolveKernel5x5ShiftRight: Dij = saturation0and255( ... ) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
+    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
+    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
+    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >4. </td></tr>
+    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >4. </td></tr>
+    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 5x5. </td></tr>
+    <tr><td class="paramname">NRightShift</td><td>The number of right bit shifts to apply to the convolution sum. Must be &lt;7.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
+<p>Note: Non-MMX implementation not available for this function.</p>
+<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01038">1038</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05552">5552</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a12272cd24ce7f09bc2c35c609e025983"></a>
+<a class="anchor" id="a363f48e6843fd3f48da53688b89bca48"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a12272cd24ce7f09bc2c35c609e025983">SDL_imageFilterMultDivby2MMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#acc177cf891758fdc4bf7533fb266e339">SDL_imageFilterConvolveKernel7x7Divide</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
+          <td class="paramname"><em>Src</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>rows</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>columns</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">signed short * </td>
+          <td class="paramname"><em>Kernel</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char </td>
+          <td class="paramname"><em>Divisor</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2427,49 +1056,64 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using MultDivby2: D = saturation255(S1/2 * S2) </p>
+<p>Filter using ConvolveKernel7x7Divide: Dij = saturation0and255( ... ) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
+    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
+    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
+    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >6. </td></tr>
+    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >6. </td></tr>
+    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 7x7. </td></tr>
+    <tr><td class="paramname">Divisor</td><td>The divisor of the convolution sum. Must be >0.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
+<p>Note: Non-MMX implementation not available for this function.</p>
+<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00951">951</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04470">4470</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a30e685653eb1050c7d48feaeb8f801a1"></a>
+<a class="anchor" id="a48b40065652dda699875f1425b9227a6"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#aa92bea3946c8081c9656304a7d944fae">SDL_imageFilterMultDivby4</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a6dbe52e917c0858bd311e9ce75219587">SDL_imageFilterConvolveKernel7x7ShiftRight</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
+          <td class="paramname"><em>Src</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>rows</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em> </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>columns</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">signed short * </td>
+          <td class="paramname"><em>Kernel</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char </td>
+          <td class="paramname"><em>NRightShift</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2480,49 +1124,64 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using MultDivby4: D = saturation255(S1/2 * S2/2) </p>
+<p>Filter using ConvolveKernel7x7ShiftRight: Dij = saturation0and255( ... ) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
+    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
+    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
+    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >6. </td></tr>
+    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >6. </td></tr>
+    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 7x7. </td></tr>
+    <tr><td class="paramname">NRightShift</td><td>The number of right bit shifts to apply to the convolution sum. Must be &lt;7.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
+<p>Note: Non-MMX implementation not available for this function.</p>
+<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01189">1189</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05853">5853</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a1f8bf77328e934701c7a9e4ef51d9b41"></a>
+<a class="anchor" id="ae1e91ff193beed110a71119ec901f09d"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a1f8bf77328e934701c7a9e4ef51d9b41">SDL_imageFilterMultDivby4MMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#af8a8114acd0509787ae5265990049720">SDL_imageFilterConvolveKernel9x9Divide</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
+          <td class="paramname"><em>Src</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>rows</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>columns</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">signed short * </td>
+          <td class="paramname"><em>Kernel</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char </td>
+          <td class="paramname"><em>Divisor</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2533,49 +1192,64 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using MultDivby4: D = saturation255(S1/2 * S2/2) </p>
+<p>Filter using ConvolveKernel9x9Divide: Dij = saturation0and255( ... ) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
+    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
+    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
+    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >8. </td></tr>
+    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >8. </td></tr>
+    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 9x9. </td></tr>
+    <tr><td class="paramname">Divisor</td><td>The divisor of the convolution sum. Must be >0.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
+<p>Note: Non-MMX implementation not available for this function.</p>
+<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01098">1098</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04827">4827</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="ad565921b533977ad2059d58d3c4a3094"></a>
+<a class="anchor" id="a6aaa30dc51d1e51585d02d123b0f1a7a"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#ad565921b533977ad2059d58d3c4a3094">SDL_imageFilterMultMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#ad2702d0524a16032118fdf67e3e0f44a">SDL_imageFilterConvolveKernel9x9ShiftRight</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
+          <td class="paramname"><em>Src</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>rows</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>columns</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">signed short * </td>
+          <td class="paramname"><em>Kernel</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char </td>
+          <td class="paramname"><em>NRightShift</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2586,28 +1260,31 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using Mult: D = saturation255(S1 * S2) </p>
+<p>Filter using ConvolveKernel9x9ShiftRight: Dij = saturation0and255( ... ) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
+    <tr><td class="paramname">Src</td><td>The source 2D byte array to convolve. Should be different from destination. </td></tr>
+    <tr><td class="paramname">Dest</td><td>The destination 2D byte array to store the result in. Should be different from source. </td></tr>
+    <tr><td class="paramname">rows</td><td>Number of rows in source/destination array. Must be >8. </td></tr>
+    <tr><td class="paramname">columns</td><td>Number of columns in source/destination array. Must be >8. </td></tr>
+    <tr><td class="paramname">Kernel</td><td>The 2D convolution kernel of size 9x9. </td></tr>
+    <tr><td class="paramname">NRightShift</td><td>The number of right bit shifts to apply to the convolution sum. Must be &lt;7.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
+<p>Note: Non-MMX implementation not available for this function.</p>
+<dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00665">665</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l06216">6216</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a5f3c9fd40426bb46eba5ac167505dcc5"></a>
+<a class="anchor" id="a0ea22f01c6a4724bac307da3e5355f58"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#ac4f3446d0da18746b48606fe37c26385">SDL_imageFilterMultNor</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#aeb8ed56aa7de3c8b0d0b2aa9163c3e37">SDL_imageFilterDiv</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -2639,7 +1316,7 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using MultNor: D = S1 * S2. </p>
+<p>Filter using Div: D = S1 / S2. </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
@@ -2651,16 +1328,16 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00888">888</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01546">1546</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a346db972dff9c56e3c45c904eaa3c39a"></a>
+<a class="anchor" id="ace072118fef77973210eb04fb4bfc779"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a346db972dff9c56e3c45c904eaa3c39a">SDL_imageFilterMultNorASM</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a69cfa83c5d198c8ae4be4ab86e8d3b8f">SDL_imageFilterMean</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -2681,7 +1358,7 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
+          <td class="paramname"><em>length</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2692,102 +1369,92 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal ASM Filter using MultNor: D = S1 * S2. </p>
+<p>Filter using Mean: D = S1/2 + S2/2. </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
     <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00829">829</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00305">305</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="ab018ace4db884cac953b06b09c00828b"></a>
+<a class="anchor" id="a798ce71024ee1a1d1b174fd60fe79917"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#aacb316a18d8cb7999d5d53ee5e7b9750">SDL_imageFilterNormalizeLinear</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a5823f6eb23fe8e74764a94f3d78204ef">SDL_imageFilterMMXdetect</a> </td>
           <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>Cmin</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>Cmax</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
+          <td class="paramtype">void </td>
+          <td class="paramname"></td><td>)</td>
           <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>Nmin</em>, </td>
         </tr>
+      </table>
+</div>
+<div class="memdoc">
+
+<p>MMX detection routine (with override flag). </p>
+<dl class="section return"><dt>Returns:</dt><dd>1 if MMX was detected, 0 otherwise. </dd></dl>
+
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00077">77</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+
+</div>
+</div>
+<a class="anchor" id="a5dff661660755161bb4aaf6199cd1384"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
         <tr>
-          <td class="paramkey"></td>
+          <td class="memname">void <a class="el" href="_s_d_l__image_filter_8h.html#a403adc470cb1dd34520f18d55804d4ea">SDL_imageFilterMMXoff</a> </td>
+          <td>(</td>
+          <td class="paramtype">void </td>
+          <td class="paramname"></td><td>)</td>
           <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>Nmax</em> </td>
         </tr>
+      </table>
+</div>
+<div class="memdoc">
+
+<p>Disable MMX check for filter functions and force use of non-MMX C-based code. </p>
+
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00090">90</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+
+</div>
+</div>
+<a class="anchor" id="a353ee234c3b51b33c4c5c4b30db5832d"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
         <tr>
+          <td class="memname">void <a class="el" href="_s_d_l__image_filter_8h.html#a848ce7e9551b25fea19fe1fb739f74fb">SDL_imageFilterMMXon</a> </td>
+          <td>(</td>
+          <td class="paramtype">void </td>
+          <td class="paramname"></td><td>)</td>
           <td></td>
-          <td>)</td>
-          <td></td><td></td>
         </tr>
       </table>
 </div>
 <div class="memdoc">
 
-<p>Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src</td><td>Pointer to the start of the source byte array (S). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">Cmin</td><td>Normalization constant. </td></tr>
-    <tr><td class="paramname">Cmax</td><td>Normalization constant. </td></tr>
-    <tr><td class="paramname">Nmin</td><td>Normalization constant. </td></tr>
-    <tr><td class="paramname">Nmax</td><td>Normalization constant.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
+<p>Enable MMX check for filter functions and use MMX code if available. </p>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04130">4130</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00098">98</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a2e7631c748eb46544e7be40fa64bc232"></a>
+<a class="anchor" id="af4633031d40a9ea0956a2f3c6c87a384"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a2e7631c748eb46544e7be40fa64bc232">SDL_imageFilterNormalizeLinearMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a4657c2a1e1bf55d3241dc737cd618409">SDL_imageFilterMult</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -2796,37 +1463,19 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>Cmin</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>Cmax</em>, </td>
+          <td class="paramname"><em>Src2</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>Nmin</em>, </td>
+          <td class="paramtype">unsigned char * </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int </td>
-          <td class="paramname"><em>Nmax</em> </td>
+          <td class="paramtype">unsigned int </td>
+          <td class="paramname"><em>length</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2837,52 +1486,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin) </p>
+<p>Filter using Mult: D = saturation255(S1 * S2) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
+    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
+    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">Cmin</td><td>Normalization constant (Cmin). </td></tr>
-    <tr><td class="paramname">Cmax</td><td>Normalization constant (Cmax). </td></tr>
-    <tr><td class="paramname">Nmin</td><td>Normalization constant (Nmin). </td></tr>
-    <tr><td class="paramname">Nmax</td><td>Normalization constant (Nmax).</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03960">3960</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a3147eb5ddd4965d65702f0e533b42974"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">void <a class="el" href="_s_d_l__image_filter_8h.html#a84f360601d5e6e017f0e74a2cf83be6c">SDL_imageFilterRestoreStack</a> </td>
-          <td>(</td>
-          <td class="paramtype">void </td>
-          <td class="paramname"></td><td>)</td>
-          <td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Restore previously aligned stack. </p>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07565">7565</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00726">726</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a98372fea76310903abef7808db10d226"></a>
+<a class="anchor" id="a06f7a19d6e2fc89d7b48cc45d715806d"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a084f9544f049cc01e7b2f1090534abbf">SDL_imageFilterShiftLeft</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#add06bb6ea7847fc13a3041ddceb4ac3c">SDL_imageFilterMultByByte</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -2903,7 +1528,7 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>N</em> </td>
+          <td class="paramname"><em>C</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2914,28 +1539,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter ShiftLeft: D = saturation255(S << N) </p>
+<p>Filter using MultByByte: D = saturation255(S * C) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S1). </td></tr>
+    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8.</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays. </td></tr>
+    <tr><td class="paramname">C</td><td>Constant to multiply with (C).</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03562">3562</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02787">2787</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a4561a73b249a26babc4c469ffbdae604"></a>
+<a class="anchor" id="a80737f6427c7bdb30d39a92f6524fc14"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#ac32f1ea9acbee51c2db94224ef6f7fd2">SDL_imageFilterShiftLeftByte</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#aa19248767b1fd9ffdea4ba69b9f00175">SDL_imageFilterMultDivby2</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -2944,19 +1569,19 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
+          <td class="paramname"><em>Src2</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em>, </td>
+          <td class="paramtype">unsigned char * </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>N</em> </td>
+          <td class="paramtype">unsigned int </td>
+          <td class="paramname"><em>length</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -2967,28 +1592,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using ShiftLeftByte: D = (S << N) </p>
+<p>Filter using MultDivby2: D = saturation255(S1/2 * S2) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
+    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
+    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8.</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03237">3237</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00997">997</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a0d383d58c9a5262dbac636f6ebe26b62"></a>
+<a class="anchor" id="a30e685653eb1050c7d48feaeb8f801a1"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a0d383d58c9a5262dbac636f6ebe26b62">SDL_imageFilterShiftLeftByteMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#aa92bea3946c8081c9656304a7d944fae">SDL_imageFilterMultDivby4</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -2997,25 +1622,19 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
+          <td class="paramname"><em>Src2</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>N</em>, </td>
+          <td class="paramtype">unsigned char * </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Mask</em> </td>
+          <td class="paramtype">unsigned int </td>
+          <td class="paramname"><em>length</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -3026,29 +1645,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using ShiftLeftByte: D = (S << N) </p>
+<p>Filter using MultDivby4: D = saturation255(S1/2 * S2/2) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
+    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
+    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8. </td></tr>
-    <tr><td class="paramname">Mask</td><td>Byte array containing 8 bytes of 0xFE value.</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03147">3147</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01138">1138</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a3ea84aa8cf313790dc7468f2f4f29497"></a>
+<a class="anchor" id="a5f3c9fd40426bb46eba5ac167505dcc5"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a3ea84aa8cf313790dc7468f2f4f29497">SDL_imageFilterShiftLeftMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#ac4f3446d0da18746b48606fe37c26385">SDL_imageFilterMultNor</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -3057,19 +1675,19 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
+          <td class="paramname"><em>Src2</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
+          <td class="paramtype">unsigned char * </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>N</em> </td>
+          <td class="paramtype">unsigned int </td>
+          <td class="paramname"><em>length</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -3080,28 +1698,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter ShiftLeft: D = saturation255(S << N) </p>
+<p>Filter using MultNor: D = S1 * S2. </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S1). </td></tr>
+    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
+    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8.</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03434">3434</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00859">859</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a250e796fb2db470da0a78b74b78114e8"></a>
+<a class="anchor" id="a346db972dff9c56e3c45c904eaa3c39a"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a4fd6d4a9711c13163496587454d9f1a2">SDL_imageFilterShiftLeftUint</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a346db972dff9c56e3c45c904eaa3c39a">SDL_imageFilterMultNorASM</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -3110,19 +1728,19 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
+          <td class="paramname"><em>Src2</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>length</em>, </td>
+          <td class="paramtype">unsigned char * </td>
+          <td class="paramname"><em>Dest</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>N</em> </td>
+          <td class="paramtype">unsigned int </td>
+          <td class="paramname"><em>SrcLength</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -3133,31 +1751,31 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using ShiftLeftUint: D = ((uint)S << N) </p>
+<p>Internal ASM Filter using MultNor: D = S1 * S2. </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
+    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
+    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 32.</td></tr>
+    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03364">3364</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00789">789</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a4a4260369d38e7bbcd9e3690bf57b8d4"></a>
+<a class="anchor" id="ab018ace4db884cac953b06b09c00828b"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a4a4260369d38e7bbcd9e3690bf57b8d4">SDL_imageFilterShiftLeftUintMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#aacb316a18d8cb7999d5d53ee5e7b9750">SDL_imageFilterNormalizeLinear</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
+          <td class="paramname"><em>Src</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
@@ -3169,13 +1787,31 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
+          <td class="paramname"><em>length</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>N</em> </td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>Cmin</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>Cmax</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>Nmin</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int </td>
+          <td class="paramname"><em>Nmax</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -3186,28 +1822,52 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using ShiftLeftUint: D = ((uint)S << N) </p>
+<p>Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
+    <tr><td class="paramname">Src</td><td>Pointer to the start of the source byte array (S). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 32.</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
+    <tr><td class="paramname">Cmin</td><td>Normalization constant. </td></tr>
+    <tr><td class="paramname">Cmax</td><td>Normalization constant. </td></tr>
+    <tr><td class="paramname">Nmin</td><td>Normalization constant. </td></tr>
+    <tr><td class="paramname">Nmax</td><td>Normalization constant.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03303">3303</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03906">3906</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a68851aed2dcc5dfd2f3b258236f3b88c"></a>
+<a class="anchor" id="a3147eb5ddd4965d65702f0e533b42974"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a931f1232cd03acd2ba90af222625f4ca">SDL_imageFilterShiftRight</a> </td>
+          <td class="memname">void <a class="el" href="_s_d_l__image_filter_8h.html#a84f360601d5e6e017f0e74a2cf83be6c">SDL_imageFilterRestoreStack</a> </td>
+          <td>(</td>
+          <td class="paramtype">void </td>
+          <td class="paramname"></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div>
+<div class="memdoc">
+
+<p>Restore previously aligned stack. </p>
+
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07351">7351</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+
+</div>
+</div>
+<a class="anchor" id="a98372fea76310903abef7808db10d226"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a084f9544f049cc01e7b2f1090534abbf">SDL_imageFilterShiftLeft</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -3239,10 +1899,10 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using ShiftRight: D = saturation0(S >> N) </p>
+<p>Filter ShiftLeft: D = saturation255(S << N) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
+    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S1). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
     <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
     <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8.</td></tr>
@@ -3251,16 +1911,16 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02564">2564</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03390">3390</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a0713d6c267fba9756d6beae81e89f9e4"></a>
+<a class="anchor" id="a4561a73b249a26babc4c469ffbdae604"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a40e1e21ede9a7ed1eddac2cdbfd0b079">SDL_imageFilterShiftRightAndMultByByte</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#ac32f1ea9acbee51c2db94224ef6f7fd2">SDL_imageFilterShiftLeftByte</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -3281,13 +1941,7 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>N</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>C</em> </td>
+          <td class="paramname"><em>N</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -3298,29 +1952,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using ShiftRightAndMultByByte: D = saturation255((S >> N) * C) </p>
+<p>Filter using ShiftLeftByte: D = (S << N) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8. </td></tr>
-    <tr><td class="paramname">C</td><td>Constant to multiply with (C).</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source arrays. </td></tr>
+    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03074">3074</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03090">3090</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a80d18182b54de0ec1f8d9a79dc5b879a"></a>
+<a class="anchor" id="a250e796fb2db470da0a78b74b78114e8"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a80d18182b54de0ec1f8d9a79dc5b879a">SDL_imageFilterShiftRightAndMultByByteMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a4fd6d4a9711c13163496587454d9f1a2">SDL_imageFilterShiftLeftUint</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -3335,19 +1988,13 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>N</em>, </td>
+          <td class="paramname"><em>length</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>C</em> </td>
+          <td class="paramname"><em>N</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -3358,29 +2005,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using ShiftRightAndMultByByteMMX: D = saturation255((S >> N) * C) </p>
+<p>Filter using ShiftLeftUint: D = ((uint)S << N) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8. </td></tr>
-    <tr><td class="paramname">C</td><td>Constant to multiply with (C).</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
+    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 32.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02970">2970</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03207">3207</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a696568e00b153011f0673bdf1297e9fa"></a>
+<a class="anchor" id="a68851aed2dcc5dfd2f3b258236f3b88c"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a696568e00b153011f0673bdf1297e9fa">SDL_imageFilterShiftRightMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a931f1232cd03acd2ba90af222625f4ca">SDL_imageFilterShiftRight</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -3395,19 +2041,13 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
+          <td class="paramname"><em>length</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>N</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Mask</em> </td>
+          <td class="paramname"><em>N</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -3418,29 +2058,28 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using ShiftRight: D = saturation0(S >> N) </p>
+<p>Filter using ShiftRight: D = saturation0(S >> N) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8. </td></tr>
-    <tr><td class="paramname">Mask</td><td>Byte array containing 8 bytes with 0x7F value.</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
+    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02471">2471</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02473">2473</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a540d4625d76bcd03318c2a59ce650fdb"></a>
+<a class="anchor" id="a0713d6c267fba9756d6beae81e89f9e4"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a4ccddf5c575cc4d6074c9a54789240a6">SDL_imageFilterShiftRightUint</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a40e1e21ede9a7ed1eddac2cdbfd0b079">SDL_imageFilterShiftRightAndMultByByte</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -3461,7 +2100,13 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned char </td>
-          <td class="paramname"><em>N</em> </td>
+          <td class="paramname"><em>N</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char </td>
+          <td class="paramname"><em>C</em> </td>
         </tr>
         <tr>
           <td></td>
@@ -3472,28 +2117,29 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N) </p>
+<p>Filter using ShiftRightAndMultByByte: D = saturation255((S >> N) * C) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S1). </td></tr>
+    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
     <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 32.</td></tr>
+    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 8. </td></tr>
+    <tr><td class="paramname">C</td><td>Constant to multiply with (C).</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02692">2692</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02940">2940</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
-<a class="anchor" id="a23430360ee5ce8031158831a44e83d56"></a>
+<a class="anchor" id="a540d4625d76bcd03318c2a59ce650fdb"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a23430360ee5ce8031158831a44e83d56">SDL_imageFilterShiftRightUintMMX</a> </td>
+          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8h.html#a4ccddf5c575cc4d6074c9a54789240a6">SDL_imageFilterShiftRightUint</a> </td>
           <td>(</td>
           <td class="paramtype">unsigned char * </td>
           <td class="paramname"><em>Src1</em>, </td>
@@ -3508,7 +2154,7 @@ Functions</h2></td></tr>
           <td class="paramkey"></td>
           <td></td>
           <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
+          <td class="paramname"><em>length</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
@@ -3525,19 +2171,19 @@ Functions</h2></td></tr>
 </div>
 <div class="memdoc">
 
-<p>Internal MMX Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N) </p>
+<p>Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N) </p>
 <dl class="params"><dt><b>Parameters:</b></dt><dd>
   <table class="params">
     <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S1). </td></tr>
     <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N).</td></tr>
+    <tr><td class="paramname">length</td><td>The number of bytes in the source array. </td></tr>
+    <tr><td class="paramname">N</td><td>Number of bit-positions to shift (N). Valid range is 0 to 32.</td></tr>
   </table>
   </dd>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02631">2631</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02591">2591</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -3591,7 +2237,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07012">7012</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l06796">6796</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -3652,7 +2298,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07264">7264</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07049">7049</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -3705,7 +2351,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00478">478</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00419">419</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -3758,7 +2404,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02256">2256</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02193">2193</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -3811,60 +2457,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02177">2177</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="a45d54d410e677d32ef33ef6226e9ea12"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#a45d54d410e677d32ef33ef6226e9ea12">SDL_imageFilterSubMMX</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src2</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Internal MMX Filter using Sub: D = saturation0(S1 - S2) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the first source byte array (S1). </td></tr>
-    <tr><td class="paramname">Src2</td><td>Pointer to the start of the second source byte array (S2). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source arrays.</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00415">415</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02127">2127</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -3917,67 +2510,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02397">2397</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
-
-</div>
-</div>
-<a class="anchor" id="acfb143905b751680650576e75847f9c1"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="_s_d_l__image_filter_8c.html#acfb143905b751680650576e75847f9c1">SDL_imageFilterSubUintMMX</a> </td>
-          <td>(</td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Src1</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned char * </td>
-          <td class="paramname"><em>Dest</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>SrcLength</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>C</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype">unsigned int </td>
-          <td class="paramname"><em>D</em> </td>
-        </tr>
-        <tr>
-          <td></td>
-          <td>)</td>
-          <td></td><td></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Internal MMX Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C) </p>
-<dl class="params"><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">Src1</td><td>Pointer to the start of the source byte array (S). </td></tr>
-    <tr><td class="paramname">Dest</td><td>Pointer to the start of the destination byte array (D). </td></tr>
-    <tr><td class="paramname">SrcLength</td><td>The number of bytes in the source array. </td></tr>
-    <tr><td class="paramname">C</td><td>Constant to subtract (C). </td></tr>
-    <tr><td class="paramname">D</td><td>Byteorder-swapped constant to subtract (Cs).</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
-
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02323">2323</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02322">2322</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
diff --git a/Docs/html/_s_d_l__image_filter_8c_source.html b/Docs/html/_s_d_l__image_filter_8c_source.html
index 824294b..b5847fe 100644
--- a/Docs/html/_s_d_l__image_filter_8c_source.html
+++ b/Docs/html/_s_d_l__image_filter_8c_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_imageFilter.c Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_imageFilter.c Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_imageFilter.c</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_imageFilter.c</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__image_filter_8c.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/*</span>
@@ -60,6863 +60,6654 @@
 <a name="l00003"></a>00003 <span class="comment">SDL_imageFilter.c: byte-image "filter" routines</span>
 <a name="l00004"></a>00004 <span class="comment"></span>
 <a name="l00005"></a>00005 <span class="comment">Copyright (C) 2001-2012  Andreas Schiffler</span>
-<a name="l00006"></a>00006 <span class="comment"></span>
-<a name="l00007"></a>00007 <span class="comment">This software is provided 'as-is', without any express or implied</span>
-<a name="l00008"></a>00008 <span class="comment">warranty. In no event will the authors be held liable for any damages</span>
-<a name="l00009"></a>00009 <span class="comment">arising from the use of this software.</span>
-<a name="l00010"></a>00010 <span class="comment"></span>
-<a name="l00011"></a>00011 <span class="comment">Permission is granted to anyone to use this software for any purpose,</span>
-<a name="l00012"></a>00012 <span class="comment">including commercial applications, and to alter it and redistribute it</span>
-<a name="l00013"></a>00013 <span class="comment">freely, subject to the following restrictions:</span>
-<a name="l00014"></a>00014 <span class="comment"></span>
-<a name="l00015"></a>00015 <span class="comment">   1. The origin of this software must not be misrepresented; you must not</span>
-<a name="l00016"></a>00016 <span class="comment">   claim that you wrote the original software. If you use this software</span>
-<a name="l00017"></a>00017 <span class="comment">   in a product, an acknowledgment in the product documentation would be</span>
-<a name="l00018"></a>00018 <span class="comment">   appreciated but is not required.</span>
-<a name="l00019"></a>00019 <span class="comment"></span>
-<a name="l00020"></a>00020 <span class="comment">   2. Altered source versions must be plainly marked as such, and must not be</span>
-<a name="l00021"></a>00021 <span class="comment">   misrepresented as being the original software.</span>
-<a name="l00022"></a>00022 <span class="comment"></span>
-<a name="l00023"></a>00023 <span class="comment">   3. This notice may not be removed or altered from any source</span>
-<a name="l00024"></a>00024 <span class="comment">   distribution.</span>
-<a name="l00025"></a>00025 <span class="comment"></span>
-<a name="l00026"></a>00026 <span class="comment">Andreas Schiffler -- aschiffler at ferzkopp dot net</span>
-<a name="l00027"></a>00027 <span class="comment"></span>
-<a name="l00028"></a>00028 <span class="comment">*/</span>
-<a name="l00029"></a>00029 
-<a name="l00030"></a>00030 <span class="comment">/*</span>
-<a name="l00031"></a>00031 <span class="comment"></span>
-<a name="l00032"></a>00032 <span class="comment">Note: Uses inline x86 MMX or ASM optimizations if available and enabled.</span>
-<a name="l00033"></a>00033 <span class="comment"></span>
-<a name="l00034"></a>00034 <span class="comment">Note: Most of the MMX code is based on published routines </span>
-<a name="l00035"></a>00035 <span class="comment">by Vladimir Kravtchenko at vk at cs.ubc.ca - credits go to </span>
-<a name="l00036"></a>00036 <span class="comment">him for his work.</span>
-<a name="l00037"></a>00037 <span class="comment"></span>
-<a name="l00038"></a>00038 <span class="comment">*/</span>
-<a name="l00039"></a>00039 
-<a name="l00040"></a>00040 <span class="preprocessor">#include <stdio.h></span>
-<a name="l00041"></a>00041 <span class="preprocessor">#include <stdlib.h></span>
-<a name="l00042"></a>00042 <span class="preprocessor">#include <string.h></span>
-<a name="l00043"></a>00043 
-<a name="l00044"></a>00044 <span class="preprocessor">#include "<a class="code" href="_s_d_l__image_filter_8h.html">SDL_imageFilter.h</a>"</span>
-<a name="l00045"></a>00045 
-<a name="l00049"></a><a class="code" href="_s_d_l__image_filter_8c.html#a700fb30611761c46a674a45cc28ff561">00049</a> <span class="preprocessor">#define SWAP_32(x) (((x) >> 24) | (((x) & 0x00ff0000) >> 8)  | (((x) & 0x0000ff00) << 8)  | ((x) << 24))</span>
-<a name="l00050"></a>00050 <span class="preprocessor"></span>
-<a name="l00051"></a>00051 <span class="comment">/* ------ Static variables ----- */</span>
-<a name="l00052"></a>00052 
-<a name="l00056"></a>00056 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterUseMMX = 1;
-<a name="l00057"></a>00057 
-<a name="l00058"></a>00058 <span class="comment">/* Detect GCC */</span>
-<a name="l00059"></a>00059 <span class="preprocessor">#if defined(__GNUC__)</span>
-<a name="l00060"></a>00060 <span class="preprocessor"></span><span class="preprocessor">#define GCC__</span>
-<a name="l00061"></a>00061 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l00062"></a>00062 <span class="preprocessor"></span>
-<a name="l00068"></a><a class="code" href="_s_d_l__image_filter_8c.html#ade15666303ddc71c543f44cf1536d00e">00068</a> <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ade15666303ddc71c543f44cf1536d00e" title="Internal function returning the CPU flags.">_cpuFlags</a>()
-<a name="l00069"></a>00069 {
-<a name="l00070"></a>00070         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> flags = 0;
-<a name="l00071"></a>00071 
-<a name="l00072"></a>00072 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l00073"></a>00073 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l00074"></a>00074 <span class="preprocessor"></span>        __asm
-<a name="l00075"></a>00075         {
-<a name="l00076"></a>00076                 pusha
-<a name="l00077"></a>00077                         mov eax, 1
-<a name="l00078"></a>00078                         cpuid   <span class="comment">/* get CPU ID flag */</span>
-<a name="l00079"></a>00079                         mov flags,edx   <span class="comment">/* move result to mmx_bit */</span>
-<a name="l00080"></a>00080                         popa
-<a name="l00081"></a>00081         }
-<a name="l00082"></a>00082 <span class="preprocessor">#else</span>
-<a name="l00083"></a>00083 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span> (<span class="stringliteral">"pusha                 \n\t"</span> <span class="stringliteral">"mov    %1, %%eax     \n\t"</span>  <span class="comment">/* request feature flag */</span>
-<a name="l00084"></a>00084                 <span class="stringliteral">"cpuid                \n\t"</span>     <span class="comment">/* get CPU ID flag */</span>
-<a name="l00085"></a>00085                 <span class="stringliteral">"mov    %%edx, %0     \n\t"</span>     <span class="comment">/* move result to mmx_bit */</span>
-<a name="l00086"></a>00086                 <span class="stringliteral">"popa                \n\t"</span>:<span class="stringliteral">"=m"</span> (flags) <span class="comment">/* %0 */</span>
-<a name="l00087"></a>00087                 :<span class="stringliteral">"i"</span>(0x00000001)        <span class="comment">/* %1 */</span>
-<a name="l00088"></a>00088                 );
-<a name="l00089"></a>00089 <span class="preprocessor">#endif</span>
-<a name="l00090"></a>00090 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l00091"></a>00091 <span class="preprocessor"></span>
-<a name="l00092"></a>00092         <span class="keywordflow">return</span> (flags);
+<a name="l00006"></a>00006 <span class="comment">Copyright (C) 2013  Sylvain Beucler</span>
+<a name="l00007"></a>00007 <span class="comment"></span>
+<a name="l00008"></a>00008 <span class="comment">This software is provided 'as-is', without any express or implied</span>
+<a name="l00009"></a>00009 <span class="comment">warranty. In no event will the authors be held liable for any damages</span>
+<a name="l00010"></a>00010 <span class="comment">arising from the use of this software.</span>
+<a name="l00011"></a>00011 <span class="comment"></span>
+<a name="l00012"></a>00012 <span class="comment">Permission is granted to anyone to use this software for any purpose,</span>
+<a name="l00013"></a>00013 <span class="comment">including commercial applications, and to alter it and redistribute it</span>
+<a name="l00014"></a>00014 <span class="comment">freely, subject to the following restrictions:</span>
+<a name="l00015"></a>00015 <span class="comment"></span>
+<a name="l00016"></a>00016 <span class="comment">   1. The origin of this software must not be misrepresented; you must not</span>
+<a name="l00017"></a>00017 <span class="comment">   claim that you wrote the original software. If you use this software</span>
+<a name="l00018"></a>00018 <span class="comment">   in a product, an acknowledgment in the product documentation would be</span>
+<a name="l00019"></a>00019 <span class="comment">   appreciated but is not required.</span>
+<a name="l00020"></a>00020 <span class="comment"></span>
+<a name="l00021"></a>00021 <span class="comment">   2. Altered source versions must be plainly marked as such, and must not be</span>
+<a name="l00022"></a>00022 <span class="comment">   misrepresented as being the original software.</span>
+<a name="l00023"></a>00023 <span class="comment"></span>
+<a name="l00024"></a>00024 <span class="comment">   3. This notice may not be removed or altered from any source</span>
+<a name="l00025"></a>00025 <span class="comment">   distribution.</span>
+<a name="l00026"></a>00026 <span class="comment"></span>
+<a name="l00027"></a>00027 <span class="comment">Andreas Schiffler -- aschiffler at ferzkopp dot net</span>
+<a name="l00028"></a>00028 <span class="comment"></span>
+<a name="l00029"></a>00029 <span class="comment">*/</span>
+<a name="l00030"></a>00030 
+<a name="l00031"></a>00031 <span class="comment">/*</span>
+<a name="l00032"></a>00032 <span class="comment"></span>
+<a name="l00033"></a>00033 <span class="comment">Note: Uses inline x86 MMX or ASM optimizations if available and enabled.</span>
+<a name="l00034"></a>00034 <span class="comment"></span>
+<a name="l00035"></a>00035 <span class="comment">Note: Most of the MMX code is based on published routines </span>
+<a name="l00036"></a>00036 <span class="comment">by Vladimir Kravtchenko at vk at cs.ubc.ca - credits go to </span>
+<a name="l00037"></a>00037 <span class="comment">him for his work.</span>
+<a name="l00038"></a>00038 <span class="comment"></span>
+<a name="l00039"></a>00039 <span class="comment">*/</span>
+<a name="l00040"></a>00040 
+<a name="l00041"></a>00041 <span class="preprocessor">#include <stdio.h></span>
+<a name="l00042"></a>00042 <span class="preprocessor">#include <stdlib.h></span>
+<a name="l00043"></a>00043 <span class="preprocessor">#include <string.h></span>
+<a name="l00044"></a>00044 
+<a name="l00045"></a>00045 <span class="comment">/* Use GCC intrinsics if available: they support both i386 and x86_64,</span>
+<a name="l00046"></a>00046 <span class="comment">   provide ASM-grade performances, and lift the PUSHA/POPA issues. */</span>
+<a name="l00047"></a>00047 <span class="preprocessor">#ifdef __GNUC__</span>
+<a name="l00048"></a>00048 <span class="preprocessor"></span><span class="preprocessor">#  ifdef USE_MMX</span>
+<a name="l00049"></a>00049 <span class="preprocessor"></span><span class="preprocessor">#    include <mmintrin.h></span>
+<a name="l00050"></a>00050 <span class="preprocessor">#  endif</span>
+<a name="l00051"></a>00051 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l00052"></a>00052 <span class="preprocessor"></span><span class="preprocessor">#include <SDL_cpuinfo.h></span>
+<a name="l00053"></a>00053 <span class="preprocessor">#include "<a class="code" href="_s_d_l__image_filter_8h.html">SDL_imageFilter.h</a>"</span>
+<a name="l00054"></a>00054 
+<a name="l00058"></a><a class="code" href="_s_d_l__image_filter_8c.html#a700fb30611761c46a674a45cc28ff561">00058</a> <span class="preprocessor">#define SWAP_32(x) (((x) >> 24) | (((x) & 0x00ff0000) >> 8)  | (((x) & 0x0000ff00) << 8)  | ((x) << 24))</span>
+<a name="l00059"></a>00059 <span class="preprocessor"></span>
+<a name="l00060"></a>00060 <span class="comment">/* ------ Static variables ----- */</span>
+<a name="l00061"></a>00061 
+<a name="l00065"></a>00065 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterUseMMX = 1;
+<a name="l00066"></a>00066 
+<a name="l00067"></a>00067 <span class="comment">/* Detect GCC */</span>
+<a name="l00068"></a>00068 <span class="preprocessor">#if defined(__GNUC__)</span>
+<a name="l00069"></a>00069 <span class="preprocessor"></span><span class="preprocessor">#define GCC__</span>
+<a name="l00070"></a>00070 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l00071"></a>00071 <span class="preprocessor"></span>
+<a name="l00077"></a><a class="code" href="_s_d_l__image_filter_8h.html#a5823f6eb23fe8e74764a94f3d78204ef">00077</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>(<span class="keywordtype">void</span>)
+<a name="l00078"></a>00078 {
+<a name="l00079"></a>00079         <span class="comment">/* Check override flag */</span>
+<a name="l00080"></a>00080         <span class="keywordflow">if</span> (SDL_imageFilterUseMMX == 0) {
+<a name="l00081"></a>00081                 <span class="keywordflow">return</span> (0);
+<a name="l00082"></a>00082         }
+<a name="l00083"></a>00083 
+<a name="l00084"></a>00084         <span class="keywordflow">return</span> SDL_HasMMX();
+<a name="l00085"></a>00085 }
+<a name="l00086"></a>00086 
+<a name="l00090"></a><a class="code" href="_s_d_l__image_filter_8h.html#a403adc470cb1dd34520f18d55804d4ea">00090</a> <span class="keywordtype">void</span> <a class="code" href="_s_d_l__image_filter_8c.html#a5dff661660755161bb4aaf6199cd1384" title="Disable MMX check for filter functions and and force to use non-MMX C based code.">SDL_imageFilterMMXoff</a>()
+<a name="l00091"></a>00091 {
+<a name="l00092"></a>00092         SDL_imageFilterUseMMX = 0;
 <a name="l00093"></a>00093 }
 <a name="l00094"></a>00094 
-<a name="l00100"></a><a class="code" href="_s_d_l__image_filter_8h.html#a5823f6eb23fe8e74764a94f3d78204ef">00100</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>(<span class="keywordtype">void</span>)
-<a name="l00101"></a>00101 {
-<a name="l00102"></a>00102         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> mmx_bit;
-<a name="l00103"></a>00103 
-<a name="l00104"></a>00104         <span class="comment">/* Check override flag */</span>
-<a name="l00105"></a>00105         <span class="keywordflow">if</span> (SDL_imageFilterUseMMX == 0) {
-<a name="l00106"></a>00106                 <span class="keywordflow">return</span> (0);
-<a name="l00107"></a>00107         }
-<a name="l00108"></a>00108 
-<a name="l00109"></a>00109         mmx_bit = <a class="code" href="_s_d_l__image_filter_8c.html#ade15666303ddc71c543f44cf1536d00e" title="Internal function returning the CPU flags.">_cpuFlags</a>();
-<a name="l00110"></a>00110         mmx_bit &= 0x00800000;
-<a name="l00111"></a>00111         mmx_bit = (mmx_bit && 0x00800000);
-<a name="l00112"></a>00112 
-<a name="l00113"></a>00113         <span class="keywordflow">return</span> (<span class="keywordtype">int</span>)(mmx_bit);
-<a name="l00114"></a>00114 }
-<a name="l00115"></a>00115 
-<a name="l00119"></a><a class="code" href="_s_d_l__image_filter_8h.html#a403adc470cb1dd34520f18d55804d4ea">00119</a> <span class="keywordtype">void</span> <a class="code" href="_s_d_l__image_filter_8c.html#a5dff661660755161bb4aaf6199cd1384" title="Disable MMX check for filter functions and and force to use non-MMX C based code.">SDL_imageFilterMMXoff</a>()
-<a name="l00120"></a>00120 {
-<a name="l00121"></a>00121         SDL_imageFilterUseMMX = 0;
-<a name="l00122"></a>00122 }
-<a name="l00123"></a>00123 
-<a name="l00127"></a><a class="code" href="_s_d_l__image_filter_8h.html#a848ce7e9551b25fea19fe1fb739f74fb">00127</a> <span class="keywordtype">void</span> <a class="code" href="_s_d_l__image_filter_8c.html#a353ee234c3b51b33c4c5c4b30db5832d" title="Enable MMX check for filter functions and use MMX code if available.">SDL_imageFilterMMXon</a>()
-<a name="l00128"></a>00128 {
-<a name="l00129"></a>00129         SDL_imageFilterUseMMX = 1;
-<a name="l00130"></a>00130 }
-<a name="l00131"></a>00131 
-<a name="l00132"></a>00132 <span class="comment">/* ------------------------------------------------------------------------------------ */</span>
-<a name="l00133"></a>00133 
-<a name="l00144"></a><a class="code" href="_s_d_l__image_filter_8c.html#ace0bf40de8d58bbd8d6ff9c3fc04ec6e">00144</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ace0bf40de8d58bbd8d6ff9c3fc04ec6e" title="Internal MMX Filter using Add: D = saturation255(S1 + S2)">SDL_imageFilterAddMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char< [...]
-<a name="l00145"></a>00145 {
-<a name="l00146"></a>00146 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l00147"></a>00147 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l00148"></a>00148 <span class="preprocessor"></span>        __asm
-<a name="l00149"></a>00149         {
-<a name="l00150"></a>00150                 pusha
-<a name="l00151"></a>00151                         mov eax, Src1   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00152"></a>00152                         mov ebx, Src2   <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00153"></a>00153                         mov edi, Dest   <span class="comment">/* load Dest address into edi */</span>
-<a name="l00154"></a>00154                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00155"></a>00155                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00156"></a>00156                         align 16        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00157"></a>00157 L1010:
-<a name="l00158"></a>00158                 movq mm1, [eax] <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00159"></a>00159                 paddusb mm1, [ebx]      <span class="comment">/* mm1=Src1+Src2 (add 8 bytes with saturation) */</span>
-<a name="l00160"></a>00160                 movq [edi], mm1 <span class="comment">/* store result in Dest */</span>
-<a name="l00161"></a>00161                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00162"></a>00162                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
-<a name="l00163"></a>00163                         add edi, 8
-<a name="l00164"></a>00164                         dec ecx <span class="comment">/* decrease loop counter */</span>
-<a name="l00165"></a>00165                         jnz L1010       <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00166"></a>00166                         emms <span class="comment">/* exit MMX state */</span>
-<a name="l00167"></a>00167                         popa
-<a name="l00168"></a>00168         }
-<a name="l00169"></a>00169 <span class="preprocessor">#else</span>
-<a name="l00170"></a>00170 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l00171"></a>00171                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"mov          %2, %%eax \n\t"</span>        <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00172"></a>00172                 <span class="stringliteral">"mov          %1, %%ebx \n\t"</span>   <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00173"></a>00173                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l00174"></a>00174                 <span class="stringliteral">"mov          %3, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00175"></a>00175                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00176"></a>00176                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00177"></a>00177                 <span class="stringliteral">"1: movq (%%eax), %%mm1 \n\t"</span>           <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00178"></a>00178                 <span class="stringliteral">"paddusb (%%ebx), %%mm1 \n\t"</span>   <span class="comment">/* mm1=Src1+Src2 (add 8 bytes with saturation) */</span>
-<a name="l00179"></a>00179                 <span class="stringliteral">"movq    %%mm1, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l00180"></a>00180                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00181"></a>00181                 <span class="stringliteral">"add          $8, %%ebx \n\t"</span>   <span class="comment">/* register pointers by 8 */</span>
-<a name="l00182"></a>00182                 <span class="stringliteral">"add          $8, %%edi \n\t"</span> <span class="stringliteral">"dec              %%ecx \n\t"</span>     <span class="comment">/* decrease loop counter */</span>
-<a name="l00183"></a>00183                 <span class="stringliteral">"jnz             1b     \n\t"</span>     <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00184"></a>00184                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l00185"></a>00185                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l00186"></a>00186                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l00187"></a>00187                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l00188"></a>00188                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %3 */</span>
-<a name="l00189"></a>00189                 );
-<a name="l00190"></a>00190 <span class="preprocessor">#endif</span>
-<a name="l00191"></a>00191 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l00192"></a>00192 <span class="preprocessor">#else</span>
-<a name="l00193"></a>00193 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l00194"></a>00194 <span class="preprocessor">#endif</span>
-<a name="l00195"></a>00195 <span class="preprocessor"></span>}
-<a name="l00196"></a>00196 
-<a name="l00207"></a><a class="code" href="_s_d_l__image_filter_8h.html#a9034268e2f51550d8f1d6084bda45194">00207</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a9f06507eb0b63198dbd67495d61c9b20" title="Filter using Add: D = saturation255(S1 + S2)">SDL_imageFilterAdd</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <s [...]
-<a name="l00208"></a>00208 {
-<a name="l00209"></a>00209         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l00210"></a>00210         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l00211"></a>00211         <span class="keywordtype">int</span> result;
-<a name="l00212"></a>00212 
-<a name="l00213"></a>00213         <span class="comment">/* Validate input parameters */</span>
-<a name="l00214"></a>00214         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l00215"></a>00215                 <span class="keywordflow">return</span>(-1);
-<a name="l00216"></a>00216         <span class="keywordflow">if</span> (length == 0)
-<a name="l00217"></a>00217                 <span class="keywordflow">return</span>(0);
-<a name="l00218"></a>00218 
-<a name="l00219"></a>00219         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l00098"></a><a class="code" href="_s_d_l__image_filter_8h.html#a848ce7e9551b25fea19fe1fb739f74fb">00098</a> <span class="keywordtype">void</span> <a class="code" href="_s_d_l__image_filter_8c.html#a353ee234c3b51b33c4c5c4b30db5832d" title="Enable MMX check for filter functions and use MMX code if available.">SDL_imageFilterMMXon</a>()
+<a name="l00099"></a>00099 {
+<a name="l00100"></a>00100         SDL_imageFilterUseMMX = 1;
+<a name="l00101"></a>00101 }
+<a name="l00102"></a>00102 
+<a name="l00103"></a>00103 <span class="comment">/* ------------------------------------------------------------------------------------ */</span>
+<a name="l00104"></a>00104 
+<a name="l00115"></a>00115 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterAddMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength)
+<a name="l00116"></a>00116 {
+<a name="l00117"></a>00117 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l00118"></a>00118 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l00119"></a>00119 <span class="preprocessor"></span>        __asm
+<a name="l00120"></a>00120         {
+<a name="l00121"></a>00121                 pusha
+<a name="l00122"></a>00122                         mov eax, Src1   <span class="comment">/* load Src1 address into eax */</span>
+<a name="l00123"></a>00123                         mov ebx, Src2   <span class="comment">/* load Src2 address into ebx */</span>
+<a name="l00124"></a>00124                         mov edi, Dest   <span class="comment">/* load Dest address into edi */</span>
+<a name="l00125"></a>00125                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l00126"></a>00126                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l00127"></a>00127                         align 16        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l00128"></a>00128 L1010:
+<a name="l00129"></a>00129                 movq mm1, [eax] <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
+<a name="l00130"></a>00130                 paddusb mm1, [ebx]      <span class="comment">/* mm1=Src1+Src2 (add 8 bytes with saturation) */</span>
+<a name="l00131"></a>00131                 movq [edi], mm1 <span class="comment">/* store result in Dest */</span>
+<a name="l00132"></a>00132                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
+<a name="l00133"></a>00133                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
+<a name="l00134"></a>00134                         add edi, 8
+<a name="l00135"></a>00135                         dec ecx <span class="comment">/* decrease loop counter */</span>
+<a name="l00136"></a>00136                         jnz L1010       <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l00137"></a>00137                         emms <span class="comment">/* exit MMX state */</span>
+<a name="l00138"></a>00138                         popa
+<a name="l00139"></a>00139         }
+<a name="l00140"></a>00140 <span class="preprocessor">#else</span>
+<a name="l00141"></a>00141 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l00142"></a>00142         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l00143"></a>00143         __m64 *mSrc2 = (__m64*)Src2;
+<a name="l00144"></a>00144         __m64 *mDest = (__m64*)Dest;
+<a name="l00145"></a>00145         <span class="keywordtype">int</span> i;
+<a name="l00146"></a>00146         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l00147"></a>00147                 *mDest = _m_paddusb(*mSrc1, *mSrc2);    <span class="comment">/* Src1+Src2 (add 8 bytes with saturation) */</span>
+<a name="l00148"></a>00148                 mSrc1++;
+<a name="l00149"></a>00149                 mSrc2++;
+<a name="l00150"></a>00150                 mDest++;
+<a name="l00151"></a>00151         }
+<a name="l00152"></a>00152         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l00153"></a>00153 <span class="preprocessor">#endif</span>
+<a name="l00154"></a>00154 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l00155"></a>00155 <span class="preprocessor">#else</span>
+<a name="l00156"></a>00156 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l00157"></a>00157 <span class="preprocessor">#endif</span>
+<a name="l00158"></a>00158 <span class="preprocessor"></span>}
+<a name="l00159"></a>00159 
+<a name="l00170"></a><a class="code" href="_s_d_l__image_filter_8h.html#a9034268e2f51550d8f1d6084bda45194">00170</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a9f06507eb0b63198dbd67495d61c9b20" title="Filter using Add: D = saturation255(S1 + S2)">SDL_imageFilterAdd</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <s [...]
+<a name="l00171"></a>00171 {
+<a name="l00172"></a>00172         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l00173"></a>00173         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
+<a name="l00174"></a>00174         <span class="keywordtype">int</span> result;
+<a name="l00175"></a>00175 
+<a name="l00176"></a>00176         <span class="comment">/* Validate input parameters */</span>
+<a name="l00177"></a>00177         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l00178"></a>00178                 <span class="keywordflow">return</span>(-1);
+<a name="l00179"></a>00179         <span class="keywordflow">if</span> (length == 0)
+<a name="l00180"></a>00180                 <span class="keywordflow">return</span>(0);
+<a name="l00181"></a>00181 
+<a name="l00182"></a>00182         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l00183"></a>00183 
+<a name="l00184"></a>00184                 <span class="comment">/* Use MMX assembly routine */</span>
+<a name="l00185"></a>00185                 SDL_imageFilterAddMMX(Src1, Src2, Dest, length);
+<a name="l00186"></a>00186 
+<a name="l00187"></a>00187                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l00188"></a>00188                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l00189"></a>00189                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l00190"></a>00190                         istart = length & 0xfffffff8;
+<a name="l00191"></a>00191                         cursrc1 = &Src1[istart];
+<a name="l00192"></a>00192                         cursrc2 = &Src2[istart];
+<a name="l00193"></a>00193                         curdst = &Dest[istart];
+<a name="l00194"></a>00194                 } <span class="keywordflow">else</span> {
+<a name="l00195"></a>00195                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l00196"></a>00196                         <span class="keywordflow">return</span> (0);
+<a name="l00197"></a>00197                 }
+<a name="l00198"></a>00198         } <span class="keywordflow">else</span> {
+<a name="l00199"></a>00199                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l00200"></a>00200                 istart = 0;
+<a name="l00201"></a>00201                 cursrc1 = Src1;
+<a name="l00202"></a>00202                 cursrc2 = Src2;
+<a name="l00203"></a>00203                 curdst = Dest;
+<a name="l00204"></a>00204         }
+<a name="l00205"></a>00205 
+<a name="l00206"></a>00206         <span class="comment">/* C routine to process image */</span>
+<a name="l00207"></a>00207         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l00208"></a>00208                 result = (int) *cursrc1 + (<span class="keywordtype">int</span>) *cursrc2;
+<a name="l00209"></a>00209                 <span class="keywordflow">if</span> (result > 255)
+<a name="l00210"></a>00210                         result = 255;
+<a name="l00211"></a>00211                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l00212"></a>00212                 <span class="comment">/* Advance pointers */</span>
+<a name="l00213"></a>00213                 cursrc1++;
+<a name="l00214"></a>00214                 cursrc2++;
+<a name="l00215"></a>00215                 curdst++;
+<a name="l00216"></a>00216         }
+<a name="l00217"></a>00217 
+<a name="l00218"></a>00218         <span class="keywordflow">return</span> (0);
+<a name="l00219"></a>00219 }
 <a name="l00220"></a>00220 
-<a name="l00221"></a>00221                 <span class="comment">/* Use MMX assembly routine */</span>
-<a name="l00222"></a>00222                 <a class="code" href="_s_d_l__image_filter_8c.html#ace0bf40de8d58bbd8d6ff9c3fc04ec6e" title="Internal MMX Filter using Add: D = saturation255(S1 + S2)">SDL_imageFilterAddMMX</a>(Src1, Src2, Dest, length);
-<a name="l00223"></a>00223 
-<a name="l00224"></a>00224                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l00225"></a>00225                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l00226"></a>00226                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l00227"></a>00227                         istart = length & 0xfffffff8;
-<a name="l00228"></a>00228                         cursrc1 = &Src1[istart];
-<a name="l00229"></a>00229                         cursrc2 = &Src2[istart];
-<a name="l00230"></a>00230                         curdst = &Dest[istart];
-<a name="l00231"></a>00231                 } <span class="keywordflow">else</span> {
-<a name="l00232"></a>00232                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l00233"></a>00233                         <span class="keywordflow">return</span> (0);
-<a name="l00234"></a>00234                 }
-<a name="l00235"></a>00235         } <span class="keywordflow">else</span> {
-<a name="l00236"></a>00236                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l00237"></a>00237                 istart = 0;
-<a name="l00238"></a>00238                 cursrc1 = Src1;
-<a name="l00239"></a>00239                 cursrc2 = Src2;
-<a name="l00240"></a>00240                 curdst = Dest;
-<a name="l00241"></a>00241         }
-<a name="l00242"></a>00242 
-<a name="l00243"></a>00243         <span class="comment">/* C routine to process image */</span>
-<a name="l00244"></a>00244         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l00245"></a>00245                 result = (int) *cursrc1 + (<span class="keywordtype">int</span>) *cursrc2;
-<a name="l00246"></a>00246                 <span class="keywordflow">if</span> (result > 255)
-<a name="l00247"></a>00247                         result = 255;
-<a name="l00248"></a>00248                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l00249"></a>00249                 <span class="comment">/* Advance pointers */</span>
-<a name="l00250"></a>00250                 cursrc1++;
-<a name="l00251"></a>00251                 cursrc2++;
-<a name="l00252"></a>00252                 curdst++;
-<a name="l00253"></a>00253         }
-<a name="l00254"></a>00254 
-<a name="l00255"></a>00255         <span class="keywordflow">return</span> (0);
-<a name="l00256"></a>00256 }
-<a name="l00257"></a>00257 
-<a name="l00269"></a><a class="code" href="_s_d_l__image_filter_8c.html#ae3a61d6df0940ef96ccc7b48a0fc8966">00269</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ae3a61d6df0940ef96ccc7b48a0fc8966" title="Internal MMX Filter using Mean: D = S1/2 + S2/2.">SDL_imageFilterMeanMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> * [...]
-<a name="l00270"></a>00270                                                    <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Mask)
-<a name="l00271"></a>00271 {
-<a name="l00272"></a>00272 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l00273"></a>00273 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l00274"></a>00274 <span class="preprocessor"></span>        __asm
-<a name="l00275"></a>00275         { 
-<a name="l00276"></a>00276                 pusha
-<a name="l00277"></a>00277                         mov edx, Mask <span class="comment">/* load Mask address into edx */</span>
-<a name="l00278"></a>00278                         movq mm0, [edx] <span class="comment">/* load Mask into mm0 */</span>
-<a name="l00279"></a>00279                 mov eax, Src1 <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00280"></a>00280                         mov ebx, Src2 <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00281"></a>00281                         mov edi, Dest <span class="comment">/* load Dest address into edi */</span>
-<a name="l00282"></a>00282                         mov ecx, SrcLength <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00283"></a>00283                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00284"></a>00284                         align 16        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00285"></a>00285 L21011:
-<a name="l00286"></a>00286                 movq mm1,  [eax]        <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00287"></a>00287                 movq mm2,  [ebx]        <span class="comment">/* load 8 bytes from Src2 into mm2 */</span>
-<a name="l00288"></a>00288                 <span class="comment">/* --- Byte shift via Word shift --- */</span>
-<a name="l00289"></a>00289                 psrlw mm1, 1    <span class="comment">/* shift 4 WORDS of mm1 1 bit to the right */</span>
-<a name="l00290"></a>00290                         psrlw mm2, 1    <span class="comment">/* shift 4 WORDS of mm2 1 bit to the right */</span>
-<a name="l00291"></a>00291                         pand mm1, mm0   <span class="comment">// apply Mask to 8 BYTES of mm1 */</span>
-<a name="l00292"></a>00292                         <span class="comment">/* byte     0x0f, 0xdb, 0xc8 */</span>
-<a name="l00293"></a>00293                         pand mm2, mm0   <span class="comment">// apply Mask to 8 BYTES of mm2 */</span>
-<a name="l00294"></a>00294                         <span class="comment">/* byte     0x0f, 0xdb, 0xd0 */</span>
-<a name="l00295"></a>00295                         paddusb mm1,  mm2       <span class="comment">/* mm1=mm1+mm2 (add 8 bytes with saturation) */</span>
-<a name="l00296"></a>00296                         movq [edi],  mm1        <span class="comment">/* store result in Dest */</span>
-<a name="l00297"></a>00297                         add eax,  8     <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00298"></a>00298                         add ebx,  8     <span class="comment">/* register pointers by 8 */</span>
-<a name="l00299"></a>00299                         add edi,  8
-<a name="l00300"></a>00300                         dec ecx         <span class="comment">/* decrease loop counter */</span>
-<a name="l00301"></a>00301                         jnz L21011      <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00302"></a>00302                         emms    <span class="comment">/* exit MMX state */</span>
-<a name="l00303"></a>00303                         popa
-<a name="l00304"></a>00304         }
-<a name="l00305"></a>00305 <span class="preprocessor">#else</span>
-<a name="l00306"></a>00306 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l00307"></a>00307                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"movl         %4, %%edx \n\t"</span>        <span class="comment">/* load Mask address into edx */</span>
-<a name="l00308"></a>00308                 <span class="stringliteral">"movq    (%%edx), %%mm0 \n\t"</span>   <span class="comment">/* load Mask into mm0 */</span>
-<a name="l00309"></a>00309                 <span class="stringliteral">"mov          %2, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00310"></a>00310                 <span class="stringliteral">"mov          %1, %%ebx \n\t"</span>   <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00311"></a>00311                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l00312"></a>00312                 <span class="stringliteral">"mov          %3, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00313"></a>00313                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00314"></a>00314                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00315"></a>00315                 <span class="stringliteral">"1:                      \n\t"</span>
-<a name="l00316"></a>00316                 <span class="stringliteral">"movq    (%%eax), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00317"></a>00317                 <span class="stringliteral">"movq    (%%ebx), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src2 into mm2 */</span>
-<a name="l00318"></a>00318                 <span class="comment">/* --- Byte shift via Word shift --- */</span>
-<a name="l00319"></a>00319                 <span class="stringliteral">"psrlw        $1, %%mm1 \n\t"</span>   <span class="comment">/* shift 4 WORDS of mm1 1 bit to the right */</span>
-<a name="l00320"></a>00320                 <span class="stringliteral">"psrlw        $1, %%mm2 \n\t"</span>   <span class="comment">/* shift 4 WORDS of mm2 1 bit to the right */</span>
-<a name="l00321"></a>00321                 <span class="comment">/*      "pand      %%mm0, %%mm1 \n\t"    // apply Mask to 8 BYTES of mm1 */</span>
-<a name="l00322"></a>00322                 <span class="stringliteral">".byte     0x0f, 0xdb, 0xc8 \n\t"</span>
-<a name="l00323"></a>00323                 <span class="comment">/*      "pand      %%mm0, %%mm2 \n\t"    // apply Mask to 8 BYTES of mm2 */</span>
-<a name="l00324"></a>00324                 <span class="stringliteral">".byte     0x0f, 0xdb, 0xd0 \n\t"</span> 
-<a name="l00325"></a>00325                 <span class="stringliteral">"paddusb   %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* mm1=mm1+mm2 (add 8 bytes with saturation) */</span>
-<a name="l00326"></a>00326                 <span class="stringliteral">"movq    %%mm1, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l00327"></a>00327                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00328"></a>00328                 <span class="stringliteral">"add          $8, %%ebx \n\t"</span>   <span class="comment">/* register pointers by 8 */</span>
-<a name="l00329"></a>00329                 <span class="stringliteral">"add          $8, %%edi \n\t"</span> 
-<a name="l00330"></a>00330                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l00331"></a>00331                 <span class="stringliteral">"jnz                 1b \n\t"</span>     <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00332"></a>00332                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l00333"></a>00333                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l00334"></a>00334                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l00335"></a>00335                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l00336"></a>00336                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %3 */</span>
-<a name="l00337"></a>00337                 <span class="stringliteral">"m"</span>(Mask)                       <span class="comment">/* %4 */</span>
-<a name="l00338"></a>00338                 );
-<a name="l00339"></a>00339 <span class="preprocessor">#endif</span>
-<a name="l00340"></a>00340 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l00341"></a>00341 <span class="preprocessor">#else</span>
-<a name="l00342"></a>00342 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l00343"></a>00343 <span class="preprocessor">#endif</span>
-<a name="l00344"></a>00344 <span class="preprocessor"></span>}
-<a name="l00345"></a>00345 
-<a name="l00356"></a><a class="code" href="_s_d_l__image_filter_8h.html#a69cfa83c5d198c8ae4be4ab86e8d3b8f">00356</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ace072118fef77973210eb04fb4bfc779" title="Filter using Mean: D = S1/2 + S2/2.">SDL_imageFilterMean</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span clas [...]
-<a name="l00357"></a>00357 {
-<a name="l00358"></a>00358         <span class="keyword">static</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
-<a name="l00359"></a>00359         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l00360"></a>00360         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l00361"></a>00361         <span class="keywordtype">int</span> result;
-<a name="l00362"></a>00362 
-<a name="l00363"></a>00363         <span class="comment">/* Validate input parameters */</span>
-<a name="l00364"></a>00364         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l00365"></a>00365                 <span class="keywordflow">return</span>(-1);
-<a name="l00366"></a>00366         <span class="keywordflow">if</span> (length == 0)
-<a name="l00367"></a>00367                 <span class="keywordflow">return</span>(0);
-<a name="l00368"></a>00368 
-<a name="l00369"></a>00369         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l00370"></a>00370                 <span class="comment">/* MMX routine */</span>
-<a name="l00371"></a>00371                 <a class="code" href="_s_d_l__image_filter_8c.html#ae3a61d6df0940ef96ccc7b48a0fc8966" title="Internal MMX Filter using Mean: D = S1/2 + S2/2.">SDL_imageFilterMeanMMX</a>(Src1, Src2, Dest, length, Mask);
-<a name="l00372"></a>00372 
-<a name="l00373"></a>00373                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l00374"></a>00374                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l00375"></a>00375                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l00376"></a>00376                         istart = length & 0xfffffff8;
-<a name="l00377"></a>00377                         cursrc1 = &Src1[istart];
-<a name="l00378"></a>00378                         cursrc2 = &Src2[istart];
-<a name="l00379"></a>00379                         curdst = &Dest[istart];
-<a name="l00380"></a>00380                 } <span class="keywordflow">else</span> {
-<a name="l00381"></a>00381                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l00382"></a>00382                         <span class="keywordflow">return</span> (0);
-<a name="l00383"></a>00383                 }
-<a name="l00384"></a>00384         } <span class="keywordflow">else</span> {
-<a name="l00385"></a>00385                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l00386"></a>00386                 istart = 0;
-<a name="l00387"></a>00387                 cursrc1 = Src1;
-<a name="l00388"></a>00388                 cursrc2 = Src2;
-<a name="l00389"></a>00389                 curdst = Dest;
-<a name="l00390"></a>00390         }
-<a name="l00391"></a>00391 
-<a name="l00392"></a>00392         <span class="comment">/* C routine to process image */</span>
-<a name="l00393"></a>00393         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l00394"></a>00394                 result = (int) *cursrc1 / 2 + (<span class="keywordtype">int</span>) *cursrc2 / 2;
-<a name="l00395"></a>00395                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l00396"></a>00396                 <span class="comment">/* Advance pointers */</span>
-<a name="l00397"></a>00397                 cursrc1++;
-<a name="l00398"></a>00398                 cursrc2++;
-<a name="l00399"></a>00399                 curdst++;
+<a name="l00232"></a>00232 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterMeanMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength,
+<a name="l00233"></a>00233                                                    <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Mask)
+<a name="l00234"></a>00234 {
+<a name="l00235"></a>00235 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l00236"></a>00236 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l00237"></a>00237 <span class="preprocessor"></span>        __asm
+<a name="l00238"></a>00238         { 
+<a name="l00239"></a>00239                 pusha
+<a name="l00240"></a>00240                         mov edx, Mask <span class="comment">/* load Mask address into edx */</span>
+<a name="l00241"></a>00241                         movq mm0, [edx] <span class="comment">/* load Mask into mm0 */</span>
+<a name="l00242"></a>00242                 mov eax, Src1 <span class="comment">/* load Src1 address into eax */</span>
+<a name="l00243"></a>00243                         mov ebx, Src2 <span class="comment">/* load Src2 address into ebx */</span>
+<a name="l00244"></a>00244                         mov edi, Dest <span class="comment">/* load Dest address into edi */</span>
+<a name="l00245"></a>00245                         mov ecx, SrcLength <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l00246"></a>00246                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l00247"></a>00247                         align 16        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l00248"></a>00248 L21011:
+<a name="l00249"></a>00249                 movq mm1,  [eax]        <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
+<a name="l00250"></a>00250                 movq mm2,  [ebx]        <span class="comment">/* load 8 bytes from Src2 into mm2 */</span>
+<a name="l00251"></a>00251                 <span class="comment">/* --- Byte shift via Word shift --- */</span>
+<a name="l00252"></a>00252                 psrlw mm1, 1    <span class="comment">/* shift 4 WORDS of mm1 1 bit to the right */</span>
+<a name="l00253"></a>00253                         psrlw mm2, 1    <span class="comment">/* shift 4 WORDS of mm2 1 bit to the right */</span>
+<a name="l00254"></a>00254                         pand mm1, mm0   <span class="comment">// apply Mask to 8 BYTES of mm1 */</span>
+<a name="l00255"></a>00255                         <span class="comment">/* byte     0x0f, 0xdb, 0xc8 */</span>
+<a name="l00256"></a>00256                         pand mm2, mm0   <span class="comment">// apply Mask to 8 BYTES of mm2 */</span>
+<a name="l00257"></a>00257                         <span class="comment">/* byte     0x0f, 0xdb, 0xd0 */</span>
+<a name="l00258"></a>00258                         paddusb mm1,  mm2       <span class="comment">/* mm1=mm1+mm2 (add 8 bytes with saturation) */</span>
+<a name="l00259"></a>00259                         movq [edi],  mm1        <span class="comment">/* store result in Dest */</span>
+<a name="l00260"></a>00260                         add eax,  8     <span class="comment">/* increase Src1, Src2 and Dest  */</span>
+<a name="l00261"></a>00261                         add ebx,  8     <span class="comment">/* register pointers by 8 */</span>
+<a name="l00262"></a>00262                         add edi,  8
+<a name="l00263"></a>00263                         dec ecx         <span class="comment">/* decrease loop counter */</span>
+<a name="l00264"></a>00264                         jnz L21011      <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l00265"></a>00265                         emms    <span class="comment">/* exit MMX state */</span>
+<a name="l00266"></a>00266                         popa
+<a name="l00267"></a>00267         }
+<a name="l00268"></a>00268 <span class="preprocessor">#else</span>
+<a name="l00269"></a>00269 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l00270"></a>00270         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l00271"></a>00271         __m64 *mSrc2 = (__m64*)Src2;
+<a name="l00272"></a>00272         __m64 *mDest = (__m64*)Dest;
+<a name="l00273"></a>00273         __m64 *mMask = (__m64*)Mask;
+<a name="l00274"></a>00274         <span class="keywordtype">int</span> i;
+<a name="l00275"></a>00275         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l00276"></a>00276                 __m64 mm1 = *mSrc1,
+<a name="l00277"></a>00277                       mm2 = *mSrc2;
+<a name="l00278"></a>00278                 mm1 = _m_psrlwi(mm1, 1);        <span class="comment">/* shift 4 WORDS of mm1 1 bit to the right */</span>
+<a name="l00279"></a>00279                 mm2 = _m_psrlwi(mm2, 1);        <span class="comment">/* shift 4 WORDS of mm2 1 bit to the right */</span>
+<a name="l00280"></a>00280                 mm1 = _m_pand(mm1, *mMask);     <span class="comment">/* apply Mask to 8 BYTES of mm1 */</span>
+<a name="l00281"></a>00281                 mm2 = _m_pand(mm2, *mMask);     <span class="comment">/* apply Mask to 8 BYTES of mm2 */</span>
+<a name="l00282"></a>00282                 *mDest = _m_paddusb(mm1, mm2);  <span class="comment">/* mm1+mm2 (add 8 bytes with saturation) */</span>
+<a name="l00283"></a>00283                 mSrc1++;
+<a name="l00284"></a>00284                 mSrc2++;
+<a name="l00285"></a>00285                 mDest++;
+<a name="l00286"></a>00286         }
+<a name="l00287"></a>00287         _m_empty();                             <span class="comment">/* clean MMX state */</span>
+<a name="l00288"></a>00288 <span class="preprocessor">#endif</span>
+<a name="l00289"></a>00289 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l00290"></a>00290 <span class="preprocessor">#else</span>
+<a name="l00291"></a>00291 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l00292"></a>00292 <span class="preprocessor">#endif</span>
+<a name="l00293"></a>00293 <span class="preprocessor"></span>}
+<a name="l00294"></a>00294 
+<a name="l00305"></a><a class="code" href="_s_d_l__image_filter_8h.html#a69cfa83c5d198c8ae4be4ab86e8d3b8f">00305</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ace072118fef77973210eb04fb4bfc779" title="Filter using Mean: D = S1/2 + S2/2.">SDL_imageFilterMean</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span clas [...]
+<a name="l00306"></a>00306 {
+<a name="l00307"></a>00307         <span class="keyword">static</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
+<a name="l00308"></a>00308         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l00309"></a>00309         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
+<a name="l00310"></a>00310         <span class="keywordtype">int</span> result;
+<a name="l00311"></a>00311 
+<a name="l00312"></a>00312         <span class="comment">/* Validate input parameters */</span>
+<a name="l00313"></a>00313         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l00314"></a>00314                 <span class="keywordflow">return</span>(-1);
+<a name="l00315"></a>00315         <span class="keywordflow">if</span> (length == 0)
+<a name="l00316"></a>00316                 <span class="keywordflow">return</span>(0);
+<a name="l00317"></a>00317 
+<a name="l00318"></a>00318         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l00319"></a>00319                 <span class="comment">/* MMX routine */</span>
+<a name="l00320"></a>00320                 SDL_imageFilterMeanMMX(Src1, Src2, Dest, length, Mask);
+<a name="l00321"></a>00321 
+<a name="l00322"></a>00322                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l00323"></a>00323                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l00324"></a>00324                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l00325"></a>00325                         istart = length & 0xfffffff8;
+<a name="l00326"></a>00326                         cursrc1 = &Src1[istart];
+<a name="l00327"></a>00327                         cursrc2 = &Src2[istart];
+<a name="l00328"></a>00328                         curdst = &Dest[istart];
+<a name="l00329"></a>00329                 } <span class="keywordflow">else</span> {
+<a name="l00330"></a>00330                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l00331"></a>00331                         <span class="keywordflow">return</span> (0);
+<a name="l00332"></a>00332                 }
+<a name="l00333"></a>00333         } <span class="keywordflow">else</span> {
+<a name="l00334"></a>00334                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l00335"></a>00335                 istart = 0;
+<a name="l00336"></a>00336                 cursrc1 = Src1;
+<a name="l00337"></a>00337                 cursrc2 = Src2;
+<a name="l00338"></a>00338                 curdst = Dest;
+<a name="l00339"></a>00339         }
+<a name="l00340"></a>00340 
+<a name="l00341"></a>00341         <span class="comment">/* C routine to process image */</span>
+<a name="l00342"></a>00342         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l00343"></a>00343                 result = (int) *cursrc1 / 2 + (<span class="keywordtype">int</span>) *cursrc2 / 2;
+<a name="l00344"></a>00344                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l00345"></a>00345                 <span class="comment">/* Advance pointers */</span>
+<a name="l00346"></a>00346                 cursrc1++;
+<a name="l00347"></a>00347                 cursrc2++;
+<a name="l00348"></a>00348                 curdst++;
+<a name="l00349"></a>00349         }
+<a name="l00350"></a>00350 
+<a name="l00351"></a>00351         <span class="keywordflow">return</span> (0);
+<a name="l00352"></a>00352 }
+<a name="l00353"></a>00353 
+<a name="l00364"></a>00364 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterSubMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength)
+<a name="l00365"></a>00365 {
+<a name="l00366"></a>00366 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l00367"></a>00367 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l00368"></a>00368 <span class="preprocessor"></span>        __asm
+<a name="l00369"></a>00369         {
+<a name="l00370"></a>00370                 pusha
+<a name="l00371"></a>00371                         mov eax,  Src1  <span class="comment">/* load Src1 address into eax */</span>
+<a name="l00372"></a>00372                         mov ebx,  Src2  <span class="comment">/* load Src2 address into ebx */</span>
+<a name="l00373"></a>00373                         mov edi,  Dest  <span class="comment">/* load Dest address into edi */</span>
+<a name="l00374"></a>00374                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l00375"></a>00375                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l00376"></a>00376                         align 16 <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l00377"></a>00377 L1012:
+<a name="l00378"></a>00378                 movq mm1,  [eax]        <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
+<a name="l00379"></a>00379                 psubusb mm1,  [ebx]     <span class="comment">/* mm1=Src1-Src2 (sub 8 bytes with saturation) */</span>
+<a name="l00380"></a>00380                 movq [edi],  mm1        <span class="comment">/* store result in Dest */</span>
+<a name="l00381"></a>00381                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
+<a name="l00382"></a>00382                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
+<a name="l00383"></a>00383                         add edi, 8
+<a name="l00384"></a>00384                         dec ecx <span class="comment">/* decrease loop counter */</span>
+<a name="l00385"></a>00385                         jnz L1012       <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l00386"></a>00386                         emms <span class="comment">/* exit MMX state */</span>
+<a name="l00387"></a>00387                         popa
+<a name="l00388"></a>00388         }
+<a name="l00389"></a>00389 <span class="preprocessor">#else</span>
+<a name="l00390"></a>00390 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l00391"></a>00391         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l00392"></a>00392         __m64 *mSrc2 = (__m64*)Src2;
+<a name="l00393"></a>00393         __m64 *mDest = (__m64*)Dest;
+<a name="l00394"></a>00394         <span class="keywordtype">int</span> i;
+<a name="l00395"></a>00395         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l00396"></a>00396                 *mDest = _m_psubusb(*mSrc1, *mSrc2);    <span class="comment">/* Src1-Src2 (sub 8 bytes with saturation) */</span>
+<a name="l00397"></a>00397                 mSrc1++;
+<a name="l00398"></a>00398                 mSrc2++;
+<a name="l00399"></a>00399                 mDest++;
 <a name="l00400"></a>00400         }
-<a name="l00401"></a>00401 
-<a name="l00402"></a>00402         <span class="keywordflow">return</span> (0);
-<a name="l00403"></a>00403 }
-<a name="l00404"></a>00404 
-<a name="l00415"></a><a class="code" href="_s_d_l__image_filter_8c.html#a45d54d410e677d32ef33ef6226e9ea12">00415</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a45d54d410e677d32ef33ef6226e9ea12" title="Internal MMX Filter using Sub: D = saturation0(S1 - S2)">SDL_imageFilterSubMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</s [...]
-<a name="l00416"></a>00416 {
-<a name="l00417"></a>00417 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l00418"></a>00418 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l00419"></a>00419 <span class="preprocessor"></span>        __asm
-<a name="l00420"></a>00420         {
-<a name="l00421"></a>00421                 pusha
-<a name="l00422"></a>00422                         mov eax,  Src1  <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00423"></a>00423                         mov ebx,  Src2  <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00424"></a>00424                         mov edi,  Dest  <span class="comment">/* load Dest address into edi */</span>
-<a name="l00425"></a>00425                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00426"></a>00426                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00427"></a>00427                         align 16 <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00428"></a>00428 L1012:
-<a name="l00429"></a>00429                 movq mm1,  [eax]        <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00430"></a>00430                 psubusb mm1,  [ebx]     <span class="comment">/* mm1=Src1-Src2 (sub 8 bytes with saturation) */</span>
-<a name="l00431"></a>00431                 movq [edi],  mm1        <span class="comment">/* store result in Dest */</span>
-<a name="l00432"></a>00432                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00433"></a>00433                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
-<a name="l00434"></a>00434                         add edi, 8
-<a name="l00435"></a>00435                         dec ecx <span class="comment">/* decrease loop counter */</span>
-<a name="l00436"></a>00436                         jnz L1012       <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00437"></a>00437                         emms <span class="comment">/* exit MMX state */</span>
-<a name="l00438"></a>00438                         popa
-<a name="l00439"></a>00439         }
-<a name="l00440"></a>00440 <span class="preprocessor">#else</span>
-<a name="l00441"></a>00441 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l00442"></a>00442                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"mov %2, %%eax \n\t"</span> <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00443"></a>00443                 <span class="stringliteral">"mov %1, %%ebx \n\t"</span>    <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00444"></a>00444                 <span class="stringliteral">"mov %0, %%edi \n\t"</span>    <span class="comment">/* load Dest address into edi */</span>
-<a name="l00445"></a>00445                 <span class="stringliteral">"mov %3, %%ecx \n\t"</span>    <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00446"></a>00446                 <span class="stringliteral">"shr $3, %%ecx \n\t"</span>    <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00447"></a>00447                 <span class="stringliteral">".align 16       \n\t"</span>  <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00448"></a>00448                 <span class="stringliteral">"1: movq (%%eax), %%mm1 \n\t"</span>     <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00449"></a>00449                 <span class="stringliteral">"psubusb (%%ebx), %%mm1 \n\t"</span>   <span class="comment">/* mm1=Src1-Src2 (sub 8 bytes with saturation) */</span>
-<a name="l00450"></a>00450                 <span class="stringliteral">"movq    %%mm1, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l00451"></a>00451                 <span class="stringliteral">"add $8, %%eax \n\t"</span>    <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00452"></a>00452                 <span class="stringliteral">"add $8, %%ebx \n\t"</span>    <span class="comment">/* register pointers by 8 */</span>
-<a name="l00453"></a>00453                 <span class="stringliteral">"add $8, %%edi \n\t"</span> <span class="stringliteral">"dec %%ecx     \n\t"</span>       <span class="comment">/* decrease loop counter */</span>
-<a name="l00454"></a>00454                 <span class="stringliteral">"jnz 1b         \n\t"</span>     <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00455"></a>00455                 <span class="stringliteral">"emms          \n\t"</span>    <span class="comment">/* exit MMX state */</span>
-<a name="l00456"></a>00456                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l00457"></a>00457                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l00458"></a>00458                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l00459"></a>00459                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %3 */</span>
-<a name="l00460"></a>00460                 );
-<a name="l00461"></a>00461 <span class="preprocessor">#endif</span>
-<a name="l00462"></a>00462 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l00463"></a>00463 <span class="preprocessor">#else</span>
-<a name="l00464"></a>00464 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l00465"></a>00465 <span class="preprocessor">#endif</span>
-<a name="l00466"></a>00466 <span class="preprocessor"></span>}
-<a name="l00467"></a>00467 
-<a name="l00478"></a><a class="code" href="_s_d_l__image_filter_8h.html#a0e0fb80a3dad33d61a8147c7fb9f529d">00478</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a3c01cf8576ea7a0dfc09dbaa953c9287" title="Filter using Sub: D = saturation0(S1 - S2)">SDL_imageFilterSub</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <spa [...]
-<a name="l00479"></a>00479 {
-<a name="l00480"></a>00480         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l00481"></a>00481         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l00482"></a>00482         <span class="keywordtype">int</span> result;
-<a name="l00483"></a>00483 
-<a name="l00484"></a>00484         <span class="comment">/* Validate input parameters */</span>
-<a name="l00485"></a>00485         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l00486"></a>00486                 <span class="keywordflow">return</span>(-1);
-<a name="l00487"></a>00487         <span class="keywordflow">if</span> (length == 0)
-<a name="l00488"></a>00488                 <span class="keywordflow">return</span>(0);
-<a name="l00489"></a>00489 
-<a name="l00490"></a>00490         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l00491"></a>00491                 <span class="comment">/* MMX routine */</span>
-<a name="l00492"></a>00492                 <a class="code" href="_s_d_l__image_filter_8c.html#a45d54d410e677d32ef33ef6226e9ea12" title="Internal MMX Filter using Sub: D = saturation0(S1 - S2)">SDL_imageFilterSubMMX</a>(Src1, Src2, Dest, length);
-<a name="l00493"></a>00493 
-<a name="l00494"></a>00494                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l00495"></a>00495                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l00496"></a>00496                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l00497"></a>00497                         istart = length & 0xfffffff8;
-<a name="l00498"></a>00498                         cursrc1 = &Src1[istart];
-<a name="l00499"></a>00499                         cursrc2 = &Src2[istart];
-<a name="l00500"></a>00500                         curdst = &Dest[istart];
-<a name="l00501"></a>00501                 } <span class="keywordflow">else</span> {
-<a name="l00502"></a>00502                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l00503"></a>00503                         <span class="keywordflow">return</span> (0);
-<a name="l00504"></a>00504                 }
-<a name="l00505"></a>00505         } <span class="keywordflow">else</span> {
-<a name="l00506"></a>00506                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l00507"></a>00507                 istart = 0;
-<a name="l00508"></a>00508                 cursrc1 = Src1;
-<a name="l00509"></a>00509                 cursrc2 = Src2;
-<a name="l00510"></a>00510                 curdst = Dest;
-<a name="l00511"></a>00511         }
-<a name="l00512"></a>00512 
-<a name="l00513"></a>00513         <span class="comment">/* C routine to process image */</span>
-<a name="l00514"></a>00514         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l00515"></a>00515                 result = (int) *cursrc1 - (<span class="keywordtype">int</span>) *cursrc2;
-<a name="l00516"></a>00516                 <span class="keywordflow">if</span> (result < 0)
-<a name="l00517"></a>00517                         result = 0;
-<a name="l00518"></a>00518                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l00519"></a>00519                 <span class="comment">/* Advance pointers */</span>
-<a name="l00520"></a>00520                 cursrc1++;
-<a name="l00521"></a>00521                 cursrc2++;
-<a name="l00522"></a>00522                 curdst++;
-<a name="l00523"></a>00523         }
-<a name="l00524"></a>00524 
-<a name="l00525"></a>00525         <span class="keywordflow">return</span> (0);
-<a name="l00526"></a>00526 }
-<a name="l00527"></a>00527 
-<a name="l00538"></a><a class="code" href="_s_d_l__image_filter_8c.html#a601bf863185e51af32c6008ecb0a5095">00538</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a601bf863185e51af32c6008ecb0a5095" title="Internal MMX Filter using AbsDiff: D = | S1 - S2 |.">SDL_imageFilterAbsDiffMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</s [...]
-<a name="l00539"></a>00539 {
-<a name="l00540"></a>00540 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l00541"></a>00541 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l00542"></a>00542 <span class="preprocessor"></span>        __asm
-<a name="l00543"></a>00543         {
-<a name="l00544"></a>00544                 pusha
-<a name="l00545"></a>00545                         mov eax, Src1   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00546"></a>00546                         mov ebx, Src2   <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00547"></a>00547                         mov edi, Dest   <span class="comment">/* load Dest address into edi */</span>
-<a name="l00548"></a>00548                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00549"></a>00549                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00550"></a>00550                         align 16        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00551"></a>00551 L1013:
-<a name="l00552"></a>00552                 movq mm1,  [eax]        <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00553"></a>00553                 movq mm2,  [ebx]        <span class="comment">/* load 8 bytes from Src2 into mm2 */</span>
-<a name="l00554"></a>00554                 psubusb mm1,  [ebx]     <span class="comment">/* mm1=Src1-Src2 (sub 8 bytes with saturation) */</span>
-<a name="l00555"></a>00555                 psubusb mm2,  [eax]     <span class="comment">/* mm2=Src2-Src1 (sub 8 bytes with saturation) */</span>
-<a name="l00556"></a>00556                 por mm1,  mm2   <span class="comment">/* combine both mm2 and mm1 results */</span>
-<a name="l00557"></a>00557                         movq [edi],  mm1        <span class="comment">/* store result in Dest */</span>
-<a name="l00558"></a>00558                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00559"></a>00559                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
-<a name="l00560"></a>00560                         add edi, 8
-<a name="l00561"></a>00561                         dec ecx         <span class="comment">/* decrease loop counter */</span>
-<a name="l00562"></a>00562                         jnz L1013       <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00563"></a>00563                         emms         <span class="comment">/* exit MMX state */</span>
-<a name="l00564"></a>00564                         popa
-<a name="l00565"></a>00565         }
-<a name="l00566"></a>00566 <span class="preprocessor">#else</span>
-<a name="l00567"></a>00567 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l00568"></a>00568                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"mov %2, %%eax \n\t"</span> <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00569"></a>00569                 <span class="stringliteral">"mov %1, %%ebx \n\t"</span>    <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00570"></a>00570                 <span class="stringliteral">"mov %0, %%edi \n\t"</span>    <span class="comment">/* load Dest address into edi */</span>
-<a name="l00571"></a>00571                 <span class="stringliteral">"mov %3, %%ecx \n\t"</span>    <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00572"></a>00572                 <span class="stringliteral">"shr $3, %%ecx \n\t"</span>    <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00573"></a>00573                 <span class="stringliteral">".align 16       \n\t"</span>  <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00574"></a>00574                 <span class="stringliteral">"1: movq (%%eax), %%mm1 \n\t"</span>     <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00575"></a>00575                 <span class="stringliteral">"movq    (%%ebx), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src2 into mm2 */</span>
-<a name="l00576"></a>00576                 <span class="stringliteral">"psubusb (%%ebx), %%mm1 \n\t"</span>   <span class="comment">/* mm1=Src1-Src2 (sub 8 bytes with saturation) */</span>
-<a name="l00577"></a>00577                 <span class="stringliteral">"psubusb (%%eax), %%mm2 \n\t"</span>   <span class="comment">/* mm2=Src2-Src1 (sub 8 bytes with saturation) */</span>
-<a name="l00578"></a>00578                 <span class="stringliteral">"por       %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* combine both mm2 and mm1 results */</span>
-<a name="l00579"></a>00579                 <span class="stringliteral">"movq    %%mm1, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l00580"></a>00580                 <span class="stringliteral">"add $8, %%eax \n\t"</span>    <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00581"></a>00581                 <span class="stringliteral">"add $8, %%ebx \n\t"</span>    <span class="comment">/* register pointers by 8 */</span>
-<a name="l00582"></a>00582                 <span class="stringliteral">"add $8, %%edi \n\t"</span> <span class="stringliteral">"dec %%ecx     \n\t"</span>       <span class="comment">/* decrease loop counter */</span>
-<a name="l00583"></a>00583                 <span class="stringliteral">"jnz 1b        \n\t"</span>      <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00584"></a>00584                 <span class="stringliteral">"emms          \n\t"</span>    <span class="comment">/* exit MMX state */</span>
-<a name="l00585"></a>00585                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l00586"></a>00586                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l00587"></a>00587                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l00588"></a>00588                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %3 */</span>
-<a name="l00589"></a>00589                 );
-<a name="l00590"></a>00590 <span class="preprocessor">#endif</span>
-<a name="l00591"></a>00591 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l00592"></a>00592 <span class="preprocessor">#else</span>
-<a name="l00593"></a>00593 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l00594"></a>00594 <span class="preprocessor">#endif</span>
-<a name="l00595"></a>00595 <span class="preprocessor"></span>}
-<a name="l00596"></a>00596 
-<a name="l00607"></a><a class="code" href="_s_d_l__image_filter_8h.html#a789ce070edcc478ad97a0d7ff90e6aa2">00607</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a472909f904274255cd6793c520172e48" title="Filter using AbsDiff: D = | S1 - S2 |.">SDL_imageFilterAbsDiff</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <spa [...]
-<a name="l00608"></a>00608 {
-<a name="l00609"></a>00609         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l00610"></a>00610         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l00611"></a>00611         <span class="keywordtype">int</span> result;
-<a name="l00612"></a>00612 
-<a name="l00613"></a>00613         <span class="comment">/* Validate input parameters */</span>
-<a name="l00614"></a>00614         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l00615"></a>00615                 <span class="keywordflow">return</span>(-1);
-<a name="l00616"></a>00616         <span class="keywordflow">if</span> (length == 0)
-<a name="l00617"></a>00617                 <span class="keywordflow">return</span>(0);
-<a name="l00618"></a>00618 
-<a name="l00619"></a>00619         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l00620"></a>00620                 <span class="comment">/* MMX routine */</span>
-<a name="l00621"></a>00621                 <a class="code" href="_s_d_l__image_filter_8c.html#a601bf863185e51af32c6008ecb0a5095" title="Internal MMX Filter using AbsDiff: D = | S1 - S2 |.">SDL_imageFilterAbsDiffMMX</a>(Src1, Src2, Dest, length);
-<a name="l00622"></a>00622 
-<a name="l00623"></a>00623                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l00624"></a>00624                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l00625"></a>00625                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l00626"></a>00626                         istart = length & 0xfffffff8;
-<a name="l00627"></a>00627                         cursrc1 = &Src1[istart];
-<a name="l00628"></a>00628                         cursrc2 = &Src2[istart];
-<a name="l00629"></a>00629                         curdst = &Dest[istart];
-<a name="l00630"></a>00630                 } <span class="keywordflow">else</span> {
-<a name="l00631"></a>00631                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l00632"></a>00632                         <span class="keywordflow">return</span> (0);
-<a name="l00633"></a>00633                 }
-<a name="l00634"></a>00634         } <span class="keywordflow">else</span> {
-<a name="l00635"></a>00635                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l00636"></a>00636                 istart = 0;
-<a name="l00637"></a>00637                 cursrc1 = Src1;
-<a name="l00638"></a>00638                 cursrc2 = Src2;
-<a name="l00639"></a>00639                 curdst = Dest;
+<a name="l00401"></a>00401         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l00402"></a>00402 <span class="preprocessor">#endif</span>
+<a name="l00403"></a>00403 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l00404"></a>00404 <span class="preprocessor">#else</span>
+<a name="l00405"></a>00405 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l00406"></a>00406 <span class="preprocessor">#endif</span>
+<a name="l00407"></a>00407 <span class="preprocessor"></span>}
+<a name="l00408"></a>00408 
+<a name="l00419"></a><a class="code" href="_s_d_l__image_filter_8h.html#a0e0fb80a3dad33d61a8147c7fb9f529d">00419</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a3c01cf8576ea7a0dfc09dbaa953c9287" title="Filter using Sub: D = saturation0(S1 - S2)">SDL_imageFilterSub</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <spa [...]
+<a name="l00420"></a>00420 {
+<a name="l00421"></a>00421         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l00422"></a>00422         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
+<a name="l00423"></a>00423         <span class="keywordtype">int</span> result;
+<a name="l00424"></a>00424 
+<a name="l00425"></a>00425         <span class="comment">/* Validate input parameters */</span>
+<a name="l00426"></a>00426         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l00427"></a>00427                 <span class="keywordflow">return</span>(-1);
+<a name="l00428"></a>00428         <span class="keywordflow">if</span> (length == 0)
+<a name="l00429"></a>00429                 <span class="keywordflow">return</span>(0);
+<a name="l00430"></a>00430 
+<a name="l00431"></a>00431         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l00432"></a>00432                 <span class="comment">/* MMX routine */</span>
+<a name="l00433"></a>00433                 SDL_imageFilterSubMMX(Src1, Src2, Dest, length);
+<a name="l00434"></a>00434 
+<a name="l00435"></a>00435                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l00436"></a>00436                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l00437"></a>00437                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l00438"></a>00438                         istart = length & 0xfffffff8;
+<a name="l00439"></a>00439                         cursrc1 = &Src1[istart];
+<a name="l00440"></a>00440                         cursrc2 = &Src2[istart];
+<a name="l00441"></a>00441                         curdst = &Dest[istart];
+<a name="l00442"></a>00442                 } <span class="keywordflow">else</span> {
+<a name="l00443"></a>00443                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l00444"></a>00444                         <span class="keywordflow">return</span> (0);
+<a name="l00445"></a>00445                 }
+<a name="l00446"></a>00446         } <span class="keywordflow">else</span> {
+<a name="l00447"></a>00447                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l00448"></a>00448                 istart = 0;
+<a name="l00449"></a>00449                 cursrc1 = Src1;
+<a name="l00450"></a>00450                 cursrc2 = Src2;
+<a name="l00451"></a>00451                 curdst = Dest;
+<a name="l00452"></a>00452         }
+<a name="l00453"></a>00453 
+<a name="l00454"></a>00454         <span class="comment">/* C routine to process image */</span>
+<a name="l00455"></a>00455         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l00456"></a>00456                 result = (int) *cursrc1 - (<span class="keywordtype">int</span>) *cursrc2;
+<a name="l00457"></a>00457                 <span class="keywordflow">if</span> (result < 0)
+<a name="l00458"></a>00458                         result = 0;
+<a name="l00459"></a>00459                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l00460"></a>00460                 <span class="comment">/* Advance pointers */</span>
+<a name="l00461"></a>00461                 cursrc1++;
+<a name="l00462"></a>00462                 cursrc2++;
+<a name="l00463"></a>00463                 curdst++;
+<a name="l00464"></a>00464         }
+<a name="l00465"></a>00465 
+<a name="l00466"></a>00466         <span class="keywordflow">return</span> (0);
+<a name="l00467"></a>00467 }
+<a name="l00468"></a>00468 
+<a name="l00479"></a>00479 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterAbsDiffMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength)
+<a name="l00480"></a>00480 {
+<a name="l00481"></a>00481 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l00482"></a>00482 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l00483"></a>00483 <span class="preprocessor"></span>        __asm
+<a name="l00484"></a>00484         {
+<a name="l00485"></a>00485                 pusha
+<a name="l00486"></a>00486                         mov eax, Src1   <span class="comment">/* load Src1 address into eax */</span>
+<a name="l00487"></a>00487                         mov ebx, Src2   <span class="comment">/* load Src2 address into ebx */</span>
+<a name="l00488"></a>00488                         mov edi, Dest   <span class="comment">/* load Dest address into edi */</span>
+<a name="l00489"></a>00489                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l00490"></a>00490                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l00491"></a>00491                         align 16        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l00492"></a>00492 L1013:
+<a name="l00493"></a>00493                 movq mm1,  [eax]        <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
+<a name="l00494"></a>00494                 movq mm2,  [ebx]        <span class="comment">/* load 8 bytes from Src2 into mm2 */</span>
+<a name="l00495"></a>00495                 psubusb mm1,  [ebx]     <span class="comment">/* mm1=Src1-Src2 (sub 8 bytes with saturation) */</span>
+<a name="l00496"></a>00496                 psubusb mm2,  [eax]     <span class="comment">/* mm2=Src2-Src1 (sub 8 bytes with saturation) */</span>
+<a name="l00497"></a>00497                 por mm1,  mm2   <span class="comment">/* combine both mm2 and mm1 results */</span>
+<a name="l00498"></a>00498                         movq [edi],  mm1        <span class="comment">/* store result in Dest */</span>
+<a name="l00499"></a>00499                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
+<a name="l00500"></a>00500                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
+<a name="l00501"></a>00501                         add edi, 8
+<a name="l00502"></a>00502                         dec ecx         <span class="comment">/* decrease loop counter */</span>
+<a name="l00503"></a>00503                         jnz L1013       <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l00504"></a>00504                         emms         <span class="comment">/* exit MMX state */</span>
+<a name="l00505"></a>00505                         popa
+<a name="l00506"></a>00506         }
+<a name="l00507"></a>00507 <span class="preprocessor">#else</span>
+<a name="l00508"></a>00508 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l00509"></a>00509         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l00510"></a>00510         __m64 *mSrc2 = (__m64*)Src2;
+<a name="l00511"></a>00511         __m64 *mDest = (__m64*)Dest;
+<a name="l00512"></a>00512         <span class="keywordtype">int</span> i;
+<a name="l00513"></a>00513         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l00514"></a>00514                 __m64 mm1 = _m_psubusb(*mSrc2, *mSrc1); <span class="comment">/* Src1-Src2 (sub 8 bytes with saturation) */</span>
+<a name="l00515"></a>00515                 __m64 mm2 = _m_psubusb(*mSrc1, *mSrc2); <span class="comment">/* Src2-Src1 (sub 8 bytes with saturation) */</span>
+<a name="l00516"></a>00516                 *mDest = _m_por(mm1, mm2);              <span class="comment">/* combine both mm2 and mm1 results */</span>
+<a name="l00517"></a>00517                 mSrc1++;
+<a name="l00518"></a>00518                 mSrc2++;
+<a name="l00519"></a>00519                 mDest++;
+<a name="l00520"></a>00520         }
+<a name="l00521"></a>00521         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l00522"></a>00522 <span class="preprocessor">#endif</span>
+<a name="l00523"></a>00523 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l00524"></a>00524 <span class="preprocessor">#else</span>
+<a name="l00525"></a>00525 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l00526"></a>00526 <span class="preprocessor">#endif</span>
+<a name="l00527"></a>00527 <span class="preprocessor"></span>}
+<a name="l00528"></a>00528 
+<a name="l00539"></a><a class="code" href="_s_d_l__image_filter_8h.html#a789ce070edcc478ad97a0d7ff90e6aa2">00539</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a472909f904274255cd6793c520172e48" title="Filter using AbsDiff: D = | S1 - S2 |.">SDL_imageFilterAbsDiff</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <spa [...]
+<a name="l00540"></a>00540 {
+<a name="l00541"></a>00541         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l00542"></a>00542         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
+<a name="l00543"></a>00543         <span class="keywordtype">int</span> result;
+<a name="l00544"></a>00544 
+<a name="l00545"></a>00545         <span class="comment">/* Validate input parameters */</span>
+<a name="l00546"></a>00546         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l00547"></a>00547                 <span class="keywordflow">return</span>(-1);
+<a name="l00548"></a>00548         <span class="keywordflow">if</span> (length == 0)
+<a name="l00549"></a>00549                 <span class="keywordflow">return</span>(0);
+<a name="l00550"></a>00550 
+<a name="l00551"></a>00551         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l00552"></a>00552                 <span class="comment">/* MMX routine */</span>
+<a name="l00553"></a>00553                 SDL_imageFilterAbsDiffMMX(Src1, Src2, Dest, length);
+<a name="l00554"></a>00554 
+<a name="l00555"></a>00555                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l00556"></a>00556                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l00557"></a>00557                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l00558"></a>00558                         istart = length & 0xfffffff8;
+<a name="l00559"></a>00559                         cursrc1 = &Src1[istart];
+<a name="l00560"></a>00560                         cursrc2 = &Src2[istart];
+<a name="l00561"></a>00561                         curdst = &Dest[istart];
+<a name="l00562"></a>00562                 } <span class="keywordflow">else</span> {
+<a name="l00563"></a>00563                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l00564"></a>00564                         <span class="keywordflow">return</span> (0);
+<a name="l00565"></a>00565                 }
+<a name="l00566"></a>00566         } <span class="keywordflow">else</span> {
+<a name="l00567"></a>00567                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l00568"></a>00568                 istart = 0;
+<a name="l00569"></a>00569                 cursrc1 = Src1;
+<a name="l00570"></a>00570                 cursrc2 = Src2;
+<a name="l00571"></a>00571                 curdst = Dest;
+<a name="l00572"></a>00572         }
+<a name="l00573"></a>00573 
+<a name="l00574"></a>00574         <span class="comment">/* C routine to process image */</span>
+<a name="l00575"></a>00575         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l00576"></a>00576                 result = abs((<span class="keywordtype">int</span>) *cursrc1 - (<span class="keywordtype">int</span>) *cursrc2);
+<a name="l00577"></a>00577                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l00578"></a>00578                 <span class="comment">/* Advance pointers */</span>
+<a name="l00579"></a>00579                 cursrc1++;
+<a name="l00580"></a>00580                 cursrc2++;
+<a name="l00581"></a>00581                 curdst++;
+<a name="l00582"></a>00582         }
+<a name="l00583"></a>00583 
+<a name="l00584"></a>00584         <span class="keywordflow">return</span> (0);
+<a name="l00585"></a>00585 }
+<a name="l00586"></a>00586 
+<a name="l00597"></a>00597 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterMultMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength)
+<a name="l00598"></a>00598 {
+<a name="l00599"></a>00599 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l00600"></a>00600 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l00601"></a>00601 <span class="preprocessor"></span>        __asm
+<a name="l00602"></a>00602         {
+<a name="l00603"></a>00603                 pusha
+<a name="l00604"></a>00604                         mov eax, Src1   <span class="comment">/* load Src1 address into eax */</span>
+<a name="l00605"></a>00605                         mov ebx, Src2   <span class="comment">/* load Src2 address into ebx */</span>
+<a name="l00606"></a>00606                         mov edi, Dest   <span class="comment">/* load Dest address into edi */</span>
+<a name="l00607"></a>00607                         mov ecx, SrcLength   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l00608"></a>00608                         shr ecx, 3   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l00609"></a>00609                         pxor mm0, mm0   <span class="comment">/* zero mm0 register */</span>
+<a name="l00610"></a>00610                         align 16        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l00611"></a>00611 L1014:
+<a name="l00612"></a>00612                 movq mm1, [eax]   <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
+<a name="l00613"></a>00613                 movq mm3, [ebx]   <span class="comment">/* load 8 bytes from Src2 into mm3 */</span>
+<a name="l00614"></a>00614                 movq mm2, mm1   <span class="comment">/* copy mm1 into mm2 */</span>
+<a name="l00615"></a>00615                         movq mm4, mm3   <span class="comment">/* copy mm3 into mm4  */</span>
+<a name="l00616"></a>00616                         punpcklbw mm1, mm0   <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l00617"></a>00617                         punpckhbw mm2, mm0   <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l00618"></a>00618                         punpcklbw mm3, mm0   <span class="comment">/* unpack low  bytes of Src2 into words */</span>
+<a name="l00619"></a>00619                         punpckhbw mm4, mm0   <span class="comment">/* unpack high bytes of Src2 into words */</span>
+<a name="l00620"></a>00620                         pmullw mm1, mm3   <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
+<a name="l00621"></a>00621                         pmullw mm2, mm4   <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
+<a name="l00622"></a>00622                         <span class="comment">/* Take abs value of the results (signed words) */</span>
+<a name="l00623"></a>00623                         movq mm5, mm1   <span class="comment">/* copy mm1 into mm5 */</span>
+<a name="l00624"></a>00624                         movq mm6, mm2   <span class="comment">/* copy mm2 into mm6 */</span>
+<a name="l00625"></a>00625                         psraw mm5, 15   <span class="comment">/* fill mm5 words with word sign bit */</span>
+<a name="l00626"></a>00626                         psraw mm6, 15   <span class="comment">/* fill mm6 words with word sign bit */</span>
+<a name="l00627"></a>00627                         pxor mm1, mm5   <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l00628"></a>00628                         pxor mm2, mm6   <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l00629"></a>00629                         psubsw mm1, mm5   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l00630"></a>00630                         psubsw mm2, mm6   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l00631"></a>00631                         packuswb mm1, mm2   <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l00632"></a>00632                         movq [edi], mm1   <span class="comment">/* store result in Dest */</span>
+<a name="l00633"></a>00633                         add eax, 8   <span class="comment">/* increase Src1, Src2 and Dest  */</span>
+<a name="l00634"></a>00634                         add ebx, 8   <span class="comment">/* register pointers by 8 */</span>
+<a name="l00635"></a>00635                         add edi, 8
+<a name="l00636"></a>00636                         dec ecx         <span class="comment">/* decrease loop counter */</span>
+<a name="l00637"></a>00637                         jnz L1014       <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l00638"></a>00638                         emms <span class="comment">/* exit MMX state */</span>
+<a name="l00639"></a>00639                         popa
 <a name="l00640"></a>00640         }
-<a name="l00641"></a>00641 
-<a name="l00642"></a>00642         <span class="comment">/* C routine to process image */</span>
-<a name="l00643"></a>00643         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l00644"></a>00644                 result = abs((<span class="keywordtype">int</span>) *cursrc1 - (<span class="keywordtype">int</span>) *cursrc2);
-<a name="l00645"></a>00645                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l00646"></a>00646                 <span class="comment">/* Advance pointers */</span>
-<a name="l00647"></a>00647                 cursrc1++;
-<a name="l00648"></a>00648                 cursrc2++;
-<a name="l00649"></a>00649                 curdst++;
-<a name="l00650"></a>00650         }
-<a name="l00651"></a>00651 
-<a name="l00652"></a>00652         <span class="keywordflow">return</span> (0);
-<a name="l00653"></a>00653 }
-<a name="l00654"></a>00654 
-<a name="l00665"></a><a class="code" href="_s_d_l__image_filter_8c.html#ad565921b533977ad2059d58d3c4a3094">00665</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ad565921b533977ad2059d58d3c4a3094" title="Internal MMX Filter using Mult: D = saturation255(S1 * S2)">SDL_imageFilterMultMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">cha [...]
-<a name="l00666"></a>00666 {
-<a name="l00667"></a>00667 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l00668"></a>00668 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l00669"></a>00669 <span class="preprocessor"></span>        __asm
-<a name="l00670"></a>00670         {
-<a name="l00671"></a>00671                 pusha
-<a name="l00672"></a>00672                         mov eax, Src1   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00673"></a>00673                         mov ebx, Src2   <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00674"></a>00674                         mov edi, Dest   <span class="comment">/* load Dest address into edi */</span>
-<a name="l00675"></a>00675                         mov ecx, SrcLength   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00676"></a>00676                         shr ecx, 3   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00677"></a>00677                         pxor mm0, mm0   <span class="comment">/* zero mm0 register */</span>
-<a name="l00678"></a>00678                         align 16        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00679"></a>00679 L1014:
-<a name="l00680"></a>00680                 movq mm1, [eax]   <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00681"></a>00681                 movq mm3, [ebx]   <span class="comment">/* load 8 bytes from Src2 into mm3 */</span>
-<a name="l00682"></a>00682                 movq mm2, mm1   <span class="comment">/* copy mm1 into mm2 */</span>
-<a name="l00683"></a>00683                         movq mm4, mm3   <span class="comment">/* copy mm3 into mm4  */</span>
-<a name="l00684"></a>00684                         punpcklbw mm1, mm0   <span class="comment">/* unpack low  bytes of Src1 into words */</span>
-<a name="l00685"></a>00685                         punpckhbw mm2, mm0   <span class="comment">/* unpack high bytes of Src1 into words */</span>
-<a name="l00686"></a>00686                         punpcklbw mm3, mm0   <span class="comment">/* unpack low  bytes of Src2 into words */</span>
-<a name="l00687"></a>00687                         punpckhbw mm4, mm0   <span class="comment">/* unpack high bytes of Src2 into words */</span>
-<a name="l00688"></a>00688                         pmullw mm1, mm3   <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
-<a name="l00689"></a>00689                         pmullw mm2, mm4   <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
-<a name="l00690"></a>00690                         <span class="comment">/* Take abs value of the results (signed words) */</span>
-<a name="l00691"></a>00691                         movq mm5, mm1   <span class="comment">/* copy mm1 into mm5 */</span>
-<a name="l00692"></a>00692                         movq mm6, mm2   <span class="comment">/* copy mm2 into mm6 */</span>
-<a name="l00693"></a>00693                         psraw mm5, 15   <span class="comment">/* fill mm5 words with word sign bit */</span>
-<a name="l00694"></a>00694                         psraw mm6, 15   <span class="comment">/* fill mm6 words with word sign bit */</span>
-<a name="l00695"></a>00695                         pxor mm1, mm5   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l00696"></a>00696                         pxor mm2, mm6   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l00697"></a>00697                         psubsw mm1, mm5   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l00698"></a>00698                         psubsw mm2, mm6   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l00699"></a>00699                         packuswb mm1, mm2   <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l00700"></a>00700                         movq [edi], mm1   <span class="comment">/* store result in Dest */</span>
-<a name="l00701"></a>00701                         add eax, 8   <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00702"></a>00702                         add ebx, 8   <span class="comment">/* register pointers by 8 */</span>
-<a name="l00703"></a>00703                         add edi, 8
-<a name="l00704"></a>00704                         dec ecx         <span class="comment">/* decrease loop counter */</span>
-<a name="l00705"></a>00705                         jnz L1014       <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00706"></a>00706                         emms <span class="comment">/* exit MMX state */</span>
-<a name="l00707"></a>00707                         popa
-<a name="l00708"></a>00708         }
-<a name="l00709"></a>00709 <span class="preprocessor">#else</span>
-<a name="l00710"></a>00710 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l00711"></a>00711                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"mov %2, %%eax \n\t"</span> <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00712"></a>00712                 <span class="stringliteral">"mov %1, %%ebx \n\t"</span>    <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00713"></a>00713                 <span class="stringliteral">"mov %0, %%edi \n\t"</span>    <span class="comment">/* load Dest address into edi */</span>
-<a name="l00714"></a>00714                 <span class="stringliteral">"mov %3, %%ecx \n\t"</span>    <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00715"></a>00715                 <span class="stringliteral">"shr $3, %%ecx \n\t"</span>    <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00716"></a>00716                 <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>   <span class="comment">/* zero mm0 register */</span>
-<a name="l00717"></a>00717                 <span class="stringliteral">".align 16       \n\t"</span>  <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00718"></a>00718                 <span class="stringliteral">"1: movq (%%eax), %%mm1 \n\t"</span>     <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00719"></a>00719                 <span class="stringliteral">"movq    (%%ebx), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes from Src2 into mm3 */</span>
-<a name="l00720"></a>00720                 <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy mm1 into mm2 */</span>
-<a name="l00721"></a>00721                 <span class="stringliteral">"movq      %%mm3, %%mm4 \n\t"</span>   <span class="comment">/* copy mm3 into mm4  */</span>
-<a name="l00722"></a>00722                 <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack low  bytes of Src1 into words */</span>
-<a name="l00723"></a>00723                 <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack high bytes of Src1 into words */</span>
-<a name="l00724"></a>00724                 <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack low  bytes of Src2 into words */</span>
-<a name="l00725"></a>00725                 <span class="stringliteral">"punpckhbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack high bytes of Src2 into words */</span>
-<a name="l00726"></a>00726                 <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
-<a name="l00727"></a>00727                 <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
-<a name="l00728"></a>00728                 <span class="comment">/* Take abs value of the results (signed words) */</span>
-<a name="l00729"></a>00729                 <span class="stringliteral">"movq      %%mm1, %%mm5 \n\t"</span>   <span class="comment">/* copy mm1 into mm5 */</span>
-<a name="l00730"></a>00730                 <span class="stringliteral">"movq      %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* copy mm2 into mm6 */</span>
-<a name="l00731"></a>00731                 <span class="stringliteral">"psraw       $15, %%mm5 \n\t"</span>   <span class="comment">/* fill mm5 words with word sign bit */</span>
-<a name="l00732"></a>00732                 <span class="stringliteral">"psraw       $15, %%mm6 \n\t"</span>   <span class="comment">/* fill mm6 words with word sign bit */</span>
-<a name="l00733"></a>00733                 <span class="stringliteral">"pxor      %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l00734"></a>00734                 <span class="stringliteral">"pxor      %%mm6, %%mm2 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l00735"></a>00735                 <span class="stringliteral">"psubsw    %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l00736"></a>00736                 <span class="stringliteral">"psubsw    %%mm6, %%mm2 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l00737"></a>00737                 <span class="stringliteral">"packuswb  %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l00738"></a>00738                 <span class="stringliteral">"movq    %%mm1, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l00739"></a>00739                 <span class="stringliteral">"add $8, %%eax \n\t"</span>    <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00740"></a>00740                 <span class="stringliteral">"add $8, %%ebx \n\t"</span>    <span class="comment">/* register pointers by 8 */</span>
-<a name="l00741"></a>00741                 <span class="stringliteral">"add $8, %%edi \n\t"</span> <span class="stringliteral">"dec %%ecx     \n\t"</span>       <span class="comment">/* decrease loop counter */</span>
-<a name="l00742"></a>00742                 <span class="stringliteral">"jnz 1b        \n\t"</span>      <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00743"></a>00743                 <span class="stringliteral">"emms          \n\t"</span>    <span class="comment">/* exit MMX state */</span>
-<a name="l00744"></a>00744                 <span class="stringliteral">"popa \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest) <span class="comment">/* %0 */</span>
-<a name="l00745"></a>00745                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l00746"></a>00746                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l00747"></a>00747                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %3 */</span>
-<a name="l00748"></a>00748                 );
-<a name="l00749"></a>00749 <span class="preprocessor">#endif</span>
-<a name="l00750"></a>00750 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l00751"></a>00751 <span class="preprocessor">#else</span>
-<a name="l00752"></a>00752 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l00753"></a>00753 <span class="preprocessor">#endif</span>
-<a name="l00754"></a>00754 <span class="preprocessor"></span>}
-<a name="l00755"></a>00755 
-<a name="l00766"></a><a class="code" href="_s_d_l__image_filter_8h.html#a4657c2a1e1bf55d3241dc737cd618409">00766</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#af4633031d40a9ea0956a2f3c6c87a384" title="Filter using Mult: D = saturation255(S1 * S2)">SDL_imageFilterMult</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2,  [...]
-<a name="l00767"></a>00767 {
-<a name="l00768"></a>00768         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l00769"></a>00769         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l00770"></a>00770         <span class="keywordtype">int</span> result;
-<a name="l00771"></a>00771 
-<a name="l00772"></a>00772         <span class="comment">/* Validate input parameters */</span>
-<a name="l00773"></a>00773         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l00774"></a>00774                 <span class="keywordflow">return</span>(-1);
-<a name="l00775"></a>00775         <span class="keywordflow">if</span> (length == 0)
-<a name="l00776"></a>00776                 <span class="keywordflow">return</span>(0);
-<a name="l00777"></a>00777 
-<a name="l00778"></a>00778         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l00779"></a>00779                 <span class="comment">/* MMX routine */</span>
-<a name="l00780"></a>00780                 <a class="code" href="_s_d_l__image_filter_8c.html#ad565921b533977ad2059d58d3c4a3094" title="Internal MMX Filter using Mult: D = saturation255(S1 * S2)">SDL_imageFilterMultMMX</a>(Src1, Src2, Dest, length);
-<a name="l00781"></a>00781 
-<a name="l00782"></a>00782                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l00783"></a>00783                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l00784"></a>00784                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l00785"></a>00785                         istart = length & 0xfffffff8;
-<a name="l00786"></a>00786                         cursrc1 = &Src1[istart];
-<a name="l00787"></a>00787                         cursrc2 = &Src2[istart];
-<a name="l00788"></a>00788                         curdst = &Dest[istart];
-<a name="l00789"></a>00789                 } <span class="keywordflow">else</span> {
-<a name="l00790"></a>00790                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l00791"></a>00791                         <span class="keywordflow">return</span> (0);
-<a name="l00792"></a>00792                 }
-<a name="l00793"></a>00793         } <span class="keywordflow">else</span> {
-<a name="l00794"></a>00794                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l00795"></a>00795                 istart = 0;
-<a name="l00796"></a>00796                 cursrc1 = Src1;
-<a name="l00797"></a>00797                 cursrc2 = Src2;
-<a name="l00798"></a>00798                 curdst = Dest;
-<a name="l00799"></a>00799         }
-<a name="l00800"></a>00800 
-<a name="l00801"></a>00801         <span class="comment">/* C routine to process image */</span>
-<a name="l00802"></a>00802         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l00803"></a>00803 
-<a name="l00804"></a>00804                 <span class="comment">/* NOTE: this is probably wrong - dunno what the MMX code does */</span>
-<a name="l00805"></a>00805 
-<a name="l00806"></a>00806                 result = (int) *cursrc1 * (<span class="keywordtype">int</span>) *cursrc2;
-<a name="l00807"></a>00807                 <span class="keywordflow">if</span> (result > 255)
-<a name="l00808"></a>00808                         result = 255;
-<a name="l00809"></a>00809                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l00810"></a>00810                 <span class="comment">/* Advance pointers */</span>
-<a name="l00811"></a>00811                 cursrc1++;
-<a name="l00812"></a>00812                 cursrc2++;
-<a name="l00813"></a>00813                 curdst++;
-<a name="l00814"></a>00814         }
-<a name="l00815"></a>00815 
-<a name="l00816"></a>00816         <span class="keywordflow">return</span> (0);
-<a name="l00817"></a>00817 }
-<a name="l00818"></a>00818 
-<a name="l00829"></a><a class="code" href="_s_d_l__image_filter_8c.html#a346db972dff9c56e3c45c904eaa3c39a">00829</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a346db972dff9c56e3c45c904eaa3c39a" title="Internal ASM Filter using MultNor: D = S1 * S2.">SDL_imageFilterMultNorASM</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> [...]
-<a name="l00830"></a>00830 {
-<a name="l00831"></a>00831 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l00832"></a>00832 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l00833"></a>00833 <span class="preprocessor"></span>        __asm
-<a name="l00834"></a>00834         {
-<a name="l00835"></a>00835                 pusha
-<a name="l00836"></a>00836                         mov edx, Src1   <span class="comment">/* load Src1 address into edx */</span>
-<a name="l00837"></a>00837                         mov esi, Src2   <span class="comment">/* load Src2 address into esi */</span>
-<a name="l00838"></a>00838                         mov edi, Dest   <span class="comment">/* load Dest address into edi */</span>
-<a name="l00839"></a>00839                         mov ecx, SrcLength   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00840"></a>00840                         align 16        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00841"></a>00841 L10141:
-<a name="l00842"></a>00842                 mov al, [edx]   <span class="comment">/* load a byte from Src1 */</span>
-<a name="l00843"></a>00843                 mul [esi]       <span class="comment">/* mul with a byte from Src2 */</span>
-<a name="l00844"></a>00844                 mov [edi], al   <span class="comment">/* move a byte result to Dest */</span>
-<a name="l00845"></a>00845                         inc edx         <span class="comment">/* increment Src1, Src2, Dest */</span>
-<a name="l00846"></a>00846                         inc esi                 <span class="comment">/* pointer registers by one */</span>
-<a name="l00847"></a>00847                         inc edi
-<a name="l00848"></a>00848                         dec ecx <span class="comment">/* decrease loop counter */</span>
-<a name="l00849"></a>00849                         jnz L10141      <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00850"></a>00850                         popa
-<a name="l00851"></a>00851         }
-<a name="l00852"></a>00852 <span class="preprocessor">#else</span>
-<a name="l00853"></a>00853 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l00854"></a>00854                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"mov %2, %%edx \n\t"</span> <span class="comment">/* load Src1 address into edx */</span>
-<a name="l00855"></a>00855                 <span class="stringliteral">"mov %1, %%esi \n\t"</span>    <span class="comment">/* load Src2 address into esi */</span>
-<a name="l00856"></a>00856                 <span class="stringliteral">"mov %0, %%edi \n\t"</span>    <span class="comment">/* load Dest address into edi */</span>
-<a name="l00857"></a>00857                 <span class="stringliteral">"mov %3, %%ecx \n\t"</span>    <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00858"></a>00858                 <span class="stringliteral">".align 16       \n\t"</span>  <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00859"></a>00859                 <span class="stringliteral">"1:mov  (%%edx), %%al \n\t"</span>      <span class="comment">/* load a byte from Src1 */</span>
-<a name="l00860"></a>00860                 <span class="stringliteral">"mulb (%%esi)       \n\t"</span>       <span class="comment">/* mul with a byte from Src2 */</span>
-<a name="l00861"></a>00861                 <span class="stringliteral">"mov %%al, (%%edi)  \n\t"</span>       <span class="comment">/* move a byte result to Dest */</span>
-<a name="l00862"></a>00862                 <span class="stringliteral">"inc %%edx \n\t"</span>                <span class="comment">/* increment Src1, Src2, Dest */</span>
-<a name="l00863"></a>00863                 <span class="stringliteral">"inc %%esi \n\t"</span>                <span class="comment">/* pointer registers by one */</span>
-<a name="l00864"></a>00864                 <span class="stringliteral">"inc %%edi \n\t"</span> <span class="stringliteral">"dec %%ecx      \n\t"</span>  <span class="comment">/* decrease loop counter */</span>
-<a name="l00865"></a>00865                 <span class="stringliteral">"jnz 1b         \n\t"</span>     <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00866"></a>00866                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l00867"></a>00867                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l00868"></a>00868                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l00869"></a>00869                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %3 */</span>
-<a name="l00870"></a>00870                 );
-<a name="l00871"></a>00871 <span class="preprocessor">#endif</span>
-<a name="l00872"></a>00872 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l00873"></a>00873 <span class="preprocessor">#else</span>
-<a name="l00874"></a>00874 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l00875"></a>00875 <span class="preprocessor">#endif</span>
-<a name="l00876"></a>00876 <span class="preprocessor"></span>}
-<a name="l00877"></a>00877 
-<a name="l00888"></a><a class="code" href="_s_d_l__image_filter_8h.html#ac4f3446d0da18746b48606fe37c26385">00888</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a5f3c9fd40426bb46eba5ac167505dcc5" title="Filter using MultNor: D = S1 * S2.">SDL_imageFilterMultNor</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span cl [...]
-<a name="l00889"></a>00889 {
-<a name="l00890"></a>00890         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l00891"></a>00891         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l00892"></a>00892         <span class="keywordtype">int</span> result;
-<a name="l00893"></a>00893 
-<a name="l00894"></a>00894         <span class="comment">/* Validate input parameters */</span>
-<a name="l00895"></a>00895         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l00896"></a>00896                 <span class="keywordflow">return</span>(-1);
-<a name="l00897"></a>00897         <span class="keywordflow">if</span> (length == 0)
-<a name="l00898"></a>00898                 <span class="keywordflow">return</span>(0);
-<a name="l00899"></a>00899 
-<a name="l00900"></a>00900         <span class="keywordflow">if</span> (<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) {
-<a name="l00901"></a>00901                 <span class="keywordflow">if</span> (length > 0) {
-<a name="l00902"></a>00902                         <span class="comment">/* ASM routine */</span>
-<a name="l00903"></a>00903                         <a class="code" href="_s_d_l__image_filter_8c.html#a346db972dff9c56e3c45c904eaa3c39a" title="Internal ASM Filter using MultNor: D = S1 * S2.">SDL_imageFilterMultNorASM</a>(Src1, Src2, Dest, length);
-<a name="l00904"></a>00904 
-<a name="l00905"></a>00905                         <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l00906"></a>00906                         <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l00907"></a>00907                                 <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l00908"></a>00908                                 istart = length & 0xfffffff8;
-<a name="l00909"></a>00909                                 cursrc1 = &Src1[istart];
-<a name="l00910"></a>00910                                 cursrc2 = &Src2[istart];
-<a name="l00911"></a>00911                                 curdst = &Dest[istart];
-<a name="l00912"></a>00912                         } <span class="keywordflow">else</span> {
-<a name="l00913"></a>00913                                 <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l00914"></a>00914                                 <span class="keywordflow">return</span> (0);
-<a name="l00915"></a>00915                         }
-<a name="l00916"></a>00916                 } <span class="keywordflow">else</span> {
-<a name="l00917"></a>00917                         <span class="comment">/* No bytes - we are done */</span>
-<a name="l00918"></a>00918                         <span class="keywordflow">return</span> (0);
-<a name="l00919"></a>00919                 }
-<a name="l00920"></a>00920         } <span class="keywordflow">else</span> {
-<a name="l00921"></a>00921                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l00922"></a>00922                 istart = 0;
-<a name="l00923"></a>00923                 cursrc1 = Src1;
-<a name="l00924"></a>00924                 cursrc2 = Src2;
-<a name="l00925"></a>00925                 curdst = Dest;
-<a name="l00926"></a>00926         }
-<a name="l00927"></a>00927 
-<a name="l00928"></a>00928         <span class="comment">/* C routine to process image */</span>
-<a name="l00929"></a>00929         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l00930"></a>00930                 result = (int) *cursrc1 * (<span class="keywordtype">int</span>) *cursrc2;
-<a name="l00931"></a>00931                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l00932"></a>00932                 <span class="comment">/* Advance pointers */</span>
-<a name="l00933"></a>00933                 cursrc1++;
-<a name="l00934"></a>00934                 cursrc2++;
-<a name="l00935"></a>00935                 curdst++;
-<a name="l00936"></a>00936         }
-<a name="l00937"></a>00937 
-<a name="l00938"></a>00938         <span class="keywordflow">return</span> (0);
-<a name="l00939"></a>00939 }
-<a name="l00940"></a>00940 
-<a name="l00951"></a><a class="code" href="_s_d_l__image_filter_8c.html#a12272cd24ce7f09bc2c35c609e025983">00951</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a12272cd24ce7f09bc2c35c609e025983" title="Internal MMX Filter using MultDivby2: D = saturation255(S1/2 * S2)">SDL_imageFilterMultDivby2MMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="ke [...]
-<a name="l00952"></a>00952 {
-<a name="l00953"></a>00953 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l00954"></a>00954 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l00955"></a>00955 <span class="preprocessor"></span>        __asm
-<a name="l00956"></a>00956         { 
-<a name="l00957"></a>00957                 pusha
-<a name="l00958"></a>00958                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00959"></a>00959                         mov ebx, Src2           <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00960"></a>00960                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l00961"></a>00961                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00962"></a>00962                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00963"></a>00963                         pxor mm0,  mm0  <span class="comment">/* zero mm0 register */</span>
-<a name="l00964"></a>00964                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00965"></a>00965 L1015:
-<a name="l00966"></a>00966                 movq mm1,  [eax]        <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00967"></a>00967                 movq mm3,  [ebx]        <span class="comment">/* load 8 bytes from Src2 into mm3 */</span>
-<a name="l00968"></a>00968                 movq mm2,  mm1  <span class="comment">/* copy mm1 into mm2 */</span>
-<a name="l00969"></a>00969                         movq mm4,  mm3  <span class="comment">/* copy mm3 into mm4  */</span>
-<a name="l00970"></a>00970                         punpcklbw mm1,  mm0     <span class="comment">/* unpack low  bytes of Src1 into words */</span>
-<a name="l00971"></a>00971                         punpckhbw mm2,  mm0     <span class="comment">/* unpack high bytes of Src1 into words */</span>
-<a name="l00972"></a>00972                         punpcklbw mm3,  mm0     <span class="comment">/* unpack low  bytes of Src2 into words */</span>
-<a name="l00973"></a>00973                         punpckhbw mm4,  mm0     <span class="comment">/* unpack high bytes of Src2 into words */</span>
-<a name="l00974"></a>00974                         psrlw mm1,  1   <span class="comment">/* divide mm1 words by 2, Src1 low bytes */</span>
-<a name="l00975"></a>00975                         psrlw mm2,  1   <span class="comment">/* divide mm2 words by 2, Src1 high bytes */</span>
-<a name="l00976"></a>00976                         pmullw mm1,  mm3        <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
-<a name="l00977"></a>00977                         pmullw mm2,  mm4        <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
-<a name="l00978"></a>00978                         packuswb mm1,  mm2      <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l00979"></a>00979                         movq [edi],  mm1        <span class="comment">/* store result in Dest */</span>
-<a name="l00980"></a>00980                         add eax,  8     <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l00981"></a>00981                         add ebx,  8     <span class="comment">/* register pointers by 8 */</span>
-<a name="l00982"></a>00982                         add edi,  8
-<a name="l00983"></a>00983                         dec ecx         <span class="comment">/* decrease loop counter */</span>
-<a name="l00984"></a>00984                         jnz L1015               <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l00985"></a>00985                         emms                    <span class="comment">/* exit MMX state */</span>
-<a name="l00986"></a>00986                         popa
-<a name="l00987"></a>00987         }
-<a name="l00988"></a>00988 <span class="preprocessor">#else</span>
-<a name="l00989"></a>00989 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l00990"></a>00990                 (<span class="stringliteral">"pusha \n\t"</span> <span class="stringliteral">"mov %2, %%eax \n\t"</span>      <span class="comment">/* load Src1 address into eax */</span>
-<a name="l00991"></a>00991                 <span class="stringliteral">"mov %1, %%ebx \n\t"</span>    <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l00992"></a>00992                 <span class="stringliteral">"mov %0, %%edi \n\t"</span>    <span class="comment">/* load Dest address into edi */</span>
-<a name="l00993"></a>00993                 <span class="stringliteral">"mov %3, %%ecx \n\t"</span>    <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l00994"></a>00994                 <span class="stringliteral">"shr $3, %%ecx \n\t"</span>    <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l00995"></a>00995                 <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>   <span class="comment">/* zero mm0 register */</span>
-<a name="l00996"></a>00996                 <span class="stringliteral">".align 16       \n\t"</span>  <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l00997"></a>00997                 <span class="stringliteral">"1: movq (%%eax), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l00998"></a>00998                 <span class="stringliteral">"movq    (%%ebx), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes from Src2 into mm3 */</span>
-<a name="l00999"></a>00999                 <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy mm1 into mm2 */</span>
-<a name="l01000"></a>01000                 <span class="stringliteral">"movq      %%mm3, %%mm4 \n\t"</span>   <span class="comment">/* copy mm3 into mm4  */</span>
-<a name="l01001"></a>01001                 <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack low  bytes of Src1 into words */</span>
-<a name="l01002"></a>01002                 <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack high bytes of Src1 into words */</span>
-<a name="l01003"></a>01003                 <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack low  bytes of Src2 into words */</span>
-<a name="l01004"></a>01004                 <span class="stringliteral">"punpckhbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack high bytes of Src2 into words */</span>
-<a name="l01005"></a>01005                 <span class="stringliteral">"psrlw        $1, %%mm1 \n\t"</span>   <span class="comment">/* divide mm1 words by 2, Src1 low bytes */</span>
-<a name="l01006"></a>01006                 <span class="stringliteral">"psrlw        $1, %%mm2 \n\t"</span>   <span class="comment">/* divide mm2 words by 2, Src1 high bytes */</span>
-<a name="l01007"></a>01007                 <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
-<a name="l01008"></a>01008                 <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
-<a name="l01009"></a>01009                 <span class="stringliteral">"packuswb  %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l01010"></a>01010                 <span class="stringliteral">"movq    %%mm1, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l01011"></a>01011                 <span class="stringliteral">"add $8, %%eax \n\t"</span>    <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l01012"></a>01012                 <span class="stringliteral">"add $8, %%ebx \n\t"</span>    <span class="comment">/* register pointers by 8 */</span>
-<a name="l01013"></a>01013                 <span class="stringliteral">"add $8, %%edi \n\t"</span> <span class="stringliteral">"dec %%ecx     \n\t"</span>       <span class="comment">/* decrease loop counter */</span>
-<a name="l01014"></a>01014                 <span class="stringliteral">"jnz 1b        \n\t"</span>    <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01015"></a>01015                 <span class="stringliteral">"emms          \n\t"</span>    <span class="comment">/* exit MMX state */</span>
-<a name="l01016"></a>01016                 <span class="stringliteral">"popa \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest) <span class="comment">/* %0 */</span>
-<a name="l01017"></a>01017                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l01018"></a>01018                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l01019"></a>01019                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %3 */</span>
-<a name="l01020"></a>01020                 );
-<a name="l01021"></a>01021 <span class="preprocessor">#endif</span>
-<a name="l01022"></a>01022 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l01023"></a>01023 <span class="preprocessor">#else</span>
-<a name="l01024"></a>01024 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l01025"></a>01025 <span class="preprocessor">#endif</span>
-<a name="l01026"></a>01026 <span class="preprocessor"></span>}
-<a name="l01027"></a>01027 
-<a name="l01038"></a><a class="code" href="_s_d_l__image_filter_8h.html#aa19248767b1fd9ffdea4ba69b9f00175">01038</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a80737f6427c7bdb30d39a92f6524fc14" title="Filter using MultDivby2: D = saturation255(S1/2 * S2)">SDL_imageFilterMultDivby2</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char< [...]
-<a name="l01039"></a>01039 {
-<a name="l01040"></a>01040         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l01041"></a>01041         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l01042"></a>01042         <span class="keywordtype">int</span> result;
+<a name="l00641"></a>00641 <span class="preprocessor">#else</span>
+<a name="l00642"></a>00642 <span class="preprocessor"></span>        <span class="comment">/* i386 ASM with constraints: */</span>
+<a name="l00643"></a>00643         <span class="comment">/* asm volatile ( */</span>
+<a name="l00644"></a>00644         <span class="comment">/*      "shr $3, %%ecx \n\t"    /\* counter/8 (MMX loads 8 bytes at a time) *\/ */</span>
+<a name="l00645"></a>00645         <span class="comment">/*      "pxor      %%mm0, %%mm0 \n\t"   /\* zero mm0 register *\/ */</span>
+<a name="l00646"></a>00646         <span class="comment">/*      ".align 16       \n\t"  /\* 16 byte alignment of the loop entry *\/ */</span>
+<a name="l00647"></a>00647         <span class="comment">/*      "1: movq (%%eax), %%mm1 \n\t"     /\* load 8 bytes from Src1 into mm1 *\/ */</span>
+<a name="l00648"></a>00648         <span class="comment">/*      "movq    (%%ebx), %%mm3 \n\t"   /\* load 8 bytes from Src2 into mm3 *\/ */</span>
+<a name="l00649"></a>00649         <span class="comment">/*      "movq      %%mm1, %%mm2 \n\t"   /\* copy mm1 into mm2 *\/ */</span>
+<a name="l00650"></a>00650         <span class="comment">/*      "movq      %%mm3, %%mm4 \n\t"   /\* copy mm3 into mm4  *\/ */</span>
+<a name="l00651"></a>00651         <span class="comment">/*      "punpcklbw %%mm0, %%mm1 \n\t"   /\* unpack low  bytes of Src1 into words *\/ */</span>
+<a name="l00652"></a>00652         <span class="comment">/*      "punpckhbw %%mm0, %%mm2 \n\t"   /\* unpack high bytes of Src1 into words *\/ */</span>
+<a name="l00653"></a>00653         <span class="comment">/*      "punpcklbw %%mm0, %%mm3 \n\t"   /\* unpack low  bytes of Src2 into words *\/ */</span>
+<a name="l00654"></a>00654         <span class="comment">/*      "punpckhbw %%mm0, %%mm4 \n\t"   /\* unpack high bytes of Src2 into words *\/ */</span>
+<a name="l00655"></a>00655         <span class="comment">/*      "pmullw    %%mm3, %%mm1 \n\t"   /\* mul low  bytes of Src1 and Src2  *\/ */</span>
+<a name="l00656"></a>00656         <span class="comment">/*      "pmullw    %%mm4, %%mm2 \n\t"   /\* mul high bytes of Src1 and Src2 *\/ */</span>
+<a name="l00657"></a>00657         <span class="comment">/*      /\* Take abs value of the results (signed words) *\/ */</span>
+<a name="l00658"></a>00658         <span class="comment">/*      "movq      %%mm1, %%mm5 \n\t"   /\* copy mm1 into mm5 *\/ */</span>
+<a name="l00659"></a>00659         <span class="comment">/*      "movq      %%mm2, %%mm6 \n\t"   /\* copy mm2 into mm6 *\/ */</span>
+<a name="l00660"></a>00660         <span class="comment">/*      "psraw       $15, %%mm5 \n\t"   /\* fill mm5 words with word sign bit *\/ */</span>
+<a name="l00661"></a>00661         <span class="comment">/*      "psraw       $15, %%mm6 \n\t"   /\* fill mm6 words with word sign bit *\/ */</span>
+<a name="l00662"></a>00662         <span class="comment">/*      "pxor      %%mm5, %%mm1 \n\t"   /\* take 1's compliment of only neg. words *\/ */</span>
+<a name="l00663"></a>00663         <span class="comment">/*      "pxor      %%mm6, %%mm2 \n\t"   /\* take 1's compliment of only neg. words *\/ */</span>
+<a name="l00664"></a>00664         <span class="comment">/*      "psubsw    %%mm5, %%mm1 \n\t"   /\* add 1 to only neg. words, W-(-1) or W-0 *\/ */</span>
+<a name="l00665"></a>00665         <span class="comment">/*      "psubsw    %%mm6, %%mm2 \n\t"   /\* add 1 to only neg. words, W-(-1) or W-0 *\/ */</span>
+<a name="l00666"></a>00666         <span class="comment">/*      "packuswb  %%mm2, %%mm1 \n\t"   /\* pack words back into bytes with saturation *\/ */</span>
+<a name="l00667"></a>00667         <span class="comment">/*      "movq    %%mm1, (%%edi) \n\t"   /\* store result in Dest *\/ */</span>
+<a name="l00668"></a>00668         <span class="comment">/*      "add $8, %%eax \n\t"    /\* increase Src1, Src2 and Dest  *\/ */</span>
+<a name="l00669"></a>00669         <span class="comment">/*      "add $8, %%ebx \n\t"    /\* register pointers by 8 *\/ */</span>
+<a name="l00670"></a>00670         <span class="comment">/*      "add $8, %%edi \n\t" */</span>
+<a name="l00671"></a>00671         <span class="comment">/*      "dec %%ecx     \n\t"    /\* decrease loop counter *\/ */</span>
+<a name="l00672"></a>00672         <span class="comment">/*      "jnz 1b        \n\t"    /\* check loop termination, proceed if required *\/ */</span>
+<a name="l00673"></a>00673         <span class="comment">/*      "emms          \n\t"    /\* exit MMX state *\/ */</span>
+<a name="l00674"></a>00674         <span class="comment">/*      : "+a" (Src1),          /\* load Src1 address into rax, modified by the loop *\/ */</span>
+<a name="l00675"></a>00675         <span class="comment">/*        "+b" (Src2),          /\* load Src2 address into rbx, modified by the loop *\/ */</span>
+<a name="l00676"></a>00676         <span class="comment">/*        "+c" (SrcLength),     /\* load loop counter (SIZE) into rcx, modified by the loop *\/ */</span>
+<a name="l00677"></a>00677         <span class="comment">/*        "+D" (Dest)           /\* load Dest address into rdi, modified by the loop *\/ */</span>
+<a name="l00678"></a>00678         <span class="comment">/*      : */</span>
+<a name="l00679"></a>00679         <span class="comment">/*      : "memory",             /\* *Dest is modified *\/ */</span>
+<a name="l00680"></a>00680         <span class="comment">/*           "mm0","mm1","mm2","mm3","mm4","mm5","mm6"  /\* registers modified *\/ */</span>
+<a name="l00681"></a>00681         <span class="comment">/* ); */</span>
+<a name="l00682"></a>00682 
+<a name="l00683"></a>00683         <span class="comment">/* i386 and x86_64 */</span>
+<a name="l00684"></a>00684         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l00685"></a>00685         __m64 *mSrc2 = (__m64*)Src2;
+<a name="l00686"></a>00686         __m64 *mDest = (__m64*)Dest;
+<a name="l00687"></a>00687         __m64 mm0 = _m_from_int(0); <span class="comment">/* zero mm0 register */</span>
+<a name="l00688"></a>00688         <span class="keywordtype">int</span> i;
+<a name="l00689"></a>00689         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l00690"></a>00690                 __m64 mm1, mm2, mm3, mm4, mm5, mm6;
+<a name="l00691"></a>00691                 mm1 = _m_punpcklbw(*mSrc1, mm0);        <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l00692"></a>00692                 mm2 = _m_punpckhbw(*mSrc1, mm0);        <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l00693"></a>00693                 mm3 = _m_punpcklbw(*mSrc2, mm0);        <span class="comment">/* unpack low  bytes of Src2 into words */</span>
+<a name="l00694"></a>00694                 mm4 = _m_punpckhbw(*mSrc2, mm0);        <span class="comment">/* unpack high bytes of Src2 into words */</span>
+<a name="l00695"></a>00695                 mm1 = _m_pmullw(mm1, mm3);              <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
+<a name="l00696"></a>00696                 mm2 = _m_pmullw(mm2, mm4);              <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
+<a name="l00697"></a>00697                 mm5 = _m_psrawi(mm1, 15);               <span class="comment">/* fill mm5 words with word sign bit */</span>
+<a name="l00698"></a>00698                 mm6 = _m_psrawi(mm2, 15);               <span class="comment">/* fill mm6 words with word sign bit */</span>
+<a name="l00699"></a>00699                 mm1 = _m_pxor(mm1, mm5);                <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l00700"></a>00700                 mm2 = _m_pxor(mm2, mm6);                <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l00701"></a>00701                 mm1 = _m_psubsw(mm1, mm5);              <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l00702"></a>00702                 mm2 = _m_psubsw(mm2, mm6);              <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l00703"></a>00703                 *mDest = _m_packuswb(mm1, mm2);         <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l00704"></a>00704                 mSrc1++;
+<a name="l00705"></a>00705                 mSrc2++;
+<a name="l00706"></a>00706                 mDest++;
+<a name="l00707"></a>00707         }
+<a name="l00708"></a>00708         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l00709"></a>00709 <span class="preprocessor">#endif</span>
+<a name="l00710"></a>00710 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l00711"></a>00711 <span class="preprocessor">#else</span>
+<a name="l00712"></a>00712 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l00713"></a>00713 <span class="preprocessor">#endif</span>
+<a name="l00714"></a>00714 <span class="preprocessor"></span>}
+<a name="l00715"></a>00715 
+<a name="l00726"></a><a class="code" href="_s_d_l__image_filter_8h.html#a4657c2a1e1bf55d3241dc737cd618409">00726</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#af4633031d40a9ea0956a2f3c6c87a384" title="Filter using Mult: D = saturation255(S1 * S2)">SDL_imageFilterMult</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2,  [...]
+<a name="l00727"></a>00727 {
+<a name="l00728"></a>00728         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l00729"></a>00729         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
+<a name="l00730"></a>00730         <span class="keywordtype">int</span> result;
+<a name="l00731"></a>00731 
+<a name="l00732"></a>00732         <span class="comment">/* Validate input parameters */</span>
+<a name="l00733"></a>00733         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l00734"></a>00734                 <span class="keywordflow">return</span>(-1);
+<a name="l00735"></a>00735         <span class="keywordflow">if</span> (length == 0)
+<a name="l00736"></a>00736                 <span class="keywordflow">return</span>(0);
+<a name="l00737"></a>00737 
+<a name="l00738"></a>00738         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l00739"></a>00739                 <span class="comment">/* MMX routine */</span>
+<a name="l00740"></a>00740                 SDL_imageFilterMultMMX(Src1, Src2, Dest, length);
+<a name="l00741"></a>00741 
+<a name="l00742"></a>00742                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l00743"></a>00743                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l00744"></a>00744                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l00745"></a>00745                         istart = length & 0xfffffff8;
+<a name="l00746"></a>00746                         cursrc1 = &Src1[istart];
+<a name="l00747"></a>00747                         cursrc2 = &Src2[istart];
+<a name="l00748"></a>00748                         curdst = &Dest[istart];
+<a name="l00749"></a>00749                 } <span class="keywordflow">else</span> {
+<a name="l00750"></a>00750                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l00751"></a>00751                         <span class="keywordflow">return</span> (0);
+<a name="l00752"></a>00752                 }
+<a name="l00753"></a>00753         } <span class="keywordflow">else</span> {
+<a name="l00754"></a>00754                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l00755"></a>00755                 istart = 0;
+<a name="l00756"></a>00756                 cursrc1 = Src1;
+<a name="l00757"></a>00757                 cursrc2 = Src2;
+<a name="l00758"></a>00758                 curdst = Dest;
+<a name="l00759"></a>00759         }
+<a name="l00760"></a>00760 
+<a name="l00761"></a>00761         <span class="comment">/* C routine to process image */</span>
+<a name="l00762"></a>00762         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l00763"></a>00763 
+<a name="l00764"></a>00764                 <span class="comment">/* NOTE: this is probably wrong - dunno what the MMX code does */</span>
+<a name="l00765"></a>00765 
+<a name="l00766"></a>00766                 result = (int) *cursrc1 * (<span class="keywordtype">int</span>) *cursrc2;
+<a name="l00767"></a>00767                 <span class="keywordflow">if</span> (result > 255)
+<a name="l00768"></a>00768                         result = 255;
+<a name="l00769"></a>00769                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l00770"></a>00770                 <span class="comment">/* Advance pointers */</span>
+<a name="l00771"></a>00771                 cursrc1++;
+<a name="l00772"></a>00772                 cursrc2++;
+<a name="l00773"></a>00773                 curdst++;
+<a name="l00774"></a>00774         }
+<a name="l00775"></a>00775 
+<a name="l00776"></a>00776         <span class="keywordflow">return</span> (0);
+<a name="l00777"></a>00777 }
+<a name="l00778"></a>00778 
+<a name="l00789"></a><a class="code" href="_s_d_l__image_filter_8c.html#a346db972dff9c56e3c45c904eaa3c39a">00789</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a346db972dff9c56e3c45c904eaa3c39a" title="Internal ASM Filter using MultNor: D = S1 * S2.">SDL_imageFilterMultNorASM</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> [...]
+<a name="l00790"></a>00790 {
+<a name="l00791"></a>00791 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l00792"></a>00792 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l00793"></a>00793 <span class="preprocessor"></span>        __asm
+<a name="l00794"></a>00794         {
+<a name="l00795"></a>00795                 pusha
+<a name="l00796"></a>00796                         mov edx, Src1   <span class="comment">/* load Src1 address into edx */</span>
+<a name="l00797"></a>00797                         mov esi, Src2   <span class="comment">/* load Src2 address into esi */</span>
+<a name="l00798"></a>00798                         mov edi, Dest   <span class="comment">/* load Dest address into edi */</span>
+<a name="l00799"></a>00799                         mov ecx, SrcLength   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l00800"></a>00800                         align 16        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l00801"></a>00801 L10141:
+<a name="l00802"></a>00802                 mov al, [edx]   <span class="comment">/* load a byte from Src1 */</span>
+<a name="l00803"></a>00803                 mul [esi]       <span class="comment">/* mul with a byte from Src2 */</span>
+<a name="l00804"></a>00804                 mov [edi], al   <span class="comment">/* move a byte result to Dest */</span>
+<a name="l00805"></a>00805                         inc edx         <span class="comment">/* increment Src1, Src2, Dest */</span>
+<a name="l00806"></a>00806                         inc esi                 <span class="comment">/* pointer registers by one */</span>
+<a name="l00807"></a>00807                         inc edi
+<a name="l00808"></a>00808                         dec ecx <span class="comment">/* decrease loop counter */</span>
+<a name="l00809"></a>00809                         jnz L10141      <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l00810"></a>00810                         popa
+<a name="l00811"></a>00811         }
+<a name="l00812"></a>00812 <span class="preprocessor">#else</span>
+<a name="l00813"></a>00813 <span class="preprocessor"></span>        <span class="comment">/* Note: ~5% gain on i386, less efficient than C on x86_64 */</span>
+<a name="l00814"></a>00814         <span class="comment">/* Also depends on whether this function is static (?!) */</span>
+<a name="l00815"></a>00815         <span class="keyword">asm</span> <span class="keyword">volatile</span> (
+<a name="l00816"></a>00816                 <span class="stringliteral">".align 16       \n\t"</span>  <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l00817"></a>00817 <span class="preprocessor">#  if defined(i386)</span>
+<a name="l00818"></a>00818 <span class="preprocessor"></span>                <span class="stringliteral">"1:mov  (%%edx), %%al \n\t"</span>      <span class="comment">/* load a byte from Src1 */</span>
+<a name="l00819"></a>00819                 <span class="stringliteral">"mulb (%%esi)       \n\t"</span>       <span class="comment">/* mul with a byte from Src2 */</span>
+<a name="l00820"></a>00820                 <span class="stringliteral">"mov %%al, (%%edi)  \n\t"</span>       <span class="comment">/* move a byte result to Dest */</span>
+<a name="l00821"></a>00821                 <span class="stringliteral">"inc %%edx \n\t"</span>                <span class="comment">/* increment Src1, Src2, Dest */</span>
+<a name="l00822"></a>00822                 <span class="stringliteral">"inc %%esi \n\t"</span>                <span class="comment">/* pointer registers by one */</span>
+<a name="l00823"></a>00823                 <span class="stringliteral">"inc %%edi \n\t"</span>
+<a name="l00824"></a>00824                 <span class="stringliteral">"dec %%ecx      \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
+<a name="l00825"></a>00825 <span class="preprocessor">#  elif defined(__x86_64__)</span>
+<a name="l00826"></a>00826 <span class="preprocessor"></span>                <span class="stringliteral">"1:mov  (%%rdx), %%al \n\t"</span>      <span class="comment">/* load a byte from Src1 */</span>
+<a name="l00827"></a>00827                 <span class="stringliteral">"mulb (%%rsi)       \n\t"</span>       <span class="comment">/* mul with a byte from Src2 */</span>
+<a name="l00828"></a>00828                 <span class="stringliteral">"mov %%al, (%%rdi)  \n\t"</span>       <span class="comment">/* move a byte result to Dest */</span>
+<a name="l00829"></a>00829                 <span class="stringliteral">"inc %%rdx \n\t"</span>                <span class="comment">/* increment Src1, Src2, Dest */</span>
+<a name="l00830"></a>00830                 <span class="stringliteral">"inc %%rsi \n\t"</span>                <span class="comment">/* pointer registers by one */</span>
+<a name="l00831"></a>00831                 <span class="stringliteral">"inc %%rdi \n\t"</span>
+<a name="l00832"></a>00832                 <span class="stringliteral">"dec %%rcx      \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
+<a name="l00833"></a>00833 <span class="preprocessor">#  endif</span>
+<a name="l00834"></a>00834 <span class="preprocessor"></span>                <span class="stringliteral">"jnz 1b         \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l00835"></a>00835                 : <span class="stringliteral">"+d"</span> (Src1),          <span class="comment">/* load Src1 address into edx */</span>
+<a name="l00836"></a>00836                   <span class="stringliteral">"+S"</span> (Src2),          <span class="comment">/* load Src2 address into esi */</span>
+<a name="l00837"></a>00837                   <span class="stringliteral">"+c"</span> (SrcLength),     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l00838"></a>00838                   <span class="stringliteral">"+D"</span> (Dest)           <span class="comment">/* load Dest address into edi */</span>
+<a name="l00839"></a>00839                 :
+<a name="l00840"></a>00840                 : <span class="stringliteral">"memory"</span>, <span class="stringliteral">"rax"</span>
+<a name="l00841"></a>00841                 );
+<a name="l00842"></a>00842 <span class="preprocessor">#endif</span>
+<a name="l00843"></a>00843 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l00844"></a>00844 <span class="preprocessor">#else</span>
+<a name="l00845"></a>00845 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l00846"></a>00846 <span class="preprocessor">#endif</span>
+<a name="l00847"></a>00847 <span class="preprocessor"></span>}
+<a name="l00848"></a>00848 
+<a name="l00859"></a><a class="code" href="_s_d_l__image_filter_8h.html#ac4f3446d0da18746b48606fe37c26385">00859</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a5f3c9fd40426bb46eba5ac167505dcc5" title="Filter using MultNor: D = S1 * S2.">SDL_imageFilterMultNor</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span cl [...]
+<a name="l00860"></a>00860 {
+<a name="l00861"></a>00861         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l00862"></a>00862         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
+<a name="l00863"></a>00863 
+<a name="l00864"></a>00864         <span class="comment">/* Validate input parameters */</span>
+<a name="l00865"></a>00865         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l00866"></a>00866                 <span class="keywordflow">return</span>(-1);
+<a name="l00867"></a>00867         <span class="keywordflow">if</span> (length == 0)
+<a name="l00868"></a>00868                 <span class="keywordflow">return</span>(0);
+<a name="l00869"></a>00869 
+<a name="l00870"></a>00870         <span class="keywordflow">if</span> (<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) {
+<a name="l00871"></a>00871                 <span class="keywordflow">if</span> (length > 0) {
+<a name="l00872"></a>00872                         <span class="comment">/* ASM routine */</span>
+<a name="l00873"></a>00873                         <a class="code" href="_s_d_l__image_filter_8c.html#a346db972dff9c56e3c45c904eaa3c39a" title="Internal ASM Filter using MultNor: D = S1 * S2.">SDL_imageFilterMultNorASM</a>(Src1, Src2, Dest, length);
+<a name="l00874"></a>00874 
+<a name="l00875"></a>00875                         <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l00876"></a>00876                         <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l00877"></a>00877                                 <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l00878"></a>00878                                 istart = length & 0xfffffff8;
+<a name="l00879"></a>00879                                 cursrc1 = &Src1[istart];
+<a name="l00880"></a>00880                                 cursrc2 = &Src2[istart];
+<a name="l00881"></a>00881                                 curdst = &Dest[istart];
+<a name="l00882"></a>00882                         } <span class="keywordflow">else</span> {
+<a name="l00883"></a>00883                                 <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l00884"></a>00884                                 <span class="keywordflow">return</span> (0);
+<a name="l00885"></a>00885                         }
+<a name="l00886"></a>00886                 } <span class="keywordflow">else</span> {
+<a name="l00887"></a>00887                         <span class="comment">/* No bytes - we are done */</span>
+<a name="l00888"></a>00888                         <span class="keywordflow">return</span> (0);
+<a name="l00889"></a>00889                 }
+<a name="l00890"></a>00890         } <span class="keywordflow">else</span> {
+<a name="l00891"></a>00891                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l00892"></a>00892                 istart = 0;
+<a name="l00893"></a>00893                 cursrc1 = Src1;
+<a name="l00894"></a>00894                 cursrc2 = Src2;
+<a name="l00895"></a>00895                 curdst = Dest;
+<a name="l00896"></a>00896         }
+<a name="l00897"></a>00897 
+<a name="l00898"></a>00898         <span class="comment">/* C routine to process image */</span>
+<a name="l00899"></a>00899         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l00900"></a>00900                 *curdst = (int)*cursrc1 * (<span class="keywordtype">int</span>)*cursrc2;  <span class="comment">// (int) for efficiency</span>
+<a name="l00901"></a>00901                 <span class="comment">/* Advance pointers */</span>
+<a name="l00902"></a>00902                 cursrc1++;
+<a name="l00903"></a>00903                 cursrc2++;
+<a name="l00904"></a>00904                 curdst++;
+<a name="l00905"></a>00905         }
+<a name="l00906"></a>00906 
+<a name="l00907"></a>00907         <span class="keywordflow">return</span> (0);
+<a name="l00908"></a>00908 }
+<a name="l00909"></a>00909 
+<a name="l00920"></a>00920 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterMultDivby2MMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength)
+<a name="l00921"></a>00921 {
+<a name="l00922"></a>00922 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l00923"></a>00923 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l00924"></a>00924 <span class="preprocessor"></span>        __asm
+<a name="l00925"></a>00925         { 
+<a name="l00926"></a>00926                 pusha
+<a name="l00927"></a>00927                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l00928"></a>00928                         mov ebx, Src2           <span class="comment">/* load Src2 address into ebx */</span>
+<a name="l00929"></a>00929                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l00930"></a>00930                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l00931"></a>00931                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l00932"></a>00932                         pxor mm0,  mm0  <span class="comment">/* zero mm0 register */</span>
+<a name="l00933"></a>00933                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l00934"></a>00934 L1015:
+<a name="l00935"></a>00935                 movq mm1,  [eax]        <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
+<a name="l00936"></a>00936                 movq mm3,  [ebx]        <span class="comment">/* load 8 bytes from Src2 into mm3 */</span>
+<a name="l00937"></a>00937                 movq mm2,  mm1  <span class="comment">/* copy mm1 into mm2 */</span>
+<a name="l00938"></a>00938                         movq mm4,  mm3  <span class="comment">/* copy mm3 into mm4  */</span>
+<a name="l00939"></a>00939                         punpcklbw mm1,  mm0     <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l00940"></a>00940                         punpckhbw mm2,  mm0     <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l00941"></a>00941                         punpcklbw mm3,  mm0     <span class="comment">/* unpack low  bytes of Src2 into words */</span>
+<a name="l00942"></a>00942                         punpckhbw mm4,  mm0     <span class="comment">/* unpack high bytes of Src2 into words */</span>
+<a name="l00943"></a>00943                         psrlw mm1,  1   <span class="comment">/* divide mm1 words by 2, Src1 low bytes */</span>
+<a name="l00944"></a>00944                         psrlw mm2,  1   <span class="comment">/* divide mm2 words by 2, Src1 high bytes */</span>
+<a name="l00945"></a>00945                         pmullw mm1,  mm3        <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
+<a name="l00946"></a>00946                         pmullw mm2,  mm4        <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
+<a name="l00947"></a>00947                         packuswb mm1,  mm2      <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l00948"></a>00948                         movq [edi],  mm1        <span class="comment">/* store result in Dest */</span>
+<a name="l00949"></a>00949                         add eax,  8     <span class="comment">/* increase Src1, Src2 and Dest  */</span>
+<a name="l00950"></a>00950                         add ebx,  8     <span class="comment">/* register pointers by 8 */</span>
+<a name="l00951"></a>00951                         add edi,  8
+<a name="l00952"></a>00952                         dec ecx         <span class="comment">/* decrease loop counter */</span>
+<a name="l00953"></a>00953                         jnz L1015               <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l00954"></a>00954                         emms                    <span class="comment">/* exit MMX state */</span>
+<a name="l00955"></a>00955                         popa
+<a name="l00956"></a>00956         }
+<a name="l00957"></a>00957 <span class="preprocessor">#else</span>
+<a name="l00958"></a>00958 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l00959"></a>00959         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l00960"></a>00960         __m64 *mSrc2 = (__m64*)Src2;
+<a name="l00961"></a>00961         __m64 *mDest = (__m64*)Dest;
+<a name="l00962"></a>00962         __m64 mm0 = _m_from_int(0); <span class="comment">/* zero mm0 register */</span>
+<a name="l00963"></a>00963         <span class="keywordtype">int</span> i;
+<a name="l00964"></a>00964         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l00965"></a>00965                 __m64 mm1, mm2, mm3, mm4, mm5, mm6;
+<a name="l00966"></a>00966                 mm1 = _m_punpcklbw(*mSrc1, mm0);        <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l00967"></a>00967                 mm2 = _m_punpckhbw(*mSrc1, mm0);        <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l00968"></a>00968                 mm3 = _m_punpcklbw(*mSrc2, mm0);        <span class="comment">/* unpack low  bytes of Src2 into words */</span>
+<a name="l00969"></a>00969                 mm4 = _m_punpckhbw(*mSrc2, mm0);        <span class="comment">/* unpack high bytes of Src2 into words */</span>
+<a name="l00970"></a>00970                 mm1 = _m_psrlwi(mm1, 1);                <span class="comment">/* divide mm1 words by 2, Src1 low bytes */</span>
+<a name="l00971"></a>00971                 mm2 = _m_psrlwi(mm2, 1);                <span class="comment">/* divide mm2 words by 2, Src1 high bytes */</span>
+<a name="l00972"></a>00972                 mm1 = _m_pmullw(mm1, mm3);              <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
+<a name="l00973"></a>00973                 mm2 = _m_pmullw(mm2, mm4);              <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
+<a name="l00974"></a>00974                 *mDest = _m_packuswb(mm1, mm2);         <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l00975"></a>00975                 mSrc1++;
+<a name="l00976"></a>00976                 mSrc2++;
+<a name="l00977"></a>00977                 mDest++;
+<a name="l00978"></a>00978         }
+<a name="l00979"></a>00979         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l00980"></a>00980 <span class="preprocessor">#endif</span>
+<a name="l00981"></a>00981 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l00982"></a>00982 <span class="preprocessor">#else</span>
+<a name="l00983"></a>00983 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l00984"></a>00984 <span class="preprocessor">#endif</span>
+<a name="l00985"></a>00985 <span class="preprocessor"></span>}
+<a name="l00986"></a>00986 
+<a name="l00997"></a><a class="code" href="_s_d_l__image_filter_8h.html#aa19248767b1fd9ffdea4ba69b9f00175">00997</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a80737f6427c7bdb30d39a92f6524fc14" title="Filter using MultDivby2: D = saturation255(S1/2 * S2)">SDL_imageFilterMultDivby2</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char< [...]
+<a name="l00998"></a>00998 {
+<a name="l00999"></a>00999         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l01000"></a>01000         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
+<a name="l01001"></a>01001         <span class="keywordtype">int</span> result;
+<a name="l01002"></a>01002 
+<a name="l01003"></a>01003         <span class="comment">/* Validate input parameters */</span>
+<a name="l01004"></a>01004         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l01005"></a>01005                 <span class="keywordflow">return</span>(-1);
+<a name="l01006"></a>01006         <span class="keywordflow">if</span> (length == 0)
+<a name="l01007"></a>01007                 <span class="keywordflow">return</span>(0);
+<a name="l01008"></a>01008 
+<a name="l01009"></a>01009         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l01010"></a>01010                 <span class="comment">/* MMX routine */</span>
+<a name="l01011"></a>01011                 SDL_imageFilterMultDivby2MMX(Src1, Src2, Dest, length);
+<a name="l01012"></a>01012 
+<a name="l01013"></a>01013                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l01014"></a>01014                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l01015"></a>01015                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l01016"></a>01016                         istart = length & 0xfffffff8;
+<a name="l01017"></a>01017                         cursrc1 = &Src1[istart];
+<a name="l01018"></a>01018                         cursrc2 = &Src2[istart];
+<a name="l01019"></a>01019                         curdst = &Dest[istart];
+<a name="l01020"></a>01020                 } <span class="keywordflow">else</span> {
+<a name="l01021"></a>01021                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l01022"></a>01022                         <span class="keywordflow">return</span> (0);
+<a name="l01023"></a>01023                 }
+<a name="l01024"></a>01024         } <span class="keywordflow">else</span> {
+<a name="l01025"></a>01025                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l01026"></a>01026                 istart = 0;
+<a name="l01027"></a>01027                 cursrc1 = Src1;
+<a name="l01028"></a>01028                 cursrc2 = Src2;
+<a name="l01029"></a>01029                 curdst = Dest;
+<a name="l01030"></a>01030         }
+<a name="l01031"></a>01031 
+<a name="l01032"></a>01032         <span class="comment">/* C routine to process image */</span>
+<a name="l01033"></a>01033         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l01034"></a>01034                 result = ((int) *cursrc1 / 2) * (int) *cursrc2;
+<a name="l01035"></a>01035                 <span class="keywordflow">if</span> (result > 255)
+<a name="l01036"></a>01036                         result = 255;
+<a name="l01037"></a>01037                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l01038"></a>01038                 <span class="comment">/* Advance pointers */</span>
+<a name="l01039"></a>01039                 cursrc1++;
+<a name="l01040"></a>01040                 cursrc2++;
+<a name="l01041"></a>01041                 curdst++;
+<a name="l01042"></a>01042         }
 <a name="l01043"></a>01043 
-<a name="l01044"></a>01044         <span class="comment">/* Validate input parameters */</span>
-<a name="l01045"></a>01045         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l01046"></a>01046                 <span class="keywordflow">return</span>(-1);
-<a name="l01047"></a>01047         <span class="keywordflow">if</span> (length == 0)
-<a name="l01048"></a>01048                 <span class="keywordflow">return</span>(0);
-<a name="l01049"></a>01049 
-<a name="l01050"></a>01050         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l01051"></a>01051                 <span class="comment">/* MMX routine */</span>
-<a name="l01052"></a>01052                 <a class="code" href="_s_d_l__image_filter_8c.html#a12272cd24ce7f09bc2c35c609e025983" title="Internal MMX Filter using MultDivby2: D = saturation255(S1/2 * S2)">SDL_imageFilterMultDivby2MMX</a>(Src1, Src2, Dest, length);
-<a name="l01053"></a>01053 
-<a name="l01054"></a>01054                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l01055"></a>01055                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l01056"></a>01056                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l01057"></a>01057                         istart = length & 0xfffffff8;
-<a name="l01058"></a>01058                         cursrc1 = &Src1[istart];
-<a name="l01059"></a>01059                         cursrc2 = &Src2[istart];
-<a name="l01060"></a>01060                         curdst = &Dest[istart];
-<a name="l01061"></a>01061                 } <span class="keywordflow">else</span> {
-<a name="l01062"></a>01062                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l01063"></a>01063                         <span class="keywordflow">return</span> (0);
-<a name="l01064"></a>01064                 }
-<a name="l01065"></a>01065         } <span class="keywordflow">else</span> {
-<a name="l01066"></a>01066                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l01067"></a>01067                 istart = 0;
-<a name="l01068"></a>01068                 cursrc1 = Src1;
-<a name="l01069"></a>01069                 cursrc2 = Src2;
-<a name="l01070"></a>01070                 curdst = Dest;
-<a name="l01071"></a>01071         }
-<a name="l01072"></a>01072 
-<a name="l01073"></a>01073         <span class="comment">/* C routine to process image */</span>
-<a name="l01074"></a>01074         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l01075"></a>01075                 result = ((int) *cursrc1 / 2) * (int) *cursrc2;
-<a name="l01076"></a>01076                 <span class="keywordflow">if</span> (result > 255)
-<a name="l01077"></a>01077                         result = 255;
-<a name="l01078"></a>01078                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l01079"></a>01079                 <span class="comment">/* Advance pointers */</span>
-<a name="l01080"></a>01080                 cursrc1++;
-<a name="l01081"></a>01081                 cursrc2++;
-<a name="l01082"></a>01082                 curdst++;
-<a name="l01083"></a>01083         }
-<a name="l01084"></a>01084 
-<a name="l01085"></a>01085         <span class="keywordflow">return</span> (0);
-<a name="l01086"></a>01086 }
-<a name="l01087"></a>01087 
-<a name="l01098"></a><a class="code" href="_s_d_l__image_filter_8c.html#a1f8bf77328e934701c7a9e4ef51d9b41">01098</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a1f8bf77328e934701c7a9e4ef51d9b41" title="Internal MMX Filter using MultDivby4: D = saturation255(S1/2 * S2/2)">SDL_imageFilterMultDivby4MMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class=" [...]
-<a name="l01099"></a>01099 {
-<a name="l01100"></a>01100 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l01101"></a>01101 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l01102"></a>01102 <span class="preprocessor"></span>        __asm
-<a name="l01103"></a>01103         {
-<a name="l01104"></a>01104                 pusha
-<a name="l01105"></a>01105                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01106"></a>01106                         mov ebx, Src2           <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l01107"></a>01107                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l01108"></a>01108                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01109"></a>01109                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01110"></a>01110                         pxor mm0, mm0           <span class="comment">/* zero mm0 register */</span>
-<a name="l01111"></a>01111                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01112"></a>01112 L1016:
-<a name="l01113"></a>01113                 movq mm1, [eax]         <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l01114"></a>01114                 movq mm3, [ebx]         <span class="comment">/* load 8 bytes from Src2 into mm3 */</span>
-<a name="l01115"></a>01115                 movq mm2, mm1           <span class="comment">/* copy mm1 into mm2 */</span>
-<a name="l01116"></a>01116                         movq mm4, mm3           <span class="comment">/* copy mm3 into mm4  */</span>
-<a name="l01117"></a>01117                         punpcklbw mm1, mm0      <span class="comment">/* unpack low  bytes of Src1 into words */</span>
-<a name="l01118"></a>01118                         punpckhbw mm2, mm0      <span class="comment">/* unpack high bytes of Src1 into words */</span>
-<a name="l01119"></a>01119                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of Src2 into words */</span>
-<a name="l01120"></a>01120                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of Src2 into words */</span>
-<a name="l01121"></a>01121                         psrlw mm1, 1    <span class="comment">/* divide mm1 words by 2, Src1 low bytes */</span>
-<a name="l01122"></a>01122                         psrlw mm2, 1    <span class="comment">/* divide mm2 words by 2, Src1 high bytes */</span>
-<a name="l01123"></a>01123                         psrlw mm3, 1    <span class="comment">/* divide mm3 words by 2, Src2 low bytes */</span>
-<a name="l01124"></a>01124                         psrlw mm4, 1    <span class="comment">/* divide mm4 words by 2, Src2 high bytes */</span>
-<a name="l01125"></a>01125                         pmullw mm1, mm3         <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
-<a name="l01126"></a>01126                         pmullw mm2, mm4         <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
-<a name="l01127"></a>01127                         packuswb mm1, mm2       <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l01128"></a>01128                         movq [edi], mm1         <span class="comment">/* store result in Dest */</span>
-<a name="l01129"></a>01129                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l01130"></a>01130                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
-<a name="l01131"></a>01131                         add edi,  8
-<a name="l01132"></a>01132                         dec ecx         <span class="comment">/* decrease loop counter */</span>
-<a name="l01133"></a>01133                         jnz L1016               <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01134"></a>01134                         emms                    <span class="comment">/* exit MMX state */</span>
-<a name="l01135"></a>01135                         popa
-<a name="l01136"></a>01136         }
-<a name="l01137"></a>01137 <span class="preprocessor">#else</span>
-<a name="l01138"></a>01138 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l01139"></a>01139                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"mov %2, %%eax \n\t"</span> <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01140"></a>01140                 <span class="stringliteral">"mov %1, %%ebx \n\t"</span>    <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l01141"></a>01141                 <span class="stringliteral">"mov %0, %%edi \n\t"</span>    <span class="comment">/* load Dest address into edi */</span>
-<a name="l01142"></a>01142                 <span class="stringliteral">"mov %3, %%ecx \n\t"</span>    <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01143"></a>01143                 <span class="stringliteral">"shr $3, %%ecx \n\t"</span>    <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01144"></a>01144                 <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>   <span class="comment">/* zero mm0 register */</span>
-<a name="l01145"></a>01145                 <span class="stringliteral">".align 16       \n\t"</span>  <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01146"></a>01146                 <span class="stringliteral">"1: movq (%%eax), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l01147"></a>01147                 <span class="stringliteral">"movq    (%%ebx), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes from Src2 into mm3 */</span>
-<a name="l01148"></a>01148                 <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy mm1 into mm2 */</span>
-<a name="l01149"></a>01149                 <span class="stringliteral">"movq      %%mm3, %%mm4 \n\t"</span>   <span class="comment">/* copy mm3 into mm4  */</span>
-<a name="l01150"></a>01150                 <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack low  bytes of Src1 into words */</span>
-<a name="l01151"></a>01151                 <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack high bytes of Src1 into words */</span>
-<a name="l01152"></a>01152                 <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack low  bytes of Src2 into words */</span>
-<a name="l01153"></a>01153                 <span class="stringliteral">"punpckhbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack high bytes of Src2 into words */</span>
-<a name="l01154"></a>01154                 <span class="stringliteral">"psrlw        $1, %%mm1 \n\t"</span>   <span class="comment">/* divide mm1 words by 2, Src1 low bytes */</span>
-<a name="l01155"></a>01155                 <span class="stringliteral">"psrlw        $1, %%mm2 \n\t"</span>   <span class="comment">/* divide mm2 words by 2, Src1 high bytes */</span>
-<a name="l01156"></a>01156                 <span class="stringliteral">"psrlw        $1, %%mm3 \n\t"</span>   <span class="comment">/* divide mm3 words by 2, Src2 low bytes */</span>
-<a name="l01157"></a>01157                 <span class="stringliteral">"psrlw        $1, %%mm4 \n\t"</span>   <span class="comment">/* divide mm4 words by 2, Src2 high bytes */</span>
-<a name="l01158"></a>01158                 <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
-<a name="l01159"></a>01159                 <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
-<a name="l01160"></a>01160                 <span class="stringliteral">"packuswb  %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l01161"></a>01161                 <span class="stringliteral">"movq    %%mm1, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l01162"></a>01162                 <span class="stringliteral">"add $8, %%eax \n\t"</span>    <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l01163"></a>01163                 <span class="stringliteral">"add $8, %%ebx \n\t"</span>    <span class="comment">/* register pointers by 8 */</span>
-<a name="l01164"></a>01164                 <span class="stringliteral">"add $8, %%edi \n\t"</span> <span class="stringliteral">"dec %%ecx     \n\t"</span>       <span class="comment">/* decrease loop counter */</span>
-<a name="l01165"></a>01165                 <span class="stringliteral">"jnz 1b        \n\t"</span>    <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01166"></a>01166                 <span class="stringliteral">"emms          \n\t"</span>    <span class="comment">/* exit MMX state */</span>
-<a name="l01167"></a>01167                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l01168"></a>01168                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l01169"></a>01169                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l01170"></a>01170                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %3 */</span>
-<a name="l01171"></a>01171                 );
-<a name="l01172"></a>01172 <span class="preprocessor">#endif</span>
-<a name="l01173"></a>01173 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l01174"></a>01174 <span class="preprocessor">#else</span>
-<a name="l01175"></a>01175 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l01176"></a>01176 <span class="preprocessor">#endif</span>
-<a name="l01177"></a>01177 <span class="preprocessor"></span>}
-<a name="l01178"></a>01178 
-<a name="l01189"></a><a class="code" href="_s_d_l__image_filter_8h.html#aa92bea3946c8081c9656304a7d944fae">01189</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a30e685653eb1050c7d48feaeb8f801a1" title="Filter using MultDivby4: D = saturation255(S1/2 * S2/2)">SDL_imageFilterMultDivby4</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">cha [...]
-<a name="l01190"></a>01190 {
-<a name="l01191"></a>01191         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l01192"></a>01192         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l01193"></a>01193         <span class="keywordtype">int</span> result;
-<a name="l01194"></a>01194 
-<a name="l01195"></a>01195         <span class="comment">/* Validate input parameters */</span>
-<a name="l01196"></a>01196         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l01197"></a>01197                 <span class="keywordflow">return</span>(-1);
-<a name="l01198"></a>01198         <span class="keywordflow">if</span> (length == 0)
-<a name="l01199"></a>01199                 <span class="keywordflow">return</span>(0);
-<a name="l01200"></a>01200 
-<a name="l01201"></a>01201         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l01202"></a>01202                 <span class="comment">/* MMX routine */</span>
-<a name="l01203"></a>01203                 <a class="code" href="_s_d_l__image_filter_8c.html#a1f8bf77328e934701c7a9e4ef51d9b41" title="Internal MMX Filter using MultDivby4: D = saturation255(S1/2 * S2/2)">SDL_imageFilterMultDivby4MMX</a>(Src1, Src2, Dest, length);
-<a name="l01204"></a>01204 
-<a name="l01205"></a>01205                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l01206"></a>01206                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l01207"></a>01207                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l01208"></a>01208                         istart = length & 0xfffffff8;
-<a name="l01209"></a>01209                         cursrc1 = &Src1[istart];
-<a name="l01210"></a>01210                         cursrc2 = &Src2[istart];
-<a name="l01211"></a>01211                         curdst = &Dest[istart];
-<a name="l01212"></a>01212                 } <span class="keywordflow">else</span> {
-<a name="l01213"></a>01213                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l01214"></a>01214                         <span class="keywordflow">return</span> (0);
-<a name="l01215"></a>01215                 }
-<a name="l01216"></a>01216         } <span class="keywordflow">else</span> {
-<a name="l01217"></a>01217                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l01218"></a>01218                 istart = 0;
-<a name="l01219"></a>01219                 cursrc1 = Src1;
-<a name="l01220"></a>01220                 cursrc2 = Src2;
-<a name="l01221"></a>01221                 curdst = Dest;
+<a name="l01044"></a>01044         <span class="keywordflow">return</span> (0);
+<a name="l01045"></a>01045 }
+<a name="l01046"></a>01046 
+<a name="l01057"></a>01057 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterMultDivby4MMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength)
+<a name="l01058"></a>01058 {
+<a name="l01059"></a>01059 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l01060"></a>01060 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l01061"></a>01061 <span class="preprocessor"></span>        __asm
+<a name="l01062"></a>01062         {
+<a name="l01063"></a>01063                 pusha
+<a name="l01064"></a>01064                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l01065"></a>01065                         mov ebx, Src2           <span class="comment">/* load Src2 address into ebx */</span>
+<a name="l01066"></a>01066                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l01067"></a>01067                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l01068"></a>01068                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l01069"></a>01069                         pxor mm0, mm0           <span class="comment">/* zero mm0 register */</span>
+<a name="l01070"></a>01070                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l01071"></a>01071 L1016:
+<a name="l01072"></a>01072                 movq mm1, [eax]         <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
+<a name="l01073"></a>01073                 movq mm3, [ebx]         <span class="comment">/* load 8 bytes from Src2 into mm3 */</span>
+<a name="l01074"></a>01074                 movq mm2, mm1           <span class="comment">/* copy mm1 into mm2 */</span>
+<a name="l01075"></a>01075                         movq mm4, mm3           <span class="comment">/* copy mm3 into mm4  */</span>
+<a name="l01076"></a>01076                         punpcklbw mm1, mm0      <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l01077"></a>01077                         punpckhbw mm2, mm0      <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l01078"></a>01078                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of Src2 into words */</span>
+<a name="l01079"></a>01079                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of Src2 into words */</span>
+<a name="l01080"></a>01080                         psrlw mm1, 1    <span class="comment">/* divide mm1 words by 2, Src1 low bytes */</span>
+<a name="l01081"></a>01081                         psrlw mm2, 1    <span class="comment">/* divide mm2 words by 2, Src1 high bytes */</span>
+<a name="l01082"></a>01082                         psrlw mm3, 1    <span class="comment">/* divide mm3 words by 2, Src2 low bytes */</span>
+<a name="l01083"></a>01083                         psrlw mm4, 1    <span class="comment">/* divide mm4 words by 2, Src2 high bytes */</span>
+<a name="l01084"></a>01084                         pmullw mm1, mm3         <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
+<a name="l01085"></a>01085                         pmullw mm2, mm4         <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
+<a name="l01086"></a>01086                         packuswb mm1, mm2       <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l01087"></a>01087                         movq [edi], mm1         <span class="comment">/* store result in Dest */</span>
+<a name="l01088"></a>01088                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
+<a name="l01089"></a>01089                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
+<a name="l01090"></a>01090                         add edi,  8
+<a name="l01091"></a>01091                         dec ecx         <span class="comment">/* decrease loop counter */</span>
+<a name="l01092"></a>01092                         jnz L1016               <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l01093"></a>01093                         emms                    <span class="comment">/* exit MMX state */</span>
+<a name="l01094"></a>01094                         popa
+<a name="l01095"></a>01095         }
+<a name="l01096"></a>01096 <span class="preprocessor">#else</span>
+<a name="l01097"></a>01097 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l01098"></a>01098         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l01099"></a>01099         __m64 *mSrc2 = (__m64*)Src2;
+<a name="l01100"></a>01100         __m64 *mDest = (__m64*)Dest;
+<a name="l01101"></a>01101         __m64 mm0 = _m_from_int(0); <span class="comment">/* zero mm0 register */</span>
+<a name="l01102"></a>01102         <span class="keywordtype">int</span> i;
+<a name="l01103"></a>01103         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l01104"></a>01104                 __m64 mm1, mm2, mm3, mm4, mm5, mm6;
+<a name="l01105"></a>01105                 mm1 = _m_punpcklbw(*mSrc1, mm0);        <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l01106"></a>01106                 mm2 = _m_punpckhbw(*mSrc1, mm0);        <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l01107"></a>01107                 mm3 = _m_punpcklbw(*mSrc2, mm0);        <span class="comment">/* unpack low  bytes of Src2 into words */</span>
+<a name="l01108"></a>01108                 mm4 = _m_punpckhbw(*mSrc2, mm0);        <span class="comment">/* unpack high bytes of Src2 into words */</span>
+<a name="l01109"></a>01109                 mm1 = _m_psrlwi(mm1, 1);                <span class="comment">/* divide mm1 words by 2, Src1 low bytes */</span>
+<a name="l01110"></a>01110                 mm2 = _m_psrlwi(mm2, 1);                <span class="comment">/* divide mm2 words by 2, Src1 high bytes */</span>
+<a name="l01111"></a>01111                 mm3 = _m_psrlwi(mm3, 1);                <span class="comment">/* divide mm3 words by 2, Src2 low bytes */</span>
+<a name="l01112"></a>01112                 mm4 = _m_psrlwi(mm4, 1);                <span class="comment">/* divide mm4 words by 2, Src2 high bytes */</span>
+<a name="l01113"></a>01113                 mm1 = _m_pmullw(mm1, mm3);              <span class="comment">/* mul low  bytes of Src1 and Src2  */</span>
+<a name="l01114"></a>01114                 mm2 = _m_pmullw(mm2, mm4);              <span class="comment">/* mul high bytes of Src1 and Src2 */</span>
+<a name="l01115"></a>01115                 *mDest = _m_packuswb(mm1, mm2);         <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l01116"></a>01116                 mSrc1++;
+<a name="l01117"></a>01117                 mSrc2++;
+<a name="l01118"></a>01118                 mDest++;
+<a name="l01119"></a>01119         }
+<a name="l01120"></a>01120         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l01121"></a>01121 <span class="preprocessor">#endif</span>
+<a name="l01122"></a>01122 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l01123"></a>01123 <span class="preprocessor">#else</span>
+<a name="l01124"></a>01124 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l01125"></a>01125 <span class="preprocessor">#endif</span>
+<a name="l01126"></a>01126 <span class="preprocessor"></span>}
+<a name="l01127"></a>01127 
+<a name="l01138"></a><a class="code" href="_s_d_l__image_filter_8h.html#aa92bea3946c8081c9656304a7d944fae">01138</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a30e685653eb1050c7d48feaeb8f801a1" title="Filter using MultDivby4: D = saturation255(S1/2 * S2/2)">SDL_imageFilterMultDivby4</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">cha [...]
+<a name="l01139"></a>01139 {
+<a name="l01140"></a>01140         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l01141"></a>01141         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
+<a name="l01142"></a>01142         <span class="keywordtype">int</span> result;
+<a name="l01143"></a>01143 
+<a name="l01144"></a>01144         <span class="comment">/* Validate input parameters */</span>
+<a name="l01145"></a>01145         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l01146"></a>01146                 <span class="keywordflow">return</span>(-1);
+<a name="l01147"></a>01147         <span class="keywordflow">if</span> (length == 0)
+<a name="l01148"></a>01148                 <span class="keywordflow">return</span>(0);
+<a name="l01149"></a>01149 
+<a name="l01150"></a>01150         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l01151"></a>01151                 <span class="comment">/* MMX routine */</span>
+<a name="l01152"></a>01152                 SDL_imageFilterMultDivby4MMX(Src1, Src2, Dest, length);
+<a name="l01153"></a>01153 
+<a name="l01154"></a>01154                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l01155"></a>01155                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l01156"></a>01156                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l01157"></a>01157                         istart = length & 0xfffffff8;
+<a name="l01158"></a>01158                         cursrc1 = &Src1[istart];
+<a name="l01159"></a>01159                         cursrc2 = &Src2[istart];
+<a name="l01160"></a>01160                         curdst = &Dest[istart];
+<a name="l01161"></a>01161                 } <span class="keywordflow">else</span> {
+<a name="l01162"></a>01162                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l01163"></a>01163                         <span class="keywordflow">return</span> (0);
+<a name="l01164"></a>01164                 }
+<a name="l01165"></a>01165         } <span class="keywordflow">else</span> {
+<a name="l01166"></a>01166                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l01167"></a>01167                 istart = 0;
+<a name="l01168"></a>01168                 cursrc1 = Src1;
+<a name="l01169"></a>01169                 cursrc2 = Src2;
+<a name="l01170"></a>01170                 curdst = Dest;
+<a name="l01171"></a>01171         }
+<a name="l01172"></a>01172 
+<a name="l01173"></a>01173         <span class="comment">/* C routine to process image */</span>
+<a name="l01174"></a>01174         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l01175"></a>01175                 result = ((int) *cursrc1 / 2) * ((int) *cursrc2 / 2);
+<a name="l01176"></a>01176                 <span class="keywordflow">if</span> (result > 255)
+<a name="l01177"></a>01177                         result = 255;
+<a name="l01178"></a>01178                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l01179"></a>01179                 <span class="comment">/* Advance pointers */</span>
+<a name="l01180"></a>01180                 cursrc1++;
+<a name="l01181"></a>01181                 cursrc2++;
+<a name="l01182"></a>01182                 curdst++;
+<a name="l01183"></a>01183         }
+<a name="l01184"></a>01184 
+<a name="l01185"></a>01185         <span class="keywordflow">return</span> (0);
+<a name="l01186"></a>01186 }
+<a name="l01187"></a>01187 
+<a name="l01198"></a>01198 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterBitAndMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength)
+<a name="l01199"></a>01199 {
+<a name="l01200"></a>01200 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l01201"></a>01201 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l01202"></a>01202 <span class="preprocessor"></span>        __asm
+<a name="l01203"></a>01203         {
+<a name="l01204"></a>01204                 pusha
+<a name="l01205"></a>01205                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l01206"></a>01206                         mov ebx, Src2           <span class="comment">/* load Src2 address into ebx */</span>
+<a name="l01207"></a>01207                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l01208"></a>01208                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l01209"></a>01209                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l01210"></a>01210                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l01211"></a>01211 L1017:
+<a name="l01212"></a>01212                 movq mm1, [eax]         <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
+<a name="l01213"></a>01213                 pand mm1, [ebx]         <span class="comment">/* mm1=Src1&Src2 */</span>
+<a name="l01214"></a>01214                 movq [edi], mm1         <span class="comment">/* store result in Dest */</span>
+<a name="l01215"></a>01215                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
+<a name="l01216"></a>01216                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
+<a name="l01217"></a>01217                         add edi, 8
+<a name="l01218"></a>01218                         dec ecx         <span class="comment">/* decrease loop counter */</span>
+<a name="l01219"></a>01219                         jnz L1017               <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l01220"></a>01220                         emms                    <span class="comment">/* exit MMX state */</span>
+<a name="l01221"></a>01221                         popa
 <a name="l01222"></a>01222         }
-<a name="l01223"></a>01223 
-<a name="l01224"></a>01224         <span class="comment">/* C routine to process image */</span>
-<a name="l01225"></a>01225         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l01226"></a>01226                 result = ((int) *cursrc1 / 2) * ((int) *cursrc2 / 2);
-<a name="l01227"></a>01227                 <span class="keywordflow">if</span> (result > 255)
-<a name="l01228"></a>01228                         result = 255;
-<a name="l01229"></a>01229                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l01230"></a>01230                 <span class="comment">/* Advance pointers */</span>
-<a name="l01231"></a>01231                 cursrc1++;
-<a name="l01232"></a>01232                 cursrc2++;
-<a name="l01233"></a>01233                 curdst++;
-<a name="l01234"></a>01234         }
-<a name="l01235"></a>01235 
-<a name="l01236"></a>01236         <span class="keywordflow">return</span> (0);
-<a name="l01237"></a>01237 }
-<a name="l01238"></a>01238 
-<a name="l01249"></a><a class="code" href="_s_d_l__image_filter_8c.html#a8a86c969daeb874fb643347592003484">01249</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a8a86c969daeb874fb643347592003484" title="Internal MMX Filter using BitAnd: D = S1 & S2.">SDL_imageFilterBitAndMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</spa [...]
-<a name="l01250"></a>01250 {
-<a name="l01251"></a>01251 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l01252"></a>01252 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l01253"></a>01253 <span class="preprocessor"></span>        __asm
-<a name="l01254"></a>01254         {
-<a name="l01255"></a>01255                 pusha
-<a name="l01256"></a>01256                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01257"></a>01257                         mov ebx, Src2           <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l01258"></a>01258                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l01259"></a>01259                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01260"></a>01260                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01261"></a>01261                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01262"></a>01262 L1017:
-<a name="l01263"></a>01263                 movq mm1, [eax]         <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l01264"></a>01264                 pand mm1, [ebx]         <span class="comment">/* mm1=Src1&Src2 */</span>
-<a name="l01265"></a>01265                 movq [edi], mm1         <span class="comment">/* store result in Dest */</span>
-<a name="l01266"></a>01266                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l01267"></a>01267                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
-<a name="l01268"></a>01268                         add edi, 8
-<a name="l01269"></a>01269                         dec ecx         <span class="comment">/* decrease loop counter */</span>
-<a name="l01270"></a>01270                         jnz L1017               <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01271"></a>01271                         emms                    <span class="comment">/* exit MMX state */</span>
-<a name="l01272"></a>01272                         popa
-<a name="l01273"></a>01273         }
-<a name="l01274"></a>01274 <span class="preprocessor">#else</span>
-<a name="l01275"></a>01275 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l01276"></a>01276                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"mov %2, %%eax \n\t"</span> <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01277"></a>01277                 <span class="stringliteral">"mov %1, %%ebx \n\t"</span>    <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l01278"></a>01278                 <span class="stringliteral">"mov %0, %%edi \n\t"</span>    <span class="comment">/* load Dest address into edi */</span>
-<a name="l01279"></a>01279                 <span class="stringliteral">"mov %3, %%ecx \n\t"</span>    <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01280"></a>01280                 <span class="stringliteral">"shr $3, %%ecx \n\t"</span>    <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01281"></a>01281                 <span class="stringliteral">".align 16       \n\t"</span>  <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01282"></a>01282                 <span class="stringliteral">"1: movq (%%eax), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l01283"></a>01283                 <span class="stringliteral">"pand    (%%ebx), %%mm1 \n\t"</span>   <span class="comment">/* mm1=Src1&Src2 */</span>
-<a name="l01284"></a>01284                 <span class="stringliteral">"movq    %%mm1, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l01285"></a>01285                 <span class="stringliteral">"add $8, %%eax \n\t"</span>    <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l01286"></a>01286                 <span class="stringliteral">"add $8, %%ebx \n\t"</span>    <span class="comment">/* register pointers by 8 */</span>
-<a name="l01287"></a>01287                 <span class="stringliteral">"add $8, %%edi \n\t"</span> <span class="stringliteral">"dec %%ecx     \n\t"</span>       <span class="comment">/* decrease loop counter */</span>
-<a name="l01288"></a>01288                 <span class="stringliteral">"jnz 1b        \n\t"</span>    <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01289"></a>01289                 <span class="stringliteral">"emms          \n\t"</span>    <span class="comment">/* exit MMX state */</span>
-<a name="l01290"></a>01290                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l01291"></a>01291                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l01292"></a>01292                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l01293"></a>01293                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %3 */</span>
-<a name="l01294"></a>01294                 );
-<a name="l01295"></a>01295 <span class="preprocessor">#endif</span>
-<a name="l01296"></a>01296 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l01297"></a>01297 <span class="preprocessor">#else</span>
-<a name="l01298"></a>01298 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l01299"></a>01299 <span class="preprocessor">#endif</span>
-<a name="l01300"></a>01300 <span class="preprocessor"></span>}
-<a name="l01301"></a>01301 
-<a name="l01312"></a><a class="code" href="_s_d_l__image_filter_8h.html#a5f67460c0b89dadd49d04832608a345b">01312</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a85837ce1b5de1f907b6b9053922b5cbc" title="Filter using BitAnd: D = S1 & S2.">SDL_imageFilterBitAnd</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span  [...]
-<a name="l01313"></a>01313 {
-<a name="l01314"></a>01314         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l01315"></a>01315         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l01316"></a>01316 
-<a name="l01317"></a>01317         <span class="comment">/* Validate input parameters */</span>
-<a name="l01318"></a>01318         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l01319"></a>01319                 <span class="keywordflow">return</span>(-1);
-<a name="l01320"></a>01320         <span class="keywordflow">if</span> (length == 0)
-<a name="l01321"></a>01321                 <span class="keywordflow">return</span>(0);
-<a name="l01322"></a>01322 
-<a name="l01323"></a>01323         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()>0) && (length>7)) {
-<a name="l01324"></a>01324                 <span class="comment">/*  if (length > 7) { */</span>
-<a name="l01325"></a>01325                 <span class="comment">/* Call MMX routine */</span>
-<a name="l01326"></a>01326 
-<a name="l01327"></a>01327                 <a class="code" href="_s_d_l__image_filter_8c.html#a8a86c969daeb874fb643347592003484" title="Internal MMX Filter using BitAnd: D = S1 & S2.">SDL_imageFilterBitAndMMX</a>(Src1, Src2, Dest, length);
-<a name="l01328"></a>01328 
-<a name="l01329"></a>01329                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l01330"></a>01330                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l01331"></a>01331 
-<a name="l01332"></a>01332                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l01333"></a>01333                         istart = length & 0xfffffff8;
-<a name="l01334"></a>01334                         cursrc1 = &Src1[istart];
-<a name="l01335"></a>01335                         cursrc2 = &Src2[istart];
-<a name="l01336"></a>01336                         curdst = &Dest[istart];
-<a name="l01337"></a>01337                 } <span class="keywordflow">else</span> {
-<a name="l01338"></a>01338                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l01339"></a>01339                         <span class="keywordflow">return</span> (0);
-<a name="l01340"></a>01340                 }
-<a name="l01341"></a>01341         } <span class="keywordflow">else</span> {
-<a name="l01342"></a>01342                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l01343"></a>01343                 istart = 0;
-<a name="l01344"></a>01344                 cursrc1 = Src1;
-<a name="l01345"></a>01345                 cursrc2 = Src2;
-<a name="l01346"></a>01346                 curdst = Dest;
-<a name="l01347"></a>01347         }
-<a name="l01348"></a>01348 
-<a name="l01349"></a>01349         <span class="comment">/* C routine to process image */</span>
-<a name="l01350"></a>01350         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l01351"></a>01351                 *curdst = (*cursrc1) & (*cursrc2);
-<a name="l01352"></a>01352                 <span class="comment">/* Advance pointers */</span>
-<a name="l01353"></a>01353                 cursrc1++;
-<a name="l01354"></a>01354                 cursrc2++;
-<a name="l01355"></a>01355                 curdst++;
-<a name="l01356"></a>01356         }
-<a name="l01357"></a>01357 
-<a name="l01358"></a>01358         <span class="keywordflow">return</span> (0);
-<a name="l01359"></a>01359 }
-<a name="l01360"></a>01360 
-<a name="l01371"></a><a class="code" href="_s_d_l__image_filter_8c.html#a2cd7db5de491dce5dfcf292fc241031d">01371</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a2cd7db5de491dce5dfcf292fc241031d" title="Internal MMX Filter using BitOr: D = S1 | S2.">SDL_imageFilterBitOrMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Sr [...]
-<a name="l01372"></a>01372 {
-<a name="l01373"></a>01373 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l01374"></a>01374 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l01375"></a>01375 <span class="preprocessor"></span>        __asm
-<a name="l01376"></a>01376         {
-<a name="l01377"></a>01377                 pusha
-<a name="l01378"></a>01378                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01379"></a>01379                         mov ebx, Src2           <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l01380"></a>01380                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l01381"></a>01381                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01382"></a>01382                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01383"></a>01383                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01384"></a>01384 L91017:
-<a name="l01385"></a>01385                 movq mm1, [eax]         <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l01386"></a>01386                 por mm1, [ebx]          <span class="comment">/* mm1=Src1|Src2 */</span>
-<a name="l01387"></a>01387                 movq [edi], mm1         <span class="comment">/* store result in Dest */</span>
-<a name="l01388"></a>01388                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l01389"></a>01389                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
-<a name="l01390"></a>01390                         add edi,  8
-<a name="l01391"></a>01391                         dec ecx         <span class="comment">/* decrease loop counter */</span>
-<a name="l01392"></a>01392                         jnz L91017              <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01393"></a>01393                         emms                    <span class="comment">/* exit MMX state */</span>
-<a name="l01394"></a>01394                         popa
-<a name="l01395"></a>01395         }
-<a name="l01396"></a>01396 <span class="preprocessor">#else</span>
-<a name="l01397"></a>01397 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l01398"></a>01398                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"mov %2, %%eax \n\t"</span> <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01399"></a>01399                 <span class="stringliteral">"mov %1, %%ebx \n\t"</span>    <span class="comment">/* load Src2 address into ebx */</span>
-<a name="l01400"></a>01400                 <span class="stringliteral">"mov %0, %%edi \n\t"</span>    <span class="comment">/* load Dest address into edi */</span>
-<a name="l01401"></a>01401                 <span class="stringliteral">"mov %3, %%ecx \n\t"</span>    <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01402"></a>01402                 <span class="stringliteral">"shr $3, %%ecx \n\t"</span>    <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01403"></a>01403                 <span class="stringliteral">".align 16       \n\t"</span>  <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01404"></a>01404                 <span class="stringliteral">"1: movq (%%eax), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l01405"></a>01405                 <span class="stringliteral">"por     (%%ebx), %%mm1 \n\t"</span>   <span class="comment">/* mm1=Src1|Src2 */</span>
-<a name="l01406"></a>01406                 <span class="stringliteral">"movq    %%mm1, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l01407"></a>01407                 <span class="stringliteral">"add $8, %%eax \n\t"</span>    <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l01408"></a>01408                 <span class="stringliteral">"add $8, %%ebx \n\t"</span>    <span class="comment">/* register pointers by 8 */</span>
-<a name="l01409"></a>01409                 <span class="stringliteral">"add $8, %%edi \n\t"</span> <span class="stringliteral">"dec %%ecx     \n\t"</span>       <span class="comment">/* decrease loop counter */</span>
-<a name="l01410"></a>01410                 <span class="stringliteral">"jnz 1b        \n\t"</span>    <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01411"></a>01411                 <span class="stringliteral">"emms          \n\t"</span>    <span class="comment">/* exit MMX state */</span>
-<a name="l01412"></a>01412                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l01413"></a>01413                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l01414"></a>01414                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l01415"></a>01415                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %3 */</span>
-<a name="l01416"></a>01416                 );
-<a name="l01417"></a>01417 <span class="preprocessor">#endif</span>
-<a name="l01418"></a>01418 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l01419"></a>01419 <span class="preprocessor">#else</span>
-<a name="l01420"></a>01420 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l01421"></a>01421 <span class="preprocessor">#endif</span>
-<a name="l01422"></a>01422 <span class="preprocessor"></span>}
+<a name="l01223"></a>01223 <span class="preprocessor">#else</span>
+<a name="l01224"></a>01224 <span class="preprocessor"></span>        <span class="comment">/* x86_64 ASM with constraints: */</span>
+<a name="l01225"></a>01225         <span class="comment">/* asm volatile ( */</span>
+<a name="l01226"></a>01226         <span class="comment">/*      "shr $3, %%rcx \n\t"    /\* counter/8 (MMX loads 8 bytes at a time) *\/ */</span>
+<a name="l01227"></a>01227         <span class="comment">/*      ".align 16       \n\t"  /\* 16 byte alignment of the loop entry *\/ */</span>
+<a name="l01228"></a>01228         <span class="comment">/*      "1: movq (%%rax), %%mm1 \n\t"   /\* load 8 bytes from Src1 into mm1 *\/ */</span>
+<a name="l01229"></a>01229         <span class="comment">/*      "pand    (%%rbx), %%mm1 \n\t"   /\* mm1=Src1&Src2 *\/ */</span>
+<a name="l01230"></a>01230         <span class="comment">/*      "movq    %%mm1, (%%rdi) \n\t"   /\* store result in Dest *\/ */</span>
+<a name="l01231"></a>01231         <span class="comment">/*      "add $8, %%rax \n\t"    /\* increase Src1, Src2 and Dest  *\/ */</span>
+<a name="l01232"></a>01232         <span class="comment">/*      "add $8, %%rbx \n\t"    /\* register pointers by 8 *\/ */</span>
+<a name="l01233"></a>01233         <span class="comment">/*      "add $8, %%rdi \n\t" */</span>
+<a name="l01234"></a>01234         <span class="comment">/*      "dec %%rcx     \n\t"    /\* decrease loop counter *\/ */</span>
+<a name="l01235"></a>01235         <span class="comment">/*      "jnz 1b        \n\t"    /\* check loop termination, proceed if required *\/ */</span>
+<a name="l01236"></a>01236         <span class="comment">/*      "emms          \n\t"    /\* exit MMX state *\/ */</span>
+<a name="l01237"></a>01237         <span class="comment">/*      : "+a" (Src1),          /\* load Src1 address into rax, modified by the loop *\/ */</span>
+<a name="l01238"></a>01238         <span class="comment">/*        "+b" (Src2),          /\* load Src2 address into rbx, modified by the loop *\/ */</span>
+<a name="l01239"></a>01239         <span class="comment">/*        "+c" (SrcLength),     /\* load loop counter (SIZE) into rcx, modified by the loop *\/ */</span>
+<a name="l01240"></a>01240         <span class="comment">/*        "+D" (Dest)           /\* load Dest address into rdi, modified by the loop *\/ */</span>
+<a name="l01241"></a>01241         <span class="comment">/*      : */</span>
+<a name="l01242"></a>01242         <span class="comment">/*      : "memory",             /\* *Dest is modified *\/ */</span>
+<a name="l01243"></a>01243         <span class="comment">/*           "mm1"                      /\* register mm1 modified *\/ */</span>
+<a name="l01244"></a>01244         <span class="comment">/* ); */</span>
+<a name="l01245"></a>01245 
+<a name="l01246"></a>01246         <span class="comment">/* i386 and x86_64 */</span>
+<a name="l01247"></a>01247         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l01248"></a>01248         __m64 *mSrc2 = (__m64*)Src2;
+<a name="l01249"></a>01249         __m64 *mDest = (__m64*)Dest;
+<a name="l01250"></a>01250         <span class="keywordtype">int</span> i;
+<a name="l01251"></a>01251         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l01252"></a>01252                 *mDest = _m_pand(*mSrc1, *mSrc2);       <span class="comment">/* Src1&Src2 */</span>
+<a name="l01253"></a>01253                 mSrc1++;
+<a name="l01254"></a>01254                 mSrc2++;
+<a name="l01255"></a>01255                 mDest++;
+<a name="l01256"></a>01256         }
+<a name="l01257"></a>01257         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l01258"></a>01258 <span class="preprocessor">#endif</span>
+<a name="l01259"></a>01259 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l01260"></a>01260 <span class="preprocessor">#else</span>
+<a name="l01261"></a>01261 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l01262"></a>01262 <span class="preprocessor">#endif</span>
+<a name="l01263"></a>01263 <span class="preprocessor"></span>}
+<a name="l01264"></a>01264 
+<a name="l01275"></a><a class="code" href="_s_d_l__image_filter_8h.html#a5f67460c0b89dadd49d04832608a345b">01275</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a85837ce1b5de1f907b6b9053922b5cbc" title="Filter using BitAnd: D = S1 & S2.">SDL_imageFilterBitAnd</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span  [...]
+<a name="l01276"></a>01276 {
+<a name="l01277"></a>01277         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l01278"></a>01278         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
+<a name="l01279"></a>01279 
+<a name="l01280"></a>01280         <span class="comment">/* Validate input parameters */</span>
+<a name="l01281"></a>01281         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l01282"></a>01282                 <span class="keywordflow">return</span>(-1);
+<a name="l01283"></a>01283         <span class="keywordflow">if</span> (length == 0)
+<a name="l01284"></a>01284                 <span class="keywordflow">return</span>(0);
+<a name="l01285"></a>01285 
+<a name="l01286"></a>01286         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()>0) && (length>7)) {
+<a name="l01287"></a>01287                 <span class="comment">/*  if (length > 7) { */</span>
+<a name="l01288"></a>01288                 <span class="comment">/* Call MMX routine */</span>
+<a name="l01289"></a>01289 
+<a name="l01290"></a>01290                 SDL_imageFilterBitAndMMX(Src1, Src2, Dest, length);
+<a name="l01291"></a>01291 
+<a name="l01292"></a>01292                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l01293"></a>01293                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l01294"></a>01294 
+<a name="l01295"></a>01295                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l01296"></a>01296                         istart = length & 0xfffffff8;
+<a name="l01297"></a>01297                         cursrc1 = &Src1[istart];
+<a name="l01298"></a>01298                         cursrc2 = &Src2[istart];
+<a name="l01299"></a>01299                         curdst = &Dest[istart];
+<a name="l01300"></a>01300                 } <span class="keywordflow">else</span> {
+<a name="l01301"></a>01301                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l01302"></a>01302                         <span class="keywordflow">return</span> (0);
+<a name="l01303"></a>01303                 }
+<a name="l01304"></a>01304         } <span class="keywordflow">else</span> {
+<a name="l01305"></a>01305                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l01306"></a>01306                 istart = 0;
+<a name="l01307"></a>01307                 cursrc1 = Src1;
+<a name="l01308"></a>01308                 cursrc2 = Src2;
+<a name="l01309"></a>01309                 curdst = Dest;
+<a name="l01310"></a>01310         }
+<a name="l01311"></a>01311 
+<a name="l01312"></a>01312         <span class="comment">/* C routine to process image */</span>
+<a name="l01313"></a>01313         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l01314"></a>01314                 *curdst = (*cursrc1) & (*cursrc2);
+<a name="l01315"></a>01315                 <span class="comment">/* Advance pointers */</span>
+<a name="l01316"></a>01316                 cursrc1++;
+<a name="l01317"></a>01317                 cursrc2++;
+<a name="l01318"></a>01318                 curdst++;
+<a name="l01319"></a>01319         }
+<a name="l01320"></a>01320 
+<a name="l01321"></a>01321         <span class="keywordflow">return</span> (0);
+<a name="l01322"></a>01322 }
+<a name="l01323"></a>01323 
+<a name="l01334"></a>01334 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterBitOrMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength)
+<a name="l01335"></a>01335 {
+<a name="l01336"></a>01336 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l01337"></a>01337 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l01338"></a>01338 <span class="preprocessor"></span>        __asm
+<a name="l01339"></a>01339         {
+<a name="l01340"></a>01340                 pusha
+<a name="l01341"></a>01341                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l01342"></a>01342                         mov ebx, Src2           <span class="comment">/* load Src2 address into ebx */</span>
+<a name="l01343"></a>01343                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l01344"></a>01344                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l01345"></a>01345                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l01346"></a>01346                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l01347"></a>01347 L91017:
+<a name="l01348"></a>01348                 movq mm1, [eax]         <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
+<a name="l01349"></a>01349                 por mm1, [ebx]          <span class="comment">/* mm1=Src1|Src2 */</span>
+<a name="l01350"></a>01350                 movq [edi], mm1         <span class="comment">/* store result in Dest */</span>
+<a name="l01351"></a>01351                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
+<a name="l01352"></a>01352                         add ebx, 8      <span class="comment">/* register pointers by 8 */</span>
+<a name="l01353"></a>01353                         add edi,  8
+<a name="l01354"></a>01354                         dec ecx         <span class="comment">/* decrease loop counter */</span>
+<a name="l01355"></a>01355                         jnz L91017              <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l01356"></a>01356                         emms                    <span class="comment">/* exit MMX state */</span>
+<a name="l01357"></a>01357                         popa
+<a name="l01358"></a>01358         }
+<a name="l01359"></a>01359 <span class="preprocessor">#else</span>
+<a name="l01360"></a>01360 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l01361"></a>01361         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l01362"></a>01362         __m64 *mSrc2 = (__m64*)Src2;
+<a name="l01363"></a>01363         __m64 *mDest = (__m64*)Dest;
+<a name="l01364"></a>01364         <span class="keywordtype">int</span> i;
+<a name="l01365"></a>01365         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l01366"></a>01366                 *mDest = _m_por(*mSrc1, *mSrc2);        <span class="comment">/* Src1|Src2 */</span>
+<a name="l01367"></a>01367                 mSrc1++;
+<a name="l01368"></a>01368                 mSrc2++;
+<a name="l01369"></a>01369                 mDest++;
+<a name="l01370"></a>01370         }
+<a name="l01371"></a>01371         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l01372"></a>01372 <span class="preprocessor">#endif</span>
+<a name="l01373"></a>01373 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l01374"></a>01374 <span class="preprocessor">#else</span>
+<a name="l01375"></a>01375 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l01376"></a>01376 <span class="preprocessor">#endif</span>
+<a name="l01377"></a>01377 <span class="preprocessor"></span>}
+<a name="l01378"></a>01378 
+<a name="l01389"></a><a class="code" href="_s_d_l__image_filter_8h.html#a0acf0eabba33f8fa7acbc08dc3015cd3">01389</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a5cf1c477f4e32d02f74ee95d9f7b0021" title="Filter using BitOr: D = S1 | S2.">SDL_imageFilterBitOr</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class= [...]
+<a name="l01390"></a>01390 {
+<a name="l01391"></a>01391         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l01392"></a>01392         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
+<a name="l01393"></a>01393 
+<a name="l01394"></a>01394         <span class="comment">/* Validate input parameters */</span>
+<a name="l01395"></a>01395         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l01396"></a>01396                 <span class="keywordflow">return</span>(-1);
+<a name="l01397"></a>01397         <span class="keywordflow">if</span> (length == 0)
+<a name="l01398"></a>01398                 <span class="keywordflow">return</span>(0);
+<a name="l01399"></a>01399 
+<a name="l01400"></a>01400         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l01401"></a>01401 
+<a name="l01402"></a>01402                 <span class="comment">/* MMX routine */</span>
+<a name="l01403"></a>01403                 SDL_imageFilterBitOrMMX(Src1, Src2, Dest, length);
+<a name="l01404"></a>01404 
+<a name="l01405"></a>01405                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l01406"></a>01406                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l01407"></a>01407                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l01408"></a>01408                         istart = length & 0xfffffff8;
+<a name="l01409"></a>01409                         cursrc1 = &Src1[istart];
+<a name="l01410"></a>01410                         cursrc2 = &Src2[istart];
+<a name="l01411"></a>01411                         curdst = &Dest[istart];
+<a name="l01412"></a>01412                 } <span class="keywordflow">else</span> {
+<a name="l01413"></a>01413                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l01414"></a>01414                         <span class="keywordflow">return</span> (0);
+<a name="l01415"></a>01415                 }
+<a name="l01416"></a>01416         } <span class="keywordflow">else</span> {
+<a name="l01417"></a>01417                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l01418"></a>01418                 istart = 0;
+<a name="l01419"></a>01419                 cursrc1 = Src1;
+<a name="l01420"></a>01420                 cursrc2 = Src2;
+<a name="l01421"></a>01421                 curdst = Dest;
+<a name="l01422"></a>01422         }
 <a name="l01423"></a>01423 
-<a name="l01434"></a><a class="code" href="_s_d_l__image_filter_8h.html#a0acf0eabba33f8fa7acbc08dc3015cd3">01434</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a5cf1c477f4e32d02f74ee95d9f7b0021" title="Filter using BitOr: D = S1 | S2.">SDL_imageFilterBitOr</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class= [...]
-<a name="l01435"></a>01435 {
-<a name="l01436"></a>01436         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l01437"></a>01437         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l01438"></a>01438 
-<a name="l01439"></a>01439         <span class="comment">/* Validate input parameters */</span>
-<a name="l01440"></a>01440         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l01441"></a>01441                 <span class="keywordflow">return</span>(-1);
-<a name="l01442"></a>01442         <span class="keywordflow">if</span> (length == 0)
-<a name="l01443"></a>01443                 <span class="keywordflow">return</span>(0);
-<a name="l01444"></a>01444 
-<a name="l01445"></a>01445         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l01446"></a>01446 
-<a name="l01447"></a>01447                 <span class="comment">/* MMX routine */</span>
-<a name="l01448"></a>01448                 <a class="code" href="_s_d_l__image_filter_8c.html#a2cd7db5de491dce5dfcf292fc241031d" title="Internal MMX Filter using BitOr: D = S1 | S2.">SDL_imageFilterBitOrMMX</a>(Src1, Src2, Dest, length);
-<a name="l01449"></a>01449 
-<a name="l01450"></a>01450                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l01451"></a>01451                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l01452"></a>01452                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l01453"></a>01453                         istart = length & 0xfffffff8;
-<a name="l01454"></a>01454                         cursrc1 = &Src1[istart];
-<a name="l01455"></a>01455                         cursrc2 = &Src2[istart];
-<a name="l01456"></a>01456                         curdst = &Dest[istart];
-<a name="l01457"></a>01457                 } <span class="keywordflow">else</span> {
-<a name="l01458"></a>01458                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l01459"></a>01459                         <span class="keywordflow">return</span> (0);
-<a name="l01460"></a>01460                 }
-<a name="l01461"></a>01461         } <span class="keywordflow">else</span> {
-<a name="l01462"></a>01462                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l01463"></a>01463                 istart = 0;
-<a name="l01464"></a>01464                 cursrc1 = Src1;
-<a name="l01465"></a>01465                 cursrc2 = Src2;
-<a name="l01466"></a>01466                 curdst = Dest;
-<a name="l01467"></a>01467         }
-<a name="l01468"></a>01468 
-<a name="l01469"></a>01469         <span class="comment">/* C routine to process image */</span>
-<a name="l01470"></a>01470         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l01471"></a>01471                 *curdst = *cursrc1 | *cursrc2;
-<a name="l01472"></a>01472                 <span class="comment">/* Advance pointers */</span>
-<a name="l01473"></a>01473                 cursrc1++;
-<a name="l01474"></a>01474                 cursrc2++;
-<a name="l01475"></a>01475                 curdst++;
-<a name="l01476"></a>01476         }
-<a name="l01477"></a>01477         <span class="keywordflow">return</span> (0);
-<a name="l01478"></a>01478 }
-<a name="l01479"></a>01479 
-<a name="l01490"></a><a class="code" href="_s_d_l__image_filter_8c.html#a95791d257c510c597a2ef542f43d6678">01490</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a95791d257c510c597a2ef542f43d6678" title="Internal ASM Filter using Div: D = S1 / S2.">SDL_imageFilterDivASM</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2,  [...]
-<a name="l01491"></a>01491 {
-<a name="l01492"></a>01492 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l01493"></a>01493 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l01494"></a>01494 <span class="preprocessor"></span>        __asm
-<a name="l01495"></a>01495         {
-<a name="l01496"></a>01496                 pusha
-<a name="l01497"></a>01497                         mov edx, Src1           <span class="comment">/* load Src1 address into edx */</span>
-<a name="l01498"></a>01498                         mov esi, Src2           <span class="comment">/* load Src2 address into esi */</span>
-<a name="l01499"></a>01499                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l01500"></a>01500                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01501"></a>01501                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01502"></a>01502 L10191:
-<a name="l01503"></a>01503                 mov bl, [esi]           <span class="comment">/* load a byte from Src2 */</span>
-<a name="l01504"></a>01504                 cmp bl, 0       <span class="comment">/* check if it zero */</span>
-<a name="l01505"></a>01505                         jnz L10192
-<a name="l01506"></a>01506                         mov [edi], 255          <span class="comment">/* division by zero = 255 !!! */</span>
-<a name="l01507"></a>01507                         jmp  L10193
-<a name="l01508"></a>01508 L10192:
-<a name="l01509"></a>01509                 xor ah, ah      <span class="comment">/* prepare AX, zero AH register */</span>
-<a name="l01510"></a>01510                         mov al, [edx]           <span class="comment">/* load a byte from Src1 into AL */</span>
-<a name="l01511"></a>01511                 div   bl                <span class="comment">/* divide AL by BL */</span>
-<a name="l01512"></a>01512                         mov [edi], al           <span class="comment">/* move a byte result to Dest */</span>
-<a name="l01513"></a>01513 L10193:
-<a name="l01514"></a>01514                 inc edx         <span class="comment">/* increment Src1, Src2, Dest */</span>
-<a name="l01515"></a>01515                         inc esi                 <span class="comment">/* pointer registers by one */</span>
-<a name="l01516"></a>01516                         inc edi
-<a name="l01517"></a>01517                         dec ecx         <span class="comment">/* decrease loop counter */</span>
-<a name="l01518"></a>01518                         jnz L10191      <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01519"></a>01519                         popa
-<a name="l01520"></a>01520         }
-<a name="l01521"></a>01521 <span class="preprocessor">#else</span>
-<a name="l01522"></a>01522 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l01523"></a>01523                 (<span class="stringliteral">"pusha \n\t"</span> <span class="stringliteral">"mov %2, %%edx \n\t"</span>      <span class="comment">/* load Src1 address into edx */</span>
-<a name="l01524"></a>01524                 <span class="stringliteral">"mov %1, %%esi \n\t"</span>    <span class="comment">/* load Src2 address into esi */</span>
-<a name="l01525"></a>01525                 <span class="stringliteral">"mov %0, %%edi \n\t"</span>    <span class="comment">/* load Dest address into edi */</span>
-<a name="l01526"></a>01526                 <span class="stringliteral">"mov %3, %%ecx \n\t"</span>    <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01527"></a>01527                 <span class="stringliteral">".align 16     \n\t"</span>    <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01528"></a>01528                 <span class="stringliteral">"1: mov (%%esi), %%bl  \n\t"</span>    <span class="comment">/* load a byte from Src2 */</span>
-<a name="l01529"></a>01529                 <span class="stringliteral">"cmp       $0, %%bl  \n\t"</span>      <span class="comment">/* check if it zero */</span>
-<a name="l01530"></a>01530                 <span class="stringliteral">"jnz 2f              \n\t"</span> <span class="stringliteral">"movb  $255, (%%edi) \n\t"</span>   <span class="comment">/* division by zero = 255 !!! */</span>
-<a name="l01531"></a>01531                 <span class="stringliteral">"jmp 3f              \n\t"</span> <span class="stringliteral">"2:                  \n\t"</span> <span class="stringliteral">"xor   %%ah, %%ah    \n\t"</span>        <span class="comment">/* prepare AX, zero AH register */</span>
-<a name="l01532"></a>01532                 <span class="stringliteral">"mov   (%%edx), %%al \n\t"</span>      <span class="comment">/* load a byte from Src1 into AL */</span>
-<a name="l01533"></a>01533                 <span class="stringliteral">"div   %%bl          \n\t"</span>      <span class="comment">/* divide AL by BL */</span>
-<a name="l01534"></a>01534                 <span class="stringliteral">"mov   %%al, (%%edi) \n\t"</span>      <span class="comment">/* move a byte result to Dest */</span>
-<a name="l01535"></a>01535                 <span class="stringliteral">"3: inc %%edx        \n\t"</span>      <span class="comment">/* increment Src1, Src2, Dest */</span>
-<a name="l01536"></a>01536                 <span class="stringliteral">"inc %%esi \n\t"</span>                <span class="comment">/* pointer registers by one */</span>
-<a name="l01537"></a>01537                 <span class="stringliteral">"inc %%edi \n\t"</span> <span class="stringliteral">"dec %%ecx    \n\t"</span>    <span class="comment">/* decrease loop counter */</span>
-<a name="l01538"></a>01538                 <span class="stringliteral">"jnz 1b       \n\t"</span>     <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01539"></a>01539                 <span class="stringliteral">"popa \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest) <span class="comment">/* %0 */</span>
-<a name="l01540"></a>01540                 :<span class="stringliteral">"m"</span>(Src2),             <span class="comment">/* %1 */</span>
-<a name="l01541"></a>01541                 <span class="stringliteral">"m"</span>(Src1),              <span class="comment">/* %2 */</span>
-<a name="l01542"></a>01542                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %3 */</span>
-<a name="l01543"></a>01543                 );
-<a name="l01544"></a>01544 <span class="preprocessor">#endif</span>
-<a name="l01545"></a>01545 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l01546"></a>01546 <span class="preprocessor">#else</span>
-<a name="l01547"></a>01547 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l01548"></a>01548 <span class="preprocessor">#endif</span>
-<a name="l01549"></a>01549 <span class="preprocessor"></span>}
+<a name="l01424"></a>01424         <span class="comment">/* C routine to process image */</span>
+<a name="l01425"></a>01425         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l01426"></a>01426                 *curdst = *cursrc1 | *cursrc2;
+<a name="l01427"></a>01427                 <span class="comment">/* Advance pointers */</span>
+<a name="l01428"></a>01428                 cursrc1++;
+<a name="l01429"></a>01429                 cursrc2++;
+<a name="l01430"></a>01430                 curdst++;
+<a name="l01431"></a>01431         }
+<a name="l01432"></a>01432         <span class="keywordflow">return</span> (0);
+<a name="l01433"></a>01433 }
+<a name="l01434"></a>01434 
+<a name="l01445"></a>01445 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterDivASM(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength)
+<a name="l01446"></a>01446 {
+<a name="l01447"></a>01447 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l01448"></a>01448 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l01449"></a>01449 <span class="preprocessor"></span>        __asm
+<a name="l01450"></a>01450         {
+<a name="l01451"></a>01451                 pusha
+<a name="l01452"></a>01452                         mov edx, Src1           <span class="comment">/* load Src1 address into edx */</span>
+<a name="l01453"></a>01453                         mov esi, Src2           <span class="comment">/* load Src2 address into esi */</span>
+<a name="l01454"></a>01454                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l01455"></a>01455                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l01456"></a>01456                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l01457"></a>01457 L10191:
+<a name="l01458"></a>01458                 mov bl, [esi]           <span class="comment">/* load a byte from Src2 */</span>
+<a name="l01459"></a>01459                 cmp bl, 0       <span class="comment">/* check if it zero */</span>
+<a name="l01460"></a>01460                         jnz L10192
+<a name="l01461"></a>01461                         mov [edi], 255          <span class="comment">/* division by zero = 255 !!! */</span>
+<a name="l01462"></a>01462                         jmp  L10193
+<a name="l01463"></a>01463 L10192:
+<a name="l01464"></a>01464                 xor ah, ah      <span class="comment">/* prepare AX, zero AH register */</span>
+<a name="l01465"></a>01465                         mov al, [edx]           <span class="comment">/* load a byte from Src1 into AL */</span>
+<a name="l01466"></a>01466                 div   bl                <span class="comment">/* divide AL by BL */</span>
+<a name="l01467"></a>01467                         mov [edi], al           <span class="comment">/* move a byte result to Dest */</span>
+<a name="l01468"></a>01468 L10193:
+<a name="l01469"></a>01469                 inc edx         <span class="comment">/* increment Src1, Src2, Dest */</span>
+<a name="l01470"></a>01470                         inc esi                 <span class="comment">/* pointer registers by one */</span>
+<a name="l01471"></a>01471                         inc edi
+<a name="l01472"></a>01472                         dec ecx         <span class="comment">/* decrease loop counter */</span>
+<a name="l01473"></a>01473                         jnz L10191      <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l01474"></a>01474                         popa
+<a name="l01475"></a>01475         }
+<a name="l01476"></a>01476 <span class="preprocessor">#else</span>
+<a name="l01477"></a>01477 <span class="preprocessor"></span>        <span class="comment">/* Note: ~15% gain on i386, less efficient than C on x86_64 */</span>
+<a name="l01478"></a>01478         <span class="comment">/* Also depends on whether the function is static (?!) */</span>
+<a name="l01479"></a>01479         <span class="comment">/* Also depends on whether we work on malloc() or static char[] */</span>
+<a name="l01480"></a>01480         <span class="keyword">asm</span> <span class="keyword">volatile</span> (
+<a name="l01481"></a>01481 <span class="preprocessor">#  if defined(i386)</span>
+<a name="l01482"></a>01482 <span class="preprocessor"></span>                <span class="stringliteral">"pushl %%ebx \n\t"</span>              <span class="comment">/* %ebx may be the PIC register.  */</span>
+<a name="l01483"></a>01483                 <span class="stringliteral">".align 16     \n\t"</span>            <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l01484"></a>01484                 <span class="stringliteral">"1: mov (%%esi), %%bl  \n\t"</span>    <span class="comment">/* load a byte from Src2 */</span>
+<a name="l01485"></a>01485                 <span class="stringliteral">"cmp       $0, %%bl    \n\t"</span>    <span class="comment">/* check if it zero */</span>
+<a name="l01486"></a>01486                 <span class="stringliteral">"jnz 2f                \n\t"</span>
+<a name="l01487"></a>01487                 <span class="stringliteral">"movb  $255, (%%edi)   \n\t"</span>    <span class="comment">/* division by zero = 255 !!! */</span>
+<a name="l01488"></a>01488                 <span class="stringliteral">"jmp 3f                \n\t"</span>
+<a name="l01489"></a>01489                 <span class="stringliteral">"2: xor %%ah, %%ah     \n\t"</span>    <span class="comment">/* prepare AX, zero AH register */</span>
+<a name="l01490"></a>01490                 <span class="stringliteral">"mov   (%%edx), %%al   \n\t"</span>    <span class="comment">/* load a byte from Src1 into AL */</span>
+<a name="l01491"></a>01491                 <span class="stringliteral">"div   %%bl            \n\t"</span>    <span class="comment">/* divide AL by BL */</span>
+<a name="l01492"></a>01492                 <span class="stringliteral">"mov   %%al, (%%edi)   \n\t"</span>    <span class="comment">/* move a byte result to Dest */</span>
+<a name="l01493"></a>01493                 <span class="stringliteral">"3: inc %%edx          \n\t"</span>    <span class="comment">/* increment Src1, Src2, Dest */</span>
+<a name="l01494"></a>01494                 <span class="stringliteral">"inc %%esi \n\t"</span>                <span class="comment">/* pointer registers by one */</span>
+<a name="l01495"></a>01495                 <span class="stringliteral">"inc %%edi \n\t"</span>
+<a name="l01496"></a>01496                 <span class="stringliteral">"dec %%ecx \n\t"</span>                <span class="comment">/* decrease loop counter */</span>
+<a name="l01497"></a>01497                 <span class="stringliteral">"jnz 1b    \n\t"</span>                <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l01498"></a>01498                 <span class="stringliteral">"popl %%ebx \n\t"</span>               <span class="comment">/* restore %ebx */</span>
+<a name="l01499"></a>01499                 : <span class="stringliteral">"+d"</span> (Src1),          <span class="comment">/* load Src1 address into edx */</span>
+<a name="l01500"></a>01500                   <span class="stringliteral">"+S"</span> (Src2),          <span class="comment">/* load Src2 address into esi */</span>
+<a name="l01501"></a>01501                   <span class="stringliteral">"+c"</span> (SrcLength),     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l01502"></a>01502                   <span class="stringliteral">"+D"</span> (Dest)           <span class="comment">/* load Dest address into edi */</span>
+<a name="l01503"></a>01503                 :
+<a name="l01504"></a>01504                 : <span class="stringliteral">"memory"</span>, <span class="stringliteral">"rax"</span>
+<a name="l01505"></a>01505 <span class="preprocessor">#  elif defined(__x86_64__)</span>
+<a name="l01506"></a>01506 <span class="preprocessor"></span>                <span class="stringliteral">".align 16     \n\t"</span>            <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l01507"></a>01507                 <span class="stringliteral">"1: mov (%%rsi), %%bl  \n\t"</span>    <span class="comment">/* load a byte from Src2 */</span>
+<a name="l01508"></a>01508                 <span class="stringliteral">"cmp       $0, %%bl    \n\t"</span>    <span class="comment">/* check if it zero */</span>
+<a name="l01509"></a>01509                 <span class="stringliteral">"jnz 2f                \n\t"</span>
+<a name="l01510"></a>01510                 <span class="stringliteral">"movb  $255, (%%rdi)   \n\t"</span>    <span class="comment">/* division by zero = 255 !!! */</span>
+<a name="l01511"></a>01511                 <span class="stringliteral">"jmp 3f                \n\t"</span>
+<a name="l01512"></a>01512                 <span class="stringliteral">"2: xor %%ah, %%ah     \n\t"</span>    <span class="comment">/* prepare AX, zero AH register */</span>
+<a name="l01513"></a>01513                 <span class="stringliteral">"mov   (%%rdx), %%al   \n\t"</span>    <span class="comment">/* load a byte from Src1 into AL */</span>
+<a name="l01514"></a>01514                 <span class="stringliteral">"div   %%bl            \n\t"</span>    <span class="comment">/* divide AL by BL */</span>
+<a name="l01515"></a>01515                 <span class="stringliteral">"mov   %%al, (%%rdi)   \n\t"</span>    <span class="comment">/* move a byte result to Dest */</span>
+<a name="l01516"></a>01516                 <span class="stringliteral">"3: inc %%rdx          \n\t"</span>    <span class="comment">/* increment Src1, Src2, Dest */</span>
+<a name="l01517"></a>01517                 <span class="stringliteral">"inc %%rsi \n\t"</span>                <span class="comment">/* pointer registers by one */</span>
+<a name="l01518"></a>01518                 <span class="stringliteral">"inc %%rdi \n\t"</span>
+<a name="l01519"></a>01519                 <span class="stringliteral">"dec %%rcx \n\t"</span>                <span class="comment">/* decrease loop counter */</span>
+<a name="l01520"></a>01520                 <span class="stringliteral">"jnz 1b    \n\t"</span>                <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l01521"></a>01521                 : <span class="stringliteral">"+d"</span> (Src1),          <span class="comment">/* load Src1 address into edx */</span>
+<a name="l01522"></a>01522                   <span class="stringliteral">"+S"</span> (Src2),          <span class="comment">/* load Src2 address into esi */</span>
+<a name="l01523"></a>01523                   <span class="stringliteral">"+c"</span> (SrcLength),     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l01524"></a>01524                   <span class="stringliteral">"+D"</span> (Dest)           <span class="comment">/* load Dest address into edi */</span>
+<a name="l01525"></a>01525                 :
+<a name="l01526"></a>01526                 : <span class="stringliteral">"memory"</span>, <span class="stringliteral">"rax"</span>, <span class="stringliteral">"rbx"</span>
+<a name="l01527"></a>01527 <span class="preprocessor">#  endif</span>
+<a name="l01528"></a>01528 <span class="preprocessor"></span>                );
+<a name="l01529"></a>01529 <span class="preprocessor">#endif</span>
+<a name="l01530"></a>01530 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l01531"></a>01531 <span class="preprocessor">#else</span>
+<a name="l01532"></a>01532 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l01533"></a>01533 <span class="preprocessor">#endif</span>
+<a name="l01534"></a>01534 <span class="preprocessor"></span>}
+<a name="l01535"></a>01535 
+<a name="l01546"></a><a class="code" href="_s_d_l__image_filter_8h.html#aeb8ed56aa7de3c8b0d0b2aa9163c3e37">01546</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a0ea22f01c6a4724bac307da3e5355f58" title="Filter using Div: D = S1 / S2.">SDL_imageFilterDiv</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="key [...]
+<a name="l01547"></a>01547 {
+<a name="l01548"></a>01548         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l01549"></a>01549         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
 <a name="l01550"></a>01550 
-<a name="l01561"></a><a class="code" href="_s_d_l__image_filter_8h.html#aeb8ed56aa7de3c8b0d0b2aa9163c3e37">01561</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a0ea22f01c6a4724bac307da3e5355f58" title="Filter using Div: D = S1 / S2.">SDL_imageFilterDiv</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src2, <span class="key [...]
-<a name="l01562"></a>01562 {
-<a name="l01563"></a>01563         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l01564"></a>01564         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *cursrc2, *curdst;
-<a name="l01565"></a>01565         <span class="keywordtype">int</span> result;
-<a name="l01566"></a>01566 
-<a name="l01567"></a>01567         <span class="comment">/* Validate input parameters */</span>
-<a name="l01568"></a>01568         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
-<a name="l01569"></a>01569                 <span class="keywordflow">return</span>(-1);
-<a name="l01570"></a>01570         <span class="keywordflow">if</span> (length == 0)
-<a name="l01571"></a>01571                 <span class="keywordflow">return</span>(0);
-<a name="l01572"></a>01572 
-<a name="l01573"></a>01573         <span class="keywordflow">if</span> (<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) {
-<a name="l01574"></a>01574                 <span class="keywordflow">if</span> (length > 0) {
-<a name="l01575"></a>01575                         <span class="comment">/* Call ASM routine */</span>
-<a name="l01576"></a>01576                         <a class="code" href="_s_d_l__image_filter_8c.html#a95791d257c510c597a2ef542f43d6678" title="Internal ASM Filter using Div: D = S1 / S2.">SDL_imageFilterDivASM</a>(Src1, Src2, Dest, length);
-<a name="l01577"></a>01577 
-<a name="l01578"></a>01578                         <span class="comment">/* Never unaligned bytes - we are done */</span>
-<a name="l01579"></a>01579                         <span class="keywordflow">return</span> (0);
-<a name="l01580"></a>01580                 } <span class="keywordflow">else</span> {
-<a name="l01581"></a>01581                         <span class="keywordflow">return</span> (-1);
-<a name="l01582"></a>01582                 }
-<a name="l01583"></a>01583         } 
-<a name="l01584"></a>01584         
-<a name="l01585"></a>01585         <span class="comment">/* Setup to process whole image */</span>
-<a name="l01586"></a>01586         istart = 0;
-<a name="l01587"></a>01587         cursrc1 = Src1;
-<a name="l01588"></a>01588         cursrc2 = Src2;
-<a name="l01589"></a>01589         curdst = Dest;
-<a name="l01590"></a>01590 
-<a name="l01591"></a>01591         <span class="comment">/* C routine to process image */</span>
-<a name="l01592"></a>01592         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l01593"></a>01593                 result = (int) *cursrc1 / (<span class="keywordtype">int</span>) *cursrc2;
-<a name="l01594"></a>01594                 *curdst = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l01595"></a>01595                 <span class="comment">/* Advance pointers */</span>
-<a name="l01596"></a>01596                 cursrc1++;
-<a name="l01597"></a>01597                 cursrc2++;
-<a name="l01598"></a>01598                 curdst++;
-<a name="l01599"></a>01599         }
-<a name="l01600"></a>01600 
-<a name="l01601"></a>01601         <span class="keywordflow">return</span> (0);
-<a name="l01602"></a>01602 }
-<a name="l01603"></a>01603 
-<a name="l01604"></a>01604 <span class="comment">/* ------------------------------------------------------------------------------------ */</span>
-<a name="l01605"></a>01605 
-<a name="l01615"></a><a class="code" href="_s_d_l__image_filter_8c.html#a1b522e196f9647501c6badd1de727b97">01615</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a1b522e196f9647501c6badd1de727b97" title="Internal MMX Filter using BitNegation: D = !S.">SDL_imageFilterBitNegationMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</sp [...]
-<a name="l01616"></a>01616 {
-<a name="l01617"></a>01617 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l01618"></a>01618 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l01619"></a>01619 <span class="preprocessor"></span>        __asm
-<a name="l01620"></a>01620         {
-<a name="l01621"></a>01621                 pusha
-<a name="l01622"></a>01622                         pcmpeqb mm1, mm1        <span class="comment">/* generate all 1's in mm1 */</span>
-<a name="l01623"></a>01623                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01624"></a>01624                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l01625"></a>01625                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01626"></a>01626                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01627"></a>01627                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01628"></a>01628 L91117:
-<a name="l01629"></a>01629                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l01630"></a>01630                 pxor mm0, mm1           <span class="comment">/* negate mm0 by xoring with mm1 */</span>
-<a name="l01631"></a>01631                         movq [edi], mm0         <span class="comment">/* store result in Dest */</span>
-<a name="l01632"></a>01632                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l01633"></a>01633                         add edi,  8
-<a name="l01634"></a>01634                         dec ecx         <span class="comment">/* decrease loop counter */</span>
-<a name="l01635"></a>01635                         jnz L91117              <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01636"></a>01636                         emms                    <span class="comment">/* exit MMX state */</span>
-<a name="l01637"></a>01637                         popa
-<a name="l01638"></a>01638         }
-<a name="l01639"></a>01639 <span class="preprocessor">#else</span>
-<a name="l01640"></a>01640 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l01641"></a>01641                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pcmpeqb   %%mm1, %%mm1 \n\t"</span>        <span class="comment">/* generate all 1's in mm1 */</span>
-<a name="l01642"></a>01642                 <span class="stringliteral">"mov %1, %%eax \n\t"</span>    <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01643"></a>01643                 <span class="stringliteral">"mov %0, %%edi \n\t"</span>    <span class="comment">/* load Dest address into edi */</span>
-<a name="l01644"></a>01644                 <span class="stringliteral">"mov %2, %%ecx \n\t"</span>    <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01645"></a>01645                 <span class="stringliteral">"shr $3, %%ecx \n\t"</span>    <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01646"></a>01646                 <span class="stringliteral">".align 16       \n\t"</span>  <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01647"></a>01647                 <span class="stringliteral">"1: movq (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
-<a name="l01648"></a>01648                 <span class="stringliteral">"pxor      %%mm1, %%mm0 \n\t"</span>   <span class="comment">/* negate mm0 by xoring with mm1 */</span>
-<a name="l01649"></a>01649                 <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l01650"></a>01650                 <span class="stringliteral">"add $8, %%eax \n\t"</span>    <span class="comment">/* increase Src1, Src2 and Dest  */</span>
-<a name="l01651"></a>01651                 <span class="stringliteral">"add $8, %%edi \n\t"</span> <span class="stringliteral">"dec %%ecx     \n\t"</span>       <span class="comment">/* decrease loop counter */</span>
-<a name="l01652"></a>01652                 <span class="stringliteral">"jnz 1b        \n\t"</span>    <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01653"></a>01653                 <span class="stringliteral">"emms          \n\t"</span>    <span class="comment">/* exit MMX state */</span>
-<a name="l01654"></a>01654                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l01655"></a>01655                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l01656"></a>01656                 <span class="stringliteral">"m"</span>(SrcLength)          <span class="comment">/* %2 */</span>
-<a name="l01657"></a>01657                 );
-<a name="l01658"></a>01658 <span class="preprocessor">#endif</span>
-<a name="l01659"></a>01659 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l01660"></a>01660 <span class="preprocessor">#else</span>
-<a name="l01661"></a>01661 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l01662"></a>01662 <span class="preprocessor">#endif</span>
-<a name="l01663"></a>01663 <span class="preprocessor"></span>}
-<a name="l01664"></a>01664 
-<a name="l01674"></a><a class="code" href="_s_d_l__image_filter_8h.html#abc3c3fc5f018e271f6393921f3964d31">01674</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ac3abfaa8ec2e88c3c4893588c5555856" title="Filter using BitNegation: D = !S.">SDL_imageFilterBitNegation</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span [...]
-<a name="l01675"></a>01675 {
-<a name="l01676"></a>01676         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l01677"></a>01677         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdst;
+<a name="l01551"></a>01551         <span class="comment">/* Validate input parameters */</span>
+<a name="l01552"></a>01552         <span class="keywordflow">if</span> ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+<a name="l01553"></a>01553                 <span class="keywordflow">return</span>(-1);
+<a name="l01554"></a>01554         <span class="keywordflow">if</span> (length == 0)
+<a name="l01555"></a>01555                 <span class="keywordflow">return</span>(0);
+<a name="l01556"></a>01556 
+<a name="l01557"></a>01557         <span class="keywordflow">if</span> (<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) {
+<a name="l01558"></a>01558                 <span class="keywordflow">if</span> (length > 0) {
+<a name="l01559"></a>01559                         <span class="comment">/* Call ASM routine */</span>
+<a name="l01560"></a>01560                         SDL_imageFilterDivASM(Src1, Src2, Dest, length);
+<a name="l01561"></a>01561 
+<a name="l01562"></a>01562                         <span class="comment">/* Never unaligned bytes - we are done */</span>
+<a name="l01563"></a>01563                         <span class="keywordflow">return</span> (0);
+<a name="l01564"></a>01564                 } <span class="keywordflow">else</span> {
+<a name="l01565"></a>01565                         <span class="keywordflow">return</span> (-1);
+<a name="l01566"></a>01566                 }
+<a name="l01567"></a>01567         } 
+<a name="l01568"></a>01568         
+<a name="l01569"></a>01569         <span class="comment">/* Setup to process whole image */</span>
+<a name="l01570"></a>01570         istart = 0;
+<a name="l01571"></a>01571         cursrc1 = Src1;
+<a name="l01572"></a>01572         cursrc2 = Src2;
+<a name="l01573"></a>01573         curdst = Dest;
+<a name="l01574"></a>01574 
+<a name="l01575"></a>01575         <span class="comment">/* C routine to process image */</span>
+<a name="l01576"></a>01576         <span class="comment">/* for (i = istart; i < length; i++) { */</span>
+<a name="l01577"></a>01577         <span class="comment">/*      if (*cursrc2 == 0) { */</span>
+<a name="l01578"></a>01578         <span class="comment">/*              *curdst = 255; */</span>
+<a name="l01579"></a>01579         <span class="comment">/*      } else { */</span>
+<a name="l01580"></a>01580         <span class="comment">/*              result = (int) *cursrc1 / (int) *cursrc2; */</span>
+<a name="l01581"></a>01581         <span class="comment">/*              *curdst = (unsigned char) result; */</span>
+<a name="l01582"></a>01582         <span class="comment">/*      } */</span>
+<a name="l01583"></a>01583         <span class="comment">/*      /\* Advance pointers *\/ */</span>
+<a name="l01584"></a>01584         <span class="comment">/*      cursrc1++; */</span>
+<a name="l01585"></a>01585         <span class="comment">/*      cursrc2++; */</span>
+<a name="l01586"></a>01586         <span class="comment">/*      curdst++; */</span>
+<a name="l01587"></a>01587         <span class="comment">/* } */</span>
+<a name="l01588"></a>01588         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l01589"></a>01589                 <span class="keywordflow">if</span> (*cursrc2 == 0) {
+<a name="l01590"></a>01590                         *curdst = 255;
+<a name="l01591"></a>01591                 } <span class="keywordflow">else</span> {
+<a name="l01592"></a>01592                         *curdst = (int)*cursrc1 / (<span class="keywordtype">int</span>)*cursrc2;  <span class="comment">// (int) for efficiency</span>
+<a name="l01593"></a>01593                 }
+<a name="l01594"></a>01594                 <span class="comment">/* Advance pointers */</span>
+<a name="l01595"></a>01595                 cursrc1++;
+<a name="l01596"></a>01596                 cursrc2++;
+<a name="l01597"></a>01597                 curdst++;
+<a name="l01598"></a>01598         }
+<a name="l01599"></a>01599 
+<a name="l01600"></a>01600         <span class="keywordflow">return</span> (0);
+<a name="l01601"></a>01601 }
+<a name="l01602"></a>01602 
+<a name="l01603"></a>01603 <span class="comment">/* ------------------------------------------------------------------------------------ */</span>
+<a name="l01604"></a>01604 
+<a name="l01614"></a>01614 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterBitNegationMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength)
+<a name="l01615"></a>01615 {
+<a name="l01616"></a>01616 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l01617"></a>01617 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l01618"></a>01618 <span class="preprocessor"></span>        __asm
+<a name="l01619"></a>01619         {
+<a name="l01620"></a>01620                 pusha
+<a name="l01621"></a>01621                         pcmpeqb mm1, mm1        <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l01622"></a>01622                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l01623"></a>01623                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l01624"></a>01624                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l01625"></a>01625                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l01626"></a>01626                         align 16                <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l01627"></a>01627 L91117:
+<a name="l01628"></a>01628                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from Src1 into mm1 */</span>
+<a name="l01629"></a>01629                 pxor mm0, mm1           <span class="comment">/* negate mm0 by xoring with mm1 */</span>
+<a name="l01630"></a>01630                         movq [edi], mm0         <span class="comment">/* store result in Dest */</span>
+<a name="l01631"></a>01631                         add eax, 8      <span class="comment">/* increase Src1, Src2 and Dest  */</span>
+<a name="l01632"></a>01632                         add edi,  8
+<a name="l01633"></a>01633                         dec ecx         <span class="comment">/* decrease loop counter */</span>
+<a name="l01634"></a>01634                         jnz L91117              <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l01635"></a>01635                         emms                    <span class="comment">/* exit MMX state */</span>
+<a name="l01636"></a>01636                         popa
+<a name="l01637"></a>01637         }
+<a name="l01638"></a>01638 <span class="preprocessor">#else</span>
+<a name="l01639"></a>01639 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l01640"></a>01640         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l01641"></a>01641         __m64 *mDest = (__m64*)Dest;
+<a name="l01642"></a>01642         __m64 mm1;
+<a name="l01643"></a>01643         mm1 = _m_pcmpeqb(mm1, mm1);             <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l01644"></a>01644         <span class="keywordtype">int</span> i;
+<a name="l01645"></a>01645         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l01646"></a>01646                 *mDest = _m_pxor(*mSrc1, mm1);  <span class="comment">/* negate mm0 by xoring with mm1 */</span>
+<a name="l01647"></a>01647                 mSrc1++;
+<a name="l01648"></a>01648                 mDest++;
+<a name="l01649"></a>01649         }
+<a name="l01650"></a>01650         _m_empty();                             <span class="comment">/* clean MMX state */</span>
+<a name="l01651"></a>01651 
+<a name="l01652"></a>01652 <span class="preprocessor">#endif</span>
+<a name="l01653"></a>01653 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l01654"></a>01654 <span class="preprocessor">#else</span>
+<a name="l01655"></a>01655 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l01656"></a>01656 <span class="preprocessor">#endif</span>
+<a name="l01657"></a>01657 <span class="preprocessor"></span>}
+<a name="l01658"></a>01658 
+<a name="l01668"></a><a class="code" href="_s_d_l__image_filter_8h.html#abc3c3fc5f018e271f6393921f3964d31">01668</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ac3abfaa8ec2e88c3c4893588c5555856" title="Filter using BitNegation: D = !S.">SDL_imageFilterBitNegation</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span [...]
+<a name="l01669"></a>01669 {
+<a name="l01670"></a>01670         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l01671"></a>01671         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdst;
+<a name="l01672"></a>01672 
+<a name="l01673"></a>01673         <span class="comment">/* Validate input parameters */</span>
+<a name="l01674"></a>01674         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l01675"></a>01675                 <span class="keywordflow">return</span>(-1);
+<a name="l01676"></a>01676         <span class="keywordflow">if</span> (length == 0)
+<a name="l01677"></a>01677                 <span class="keywordflow">return</span>(0);
 <a name="l01678"></a>01678 
-<a name="l01679"></a>01679         <span class="comment">/* Validate input parameters */</span>
-<a name="l01680"></a>01680         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l01681"></a>01681                 <span class="keywordflow">return</span>(-1);
-<a name="l01682"></a>01682         <span class="keywordflow">if</span> (length == 0)
-<a name="l01683"></a>01683                 <span class="keywordflow">return</span>(0);
-<a name="l01684"></a>01684 
-<a name="l01685"></a>01685         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l01686"></a>01686                 <span class="comment">/* MMX routine */</span>
-<a name="l01687"></a>01687                 <a class="code" href="_s_d_l__image_filter_8c.html#a1b522e196f9647501c6badd1de727b97" title="Internal MMX Filter using BitNegation: D = !S.">SDL_imageFilterBitNegationMMX</a>(Src1, Dest, length);
-<a name="l01688"></a>01688 
-<a name="l01689"></a>01689                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l01690"></a>01690                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l01691"></a>01691                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l01692"></a>01692                         istart = length & 0xfffffff8;
-<a name="l01693"></a>01693                         cursrc1 = &Src1[istart];
-<a name="l01694"></a>01694                         curdst = &Dest[istart];
-<a name="l01695"></a>01695                 } <span class="keywordflow">else</span> {
-<a name="l01696"></a>01696                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l01697"></a>01697                         <span class="keywordflow">return</span> (0);
-<a name="l01698"></a>01698                 }
-<a name="l01699"></a>01699         } <span class="keywordflow">else</span> {
-<a name="l01700"></a>01700                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l01701"></a>01701                 istart = 0;
-<a name="l01702"></a>01702                 cursrc1 = Src1;
-<a name="l01703"></a>01703                 curdst = Dest;
-<a name="l01704"></a>01704         }
-<a name="l01705"></a>01705 
-<a name="l01706"></a>01706         <span class="comment">/* C routine to process image */</span>
-<a name="l01707"></a>01707         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l01708"></a>01708                 *curdst = ~(*cursrc1);
-<a name="l01709"></a>01709                 <span class="comment">/* Advance pointers */</span>
-<a name="l01710"></a>01710                 cursrc1++;
-<a name="l01711"></a>01711                 curdst++;
-<a name="l01712"></a>01712         }
-<a name="l01713"></a>01713 
-<a name="l01714"></a>01714         <span class="keywordflow">return</span> (0);
-<a name="l01715"></a>01715 }
-<a name="l01716"></a>01716 
-<a name="l01727"></a><a class="code" href="_s_d_l__image_filter_8c.html#a032e94beee7b3f7dc9e3bc999f51dfb3">01727</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a032e94beee7b3f7dc9e3bc999f51dfb3" title="Internal MMX Filter using AddByte: D = saturation255(S + C)">SDL_imageFilterAddByteMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype" [...]
-<a name="l01728"></a>01728 {
-<a name="l01729"></a>01729 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l01730"></a>01730 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l01731"></a>01731 <span class="preprocessor"></span>        __asm
-<a name="l01732"></a>01732         {
-<a name="l01733"></a>01733                 pusha
-<a name="l01734"></a>01734                         <span class="comment">/* ** Duplicate C in 8 bytes of MM1 ** */</span>
-<a name="l01735"></a>01735                         mov al, C       <span class="comment">/* load C into AL */</span>
-<a name="l01736"></a>01736                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
-<a name="l01737"></a>01737                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l01738"></a>01738                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l01739"></a>01739                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l01740"></a>01740                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l01741"></a>01741                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l01742"></a>01742                         punpckldq mm1, mm2      <span class="comment">/* fill higher bytes of MM1 with C */</span>
-<a name="l01743"></a>01743                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01744"></a>01744                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l01745"></a>01745                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01746"></a>01746                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01747"></a>01747                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01748"></a>01748 L1021:
-<a name="l01749"></a>01749                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM0 */</span>
-<a name="l01750"></a>01750                 paddusb mm0,  mm1       <span class="comment">/* MM0=SrcDest+C (add 8 bytes with saturation) */</span>
-<a name="l01751"></a>01751                         movq [edi], mm0         <span class="comment">/* store result in Dest */</span>
-<a name="l01752"></a>01752                         add eax, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l01753"></a>01753                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l01754"></a>01754                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l01755"></a>01755                         jnz             L1021           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01756"></a>01756                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l01757"></a>01757                         popa
-<a name="l01758"></a>01758         }
-<a name="l01759"></a>01759 <span class="preprocessor">#else</span>
-<a name="l01760"></a>01760 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l01761"></a>01761                 (<span class="stringliteral">"pusha              \n\t"</span>
-<a name="l01762"></a>01762                 <span class="comment">/* ** Duplicate C in 8 bytes of MM1 ** */</span>
-<a name="l01763"></a>01763                 <span class="stringliteral">"mov           %3, %%al \n\t"</span>   <span class="comment">/* load C into AL */</span>
-<a name="l01764"></a>01764                 <span class="stringliteral">"mov         %%al, %%ah \n\t"</span>   <span class="comment">/* copy AL into AH */</span>
-<a name="l01765"></a>01765                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l01766"></a>01766                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l01767"></a>01767                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l01768"></a>01768                 <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l01769"></a>01769                 <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l01770"></a>01770                 <span class="stringliteral">"punpckldq %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* fill higher bytes of MM1 with C */</span>
-<a name="l01771"></a>01771                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01772"></a>01772                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l01773"></a>01773                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01774"></a>01774                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01775"></a>01775                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01776"></a>01776                 <span class="stringliteral">"1:                     \n\t"</span> 
-<a name="l01777"></a>01777                 <span class="stringliteral">"movq    (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into MM0 */</span>
-<a name="l01778"></a>01778                 <span class="stringliteral">"paddusb   %%mm1, %%mm0 \n\t"</span>   <span class="comment">/* MM0=SrcDest+C (add 8 bytes with saturation) */</span>
-<a name="l01779"></a>01779                 <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l01780"></a>01780                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l01781"></a>01781                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l01782"></a>01782                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l01783"></a>01783                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01784"></a>01784                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l01785"></a>01785                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l01786"></a>01786                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l01787"></a>01787                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l01788"></a>01788                 <span class="stringliteral">"m"</span>(C)                  <span class="comment">/* %3 */</span>
-<a name="l01789"></a>01789                 );
-<a name="l01790"></a>01790 <span class="preprocessor">#endif</span>
-<a name="l01791"></a>01791 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l01792"></a>01792 <span class="preprocessor">#else</span>
-<a name="l01793"></a>01793 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l01794"></a>01794 <span class="preprocessor">#endif</span>
-<a name="l01795"></a>01795 <span class="preprocessor"></span>}
-<a name="l01796"></a>01796 
-<a name="l01808"></a><a class="code" href="_s_d_l__image_filter_8h.html#a6be6dccd000eff4baadd33297e5cc419">01808</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a812cb307cb60ef31f1ffe81a9eee6bb1" title="Filter using AddByte: D = saturation255(S + C)">SDL_imageFilterAddByte</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *De [...]
-<a name="l01809"></a>01809 {
-<a name="l01810"></a>01810         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l01811"></a>01811         <span class="keywordtype">int</span> iC;
-<a name="l01812"></a>01812         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdest;
-<a name="l01813"></a>01813         <span class="keywordtype">int</span> result;
-<a name="l01814"></a>01814 
-<a name="l01815"></a>01815         <span class="comment">/* Validate input parameters */</span>
-<a name="l01816"></a>01816         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l01817"></a>01817                 <span class="keywordflow">return</span>(-1);
-<a name="l01818"></a>01818         <span class="keywordflow">if</span> (length == 0)
-<a name="l01819"></a>01819                 <span class="keywordflow">return</span>(0);
-<a name="l01820"></a>01820 
-<a name="l01821"></a>01821         <span class="comment">/* Special case: C==0 */</span>
-<a name="l01822"></a>01822         <span class="keywordflow">if</span> (C == 0) {
-<a name="l01823"></a>01823                 memcpy(Src1, Dest, length);
-<a name="l01824"></a>01824                 <span class="keywordflow">return</span> (0); 
-<a name="l01825"></a>01825         }
-<a name="l01826"></a>01826 
-<a name="l01827"></a>01827         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l01679"></a>01679         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l01680"></a>01680                 <span class="comment">/* MMX routine */</span>
+<a name="l01681"></a>01681                 SDL_imageFilterBitNegationMMX(Src1, Dest, length);
+<a name="l01682"></a>01682 
+<a name="l01683"></a>01683                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l01684"></a>01684                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l01685"></a>01685                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l01686"></a>01686                         istart = length & 0xfffffff8;
+<a name="l01687"></a>01687                         cursrc1 = &Src1[istart];
+<a name="l01688"></a>01688                         curdst = &Dest[istart];
+<a name="l01689"></a>01689                 } <span class="keywordflow">else</span> {
+<a name="l01690"></a>01690                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l01691"></a>01691                         <span class="keywordflow">return</span> (0);
+<a name="l01692"></a>01692                 }
+<a name="l01693"></a>01693         } <span class="keywordflow">else</span> {
+<a name="l01694"></a>01694                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l01695"></a>01695                 istart = 0;
+<a name="l01696"></a>01696                 cursrc1 = Src1;
+<a name="l01697"></a>01697                 curdst = Dest;
+<a name="l01698"></a>01698         }
+<a name="l01699"></a>01699 
+<a name="l01700"></a>01700         <span class="comment">/* C routine to process image */</span>
+<a name="l01701"></a>01701         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l01702"></a>01702                 *curdst = ~(*cursrc1);
+<a name="l01703"></a>01703                 <span class="comment">/* Advance pointers */</span>
+<a name="l01704"></a>01704                 cursrc1++;
+<a name="l01705"></a>01705                 curdst++;
+<a name="l01706"></a>01706         }
+<a name="l01707"></a>01707 
+<a name="l01708"></a>01708         <span class="keywordflow">return</span> (0);
+<a name="l01709"></a>01709 }
+<a name="l01710"></a>01710 
+<a name="l01721"></a>01721 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterAddByteMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> C)
+<a name="l01722"></a>01722 {
+<a name="l01723"></a>01723 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l01724"></a>01724 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l01725"></a>01725 <span class="preprocessor"></span>        __asm
+<a name="l01726"></a>01726         {
+<a name="l01727"></a>01727                 pusha
+<a name="l01728"></a>01728                         <span class="comment">/* ** Duplicate C in 8 bytes of MM1 ** */</span>
+<a name="l01729"></a>01729                         mov al, C       <span class="comment">/* load C into AL */</span>
+<a name="l01730"></a>01730                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
+<a name="l01731"></a>01731                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l01732"></a>01732                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l01733"></a>01733                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l01734"></a>01734                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
+<a name="l01735"></a>01735                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
+<a name="l01736"></a>01736                         punpckldq mm1, mm2      <span class="comment">/* fill higher bytes of MM1 with C */</span>
+<a name="l01737"></a>01737                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l01738"></a>01738                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l01739"></a>01739                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l01740"></a>01740                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l01741"></a>01741                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l01742"></a>01742 L1021:
+<a name="l01743"></a>01743                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM0 */</span>
+<a name="l01744"></a>01744                 paddusb mm0,  mm1       <span class="comment">/* MM0=SrcDest+C (add 8 bytes with saturation) */</span>
+<a name="l01745"></a>01745                         movq [edi], mm0         <span class="comment">/* store result in Dest */</span>
+<a name="l01746"></a>01746                         add eax, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l01747"></a>01747                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l01748"></a>01748                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l01749"></a>01749                         jnz             L1021           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l01750"></a>01750                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l01751"></a>01751                         popa
+<a name="l01752"></a>01752         }
+<a name="l01753"></a>01753 <span class="preprocessor">#else</span>
+<a name="l01754"></a>01754 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l01755"></a>01755         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l01756"></a>01756         __m64 *mDest = (__m64*)Dest;
+<a name="l01757"></a>01757         <span class="comment">/* Duplicate C in 8 bytes of MM1 */</span>
+<a name="l01758"></a>01758         <span class="keywordtype">int</span> i;
+<a name="l01759"></a>01759         memset(&i, C, 4);
+<a name="l01760"></a>01760         __m64 mm1 = _m_from_int(i);
+<a name="l01761"></a>01761         __m64 mm2 = _m_from_int(i);
+<a name="l01762"></a>01762         mm1 = _m_punpckldq(mm1, mm2);                   <span class="comment">/* fill higher bytes of MM1 with C */</span>
+<a name="l01763"></a>01763         <span class="comment">//__m64 mm1 = _m_from_int64(lli); // x86_64 only</span>
+<a name="l01764"></a>01764         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l01765"></a>01765                 *mDest = _m_paddusb(*mSrc1, mm1);       <span class="comment">/* Src1+C (add 8 bytes with saturation) */</span>
+<a name="l01766"></a>01766                 mSrc1++;
+<a name="l01767"></a>01767                 mDest++;
+<a name="l01768"></a>01768         }
+<a name="l01769"></a>01769         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l01770"></a>01770 <span class="preprocessor">#endif</span>
+<a name="l01771"></a>01771 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l01772"></a>01772 <span class="preprocessor">#else</span>
+<a name="l01773"></a>01773 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l01774"></a>01774 <span class="preprocessor">#endif</span>
+<a name="l01775"></a>01775 <span class="preprocessor"></span>}
+<a name="l01776"></a>01776 
+<a name="l01788"></a><a class="code" href="_s_d_l__image_filter_8h.html#a6be6dccd000eff4baadd33297e5cc419">01788</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a812cb307cb60ef31f1ffe81a9eee6bb1" title="Filter using AddByte: D = saturation255(S + C)">SDL_imageFilterAddByte</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *De [...]
+<a name="l01789"></a>01789 {
+<a name="l01790"></a>01790         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l01791"></a>01791         <span class="keywordtype">int</span> iC;
+<a name="l01792"></a>01792         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdest;
+<a name="l01793"></a>01793         <span class="keywordtype">int</span> result;
+<a name="l01794"></a>01794 
+<a name="l01795"></a>01795         <span class="comment">/* Validate input parameters */</span>
+<a name="l01796"></a>01796         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l01797"></a>01797                 <span class="keywordflow">return</span>(-1);
+<a name="l01798"></a>01798         <span class="keywordflow">if</span> (length == 0)
+<a name="l01799"></a>01799                 <span class="keywordflow">return</span>(0);
+<a name="l01800"></a>01800 
+<a name="l01801"></a>01801         <span class="comment">/* Special case: C==0 */</span>
+<a name="l01802"></a>01802         <span class="keywordflow">if</span> (C == 0) {
+<a name="l01803"></a>01803                 memcpy(Src1, Dest, length);
+<a name="l01804"></a>01804                 <span class="keywordflow">return</span> (0); 
+<a name="l01805"></a>01805         }
+<a name="l01806"></a>01806 
+<a name="l01807"></a>01807         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l01808"></a>01808 
+<a name="l01809"></a>01809                 <span class="comment">/* MMX routine */</span>
+<a name="l01810"></a>01810                 SDL_imageFilterAddByteMMX(Src1, Dest, length, C);
+<a name="l01811"></a>01811 
+<a name="l01812"></a>01812                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l01813"></a>01813                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l01814"></a>01814                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l01815"></a>01815                         istart = length & 0xfffffff8;
+<a name="l01816"></a>01816                         cursrc1 = &Src1[istart];
+<a name="l01817"></a>01817                         curdest = &Dest[istart];
+<a name="l01818"></a>01818                 } <span class="keywordflow">else</span> {
+<a name="l01819"></a>01819                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l01820"></a>01820                         <span class="keywordflow">return</span> (0);
+<a name="l01821"></a>01821                 }
+<a name="l01822"></a>01822         } <span class="keywordflow">else</span> {
+<a name="l01823"></a>01823                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l01824"></a>01824                 istart = 0;
+<a name="l01825"></a>01825                 cursrc1 = Src1;
+<a name="l01826"></a>01826                 curdest = Dest;
+<a name="l01827"></a>01827         }
 <a name="l01828"></a>01828 
-<a name="l01829"></a>01829                 <span class="comment">/* MMX routine */</span>
-<a name="l01830"></a>01830                 <a class="code" href="_s_d_l__image_filter_8c.html#a032e94beee7b3f7dc9e3bc999f51dfb3" title="Internal MMX Filter using AddByte: D = saturation255(S + C)">SDL_imageFilterAddByteMMX</a>(Src1, Dest, length, C);
-<a name="l01831"></a>01831 
-<a name="l01832"></a>01832                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l01833"></a>01833                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l01834"></a>01834                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l01835"></a>01835                         istart = length & 0xfffffff8;
-<a name="l01836"></a>01836                         cursrc1 = &Src1[istart];
-<a name="l01837"></a>01837                         curdest = &Dest[istart];
-<a name="l01838"></a>01838                 } <span class="keywordflow">else</span> {
-<a name="l01839"></a>01839                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l01840"></a>01840                         <span class="keywordflow">return</span> (0);
-<a name="l01841"></a>01841                 }
-<a name="l01842"></a>01842         } <span class="keywordflow">else</span> {
-<a name="l01843"></a>01843                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l01844"></a>01844                 istart = 0;
-<a name="l01845"></a>01845                 cursrc1 = Src1;
-<a name="l01846"></a>01846                 curdest = Dest;
-<a name="l01847"></a>01847         }
-<a name="l01848"></a>01848 
-<a name="l01849"></a>01849         <span class="comment">/* C routine to process image */</span>
-<a name="l01850"></a>01850         iC = (int) C;
-<a name="l01851"></a>01851         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l01852"></a>01852                 result = (int) *cursrc1 + iC;
-<a name="l01853"></a>01853                 <span class="keywordflow">if</span> (result > 255)
-<a name="l01854"></a>01854                         result = 255;
-<a name="l01855"></a>01855                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l01856"></a>01856                 <span class="comment">/* Advance pointers */</span>
-<a name="l01857"></a>01857                 cursrc1++;
-<a name="l01858"></a>01858                 curdest++;
-<a name="l01859"></a>01859         }
-<a name="l01860"></a>01860         <span class="keywordflow">return</span> (0);
-<a name="l01861"></a>01861 }
-<a name="l01862"></a>01862 
-<a name="l01874"></a><a class="code" href="_s_d_l__image_filter_8c.html#ac337129ad7f11e7e33d73fa39b8239eb">01874</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ac337129ad7f11e7e33d73fa39b8239eb" title="Internal MMX Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C)">SDL_imageFilterAddUintMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned< [...]
-<a name="l01875"></a>01875 {
-<a name="l01876"></a>01876 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l01877"></a>01877 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l01878"></a>01878 <span class="preprocessor"></span>        __asm
-<a name="l01879"></a>01879         {
-<a name="l01880"></a>01880                 pusha
-<a name="l01881"></a>01881                         <span class="comment">/* ** Duplicate (int)C in 8 bytes of MM1 ** */</span>
-<a name="l01882"></a>01882                         mov eax, C      <span class="comment">/* load C into EAX */</span>
-<a name="l01883"></a>01883                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l01884"></a>01884                         mov eax, D      <span class="comment">/* load D into EAX */</span>
-<a name="l01885"></a>01885                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l01886"></a>01886                         punpckldq mm1, mm2      <span class="comment">/* fill higher bytes of MM1 with C */</span>
-<a name="l01887"></a>01887                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01888"></a>01888                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l01889"></a>01889                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01890"></a>01890                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01891"></a>01891                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01892"></a>01892 L11023:
-<a name="l01893"></a>01893                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l01894"></a>01894                 paddusb mm0,  mm1       <span class="comment">/* MM0=SrcDest+C (add 8 bytes with saturation) */</span>
-<a name="l01895"></a>01895                         movq [edi],  mm0        <span class="comment">/* store result in SrcDest */</span>
-<a name="l01896"></a>01896                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l01897"></a>01897                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l01898"></a>01898                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l01899"></a>01899                         jnz             L11023          <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01900"></a>01900                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l01901"></a>01901                         popa
-<a name="l01902"></a>01902         }
-<a name="l01903"></a>01903 <span class="preprocessor">#else</span>
-<a name="l01904"></a>01904 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l01905"></a>01905                 (<span class="stringliteral">"pusha              \n\t"</span>
-<a name="l01906"></a>01906                 <span class="comment">/* ** Duplicate (int)C in 8 bytes of MM1 ** */</span>
-<a name="l01907"></a>01907                 <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load C into EAX */</span>
-<a name="l01908"></a>01908                 <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l01909"></a>01909                 <span class="stringliteral">"mov          %4, %%eax \n\t"</span>   <span class="comment">/* load D into EAX */</span>
-<a name="l01910"></a>01910                 <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l01911"></a>01911                 <span class="stringliteral">"punpckldq %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* fill higher bytes of MM1 with C */</span>
-<a name="l01912"></a>01912                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l01913"></a>01913                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l01914"></a>01914                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l01915"></a>01915                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l01916"></a>01916                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l01917"></a>01917                 <span class="stringliteral">"1:                     \n\t"</span> 
-<a name="l01918"></a>01918                 <span class="stringliteral">"movq    (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l01919"></a>01919                 <span class="stringliteral">"paddusb   %%mm1, %%mm0 \n\t"</span>   <span class="comment">/* MM0=SrcDest+C (add 8 bytes with saturation) */</span>
-<a name="l01920"></a>01920                 <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span>   <span class="comment">/* store result in SrcDest */</span>
-<a name="l01921"></a>01921                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l01922"></a>01922                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l01923"></a>01923                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l01924"></a>01924                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l01925"></a>01925                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l01926"></a>01926                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l01927"></a>01927                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l01928"></a>01928                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l01929"></a>01929                 <span class="stringliteral">"m"</span>(C),                 <span class="comment">/* %3 */</span>
-<a name="l01930"></a>01930                 <span class="stringliteral">"m"</span>(D)                  <span class="comment">/* %4 */</span>
-<a name="l01931"></a>01931                 );
-<a name="l01932"></a>01932 <span class="preprocessor">#endif</span>
-<a name="l01933"></a>01933 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l01934"></a>01934 <span class="preprocessor">#else</span>
-<a name="l01935"></a>01935 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l01936"></a>01936 <span class="preprocessor">#endif</span>
-<a name="l01937"></a>01937 <span class="preprocessor"></span>}
-<a name="l01938"></a>01938 
-<a name="l01949"></a><a class="code" href="_s_d_l__image_filter_8h.html#af1a17645dea69e52c7bd560521286765">01949</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a660543426c47dfec39a349eb3b8f905b" title="Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C)">SDL_imageFilterAddUint</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span cla [...]
-<a name="l01950"></a>01950 {
-<a name="l01951"></a>01951         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, j, istart, D;
-<a name="l01952"></a>01952         <span class="keywordtype">int</span> iC[4];
-<a name="l01953"></a>01953         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
-<a name="l01954"></a>01954         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
-<a name="l01955"></a>01955         <span class="keywordtype">int</span> result;
-<a name="l01956"></a>01956 
-<a name="l01957"></a>01957         <span class="comment">/* Validate input parameters */</span>
-<a name="l01958"></a>01958         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l01959"></a>01959                 <span class="keywordflow">return</span>(-1);
-<a name="l01960"></a>01960         <span class="keywordflow">if</span> (length == 0)
-<a name="l01961"></a>01961                 <span class="keywordflow">return</span>(0);
-<a name="l01962"></a>01962 
-<a name="l01963"></a>01963         <span class="comment">/* Special case: C==0 */</span>
-<a name="l01964"></a>01964         <span class="keywordflow">if</span> (C == 0) {
-<a name="l01965"></a>01965                 memcpy(Src1, Dest, length);
-<a name="l01966"></a>01966                 <span class="keywordflow">return</span> (0); 
-<a name="l01967"></a>01967         }
-<a name="l01968"></a>01968 
-<a name="l01969"></a>01969         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l01970"></a>01970 
-<a name="l01971"></a>01971                 <span class="comment">/* MMX routine */</span>
-<a name="l01972"></a>01972                 D=<a class="code" href="_s_d_l__image_filter_8c.html#a700fb30611761c46a674a45cc28ff561" title="Swaps the byte order in a 32bit integer (LSB becomes MSB, etc.).">SWAP_32</a>(C);
-<a name="l01973"></a>01973                 <a class="code" href="_s_d_l__image_filter_8c.html#ac337129ad7f11e7e33d73fa39b8239eb" title="Internal MMX Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C)">SDL_imageFilterAddUintMMX</a>(Src1, Dest, length, C, D);
-<a name="l01974"></a>01974 
-<a name="l01975"></a>01975                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l01976"></a>01976                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l01977"></a>01977                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l01978"></a>01978                         istart = length & 0xfffffff8;
-<a name="l01979"></a>01979                         cursrc1 = &Src1[istart];
-<a name="l01980"></a>01980                         curdest = &Dest[istart];
-<a name="l01981"></a>01981                 } <span class="keywordflow">else</span> {
-<a name="l01982"></a>01982                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l01983"></a>01983                         <span class="keywordflow">return</span> (0);
-<a name="l01984"></a>01984                 }
-<a name="l01985"></a>01985         } <span class="keywordflow">else</span> {
-<a name="l01986"></a>01986                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l01987"></a>01987                 istart = 0;
-<a name="l01988"></a>01988                 cursrc1 = Src1;
-<a name="l01989"></a>01989                 curdest = Dest;
-<a name="l01990"></a>01990         }
-<a name="l01991"></a>01991 
-<a name="l01992"></a>01992         <span class="comment">/* C routine to process bytes */</span>
-<a name="l01993"></a>01993         iC[3] = (int) ((C >> 24) & 0xff);
-<a name="l01994"></a>01994         iC[2] = (int) ((C >> 16) & 0xff);
-<a name="l01995"></a>01995         iC[1] = (int) ((C >>  8) & 0xff);
-<a name="l01996"></a>01996         iC[0] = (int) ((C >>  0) & 0xff);
-<a name="l01997"></a>01997         <span class="keywordflow">for</span> (i = istart; i < length; i += 4) {
-<a name="l01998"></a>01998                 <span class="keywordflow">for</span> (j = 0; j < 4; j++) {
-<a name="l01999"></a>01999                         <span class="keywordflow">if</span> ((i+j)<length) {
-<a name="l02000"></a>02000                                 result = (int) *cursrc1 + iC[j];
-<a name="l02001"></a>02001                                 <span class="keywordflow">if</span> (result > 255) result = 255;
-<a name="l02002"></a>02002                                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l02003"></a>02003                                 <span class="comment">/* Advance pointers */</span>
-<a name="l02004"></a>02004                                 cursrc1++;
-<a name="l02005"></a>02005                                 curdest++;
-<a name="l02006"></a>02006                         }
-<a name="l02007"></a>02007                 }
-<a name="l02008"></a>02008         }
-<a name="l02009"></a>02009         <span class="keywordflow">return</span> (0);
-<a name="l02010"></a>02010 }
-<a name="l02011"></a>02011 
-<a name="l02023"></a><a class="code" href="_s_d_l__image_filter_8c.html#ae7c132373eb318713635c4e82f478f9d">02023</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ae7c132373eb318713635c4e82f478f9d" title="Internal MMX Filter using AddByteToHalf: D = saturation255(S/2 + C)">SDL_imageFilterAddByteToHalfMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class [...]
-<a name="l02024"></a>02024                                                                         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Mask)
-<a name="l02025"></a>02025 {
-<a name="l02026"></a>02026 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l02027"></a>02027 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l02028"></a>02028 <span class="preprocessor"></span>        __asm
-<a name="l02029"></a>02029         {
-<a name="l02030"></a>02030                 pusha
-<a name="l02031"></a>02031                         <span class="comment">/* ** Duplicate C in 8 bytes of MM1 ** */</span>
-<a name="l02032"></a>02032                         mov al, C       <span class="comment">/* load C into AL */</span>
-<a name="l02033"></a>02033                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
-<a name="l02034"></a>02034                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l02035"></a>02035                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l02036"></a>02036                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l02037"></a>02037                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l02038"></a>02038                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l02039"></a>02039                         punpckldq mm1, mm2      <span class="comment">/* fill higher bytes of MM1 with C */</span>
-<a name="l02040"></a>02040                         mov edx, Mask           <span class="comment">/* load Mask address into edx */</span>
-<a name="l02041"></a>02041                         movq mm0, [edx]         <span class="comment">/* load Mask into mm0 */</span>
-<a name="l02042"></a>02042                 mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02043"></a>02043                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l02044"></a>02044                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02045"></a>02045                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02046"></a>02046                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02047"></a>02047 L1022:
-<a name="l02048"></a>02048                 movq mm2, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM2 */</span>
-<a name="l02049"></a>02049                 psrlw mm2, 1    <span class="comment">/* shift 4 WORDS of MM2 1 bit to the right */</span>
-<a name="l02050"></a>02050                         pand mm2, mm0        <span class="comment">// apply Mask to 8 BYTES of MM2 */</span>
-<a name="l02051"></a>02051                         <span class="comment">/* byte     0x0f, 0xdb, 0xd0 */</span>
-<a name="l02052"></a>02052                         paddusb mm2,  mm1       <span class="comment">/* MM2=SrcDest+C (add 8 bytes with saturation) */</span>
-<a name="l02053"></a>02053                         movq [edi], mm2         <span class="comment">/* store result in Dest */</span>
-<a name="l02054"></a>02054                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02055"></a>02055                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02056"></a>02056                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l02057"></a>02057                         jnz             L1022           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02058"></a>02058                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l02059"></a>02059                         popa
-<a name="l02060"></a>02060         }
-<a name="l02061"></a>02061 <span class="preprocessor">#else</span>
-<a name="l02062"></a>02062 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l02063"></a>02063                 (<span class="stringliteral">"pusha              \n\t"</span>
-<a name="l02064"></a>02064                 <span class="comment">/* ** Duplicate C in 8 bytes of MM1 ** */</span>
-<a name="l02065"></a>02065                 <span class="stringliteral">"mov           %3, %%al \n\t"</span>   <span class="comment">/* load C into AL */</span>
-<a name="l02066"></a>02066                 <span class="stringliteral">"mov         %%al, %%ah \n\t"</span>   <span class="comment">/* copy AL into AH */</span>
-<a name="l02067"></a>02067                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l02068"></a>02068                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l02069"></a>02069                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l02070"></a>02070                 <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l02071"></a>02071                 <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l02072"></a>02072                 <span class="stringliteral">"punpckldq %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* fill higher bytes of MM1 with C */</span>
-<a name="l02073"></a>02073                 <span class="stringliteral">"movl         %4, %%edx \n\t"</span>   <span class="comment">/* load Mask address into edx */</span>
-<a name="l02074"></a>02074                 <span class="stringliteral">"movq    (%%edx), %%mm0 \n\t"</span>   <span class="comment">/* load Mask into mm0 */</span>
-<a name="l02075"></a>02075                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02076"></a>02076                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l02077"></a>02077                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02078"></a>02078                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02079"></a>02079                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02080"></a>02080                 <span class="stringliteral">"1:                     \n\t"</span> 
-<a name="l02081"></a>02081                 <span class="stringliteral">"movq    (%%eax), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into MM2 */</span>
-<a name="l02082"></a>02082                 <span class="stringliteral">"psrlw        $1, %%mm2 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM2 1 bit to the right */</span>
-<a name="l02083"></a>02083                 <span class="comment">/*    "pand      %%mm0, %%mm2 \n\t"    // apply Mask to 8 BYTES of MM2 */</span>
-<a name="l02084"></a>02084                 <span class="stringliteral">".byte     0x0f, 0xdb, 0xd0 \n\t"</span> 
-<a name="l02085"></a>02085                 <span class="stringliteral">"paddusb   %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* MM2=SrcDest+C (add 8 bytes with saturation) */</span>
-<a name="l02086"></a>02086                 <span class="stringliteral">"movq    %%mm2, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l02087"></a>02087                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02088"></a>02088                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02089"></a>02089                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l02090"></a>02090                 <span class="stringliteral">"jnz                  1b \n\t"</span>  <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02091"></a>02091                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l02092"></a>02092                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l02093"></a>02093                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l02094"></a>02094                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l02095"></a>02095                 <span class="stringliteral">"m"</span>(C),                 <span class="comment">/* %3 */</span>
-<a name="l02096"></a>02096                 <span class="stringliteral">"m"</span>(Mask)                       <span class="comment">/* %4 */</span>
-<a name="l02097"></a>02097                 );
-<a name="l02098"></a>02098 <span class="preprocessor">#endif</span>
-<a name="l02099"></a>02099 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l02100"></a>02100 <span class="preprocessor">#else</span>
-<a name="l02101"></a>02101 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l02102"></a>02102 <span class="preprocessor">#endif</span>
-<a name="l02103"></a>02103 <span class="preprocessor"></span>}
-<a name="l02104"></a>02104 
-<a name="l02115"></a><a class="code" href="_s_d_l__image_filter_8h.html#a8cbdffd5dbcab3b5dc9207d57af616b3">02115</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ab82db97d129c8cfc36780bcdc6286fcc" title="Filter using AddByteToHalf: D = saturation255(S/2 + C)">SDL_imageFilterAddByteToHalf</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">c [...]
-<a name="l02116"></a>02116 {
-<a name="l02117"></a>02117         <span class="keyword">static</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
-<a name="l02118"></a>02118         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l02119"></a>02119         <span class="keywordtype">int</span> iC;
-<a name="l02120"></a>02120         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
-<a name="l02121"></a>02121         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
-<a name="l02122"></a>02122         <span class="keywordtype">int</span> result;
-<a name="l02123"></a>02123 
-<a name="l02124"></a>02124         <span class="comment">/* Validate input parameters */</span>
-<a name="l02125"></a>02125         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l02126"></a>02126                 <span class="keywordflow">return</span>(-1);
-<a name="l02127"></a>02127         <span class="keywordflow">if</span> (length == 0)
-<a name="l02128"></a>02128                 <span class="keywordflow">return</span>(0);
-<a name="l02129"></a>02129 
-<a name="l02130"></a>02130         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l02131"></a>02131 
-<a name="l02132"></a>02132                 <span class="comment">/* MMX routine */</span>
-<a name="l02133"></a>02133                 <a class="code" href="_s_d_l__image_filter_8c.html#ae7c132373eb318713635c4e82f478f9d" title="Internal MMX Filter using AddByteToHalf: D = saturation255(S/2 + C)">SDL_imageFilterAddByteToHalfMMX</a>(Src1, Dest, length, C, Mask);
-<a name="l02134"></a>02134 
-<a name="l02135"></a>02135                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l02136"></a>02136                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l02137"></a>02137                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l02138"></a>02138                         istart = length & 0xfffffff8;
-<a name="l02139"></a>02139                         cursrc1 = &Src1[istart];
-<a name="l02140"></a>02140                         curdest = &Dest[istart];
-<a name="l02141"></a>02141                 } <span class="keywordflow">else</span> {
-<a name="l02142"></a>02142                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l02143"></a>02143                         <span class="keywordflow">return</span> (0);
-<a name="l02144"></a>02144                 }
-<a name="l02145"></a>02145         } <span class="keywordflow">else</span> {
-<a name="l02146"></a>02146                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l02147"></a>02147                 istart = 0;
-<a name="l02148"></a>02148                 cursrc1 = Src1;
-<a name="l02149"></a>02149                 curdest = Dest;
-<a name="l02150"></a>02150         }
-<a name="l02151"></a>02151 
-<a name="l02152"></a>02152         <span class="comment">/* C routine to process image */</span>
-<a name="l02153"></a>02153         iC = (int) C;
-<a name="l02154"></a>02154         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l02155"></a>02155                 result = (int) (*cursrc1 / 2) + iC;
-<a name="l02156"></a>02156                 <span class="keywordflow">if</span> (result > 255)
-<a name="l02157"></a>02157                         result = 255;
-<a name="l02158"></a>02158                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l02159"></a>02159                 <span class="comment">/* Advance pointers */</span>
-<a name="l02160"></a>02160                 cursrc1++;
-<a name="l02161"></a>02161                 curdest++;
-<a name="l02162"></a>02162         }
-<a name="l02163"></a>02163 
-<a name="l02164"></a>02164         <span class="keywordflow">return</span> (0);
-<a name="l02165"></a>02165 }
-<a name="l02166"></a>02166 
-<a name="l02177"></a><a class="code" href="_s_d_l__image_filter_8c.html#a657e128016cc448778007d8b6475dd65">02177</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a657e128016cc448778007d8b6475dd65" title="Internal MMX Filter using SubByte: D = saturation0(S - C)">SDL_imageFilterSubByteMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">c [...]
-<a name="l02178"></a>02178 {
-<a name="l02179"></a>02179 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l02180"></a>02180 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l02181"></a>02181 <span class="preprocessor"></span>        __asm
-<a name="l02182"></a>02182         {
-<a name="l02183"></a>02183                 pusha
-<a name="l02184"></a>02184                         <span class="comment">/* ** Duplicate C in 8 bytes of MM1 ** */</span>
-<a name="l02185"></a>02185                         mov al, C       <span class="comment">/* load C into AL */</span>
-<a name="l02186"></a>02186                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
-<a name="l02187"></a>02187                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l02188"></a>02188                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l02189"></a>02189                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l02190"></a>02190                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l02191"></a>02191                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l02192"></a>02192                         punpckldq mm1, mm2      <span class="comment">/* fill higher bytes of MM1 with C */</span>
-<a name="l02193"></a>02193                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02194"></a>02194                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l02195"></a>02195                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02196"></a>02196                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02197"></a>02197                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02198"></a>02198 L1023:
-<a name="l02199"></a>02199                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l02200"></a>02200                 psubusb mm0,  mm1       <span class="comment">/* MM0=SrcDest-C (sub 8 bytes with saturation) */</span>
-<a name="l02201"></a>02201                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
-<a name="l02202"></a>02202                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02203"></a>02203                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02204"></a>02204                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l02205"></a>02205                         jnz             L1023           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02206"></a>02206                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l02207"></a>02207                         popa
-<a name="l02208"></a>02208         }
-<a name="l02209"></a>02209 <span class="preprocessor">#else</span>
-<a name="l02210"></a>02210 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l02211"></a>02211                 (<span class="stringliteral">"pusha              \n\t"</span>
-<a name="l02212"></a>02212                 <span class="comment">/* ** Duplicate C in 8 bytes of MM1 ** */</span>
-<a name="l02213"></a>02213                 <span class="stringliteral">"mov           %3, %%al \n\t"</span>   <span class="comment">/* load C into AL */</span>
-<a name="l02214"></a>02214                 <span class="stringliteral">"mov         %%al, %%ah \n\t"</span>   <span class="comment">/* copy AL into AH */</span>
-<a name="l02215"></a>02215                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l02216"></a>02216                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l02217"></a>02217                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l02218"></a>02218                 <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l02219"></a>02219                 <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l02220"></a>02220                 <span class="stringliteral">"punpckldq %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* fill higher bytes of MM1 with C */</span>
-<a name="l02221"></a>02221                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02222"></a>02222                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l02223"></a>02223                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02224"></a>02224                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02225"></a>02225                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02226"></a>02226                 <span class="stringliteral">"1: movq (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l02227"></a>02227                 <span class="stringliteral">"psubusb   %%mm1, %%mm0 \n\t"</span>   <span class="comment">/* MM0=SrcDest-C (sub 8 bytes with saturation) */</span>
-<a name="l02228"></a>02228                 <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span>   <span class="comment">/* store result in SrcDest */</span>
-<a name="l02229"></a>02229                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02230"></a>02230                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02231"></a>02231                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l02232"></a>02232                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02233"></a>02233                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l02234"></a>02234                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l02235"></a>02235                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l02236"></a>02236                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l02237"></a>02237                 <span class="stringliteral">"m"</span>(C)                  <span class="comment">/* %3 */</span>
-<a name="l02238"></a>02238                 );
-<a name="l02239"></a>02239 <span class="preprocessor">#endif</span>
-<a name="l02240"></a>02240 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l02241"></a>02241 <span class="preprocessor">#else</span>
-<a name="l02242"></a>02242 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l02243"></a>02243 <span class="preprocessor">#endif</span>
-<a name="l02244"></a>02244 <span class="preprocessor"></span>}
-<a name="l02245"></a>02245 
-<a name="l02256"></a><a class="code" href="_s_d_l__image_filter_8h.html#af8f4ab4050a0661c7696783ba1a1b12b">02256</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a387fb6f0d48cc5d08f37f7f9b92d14b2" title="Filter using SubByte: D = saturation0(S - C)">SDL_imageFilterSubByte</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest [...]
-<a name="l02257"></a>02257 {
-<a name="l02258"></a>02258         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l02259"></a>02259         <span class="keywordtype">int</span> iC;
-<a name="l02260"></a>02260         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
-<a name="l02261"></a>02261         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
-<a name="l02262"></a>02262         <span class="keywordtype">int</span> result;
-<a name="l02263"></a>02263 
-<a name="l02264"></a>02264         <span class="comment">/* Validate input parameters */</span>
-<a name="l02265"></a>02265         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l02266"></a>02266                 <span class="keywordflow">return</span>(-1);
-<a name="l02267"></a>02267         <span class="keywordflow">if</span> (length == 0)
-<a name="l02268"></a>02268                 <span class="keywordflow">return</span>(0);
-<a name="l02269"></a>02269 
-<a name="l02270"></a>02270         <span class="comment">/* Special case: C==0 */</span>
-<a name="l02271"></a>02271         <span class="keywordflow">if</span> (C == 0) {
-<a name="l02272"></a>02272                 memcpy(Src1, Dest, length);
-<a name="l02273"></a>02273                 <span class="keywordflow">return</span> (0); 
-<a name="l02274"></a>02274         }
-<a name="l02275"></a>02275 
-<a name="l02276"></a>02276         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l02277"></a>02277 
-<a name="l02278"></a>02278                 <span class="comment">/* MMX routine */</span>
-<a name="l02279"></a>02279                 <a class="code" href="_s_d_l__image_filter_8c.html#a657e128016cc448778007d8b6475dd65" title="Internal MMX Filter using SubByte: D = saturation0(S - C)">SDL_imageFilterSubByteMMX</a>(Src1, Dest, length, C);
-<a name="l02280"></a>02280 
-<a name="l02281"></a>02281                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l02282"></a>02282                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l02283"></a>02283                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l02284"></a>02284                         istart = length & 0xfffffff8;
-<a name="l02285"></a>02285                         cursrc1 = &Src1[istart];
-<a name="l02286"></a>02286                         curdest = &Dest[istart];
-<a name="l02287"></a>02287                 } <span class="keywordflow">else</span> {
-<a name="l02288"></a>02288                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l02289"></a>02289                         <span class="keywordflow">return</span> (0);
-<a name="l02290"></a>02290                 }
-<a name="l02291"></a>02291         } <span class="keywordflow">else</span> {
-<a name="l02292"></a>02292                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l02293"></a>02293                 istart = 0;
-<a name="l02294"></a>02294                 cursrc1 = Src1;
-<a name="l02295"></a>02295                 curdest = Dest;
-<a name="l02296"></a>02296         }
-<a name="l02297"></a>02297 
-<a name="l02298"></a>02298         <span class="comment">/* C routine to process image */</span>
-<a name="l02299"></a>02299         iC = (int) C;
-<a name="l02300"></a>02300         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l02301"></a>02301                 result = (int) *cursrc1 - iC;
-<a name="l02302"></a>02302                 <span class="keywordflow">if</span> (result < 0)
-<a name="l02303"></a>02303                         result = 0;
-<a name="l02304"></a>02304                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l02305"></a>02305                 <span class="comment">/* Advance pointers */</span>
-<a name="l02306"></a>02306                 cursrc1++;
-<a name="l02307"></a>02307                 curdest++;
-<a name="l02308"></a>02308         }
-<a name="l02309"></a>02309         <span class="keywordflow">return</span> (0);
-<a name="l02310"></a>02310 }
+<a name="l01829"></a>01829         <span class="comment">/* C routine to process image */</span>
+<a name="l01830"></a>01830         iC = (int) C;
+<a name="l01831"></a>01831         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l01832"></a>01832                 result = (int) *cursrc1 + iC;
+<a name="l01833"></a>01833                 <span class="keywordflow">if</span> (result > 255)
+<a name="l01834"></a>01834                         result = 255;
+<a name="l01835"></a>01835                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l01836"></a>01836                 <span class="comment">/* Advance pointers */</span>
+<a name="l01837"></a>01837                 cursrc1++;
+<a name="l01838"></a>01838                 curdest++;
+<a name="l01839"></a>01839         }
+<a name="l01840"></a>01840         <span class="keywordflow">return</span> (0);
+<a name="l01841"></a>01841 }
+<a name="l01842"></a>01842 
+<a name="l01854"></a>01854 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterAddUintMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> C, <span class="keyword [...]
+<a name="l01855"></a>01855 {
+<a name="l01856"></a>01856 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l01857"></a>01857 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l01858"></a>01858 <span class="preprocessor"></span>        __asm
+<a name="l01859"></a>01859         {
+<a name="l01860"></a>01860                 pusha
+<a name="l01861"></a>01861                         <span class="comment">/* ** Duplicate (int)C in 8 bytes of MM1 ** */</span>
+<a name="l01862"></a>01862                         mov eax, C      <span class="comment">/* load C into EAX */</span>
+<a name="l01863"></a>01863                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
+<a name="l01864"></a>01864                         mov eax, D      <span class="comment">/* load D into EAX */</span>
+<a name="l01865"></a>01865                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
+<a name="l01866"></a>01866                         punpckldq mm1, mm2      <span class="comment">/* fill higher bytes of MM1 with C */</span>
+<a name="l01867"></a>01867                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l01868"></a>01868                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l01869"></a>01869                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l01870"></a>01870                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l01871"></a>01871                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l01872"></a>01872 L11023:
+<a name="l01873"></a>01873                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
+<a name="l01874"></a>01874                 paddusb mm0,  mm1       <span class="comment">/* MM0=SrcDest+C (add 8 bytes with saturation) */</span>
+<a name="l01875"></a>01875                         movq [edi],  mm0        <span class="comment">/* store result in SrcDest */</span>
+<a name="l01876"></a>01876                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l01877"></a>01877                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l01878"></a>01878                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l01879"></a>01879                         jnz             L11023          <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l01880"></a>01880                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l01881"></a>01881                         popa
+<a name="l01882"></a>01882         }
+<a name="l01883"></a>01883 <span class="preprocessor">#else</span>
+<a name="l01884"></a>01884 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l01885"></a>01885         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l01886"></a>01886         __m64 *mDest = (__m64*)Dest;
+<a name="l01887"></a>01887         <span class="comment">/* Duplicate (int)C in 8 bytes of MM1 */</span>
+<a name="l01888"></a>01888         __m64 mm1 = _m_from_int(C);
+<a name="l01889"></a>01889         __m64 mm2 = _m_from_int(C);
+<a name="l01890"></a>01890         mm1 = _m_punpckldq(mm1, mm2);                   <span class="comment">/* fill higher bytes of MM1 with C */</span>
+<a name="l01891"></a>01891         <span class="comment">//__m64 mm1 = _m_from_int64(lli); // x86_64 only</span>
+<a name="l01892"></a>01892         <span class="keywordtype">int</span> i;
+<a name="l01893"></a>01893         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l01894"></a>01894                 *mDest = _m_paddusb(*mSrc1, mm1);       <span class="comment">/* Src1+C (add 8 bytes with saturation) */</span>
+<a name="l01895"></a>01895                 mSrc1++;
+<a name="l01896"></a>01896                 mDest++;
+<a name="l01897"></a>01897         }
+<a name="l01898"></a>01898         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l01899"></a>01899 <span class="preprocessor">#endif</span>
+<a name="l01900"></a>01900 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l01901"></a>01901 <span class="preprocessor">#else</span>
+<a name="l01902"></a>01902 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l01903"></a>01903 <span class="preprocessor">#endif</span>
+<a name="l01904"></a>01904 <span class="preprocessor"></span>}
+<a name="l01905"></a>01905 
+<a name="l01916"></a><a class="code" href="_s_d_l__image_filter_8h.html#af1a17645dea69e52c7bd560521286765">01916</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a660543426c47dfec39a349eb3b8f905b" title="Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C)">SDL_imageFilterAddUint</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span cla [...]
+<a name="l01917"></a>01917 {
+<a name="l01918"></a>01918         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, j, istart, D;
+<a name="l01919"></a>01919         <span class="keywordtype">int</span> iC[4];
+<a name="l01920"></a>01920         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
+<a name="l01921"></a>01921         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
+<a name="l01922"></a>01922         <span class="keywordtype">int</span> result;
+<a name="l01923"></a>01923 
+<a name="l01924"></a>01924         <span class="comment">/* Validate input parameters */</span>
+<a name="l01925"></a>01925         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l01926"></a>01926                 <span class="keywordflow">return</span>(-1);
+<a name="l01927"></a>01927         <span class="keywordflow">if</span> (length == 0)
+<a name="l01928"></a>01928                 <span class="keywordflow">return</span>(0);
+<a name="l01929"></a>01929 
+<a name="l01930"></a>01930         <span class="comment">/* Special case: C==0 */</span>
+<a name="l01931"></a>01931         <span class="keywordflow">if</span> (C == 0) {
+<a name="l01932"></a>01932                 memcpy(Src1, Dest, length);
+<a name="l01933"></a>01933                 <span class="keywordflow">return</span> (0); 
+<a name="l01934"></a>01934         }
+<a name="l01935"></a>01935 
+<a name="l01936"></a>01936         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l01937"></a>01937 
+<a name="l01938"></a>01938                 <span class="comment">/* MMX routine */</span>
+<a name="l01939"></a>01939                 D=<a class="code" href="_s_d_l__image_filter_8c.html#a700fb30611761c46a674a45cc28ff561" title="Swaps the byte order in a 32bit integer (LSB becomes MSB, etc.).">SWAP_32</a>(C);
+<a name="l01940"></a>01940                 SDL_imageFilterAddUintMMX(Src1, Dest, length, C, D);
+<a name="l01941"></a>01941 
+<a name="l01942"></a>01942                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l01943"></a>01943                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l01944"></a>01944                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l01945"></a>01945                         istart = length & 0xfffffff8;
+<a name="l01946"></a>01946                         cursrc1 = &Src1[istart];
+<a name="l01947"></a>01947                         curdest = &Dest[istart];
+<a name="l01948"></a>01948                 } <span class="keywordflow">else</span> {
+<a name="l01949"></a>01949                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l01950"></a>01950                         <span class="keywordflow">return</span> (0);
+<a name="l01951"></a>01951                 }
+<a name="l01952"></a>01952         } <span class="keywordflow">else</span> {
+<a name="l01953"></a>01953                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l01954"></a>01954                 istart = 0;
+<a name="l01955"></a>01955                 cursrc1 = Src1;
+<a name="l01956"></a>01956                 curdest = Dest;
+<a name="l01957"></a>01957         }
+<a name="l01958"></a>01958 
+<a name="l01959"></a>01959         <span class="comment">/* C routine to process bytes */</span>
+<a name="l01960"></a>01960         iC[3] = (int) ((C >> 24) & 0xff);
+<a name="l01961"></a>01961         iC[2] = (int) ((C >> 16) & 0xff);
+<a name="l01962"></a>01962         iC[1] = (int) ((C >>  8) & 0xff);
+<a name="l01963"></a>01963         iC[0] = (int) ((C >>  0) & 0xff);
+<a name="l01964"></a>01964         <span class="keywordflow">for</span> (i = istart; i < length; i += 4) {
+<a name="l01965"></a>01965                 <span class="keywordflow">for</span> (j = 0; j < 4; j++) {
+<a name="l01966"></a>01966                         <span class="keywordflow">if</span> ((i+j)<length) {
+<a name="l01967"></a>01967                                 result = (int) *cursrc1 + iC[j];
+<a name="l01968"></a>01968                                 <span class="keywordflow">if</span> (result > 255) result = 255;
+<a name="l01969"></a>01969                                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l01970"></a>01970                                 <span class="comment">/* Advance pointers */</span>
+<a name="l01971"></a>01971                                 cursrc1++;
+<a name="l01972"></a>01972                                 curdest++;
+<a name="l01973"></a>01973                         }
+<a name="l01974"></a>01974                 }
+<a name="l01975"></a>01975         }
+<a name="l01976"></a>01976         <span class="keywordflow">return</span> (0);
+<a name="l01977"></a>01977 }
+<a name="l01978"></a>01978 
+<a name="l01990"></a>01990 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterAddByteToHalfMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> C,
+<a name="l01991"></a>01991                                                                         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Mask)
+<a name="l01992"></a>01992 {
+<a name="l01993"></a>01993 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l01994"></a>01994 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l01995"></a>01995 <span class="preprocessor"></span>        __asm
+<a name="l01996"></a>01996         {
+<a name="l01997"></a>01997                 pusha
+<a name="l01998"></a>01998                         <span class="comment">/* ** Duplicate C in 8 bytes of MM1 ** */</span>
+<a name="l01999"></a>01999                         mov al, C       <span class="comment">/* load C into AL */</span>
+<a name="l02000"></a>02000                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
+<a name="l02001"></a>02001                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l02002"></a>02002                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l02003"></a>02003                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l02004"></a>02004                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
+<a name="l02005"></a>02005                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
+<a name="l02006"></a>02006                         punpckldq mm1, mm2      <span class="comment">/* fill higher bytes of MM1 with C */</span>
+<a name="l02007"></a>02007                         mov edx, Mask           <span class="comment">/* load Mask address into edx */</span>
+<a name="l02008"></a>02008                         movq mm0, [edx]         <span class="comment">/* load Mask into mm0 */</span>
+<a name="l02009"></a>02009                 mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l02010"></a>02010                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l02011"></a>02011                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l02012"></a>02012                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l02013"></a>02013                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l02014"></a>02014 L1022:
+<a name="l02015"></a>02015                 movq mm2, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM2 */</span>
+<a name="l02016"></a>02016                 psrlw mm2, 1    <span class="comment">/* shift 4 WORDS of MM2 1 bit to the right */</span>
+<a name="l02017"></a>02017                         pand mm2, mm0        <span class="comment">// apply Mask to 8 BYTES of MM2 */</span>
+<a name="l02018"></a>02018                         paddusb mm2,  mm1       <span class="comment">/* MM2=SrcDest+C (add 8 bytes with saturation) */</span>
+<a name="l02019"></a>02019                         movq [edi], mm2         <span class="comment">/* store result in Dest */</span>
+<a name="l02020"></a>02020                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l02021"></a>02021                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l02022"></a>02022                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l02023"></a>02023                         jnz             L1022           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l02024"></a>02024                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l02025"></a>02025                         popa
+<a name="l02026"></a>02026         }
+<a name="l02027"></a>02027 <span class="preprocessor">#else</span>
+<a name="l02028"></a>02028 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l02029"></a>02029         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l02030"></a>02030         __m64 *mDest = (__m64*)Dest;
+<a name="l02031"></a>02031         __m64 *mMask = (__m64*)Mask;
+<a name="l02032"></a>02032         <span class="comment">/* Duplicate C in 8 bytes of MM1 */</span>
+<a name="l02033"></a>02033         <span class="keywordtype">int</span> i;
+<a name="l02034"></a>02034         memset(&i, C, 4);
+<a name="l02035"></a>02035         __m64 mm1 = _m_from_int(i);
+<a name="l02036"></a>02036         __m64 mm2 = _m_from_int(i);
+<a name="l02037"></a>02037         mm1 = _m_punpckldq(mm1, mm2);                   <span class="comment">/* fill higher bytes of MM1 with C */</span>
+<a name="l02038"></a>02038         <span class="comment">//__m64 mm1 = _m_from_int64(lli); // x86_64 only</span>
+<a name="l02039"></a>02039         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l02040"></a>02040                 __m64 mm2 = _m_psrlwi(*mSrc1, 1);       <span class="comment">/* shift 4 WORDS of MM2 1 bit to the right */</span>
+<a name="l02041"></a>02041                 mm2 = _m_pand(mm2, *mMask);             <span class="comment">/* apply Mask to 8 BYTES of MM2 */</span>
+<a name="l02042"></a>02042                                                         <span class="comment">/* byte     0x0f, 0xdb, 0xd0 */</span>
+<a name="l02043"></a>02043                 *mDest = _m_paddusb(mm1, mm2);          <span class="comment">/* Src1+C (add 8 bytes with saturation) */</span>
+<a name="l02044"></a>02044                 mSrc1++;
+<a name="l02045"></a>02045                 mDest++;
+<a name="l02046"></a>02046         }
+<a name="l02047"></a>02047         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l02048"></a>02048 <span class="preprocessor">#endif</span>
+<a name="l02049"></a>02049 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l02050"></a>02050 <span class="preprocessor">#else</span>
+<a name="l02051"></a>02051 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l02052"></a>02052 <span class="preprocessor">#endif</span>
+<a name="l02053"></a>02053 <span class="preprocessor"></span>}
+<a name="l02054"></a>02054 
+<a name="l02065"></a><a class="code" href="_s_d_l__image_filter_8h.html#a8cbdffd5dbcab3b5dc9207d57af616b3">02065</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ab82db97d129c8cfc36780bcdc6286fcc" title="Filter using AddByteToHalf: D = saturation255(S/2 + C)">SDL_imageFilterAddByteToHalf</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">c [...]
+<a name="l02066"></a>02066 {
+<a name="l02067"></a>02067         <span class="keyword">static</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
+<a name="l02068"></a>02068         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l02069"></a>02069         <span class="keywordtype">int</span> iC;
+<a name="l02070"></a>02070         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
+<a name="l02071"></a>02071         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
+<a name="l02072"></a>02072         <span class="keywordtype">int</span> result;
+<a name="l02073"></a>02073 
+<a name="l02074"></a>02074         <span class="comment">/* Validate input parameters */</span>
+<a name="l02075"></a>02075         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l02076"></a>02076                 <span class="keywordflow">return</span>(-1);
+<a name="l02077"></a>02077         <span class="keywordflow">if</span> (length == 0)
+<a name="l02078"></a>02078                 <span class="keywordflow">return</span>(0);
+<a name="l02079"></a>02079 
+<a name="l02080"></a>02080         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l02081"></a>02081 
+<a name="l02082"></a>02082                 <span class="comment">/* MMX routine */</span>
+<a name="l02083"></a>02083                 SDL_imageFilterAddByteToHalfMMX(Src1, Dest, length, C, Mask);
+<a name="l02084"></a>02084 
+<a name="l02085"></a>02085                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l02086"></a>02086                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l02087"></a>02087                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l02088"></a>02088                         istart = length & 0xfffffff8;
+<a name="l02089"></a>02089                         cursrc1 = &Src1[istart];
+<a name="l02090"></a>02090                         curdest = &Dest[istart];
+<a name="l02091"></a>02091                 } <span class="keywordflow">else</span> {
+<a name="l02092"></a>02092                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l02093"></a>02093                         <span class="keywordflow">return</span> (0);
+<a name="l02094"></a>02094                 }
+<a name="l02095"></a>02095         } <span class="keywordflow">else</span> {
+<a name="l02096"></a>02096                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l02097"></a>02097                 istart = 0;
+<a name="l02098"></a>02098                 cursrc1 = Src1;
+<a name="l02099"></a>02099                 curdest = Dest;
+<a name="l02100"></a>02100         }
+<a name="l02101"></a>02101 
+<a name="l02102"></a>02102         <span class="comment">/* C routine to process image */</span>
+<a name="l02103"></a>02103         iC = (int) C;
+<a name="l02104"></a>02104         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l02105"></a>02105                 result = (int) (*cursrc1 / 2) + iC;
+<a name="l02106"></a>02106                 <span class="keywordflow">if</span> (result > 255)
+<a name="l02107"></a>02107                         result = 255;
+<a name="l02108"></a>02108                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l02109"></a>02109                 <span class="comment">/* Advance pointers */</span>
+<a name="l02110"></a>02110                 cursrc1++;
+<a name="l02111"></a>02111                 curdest++;
+<a name="l02112"></a>02112         }
+<a name="l02113"></a>02113 
+<a name="l02114"></a>02114         <span class="keywordflow">return</span> (0);
+<a name="l02115"></a>02115 }
+<a name="l02116"></a>02116 
+<a name="l02127"></a><a class="code" href="_s_d_l__image_filter_8c.html#a657e128016cc448778007d8b6475dd65">02127</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a657e128016cc448778007d8b6475dd65" title="Internal MMX Filter using SubByte: D = saturation0(S - C)">SDL_imageFilterSubByteMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">c [...]
+<a name="l02128"></a>02128 {
+<a name="l02129"></a>02129 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l02130"></a>02130 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l02131"></a>02131 <span class="preprocessor"></span>        __asm
+<a name="l02132"></a>02132         {
+<a name="l02133"></a>02133                 pusha
+<a name="l02134"></a>02134                         <span class="comment">/* ** Duplicate C in 8 bytes of MM1 ** */</span>
+<a name="l02135"></a>02135                         mov al, C       <span class="comment">/* load C into AL */</span>
+<a name="l02136"></a>02136                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
+<a name="l02137"></a>02137                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l02138"></a>02138                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l02139"></a>02139                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l02140"></a>02140                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
+<a name="l02141"></a>02141                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
+<a name="l02142"></a>02142                         punpckldq mm1, mm2      <span class="comment">/* fill higher bytes of MM1 with C */</span>
+<a name="l02143"></a>02143                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l02144"></a>02144                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l02145"></a>02145                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l02146"></a>02146                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l02147"></a>02147                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l02148"></a>02148 L1023:
+<a name="l02149"></a>02149                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
+<a name="l02150"></a>02150                 psubusb mm0,  mm1       <span class="comment">/* MM0=SrcDest-C (sub 8 bytes with saturation) */</span>
+<a name="l02151"></a>02151                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
+<a name="l02152"></a>02152                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l02153"></a>02153                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l02154"></a>02154                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l02155"></a>02155                         jnz             L1023           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l02156"></a>02156                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l02157"></a>02157                         popa
+<a name="l02158"></a>02158         }
+<a name="l02159"></a>02159 <span class="preprocessor">#else</span>
+<a name="l02160"></a>02160 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l02161"></a>02161         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l02162"></a>02162         __m64 *mDest = (__m64*)Dest;
+<a name="l02163"></a>02163         <span class="comment">/* Duplicate C in 8 bytes of MM1 */</span>
+<a name="l02164"></a>02164         <span class="keywordtype">int</span> i;
+<a name="l02165"></a>02165         memset(&i, C, 4);
+<a name="l02166"></a>02166         __m64 mm1 = _m_from_int(i);
+<a name="l02167"></a>02167         __m64 mm2 = _m_from_int(i);
+<a name="l02168"></a>02168         mm1 = _m_punpckldq(mm1, mm2);                   <span class="comment">/* fill higher bytes of MM1 with C */</span>
+<a name="l02169"></a>02169         <span class="comment">//__m64 mm1 = _m_from_int64(lli); // x86_64 only</span>
+<a name="l02170"></a>02170         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l02171"></a>02171                 *mDest = _m_psubusb(*mSrc1, mm1);       <span class="comment">/* Src1-C (sub 8 bytes with saturation) */</span>
+<a name="l02172"></a>02172                 mSrc1++;
+<a name="l02173"></a>02173                 mDest++;
+<a name="l02174"></a>02174         }
+<a name="l02175"></a>02175         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l02176"></a>02176 <span class="preprocessor">#endif</span>
+<a name="l02177"></a>02177 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l02178"></a>02178 <span class="preprocessor">#else</span>
+<a name="l02179"></a>02179 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l02180"></a>02180 <span class="preprocessor">#endif</span>
+<a name="l02181"></a>02181 <span class="preprocessor"></span>}
+<a name="l02182"></a>02182 
+<a name="l02193"></a><a class="code" href="_s_d_l__image_filter_8h.html#af8f4ab4050a0661c7696783ba1a1b12b">02193</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a387fb6f0d48cc5d08f37f7f9b92d14b2" title="Filter using SubByte: D = saturation0(S - C)">SDL_imageFilterSubByte</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest [...]
+<a name="l02194"></a>02194 {
+<a name="l02195"></a>02195         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l02196"></a>02196         <span class="keywordtype">int</span> iC;
+<a name="l02197"></a>02197         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
+<a name="l02198"></a>02198         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
+<a name="l02199"></a>02199         <span class="keywordtype">int</span> result;
+<a name="l02200"></a>02200 
+<a name="l02201"></a>02201         <span class="comment">/* Validate input parameters */</span>
+<a name="l02202"></a>02202         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l02203"></a>02203                 <span class="keywordflow">return</span>(-1);
+<a name="l02204"></a>02204         <span class="keywordflow">if</span> (length == 0)
+<a name="l02205"></a>02205                 <span class="keywordflow">return</span>(0);
+<a name="l02206"></a>02206 
+<a name="l02207"></a>02207         <span class="comment">/* Special case: C==0 */</span>
+<a name="l02208"></a>02208         <span class="keywordflow">if</span> (C == 0) {
+<a name="l02209"></a>02209                 memcpy(Src1, Dest, length);
+<a name="l02210"></a>02210                 <span class="keywordflow">return</span> (0); 
+<a name="l02211"></a>02211         }
+<a name="l02212"></a>02212 
+<a name="l02213"></a>02213         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l02214"></a>02214 
+<a name="l02215"></a>02215                 <span class="comment">/* MMX routine */</span>
+<a name="l02216"></a>02216                 <a class="code" href="_s_d_l__image_filter_8c.html#a657e128016cc448778007d8b6475dd65" title="Internal MMX Filter using SubByte: D = saturation0(S - C)">SDL_imageFilterSubByteMMX</a>(Src1, Dest, length, C);
+<a name="l02217"></a>02217 
+<a name="l02218"></a>02218                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l02219"></a>02219                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l02220"></a>02220                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l02221"></a>02221                         istart = length & 0xfffffff8;
+<a name="l02222"></a>02222                         cursrc1 = &Src1[istart];
+<a name="l02223"></a>02223                         curdest = &Dest[istart];
+<a name="l02224"></a>02224                 } <span class="keywordflow">else</span> {
+<a name="l02225"></a>02225                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l02226"></a>02226                         <span class="keywordflow">return</span> (0);
+<a name="l02227"></a>02227                 }
+<a name="l02228"></a>02228         } <span class="keywordflow">else</span> {
+<a name="l02229"></a>02229                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l02230"></a>02230                 istart = 0;
+<a name="l02231"></a>02231                 cursrc1 = Src1;
+<a name="l02232"></a>02232                 curdest = Dest;
+<a name="l02233"></a>02233         }
+<a name="l02234"></a>02234 
+<a name="l02235"></a>02235         <span class="comment">/* C routine to process image */</span>
+<a name="l02236"></a>02236         iC = (int) C;
+<a name="l02237"></a>02237         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l02238"></a>02238                 result = (int) *cursrc1 - iC;
+<a name="l02239"></a>02239                 <span class="keywordflow">if</span> (result < 0)
+<a name="l02240"></a>02240                         result = 0;
+<a name="l02241"></a>02241                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l02242"></a>02242                 <span class="comment">/* Advance pointers */</span>
+<a name="l02243"></a>02243                 cursrc1++;
+<a name="l02244"></a>02244                 curdest++;
+<a name="l02245"></a>02245         }
+<a name="l02246"></a>02246         <span class="keywordflow">return</span> (0);
+<a name="l02247"></a>02247 }
+<a name="l02248"></a>02248 
+<a name="l02260"></a>02260 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterSubUintMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> C, <span class="keyword [...]
+<a name="l02261"></a>02261 {
+<a name="l02262"></a>02262 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l02263"></a>02263 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l02264"></a>02264 <span class="preprocessor"></span>        __asm
+<a name="l02265"></a>02265         {
+<a name="l02266"></a>02266                 pusha
+<a name="l02267"></a>02267                         <span class="comment">/* ** Duplicate (int)C in 8 bytes of MM1 ** */</span>
+<a name="l02268"></a>02268                         mov eax, C      <span class="comment">/* load C into EAX */</span>
+<a name="l02269"></a>02269                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
+<a name="l02270"></a>02270                         mov eax, D      <span class="comment">/* load D into EAX */</span>
+<a name="l02271"></a>02271                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
+<a name="l02272"></a>02272                         punpckldq mm1, mm2      <span class="comment">/* fill higher bytes of MM1 with C */</span>
+<a name="l02273"></a>02273                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l02274"></a>02274                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l02275"></a>02275                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l02276"></a>02276                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l02277"></a>02277                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l02278"></a>02278 L11024:
+<a name="l02279"></a>02279                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
+<a name="l02280"></a>02280                 psubusb mm0, mm1        <span class="comment">/* MM0=SrcDest-C (sub 8 bytes with saturation) */</span>
+<a name="l02281"></a>02281                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
+<a name="l02282"></a>02282                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l02283"></a>02283                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l02284"></a>02284                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l02285"></a>02285                         jnz             L11024          <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l02286"></a>02286                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l02287"></a>02287                         popa
+<a name="l02288"></a>02288         }
+<a name="l02289"></a>02289 <span class="preprocessor">#else</span>
+<a name="l02290"></a>02290 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l02291"></a>02291         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l02292"></a>02292         __m64 *mDest = (__m64*)Dest;
+<a name="l02293"></a>02293         <span class="comment">/* Duplicate (int)C in 8 bytes of MM1 */</span>
+<a name="l02294"></a>02294         __m64 mm1 = _m_from_int(C);
+<a name="l02295"></a>02295         __m64 mm2 = _m_from_int(C);
+<a name="l02296"></a>02296         mm1 = _m_punpckldq(mm1, mm2);                   <span class="comment">/* fill higher bytes of MM1 with C */</span>
+<a name="l02297"></a>02297         <span class="comment">//__m64 mm1 = _m_from_int64(lli); // x86_64 only</span>
+<a name="l02298"></a>02298         <span class="keywordtype">int</span> i;
+<a name="l02299"></a>02299         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l02300"></a>02300                 *mDest = _m_psubusb(*mSrc1, mm1);       <span class="comment">/* Src1-C (sub 8 bytes with saturation) */</span>
+<a name="l02301"></a>02301                 mSrc1++;
+<a name="l02302"></a>02302                 mDest++;
+<a name="l02303"></a>02303         }
+<a name="l02304"></a>02304         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l02305"></a>02305 <span class="preprocessor">#endif</span>
+<a name="l02306"></a>02306 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l02307"></a>02307 <span class="preprocessor">#else</span>
+<a name="l02308"></a>02308 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l02309"></a>02309 <span class="preprocessor">#endif</span>
+<a name="l02310"></a>02310 <span class="preprocessor"></span>}
 <a name="l02311"></a>02311 
-<a name="l02323"></a><a class="code" href="_s_d_l__image_filter_8c.html#acfb143905b751680650576e75847f9c1">02323</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#acfb143905b751680650576e75847f9c1" title="Internal MMX Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C)">SDL_imageFilterSubUintMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</sp [...]
-<a name="l02324"></a>02324 {
-<a name="l02325"></a>02325 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l02326"></a>02326 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l02327"></a>02327 <span class="preprocessor"></span>        __asm
-<a name="l02328"></a>02328         {
-<a name="l02329"></a>02329                 pusha
-<a name="l02330"></a>02330                         <span class="comment">/* ** Duplicate (int)C in 8 bytes of MM1 ** */</span>
-<a name="l02331"></a>02331                         mov eax, C      <span class="comment">/* load C into EAX */</span>
-<a name="l02332"></a>02332                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l02333"></a>02333                         mov eax, D      <span class="comment">/* load D into EAX */</span>
-<a name="l02334"></a>02334                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l02335"></a>02335                         punpckldq mm1, mm2      <span class="comment">/* fill higher bytes of MM1 with C */</span>
-<a name="l02336"></a>02336                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02337"></a>02337                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l02338"></a>02338                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02339"></a>02339                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02340"></a>02340                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02341"></a>02341 L11024:
-<a name="l02342"></a>02342                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l02343"></a>02343                 psubusb mm0, mm1        <span class="comment">/* MM0=SrcDest-C (sub 8 bytes with saturation) */</span>
-<a name="l02344"></a>02344                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
-<a name="l02345"></a>02345                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02346"></a>02346                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02347"></a>02347                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l02348"></a>02348                         jnz             L11024          <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02349"></a>02349                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l02350"></a>02350                         popa
-<a name="l02351"></a>02351         }
-<a name="l02352"></a>02352 <span class="preprocessor">#else</span>
-<a name="l02353"></a>02353 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l02354"></a>02354                 (<span class="stringliteral">"pusha              \n\t"</span>
-<a name="l02355"></a>02355                 <span class="comment">/* ** Duplicate (int)C in 8 bytes of MM1 ** */</span>
-<a name="l02356"></a>02356                 <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load C into EAX */</span>
-<a name="l02357"></a>02357                 <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l02358"></a>02358                 <span class="stringliteral">"mov          %4, %%eax \n\t"</span>   <span class="comment">/* load D into EAX */</span>
-<a name="l02359"></a>02359                 <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l02360"></a>02360                 <span class="stringliteral">"punpckldq %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* fill higher bytes of MM1 with C */</span>
-<a name="l02361"></a>02361                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02362"></a>02362                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l02363"></a>02363                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02364"></a>02364                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02365"></a>02365                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02366"></a>02366                 <span class="stringliteral">"1: movq (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l02367"></a>02367                 <span class="stringliteral">"psubusb   %%mm1, %%mm0 \n\t"</span>   <span class="comment">/* MM0=SrcDest-C (sub 8 bytes with saturation) */</span>
-<a name="l02368"></a>02368                 <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span>   <span class="comment">/* store result in SrcDest */</span>
-<a name="l02369"></a>02369                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02370"></a>02370                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02371"></a>02371                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l02372"></a>02372                 <span class="stringliteral">"jnz                  1b \n\t"</span>  <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02373"></a>02373                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l02374"></a>02374                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l02375"></a>02375                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l02376"></a>02376                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l02377"></a>02377                 <span class="stringliteral">"m"</span>(C),                 <span class="comment">/* %3 */</span>
-<a name="l02378"></a>02378                 <span class="stringliteral">"m"</span>(D)                  <span class="comment">/* %4 */</span>
-<a name="l02379"></a>02379                 );
-<a name="l02380"></a>02380 <span class="preprocessor">#endif</span>
-<a name="l02381"></a>02381 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l02382"></a>02382 <span class="preprocessor">#else</span>
-<a name="l02383"></a>02383 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l02384"></a>02384 <span class="preprocessor">#endif</span>
-<a name="l02385"></a>02385 <span class="preprocessor"></span>}
-<a name="l02386"></a>02386 
-<a name="l02397"></a><a class="code" href="_s_d_l__image_filter_8h.html#ae2f3c5992701bded7c2d256bbbfb403f">02397</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#abb343ef95e22945e1d4d648b2e176e64" title="Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C)">SDL_imageFilterSubUint</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class= [...]
+<a name="l02322"></a><a class="code" href="_s_d_l__image_filter_8h.html#ae2f3c5992701bded7c2d256bbbfb403f">02322</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#abb343ef95e22945e1d4d648b2e176e64" title="Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C)">SDL_imageFilterSubUint</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class= [...]
+<a name="l02323"></a>02323 {
+<a name="l02324"></a>02324         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, j, istart, D;
+<a name="l02325"></a>02325         <span class="keywordtype">int</span> iC[4];
+<a name="l02326"></a>02326         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
+<a name="l02327"></a>02327         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
+<a name="l02328"></a>02328         <span class="keywordtype">int</span> result;
+<a name="l02329"></a>02329 
+<a name="l02330"></a>02330         <span class="comment">/* Validate input parameters */</span>
+<a name="l02331"></a>02331         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l02332"></a>02332                 <span class="keywordflow">return</span>(-1);
+<a name="l02333"></a>02333         <span class="keywordflow">if</span> (length == 0)
+<a name="l02334"></a>02334                 <span class="keywordflow">return</span>(0);
+<a name="l02335"></a>02335 
+<a name="l02336"></a>02336     <span class="comment">/* Special case: C==0 */</span>
+<a name="l02337"></a>02337         <span class="keywordflow">if</span> (C == 0) {
+<a name="l02338"></a>02338                 memcpy(Src1, Dest, length);
+<a name="l02339"></a>02339                 <span class="keywordflow">return</span> (0); 
+<a name="l02340"></a>02340         }
+<a name="l02341"></a>02341 
+<a name="l02342"></a>02342         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l02343"></a>02343 
+<a name="l02344"></a>02344                 <span class="comment">/* MMX routine */</span>
+<a name="l02345"></a>02345                 D=<a class="code" href="_s_d_l__image_filter_8c.html#a700fb30611761c46a674a45cc28ff561" title="Swaps the byte order in a 32bit integer (LSB becomes MSB, etc.).">SWAP_32</a>(C);
+<a name="l02346"></a>02346                 SDL_imageFilterSubUintMMX(Src1, Dest, length, C, D);
+<a name="l02347"></a>02347 
+<a name="l02348"></a>02348                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l02349"></a>02349                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l02350"></a>02350                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l02351"></a>02351                         istart = length & 0xfffffff8;
+<a name="l02352"></a>02352                         cursrc1 = &Src1[istart];
+<a name="l02353"></a>02353                         curdest = &Dest[istart];
+<a name="l02354"></a>02354                 } <span class="keywordflow">else</span> {
+<a name="l02355"></a>02355                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l02356"></a>02356                         <span class="keywordflow">return</span> (0);
+<a name="l02357"></a>02357                 }
+<a name="l02358"></a>02358         } <span class="keywordflow">else</span> {
+<a name="l02359"></a>02359                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l02360"></a>02360                 istart = 0;
+<a name="l02361"></a>02361                 cursrc1 = Src1;
+<a name="l02362"></a>02362                 curdest = Dest;
+<a name="l02363"></a>02363         }
+<a name="l02364"></a>02364 
+<a name="l02365"></a>02365         <span class="comment">/* C routine to process image */</span>
+<a name="l02366"></a>02366         iC[3] = (int) ((C >> 24) & 0xff);
+<a name="l02367"></a>02367         iC[2] = (int) ((C >> 16) & 0xff);
+<a name="l02368"></a>02368         iC[1] = (int) ((C >>  8) & 0xff);
+<a name="l02369"></a>02369         iC[0] = (int) ((C >>  0) & 0xff);
+<a name="l02370"></a>02370         <span class="keywordflow">for</span> (i = istart; i < length; i += 4) {
+<a name="l02371"></a>02371                 <span class="keywordflow">for</span> (j = 0; j < 4; j++) {
+<a name="l02372"></a>02372                         <span class="keywordflow">if</span> ((i+j)<length) {
+<a name="l02373"></a>02373                                 result = (int) *cursrc1 - iC[j];
+<a name="l02374"></a>02374                                 <span class="keywordflow">if</span> (result < 0) result = 0;
+<a name="l02375"></a>02375                                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l02376"></a>02376                                 <span class="comment">/* Advance pointers */</span>
+<a name="l02377"></a>02377                                 cursrc1++;
+<a name="l02378"></a>02378                                 curdest++;
+<a name="l02379"></a>02379                         }
+<a name="l02380"></a>02380                 }
+<a name="l02381"></a>02381         }
+<a name="l02382"></a>02382         <span class="keywordflow">return</span> (0);
+<a name="l02383"></a>02383 }
+<a name="l02384"></a>02384 
+<a name="l02396"></a>02396 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterShiftRightMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> N,
+<a name="l02397"></a>02397                                                                  <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Mask)
 <a name="l02398"></a>02398 {
-<a name="l02399"></a>02399         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, j, istart, D;
-<a name="l02400"></a>02400         <span class="keywordtype">int</span> iC[4];
-<a name="l02401"></a>02401         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
-<a name="l02402"></a>02402         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
-<a name="l02403"></a>02403         <span class="keywordtype">int</span> result;
-<a name="l02404"></a>02404 
-<a name="l02405"></a>02405         <span class="comment">/* Validate input parameters */</span>
-<a name="l02406"></a>02406         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l02407"></a>02407                 <span class="keywordflow">return</span>(-1);
-<a name="l02408"></a>02408         <span class="keywordflow">if</span> (length == 0)
-<a name="l02409"></a>02409                 <span class="keywordflow">return</span>(0);
-<a name="l02410"></a>02410 
-<a name="l02411"></a>02411     <span class="comment">/* Special case: C==0 */</span>
-<a name="l02412"></a>02412         <span class="keywordflow">if</span> (C == 0) {
-<a name="l02413"></a>02413                 memcpy(Src1, Dest, length);
-<a name="l02414"></a>02414                 <span class="keywordflow">return</span> (0); 
-<a name="l02415"></a>02415         }
-<a name="l02416"></a>02416 
-<a name="l02417"></a>02417         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l02418"></a>02418 
-<a name="l02419"></a>02419                 <span class="comment">/* MMX routine */</span>
-<a name="l02420"></a>02420                 D=<a class="code" href="_s_d_l__image_filter_8c.html#a700fb30611761c46a674a45cc28ff561" title="Swaps the byte order in a 32bit integer (LSB becomes MSB, etc.).">SWAP_32</a>(C);
-<a name="l02421"></a>02421                 <a class="code" href="_s_d_l__image_filter_8c.html#acfb143905b751680650576e75847f9c1" title="Internal MMX Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C)">SDL_imageFilterSubUintMMX</a>(Src1, Dest, length, C, D);
-<a name="l02422"></a>02422 
-<a name="l02423"></a>02423                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l02424"></a>02424                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l02425"></a>02425                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l02426"></a>02426                         istart = length & 0xfffffff8;
-<a name="l02427"></a>02427                         cursrc1 = &Src1[istart];
-<a name="l02428"></a>02428                         curdest = &Dest[istart];
-<a name="l02429"></a>02429                 } <span class="keywordflow">else</span> {
-<a name="l02430"></a>02430                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l02431"></a>02431                         <span class="keywordflow">return</span> (0);
-<a name="l02432"></a>02432                 }
-<a name="l02433"></a>02433         } <span class="keywordflow">else</span> {
-<a name="l02434"></a>02434                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l02435"></a>02435                 istart = 0;
-<a name="l02436"></a>02436                 cursrc1 = Src1;
-<a name="l02437"></a>02437                 curdest = Dest;
-<a name="l02438"></a>02438         }
-<a name="l02439"></a>02439 
-<a name="l02440"></a>02440         <span class="comment">/* C routine to process image */</span>
-<a name="l02441"></a>02441         iC[3] = (int) ((C >> 24) & 0xff);
-<a name="l02442"></a>02442         iC[2] = (int) ((C >> 16) & 0xff);
-<a name="l02443"></a>02443         iC[1] = (int) ((C >>  8) & 0xff);
-<a name="l02444"></a>02444         iC[0] = (int) ((C >>  0) & 0xff);
-<a name="l02445"></a>02445         <span class="keywordflow">for</span> (i = istart; i < length; i += 4) {
-<a name="l02446"></a>02446                 <span class="keywordflow">for</span> (j = 0; j < 4; j++) {
-<a name="l02447"></a>02447                         <span class="keywordflow">if</span> ((i+j)<length) {
-<a name="l02448"></a>02448                                 result = (int) *cursrc1 - iC[j];
-<a name="l02449"></a>02449                                 <span class="keywordflow">if</span> (result < 0) result = 0;
-<a name="l02450"></a>02450                                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l02451"></a>02451                                 <span class="comment">/* Advance pointers */</span>
-<a name="l02452"></a>02452                                 cursrc1++;
-<a name="l02453"></a>02453                                 curdest++;
-<a name="l02454"></a>02454                         }
-<a name="l02455"></a>02455                 }
-<a name="l02456"></a>02456         }
-<a name="l02457"></a>02457         <span class="keywordflow">return</span> (0);
-<a name="l02458"></a>02458 }
-<a name="l02459"></a>02459 
-<a name="l02471"></a><a class="code" href="_s_d_l__image_filter_8c.html#a696568e00b153011f0673bdf1297e9fa">02471</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a696568e00b153011f0673bdf1297e9fa" title="Internal MMX Filter using ShiftRight: D = saturation0(S >> N)">SDL_imageFilterShiftRightMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="k [...]
-<a name="l02472"></a>02472                                                                  <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Mask)
-<a name="l02473"></a>02473 {
-<a name="l02474"></a>02474 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l02475"></a>02475 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l02476"></a>02476 <span class="preprocessor"></span>        __asm
-<a name="l02477"></a>02477         {
-<a name="l02478"></a>02478                 pusha
-<a name="l02479"></a>02479                         mov edx, Mask           <span class="comment">/* load Mask address into edx */</span>
-<a name="l02480"></a>02480                         movq mm0, [edx]         <span class="comment">/* load Mask into mm0 */</span>
-<a name="l02481"></a>02481                 xor ecx, ecx    <span class="comment">/* zero ECX */</span>
-<a name="l02482"></a>02482                         mov cl,  N      <span class="comment">/* load loop counter (N) into CL */</span>
-<a name="l02483"></a>02483                         movd mm3,  ecx  <span class="comment">/* copy (N) into MM3  */</span>
-<a name="l02484"></a>02484                         pcmpeqb mm1, mm1        <span class="comment">/* generate all 1's in mm1 */</span>
-<a name="l02485"></a>02485 L10240:                         <span class="comment">/* ** Prepare proper bit-Mask in MM1 ** */</span>
-<a name="l02486"></a>02486                 psrlw mm1,  1   <span class="comment">/* shift 4 WORDS of MM1 1 bit to the right */</span>
-<a name="l02487"></a>02487                         pand mm1, mm0   <span class="comment">// apply Mask to 8 BYTES of MM1 */</span>
-<a name="l02488"></a>02488                         <span class="comment">/*  byte     0x0f, 0xdb, 0xc8 */</span>
-<a name="l02489"></a>02489                         dec               cl            <span class="comment">/* decrease loop counter */</span>
-<a name="l02490"></a>02490                         jnz            L10240           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02491"></a>02491                         <span class="comment">/* ** Shift all bytes of the image ** */</span>
-<a name="l02492"></a>02492                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02493"></a>02493                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l02494"></a>02494                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02495"></a>02495                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02496"></a>02496                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02497"></a>02497 L10241:
-<a name="l02498"></a>02498                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l02499"></a>02499                 psrlw mm0, mm3          <span class="comment">/* shift 4 WORDS of MM0 (N) bits to the right */</span>
-<a name="l02500"></a>02500                         pand mm0, mm1    <span class="comment">// apply proper bit-Mask to 8 BYTES of MM0 */</span>
-<a name="l02501"></a>02501                         <span class="comment">/* byte     0x0f, 0xdb, 0xc1 */</span>
-<a name="l02502"></a>02502                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
-<a name="l02503"></a>02503                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02504"></a>02504                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02505"></a>02505                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l02506"></a>02506                         jnz            L10241           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02507"></a>02507                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l02508"></a>02508                         popa
-<a name="l02509"></a>02509         }
-<a name="l02510"></a>02510 <span class="preprocessor">#else</span>
-<a name="l02511"></a>02511 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l02512"></a>02512                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"movl         %4, %%edx \n\t"</span>        <span class="comment">/* load Mask address into edx */</span>
-<a name="l02513"></a>02513                 <span class="stringliteral">"movq    (%%edx), %%mm0 \n\t"</span>   <span class="comment">/* load Mask into mm0 */</span>
-<a name="l02514"></a>02514                 <span class="stringliteral">"xor       %%ecx, %%ecx \n\t"</span>   <span class="comment">/* zero ECX */</span>
-<a name="l02515"></a>02515                 <span class="stringliteral">"mov           %3, %%cl \n\t"</span>   <span class="comment">/* load loop counter (N) into CL */</span>
-<a name="l02516"></a>02516                 <span class="stringliteral">"movd      %%ecx, %%mm3 \n\t"</span>   <span class="comment">/* copy (N) into MM3  */</span>
-<a name="l02517"></a>02517                 <span class="stringliteral">"pcmpeqb   %%mm1, %%mm1 \n\t"</span>   <span class="comment">/* generate all 1's in mm1 */</span>
-<a name="l02518"></a>02518                 <span class="stringliteral">"1:                     \n\t"</span>   <span class="comment">/* ** Prepare proper bit-Mask in MM1 ** */</span>
-<a name="l02519"></a>02519                 <span class="stringliteral">"psrlw        $1, %%mm1 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM1 1 bit to the right */</span>
-<a name="l02520"></a>02520                 <span class="comment">/*    "pand      %%mm0, %%mm1 \n\t"    // apply Mask to 8 BYTES of MM1 */</span>
-<a name="l02521"></a>02521                 <span class="stringliteral">".byte     0x0f, 0xdb, 0xc8 \n\t"</span> 
-<a name="l02522"></a>02522                 <span class="stringliteral">"dec               %%cl \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l02523"></a>02523                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02524"></a>02524                 <span class="comment">/* ** Shift all bytes of the image ** */</span>
-<a name="l02525"></a>02525                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02526"></a>02526                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l02527"></a>02527                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02528"></a>02528                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02529"></a>02529                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02530"></a>02530                 <span class="stringliteral">"2:                     \n\t"</span> 
-<a name="l02531"></a>02531                 <span class="stringliteral">"movq    (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l02532"></a>02532                 <span class="stringliteral">"psrlw     %%mm3, %%mm0 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM0 (N) bits to the right */</span>
-<a name="l02533"></a>02533                 <span class="comment">/*    "pand      %%mm1, %%mm0 \n\t"    // apply proper bit-Mask to 8 BYTES of MM0 */</span>
-<a name="l02534"></a>02534                 <span class="stringliteral">".byte     0x0f, 0xdb, 0xc1 \n\t"</span> 
-<a name="l02535"></a>02535                 <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span>   <span class="comment">/* store result in SrcDest */</span>
-<a name="l02536"></a>02536                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02537"></a>02537                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02538"></a>02538                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l02539"></a>02539                 <span class="stringliteral">"jnz                 2b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02540"></a>02540                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l02541"></a>02541                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l02542"></a>02542                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l02543"></a>02543                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l02544"></a>02544                 <span class="stringliteral">"m"</span>(N),                 <span class="comment">/* %3 */</span>
-<a name="l02545"></a>02545                 <span class="stringliteral">"m"</span>(Mask)                       <span class="comment">/* %4 */</span>
-<a name="l02546"></a>02546                 );
-<a name="l02547"></a>02547 <span class="preprocessor">#endif</span>
-<a name="l02548"></a>02548 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l02549"></a>02549 <span class="preprocessor">#else</span>
-<a name="l02550"></a>02550 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l02551"></a>02551 <span class="preprocessor">#endif</span>
-<a name="l02552"></a>02552 <span class="preprocessor"></span>}
-<a name="l02553"></a>02553 
-<a name="l02564"></a><a class="code" href="_s_d_l__image_filter_8h.html#a931f1232cd03acd2ba90af222625f4ca">02564</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a68851aed2dcc5dfd2f3b258236f3b88c" title="Filter using ShiftRight: D = saturation0(S >> N)">SDL_imageFilterShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char [...]
-<a name="l02565"></a>02565 {
-<a name="l02566"></a>02566         <span class="keyword">static</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
-<a name="l02567"></a>02567         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l02568"></a>02568         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
-<a name="l02569"></a>02569         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
-<a name="l02570"></a>02570 
-<a name="l02571"></a>02571         <span class="comment">/* Validate input parameters */</span>
-<a name="l02572"></a>02572         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l02573"></a>02573                 <span class="keywordflow">return</span>(-1);
-<a name="l02574"></a>02574         <span class="keywordflow">if</span> (length == 0)
-<a name="l02575"></a>02575                 <span class="keywordflow">return</span>(0);
-<a name="l02576"></a>02576 
-<a name="l02577"></a>02577         <span class="comment">/* Check shift */</span>
-<a name="l02578"></a>02578         <span class="keywordflow">if</span> (N > 8) {
-<a name="l02579"></a>02579                 <span class="keywordflow">return</span> (-1);
-<a name="l02580"></a>02580         }
-<a name="l02581"></a>02581 
-<a name="l02582"></a>02582         <span class="comment">/* Special case: N==0 */</span>
-<a name="l02583"></a>02583         <span class="keywordflow">if</span> (N == 0) {
-<a name="l02584"></a>02584                 memcpy(Src1, Dest, length);
-<a name="l02585"></a>02585                 <span class="keywordflow">return</span> (0); 
-<a name="l02586"></a>02586         }
-<a name="l02587"></a>02587 
-<a name="l02588"></a>02588         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l02589"></a>02589 
-<a name="l02590"></a>02590                 <span class="comment">/* MMX routine */</span>
-<a name="l02591"></a>02591                 <a class="code" href="_s_d_l__image_filter_8c.html#a696568e00b153011f0673bdf1297e9fa" title="Internal MMX Filter using ShiftRight: D = saturation0(S >> N)">SDL_imageFilterShiftRightMMX</a>(Src1, Dest, length, N, Mask);
-<a name="l02592"></a>02592 
-<a name="l02593"></a>02593                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l02594"></a>02594                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l02595"></a>02595                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l02596"></a>02596                         istart = length & 0xfffffff8;
-<a name="l02597"></a>02597                         cursrc1 = &Src1[istart];
-<a name="l02598"></a>02598                         curdest = &Dest[istart];
-<a name="l02599"></a>02599                 } <span class="keywordflow">else</span> {
-<a name="l02600"></a>02600                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l02601"></a>02601                         <span class="keywordflow">return</span> (0);
-<a name="l02602"></a>02602                 }
-<a name="l02603"></a>02603         } <span class="keywordflow">else</span> {
-<a name="l02604"></a>02604                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l02605"></a>02605                 istart = 0;
-<a name="l02606"></a>02606                 cursrc1 = Src1;
-<a name="l02607"></a>02607                 curdest = Dest;
-<a name="l02608"></a>02608         }
-<a name="l02609"></a>02609 
-<a name="l02610"></a>02610         <span class="comment">/* C routine to process image */</span>
-<a name="l02611"></a>02611         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l02612"></a>02612                 *curdest = (<span class="keywordtype">unsigned</span> char) *cursrc1 >> N;
-<a name="l02613"></a>02613                 <span class="comment">/* Advance pointers */</span>
-<a name="l02614"></a>02614                 cursrc1++;
-<a name="l02615"></a>02615                 curdest++;
-<a name="l02616"></a>02616         }
+<a name="l02399"></a>02399 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l02400"></a>02400 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l02401"></a>02401 <span class="preprocessor"></span>        __asm
+<a name="l02402"></a>02402         {
+<a name="l02403"></a>02403                 pusha
+<a name="l02404"></a>02404                         mov edx, Mask           <span class="comment">/* load Mask address into edx */</span>
+<a name="l02405"></a>02405                         movq mm0, [edx]         <span class="comment">/* load Mask into mm0 */</span>
+<a name="l02406"></a>02406                 xor ecx, ecx    <span class="comment">/* zero ECX */</span>
+<a name="l02407"></a>02407                         mov cl,  N      <span class="comment">/* load loop counter (N) into CL */</span>
+<a name="l02408"></a>02408                         movd mm3,  ecx  <span class="comment">/* copy (N) into MM3  */</span>
+<a name="l02409"></a>02409                         pcmpeqb mm1, mm1        <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l02410"></a>02410 L10240:                         <span class="comment">/* ** Prepare proper bit-Mask in MM1 ** */</span>
+<a name="l02411"></a>02411                 psrlw mm1,  1   <span class="comment">/* shift 4 WORDS of MM1 1 bit to the right */</span>
+<a name="l02412"></a>02412                         pand mm1, mm0   <span class="comment">// apply Mask to 8 BYTES of MM1 */</span>
+<a name="l02413"></a>02413                         <span class="comment">/*  byte     0x0f, 0xdb, 0xc8 */</span>
+<a name="l02414"></a>02414                         dec               cl            <span class="comment">/* decrease loop counter */</span>
+<a name="l02415"></a>02415                         jnz            L10240           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l02416"></a>02416                         <span class="comment">/* ** Shift all bytes of the image ** */</span>
+<a name="l02417"></a>02417                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l02418"></a>02418                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l02419"></a>02419                         mov ecx,  SrcLength     <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l02420"></a>02420                         shr ecx,  3     <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l02421"></a>02421                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l02422"></a>02422 L10241:
+<a name="l02423"></a>02423                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
+<a name="l02424"></a>02424                 psrlw mm0, mm3          <span class="comment">/* shift 4 WORDS of MM0 (N) bits to the right */</span>
+<a name="l02425"></a>02425                         pand mm0, mm1    <span class="comment">// apply proper bit-Mask to 8 BYTES of MM0 */</span>
+<a name="l02426"></a>02426                         <span class="comment">/* byte     0x0f, 0xdb, 0xc1 */</span>
+<a name="l02427"></a>02427                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
+<a name="l02428"></a>02428                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l02429"></a>02429                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l02430"></a>02430                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l02431"></a>02431                         jnz            L10241           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l02432"></a>02432                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l02433"></a>02433                         popa
+<a name="l02434"></a>02434         }
+<a name="l02435"></a>02435 <span class="preprocessor">#else</span>
+<a name="l02436"></a>02436 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l02437"></a>02437         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l02438"></a>02438         __m64 *mDest = (__m64*)Dest;
+<a name="l02439"></a>02439         __m64 *mMask = (__m64*)Mask;
+<a name="l02440"></a>02440         __m64 mm1;
+<a name="l02441"></a>02441         <span class="keywordtype">int</span> i;
+<a name="l02442"></a>02442         mm1 = _m_pcmpeqb(mm1, mm1);                     <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l02443"></a>02443         <span class="comment">/* Prepare proper bit-Mask in MM1 */</span>
+<a name="l02444"></a>02444         <span class="keywordflow">for</span> (i = 0; i < N; i++) {
+<a name="l02445"></a>02445                 mm1 = _m_psrlwi(mm1, 1);                <span class="comment">/* shift 4 WORDS of MM1 1 bit to the right */</span>
+<a name="l02446"></a>02446                 mm1 = _m_pand(mm1, *mMask);             <span class="comment">/* apply Mask to 8 BYTES of MM1 */</span>
+<a name="l02447"></a>02447         }
+<a name="l02448"></a>02448         <span class="comment">/* Shift all bytes of the image */</span>
+<a name="l02449"></a>02449         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l02450"></a>02450                 __m64 mm0 = _m_psrlwi(*mSrc1, N);       <span class="comment">/* shift 4 WORDS of MM0 (N) bits to the right */</span>
+<a name="l02451"></a>02451                 *mDest = _m_pand(mm0, mm1);             <span class="comment">/* apply proper bit-Mask to 8 BYTES of MM0 */</span>
+<a name="l02452"></a>02452                 mSrc1++;
+<a name="l02453"></a>02453                 mDest++;
+<a name="l02454"></a>02454         }
+<a name="l02455"></a>02455         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l02456"></a>02456 <span class="preprocessor">#endif</span>
+<a name="l02457"></a>02457 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l02458"></a>02458 <span class="preprocessor">#else</span>
+<a name="l02459"></a>02459 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l02460"></a>02460 <span class="preprocessor">#endif</span>
+<a name="l02461"></a>02461 <span class="preprocessor"></span>}
+<a name="l02462"></a>02462 
+<a name="l02473"></a><a class="code" href="_s_d_l__image_filter_8h.html#a931f1232cd03acd2ba90af222625f4ca">02473</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a68851aed2dcc5dfd2f3b258236f3b88c" title="Filter using ShiftRight: D = saturation0(S >> N)">SDL_imageFilterShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char [...]
+<a name="l02474"></a>02474 {
+<a name="l02475"></a>02475         <span class="keyword">static</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
+<a name="l02476"></a>02476         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l02477"></a>02477         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
+<a name="l02478"></a>02478         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
+<a name="l02479"></a>02479 
+<a name="l02480"></a>02480         <span class="comment">/* Validate input parameters */</span>
+<a name="l02481"></a>02481         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l02482"></a>02482                 <span class="keywordflow">return</span>(-1);
+<a name="l02483"></a>02483         <span class="keywordflow">if</span> (length == 0)
+<a name="l02484"></a>02484                 <span class="keywordflow">return</span>(0);
+<a name="l02485"></a>02485 
+<a name="l02486"></a>02486         <span class="comment">/* Check shift */</span>
+<a name="l02487"></a>02487         <span class="keywordflow">if</span> (N > 8) {
+<a name="l02488"></a>02488                 <span class="keywordflow">return</span> (-1);
+<a name="l02489"></a>02489         }
+<a name="l02490"></a>02490 
+<a name="l02491"></a>02491         <span class="comment">/* Special case: N==0 */</span>
+<a name="l02492"></a>02492         <span class="keywordflow">if</span> (N == 0) {
+<a name="l02493"></a>02493                 memcpy(Src1, Dest, length);
+<a name="l02494"></a>02494                 <span class="keywordflow">return</span> (0); 
+<a name="l02495"></a>02495         }
+<a name="l02496"></a>02496 
+<a name="l02497"></a>02497         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l02498"></a>02498 
+<a name="l02499"></a>02499                 <span class="comment">/* MMX routine */</span>
+<a name="l02500"></a>02500                 SDL_imageFilterShiftRightMMX(Src1, Dest, length, N, Mask);
+<a name="l02501"></a>02501 
+<a name="l02502"></a>02502                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l02503"></a>02503                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l02504"></a>02504                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l02505"></a>02505                         istart = length & 0xfffffff8;
+<a name="l02506"></a>02506                         cursrc1 = &Src1[istart];
+<a name="l02507"></a>02507                         curdest = &Dest[istart];
+<a name="l02508"></a>02508                 } <span class="keywordflow">else</span> {
+<a name="l02509"></a>02509                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l02510"></a>02510                         <span class="keywordflow">return</span> (0);
+<a name="l02511"></a>02511                 }
+<a name="l02512"></a>02512         } <span class="keywordflow">else</span> {
+<a name="l02513"></a>02513                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l02514"></a>02514                 istart = 0;
+<a name="l02515"></a>02515                 cursrc1 = Src1;
+<a name="l02516"></a>02516                 curdest = Dest;
+<a name="l02517"></a>02517         }
+<a name="l02518"></a>02518 
+<a name="l02519"></a>02519         <span class="comment">/* C routine to process image */</span>
+<a name="l02520"></a>02520         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l02521"></a>02521                 *curdest = (<span class="keywordtype">unsigned</span> char) *cursrc1 >> N;
+<a name="l02522"></a>02522                 <span class="comment">/* Advance pointers */</span>
+<a name="l02523"></a>02523                 cursrc1++;
+<a name="l02524"></a>02524                 curdest++;
+<a name="l02525"></a>02525         }
+<a name="l02526"></a>02526 
+<a name="l02527"></a>02527         <span class="keywordflow">return</span> (0);
+<a name="l02528"></a>02528 }
+<a name="l02529"></a>02529 
+<a name="l02540"></a>02540 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterShiftRightUintMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> N)
+<a name="l02541"></a>02541 {
+<a name="l02542"></a>02542 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l02543"></a>02543 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l02544"></a>02544 <span class="preprocessor"></span>        __asm
+<a name="l02545"></a>02545         {
+<a name="l02546"></a>02546                 pusha
+<a name="l02547"></a>02547                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l02548"></a>02548                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l02549"></a>02549                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l02550"></a>02550                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l02551"></a>02551                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l02552"></a>02552 L13023:
+<a name="l02553"></a>02553                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
+<a name="l02554"></a>02554                 psrld mm0, N
+<a name="l02555"></a>02555                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
+<a name="l02556"></a>02556                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l02557"></a>02557                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l02558"></a>02558                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l02559"></a>02559                         jnz             L13023          <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l02560"></a>02560                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l02561"></a>02561                         popa
+<a name="l02562"></a>02562         }
+<a name="l02563"></a>02563 <span class="preprocessor">#else</span>
+<a name="l02564"></a>02564 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l02565"></a>02565         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l02566"></a>02566         __m64 *mDest = (__m64*)Dest;
+<a name="l02567"></a>02567         <span class="keywordtype">int</span> i;
+<a name="l02568"></a>02568         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l02569"></a>02569                 *mDest = _m_psrldi(*mSrc1, N);
+<a name="l02570"></a>02570                 mSrc1++;
+<a name="l02571"></a>02571                 mDest++;
+<a name="l02572"></a>02572         }
+<a name="l02573"></a>02573         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l02574"></a>02574 <span class="preprocessor">#endif</span>
+<a name="l02575"></a>02575 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l02576"></a>02576 <span class="preprocessor">#else</span>
+<a name="l02577"></a>02577 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l02578"></a>02578 <span class="preprocessor">#endif</span>
+<a name="l02579"></a>02579 <span class="preprocessor"></span>}
+<a name="l02580"></a>02580 
+<a name="l02591"></a><a class="code" href="_s_d_l__image_filter_8h.html#a4ccddf5c575cc4d6074c9a54789240a6">02591</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a540d4625d76bcd03318c2a59ce650fdb" title="Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N)">SDL_imageFilterShiftRightUint</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class=" [...]
+<a name="l02592"></a>02592 {
+<a name="l02593"></a>02593         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l02594"></a>02594         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdest;
+<a name="l02595"></a>02595         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *icursrc1, *icurdest;
+<a name="l02596"></a>02596         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> result;
+<a name="l02597"></a>02597 
+<a name="l02598"></a>02598         <span class="comment">/* Validate input parameters */</span>
+<a name="l02599"></a>02599         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l02600"></a>02600                 <span class="keywordflow">return</span>(-1);
+<a name="l02601"></a>02601         <span class="keywordflow">if</span> (length == 0)
+<a name="l02602"></a>02602                 <span class="keywordflow">return</span>(0);
+<a name="l02603"></a>02603 
+<a name="l02604"></a>02604         <span class="keywordflow">if</span> (N > 32) {
+<a name="l02605"></a>02605                 <span class="keywordflow">return</span> (-1);
+<a name="l02606"></a>02606         }
+<a name="l02607"></a>02607 
+<a name="l02608"></a>02608         <span class="comment">/* Special case: N==0 */</span>
+<a name="l02609"></a>02609         <span class="keywordflow">if</span> (N == 0) {
+<a name="l02610"></a>02610                 memcpy(Src1, Dest, length);
+<a name="l02611"></a>02611                 <span class="keywordflow">return</span> (0); 
+<a name="l02612"></a>02612         }
+<a name="l02613"></a>02613 
+<a name="l02614"></a>02614         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l02615"></a>02615 
+<a name="l02616"></a>02616                 SDL_imageFilterShiftRightUintMMX(Src1, Dest, length, N);
 <a name="l02617"></a>02617 
-<a name="l02618"></a>02618         <span class="keywordflow">return</span> (0);
-<a name="l02619"></a>02619 }
-<a name="l02620"></a>02620 
-<a name="l02631"></a><a class="code" href="_s_d_l__image_filter_8c.html#a23430360ee5ce8031158831a44e83d56">02631</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a23430360ee5ce8031158831a44e83d56" title="Internal MMX Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N)">SDL_imageFilterShiftRightUintMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</spa [...]
-<a name="l02632"></a>02632 {
-<a name="l02633"></a>02633 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l02634"></a>02634 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l02635"></a>02635 <span class="preprocessor"></span>        __asm
-<a name="l02636"></a>02636         {
-<a name="l02637"></a>02637                 pusha
-<a name="l02638"></a>02638                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02639"></a>02639                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l02640"></a>02640                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02641"></a>02641                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02642"></a>02642                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02643"></a>02643 L13023:
-<a name="l02644"></a>02644                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l02645"></a>02645                 psrld mm0, N
-<a name="l02646"></a>02646                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
-<a name="l02647"></a>02647                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02648"></a>02648                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02649"></a>02649                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l02650"></a>02650                         jnz             L13023          <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02651"></a>02651                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l02652"></a>02652                         popa
-<a name="l02653"></a>02653         }
-<a name="l02654"></a>02654 <span class="preprocessor">#else</span>
-<a name="l02655"></a>02655 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l02656"></a>02656                 (<span class="stringliteral">"pusha              \n\t"</span>
-<a name="l02657"></a>02657                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02658"></a>02658                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l02659"></a>02659                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02660"></a>02660                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02661"></a>02661                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02662"></a>02662                 <span class="stringliteral">"1: movq (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l02663"></a>02663                 <span class="stringliteral">"psrld   %3, %%mm0 \n\t"</span>
-<a name="l02664"></a>02664                 <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span>   <span class="comment">/* store result in SrcDest */</span>
-<a name="l02665"></a>02665                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02666"></a>02666                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02667"></a>02667                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l02668"></a>02668                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02669"></a>02669                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l02670"></a>02670                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l02671"></a>02671                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l02672"></a>02672                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l02673"></a>02673                 <span class="stringliteral">"m"</span>(N)                  <span class="comment">/* %3 */</span>
-<a name="l02674"></a>02674                 );
-<a name="l02675"></a>02675 <span class="preprocessor">#endif</span>
-<a name="l02676"></a>02676 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l02677"></a>02677 <span class="preprocessor">#else</span>
-<a name="l02678"></a>02678 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l02679"></a>02679 <span class="preprocessor">#endif</span>
-<a name="l02680"></a>02680 <span class="preprocessor"></span>}
-<a name="l02681"></a>02681 
-<a name="l02692"></a><a class="code" href="_s_d_l__image_filter_8h.html#a4ccddf5c575cc4d6074c9a54789240a6">02692</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a540d4625d76bcd03318c2a59ce650fdb" title="Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N)">SDL_imageFilterShiftRightUint</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class=" [...]
-<a name="l02693"></a>02693 {
-<a name="l02694"></a>02694         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l02695"></a>02695         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdest;
-<a name="l02696"></a>02696         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *icursrc1, *icurdest;
-<a name="l02697"></a>02697         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> result;
-<a name="l02698"></a>02698 
-<a name="l02699"></a>02699         <span class="comment">/* Validate input parameters */</span>
-<a name="l02700"></a>02700         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l02701"></a>02701                 <span class="keywordflow">return</span>(-1);
-<a name="l02702"></a>02702         <span class="keywordflow">if</span> (length == 0)
-<a name="l02703"></a>02703                 <span class="keywordflow">return</span>(0);
-<a name="l02704"></a>02704 
-<a name="l02705"></a>02705         <span class="keywordflow">if</span> (N > 32) {
-<a name="l02706"></a>02706                 <span class="keywordflow">return</span> (-1);
-<a name="l02707"></a>02707         }
-<a name="l02708"></a>02708 
-<a name="l02709"></a>02709         <span class="comment">/* Special case: N==0 */</span>
-<a name="l02710"></a>02710         <span class="keywordflow">if</span> (N == 0) {
-<a name="l02711"></a>02711                 memcpy(Src1, Dest, length);
-<a name="l02712"></a>02712                 <span class="keywordflow">return</span> (0); 
-<a name="l02713"></a>02713         }
-<a name="l02714"></a>02714 
-<a name="l02715"></a>02715         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l02716"></a>02716 
-<a name="l02717"></a>02717                 <a class="code" href="_s_d_l__image_filter_8c.html#a23430360ee5ce8031158831a44e83d56" title="Internal MMX Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N)">SDL_imageFilterShiftRightUintMMX</a>(Src1, Dest, length, N);
-<a name="l02718"></a>02718 
-<a name="l02719"></a>02719                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l02720"></a>02720                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l02721"></a>02721                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l02722"></a>02722                         istart = length & 0xfffffff8;
-<a name="l02723"></a>02723                         cursrc1 = &Src1[istart];
-<a name="l02724"></a>02724                         curdest = &Dest[istart];
-<a name="l02725"></a>02725                 } <span class="keywordflow">else</span> {
-<a name="l02726"></a>02726                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l02727"></a>02727                         <span class="keywordflow">return</span> (0);
-<a name="l02728"></a>02728                 }
-<a name="l02729"></a>02729         } <span class="keywordflow">else</span> {
-<a name="l02730"></a>02730                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l02731"></a>02731                 istart = 0;
-<a name="l02732"></a>02732                 cursrc1 = Src1;
-<a name="l02733"></a>02733                 curdest = Dest;
-<a name="l02734"></a>02734         }
-<a name="l02735"></a>02735 
-<a name="l02736"></a>02736         <span class="comment">/* C routine to process image */</span>
-<a name="l02737"></a>02737         icursrc1=(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *)cursrc1;
-<a name="l02738"></a>02738         icurdest=(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *)curdest;
-<a name="l02739"></a>02739         <span class="keywordflow">for</span> (i = istart; i < length; i += 4) {
-<a name="l02740"></a>02740                 <span class="keywordflow">if</span> ((i+4)<length) {
-<a name="l02741"></a>02741                         result = ((<span class="keywordtype">unsigned</span> int)*icursrc1 >> N);
-<a name="l02742"></a>02742                         *icurdest = result;
-<a name="l02743"></a>02743                 }
-<a name="l02744"></a>02744                 <span class="comment">/* Advance pointers */</span>
-<a name="l02745"></a>02745                 icursrc1++;
-<a name="l02746"></a>02746                 icurdest++;
-<a name="l02747"></a>02747         }
-<a name="l02748"></a>02748 
-<a name="l02749"></a>02749         <span class="keywordflow">return</span> (0);
-<a name="l02750"></a>02750 }
-<a name="l02751"></a>02751 
-<a name="l02762"></a><a class="code" href="_s_d_l__image_filter_8c.html#ad18d23ec352f7508f89e47cff9c9a4ea">02762</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ad18d23ec352f7508f89e47cff9c9a4ea" title="Internal MMX Filter using MultByByte: D = saturation255(S * C)">SDL_imageFilterMultByByteMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywor [...]
-<a name="l02763"></a>02763 {
-<a name="l02764"></a>02764 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l02765"></a>02765 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l02766"></a>02766 <span class="preprocessor"></span>        __asm
-<a name="l02767"></a>02767         {
-<a name="l02768"></a>02768                 pusha
-<a name="l02769"></a>02769                         <span class="comment">/* ** Duplicate C in 4 words of MM1 ** */</span>
-<a name="l02770"></a>02770                         mov al, C       <span class="comment">/* load C into AL */</span>
-<a name="l02771"></a>02771                         xor ah, ah      <span class="comment">/* zero AH */</span>
-<a name="l02772"></a>02772                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l02773"></a>02773                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l02774"></a>02774                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l02775"></a>02775                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l02776"></a>02776                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l02777"></a>02777                         punpckldq mm1, mm2      <span class="comment">/* fill higher words of MM1 with C */</span>
-<a name="l02778"></a>02778                         pxor mm0, mm0           <span class="comment">/* zero MM0 register */</span>
-<a name="l02779"></a>02779                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02780"></a>02780                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l02781"></a>02781                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02782"></a>02782                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02783"></a>02783                         cmp al, 128     <span class="comment">/* if (C <= 128) execute more efficient code */</span>
-<a name="l02784"></a>02784                         jg             L10251
-<a name="l02785"></a>02785                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02786"></a>02786 L10250:
-<a name="l02787"></a>02787                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l02788"></a>02788                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l02789"></a>02789                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l02790"></a>02790                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l02791"></a>02791                         pmullw mm3, mm1         <span class="comment">/* mul low  bytes of SrcDest and MM1 */</span>
-<a name="l02792"></a>02792                         pmullw mm4, mm1         <span class="comment">/* mul high bytes of SrcDest and MM1 */</span>
-<a name="l02793"></a>02793                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l02794"></a>02794                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
-<a name="l02795"></a>02795                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02796"></a>02796                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02797"></a>02797                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l02798"></a>02798                         jnz            L10250           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02799"></a>02799                         jmp            L10252
-<a name="l02800"></a>02800                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02801"></a>02801 L10251:
-<a name="l02802"></a>02802                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l02803"></a>02803                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l02804"></a>02804                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l02805"></a>02805                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l02806"></a>02806                         pmullw mm3, mm1         <span class="comment">/* mul low  bytes of SrcDest and MM1 */</span>
-<a name="l02807"></a>02807                         pmullw mm4, mm1         <span class="comment">/* mul high bytes of SrcDest and MM1 */</span>
-<a name="l02808"></a>02808                         <span class="comment">/* ** Take abs value of the results (signed words) ** */</span>
-<a name="l02809"></a>02809                         movq mm5, mm3           <span class="comment">/* copy mm3 into mm5 */</span>
-<a name="l02810"></a>02810                         movq mm6, mm4           <span class="comment">/* copy mm4 into mm6 */</span>
-<a name="l02811"></a>02811                         psraw mm5, 15           <span class="comment">/* fill mm5 words with word sign bit */</span>
-<a name="l02812"></a>02812                         psraw mm6, 15           <span class="comment">/* fill mm6 words with word sign bit */</span>
-<a name="l02813"></a>02813                         pxor mm3, mm5           <span class="comment">/* take 1's compliment of only neg words */</span>
-<a name="l02814"></a>02814                         pxor mm4, mm6           <span class="comment">/* take 1's compliment of only neg words */</span>
-<a name="l02815"></a>02815                         psubsw mm3, mm5         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
-<a name="l02816"></a>02816                         psubsw mm4, mm6         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
-<a name="l02817"></a>02817                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l02818"></a>02818                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
-<a name="l02819"></a>02819                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02820"></a>02820                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02821"></a>02821                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l02822"></a>02822                         jnz            L10251           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02823"></a>02823 L10252:
-<a name="l02824"></a>02824                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l02825"></a>02825                         popa
+<a name="l02618"></a>02618                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l02619"></a>02619                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l02620"></a>02620                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l02621"></a>02621                         istart = length & 0xfffffff8;
+<a name="l02622"></a>02622                         cursrc1 = &Src1[istart];
+<a name="l02623"></a>02623                         curdest = &Dest[istart];
+<a name="l02624"></a>02624                 } <span class="keywordflow">else</span> {
+<a name="l02625"></a>02625                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l02626"></a>02626                         <span class="keywordflow">return</span> (0);
+<a name="l02627"></a>02627                 }
+<a name="l02628"></a>02628         } <span class="keywordflow">else</span> {
+<a name="l02629"></a>02629                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l02630"></a>02630                 istart = 0;
+<a name="l02631"></a>02631                 cursrc1 = Src1;
+<a name="l02632"></a>02632                 curdest = Dest;
+<a name="l02633"></a>02633         }
+<a name="l02634"></a>02634 
+<a name="l02635"></a>02635         <span class="comment">/* C routine to process image */</span>
+<a name="l02636"></a>02636         icursrc1=(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *)cursrc1;
+<a name="l02637"></a>02637         icurdest=(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *)curdest;
+<a name="l02638"></a>02638         <span class="keywordflow">for</span> (i = istart; i < length; i += 4) {
+<a name="l02639"></a>02639                 <span class="keywordflow">if</span> ((i+4)<length) {
+<a name="l02640"></a>02640                         result = ((<span class="keywordtype">unsigned</span> int)*icursrc1 >> N);
+<a name="l02641"></a>02641                         *icurdest = result;
+<a name="l02642"></a>02642                 }
+<a name="l02643"></a>02643                 <span class="comment">/* Advance pointers */</span>
+<a name="l02644"></a>02644                 icursrc1++;
+<a name="l02645"></a>02645                 icurdest++;
+<a name="l02646"></a>02646         }
+<a name="l02647"></a>02647 
+<a name="l02648"></a>02648         <span class="keywordflow">return</span> (0);
+<a name="l02649"></a>02649 }
+<a name="l02650"></a>02650 
+<a name="l02661"></a>02661 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterMultByByteMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> C)
+<a name="l02662"></a>02662 {
+<a name="l02663"></a>02663 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l02664"></a>02664 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l02665"></a>02665 <span class="preprocessor"></span>        __asm
+<a name="l02666"></a>02666         {
+<a name="l02667"></a>02667                 pusha
+<a name="l02668"></a>02668                         <span class="comment">/* ** Duplicate C in 4 words of MM1 ** */</span>
+<a name="l02669"></a>02669                         mov al, C       <span class="comment">/* load C into AL */</span>
+<a name="l02670"></a>02670                         xor ah, ah      <span class="comment">/* zero AH */</span>
+<a name="l02671"></a>02671                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l02672"></a>02672                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l02673"></a>02673                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l02674"></a>02674                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
+<a name="l02675"></a>02675                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
+<a name="l02676"></a>02676                         punpckldq mm1, mm2      <span class="comment">/* fill higher words of MM1 with C */</span>
+<a name="l02677"></a>02677                         pxor mm0, mm0           <span class="comment">/* zero MM0 register */</span>
+<a name="l02678"></a>02678                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l02679"></a>02679                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l02680"></a>02680                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l02681"></a>02681                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l02682"></a>02682                         cmp al, 128     <span class="comment">/* if (C <= 128) execute more efficient code */</span>
+<a name="l02683"></a>02683                         jg             L10251
+<a name="l02684"></a>02684                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l02685"></a>02685 L10250:
+<a name="l02686"></a>02686                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
+<a name="l02687"></a>02687                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
+<a name="l02688"></a>02688                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
+<a name="l02689"></a>02689                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
+<a name="l02690"></a>02690                         pmullw mm3, mm1         <span class="comment">/* mul low  bytes of SrcDest and MM1 */</span>
+<a name="l02691"></a>02691                         pmullw mm4, mm1         <span class="comment">/* mul high bytes of SrcDest and MM1 */</span>
+<a name="l02692"></a>02692                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l02693"></a>02693                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
+<a name="l02694"></a>02694                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l02695"></a>02695                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l02696"></a>02696                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l02697"></a>02697                         jnz            L10250           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l02698"></a>02698                         jmp            L10252
+<a name="l02699"></a>02699                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l02700"></a>02700 L10251:
+<a name="l02701"></a>02701                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
+<a name="l02702"></a>02702                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
+<a name="l02703"></a>02703                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
+<a name="l02704"></a>02704                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
+<a name="l02705"></a>02705                         pmullw mm3, mm1         <span class="comment">/* mul low  bytes of SrcDest and MM1 */</span>
+<a name="l02706"></a>02706                         pmullw mm4, mm1         <span class="comment">/* mul high bytes of SrcDest and MM1 */</span>
+<a name="l02707"></a>02707                         <span class="comment">/* ** Take abs value of the results (signed words) ** */</span>
+<a name="l02708"></a>02708                         movq mm5, mm3           <span class="comment">/* copy mm3 into mm5 */</span>
+<a name="l02709"></a>02709                         movq mm6, mm4           <span class="comment">/* copy mm4 into mm6 */</span>
+<a name="l02710"></a>02710                         psraw mm5, 15           <span class="comment">/* fill mm5 words with word sign bit */</span>
+<a name="l02711"></a>02711                         psraw mm6, 15           <span class="comment">/* fill mm6 words with word sign bit */</span>
+<a name="l02712"></a>02712                         pxor mm3, mm5           <span class="comment">/* take 1's compliment of only neg words */</span>
+<a name="l02713"></a>02713                         pxor mm4, mm6           <span class="comment">/* take 1's compliment of only neg words */</span>
+<a name="l02714"></a>02714                         psubsw mm3, mm5         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
+<a name="l02715"></a>02715                         psubsw mm4, mm6         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
+<a name="l02716"></a>02716                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l02717"></a>02717                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
+<a name="l02718"></a>02718                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l02719"></a>02719                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l02720"></a>02720                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l02721"></a>02721                         jnz            L10251           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l02722"></a>02722 L10252:
+<a name="l02723"></a>02723                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l02724"></a>02724                         popa
+<a name="l02725"></a>02725         }
+<a name="l02726"></a>02726 <span class="preprocessor">#else</span>
+<a name="l02727"></a>02727 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l02728"></a>02728         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l02729"></a>02729         __m64 *mDest = (__m64*)Dest;
+<a name="l02730"></a>02730         __m64 mm0 = _m_from_int(0);                             <span class="comment">/* zero mm0 register */</span>
+<a name="l02731"></a>02731         <span class="comment">/* Duplicate C in 4 words of MM1 */</span>
+<a name="l02732"></a>02732         <span class="keywordtype">int</span> i;
+<a name="l02733"></a>02733         i = C | C<<16;
+<a name="l02734"></a>02734         __m64 mm1 = _m_from_int(i);
+<a name="l02735"></a>02735         __m64 mm2 = _m_from_int(i);
+<a name="l02736"></a>02736         mm1 = _m_punpckldq(mm1, mm2);                           <span class="comment">/* fill higher words of MM1 with C */</span>
+<a name="l02737"></a>02737         <span class="comment">// long long lli = C | C<<16 | (long long)C<<32 | (long long)C<<48;</span>
+<a name="l02738"></a>02738         <span class="comment">//__m64 mm1 = _m_from_int64(lli); // x86_64 only</span>
+<a name="l02739"></a>02739         <span class="keywordflow">if</span> (C <= 128) {                                         <span class="comment">/* if (C <= 128) execute more efficient code */</span>
+<a name="l02740"></a>02740                 <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l02741"></a>02741                         __m64 mm3, mm4;
+<a name="l02742"></a>02742                         mm3 = _m_punpcklbw(*mSrc1, mm0);        <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l02743"></a>02743                         mm4 = _m_punpckhbw(*mSrc1, mm0);        <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l02744"></a>02744                         mm3 = _m_pmullw(mm3, mm1);              <span class="comment">/* mul low  bytes of Src1 and MM1 */</span>
+<a name="l02745"></a>02745                         mm4 = _m_pmullw(mm4, mm1);              <span class="comment">/* mul high bytes of Src1 and MM1 */</span>
+<a name="l02746"></a>02746                         *mDest = _m_packuswb(mm3, mm4);         <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l02747"></a>02747                         mSrc1++;
+<a name="l02748"></a>02748                         mDest++;
+<a name="l02749"></a>02749                 }
+<a name="l02750"></a>02750         } <span class="keywordflow">else</span> {
+<a name="l02751"></a>02751                 <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l02752"></a>02752                         __m64 mm3, mm4, mm5, mm6;
+<a name="l02753"></a>02753                         mm3 = _m_punpcklbw(*mSrc1, mm0);        <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l02754"></a>02754                         mm4 = _m_punpckhbw(*mSrc1, mm0);        <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l02755"></a>02755                         mm3 = _m_pmullw(mm3, mm1);              <span class="comment">/* mul low  bytes of Src1 and MM1 */</span>
+<a name="l02756"></a>02756                         mm4 = _m_pmullw(mm4, mm1);              <span class="comment">/* mul high bytes of Src1 and MM1 */</span>
+<a name="l02757"></a>02757                         <span class="comment">/* Take abs value of the results (signed words) */</span>
+<a name="l02758"></a>02758                         mm5 = _m_psrawi(mm3, 15);               <span class="comment">/* fill mm5 words with word sign bit */</span>
+<a name="l02759"></a>02759                         mm6 = _m_psrawi(mm4, 15);               <span class="comment">/* fill mm6 words with word sign bit */</span>
+<a name="l02760"></a>02760                         mm3 = _m_pxor(mm3, mm5);                <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l02761"></a>02761                         mm4 = _m_pxor(mm4, mm6);                <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l02762"></a>02762                         mm3 = _m_psubsw(mm3, mm5);              <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l02763"></a>02763                         mm4 = _m_psubsw(mm4, mm6);              <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l02764"></a>02764                         *mDest = _m_packuswb(mm3, mm4);         <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l02765"></a>02765                         mSrc1++;
+<a name="l02766"></a>02766                         mDest++;
+<a name="l02767"></a>02767                 }
+<a name="l02768"></a>02768         }
+<a name="l02769"></a>02769         _m_empty();                                             <span class="comment">/* clean MMX state */</span>
+<a name="l02770"></a>02770 <span class="preprocessor">#endif</span>
+<a name="l02771"></a>02771 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l02772"></a>02772 <span class="preprocessor">#else</span>
+<a name="l02773"></a>02773 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l02774"></a>02774 <span class="preprocessor">#endif</span>
+<a name="l02775"></a>02775 <span class="preprocessor"></span>}
+<a name="l02776"></a>02776 
+<a name="l02787"></a><a class="code" href="_s_d_l__image_filter_8h.html#add06bb6ea7847fc13a3041ddceb4ac3c">02787</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a06f7a19d6e2fc89d7b48cc45d715806d" title="Filter using MultByByte: D = saturation255(S * C)">SDL_imageFilterMultByByte</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</spa [...]
+<a name="l02788"></a>02788 {
+<a name="l02789"></a>02789         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l02790"></a>02790         <span class="keywordtype">int</span> iC;
+<a name="l02791"></a>02791         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
+<a name="l02792"></a>02792         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
+<a name="l02793"></a>02793         <span class="keywordtype">int</span> result;
+<a name="l02794"></a>02794 
+<a name="l02795"></a>02795         <span class="comment">/* Validate input parameters */</span>
+<a name="l02796"></a>02796         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l02797"></a>02797                 <span class="keywordflow">return</span>(-1);
+<a name="l02798"></a>02798         <span class="keywordflow">if</span> (length == 0)
+<a name="l02799"></a>02799                 <span class="keywordflow">return</span>(0);
+<a name="l02800"></a>02800 
+<a name="l02801"></a>02801         <span class="comment">/* Special case: C==1 */</span>
+<a name="l02802"></a>02802         <span class="keywordflow">if</span> (C == 1) {
+<a name="l02803"></a>02803                 memcpy(Src1, Dest, length);
+<a name="l02804"></a>02804                 <span class="keywordflow">return</span> (0); 
+<a name="l02805"></a>02805         }
+<a name="l02806"></a>02806 
+<a name="l02807"></a>02807         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l02808"></a>02808 
+<a name="l02809"></a>02809                 SDL_imageFilterMultByByteMMX(Src1, Dest, length, C);
+<a name="l02810"></a>02810 
+<a name="l02811"></a>02811                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l02812"></a>02812                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l02813"></a>02813                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l02814"></a>02814                         istart = length & 0xfffffff8;
+<a name="l02815"></a>02815                         cursrc1 = &Src1[istart];
+<a name="l02816"></a>02816                         curdest = &Dest[istart];
+<a name="l02817"></a>02817                 } <span class="keywordflow">else</span> {
+<a name="l02818"></a>02818                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l02819"></a>02819                         <span class="keywordflow">return</span> (0);
+<a name="l02820"></a>02820                 }
+<a name="l02821"></a>02821         } <span class="keywordflow">else</span> {
+<a name="l02822"></a>02822                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l02823"></a>02823                 istart = 0;
+<a name="l02824"></a>02824                 cursrc1 = Src1;
+<a name="l02825"></a>02825                 curdest = Dest;
 <a name="l02826"></a>02826         }
-<a name="l02827"></a>02827 <span class="preprocessor">#else</span>
-<a name="l02828"></a>02828 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l02829"></a>02829                 (<span class="stringliteral">"pusha              \n\t"</span>
-<a name="l02830"></a>02830                 <span class="comment">/* ** Duplicate C in 4 words of MM1 ** */</span>
-<a name="l02831"></a>02831                 <span class="stringliteral">"mov           %3, %%al \n\t"</span>   <span class="comment">/* load C into AL */</span>
-<a name="l02832"></a>02832                 <span class="stringliteral">"xor         %%ah, %%ah \n\t"</span>   <span class="comment">/* zero AH */</span>
-<a name="l02833"></a>02833                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l02834"></a>02834                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l02835"></a>02835                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l02836"></a>02836                 <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l02837"></a>02837                 <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l02838"></a>02838                 <span class="stringliteral">"punpckldq %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* fill higher words of MM1 with C */</span>
-<a name="l02839"></a>02839                 <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>   <span class="comment">/* zero MM0 register */</span>
-<a name="l02840"></a>02840                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02841"></a>02841                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l02842"></a>02842                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02843"></a>02843                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02844"></a>02844                 <span class="stringliteral">"cmp         $128, %%al \n\t"</span>   <span class="comment">/* if (C <= 128) execute more efficient code */</span>
-<a name="l02845"></a>02845                 <span class="stringliteral">"jg                  2f \n\t"</span> <span class="stringliteral">".align 16              \n\t"</span>     <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02846"></a>02846                 <span class="stringliteral">"1: movq (%%eax), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l02847"></a>02847                 <span class="stringliteral">"movq      %%mm3, %%mm4 \n\t"</span>   <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l02848"></a>02848                 <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l02849"></a>02849                 <span class="stringliteral">"punpckhbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l02850"></a>02850                 <span class="stringliteral">"pmullw    %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* mul low  bytes of SrcDest and MM1 */</span>
-<a name="l02851"></a>02851                 <span class="stringliteral">"pmullw    %%mm1, %%mm4 \n\t"</span>   <span class="comment">/* mul high bytes of SrcDest and MM1 */</span>
-<a name="l02852"></a>02852                 <span class="stringliteral">"packuswb  %%mm4, %%mm3 \n\t"</span>   <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l02853"></a>02853                 <span class="stringliteral">"movq    %%mm3, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l02854"></a>02854                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02855"></a>02855                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02856"></a>02856                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l02857"></a>02857                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02858"></a>02858                 <span class="stringliteral">"jmp                 3f \n\t"</span> <span class="stringliteral">".align 16              \n\t"</span>     <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02859"></a>02859                 <span class="stringliteral">"2: movq (%%eax), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l02860"></a>02860                 <span class="stringliteral">"movq      %%mm3, %%mm4 \n\t"</span>   <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l02861"></a>02861                 <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l02862"></a>02862                 <span class="stringliteral">"punpckhbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l02863"></a>02863                 <span class="stringliteral">"pmullw    %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* mul low  bytes of SrcDest and MM1 */</span>
-<a name="l02864"></a>02864                 <span class="stringliteral">"pmullw    %%mm1, %%mm4 \n\t"</span>   <span class="comment">/* mul high bytes of SrcDest and MM1 */</span>
-<a name="l02865"></a>02865                 <span class="comment">/* ** Take abs value of the results (signed words) ** */</span>
-<a name="l02866"></a>02866                 <span class="stringliteral">"movq      %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* copy mm3 into mm5 */</span>
-<a name="l02867"></a>02867                 <span class="stringliteral">"movq      %%mm4, %%mm6 \n\t"</span>   <span class="comment">/* copy mm4 into mm6 */</span>
-<a name="l02868"></a>02868                 <span class="stringliteral">"psraw       $15, %%mm5 \n\t"</span>   <span class="comment">/* fill mm5 words with word sign bit */</span>
-<a name="l02869"></a>02869                 <span class="stringliteral">"psraw       $15, %%mm6 \n\t"</span>   <span class="comment">/* fill mm6 words with word sign bit */</span>
-<a name="l02870"></a>02870                 <span class="stringliteral">"pxor      %%mm5, %%mm3 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l02871"></a>02871                 <span class="stringliteral">"pxor      %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l02872"></a>02872                 <span class="stringliteral">"psubsw    %%mm5, %%mm3 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l02873"></a>02873                 <span class="stringliteral">"psubsw    %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l02874"></a>02874                 <span class="stringliteral">"packuswb  %%mm4, %%mm3 \n\t"</span>   <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l02875"></a>02875                 <span class="stringliteral">"movq    %%mm3, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l02876"></a>02876                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l02877"></a>02877                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l02878"></a>02878                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l02879"></a>02879                 <span class="stringliteral">"jnz                 2b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l02880"></a>02880                 <span class="stringliteral">"3: emms               \n\t"</span>    <span class="comment">/* exit MMX state */</span>
-<a name="l02881"></a>02881                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l02882"></a>02882                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l02883"></a>02883                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l02884"></a>02884                 <span class="stringliteral">"m"</span>(C)                  <span class="comment">/* %3 */</span>
-<a name="l02885"></a>02885                 );
-<a name="l02886"></a>02886 <span class="preprocessor">#endif</span>
-<a name="l02887"></a>02887 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l02888"></a>02888 <span class="preprocessor">#else</span>
-<a name="l02889"></a>02889 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l02890"></a>02890 <span class="preprocessor">#endif</span>
-<a name="l02891"></a>02891 <span class="preprocessor"></span>}
-<a name="l02892"></a>02892 
-<a name="l02903"></a><a class="code" href="_s_d_l__image_filter_8h.html#add06bb6ea7847fc13a3041ddceb4ac3c">02903</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a06f7a19d6e2fc89d7b48cc45d715806d" title="Filter using MultByByte: D = saturation255(S * C)">SDL_imageFilterMultByByte</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</spa [...]
-<a name="l02904"></a>02904 {
-<a name="l02905"></a>02905         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l02906"></a>02906         <span class="keywordtype">int</span> iC;
-<a name="l02907"></a>02907         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
-<a name="l02908"></a>02908         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
-<a name="l02909"></a>02909         <span class="keywordtype">int</span> result;
-<a name="l02910"></a>02910 
-<a name="l02911"></a>02911         <span class="comment">/* Validate input parameters */</span>
-<a name="l02912"></a>02912         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l02913"></a>02913                 <span class="keywordflow">return</span>(-1);
-<a name="l02914"></a>02914         <span class="keywordflow">if</span> (length == 0)
-<a name="l02915"></a>02915                 <span class="keywordflow">return</span>(0);
-<a name="l02916"></a>02916 
-<a name="l02917"></a>02917         <span class="comment">/* Special case: C==1 */</span>
-<a name="l02918"></a>02918         <span class="keywordflow">if</span> (C == 1) {
-<a name="l02919"></a>02919                 memcpy(Src1, Dest, length);
-<a name="l02920"></a>02920                 <span class="keywordflow">return</span> (0); 
-<a name="l02921"></a>02921         }
-<a name="l02922"></a>02922 
-<a name="l02923"></a>02923         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l02924"></a>02924 
-<a name="l02925"></a>02925                 <a class="code" href="_s_d_l__image_filter_8c.html#ad18d23ec352f7508f89e47cff9c9a4ea" title="Internal MMX Filter using MultByByte: D = saturation255(S * C)">SDL_imageFilterMultByByteMMX</a>(Src1, Dest, length, C);
-<a name="l02926"></a>02926 
-<a name="l02927"></a>02927                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l02928"></a>02928                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l02929"></a>02929                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l02930"></a>02930                         istart = length & 0xfffffff8;
-<a name="l02931"></a>02931                         cursrc1 = &Src1[istart];
-<a name="l02932"></a>02932                         curdest = &Dest[istart];
-<a name="l02933"></a>02933                 } <span class="keywordflow">else</span> {
-<a name="l02934"></a>02934                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l02935"></a>02935                         <span class="keywordflow">return</span> (0);
-<a name="l02936"></a>02936                 }
-<a name="l02937"></a>02937         } <span class="keywordflow">else</span> {
-<a name="l02938"></a>02938                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l02939"></a>02939                 istart = 0;
-<a name="l02940"></a>02940                 cursrc1 = Src1;
-<a name="l02941"></a>02941                 curdest = Dest;
-<a name="l02942"></a>02942         }
-<a name="l02943"></a>02943 
-<a name="l02944"></a>02944         <span class="comment">/* C routine to process image */</span>
-<a name="l02945"></a>02945         iC = (int) C;
-<a name="l02946"></a>02946         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l02947"></a>02947                 result = (int) *cursrc1 * iC;
-<a name="l02948"></a>02948                 <span class="keywordflow">if</span> (result > 255)
-<a name="l02949"></a>02949                         result = 255;
-<a name="l02950"></a>02950                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l02951"></a>02951                 <span class="comment">/* Advance pointers */</span>
-<a name="l02952"></a>02952                 cursrc1++;
-<a name="l02953"></a>02953                 curdest++;
-<a name="l02954"></a>02954         }
-<a name="l02955"></a>02955 
-<a name="l02956"></a>02956         <span class="keywordflow">return</span> (0);
-<a name="l02957"></a>02957 }
-<a name="l02958"></a>02958 
-<a name="l02970"></a><a class="code" href="_s_d_l__image_filter_8c.html#a80d18182b54de0ec1f8d9a79dc5b879a">02970</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a80d18182b54de0ec1f8d9a79dc5b879a" title="Internal MMX Filter using ShiftRightAndMultByByteMMX: D = saturation255((S >> N) * C)...">SDL_imageFilterShiftRightAndMultByByteMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="key [...]
-<a name="l02971"></a>02971                                                                                           <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> C)
-<a name="l02972"></a>02972 {
-<a name="l02973"></a>02973 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l02974"></a>02974 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l02975"></a>02975 <span class="preprocessor"></span>        __asm
-<a name="l02976"></a>02976         {
-<a name="l02977"></a>02977                 pusha
-<a name="l02978"></a>02978                         <span class="comment">/* ** Duplicate C in 4 words of MM1 ** */</span>
-<a name="l02979"></a>02979                         mov al, C       <span class="comment">/* load C into AL */</span>
-<a name="l02980"></a>02980                         xor ah, ah      <span class="comment">/* zero AH */</span>
-<a name="l02981"></a>02981                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l02982"></a>02982                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l02983"></a>02983                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l02984"></a>02984                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l02985"></a>02985                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l02986"></a>02986                         punpckldq mm1, mm2      <span class="comment">/* fill higher words of MM1 with C */</span>
-<a name="l02987"></a>02987                         xor ecx, ecx    <span class="comment">/* zero ECX */</span>
-<a name="l02988"></a>02988                         mov cl, N       <span class="comment">/* load N into CL */</span>
-<a name="l02989"></a>02989                         movd mm7, ecx           <span class="comment">/* copy N into MM7 */</span>
-<a name="l02990"></a>02990                         pxor mm0, mm0           <span class="comment">/* zero MM0 register */</span>
-<a name="l02991"></a>02991                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l02992"></a>02992                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l02993"></a>02993                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l02994"></a>02994                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l02995"></a>02995                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l02996"></a>02996 L1026:
-<a name="l02997"></a>02997                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l02998"></a>02998                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l02999"></a>02999                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l03000"></a>03000                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l03001"></a>03001                         psrlw mm3, mm7          <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the right */</span>
-<a name="l03002"></a>03002                         psrlw mm4, mm7          <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the right */</span>
-<a name="l03003"></a>03003                         pmullw mm3, mm1         <span class="comment">/* mul low  bytes of SrcDest by MM1 */</span>
-<a name="l03004"></a>03004                         pmullw mm4, mm1         <span class="comment">/* mul high bytes of SrcDest by MM1 */</span>
-<a name="l03005"></a>03005                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l03006"></a>03006                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
-<a name="l03007"></a>03007                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03008"></a>03008                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03009"></a>03009                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l03010"></a>03010                         jnz             L1026           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03011"></a>03011                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l03012"></a>03012                         popa
-<a name="l03013"></a>03013         }
-<a name="l03014"></a>03014 <span class="preprocessor">#else</span>
-<a name="l03015"></a>03015 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l03016"></a>03016                 (<span class="stringliteral">"pusha              \n\t"</span>
-<a name="l03017"></a>03017                 <span class="comment">/* ** Duplicate C in 4 words of MM1 ** */</span>
-<a name="l03018"></a>03018                 <span class="stringliteral">"mov           %4, %%al \n\t"</span>   <span class="comment">/* load C into AL */</span>
-<a name="l03019"></a>03019                 <span class="stringliteral">"xor         %%ah, %%ah \n\t"</span>   <span class="comment">/* zero AH */</span>
-<a name="l03020"></a>03020                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l03021"></a>03021                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l03022"></a>03022                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l03023"></a>03023                 <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l03024"></a>03024                 <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l03025"></a>03025                 <span class="stringliteral">"punpckldq %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* fill higher words of MM1 with C */</span>
-<a name="l03026"></a>03026                 <span class="stringliteral">"xor       %%ecx, %%ecx \n\t"</span>   <span class="comment">/* zero ECX */</span>
-<a name="l03027"></a>03027                 <span class="stringliteral">"mov           %3, %%cl \n\t"</span>   <span class="comment">/* load N into CL */</span>
-<a name="l03028"></a>03028                 <span class="stringliteral">"movd      %%ecx, %%mm7 \n\t"</span>   <span class="comment">/* copy N into MM7 */</span>
-<a name="l03029"></a>03029                 <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>   <span class="comment">/* zero MM0 register */</span>
-<a name="l03030"></a>03030                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03031"></a>03031                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l03032"></a>03032                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03033"></a>03033                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03034"></a>03034                 <span class="stringliteral">".align 16             \n\t"</span>    <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03035"></a>03035                 <span class="stringliteral">"1: movq (%%eax), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l03036"></a>03036                 <span class="stringliteral">"movq      %%mm3, %%mm4 \n\t"</span>   <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l03037"></a>03037                 <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l03038"></a>03038                 <span class="stringliteral">"punpckhbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l03039"></a>03039                 <span class="stringliteral">"psrlw     %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the right */</span>
-<a name="l03040"></a>03040                 <span class="stringliteral">"psrlw     %%mm7, %%mm4 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the right */</span>
-<a name="l03041"></a>03041                 <span class="stringliteral">"pmullw    %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* mul low  bytes of SrcDest by MM1 */</span>
-<a name="l03042"></a>03042                 <span class="stringliteral">"pmullw    %%mm1, %%mm4 \n\t"</span>   <span class="comment">/* mul high bytes of SrcDest by MM1 */</span>
-<a name="l03043"></a>03043                 <span class="stringliteral">"packuswb  %%mm4, %%mm3 \n\t"</span>   <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l03044"></a>03044                 <span class="stringliteral">"movq    %%mm3, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l03045"></a>03045                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03046"></a>03046                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03047"></a>03047                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l03048"></a>03048                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03049"></a>03049                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l03050"></a>03050                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l03051"></a>03051                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l03052"></a>03052                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l03053"></a>03053                 <span class="stringliteral">"m"</span>(N),                 <span class="comment">/* %3 */</span>
-<a name="l03054"></a>03054                 <span class="stringliteral">"m"</span>(C)                  <span class="comment">/* %4 */</span>
-<a name="l03055"></a>03055                 );
-<a name="l03056"></a>03056 <span class="preprocessor">#endif</span>
-<a name="l03057"></a>03057 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l03058"></a>03058 <span class="preprocessor">#else</span>
-<a name="l03059"></a>03059 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l03060"></a>03060 <span class="preprocessor">#endif</span>
-<a name="l03061"></a>03061 <span class="preprocessor"></span>}
-<a name="l03062"></a>03062 
-<a name="l03074"></a><a class="code" href="_s_d_l__image_filter_8h.html#a40e1e21ede9a7ed1eddac2cdbfd0b079">03074</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a0713d6c267fba9756d6beae81e89f9e4" title="Filter using ShiftRightAndMultByByte: D = saturation255((S >> N) * C)">SDL_imageFilterShiftRightAndMultByByte</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</sp [...]
-<a name="l03075"></a>03075                                                                                    <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> C)
-<a name="l03076"></a>03076 {
-<a name="l03077"></a>03077         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l03078"></a>03078         <span class="keywordtype">int</span> iC;
-<a name="l03079"></a>03079         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
-<a name="l03080"></a>03080         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
-<a name="l03081"></a>03081         <span class="keywordtype">int</span> result;
-<a name="l03082"></a>03082 
-<a name="l03083"></a>03083         <span class="comment">/* Validate input parameters */</span>
-<a name="l03084"></a>03084         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l03085"></a>03085                 <span class="keywordflow">return</span>(-1);
-<a name="l03086"></a>03086         <span class="keywordflow">if</span> (length == 0)
-<a name="l03087"></a>03087                 <span class="keywordflow">return</span>(0);
-<a name="l03088"></a>03088 
-<a name="l03089"></a>03089         <span class="comment">/* Check shift */</span>
-<a name="l03090"></a>03090         <span class="keywordflow">if</span> (N > 8) {
-<a name="l03091"></a>03091                 <span class="keywordflow">return</span> (-1);
-<a name="l03092"></a>03092         }
-<a name="l03093"></a>03093 
-<a name="l03094"></a>03094         <span class="comment">/* Special case: N==0 && C==1 */</span>
-<a name="l03095"></a>03095         <span class="keywordflow">if</span> ((N == 0) && (C == 1)) {
-<a name="l03096"></a>03096                 memcpy(Src1, Dest, length);
-<a name="l03097"></a>03097                 <span class="keywordflow">return</span> (0); 
-<a name="l03098"></a>03098         }
-<a name="l03099"></a>03099 
-<a name="l03100"></a>03100         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l03101"></a>03101 
-<a name="l03102"></a>03102                 <a class="code" href="_s_d_l__image_filter_8c.html#a80d18182b54de0ec1f8d9a79dc5b879a" title="Internal MMX Filter using ShiftRightAndMultByByteMMX: D = saturation255((S >> N) * C)...">SDL_imageFilterShiftRightAndMultByByteMMX</a>(Src1, Dest, length, N, C);
-<a name="l03103"></a>03103 
-<a name="l03104"></a>03104                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l03105"></a>03105                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l03106"></a>03106                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l03107"></a>03107                         istart = length & 0xfffffff8;
-<a name="l03108"></a>03108                         cursrc1 = &Src1[istart];
-<a name="l03109"></a>03109                         curdest = &Dest[istart];
-<a name="l03110"></a>03110                 } <span class="keywordflow">else</span> {
-<a name="l03111"></a>03111                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l03112"></a>03112                         <span class="keywordflow">return</span> (0);
-<a name="l03113"></a>03113                 }
-<a name="l03114"></a>03114         } <span class="keywordflow">else</span> {
-<a name="l03115"></a>03115                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l03116"></a>03116                 istart = 0;
-<a name="l03117"></a>03117                 cursrc1 = Src1;
-<a name="l03118"></a>03118                 curdest = Dest;
-<a name="l03119"></a>03119         }
-<a name="l03120"></a>03120 
-<a name="l03121"></a>03121         <span class="comment">/* C routine to process image */</span>
-<a name="l03122"></a>03122         iC = (int) C;
-<a name="l03123"></a>03123         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l03124"></a>03124                 result = (int) (*cursrc1 >> N) * iC;
-<a name="l03125"></a>03125                 <span class="keywordflow">if</span> (result > 255)
-<a name="l03126"></a>03126                         result = 255;
-<a name="l03127"></a>03127                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l03128"></a>03128                 <span class="comment">/* Advance pointers */</span>
-<a name="l03129"></a>03129                 cursrc1++;
-<a name="l03130"></a>03130                 curdest++;
-<a name="l03131"></a>03131         }
-<a name="l03132"></a>03132 
-<a name="l03133"></a>03133         <span class="keywordflow">return</span> (0);
-<a name="l03134"></a>03134 }
-<a name="l03135"></a>03135 
-<a name="l03147"></a><a class="code" href="_s_d_l__image_filter_8c.html#a0d383d58c9a5262dbac636f6ebe26b62">03147</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a0d383d58c9a5262dbac636f6ebe26b62" title="Internal MMX Filter using ShiftLeftByte: D = (S << N)">SDL_imageFilterShiftLeftByteMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywor [...]
-<a name="l03148"></a>03148                                                                         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Mask)
-<a name="l03149"></a>03149 {
-<a name="l03150"></a>03150 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l03151"></a>03151 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l03152"></a>03152 <span class="preprocessor"></span>        __asm
-<a name="l03153"></a>03153         {
-<a name="l03154"></a>03154                 pusha
-<a name="l03155"></a>03155                         mov edx, Mask           <span class="comment">/* load Mask address into edx */</span>
-<a name="l03156"></a>03156                         movq mm0, [edx]         <span class="comment">/* load Mask into mm0 */</span>
-<a name="l03157"></a>03157                 xor ecx, ecx    <span class="comment">/* zero ECX */</span>
-<a name="l03158"></a>03158                         mov cl, N       <span class="comment">/* load loop counter (N) into CL */</span>
-<a name="l03159"></a>03159                         movd mm3, ecx           <span class="comment">/* copy (N) into MM3  */</span>
-<a name="l03160"></a>03160                         pcmpeqb mm1, mm1        <span class="comment">/* generate all 1's in mm1 */</span>
-<a name="l03161"></a>03161 L10270:                         <span class="comment">/* ** Prepare proper bit-Mask in MM1 ** */</span>
-<a name="l03162"></a>03162                 psllw mm1, 1    <span class="comment">/* shift 4 WORDS of MM1 1 bit to the left */</span>
-<a name="l03163"></a>03163                         pand mm1, mm0        <span class="comment">// apply Mask to 8 BYTES of MM1 */</span>
-<a name="l03164"></a>03164                         <span class="comment">/*  byte     0x0f, 0xdb, 0xc8 */</span>
-<a name="l03165"></a>03165                         dec cl                          <span class="comment">/* decrease loop counter */</span>
-<a name="l03166"></a>03166                         jnz            L10270           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03167"></a>03167                         <span class="comment">/* ** Shift all bytes of the image ** */</span>
-<a name="l03168"></a>03168                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03169"></a>03169                         mov edi, Dest           <span class="comment">/* load SrcDest address into edi */</span>
-<a name="l03170"></a>03170                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03171"></a>03171                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03172"></a>03172                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03173"></a>03173 L10271:
-<a name="l03174"></a>03174                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM0 */</span>
-<a name="l03175"></a>03175                 psllw mm0, mm3          <span class="comment">/* shift 4 WORDS of MM0 (N) bits to the left */</span>
-<a name="l03176"></a>03176                         pand mm0, mm1    <span class="comment">// apply proper bit-Mask to 8 BYTES of MM0 */</span>
-<a name="l03177"></a>03177                         <span class="comment">/* byte     0x0f, 0xdb, 0xc1 */</span>
-<a name="l03178"></a>03178                         movq [edi], mm0         <span class="comment">/* store result in Dest */</span>
-<a name="l03179"></a>03179                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03180"></a>03180                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03181"></a>03181                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l03182"></a>03182                         jnz            L10271           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03183"></a>03183                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l03184"></a>03184                         popa
-<a name="l03185"></a>03185         }
-<a name="l03186"></a>03186 <span class="preprocessor">#else</span>
-<a name="l03187"></a>03187 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l03188"></a>03188                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"movl         %4, %%edx \n\t"</span>        <span class="comment">/* load Mask address into edx */</span>
-<a name="l03189"></a>03189                 <span class="stringliteral">"movq    (%%edx), %%mm0 \n\t"</span>   <span class="comment">/* load Mask into mm0 */</span>
-<a name="l03190"></a>03190                 <span class="stringliteral">"xor       %%ecx, %%ecx \n\t"</span>   <span class="comment">/* zero ECX */</span>
-<a name="l03191"></a>03191                 <span class="stringliteral">"mov           %3, %%cl \n\t"</span>   <span class="comment">/* load loop counter (N) into CL */</span>
-<a name="l03192"></a>03192                 <span class="stringliteral">"movd      %%ecx, %%mm3 \n\t"</span>   <span class="comment">/* copy (N) into MM3  */</span>
-<a name="l03193"></a>03193                 <span class="stringliteral">"pcmpeqb   %%mm1, %%mm1 \n\t"</span>   <span class="comment">/* generate all 1's in mm1 */</span>
-<a name="l03194"></a>03194                 <span class="stringliteral">"1:                     \n\t"</span>   <span class="comment">/* ** Prepare proper bit-Mask in MM1 ** */</span>
-<a name="l03195"></a>03195                 <span class="stringliteral">"psllw        $1, %%mm1 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM1 1 bit to the left */</span>
-<a name="l03196"></a>03196                 <span class="comment">/*    "pand      %%mm0, %%mm1 \n\t"    // apply Mask to 8 BYTES of MM1 */</span>
-<a name="l03197"></a>03197                 <span class="stringliteral">".byte     0x0f, 0xdb, 0xc8 \n\t"</span> <span class="stringliteral">"dec %%cl               \n\t"</span> <span class="comment">/* decrease loop counter */</span>
-<a name="l03198"></a>03198                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03199"></a>03199                 <span class="comment">/* ** Shift all bytes of the image ** */</span>
-<a name="l03200"></a>03200                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03201"></a>03201                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load SrcDest address into edi */</span>
-<a name="l03202"></a>03202                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03203"></a>03203                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03204"></a>03204                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03205"></a>03205                 <span class="stringliteral">"2: movq (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into MM0 */</span>
-<a name="l03206"></a>03206                 <span class="stringliteral">"psllw     %%mm3, %%mm0 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM0 (N) bits to the left */</span>
-<a name="l03207"></a>03207                 <span class="comment">/*    "pand      %%mm1, %%mm0 \n\t"    // apply proper bit-Mask to 8 BYTES of MM0 */</span>
-<a name="l03208"></a>03208                 <span class="stringliteral">".byte     0x0f, 0xdb, 0xc1 \n\t"</span> <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span> <span class="comment">/* store result in Dest */</span>
-<a name="l03209"></a>03209                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03210"></a>03210                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03211"></a>03211                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l03212"></a>03212                 <span class="stringliteral">"jnz                 2b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03213"></a>03213                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l03214"></a>03214                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l03215"></a>03215                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l03216"></a>03216                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l03217"></a>03217                 <span class="stringliteral">"m"</span>(N),                 <span class="comment">/* %3 */</span>
-<a name="l03218"></a>03218                 <span class="stringliteral">"m"</span>(Mask)                       <span class="comment">/* %4 */</span>
-<a name="l03219"></a>03219                 );
-<a name="l03220"></a>03220 <span class="preprocessor">#endif</span>
-<a name="l03221"></a>03221 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l03222"></a>03222 <span class="preprocessor">#else</span>
-<a name="l03223"></a>03223 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l03224"></a>03224 <span class="preprocessor">#endif</span>
-<a name="l03225"></a>03225 <span class="preprocessor"></span>}
-<a name="l03226"></a>03226 
-<a name="l03237"></a><a class="code" href="_s_d_l__image_filter_8h.html#ac32f1ea9acbee51c2db94224ef6f7fd2">03237</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a4561a73b249a26babc4c469ffbdae604" title="Filter using ShiftLeftByte: D = (S << N)">SDL_imageFilterShiftLeftByte</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</spa [...]
-<a name="l03238"></a>03238 {
-<a name="l03239"></a>03239         <span class="keyword">static</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Mask[8] = { 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE };
-<a name="l03240"></a>03240         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l03241"></a>03241         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdest;
-<a name="l03242"></a>03242         <span class="keywordtype">int</span> result;
-<a name="l03243"></a>03243 
-<a name="l03244"></a>03244         <span class="comment">/* Validate input parameters */</span>
-<a name="l03245"></a>03245         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l03246"></a>03246                 <span class="keywordflow">return</span>(-1);
-<a name="l03247"></a>03247         <span class="keywordflow">if</span> (length == 0)
-<a name="l03248"></a>03248                 <span class="keywordflow">return</span>(0);
-<a name="l03249"></a>03249 
-<a name="l03250"></a>03250         <span class="keywordflow">if</span> (N > 8) {
-<a name="l03251"></a>03251                 <span class="keywordflow">return</span> (-1);
-<a name="l03252"></a>03252         }
-<a name="l03253"></a>03253 
-<a name="l03254"></a>03254         <span class="comment">/* Special case: N==0 */</span>
-<a name="l03255"></a>03255         <span class="keywordflow">if</span> (N == 0) {
-<a name="l03256"></a>03256                 memcpy(Src1, Dest, length);
-<a name="l03257"></a>03257                 <span class="keywordflow">return</span> (0); 
-<a name="l03258"></a>03258         }
-<a name="l03259"></a>03259 
-<a name="l03260"></a>03260         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l03261"></a>03261 
-<a name="l03262"></a>03262                 <a class="code" href="_s_d_l__image_filter_8c.html#a0d383d58c9a5262dbac636f6ebe26b62" title="Internal MMX Filter using ShiftLeftByte: D = (S << N)">SDL_imageFilterShiftLeftByteMMX</a>(Src1, Dest, length, N, Mask);
+<a name="l02827"></a>02827 
+<a name="l02828"></a>02828         <span class="comment">/* C routine to process image */</span>
+<a name="l02829"></a>02829         iC = (int) C;
+<a name="l02830"></a>02830         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l02831"></a>02831                 result = (int) *cursrc1 * iC;
+<a name="l02832"></a>02832                 <span class="keywordflow">if</span> (result > 255)
+<a name="l02833"></a>02833                         result = 255;
+<a name="l02834"></a>02834                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l02835"></a>02835                 <span class="comment">/* Advance pointers */</span>
+<a name="l02836"></a>02836                 cursrc1++;
+<a name="l02837"></a>02837                 curdest++;
+<a name="l02838"></a>02838         }
+<a name="l02839"></a>02839 
+<a name="l02840"></a>02840         <span class="keywordflow">return</span> (0);
+<a name="l02841"></a>02841 }
+<a name="l02842"></a>02842 
+<a name="l02854"></a>02854 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterShiftRightAndMultByByteMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> N,
+<a name="l02855"></a>02855                                                                                           <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> C)
+<a name="l02856"></a>02856 {
+<a name="l02857"></a>02857 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l02858"></a>02858 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l02859"></a>02859 <span class="preprocessor"></span>        __asm
+<a name="l02860"></a>02860         {
+<a name="l02861"></a>02861                 pusha
+<a name="l02862"></a>02862                         <span class="comment">/* ** Duplicate C in 4 words of MM1 ** */</span>
+<a name="l02863"></a>02863                         mov al, C       <span class="comment">/* load C into AL */</span>
+<a name="l02864"></a>02864                         xor ah, ah      <span class="comment">/* zero AH */</span>
+<a name="l02865"></a>02865                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l02866"></a>02866                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l02867"></a>02867                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l02868"></a>02868                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
+<a name="l02869"></a>02869                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
+<a name="l02870"></a>02870                         punpckldq mm1, mm2      <span class="comment">/* fill higher words of MM1 with C */</span>
+<a name="l02871"></a>02871                         xor ecx, ecx    <span class="comment">/* zero ECX */</span>
+<a name="l02872"></a>02872                         mov cl, N       <span class="comment">/* load N into CL */</span>
+<a name="l02873"></a>02873                         movd mm7, ecx           <span class="comment">/* copy N into MM7 */</span>
+<a name="l02874"></a>02874                         pxor mm0, mm0           <span class="comment">/* zero MM0 register */</span>
+<a name="l02875"></a>02875                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l02876"></a>02876                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l02877"></a>02877                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l02878"></a>02878                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l02879"></a>02879                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l02880"></a>02880 L1026:
+<a name="l02881"></a>02881                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
+<a name="l02882"></a>02882                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
+<a name="l02883"></a>02883                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
+<a name="l02884"></a>02884                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
+<a name="l02885"></a>02885                         psrlw mm3, mm7          <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the right */</span>
+<a name="l02886"></a>02886                         psrlw mm4, mm7          <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the right */</span>
+<a name="l02887"></a>02887                         pmullw mm3, mm1         <span class="comment">/* mul low  bytes of SrcDest by MM1 */</span>
+<a name="l02888"></a>02888                         pmullw mm4, mm1         <span class="comment">/* mul high bytes of SrcDest by MM1 */</span>
+<a name="l02889"></a>02889                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l02890"></a>02890                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
+<a name="l02891"></a>02891                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l02892"></a>02892                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l02893"></a>02893                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l02894"></a>02894                         jnz             L1026           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l02895"></a>02895                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l02896"></a>02896                         popa
+<a name="l02897"></a>02897         }
+<a name="l02898"></a>02898 <span class="preprocessor">#else</span>
+<a name="l02899"></a>02899 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l02900"></a>02900         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l02901"></a>02901         __m64 *mDest = (__m64*)Dest;
+<a name="l02902"></a>02902         __m64 mm0 = _m_from_int(0);                     <span class="comment">/* zero mm0 register */</span>
+<a name="l02903"></a>02903         <span class="comment">/* Duplicate C in 4 words of MM1 */</span>
+<a name="l02904"></a>02904         <span class="keywordtype">int</span> i;
+<a name="l02905"></a>02905         i = (C<<16)|C;
+<a name="l02906"></a>02906         __m64 mm1 = _m_from_int(i);
+<a name="l02907"></a>02907         __m64 mm2 = _m_from_int(i);
+<a name="l02908"></a>02908         mm1 = _m_punpckldq(mm1, mm2);                   <span class="comment">/* fill higher words of MM1 with C */</span>
+<a name="l02909"></a>02909         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l02910"></a>02910                 __m64 mm3, mm4, mm5, mm6;
+<a name="l02911"></a>02911                 mm3 = _m_punpcklbw(*mSrc1, mm0);        <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l02912"></a>02912                 mm4 = _m_punpckhbw(*mSrc1, mm0);        <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l02913"></a>02913                 mm3 = _m_psrlwi(mm3, N);                <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the right */</span>
+<a name="l02914"></a>02914                 mm4 = _m_psrlwi(mm4, N);                <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the right */</span>
+<a name="l02915"></a>02915                 mm3 = _m_pmullw(mm3, mm1);              <span class="comment">/* mul low  bytes of Src1 and MM1 */</span>
+<a name="l02916"></a>02916                 mm4 = _m_pmullw(mm4, mm1);              <span class="comment">/* mul high bytes of Src1 and MM1 */</span>
+<a name="l02917"></a>02917                 *mDest = _m_packuswb(mm3, mm4);         <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l02918"></a>02918                 mSrc1++;
+<a name="l02919"></a>02919                 mDest++;
+<a name="l02920"></a>02920         }
+<a name="l02921"></a>02921         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l02922"></a>02922 <span class="preprocessor">#endif</span>
+<a name="l02923"></a>02923 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l02924"></a>02924 <span class="preprocessor">#else</span>
+<a name="l02925"></a>02925 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l02926"></a>02926 <span class="preprocessor">#endif</span>
+<a name="l02927"></a>02927 <span class="preprocessor"></span>}
+<a name="l02928"></a>02928 
+<a name="l02940"></a><a class="code" href="_s_d_l__image_filter_8h.html#a40e1e21ede9a7ed1eddac2cdbfd0b079">02940</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a0713d6c267fba9756d6beae81e89f9e4" title="Filter using ShiftRightAndMultByByte: D = saturation255((S >> N) * C)">SDL_imageFilterShiftRightAndMultByByte</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</sp [...]
+<a name="l02941"></a>02941                                                                                    <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> C)
+<a name="l02942"></a>02942 {
+<a name="l02943"></a>02943         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l02944"></a>02944         <span class="keywordtype">int</span> iC;
+<a name="l02945"></a>02945         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
+<a name="l02946"></a>02946         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
+<a name="l02947"></a>02947         <span class="keywordtype">int</span> result;
+<a name="l02948"></a>02948 
+<a name="l02949"></a>02949         <span class="comment">/* Validate input parameters */</span>
+<a name="l02950"></a>02950         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l02951"></a>02951                 <span class="keywordflow">return</span>(-1);
+<a name="l02952"></a>02952         <span class="keywordflow">if</span> (length == 0)
+<a name="l02953"></a>02953                 <span class="keywordflow">return</span>(0);
+<a name="l02954"></a>02954 
+<a name="l02955"></a>02955         <span class="comment">/* Check shift */</span>
+<a name="l02956"></a>02956         <span class="keywordflow">if</span> (N > 8) {
+<a name="l02957"></a>02957                 <span class="keywordflow">return</span> (-1);
+<a name="l02958"></a>02958         }
+<a name="l02959"></a>02959 
+<a name="l02960"></a>02960         <span class="comment">/* Special case: N==0 && C==1 */</span>
+<a name="l02961"></a>02961         <span class="keywordflow">if</span> ((N == 0) && (C == 1)) {
+<a name="l02962"></a>02962                 memcpy(Src1, Dest, length);
+<a name="l02963"></a>02963                 <span class="keywordflow">return</span> (0); 
+<a name="l02964"></a>02964         }
+<a name="l02965"></a>02965 
+<a name="l02966"></a>02966         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l02967"></a>02967 
+<a name="l02968"></a>02968                 SDL_imageFilterShiftRightAndMultByByteMMX(Src1, Dest, length, N, C);
+<a name="l02969"></a>02969 
+<a name="l02970"></a>02970                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l02971"></a>02971                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l02972"></a>02972                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l02973"></a>02973                         istart = length & 0xfffffff8;
+<a name="l02974"></a>02974                         cursrc1 = &Src1[istart];
+<a name="l02975"></a>02975                         curdest = &Dest[istart];
+<a name="l02976"></a>02976                 } <span class="keywordflow">else</span> {
+<a name="l02977"></a>02977                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l02978"></a>02978                         <span class="keywordflow">return</span> (0);
+<a name="l02979"></a>02979                 }
+<a name="l02980"></a>02980         } <span class="keywordflow">else</span> {
+<a name="l02981"></a>02981                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l02982"></a>02982                 istart = 0;
+<a name="l02983"></a>02983                 cursrc1 = Src1;
+<a name="l02984"></a>02984                 curdest = Dest;
+<a name="l02985"></a>02985         }
+<a name="l02986"></a>02986 
+<a name="l02987"></a>02987         <span class="comment">/* C routine to process image */</span>
+<a name="l02988"></a>02988         iC = (int) C;
+<a name="l02989"></a>02989         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l02990"></a>02990                 result = (int) (*cursrc1 >> N) * iC;
+<a name="l02991"></a>02991                 <span class="keywordflow">if</span> (result > 255)
+<a name="l02992"></a>02992                         result = 255;
+<a name="l02993"></a>02993                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l02994"></a>02994                 <span class="comment">/* Advance pointers */</span>
+<a name="l02995"></a>02995                 cursrc1++;
+<a name="l02996"></a>02996                 curdest++;
+<a name="l02997"></a>02997         }
+<a name="l02998"></a>02998 
+<a name="l02999"></a>02999         <span class="keywordflow">return</span> (0);
+<a name="l03000"></a>03000 }
+<a name="l03001"></a>03001 
+<a name="l03013"></a>03013 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterShiftLeftByteMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> N,
+<a name="l03014"></a>03014                                                                         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Mask)
+<a name="l03015"></a>03015 {
+<a name="l03016"></a>03016 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l03017"></a>03017 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l03018"></a>03018 <span class="preprocessor"></span>        __asm
+<a name="l03019"></a>03019         {
+<a name="l03020"></a>03020                 pusha
+<a name="l03021"></a>03021                         mov edx, Mask           <span class="comment">/* load Mask address into edx */</span>
+<a name="l03022"></a>03022                         movq mm0, [edx]         <span class="comment">/* load Mask into mm0 */</span>
+<a name="l03023"></a>03023                 xor ecx, ecx    <span class="comment">/* zero ECX */</span>
+<a name="l03024"></a>03024                         mov cl, N       <span class="comment">/* load loop counter (N) into CL */</span>
+<a name="l03025"></a>03025                         movd mm3, ecx           <span class="comment">/* copy (N) into MM3  */</span>
+<a name="l03026"></a>03026                         pcmpeqb mm1, mm1        <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l03027"></a>03027 L10270:                         <span class="comment">/* ** Prepare proper bit-Mask in MM1 ** */</span>
+<a name="l03028"></a>03028                 psllw mm1, 1    <span class="comment">/* shift 4 WORDS of MM1 1 bit to the left */</span>
+<a name="l03029"></a>03029                         pand mm1, mm0        <span class="comment">// apply Mask to 8 BYTES of MM1 */</span>
+<a name="l03030"></a>03030                         <span class="comment">/*  byte     0x0f, 0xdb, 0xc8 */</span>
+<a name="l03031"></a>03031                         dec cl                          <span class="comment">/* decrease loop counter */</span>
+<a name="l03032"></a>03032                         jnz            L10270           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l03033"></a>03033                         <span class="comment">/* ** Shift all bytes of the image ** */</span>
+<a name="l03034"></a>03034                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l03035"></a>03035                         mov edi, Dest           <span class="comment">/* load SrcDest address into edi */</span>
+<a name="l03036"></a>03036                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l03037"></a>03037                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l03038"></a>03038                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l03039"></a>03039 L10271:
+<a name="l03040"></a>03040                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM0 */</span>
+<a name="l03041"></a>03041                 psllw mm0, mm3          <span class="comment">/* shift 4 WORDS of MM0 (N) bits to the left */</span>
+<a name="l03042"></a>03042                         pand mm0, mm1    <span class="comment">// apply proper bit-Mask to 8 BYTES of MM0 */</span>
+<a name="l03043"></a>03043                         <span class="comment">/* byte     0x0f, 0xdb, 0xc1 */</span>
+<a name="l03044"></a>03044                         movq [edi], mm0         <span class="comment">/* store result in Dest */</span>
+<a name="l03045"></a>03045                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l03046"></a>03046                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l03047"></a>03047                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l03048"></a>03048                         jnz            L10271           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l03049"></a>03049                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l03050"></a>03050                         popa
+<a name="l03051"></a>03051         }
+<a name="l03052"></a>03052 <span class="preprocessor">#else</span>
+<a name="l03053"></a>03053 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l03054"></a>03054         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l03055"></a>03055         __m64 *mDest = (__m64*)Dest;
+<a name="l03056"></a>03056         __m64 *mMask = (__m64*)Mask;
+<a name="l03057"></a>03057         __m64 mm1;
+<a name="l03058"></a>03058         <span class="keywordtype">int</span> i;
+<a name="l03059"></a>03059         mm1 = _m_pcmpeqb(mm1, mm1);                     <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l03060"></a>03060         <span class="comment">/* Prepare proper bit-Mask in MM1 */</span>
+<a name="l03061"></a>03061         <span class="keywordflow">for</span> (i = 0; i < N; i++) {
+<a name="l03062"></a>03062                 mm1 = _m_psllwi(mm1, 1);                <span class="comment">/* shift 4 WORDS of MM1 1 bit to the left */</span>
+<a name="l03063"></a>03063                 mm1 = _m_pand(mm1, *mMask);             <span class="comment">/* apply Mask to 8 BYTES of MM1 */</span>
+<a name="l03064"></a>03064         }
+<a name="l03065"></a>03065         <span class="comment">/* ** Shift all bytes of the image ** */</span>
+<a name="l03066"></a>03066         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l03067"></a>03067                 __m64 mm0 = _m_psllwi(*mSrc1, N);       <span class="comment">/* shift 4 WORDS of MM0 (N) bits to the left */</span>
+<a name="l03068"></a>03068                 *mDest = _m_pand(mm0, mm1);             <span class="comment">/* apply proper bit-Mask to 8 BYTES of MM0 */</span>
+<a name="l03069"></a>03069                 mSrc1++;
+<a name="l03070"></a>03070                 mDest++;
+<a name="l03071"></a>03071         }
+<a name="l03072"></a>03072         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l03073"></a>03073 <span class="preprocessor">#endif</span>
+<a name="l03074"></a>03074 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l03075"></a>03075 <span class="preprocessor">#else</span>
+<a name="l03076"></a>03076 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l03077"></a>03077 <span class="preprocessor">#endif</span>
+<a name="l03078"></a>03078 <span class="preprocessor"></span>}
+<a name="l03079"></a>03079 
+<a name="l03090"></a><a class="code" href="_s_d_l__image_filter_8h.html#ac32f1ea9acbee51c2db94224ef6f7fd2">03090</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a4561a73b249a26babc4c469ffbdae604" title="Filter using ShiftLeftByte: D = (S << N)">SDL_imageFilterShiftLeftByte</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</spa [...]
+<a name="l03091"></a>03091 {
+<a name="l03092"></a>03092         <span class="keyword">static</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Mask[8] = { 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE };
+<a name="l03093"></a>03093         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l03094"></a>03094         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdest;
+<a name="l03095"></a>03095         <span class="keywordtype">int</span> result;
+<a name="l03096"></a>03096 
+<a name="l03097"></a>03097         <span class="comment">/* Validate input parameters */</span>
+<a name="l03098"></a>03098         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l03099"></a>03099                 <span class="keywordflow">return</span>(-1);
+<a name="l03100"></a>03100         <span class="keywordflow">if</span> (length == 0)
+<a name="l03101"></a>03101                 <span class="keywordflow">return</span>(0);
+<a name="l03102"></a>03102 
+<a name="l03103"></a>03103         <span class="keywordflow">if</span> (N > 8) {
+<a name="l03104"></a>03104                 <span class="keywordflow">return</span> (-1);
+<a name="l03105"></a>03105         }
+<a name="l03106"></a>03106 
+<a name="l03107"></a>03107         <span class="comment">/* Special case: N==0 */</span>
+<a name="l03108"></a>03108         <span class="keywordflow">if</span> (N == 0) {
+<a name="l03109"></a>03109                 memcpy(Src1, Dest, length);
+<a name="l03110"></a>03110                 <span class="keywordflow">return</span> (0); 
+<a name="l03111"></a>03111         }
+<a name="l03112"></a>03112 
+<a name="l03113"></a>03113         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l03114"></a>03114 
+<a name="l03115"></a>03115                 SDL_imageFilterShiftLeftByteMMX(Src1, Dest, length, N, Mask);
+<a name="l03116"></a>03116 
+<a name="l03117"></a>03117                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l03118"></a>03118                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l03119"></a>03119                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l03120"></a>03120                         istart = length & 0xfffffff8;
+<a name="l03121"></a>03121                         cursrc1 = &Src1[istart];
+<a name="l03122"></a>03122                         curdest = &Dest[istart];
+<a name="l03123"></a>03123                 } <span class="keywordflow">else</span> {
+<a name="l03124"></a>03124                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l03125"></a>03125                         <span class="keywordflow">return</span> (0);
+<a name="l03126"></a>03126                 }
+<a name="l03127"></a>03127         } <span class="keywordflow">else</span> {
+<a name="l03128"></a>03128                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l03129"></a>03129                 istart = 0;
+<a name="l03130"></a>03130                 cursrc1 = Src1;
+<a name="l03131"></a>03131                 curdest = Dest;
+<a name="l03132"></a>03132         }
+<a name="l03133"></a>03133 
+<a name="l03134"></a>03134         <span class="comment">/* C routine to process image */</span>
+<a name="l03135"></a>03135         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l03136"></a>03136                 result = ((int) *cursrc1 << N) & 0xff;
+<a name="l03137"></a>03137                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l03138"></a>03138                 <span class="comment">/* Advance pointers */</span>
+<a name="l03139"></a>03139                 cursrc1++;
+<a name="l03140"></a>03140                 curdest++;
+<a name="l03141"></a>03141         }
+<a name="l03142"></a>03142 
+<a name="l03143"></a>03143         <span class="keywordflow">return</span> (0);
+<a name="l03144"></a>03144 }
+<a name="l03145"></a>03145 
+<a name="l03156"></a>03156 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterShiftLeftUintMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> N)
+<a name="l03157"></a>03157 {
+<a name="l03158"></a>03158 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l03159"></a>03159 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l03160"></a>03160 <span class="preprocessor"></span>        __asm
+<a name="l03161"></a>03161         {
+<a name="l03162"></a>03162                 pusha
+<a name="l03163"></a>03163                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l03164"></a>03164                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l03165"></a>03165                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l03166"></a>03166                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l03167"></a>03167                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l03168"></a>03168 L12023:
+<a name="l03169"></a>03169                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
+<a name="l03170"></a>03170                 pslld mm0, N    <span class="comment">/* MM0=SrcDest+C (add 8 bytes with saturation) */</span>
+<a name="l03171"></a>03171                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
+<a name="l03172"></a>03172                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l03173"></a>03173                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l03174"></a>03174                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l03175"></a>03175                         jnz             L12023          <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l03176"></a>03176                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l03177"></a>03177                         popa
+<a name="l03178"></a>03178         }
+<a name="l03179"></a>03179 <span class="preprocessor">#else</span>
+<a name="l03180"></a>03180 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l03181"></a>03181         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l03182"></a>03182         __m64 *mDest = (__m64*)Dest;
+<a name="l03183"></a>03183         <span class="keywordtype">int</span> i;
+<a name="l03184"></a>03184         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l03185"></a>03185                 *mDest = _m_pslldi(*mSrc1, N);  <span class="comment">/* Src1+C (add 8 bytes with saturation) */</span>
+<a name="l03186"></a>03186                 mSrc1++;
+<a name="l03187"></a>03187                 mDest++;
+<a name="l03188"></a>03188         }
+<a name="l03189"></a>03189         _m_empty();                             <span class="comment">/* clean MMX state */</span>
+<a name="l03190"></a>03190 <span class="preprocessor">#endif</span>
+<a name="l03191"></a>03191 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l03192"></a>03192 <span class="preprocessor">#else</span>
+<a name="l03193"></a>03193 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l03194"></a>03194 <span class="preprocessor">#endif</span>
+<a name="l03195"></a>03195 <span class="preprocessor"></span>}
+<a name="l03196"></a>03196 
+<a name="l03207"></a><a class="code" href="_s_d_l__image_filter_8h.html#a4fd6d4a9711c13163496587454d9f1a2">03207</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a250e796fb2db470da0a78b74b78114e8" title="Filter using ShiftLeftUint: D = ((uint)S << N)">SDL_imageFilterShiftLeftUint</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">cha [...]
+<a name="l03208"></a>03208 {
+<a name="l03209"></a>03209         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l03210"></a>03210         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdest;
+<a name="l03211"></a>03211         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *icursrc1, *icurdest;
+<a name="l03212"></a>03212         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> result;
+<a name="l03213"></a>03213 
+<a name="l03214"></a>03214         <span class="comment">/* Validate input parameters */</span>
+<a name="l03215"></a>03215         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l03216"></a>03216                 <span class="keywordflow">return</span>(-1);
+<a name="l03217"></a>03217         <span class="keywordflow">if</span> (length == 0)
+<a name="l03218"></a>03218                 <span class="keywordflow">return</span>(0);
+<a name="l03219"></a>03219 
+<a name="l03220"></a>03220         <span class="keywordflow">if</span> (N > 32) {
+<a name="l03221"></a>03221                 <span class="keywordflow">return</span> (-1);
+<a name="l03222"></a>03222         }
+<a name="l03223"></a>03223 
+<a name="l03224"></a>03224         <span class="comment">/* Special case: N==0 */</span>
+<a name="l03225"></a>03225         <span class="keywordflow">if</span> (N == 0) {
+<a name="l03226"></a>03226                 memcpy(Src1, Dest, length);
+<a name="l03227"></a>03227                 <span class="keywordflow">return</span> (0); 
+<a name="l03228"></a>03228         }
+<a name="l03229"></a>03229 
+<a name="l03230"></a>03230         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l03231"></a>03231 
+<a name="l03232"></a>03232                 SDL_imageFilterShiftLeftUintMMX(Src1, Dest, length, N);
+<a name="l03233"></a>03233 
+<a name="l03234"></a>03234                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l03235"></a>03235                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l03236"></a>03236                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l03237"></a>03237                         istart = length & 0xfffffff8;
+<a name="l03238"></a>03238                         cursrc1 = &Src1[istart];
+<a name="l03239"></a>03239                         curdest = &Dest[istart];
+<a name="l03240"></a>03240                 } <span class="keywordflow">else</span> {
+<a name="l03241"></a>03241                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l03242"></a>03242                         <span class="keywordflow">return</span> (0);
+<a name="l03243"></a>03243                 }
+<a name="l03244"></a>03244         } <span class="keywordflow">else</span> {
+<a name="l03245"></a>03245                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l03246"></a>03246                 istart = 0;
+<a name="l03247"></a>03247                 cursrc1 = Src1;
+<a name="l03248"></a>03248                 curdest = Dest;
+<a name="l03249"></a>03249         }
+<a name="l03250"></a>03250 
+<a name="l03251"></a>03251         <span class="comment">/* C routine to process image */</span>
+<a name="l03252"></a>03252         icursrc1=(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *)cursrc1;
+<a name="l03253"></a>03253         icurdest=(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *)curdest;
+<a name="l03254"></a>03254         <span class="keywordflow">for</span> (i = istart; i < length; i += 4) {
+<a name="l03255"></a>03255                 <span class="keywordflow">if</span> ((i+4)<length) {
+<a name="l03256"></a>03256                         result = ((<span class="keywordtype">unsigned</span> int)*icursrc1 << N);
+<a name="l03257"></a>03257                         *icurdest = result;
+<a name="l03258"></a>03258                 }
+<a name="l03259"></a>03259                 <span class="comment">/* Advance pointers */</span>
+<a name="l03260"></a>03260                 icursrc1++;
+<a name="l03261"></a>03261                 icurdest++;
+<a name="l03262"></a>03262         }
 <a name="l03263"></a>03263 
-<a name="l03264"></a>03264                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l03265"></a>03265                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l03266"></a>03266                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l03267"></a>03267                         istart = length & 0xfffffff8;
-<a name="l03268"></a>03268                         cursrc1 = &Src1[istart];
-<a name="l03269"></a>03269                         curdest = &Dest[istart];
-<a name="l03270"></a>03270                 } <span class="keywordflow">else</span> {
-<a name="l03271"></a>03271                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l03272"></a>03272                         <span class="keywordflow">return</span> (0);
-<a name="l03273"></a>03273                 }
-<a name="l03274"></a>03274         } <span class="keywordflow">else</span> {
-<a name="l03275"></a>03275                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l03276"></a>03276                 istart = 0;
-<a name="l03277"></a>03277                 cursrc1 = Src1;
-<a name="l03278"></a>03278                 curdest = Dest;
-<a name="l03279"></a>03279         }
-<a name="l03280"></a>03280 
-<a name="l03281"></a>03281         <span class="comment">/* C routine to process image */</span>
-<a name="l03282"></a>03282         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l03283"></a>03283                 result = ((int) *cursrc1 << N) & 0xff;
-<a name="l03284"></a>03284                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l03285"></a>03285                 <span class="comment">/* Advance pointers */</span>
-<a name="l03286"></a>03286                 cursrc1++;
-<a name="l03287"></a>03287                 curdest++;
-<a name="l03288"></a>03288         }
-<a name="l03289"></a>03289 
-<a name="l03290"></a>03290         <span class="keywordflow">return</span> (0);
-<a name="l03291"></a>03291 }
-<a name="l03292"></a>03292 
-<a name="l03303"></a><a class="code" href="_s_d_l__image_filter_8c.html#a4a4260369d38e7bbcd9e3690bf57b8d4">03303</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a4a4260369d38e7bbcd9e3690bf57b8d4" title="Internal MMX Filter using ShiftLeftUint: D = ((uint)S << N)">SDL_imageFilterShiftLeftUintMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class=" [...]
-<a name="l03304"></a>03304 {
-<a name="l03305"></a>03305 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l03306"></a>03306 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l03307"></a>03307 <span class="preprocessor"></span>        __asm
-<a name="l03308"></a>03308         {
-<a name="l03309"></a>03309                 pusha
-<a name="l03310"></a>03310                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03311"></a>03311                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l03312"></a>03312                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03313"></a>03313                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03314"></a>03314                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03315"></a>03315 L12023:
-<a name="l03316"></a>03316                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l03317"></a>03317                 pslld mm0, N    <span class="comment">/* MM0=SrcDest+C (add 8 bytes with saturation) */</span>
-<a name="l03318"></a>03318                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
-<a name="l03319"></a>03319                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03320"></a>03320                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03321"></a>03321                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l03322"></a>03322                         jnz             L12023          <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03323"></a>03323                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l03324"></a>03324                         popa
-<a name="l03325"></a>03325         }
-<a name="l03326"></a>03326 <span class="preprocessor">#else</span>
-<a name="l03327"></a>03327 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l03328"></a>03328                 (<span class="stringliteral">"pusha              \n\t"</span>
-<a name="l03329"></a>03329                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03330"></a>03330                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l03331"></a>03331                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03332"></a>03332                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03333"></a>03333                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03334"></a>03334                 <span class="stringliteral">"1: movq (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l03335"></a>03335                 <span class="stringliteral">"pslld   %3, %%mm0 \n\t"</span>        <span class="comment">/* MM0=SrcDest+C (add 8 bytes with saturation) */</span>
-<a name="l03336"></a>03336                 <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span>   <span class="comment">/* store result in SrcDest */</span>
-<a name="l03337"></a>03337                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03338"></a>03338                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03339"></a>03339                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l03340"></a>03340                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03341"></a>03341                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l03342"></a>03342                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l03343"></a>03343                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l03344"></a>03344                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l03345"></a>03345                 <span class="stringliteral">"m"</span>(N)                  <span class="comment">/* %3 */</span>
-<a name="l03346"></a>03346                 );
-<a name="l03347"></a>03347 <span class="preprocessor">#endif</span>
-<a name="l03348"></a>03348 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l03349"></a>03349 <span class="preprocessor">#else</span>
-<a name="l03350"></a>03350 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l03351"></a>03351 <span class="preprocessor">#endif</span>
-<a name="l03352"></a>03352 <span class="preprocessor"></span>}
-<a name="l03353"></a>03353 
-<a name="l03364"></a><a class="code" href="_s_d_l__image_filter_8h.html#a4fd6d4a9711c13163496587454d9f1a2">03364</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a250e796fb2db470da0a78b74b78114e8" title="Filter using ShiftLeftUint: D = ((uint)S << N)">SDL_imageFilterShiftLeftUint</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">cha [...]
-<a name="l03365"></a>03365 {
-<a name="l03366"></a>03366         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l03367"></a>03367         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdest;
-<a name="l03368"></a>03368         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *icursrc1, *icurdest;
-<a name="l03369"></a>03369         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> result;
-<a name="l03370"></a>03370 
-<a name="l03371"></a>03371         <span class="comment">/* Validate input parameters */</span>
-<a name="l03372"></a>03372         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l03373"></a>03373                 <span class="keywordflow">return</span>(-1);
-<a name="l03374"></a>03374         <span class="keywordflow">if</span> (length == 0)
-<a name="l03375"></a>03375                 <span class="keywordflow">return</span>(0);
-<a name="l03376"></a>03376 
-<a name="l03377"></a>03377         <span class="keywordflow">if</span> (N > 32) {
-<a name="l03378"></a>03378                 <span class="keywordflow">return</span> (-1);
-<a name="l03379"></a>03379         }
-<a name="l03380"></a>03380 
-<a name="l03381"></a>03381         <span class="comment">/* Special case: N==0 */</span>
-<a name="l03382"></a>03382         <span class="keywordflow">if</span> (N == 0) {
-<a name="l03383"></a>03383                 memcpy(Src1, Dest, length);
-<a name="l03384"></a>03384                 <span class="keywordflow">return</span> (0); 
-<a name="l03385"></a>03385         }
-<a name="l03386"></a>03386 
-<a name="l03387"></a>03387         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l03388"></a>03388 
-<a name="l03389"></a>03389                 <a class="code" href="_s_d_l__image_filter_8c.html#a4a4260369d38e7bbcd9e3690bf57b8d4" title="Internal MMX Filter using ShiftLeftUint: D = ((uint)S << N)">SDL_imageFilterShiftLeftUintMMX</a>(Src1, Dest, length, N);
-<a name="l03390"></a>03390 
-<a name="l03391"></a>03391                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l03392"></a>03392                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l03393"></a>03393                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l03394"></a>03394                         istart = length & 0xfffffff8;
-<a name="l03395"></a>03395                         cursrc1 = &Src1[istart];
-<a name="l03396"></a>03396                         curdest = &Dest[istart];
-<a name="l03397"></a>03397                 } <span class="keywordflow">else</span> {
-<a name="l03398"></a>03398                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l03399"></a>03399                         <span class="keywordflow">return</span> (0);
-<a name="l03400"></a>03400                 }
-<a name="l03401"></a>03401         } <span class="keywordflow">else</span> {
-<a name="l03402"></a>03402                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l03403"></a>03403                 istart = 0;
-<a name="l03404"></a>03404                 cursrc1 = Src1;
-<a name="l03405"></a>03405                 curdest = Dest;
-<a name="l03406"></a>03406         }
-<a name="l03407"></a>03407 
-<a name="l03408"></a>03408         <span class="comment">/* C routine to process image */</span>
-<a name="l03409"></a>03409         icursrc1=(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *)cursrc1;
-<a name="l03410"></a>03410         icurdest=(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> *)curdest;
-<a name="l03411"></a>03411         <span class="keywordflow">for</span> (i = istart; i < length; i += 4) {
-<a name="l03412"></a>03412                 <span class="keywordflow">if</span> ((i+4)<length) {
-<a name="l03413"></a>03413                         result = ((<span class="keywordtype">unsigned</span> int)*icursrc1 << N);
-<a name="l03414"></a>03414                         *icurdest = result;
-<a name="l03415"></a>03415                 }
-<a name="l03416"></a>03416                 <span class="comment">/* Advance pointers */</span>
-<a name="l03417"></a>03417                 icursrc1++;
-<a name="l03418"></a>03418                 icurdest++;
-<a name="l03419"></a>03419         }
-<a name="l03420"></a>03420 
-<a name="l03421"></a>03421         <span class="keywordflow">return</span> (0);
-<a name="l03422"></a>03422 }
-<a name="l03423"></a>03423 
-<a name="l03434"></a><a class="code" href="_s_d_l__image_filter_8c.html#a3ea84aa8cf313790dc7468f2f4f29497">03434</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a3ea84aa8cf313790dc7468f2f4f29497" title="Internal MMX Filter ShiftLeft: D = saturation255(S << N)">SDL_imageFilterShiftLeftMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keyword [...]
-<a name="l03435"></a>03435 {
-<a name="l03436"></a>03436 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l03437"></a>03437 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l03438"></a>03438 <span class="preprocessor"></span>        __asm
-<a name="l03439"></a>03439         {
-<a name="l03440"></a>03440                 pusha
-<a name="l03441"></a>03441                         xor eax, eax    <span class="comment">/* zero EAX */</span>
-<a name="l03442"></a>03442                         mov al, N       <span class="comment">/* load N into AL */</span>
-<a name="l03443"></a>03443                         movd mm7, eax           <span class="comment">/* copy N into MM7 */</span>
-<a name="l03444"></a>03444                         pxor mm0, mm0           <span class="comment">/* zero MM0 register */</span>
-<a name="l03445"></a>03445                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03446"></a>03446                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l03447"></a>03447                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03448"></a>03448                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03449"></a>03449                         cmp al, 7       <span class="comment">/* if (N <= 7) execute more efficient code */</span>
-<a name="l03450"></a>03450                         jg             L10281
-<a name="l03451"></a>03451                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03452"></a>03452 L10280:
-<a name="l03453"></a>03453                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l03454"></a>03454                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l03455"></a>03455                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l03456"></a>03456                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l03457"></a>03457                         psllw mm3, mm7          <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the right */</span>
-<a name="l03458"></a>03458                         psllw mm4, mm7          <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the right */</span>
-<a name="l03459"></a>03459                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l03460"></a>03460                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
-<a name="l03461"></a>03461                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03462"></a>03462                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03463"></a>03463                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l03464"></a>03464                         jnz            L10280           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03465"></a>03465                         jmp            L10282
-<a name="l03466"></a>03466                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03467"></a>03467 L10281:
-<a name="l03468"></a>03468                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l03469"></a>03469                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l03470"></a>03470                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l03471"></a>03471                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l03472"></a>03472                         psllw mm3, mm7          <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the right */</span>
-<a name="l03473"></a>03473                         psllw mm4, mm7          <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the right */</span>
-<a name="l03474"></a>03474                         <span class="comment">/* ** Take abs value of the signed words ** */</span>
-<a name="l03475"></a>03475                         movq mm5, mm3           <span class="comment">/* copy mm3 into mm5 */</span>
-<a name="l03476"></a>03476                         movq mm6, mm4           <span class="comment">/* copy mm4 into mm6 */</span>
-<a name="l03477"></a>03477                         psraw mm5, 15           <span class="comment">/* fill mm5 words with word sign bit */</span>
-<a name="l03478"></a>03478                         psraw mm6, 15           <span class="comment">/* fill mm6 words with word sign bit */</span>
-<a name="l03479"></a>03479                         pxor mm3, mm5           <span class="comment">/* take 1's compliment of only neg words */</span>
-<a name="l03480"></a>03480                         pxor mm4, mm6           <span class="comment">/* take 1's compliment of only neg words */</span>
-<a name="l03481"></a>03481                         psubsw mm3, mm5         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
-<a name="l03482"></a>03482                         psubsw mm4, mm6         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
-<a name="l03483"></a>03483                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l03484"></a>03484                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
-<a name="l03485"></a>03485                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03486"></a>03486                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03487"></a>03487                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l03488"></a>03488                         jnz            L10281           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03489"></a>03489 L10282:
-<a name="l03490"></a>03490                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l03264"></a>03264         <span class="keywordflow">return</span> (0);
+<a name="l03265"></a>03265 }
+<a name="l03266"></a>03266 
+<a name="l03277"></a>03277 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterShiftLeftMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> N)
+<a name="l03278"></a>03278 {
+<a name="l03279"></a>03279 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l03280"></a>03280 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l03281"></a>03281 <span class="preprocessor"></span>        __asm
+<a name="l03282"></a>03282         {
+<a name="l03283"></a>03283                 pusha
+<a name="l03284"></a>03284                         xor eax, eax    <span class="comment">/* zero EAX */</span>
+<a name="l03285"></a>03285                         mov al, N       <span class="comment">/* load N into AL */</span>
+<a name="l03286"></a>03286                         movd mm7, eax           <span class="comment">/* copy N into MM7 */</span>
+<a name="l03287"></a>03287                         pxor mm0, mm0           <span class="comment">/* zero MM0 register */</span>
+<a name="l03288"></a>03288                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l03289"></a>03289                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l03290"></a>03290                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l03291"></a>03291                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l03292"></a>03292                         cmp al, 7       <span class="comment">/* if (N <= 7) execute more efficient code */</span>
+<a name="l03293"></a>03293                         jg             L10281
+<a name="l03294"></a>03294                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l03295"></a>03295 L10280:
+<a name="l03296"></a>03296                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
+<a name="l03297"></a>03297                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
+<a name="l03298"></a>03298                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
+<a name="l03299"></a>03299                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
+<a name="l03300"></a>03300                         psllw mm3, mm7          <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the left */</span>
+<a name="l03301"></a>03301                         psllw mm4, mm7          <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the left */</span>
+<a name="l03302"></a>03302                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l03303"></a>03303                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
+<a name="l03304"></a>03304                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l03305"></a>03305                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l03306"></a>03306                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l03307"></a>03307                         jnz            L10280           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l03308"></a>03308                         jmp            L10282
+<a name="l03309"></a>03309                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l03310"></a>03310 L10281:
+<a name="l03311"></a>03311                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
+<a name="l03312"></a>03312                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
+<a name="l03313"></a>03313                         punpcklbw mm3, mm0      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
+<a name="l03314"></a>03314                         punpckhbw mm4, mm0      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
+<a name="l03315"></a>03315                         psllw mm3, mm7          <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the left */</span>
+<a name="l03316"></a>03316                         psllw mm4, mm7          <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the left */</span>
+<a name="l03317"></a>03317                         <span class="comment">/* ** Take abs value of the signed words ** */</span>
+<a name="l03318"></a>03318                         movq mm5, mm3           <span class="comment">/* copy mm3 into mm5 */</span>
+<a name="l03319"></a>03319                         movq mm6, mm4           <span class="comment">/* copy mm4 into mm6 */</span>
+<a name="l03320"></a>03320                         psraw mm5, 15           <span class="comment">/* fill mm5 words with word sign bit */</span>
+<a name="l03321"></a>03321                         psraw mm6, 15           <span class="comment">/* fill mm6 words with word sign bit */</span>
+<a name="l03322"></a>03322                         pxor mm3, mm5           <span class="comment">/* take 1's compliment of only neg words */</span>
+<a name="l03323"></a>03323                         pxor mm4, mm6           <span class="comment">/* take 1's compliment of only neg words */</span>
+<a name="l03324"></a>03324                         psubsw mm3, mm5         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
+<a name="l03325"></a>03325                         psubsw mm4, mm6         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
+<a name="l03326"></a>03326                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l03327"></a>03327                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
+<a name="l03328"></a>03328                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l03329"></a>03329                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l03330"></a>03330                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l03331"></a>03331                         jnz            L10281           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l03332"></a>03332 L10282:
+<a name="l03333"></a>03333                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l03334"></a>03334                         popa
+<a name="l03335"></a>03335         }
+<a name="l03336"></a>03336 <span class="preprocessor">#else</span>
+<a name="l03337"></a>03337 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l03338"></a>03338         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l03339"></a>03339         __m64 *mDest = (__m64*)Dest;
+<a name="l03340"></a>03340         __m64 mm0 = _m_from_int(0);                             <span class="comment">/* zero mm0 register */</span>
+<a name="l03341"></a>03341         <span class="keywordtype">int</span> i;
+<a name="l03342"></a>03342         <span class="keywordflow">if</span> (N <= 7) {                                           <span class="comment">/* if (N <= 7) execute more efficient code */</span>
+<a name="l03343"></a>03343                 <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l03344"></a>03344                         __m64 mm3, mm4;
+<a name="l03345"></a>03345                         mm3 = _m_punpcklbw(*mSrc1, mm0);        <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l03346"></a>03346                         mm4 = _m_punpckhbw(*mSrc1, mm0);        <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l03347"></a>03347                         mm3 = _m_psllwi(mm3, N);                <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the left */</span>
+<a name="l03348"></a>03348                         mm4 = _m_psllwi(mm4, N);                <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the left */</span>
+<a name="l03349"></a>03349                         *mDest = _m_packuswb(mm3, mm4);         <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l03350"></a>03350                         mSrc1++;
+<a name="l03351"></a>03351                         mDest++;
+<a name="l03352"></a>03352                 }
+<a name="l03353"></a>03353         } <span class="keywordflow">else</span> {
+<a name="l03354"></a>03354                 <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l03355"></a>03355                         __m64 mm3, mm4, mm5, mm6;
+<a name="l03356"></a>03356                         mm3 = _m_punpcklbw(*mSrc1, mm0);        <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l03357"></a>03357                         mm4 = _m_punpckhbw(*mSrc1, mm0);        <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l03358"></a>03358                         mm3 = _m_psllwi(mm3, N);                <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the left */</span>
+<a name="l03359"></a>03359                         mm4 = _m_psllwi(mm4, N);                <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the left */</span>
+<a name="l03360"></a>03360                         <span class="comment">/* Take abs value of the signed words */</span>
+<a name="l03361"></a>03361                         mm5 = _m_psrawi(mm3, 15);               <span class="comment">/* fill mm5 words with word sign bit */</span>
+<a name="l03362"></a>03362                         mm6 = _m_psrawi(mm4, 15);               <span class="comment">/* fill mm6 words with word sign bit */</span>
+<a name="l03363"></a>03363                         mm3 = _m_pxor(mm3, mm5);                <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l03364"></a>03364                         mm4 = _m_pxor(mm4, mm6);                <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l03365"></a>03365                         mm3 = _m_psubsw(mm3, mm5);              <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l03366"></a>03366                         mm4 = _m_psubsw(mm4, mm6);              <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l03367"></a>03367                         *mDest = _m_packuswb(mm3, mm4);         <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l03368"></a>03368                         mSrc1++;
+<a name="l03369"></a>03369                         mDest++;
+<a name="l03370"></a>03370                 }
+<a name="l03371"></a>03371         }
+<a name="l03372"></a>03372         _m_empty();                                             <span class="comment">/* clean MMX state */</span>
+<a name="l03373"></a>03373 <span class="preprocessor">#endif</span>
+<a name="l03374"></a>03374 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l03375"></a>03375 <span class="preprocessor">#else</span>
+<a name="l03376"></a>03376 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l03377"></a>03377 <span class="preprocessor">#endif</span>
+<a name="l03378"></a>03378 <span class="preprocessor"></span>}
+<a name="l03379"></a>03379 
+<a name="l03390"></a><a class="code" href="_s_d_l__image_filter_8h.html#a084f9544f049cc01e7b2f1090534abbf">03390</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a98372fea76310903abef7808db10d226" title="Filter ShiftLeft: D = saturation255(S << N)">SDL_imageFilterShiftLeft</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span [...]
+<a name="l03391"></a>03391 {
+<a name="l03392"></a>03392         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l03393"></a>03393         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdest;
+<a name="l03394"></a>03394         <span class="keywordtype">int</span> result;
+<a name="l03395"></a>03395 
+<a name="l03396"></a>03396         <span class="comment">/* Validate input parameters */</span>
+<a name="l03397"></a>03397         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l03398"></a>03398                 <span class="keywordflow">return</span>(-1);
+<a name="l03399"></a>03399         <span class="keywordflow">if</span> (length == 0)
+<a name="l03400"></a>03400                 <span class="keywordflow">return</span>(0);
+<a name="l03401"></a>03401 
+<a name="l03402"></a>03402         <span class="keywordflow">if</span> (N > 8) {
+<a name="l03403"></a>03403                 <span class="keywordflow">return</span> (-1);
+<a name="l03404"></a>03404         }
+<a name="l03405"></a>03405 
+<a name="l03406"></a>03406         <span class="comment">/* Special case: N==0 */</span>
+<a name="l03407"></a>03407         <span class="keywordflow">if</span> (N == 0) {
+<a name="l03408"></a>03408                 memcpy(Src1, Dest, length);
+<a name="l03409"></a>03409                 <span class="keywordflow">return</span> (0); 
+<a name="l03410"></a>03410         }
+<a name="l03411"></a>03411 
+<a name="l03412"></a>03412         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l03413"></a>03413 
+<a name="l03414"></a>03414                 SDL_imageFilterShiftLeftMMX(Src1, Dest, length, N);
+<a name="l03415"></a>03415 
+<a name="l03416"></a>03416                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l03417"></a>03417                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l03418"></a>03418                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l03419"></a>03419                         istart = length & 0xfffffff8;
+<a name="l03420"></a>03420                         cursrc1 = &Src1[istart];
+<a name="l03421"></a>03421                         curdest = &Dest[istart];
+<a name="l03422"></a>03422                 } <span class="keywordflow">else</span> {
+<a name="l03423"></a>03423                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l03424"></a>03424                         <span class="keywordflow">return</span> (0);
+<a name="l03425"></a>03425                 }
+<a name="l03426"></a>03426         } <span class="keywordflow">else</span> {
+<a name="l03427"></a>03427                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l03428"></a>03428                 istart = 0;
+<a name="l03429"></a>03429                 cursrc1 = Src1;
+<a name="l03430"></a>03430                 curdest = Dest;
+<a name="l03431"></a>03431         }
+<a name="l03432"></a>03432 
+<a name="l03433"></a>03433         <span class="comment">/* C routine to process image */</span>
+<a name="l03434"></a>03434         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l03435"></a>03435                 result = (int) *cursrc1 << N;
+<a name="l03436"></a>03436                 <span class="keywordflow">if</span> (result > 255)
+<a name="l03437"></a>03437                         result = 255;
+<a name="l03438"></a>03438                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l03439"></a>03439                 <span class="comment">/* Advance pointers */</span>
+<a name="l03440"></a>03440                 cursrc1++;
+<a name="l03441"></a>03441                 curdest++;
+<a name="l03442"></a>03442         }
+<a name="l03443"></a>03443 
+<a name="l03444"></a>03444         <span class="keywordflow">return</span> (0);
+<a name="l03445"></a>03445 }
+<a name="l03446"></a>03446 
+<a name="l03457"></a>03457 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterBinarizeUsingThresholdMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> T)
+<a name="l03458"></a>03458 {
+<a name="l03459"></a>03459 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l03460"></a>03460 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l03461"></a>03461 <span class="preprocessor"></span>        __asm
+<a name="l03462"></a>03462         {
+<a name="l03463"></a>03463                 pusha
+<a name="l03464"></a>03464                         <span class="comment">/* ** Duplicate T in 8 bytes of MM3 ** */</span>
+<a name="l03465"></a>03465                         pcmpeqb mm1, mm1        <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l03466"></a>03466                         pcmpeqb mm2, mm2        <span class="comment">/* generate all 1's in mm2 */</span>
+<a name="l03467"></a>03467                         mov al, T       <span class="comment">/* load T into AL */</span>
+<a name="l03468"></a>03468                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
+<a name="l03469"></a>03469                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l03470"></a>03470                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l03471"></a>03471                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l03472"></a>03472                         movd mm3, eax           <span class="comment">/* copy EAX into MM3 */</span>
+<a name="l03473"></a>03473                         movd mm4, eax           <span class="comment">/* copy EAX into MM4 */</span>
+<a name="l03474"></a>03474                         punpckldq mm3, mm4      <span class="comment">/* fill higher bytes of MM3 with T */</span>
+<a name="l03475"></a>03475                         psubusb mm2, mm3        <span class="comment">/* store 0xFF - T in MM2 */</span>
+<a name="l03476"></a>03476                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l03477"></a>03477                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l03478"></a>03478                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l03479"></a>03479                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l03480"></a>03480                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l03481"></a>03481 L1029:
+<a name="l03482"></a>03482                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
+<a name="l03483"></a>03483                 paddusb mm0, mm2        <span class="comment">/* MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation) */</span>
+<a name="l03484"></a>03484                         pcmpeqb mm0, mm1        <span class="comment">/* binarize 255:0, comparing to 255 */</span>
+<a name="l03485"></a>03485                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
+<a name="l03486"></a>03486                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l03487"></a>03487                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l03488"></a>03488                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l03489"></a>03489                         jnz             L1029           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l03490"></a>03490                         emms                            <span class="comment">/* exit MMX state */</span>
 <a name="l03491"></a>03491                         popa
 <a name="l03492"></a>03492         }
 <a name="l03493"></a>03493 <span class="preprocessor">#else</span>
-<a name="l03494"></a>03494 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l03495"></a>03495                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"xor       %%eax, %%eax \n\t"</span>        <span class="comment">/* zero EAX */</span>
-<a name="l03496"></a>03496                 <span class="stringliteral">"mov           %3, %%al \n\t"</span>   <span class="comment">/* load N into AL */</span>
-<a name="l03497"></a>03497                 <span class="stringliteral">"movd      %%eax, %%mm7 \n\t"</span>   <span class="comment">/* copy N into MM7 */</span>
-<a name="l03498"></a>03498                 <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>   <span class="comment">/* zero MM0 register */</span>
-<a name="l03499"></a>03499                 <span class="stringliteral">"mov         %1, %%eax  \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03500"></a>03500                 <span class="stringliteral">"mov         %0, %%edi  \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l03501"></a>03501                 <span class="stringliteral">"mov         %2, %%ecx  \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03502"></a>03502                 <span class="stringliteral">"shr         $3, %%ecx  \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03503"></a>03503                 <span class="stringliteral">"cmp           $7, %%al \n\t"</span>   <span class="comment">/* if (N <= 7) execute more efficient code */</span>
-<a name="l03504"></a>03504                 <span class="stringliteral">"jg                  2f \n\t"</span> <span class="stringliteral">".align 16              \n\t"</span>     <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03505"></a>03505                 <span class="stringliteral">"1: movq (%%eax), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l03506"></a>03506                 <span class="stringliteral">"movq      %%mm3, %%mm4 \n\t"</span>   <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l03507"></a>03507                 <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l03508"></a>03508                 <span class="stringliteral">"punpckhbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l03509"></a>03509                 <span class="stringliteral">"psllw     %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the right */</span>
-<a name="l03510"></a>03510                 <span class="stringliteral">"psllw     %%mm7, %%mm4 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the right */</span>
-<a name="l03511"></a>03511                 <span class="stringliteral">"packuswb  %%mm4, %%mm3 \n\t"</span>   <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l03512"></a>03512                 <span class="stringliteral">"movq    %%mm3, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l03513"></a>03513                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03514"></a>03514                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03515"></a>03515                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l03516"></a>03516                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03517"></a>03517                 <span class="stringliteral">"jmp                 3f \n\t"</span> <span class="stringliteral">".align 16              \n\t"</span>     <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03518"></a>03518                 <span class="stringliteral">"2: movq (%%eax), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l03519"></a>03519                 <span class="stringliteral">"movq      %%mm3, %%mm4 \n\t"</span>   <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l03520"></a>03520                 <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l03521"></a>03521                 <span class="stringliteral">"punpckhbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l03522"></a>03522                 <span class="stringliteral">"psllw     %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM3 (N) bits to the right */</span>
-<a name="l03523"></a>03523                 <span class="stringliteral">"psllw     %%mm7, %%mm4 \n\t"</span>   <span class="comment">/* shift 4 WORDS of MM4 (N) bits to the right */</span>
-<a name="l03524"></a>03524                 <span class="comment">/* ** Take abs value of the signed words ** */</span>
-<a name="l03525"></a>03525                 <span class="stringliteral">"movq      %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* copy mm3 into mm5 */</span>
-<a name="l03526"></a>03526                 <span class="stringliteral">"movq      %%mm4, %%mm6 \n\t"</span>   <span class="comment">/* copy mm4 into mm6 */</span>
-<a name="l03527"></a>03527                 <span class="stringliteral">"psraw       $15, %%mm5 \n\t"</span>   <span class="comment">/* fill mm5 words with word sign bit */</span>
-<a name="l03528"></a>03528                 <span class="stringliteral">"psraw       $15, %%mm6 \n\t"</span>   <span class="comment">/* fill mm6 words with word sign bit */</span>
-<a name="l03529"></a>03529                 <span class="stringliteral">"pxor      %%mm5, %%mm3 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l03530"></a>03530                 <span class="stringliteral">"pxor      %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l03531"></a>03531                 <span class="stringliteral">"psubsw    %%mm5, %%mm3 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l03532"></a>03532                 <span class="stringliteral">"psubsw    %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l03533"></a>03533                 <span class="stringliteral">"packuswb  %%mm4, %%mm3 \n\t"</span>   <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l03534"></a>03534                 <span class="stringliteral">"movq    %%mm3, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l03535"></a>03535                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03536"></a>03536                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03537"></a>03537                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l03538"></a>03538                 <span class="stringliteral">"jnz                 2b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03539"></a>03539                 <span class="stringliteral">"3: emms                \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l03540"></a>03540                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l03541"></a>03541                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l03542"></a>03542                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l03543"></a>03543                 <span class="stringliteral">"m"</span>(N)                  <span class="comment">/* %3 */</span>
-<a name="l03544"></a>03544                 );
-<a name="l03545"></a>03545 <span class="preprocessor">#endif</span>
-<a name="l03546"></a>03546 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l03547"></a>03547 <span class="preprocessor">#else</span>
-<a name="l03548"></a>03548 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l03549"></a>03549 <span class="preprocessor">#endif</span>
-<a name="l03550"></a>03550 <span class="preprocessor"></span>}
-<a name="l03551"></a>03551 
-<a name="l03562"></a><a class="code" href="_s_d_l__image_filter_8h.html#a084f9544f049cc01e7b2f1090534abbf">03562</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a98372fea76310903abef7808db10d226" title="Filter ShiftLeft: D = saturation255(S << N)">SDL_imageFilterShiftLeft</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span [...]
-<a name="l03563"></a>03563 {
-<a name="l03564"></a>03564         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l03565"></a>03565         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1, *curdest;
-<a name="l03566"></a>03566         <span class="keywordtype">int</span> result;
-<a name="l03567"></a>03567 
-<a name="l03568"></a>03568         <span class="comment">/* Validate input parameters */</span>
-<a name="l03569"></a>03569         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l03570"></a>03570                 <span class="keywordflow">return</span>(-1);
-<a name="l03571"></a>03571         <span class="keywordflow">if</span> (length == 0)
-<a name="l03572"></a>03572                 <span class="keywordflow">return</span>(0);
-<a name="l03573"></a>03573 
-<a name="l03574"></a>03574         <span class="keywordflow">if</span> (N > 8) {
-<a name="l03575"></a>03575                 <span class="keywordflow">return</span> (-1);
+<a name="l03494"></a>03494 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l03495"></a>03495         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l03496"></a>03496         __m64 *mDest = (__m64*)Dest;
+<a name="l03497"></a>03497         <span class="comment">/* Duplicate T in 8 bytes of MM3 */</span>
+<a name="l03498"></a>03498         __m64 mm1 = _m_pcmpeqb(mm1, mm1);                       <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l03499"></a>03499         __m64 mm2 = _m_pcmpeqb(mm2, mm2);                       <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l03500"></a>03500         <span class="keywordtype">int</span> i;
+<a name="l03501"></a>03501         memset(&i, T, 4);
+<a name="l03502"></a>03502         __m64 mm3 = _m_from_int(i);
+<a name="l03503"></a>03503         __m64 mm4 = _m_from_int(i);
+<a name="l03504"></a>03504         mm3 = _m_punpckldq(mm3, mm4);                   <span class="comment">/* fill higher bytes of MM3 with T */</span>
+<a name="l03505"></a>03505         mm2 = _m_psubusb(mm2, mm3);                     <span class="comment">/* store 0xFF - T in MM2 */</span>
+<a name="l03506"></a>03506         <span class="comment">//__m64 mm3 = _m_from_int64(lli); // x86_64 only</span>
+<a name="l03507"></a>03507         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l03508"></a>03508                 __m64 mm0 = _m_paddusb(*mSrc1, mm2);    <span class="comment">/* Src1+(0xFF-T) (add 8 bytes with saturation) */</span>
+<a name="l03509"></a>03509                 *mDest = _m_pcmpeqb(mm0, mm1);          <span class="comment">/* binarize 255:0, comparing to 255 */</span>
+<a name="l03510"></a>03510                 mSrc1++;
+<a name="l03511"></a>03511                 mDest++;
+<a name="l03512"></a>03512         }
+<a name="l03513"></a>03513         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l03514"></a>03514 <span class="preprocessor">#endif</span>
+<a name="l03515"></a>03515 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l03516"></a>03516 <span class="preprocessor">#else</span>
+<a name="l03517"></a>03517 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l03518"></a>03518 <span class="preprocessor">#endif</span>
+<a name="l03519"></a>03519 <span class="preprocessor"></span>}
+<a name="l03520"></a>03520 
+<a name="l03531"></a><a class="code" href="_s_d_l__image_filter_8h.html#ad5bf97d7e39d018d2eeb570e97edf8c0">03531</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a951a062e15df290a137428e1e0f4d5ce" title="Filter using BinarizeUsingThreshold: D = (S >= T) ? 255:0.">SDL_imageFilterBinarizeUsingThreshold</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class [...]
+<a name="l03532"></a>03532 {
+<a name="l03533"></a>03533         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l03534"></a>03534         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
+<a name="l03535"></a>03535         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
+<a name="l03536"></a>03536 
+<a name="l03537"></a>03537         <span class="comment">/* Validate input parameters */</span>
+<a name="l03538"></a>03538         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l03539"></a>03539                 <span class="keywordflow">return</span>(-1);
+<a name="l03540"></a>03540         <span class="keywordflow">if</span> (length == 0)
+<a name="l03541"></a>03541                 <span class="keywordflow">return</span>(0);
+<a name="l03542"></a>03542 
+<a name="l03543"></a>03543         <span class="comment">/* Special case: T==0 */</span>
+<a name="l03544"></a>03544         <span class="keywordflow">if</span> (T == 0) {
+<a name="l03545"></a>03545                 memset(Dest, 255, length);
+<a name="l03546"></a>03546                 <span class="keywordflow">return</span> (0); 
+<a name="l03547"></a>03547         }
+<a name="l03548"></a>03548 
+<a name="l03549"></a>03549         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l03550"></a>03550 
+<a name="l03551"></a>03551                 SDL_imageFilterBinarizeUsingThresholdMMX(Src1, Dest, length, T);
+<a name="l03552"></a>03552 
+<a name="l03553"></a>03553                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l03554"></a>03554                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l03555"></a>03555                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l03556"></a>03556                         istart = length & 0xfffffff8;
+<a name="l03557"></a>03557                         cursrc1 = &Src1[istart];
+<a name="l03558"></a>03558                         curdest = &Dest[istart];
+<a name="l03559"></a>03559                 } <span class="keywordflow">else</span> {
+<a name="l03560"></a>03560                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l03561"></a>03561                         <span class="keywordflow">return</span> (0);
+<a name="l03562"></a>03562                 }
+<a name="l03563"></a>03563         } <span class="keywordflow">else</span> {
+<a name="l03564"></a>03564                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l03565"></a>03565                 istart = 0;
+<a name="l03566"></a>03566                 cursrc1 = Src1;
+<a name="l03567"></a>03567                 curdest = Dest;
+<a name="l03568"></a>03568         }
+<a name="l03569"></a>03569 
+<a name="l03570"></a>03570         <span class="comment">/* C routine to process image */</span>
+<a name="l03571"></a>03571         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l03572"></a>03572                 *curdest = (<span class="keywordtype">unsigned</span> char)(((<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>)*cursrc1 >= T) ? 255 : 0);
+<a name="l03573"></a>03573                 <span class="comment">/* Advance pointers */</span>
+<a name="l03574"></a>03574                 cursrc1++;
+<a name="l03575"></a>03575                 curdest++;
 <a name="l03576"></a>03576         }
 <a name="l03577"></a>03577 
-<a name="l03578"></a>03578         <span class="comment">/* Special case: N==0 */</span>
-<a name="l03579"></a>03579         <span class="keywordflow">if</span> (N == 0) {
-<a name="l03580"></a>03580                 memcpy(Src1, Dest, length);
-<a name="l03581"></a>03581                 <span class="keywordflow">return</span> (0); 
-<a name="l03582"></a>03582         }
-<a name="l03583"></a>03583 
-<a name="l03584"></a>03584         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l03585"></a>03585 
-<a name="l03586"></a>03586                 <a class="code" href="_s_d_l__image_filter_8c.html#a3ea84aa8cf313790dc7468f2f4f29497" title="Internal MMX Filter ShiftLeft: D = saturation255(S << N)">SDL_imageFilterShiftLeftMMX</a>(Src1, Dest, length, N);
-<a name="l03587"></a>03587 
-<a name="l03588"></a>03588                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l03589"></a>03589                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l03590"></a>03590                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l03591"></a>03591                         istart = length & 0xfffffff8;
-<a name="l03592"></a>03592                         cursrc1 = &Src1[istart];
-<a name="l03593"></a>03593                         curdest = &Dest[istart];
-<a name="l03594"></a>03594                 } <span class="keywordflow">else</span> {
-<a name="l03595"></a>03595                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l03596"></a>03596                         <span class="keywordflow">return</span> (0);
-<a name="l03597"></a>03597                 }
-<a name="l03598"></a>03598         } <span class="keywordflow">else</span> {
-<a name="l03599"></a>03599                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l03600"></a>03600                 istart = 0;
-<a name="l03601"></a>03601                 cursrc1 = Src1;
-<a name="l03602"></a>03602                 curdest = Dest;
-<a name="l03603"></a>03603         }
-<a name="l03604"></a>03604 
-<a name="l03605"></a>03605         <span class="comment">/* C routine to process image */</span>
-<a name="l03606"></a>03606         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l03607"></a>03607                 result = (int) *cursrc1 << N;
-<a name="l03608"></a>03608                 <span class="keywordflow">if</span> (result > 255)
-<a name="l03609"></a>03609                         result = 255;
-<a name="l03610"></a>03610                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l03611"></a>03611                 <span class="comment">/* Advance pointers */</span>
-<a name="l03612"></a>03612                 cursrc1++;
-<a name="l03613"></a>03613                 curdest++;
-<a name="l03614"></a>03614         }
-<a name="l03615"></a>03615 
-<a name="l03616"></a>03616         <span class="keywordflow">return</span> (0);
-<a name="l03617"></a>03617 }
-<a name="l03618"></a>03618 
-<a name="l03629"></a><a class="code" href="_s_d_l__image_filter_8c.html#a6f06923cb26d510ad72d4b1dd6583284">03629</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a6f06923cb26d510ad72d4b1dd6583284" title="MMX BinarizeUsingThreshold: D = (S >= T) ? 255:0.">SDL_imageFilterBinarizeUsingThresholdMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keyw [...]
-<a name="l03630"></a>03630 {
-<a name="l03631"></a>03631 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l03632"></a>03632 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l03633"></a>03633 <span class="preprocessor"></span>        __asm
-<a name="l03634"></a>03634         {
-<a name="l03635"></a>03635                 pusha
-<a name="l03636"></a>03636                         <span class="comment">/* ** Duplicate T in 8 bytes of MM3 ** */</span>
-<a name="l03637"></a>03637                         pcmpeqb mm1, mm1        <span class="comment">/* generate all 1's in mm1 */</span>
-<a name="l03638"></a>03638                         pcmpeqb mm2, mm2        <span class="comment">/* generate all 1's in mm2 */</span>
-<a name="l03639"></a>03639                         mov al, T       <span class="comment">/* load T into AL */</span>
-<a name="l03640"></a>03640                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
-<a name="l03641"></a>03641                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l03642"></a>03642                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l03643"></a>03643                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l03644"></a>03644                         movd mm3, eax           <span class="comment">/* copy EAX into MM3 */</span>
-<a name="l03645"></a>03645                         movd mm4, eax           <span class="comment">/* copy EAX into MM4 */</span>
-<a name="l03646"></a>03646                         punpckldq mm3, mm4      <span class="comment">/* fill higher bytes of MM3 with T */</span>
-<a name="l03647"></a>03647                         psubusb mm2, mm3        <span class="comment">/* store 0xFF - T in MM2 */</span>
-<a name="l03648"></a>03648                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03649"></a>03649                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l03650"></a>03650                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03651"></a>03651                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03652"></a>03652                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03653"></a>03653 L1029:
-<a name="l03654"></a>03654                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l03655"></a>03655                 paddusb mm0, mm2        <span class="comment">/* MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation) */</span>
-<a name="l03656"></a>03656                         pcmpeqb mm0, mm1        <span class="comment">/* binarize 255:0, comparing to 255 */</span>
-<a name="l03657"></a>03657                         movq [edi], mm0         <span class="comment">/* store result in SrcDest */</span>
-<a name="l03658"></a>03658                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03659"></a>03659                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03660"></a>03660                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l03661"></a>03661                         jnz             L1029           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03662"></a>03662                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l03663"></a>03663                         popa
-<a name="l03664"></a>03664         }
-<a name="l03665"></a>03665 <span class="preprocessor">#else</span>
-<a name="l03666"></a>03666 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l03667"></a>03667                 (<span class="stringliteral">"pusha              \n\t"</span>
-<a name="l03668"></a>03668                 <span class="comment">/* ** Duplicate T in 8 bytes of MM3 ** */</span>
-<a name="l03669"></a>03669                 <span class="stringliteral">"pcmpeqb   %%mm1, %%mm1 \n\t"</span>   <span class="comment">/* generate all 1's in mm1 */</span>
-<a name="l03670"></a>03670                 <span class="stringliteral">"pcmpeqb   %%mm2, %%mm2 \n\t"</span>   <span class="comment">/* generate all 1's in mm2 */</span>
-<a name="l03671"></a>03671                 <span class="stringliteral">"mov           %3, %%al \n\t"</span>   <span class="comment">/* load T into AL */</span>
-<a name="l03672"></a>03672                 <span class="stringliteral">"mov         %%al, %%ah \n\t"</span>   <span class="comment">/* copy AL into AH */</span>
-<a name="l03673"></a>03673                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l03674"></a>03674                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l03675"></a>03675                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l03676"></a>03676                 <span class="stringliteral">"movd      %%eax, %%mm3 \n\t"</span>   <span class="comment">/* copy EAX into MM3 */</span>
-<a name="l03677"></a>03677                 <span class="stringliteral">"movd      %%eax, %%mm4 \n\t"</span>   <span class="comment">/* copy EAX into MM4 */</span>
-<a name="l03678"></a>03678                 <span class="stringliteral">"punpckldq %%mm4, %%mm3 \n\t"</span>   <span class="comment">/* fill higher bytes of MM3 with T */</span>
-<a name="l03679"></a>03679                 <span class="stringliteral">"psubusb   %%mm3, %%mm2 \n\t"</span>   <span class="comment">/* store 0xFF - T in MM2 */</span>
-<a name="l03680"></a>03680                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03681"></a>03681                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l03682"></a>03682                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03683"></a>03683                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03684"></a>03684                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03685"></a>03685                 <span class="stringliteral">"1:                     \n\t"</span> 
-<a name="l03686"></a>03686                 <span class="stringliteral">"movq    (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from SrcDest into MM0 */</span>
-<a name="l03687"></a>03687                 <span class="stringliteral">"paddusb   %%mm2, %%mm0 \n\t"</span>   <span class="comment">/* MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation) */</span>
-<a name="l03688"></a>03688                 <span class="stringliteral">"pcmpeqb   %%mm1, %%mm0 \n\t"</span>   <span class="comment">/* binarize 255:0, comparing to 255 */</span>
-<a name="l03689"></a>03689                 <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span>   <span class="comment">/* store result in SrcDest */</span>
-<a name="l03690"></a>03690                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03691"></a>03691                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03692"></a>03692                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l03693"></a>03693                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03694"></a>03694                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l03695"></a>03695                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l03696"></a>03696                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l03697"></a>03697                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l03698"></a>03698                 <span class="stringliteral">"m"</span>(T)                  <span class="comment">/* %3 */</span>
-<a name="l03699"></a>03699                 );
-<a name="l03700"></a>03700 <span class="preprocessor">#endif</span>
-<a name="l03701"></a>03701 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l03702"></a>03702 <span class="preprocessor">#else</span>
-<a name="l03703"></a>03703 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l03704"></a>03704 <span class="preprocessor">#endif</span>
-<a name="l03705"></a>03705 <span class="preprocessor"></span>}
+<a name="l03578"></a>03578         <span class="keywordflow">return</span> (0);
+<a name="l03579"></a>03579 }
+<a name="l03580"></a>03580 
+<a name="l03592"></a>03592 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterClipToRangeMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Tmin,
+<a name="l03593"></a>03593                                                                   <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Tmax)
+<a name="l03594"></a>03594 {
+<a name="l03595"></a>03595 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l03596"></a>03596 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l03597"></a>03597 <span class="preprocessor"></span>        __asm
+<a name="l03598"></a>03598         {
+<a name="l03599"></a>03599                 pusha
+<a name="l03600"></a>03600                         pcmpeqb mm1, mm1        <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l03601"></a>03601                         <span class="comment">/* ** Duplicate Tmax in 8 bytes of MM3 ** */</span>
+<a name="l03602"></a>03602                         mov al, Tmax    <span class="comment">/* load Tmax into AL */</span>
+<a name="l03603"></a>03603                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
+<a name="l03604"></a>03604                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l03605"></a>03605                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l03606"></a>03606                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l03607"></a>03607                         movd mm3, eax           <span class="comment">/* copy EAX into MM3 */</span>
+<a name="l03608"></a>03608                         movd mm4, eax           <span class="comment">/* copy EAX into MM4 */</span>
+<a name="l03609"></a>03609                         punpckldq mm3, mm4      <span class="comment">/* fill higher bytes of MM3 with Tmax */</span>
+<a name="l03610"></a>03610                         psubusb mm1, mm3        <span class="comment">/* store 0xFF - Tmax in MM1 */</span>
+<a name="l03611"></a>03611                         <span class="comment">/* ** Duplicate Tmin in 8 bytes of MM5 ** */</span>
+<a name="l03612"></a>03612                         mov al, Tmin    <span class="comment">/* load Tmin into AL */</span>
+<a name="l03613"></a>03613                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
+<a name="l03614"></a>03614                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l03615"></a>03615                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l03616"></a>03616                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l03617"></a>03617                         movd mm5, eax           <span class="comment">/* copy EAX into MM5 */</span>
+<a name="l03618"></a>03618                         movd mm4, eax           <span class="comment">/* copy EAX into MM4 */</span>
+<a name="l03619"></a>03619                         punpckldq mm5, mm4      <span class="comment">/* fill higher bytes of MM5 with Tmin */</span>
+<a name="l03620"></a>03620                         movq mm7, mm5           <span class="comment">/* copy MM5 into MM7 */</span>
+<a name="l03621"></a>03621                         paddusb mm7, mm1        <span class="comment">/* store 0xFF - Tmax + Tmin in MM7 */</span>
+<a name="l03622"></a>03622                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l03623"></a>03623                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l03624"></a>03624                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l03625"></a>03625                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l03626"></a>03626                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l03627"></a>03627 L1030:
+<a name="l03628"></a>03628                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM0 */</span>
+<a name="l03629"></a>03629                 paddusb mm0, mm1        <span class="comment">/* MM0=SrcDest+(0xFF-Tmax) */</span>
+<a name="l03630"></a>03630                         psubusb mm0, mm7        <span class="comment">/* MM0=MM0-(0xFF-Tmax+Tmin) */</span>
+<a name="l03631"></a>03631                         paddusb mm0, mm5        <span class="comment">/* MM0=MM0+Tmin */</span>
+<a name="l03632"></a>03632                         movq [edi], mm0         <span class="comment">/* store result in Dest */</span>
+<a name="l03633"></a>03633                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l03634"></a>03634                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l03635"></a>03635                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l03636"></a>03636                         jnz             L1030           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l03637"></a>03637                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l03638"></a>03638                         popa
+<a name="l03639"></a>03639         }
+<a name="l03640"></a>03640 <span class="preprocessor">#else</span>
+<a name="l03641"></a>03641 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l03642"></a>03642         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l03643"></a>03643         __m64 *mDest = (__m64*)Dest;
+<a name="l03644"></a>03644         __m64 mm1 = _m_pcmpeqb(mm1, mm1);       <span class="comment">/* generate all 1's in mm1 */</span>
+<a name="l03645"></a>03645         <span class="keywordtype">int</span> i;
+<a name="l03646"></a>03646         <span class="comment">/* Duplicate Tmax in 8 bytes of MM3 */</span>
+<a name="l03647"></a>03647         __m64 mm3, mm4;
+<a name="l03648"></a>03648         memset(&i, Tmax, 4);
+<a name="l03649"></a>03649         mm3 = _m_from_int(i);
+<a name="l03650"></a>03650         mm4 = _m_from_int(i);
+<a name="l03651"></a>03651         mm3 = _m_punpckldq(mm3, mm4);           <span class="comment">/* fill higher bytes of MM3 with Tmax */</span>
+<a name="l03652"></a>03652         mm1 = _m_psubusb(mm1, mm3);             <span class="comment">/* store 0xFF - Tmax in MM1 */</span>
+<a name="l03653"></a>03653         <span class="comment">//__m64 mm3 = _m_from_int64(lli); // x86_64 only</span>
+<a name="l03654"></a>03654         <span class="comment">/* Duplicate Tmax in 8 bytes of MM3 */</span>
+<a name="l03655"></a>03655         __m64 mm5, mm7;
+<a name="l03656"></a>03656         memset(&i, Tmin, 4);
+<a name="l03657"></a>03657         mm5 = _m_from_int(i);
+<a name="l03658"></a>03658         mm4 = _m_from_int(i);
+<a name="l03659"></a>03659         mm5 = _m_punpckldq(mm5, mm4);           <span class="comment">/* fill higher bytes of MM5 with Tmin */</span>
+<a name="l03660"></a>03660         mm7 = _m_paddusb(mm5, mm1);     <span class="comment">/* store 0xFF - Tmax + Tmin in MM7 */</span>
+<a name="l03661"></a>03661         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l03662"></a>03662                 __m64 mm0;
+<a name="l03663"></a>03663                 mm0 = _m_paddusb(*mSrc1, mm1);  <span class="comment">/* MM0=Src1+(0xFF-Tmax) */</span>
+<a name="l03664"></a>03664                 mm0 = _m_psubusb(mm0, mm7);     <span class="comment">/* MM0=MM0-(0xFF-Tmax+Tmin) */</span>
+<a name="l03665"></a>03665                 *mDest = _m_paddusb(mm0, mm5);  <span class="comment">/* MM0+Tmin */</span>
+<a name="l03666"></a>03666                 mSrc1++;
+<a name="l03667"></a>03667                 mDest++;
+<a name="l03668"></a>03668         }
+<a name="l03669"></a>03669         _m_empty();                             <span class="comment">/* clean MMX state */</span>
+<a name="l03670"></a>03670 <span class="preprocessor">#endif</span>
+<a name="l03671"></a>03671 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l03672"></a>03672 <span class="preprocessor">#else</span>
+<a name="l03673"></a>03673 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l03674"></a>03674 <span class="preprocessor">#endif</span>
+<a name="l03675"></a>03675 <span class="preprocessor"></span>}
+<a name="l03676"></a>03676 
+<a name="l03688"></a><a class="code" href="_s_d_l__image_filter_8h.html#ae9d552de9cf5a4a1716d91ee905eafd7">03688</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ab7224abc4ecc1b8a6f4441ef8379515f" title="Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax.">SDL_imageFilterClipToRange</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <sp [...]
+<a name="l03689"></a>03689                                                            <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Tmax)
+<a name="l03690"></a>03690 {
+<a name="l03691"></a>03691         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l03692"></a>03692         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
+<a name="l03693"></a>03693         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
+<a name="l03694"></a>03694 
+<a name="l03695"></a>03695         <span class="comment">/* Validate input parameters */</span>
+<a name="l03696"></a>03696         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
+<a name="l03697"></a>03697                 <span class="keywordflow">return</span>(-1);
+<a name="l03698"></a>03698         <span class="keywordflow">if</span> (length == 0)
+<a name="l03699"></a>03699                 <span class="keywordflow">return</span>(0);
+<a name="l03700"></a>03700 
+<a name="l03701"></a>03701         <span class="comment">/* Special case: Tmin==0 && Tmax = 255 */</span>
+<a name="l03702"></a>03702         <span class="keywordflow">if</span> ((Tmin == 0) && (Tmax == 25)) {
+<a name="l03703"></a>03703                 memcpy(Src1, Dest, length);
+<a name="l03704"></a>03704                 <span class="keywordflow">return</span> (0); 
+<a name="l03705"></a>03705         }
 <a name="l03706"></a>03706 
-<a name="l03717"></a><a class="code" href="_s_d_l__image_filter_8h.html#ad5bf97d7e39d018d2eeb570e97edf8c0">03717</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a951a062e15df290a137428e1e0f4d5ce" title="Filter using BinarizeUsingThreshold: D = (S >= T) ? 255:0.">SDL_imageFilterBinarizeUsingThreshold</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class [...]
-<a name="l03718"></a>03718 {
-<a name="l03719"></a>03719         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l03720"></a>03720         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
-<a name="l03721"></a>03721         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
-<a name="l03722"></a>03722 
-<a name="l03723"></a>03723         <span class="comment">/* Validate input parameters */</span>
-<a name="l03724"></a>03724         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l03725"></a>03725                 <span class="keywordflow">return</span>(-1);
-<a name="l03726"></a>03726         <span class="keywordflow">if</span> (length == 0)
-<a name="l03727"></a>03727                 <span class="keywordflow">return</span>(0);
-<a name="l03728"></a>03728 
-<a name="l03729"></a>03729         <span class="comment">/* Special case: T==0 */</span>
-<a name="l03730"></a>03730         <span class="keywordflow">if</span> (T == 0) {
-<a name="l03731"></a>03731                 memset(Dest, 255, length);
-<a name="l03732"></a>03732                 <span class="keywordflow">return</span> (0); 
-<a name="l03733"></a>03733         }
-<a name="l03734"></a>03734 
-<a name="l03735"></a>03735         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l03736"></a>03736 
-<a name="l03737"></a>03737                 <a class="code" href="_s_d_l__image_filter_8c.html#a6f06923cb26d510ad72d4b1dd6583284" title="MMX BinarizeUsingThreshold: D = (S >= T) ? 255:0.">SDL_imageFilterBinarizeUsingThresholdMMX</a>(Src1, Dest, length, T);
-<a name="l03738"></a>03738 
-<a name="l03739"></a>03739                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l03740"></a>03740                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l03741"></a>03741                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l03742"></a>03742                         istart = length & 0xfffffff8;
-<a name="l03743"></a>03743                         cursrc1 = &Src1[istart];
-<a name="l03744"></a>03744                         curdest = &Dest[istart];
-<a name="l03745"></a>03745                 } <span class="keywordflow">else</span> {
-<a name="l03746"></a>03746                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l03747"></a>03747                         <span class="keywordflow">return</span> (0);
-<a name="l03748"></a>03748                 }
-<a name="l03749"></a>03749         } <span class="keywordflow">else</span> {
-<a name="l03750"></a>03750                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l03751"></a>03751                 istart = 0;
-<a name="l03752"></a>03752                 cursrc1 = Src1;
-<a name="l03753"></a>03753                 curdest = Dest;
-<a name="l03754"></a>03754         }
-<a name="l03755"></a>03755 
-<a name="l03756"></a>03756         <span class="comment">/* C routine to process image */</span>
-<a name="l03757"></a>03757         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l03758"></a>03758                 *curdest = (<span class="keywordtype">unsigned</span> char)(((<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>)*cursrc1 >= T) ? 255 : 0);
-<a name="l03759"></a>03759                 <span class="comment">/* Advance pointers */</span>
-<a name="l03760"></a>03760                 cursrc1++;
-<a name="l03761"></a>03761                 curdest++;
-<a name="l03762"></a>03762         }
-<a name="l03763"></a>03763 
-<a name="l03764"></a>03764         <span class="keywordflow">return</span> (0);
-<a name="l03765"></a>03765 }
-<a name="l03766"></a>03766 
-<a name="l03778"></a><a class="code" href="_s_d_l__image_filter_8c.html#adc2b0f3e3a32724df1325a2121e9f96d">03778</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#adc2b0f3e3a32724df1325a2121e9f96d" title="Internal MMX Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax.">SDL_imageFilterClipToRangeMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">uns [...]
-<a name="l03779"></a>03779                                                                   <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Tmax)
-<a name="l03780"></a>03780 {
-<a name="l03781"></a>03781 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l03782"></a>03782 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l03783"></a>03783 <span class="preprocessor"></span>        __asm
-<a name="l03784"></a>03784         {
-<a name="l03785"></a>03785                 pusha
-<a name="l03786"></a>03786                         pcmpeqb mm1, mm1        <span class="comment">/* generate all 1's in mm1 */</span>
-<a name="l03787"></a>03787                         <span class="comment">/* ** Duplicate Tmax in 8 bytes of MM3 ** */</span>
-<a name="l03788"></a>03788                         mov al, Tmax    <span class="comment">/* load Tmax into AL */</span>
-<a name="l03789"></a>03789                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
-<a name="l03790"></a>03790                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l03791"></a>03791                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l03792"></a>03792                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l03793"></a>03793                         movd mm3, eax           <span class="comment">/* copy EAX into MM3 */</span>
-<a name="l03794"></a>03794                         movd mm4, eax           <span class="comment">/* copy EAX into MM4 */</span>
-<a name="l03795"></a>03795                         punpckldq mm3, mm4      <span class="comment">/* fill higher bytes of MM3 with Tmax */</span>
-<a name="l03796"></a>03796                         psubusb mm1, mm3        <span class="comment">/* store 0xFF - Tmax in MM1 */</span>
-<a name="l03797"></a>03797                         <span class="comment">/* ** Duplicate Tmin in 8 bytes of MM5 ** */</span>
-<a name="l03798"></a>03798                         mov al, Tmin    <span class="comment">/* load Tmin into AL */</span>
-<a name="l03799"></a>03799                         mov ah, al      <span class="comment">/* copy AL into AH */</span>
-<a name="l03800"></a>03800                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l03801"></a>03801                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l03802"></a>03802                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l03803"></a>03803                         movd mm5, eax           <span class="comment">/* copy EAX into MM5 */</span>
-<a name="l03804"></a>03804                         movd mm4, eax           <span class="comment">/* copy EAX into MM4 */</span>
-<a name="l03805"></a>03805                         punpckldq mm5, mm4      <span class="comment">/* fill higher bytes of MM5 with Tmin */</span>
-<a name="l03806"></a>03806                         movq mm7, mm5           <span class="comment">/* copy MM5 into MM7 */</span>
-<a name="l03807"></a>03807                         paddusb mm7, mm1        <span class="comment">/* store 0xFF - Tmax + Tmin in MM7 */</span>
-<a name="l03808"></a>03808                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03809"></a>03809                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l03810"></a>03810                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03811"></a>03811                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03812"></a>03812                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03813"></a>03813 L1030:
-<a name="l03814"></a>03814                 movq mm0, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM0 */</span>
-<a name="l03815"></a>03815                 paddusb mm0, mm1        <span class="comment">/* MM0=SrcDest+(0xFF-Tmax) */</span>
-<a name="l03816"></a>03816                         psubusb mm0, mm7        <span class="comment">/* MM0=MM0-(0xFF-Tmax+Tmin) */</span>
-<a name="l03817"></a>03817                         paddusb mm0, mm5        <span class="comment">/* MM0=MM0+Tmin */</span>
-<a name="l03818"></a>03818                         movq [edi], mm0         <span class="comment">/* store result in Dest */</span>
-<a name="l03819"></a>03819                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03820"></a>03820                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03821"></a>03821                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l03822"></a>03822                         jnz             L1030           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03823"></a>03823                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l03824"></a>03824                         popa
-<a name="l03825"></a>03825         }
-<a name="l03826"></a>03826 <span class="preprocessor">#else</span>
-<a name="l03827"></a>03827 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l03828"></a>03828                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pcmpeqb   %%mm1, %%mm1 \n\t"</span>        <span class="comment">/* generate all 1's in mm1 */</span>
-<a name="l03829"></a>03829                 <span class="comment">/* ** Duplicate Tmax in 8 bytes of MM3 ** */</span>
-<a name="l03830"></a>03830                 <span class="stringliteral">"mov           %4, %%al \n\t"</span>   <span class="comment">/* load Tmax into AL */</span>
-<a name="l03831"></a>03831                 <span class="stringliteral">"mov         %%al, %%ah \n\t"</span>   <span class="comment">/* copy AL into AH */</span>
-<a name="l03832"></a>03832                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l03833"></a>03833                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l03834"></a>03834                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l03835"></a>03835                 <span class="stringliteral">"movd      %%eax, %%mm3 \n\t"</span>   <span class="comment">/* copy EAX into MM3 */</span>
-<a name="l03836"></a>03836                 <span class="stringliteral">"movd      %%eax, %%mm4 \n\t"</span>   <span class="comment">/* copy EAX into MM4 */</span>
-<a name="l03837"></a>03837                 <span class="stringliteral">"punpckldq %%mm4, %%mm3 \n\t"</span>   <span class="comment">/* fill higher bytes of MM3 with Tmax */</span>
-<a name="l03838"></a>03838                 <span class="stringliteral">"psubusb   %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* store 0xFF - Tmax in MM1 */</span>
-<a name="l03839"></a>03839                 <span class="comment">/* ** Duplicate Tmin in 8 bytes of MM5 ** */</span>
-<a name="l03840"></a>03840                 <span class="stringliteral">"mov           %3, %%al \n\t"</span>   <span class="comment">/* load Tmin into AL */</span>
-<a name="l03841"></a>03841                 <span class="stringliteral">"mov         %%al, %%ah \n\t"</span>   <span class="comment">/* copy AL into AH */</span>
-<a name="l03842"></a>03842                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l03843"></a>03843                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l03844"></a>03844                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l03845"></a>03845                 <span class="stringliteral">"movd      %%eax, %%mm5 \n\t"</span>   <span class="comment">/* copy EAX into MM5 */</span>
-<a name="l03846"></a>03846                 <span class="stringliteral">"movd      %%eax, %%mm4 \n\t"</span>   <span class="comment">/* copy EAX into MM4 */</span>
-<a name="l03847"></a>03847                 <span class="stringliteral">"punpckldq %%mm4, %%mm5 \n\t"</span>   <span class="comment">/* fill higher bytes of MM5 with Tmin */</span>
-<a name="l03848"></a>03848                 <span class="stringliteral">"movq      %%mm5, %%mm7 \n\t"</span>   <span class="comment">/* copy MM5 into MM7 */</span>
-<a name="l03849"></a>03849                 <span class="stringliteral">"paddusb   %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* store 0xFF - Tmax + Tmin in MM7 */</span>
-<a name="l03850"></a>03850                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l03851"></a>03851                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l03852"></a>03852                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l03853"></a>03853                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l03854"></a>03854                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l03855"></a>03855                 <span class="stringliteral">"1:                     \n\t"</span> 
-<a name="l03856"></a>03856                 <span class="stringliteral">"movq    (%%eax), %%mm0 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into MM0 */</span>
-<a name="l03857"></a>03857                 <span class="stringliteral">"paddusb   %%mm1, %%mm0 \n\t"</span>   <span class="comment">/* MM0=SrcDest+(0xFF-Tmax) */</span>
-<a name="l03858"></a>03858                 <span class="stringliteral">"psubusb   %%mm7, %%mm0 \n\t"</span>   <span class="comment">/* MM0=MM0-(0xFF-Tmax+Tmin) */</span>
-<a name="l03859"></a>03859                 <span class="stringliteral">"paddusb   %%mm5, %%mm0 \n\t"</span>   <span class="comment">/* MM0=MM0+Tmin */</span>
-<a name="l03860"></a>03860                 <span class="stringliteral">"movq    %%mm0, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l03861"></a>03861                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l03862"></a>03862                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l03863"></a>03863                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l03864"></a>03864                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l03865"></a>03865                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l03866"></a>03866                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l03867"></a>03867                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l03868"></a>03868                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l03869"></a>03869                 <span class="stringliteral">"m"</span>(Tmin),              <span class="comment">/* %3 */</span>
-<a name="l03870"></a>03870                 <span class="stringliteral">"m"</span>(Tmax)                       <span class="comment">/* %4 */</span>
-<a name="l03871"></a>03871                 );
-<a name="l03872"></a>03872 <span class="preprocessor">#endif</span>
-<a name="l03873"></a>03873 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l03874"></a>03874 <span class="preprocessor">#else</span>
-<a name="l03875"></a>03875 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l03876"></a>03876 <span class="preprocessor">#endif</span>
-<a name="l03877"></a>03877 <span class="preprocessor"></span>}
-<a name="l03878"></a>03878 
-<a name="l03890"></a><a class="code" href="_s_d_l__image_filter_8h.html#ae9d552de9cf5a4a1716d91ee905eafd7">03890</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ab7224abc4ecc1b8a6f4441ef8379515f" title="Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax.">SDL_imageFilterClipToRange</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <sp [...]
-<a name="l03891"></a>03891                                                            <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Tmax)
-<a name="l03892"></a>03892 {
-<a name="l03893"></a>03893         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l03894"></a>03894         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc1;
-<a name="l03895"></a>03895         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
-<a name="l03896"></a>03896 
-<a name="l03897"></a>03897         <span class="comment">/* Validate input parameters */</span>
-<a name="l03898"></a>03898         <span class="keywordflow">if</span> ((Src1 == NULL) || (Dest == NULL))
-<a name="l03899"></a>03899                 <span class="keywordflow">return</span>(-1);
-<a name="l03900"></a>03900         <span class="keywordflow">if</span> (length == 0)
-<a name="l03901"></a>03901                 <span class="keywordflow">return</span>(0);
-<a name="l03902"></a>03902 
-<a name="l03903"></a>03903         <span class="comment">/* Special case: Tmin==0 && Tmax = 255 */</span>
-<a name="l03904"></a>03904         <span class="keywordflow">if</span> ((Tmin == 0) && (Tmax == 25)) {
-<a name="l03905"></a>03905                 memcpy(Src1, Dest, length);
-<a name="l03906"></a>03906                 <span class="keywordflow">return</span> (0); 
-<a name="l03907"></a>03907         }
-<a name="l03908"></a>03908 
-<a name="l03909"></a>03909         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l03910"></a>03910 
-<a name="l03911"></a>03911                 <a class="code" href="_s_d_l__image_filter_8c.html#adc2b0f3e3a32724df1325a2121e9f96d" title="Internal MMX Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax.">SDL_imageFilterClipToRangeMMX</a>(Src1, Dest, length, Tmin, Tmax);
-<a name="l03912"></a>03912 
-<a name="l03913"></a>03913                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l03914"></a>03914                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l03915"></a>03915                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l03916"></a>03916                         istart = length & 0xfffffff8;
-<a name="l03917"></a>03917                         cursrc1 = &Src1[istart];
-<a name="l03918"></a>03918                         curdest = &Dest[istart];
-<a name="l03919"></a>03919                 } <span class="keywordflow">else</span> {
-<a name="l03920"></a>03920                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l03921"></a>03921                         <span class="keywordflow">return</span> (0);
-<a name="l03922"></a>03922                 }
-<a name="l03923"></a>03923         } <span class="keywordflow">else</span> {
-<a name="l03924"></a>03924                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l03925"></a>03925                 istart = 0;
-<a name="l03926"></a>03926                 cursrc1 = Src1;
-<a name="l03927"></a>03927                 curdest = Dest;
-<a name="l03928"></a>03928         }
-<a name="l03929"></a>03929 
-<a name="l03930"></a>03930         <span class="comment">/* C routine to process image */</span>
-<a name="l03931"></a>03931         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l03932"></a>03932                 <span class="keywordflow">if</span> (*cursrc1 < Tmin) {
-<a name="l03933"></a>03933                         *curdest = Tmin;
-<a name="l03934"></a>03934                 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (*cursrc1 > Tmax) {
-<a name="l03935"></a>03935                         *curdest = Tmax;
-<a name="l03936"></a>03936                 } <span class="keywordflow">else</span> {
-<a name="l03937"></a>03937                         *curdest = *cursrc1;
-<a name="l03938"></a>03938                 }
-<a name="l03939"></a>03939                 <span class="comment">/* Advance pointers */</span>
-<a name="l03940"></a>03940                 cursrc1++;
-<a name="l03941"></a>03941                 curdest++;
-<a name="l03942"></a>03942         }
-<a name="l03943"></a>03943 
-<a name="l03944"></a>03944         <span class="keywordflow">return</span> (0);
-<a name="l03945"></a>03945 }
-<a name="l03946"></a>03946 
-<a name="l03960"></a><a class="code" href="_s_d_l__image_filter_8c.html#a2e7631c748eb46544e7be40fa64bc232">03960</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a2e7631c748eb46544e7be40fa64bc232" title="Internal MMX Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) +...">SDL_imageFilterNormalizeLinearMMX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="k [...]
-<a name="l03961"></a>03961                                                                           <span class="keywordtype">int</span> Nmin, <span class="keywordtype">int</span> Nmax)
-<a name="l03962"></a>03962 {
-<a name="l03963"></a>03963 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l03964"></a>03964 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l03965"></a>03965 <span class="preprocessor"></span>        __asm
-<a name="l03966"></a>03966         {
-<a name="l03967"></a>03967                 pusha
-<a name="l03968"></a>03968                         mov ax, WORD PTR Nmax           <span class="comment">/* load Nmax in AX */</span>
-<a name="l03969"></a>03969                         mov bx, WORD PTR Cmax           <span class="comment">/* load Cmax in BX */</span>
-<a name="l03970"></a>03970                         sub ax, WORD PTR Nmin           <span class="comment">/* AX = Nmax - Nmin */</span>
-<a name="l03971"></a>03971                         sub bx, WORD PTR Cmin           <span class="comment">/* BX = Cmax - Cmin */</span>
-<a name="l03972"></a>03972                         jz             L10311           <span class="comment">/* check division by zero */</span>
-<a name="l03973"></a>03973                         xor dx, dx      <span class="comment">/* prepare for division, zero DX */</span>
-<a name="l03974"></a>03974                         div               bx            <span class="comment">/* AX = AX/BX */</span>
-<a name="l03975"></a>03975                         jmp            L10312
-<a name="l03976"></a>03976 L10311:
-<a name="l03977"></a>03977                 mov ax, 255     <span class="comment">/* if div by zero, assume result max byte value */</span>
-<a name="l03978"></a>03978 L10312:                         <span class="comment">/* ** Duplicate AX in 4 words of MM0 ** */</span>
-<a name="l03979"></a>03979                 mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l03980"></a>03980                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l03981"></a>03981                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l03982"></a>03982                         movd mm0, eax           <span class="comment">/* copy EAX into MM0 */</span>
-<a name="l03983"></a>03983                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l03984"></a>03984                         punpckldq mm0, mm1      <span class="comment">/* fill higher words of MM0 with AX */</span>
-<a name="l03985"></a>03985                         <span class="comment">/* ** Duplicate Cmin in 4 words of MM1 ** */</span>
-<a name="l03986"></a>03986                         mov ax, WORD PTR Cmin           <span class="comment">/* load Cmin into AX */</span>
-<a name="l03987"></a>03987                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l03988"></a>03988                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l03989"></a>03989                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l03990"></a>03990                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l03991"></a>03991                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l03992"></a>03992                         punpckldq mm1, mm2      <span class="comment">/* fill higher words of MM1 with Cmin */</span>
-<a name="l03993"></a>03993                         <span class="comment">/* ** Duplicate Nmin in 4 words of MM2 ** */</span>
-<a name="l03994"></a>03994                         mov ax, WORD PTR Nmin           <span class="comment">/* load Nmin into AX */</span>
-<a name="l03995"></a>03995                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
-<a name="l03996"></a>03996                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l03997"></a>03997                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
-<a name="l03998"></a>03998                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l03999"></a>03999                         movd mm3, eax           <span class="comment">/* copy EAX into MM3 */</span>
-<a name="l04000"></a>04000                         punpckldq mm2, mm3      <span class="comment">/* fill higher words of MM2 with Nmin */</span>
-<a name="l04001"></a>04001                         pxor mm7, mm7           <span class="comment">/* zero MM7 register */</span>
-<a name="l04002"></a>04002                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
-<a name="l04003"></a>04003                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
-<a name="l04004"></a>04004                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l04005"></a>04005                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l04006"></a>04006                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l04007"></a>04007 L1031:
-<a name="l04008"></a>04008                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l04009"></a>04009                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l04010"></a>04010                         punpcklbw mm3, mm7      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l04011"></a>04011                         punpckhbw mm4, mm7      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l04012"></a>04012                         psubusb mm3, mm1        <span class="comment">/* S-Cmin, low  bytes */</span>
-<a name="l04013"></a>04013                         psubusb mm4, mm1        <span class="comment">/* S-Cmin, high bytes */</span>
-<a name="l04014"></a>04014                         pmullw mm3, mm0         <span class="comment">/* MM0*(S-Cmin), low  bytes */</span>
-<a name="l04015"></a>04015                         pmullw mm4, mm0         <span class="comment">/* MM0*(S-Cmin), high bytes */</span>
-<a name="l04016"></a>04016                         paddusb mm3, mm2        <span class="comment">/* MM0*(S-Cmin)+Nmin, low  bytes */</span>
-<a name="l04017"></a>04017                         paddusb mm4, mm2        <span class="comment">/* MM0*(S-Cmin)+Nmin, high bytes */</span>
-<a name="l04018"></a>04018                         <span class="comment">/* ** Take abs value of the signed words ** */</span>
-<a name="l04019"></a>04019                         movq mm5, mm3           <span class="comment">/* copy mm3 into mm5 */</span>
-<a name="l04020"></a>04020                         movq mm6, mm4           <span class="comment">/* copy mm4 into mm6 */</span>
-<a name="l04021"></a>04021                         psraw mm5, 15           <span class="comment">/* fill mm5 words with word sign bit */</span>
-<a name="l04022"></a>04022                         psraw mm6, 15           <span class="comment">/* fill mm6 words with word sign bit */</span>
-<a name="l04023"></a>04023                         pxor mm3, mm5           <span class="comment">/* take 1's compliment of only neg words */</span>
-<a name="l04024"></a>04024                         pxor mm4, mm6           <span class="comment">/* take 1's compliment of only neg words */</span>
-<a name="l04025"></a>04025                         psubsw mm3, mm5         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
-<a name="l04026"></a>04026                         psubsw mm4, mm6         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
-<a name="l04027"></a>04027                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l04028"></a>04028                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
-<a name="l04029"></a>04029                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l04030"></a>04030                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l04031"></a>04031                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
-<a name="l04032"></a>04032                         jnz             L1031           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04033"></a>04033                         emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l04034"></a>04034                         popa
-<a name="l04035"></a>04035         }
-<a name="l04036"></a>04036 <span class="preprocessor">#else</span>
-<a name="l04037"></a>04037 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l04038"></a>04038                 (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"mov           %6, %%ax \n\t"</span>        <span class="comment">/* load Nmax in AX */</span>
-<a name="l04039"></a>04039                 <span class="stringliteral">"mov           %4, %%bx \n\t"</span>   <span class="comment">/* load Cmax in BX */</span>
-<a name="l04040"></a>04040                 <span class="stringliteral">"sub           %5, %%ax \n\t"</span>   <span class="comment">/* AX = Nmax - Nmin */</span>
-<a name="l04041"></a>04041                 <span class="stringliteral">"sub           %3, %%bx \n\t"</span>   <span class="comment">/* BX = Cmax - Cmin */</span>
-<a name="l04042"></a>04042                 <span class="stringliteral">"jz                  1f \n\t"</span>   <span class="comment">/* check division by zero */</span>
-<a name="l04043"></a>04043                 <span class="stringliteral">"xor         %%dx, %%dx \n\t"</span>   <span class="comment">/* prepare for division, zero DX */</span>
-<a name="l04044"></a>04044                 <span class="stringliteral">"div               %%bx \n\t"</span>   <span class="comment">/* AX = AX/BX */</span>
-<a name="l04045"></a>04045                 <span class="stringliteral">"jmp                 2f \n\t"</span> <span class="stringliteral">"1:                     \n\t"</span> <span class="stringliteral">"mov         $255, %%ax \n\t"</span>       <span class="comment">/* if div by zero, assume result max. byte value */</span>
-<a name="l04046"></a>04046                 <span class="stringliteral">"2:                    \n\t"</span>    <span class="comment">/* ** Duplicate AX in 4 words of MM0 ** */</span>
-<a name="l04047"></a>04047                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l04048"></a>04048                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l04049"></a>04049                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l04050"></a>04050                 <span class="stringliteral">"movd      %%eax, %%mm0 \n\t"</span>   <span class="comment">/* copy EAX into MM0 */</span>
-<a name="l04051"></a>04051                 <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l04052"></a>04052                 <span class="stringliteral">"punpckldq %%mm1, %%mm0 \n\t"</span>   <span class="comment">/* fill higher words of MM0 with AX */</span>
-<a name="l04053"></a>04053                 <span class="comment">/* ** Duplicate Cmin in 4 words of MM1 ** */</span>
-<a name="l04054"></a>04054                 <span class="stringliteral">"mov           %3, %%ax \n\t"</span>   <span class="comment">/* load Cmin into AX */</span>
-<a name="l04055"></a>04055                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l04056"></a>04056                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l04057"></a>04057                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l04058"></a>04058                 <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* copy EAX into MM1 */</span>
-<a name="l04059"></a>04059                 <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l04060"></a>04060                 <span class="stringliteral">"punpckldq %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* fill higher words of MM1 with Cmin */</span>
-<a name="l04061"></a>04061                 <span class="comment">/* ** Duplicate Nmin in 4 words of MM2 ** */</span>
-<a name="l04062"></a>04062                 <span class="stringliteral">"mov           %5, %%ax \n\t"</span>   <span class="comment">/* load Nmin into AX */</span>
-<a name="l04063"></a>04063                 <span class="stringliteral">"mov         %%ax, %%bx \n\t"</span>   <span class="comment">/* copy AX into BX */</span>
-<a name="l04064"></a>04064                 <span class="stringliteral">"shl         $16, %%eax \n\t"</span>   <span class="comment">/* shift 2 bytes of EAX left */</span>
-<a name="l04065"></a>04065                 <span class="stringliteral">"mov         %%bx, %%ax \n\t"</span>   <span class="comment">/* copy BX into AX */</span>
-<a name="l04066"></a>04066                 <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* copy EAX into MM2 */</span>
-<a name="l04067"></a>04067                 <span class="stringliteral">"movd      %%eax, %%mm3 \n\t"</span>   <span class="comment">/* copy EAX into MM3 */</span>
-<a name="l04068"></a>04068                 <span class="stringliteral">"punpckldq %%mm3, %%mm2 \n\t"</span>   <span class="comment">/* fill higher words of MM2 with Nmin */</span>
-<a name="l04069"></a>04069                 <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>   <span class="comment">/* zero MM7 register */</span>
-<a name="l04070"></a>04070                 <span class="stringliteral">"mov          %1, %%eax \n\t"</span>   <span class="comment">/* load Src1 address into eax */</span>
-<a name="l04071"></a>04071                 <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address into edi */</span>
-<a name="l04072"></a>04072                 <span class="stringliteral">"mov          %2, %%ecx \n\t"</span>   <span class="comment">/* load loop counter (SIZE) into ecx */</span>
-<a name="l04073"></a>04073                 <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l04074"></a>04074                 <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l04075"></a>04075                 <span class="stringliteral">"1:                     \n\t"</span> 
-<a name="l04076"></a>04076                 <span class="stringliteral">"movq    (%%eax), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
-<a name="l04077"></a>04077                 <span class="stringliteral">"movq      %%mm3, %%mm4 \n\t"</span>   <span class="comment">/* copy MM3 into MM4  */</span>
-<a name="l04078"></a>04078                 <span class="stringliteral">"punpcklbw %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
-<a name="l04079"></a>04079                 <span class="stringliteral">"punpckhbw %%mm7, %%mm4 \n\t"</span>   <span class="comment">/* unpack high bytes of SrcDest into words */</span>
-<a name="l04080"></a>04080                 <span class="stringliteral">"psubusb   %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* S-Cmin, low  bytes */</span>
-<a name="l04081"></a>04081                 <span class="stringliteral">"psubusb   %%mm1, %%mm4 \n\t"</span>   <span class="comment">/* S-Cmin, high bytes */</span>
-<a name="l04082"></a>04082                 <span class="stringliteral">"pmullw    %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* MM0*(S-Cmin), low  bytes */</span>
-<a name="l04083"></a>04083                 <span class="stringliteral">"pmullw    %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* MM0*(S-Cmin), high bytes */</span>
-<a name="l04084"></a>04084                 <span class="stringliteral">"paddusb   %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* MM0*(S-Cmin)+Nmin, low  bytes */</span>
-<a name="l04085"></a>04085                 <span class="stringliteral">"paddusb   %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* MM0*(S-Cmin)+Nmin, high bytes */</span>
-<a name="l04086"></a>04086                 <span class="comment">/* ** Take abs value of the signed words ** */</span>
-<a name="l04087"></a>04087                 <span class="stringliteral">"movq      %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* copy mm3 into mm5 */</span>
-<a name="l04088"></a>04088                 <span class="stringliteral">"movq      %%mm4, %%mm6 \n\t"</span>   <span class="comment">/* copy mm4 into mm6 */</span>
-<a name="l04089"></a>04089                 <span class="stringliteral">"psraw       $15, %%mm5 \n\t"</span>   <span class="comment">/* fill mm5 words with word sign bit */</span>
-<a name="l04090"></a>04090                 <span class="stringliteral">"psraw       $15, %%mm6 \n\t"</span>   <span class="comment">/* fill mm6 words with word sign bit */</span>
-<a name="l04091"></a>04091                 <span class="stringliteral">"pxor      %%mm5, %%mm3 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l04092"></a>04092                 <span class="stringliteral">"pxor      %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l04093"></a>04093                 <span class="stringliteral">"psubsw    %%mm5, %%mm3 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l04094"></a>04094                 <span class="stringliteral">"psubsw    %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l04095"></a>04095                 <span class="stringliteral">"packuswb  %%mm4, %%mm3 \n\t"</span>   <span class="comment">/* pack words back into bytes with saturation */</span>
-<a name="l04096"></a>04096                 <span class="stringliteral">"movq    %%mm3, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l04097"></a>04097                 <span class="stringliteral">"add          $8, %%eax \n\t"</span>   <span class="comment">/* increase Src1 register pointer by 8 */</span>
-<a name="l04098"></a>04098                 <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* increase Dest register pointer by 8 */</span>
-<a name="l04099"></a>04099                 <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter */</span>
-<a name="l04100"></a>04100                 <span class="stringliteral">"jnz                 1b \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04101"></a>04101                 <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l04102"></a>04102                 <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l04103"></a>04103                 :<span class="stringliteral">"m"</span>(Src1),             <span class="comment">/* %1 */</span>
-<a name="l04104"></a>04104                 <span class="stringliteral">"m"</span>(SrcLength),         <span class="comment">/* %2 */</span>
-<a name="l04105"></a>04105                 <span class="stringliteral">"m"</span>(Cmin),              <span class="comment">/* %3 */</span>
-<a name="l04106"></a>04106                 <span class="stringliteral">"m"</span>(Cmax),              <span class="comment">/* %4 */</span>
-<a name="l04107"></a>04107                 <span class="stringliteral">"m"</span>(Nmin),              <span class="comment">/* %5 */</span>
-<a name="l04108"></a>04108                 <span class="stringliteral">"m"</span>(Nmax)                       <span class="comment">/* %6 */</span>
-<a name="l04109"></a>04109                 );
-<a name="l04110"></a>04110 <span class="preprocessor">#endif</span>
-<a name="l04111"></a>04111 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
-<a name="l04112"></a>04112 <span class="preprocessor">#else</span>
-<a name="l04113"></a>04113 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
-<a name="l04114"></a>04114 <span class="preprocessor">#endif</span>
-<a name="l04115"></a>04115 <span class="preprocessor"></span>}
-<a name="l04116"></a>04116 
-<a name="l04130"></a><a class="code" href="_s_d_l__image_filter_8h.html#aacb316a18d8cb7999d5d53ee5e7b9750">04130</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ab018ace4db884cac953b06b09c00828b" title="Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin)...">SDL_imageFilterNormalizeLinear</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype" [...]
-<a name="l04131"></a>04131                                                                    <span class="keywordtype">int</span> Nmax)
-<a name="l04132"></a>04132 {
-<a name="l04133"></a>04133         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
-<a name="l04134"></a>04134         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc;
-<a name="l04135"></a>04135         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
-<a name="l04136"></a>04136         <span class="keywordtype">int</span> dN, dC, factor;
-<a name="l04137"></a>04137         <span class="keywordtype">int</span> result;
-<a name="l04138"></a>04138 
-<a name="l04139"></a>04139         <span class="comment">/* Validate input parameters */</span>
-<a name="l04140"></a>04140         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL))
-<a name="l04141"></a>04141                 <span class="keywordflow">return</span>(-1);
-<a name="l04142"></a>04142         <span class="keywordflow">if</span> (length == 0)
-<a name="l04143"></a>04143                 <span class="keywordflow">return</span>(0);
-<a name="l04144"></a>04144 
-<a name="l04145"></a>04145         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
-<a name="l04146"></a>04146 
-<a name="l04147"></a>04147                 <a class="code" href="_s_d_l__image_filter_8c.html#a2e7631c748eb46544e7be40fa64bc232" title="Internal MMX Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) +...">SDL_imageFilterNormalizeLinearMMX</a>(Src, Dest, length, Cmin, Cmax, Nmin, Nmax);
-<a name="l04148"></a>04148 
-<a name="l04149"></a>04149                 <span class="comment">/* Check for unaligned bytes */</span>
-<a name="l04150"></a>04150                 <span class="keywordflow">if</span> ((length & 7) > 0) {
-<a name="l04151"></a>04151                         <span class="comment">/* Setup to process unaligned bytes */</span>
-<a name="l04152"></a>04152                         istart = length & 0xfffffff8;
-<a name="l04153"></a>04153                         cursrc = &Src[istart];
-<a name="l04154"></a>04154                         curdest = &Dest[istart];
-<a name="l04155"></a>04155                 } <span class="keywordflow">else</span> {
-<a name="l04156"></a>04156                         <span class="comment">/* No unaligned bytes - we are done */</span>
-<a name="l04157"></a>04157                         <span class="keywordflow">return</span> (0);
-<a name="l04158"></a>04158                 }
-<a name="l04159"></a>04159         } <span class="keywordflow">else</span> {
-<a name="l04160"></a>04160                 <span class="comment">/* Setup to process whole image */</span>
-<a name="l04161"></a>04161                 istart = 0;
-<a name="l04162"></a>04162                 cursrc = Src;
-<a name="l04163"></a>04163                 curdest = Dest;
-<a name="l04164"></a>04164         }
-<a name="l04165"></a>04165 
-<a name="l04166"></a>04166         <span class="comment">/* C routine to process image */</span>
-<a name="l04167"></a>04167         dC = Cmax - Cmin;
-<a name="l04168"></a>04168         <span class="keywordflow">if</span> (dC == 0)
-<a name="l04169"></a>04169                 <span class="keywordflow">return</span> (0);
-<a name="l04170"></a>04170         dN = Nmax - Nmin;
-<a name="l04171"></a>04171         factor = dN / dC;
-<a name="l04172"></a>04172         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
-<a name="l04173"></a>04173                 result = factor * ((int) (*cursrc) - Cmin) + Nmin;
-<a name="l04174"></a>04174                 <span class="keywordflow">if</span> (result > 255)
-<a name="l04175"></a>04175                         result = 255;
-<a name="l04176"></a>04176                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
-<a name="l04177"></a>04177                 <span class="comment">/* Advance pointers */</span>
-<a name="l04178"></a>04178                 cursrc++;
-<a name="l04179"></a>04179                 curdest++;
-<a name="l04180"></a>04180         }
-<a name="l04181"></a>04181 
-<a name="l04182"></a>04182         <span class="keywordflow">return</span> (0);
-<a name="l04183"></a>04183 }
-<a name="l04184"></a>04184 
-<a name="l04185"></a>04185 <span class="comment">/* ------------------------------------------------------------------------------------ */</span>
-<a name="l04186"></a>04186 
-<a name="l04201"></a><a class="code" href="_s_d_l__image_filter_8h.html#a7286cd21fa0a0cfb0606806dacfbe121">04201</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a8e7e4138a93e26f1912763189d407770" title="Filter using ConvolveKernel3x3Divide: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel3x3Divide</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <spa [...]
-<a name="l04202"></a>04202                                                                                    <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Divisor)
-<a name="l04203"></a>04203 {
-<a name="l04204"></a>04204         <span class="comment">/* Validate input parameters */</span>
-<a name="l04205"></a>04205         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
-<a name="l04206"></a>04206                 <span class="keywordflow">return</span>(-1);
-<a name="l04207"></a>04207 
-<a name="l04208"></a>04208         <span class="keywordflow">if</span> ((columns < 3) || (rows < 3) || (Divisor == 0))
-<a name="l04209"></a>04209                 <span class="keywordflow">return</span> (-1);
-<a name="l04210"></a>04210 
-<a name="l04211"></a>04211         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
-<a name="l04212"></a>04212 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l04213"></a>04213 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l04214"></a>04214 <span class="preprocessor"></span>                __asm
-<a name="l04215"></a>04215                 {
-<a name="l04216"></a>04216                         pusha
-<a name="l04217"></a>04217                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
-<a name="l04218"></a>04218                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
-<a name="l04219"></a>04219                                 mov bl, Divisor         <span class="comment">/* load Divisor into BL */</span>
-<a name="l04220"></a>04220                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l04221"></a>04221                                 movq mm5, [edx]         <span class="comment">/* MM5 = {0,K2,K1,K0} */</span>
-<a name="l04222"></a>04222                         add edx, 8      <span class="comment">/* second row              |K0 K1 K2 0| */</span>
-<a name="l04223"></a>04223                                 movq mm6, [edx]         <span class="comment">/* MM6 = {0,K5,K4,K3}  K = |K3 K4 K5 0| */</span>
-<a name="l04224"></a>04224                         add edx, 8      <span class="comment">/* third row               |K6 K7 K8 0| */</span>
-<a name="l04225"></a>04225                                 movq mm7, [edx]         <span class="comment">/* MM7 = {0,K8,K7,K6} */</span>
-<a name="l04226"></a>04226                         <span class="comment">/* ---, */</span>
-<a name="l04227"></a>04227                         mov eax, columns        <span class="comment">/* load columns into EAX */</span>
-<a name="l04228"></a>04228                                 mov esi, Src    <span class="comment">/* ESI = Src row 0 address */</span>
-<a name="l04229"></a>04229                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
-<a name="l04230"></a>04230                                 add edi, eax    <span class="comment">/* EDI = EDI + columns */</span>
-<a name="l04231"></a>04231                                 inc              edi            <span class="comment">/* 1 byte offset from the left edge */</span>
-<a name="l04232"></a>04232                                 mov edx, rows           <span class="comment">/* initialize ROWS counter */</span>
-<a name="l04233"></a>04233                                 sub edx, 2      <span class="comment">/* do not use first and last row */</span>
-<a name="l04234"></a>04234                                 <span class="comment">/* ---, */</span>
-<a name="l04235"></a>04235 L10320:
-<a name="l04236"></a>04236                         mov ecx, eax    <span class="comment">/* initialize COLUMS counter */</span>
-<a name="l04237"></a>04237                                 sub ecx, 2      <span class="comment">/* do not use first and last column */</span>
-<a name="l04238"></a>04238                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l04239"></a>04239 L10322:
-<a name="l04240"></a>04240                         <span class="comment">/* ---, */</span>
-<a name="l04241"></a>04241                         movq mm1, [esi]         <span class="comment">/* load 8 bytes of the image first row */</span>
-<a name="l04242"></a>04242                         add esi, eax    <span class="comment">/* move one row below */</span>
-<a name="l04243"></a>04243                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes of the image second row */</span>
-<a name="l04244"></a>04244                         add esi, eax    <span class="comment">/* move one row below */</span>
-<a name="l04245"></a>04245                                 movq mm3, [esi]         <span class="comment">/* load 8 bytes of the image third row */</span>
-<a name="l04246"></a>04246                         punpcklbw mm1, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l04247"></a>04247                                 punpcklbw mm2, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l04248"></a>04248                                 punpcklbw mm3, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l04249"></a>04249                                 pmullw mm1, mm5         <span class="comment">/* multiply words first row  image*Kernel */</span>
-<a name="l04250"></a>04250                                 pmullw mm2, mm6         <span class="comment">/* multiply words second row image*Kernel */</span>
-<a name="l04251"></a>04251                                 pmullw mm3, mm7         <span class="comment">/* multiply words third row  image*Kernel */</span>
-<a name="l04252"></a>04252                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the first and second rows */</span>
-<a name="l04253"></a>04253                                 paddsw mm1, mm3         <span class="comment">/* add 4 words of the third row and result */</span>
-<a name="l04254"></a>04254                                 movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04255"></a>04255                                 psrlq mm1, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l04256"></a>04256                                 paddsw mm1, mm2         <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l04257"></a>04257                                 movq mm3, mm1           <span class="comment">/* copy MM1 into MM3 */</span>
-<a name="l04258"></a>04258                                 psrlq mm1, 16           <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l04259"></a>04259                                 paddsw mm1, mm3         <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l04260"></a>04260                                 <span class="comment">/* --, */</span>
-<a name="l04261"></a>04261                                 movd mm2, eax           <span class="comment">/* save EAX in MM2 */</span>
-<a name="l04262"></a>04262                                 movd mm3, edx           <span class="comment">/* save EDX in MM3 */</span>
-<a name="l04263"></a>04263                                 movd eax, mm1           <span class="comment">/* copy MM1 into EAX */</span>
-<a name="l04264"></a>04264                                 psraw mm1, 15           <span class="comment">/* spread sign bit of the result */</span>
-<a name="l04265"></a>04265                                 movd edx, mm1           <span class="comment">/* fill EDX with a sign bit */</span>
-<a name="l04266"></a>04266                                 idiv bx         <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
-<a name="l04267"></a>04267                                 movd mm1, eax           <span class="comment">/* move result of division into MM1 */</span>
-<a name="l04268"></a>04268                                 packuswb mm1, mm0       <span class="comment">/* pack division result with saturation */</span>
-<a name="l04269"></a>04269                                 movd eax, mm1           <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l04270"></a>04270                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l04271"></a>04271                                 movd edx, mm3           <span class="comment">/* restore saved EDX */</span>
-<a name="l04272"></a>04272                                 movd eax, mm2           <span class="comment">/* restore saved EAX */</span>
-<a name="l04273"></a>04273                                 <span class="comment">/* --, */</span>
-<a name="l04274"></a>04274                                 sub esi, eax    <span class="comment">/* move two rows up */</span>
-<a name="l04275"></a>04275                                 sub esi, eax    <span class="comment">/* */</span>
-<a name="l04276"></a>04276                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l04277"></a>04277                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l04278"></a>04278                                 <span class="comment">/* ---, */</span>
-<a name="l04279"></a>04279                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l04280"></a>04280                                 jnz            L10322           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04281"></a>04281                                 add esi, 2      <span class="comment">/* move to the next row in Src */</span>
-<a name="l04282"></a>04282                                 add edi, 2      <span class="comment">/* move to the next row in Dest */</span>
-<a name="l04283"></a>04283                                 dec              edx            <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l04284"></a>04284                                 jnz            L10320           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04285"></a>04285                                 <span class="comment">/* ---, */</span>
-<a name="l04286"></a>04286                                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l04287"></a>04287                                 popa
-<a name="l04288"></a>04288                 }
-<a name="l04289"></a>04289 <span class="preprocessor">#else</span>
-<a name="l04290"></a>04290 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l04291"></a>04291                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
-<a name="l04292"></a>04292                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
-<a name="l04293"></a>04293                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load Divisor into BL */</span>
-<a name="l04294"></a>04294                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l04295"></a>04295                         <span class="stringliteral">"movq    (%%edx), %%mm5 \n\t"</span>   <span class="comment">/* MM5 = {0,K2,K1,K0} */</span>
-<a name="l04296"></a>04296                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* second row              |K0 K1 K2 0| */</span>
-<a name="l04297"></a>04297                         <span class="stringliteral">"movq    (%%edx), %%mm6 \n\t"</span>   <span class="comment">/* MM6 = {0,K5,K4,K3}  K = |K3 K4 K5 0| */</span>
-<a name="l04298"></a>04298                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* third row               |K6 K7 K8 0| */</span>
-<a name="l04299"></a>04299                         <span class="stringliteral">"movq    (%%edx), %%mm7 \n\t"</span>   <span class="comment">/* MM7 = {0,K8,K7,K6} */</span>
-<a name="l04300"></a>04300                         <span class="comment">/* --- */</span>
-<a name="l04301"></a>04301                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
-<a name="l04302"></a>04302                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* ESI = Src row 0 address */</span>
-<a name="l04303"></a>04303                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
-<a name="l04304"></a>04304                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* EDI = EDI + columns */</span>
-<a name="l04305"></a>04305                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* 1 byte offset from the left edge */</span>
-<a name="l04306"></a>04306                         <span class="stringliteral">"mov          %2, %%edx \n\t"</span>   <span class="comment">/* initialize ROWS counter */</span>
-<a name="l04307"></a>04307                         <span class="stringliteral">"sub          $2, %%edx \n\t"</span>   <span class="comment">/* do not use first and last row */</span>
-<a name="l04308"></a>04308                         <span class="comment">/* --- */</span>
-<a name="l04309"></a>04309                         <span class="stringliteral">".L10320:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMS counter */</span>
-<a name="l04310"></a>04310                         <span class="stringliteral">"sub          $2, %%ecx \n\t"</span>   <span class="comment">/* do not use first and last column */</span>
-<a name="l04311"></a>04311                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l04312"></a>04312                         <span class="stringliteral">".L10322:               \n\t"</span>
-<a name="l04313"></a>04313                         <span class="comment">/* --- */</span>
-<a name="l04314"></a>04314                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the image first row */</span>
-<a name="l04315"></a>04315                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move one row below */</span>
-<a name="l04316"></a>04316                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes of the image second row */</span>
-<a name="l04317"></a>04317                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move one row below */</span>
-<a name="l04318"></a>04318                         <span class="stringliteral">"movq    (%%esi), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes of the image third row */</span>
-<a name="l04319"></a>04319                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l04320"></a>04320                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l04321"></a>04321                         <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l04322"></a>04322                         <span class="stringliteral">"pmullw    %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* multiply words first row  image*Kernel */</span>
-<a name="l04323"></a>04323                         <span class="stringliteral">"pmullw    %%mm6, %%mm2 \n\t"</span>   <span class="comment">/* multiply words second row image*Kernel */</span>
-<a name="l04324"></a>04324                         <span class="stringliteral">"pmullw    %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* multiply words third row  image*Kernel */</span>
-<a name="l04325"></a>04325                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the first and second rows */</span>
-<a name="l04326"></a>04326                         <span class="stringliteral">"paddsw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the third row and result */</span>
-<a name="l04327"></a>04327                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04328"></a>04328                         <span class="stringliteral">"psrlq       $32, %%mm1 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l04329"></a>04329                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l04330"></a>04330                         <span class="stringliteral">"movq      %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* copy MM1 into MM3 */</span>
-<a name="l04331"></a>04331                         <span class="stringliteral">"psrlq       $16, %%mm1 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l04332"></a>04332                         <span class="stringliteral">"paddsw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l04333"></a>04333                         <span class="comment">/* -- */</span>
-<a name="l04334"></a>04334                         <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* save EAX in MM2 */</span>
-<a name="l04335"></a>04335                         <span class="stringliteral">"movd      %%edx, %%mm3 \n\t"</span>   <span class="comment">/* save EDX in MM3 */</span>
-<a name="l04336"></a>04336                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* copy MM1 into EAX */</span>
-<a name="l04337"></a>04337                         <span class="stringliteral">"psraw       $15, %%mm1 \n\t"</span>   <span class="comment">/* spread sign bit of the result */</span>
-<a name="l04338"></a>04338                         <span class="stringliteral">"movd      %%mm1, %%edx \n\t"</span>   <span class="comment">/* fill EDX with a sign bit */</span>
-<a name="l04339"></a>04339                         <span class="stringliteral">"idivw             %%bx \n\t"</span>   <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
-<a name="l04340"></a>04340                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* move result of division into MM1 */</span>
-<a name="l04341"></a>04341                         <span class="stringliteral">"packuswb  %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
-<a name="l04342"></a>04342                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l04343"></a>04343                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l04344"></a>04344                         <span class="stringliteral">"movd      %%mm3, %%edx \n\t"</span>   <span class="comment">/* restore saved EDX */</span>
-<a name="l04345"></a>04345                         <span class="stringliteral">"movd      %%mm2, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
-<a name="l04346"></a>04346                         <span class="comment">/* -- */</span>
-<a name="l04347"></a>04347                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span>   <span class="comment">/* move two rows up */</span>
-<a name="l04348"></a>04348                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span>   <span class="comment">/* */</span>
-<a name="l04349"></a>04349                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l04350"></a>04350                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l04351"></a>04351                         <span class="comment">/* --- */</span>
-<a name="l04352"></a>04352                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l04353"></a>04353                         <span class="stringliteral">"jnz            .L10322 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04354"></a>04354                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
-<a name="l04355"></a>04355                         <span class="stringliteral">"add          $2, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
-<a name="l04356"></a>04356                         <span class="stringliteral">"dec              %%edx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l04357"></a>04357                         <span class="stringliteral">"jnz            .L10320 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04358"></a>04358                         <span class="comment">/* --- */</span>
-<a name="l04359"></a>04359                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l04360"></a>04360                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l04361"></a>04361                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
-<a name="l04362"></a>04362                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
-<a name="l04363"></a>04363                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
-<a name="l04364"></a>04364                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
-<a name="l04365"></a>04365                         <span class="stringliteral">"m"</span>(Divisor)            <span class="comment">/* %5 */</span>
-<a name="l04366"></a>04366                         );
-<a name="l04367"></a>04367 <span class="preprocessor">#endif</span>
-<a name="l04368"></a>04368 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l04369"></a>04369 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
-<a name="l04370"></a>04370         } <span class="keywordflow">else</span> {
-<a name="l04371"></a>04371                 <span class="comment">/* No non-MMX implementation yet */</span>
-<a name="l04372"></a>04372                 <span class="keywordflow">return</span> (-1);
-<a name="l04373"></a>04373         }
-<a name="l04374"></a>04374 }
-<a name="l04375"></a>04375 
-<a name="l04390"></a><a class="code" href="_s_d_l__image_filter_8h.html#a432d7bcc34b6bea42d1a07b4db795e1f">04390</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ac9a556492480ce71f54d456a0ff7e6cb" title="Filter using ConvolveKernel5x5Divide: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel5x5Divide</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <spa [...]
-<a name="l04391"></a>04391                                                                                    <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Divisor)
-<a name="l04392"></a>04392 {
-<a name="l04393"></a>04393         <span class="comment">/* Validate input parameters */</span>
-<a name="l04394"></a>04394         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
-<a name="l04395"></a>04395                 <span class="keywordflow">return</span>(-1);
-<a name="l04396"></a>04396 
-<a name="l04397"></a>04397         <span class="keywordflow">if</span> ((columns < 5) || (rows < 5) || (Divisor == 0))
-<a name="l04398"></a>04398                 <span class="keywordflow">return</span> (-1);
-<a name="l04399"></a>04399 
-<a name="l04400"></a>04400         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
-<a name="l04401"></a>04401 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l04402"></a>04402 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l04403"></a>04403 <span class="preprocessor"></span>                __asm
-<a name="l04404"></a>04404                 {
-<a name="l04405"></a>04405                         pusha
-<a name="l04406"></a>04406                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
-<a name="l04407"></a>04407                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
-<a name="l04408"></a>04408                                 mov bl, Divisor         <span class="comment">/* load Divisor into BL */</span>
-<a name="l04409"></a>04409                                 movd mm5, ebx           <span class="comment">/* copy Divisor into MM5 */</span>
-<a name="l04410"></a>04410                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l04411"></a>04411                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
-<a name="l04412"></a>04412                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
-<a name="l04413"></a>04413                                 add edi, 2      <span class="comment">/* 2 column offset from the left edge */</span>
-<a name="l04414"></a>04414                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
-<a name="l04415"></a>04415                                 shl eax, 1      <span class="comment">/* EAX = columns * 2 */</span>
-<a name="l04416"></a>04416                                 add edi, eax    <span class="comment">/* 2 row offset from the top edge */</span>
-<a name="l04417"></a>04417                                 shr eax, 1      <span class="comment">/* EAX = columns */</span>
-<a name="l04418"></a>04418                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
-<a name="l04419"></a>04419                                 sub ebx, 4      <span class="comment">/* do not use first 2 and last 2 rows */</span>
-<a name="l04420"></a>04420                                 <span class="comment">/* ---, */</span>
-<a name="l04421"></a>04421 L10330:
-<a name="l04422"></a>04422                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l04423"></a>04423                                 sub ecx, 4      <span class="comment">/* do not use first 2 and last 2 columns */</span>
-<a name="l04424"></a>04424                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l04425"></a>04425 L10332:
-<a name="l04426"></a>04426                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l04427"></a>04427                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
-<a name="l04428"></a>04428                                 <span class="comment">/* --- 1 */</span>
-<a name="l04429"></a>04429                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04430"></a>04430                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04431"></a>04431                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04432"></a>04432                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04433"></a>04433                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04434"></a>04434                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04435"></a>04435                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04436"></a>04436                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04437"></a>04437                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04438"></a>04438                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04439"></a>04439                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04440"></a>04440                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04441"></a>04441                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04442"></a>04442                                 <span class="comment">/* --- 2 */</span>
-<a name="l04443"></a>04443                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04444"></a>04444                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04445"></a>04445                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04446"></a>04446                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04447"></a>04447                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04448"></a>04448                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04449"></a>04449                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04450"></a>04450                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04451"></a>04451                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04452"></a>04452                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04453"></a>04453                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04454"></a>04454                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04455"></a>04455                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04456"></a>04456                                 <span class="comment">/* --- 3 */</span>
-<a name="l04457"></a>04457                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04458"></a>04458                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04459"></a>04459                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04460"></a>04460                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04461"></a>04461                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04462"></a>04462                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04463"></a>04463                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04464"></a>04464                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04465"></a>04465                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04466"></a>04466                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04467"></a>04467                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04468"></a>04468                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04469"></a>04469                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04470"></a>04470                                 <span class="comment">/* --- 4 */</span>
-<a name="l04471"></a>04471                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04472"></a>04472                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04473"></a>04473                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04474"></a>04474                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04475"></a>04475                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04476"></a>04476                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04477"></a>04477                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04478"></a>04478                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04479"></a>04479                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04480"></a>04480                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04481"></a>04481                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04482"></a>04482                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04483"></a>04483                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04484"></a>04484                                 <span class="comment">/* --- 5 */</span>
-<a name="l04485"></a>04485                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04486"></a>04486                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04487"></a>04487                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04488"></a>04488                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04489"></a>04489                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04490"></a>04490                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04491"></a>04491                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04492"></a>04492                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04493"></a>04493                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04494"></a>04494                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04495"></a>04495                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04496"></a>04496                                 <span class="comment">/* ---, */</span>
-<a name="l04497"></a>04497                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l04498"></a>04498                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l04499"></a>04499                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l04500"></a>04500                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l04501"></a>04501                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l04502"></a>04502                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l04503"></a>04503                                 <span class="comment">/* ---, */</span>
-<a name="l04504"></a>04504                                 movd mm1, eax           <span class="comment">/* save EDX in MM1 */</span>
-<a name="l04505"></a>04505                                 movd mm2, ebx           <span class="comment">/* save EDX in MM2 */</span>
-<a name="l04506"></a>04506                                 movd mm3, edx           <span class="comment">/* save EDX in MM3 */</span>
-<a name="l04507"></a>04507                                 movd eax, mm7           <span class="comment">/* load summation result into EAX */</span>
-<a name="l04508"></a>04508                                 psraw mm7, 15           <span class="comment">/* spread sign bit of the result */</span>
-<a name="l04509"></a>04509                                 movd ebx, mm5           <span class="comment">/* load Divisor into EBX */</span>
-<a name="l04510"></a>04510                                 movd edx, mm7           <span class="comment">/* fill EDX with a sign bit */</span>
-<a name="l04511"></a>04511                                 idiv bx         <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
-<a name="l04512"></a>04512                                 movd mm7, eax           <span class="comment">/* move result of division into MM7 */</span>
-<a name="l04513"></a>04513                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
-<a name="l04514"></a>04514                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l04515"></a>04515                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l04516"></a>04516                                 movd edx, mm3           <span class="comment">/* restore saved EDX */</span>
-<a name="l04517"></a>04517                                 movd ebx, mm2           <span class="comment">/* restore saved EBX */</span>
-<a name="l04518"></a>04518                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
-<a name="l04519"></a>04519                                 <span class="comment">/* --, */</span>
-<a name="l04520"></a>04520                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l04521"></a>04521                                 sub edx, 72     <span class="comment">/* EDX = Kernel address */</span>
-<a name="l04522"></a>04522                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l04523"></a>04523                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l04524"></a>04524                                 <span class="comment">/* ---, */</span>
-<a name="l04525"></a>04525                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l04526"></a>04526                                 jnz            L10332           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04527"></a>04527                                 add esi, 4      <span class="comment">/* move to the next row in Src */</span>
-<a name="l04528"></a>04528                                 add edi, 4      <span class="comment">/* move to the next row in Dest */</span>
-<a name="l04529"></a>04529                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l04530"></a>04530                                 jnz            L10330           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04531"></a>04531                                 <span class="comment">/* ---, */</span>
-<a name="l04532"></a>04532                                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l04533"></a>04533                                 popa
-<a name="l04534"></a>04534                 }
-<a name="l04535"></a>04535 <span class="preprocessor">#else</span>
-<a name="l04536"></a>04536 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l04537"></a>04537                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
-<a name="l04538"></a>04538                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
-<a name="l04539"></a>04539                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load Divisor into BL */</span>
-<a name="l04540"></a>04540                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy Divisor into MM5 */</span>
-<a name="l04541"></a>04541                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l04542"></a>04542                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
-<a name="l04543"></a>04543                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
-<a name="l04544"></a>04544                         <span class="stringliteral">"add          $2, %%edi \n\t"</span>   <span class="comment">/* 2 column offset from the left edge */</span>
-<a name="l04545"></a>04545                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
-<a name="l04546"></a>04546                         <span class="stringliteral">"shl          $1, %%eax \n\t"</span>   <span class="comment">/* EAX = columns * 2 */</span>
-<a name="l04547"></a>04547                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 2 row offset from the top edge */</span>
-<a name="l04548"></a>04548                         <span class="stringliteral">"shr          $1, %%eax \n\t"</span>   <span class="comment">/* EAX = columns */</span>
-<a name="l04549"></a>04549                         <span class="stringliteral">"mov          %2, %%ebx \n\t"</span>   <span class="comment">/* initialize ROWS counter */</span>
-<a name="l04550"></a>04550                         <span class="stringliteral">"sub          $4, %%ebx \n\t"</span>   <span class="comment">/* do not use first 2 and last 2 rows */</span>
-<a name="l04551"></a>04551                         <span class="comment">/* --- */</span>
-<a name="l04552"></a>04552                         <span class="stringliteral">".L10330:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l04553"></a>04553                         <span class="stringliteral">"sub          $4, %%ecx \n\t"</span>   <span class="comment">/* do not use first 2 and last 2 columns */</span>
-<a name="l04554"></a>04554                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l04555"></a>04555                         <span class="stringliteral">".L10332:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l04556"></a>04556                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
-<a name="l04557"></a>04557                         <span class="comment">/* --- 1 */</span>
-<a name="l04558"></a>04558                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04559"></a>04559                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04560"></a>04560                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04561"></a>04561                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04562"></a>04562                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04563"></a>04563                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04564"></a>04564                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04565"></a>04565                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04566"></a>04566                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04567"></a>04567                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04568"></a>04568                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04569"></a>04569                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04570"></a>04570                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04571"></a>04571                         <span class="comment">/* --- 2 */</span>
-<a name="l04572"></a>04572                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04573"></a>04573                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04574"></a>04574                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04575"></a>04575                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04576"></a>04576                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04577"></a>04577                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04578"></a>04578                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04579"></a>04579                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04580"></a>04580                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04581"></a>04581                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04582"></a>04582                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04583"></a>04583                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04584"></a>04584                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04585"></a>04585                         <span class="comment">/* --- 3 */</span>
-<a name="l04586"></a>04586                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04587"></a>04587                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04588"></a>04588                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04589"></a>04589                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04590"></a>04590                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04591"></a>04591                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04592"></a>04592                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04593"></a>04593                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04594"></a>04594                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04595"></a>04595                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04596"></a>04596                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04597"></a>04597                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04598"></a>04598                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04599"></a>04599                         <span class="comment">/* --- 4 */</span>
-<a name="l04600"></a>04600                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04601"></a>04601                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04602"></a>04602                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04603"></a>04603                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04604"></a>04604                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04605"></a>04605                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04606"></a>04606                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04607"></a>04607                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04608"></a>04608                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04609"></a>04609                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04610"></a>04610                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04611"></a>04611                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04612"></a>04612                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04613"></a>04613                         <span class="comment">/* --- 5 */</span>
-<a name="l04614"></a>04614                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04615"></a>04615                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04616"></a>04616                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04617"></a>04617                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04618"></a>04618                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04619"></a>04619                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04620"></a>04620                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04621"></a>04621                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04622"></a>04622                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04623"></a>04623                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04624"></a>04624                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04625"></a>04625                         <span class="comment">/* --- */</span>
-<a name="l04626"></a>04626                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l04627"></a>04627                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l04628"></a>04628                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l04629"></a>04629                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l04630"></a>04630                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l04631"></a>04631                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l04632"></a>04632                         <span class="comment">/* --- */</span>
-<a name="l04633"></a>04633                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EDX in MM1 */</span>
-<a name="l04634"></a>04634                         <span class="stringliteral">"movd      %%ebx, %%mm2 \n\t"</span>   <span class="comment">/* save EDX in MM2 */</span>
-<a name="l04635"></a>04635                         <span class="stringliteral">"movd      %%edx, %%mm3 \n\t"</span>   <span class="comment">/* save EDX in MM3 */</span>
-<a name="l04636"></a>04636                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* load summation result into EAX */</span>
-<a name="l04637"></a>04637                         <span class="stringliteral">"psraw       $15, %%mm7 \n\t"</span>   <span class="comment">/* spread sign bit of the result */</span>
-<a name="l04638"></a>04638                         <span class="stringliteral">"movd      %%mm5, %%ebx \n\t"</span>   <span class="comment">/* load Divisor into EBX */</span>
-<a name="l04639"></a>04639                         <span class="stringliteral">"movd      %%mm7, %%edx \n\t"</span>   <span class="comment">/* fill EDX with a sign bit */</span>
-<a name="l04640"></a>04640                         <span class="stringliteral">"idivw             %%bx \n\t"</span>   <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
-<a name="l04641"></a>04641                         <span class="stringliteral">"movd      %%eax, %%mm7 \n\t"</span>   <span class="comment">/* move result of division into MM7 */</span>
-<a name="l04642"></a>04642                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
-<a name="l04643"></a>04643                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l04644"></a>04644                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l04645"></a>04645                         <span class="stringliteral">"movd      %%mm3, %%edx \n\t"</span>   <span class="comment">/* restore saved EDX */</span>
-<a name="l04646"></a>04646                         <span class="stringliteral">"movd      %%mm2, %%ebx \n\t"</span>   <span class="comment">/* restore saved EBX */</span>
-<a name="l04647"></a>04647                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
-<a name="l04648"></a>04648                         <span class="comment">/* -- */</span>
-<a name="l04649"></a>04649                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l04650"></a>04650                         <span class="stringliteral">"sub         $72, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
-<a name="l04651"></a>04651                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l04652"></a>04652                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l04653"></a>04653                         <span class="comment">/* --- */</span>
-<a name="l04654"></a>04654                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l04655"></a>04655                         <span class="stringliteral">"jnz            .L10332 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04656"></a>04656                         <span class="stringliteral">"add          $4, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
-<a name="l04657"></a>04657                         <span class="stringliteral">"add          $4, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
-<a name="l04658"></a>04658                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l04659"></a>04659                         <span class="stringliteral">"jnz            .L10330 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04660"></a>04660                         <span class="comment">/* --- */</span>
-<a name="l04661"></a>04661                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l04662"></a>04662                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l04663"></a>04663                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
-<a name="l04664"></a>04664                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
-<a name="l04665"></a>04665                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
-<a name="l04666"></a>04666                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
-<a name="l04667"></a>04667                         <span class="stringliteral">"m"</span>(Divisor)            <span class="comment">/* %5 */</span>
-<a name="l04668"></a>04668                         );
-<a name="l04669"></a>04669 <span class="preprocessor">#endif</span>
-<a name="l04670"></a>04670 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l04671"></a>04671 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
-<a name="l04672"></a>04672         } <span class="keywordflow">else</span> {
-<a name="l04673"></a>04673                 <span class="comment">/* No non-MMX implementation yet */</span>
-<a name="l04674"></a>04674                 <span class="keywordflow">return</span> (-1);
-<a name="l04675"></a>04675         }
-<a name="l04676"></a>04676 }
-<a name="l04677"></a>04677 
-<a name="l04692"></a><a class="code" href="_s_d_l__image_filter_8h.html#acc177cf891758fdc4bf7533fb266e339">04692</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a363f48e6843fd3f48da53688b89bca48" title="Filter using ConvolveKernel7x7Divide: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel7x7Divide</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <spa [...]
-<a name="l04693"></a>04693                                                                                    <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Divisor)
-<a name="l04694"></a>04694 {
-<a name="l04695"></a>04695         <span class="comment">/* Validate input parameters */</span>
-<a name="l04696"></a>04696         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
-<a name="l04697"></a>04697                 <span class="keywordflow">return</span>(-1);
-<a name="l04698"></a>04698 
-<a name="l04699"></a>04699         <span class="keywordflow">if</span> ((columns < 7) || (rows < 7) || (Divisor == 0))
-<a name="l04700"></a>04700                 <span class="keywordflow">return</span> (-1);
-<a name="l04701"></a>04701 
-<a name="l04702"></a>04702         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
-<a name="l04703"></a>04703 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l04704"></a>04704 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l04705"></a>04705 <span class="preprocessor"></span>                __asm
-<a name="l04706"></a>04706                 {
-<a name="l04707"></a>04707                         pusha
-<a name="l04708"></a>04708                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
-<a name="l04709"></a>04709                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
-<a name="l04710"></a>04710                                 mov bl, Divisor         <span class="comment">/* load Divisor into BL */</span>
-<a name="l04711"></a>04711                                 movd mm5, ebx           <span class="comment">/* copy Divisor into MM5 */</span>
-<a name="l04712"></a>04712                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l04713"></a>04713                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
-<a name="l04714"></a>04714                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
-<a name="l04715"></a>04715                                 add edi, 3      <span class="comment">/* 3 column offset from the left edge */</span>
-<a name="l04716"></a>04716                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
-<a name="l04717"></a>04717                                 add edi, eax    <span class="comment">/* 3 row offset from the top edge */</span>
-<a name="l04718"></a>04718                                 add edi, eax
-<a name="l04719"></a>04719                                 add edi, eax
-<a name="l04720"></a>04720                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
-<a name="l04721"></a>04721                                 sub ebx, 6      <span class="comment">/* do not use first 3 and last 3 rows */</span>
-<a name="l04722"></a>04722                                 <span class="comment">/* ---, */</span>
-<a name="l04723"></a>04723 L10340:
-<a name="l04724"></a>04724                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l04725"></a>04725                                 sub ecx, 6      <span class="comment">/* do not use first 3 and last 3 columns */</span>
-<a name="l04726"></a>04726                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l04727"></a>04727 L10342:
-<a name="l04728"></a>04728                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l04729"></a>04729                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
-<a name="l04730"></a>04730                                 <span class="comment">/* --- 1 */</span>
-<a name="l04731"></a>04731                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04732"></a>04732                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04733"></a>04733                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04734"></a>04734                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04735"></a>04735                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04736"></a>04736                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04737"></a>04737                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04738"></a>04738                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04739"></a>04739                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04740"></a>04740                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04741"></a>04741                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04742"></a>04742                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04743"></a>04743                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04744"></a>04744                                 <span class="comment">/* --- 2 */</span>
-<a name="l04745"></a>04745                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04746"></a>04746                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04747"></a>04747                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04748"></a>04748                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04749"></a>04749                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04750"></a>04750                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04751"></a>04751                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04752"></a>04752                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04753"></a>04753                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04754"></a>04754                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04755"></a>04755                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04756"></a>04756                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04757"></a>04757                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04758"></a>04758                                 <span class="comment">/* --- 3 */</span>
-<a name="l04759"></a>04759                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04760"></a>04760                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04761"></a>04761                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04762"></a>04762                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04763"></a>04763                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04764"></a>04764                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04765"></a>04765                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04766"></a>04766                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04767"></a>04767                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04768"></a>04768                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04769"></a>04769                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04770"></a>04770                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04771"></a>04771                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04772"></a>04772                                 <span class="comment">/* --- 4 */</span>
-<a name="l04773"></a>04773                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04774"></a>04774                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04775"></a>04775                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04776"></a>04776                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04777"></a>04777                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04778"></a>04778                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04779"></a>04779                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04780"></a>04780                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04781"></a>04781                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04782"></a>04782                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04783"></a>04783                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04784"></a>04784                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04785"></a>04785                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04786"></a>04786                                 <span class="comment">/* --- 5 */</span>
-<a name="l04787"></a>04787                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04788"></a>04788                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04789"></a>04789                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04790"></a>04790                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04791"></a>04791                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04792"></a>04792                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04793"></a>04793                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04794"></a>04794                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04795"></a>04795                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04796"></a>04796                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04797"></a>04797                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04798"></a>04798                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04799"></a>04799                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04800"></a>04800                                 <span class="comment">/* --- 6 */</span>
-<a name="l04801"></a>04801                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04802"></a>04802                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04803"></a>04803                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04804"></a>04804                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04805"></a>04805                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04806"></a>04806                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04807"></a>04807                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04808"></a>04808                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04809"></a>04809                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04810"></a>04810                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04811"></a>04811                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04812"></a>04812                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04813"></a>04813                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04814"></a>04814                                 <span class="comment">/* --- 7 */</span>
-<a name="l04815"></a>04815                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04816"></a>04816                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04817"></a>04817                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04818"></a>04818                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04819"></a>04819                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04820"></a>04820                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04821"></a>04821                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04822"></a>04822                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l04823"></a>04823                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l04824"></a>04824                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04825"></a>04825                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04826"></a>04826                                 <span class="comment">/* ---, */</span>
-<a name="l04827"></a>04827                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l04828"></a>04828                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l04829"></a>04829                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l04830"></a>04830                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l04831"></a>04831                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l04832"></a>04832                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l04833"></a>04833                                 <span class="comment">/* ---, */</span>
-<a name="l04834"></a>04834                                 movd mm1, eax           <span class="comment">/* save EDX in MM1 */</span>
-<a name="l04835"></a>04835                                 movd mm2, ebx           <span class="comment">/* save EDX in MM2 */</span>
-<a name="l04836"></a>04836                                 movd mm3, edx           <span class="comment">/* save EDX in MM3 */</span>
-<a name="l04837"></a>04837                                 movd eax, mm7           <span class="comment">/* load summation result into EAX */</span>
-<a name="l04838"></a>04838                                 psraw mm7, 15           <span class="comment">/* spread sign bit of the result */</span>
-<a name="l04839"></a>04839                                 movd ebx, mm5           <span class="comment">/* load Divisor into EBX */</span>
-<a name="l04840"></a>04840                                 movd edx, mm7           <span class="comment">/* fill EDX with a sign bit */</span>
-<a name="l04841"></a>04841                                 idiv bx         <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
-<a name="l04842"></a>04842                                 movd mm7, eax           <span class="comment">/* move result of division into MM7 */</span>
-<a name="l04843"></a>04843                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
-<a name="l04844"></a>04844                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l04845"></a>04845                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l04846"></a>04846                                 movd edx, mm3           <span class="comment">/* restore saved EDX */</span>
-<a name="l04847"></a>04847                                 movd ebx, mm2           <span class="comment">/* restore saved EBX */</span>
-<a name="l04848"></a>04848                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
-<a name="l04849"></a>04849                                 <span class="comment">/* --, */</span>
-<a name="l04850"></a>04850                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l04851"></a>04851                                 sub edx, 104    <span class="comment">/* EDX = Kernel address */</span>
-<a name="l04852"></a>04852                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l04853"></a>04853                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l04854"></a>04854                                 <span class="comment">/* ---, */</span>
-<a name="l04855"></a>04855                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l04856"></a>04856                                 jnz            L10342           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04857"></a>04857                                 add esi, 6      <span class="comment">/* move to the next row in Src */</span>
-<a name="l04858"></a>04858                                 add edi, 6      <span class="comment">/* move to the next row in Dest */</span>
-<a name="l04859"></a>04859                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l04860"></a>04860                                 jnz            L10340           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l04861"></a>04861                                 <span class="comment">/* ---, */</span>
-<a name="l04862"></a>04862                                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l04863"></a>04863                                 popa
-<a name="l04864"></a>04864                 }
-<a name="l04865"></a>04865 <span class="preprocessor">#else</span>
-<a name="l04866"></a>04866 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l04867"></a>04867                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
-<a name="l04868"></a>04868                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
-<a name="l04869"></a>04869                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load Divisor into BL */</span>
-<a name="l04870"></a>04870                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy Divisor into MM5 */</span>
-<a name="l04871"></a>04871                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l04872"></a>04872                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
-<a name="l04873"></a>04873                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
-<a name="l04874"></a>04874                         <span class="stringliteral">"add          $3, %%edi \n\t"</span>   <span class="comment">/* 3 column offset from the left edge */</span>
-<a name="l04875"></a>04875                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
-<a name="l04876"></a>04876                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 3 row offset from the top edge */</span>
-<a name="l04877"></a>04877                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"mov          %2, %%ebx \n\t"</span>       <span class="comment">/* initialize ROWS counter */</span>
-<a name="l04878"></a>04878                         <span class="stringliteral">"sub          $6, %%ebx \n\t"</span>   <span class="comment">/* do not use first 3 and last 3 rows */</span>
-<a name="l04879"></a>04879                         <span class="comment">/* --- */</span>
-<a name="l04880"></a>04880                         <span class="stringliteral">".L10340:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l04881"></a>04881                         <span class="stringliteral">"sub          $6, %%ecx \n\t"</span>   <span class="comment">/* do not use first 3 and last 3 columns */</span>
-<a name="l04882"></a>04882                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l04883"></a>04883                         <span class="stringliteral">".L10342:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l04884"></a>04884                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
-<a name="l04885"></a>04885                         <span class="comment">/* --- 1 */</span>
-<a name="l04886"></a>04886                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04887"></a>04887                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04888"></a>04888                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04889"></a>04889                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04890"></a>04890                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04891"></a>04891                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04892"></a>04892                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04893"></a>04893                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04894"></a>04894                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04895"></a>04895                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04896"></a>04896                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04897"></a>04897                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04898"></a>04898                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04899"></a>04899                         <span class="comment">/* --- 2 */</span>
-<a name="l04900"></a>04900                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04901"></a>04901                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04902"></a>04902                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04903"></a>04903                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04904"></a>04904                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04905"></a>04905                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04906"></a>04906                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04907"></a>04907                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04908"></a>04908                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04909"></a>04909                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04910"></a>04910                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04911"></a>04911                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04912"></a>04912                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04913"></a>04913                         <span class="comment">/* --- 3 */</span>
-<a name="l04914"></a>04914                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04915"></a>04915                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04916"></a>04916                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04917"></a>04917                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04918"></a>04918                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04919"></a>04919                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04920"></a>04920                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04921"></a>04921                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04922"></a>04922                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04923"></a>04923                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04924"></a>04924                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04925"></a>04925                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04926"></a>04926                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04927"></a>04927                         <span class="comment">/* --- 4 */</span>
-<a name="l04928"></a>04928                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04929"></a>04929                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04930"></a>04930                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04931"></a>04931                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04932"></a>04932                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04933"></a>04933                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04934"></a>04934                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04935"></a>04935                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04936"></a>04936                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04937"></a>04937                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04938"></a>04938                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04939"></a>04939                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04940"></a>04940                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04941"></a>04941                         <span class="comment">/* --- 5 */</span>
-<a name="l04942"></a>04942                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04943"></a>04943                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04944"></a>04944                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04945"></a>04945                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04946"></a>04946                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04947"></a>04947                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04948"></a>04948                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04949"></a>04949                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04950"></a>04950                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04951"></a>04951                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04952"></a>04952                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04953"></a>04953                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04954"></a>04954                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04955"></a>04955                         <span class="comment">/* --- 6 */</span>
-<a name="l04956"></a>04956                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04957"></a>04957                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04958"></a>04958                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l04959"></a>04959                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04960"></a>04960                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04961"></a>04961                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04962"></a>04962                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04963"></a>04963                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04964"></a>04964                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04965"></a>04965                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04966"></a>04966                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04967"></a>04967                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04968"></a>04968                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04969"></a>04969                         <span class="comment">/* --- 7 */</span>
-<a name="l04970"></a>04970                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l04971"></a>04971                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l04972"></a>04972                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04973"></a>04973                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l04974"></a>04974                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l04975"></a>04975                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l04976"></a>04976                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l04977"></a>04977                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l04978"></a>04978                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l04979"></a>04979                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l04980"></a>04980                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l04981"></a>04981                         <span class="comment">/* --- */</span>
-<a name="l04982"></a>04982                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l04983"></a>04983                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l04984"></a>04984                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l04985"></a>04985                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l04986"></a>04986                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l04987"></a>04987                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l04988"></a>04988                         <span class="comment">/* --- */</span>
-<a name="l04989"></a>04989                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EDX in MM1 */</span>
-<a name="l04990"></a>04990                         <span class="stringliteral">"movd      %%ebx, %%mm2 \n\t"</span>   <span class="comment">/* save EDX in MM2 */</span>
-<a name="l04991"></a>04991                         <span class="stringliteral">"movd      %%edx, %%mm3 \n\t"</span>   <span class="comment">/* save EDX in MM3 */</span>
-<a name="l04992"></a>04992                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* load summation result into EAX */</span>
-<a name="l04993"></a>04993                         <span class="stringliteral">"psraw       $15, %%mm7 \n\t"</span>   <span class="comment">/* spread sign bit of the result */</span>
-<a name="l04994"></a>04994                         <span class="stringliteral">"movd      %%mm5, %%ebx \n\t"</span>   <span class="comment">/* load Divisor into EBX */</span>
-<a name="l04995"></a>04995                         <span class="stringliteral">"movd      %%mm7, %%edx \n\t"</span>   <span class="comment">/* fill EDX with a sign bit */</span>
-<a name="l04996"></a>04996                         <span class="stringliteral">"idivw             %%bx \n\t"</span>   <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
-<a name="l04997"></a>04997                         <span class="stringliteral">"movd      %%eax, %%mm7 \n\t"</span>   <span class="comment">/* move result of division into MM7 */</span>
-<a name="l04998"></a>04998                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
-<a name="l04999"></a>04999                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l05000"></a>05000                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l05001"></a>05001                         <span class="stringliteral">"movd      %%mm3, %%edx \n\t"</span>   <span class="comment">/* restore saved EDX */</span>
-<a name="l05002"></a>05002                         <span class="stringliteral">"movd      %%mm2, %%ebx \n\t"</span>   <span class="comment">/* restore saved EBX */</span>
-<a name="l05003"></a>05003                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
-<a name="l05004"></a>05004                         <span class="comment">/* -- */</span>
-<a name="l05005"></a>05005                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l05006"></a>05006                         <span class="stringliteral">"sub        $104, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
-<a name="l05007"></a>05007                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l05008"></a>05008                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l05009"></a>05009                         <span class="comment">/* --- */</span>
-<a name="l05010"></a>05010                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l05011"></a>05011                         <span class="stringliteral">"jnz            .L10342 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05012"></a>05012                         <span class="stringliteral">"add          $6, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
-<a name="l05013"></a>05013                         <span class="stringliteral">"add          $6, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
-<a name="l05014"></a>05014                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l05015"></a>05015                         <span class="stringliteral">"jnz            .L10340 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05016"></a>05016                         <span class="comment">/* --- */</span>
-<a name="l05017"></a>05017                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l05018"></a>05018                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l05019"></a>05019                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
-<a name="l05020"></a>05020                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
-<a name="l05021"></a>05021                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
-<a name="l05022"></a>05022                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
-<a name="l05023"></a>05023                         <span class="stringliteral">"m"</span>(Divisor)            <span class="comment">/* %5 */</span>
-<a name="l05024"></a>05024                         );
-<a name="l05025"></a>05025 <span class="preprocessor">#endif</span>
-<a name="l05026"></a>05026 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l05027"></a>05027 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
-<a name="l05028"></a>05028         } <span class="keywordflow">else</span> {
-<a name="l05029"></a>05029                 <span class="comment">/* No non-MMX implementation yet */</span>
-<a name="l05030"></a>05030                 <span class="keywordflow">return</span> (-1);
-<a name="l05031"></a>05031         }
-<a name="l05032"></a>05032 }
-<a name="l05033"></a>05033 
-<a name="l05048"></a><a class="code" href="_s_d_l__image_filter_8h.html#af8a8114acd0509787ae5265990049720">05048</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ae1e91ff193beed110a71119ec901f09d" title="Filter using ConvolveKernel9x9Divide: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel9x9Divide</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <spa [...]
-<a name="l05049"></a>05049                                                                                    <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Divisor)
-<a name="l05050"></a>05050 {
-<a name="l05051"></a>05051         <span class="comment">/* Validate input parameters */</span>
-<a name="l05052"></a>05052         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
-<a name="l05053"></a>05053                 <span class="keywordflow">return</span>(-1);
-<a name="l05054"></a>05054 
-<a name="l05055"></a>05055         <span class="keywordflow">if</span> ((columns < 9) || (rows < 9) || (Divisor == 0))
-<a name="l05056"></a>05056                 <span class="keywordflow">return</span> (-1);
-<a name="l05057"></a>05057 
-<a name="l05058"></a>05058         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
-<a name="l05059"></a>05059 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l05060"></a>05060 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l05061"></a>05061 <span class="preprocessor"></span>                __asm
-<a name="l05062"></a>05062                 {
-<a name="l05063"></a>05063                         pusha
-<a name="l05064"></a>05064                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
-<a name="l05065"></a>05065                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
-<a name="l05066"></a>05066                                 mov bl, Divisor         <span class="comment">/* load Divisor into BL */</span>
-<a name="l05067"></a>05067                                 movd mm5, ebx           <span class="comment">/* copy Divisor into MM5 */</span>
-<a name="l05068"></a>05068                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l05069"></a>05069                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
-<a name="l05070"></a>05070                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
-<a name="l05071"></a>05071                                 add edi, 4      <span class="comment">/* 4 column offset from the left edge */</span>
-<a name="l05072"></a>05072                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
-<a name="l05073"></a>05073                                 add edi, eax    <span class="comment">/* 4 row offset from the top edge */</span>
-<a name="l05074"></a>05074                                 add edi, eax
-<a name="l05075"></a>05075                                 add edi, eax
-<a name="l05076"></a>05076                                 add edi, eax
-<a name="l05077"></a>05077                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
-<a name="l05078"></a>05078                                 sub ebx, 8      <span class="comment">/* do not use first 4 and last 4 rows */</span>
-<a name="l05079"></a>05079                                 <span class="comment">/* ---, */</span>
-<a name="l05080"></a>05080 L10350:
-<a name="l05081"></a>05081                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l05082"></a>05082                                 sub ecx, 8      <span class="comment">/* do not use first 4 and last 4 columns */</span>
-<a name="l05083"></a>05083                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l05084"></a>05084 L10352:
-<a name="l05085"></a>05085                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l05086"></a>05086                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
-<a name="l05087"></a>05087                                 <span class="comment">/* --- 1 */</span>
-<a name="l05088"></a>05088                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05089"></a>05089                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05090"></a>05090                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05091"></a>05091                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05092"></a>05092                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05093"></a>05093                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05094"></a>05094                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05095"></a>05095                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05096"></a>05096                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05097"></a>05097                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05098"></a>05098                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05099"></a>05099                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05100"></a>05100                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05101"></a>05101                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05102"></a>05102                         dec              esi
-<a name="l05103"></a>05103                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05104"></a>05104                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05105"></a>05105                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05106"></a>05106                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05107"></a>05107                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05108"></a>05108                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05109"></a>05109                                 <span class="comment">/* --- 2 */</span>
-<a name="l05110"></a>05110                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05111"></a>05111                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05112"></a>05112                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05113"></a>05113                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05114"></a>05114                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05115"></a>05115                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05116"></a>05116                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05117"></a>05117                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05118"></a>05118                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05119"></a>05119                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05120"></a>05120                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05121"></a>05121                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05122"></a>05122                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05123"></a>05123                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05124"></a>05124                         dec              esi
-<a name="l05125"></a>05125                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05126"></a>05126                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05127"></a>05127                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05128"></a>05128                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05129"></a>05129                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05130"></a>05130                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05131"></a>05131                                 <span class="comment">/* --- 3 */</span>
-<a name="l05132"></a>05132                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05133"></a>05133                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05134"></a>05134                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05135"></a>05135                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05136"></a>05136                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05137"></a>05137                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05138"></a>05138                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05139"></a>05139                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05140"></a>05140                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05141"></a>05141                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05142"></a>05142                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05143"></a>05143                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05144"></a>05144                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05145"></a>05145                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05146"></a>05146                         dec              esi
-<a name="l05147"></a>05147                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05148"></a>05148                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05149"></a>05149                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05150"></a>05150                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05151"></a>05151                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05152"></a>05152                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05153"></a>05153                                 <span class="comment">/* --- 4 */</span>
-<a name="l05154"></a>05154                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05155"></a>05155                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05156"></a>05156                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05157"></a>05157                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05158"></a>05158                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05159"></a>05159                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05160"></a>05160                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05161"></a>05161                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05162"></a>05162                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05163"></a>05163                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05164"></a>05164                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05165"></a>05165                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05166"></a>05166                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05167"></a>05167                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05168"></a>05168                         dec              esi
-<a name="l05169"></a>05169                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05170"></a>05170                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05171"></a>05171                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05172"></a>05172                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05173"></a>05173                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05174"></a>05174                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05175"></a>05175                                 <span class="comment">/* --- 5 */</span>
-<a name="l05176"></a>05176                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05177"></a>05177                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05178"></a>05178                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05179"></a>05179                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05180"></a>05180                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05181"></a>05181                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05182"></a>05182                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05183"></a>05183                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05184"></a>05184                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05185"></a>05185                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05186"></a>05186                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05187"></a>05187                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05188"></a>05188                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05189"></a>05189                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05190"></a>05190                         dec              esi
-<a name="l05191"></a>05191                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05192"></a>05192                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05193"></a>05193                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05194"></a>05194                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05195"></a>05195                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05196"></a>05196                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05197"></a>05197                                 <span class="comment">/* --- 6 */</span>
-<a name="l05198"></a>05198                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05199"></a>05199                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05200"></a>05200                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05201"></a>05201                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05202"></a>05202                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05203"></a>05203                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05204"></a>05204                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05205"></a>05205                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05206"></a>05206                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05207"></a>05207                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05208"></a>05208                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05209"></a>05209                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05210"></a>05210                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05211"></a>05211                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05212"></a>05212                         dec              esi
-<a name="l05213"></a>05213                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05214"></a>05214                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05215"></a>05215                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05216"></a>05216                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05217"></a>05217                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05218"></a>05218                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05219"></a>05219                                 <span class="comment">/* --- 7 */</span>
-<a name="l05220"></a>05220                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05221"></a>05221                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05222"></a>05222                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05223"></a>05223                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05224"></a>05224                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05225"></a>05225                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05226"></a>05226                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05227"></a>05227                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05228"></a>05228                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05229"></a>05229                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05230"></a>05230                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05231"></a>05231                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05232"></a>05232                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05233"></a>05233                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05234"></a>05234                         dec              esi
-<a name="l05235"></a>05235                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05236"></a>05236                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05237"></a>05237                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05238"></a>05238                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05239"></a>05239                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05240"></a>05240                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05241"></a>05241                                 <span class="comment">/* --- 8 */</span>
-<a name="l05242"></a>05242                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05243"></a>05243                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05244"></a>05244                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05245"></a>05245                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05246"></a>05246                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05247"></a>05247                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05248"></a>05248                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05249"></a>05249                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05250"></a>05250                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05251"></a>05251                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05252"></a>05252                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05253"></a>05253                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05254"></a>05254                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05255"></a>05255                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05256"></a>05256                         dec              esi
-<a name="l05257"></a>05257                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05258"></a>05258                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05259"></a>05259                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05260"></a>05260                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05261"></a>05261                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05262"></a>05262                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05263"></a>05263                                 <span class="comment">/* --- 9 */</span>
-<a name="l05264"></a>05264                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05265"></a>05265                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05266"></a>05266                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05267"></a>05267                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05268"></a>05268                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05269"></a>05269                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05270"></a>05270                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05271"></a>05271                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05272"></a>05272                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05273"></a>05273                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05274"></a>05274                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05275"></a>05275                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05276"></a>05276                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05277"></a>05277                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05278"></a>05278                         movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05279"></a>05279                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05280"></a>05280                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05281"></a>05281                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05282"></a>05282                                 <span class="comment">/* ---, */</span>
-<a name="l05283"></a>05283                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l05284"></a>05284                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l05285"></a>05285                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l05286"></a>05286                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l05287"></a>05287                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l05288"></a>05288                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l05289"></a>05289                                 <span class="comment">/* ---, */</span>
-<a name="l05290"></a>05290                                 movd mm1, eax           <span class="comment">/* save EDX in MM1 */</span>
-<a name="l05291"></a>05291                                 movd mm2, ebx           <span class="comment">/* save EDX in MM2 */</span>
-<a name="l05292"></a>05292                                 movd mm3, edx           <span class="comment">/* save EDX in MM3 */</span>
-<a name="l05293"></a>05293                                 movd eax, mm7           <span class="comment">/* load summation result into EAX */</span>
-<a name="l05294"></a>05294                                 psraw mm7, 15           <span class="comment">/* spread sign bit of the result */</span>
-<a name="l05295"></a>05295                                 movd ebx, mm5           <span class="comment">/* load Divisor into EBX */</span>
-<a name="l05296"></a>05296                                 movd edx, mm7           <span class="comment">/* fill EDX with a sign bit */</span>
-<a name="l05297"></a>05297                                 idiv bx         <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
-<a name="l05298"></a>05298                                 movd mm7, eax           <span class="comment">/* move result of division into MM7 */</span>
-<a name="l05299"></a>05299                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
-<a name="l05300"></a>05300                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l05301"></a>05301                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l05302"></a>05302                                 movd edx, mm3           <span class="comment">/* restore saved EDX */</span>
-<a name="l05303"></a>05303                                 movd ebx, mm2           <span class="comment">/* restore saved EBX */</span>
-<a name="l05304"></a>05304                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
-<a name="l05305"></a>05305                                 <span class="comment">/* --, */</span>
-<a name="l05306"></a>05306                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l05307"></a>05307                                 sub edx, 208    <span class="comment">/* EDX = Kernel address */</span>
-<a name="l05308"></a>05308                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l05309"></a>05309                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l05310"></a>05310                                 <span class="comment">/* ---, */</span>
-<a name="l05311"></a>05311                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l05312"></a>05312                                 jnz            L10352           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05313"></a>05313                                 add esi, 8      <span class="comment">/* move to the next row in Src */</span>
-<a name="l05314"></a>05314                                 add edi, 8      <span class="comment">/* move to the next row in Dest */</span>
-<a name="l05315"></a>05315                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l05316"></a>05316                                 jnz            L10350           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05317"></a>05317                                 <span class="comment">/* ---, */</span>
-<a name="l05318"></a>05318                                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l05319"></a>05319                                 popa
-<a name="l05320"></a>05320                 }
-<a name="l05321"></a>05321 <span class="preprocessor">#else</span>
-<a name="l05322"></a>05322 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l05323"></a>05323                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
-<a name="l05324"></a>05324                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
-<a name="l05325"></a>05325                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load Divisor into BL */</span>
-<a name="l05326"></a>05326                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy Divisor into MM5 */</span>
-<a name="l05327"></a>05327                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l05328"></a>05328                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
-<a name="l05329"></a>05329                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
-<a name="l05330"></a>05330                         <span class="stringliteral">"add          $4, %%edi \n\t"</span>   <span class="comment">/* 4 column offset from the left edge */</span>
-<a name="l05331"></a>05331                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
-<a name="l05332"></a>05332                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 4 row offset from the top edge */</span>
-<a name="l05333"></a>05333                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"mov          %2, %%ebx \n\t"</span> <span class="comment">/* initialize ROWS counter */</span>
-<a name="l05334"></a>05334                         <span class="stringliteral">"sub          $8, %%ebx \n\t"</span>   <span class="comment">/* do not use first 4 and last 4 rows */</span>
-<a name="l05335"></a>05335                         <span class="comment">/* --- */</span>
-<a name="l05336"></a>05336                         <span class="stringliteral">".L10350:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l05337"></a>05337                         <span class="stringliteral">"sub          $8, %%ecx \n\t"</span>   <span class="comment">/* do not use first 4 and last 4 columns */</span>
-<a name="l05338"></a>05338                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l05339"></a>05339                         <span class="stringliteral">".L10352:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l05340"></a>05340                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
-<a name="l05341"></a>05341                         <span class="comment">/* --- 1 */</span>
-<a name="l05342"></a>05342                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05343"></a>05343                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05344"></a>05344                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05345"></a>05345                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05346"></a>05346                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05347"></a>05347                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05348"></a>05348                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05349"></a>05349                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05350"></a>05350                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05351"></a>05351                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05352"></a>05352                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05353"></a>05353                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05354"></a>05354                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05355"></a>05355                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05356"></a>05356                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05357"></a>05357                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05358"></a>05358                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05359"></a>05359                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05360"></a>05360                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05361"></a>05361                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05362"></a>05362                         <span class="comment">/* --- 2 */</span>
-<a name="l05363"></a>05363                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05364"></a>05364                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05365"></a>05365                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05366"></a>05366                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05367"></a>05367                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05368"></a>05368                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05369"></a>05369                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05370"></a>05370                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05371"></a>05371                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05372"></a>05372                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05373"></a>05373                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05374"></a>05374                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05375"></a>05375                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05376"></a>05376                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05377"></a>05377                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05378"></a>05378                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05379"></a>05379                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05380"></a>05380                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05381"></a>05381                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05382"></a>05382                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05383"></a>05383                         <span class="comment">/* --- 3 */</span>
-<a name="l05384"></a>05384                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05385"></a>05385                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05386"></a>05386                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05387"></a>05387                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05388"></a>05388                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05389"></a>05389                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05390"></a>05390                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05391"></a>05391                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05392"></a>05392                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05393"></a>05393                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05394"></a>05394                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05395"></a>05395                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05396"></a>05396                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05397"></a>05397                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05398"></a>05398                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05399"></a>05399                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05400"></a>05400                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05401"></a>05401                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05402"></a>05402                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05403"></a>05403                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05404"></a>05404                         <span class="comment">/* --- 4 */</span>
-<a name="l05405"></a>05405                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05406"></a>05406                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05407"></a>05407                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05408"></a>05408                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05409"></a>05409                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05410"></a>05410                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05411"></a>05411                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05412"></a>05412                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05413"></a>05413                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05414"></a>05414                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05415"></a>05415                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05416"></a>05416                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05417"></a>05417                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05418"></a>05418                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05419"></a>05419                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05420"></a>05420                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05421"></a>05421                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05422"></a>05422                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05423"></a>05423                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05424"></a>05424                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05425"></a>05425                         <span class="comment">/* --- 5 */</span>
-<a name="l05426"></a>05426                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05427"></a>05427                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05428"></a>05428                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05429"></a>05429                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05430"></a>05430                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05431"></a>05431                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05432"></a>05432                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05433"></a>05433                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05434"></a>05434                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05435"></a>05435                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05436"></a>05436                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05437"></a>05437                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05438"></a>05438                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05439"></a>05439                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05440"></a>05440                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05441"></a>05441                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05442"></a>05442                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05443"></a>05443                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05444"></a>05444                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05445"></a>05445                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05446"></a>05446                         <span class="comment">/* --- 6 */</span>
-<a name="l05447"></a>05447                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05448"></a>05448                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05449"></a>05449                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05450"></a>05450                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05451"></a>05451                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05452"></a>05452                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05453"></a>05453                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05454"></a>05454                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05455"></a>05455                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05456"></a>05456                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05457"></a>05457                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05458"></a>05458                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05459"></a>05459                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05460"></a>05460                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05461"></a>05461                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05462"></a>05462                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05463"></a>05463                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05464"></a>05464                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05465"></a>05465                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05466"></a>05466                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05467"></a>05467                         <span class="comment">/* --- 7 */</span>
-<a name="l05468"></a>05468                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05469"></a>05469                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05470"></a>05470                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05471"></a>05471                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05472"></a>05472                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05473"></a>05473                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05474"></a>05474                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05475"></a>05475                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05476"></a>05476                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05477"></a>05477                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05478"></a>05478                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05479"></a>05479                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05480"></a>05480                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05481"></a>05481                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05482"></a>05482                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05483"></a>05483                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05484"></a>05484                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05485"></a>05485                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05486"></a>05486                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05487"></a>05487                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05488"></a>05488                         <span class="comment">/* --- 8 */</span>
-<a name="l05489"></a>05489                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05490"></a>05490                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05491"></a>05491                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05492"></a>05492                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05493"></a>05493                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05494"></a>05494                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05495"></a>05495                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05496"></a>05496                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05497"></a>05497                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05498"></a>05498                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05499"></a>05499                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05500"></a>05500                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05501"></a>05501                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05502"></a>05502                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05503"></a>05503                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05504"></a>05504                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05505"></a>05505                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05506"></a>05506                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05507"></a>05507                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05508"></a>05508                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05509"></a>05509                         <span class="comment">/* --- 9 */</span>
-<a name="l05510"></a>05510                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05511"></a>05511                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05512"></a>05512                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l05513"></a>05513                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05514"></a>05514                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05515"></a>05515                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05516"></a>05516                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05517"></a>05517                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05518"></a>05518                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05519"></a>05519                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05520"></a>05520                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05521"></a>05521                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05522"></a>05522                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05523"></a>05523                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05524"></a>05524                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05525"></a>05525                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05526"></a>05526                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05527"></a>05527                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05528"></a>05528                         <span class="comment">/* --- */</span>
-<a name="l05529"></a>05529                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l05530"></a>05530                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l05531"></a>05531                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l05532"></a>05532                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l05533"></a>05533                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l05534"></a>05534                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l05535"></a>05535                         <span class="comment">/* --- */</span>
-<a name="l05536"></a>05536                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EDX in MM1 */</span>
-<a name="l05537"></a>05537                         <span class="stringliteral">"movd      %%ebx, %%mm2 \n\t"</span>   <span class="comment">/* save EDX in MM2 */</span>
-<a name="l05538"></a>05538                         <span class="stringliteral">"movd      %%edx, %%mm3 \n\t"</span>   <span class="comment">/* save EDX in MM3 */</span>
-<a name="l05539"></a>05539                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* load summation result into EAX */</span>
-<a name="l05540"></a>05540                         <span class="stringliteral">"psraw       $15, %%mm7 \n\t"</span>   <span class="comment">/* spread sign bit of the result */</span>
-<a name="l05541"></a>05541                         <span class="stringliteral">"movd      %%mm5, %%ebx \n\t"</span>   <span class="comment">/* load Divisor into EBX */</span>
-<a name="l05542"></a>05542                         <span class="stringliteral">"movd      %%mm7, %%edx \n\t"</span>   <span class="comment">/* fill EDX with a sign bit */</span>
-<a name="l05543"></a>05543                         <span class="stringliteral">"idivw             %%bx \n\t"</span>   <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
-<a name="l05544"></a>05544                         <span class="stringliteral">"movd      %%eax, %%mm7 \n\t"</span>   <span class="comment">/* move result of division into MM7 */</span>
-<a name="l05545"></a>05545                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
-<a name="l05546"></a>05546                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l05547"></a>05547                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l05548"></a>05548                         <span class="stringliteral">"movd      %%mm3, %%edx \n\t"</span>   <span class="comment">/* restore saved EDX */</span>
-<a name="l05549"></a>05549                         <span class="stringliteral">"movd      %%mm2, %%ebx \n\t"</span>   <span class="comment">/* restore saved EBX */</span>
-<a name="l05550"></a>05550                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
-<a name="l05551"></a>05551                         <span class="comment">/* -- */</span>
-<a name="l05552"></a>05552                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l05553"></a>05553                         <span class="stringliteral">"sub        $208, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
-<a name="l05554"></a>05554                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l05555"></a>05555                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l05556"></a>05556                         <span class="comment">/* --- */</span>
-<a name="l05557"></a>05557                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l05558"></a>05558                         <span class="stringliteral">"jnz            .L10352 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05559"></a>05559                         <span class="stringliteral">"add          $8, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
-<a name="l05560"></a>05560                         <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
-<a name="l05561"></a>05561                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l05562"></a>05562                         <span class="stringliteral">"jnz            .L10350 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05563"></a>05563                         <span class="comment">/* --- */</span>
-<a name="l05564"></a>05564                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l05565"></a>05565                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l05566"></a>05566                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
-<a name="l05567"></a>05567                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
-<a name="l05568"></a>05568                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
-<a name="l05569"></a>05569                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
-<a name="l05570"></a>05570                         <span class="stringliteral">"m"</span>(Divisor)            <span class="comment">/* %5 */</span>
-<a name="l05571"></a>05571                         );
-<a name="l05572"></a>05572 <span class="preprocessor">#endif</span>
-<a name="l05573"></a>05573 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l05574"></a>05574 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
-<a name="l05575"></a>05575         } <span class="keywordflow">else</span> {
-<a name="l05576"></a>05576                 <span class="comment">/* No non-MMX implementation yet */</span>
-<a name="l05577"></a>05577                 <span class="keywordflow">return</span> (-1);
-<a name="l05578"></a>05578         }
-<a name="l05579"></a>05579 }
-<a name="l05580"></a>05580 
-<a name="l05595"></a><a class="code" href="_s_d_l__image_filter_8h.html#a67929babce179e1e333c5cd2e5fc4091">05595</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ac329e5a3b60351768c96c94db9f9cf97" title="Filter using ConvolveKernel3x3ShiftRight: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel3x3ShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</sp [...]
-<a name="l05596"></a>05596                                                                                            <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> NRightShift)
-<a name="l05597"></a>05597 {
-<a name="l05598"></a>05598         <span class="comment">/* Validate input parameters */</span>
-<a name="l05599"></a>05599         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
-<a name="l05600"></a>05600                 <span class="keywordflow">return</span>(-1);
-<a name="l05601"></a>05601 
-<a name="l05602"></a>05602         <span class="keywordflow">if</span> ((columns < 3) || (rows < 3) || (NRightShift > 7))
-<a name="l05603"></a>05603                 <span class="keywordflow">return</span> (-1);
-<a name="l05604"></a>05604 
-<a name="l05605"></a>05605         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
-<a name="l05606"></a>05606 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l05607"></a>05607 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l05608"></a>05608 <span class="preprocessor"></span>                __asm
-<a name="l05609"></a>05609                 {
-<a name="l05610"></a>05610                         pusha
-<a name="l05611"></a>05611                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
-<a name="l05612"></a>05612                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
-<a name="l05613"></a>05613                                 mov bl, NRightShift     <span class="comment">/* load NRightShift into BL */</span>
-<a name="l05614"></a>05614                                 movd mm4, ebx           <span class="comment">/* copy NRightShift into MM4 */</span>
-<a name="l05615"></a>05615                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l05616"></a>05616                                 movq mm5, [edx]         <span class="comment">/* MM5 = {0,K2,K1,K0} */</span>
-<a name="l05617"></a>05617                         add edx, 8      <span class="comment">/* second row              |K0 K1 K2 0| */</span>
-<a name="l05618"></a>05618                                 movq mm6, [edx]         <span class="comment">/* MM6 = {0,K5,K4,K3}  K = |K3 K4 K5 0| */</span>
-<a name="l05619"></a>05619                         add edx, 8      <span class="comment">/* third row               |K6 K7 K8 0| */</span>
-<a name="l05620"></a>05620                                 movq mm7, [edx]         <span class="comment">/* MM7 = {0,K8,K7,K6} */</span>
-<a name="l05621"></a>05621                         <span class="comment">/* ---, */</span>
-<a name="l05622"></a>05622                         mov eax, columns        <span class="comment">/* load columns into EAX */</span>
-<a name="l05623"></a>05623                                 mov esi, Src    <span class="comment">/* ESI = Src row 0 address */</span>
-<a name="l05624"></a>05624                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
-<a name="l05625"></a>05625                                 add edi, eax    <span class="comment">/* EDI = EDI + columns */</span>
-<a name="l05626"></a>05626                                 inc              edi            <span class="comment">/* 1 byte offset from the left edge */</span>
-<a name="l05627"></a>05627                                 mov edx, rows           <span class="comment">/* initialize ROWS counter */</span>
-<a name="l05628"></a>05628                                 sub edx, 2      <span class="comment">/* do not use first and last row */</span>
-<a name="l05629"></a>05629                                 <span class="comment">/* ---, */</span>
-<a name="l05630"></a>05630 L10360:
-<a name="l05631"></a>05631                         mov ecx, eax    <span class="comment">/* initialize COLUMS counter */</span>
-<a name="l05632"></a>05632                                 sub ecx, 2      <span class="comment">/* do not use first and last column */</span>
-<a name="l05633"></a>05633                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l05634"></a>05634 L10362:
-<a name="l05635"></a>05635                         <span class="comment">/* ---, */</span>
-<a name="l05636"></a>05636                         movq mm1, [esi]         <span class="comment">/* load 8 bytes of the image first row */</span>
-<a name="l05637"></a>05637                         add esi, eax    <span class="comment">/* move one row below */</span>
-<a name="l05638"></a>05638                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes of the image second row */</span>
-<a name="l05639"></a>05639                         add esi, eax    <span class="comment">/* move one row below */</span>
-<a name="l05640"></a>05640                                 movq mm3, [esi]         <span class="comment">/* load 8 bytes of the image third row */</span>
-<a name="l05641"></a>05641                         punpcklbw mm1, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l05642"></a>05642                                 punpcklbw mm2, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l05643"></a>05643                                 punpcklbw mm3, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l05644"></a>05644                                 psrlw mm1, mm4          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05645"></a>05645                                 psrlw mm2, mm4          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05646"></a>05646                                 psrlw mm3, mm4          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05647"></a>05647                                 pmullw mm1, mm5         <span class="comment">/* multiply words first row  image*Kernel */</span>
-<a name="l05648"></a>05648                                 pmullw mm2, mm6         <span class="comment">/* multiply words second row image*Kernel */</span>
-<a name="l05649"></a>05649                                 pmullw mm3, mm7         <span class="comment">/* multiply words third row  image*Kernel */</span>
-<a name="l05650"></a>05650                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the first and second rows */</span>
-<a name="l05651"></a>05651                                 paddsw mm1, mm3         <span class="comment">/* add 4 words of the third row and result */</span>
-<a name="l05652"></a>05652                                 movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05653"></a>05653                                 psrlq mm1, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l05654"></a>05654                                 paddsw mm1, mm2         <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l05655"></a>05655                                 movq mm3, mm1           <span class="comment">/* copy MM1 into MM3 */</span>
-<a name="l05656"></a>05656                                 psrlq mm1, 16           <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l05657"></a>05657                                 paddsw mm1, mm3         <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l05658"></a>05658                                 packuswb mm1, mm0       <span class="comment">/* pack shift result with saturation */</span>
-<a name="l05659"></a>05659                                 movd ebx, mm1           <span class="comment">/* copy saturated result into EBX */</span>
-<a name="l05660"></a>05660                                 mov [edi], bl           <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l05661"></a>05661                                 <span class="comment">/* --, */</span>
-<a name="l05662"></a>05662                                 sub esi, eax    <span class="comment">/* move two rows up */</span>
-<a name="l05663"></a>05663                                 sub esi, eax
-<a name="l05664"></a>05664                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l05665"></a>05665                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l05666"></a>05666                                 <span class="comment">/* ---, */</span>
-<a name="l05667"></a>05667                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l05668"></a>05668                                 jnz            L10362           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05669"></a>05669                                 add esi, 2      <span class="comment">/* move to the next row in Src */</span>
-<a name="l05670"></a>05670                                 add edi, 2      <span class="comment">/* move to the next row in Dest */</span>
-<a name="l05671"></a>05671                                 dec              edx            <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l05672"></a>05672                                 jnz            L10360           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05673"></a>05673                                 <span class="comment">/* ---, */</span>
-<a name="l05674"></a>05674                                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l05675"></a>05675                                 popa
-<a name="l05676"></a>05676                 }
-<a name="l05677"></a>05677 <span class="preprocessor">#else</span>
-<a name="l05678"></a>05678 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l05679"></a>05679                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
-<a name="l05680"></a>05680                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
-<a name="l05681"></a>05681                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load NRightShift into BL */</span>
-<a name="l05682"></a>05682                         <span class="stringliteral">"movd      %%ebx, %%mm4 \n\t"</span>   <span class="comment">/* copy NRightShift into MM4 */</span>
-<a name="l05683"></a>05683                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l05684"></a>05684                         <span class="stringliteral">"movq    (%%edx), %%mm5 \n\t"</span>   <span class="comment">/* MM5 = {0,K2,K1,K0} */</span>
-<a name="l05685"></a>05685                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* second row              |K0 K1 K2 0| */</span>
-<a name="l05686"></a>05686                         <span class="stringliteral">"movq    (%%edx), %%mm6 \n\t"</span>   <span class="comment">/* MM6 = {0,K5,K4,K3}  K = |K3 K4 K5 0| */</span>
-<a name="l05687"></a>05687                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* third row               |K6 K7 K8 0| */</span>
-<a name="l05688"></a>05688                         <span class="stringliteral">"movq    (%%edx), %%mm7 \n\t"</span>   <span class="comment">/* MM7 = {0,K8,K7,K6} */</span>
-<a name="l05689"></a>05689                         <span class="comment">/* --- */</span>
-<a name="l05690"></a>05690                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
-<a name="l05691"></a>05691                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* ESI = Src row 0 address */</span>
-<a name="l05692"></a>05692                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
-<a name="l05693"></a>05693                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* EDI = EDI + columns */</span>
-<a name="l05694"></a>05694                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* 1 byte offset from the left edge */</span>
-<a name="l05695"></a>05695                         <span class="stringliteral">"mov          %2, %%edx \n\t"</span>   <span class="comment">/* initialize ROWS counter */</span>
-<a name="l05696"></a>05696                         <span class="stringliteral">"sub          $2, %%edx \n\t"</span>   <span class="comment">/* do not use first and last row */</span>
-<a name="l05697"></a>05697                         <span class="comment">/* --- */</span>
-<a name="l05698"></a>05698                         <span class="stringliteral">".L10360:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMS counter */</span>
-<a name="l05699"></a>05699                         <span class="stringliteral">"sub          $2, %%ecx \n\t"</span>   <span class="comment">/* do not use first and last column */</span>
-<a name="l05700"></a>05700                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l05701"></a>05701                         <span class="stringliteral">".L10362:               \n\t"</span>
-<a name="l05702"></a>05702                         <span class="comment">/* --- */</span>
-<a name="l05703"></a>05703                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the image first row */</span>
-<a name="l05704"></a>05704                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move one row below */</span>
-<a name="l05705"></a>05705                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes of the image second row */</span>
-<a name="l05706"></a>05706                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move one row below */</span>
-<a name="l05707"></a>05707                         <span class="stringliteral">"movq    (%%esi), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes of the image third row */</span>
-<a name="l05708"></a>05708                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l05709"></a>05709                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l05710"></a>05710                         <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
-<a name="l05711"></a>05711                         <span class="stringliteral">"psrlw     %%mm4, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05712"></a>05712                         <span class="stringliteral">"psrlw     %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05713"></a>05713                         <span class="stringliteral">"psrlw     %%mm4, %%mm3 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05714"></a>05714                         <span class="stringliteral">"pmullw    %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* multiply words first row  image*Kernel */</span>
-<a name="l05715"></a>05715                         <span class="stringliteral">"pmullw    %%mm6, %%mm2 \n\t"</span>   <span class="comment">/* multiply words second row image*Kernel */</span>
-<a name="l05716"></a>05716                         <span class="stringliteral">"pmullw    %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* multiply words third row  image*Kernel */</span>
-<a name="l05717"></a>05717                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the first and second rows */</span>
-<a name="l05718"></a>05718                         <span class="stringliteral">"paddsw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the third row and result */</span>
-<a name="l05719"></a>05719                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05720"></a>05720                         <span class="stringliteral">"psrlq       $32, %%mm1 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l05721"></a>05721                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l05722"></a>05722                         <span class="stringliteral">"movq      %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* copy MM1 into MM3 */</span>
-<a name="l05723"></a>05723                         <span class="stringliteral">"psrlq       $16, %%mm1 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l05724"></a>05724                         <span class="stringliteral">"paddsw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l05725"></a>05725                         <span class="stringliteral">"packuswb  %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* pack shift result with saturation */</span>
-<a name="l05726"></a>05726                         <span class="stringliteral">"movd      %%mm1, %%ebx \n\t"</span>   <span class="comment">/* copy saturated result into EBX */</span>
-<a name="l05727"></a>05727                         <span class="stringliteral">"mov      %%bl, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l05728"></a>05728                         <span class="comment">/* -- */</span>
-<a name="l05729"></a>05729                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span>   <span class="comment">/* move two rows up */</span>
-<a name="l05730"></a>05730                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span> <span class="stringliteral">"inc              %%esi \n\t"</span>     <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l05731"></a>05731                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l05732"></a>05732                         <span class="comment">/* --- */</span>
-<a name="l05733"></a>05733                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l05734"></a>05734                         <span class="stringliteral">"jnz            .L10362 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05735"></a>05735                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
-<a name="l05736"></a>05736                         <span class="stringliteral">"add          $2, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
-<a name="l05737"></a>05737                         <span class="stringliteral">"dec              %%edx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l05738"></a>05738                         <span class="stringliteral">"jnz            .L10360 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05739"></a>05739                         <span class="comment">/* --- */</span>
-<a name="l05740"></a>05740                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l05741"></a>05741                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l05742"></a>05742                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
-<a name="l05743"></a>05743                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
-<a name="l05744"></a>05744                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
-<a name="l05745"></a>05745                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
-<a name="l05746"></a>05746                         <span class="stringliteral">"m"</span>(NRightShift)        <span class="comment">/* %5 */</span>
-<a name="l05747"></a>05747                         );
-<a name="l05748"></a>05748 <span class="preprocessor">#endif</span>
-<a name="l05749"></a>05749 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l05750"></a>05750 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
-<a name="l05751"></a>05751         } <span class="keywordflow">else</span> {
-<a name="l05752"></a>05752                 <span class="comment">/* No non-MMX implementation yet */</span>
-<a name="l05753"></a>05753                 <span class="keywordflow">return</span> (-1);
-<a name="l05754"></a>05754         }
-<a name="l05755"></a>05755 }
-<a name="l05756"></a>05756 
-<a name="l05771"></a><a class="code" href="_s_d_l__image_filter_8h.html#a9aaa45452b04f51f52826c2104ea3b85">05771</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a5253738dc4c892352b078d9a7dec2b20" title="Filter using ConvolveKernel5x5ShiftRight: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel5x5ShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</sp [...]
-<a name="l05772"></a>05772                                                                                            <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> NRightShift)
-<a name="l05773"></a>05773 {
-<a name="l05774"></a>05774         <span class="comment">/* Validate input parameters */</span>
-<a name="l05775"></a>05775         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
-<a name="l05776"></a>05776                 <span class="keywordflow">return</span>(-1);
-<a name="l05777"></a>05777 
-<a name="l05778"></a>05778         <span class="keywordflow">if</span> ((columns < 5) || (rows < 5) || (NRightShift > 7))
-<a name="l05779"></a>05779                 <span class="keywordflow">return</span> (-1);
-<a name="l05780"></a>05780 
-<a name="l05781"></a>05781         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
-<a name="l05782"></a>05782 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l05783"></a>05783 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l05784"></a>05784 <span class="preprocessor"></span>                __asm
-<a name="l05785"></a>05785                 {
-<a name="l05786"></a>05786                         pusha
-<a name="l05787"></a>05787                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
-<a name="l05788"></a>05788                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
-<a name="l05789"></a>05789                                 mov bl, NRightShift     <span class="comment">/* load NRightShift into BL */</span>
-<a name="l05790"></a>05790                                 movd mm5, ebx           <span class="comment">/* copy NRightShift into MM5 */</span>
-<a name="l05791"></a>05791                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l05792"></a>05792                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
-<a name="l05793"></a>05793                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
-<a name="l05794"></a>05794                                 add edi, 2      <span class="comment">/* 2 column offset from the left edge */</span>
-<a name="l05795"></a>05795                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
-<a name="l05796"></a>05796                                 shl eax, 1      <span class="comment">/* EAX = columns * 2 */</span>
-<a name="l05797"></a>05797                                 add edi, eax    <span class="comment">/* 2 row offset from the top edge */</span>
-<a name="l05798"></a>05798                                 shr eax, 1      <span class="comment">/* EAX = columns */</span>
-<a name="l05799"></a>05799                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
-<a name="l05800"></a>05800                                 sub ebx, 4      <span class="comment">/* do not use first 2 and last 2 rows */</span>
-<a name="l05801"></a>05801                                 <span class="comment">/* ---, */</span>
-<a name="l05802"></a>05802 L10370:
-<a name="l05803"></a>05803                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l05804"></a>05804                                 sub ecx, 4      <span class="comment">/* do not use first 2 and last 2 columns */</span>
-<a name="l05805"></a>05805                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l05806"></a>05806 L10372:
-<a name="l05807"></a>05807                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l05808"></a>05808                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
-<a name="l05809"></a>05809                                 <span class="comment">/* --- 1 */</span>
-<a name="l05810"></a>05810                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05811"></a>05811                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05812"></a>05812                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05813"></a>05813                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05814"></a>05814                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05815"></a>05815                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05816"></a>05816                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05817"></a>05817                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05818"></a>05818                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05819"></a>05819                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05820"></a>05820                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05821"></a>05821                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l05822"></a>05822                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l05823"></a>05823                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05824"></a>05824                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05825"></a>05825                                 <span class="comment">/* --- 2 */</span>
-<a name="l05826"></a>05826                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05827"></a>05827                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05828"></a>05828                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05829"></a>05829                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05830"></a>05830                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05831"></a>05831                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05832"></a>05832                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05833"></a>05833                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05834"></a>05834                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05835"></a>05835                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05836"></a>05836                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05837"></a>05837                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l05838"></a>05838                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l05839"></a>05839                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05840"></a>05840                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05841"></a>05841                                 <span class="comment">/* --- 3 */</span>
-<a name="l05842"></a>05842                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05843"></a>05843                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05844"></a>05844                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05845"></a>05845                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05846"></a>05846                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05847"></a>05847                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05848"></a>05848                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05849"></a>05849                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05850"></a>05850                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05851"></a>05851                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05852"></a>05852                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05853"></a>05853                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l05854"></a>05854                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l05855"></a>05855                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05856"></a>05856                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05857"></a>05857                                 <span class="comment">/* --- 4 */</span>
-<a name="l05858"></a>05858                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05859"></a>05859                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05860"></a>05860                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05861"></a>05861                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05862"></a>05862                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05863"></a>05863                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05864"></a>05864                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05865"></a>05865                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05866"></a>05866                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05867"></a>05867                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05868"></a>05868                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05869"></a>05869                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l05870"></a>05870                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l05871"></a>05871                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05872"></a>05872                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05873"></a>05873                                 <span class="comment">/* --- 5 */</span>
-<a name="l05874"></a>05874                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05875"></a>05875                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05876"></a>05876                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05877"></a>05877                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05878"></a>05878                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05879"></a>05879                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05880"></a>05880                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05881"></a>05881                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05882"></a>05882                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05883"></a>05883                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l05884"></a>05884                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l05885"></a>05885                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05886"></a>05886                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05887"></a>05887                                 <span class="comment">/* ---, */</span>
-<a name="l05888"></a>05888                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l05889"></a>05889                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l05890"></a>05890                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l05891"></a>05891                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l05892"></a>05892                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l05893"></a>05893                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l05894"></a>05894                                 movd mm1, eax           <span class="comment">/* save EAX in MM1 */</span>
-<a name="l05895"></a>05895                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
-<a name="l05896"></a>05896                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l05897"></a>05897                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l05898"></a>05898                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
-<a name="l05899"></a>05899                                 <span class="comment">/* --, */</span>
-<a name="l05900"></a>05900                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l05901"></a>05901                                 sub edx, 72     <span class="comment">/* EDX = Kernel address */</span>
-<a name="l05902"></a>05902                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l05903"></a>05903                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l05904"></a>05904                                 <span class="comment">/* ---, */</span>
-<a name="l05905"></a>05905                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l05906"></a>05906                                 jnz            L10372           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05907"></a>05907                                 add esi, 4      <span class="comment">/* move to the next row in Src */</span>
-<a name="l05908"></a>05908                                 add edi, 4      <span class="comment">/* move to the next row in Dest */</span>
-<a name="l05909"></a>05909                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l05910"></a>05910                                 jnz            L10370           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l05911"></a>05911                                 <span class="comment">/* ---, */</span>
-<a name="l05912"></a>05912                                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l05913"></a>05913                                 popa
-<a name="l05914"></a>05914                 }
-<a name="l05915"></a>05915 <span class="preprocessor">#else</span>
-<a name="l05916"></a>05916 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l05917"></a>05917                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
-<a name="l05918"></a>05918                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
-<a name="l05919"></a>05919                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load NRightShift into BL */</span>
-<a name="l05920"></a>05920                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy NRightShift into MM5 */</span>
-<a name="l05921"></a>05921                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l05922"></a>05922                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
-<a name="l05923"></a>05923                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
-<a name="l05924"></a>05924                         <span class="stringliteral">"add          $2, %%edi \n\t"</span>   <span class="comment">/* 2 column offset from the left edge */</span>
-<a name="l05925"></a>05925                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
-<a name="l05926"></a>05926                         <span class="stringliteral">"shl          $1, %%eax \n\t"</span>   <span class="comment">/* EAX = columns * 2 */</span>
-<a name="l05927"></a>05927                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 2 row offset from the top edge */</span>
-<a name="l05928"></a>05928                         <span class="stringliteral">"shr          $1, %%eax \n\t"</span>   <span class="comment">/* EAX = columns */</span>
-<a name="l05929"></a>05929                         <span class="stringliteral">"mov          %2, %%ebx \n\t"</span>   <span class="comment">/* initialize ROWS counter */</span>
-<a name="l05930"></a>05930                         <span class="stringliteral">"sub          $4, %%ebx \n\t"</span>   <span class="comment">/* do not use first 2 and last 2 rows */</span>
-<a name="l05931"></a>05931                         <span class="comment">/* --- */</span>
-<a name="l05932"></a>05932                         <span class="stringliteral">".L10370:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l05933"></a>05933                         <span class="stringliteral">"sub          $4, %%ecx \n\t"</span>   <span class="comment">/* do not use first 2 and last 2 columns */</span>
-<a name="l05934"></a>05934                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l05935"></a>05935                         <span class="stringliteral">".L10372:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l05936"></a>05936                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
-<a name="l05937"></a>05937                         <span class="comment">/* --- 1 */</span>
-<a name="l05938"></a>05938                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05939"></a>05939                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05940"></a>05940                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05941"></a>05941                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05942"></a>05942                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05943"></a>05943                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05944"></a>05944                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05945"></a>05945                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05946"></a>05946                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05947"></a>05947                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05948"></a>05948                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05949"></a>05949                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05950"></a>05950                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05951"></a>05951                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05952"></a>05952                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05953"></a>05953                         <span class="comment">/* --- 2 */</span>
-<a name="l05954"></a>05954                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05955"></a>05955                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05956"></a>05956                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05957"></a>05957                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05958"></a>05958                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05959"></a>05959                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05960"></a>05960                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05961"></a>05961                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05962"></a>05962                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05963"></a>05963                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05964"></a>05964                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05965"></a>05965                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05966"></a>05966                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05967"></a>05967                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05968"></a>05968                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05969"></a>05969                         <span class="comment">/* --- 3 */</span>
-<a name="l05970"></a>05970                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05971"></a>05971                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05972"></a>05972                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05973"></a>05973                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05974"></a>05974                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05975"></a>05975                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05976"></a>05976                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05977"></a>05977                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05978"></a>05978                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05979"></a>05979                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05980"></a>05980                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05981"></a>05981                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05982"></a>05982                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05983"></a>05983                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l05984"></a>05984                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l05985"></a>05985                         <span class="comment">/* --- 4 */</span>
-<a name="l05986"></a>05986                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l05987"></a>05987                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l05988"></a>05988                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l05989"></a>05989                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05990"></a>05990                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05991"></a>05991                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l05992"></a>05992                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l05993"></a>05993                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l05994"></a>05994                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l05995"></a>05995                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05996"></a>05996                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l05997"></a>05997                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l05998"></a>05998                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l05999"></a>05999                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06000"></a>06000                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06001"></a>06001                         <span class="comment">/* --- 5 */</span>
-<a name="l06002"></a>06002                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06003"></a>06003                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06004"></a>06004                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06005"></a>06005                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06006"></a>06006                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06007"></a>06007                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06008"></a>06008                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06009"></a>06009                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06010"></a>06010                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06011"></a>06011                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06012"></a>06012                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06013"></a>06013                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06014"></a>06014                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06015"></a>06015                         <span class="comment">/* --- */</span>
-<a name="l06016"></a>06016                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l06017"></a>06017                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l06018"></a>06018                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l06019"></a>06019                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l06020"></a>06020                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l06021"></a>06021                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l06022"></a>06022                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EAX in MM1 */</span>
-<a name="l06023"></a>06023                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
-<a name="l06024"></a>06024                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l06025"></a>06025                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l06026"></a>06026                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
-<a name="l06027"></a>06027                         <span class="comment">/* -- */</span>
-<a name="l06028"></a>06028                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l06029"></a>06029                         <span class="stringliteral">"sub         $72, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
-<a name="l06030"></a>06030                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l06031"></a>06031                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l06032"></a>06032                         <span class="comment">/* --- */</span>
-<a name="l06033"></a>06033                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l06034"></a>06034                         <span class="stringliteral">"jnz            .L10372 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l06035"></a>06035                         <span class="stringliteral">"add          $4, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
-<a name="l06036"></a>06036                         <span class="stringliteral">"add          $4, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
-<a name="l06037"></a>06037                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l06038"></a>06038                         <span class="stringliteral">"jnz            .L10370 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l06039"></a>06039                         <span class="comment">/* --- */</span>
-<a name="l06040"></a>06040                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l06041"></a>06041                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l06042"></a>06042                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
-<a name="l06043"></a>06043                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
-<a name="l06044"></a>06044                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
-<a name="l06045"></a>06045                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
-<a name="l06046"></a>06046                         <span class="stringliteral">"m"</span>(NRightShift)        <span class="comment">/* %5 */</span>
-<a name="l06047"></a>06047                         );
-<a name="l06048"></a>06048 <span class="preprocessor">#endif</span>
-<a name="l06049"></a>06049 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l06050"></a>06050 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
-<a name="l06051"></a>06051         } <span class="keywordflow">else</span> {
-<a name="l06052"></a>06052                 <span class="comment">/* No non-MMX implementation yet */</span>
-<a name="l06053"></a>06053                 <span class="keywordflow">return</span> (-1);
-<a name="l06054"></a>06054         }
-<a name="l06055"></a>06055 }
-<a name="l06056"></a>06056 
-<a name="l06071"></a><a class="code" href="_s_d_l__image_filter_8h.html#a6dbe52e917c0858bd311e9ce75219587">06071</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a48b40065652dda699875f1425b9227a6" title="Filter using ConvolveKernel7x7ShiftRight: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel7x7ShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</sp [...]
-<a name="l06072"></a>06072                                                                                            <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> NRightShift)
-<a name="l06073"></a>06073 {
-<a name="l06074"></a>06074         <span class="comment">/* Validate input parameters */</span>
-<a name="l06075"></a>06075         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
-<a name="l06076"></a>06076                 <span class="keywordflow">return</span>(-1);
-<a name="l06077"></a>06077 
-<a name="l06078"></a>06078         <span class="keywordflow">if</span> ((columns < 7) || (rows < 7) || (NRightShift > 7))
-<a name="l06079"></a>06079                 <span class="keywordflow">return</span> (-1);
-<a name="l06080"></a>06080 
-<a name="l06081"></a>06081         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
-<a name="l06082"></a>06082 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l06083"></a>06083 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l06084"></a>06084 <span class="preprocessor"></span>                __asm
-<a name="l06085"></a>06085                 {
-<a name="l06086"></a>06086                         pusha
-<a name="l06087"></a>06087                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
-<a name="l06088"></a>06088                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
-<a name="l06089"></a>06089                                 mov bl, NRightShift     <span class="comment">/* load NRightShift into BL */</span>
-<a name="l06090"></a>06090                                 movd mm5, ebx           <span class="comment">/* copy NRightShift into MM5 */</span>
-<a name="l06091"></a>06091                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l06092"></a>06092                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
-<a name="l06093"></a>06093                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
-<a name="l06094"></a>06094                                 add edi, 3      <span class="comment">/* 3 column offset from the left edge */</span>
-<a name="l06095"></a>06095                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
-<a name="l06096"></a>06096                                 add edi, eax    <span class="comment">/* 3 row offset from the top edge */</span>
-<a name="l06097"></a>06097                                 add edi, eax
-<a name="l06098"></a>06098                                 add edi, eax
-<a name="l06099"></a>06099                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
-<a name="l06100"></a>06100                                 sub ebx, 6      <span class="comment">/* do not use first 3 and last 3 rows */</span>
-<a name="l06101"></a>06101                                 <span class="comment">/* ---, */</span>
-<a name="l06102"></a>06102 L10380:
-<a name="l06103"></a>06103                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l06104"></a>06104                                 sub ecx, 6      <span class="comment">/* do not use first 3 and last 3 columns */</span>
-<a name="l06105"></a>06105                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l06106"></a>06106 L10382:
-<a name="l06107"></a>06107                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l06108"></a>06108                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
-<a name="l06109"></a>06109                                 <span class="comment">/* --- 1 */</span>
-<a name="l06110"></a>06110                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06111"></a>06111                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06112"></a>06112                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06113"></a>06113                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06114"></a>06114                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06115"></a>06115                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06116"></a>06116                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06117"></a>06117                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06118"></a>06118                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06119"></a>06119                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06120"></a>06120                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06121"></a>06121                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06122"></a>06122                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06123"></a>06123                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06124"></a>06124                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06125"></a>06125                                 <span class="comment">/* --- 2 */</span>
-<a name="l06126"></a>06126                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06127"></a>06127                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06128"></a>06128                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06129"></a>06129                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06130"></a>06130                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06131"></a>06131                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06132"></a>06132                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06133"></a>06133                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06134"></a>06134                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06135"></a>06135                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06136"></a>06136                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06137"></a>06137                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06138"></a>06138                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06139"></a>06139                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06140"></a>06140                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06141"></a>06141                                 <span class="comment">/* --- 3 */</span>
-<a name="l06142"></a>06142                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06143"></a>06143                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06144"></a>06144                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06145"></a>06145                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06146"></a>06146                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06147"></a>06147                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06148"></a>06148                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06149"></a>06149                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06150"></a>06150                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06151"></a>06151                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06152"></a>06152                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06153"></a>06153                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06154"></a>06154                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06155"></a>06155                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06156"></a>06156                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06157"></a>06157                                 <span class="comment">/* --- 4 */</span>
-<a name="l06158"></a>06158                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06159"></a>06159                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06160"></a>06160                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06161"></a>06161                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06162"></a>06162                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06163"></a>06163                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06164"></a>06164                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06165"></a>06165                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06166"></a>06166                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06167"></a>06167                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06168"></a>06168                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06169"></a>06169                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06170"></a>06170                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06171"></a>06171                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06172"></a>06172                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06173"></a>06173                                 <span class="comment">/* --- 5 */</span>
-<a name="l06174"></a>06174                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06175"></a>06175                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06176"></a>06176                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06177"></a>06177                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06178"></a>06178                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06179"></a>06179                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06180"></a>06180                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06181"></a>06181                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06182"></a>06182                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06183"></a>06183                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06184"></a>06184                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06185"></a>06185                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06186"></a>06186                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06187"></a>06187                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06188"></a>06188                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06189"></a>06189                                 <span class="comment">/* --- 6 */</span>
-<a name="l06190"></a>06190                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06191"></a>06191                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06192"></a>06192                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06193"></a>06193                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06194"></a>06194                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06195"></a>06195                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06196"></a>06196                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06197"></a>06197                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06198"></a>06198                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06199"></a>06199                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06200"></a>06200                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06201"></a>06201                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06202"></a>06202                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06203"></a>06203                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06204"></a>06204                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06205"></a>06205                                 <span class="comment">/* --- 7 */</span>
-<a name="l06206"></a>06206                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06207"></a>06207                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06208"></a>06208                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06209"></a>06209                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06210"></a>06210                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06211"></a>06211                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06212"></a>06212                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06213"></a>06213                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06214"></a>06214                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06215"></a>06215                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06216"></a>06216                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06217"></a>06217                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06218"></a>06218                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06219"></a>06219                                 <span class="comment">/* ---, */</span>
-<a name="l06220"></a>06220                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l06221"></a>06221                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l06222"></a>06222                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l06223"></a>06223                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l06224"></a>06224                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l06225"></a>06225                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l06226"></a>06226                                 movd mm1, eax           <span class="comment">/* save EAX in MM1 */</span>
-<a name="l06227"></a>06227                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
-<a name="l06228"></a>06228                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l06229"></a>06229                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l06230"></a>06230                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
-<a name="l06231"></a>06231                                 <span class="comment">/* --, */</span>
-<a name="l06232"></a>06232                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l06233"></a>06233                                 sub edx, 104    <span class="comment">/* EDX = Kernel address */</span>
-<a name="l06234"></a>06234                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l06235"></a>06235                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l06236"></a>06236                                 <span class="comment">/* ---, */</span>
-<a name="l06237"></a>06237                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l06238"></a>06238                                 jnz            L10382           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l06239"></a>06239                                 add esi, 6      <span class="comment">/* move to the next row in Src */</span>
-<a name="l06240"></a>06240                                 add edi, 6      <span class="comment">/* move to the next row in Dest */</span>
-<a name="l06241"></a>06241                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l06242"></a>06242                                 jnz            L10380           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l06243"></a>06243                                 <span class="comment">/* ---, */</span>
-<a name="l06244"></a>06244                                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l06245"></a>06245                                 popa
-<a name="l06246"></a>06246                 }
-<a name="l06247"></a>06247 <span class="preprocessor">#else</span>
-<a name="l06248"></a>06248 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l06249"></a>06249                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
-<a name="l06250"></a>06250                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
-<a name="l06251"></a>06251                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load NRightShift into BL */</span>
-<a name="l06252"></a>06252                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy NRightShift into MM5 */</span>
-<a name="l06253"></a>06253                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l06254"></a>06254                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
-<a name="l06255"></a>06255                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
-<a name="l06256"></a>06256                         <span class="stringliteral">"add          $3, %%edi \n\t"</span>   <span class="comment">/* 3 column offset from the left edge */</span>
-<a name="l06257"></a>06257                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
-<a name="l06258"></a>06258                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 3 row offset from the top edge */</span>
-<a name="l06259"></a>06259                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"mov          %2, %%ebx \n\t"</span>       <span class="comment">/* initialize ROWS counter */</span>
-<a name="l06260"></a>06260                         <span class="stringliteral">"sub          $6, %%ebx \n\t"</span>   <span class="comment">/* do not use first 3 and last 3 rows */</span>
-<a name="l06261"></a>06261                         <span class="comment">/* --- */</span>
-<a name="l06262"></a>06262                         <span class="stringliteral">".L10380:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l06263"></a>06263                         <span class="stringliteral">"sub          $6, %%ecx \n\t"</span>   <span class="comment">/* do not use first 3 and last 3 columns */</span>
-<a name="l06264"></a>06264                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l06265"></a>06265                         <span class="stringliteral">".L10382:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l06266"></a>06266                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
-<a name="l06267"></a>06267                         <span class="comment">/* --- 1 */</span>
-<a name="l06268"></a>06268                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06269"></a>06269                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06270"></a>06270                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06271"></a>06271                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06272"></a>06272                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06273"></a>06273                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06274"></a>06274                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06275"></a>06275                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06276"></a>06276                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06277"></a>06277                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06278"></a>06278                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06279"></a>06279                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06280"></a>06280                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06281"></a>06281                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06282"></a>06282                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06283"></a>06283                         <span class="comment">/* --- 2 */</span>
-<a name="l06284"></a>06284                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06285"></a>06285                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06286"></a>06286                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06287"></a>06287                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06288"></a>06288                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06289"></a>06289                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06290"></a>06290                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06291"></a>06291                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06292"></a>06292                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06293"></a>06293                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06294"></a>06294                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06295"></a>06295                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06296"></a>06296                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06297"></a>06297                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06298"></a>06298                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06299"></a>06299                         <span class="comment">/* --- 3 */</span>
-<a name="l06300"></a>06300                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06301"></a>06301                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06302"></a>06302                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06303"></a>06303                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06304"></a>06304                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06305"></a>06305                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06306"></a>06306                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06307"></a>06307                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06308"></a>06308                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06309"></a>06309                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06310"></a>06310                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06311"></a>06311                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06312"></a>06312                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06313"></a>06313                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06314"></a>06314                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06315"></a>06315                         <span class="comment">/* --- 4 */</span>
-<a name="l06316"></a>06316                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06317"></a>06317                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06318"></a>06318                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06319"></a>06319                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06320"></a>06320                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06321"></a>06321                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06322"></a>06322                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06323"></a>06323                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06324"></a>06324                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06325"></a>06325                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06326"></a>06326                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06327"></a>06327                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06328"></a>06328                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06329"></a>06329                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06330"></a>06330                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06331"></a>06331                         <span class="comment">/* --- 5 */</span>
-<a name="l06332"></a>06332                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06333"></a>06333                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06334"></a>06334                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06335"></a>06335                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06336"></a>06336                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06337"></a>06337                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06338"></a>06338                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06339"></a>06339                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06340"></a>06340                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06341"></a>06341                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06342"></a>06342                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06343"></a>06343                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06344"></a>06344                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06345"></a>06345                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06346"></a>06346                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06347"></a>06347                         <span class="comment">/* --- 6 */</span>
-<a name="l06348"></a>06348                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06349"></a>06349                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06350"></a>06350                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06351"></a>06351                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06352"></a>06352                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06353"></a>06353                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06354"></a>06354                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06355"></a>06355                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06356"></a>06356                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06357"></a>06357                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06358"></a>06358                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06359"></a>06359                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06360"></a>06360                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06361"></a>06361                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06362"></a>06362                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06363"></a>06363                         <span class="comment">/* --- 7 */</span>
-<a name="l06364"></a>06364                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06365"></a>06365                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06366"></a>06366                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06367"></a>06367                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06368"></a>06368                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06369"></a>06369                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06370"></a>06370                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06371"></a>06371                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06372"></a>06372                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06373"></a>06373                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06374"></a>06374                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06375"></a>06375                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06376"></a>06376                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06377"></a>06377                         <span class="comment">/* --- */</span>
-<a name="l06378"></a>06378                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l06379"></a>06379                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l06380"></a>06380                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l06381"></a>06381                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l06382"></a>06382                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l06383"></a>06383                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l06384"></a>06384                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EAX in MM1 */</span>
-<a name="l06385"></a>06385                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
-<a name="l06386"></a>06386                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l06387"></a>06387                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l06388"></a>06388                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
-<a name="l06389"></a>06389                         <span class="comment">/* -- */</span>
-<a name="l06390"></a>06390                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l06391"></a>06391                         <span class="stringliteral">"sub        $104, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
-<a name="l06392"></a>06392                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l06393"></a>06393                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l06394"></a>06394                         <span class="comment">/* --- */</span>
-<a name="l06395"></a>06395                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l06396"></a>06396                         <span class="stringliteral">"jnz            .L10382 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l06397"></a>06397                         <span class="stringliteral">"add          $6, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
-<a name="l06398"></a>06398                         <span class="stringliteral">"add          $6, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
-<a name="l06399"></a>06399                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l06400"></a>06400                         <span class="stringliteral">"jnz            .L10380 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l06401"></a>06401                         <span class="comment">/* --- */</span>
-<a name="l06402"></a>06402                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l06403"></a>06403                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l06404"></a>06404                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
-<a name="l06405"></a>06405                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
-<a name="l06406"></a>06406                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
-<a name="l06407"></a>06407                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
-<a name="l06408"></a>06408                         <span class="stringliteral">"m"</span>(NRightShift)        <span class="comment">/* %5 */</span>
-<a name="l06409"></a>06409                         );
-<a name="l06410"></a>06410 <span class="preprocessor">#endif</span>
-<a name="l06411"></a>06411 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l06412"></a>06412 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
-<a name="l06413"></a>06413         } <span class="keywordflow">else</span> {
-<a name="l06414"></a>06414                 <span class="comment">/* No non-MMX implementation yet */</span>
-<a name="l06415"></a>06415                 <span class="keywordflow">return</span> (-1);
-<a name="l06416"></a>06416         }
-<a name="l06417"></a>06417 }
-<a name="l06418"></a>06418 
-<a name="l06433"></a><a class="code" href="_s_d_l__image_filter_8h.html#ad2702d0524a16032118fdf67e3e0f44a">06433</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a6aaa30dc51d1e51585d02d123b0f1a7a" title="Filter using ConvolveKernel9x9ShiftRight: Dij = saturation255( ... )">SDL_imageFilterConvolveKernel9x9ShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span>  [...]
-<a name="l06434"></a>06434                                                                                            <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> NRightShift)
-<a name="l06435"></a>06435 {
-<a name="l06436"></a>06436         <span class="comment">/* Validate input parameters */</span>
-<a name="l06437"></a>06437         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
-<a name="l06438"></a>06438                 <span class="keywordflow">return</span>(-1);
-<a name="l06439"></a>06439 
-<a name="l06440"></a>06440         <span class="keywordflow">if</span> ((columns < 9) || (rows < 9) || (NRightShift > 7))
-<a name="l06441"></a>06441                 <span class="keywordflow">return</span> (-1);
-<a name="l06442"></a>06442 
-<a name="l06443"></a>06443         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
-<a name="l06444"></a>06444 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l06445"></a>06445 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l06446"></a>06446 <span class="preprocessor"></span>                __asm
-<a name="l06447"></a>06447                 {
-<a name="l06448"></a>06448                         pusha
-<a name="l06449"></a>06449                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
-<a name="l06450"></a>06450                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
-<a name="l06451"></a>06451                                 mov bl, NRightShift     <span class="comment">/* load NRightShift into BL */</span>
-<a name="l06452"></a>06452                                 movd mm5, ebx           <span class="comment">/* copy NRightShift into MM5 */</span>
-<a name="l06453"></a>06453                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l06454"></a>06454                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
-<a name="l06455"></a>06455                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
-<a name="l06456"></a>06456                                 add edi, 4      <span class="comment">/* 4 column offset from the left edge */</span>
-<a name="l06457"></a>06457                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
-<a name="l06458"></a>06458                                 add edi, eax    <span class="comment">/* 4 row offset from the top edge */</span>
-<a name="l06459"></a>06459                                 add edi, eax
-<a name="l06460"></a>06460                                 add edi, eax
-<a name="l06461"></a>06461                                 add edi, eax
-<a name="l06462"></a>06462                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
-<a name="l06463"></a>06463                                 sub ebx, 8      <span class="comment">/* do not use first 4 and last 4 rows */</span>
-<a name="l06464"></a>06464                                 <span class="comment">/* ---, */</span>
-<a name="l06465"></a>06465 L10390:
-<a name="l06466"></a>06466                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l06467"></a>06467                                 sub ecx, 8      <span class="comment">/* do not use first 4 and last 4 columns */</span>
-<a name="l06468"></a>06468                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l06469"></a>06469 L10392:
-<a name="l06470"></a>06470                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l06471"></a>06471                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
-<a name="l06472"></a>06472                                 <span class="comment">/* --- 1 */</span>
-<a name="l06473"></a>06473                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06474"></a>06474                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06475"></a>06475                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06476"></a>06476                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06477"></a>06477                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06478"></a>06478                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06479"></a>06479                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06480"></a>06480                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06481"></a>06481                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06482"></a>06482                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06483"></a>06483                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06484"></a>06484                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06485"></a>06485                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06486"></a>06486                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06487"></a>06487                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06488"></a>06488                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06489"></a>06489                         dec              esi
-<a name="l06490"></a>06490                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06491"></a>06491                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06492"></a>06492                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06493"></a>06493                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06494"></a>06494                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06495"></a>06495                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06496"></a>06496                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06497"></a>06497                                 <span class="comment">/* --- 2 */</span>
-<a name="l06498"></a>06498                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06499"></a>06499                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06500"></a>06500                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06501"></a>06501                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06502"></a>06502                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06503"></a>06503                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06504"></a>06504                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06505"></a>06505                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06506"></a>06506                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06507"></a>06507                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06508"></a>06508                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06509"></a>06509                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06510"></a>06510                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06511"></a>06511                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06512"></a>06512                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06513"></a>06513                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06514"></a>06514                         dec              esi
-<a name="l06515"></a>06515                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06516"></a>06516                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06517"></a>06517                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06518"></a>06518                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06519"></a>06519                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06520"></a>06520                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06521"></a>06521                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06522"></a>06522                                 <span class="comment">/* --- 3 */</span>
-<a name="l06523"></a>06523                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06524"></a>06524                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06525"></a>06525                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06526"></a>06526                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06527"></a>06527                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06528"></a>06528                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06529"></a>06529                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06530"></a>06530                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06531"></a>06531                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06532"></a>06532                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06533"></a>06533                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06534"></a>06534                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06535"></a>06535                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06536"></a>06536                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06537"></a>06537                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06538"></a>06538                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06539"></a>06539                         dec              esi
-<a name="l06540"></a>06540                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06541"></a>06541                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06542"></a>06542                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06543"></a>06543                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06544"></a>06544                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06545"></a>06545                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06546"></a>06546                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06547"></a>06547                                 <span class="comment">/* --- 4 */</span>
-<a name="l06548"></a>06548                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06549"></a>06549                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06550"></a>06550                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06551"></a>06551                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06552"></a>06552                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06553"></a>06553                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06554"></a>06554                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06555"></a>06555                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06556"></a>06556                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06557"></a>06557                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06558"></a>06558                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06559"></a>06559                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06560"></a>06560                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06561"></a>06561                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06562"></a>06562                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06563"></a>06563                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06564"></a>06564                         dec              esi
-<a name="l06565"></a>06565                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06566"></a>06566                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06567"></a>06567                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06568"></a>06568                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06569"></a>06569                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06570"></a>06570                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06571"></a>06571                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06572"></a>06572                                 <span class="comment">/* --- 5 */</span>
-<a name="l06573"></a>06573                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06574"></a>06574                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06575"></a>06575                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06576"></a>06576                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06577"></a>06577                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06578"></a>06578                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06579"></a>06579                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06580"></a>06580                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06581"></a>06581                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06582"></a>06582                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06583"></a>06583                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06584"></a>06584                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06585"></a>06585                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06586"></a>06586                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06587"></a>06587                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06588"></a>06588                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06589"></a>06589                         dec              esi
-<a name="l06590"></a>06590                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06591"></a>06591                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06592"></a>06592                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06593"></a>06593                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06594"></a>06594                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06595"></a>06595                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06596"></a>06596                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06597"></a>06597                                 <span class="comment">/* --- 6 */</span>
-<a name="l06598"></a>06598                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06599"></a>06599                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06600"></a>06600                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06601"></a>06601                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06602"></a>06602                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06603"></a>06603                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06604"></a>06604                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06605"></a>06605                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06606"></a>06606                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06607"></a>06607                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06608"></a>06608                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06609"></a>06609                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06610"></a>06610                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06611"></a>06611                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06612"></a>06612                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06613"></a>06613                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06614"></a>06614                         dec              esi
-<a name="l06615"></a>06615                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06616"></a>06616                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06617"></a>06617                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06618"></a>06618                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06619"></a>06619                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06620"></a>06620                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06621"></a>06621                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06622"></a>06622                                 <span class="comment">/* --- 7 */</span>
-<a name="l06623"></a>06623                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06624"></a>06624                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06625"></a>06625                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06626"></a>06626                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06627"></a>06627                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06628"></a>06628                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06629"></a>06629                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06630"></a>06630                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06631"></a>06631                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06632"></a>06632                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06633"></a>06633                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06634"></a>06634                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06635"></a>06635                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06636"></a>06636                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06637"></a>06637                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06638"></a>06638                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06639"></a>06639                         dec              esi
-<a name="l06640"></a>06640                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06641"></a>06641                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06642"></a>06642                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06643"></a>06643                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06644"></a>06644                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06645"></a>06645                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06646"></a>06646                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06647"></a>06647                                 <span class="comment">/* --- 8 */</span>
-<a name="l06648"></a>06648                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06649"></a>06649                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06650"></a>06650                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06651"></a>06651                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06652"></a>06652                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06653"></a>06653                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06654"></a>06654                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06655"></a>06655                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06656"></a>06656                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06657"></a>06657                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06658"></a>06658                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06659"></a>06659                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06660"></a>06660                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06661"></a>06661                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06662"></a>06662                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06663"></a>06663                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06664"></a>06664                         dec              esi
-<a name="l06665"></a>06665                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06666"></a>06666                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06667"></a>06667                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06668"></a>06668                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06669"></a>06669                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06670"></a>06670                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06671"></a>06671                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06672"></a>06672                                 <span class="comment">/* --- 9 */</span>
-<a name="l06673"></a>06673                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06674"></a>06674                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06675"></a>06675                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06676"></a>06676                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06677"></a>06677                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06678"></a>06678                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06679"></a>06679                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06680"></a>06680                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06681"></a>06681                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06682"></a>06682                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06683"></a>06683                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06684"></a>06684                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06685"></a>06685                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
-<a name="l06686"></a>06686                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06687"></a>06687                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06688"></a>06688                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06689"></a>06689                         movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06690"></a>06690                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06691"></a>06691                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06692"></a>06692                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
-<a name="l06693"></a>06693                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06694"></a>06694                                 <span class="comment">/* ---, */</span>
-<a name="l06695"></a>06695                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l06696"></a>06696                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l06697"></a>06697                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l06698"></a>06698                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l06699"></a>06699                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l06700"></a>06700                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l06701"></a>06701                                 movd mm1, eax           <span class="comment">/* save EAX in MM1 */</span>
-<a name="l06702"></a>06702                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
-<a name="l06703"></a>06703                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l06704"></a>06704                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l06705"></a>06705                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
-<a name="l06706"></a>06706                                 <span class="comment">/* --, */</span>
-<a name="l06707"></a>06707                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l06708"></a>06708                                 sub edx, 208    <span class="comment">/* EDX = Kernel address */</span>
-<a name="l06709"></a>06709                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l06710"></a>06710                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l06711"></a>06711                                 <span class="comment">/* ---, */</span>
-<a name="l06712"></a>06712                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l06713"></a>06713                                 jnz            L10392           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l06714"></a>06714                                 add esi, 8      <span class="comment">/* move to the next row in Src */</span>
-<a name="l06715"></a>06715                                 add edi, 8      <span class="comment">/* move to the next row in Dest */</span>
-<a name="l06716"></a>06716                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l06717"></a>06717                                 jnz            L10390           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l06718"></a>06718                                 <span class="comment">/* ---, */</span>
-<a name="l06719"></a>06719                                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l06720"></a>06720                                 popa
-<a name="l06721"></a>06721                 }
-<a name="l06722"></a>06722 <span class="preprocessor">#else</span>
-<a name="l06723"></a>06723 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l06724"></a>06724                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
-<a name="l06725"></a>06725                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
-<a name="l06726"></a>06726                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load NRightShift into BL */</span>
-<a name="l06727"></a>06727                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy NRightShift into MM5 */</span>
-<a name="l06728"></a>06728                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
-<a name="l06729"></a>06729                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
-<a name="l06730"></a>06730                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
-<a name="l06731"></a>06731                         <span class="stringliteral">"add          $4, %%edi \n\t"</span>   <span class="comment">/* 4 column offset from the left edge */</span>
-<a name="l06732"></a>06732                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
-<a name="l06733"></a>06733                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 4 row offset from the top edge */</span>
-<a name="l06734"></a>06734                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"mov          %2, %%ebx \n\t"</span> <span class="comment">/* initialize ROWS counter */</span>
-<a name="l06735"></a>06735                         <span class="stringliteral">"sub          $8, %%ebx \n\t"</span>   <span class="comment">/* do not use first 4 and last 4 rows */</span>
-<a name="l06736"></a>06736                         <span class="comment">/* --- */</span>
-<a name="l06737"></a>06737                         <span class="stringliteral">".L10390:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
-<a name="l06738"></a>06738                         <span class="stringliteral">"sub          $8, %%ecx \n\t"</span>   <span class="comment">/* do not use first 4 and last 4 columns */</span>
-<a name="l06739"></a>06739                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l06740"></a>06740                         <span class="stringliteral">".L10392:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
-<a name="l06741"></a>06741                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
-<a name="l06742"></a>06742                         <span class="comment">/* --- 1 */</span>
-<a name="l06743"></a>06743                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06744"></a>06744                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06745"></a>06745                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06746"></a>06746                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06747"></a>06747                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06748"></a>06748                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06749"></a>06749                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06750"></a>06750                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06751"></a>06751                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06752"></a>06752                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06753"></a>06753                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06754"></a>06754                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06755"></a>06755                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06756"></a>06756                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06757"></a>06757                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06758"></a>06758                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06759"></a>06759                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06760"></a>06760                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06761"></a>06761                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06762"></a>06762                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06763"></a>06763                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06764"></a>06764                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06765"></a>06765                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06766"></a>06766                         <span class="comment">/* --- 2 */</span>
-<a name="l06767"></a>06767                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06768"></a>06768                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06769"></a>06769                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06770"></a>06770                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06771"></a>06771                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06772"></a>06772                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06773"></a>06773                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06774"></a>06774                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06775"></a>06775                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06776"></a>06776                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06777"></a>06777                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06778"></a>06778                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06779"></a>06779                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06780"></a>06780                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06781"></a>06781                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06782"></a>06782                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06783"></a>06783                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06784"></a>06784                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06785"></a>06785                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06786"></a>06786                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06787"></a>06787                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06788"></a>06788                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06789"></a>06789                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06790"></a>06790                         <span class="comment">/* --- 3 */</span>
-<a name="l06791"></a>06791                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06792"></a>06792                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06793"></a>06793                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06794"></a>06794                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06795"></a>06795                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06796"></a>06796                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06797"></a>06797                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06798"></a>06798                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06799"></a>06799                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06800"></a>06800                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06801"></a>06801                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06802"></a>06802                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06803"></a>06803                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06804"></a>06804                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06805"></a>06805                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06806"></a>06806                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06807"></a>06807                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06808"></a>06808                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06809"></a>06809                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06810"></a>06810                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06811"></a>06811                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06812"></a>06812                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06813"></a>06813                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06814"></a>06814                         <span class="comment">/* --- 4 */</span>
-<a name="l06815"></a>06815                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06816"></a>06816                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06817"></a>06817                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06818"></a>06818                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06819"></a>06819                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06820"></a>06820                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06821"></a>06821                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06822"></a>06822                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06823"></a>06823                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06824"></a>06824                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06825"></a>06825                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06826"></a>06826                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06827"></a>06827                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06828"></a>06828                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06829"></a>06829                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06830"></a>06830                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06831"></a>06831                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06832"></a>06832                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06833"></a>06833                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06834"></a>06834                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06835"></a>06835                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06836"></a>06836                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06837"></a>06837                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06838"></a>06838                         <span class="comment">/* --- 5 */</span>
-<a name="l06839"></a>06839                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06840"></a>06840                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06841"></a>06841                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06842"></a>06842                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06843"></a>06843                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06844"></a>06844                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06845"></a>06845                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06846"></a>06846                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06847"></a>06847                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06848"></a>06848                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06849"></a>06849                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06850"></a>06850                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06851"></a>06851                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06852"></a>06852                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06853"></a>06853                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06854"></a>06854                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06855"></a>06855                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06856"></a>06856                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06857"></a>06857                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06858"></a>06858                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06859"></a>06859                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06860"></a>06860                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06861"></a>06861                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06862"></a>06862                         <span class="comment">/* --- 6 */</span>
-<a name="l06863"></a>06863                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06864"></a>06864                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06865"></a>06865                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06866"></a>06866                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06867"></a>06867                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06868"></a>06868                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06869"></a>06869                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06870"></a>06870                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06871"></a>06871                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06872"></a>06872                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06873"></a>06873                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06874"></a>06874                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06875"></a>06875                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06876"></a>06876                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06877"></a>06877                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06878"></a>06878                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06879"></a>06879                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06880"></a>06880                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06881"></a>06881                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06882"></a>06882                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06883"></a>06883                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06884"></a>06884                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06885"></a>06885                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06886"></a>06886                         <span class="comment">/* --- 7 */</span>
-<a name="l06887"></a>06887                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06888"></a>06888                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06889"></a>06889                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06890"></a>06890                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06891"></a>06891                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06892"></a>06892                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06893"></a>06893                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06894"></a>06894                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06895"></a>06895                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06896"></a>06896                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06897"></a>06897                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06898"></a>06898                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06899"></a>06899                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06900"></a>06900                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06901"></a>06901                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06902"></a>06902                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06903"></a>06903                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06904"></a>06904                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06905"></a>06905                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06906"></a>06906                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06907"></a>06907                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06908"></a>06908                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06909"></a>06909                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06910"></a>06910                         <span class="comment">/* --- 8 */</span>
-<a name="l06911"></a>06911                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06912"></a>06912                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06913"></a>06913                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06914"></a>06914                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06915"></a>06915                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06916"></a>06916                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06917"></a>06917                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06918"></a>06918                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06919"></a>06919                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06920"></a>06920                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06921"></a>06921                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06922"></a>06922                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06923"></a>06923                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06924"></a>06924                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06925"></a>06925                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06926"></a>06926                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06927"></a>06927                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
-<a name="l06928"></a>06928                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06929"></a>06929                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06930"></a>06930                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06931"></a>06931                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06932"></a>06932                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06933"></a>06933                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06934"></a>06934                         <span class="comment">/* --- 9 */</span>
-<a name="l06935"></a>06935                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06936"></a>06936                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
-<a name="l06937"></a>06937                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
-<a name="l06938"></a>06938                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06939"></a>06939                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06940"></a>06940                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06941"></a>06941                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
-<a name="l06942"></a>06942                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06943"></a>06943                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
-<a name="l06944"></a>06944                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06945"></a>06945                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06946"></a>06946                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06947"></a>06947                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
-<a name="l06948"></a>06948                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
-<a name="l06949"></a>06949                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06950"></a>06950                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
-<a name="l06951"></a>06951                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
-<a name="l06952"></a>06952                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
-<a name="l06953"></a>06953                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l06954"></a>06954                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
-<a name="l06955"></a>06955                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
-<a name="l06956"></a>06956                         <span class="comment">/* --- */</span>
-<a name="l06957"></a>06957                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l06958"></a>06958                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l06959"></a>06959                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
-<a name="l06960"></a>06960                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
-<a name="l06961"></a>06961                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
-<a name="l06962"></a>06962                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
-<a name="l06963"></a>06963                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EAX in MM1 */</span>
-<a name="l06964"></a>06964                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
-<a name="l06965"></a>06965                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
-<a name="l06966"></a>06966                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
-<a name="l06967"></a>06967                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
-<a name="l06968"></a>06968                         <span class="comment">/* -- */</span>
-<a name="l06969"></a>06969                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
-<a name="l06970"></a>06970                         <span class="stringliteral">"sub        $208, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
-<a name="l06971"></a>06971                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
-<a name="l06972"></a>06972                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
-<a name="l06973"></a>06973                         <span class="comment">/* --- */</span>
-<a name="l06974"></a>06974                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l06975"></a>06975                         <span class="stringliteral">"jnz            .L10392 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l06976"></a>06976                         <span class="stringliteral">"add          $8, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
-<a name="l06977"></a>06977                         <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
-<a name="l06978"></a>06978                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l06979"></a>06979                         <span class="stringliteral">"jnz            .L10390 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l06980"></a>06980                         <span class="comment">/* --- */</span>
-<a name="l06981"></a>06981                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l06982"></a>06982                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l06983"></a>06983                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
-<a name="l06984"></a>06984                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
-<a name="l06985"></a>06985                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
-<a name="l06986"></a>06986                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
-<a name="l06987"></a>06987                         <span class="stringliteral">"m"</span>(NRightShift)        <span class="comment">/* %5 */</span>
-<a name="l06988"></a>06988                         );
-<a name="l06989"></a>06989 <span class="preprocessor">#endif</span>
-<a name="l06990"></a>06990 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l06991"></a>06991 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
-<a name="l06992"></a>06992         } <span class="keywordflow">else</span> {
-<a name="l06993"></a>06993                 <span class="comment">/* No non-MMX implementation yet */</span>
-<a name="l06994"></a>06994                 <span class="keywordflow">return</span> (-1);
-<a name="l06995"></a>06995         }
-<a name="l06996"></a>06996 }
-<a name="l06997"></a>06997 
-<a name="l06998"></a>06998 <span class="comment">/* ------------------------------------------------------------------------------------ */</span>
-<a name="l06999"></a>06999 
-<a name="l07012"></a><a class="code" href="_s_d_l__image_filter_8h.html#a2a0e4e259150abbe33bcddb046c367ba">07012</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a015fe05161b701162d9ecffb01413f1e" title="Filter using SobelX: Dij = saturation255( ... )">SDL_imageFilterSobelX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Des [...]
-<a name="l07013"></a>07013 {
-<a name="l07014"></a>07014         <span class="comment">/* Validate input parameters */</span>
-<a name="l07015"></a>07015         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL))
-<a name="l07016"></a>07016                 <span class="keywordflow">return</span>(-1);
-<a name="l07017"></a>07017 
-<a name="l07018"></a>07018         <span class="keywordflow">if</span> ((columns < 8) || (rows < 3))
-<a name="l07019"></a>07019                 <span class="keywordflow">return</span> (-1);
-<a name="l07020"></a>07020 
-<a name="l07021"></a>07021         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
-<a name="l07022"></a>07022 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l07023"></a>07023 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l07024"></a>07024 <span class="preprocessor"></span>                __asm
-<a name="l07025"></a>07025                 {
-<a name="l07026"></a>07026                         pusha
-<a name="l07027"></a>07027                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
-<a name="l07028"></a>07028                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
-<a name="l07029"></a>07029                                 <span class="comment">/* ---, */</span>
-<a name="l07030"></a>07030                                 mov esi, Src    <span class="comment">/* ESI = Src row 0 address */</span>
-<a name="l07031"></a>07031                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
-<a name="l07032"></a>07032                                 add edi, eax    <span class="comment">/* EDI = EDI + columns */</span>
-<a name="l07033"></a>07033                                 inc              edi            <span class="comment">/* 1 byte offset from the left edge */</span>
-<a name="l07034"></a>07034                                 mov edx, rows           <span class="comment">/* initialize ROWS counter */</span>
-<a name="l07035"></a>07035                                 sub edx, 2      <span class="comment">/* do not use first and last rows */</span>
-<a name="l07036"></a>07036                                 <span class="comment">/* ---, */</span>
-<a name="l07037"></a>07037 L10400:
-<a name="l07038"></a>07038                         mov ecx, eax    <span class="comment">/* initialize COLUMS counter */</span>
-<a name="l07039"></a>07039                                 shr ecx, 3      <span class="comment">/* EBX/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l07040"></a>07040                                 mov ebx, esi    <span class="comment">/* save ESI in EBX */</span>
-<a name="l07041"></a>07041                                 movd mm1, edi           <span class="comment">/* save EDI in MM1 */</span>
-<a name="l07042"></a>07042                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l07043"></a>07043 L10402:
-<a name="l07044"></a>07044                         <span class="comment">/* ---, */</span>
-<a name="l07045"></a>07045                         movq mm4, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07046"></a>07046                         movq mm5, mm4           <span class="comment">/* save MM4 in MM5 */</span>
-<a name="l07047"></a>07047                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07048"></a>07048                                 punpcklbw mm4, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07049"></a>07049                                 punpckhbw mm5, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07050"></a>07050                                 movq mm6, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07051"></a>07051                         movq mm7, mm6           <span class="comment">/* save MM6 in MM7 */</span>
-<a name="l07052"></a>07052                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07053"></a>07053                                 punpcklbw mm6, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07054"></a>07054                                 punpckhbw mm7, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07055"></a>07055                                 add esi, eax    <span class="comment">/* move to the next row of Src */</span>
-<a name="l07056"></a>07056                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07057"></a>07057                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07058"></a>07058                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07059"></a>07059                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07060"></a>07060                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07061"></a>07061                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07062"></a>07062                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07063"></a>07063                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07064"></a>07064                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07065"></a>07065                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07066"></a>07066                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07067"></a>07067                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07068"></a>07068                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07069"></a>07069                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07070"></a>07070                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07071"></a>07071                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07072"></a>07072                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07073"></a>07073                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07074"></a>07074                                 add esi, eax    <span class="comment">/* move to the next row of Src */</span>
-<a name="l07075"></a>07075                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07076"></a>07076                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07077"></a>07077                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07078"></a>07078                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07079"></a>07079                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07080"></a>07080                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07081"></a>07081                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07082"></a>07082                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07083"></a>07083                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07084"></a>07084                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07085"></a>07085                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07086"></a>07086                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07087"></a>07087                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07088"></a>07088                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07089"></a>07089                                 <span class="comment">/* ---, */</span>
-<a name="l07090"></a>07090                                 movq mm2, mm4           <span class="comment">/* copy MM4 into MM2 */</span>
-<a name="l07091"></a>07091                                 psrlq mm4, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07092"></a>07092                                 psubw mm4, mm2          <span class="comment">/* MM4 = MM4 - MM2 */</span>
-<a name="l07093"></a>07093                                 movq mm3, mm6           <span class="comment">/* copy MM6 into MM3 */</span>
-<a name="l07094"></a>07094                                 psrlq mm6, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07095"></a>07095                                 psubw mm6, mm3          <span class="comment">/* MM6 = MM6 - MM3 */</span>
-<a name="l07096"></a>07096                                 punpckldq mm4, mm6      <span class="comment">/* combine 2 words of MM6 and 2 words of MM4 */</span>
-<a name="l07097"></a>07097                                 movq mm2, mm5           <span class="comment">/* copy MM6 into MM2 */</span>
-<a name="l07098"></a>07098                                 psrlq mm5, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07099"></a>07099                                 psubw mm5, mm2          <span class="comment">/* MM5 = MM5 - MM2 */</span>
-<a name="l07100"></a>07100                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l07101"></a>07101                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07102"></a>07102                                 psubw mm7, mm3          <span class="comment">/* MM7 = MM7 - MM3 */</span>
-<a name="l07103"></a>07103                                 punpckldq mm5, mm7      <span class="comment">/* combine 2 words of MM7 and 2 words of MM5 */</span>
-<a name="l07104"></a>07104                                 <span class="comment">/* Take abs values of MM4 and MM5 */</span>
-<a name="l07105"></a>07105                                 movq mm6, mm4           <span class="comment">/* copy MM4 into MM6 */</span>
-<a name="l07106"></a>07106                                 movq mm7, mm5           <span class="comment">/* copy MM5 into MM7 */</span>
-<a name="l07107"></a>07107                                 psraw mm6, 15           <span class="comment">/* fill MM6 words with word sign bit */</span>
-<a name="l07108"></a>07108                                 psraw mm7, 15           <span class="comment">/* fill MM7 words with word sign bit */</span>
-<a name="l07109"></a>07109                                 pxor mm4, mm6           <span class="comment">/* take 1's compliment of only neg words */</span>
-<a name="l07110"></a>07110                                 pxor mm5, mm7           <span class="comment">/* take 1's compliment of only neg words */</span>
-<a name="l07111"></a>07111                                 psubsw mm4, mm6         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
-<a name="l07112"></a>07112                                 psubsw mm5, mm7         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
-<a name="l07113"></a>07113                                 packuswb mm4, mm5       <span class="comment">/* combine and pack/saturate MM5 and MM4 */</span>
-<a name="l07114"></a>07114                                 movq [edi], mm4         <span class="comment">/* store result in Dest */</span>
-<a name="l07115"></a>07115                                 <span class="comment">/* ---, */</span>
-<a name="l07116"></a>07116                                 sub esi, eax    <span class="comment">/* move to the current top row in Src */</span>
-<a name="l07117"></a>07117                                 sub esi, eax
-<a name="l07118"></a>07118                                 add esi, 8      <span class="comment">/* move Src  pointer to the next 8 pixels */</span>
-<a name="l07119"></a>07119                                 add edi, 8      <span class="comment">/* move Dest pointer to the next 8 pixels */</span>
-<a name="l07120"></a>07120                                 <span class="comment">/* ---, */</span>
-<a name="l07121"></a>07121                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l07122"></a>07122                                 jnz            L10402           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l07123"></a>07123                                 mov esi, ebx    <span class="comment">/* restore most left current row Src  address */</span>
-<a name="l07124"></a>07124                                 movd edi, mm1           <span class="comment">/* restore most left current row Dest address */</span>
-<a name="l07125"></a>07125                                 add esi, eax    <span class="comment">/* move to the next row in Src */</span>
-<a name="l07126"></a>07126                                 add edi, eax    <span class="comment">/* move to the next row in Dest */</span>
-<a name="l07127"></a>07127                                 dec              edx            <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l07128"></a>07128                                 jnz            L10400           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l07129"></a>07129                                 <span class="comment">/* ---, */</span>
-<a name="l07130"></a>07130                                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l07131"></a>07131                                 popa
-<a name="l07132"></a>07132                 }
-<a name="l07133"></a>07133 <span class="preprocessor">#else</span>
-<a name="l07134"></a>07134 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l07135"></a>07135                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
-<a name="l07136"></a>07136                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
-<a name="l07137"></a>07137                         <span class="comment">/* --- */</span>
-<a name="l07138"></a>07138                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* ESI = Src row 0 address */</span>
-<a name="l07139"></a>07139                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
-<a name="l07140"></a>07140                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* EDI = EDI + columns */</span>
-<a name="l07141"></a>07141                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* 1 byte offset from the left edge */</span>
-<a name="l07142"></a>07142                         <span class="stringliteral">"mov          %2, %%edx \n\t"</span>   <span class="comment">/* initialize ROWS counter */</span>
-<a name="l07143"></a>07143                         <span class="stringliteral">"sub          $2, %%edx \n\t"</span>   <span class="comment">/* do not use first and last rows */</span>
-<a name="l07144"></a>07144                         <span class="comment">/* --- */</span>
-<a name="l07145"></a>07145                         <span class="stringliteral">".L10400:                \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>    <span class="comment">/* initialize COLUMS counter */</span>
-<a name="l07146"></a>07146                         <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* EBX/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l07147"></a>07147                         <span class="stringliteral">"mov       %%esi, %%ebx \n\t"</span>   <span class="comment">/* save ESI in EBX */</span>
-<a name="l07148"></a>07148                         <span class="stringliteral">"movd      %%edi, %%mm1 \n\t"</span>   <span class="comment">/* save EDI in MM1 */</span>
-<a name="l07149"></a>07149                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l07150"></a>07150                         <span class="stringliteral">".L10402:               \n\t"</span>
-<a name="l07151"></a>07151                         <span class="comment">/* --- */</span>
-<a name="l07152"></a>07152                         <span class="stringliteral">"movq    (%%esi), %%mm4 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07153"></a>07153                         <span class="stringliteral">"movq      %%mm4, %%mm5 \n\t"</span>   <span class="comment">/* save MM4 in MM5 */</span>
-<a name="l07154"></a>07154                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07155"></a>07155                         <span class="stringliteral">"punpcklbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07156"></a>07156                         <span class="stringliteral">"punpckhbw %%mm0, %%mm5 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07157"></a>07157                         <span class="stringliteral">"movq    (%%esi), %%mm6 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07158"></a>07158                         <span class="stringliteral">"movq      %%mm6, %%mm7 \n\t"</span>   <span class="comment">/* save MM6 in MM7 */</span>
-<a name="l07159"></a>07159                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07160"></a>07160                         <span class="stringliteral">"punpcklbw %%mm0, %%mm6 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07161"></a>07161                         <span class="stringliteral">"punpckhbw %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07162"></a>07162                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row of Src */</span>
-<a name="l07163"></a>07163                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07164"></a>07164                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07165"></a>07165                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07166"></a>07166                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07167"></a>07167                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07168"></a>07168                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07169"></a>07169                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07170"></a>07170                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07171"></a>07171                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07172"></a>07172                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07173"></a>07173                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07174"></a>07174                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07175"></a>07175                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07176"></a>07176                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07177"></a>07177                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07178"></a>07178                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07179"></a>07179                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07180"></a>07180                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07181"></a>07181                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row of Src */</span>
-<a name="l07182"></a>07182                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07183"></a>07183                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07184"></a>07184                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07185"></a>07185                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07186"></a>07186                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07187"></a>07187                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07188"></a>07188                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07189"></a>07189                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07190"></a>07190                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07191"></a>07191                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07192"></a>07192                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07193"></a>07193                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07194"></a>07194                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07195"></a>07195                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07196"></a>07196                         <span class="comment">/* --- */</span>
-<a name="l07197"></a>07197                         <span class="stringliteral">"movq      %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* copy MM4 into MM2 */</span>
-<a name="l07198"></a>07198                         <span class="stringliteral">"psrlq       $32, %%mm4 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07199"></a>07199                         <span class="stringliteral">"psubw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* MM4 = MM4 - MM2 */</span>
-<a name="l07200"></a>07200                         <span class="stringliteral">"movq      %%mm6, %%mm3 \n\t"</span>   <span class="comment">/* copy MM6 into MM3 */</span>
-<a name="l07201"></a>07201                         <span class="stringliteral">"psrlq       $32, %%mm6 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07202"></a>07202                         <span class="stringliteral">"psubw     %%mm3, %%mm6 \n\t"</span>   <span class="comment">/* MM6 = MM6 - MM3 */</span>
-<a name="l07203"></a>07203                         <span class="stringliteral">"punpckldq %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* combine 2 words of MM6 and 2 words of MM4 */</span>
-<a name="l07204"></a>07204                         <span class="stringliteral">"movq      %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* copy MM6 into MM2 */</span>
-<a name="l07205"></a>07205                         <span class="stringliteral">"psrlq       $32, %%mm5 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07206"></a>07206                         <span class="stringliteral">"psubw     %%mm2, %%mm5 \n\t"</span>   <span class="comment">/* MM5 = MM5 - MM2 */</span>
-<a name="l07207"></a>07207                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l07208"></a>07208                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07209"></a>07209                         <span class="stringliteral">"psubw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* MM7 = MM7 - MM3 */</span>
-<a name="l07210"></a>07210                         <span class="stringliteral">"punpckldq %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* combine 2 words of MM7 and 2 words of MM5 */</span>
-<a name="l07211"></a>07211                         <span class="comment">/* Take abs values of MM4 and MM5 */</span>
-<a name="l07212"></a>07212                         <span class="stringliteral">"movq      %%mm4, %%mm6 \n\t"</span>   <span class="comment">/* copy MM4 into MM6 */</span>
-<a name="l07213"></a>07213                         <span class="stringliteral">"movq      %%mm5, %%mm7 \n\t"</span>   <span class="comment">/* copy MM5 into MM7 */</span>
-<a name="l07214"></a>07214                         <span class="stringliteral">"psraw       $15, %%mm6 \n\t"</span>   <span class="comment">/* fill MM6 words with word sign bit */</span>
-<a name="l07215"></a>07215                         <span class="stringliteral">"psraw       $15, %%mm7 \n\t"</span>   <span class="comment">/* fill MM7 words with word sign bit */</span>
-<a name="l07216"></a>07216                         <span class="stringliteral">"pxor      %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l07217"></a>07217                         <span class="stringliteral">"pxor      %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l07218"></a>07218                         <span class="stringliteral">"psubsw    %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l07219"></a>07219                         <span class="stringliteral">"psubsw    %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l07220"></a>07220                         <span class="stringliteral">"packuswb  %%mm5, %%mm4 \n\t"</span>   <span class="comment">/* combine and pack/saturate MM5 and MM4 */</span>
-<a name="l07221"></a>07221                         <span class="stringliteral">"movq    %%mm4, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l07222"></a>07222                         <span class="comment">/* --- */</span>
-<a name="l07223"></a>07223                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the current top row in Src */</span>
-<a name="l07224"></a>07224                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span> <span class="stringliteral">"add $8,          %%esi \n\t"</span>     <span class="comment">/* move Src  pointer to the next 8 pixels */</span>
-<a name="l07225"></a>07225                         <span class="stringliteral">"add $8,          %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next 8 pixels */</span>
-<a name="l07226"></a>07226                         <span class="comment">/* --- */</span>
-<a name="l07227"></a>07227                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l07228"></a>07228                         <span class="stringliteral">"jnz            .L10402 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l07229"></a>07229                         <span class="stringliteral">"mov       %%ebx, %%esi \n\t"</span>   <span class="comment">/* restore most left current row Src  address */</span>
-<a name="l07230"></a>07230                         <span class="stringliteral">"movd      %%mm1, %%edi \n\t"</span>   <span class="comment">/* restore most left current row Dest address */</span>
-<a name="l07231"></a>07231                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
-<a name="l07232"></a>07232                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
-<a name="l07233"></a>07233                         <span class="stringliteral">"dec              %%edx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l07234"></a>07234                         <span class="stringliteral">"jnz            .L10400 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l07235"></a>07235                         <span class="comment">/* --- */</span>
-<a name="l07236"></a>07236                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l07237"></a>07237                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l07238"></a>07238                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
-<a name="l07239"></a>07239                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
-<a name="l07240"></a>07240                         <span class="stringliteral">"m"</span>(columns)            <span class="comment">/* %3 */</span>
-<a name="l07241"></a>07241                         );
-<a name="l07242"></a>07242 <span class="preprocessor">#endif</span>
-<a name="l07243"></a>07243 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l07244"></a>07244 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
-<a name="l07245"></a>07245         } <span class="keywordflow">else</span> {
-<a name="l07246"></a>07246                 <span class="comment">/* No non-MMX implementation yet */</span>
-<a name="l07247"></a>07247                 <span class="keywordflow">return</span> (-1);
-<a name="l07248"></a>07248         }
-<a name="l07249"></a>07249 }
-<a name="l07250"></a>07250 
-<a name="l07264"></a><a class="code" href="_s_d_l__image_filter_8h.html#ab9cc925cd9b135e245936d718b459032">07264</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a0d21af83f0183fcd697324cffe3ab3d7" title="Filter using SobelXShiftRight: Dij = saturation255( ... )">SDL_imageFilterSobelXShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <span class="keywordty [...]
-<a name="l07265"></a>07265                                                                         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> NRightShift)
-<a name="l07266"></a>07266 {
-<a name="l07267"></a>07267         <span class="comment">/* Validate input parameters */</span>
-<a name="l07268"></a>07268         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL))
-<a name="l07269"></a>07269                 <span class="keywordflow">return</span>(-1);
-<a name="l07270"></a>07270         <span class="keywordflow">if</span> ((columns < 8) || (rows < 3) || (NRightShift > 7))
-<a name="l07271"></a>07271                 <span class="keywordflow">return</span> (-1);
-<a name="l07272"></a>07272 
-<a name="l07273"></a>07273         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
-<a name="l07274"></a>07274 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l07275"></a>07275 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l07276"></a>07276 <span class="preprocessor"></span>                __asm
-<a name="l07277"></a>07277                 {
-<a name="l07278"></a>07278                         pusha
-<a name="l07279"></a>07279                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
-<a name="l07280"></a>07280                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
-<a name="l07281"></a>07281                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
-<a name="l07282"></a>07282                                 mov bl, NRightShift     <span class="comment">/* load NRightShift into BL */</span>
-<a name="l07283"></a>07283                                 movd mm1, ebx           <span class="comment">/* copy NRightShift into MM1 */</span>
-<a name="l07284"></a>07284                                 <span class="comment">/* ---, */</span>
-<a name="l07285"></a>07285                                 mov esi, Src    <span class="comment">/* ESI = Src row 0 address */</span>
-<a name="l07286"></a>07286                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
-<a name="l07287"></a>07287                                 add edi, eax    <span class="comment">/* EDI = EDI + columns */</span>
-<a name="l07288"></a>07288                                 inc              edi            <span class="comment">/* 1 byte offset from the left edge */</span>
-<a name="l07289"></a>07289                                 <span class="comment">/* initialize ROWS counter */</span>
-<a name="l07290"></a>07290                                 sub rows, 2     <span class="comment">/* do not use first and last rows */</span>
-<a name="l07291"></a>07291                                 <span class="comment">/* ---, */</span>
-<a name="l07292"></a>07292 L10410:
-<a name="l07293"></a>07293                         mov ecx, eax    <span class="comment">/* initialize COLUMS counter */</span>
-<a name="l07294"></a>07294                                 shr ecx, 3      <span class="comment">/* EBX/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l07295"></a>07295                                 mov ebx, esi    <span class="comment">/* save ESI in EBX */</span>
-<a name="l07296"></a>07296                                 mov edx, edi    <span class="comment">/* save EDI in EDX */</span>
-<a name="l07297"></a>07297                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l07298"></a>07298 L10412:
-<a name="l07299"></a>07299                         <span class="comment">/* ---, */</span>
-<a name="l07300"></a>07300                         movq mm4, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07301"></a>07301                         movq mm5, mm4           <span class="comment">/* save MM4 in MM5 */</span>
-<a name="l07302"></a>07302                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07303"></a>07303                                 punpcklbw mm4, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07304"></a>07304                                 punpckhbw mm5, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07305"></a>07305                                 psrlw mm4, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07306"></a>07306                                 psrlw mm5, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07307"></a>07307                                 movq mm6, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07308"></a>07308                         movq mm7, mm6           <span class="comment">/* save MM6 in MM7 */</span>
-<a name="l07309"></a>07309                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07310"></a>07310                                 punpcklbw mm6, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07311"></a>07311                                 punpckhbw mm7, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07312"></a>07312                                 psrlw mm6, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07313"></a>07313                                 psrlw mm7, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07314"></a>07314                                 add esi, eax    <span class="comment">/* move to the next row of Src */</span>
-<a name="l07315"></a>07315                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07316"></a>07316                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07317"></a>07317                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07318"></a>07318                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07319"></a>07319                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07320"></a>07320                                 psrlw mm2, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07321"></a>07321                                 psrlw mm3, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07322"></a>07322                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07323"></a>07323                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07324"></a>07324                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07325"></a>07325                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07326"></a>07326                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07327"></a>07327                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07328"></a>07328                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07329"></a>07329                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07330"></a>07330                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07331"></a>07331                                 psrlw mm2, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07332"></a>07332                                 psrlw mm3, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07333"></a>07333                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07334"></a>07334                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07335"></a>07335                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07336"></a>07336                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07337"></a>07337                                 add esi, eax    <span class="comment">/* move to the next row of Src */</span>
-<a name="l07338"></a>07338                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07339"></a>07339                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07340"></a>07340                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07341"></a>07341                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07342"></a>07342                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07343"></a>07343                                 psrlw mm2, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07344"></a>07344                                 psrlw mm3, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07345"></a>07345                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07346"></a>07346                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07347"></a>07347                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07348"></a>07348                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07349"></a>07349                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07350"></a>07350                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07351"></a>07351                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07352"></a>07352                                 psrlw mm2, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07353"></a>07353                                 psrlw mm3, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07354"></a>07354                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07355"></a>07355                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07356"></a>07356                                 <span class="comment">/* ---, */</span>
-<a name="l07357"></a>07357                                 movq mm2, mm4           <span class="comment">/* copy MM4 into MM2 */</span>
-<a name="l07358"></a>07358                                 psrlq mm4, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07359"></a>07359                                 psubw mm4, mm2          <span class="comment">/* MM4 = MM4 - MM2 */</span>
-<a name="l07360"></a>07360                                 movq mm3, mm6           <span class="comment">/* copy MM6 into MM3 */</span>
-<a name="l07361"></a>07361                                 psrlq mm6, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07362"></a>07362                                 psubw mm6, mm3          <span class="comment">/* MM6 = MM6 - MM3 */</span>
-<a name="l07363"></a>07363                                 punpckldq mm4, mm6      <span class="comment">/* combine 2 words of MM6 and 2 words of MM4 */</span>
-<a name="l07364"></a>07364                                 movq mm2, mm5           <span class="comment">/* copy MM6 into MM2 */</span>
-<a name="l07365"></a>07365                                 psrlq mm5, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07366"></a>07366                                 psubw mm5, mm2          <span class="comment">/* MM5 = MM5 - MM2 */</span>
-<a name="l07367"></a>07367                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l07368"></a>07368                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07369"></a>07369                                 psubw mm7, mm3          <span class="comment">/* MM7 = MM7 - MM3 */</span>
-<a name="l07370"></a>07370                                 punpckldq mm5, mm7      <span class="comment">/* combine 2 words of MM7 and 2 words of MM5 */</span>
-<a name="l07371"></a>07371                                 <span class="comment">/* Take abs values of MM4 and MM5 */</span>
-<a name="l07372"></a>07372                                 movq mm6, mm4           <span class="comment">/* copy MM4 into MM6 */</span>
-<a name="l07373"></a>07373                                 movq mm7, mm5           <span class="comment">/* copy MM5 into MM7 */</span>
-<a name="l07374"></a>07374                                 psraw mm6, 15           <span class="comment">/* fill MM6 words with word sign bit */</span>
-<a name="l07375"></a>07375                                 psraw mm7, 15           <span class="comment">/* fill MM7 words with word sign bit */</span>
-<a name="l07376"></a>07376                                 pxor mm4, mm6           <span class="comment">/* take 1's compliment of only neg words */</span>
-<a name="l07377"></a>07377                                 pxor mm5, mm7           <span class="comment">/* take 1's compliment of only neg words */</span>
-<a name="l07378"></a>07378                                 psubsw mm4, mm6         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
-<a name="l07379"></a>07379                                 psubsw mm5, mm7         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
-<a name="l07380"></a>07380                                 packuswb mm4, mm5       <span class="comment">/* combine and pack/saturate MM5 and MM4 */</span>
-<a name="l07381"></a>07381                                 movq [edi], mm4         <span class="comment">/* store result in Dest */</span>
-<a name="l07382"></a>07382                                 <span class="comment">/* ---, */</span>
-<a name="l07383"></a>07383                                 sub esi, eax    <span class="comment">/* move to the current top row in Src */</span>
-<a name="l07384"></a>07384                                 sub esi, eax
-<a name="l07385"></a>07385                                 add esi, 8      <span class="comment">/* move Src  pointer to the next 8 pixels */</span>
-<a name="l07386"></a>07386                                 add edi, 8      <span class="comment">/* move Dest pointer to the next 8 pixels */</span>
-<a name="l07387"></a>07387                                 <span class="comment">/* ---, */</span>
-<a name="l07388"></a>07388                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l07389"></a>07389                                 jnz            L10412           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l07390"></a>07390                                 mov esi, ebx    <span class="comment">/* restore most left current row Src  address */</span>
-<a name="l07391"></a>07391                                 mov edi, edx    <span class="comment">/* restore most left current row Dest address */</span>
-<a name="l07392"></a>07392                                 add esi, eax    <span class="comment">/* move to the next row in Src */</span>
-<a name="l07393"></a>07393                                 add edi, eax    <span class="comment">/* move to the next row in Dest */</span>
-<a name="l07394"></a>07394                                 dec rows        <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l07395"></a>07395                                 jnz            L10410           <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l07396"></a>07396                                 <span class="comment">/* ---, */</span>
-<a name="l07397"></a>07397                                 emms                            <span class="comment">/* exit MMX state */</span>
-<a name="l07398"></a>07398                                 popa
-<a name="l07399"></a>07399                 }
-<a name="l07400"></a>07400 <span class="preprocessor">#else</span>
-<a name="l07401"></a>07401 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l07402"></a>07402                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
-<a name="l07403"></a>07403                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
-<a name="l07404"></a>07404                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
-<a name="l07405"></a>07405                         <span class="stringliteral">"mov           %4, %%bl \n\t"</span>   <span class="comment">/* load NRightShift into BL */</span>
-<a name="l07406"></a>07406                         <span class="stringliteral">"movd      %%ebx, %%mm1 \n\t"</span>   <span class="comment">/* copy NRightShift into MM1 */</span>
-<a name="l07407"></a>07407                         <span class="comment">/* --- */</span>
-<a name="l07408"></a>07408                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* ESI = Src row 0 address */</span>
-<a name="l07409"></a>07409                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
-<a name="l07410"></a>07410                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* EDI = EDI + columns */</span>
-<a name="l07411"></a>07411                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* 1 byte offset from the left edge */</span>
-<a name="l07412"></a>07412                         <span class="comment">/* initialize ROWS counter */</span>
-<a name="l07413"></a>07413                         <span class="stringliteral">"subl            $2, %2 \n\t"</span>   <span class="comment">/* do not use first and last rows */</span>
-<a name="l07414"></a>07414                         <span class="comment">/* --- */</span>
-<a name="l07415"></a>07415                         <span class="stringliteral">".L10410:                \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>    <span class="comment">/* initialize COLUMS counter */</span>
-<a name="l07416"></a>07416                         <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* EBX/8 (MMX loads 8 bytes at a time) */</span>
-<a name="l07417"></a>07417                         <span class="stringliteral">"mov       %%esi, %%ebx \n\t"</span>   <span class="comment">/* save ESI in EBX */</span>
-<a name="l07418"></a>07418                         <span class="stringliteral">"mov       %%edi, %%edx \n\t"</span>   <span class="comment">/* save EDI in EDX */</span>
-<a name="l07419"></a>07419                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
-<a name="l07420"></a>07420                         <span class="stringliteral">".L10412:               \n\t"</span>
-<a name="l07421"></a>07421                         <span class="comment">/* --- */</span>
-<a name="l07422"></a>07422                         <span class="stringliteral">"movq    (%%esi), %%mm4 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07423"></a>07423                         <span class="stringliteral">"movq      %%mm4, %%mm5 \n\t"</span>   <span class="comment">/* save MM4 in MM5 */</span>
-<a name="l07424"></a>07424                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07425"></a>07425                         <span class="stringliteral">"punpcklbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07426"></a>07426                         <span class="stringliteral">"punpckhbw %%mm0, %%mm5 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07427"></a>07427                         <span class="stringliteral">"psrlw     %%mm1, %%mm4 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07428"></a>07428                         <span class="stringliteral">"psrlw     %%mm1, %%mm5 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07429"></a>07429                         <span class="stringliteral">"movq    (%%esi), %%mm6 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07430"></a>07430                         <span class="stringliteral">"movq      %%mm6, %%mm7 \n\t"</span>   <span class="comment">/* save MM6 in MM7 */</span>
-<a name="l07431"></a>07431                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07432"></a>07432                         <span class="stringliteral">"punpcklbw %%mm0, %%mm6 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07433"></a>07433                         <span class="stringliteral">"punpckhbw %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07434"></a>07434                         <span class="stringliteral">"psrlw     %%mm1, %%mm6 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07435"></a>07435                         <span class="stringliteral">"psrlw     %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07436"></a>07436                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row of Src */</span>
-<a name="l07437"></a>07437                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07438"></a>07438                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07439"></a>07439                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07440"></a>07440                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07441"></a>07441                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07442"></a>07442                         <span class="stringliteral">"psrlw     %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07443"></a>07443                         <span class="stringliteral">"psrlw     %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07444"></a>07444                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07445"></a>07445                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07446"></a>07446                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07447"></a>07447                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07448"></a>07448                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07449"></a>07449                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07450"></a>07450                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07451"></a>07451                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07452"></a>07452                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07453"></a>07453                         <span class="stringliteral">"psrlw     %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07454"></a>07454                         <span class="stringliteral">"psrlw     %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07455"></a>07455                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07456"></a>07456                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07457"></a>07457                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07458"></a>07458                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07459"></a>07459                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row of Src */</span>
-<a name="l07460"></a>07460                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07461"></a>07461                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07462"></a>07462                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
-<a name="l07463"></a>07463                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07464"></a>07464                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07465"></a>07465                         <span class="stringliteral">"psrlw     %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07466"></a>07466                         <span class="stringliteral">"psrlw     %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07467"></a>07467                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
-<a name="l07468"></a>07468                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
-<a name="l07469"></a>07469                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
-<a name="l07470"></a>07470                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
-<a name="l07471"></a>07471                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
-<a name="l07472"></a>07472                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
-<a name="l07473"></a>07473                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
-<a name="l07474"></a>07474                         <span class="stringliteral">"psrlw     %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07475"></a>07475                         <span class="stringliteral">"psrlw     %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
-<a name="l07476"></a>07476                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
-<a name="l07477"></a>07477                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
-<a name="l07478"></a>07478                         <span class="comment">/* --- */</span>
-<a name="l07479"></a>07479                         <span class="stringliteral">"movq      %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* copy MM4 into MM2 */</span>
-<a name="l07480"></a>07480                         <span class="stringliteral">"psrlq       $32, %%mm4 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07481"></a>07481                         <span class="stringliteral">"psubw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* MM4 = MM4 - MM2 */</span>
-<a name="l07482"></a>07482                         <span class="stringliteral">"movq      %%mm6, %%mm3 \n\t"</span>   <span class="comment">/* copy MM6 into MM3 */</span>
-<a name="l07483"></a>07483                         <span class="stringliteral">"psrlq       $32, %%mm6 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07484"></a>07484                         <span class="stringliteral">"psubw     %%mm3, %%mm6 \n\t"</span>   <span class="comment">/* MM6 = MM6 - MM3 */</span>
-<a name="l07485"></a>07485                         <span class="stringliteral">"punpckldq %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* combine 2 words of MM6 and 2 words of MM4 */</span>
-<a name="l07486"></a>07486                         <span class="stringliteral">"movq      %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* copy MM6 into MM2 */</span>
-<a name="l07487"></a>07487                         <span class="stringliteral">"psrlq       $32, %%mm5 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07488"></a>07488                         <span class="stringliteral">"psubw     %%mm2, %%mm5 \n\t"</span>   <span class="comment">/* MM5 = MM5 - MM2 */</span>
-<a name="l07489"></a>07489                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
-<a name="l07490"></a>07490                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
-<a name="l07491"></a>07491                         <span class="stringliteral">"psubw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* MM7 = MM7 - MM3 */</span>
-<a name="l07492"></a>07492                         <span class="stringliteral">"punpckldq %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* combine 2 words of MM7 and 2 words of MM5 */</span>
-<a name="l07493"></a>07493                         <span class="comment">/* Take abs values of MM4 and MM5 */</span>
-<a name="l07494"></a>07494                         <span class="stringliteral">"movq      %%mm4, %%mm6 \n\t"</span>   <span class="comment">/* copy MM4 into MM6 */</span>
-<a name="l07495"></a>07495                         <span class="stringliteral">"movq      %%mm5, %%mm7 \n\t"</span>   <span class="comment">/* copy MM5 into MM7 */</span>
-<a name="l07496"></a>07496                         <span class="stringliteral">"psraw       $15, %%mm6 \n\t"</span>   <span class="comment">/* fill MM6 words with word sign bit */</span>
-<a name="l07497"></a>07497                         <span class="stringliteral">"psraw       $15, %%mm7 \n\t"</span>   <span class="comment">/* fill MM7 words with word sign bit */</span>
-<a name="l07498"></a>07498                         <span class="stringliteral">"pxor      %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l07499"></a>07499                         <span class="stringliteral">"pxor      %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
-<a name="l07500"></a>07500                         <span class="stringliteral">"psubsw    %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l07501"></a>07501                         <span class="stringliteral">"psubsw    %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
-<a name="l07502"></a>07502                         <span class="stringliteral">"packuswb  %%mm5, %%mm4 \n\t"</span>   <span class="comment">/* combine and pack/saturate MM5 and MM4 */</span>
-<a name="l07503"></a>07503                         <span class="stringliteral">"movq    %%mm4, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
-<a name="l07504"></a>07504                         <span class="comment">/* --- */</span>
-<a name="l07505"></a>07505                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the current top row in Src */</span>
-<a name="l07506"></a>07506                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span> <span class="stringliteral">"add $8,          %%esi \n\t"</span>     <span class="comment">/* move Src  pointer to the next 8 pixels */</span>
-<a name="l07507"></a>07507                         <span class="stringliteral">"add $8,          %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next 8 pixels */</span>
-<a name="l07508"></a>07508                         <span class="comment">/* --- */</span>
-<a name="l07509"></a>07509                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
-<a name="l07510"></a>07510                         <span class="stringliteral">"jnz            .L10412 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l07511"></a>07511                         <span class="stringliteral">"mov       %%ebx, %%esi \n\t"</span>   <span class="comment">/* restore most left current row Src  address */</span>
-<a name="l07512"></a>07512                         <span class="stringliteral">"mov       %%edx, %%edi \n\t"</span>   <span class="comment">/* restore most left current row Dest address */</span>
-<a name="l07513"></a>07513                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
-<a name="l07514"></a>07514                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
-<a name="l07515"></a>07515                         <span class="stringliteral">"decl                %2 \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
-<a name="l07516"></a>07516                         <span class="stringliteral">"jnz            .L10410 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
-<a name="l07517"></a>07517                         <span class="comment">/* --- */</span>
-<a name="l07518"></a>07518                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
-<a name="l07519"></a>07519                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
-<a name="l07520"></a>07520                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
-<a name="l07521"></a>07521                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
-<a name="l07522"></a>07522                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
-<a name="l07523"></a>07523                         <span class="stringliteral">"m"</span>(NRightShift)        <span class="comment">/* %4 */</span>
-<a name="l07524"></a>07524                         );
-<a name="l07525"></a>07525 <span class="preprocessor">#endif</span>
-<a name="l07526"></a>07526 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l07527"></a>07527 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
-<a name="l07528"></a>07528         } <span class="keywordflow">else</span> {
-<a name="l07529"></a>07529                 <span class="comment">/* No non-MMX implementation yet */</span>
-<a name="l07530"></a>07530                 <span class="keywordflow">return</span> (-1);
-<a name="l07531"></a>07531         }
-<a name="l07532"></a>07532 }
-<a name="l07533"></a>07533 
-<a name="l07537"></a><a class="code" href="_s_d_l__image_filter_8h.html#a08a45265e9e84bf8beedebba26da947c">07537</a> <span class="keywordtype">void</span> <a class="code" href="_s_d_l__image_filter_8c.html#afbfcc8c03e3d791ac74c955d14a135e4" title="Align stack to 32 byte boundary,.">SDL_imageFilterAlignStack</a>(<span class="keywordtype">void</span>)
-<a name="l07538"></a>07538 {
-<a name="l07539"></a>07539 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l07540"></a>07540 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l07541"></a>07541 <span class="preprocessor"></span>        __asm
-<a name="l07542"></a>07542         {                               <span class="comment">/* --- stack alignment --- */</span>
-<a name="l07543"></a>07543                 mov ebx, esp    <span class="comment">/* load ESP into EBX */</span>
-<a name="l07544"></a>07544                         sub ebx, 4      <span class="comment">/* reserve space on stack for old value of ESP */</span>
-<a name="l07545"></a>07545                         and ebx, -32    <span class="comment">/* align EBX along a 32 byte boundary */</span>
-<a name="l07546"></a>07546                         mov [ebx], esp          <span class="comment">/* save old value of ESP in stack, behind the bndry */</span>
-<a name="l07547"></a>07547                         mov esp, ebx    <span class="comment">/* align ESP along a 32 byte boundary */</span>
-<a name="l07548"></a>07548         }
-<a name="l07549"></a>07549 <span class="preprocessor">#else</span>
-<a name="l07550"></a>07550 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l07551"></a>07551                 (                               <span class="comment">/* --- stack alignment --- */</span>
-<a name="l07552"></a>07552                 <span class="stringliteral">"mov       %%esp, %%ebx \n\t"</span>   <span class="comment">/* load ESP into EBX */</span>
-<a name="l07553"></a>07553                 <span class="stringliteral">"sub          $4, %%ebx \n\t"</span>   <span class="comment">/* reserve space on stack for old value of ESP */</span>
-<a name="l07554"></a>07554                 <span class="stringliteral">"and        $-32, %%ebx \n\t"</span>   <span class="comment">/* align EBX along a 32 byte boundary */</span>
-<a name="l07555"></a>07555                 <span class="stringliteral">"mov     %%esp, (%%ebx) \n\t"</span>   <span class="comment">/* save old value of ESP in stack, behind the bndry */</span>
-<a name="l07556"></a>07556                 <span class="stringliteral">"mov       %%ebx, %%esp \n\t"</span>   <span class="comment">/* align ESP along a 32 byte boundary */</span>
-<a name="l07557"></a>07557                 ::);
-<a name="l07558"></a>07558 <span class="preprocessor">#endif</span>
-<a name="l07559"></a>07559 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l07560"></a>07560 <span class="preprocessor"></span>}
-<a name="l07561"></a>07561 
-<a name="l07565"></a><a class="code" href="_s_d_l__image_filter_8h.html#a84f360601d5e6e017f0e74a2cf83be6c">07565</a> <span class="keywordtype">void</span> <a class="code" href="_s_d_l__image_filter_8c.html#a3147eb5ddd4965d65702f0e533b42974" title="Restore previously aligned stack.">SDL_imageFilterRestoreStack</a>(<span class="keywordtype">void</span>)
-<a name="l07566"></a>07566 {
-<a name="l07567"></a>07567 <span class="preprocessor">#ifdef USE_MMX</span>
-<a name="l07568"></a>07568 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
-<a name="l07569"></a>07569 <span class="preprocessor"></span>        __asm
-<a name="l07570"></a>07570         {                               <span class="comment">/* --- restoring old stack --- */</span>
-<a name="l07571"></a>07571                 mov ebx, [esp]          <span class="comment">/* load old value of ESP */</span>
-<a name="l07572"></a>07572                 mov esp, ebx    <span class="comment">/* restore old value of ESP */</span>
-<a name="l07573"></a>07573         }
-<a name="l07574"></a>07574 <span class="preprocessor">#else</span>
-<a name="l07575"></a>07575 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
-<a name="l07576"></a>07576                 (                               <span class="comment">/* --- restoring old stack --- */</span>
-<a name="l07577"></a>07577                 <span class="stringliteral">"mov     (%%esp), %%ebx \n\t"</span>   <span class="comment">/* load old value of ESP */</span>
-<a name="l07578"></a>07578                 <span class="stringliteral">"mov       %%ebx, %%esp \n\t"</span>   <span class="comment">/* restore old value of ESP */</span>
-<a name="l07579"></a>07579                 ::);
-<a name="l07580"></a>07580 <span class="preprocessor">#endif</span>
-<a name="l07581"></a>07581 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
-<a name="l07582"></a>07582 <span class="preprocessor"></span>}
+<a name="l03707"></a>03707         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l03708"></a>03708 
+<a name="l03709"></a>03709                 SDL_imageFilterClipToRangeMMX(Src1, Dest, length, Tmin, Tmax);
+<a name="l03710"></a>03710 
+<a name="l03711"></a>03711                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l03712"></a>03712                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l03713"></a>03713                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l03714"></a>03714                         istart = length & 0xfffffff8;
+<a name="l03715"></a>03715                         cursrc1 = &Src1[istart];
+<a name="l03716"></a>03716                         curdest = &Dest[istart];
+<a name="l03717"></a>03717                 } <span class="keywordflow">else</span> {
+<a name="l03718"></a>03718                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l03719"></a>03719                         <span class="keywordflow">return</span> (0);
+<a name="l03720"></a>03720                 }
+<a name="l03721"></a>03721         } <span class="keywordflow">else</span> {
+<a name="l03722"></a>03722                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l03723"></a>03723                 istart = 0;
+<a name="l03724"></a>03724                 cursrc1 = Src1;
+<a name="l03725"></a>03725                 curdest = Dest;
+<a name="l03726"></a>03726         }
+<a name="l03727"></a>03727 
+<a name="l03728"></a>03728         <span class="comment">/* C routine to process image */</span>
+<a name="l03729"></a>03729         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l03730"></a>03730                 <span class="keywordflow">if</span> (*cursrc1 < Tmin) {
+<a name="l03731"></a>03731                         *curdest = Tmin;
+<a name="l03732"></a>03732                 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (*cursrc1 > Tmax) {
+<a name="l03733"></a>03733                         *curdest = Tmax;
+<a name="l03734"></a>03734                 } <span class="keywordflow">else</span> {
+<a name="l03735"></a>03735                         *curdest = *cursrc1;
+<a name="l03736"></a>03736                 }
+<a name="l03737"></a>03737                 <span class="comment">/* Advance pointers */</span>
+<a name="l03738"></a>03738                 cursrc1++;
+<a name="l03739"></a>03739                 curdest++;
+<a name="l03740"></a>03740         }
+<a name="l03741"></a>03741 
+<a name="l03742"></a>03742         <span class="keywordflow">return</span> (0);
+<a name="l03743"></a>03743 }
+<a name="l03744"></a>03744 
+<a name="l03758"></a>03758 <span class="keyword">static</span> <span class="keywordtype">int</span> SDL_imageFilterNormalizeLinearMMX(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src1, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Dest, <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> SrcLength, <span class="keywordtype">int</span> Cmin, <span class="keywordtype">int</span> Cmax,
+<a name="l03759"></a>03759                                                                           <span class="keywordtype">int</span> Nmin, <span class="keywordtype">int</span> Nmax)
+<a name="l03760"></a>03760 {
+<a name="l03761"></a>03761 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l03762"></a>03762 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l03763"></a>03763 <span class="preprocessor"></span>        __asm
+<a name="l03764"></a>03764         {
+<a name="l03765"></a>03765                 pusha
+<a name="l03766"></a>03766                         mov ax, WORD PTR Nmax           <span class="comment">/* load Nmax in AX */</span>
+<a name="l03767"></a>03767                         mov bx, WORD PTR Cmax           <span class="comment">/* load Cmax in BX */</span>
+<a name="l03768"></a>03768                         sub ax, WORD PTR Nmin           <span class="comment">/* AX = Nmax - Nmin */</span>
+<a name="l03769"></a>03769                         sub bx, WORD PTR Cmin           <span class="comment">/* BX = Cmax - Cmin */</span>
+<a name="l03770"></a>03770                         jz             L10311           <span class="comment">/* check division by zero */</span>
+<a name="l03771"></a>03771                         xor dx, dx      <span class="comment">/* prepare for division, zero DX */</span>
+<a name="l03772"></a>03772                         div               bx            <span class="comment">/* AX = AX/BX */</span>
+<a name="l03773"></a>03773                         jmp            L10312
+<a name="l03774"></a>03774 L10311:
+<a name="l03775"></a>03775                 mov ax, 255     <span class="comment">/* if div by zero, assume result max byte value */</span>
+<a name="l03776"></a>03776 L10312:                         <span class="comment">/* ** Duplicate AX in 4 words of MM0 ** */</span>
+<a name="l03777"></a>03777                 mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l03778"></a>03778                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l03779"></a>03779                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l03780"></a>03780                         movd mm0, eax           <span class="comment">/* copy EAX into MM0 */</span>
+<a name="l03781"></a>03781                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
+<a name="l03782"></a>03782                         punpckldq mm0, mm1      <span class="comment">/* fill higher words of MM0 with AX */</span>
+<a name="l03783"></a>03783                         <span class="comment">/* ** Duplicate Cmin in 4 words of MM1 ** */</span>
+<a name="l03784"></a>03784                         mov ax, WORD PTR Cmin           <span class="comment">/* load Cmin into AX */</span>
+<a name="l03785"></a>03785                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l03786"></a>03786                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l03787"></a>03787                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l03788"></a>03788                         movd mm1, eax           <span class="comment">/* copy EAX into MM1 */</span>
+<a name="l03789"></a>03789                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
+<a name="l03790"></a>03790                         punpckldq mm1, mm2      <span class="comment">/* fill higher words of MM1 with Cmin */</span>
+<a name="l03791"></a>03791                         <span class="comment">/* ** Duplicate Nmin in 4 words of MM2 ** */</span>
+<a name="l03792"></a>03792                         mov ax, WORD PTR Nmin           <span class="comment">/* load Nmin into AX */</span>
+<a name="l03793"></a>03793                         mov bx, ax      <span class="comment">/* copy AX into BX */</span>
+<a name="l03794"></a>03794                         shl eax, 16     <span class="comment">/* shift 2 bytes of EAX left */</span>
+<a name="l03795"></a>03795                         mov ax, bx      <span class="comment">/* copy BX into AX */</span>
+<a name="l03796"></a>03796                         movd mm2, eax           <span class="comment">/* copy EAX into MM2 */</span>
+<a name="l03797"></a>03797                         movd mm3, eax           <span class="comment">/* copy EAX into MM3 */</span>
+<a name="l03798"></a>03798                         punpckldq mm2, mm3      <span class="comment">/* fill higher words of MM2 with Nmin */</span>
+<a name="l03799"></a>03799                         pxor mm7, mm7           <span class="comment">/* zero MM7 register */</span>
+<a name="l03800"></a>03800                         mov eax, Src1           <span class="comment">/* load Src1 address into eax */</span>
+<a name="l03801"></a>03801                         mov edi, Dest           <span class="comment">/* load Dest address into edi */</span>
+<a name="l03802"></a>03802                         mov ecx, SrcLength      <span class="comment">/* load loop counter (SIZE) into ecx */</span>
+<a name="l03803"></a>03803                         shr ecx, 3      <span class="comment">/* counter/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l03804"></a>03804                         align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l03805"></a>03805 L1031:
+<a name="l03806"></a>03806                 movq mm3, [eax]         <span class="comment">/* load 8 bytes from Src1 into MM3 */</span>
+<a name="l03807"></a>03807                 movq mm4, mm3           <span class="comment">/* copy MM3 into MM4  */</span>
+<a name="l03808"></a>03808                         punpcklbw mm3, mm7      <span class="comment">/* unpack low  bytes of SrcDest into words */</span>
+<a name="l03809"></a>03809                         punpckhbw mm4, mm7      <span class="comment">/* unpack high bytes of SrcDest into words */</span>
+<a name="l03810"></a>03810                         psubusb mm3, mm1        <span class="comment">/* S-Cmin, low  bytes */</span>
+<a name="l03811"></a>03811                         psubusb mm4, mm1        <span class="comment">/* S-Cmin, high bytes */</span>
+<a name="l03812"></a>03812                         pmullw mm3, mm0         <span class="comment">/* MM0*(S-Cmin), low  bytes */</span>
+<a name="l03813"></a>03813                         pmullw mm4, mm0         <span class="comment">/* MM0*(S-Cmin), high bytes */</span>
+<a name="l03814"></a>03814                         paddusb mm3, mm2        <span class="comment">/* MM0*(S-Cmin)+Nmin, low  bytes */</span>
+<a name="l03815"></a>03815                         paddusb mm4, mm2        <span class="comment">/* MM0*(S-Cmin)+Nmin, high bytes */</span>
+<a name="l03816"></a>03816                         <span class="comment">/* ** Take abs value of the signed words ** */</span>
+<a name="l03817"></a>03817                         movq mm5, mm3           <span class="comment">/* copy mm3 into mm5 */</span>
+<a name="l03818"></a>03818                         movq mm6, mm4           <span class="comment">/* copy mm4 into mm6 */</span>
+<a name="l03819"></a>03819                         psraw mm5, 15           <span class="comment">/* fill mm5 words with word sign bit */</span>
+<a name="l03820"></a>03820                         psraw mm6, 15           <span class="comment">/* fill mm6 words with word sign bit */</span>
+<a name="l03821"></a>03821                         pxor mm3, mm5           <span class="comment">/* take 1's compliment of only neg words */</span>
+<a name="l03822"></a>03822                         pxor mm4, mm6           <span class="comment">/* take 1's compliment of only neg words */</span>
+<a name="l03823"></a>03823                         psubsw mm3, mm5         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
+<a name="l03824"></a>03824                         psubsw mm4, mm6         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
+<a name="l03825"></a>03825                         packuswb mm3, mm4       <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l03826"></a>03826                         movq [edi], mm3         <span class="comment">/* store result in Dest */</span>
+<a name="l03827"></a>03827                         add eax, 8      <span class="comment">/* increase Src1 register pointer by 8 */</span>
+<a name="l03828"></a>03828                         add edi, 8      <span class="comment">/* increase Dest register pointer by 8 */</span>
+<a name="l03829"></a>03829                         dec              ecx            <span class="comment">/* decrease loop counter */</span>
+<a name="l03830"></a>03830                         jnz             L1031           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l03831"></a>03831                         emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l03832"></a>03832                         popa
+<a name="l03833"></a>03833         }
+<a name="l03834"></a>03834 <span class="preprocessor">#else</span>
+<a name="l03835"></a>03835 <span class="preprocessor"></span>        <span class="comment">/* i386 and x86_64 */</span>
+<a name="l03836"></a>03836         __m64 *mSrc1 = (__m64*)Src1;
+<a name="l03837"></a>03837         __m64 *mDest = (__m64*)Dest;
+<a name="l03838"></a>03838         __m64 mm0, mm1, mm2, mm3;
+<a name="l03839"></a>03839 
+<a name="l03840"></a>03840         <span class="keywordtype">int</span> i;
+<a name="l03841"></a>03841         <span class="comment">/* Duplicate (Nmax-Nmin)/(Cmax-Cmin) in 4 words of MM0 */</span>
+<a name="l03842"></a>03842         <span class="keywordtype">unsigned</span> <span class="keywordtype">short</span> a = Nmax - Nmin;
+<a name="l03843"></a>03843         <span class="keywordtype">unsigned</span> <span class="keywordtype">short</span> b = Cmax - Cmin;
+<a name="l03844"></a>03844         <span class="keywordflow">if</span> (b == 0) {
+<a name="l03845"></a>03845             a = 255;
+<a name="l03846"></a>03846         } <span class="keywordflow">else</span> {
+<a name="l03847"></a>03847             a /= b;
+<a name="l03848"></a>03848         }
+<a name="l03849"></a>03849         i = (a<<16)|a;
+<a name="l03850"></a>03850         mm0 = _m_from_int(i);
+<a name="l03851"></a>03851         mm1 = _m_from_int(i);
+<a name="l03852"></a>03852         mm0 = _m_punpckldq(mm0, mm1);                   <span class="comment">/* fill higher words of MM0 with AX */</span>
+<a name="l03853"></a>03853         <span class="comment">/* Duplicate Cmin in 4 words of MM1 */</span>
+<a name="l03854"></a>03854         i = (Cmin<<16)|(<span class="keywordtype">short</span>)Cmin;
+<a name="l03855"></a>03855         mm1 = _m_from_int(i);
+<a name="l03856"></a>03856         mm2 = _m_from_int(i);
+<a name="l03857"></a>03857         mm1 = _m_punpckldq(mm1, mm2);                   <span class="comment">/* fill higher words of MM1 with Cmin */</span>
+<a name="l03858"></a>03858         <span class="comment">/* Duplicate Nmin in 4 words of MM2 */</span>
+<a name="l03859"></a>03859         i = (Nmin<<16)|(<span class="keywordtype">short</span>)Nmin;
+<a name="l03860"></a>03860         mm2 = _m_from_int(i);
+<a name="l03861"></a>03861         mm3 = _m_from_int(i);
+<a name="l03862"></a>03862         mm2 = _m_punpckldq(mm2, mm3);                   <span class="comment">/* fill higher words of MM2 with Nmin */</span>
+<a name="l03863"></a>03863         __m64 mm7 = _m_from_int(0);                     <span class="comment">/* zero mm0 register */</span>
+<a name="l03864"></a>03864         <span class="keywordflow">for</span> (i = 0; i < SrcLength/8; i++) {
+<a name="l03865"></a>03865                 __m64 mm3, mm4, mm5, mm6;
+<a name="l03866"></a>03866                 mm3 = _m_punpcklbw(*mSrc1, mm7);        <span class="comment">/* unpack low  bytes of Src1 into words */</span>
+<a name="l03867"></a>03867                 mm4 = _m_punpckhbw(*mSrc1, mm7);        <span class="comment">/* unpack high bytes of Src1 into words */</span>
+<a name="l03868"></a>03868                 mm3 = _m_psubusb(mm3, mm1);             <span class="comment">/* S-Cmin, low  bytes */</span>
+<a name="l03869"></a>03869                 mm4 = _m_psubusb(mm4, mm1);             <span class="comment">/* S-Cmin, high bytes */</span>
+<a name="l03870"></a>03870                 mm3 = _m_pmullw(mm3, mm0);              <span class="comment">/* MM0*(S-Cmin), low  bytes */</span>
+<a name="l03871"></a>03871                 mm4 = _m_pmullw(mm4, mm0);              <span class="comment">/* MM0*(S-Cmin), high bytes */</span>
+<a name="l03872"></a>03872                 mm3 = _m_paddusb(mm3, mm2);             <span class="comment">/* MM0*(S-Cmin)+Nmin, low  bytes */</span>
+<a name="l03873"></a>03873                 mm4 = _m_paddusb(mm4, mm2);             <span class="comment">/* MM0*(S-Cmin)+Nmin, high bytes */</span>
+<a name="l03874"></a>03874                 <span class="comment">/* Take abs value of the signed words */</span>
+<a name="l03875"></a>03875                 mm5 = _m_psrawi(mm3, 15);               <span class="comment">/* fill mm5 words with word sign bit */</span>
+<a name="l03876"></a>03876                 mm6 = _m_psrawi(mm4, 15);               <span class="comment">/* fill mm6 words with word sign bit */</span>
+<a name="l03877"></a>03877                 mm3 = _m_pxor(mm3, mm5);                <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l03878"></a>03878                 mm4 = _m_pxor(mm4, mm6);                <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l03879"></a>03879                 mm3 = _m_psubsw(mm3, mm5);              <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l03880"></a>03880                 mm4 = _m_psubsw(mm4, mm6);              <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l03881"></a>03881                 *mDest = _m_packuswb(mm3, mm4);         <span class="comment">/* pack words back into bytes with saturation */</span>
+<a name="l03882"></a>03882                 mSrc1++;
+<a name="l03883"></a>03883                 mDest++;
+<a name="l03884"></a>03884         }
+<a name="l03885"></a>03885         _m_empty();                                     <span class="comment">/* clean MMX state */</span>
+<a name="l03886"></a>03886 <span class="preprocessor">#endif</span>
+<a name="l03887"></a>03887 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (0);
+<a name="l03888"></a>03888 <span class="preprocessor">#else</span>
+<a name="l03889"></a>03889 <span class="preprocessor"></span>        <span class="keywordflow">return</span> (-1);
+<a name="l03890"></a>03890 <span class="preprocessor">#endif</span>
+<a name="l03891"></a>03891 <span class="preprocessor"></span>}
+<a name="l03892"></a>03892 
+<a name="l03906"></a><a class="code" href="_s_d_l__image_filter_8h.html#aacb316a18d8cb7999d5d53ee5e7b9750">03906</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ab018ace4db884cac953b06b09c00828b" title="Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin)...">SDL_imageFilterNormalizeLinear</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype" [...]
+<a name="l03907"></a>03907                                                                    <span class="keywordtype">int</span> Nmax)
+<a name="l03908"></a>03908 {
+<a name="l03909"></a>03909         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> i, istart;
+<a name="l03910"></a>03910         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *cursrc;
+<a name="l03911"></a>03911         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *curdest;
+<a name="l03912"></a>03912         <span class="keywordtype">int</span> dN, dC, factor;
+<a name="l03913"></a>03913         <span class="keywordtype">int</span> result;
+<a name="l03914"></a>03914 
+<a name="l03915"></a>03915         <span class="comment">/* Validate input parameters */</span>
+<a name="l03916"></a>03916         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL))
+<a name="l03917"></a>03917                 <span class="keywordflow">return</span>(-1);
+<a name="l03918"></a>03918         <span class="keywordflow">if</span> (length == 0)
+<a name="l03919"></a>03919                 <span class="keywordflow">return</span>(0);
+<a name="l03920"></a>03920 
+<a name="l03921"></a>03921         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>()) && (length > 7)) {
+<a name="l03922"></a>03922 
+<a name="l03923"></a>03923                 SDL_imageFilterNormalizeLinearMMX(Src, Dest, length, Cmin, Cmax, Nmin, Nmax);
+<a name="l03924"></a>03924 
+<a name="l03925"></a>03925                 <span class="comment">/* Check for unaligned bytes */</span>
+<a name="l03926"></a>03926                 <span class="keywordflow">if</span> ((length & 7) > 0) {
+<a name="l03927"></a>03927                         <span class="comment">/* Setup to process unaligned bytes */</span>
+<a name="l03928"></a>03928                         istart = length & 0xfffffff8;
+<a name="l03929"></a>03929                         cursrc = &Src[istart];
+<a name="l03930"></a>03930                         curdest = &Dest[istart];
+<a name="l03931"></a>03931                 } <span class="keywordflow">else</span> {
+<a name="l03932"></a>03932                         <span class="comment">/* No unaligned bytes - we are done */</span>
+<a name="l03933"></a>03933                         <span class="keywordflow">return</span> (0);
+<a name="l03934"></a>03934                 }
+<a name="l03935"></a>03935         } <span class="keywordflow">else</span> {
+<a name="l03936"></a>03936                 <span class="comment">/* Setup to process whole image */</span>
+<a name="l03937"></a>03937                 istart = 0;
+<a name="l03938"></a>03938                 cursrc = Src;
+<a name="l03939"></a>03939                 curdest = Dest;
+<a name="l03940"></a>03940         }
+<a name="l03941"></a>03941 
+<a name="l03942"></a>03942         <span class="comment">/* C routine to process image */</span>
+<a name="l03943"></a>03943         dC = Cmax - Cmin;
+<a name="l03944"></a>03944         <span class="keywordflow">if</span> (dC == 0)
+<a name="l03945"></a>03945                 <span class="keywordflow">return</span> (0);
+<a name="l03946"></a>03946         dN = Nmax - Nmin;
+<a name="l03947"></a>03947         factor = dN / dC;
+<a name="l03948"></a>03948         <span class="keywordflow">for</span> (i = istart; i < length; i++) {
+<a name="l03949"></a>03949                 result = factor * ((int) (*cursrc) - Cmin) + Nmin;
+<a name="l03950"></a>03950                 <span class="keywordflow">if</span> (result > 255)
+<a name="l03951"></a>03951                         result = 255;
+<a name="l03952"></a>03952                 *curdest = (<span class="keywordtype">unsigned</span> char) result;
+<a name="l03953"></a>03953                 <span class="comment">/* Advance pointers */</span>
+<a name="l03954"></a>03954                 cursrc++;
+<a name="l03955"></a>03955                 curdest++;
+<a name="l03956"></a>03956         }
+<a name="l03957"></a>03957 
+<a name="l03958"></a>03958         <span class="keywordflow">return</span> (0);
+<a name="l03959"></a>03959 }
+<a name="l03960"></a>03960 
+<a name="l03961"></a>03961 <span class="comment">/* ------------------------------------------------------------------------------------ */</span>
+<a name="l03962"></a>03962 
+<a name="l03977"></a><a class="code" href="_s_d_l__image_filter_8h.html#a7286cd21fa0a0cfb0606806dacfbe121">03977</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a8e7e4138a93e26f1912763189d407770" title="Filter using ConvolveKernel3x3Divide: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel3x3Divide</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <spa [...]
+<a name="l03978"></a>03978                                                                                    <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Divisor)
+<a name="l03979"></a>03979 {
+<a name="l03980"></a>03980         <span class="comment">/* Validate input parameters */</span>
+<a name="l03981"></a>03981         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
+<a name="l03982"></a>03982                 <span class="keywordflow">return</span>(-1);
+<a name="l03983"></a>03983 
+<a name="l03984"></a>03984         <span class="keywordflow">if</span> ((columns < 3) || (rows < 3) || (Divisor == 0))
+<a name="l03985"></a>03985                 <span class="keywordflow">return</span> (-1);
+<a name="l03986"></a>03986 
+<a name="l03987"></a>03987         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
+<a name="l03988"></a>03988 <span class="comment">//#ifdef USE_MMX</span>
+<a name="l03989"></a>03989 <span class="preprocessor">#if defined(USE_MMX) && defined(i386)</span>
+<a name="l03990"></a>03990 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l03991"></a>03991 <span class="preprocessor"></span>                __asm
+<a name="l03992"></a>03992                 {
+<a name="l03993"></a>03993                         pusha
+<a name="l03994"></a>03994                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
+<a name="l03995"></a>03995                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
+<a name="l03996"></a>03996                                 mov bl, Divisor         <span class="comment">/* load Divisor into BL */</span>
+<a name="l03997"></a>03997                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l03998"></a>03998                                 movq mm5, [edx]         <span class="comment">/* MM5 = {0,K2,K1,K0} */</span>
+<a name="l03999"></a>03999                         add edx, 8      <span class="comment">/* second row              |K0 K1 K2 0| */</span>
+<a name="l04000"></a>04000                                 movq mm6, [edx]         <span class="comment">/* MM6 = {0,K5,K4,K3}  K = |K3 K4 K5 0| */</span>
+<a name="l04001"></a>04001                         add edx, 8      <span class="comment">/* third row               |K6 K7 K8 0| */</span>
+<a name="l04002"></a>04002                                 movq mm7, [edx]         <span class="comment">/* MM7 = {0,K8,K7,K6} */</span>
+<a name="l04003"></a>04003                         <span class="comment">/* ---, */</span>
+<a name="l04004"></a>04004                         mov eax, columns        <span class="comment">/* load columns into EAX */</span>
+<a name="l04005"></a>04005                                 mov esi, Src    <span class="comment">/* ESI = Src row 0 address */</span>
+<a name="l04006"></a>04006                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
+<a name="l04007"></a>04007                                 add edi, eax    <span class="comment">/* EDI = EDI + columns */</span>
+<a name="l04008"></a>04008                                 inc              edi            <span class="comment">/* 1 byte offset from the left edge */</span>
+<a name="l04009"></a>04009                                 mov edx, rows           <span class="comment">/* initialize ROWS counter */</span>
+<a name="l04010"></a>04010                                 sub edx, 2      <span class="comment">/* do not use first and last row */</span>
+<a name="l04011"></a>04011                                 <span class="comment">/* ---, */</span>
+<a name="l04012"></a>04012 L10320:
+<a name="l04013"></a>04013                         mov ecx, eax    <span class="comment">/* initialize COLUMS counter */</span>
+<a name="l04014"></a>04014                                 sub ecx, 2      <span class="comment">/* do not use first and last column */</span>
+<a name="l04015"></a>04015                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l04016"></a>04016 L10322:
+<a name="l04017"></a>04017                         <span class="comment">/* ---, */</span>
+<a name="l04018"></a>04018                         movq mm1, [esi]         <span class="comment">/* load 8 bytes of the image first row */</span>
+<a name="l04019"></a>04019                         add esi, eax    <span class="comment">/* move one row below */</span>
+<a name="l04020"></a>04020                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes of the image second row */</span>
+<a name="l04021"></a>04021                         add esi, eax    <span class="comment">/* move one row below */</span>
+<a name="l04022"></a>04022                                 movq mm3, [esi]         <span class="comment">/* load 8 bytes of the image third row */</span>
+<a name="l04023"></a>04023                         punpcklbw mm1, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l04024"></a>04024                                 punpcklbw mm2, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l04025"></a>04025                                 punpcklbw mm3, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l04026"></a>04026                                 pmullw mm1, mm5         <span class="comment">/* multiply words first row  image*Kernel */</span>
+<a name="l04027"></a>04027                                 pmullw mm2, mm6         <span class="comment">/* multiply words second row image*Kernel */</span>
+<a name="l04028"></a>04028                                 pmullw mm3, mm7         <span class="comment">/* multiply words third row  image*Kernel */</span>
+<a name="l04029"></a>04029                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the first and second rows */</span>
+<a name="l04030"></a>04030                                 paddsw mm1, mm3         <span class="comment">/* add 4 words of the third row and result */</span>
+<a name="l04031"></a>04031                                 movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04032"></a>04032                                 psrlq mm1, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l04033"></a>04033                                 paddsw mm1, mm2         <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l04034"></a>04034                                 movq mm3, mm1           <span class="comment">/* copy MM1 into MM3 */</span>
+<a name="l04035"></a>04035                                 psrlq mm1, 16           <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l04036"></a>04036                                 paddsw mm1, mm3         <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l04037"></a>04037                                 <span class="comment">/* --, */</span>
+<a name="l04038"></a>04038                                 movd mm2, eax           <span class="comment">/* save EAX in MM2 */</span>
+<a name="l04039"></a>04039                                 movd mm3, edx           <span class="comment">/* save EDX in MM3 */</span>
+<a name="l04040"></a>04040                                 movd eax, mm1           <span class="comment">/* copy MM1 into EAX */</span>
+<a name="l04041"></a>04041                                 psraw mm1, 15           <span class="comment">/* spread sign bit of the result */</span>
+<a name="l04042"></a>04042                                 movd edx, mm1           <span class="comment">/* fill EDX with a sign bit */</span>
+<a name="l04043"></a>04043                                 idiv bx         <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
+<a name="l04044"></a>04044                                 movd mm1, eax           <span class="comment">/* move result of division into MM1 */</span>
+<a name="l04045"></a>04045                                 packuswb mm1, mm0       <span class="comment">/* pack division result with saturation */</span>
+<a name="l04046"></a>04046                                 movd eax, mm1           <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l04047"></a>04047                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l04048"></a>04048                                 movd edx, mm3           <span class="comment">/* restore saved EDX */</span>
+<a name="l04049"></a>04049                                 movd eax, mm2           <span class="comment">/* restore saved EAX */</span>
+<a name="l04050"></a>04050                                 <span class="comment">/* --, */</span>
+<a name="l04051"></a>04051                                 sub esi, eax    <span class="comment">/* move two rows up */</span>
+<a name="l04052"></a>04052                                 sub esi, eax    <span class="comment">/* */</span>
+<a name="l04053"></a>04053                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l04054"></a>04054                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l04055"></a>04055                                 <span class="comment">/* ---, */</span>
+<a name="l04056"></a>04056                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l04057"></a>04057                                 jnz            L10322           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04058"></a>04058                                 add esi, 2      <span class="comment">/* move to the next row in Src */</span>
+<a name="l04059"></a>04059                                 add edi, 2      <span class="comment">/* move to the next row in Dest */</span>
+<a name="l04060"></a>04060                                 dec              edx            <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l04061"></a>04061                                 jnz            L10320           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04062"></a>04062                                 <span class="comment">/* ---, */</span>
+<a name="l04063"></a>04063                                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l04064"></a>04064                                 popa
+<a name="l04065"></a>04065                 }
+<a name="l04066"></a>04066 <span class="preprocessor">#else</span>
+<a name="l04067"></a>04067 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l04068"></a>04068                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
+<a name="l04069"></a>04069                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
+<a name="l04070"></a>04070                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load Divisor into BL */</span>
+<a name="l04071"></a>04071                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l04072"></a>04072                         <span class="stringliteral">"movq    (%%edx), %%mm5 \n\t"</span>   <span class="comment">/* MM5 = {0,K2,K1,K0} */</span>
+<a name="l04073"></a>04073                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* second row              |K0 K1 K2 0| */</span>
+<a name="l04074"></a>04074                         <span class="stringliteral">"movq    (%%edx), %%mm6 \n\t"</span>   <span class="comment">/* MM6 = {0,K5,K4,K3}  K = |K3 K4 K5 0| */</span>
+<a name="l04075"></a>04075                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* third row               |K6 K7 K8 0| */</span>
+<a name="l04076"></a>04076                         <span class="stringliteral">"movq    (%%edx), %%mm7 \n\t"</span>   <span class="comment">/* MM7 = {0,K8,K7,K6} */</span>
+<a name="l04077"></a>04077                         <span class="comment">/* --- */</span>
+<a name="l04078"></a>04078                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
+<a name="l04079"></a>04079                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* ESI = Src row 0 address */</span>
+<a name="l04080"></a>04080                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
+<a name="l04081"></a>04081                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* EDI = EDI + columns */</span>
+<a name="l04082"></a>04082                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* 1 byte offset from the left edge */</span>
+<a name="l04083"></a>04083                         <span class="stringliteral">"mov          %2, %%edx \n\t"</span>   <span class="comment">/* initialize ROWS counter */</span>
+<a name="l04084"></a>04084                         <span class="stringliteral">"sub          $2, %%edx \n\t"</span>   <span class="comment">/* do not use first and last row */</span>
+<a name="l04085"></a>04085                         <span class="comment">/* --- */</span>
+<a name="l04086"></a>04086                         <span class="stringliteral">".L10320:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMS counter */</span>
+<a name="l04087"></a>04087                         <span class="stringliteral">"sub          $2, %%ecx \n\t"</span>   <span class="comment">/* do not use first and last column */</span>
+<a name="l04088"></a>04088                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l04089"></a>04089                         <span class="stringliteral">".L10322:               \n\t"</span>
+<a name="l04090"></a>04090                         <span class="comment">/* --- */</span>
+<a name="l04091"></a>04091                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the image first row */</span>
+<a name="l04092"></a>04092                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move one row below */</span>
+<a name="l04093"></a>04093                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes of the image second row */</span>
+<a name="l04094"></a>04094                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move one row below */</span>
+<a name="l04095"></a>04095                         <span class="stringliteral">"movq    (%%esi), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes of the image third row */</span>
+<a name="l04096"></a>04096                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l04097"></a>04097                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l04098"></a>04098                         <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l04099"></a>04099                         <span class="stringliteral">"pmullw    %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* multiply words first row  image*Kernel */</span>
+<a name="l04100"></a>04100                         <span class="stringliteral">"pmullw    %%mm6, %%mm2 \n\t"</span>   <span class="comment">/* multiply words second row image*Kernel */</span>
+<a name="l04101"></a>04101                         <span class="stringliteral">"pmullw    %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* multiply words third row  image*Kernel */</span>
+<a name="l04102"></a>04102                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the first and second rows */</span>
+<a name="l04103"></a>04103                         <span class="stringliteral">"paddsw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the third row and result */</span>
+<a name="l04104"></a>04104                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04105"></a>04105                         <span class="stringliteral">"psrlq       $32, %%mm1 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l04106"></a>04106                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l04107"></a>04107                         <span class="stringliteral">"movq      %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* copy MM1 into MM3 */</span>
+<a name="l04108"></a>04108                         <span class="stringliteral">"psrlq       $16, %%mm1 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l04109"></a>04109                         <span class="stringliteral">"paddsw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l04110"></a>04110                         <span class="comment">/* -- */</span>
+<a name="l04111"></a>04111                         <span class="stringliteral">"movd      %%eax, %%mm2 \n\t"</span>   <span class="comment">/* save EAX in MM2 */</span>
+<a name="l04112"></a>04112                         <span class="stringliteral">"movd      %%edx, %%mm3 \n\t"</span>   <span class="comment">/* save EDX in MM3 */</span>
+<a name="l04113"></a>04113                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* copy MM1 into EAX */</span>
+<a name="l04114"></a>04114                         <span class="stringliteral">"psraw       $15, %%mm1 \n\t"</span>   <span class="comment">/* spread sign bit of the result */</span>
+<a name="l04115"></a>04115                         <span class="stringliteral">"movd      %%mm1, %%edx \n\t"</span>   <span class="comment">/* fill EDX with a sign bit */</span>
+<a name="l04116"></a>04116                         <span class="stringliteral">"idivw             %%bx \n\t"</span>   <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
+<a name="l04117"></a>04117                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* move result of division into MM1 */</span>
+<a name="l04118"></a>04118                         <span class="stringliteral">"packuswb  %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
+<a name="l04119"></a>04119                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l04120"></a>04120                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l04121"></a>04121                         <span class="stringliteral">"movd      %%mm3, %%edx \n\t"</span>   <span class="comment">/* restore saved EDX */</span>
+<a name="l04122"></a>04122                         <span class="stringliteral">"movd      %%mm2, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
+<a name="l04123"></a>04123                         <span class="comment">/* -- */</span>
+<a name="l04124"></a>04124                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span>   <span class="comment">/* move two rows up */</span>
+<a name="l04125"></a>04125                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span>   <span class="comment">/* */</span>
+<a name="l04126"></a>04126                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l04127"></a>04127                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l04128"></a>04128                         <span class="comment">/* --- */</span>
+<a name="l04129"></a>04129                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l04130"></a>04130                         <span class="stringliteral">"jnz            .L10322 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04131"></a>04131                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
+<a name="l04132"></a>04132                         <span class="stringliteral">"add          $2, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
+<a name="l04133"></a>04133                         <span class="stringliteral">"dec              %%edx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l04134"></a>04134                         <span class="stringliteral">"jnz            .L10320 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04135"></a>04135                         <span class="comment">/* --- */</span>
+<a name="l04136"></a>04136                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
+<a name="l04137"></a>04137                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
+<a name="l04138"></a>04138                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
+<a name="l04139"></a>04139                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
+<a name="l04140"></a>04140                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
+<a name="l04141"></a>04141                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
+<a name="l04142"></a>04142                         <span class="stringliteral">"m"</span>(Divisor)            <span class="comment">/* %5 */</span>
+<a name="l04143"></a>04143                         );
+<a name="l04144"></a>04144 <span class="preprocessor">#endif</span>
+<a name="l04145"></a>04145 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l04146"></a>04146 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
+<a name="l04147"></a>04147         } <span class="keywordflow">else</span> {
+<a name="l04148"></a>04148                 <span class="comment">/* No non-MMX implementation yet */</span>
+<a name="l04149"></a>04149                 <span class="keywordflow">return</span> (-1);
+<a name="l04150"></a>04150         }
+<a name="l04151"></a>04151 }
+<a name="l04152"></a>04152 
+<a name="l04167"></a><a class="code" href="_s_d_l__image_filter_8h.html#a432d7bcc34b6bea42d1a07b4db795e1f">04167</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ac9a556492480ce71f54d456a0ff7e6cb" title="Filter using ConvolveKernel5x5Divide: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel5x5Divide</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <spa [...]
+<a name="l04168"></a>04168                                                                                    <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Divisor)
+<a name="l04169"></a>04169 {
+<a name="l04170"></a>04170         <span class="comment">/* Validate input parameters */</span>
+<a name="l04171"></a>04171         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
+<a name="l04172"></a>04172                 <span class="keywordflow">return</span>(-1);
+<a name="l04173"></a>04173 
+<a name="l04174"></a>04174         <span class="keywordflow">if</span> ((columns < 5) || (rows < 5) || (Divisor == 0))
+<a name="l04175"></a>04175                 <span class="keywordflow">return</span> (-1);
+<a name="l04176"></a>04176 
+<a name="l04177"></a>04177         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
+<a name="l04178"></a>04178 <span class="comment">//#ifdef USE_MMX</span>
+<a name="l04179"></a>04179 <span class="preprocessor">#if defined(USE_MMX) && defined(i386)</span>
+<a name="l04180"></a>04180 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l04181"></a>04181 <span class="preprocessor"></span>                __asm
+<a name="l04182"></a>04182                 {
+<a name="l04183"></a>04183                         pusha
+<a name="l04184"></a>04184                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
+<a name="l04185"></a>04185                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
+<a name="l04186"></a>04186                                 mov bl, Divisor         <span class="comment">/* load Divisor into BL */</span>
+<a name="l04187"></a>04187                                 movd mm5, ebx           <span class="comment">/* copy Divisor into MM5 */</span>
+<a name="l04188"></a>04188                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l04189"></a>04189                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
+<a name="l04190"></a>04190                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
+<a name="l04191"></a>04191                                 add edi, 2      <span class="comment">/* 2 column offset from the left edge */</span>
+<a name="l04192"></a>04192                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
+<a name="l04193"></a>04193                                 shl eax, 1      <span class="comment">/* EAX = columns * 2 */</span>
+<a name="l04194"></a>04194                                 add edi, eax    <span class="comment">/* 2 row offset from the top edge */</span>
+<a name="l04195"></a>04195                                 shr eax, 1      <span class="comment">/* EAX = columns */</span>
+<a name="l04196"></a>04196                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
+<a name="l04197"></a>04197                                 sub ebx, 4      <span class="comment">/* do not use first 2 and last 2 rows */</span>
+<a name="l04198"></a>04198                                 <span class="comment">/* ---, */</span>
+<a name="l04199"></a>04199 L10330:
+<a name="l04200"></a>04200                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l04201"></a>04201                                 sub ecx, 4      <span class="comment">/* do not use first 2 and last 2 columns */</span>
+<a name="l04202"></a>04202                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l04203"></a>04203 L10332:
+<a name="l04204"></a>04204                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l04205"></a>04205                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
+<a name="l04206"></a>04206                                 <span class="comment">/* --- 1 */</span>
+<a name="l04207"></a>04207                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04208"></a>04208                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04209"></a>04209                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04210"></a>04210                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04211"></a>04211                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04212"></a>04212                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04213"></a>04213                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04214"></a>04214                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04215"></a>04215                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04216"></a>04216                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04217"></a>04217                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04218"></a>04218                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04219"></a>04219                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04220"></a>04220                                 <span class="comment">/* --- 2 */</span>
+<a name="l04221"></a>04221                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04222"></a>04222                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04223"></a>04223                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04224"></a>04224                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04225"></a>04225                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04226"></a>04226                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04227"></a>04227                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04228"></a>04228                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04229"></a>04229                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04230"></a>04230                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04231"></a>04231                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04232"></a>04232                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04233"></a>04233                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04234"></a>04234                                 <span class="comment">/* --- 3 */</span>
+<a name="l04235"></a>04235                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04236"></a>04236                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04237"></a>04237                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04238"></a>04238                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04239"></a>04239                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04240"></a>04240                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04241"></a>04241                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04242"></a>04242                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04243"></a>04243                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04244"></a>04244                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04245"></a>04245                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04246"></a>04246                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04247"></a>04247                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04248"></a>04248                                 <span class="comment">/* --- 4 */</span>
+<a name="l04249"></a>04249                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04250"></a>04250                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04251"></a>04251                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04252"></a>04252                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04253"></a>04253                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04254"></a>04254                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04255"></a>04255                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04256"></a>04256                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04257"></a>04257                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04258"></a>04258                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04259"></a>04259                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04260"></a>04260                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04261"></a>04261                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04262"></a>04262                                 <span class="comment">/* --- 5 */</span>
+<a name="l04263"></a>04263                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04264"></a>04264                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04265"></a>04265                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04266"></a>04266                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04267"></a>04267                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04268"></a>04268                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04269"></a>04269                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04270"></a>04270                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04271"></a>04271                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04272"></a>04272                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04273"></a>04273                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04274"></a>04274                                 <span class="comment">/* ---, */</span>
+<a name="l04275"></a>04275                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l04276"></a>04276                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l04277"></a>04277                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l04278"></a>04278                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l04279"></a>04279                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l04280"></a>04280                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l04281"></a>04281                                 <span class="comment">/* ---, */</span>
+<a name="l04282"></a>04282                                 movd mm1, eax           <span class="comment">/* save EAX in MM1 */</span>
+<a name="l04283"></a>04283                                 movd mm2, ebx           <span class="comment">/* save EBX in MM2 */</span>
+<a name="l04284"></a>04284                                 movd mm3, edx           <span class="comment">/* save EDX in MM3 */</span>
+<a name="l04285"></a>04285                                 movd eax, mm7           <span class="comment">/* load summation result into EAX */</span>
+<a name="l04286"></a>04286                                 psraw mm7, 15           <span class="comment">/* spread sign bit of the result */</span>
+<a name="l04287"></a>04287                                 movd ebx, mm5           <span class="comment">/* load Divisor into EBX */</span>
+<a name="l04288"></a>04288                                 movd edx, mm7           <span class="comment">/* fill EDX with a sign bit */</span>
+<a name="l04289"></a>04289                                 idiv bx         <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
+<a name="l04290"></a>04290                                 movd mm7, eax           <span class="comment">/* move result of division into MM7 */</span>
+<a name="l04291"></a>04291                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
+<a name="l04292"></a>04292                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l04293"></a>04293                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l04294"></a>04294                                 movd edx, mm3           <span class="comment">/* restore saved EDX */</span>
+<a name="l04295"></a>04295                                 movd ebx, mm2           <span class="comment">/* restore saved EBX */</span>
+<a name="l04296"></a>04296                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
+<a name="l04297"></a>04297                                 <span class="comment">/* --, */</span>
+<a name="l04298"></a>04298                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l04299"></a>04299                                 sub edx, 72     <span class="comment">/* EDX = Kernel address */</span>
+<a name="l04300"></a>04300                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l04301"></a>04301                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l04302"></a>04302                                 <span class="comment">/* ---, */</span>
+<a name="l04303"></a>04303                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l04304"></a>04304                                 jnz            L10332           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04305"></a>04305                                 add esi, 4      <span class="comment">/* move to the next row in Src */</span>
+<a name="l04306"></a>04306                                 add edi, 4      <span class="comment">/* move to the next row in Dest */</span>
+<a name="l04307"></a>04307                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l04308"></a>04308                                 jnz            L10330           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04309"></a>04309                                 <span class="comment">/* ---, */</span>
+<a name="l04310"></a>04310                                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l04311"></a>04311                                 popa
+<a name="l04312"></a>04312                 }
+<a name="l04313"></a>04313 <span class="preprocessor">#else</span>
+<a name="l04314"></a>04314 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l04315"></a>04315                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
+<a name="l04316"></a>04316                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
+<a name="l04317"></a>04317                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load Divisor into BL */</span>
+<a name="l04318"></a>04318                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy Divisor into MM5 */</span>
+<a name="l04319"></a>04319                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l04320"></a>04320                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
+<a name="l04321"></a>04321                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
+<a name="l04322"></a>04322                         <span class="stringliteral">"add          $2, %%edi \n\t"</span>   <span class="comment">/* 2 column offset from the left edge */</span>
+<a name="l04323"></a>04323                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
+<a name="l04324"></a>04324                         <span class="stringliteral">"shl          $1, %%eax \n\t"</span>   <span class="comment">/* EAX = columns * 2 */</span>
+<a name="l04325"></a>04325                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 2 row offset from the top edge */</span>
+<a name="l04326"></a>04326                         <span class="stringliteral">"shr          $1, %%eax \n\t"</span>   <span class="comment">/* EAX = columns */</span>
+<a name="l04327"></a>04327                         <span class="stringliteral">"mov          %2, %%ebx \n\t"</span>   <span class="comment">/* initialize ROWS counter */</span>
+<a name="l04328"></a>04328                         <span class="stringliteral">"sub          $4, %%ebx \n\t"</span>   <span class="comment">/* do not use first 2 and last 2 rows */</span>
+<a name="l04329"></a>04329                         <span class="comment">/* --- */</span>
+<a name="l04330"></a>04330                         <span class="stringliteral">".L10330:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l04331"></a>04331                         <span class="stringliteral">"sub          $4, %%ecx \n\t"</span>   <span class="comment">/* do not use first 2 and last 2 columns */</span>
+<a name="l04332"></a>04332                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l04333"></a>04333                         <span class="stringliteral">".L10332:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l04334"></a>04334                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
+<a name="l04335"></a>04335                         <span class="comment">/* --- 1 */</span>
+<a name="l04336"></a>04336                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04337"></a>04337                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04338"></a>04338                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04339"></a>04339                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04340"></a>04340                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04341"></a>04341                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04342"></a>04342                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04343"></a>04343                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04344"></a>04344                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04345"></a>04345                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04346"></a>04346                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04347"></a>04347                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04348"></a>04348                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04349"></a>04349                         <span class="comment">/* --- 2 */</span>
+<a name="l04350"></a>04350                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04351"></a>04351                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04352"></a>04352                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04353"></a>04353                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04354"></a>04354                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04355"></a>04355                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04356"></a>04356                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04357"></a>04357                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04358"></a>04358                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04359"></a>04359                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04360"></a>04360                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04361"></a>04361                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04362"></a>04362                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04363"></a>04363                         <span class="comment">/* --- 3 */</span>
+<a name="l04364"></a>04364                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04365"></a>04365                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04366"></a>04366                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04367"></a>04367                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04368"></a>04368                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04369"></a>04369                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04370"></a>04370                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04371"></a>04371                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04372"></a>04372                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04373"></a>04373                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04374"></a>04374                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04375"></a>04375                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04376"></a>04376                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04377"></a>04377                         <span class="comment">/* --- 4 */</span>
+<a name="l04378"></a>04378                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04379"></a>04379                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04380"></a>04380                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04381"></a>04381                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04382"></a>04382                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04383"></a>04383                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04384"></a>04384                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04385"></a>04385                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04386"></a>04386                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04387"></a>04387                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04388"></a>04388                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04389"></a>04389                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04390"></a>04390                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04391"></a>04391                         <span class="comment">/* --- 5 */</span>
+<a name="l04392"></a>04392                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04393"></a>04393                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04394"></a>04394                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04395"></a>04395                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04396"></a>04396                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04397"></a>04397                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04398"></a>04398                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04399"></a>04399                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04400"></a>04400                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04401"></a>04401                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04402"></a>04402                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04403"></a>04403                         <span class="comment">/* --- */</span>
+<a name="l04404"></a>04404                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l04405"></a>04405                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l04406"></a>04406                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l04407"></a>04407                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l04408"></a>04408                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l04409"></a>04409                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l04410"></a>04410                         <span class="comment">/* --- */</span>
+<a name="l04411"></a>04411                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EAX in MM1 */</span>
+<a name="l04412"></a>04412                         <span class="stringliteral">"movd      %%ebx, %%mm2 \n\t"</span>   <span class="comment">/* save EBX in MM2 */</span>
+<a name="l04413"></a>04413                         <span class="stringliteral">"movd      %%edx, %%mm3 \n\t"</span>   <span class="comment">/* save EDX in MM3 */</span>
+<a name="l04414"></a>04414                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* load summation result into EAX */</span>
+<a name="l04415"></a>04415                         <span class="stringliteral">"psraw       $15, %%mm7 \n\t"</span>   <span class="comment">/* spread sign bit of the result */</span>
+<a name="l04416"></a>04416                         <span class="stringliteral">"movd      %%mm5, %%ebx \n\t"</span>   <span class="comment">/* load Divisor into EBX */</span>
+<a name="l04417"></a>04417                         <span class="stringliteral">"movd      %%mm7, %%edx \n\t"</span>   <span class="comment">/* fill EDX with a sign bit */</span>
+<a name="l04418"></a>04418                         <span class="stringliteral">"idivw             %%bx \n\t"</span>   <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
+<a name="l04419"></a>04419                         <span class="stringliteral">"movd      %%eax, %%mm7 \n\t"</span>   <span class="comment">/* move result of division into MM7 */</span>
+<a name="l04420"></a>04420                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
+<a name="l04421"></a>04421                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l04422"></a>04422                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l04423"></a>04423                         <span class="stringliteral">"movd      %%mm3, %%edx \n\t"</span>   <span class="comment">/* restore saved EDX */</span>
+<a name="l04424"></a>04424                         <span class="stringliteral">"movd      %%mm2, %%ebx \n\t"</span>   <span class="comment">/* restore saved EBX */</span>
+<a name="l04425"></a>04425                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
+<a name="l04426"></a>04426                         <span class="comment">/* -- */</span>
+<a name="l04427"></a>04427                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l04428"></a>04428                         <span class="stringliteral">"sub         $72, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
+<a name="l04429"></a>04429                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l04430"></a>04430                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l04431"></a>04431                         <span class="comment">/* --- */</span>
+<a name="l04432"></a>04432                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l04433"></a>04433                         <span class="stringliteral">"jnz            .L10332 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04434"></a>04434                         <span class="stringliteral">"add          $4, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
+<a name="l04435"></a>04435                         <span class="stringliteral">"add          $4, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
+<a name="l04436"></a>04436                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l04437"></a>04437                         <span class="stringliteral">"jnz            .L10330 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04438"></a>04438                         <span class="comment">/* --- */</span>
+<a name="l04439"></a>04439                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
+<a name="l04440"></a>04440                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
+<a name="l04441"></a>04441                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
+<a name="l04442"></a>04442                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
+<a name="l04443"></a>04443                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
+<a name="l04444"></a>04444                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
+<a name="l04445"></a>04445                         <span class="stringliteral">"m"</span>(Divisor)            <span class="comment">/* %5 */</span>
+<a name="l04446"></a>04446                         );
+<a name="l04447"></a>04447 <span class="preprocessor">#endif</span>
+<a name="l04448"></a>04448 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l04449"></a>04449 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
+<a name="l04450"></a>04450         } <span class="keywordflow">else</span> {
+<a name="l04451"></a>04451                 <span class="comment">/* No non-MMX implementation yet */</span>
+<a name="l04452"></a>04452                 <span class="keywordflow">return</span> (-1);
+<a name="l04453"></a>04453         }
+<a name="l04454"></a>04454 }
+<a name="l04455"></a>04455 
+<a name="l04470"></a><a class="code" href="_s_d_l__image_filter_8h.html#acc177cf891758fdc4bf7533fb266e339">04470</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a363f48e6843fd3f48da53688b89bca48" title="Filter using ConvolveKernel7x7Divide: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel7x7Divide</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <spa [...]
+<a name="l04471"></a>04471                                                                                    <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Divisor)
+<a name="l04472"></a>04472 {
+<a name="l04473"></a>04473         <span class="comment">/* Validate input parameters */</span>
+<a name="l04474"></a>04474         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
+<a name="l04475"></a>04475                 <span class="keywordflow">return</span>(-1);
+<a name="l04476"></a>04476 
+<a name="l04477"></a>04477         <span class="keywordflow">if</span> ((columns < 7) || (rows < 7) || (Divisor == 0))
+<a name="l04478"></a>04478                 <span class="keywordflow">return</span> (-1);
+<a name="l04479"></a>04479 
+<a name="l04480"></a>04480         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
+<a name="l04481"></a>04481 <span class="comment">//#ifdef USE_MMX</span>
+<a name="l04482"></a>04482 <span class="preprocessor">#if defined(USE_MMX) && defined(i386)</span>
+<a name="l04483"></a>04483 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l04484"></a>04484 <span class="preprocessor"></span>                __asm
+<a name="l04485"></a>04485                 {
+<a name="l04486"></a>04486                         pusha
+<a name="l04487"></a>04487                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
+<a name="l04488"></a>04488                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
+<a name="l04489"></a>04489                                 mov bl, Divisor         <span class="comment">/* load Divisor into BL */</span>
+<a name="l04490"></a>04490                                 movd mm5, ebx           <span class="comment">/* copy Divisor into MM5 */</span>
+<a name="l04491"></a>04491                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l04492"></a>04492                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
+<a name="l04493"></a>04493                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
+<a name="l04494"></a>04494                                 add edi, 3      <span class="comment">/* 3 column offset from the left edge */</span>
+<a name="l04495"></a>04495                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
+<a name="l04496"></a>04496                                 add edi, eax    <span class="comment">/* 3 row offset from the top edge */</span>
+<a name="l04497"></a>04497                                 add edi, eax
+<a name="l04498"></a>04498                                 add edi, eax
+<a name="l04499"></a>04499                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
+<a name="l04500"></a>04500                                 sub ebx, 6      <span class="comment">/* do not use first 3 and last 3 rows */</span>
+<a name="l04501"></a>04501                                 <span class="comment">/* ---, */</span>
+<a name="l04502"></a>04502 L10340:
+<a name="l04503"></a>04503                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l04504"></a>04504                                 sub ecx, 6      <span class="comment">/* do not use first 3 and last 3 columns */</span>
+<a name="l04505"></a>04505                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l04506"></a>04506 L10342:
+<a name="l04507"></a>04507                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l04508"></a>04508                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
+<a name="l04509"></a>04509                                 <span class="comment">/* --- 1 */</span>
+<a name="l04510"></a>04510                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04511"></a>04511                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04512"></a>04512                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04513"></a>04513                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04514"></a>04514                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04515"></a>04515                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04516"></a>04516                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04517"></a>04517                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04518"></a>04518                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04519"></a>04519                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04520"></a>04520                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04521"></a>04521                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04522"></a>04522                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04523"></a>04523                                 <span class="comment">/* --- 2 */</span>
+<a name="l04524"></a>04524                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04525"></a>04525                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04526"></a>04526                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04527"></a>04527                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04528"></a>04528                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04529"></a>04529                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04530"></a>04530                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04531"></a>04531                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04532"></a>04532                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04533"></a>04533                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04534"></a>04534                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04535"></a>04535                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04536"></a>04536                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04537"></a>04537                                 <span class="comment">/* --- 3 */</span>
+<a name="l04538"></a>04538                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04539"></a>04539                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04540"></a>04540                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04541"></a>04541                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04542"></a>04542                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04543"></a>04543                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04544"></a>04544                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04545"></a>04545                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04546"></a>04546                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04547"></a>04547                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04548"></a>04548                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04549"></a>04549                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04550"></a>04550                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04551"></a>04551                                 <span class="comment">/* --- 4 */</span>
+<a name="l04552"></a>04552                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04553"></a>04553                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04554"></a>04554                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04555"></a>04555                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04556"></a>04556                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04557"></a>04557                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04558"></a>04558                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04559"></a>04559                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04560"></a>04560                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04561"></a>04561                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04562"></a>04562                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04563"></a>04563                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04564"></a>04564                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04565"></a>04565                                 <span class="comment">/* --- 5 */</span>
+<a name="l04566"></a>04566                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04567"></a>04567                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04568"></a>04568                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04569"></a>04569                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04570"></a>04570                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04571"></a>04571                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04572"></a>04572                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04573"></a>04573                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04574"></a>04574                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04575"></a>04575                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04576"></a>04576                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04577"></a>04577                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04578"></a>04578                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04579"></a>04579                                 <span class="comment">/* --- 6 */</span>
+<a name="l04580"></a>04580                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04581"></a>04581                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04582"></a>04582                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04583"></a>04583                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04584"></a>04584                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04585"></a>04585                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04586"></a>04586                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04587"></a>04587                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04588"></a>04588                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04589"></a>04589                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04590"></a>04590                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04591"></a>04591                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04592"></a>04592                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04593"></a>04593                                 <span class="comment">/* --- 7 */</span>
+<a name="l04594"></a>04594                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04595"></a>04595                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04596"></a>04596                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04597"></a>04597                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04598"></a>04598                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04599"></a>04599                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04600"></a>04600                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04601"></a>04601                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l04602"></a>04602                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l04603"></a>04603                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04604"></a>04604                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04605"></a>04605                                 <span class="comment">/* ---, */</span>
+<a name="l04606"></a>04606                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l04607"></a>04607                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l04608"></a>04608                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l04609"></a>04609                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l04610"></a>04610                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l04611"></a>04611                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l04612"></a>04612                                 <span class="comment">/* ---, */</span>
+<a name="l04613"></a>04613                                 movd mm1, eax           <span class="comment">/* save EDX in MM1 */</span>
+<a name="l04614"></a>04614                                 movd mm2, ebx           <span class="comment">/* save EDX in MM2 */</span>
+<a name="l04615"></a>04615                                 movd mm3, edx           <span class="comment">/* save EDX in MM3 */</span>
+<a name="l04616"></a>04616                                 movd eax, mm7           <span class="comment">/* load summation result into EAX */</span>
+<a name="l04617"></a>04617                                 psraw mm7, 15           <span class="comment">/* spread sign bit of the result */</span>
+<a name="l04618"></a>04618                                 movd ebx, mm5           <span class="comment">/* load Divisor into EBX */</span>
+<a name="l04619"></a>04619                                 movd edx, mm7           <span class="comment">/* fill EDX with a sign bit */</span>
+<a name="l04620"></a>04620                                 idiv bx         <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
+<a name="l04621"></a>04621                                 movd mm7, eax           <span class="comment">/* move result of division into MM7 */</span>
+<a name="l04622"></a>04622                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
+<a name="l04623"></a>04623                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l04624"></a>04624                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l04625"></a>04625                                 movd edx, mm3           <span class="comment">/* restore saved EDX */</span>
+<a name="l04626"></a>04626                                 movd ebx, mm2           <span class="comment">/* restore saved EBX */</span>
+<a name="l04627"></a>04627                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
+<a name="l04628"></a>04628                                 <span class="comment">/* --, */</span>
+<a name="l04629"></a>04629                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l04630"></a>04630                                 sub edx, 104    <span class="comment">/* EDX = Kernel address */</span>
+<a name="l04631"></a>04631                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l04632"></a>04632                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l04633"></a>04633                                 <span class="comment">/* ---, */</span>
+<a name="l04634"></a>04634                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l04635"></a>04635                                 jnz            L10342           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04636"></a>04636                                 add esi, 6      <span class="comment">/* move to the next row in Src */</span>
+<a name="l04637"></a>04637                                 add edi, 6      <span class="comment">/* move to the next row in Dest */</span>
+<a name="l04638"></a>04638                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l04639"></a>04639                                 jnz            L10340           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04640"></a>04640                                 <span class="comment">/* ---, */</span>
+<a name="l04641"></a>04641                                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l04642"></a>04642                                 popa
+<a name="l04643"></a>04643                 }
+<a name="l04644"></a>04644 <span class="preprocessor">#else</span>
+<a name="l04645"></a>04645 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l04646"></a>04646                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
+<a name="l04647"></a>04647                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
+<a name="l04648"></a>04648                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load Divisor into BL */</span>
+<a name="l04649"></a>04649                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy Divisor into MM5 */</span>
+<a name="l04650"></a>04650                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l04651"></a>04651                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
+<a name="l04652"></a>04652                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
+<a name="l04653"></a>04653                         <span class="stringliteral">"add          $3, %%edi \n\t"</span>   <span class="comment">/* 3 column offset from the left edge */</span>
+<a name="l04654"></a>04654                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
+<a name="l04655"></a>04655                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 3 row offset from the top edge */</span>
+<a name="l04656"></a>04656                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"mov          %2, %%ebx \n\t"</span>       <span class="comment">/* initialize ROWS counter */</span>
+<a name="l04657"></a>04657                         <span class="stringliteral">"sub          $6, %%ebx \n\t"</span>   <span class="comment">/* do not use first 3 and last 3 rows */</span>
+<a name="l04658"></a>04658                         <span class="comment">/* --- */</span>
+<a name="l04659"></a>04659                         <span class="stringliteral">".L10340:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l04660"></a>04660                         <span class="stringliteral">"sub          $6, %%ecx \n\t"</span>   <span class="comment">/* do not use first 3 and last 3 columns */</span>
+<a name="l04661"></a>04661                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l04662"></a>04662                         <span class="stringliteral">".L10342:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l04663"></a>04663                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
+<a name="l04664"></a>04664                         <span class="comment">/* --- 1 */</span>
+<a name="l04665"></a>04665                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04666"></a>04666                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04667"></a>04667                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04668"></a>04668                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04669"></a>04669                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04670"></a>04670                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04671"></a>04671                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04672"></a>04672                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04673"></a>04673                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04674"></a>04674                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04675"></a>04675                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04676"></a>04676                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04677"></a>04677                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04678"></a>04678                         <span class="comment">/* --- 2 */</span>
+<a name="l04679"></a>04679                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04680"></a>04680                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04681"></a>04681                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04682"></a>04682                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04683"></a>04683                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04684"></a>04684                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04685"></a>04685                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04686"></a>04686                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04687"></a>04687                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04688"></a>04688                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04689"></a>04689                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04690"></a>04690                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04691"></a>04691                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04692"></a>04692                         <span class="comment">/* --- 3 */</span>
+<a name="l04693"></a>04693                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04694"></a>04694                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04695"></a>04695                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04696"></a>04696                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04697"></a>04697                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04698"></a>04698                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04699"></a>04699                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04700"></a>04700                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04701"></a>04701                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04702"></a>04702                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04703"></a>04703                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04704"></a>04704                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04705"></a>04705                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04706"></a>04706                         <span class="comment">/* --- 4 */</span>
+<a name="l04707"></a>04707                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04708"></a>04708                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04709"></a>04709                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04710"></a>04710                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04711"></a>04711                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04712"></a>04712                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04713"></a>04713                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04714"></a>04714                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04715"></a>04715                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04716"></a>04716                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04717"></a>04717                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04718"></a>04718                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04719"></a>04719                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04720"></a>04720                         <span class="comment">/* --- 5 */</span>
+<a name="l04721"></a>04721                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04722"></a>04722                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04723"></a>04723                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04724"></a>04724                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04725"></a>04725                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04726"></a>04726                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04727"></a>04727                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04728"></a>04728                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04729"></a>04729                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04730"></a>04730                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04731"></a>04731                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04732"></a>04732                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04733"></a>04733                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04734"></a>04734                         <span class="comment">/* --- 6 */</span>
+<a name="l04735"></a>04735                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04736"></a>04736                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04737"></a>04737                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04738"></a>04738                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04739"></a>04739                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04740"></a>04740                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04741"></a>04741                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04742"></a>04742                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04743"></a>04743                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04744"></a>04744                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04745"></a>04745                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04746"></a>04746                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04747"></a>04747                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04748"></a>04748                         <span class="comment">/* --- 7 */</span>
+<a name="l04749"></a>04749                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04750"></a>04750                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04751"></a>04751                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04752"></a>04752                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04753"></a>04753                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04754"></a>04754                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04755"></a>04755                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04756"></a>04756                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04757"></a>04757                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04758"></a>04758                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04759"></a>04759                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04760"></a>04760                         <span class="comment">/* --- */</span>
+<a name="l04761"></a>04761                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l04762"></a>04762                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l04763"></a>04763                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l04764"></a>04764                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l04765"></a>04765                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l04766"></a>04766                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l04767"></a>04767                         <span class="comment">/* --- */</span>
+<a name="l04768"></a>04768                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EAX in MM1 */</span>
+<a name="l04769"></a>04769                         <span class="stringliteral">"movd      %%ebx, %%mm2 \n\t"</span>   <span class="comment">/* save EBX in MM2 */</span>
+<a name="l04770"></a>04770                         <span class="stringliteral">"movd      %%edx, %%mm3 \n\t"</span>   <span class="comment">/* save EDX in MM3 */</span>
+<a name="l04771"></a>04771                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* load summation result into EAX */</span>
+<a name="l04772"></a>04772                         <span class="stringliteral">"psraw       $15, %%mm7 \n\t"</span>   <span class="comment">/* spread sign bit of the result */</span>
+<a name="l04773"></a>04773                         <span class="stringliteral">"movd      %%mm5, %%ebx \n\t"</span>   <span class="comment">/* load Divisor into EBX */</span>
+<a name="l04774"></a>04774                         <span class="stringliteral">"movd      %%mm7, %%edx \n\t"</span>   <span class="comment">/* fill EDX with a sign bit */</span>
+<a name="l04775"></a>04775                         <span class="stringliteral">"idivw             %%bx \n\t"</span>   <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
+<a name="l04776"></a>04776                         <span class="stringliteral">"movd      %%eax, %%mm7 \n\t"</span>   <span class="comment">/* move result of division into MM7 */</span>
+<a name="l04777"></a>04777                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
+<a name="l04778"></a>04778                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l04779"></a>04779                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l04780"></a>04780                         <span class="stringliteral">"movd      %%mm3, %%edx \n\t"</span>   <span class="comment">/* restore saved EDX */</span>
+<a name="l04781"></a>04781                         <span class="stringliteral">"movd      %%mm2, %%ebx \n\t"</span>   <span class="comment">/* restore saved EBX */</span>
+<a name="l04782"></a>04782                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
+<a name="l04783"></a>04783                         <span class="comment">/* -- */</span>
+<a name="l04784"></a>04784                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l04785"></a>04785                         <span class="stringliteral">"sub        $104, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
+<a name="l04786"></a>04786                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l04787"></a>04787                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l04788"></a>04788                         <span class="comment">/* --- */</span>
+<a name="l04789"></a>04789                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l04790"></a>04790                         <span class="stringliteral">"jnz            .L10342 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04791"></a>04791                         <span class="stringliteral">"add          $6, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
+<a name="l04792"></a>04792                         <span class="stringliteral">"add          $6, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
+<a name="l04793"></a>04793                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l04794"></a>04794                         <span class="stringliteral">"jnz            .L10340 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l04795"></a>04795                         <span class="comment">/* --- */</span>
+<a name="l04796"></a>04796                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
+<a name="l04797"></a>04797                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
+<a name="l04798"></a>04798                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
+<a name="l04799"></a>04799                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
+<a name="l04800"></a>04800                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
+<a name="l04801"></a>04801                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
+<a name="l04802"></a>04802                         <span class="stringliteral">"m"</span>(Divisor)            <span class="comment">/* %5 */</span>
+<a name="l04803"></a>04803                         );
+<a name="l04804"></a>04804 <span class="preprocessor">#endif</span>
+<a name="l04805"></a>04805 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l04806"></a>04806 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
+<a name="l04807"></a>04807         } <span class="keywordflow">else</span> {
+<a name="l04808"></a>04808                 <span class="comment">/* No non-MMX implementation yet */</span>
+<a name="l04809"></a>04809                 <span class="keywordflow">return</span> (-1);
+<a name="l04810"></a>04810         }
+<a name="l04811"></a>04811 }
+<a name="l04812"></a>04812 
+<a name="l04827"></a><a class="code" href="_s_d_l__image_filter_8h.html#af8a8114acd0509787ae5265990049720">04827</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ae1e91ff193beed110a71119ec901f09d" title="Filter using ConvolveKernel9x9Divide: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel9x9Divide</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <spa [...]
+<a name="l04828"></a>04828                                                                                    <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> Divisor)
+<a name="l04829"></a>04829 {
+<a name="l04830"></a>04830         <span class="comment">/* Validate input parameters */</span>
+<a name="l04831"></a>04831         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
+<a name="l04832"></a>04832                 <span class="keywordflow">return</span>(-1);
+<a name="l04833"></a>04833 
+<a name="l04834"></a>04834         <span class="keywordflow">if</span> ((columns < 9) || (rows < 9) || (Divisor == 0))
+<a name="l04835"></a>04835                 <span class="keywordflow">return</span> (-1);
+<a name="l04836"></a>04836 
+<a name="l04837"></a>04837         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
+<a name="l04838"></a>04838 <span class="comment">//#ifdef USE_MMX</span>
+<a name="l04839"></a>04839 <span class="preprocessor">#if defined(USE_MMX) && defined(i386)</span>
+<a name="l04840"></a>04840 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l04841"></a>04841 <span class="preprocessor"></span>                __asm
+<a name="l04842"></a>04842                 {
+<a name="l04843"></a>04843                         pusha
+<a name="l04844"></a>04844                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
+<a name="l04845"></a>04845                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
+<a name="l04846"></a>04846                                 mov bl, Divisor         <span class="comment">/* load Divisor into BL */</span>
+<a name="l04847"></a>04847                                 movd mm5, ebx           <span class="comment">/* copy Divisor into MM5 */</span>
+<a name="l04848"></a>04848                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l04849"></a>04849                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
+<a name="l04850"></a>04850                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
+<a name="l04851"></a>04851                                 add edi, 4      <span class="comment">/* 4 column offset from the left edge */</span>
+<a name="l04852"></a>04852                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
+<a name="l04853"></a>04853                                 add edi, eax    <span class="comment">/* 4 row offset from the top edge */</span>
+<a name="l04854"></a>04854                                 add edi, eax
+<a name="l04855"></a>04855                                 add edi, eax
+<a name="l04856"></a>04856                                 add edi, eax
+<a name="l04857"></a>04857                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
+<a name="l04858"></a>04858                                 sub ebx, 8      <span class="comment">/* do not use first 4 and last 4 rows */</span>
+<a name="l04859"></a>04859                                 <span class="comment">/* ---, */</span>
+<a name="l04860"></a>04860 L10350:
+<a name="l04861"></a>04861                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l04862"></a>04862                                 sub ecx, 8      <span class="comment">/* do not use first 4 and last 4 columns */</span>
+<a name="l04863"></a>04863                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l04864"></a>04864 L10352:
+<a name="l04865"></a>04865                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l04866"></a>04866                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
+<a name="l04867"></a>04867                                 <span class="comment">/* --- 1 */</span>
+<a name="l04868"></a>04868                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04869"></a>04869                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04870"></a>04870                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l04871"></a>04871                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04872"></a>04872                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04873"></a>04873                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04874"></a>04874                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04875"></a>04875                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04876"></a>04876                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04877"></a>04877                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04878"></a>04878                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04879"></a>04879                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04880"></a>04880                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04881"></a>04881                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04882"></a>04882                         dec              esi
+<a name="l04883"></a>04883                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04884"></a>04884                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04885"></a>04885                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04886"></a>04886                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04887"></a>04887                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04888"></a>04888                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04889"></a>04889                                 <span class="comment">/* --- 2 */</span>
+<a name="l04890"></a>04890                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04891"></a>04891                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04892"></a>04892                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l04893"></a>04893                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04894"></a>04894                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04895"></a>04895                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04896"></a>04896                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04897"></a>04897                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04898"></a>04898                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04899"></a>04899                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04900"></a>04900                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04901"></a>04901                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04902"></a>04902                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04903"></a>04903                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04904"></a>04904                         dec              esi
+<a name="l04905"></a>04905                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04906"></a>04906                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04907"></a>04907                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04908"></a>04908                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04909"></a>04909                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04910"></a>04910                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04911"></a>04911                                 <span class="comment">/* --- 3 */</span>
+<a name="l04912"></a>04912                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04913"></a>04913                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04914"></a>04914                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l04915"></a>04915                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04916"></a>04916                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04917"></a>04917                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04918"></a>04918                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04919"></a>04919                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04920"></a>04920                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04921"></a>04921                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04922"></a>04922                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04923"></a>04923                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04924"></a>04924                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04925"></a>04925                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04926"></a>04926                         dec              esi
+<a name="l04927"></a>04927                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04928"></a>04928                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04929"></a>04929                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04930"></a>04930                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04931"></a>04931                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04932"></a>04932                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04933"></a>04933                                 <span class="comment">/* --- 4 */</span>
+<a name="l04934"></a>04934                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04935"></a>04935                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04936"></a>04936                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l04937"></a>04937                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04938"></a>04938                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04939"></a>04939                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04940"></a>04940                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04941"></a>04941                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04942"></a>04942                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04943"></a>04943                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04944"></a>04944                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04945"></a>04945                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04946"></a>04946                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04947"></a>04947                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04948"></a>04948                         dec              esi
+<a name="l04949"></a>04949                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04950"></a>04950                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04951"></a>04951                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04952"></a>04952                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04953"></a>04953                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04954"></a>04954                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04955"></a>04955                                 <span class="comment">/* --- 5 */</span>
+<a name="l04956"></a>04956                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04957"></a>04957                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04958"></a>04958                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l04959"></a>04959                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04960"></a>04960                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04961"></a>04961                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04962"></a>04962                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04963"></a>04963                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04964"></a>04964                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04965"></a>04965                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04966"></a>04966                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04967"></a>04967                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04968"></a>04968                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04969"></a>04969                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04970"></a>04970                         dec              esi
+<a name="l04971"></a>04971                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04972"></a>04972                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04973"></a>04973                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04974"></a>04974                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04975"></a>04975                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04976"></a>04976                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04977"></a>04977                                 <span class="comment">/* --- 6 */</span>
+<a name="l04978"></a>04978                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04979"></a>04979                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l04980"></a>04980                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l04981"></a>04981                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04982"></a>04982                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04983"></a>04983                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04984"></a>04984                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04985"></a>04985                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04986"></a>04986                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l04987"></a>04987                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04988"></a>04988                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l04989"></a>04989                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l04990"></a>04990                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04991"></a>04991                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l04992"></a>04992                         dec              esi
+<a name="l04993"></a>04993                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l04994"></a>04994                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l04995"></a>04995                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l04996"></a>04996                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l04997"></a>04997                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l04998"></a>04998                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l04999"></a>04999                                 <span class="comment">/* --- 7 */</span>
+<a name="l05000"></a>05000                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05001"></a>05001                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05002"></a>05002                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05003"></a>05003                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05004"></a>05004                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05005"></a>05005                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05006"></a>05006                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05007"></a>05007                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05008"></a>05008                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05009"></a>05009                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05010"></a>05010                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05011"></a>05011                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05012"></a>05012                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05013"></a>05013                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05014"></a>05014                         dec              esi
+<a name="l05015"></a>05015                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05016"></a>05016                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05017"></a>05017                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05018"></a>05018                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05019"></a>05019                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05020"></a>05020                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05021"></a>05021                                 <span class="comment">/* --- 8 */</span>
+<a name="l05022"></a>05022                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05023"></a>05023                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05024"></a>05024                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05025"></a>05025                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05026"></a>05026                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05027"></a>05027                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05028"></a>05028                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05029"></a>05029                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05030"></a>05030                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05031"></a>05031                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05032"></a>05032                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05033"></a>05033                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05034"></a>05034                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05035"></a>05035                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05036"></a>05036                         dec              esi
+<a name="l05037"></a>05037                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05038"></a>05038                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05039"></a>05039                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05040"></a>05040                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05041"></a>05041                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05042"></a>05042                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05043"></a>05043                                 <span class="comment">/* --- 9 */</span>
+<a name="l05044"></a>05044                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05045"></a>05045                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05046"></a>05046                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05047"></a>05047                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05048"></a>05048                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05049"></a>05049                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05050"></a>05050                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05051"></a>05051                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05052"></a>05052                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05053"></a>05053                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05054"></a>05054                                 pmullw mm2, mm4         <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05055"></a>05055                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05056"></a>05056                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05057"></a>05057                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05058"></a>05058                         movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05059"></a>05059                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05060"></a>05060                                 pmullw mm1, mm3         <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05061"></a>05061                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05062"></a>05062                                 <span class="comment">/* ---, */</span>
+<a name="l05063"></a>05063                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l05064"></a>05064                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l05065"></a>05065                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l05066"></a>05066                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l05067"></a>05067                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l05068"></a>05068                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l05069"></a>05069                                 <span class="comment">/* ---, */</span>
+<a name="l05070"></a>05070                                 movd mm1, eax           <span class="comment">/* save EAX in MM1 */</span>
+<a name="l05071"></a>05071                                 movd mm2, ebx           <span class="comment">/* save EBX in MM2 */</span>
+<a name="l05072"></a>05072                                 movd mm3, edx           <span class="comment">/* save EDX in MM3 */</span>
+<a name="l05073"></a>05073                                 movd eax, mm7           <span class="comment">/* load summation result into EAX */</span>
+<a name="l05074"></a>05074                                 psraw mm7, 15           <span class="comment">/* spread sign bit of the result */</span>
+<a name="l05075"></a>05075                                 movd ebx, mm5           <span class="comment">/* load Divisor into EBX */</span>
+<a name="l05076"></a>05076                                 movd edx, mm7           <span class="comment">/* fill EDX with a sign bit */</span>
+<a name="l05077"></a>05077                                 idiv bx         <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
+<a name="l05078"></a>05078                                 movd mm7, eax           <span class="comment">/* move result of division into MM7 */</span>
+<a name="l05079"></a>05079                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
+<a name="l05080"></a>05080                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l05081"></a>05081                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l05082"></a>05082                                 movd edx, mm3           <span class="comment">/* restore saved EDX */</span>
+<a name="l05083"></a>05083                                 movd ebx, mm2           <span class="comment">/* restore saved EBX */</span>
+<a name="l05084"></a>05084                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
+<a name="l05085"></a>05085                                 <span class="comment">/* --, */</span>
+<a name="l05086"></a>05086                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l05087"></a>05087                                 sub edx, 208    <span class="comment">/* EDX = Kernel address */</span>
+<a name="l05088"></a>05088                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l05089"></a>05089                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l05090"></a>05090                                 <span class="comment">/* ---, */</span>
+<a name="l05091"></a>05091                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l05092"></a>05092                                 jnz            L10352           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05093"></a>05093                                 add esi, 8      <span class="comment">/* move to the next row in Src */</span>
+<a name="l05094"></a>05094                                 add edi, 8      <span class="comment">/* move to the next row in Dest */</span>
+<a name="l05095"></a>05095                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l05096"></a>05096                                 jnz            L10350           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05097"></a>05097                                 <span class="comment">/* ---, */</span>
+<a name="l05098"></a>05098                                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l05099"></a>05099                                 popa
+<a name="l05100"></a>05100                 }
+<a name="l05101"></a>05101 <span class="preprocessor">#else</span>
+<a name="l05102"></a>05102 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l05103"></a>05103                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
+<a name="l05104"></a>05104                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
+<a name="l05105"></a>05105                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load Divisor into BL */</span>
+<a name="l05106"></a>05106                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy Divisor into MM5 */</span>
+<a name="l05107"></a>05107                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l05108"></a>05108                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
+<a name="l05109"></a>05109                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
+<a name="l05110"></a>05110                         <span class="stringliteral">"add          $4, %%edi \n\t"</span>   <span class="comment">/* 4 column offset from the left edge */</span>
+<a name="l05111"></a>05111                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
+<a name="l05112"></a>05112                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 4 row offset from the top edge */</span>
+<a name="l05113"></a>05113                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"mov          %2, %%ebx \n\t"</span> <span class="comment">/* initialize ROWS counter */</span>
+<a name="l05114"></a>05114                         <span class="stringliteral">"sub          $8, %%ebx \n\t"</span>   <span class="comment">/* do not use first 4 and last 4 rows */</span>
+<a name="l05115"></a>05115                         <span class="comment">/* --- */</span>
+<a name="l05116"></a>05116                         <span class="stringliteral">".L10350:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l05117"></a>05117                         <span class="stringliteral">"sub          $8, %%ecx \n\t"</span>   <span class="comment">/* do not use first 4 and last 4 columns */</span>
+<a name="l05118"></a>05118                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l05119"></a>05119                         <span class="stringliteral">".L10352:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l05120"></a>05120                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
+<a name="l05121"></a>05121                         <span class="comment">/* --- 1 */</span>
+<a name="l05122"></a>05122                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05123"></a>05123                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05124"></a>05124                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05125"></a>05125                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05126"></a>05126                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05127"></a>05127                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05128"></a>05128                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05129"></a>05129                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05130"></a>05130                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05131"></a>05131                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05132"></a>05132                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05133"></a>05133                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05134"></a>05134                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05135"></a>05135                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05136"></a>05136                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05137"></a>05137                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05138"></a>05138                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05139"></a>05139                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05140"></a>05140                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05141"></a>05141                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05142"></a>05142                         <span class="comment">/* --- 2 */</span>
+<a name="l05143"></a>05143                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05144"></a>05144                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05145"></a>05145                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05146"></a>05146                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05147"></a>05147                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05148"></a>05148                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05149"></a>05149                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05150"></a>05150                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05151"></a>05151                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05152"></a>05152                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05153"></a>05153                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05154"></a>05154                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05155"></a>05155                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05156"></a>05156                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05157"></a>05157                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05158"></a>05158                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05159"></a>05159                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05160"></a>05160                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05161"></a>05161                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05162"></a>05162                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05163"></a>05163                         <span class="comment">/* --- 3 */</span>
+<a name="l05164"></a>05164                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05165"></a>05165                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05166"></a>05166                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05167"></a>05167                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05168"></a>05168                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05169"></a>05169                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05170"></a>05170                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05171"></a>05171                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05172"></a>05172                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05173"></a>05173                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05174"></a>05174                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05175"></a>05175                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05176"></a>05176                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05177"></a>05177                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05178"></a>05178                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05179"></a>05179                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05180"></a>05180                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05181"></a>05181                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05182"></a>05182                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05183"></a>05183                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05184"></a>05184                         <span class="comment">/* --- 4 */</span>
+<a name="l05185"></a>05185                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05186"></a>05186                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05187"></a>05187                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05188"></a>05188                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05189"></a>05189                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05190"></a>05190                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05191"></a>05191                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05192"></a>05192                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05193"></a>05193                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05194"></a>05194                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05195"></a>05195                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05196"></a>05196                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05197"></a>05197                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05198"></a>05198                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05199"></a>05199                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05200"></a>05200                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05201"></a>05201                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05202"></a>05202                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05203"></a>05203                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05204"></a>05204                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05205"></a>05205                         <span class="comment">/* --- 5 */</span>
+<a name="l05206"></a>05206                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05207"></a>05207                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05208"></a>05208                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05209"></a>05209                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05210"></a>05210                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05211"></a>05211                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05212"></a>05212                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05213"></a>05213                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05214"></a>05214                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05215"></a>05215                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05216"></a>05216                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05217"></a>05217                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05218"></a>05218                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05219"></a>05219                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05220"></a>05220                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05221"></a>05221                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05222"></a>05222                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05223"></a>05223                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05224"></a>05224                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05225"></a>05225                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05226"></a>05226                         <span class="comment">/* --- 6 */</span>
+<a name="l05227"></a>05227                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05228"></a>05228                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05229"></a>05229                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05230"></a>05230                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05231"></a>05231                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05232"></a>05232                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05233"></a>05233                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05234"></a>05234                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05235"></a>05235                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05236"></a>05236                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05237"></a>05237                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05238"></a>05238                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05239"></a>05239                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05240"></a>05240                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05241"></a>05241                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05242"></a>05242                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05243"></a>05243                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05244"></a>05244                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05245"></a>05245                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05246"></a>05246                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05247"></a>05247                         <span class="comment">/* --- 7 */</span>
+<a name="l05248"></a>05248                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05249"></a>05249                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05250"></a>05250                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05251"></a>05251                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05252"></a>05252                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05253"></a>05253                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05254"></a>05254                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05255"></a>05255                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05256"></a>05256                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05257"></a>05257                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05258"></a>05258                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05259"></a>05259                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05260"></a>05260                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05261"></a>05261                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05262"></a>05262                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05263"></a>05263                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05264"></a>05264                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05265"></a>05265                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05266"></a>05266                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05267"></a>05267                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05268"></a>05268                         <span class="comment">/* --- 8 */</span>
+<a name="l05269"></a>05269                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05270"></a>05270                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05271"></a>05271                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05272"></a>05272                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05273"></a>05273                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05274"></a>05274                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05275"></a>05275                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05276"></a>05276                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05277"></a>05277                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05278"></a>05278                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05279"></a>05279                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05280"></a>05280                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05281"></a>05281                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05282"></a>05282                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05283"></a>05283                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05284"></a>05284                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05285"></a>05285                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05286"></a>05286                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05287"></a>05287                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05288"></a>05288                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05289"></a>05289                         <span class="comment">/* --- 9 */</span>
+<a name="l05290"></a>05290                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05291"></a>05291                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05292"></a>05292                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l05293"></a>05293                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05294"></a>05294                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05295"></a>05295                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05296"></a>05296                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05297"></a>05297                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05298"></a>05298                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05299"></a>05299                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05300"></a>05300                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05301"></a>05301                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05302"></a>05302                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05303"></a>05303                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05304"></a>05304                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05305"></a>05305                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05306"></a>05306                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05307"></a>05307                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05308"></a>05308                         <span class="comment">/* --- */</span>
+<a name="l05309"></a>05309                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l05310"></a>05310                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l05311"></a>05311                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l05312"></a>05312                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l05313"></a>05313                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l05314"></a>05314                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l05315"></a>05315                         <span class="comment">/* --- */</span>
+<a name="l05316"></a>05316                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EAX in MM1 */</span>
+<a name="l05317"></a>05317                         <span class="stringliteral">"movd      %%ebx, %%mm2 \n\t"</span>   <span class="comment">/* save EBX in MM2 */</span>
+<a name="l05318"></a>05318                         <span class="stringliteral">"movd      %%edx, %%mm3 \n\t"</span>   <span class="comment">/* save EDX in MM3 */</span>
+<a name="l05319"></a>05319                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* load summation result into EAX */</span>
+<a name="l05320"></a>05320                         <span class="stringliteral">"psraw       $15, %%mm7 \n\t"</span>   <span class="comment">/* spread sign bit of the result */</span>
+<a name="l05321"></a>05321                         <span class="stringliteral">"movd      %%mm5, %%ebx \n\t"</span>   <span class="comment">/* load Divisor into EBX */</span>
+<a name="l05322"></a>05322                         <span class="stringliteral">"movd      %%mm7, %%edx \n\t"</span>   <span class="comment">/* fill EDX with a sign bit */</span>
+<a name="l05323"></a>05323                         <span class="stringliteral">"idivw             %%bx \n\t"</span>   <span class="comment">/* IDIV - VERY EXPENSIVE */</span>
+<a name="l05324"></a>05324                         <span class="stringliteral">"movd      %%eax, %%mm7 \n\t"</span>   <span class="comment">/* move result of division into MM7 */</span>
+<a name="l05325"></a>05325                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
+<a name="l05326"></a>05326                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l05327"></a>05327                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l05328"></a>05328                         <span class="stringliteral">"movd      %%mm3, %%edx \n\t"</span>   <span class="comment">/* restore saved EDX */</span>
+<a name="l05329"></a>05329                         <span class="stringliteral">"movd      %%mm2, %%ebx \n\t"</span>   <span class="comment">/* restore saved EBX */</span>
+<a name="l05330"></a>05330                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
+<a name="l05331"></a>05331                         <span class="comment">/* -- */</span>
+<a name="l05332"></a>05332                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l05333"></a>05333                         <span class="stringliteral">"sub        $208, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
+<a name="l05334"></a>05334                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l05335"></a>05335                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l05336"></a>05336                         <span class="comment">/* --- */</span>
+<a name="l05337"></a>05337                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l05338"></a>05338                         <span class="stringliteral">"jnz            .L10352 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05339"></a>05339                         <span class="stringliteral">"add          $8, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
+<a name="l05340"></a>05340                         <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
+<a name="l05341"></a>05341                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l05342"></a>05342                         <span class="stringliteral">"jnz            .L10350 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05343"></a>05343                         <span class="comment">/* --- */</span>
+<a name="l05344"></a>05344                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
+<a name="l05345"></a>05345                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
+<a name="l05346"></a>05346                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
+<a name="l05347"></a>05347                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
+<a name="l05348"></a>05348                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
+<a name="l05349"></a>05349                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
+<a name="l05350"></a>05350                         <span class="stringliteral">"m"</span>(Divisor)            <span class="comment">/* %5 */</span>
+<a name="l05351"></a>05351                         );
+<a name="l05352"></a>05352 <span class="preprocessor">#endif</span>
+<a name="l05353"></a>05353 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l05354"></a>05354 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
+<a name="l05355"></a>05355         } <span class="keywordflow">else</span> {
+<a name="l05356"></a>05356                 <span class="comment">/* No non-MMX implementation yet */</span>
+<a name="l05357"></a>05357                 <span class="keywordflow">return</span> (-1);
+<a name="l05358"></a>05358         }
+<a name="l05359"></a>05359 }
+<a name="l05360"></a>05360 
+<a name="l05375"></a><a class="code" href="_s_d_l__image_filter_8h.html#a67929babce179e1e333c5cd2e5fc4091">05375</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#ac329e5a3b60351768c96c94db9f9cf97" title="Filter using ConvolveKernel3x3ShiftRight: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel3x3ShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</sp [...]
+<a name="l05376"></a>05376                                                                                            <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> NRightShift)
+<a name="l05377"></a>05377 {
+<a name="l05378"></a>05378         <span class="comment">/* Validate input parameters */</span>
+<a name="l05379"></a>05379         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
+<a name="l05380"></a>05380                 <span class="keywordflow">return</span>(-1);
+<a name="l05381"></a>05381 
+<a name="l05382"></a>05382         <span class="keywordflow">if</span> ((columns < 3) || (rows < 3) || (NRightShift > 7))
+<a name="l05383"></a>05383                 <span class="keywordflow">return</span> (-1);
+<a name="l05384"></a>05384 
+<a name="l05385"></a>05385         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
+<a name="l05386"></a>05386 <span class="comment">//#ifdef USE_MMX</span>
+<a name="l05387"></a>05387 <span class="preprocessor">#if defined(USE_MMX) && defined(i386)</span>
+<a name="l05388"></a>05388 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l05389"></a>05389 <span class="preprocessor"></span>                __asm
+<a name="l05390"></a>05390                 {
+<a name="l05391"></a>05391                         pusha
+<a name="l05392"></a>05392                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
+<a name="l05393"></a>05393                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
+<a name="l05394"></a>05394                                 mov bl, NRightShift     <span class="comment">/* load NRightShift into BL */</span>
+<a name="l05395"></a>05395                                 movd mm4, ebx           <span class="comment">/* copy NRightShift into MM4 */</span>
+<a name="l05396"></a>05396                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l05397"></a>05397                                 movq mm5, [edx]         <span class="comment">/* MM5 = {0,K2,K1,K0} */</span>
+<a name="l05398"></a>05398                         add edx, 8      <span class="comment">/* second row              |K0 K1 K2 0| */</span>
+<a name="l05399"></a>05399                                 movq mm6, [edx]         <span class="comment">/* MM6 = {0,K5,K4,K3}  K = |K3 K4 K5 0| */</span>
+<a name="l05400"></a>05400                         add edx, 8      <span class="comment">/* third row               |K6 K7 K8 0| */</span>
+<a name="l05401"></a>05401                                 movq mm7, [edx]         <span class="comment">/* MM7 = {0,K8,K7,K6} */</span>
+<a name="l05402"></a>05402                         <span class="comment">/* ---, */</span>
+<a name="l05403"></a>05403                         mov eax, columns        <span class="comment">/* load columns into EAX */</span>
+<a name="l05404"></a>05404                                 mov esi, Src    <span class="comment">/* ESI = Src row 0 address */</span>
+<a name="l05405"></a>05405                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
+<a name="l05406"></a>05406                                 add edi, eax    <span class="comment">/* EDI = EDI + columns */</span>
+<a name="l05407"></a>05407                                 inc              edi            <span class="comment">/* 1 byte offset from the left edge */</span>
+<a name="l05408"></a>05408                                 mov edx, rows           <span class="comment">/* initialize ROWS counter */</span>
+<a name="l05409"></a>05409                                 sub edx, 2      <span class="comment">/* do not use first and last row */</span>
+<a name="l05410"></a>05410                                 <span class="comment">/* ---, */</span>
+<a name="l05411"></a>05411 L10360:
+<a name="l05412"></a>05412                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l05413"></a>05413                                 sub ecx, 2      <span class="comment">/* do not use first and last column */</span>
+<a name="l05414"></a>05414                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l05415"></a>05415 L10362:
+<a name="l05416"></a>05416                         <span class="comment">/* ---, */</span>
+<a name="l05417"></a>05417                         movq mm1, [esi]         <span class="comment">/* load 8 bytes of the image first row */</span>
+<a name="l05418"></a>05418                         add esi, eax    <span class="comment">/* move one row below */</span>
+<a name="l05419"></a>05419                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes of the image second row */</span>
+<a name="l05420"></a>05420                         add esi, eax    <span class="comment">/* move one row below */</span>
+<a name="l05421"></a>05421                                 movq mm3, [esi]         <span class="comment">/* load 8 bytes of the image third row */</span>
+<a name="l05422"></a>05422                         punpcklbw mm1, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l05423"></a>05423                                 punpcklbw mm2, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l05424"></a>05424                                 punpcklbw mm3, mm0      <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l05425"></a>05425                                 psrlw mm1, mm4          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05426"></a>05426                                 psrlw mm2, mm4          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05427"></a>05427                                 psrlw mm3, mm4          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05428"></a>05428                                 pmullw mm1, mm5         <span class="comment">/* multiply words first row  image*Kernel */</span>
+<a name="l05429"></a>05429                                 pmullw mm2, mm6         <span class="comment">/* multiply words second row image*Kernel */</span>
+<a name="l05430"></a>05430                                 pmullw mm3, mm7         <span class="comment">/* multiply words third row  image*Kernel */</span>
+<a name="l05431"></a>05431                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the first and second rows */</span>
+<a name="l05432"></a>05432                                 paddsw mm1, mm3         <span class="comment">/* add 4 words of the third row and result */</span>
+<a name="l05433"></a>05433                                 movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05434"></a>05434                                 psrlq mm1, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l05435"></a>05435                                 paddsw mm1, mm2         <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l05436"></a>05436                                 movq mm3, mm1           <span class="comment">/* copy MM1 into MM3 */</span>
+<a name="l05437"></a>05437                                 psrlq mm1, 16           <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l05438"></a>05438                                 paddsw mm1, mm3         <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l05439"></a>05439                                 packuswb mm1, mm0       <span class="comment">/* pack shift result with saturation */</span>
+<a name="l05440"></a>05440                                 movd ebx, mm1           <span class="comment">/* copy saturated result into EBX */</span>
+<a name="l05441"></a>05441                                 mov [edi], bl           <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l05442"></a>05442                                 <span class="comment">/* --, */</span>
+<a name="l05443"></a>05443                                 sub esi, eax    <span class="comment">/* move two rows up */</span>
+<a name="l05444"></a>05444                                 sub esi, eax
+<a name="l05445"></a>05445                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l05446"></a>05446                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l05447"></a>05447                                 <span class="comment">/* ---, */</span>
+<a name="l05448"></a>05448                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l05449"></a>05449                                 jnz            L10362           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05450"></a>05450                                 add esi, 2      <span class="comment">/* move to the next row in Src */</span>
+<a name="l05451"></a>05451                                 add edi, 2      <span class="comment">/* move to the next row in Dest */</span>
+<a name="l05452"></a>05452                                 dec              edx            <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l05453"></a>05453                                 jnz            L10360           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05454"></a>05454                                 <span class="comment">/* ---, */</span>
+<a name="l05455"></a>05455                                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l05456"></a>05456                                 popa
+<a name="l05457"></a>05457                 }
+<a name="l05458"></a>05458 <span class="preprocessor">#else</span>
+<a name="l05459"></a>05459 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l05460"></a>05460                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
+<a name="l05461"></a>05461                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
+<a name="l05462"></a>05462                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load NRightShift into BL */</span>
+<a name="l05463"></a>05463                         <span class="stringliteral">"movd      %%ebx, %%mm4 \n\t"</span>   <span class="comment">/* copy NRightShift into MM4 */</span>
+<a name="l05464"></a>05464                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l05465"></a>05465                         <span class="stringliteral">"movq    (%%edx), %%mm5 \n\t"</span>   <span class="comment">/* MM5 = {0,K2,K1,K0} */</span>
+<a name="l05466"></a>05466                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* second row              |K0 K1 K2 0| */</span>
+<a name="l05467"></a>05467                         <span class="stringliteral">"movq    (%%edx), %%mm6 \n\t"</span>   <span class="comment">/* MM6 = {0,K5,K4,K3}  K = |K3 K4 K5 0| */</span>
+<a name="l05468"></a>05468                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* third row               |K6 K7 K8 0| */</span>
+<a name="l05469"></a>05469                         <span class="stringliteral">"movq    (%%edx), %%mm7 \n\t"</span>   <span class="comment">/* MM7 = {0,K8,K7,K6} */</span>
+<a name="l05470"></a>05470                         <span class="comment">/* --- */</span>
+<a name="l05471"></a>05471                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
+<a name="l05472"></a>05472                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* ESI = Src row 0 address */</span>
+<a name="l05473"></a>05473                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
+<a name="l05474"></a>05474                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* EDI = EDI + columns */</span>
+<a name="l05475"></a>05475                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* 1 byte offset from the left edge */</span>
+<a name="l05476"></a>05476                         <span class="stringliteral">"mov          %2, %%edx \n\t"</span>   <span class="comment">/* initialize ROWS counter */</span>
+<a name="l05477"></a>05477                         <span class="stringliteral">"sub          $2, %%edx \n\t"</span>   <span class="comment">/* do not use first and last row */</span>
+<a name="l05478"></a>05478                         <span class="comment">/* --- */</span>
+<a name="l05479"></a>05479                         <span class="stringliteral">".L10360:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l05480"></a>05480                         <span class="stringliteral">"sub          $2, %%ecx \n\t"</span>   <span class="comment">/* do not use first and last column */</span>
+<a name="l05481"></a>05481                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l05482"></a>05482                         <span class="stringliteral">".L10362:               \n\t"</span>
+<a name="l05483"></a>05483                         <span class="comment">/* --- */</span>
+<a name="l05484"></a>05484                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the image first row */</span>
+<a name="l05485"></a>05485                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move one row below */</span>
+<a name="l05486"></a>05486                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes of the image second row */</span>
+<a name="l05487"></a>05487                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move one row below */</span>
+<a name="l05488"></a>05488                         <span class="stringliteral">"movq    (%%esi), %%mm3 \n\t"</span>   <span class="comment">/* load 8 bytes of the image third row */</span>
+<a name="l05489"></a>05489                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l05490"></a>05490                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l05491"></a>05491                         <span class="stringliteral">"punpcklbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack first 4 bytes into words */</span>
+<a name="l05492"></a>05492                         <span class="stringliteral">"psrlw     %%mm4, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05493"></a>05493                         <span class="stringliteral">"psrlw     %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05494"></a>05494                         <span class="stringliteral">"psrlw     %%mm4, %%mm3 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05495"></a>05495                         <span class="stringliteral">"pmullw    %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* multiply words first row  image*Kernel */</span>
+<a name="l05496"></a>05496                         <span class="stringliteral">"pmullw    %%mm6, %%mm2 \n\t"</span>   <span class="comment">/* multiply words second row image*Kernel */</span>
+<a name="l05497"></a>05497                         <span class="stringliteral">"pmullw    %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* multiply words third row  image*Kernel */</span>
+<a name="l05498"></a>05498                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the first and second rows */</span>
+<a name="l05499"></a>05499                         <span class="stringliteral">"paddsw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the third row and result */</span>
+<a name="l05500"></a>05500                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05501"></a>05501                         <span class="stringliteral">"psrlq       $32, %%mm1 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l05502"></a>05502                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l05503"></a>05503                         <span class="stringliteral">"movq      %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* copy MM1 into MM3 */</span>
+<a name="l05504"></a>05504                         <span class="stringliteral">"psrlq       $16, %%mm1 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l05505"></a>05505                         <span class="stringliteral">"paddsw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l05506"></a>05506                         <span class="stringliteral">"packuswb  %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* pack shift result with saturation */</span>
+<a name="l05507"></a>05507                         <span class="stringliteral">"movd      %%mm1, %%ebx \n\t"</span>   <span class="comment">/* copy saturated result into EBX */</span>
+<a name="l05508"></a>05508                         <span class="stringliteral">"mov      %%bl, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l05509"></a>05509                         <span class="comment">/* -- */</span>
+<a name="l05510"></a>05510                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span>   <span class="comment">/* move two rows up */</span>
+<a name="l05511"></a>05511                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span> <span class="stringliteral">"inc              %%esi \n\t"</span>     <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l05512"></a>05512                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l05513"></a>05513                         <span class="comment">/* --- */</span>
+<a name="l05514"></a>05514                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l05515"></a>05515                         <span class="stringliteral">"jnz            .L10362 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05516"></a>05516                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
+<a name="l05517"></a>05517                         <span class="stringliteral">"add          $2, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
+<a name="l05518"></a>05518                         <span class="stringliteral">"dec              %%edx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l05519"></a>05519                         <span class="stringliteral">"jnz            .L10360 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05520"></a>05520                         <span class="comment">/* --- */</span>
+<a name="l05521"></a>05521                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
+<a name="l05522"></a>05522                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
+<a name="l05523"></a>05523                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
+<a name="l05524"></a>05524                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
+<a name="l05525"></a>05525                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
+<a name="l05526"></a>05526                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
+<a name="l05527"></a>05527                         <span class="stringliteral">"m"</span>(NRightShift)        <span class="comment">/* %5 */</span>
+<a name="l05528"></a>05528                         );
+<a name="l05529"></a>05529 <span class="preprocessor">#endif</span>
+<a name="l05530"></a>05530 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l05531"></a>05531 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
+<a name="l05532"></a>05532         } <span class="keywordflow">else</span> {
+<a name="l05533"></a>05533                 <span class="comment">/* No non-MMX implementation yet */</span>
+<a name="l05534"></a>05534                 <span class="keywordflow">return</span> (-1);
+<a name="l05535"></a>05535         }
+<a name="l05536"></a>05536 }
+<a name="l05537"></a>05537 
+<a name="l05552"></a><a class="code" href="_s_d_l__image_filter_8h.html#a9aaa45452b04f51f52826c2104ea3b85">05552</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a5253738dc4c892352b078d9a7dec2b20" title="Filter using ConvolveKernel5x5ShiftRight: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel5x5ShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</sp [...]
+<a name="l05553"></a>05553                                                                                            <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> NRightShift)
+<a name="l05554"></a>05554 {
+<a name="l05555"></a>05555         <span class="comment">/* Validate input parameters */</span>
+<a name="l05556"></a>05556         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
+<a name="l05557"></a>05557                 <span class="keywordflow">return</span>(-1);
+<a name="l05558"></a>05558 
+<a name="l05559"></a>05559         <span class="keywordflow">if</span> ((columns < 5) || (rows < 5) || (NRightShift > 7))
+<a name="l05560"></a>05560                 <span class="keywordflow">return</span> (-1);
+<a name="l05561"></a>05561 
+<a name="l05562"></a>05562         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
+<a name="l05563"></a>05563 <span class="comment">//#ifdef USE_MMX</span>
+<a name="l05564"></a>05564 <span class="preprocessor">#if defined(USE_MMX) && defined(i386)</span>
+<a name="l05565"></a>05565 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l05566"></a>05566 <span class="preprocessor"></span>                __asm
+<a name="l05567"></a>05567                 {
+<a name="l05568"></a>05568                         pusha
+<a name="l05569"></a>05569                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
+<a name="l05570"></a>05570                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
+<a name="l05571"></a>05571                                 mov bl, NRightShift     <span class="comment">/* load NRightShift into BL */</span>
+<a name="l05572"></a>05572                                 movd mm5, ebx           <span class="comment">/* copy NRightShift into MM5 */</span>
+<a name="l05573"></a>05573                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l05574"></a>05574                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
+<a name="l05575"></a>05575                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
+<a name="l05576"></a>05576                                 add edi, 2      <span class="comment">/* 2 column offset from the left edge */</span>
+<a name="l05577"></a>05577                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
+<a name="l05578"></a>05578                                 shl eax, 1      <span class="comment">/* EAX = columns * 2 */</span>
+<a name="l05579"></a>05579                                 add edi, eax    <span class="comment">/* 2 row offset from the top edge */</span>
+<a name="l05580"></a>05580                                 shr eax, 1      <span class="comment">/* EAX = columns */</span>
+<a name="l05581"></a>05581                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
+<a name="l05582"></a>05582                                 sub ebx, 4      <span class="comment">/* do not use first 2 and last 2 rows */</span>
+<a name="l05583"></a>05583                                 <span class="comment">/* ---, */</span>
+<a name="l05584"></a>05584 L10370:
+<a name="l05585"></a>05585                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l05586"></a>05586                                 sub ecx, 4      <span class="comment">/* do not use first 2 and last 2 columns */</span>
+<a name="l05587"></a>05587                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l05588"></a>05588 L10372:
+<a name="l05589"></a>05589                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l05590"></a>05590                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
+<a name="l05591"></a>05591                                 <span class="comment">/* --- 1 */</span>
+<a name="l05592"></a>05592                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05593"></a>05593                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05594"></a>05594                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05595"></a>05595                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05596"></a>05596                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05597"></a>05597                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05598"></a>05598                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05599"></a>05599                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05600"></a>05600                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05601"></a>05601                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05602"></a>05602                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05603"></a>05603                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05604"></a>05604                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05605"></a>05605                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05606"></a>05606                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05607"></a>05607                                 <span class="comment">/* --- 2 */</span>
+<a name="l05608"></a>05608                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05609"></a>05609                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05610"></a>05610                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05611"></a>05611                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05612"></a>05612                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05613"></a>05613                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05614"></a>05614                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05615"></a>05615                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05616"></a>05616                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05617"></a>05617                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05618"></a>05618                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05619"></a>05619                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05620"></a>05620                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05621"></a>05621                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05622"></a>05622                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05623"></a>05623                                 <span class="comment">/* --- 3 */</span>
+<a name="l05624"></a>05624                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05625"></a>05625                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05626"></a>05626                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05627"></a>05627                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05628"></a>05628                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05629"></a>05629                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05630"></a>05630                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05631"></a>05631                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05632"></a>05632                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05633"></a>05633                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05634"></a>05634                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05635"></a>05635                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05636"></a>05636                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05637"></a>05637                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05638"></a>05638                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05639"></a>05639                                 <span class="comment">/* --- 4 */</span>
+<a name="l05640"></a>05640                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05641"></a>05641                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05642"></a>05642                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05643"></a>05643                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05644"></a>05644                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05645"></a>05645                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05646"></a>05646                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05647"></a>05647                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05648"></a>05648                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05649"></a>05649                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05650"></a>05650                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05651"></a>05651                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05652"></a>05652                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05653"></a>05653                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05654"></a>05654                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05655"></a>05655                                 <span class="comment">/* --- 5 */</span>
+<a name="l05656"></a>05656                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05657"></a>05657                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05658"></a>05658                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05659"></a>05659                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05660"></a>05660                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05661"></a>05661                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05662"></a>05662                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05663"></a>05663                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05664"></a>05664                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05665"></a>05665                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05666"></a>05666                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05667"></a>05667                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05668"></a>05668                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05669"></a>05669                                 <span class="comment">/* ---, */</span>
+<a name="l05670"></a>05670                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l05671"></a>05671                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l05672"></a>05672                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l05673"></a>05673                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l05674"></a>05674                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l05675"></a>05675                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l05676"></a>05676                                 movd mm1, eax           <span class="comment">/* save EAX in MM1 */</span>
+<a name="l05677"></a>05677                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
+<a name="l05678"></a>05678                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l05679"></a>05679                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l05680"></a>05680                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
+<a name="l05681"></a>05681                                 <span class="comment">/* --, */</span>
+<a name="l05682"></a>05682                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l05683"></a>05683                                 sub edx, 72     <span class="comment">/* EDX = Kernel address */</span>
+<a name="l05684"></a>05684                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l05685"></a>05685                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l05686"></a>05686                                 <span class="comment">/* ---, */</span>
+<a name="l05687"></a>05687                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l05688"></a>05688                                 jnz            L10372           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05689"></a>05689                                 add esi, 4      <span class="comment">/* move to the next row in Src */</span>
+<a name="l05690"></a>05690                                 add edi, 4      <span class="comment">/* move to the next row in Dest */</span>
+<a name="l05691"></a>05691                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l05692"></a>05692                                 jnz            L10370           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05693"></a>05693                                 <span class="comment">/* ---, */</span>
+<a name="l05694"></a>05694                                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l05695"></a>05695                                 popa
+<a name="l05696"></a>05696                 }
+<a name="l05697"></a>05697 <span class="preprocessor">#else</span>
+<a name="l05698"></a>05698 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l05699"></a>05699                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
+<a name="l05700"></a>05700                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
+<a name="l05701"></a>05701                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load NRightShift into BL */</span>
+<a name="l05702"></a>05702                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy NRightShift into MM5 */</span>
+<a name="l05703"></a>05703                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l05704"></a>05704                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
+<a name="l05705"></a>05705                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
+<a name="l05706"></a>05706                         <span class="stringliteral">"add          $2, %%edi \n\t"</span>   <span class="comment">/* 2 column offset from the left edge */</span>
+<a name="l05707"></a>05707                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
+<a name="l05708"></a>05708                         <span class="stringliteral">"shl          $1, %%eax \n\t"</span>   <span class="comment">/* EAX = columns * 2 */</span>
+<a name="l05709"></a>05709                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 2 row offset from the top edge */</span>
+<a name="l05710"></a>05710                         <span class="stringliteral">"shr          $1, %%eax \n\t"</span>   <span class="comment">/* EAX = columns */</span>
+<a name="l05711"></a>05711                         <span class="stringliteral">"mov          %2, %%ebx \n\t"</span>   <span class="comment">/* initialize ROWS counter */</span>
+<a name="l05712"></a>05712                         <span class="stringliteral">"sub          $4, %%ebx \n\t"</span>   <span class="comment">/* do not use first 2 and last 2 rows */</span>
+<a name="l05713"></a>05713                         <span class="comment">/* --- */</span>
+<a name="l05714"></a>05714                         <span class="stringliteral">".L10370:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l05715"></a>05715                         <span class="stringliteral">"sub          $4, %%ecx \n\t"</span>   <span class="comment">/* do not use first 2 and last 2 columns */</span>
+<a name="l05716"></a>05716                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l05717"></a>05717                         <span class="stringliteral">".L10372:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l05718"></a>05718                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
+<a name="l05719"></a>05719                         <span class="comment">/* --- 1 */</span>
+<a name="l05720"></a>05720                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05721"></a>05721                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05722"></a>05722                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05723"></a>05723                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05724"></a>05724                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05725"></a>05725                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05726"></a>05726                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05727"></a>05727                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05728"></a>05728                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05729"></a>05729                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05730"></a>05730                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05731"></a>05731                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05732"></a>05732                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05733"></a>05733                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05734"></a>05734                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05735"></a>05735                         <span class="comment">/* --- 2 */</span>
+<a name="l05736"></a>05736                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05737"></a>05737                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05738"></a>05738                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05739"></a>05739                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05740"></a>05740                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05741"></a>05741                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05742"></a>05742                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05743"></a>05743                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05744"></a>05744                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05745"></a>05745                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05746"></a>05746                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05747"></a>05747                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05748"></a>05748                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05749"></a>05749                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05750"></a>05750                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05751"></a>05751                         <span class="comment">/* --- 3 */</span>
+<a name="l05752"></a>05752                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05753"></a>05753                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05754"></a>05754                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05755"></a>05755                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05756"></a>05756                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05757"></a>05757                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05758"></a>05758                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05759"></a>05759                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05760"></a>05760                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05761"></a>05761                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05762"></a>05762                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05763"></a>05763                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05764"></a>05764                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05765"></a>05765                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05766"></a>05766                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05767"></a>05767                         <span class="comment">/* --- 4 */</span>
+<a name="l05768"></a>05768                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05769"></a>05769                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05770"></a>05770                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05771"></a>05771                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05772"></a>05772                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05773"></a>05773                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05774"></a>05774                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05775"></a>05775                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05776"></a>05776                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05777"></a>05777                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05778"></a>05778                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05779"></a>05779                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05780"></a>05780                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05781"></a>05781                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05782"></a>05782                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05783"></a>05783                         <span class="comment">/* --- 5 */</span>
+<a name="l05784"></a>05784                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05785"></a>05785                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05786"></a>05786                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05787"></a>05787                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05788"></a>05788                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05789"></a>05789                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05790"></a>05790                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05791"></a>05791                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05792"></a>05792                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05793"></a>05793                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l05794"></a>05794                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l05795"></a>05795                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05796"></a>05796                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05797"></a>05797                         <span class="comment">/* --- */</span>
+<a name="l05798"></a>05798                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l05799"></a>05799                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l05800"></a>05800                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l05801"></a>05801                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l05802"></a>05802                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l05803"></a>05803                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l05804"></a>05804                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EAX in MM1 */</span>
+<a name="l05805"></a>05805                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
+<a name="l05806"></a>05806                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l05807"></a>05807                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l05808"></a>05808                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
+<a name="l05809"></a>05809                         <span class="comment">/* -- */</span>
+<a name="l05810"></a>05810                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l05811"></a>05811                         <span class="stringliteral">"sub         $72, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
+<a name="l05812"></a>05812                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l05813"></a>05813                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l05814"></a>05814                         <span class="comment">/* --- */</span>
+<a name="l05815"></a>05815                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l05816"></a>05816                         <span class="stringliteral">"jnz            .L10372 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05817"></a>05817                         <span class="stringliteral">"add          $4, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
+<a name="l05818"></a>05818                         <span class="stringliteral">"add          $4, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
+<a name="l05819"></a>05819                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l05820"></a>05820                         <span class="stringliteral">"jnz            .L10370 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l05821"></a>05821                         <span class="comment">/* --- */</span>
+<a name="l05822"></a>05822                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
+<a name="l05823"></a>05823                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
+<a name="l05824"></a>05824                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
+<a name="l05825"></a>05825                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
+<a name="l05826"></a>05826                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
+<a name="l05827"></a>05827                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
+<a name="l05828"></a>05828                         <span class="stringliteral">"m"</span>(NRightShift)        <span class="comment">/* %5 */</span>
+<a name="l05829"></a>05829                         );
+<a name="l05830"></a>05830 <span class="preprocessor">#endif</span>
+<a name="l05831"></a>05831 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l05832"></a>05832 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
+<a name="l05833"></a>05833         } <span class="keywordflow">else</span> {
+<a name="l05834"></a>05834                 <span class="comment">/* No non-MMX implementation yet */</span>
+<a name="l05835"></a>05835                 <span class="keywordflow">return</span> (-1);
+<a name="l05836"></a>05836         }
+<a name="l05837"></a>05837 }
+<a name="l05838"></a>05838 
+<a name="l05853"></a><a class="code" href="_s_d_l__image_filter_8h.html#a6dbe52e917c0858bd311e9ce75219587">05853</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a48b40065652dda699875f1425b9227a6" title="Filter using ConvolveKernel7x7ShiftRight: Dij = saturation0and255( ... )">SDL_imageFilterConvolveKernel7x7ShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</sp [...]
+<a name="l05854"></a>05854                                                                                            <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> NRightShift)
+<a name="l05855"></a>05855 {
+<a name="l05856"></a>05856         <span class="comment">/* Validate input parameters */</span>
+<a name="l05857"></a>05857         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
+<a name="l05858"></a>05858                 <span class="keywordflow">return</span>(-1);
+<a name="l05859"></a>05859 
+<a name="l05860"></a>05860         <span class="keywordflow">if</span> ((columns < 7) || (rows < 7) || (NRightShift > 7))
+<a name="l05861"></a>05861                 <span class="keywordflow">return</span> (-1);
+<a name="l05862"></a>05862 
+<a name="l05863"></a>05863         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
+<a name="l05864"></a>05864 <span class="comment">//#ifdef USE_MMX</span>
+<a name="l05865"></a>05865 <span class="preprocessor">#if defined(USE_MMX) && defined(i386)</span>
+<a name="l05866"></a>05866 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l05867"></a>05867 <span class="preprocessor"></span>                __asm
+<a name="l05868"></a>05868                 {
+<a name="l05869"></a>05869                         pusha
+<a name="l05870"></a>05870                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
+<a name="l05871"></a>05871                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
+<a name="l05872"></a>05872                                 mov bl, NRightShift     <span class="comment">/* load NRightShift into BL */</span>
+<a name="l05873"></a>05873                                 movd mm5, ebx           <span class="comment">/* copy NRightShift into MM5 */</span>
+<a name="l05874"></a>05874                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l05875"></a>05875                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
+<a name="l05876"></a>05876                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
+<a name="l05877"></a>05877                                 add edi, 3      <span class="comment">/* 3 column offset from the left edge */</span>
+<a name="l05878"></a>05878                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
+<a name="l05879"></a>05879                                 add edi, eax    <span class="comment">/* 3 row offset from the top edge */</span>
+<a name="l05880"></a>05880                                 add edi, eax
+<a name="l05881"></a>05881                                 add edi, eax
+<a name="l05882"></a>05882                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
+<a name="l05883"></a>05883                                 sub ebx, 6      <span class="comment">/* do not use first 3 and last 3 rows */</span>
+<a name="l05884"></a>05884                                 <span class="comment">/* ---, */</span>
+<a name="l05885"></a>05885 L10380:
+<a name="l05886"></a>05886                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l05887"></a>05887                                 sub ecx, 6      <span class="comment">/* do not use first 3 and last 3 columns */</span>
+<a name="l05888"></a>05888                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l05889"></a>05889 L10382:
+<a name="l05890"></a>05890                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l05891"></a>05891                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
+<a name="l05892"></a>05892                                 <span class="comment">/* --- 1 */</span>
+<a name="l05893"></a>05893                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05894"></a>05894                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05895"></a>05895                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05896"></a>05896                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05897"></a>05897                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05898"></a>05898                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05899"></a>05899                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05900"></a>05900                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05901"></a>05901                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05902"></a>05902                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05903"></a>05903                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05904"></a>05904                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05905"></a>05905                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05906"></a>05906                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05907"></a>05907                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05908"></a>05908                                 <span class="comment">/* --- 2 */</span>
+<a name="l05909"></a>05909                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05910"></a>05910                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05911"></a>05911                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05912"></a>05912                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05913"></a>05913                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05914"></a>05914                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05915"></a>05915                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05916"></a>05916                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05917"></a>05917                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05918"></a>05918                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05919"></a>05919                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05920"></a>05920                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05921"></a>05921                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05922"></a>05922                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05923"></a>05923                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05924"></a>05924                                 <span class="comment">/* --- 3 */</span>
+<a name="l05925"></a>05925                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05926"></a>05926                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05927"></a>05927                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05928"></a>05928                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05929"></a>05929                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05930"></a>05930                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05931"></a>05931                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05932"></a>05932                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05933"></a>05933                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05934"></a>05934                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05935"></a>05935                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05936"></a>05936                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05937"></a>05937                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05938"></a>05938                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05939"></a>05939                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05940"></a>05940                                 <span class="comment">/* --- 4 */</span>
+<a name="l05941"></a>05941                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05942"></a>05942                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05943"></a>05943                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05944"></a>05944                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05945"></a>05945                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05946"></a>05946                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05947"></a>05947                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05948"></a>05948                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05949"></a>05949                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05950"></a>05950                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05951"></a>05951                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05952"></a>05952                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05953"></a>05953                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05954"></a>05954                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05955"></a>05955                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05956"></a>05956                                 <span class="comment">/* --- 5 */</span>
+<a name="l05957"></a>05957                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05958"></a>05958                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05959"></a>05959                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05960"></a>05960                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05961"></a>05961                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05962"></a>05962                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05963"></a>05963                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05964"></a>05964                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05965"></a>05965                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05966"></a>05966                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05967"></a>05967                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05968"></a>05968                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05969"></a>05969                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05970"></a>05970                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05971"></a>05971                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05972"></a>05972                                 <span class="comment">/* --- 6 */</span>
+<a name="l05973"></a>05973                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05974"></a>05974                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05975"></a>05975                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l05976"></a>05976                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05977"></a>05977                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05978"></a>05978                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05979"></a>05979                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05980"></a>05980                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05981"></a>05981                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05982"></a>05982                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05983"></a>05983                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05984"></a>05984                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05985"></a>05985                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l05986"></a>05986                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l05987"></a>05987                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l05988"></a>05988                                 <span class="comment">/* --- 7 */</span>
+<a name="l05989"></a>05989                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l05990"></a>05990                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l05991"></a>05991                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05992"></a>05992                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l05993"></a>05993                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l05994"></a>05994                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l05995"></a>05995                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l05996"></a>05996                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05997"></a>05997                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l05998"></a>05998                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l05999"></a>05999                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l06000"></a>06000                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06001"></a>06001                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06002"></a>06002                                 <span class="comment">/* ---, */</span>
+<a name="l06003"></a>06003                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l06004"></a>06004                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06005"></a>06005                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l06006"></a>06006                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l06007"></a>06007                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l06008"></a>06008                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l06009"></a>06009                                 movd mm1, eax           <span class="comment">/* save EAX in MM1 */</span>
+<a name="l06010"></a>06010                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
+<a name="l06011"></a>06011                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l06012"></a>06012                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l06013"></a>06013                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
+<a name="l06014"></a>06014                                 <span class="comment">/* --, */</span>
+<a name="l06015"></a>06015                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l06016"></a>06016                                 sub edx, 104    <span class="comment">/* EDX = Kernel address */</span>
+<a name="l06017"></a>06017                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l06018"></a>06018                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l06019"></a>06019                                 <span class="comment">/* ---, */</span>
+<a name="l06020"></a>06020                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l06021"></a>06021                                 jnz            L10382           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l06022"></a>06022                                 add esi, 6      <span class="comment">/* move to the next row in Src */</span>
+<a name="l06023"></a>06023                                 add edi, 6      <span class="comment">/* move to the next row in Dest */</span>
+<a name="l06024"></a>06024                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l06025"></a>06025                                 jnz            L10380           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l06026"></a>06026                                 <span class="comment">/* ---, */</span>
+<a name="l06027"></a>06027                                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l06028"></a>06028                                 popa
+<a name="l06029"></a>06029                 }
+<a name="l06030"></a>06030 <span class="preprocessor">#else</span>
+<a name="l06031"></a>06031 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l06032"></a>06032                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
+<a name="l06033"></a>06033                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
+<a name="l06034"></a>06034                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load NRightShift into BL */</span>
+<a name="l06035"></a>06035                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy NRightShift into MM5 */</span>
+<a name="l06036"></a>06036                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l06037"></a>06037                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
+<a name="l06038"></a>06038                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
+<a name="l06039"></a>06039                         <span class="stringliteral">"add          $3, %%edi \n\t"</span>   <span class="comment">/* 3 column offset from the left edge */</span>
+<a name="l06040"></a>06040                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
+<a name="l06041"></a>06041                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 3 row offset from the top edge */</span>
+<a name="l06042"></a>06042                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"mov          %2, %%ebx \n\t"</span>       <span class="comment">/* initialize ROWS counter */</span>
+<a name="l06043"></a>06043                         <span class="stringliteral">"sub          $6, %%ebx \n\t"</span>   <span class="comment">/* do not use first 3 and last 3 rows */</span>
+<a name="l06044"></a>06044                         <span class="comment">/* --- */</span>
+<a name="l06045"></a>06045                         <span class="stringliteral">".L10380:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l06046"></a>06046                         <span class="stringliteral">"sub          $6, %%ecx \n\t"</span>   <span class="comment">/* do not use first 3 and last 3 columns */</span>
+<a name="l06047"></a>06047                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l06048"></a>06048                         <span class="stringliteral">".L10382:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l06049"></a>06049                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
+<a name="l06050"></a>06050                         <span class="comment">/* --- 1 */</span>
+<a name="l06051"></a>06051                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06052"></a>06052                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06053"></a>06053                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06054"></a>06054                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06055"></a>06055                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06056"></a>06056                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06057"></a>06057                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06058"></a>06058                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06059"></a>06059                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06060"></a>06060                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06061"></a>06061                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06062"></a>06062                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06063"></a>06063                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06064"></a>06064                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06065"></a>06065                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06066"></a>06066                         <span class="comment">/* --- 2 */</span>
+<a name="l06067"></a>06067                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06068"></a>06068                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06069"></a>06069                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06070"></a>06070                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06071"></a>06071                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06072"></a>06072                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06073"></a>06073                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06074"></a>06074                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06075"></a>06075                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06076"></a>06076                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06077"></a>06077                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06078"></a>06078                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06079"></a>06079                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06080"></a>06080                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06081"></a>06081                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06082"></a>06082                         <span class="comment">/* --- 3 */</span>
+<a name="l06083"></a>06083                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06084"></a>06084                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06085"></a>06085                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06086"></a>06086                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06087"></a>06087                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06088"></a>06088                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06089"></a>06089                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06090"></a>06090                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06091"></a>06091                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06092"></a>06092                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06093"></a>06093                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06094"></a>06094                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06095"></a>06095                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06096"></a>06096                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06097"></a>06097                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06098"></a>06098                         <span class="comment">/* --- 4 */</span>
+<a name="l06099"></a>06099                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06100"></a>06100                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06101"></a>06101                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06102"></a>06102                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06103"></a>06103                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06104"></a>06104                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06105"></a>06105                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06106"></a>06106                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06107"></a>06107                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06108"></a>06108                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06109"></a>06109                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06110"></a>06110                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06111"></a>06111                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06112"></a>06112                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06113"></a>06113                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06114"></a>06114                         <span class="comment">/* --- 5 */</span>
+<a name="l06115"></a>06115                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06116"></a>06116                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06117"></a>06117                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06118"></a>06118                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06119"></a>06119                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06120"></a>06120                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06121"></a>06121                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06122"></a>06122                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06123"></a>06123                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06124"></a>06124                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06125"></a>06125                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06126"></a>06126                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06127"></a>06127                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06128"></a>06128                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06129"></a>06129                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06130"></a>06130                         <span class="comment">/* --- 6 */</span>
+<a name="l06131"></a>06131                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06132"></a>06132                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06133"></a>06133                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06134"></a>06134                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06135"></a>06135                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06136"></a>06136                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06137"></a>06137                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06138"></a>06138                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06139"></a>06139                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06140"></a>06140                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06141"></a>06141                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06142"></a>06142                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06143"></a>06143                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06144"></a>06144                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06145"></a>06145                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06146"></a>06146                         <span class="comment">/* --- 7 */</span>
+<a name="l06147"></a>06147                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06148"></a>06148                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06149"></a>06149                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06150"></a>06150                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06151"></a>06151                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06152"></a>06152                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06153"></a>06153                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06154"></a>06154                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06155"></a>06155                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06156"></a>06156                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06157"></a>06157                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06158"></a>06158                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06159"></a>06159                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06160"></a>06160                         <span class="comment">/* --- */</span>
+<a name="l06161"></a>06161                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l06162"></a>06162                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06163"></a>06163                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l06164"></a>06164                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l06165"></a>06165                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l06166"></a>06166                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l06167"></a>06167                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EAX in MM1 */</span>
+<a name="l06168"></a>06168                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
+<a name="l06169"></a>06169                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l06170"></a>06170                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l06171"></a>06171                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
+<a name="l06172"></a>06172                         <span class="comment">/* -- */</span>
+<a name="l06173"></a>06173                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l06174"></a>06174                         <span class="stringliteral">"sub        $104, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
+<a name="l06175"></a>06175                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l06176"></a>06176                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l06177"></a>06177                         <span class="comment">/* --- */</span>
+<a name="l06178"></a>06178                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l06179"></a>06179                         <span class="stringliteral">"jnz            .L10382 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l06180"></a>06180                         <span class="stringliteral">"add          $6, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
+<a name="l06181"></a>06181                         <span class="stringliteral">"add          $6, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
+<a name="l06182"></a>06182                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l06183"></a>06183                         <span class="stringliteral">"jnz            .L10380 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l06184"></a>06184                         <span class="comment">/* --- */</span>
+<a name="l06185"></a>06185                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
+<a name="l06186"></a>06186                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
+<a name="l06187"></a>06187                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
+<a name="l06188"></a>06188                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
+<a name="l06189"></a>06189                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
+<a name="l06190"></a>06190                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
+<a name="l06191"></a>06191                         <span class="stringliteral">"m"</span>(NRightShift)        <span class="comment">/* %5 */</span>
+<a name="l06192"></a>06192                         );
+<a name="l06193"></a>06193 <span class="preprocessor">#endif</span>
+<a name="l06194"></a>06194 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l06195"></a>06195 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
+<a name="l06196"></a>06196         } <span class="keywordflow">else</span> {
+<a name="l06197"></a>06197                 <span class="comment">/* No non-MMX implementation yet */</span>
+<a name="l06198"></a>06198                 <span class="keywordflow">return</span> (-1);
+<a name="l06199"></a>06199         }
+<a name="l06200"></a>06200 }
+<a name="l06201"></a>06201 
+<a name="l06216"></a><a class="code" href="_s_d_l__image_filter_8h.html#ad2702d0524a16032118fdf67e3e0f44a">06216</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a6aaa30dc51d1e51585d02d123b0f1a7a" title="Filter using ConvolveKernel9x9ShiftRight: Dij = saturation255( ... )">SDL_imageFilterConvolveKernel9x9ShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span>  [...]
+<a name="l06217"></a>06217                                                                                            <span class="keywordtype">signed</span> <span class="keywordtype">short</span> *Kernel, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> NRightShift)
+<a name="l06218"></a>06218 {
+<a name="l06219"></a>06219         <span class="comment">/* Validate input parameters */</span>
+<a name="l06220"></a>06220         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
+<a name="l06221"></a>06221                 <span class="keywordflow">return</span>(-1);
+<a name="l06222"></a>06222 
+<a name="l06223"></a>06223         <span class="keywordflow">if</span> ((columns < 9) || (rows < 9) || (NRightShift > 7))
+<a name="l06224"></a>06224                 <span class="keywordflow">return</span> (-1);
+<a name="l06225"></a>06225 
+<a name="l06226"></a>06226         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
+<a name="l06227"></a>06227 <span class="comment">//#ifdef USE_MMX</span>
+<a name="l06228"></a>06228 <span class="preprocessor">#if defined(USE_MMX) && defined(i386)</span>
+<a name="l06229"></a>06229 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l06230"></a>06230 <span class="preprocessor"></span>                __asm
+<a name="l06231"></a>06231                 {
+<a name="l06232"></a>06232                         pusha
+<a name="l06233"></a>06233                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
+<a name="l06234"></a>06234                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
+<a name="l06235"></a>06235                                 mov bl, NRightShift     <span class="comment">/* load NRightShift into BL */</span>
+<a name="l06236"></a>06236                                 movd mm5, ebx           <span class="comment">/* copy NRightShift into MM5 */</span>
+<a name="l06237"></a>06237                                 mov edx, Kernel         <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l06238"></a>06238                                 mov esi, Src    <span class="comment">/* load Src  address to ESI */</span>
+<a name="l06239"></a>06239                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
+<a name="l06240"></a>06240                                 add edi, 4      <span class="comment">/* 4 column offset from the left edge */</span>
+<a name="l06241"></a>06241                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
+<a name="l06242"></a>06242                                 add edi, eax    <span class="comment">/* 4 row offset from the top edge */</span>
+<a name="l06243"></a>06243                                 add edi, eax
+<a name="l06244"></a>06244                                 add edi, eax
+<a name="l06245"></a>06245                                 add edi, eax
+<a name="l06246"></a>06246                                 mov ebx, rows           <span class="comment">/* initialize ROWS counter */</span>
+<a name="l06247"></a>06247                                 sub ebx, 8      <span class="comment">/* do not use first 4 and last 4 rows */</span>
+<a name="l06248"></a>06248                                 <span class="comment">/* ---, */</span>
+<a name="l06249"></a>06249 L10390:
+<a name="l06250"></a>06250                         mov ecx, eax    <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l06251"></a>06251                                 sub ecx, 8      <span class="comment">/* do not use first 4 and last 4 columns */</span>
+<a name="l06252"></a>06252                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l06253"></a>06253 L10392:
+<a name="l06254"></a>06254                         pxor mm7, mm7           <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l06255"></a>06255                                 movd mm6, esi           <span class="comment">/* save ESI in MM6 */</span>
+<a name="l06256"></a>06256                                 <span class="comment">/* --- 1 */</span>
+<a name="l06257"></a>06257                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06258"></a>06258                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06259"></a>06259                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06260"></a>06260                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06261"></a>06261                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06262"></a>06262                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06263"></a>06263                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06264"></a>06264                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06265"></a>06265                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06266"></a>06266                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06267"></a>06267                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06268"></a>06268                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06269"></a>06269                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l06270"></a>06270                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06271"></a>06271                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06272"></a>06272                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06273"></a>06273                         dec              esi
+<a name="l06274"></a>06274                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06275"></a>06275                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06276"></a>06276                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06277"></a>06277                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06278"></a>06278                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06279"></a>06279                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06280"></a>06280                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06281"></a>06281                                 <span class="comment">/* --- 2 */</span>
+<a name="l06282"></a>06282                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06283"></a>06283                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06284"></a>06284                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06285"></a>06285                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06286"></a>06286                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06287"></a>06287                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06288"></a>06288                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06289"></a>06289                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06290"></a>06290                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06291"></a>06291                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06292"></a>06292                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06293"></a>06293                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06294"></a>06294                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l06295"></a>06295                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06296"></a>06296                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06297"></a>06297                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06298"></a>06298                         dec              esi
+<a name="l06299"></a>06299                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06300"></a>06300                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06301"></a>06301                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06302"></a>06302                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06303"></a>06303                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06304"></a>06304                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06305"></a>06305                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06306"></a>06306                                 <span class="comment">/* --- 3 */</span>
+<a name="l06307"></a>06307                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06308"></a>06308                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06309"></a>06309                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06310"></a>06310                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06311"></a>06311                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06312"></a>06312                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06313"></a>06313                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06314"></a>06314                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06315"></a>06315                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06316"></a>06316                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06317"></a>06317                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06318"></a>06318                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06319"></a>06319                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l06320"></a>06320                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06321"></a>06321                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06322"></a>06322                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06323"></a>06323                         dec              esi
+<a name="l06324"></a>06324                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06325"></a>06325                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06326"></a>06326                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06327"></a>06327                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06328"></a>06328                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06329"></a>06329                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06330"></a>06330                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06331"></a>06331                                 <span class="comment">/* --- 4 */</span>
+<a name="l06332"></a>06332                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06333"></a>06333                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06334"></a>06334                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06335"></a>06335                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06336"></a>06336                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06337"></a>06337                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06338"></a>06338                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06339"></a>06339                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06340"></a>06340                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06341"></a>06341                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06342"></a>06342                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06343"></a>06343                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06344"></a>06344                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l06345"></a>06345                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06346"></a>06346                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06347"></a>06347                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06348"></a>06348                         dec              esi
+<a name="l06349"></a>06349                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06350"></a>06350                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06351"></a>06351                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06352"></a>06352                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06353"></a>06353                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06354"></a>06354                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06355"></a>06355                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06356"></a>06356                                 <span class="comment">/* --- 5 */</span>
+<a name="l06357"></a>06357                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06358"></a>06358                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06359"></a>06359                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06360"></a>06360                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06361"></a>06361                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06362"></a>06362                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06363"></a>06363                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06364"></a>06364                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06365"></a>06365                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06366"></a>06366                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06367"></a>06367                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06368"></a>06368                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06369"></a>06369                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l06370"></a>06370                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06371"></a>06371                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06372"></a>06372                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06373"></a>06373                         dec              esi
+<a name="l06374"></a>06374                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06375"></a>06375                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06376"></a>06376                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06377"></a>06377                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06378"></a>06378                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06379"></a>06379                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06380"></a>06380                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06381"></a>06381                                 <span class="comment">/* --- 6 */</span>
+<a name="l06382"></a>06382                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06383"></a>06383                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06384"></a>06384                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06385"></a>06385                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06386"></a>06386                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06387"></a>06387                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06388"></a>06388                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06389"></a>06389                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06390"></a>06390                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06391"></a>06391                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06392"></a>06392                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06393"></a>06393                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06394"></a>06394                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l06395"></a>06395                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06396"></a>06396                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06397"></a>06397                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06398"></a>06398                         dec              esi
+<a name="l06399"></a>06399                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06400"></a>06400                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06401"></a>06401                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06402"></a>06402                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06403"></a>06403                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06404"></a>06404                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06405"></a>06405                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06406"></a>06406                                 <span class="comment">/* --- 7 */</span>
+<a name="l06407"></a>06407                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06408"></a>06408                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06409"></a>06409                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06410"></a>06410                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06411"></a>06411                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06412"></a>06412                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06413"></a>06413                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06414"></a>06414                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06415"></a>06415                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06416"></a>06416                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06417"></a>06417                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06418"></a>06418                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06419"></a>06419                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l06420"></a>06420                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06421"></a>06421                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06422"></a>06422                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06423"></a>06423                         dec              esi
+<a name="l06424"></a>06424                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06425"></a>06425                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06426"></a>06426                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06427"></a>06427                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06428"></a>06428                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06429"></a>06429                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06430"></a>06430                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06431"></a>06431                                 <span class="comment">/* --- 8 */</span>
+<a name="l06432"></a>06432                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06433"></a>06433                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06434"></a>06434                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06435"></a>06435                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06436"></a>06436                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06437"></a>06437                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06438"></a>06438                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06439"></a>06439                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06440"></a>06440                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06441"></a>06441                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06442"></a>06442                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06443"></a>06443                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06444"></a>06444                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l06445"></a>06445                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06446"></a>06446                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06447"></a>06447                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06448"></a>06448                         dec              esi
+<a name="l06449"></a>06449                                 add esi, eax    <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06450"></a>06450                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06451"></a>06451                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06452"></a>06452                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06453"></a>06453                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06454"></a>06454                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06455"></a>06455                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06456"></a>06456                                 <span class="comment">/* --- 9 */</span>
+<a name="l06457"></a>06457                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06458"></a>06458                         movq mm2, mm1           <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06459"></a>06459                                 inc              esi            <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06460"></a>06460                                 movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06461"></a>06461                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06462"></a>06462                                 movq mm4, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06463"></a>06463                         add edx, 8      <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06464"></a>06464                                 punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06465"></a>06465                                 punpckhbw mm2, mm0      <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06466"></a>06466                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06467"></a>06467                                 psrlw mm2, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06468"></a>06468                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06469"></a>06469                                 pmullw mm2, mm4         <span class="comment">/* mult 4 high words of Src and Kernel */</span>
+<a name="l06470"></a>06470                                 paddsw mm1, mm2         <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06471"></a>06471                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06472"></a>06472                                 movq mm1, [esi]         <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06473"></a>06473                         movq mm3, [edx]         <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06474"></a>06474                         punpcklbw mm1, mm0      <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06475"></a>06475                                 psrlw mm1, mm5          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06476"></a>06476                                 pmullw mm1, mm3         <span class="comment">/* mult 4 low  words of Src and Kernel */</span>
+<a name="l06477"></a>06477                                 paddsw mm7, mm1         <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06478"></a>06478                                 <span class="comment">/* ---, */</span>
+<a name="l06479"></a>06479                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l06480"></a>06480                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06481"></a>06481                                 paddsw mm7, mm3         <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l06482"></a>06482                                 movq mm2, mm7           <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l06483"></a>06483                                 psrlq mm7, 16           <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l06484"></a>06484                                 paddsw mm7, mm2         <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l06485"></a>06485                                 movd mm1, eax           <span class="comment">/* save EAX in MM1 */</span>
+<a name="l06486"></a>06486                                 packuswb mm7, mm0       <span class="comment">/* pack division result with saturation */</span>
+<a name="l06487"></a>06487                                 movd eax, mm7           <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l06488"></a>06488                                 mov [edi], al           <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l06489"></a>06489                                 movd eax, mm1           <span class="comment">/* restore saved EAX */</span>
+<a name="l06490"></a>06490                                 <span class="comment">/* --, */</span>
+<a name="l06491"></a>06491                                 movd esi, mm6           <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l06492"></a>06492                                 sub edx, 208    <span class="comment">/* EDX = Kernel address */</span>
+<a name="l06493"></a>06493                                 inc              esi            <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l06494"></a>06494                                 inc              edi            <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l06495"></a>06495                                 <span class="comment">/* ---, */</span>
+<a name="l06496"></a>06496                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l06497"></a>06497                                 jnz            L10392           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l06498"></a>06498                                 add esi, 8      <span class="comment">/* move to the next row in Src */</span>
+<a name="l06499"></a>06499                                 add edi, 8      <span class="comment">/* move to the next row in Dest */</span>
+<a name="l06500"></a>06500                                 dec              ebx            <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l06501"></a>06501                                 jnz            L10390           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l06502"></a>06502                                 <span class="comment">/* ---, */</span>
+<a name="l06503"></a>06503                                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l06504"></a>06504                                 popa
+<a name="l06505"></a>06505                 }
+<a name="l06506"></a>06506 <span class="preprocessor">#else</span>
+<a name="l06507"></a>06507 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l06508"></a>06508                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
+<a name="l06509"></a>06509                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
+<a name="l06510"></a>06510                         <span class="stringliteral">"mov           %5, %%bl \n\t"</span>   <span class="comment">/* load NRightShift into BL */</span>
+<a name="l06511"></a>06511                         <span class="stringliteral">"movd      %%ebx, %%mm5 \n\t"</span>   <span class="comment">/* copy NRightShift into MM5 */</span>
+<a name="l06512"></a>06512                         <span class="stringliteral">"mov          %4, %%edx \n\t"</span>   <span class="comment">/* load Kernel address into EDX */</span>
+<a name="l06513"></a>06513                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* load Src  address to ESI */</span>
+<a name="l06514"></a>06514                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
+<a name="l06515"></a>06515                         <span class="stringliteral">"add          $4, %%edi \n\t"</span>   <span class="comment">/* 4 column offset from the left edge */</span>
+<a name="l06516"></a>06516                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
+<a name="l06517"></a>06517                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* 4 row offset from the top edge */</span>
+<a name="l06518"></a>06518                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"add       %%eax, %%edi \n\t"</span> <span class="stringliteral">"mov          %2, %%ebx \n\t"</span> <span class="comment">/* initialize ROWS counter */</span>
+<a name="l06519"></a>06519                         <span class="stringliteral">"sub          $8, %%ebx \n\t"</span>   <span class="comment">/* do not use first 4 and last 4 rows */</span>
+<a name="l06520"></a>06520                         <span class="comment">/* --- */</span>
+<a name="l06521"></a>06521                         <span class="stringliteral">".L10390:               \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>     <span class="comment">/* initialize COLUMNS counter */</span>
+<a name="l06522"></a>06522                         <span class="stringliteral">"sub          $8, %%ecx \n\t"</span>   <span class="comment">/* do not use first 4 and last 4 columns */</span>
+<a name="l06523"></a>06523                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l06524"></a>06524                         <span class="stringliteral">".L10392:               \n\t"</span> <span class="stringliteral">"pxor      %%mm7, %%mm7 \n\t"</span>     <span class="comment">/* zero MM7 (accumulator) */</span>
+<a name="l06525"></a>06525                         <span class="stringliteral">"movd      %%esi, %%mm6 \n\t"</span>   <span class="comment">/* save ESI in MM6 */</span>
+<a name="l06526"></a>06526                         <span class="comment">/* --- 1 */</span>
+<a name="l06527"></a>06527                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06528"></a>06528                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06529"></a>06529                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06530"></a>06530                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06531"></a>06531                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06532"></a>06532                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06533"></a>06533                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06534"></a>06534                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06535"></a>06535                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06536"></a>06536                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06537"></a>06537                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06538"></a>06538                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06539"></a>06539                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06540"></a>06540                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06541"></a>06541                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06542"></a>06542                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06543"></a>06543                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06544"></a>06544                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06545"></a>06545                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06546"></a>06546                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06547"></a>06547                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06548"></a>06548                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06549"></a>06549                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06550"></a>06550                         <span class="comment">/* --- 2 */</span>
+<a name="l06551"></a>06551                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06552"></a>06552                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06553"></a>06553                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06554"></a>06554                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06555"></a>06555                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06556"></a>06556                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06557"></a>06557                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06558"></a>06558                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06559"></a>06559                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06560"></a>06560                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06561"></a>06561                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06562"></a>06562                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06563"></a>06563                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06564"></a>06564                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06565"></a>06565                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06566"></a>06566                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06567"></a>06567                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06568"></a>06568                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06569"></a>06569                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06570"></a>06570                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06571"></a>06571                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06572"></a>06572                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06573"></a>06573                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06574"></a>06574                         <span class="comment">/* --- 3 */</span>
+<a name="l06575"></a>06575                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06576"></a>06576                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06577"></a>06577                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06578"></a>06578                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06579"></a>06579                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06580"></a>06580                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06581"></a>06581                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06582"></a>06582                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06583"></a>06583                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06584"></a>06584                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06585"></a>06585                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06586"></a>06586                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06587"></a>06587                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06588"></a>06588                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06589"></a>06589                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06590"></a>06590                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06591"></a>06591                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06592"></a>06592                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06593"></a>06593                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06594"></a>06594                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06595"></a>06595                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06596"></a>06596                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06597"></a>06597                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06598"></a>06598                         <span class="comment">/* --- 4 */</span>
+<a name="l06599"></a>06599                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06600"></a>06600                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06601"></a>06601                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06602"></a>06602                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06603"></a>06603                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06604"></a>06604                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06605"></a>06605                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06606"></a>06606                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06607"></a>06607                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06608"></a>06608                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06609"></a>06609                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06610"></a>06610                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06611"></a>06611                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06612"></a>06612                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06613"></a>06613                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06614"></a>06614                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06615"></a>06615                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06616"></a>06616                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06617"></a>06617                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06618"></a>06618                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06619"></a>06619                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06620"></a>06620                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06621"></a>06621                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06622"></a>06622                         <span class="comment">/* --- 5 */</span>
+<a name="l06623"></a>06623                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06624"></a>06624                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06625"></a>06625                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06626"></a>06626                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06627"></a>06627                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06628"></a>06628                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06629"></a>06629                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06630"></a>06630                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06631"></a>06631                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06632"></a>06632                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06633"></a>06633                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06634"></a>06634                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06635"></a>06635                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06636"></a>06636                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06637"></a>06637                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06638"></a>06638                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06639"></a>06639                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06640"></a>06640                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06641"></a>06641                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06642"></a>06642                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06643"></a>06643                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06644"></a>06644                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06645"></a>06645                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06646"></a>06646                         <span class="comment">/* --- 6 */</span>
+<a name="l06647"></a>06647                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06648"></a>06648                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06649"></a>06649                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06650"></a>06650                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06651"></a>06651                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06652"></a>06652                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06653"></a>06653                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06654"></a>06654                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06655"></a>06655                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06656"></a>06656                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06657"></a>06657                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06658"></a>06658                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06659"></a>06659                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06660"></a>06660                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06661"></a>06661                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06662"></a>06662                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06663"></a>06663                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06664"></a>06664                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06665"></a>06665                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06666"></a>06666                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06667"></a>06667                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06668"></a>06668                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06669"></a>06669                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06670"></a>06670                         <span class="comment">/* --- 7 */</span>
+<a name="l06671"></a>06671                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06672"></a>06672                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06673"></a>06673                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06674"></a>06674                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06675"></a>06675                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06676"></a>06676                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06677"></a>06677                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06678"></a>06678                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06679"></a>06679                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06680"></a>06680                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06681"></a>06681                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06682"></a>06682                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06683"></a>06683                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06684"></a>06684                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06685"></a>06685                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06686"></a>06686                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06687"></a>06687                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06688"></a>06688                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06689"></a>06689                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06690"></a>06690                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06691"></a>06691                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06692"></a>06692                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06693"></a>06693                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06694"></a>06694                         <span class="comment">/* --- 8 */</span>
+<a name="l06695"></a>06695                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06696"></a>06696                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06697"></a>06697                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06698"></a>06698                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06699"></a>06699                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06700"></a>06700                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06701"></a>06701                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06702"></a>06702                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06703"></a>06703                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06704"></a>06704                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06705"></a>06705                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06706"></a>06706                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06707"></a>06707                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06708"></a>06708                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06709"></a>06709                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06710"></a>06710                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06711"></a>06711                         <span class="stringliteral">"dec              %%esi \n\t"</span> <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>     <span class="comment">/* move Src pointer 1 row below */</span>
+<a name="l06712"></a>06712                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06713"></a>06713                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06714"></a>06714                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06715"></a>06715                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06716"></a>06716                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06717"></a>06717                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06718"></a>06718                         <span class="comment">/* --- 9 */</span>
+<a name="l06719"></a>06719                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06720"></a>06720                         <span class="stringliteral">"movq      %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* copy MM1 into MM2 */</span>
+<a name="l06721"></a>06721                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move pointer to the next 8 bytes of Src */</span>
+<a name="l06722"></a>06722                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06723"></a>06723                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06724"></a>06724                         <span class="stringliteral">"movq    (%%edx), %%mm4 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06725"></a>06725                         <span class="stringliteral">"add          $8, %%edx \n\t"</span>   <span class="comment">/* move pointer to other 4 words */</span>
+<a name="l06726"></a>06726                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06727"></a>06727                         <span class="stringliteral">"punpckhbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack second 4 bytes into words */</span>
+<a name="l06728"></a>06728                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06729"></a>06729                         <span class="stringliteral">"psrlw     %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06730"></a>06730                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06731"></a>06731                         <span class="stringliteral">"pmullw    %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* mult. 4 high words of Src and Kernel */</span>
+<a name="l06732"></a>06732                         <span class="stringliteral">"paddsw    %%mm2, %%mm1 \n\t"</span>   <span class="comment">/* add 4 words of the high and low bytes */</span>
+<a name="l06733"></a>06733                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06734"></a>06734                         <span class="stringliteral">"movq    (%%esi), %%mm1 \n\t"</span>   <span class="comment">/* load 8 bytes of the Src */</span>
+<a name="l06735"></a>06735                         <span class="stringliteral">"movq    (%%edx), %%mm3 \n\t"</span>   <span class="comment">/* load 4 words of Kernel */</span>
+<a name="l06736"></a>06736                         <span class="stringliteral">"punpcklbw %%mm0, %%mm1 \n\t"</span>   <span class="comment">/* unpack first  4 bytes into words */</span>
+<a name="l06737"></a>06737                         <span class="stringliteral">"psrlw     %%mm5, %%mm1 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l06738"></a>06738                         <span class="stringliteral">"pmullw    %%mm3, %%mm1 \n\t"</span>   <span class="comment">/* mult. 4 low  words of Src and Kernel */</span>
+<a name="l06739"></a>06739                         <span class="stringliteral">"paddsw    %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* add MM1 to accumulator MM7 */</span>
+<a name="l06740"></a>06740                         <span class="comment">/* --- */</span>
+<a name="l06741"></a>06741                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l06742"></a>06742                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06743"></a>06743                         <span class="stringliteral">"paddsw    %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 2 left and 2 right result words */</span>
+<a name="l06744"></a>06744                         <span class="stringliteral">"movq      %%mm7, %%mm2 \n\t"</span>   <span class="comment">/* copy MM7 into MM2 */</span>
+<a name="l06745"></a>06745                         <span class="stringliteral">"psrlq       $16, %%mm7 \n\t"</span>   <span class="comment">/* shift 1 left word to the right */</span>
+<a name="l06746"></a>06746                         <span class="stringliteral">"paddsw    %%mm2, %%mm7 \n\t"</span>   <span class="comment">/* add 1 left and 1 right result words */</span>
+<a name="l06747"></a>06747                         <span class="stringliteral">"movd      %%eax, %%mm1 \n\t"</span>   <span class="comment">/* save EAX in MM1 */</span>
+<a name="l06748"></a>06748                         <span class="stringliteral">"packuswb  %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* pack division result with saturation */</span>
+<a name="l06749"></a>06749                         <span class="stringliteral">"movd      %%mm7, %%eax \n\t"</span>   <span class="comment">/* copy saturated result into EAX */</span>
+<a name="l06750"></a>06750                         <span class="stringliteral">"mov      %%al, (%%edi) \n\t"</span>   <span class="comment">/* copy a byte result into Dest */</span>
+<a name="l06751"></a>06751                         <span class="stringliteral">"movd      %%mm1, %%eax \n\t"</span>   <span class="comment">/* restore saved EAX */</span>
+<a name="l06752"></a>06752                         <span class="comment">/* -- */</span>
+<a name="l06753"></a>06753                         <span class="stringliteral">"movd      %%mm6, %%esi \n\t"</span>   <span class="comment">/* move Src pointer to the top pixel */</span>
+<a name="l06754"></a>06754                         <span class="stringliteral">"sub        $208, %%edx \n\t"</span>   <span class="comment">/* EDX = Kernel address */</span>
+<a name="l06755"></a>06755                         <span class="stringliteral">"inc              %%esi \n\t"</span>   <span class="comment">/* move Src  pointer to the next pixel */</span>
+<a name="l06756"></a>06756                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next pixel */</span>
+<a name="l06757"></a>06757                         <span class="comment">/* --- */</span>
+<a name="l06758"></a>06758                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l06759"></a>06759                         <span class="stringliteral">"jnz            .L10392 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l06760"></a>06760                         <span class="stringliteral">"add          $8, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
+<a name="l06761"></a>06761                         <span class="stringliteral">"add          $8, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
+<a name="l06762"></a>06762                         <span class="stringliteral">"dec              %%ebx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l06763"></a>06763                         <span class="stringliteral">"jnz            .L10390 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l06764"></a>06764                         <span class="comment">/* --- */</span>
+<a name="l06765"></a>06765                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
+<a name="l06766"></a>06766                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
+<a name="l06767"></a>06767                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
+<a name="l06768"></a>06768                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
+<a name="l06769"></a>06769                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
+<a name="l06770"></a>06770                         <span class="stringliteral">"m"</span>(Kernel),            <span class="comment">/* %4 */</span>
+<a name="l06771"></a>06771                         <span class="stringliteral">"m"</span>(NRightShift)        <span class="comment">/* %5 */</span>
+<a name="l06772"></a>06772                         );
+<a name="l06773"></a>06773 <span class="preprocessor">#endif</span>
+<a name="l06774"></a>06774 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l06775"></a>06775 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
+<a name="l06776"></a>06776         } <span class="keywordflow">else</span> {
+<a name="l06777"></a>06777                 <span class="comment">/* No non-MMX implementation yet */</span>
+<a name="l06778"></a>06778                 <span class="keywordflow">return</span> (-1);
+<a name="l06779"></a>06779         }
+<a name="l06780"></a>06780 }
+<a name="l06781"></a>06781 
+<a name="l06782"></a>06782 <span class="comment">/* ------------------------------------------------------------------------------------ */</span>
+<a name="l06783"></a>06783 
+<a name="l06796"></a><a class="code" href="_s_d_l__image_filter_8h.html#a2a0e4e259150abbe33bcddb046c367ba">06796</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a015fe05161b701162d9ecffb01413f1e" title="Filter using SobelX: Dij = saturation255( ... )">SDL_imageFilterSobelX</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Des [...]
+<a name="l06797"></a>06797 {
+<a name="l06798"></a>06798         <span class="comment">/* Validate input parameters */</span>
+<a name="l06799"></a>06799         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL))
+<a name="l06800"></a>06800                 <span class="keywordflow">return</span>(-1);
+<a name="l06801"></a>06801 
+<a name="l06802"></a>06802         <span class="keywordflow">if</span> ((columns < 8) || (rows < 3))
+<a name="l06803"></a>06803                 <span class="keywordflow">return</span> (-1);
+<a name="l06804"></a>06804 
+<a name="l06805"></a>06805         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
+<a name="l06806"></a>06806 <span class="comment">//#ifdef USE_MMX</span>
+<a name="l06807"></a>06807 <span class="preprocessor">#if defined(USE_MMX) && defined(i386)</span>
+<a name="l06808"></a>06808 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l06809"></a>06809 <span class="preprocessor"></span>                __asm
+<a name="l06810"></a>06810                 {
+<a name="l06811"></a>06811                         pusha
+<a name="l06812"></a>06812                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
+<a name="l06813"></a>06813                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
+<a name="l06814"></a>06814                                 <span class="comment">/* ---, */</span>
+<a name="l06815"></a>06815                                 mov esi, Src    <span class="comment">/* ESI = Src row 0 address */</span>
+<a name="l06816"></a>06816                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
+<a name="l06817"></a>06817                                 add edi, eax    <span class="comment">/* EDI = EDI + columns */</span>
+<a name="l06818"></a>06818                                 inc              edi            <span class="comment">/* 1 byte offset from the left edge */</span>
+<a name="l06819"></a>06819                                 mov edx, rows           <span class="comment">/* initialize ROWS counter */</span>
+<a name="l06820"></a>06820                                 sub edx, 2      <span class="comment">/* do not use first and last rows */</span>
+<a name="l06821"></a>06821                                 <span class="comment">/* ---, */</span>
+<a name="l06822"></a>06822 L10400:
+<a name="l06823"></a>06823                         mov ecx, eax    <span class="comment">/* initialize COLUMS counter */</span>
+<a name="l06824"></a>06824                                 shr ecx, 3      <span class="comment">/* EBX/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l06825"></a>06825                                 mov ebx, esi    <span class="comment">/* save ESI in EBX */</span>
+<a name="l06826"></a>06826                                 movd mm1, edi           <span class="comment">/* save EDI in MM1 */</span>
+<a name="l06827"></a>06827                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l06828"></a>06828 L10402:
+<a name="l06829"></a>06829                         <span class="comment">/* ---, */</span>
+<a name="l06830"></a>06830                         movq mm4, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06831"></a>06831                         movq mm5, mm4           <span class="comment">/* save MM4 in MM5 */</span>
+<a name="l06832"></a>06832                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l06833"></a>06833                                 punpcklbw mm4, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06834"></a>06834                                 punpckhbw mm5, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06835"></a>06835                                 movq mm6, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06836"></a>06836                         movq mm7, mm6           <span class="comment">/* save MM6 in MM7 */</span>
+<a name="l06837"></a>06837                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l06838"></a>06838                                 punpcklbw mm6, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06839"></a>06839                                 punpckhbw mm7, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06840"></a>06840                                 add esi, eax    <span class="comment">/* move to the next row of Src */</span>
+<a name="l06841"></a>06841                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06842"></a>06842                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l06843"></a>06843                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l06844"></a>06844                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06845"></a>06845                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06846"></a>06846                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l06847"></a>06847                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l06848"></a>06848                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l06849"></a>06849                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l06850"></a>06850                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06851"></a>06851                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l06852"></a>06852                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l06853"></a>06853                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06854"></a>06854                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06855"></a>06855                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l06856"></a>06856                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l06857"></a>06857                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l06858"></a>06858                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l06859"></a>06859                                 add esi, eax    <span class="comment">/* move to the next row of Src */</span>
+<a name="l06860"></a>06860                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06861"></a>06861                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l06862"></a>06862                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l06863"></a>06863                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06864"></a>06864                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06865"></a>06865                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l06866"></a>06866                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l06867"></a>06867                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06868"></a>06868                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l06869"></a>06869                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l06870"></a>06870                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06871"></a>06871                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06872"></a>06872                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l06873"></a>06873                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l06874"></a>06874                                 <span class="comment">/* ---, */</span>
+<a name="l06875"></a>06875                                 movq mm2, mm4           <span class="comment">/* copy MM4 into MM2 */</span>
+<a name="l06876"></a>06876                                 psrlq mm4, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06877"></a>06877                                 psubw mm4, mm2          <span class="comment">/* MM4 = MM4 - MM2 */</span>
+<a name="l06878"></a>06878                                 movq mm3, mm6           <span class="comment">/* copy MM6 into MM3 */</span>
+<a name="l06879"></a>06879                                 psrlq mm6, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06880"></a>06880                                 psubw mm6, mm3          <span class="comment">/* MM6 = MM6 - MM3 */</span>
+<a name="l06881"></a>06881                                 punpckldq mm4, mm6      <span class="comment">/* combine 2 words of MM6 and 2 words of MM4 */</span>
+<a name="l06882"></a>06882                                 movq mm2, mm5           <span class="comment">/* copy MM6 into MM2 */</span>
+<a name="l06883"></a>06883                                 psrlq mm5, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06884"></a>06884                                 psubw mm5, mm2          <span class="comment">/* MM5 = MM5 - MM2 */</span>
+<a name="l06885"></a>06885                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l06886"></a>06886                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06887"></a>06887                                 psubw mm7, mm3          <span class="comment">/* MM7 = MM7 - MM3 */</span>
+<a name="l06888"></a>06888                                 punpckldq mm5, mm7      <span class="comment">/* combine 2 words of MM7 and 2 words of MM5 */</span>
+<a name="l06889"></a>06889                                 <span class="comment">/* Take abs values of MM4 and MM5 */</span>
+<a name="l06890"></a>06890                                 movq mm6, mm4           <span class="comment">/* copy MM4 into MM6 */</span>
+<a name="l06891"></a>06891                                 movq mm7, mm5           <span class="comment">/* copy MM5 into MM7 */</span>
+<a name="l06892"></a>06892                                 psraw mm6, 15           <span class="comment">/* fill MM6 words with word sign bit */</span>
+<a name="l06893"></a>06893                                 psraw mm7, 15           <span class="comment">/* fill MM7 words with word sign bit */</span>
+<a name="l06894"></a>06894                                 pxor mm4, mm6           <span class="comment">/* take 1's compliment of only neg words */</span>
+<a name="l06895"></a>06895                                 pxor mm5, mm7           <span class="comment">/* take 1's compliment of only neg words */</span>
+<a name="l06896"></a>06896                                 psubsw mm4, mm6         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
+<a name="l06897"></a>06897                                 psubsw mm5, mm7         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
+<a name="l06898"></a>06898                                 packuswb mm4, mm5       <span class="comment">/* combine and pack/saturate MM5 and MM4 */</span>
+<a name="l06899"></a>06899                                 movq [edi], mm4         <span class="comment">/* store result in Dest */</span>
+<a name="l06900"></a>06900                                 <span class="comment">/* ---, */</span>
+<a name="l06901"></a>06901                                 sub esi, eax    <span class="comment">/* move to the current top row in Src */</span>
+<a name="l06902"></a>06902                                 sub esi, eax
+<a name="l06903"></a>06903                                 add esi, 8      <span class="comment">/* move Src  pointer to the next 8 pixels */</span>
+<a name="l06904"></a>06904                                 add edi, 8      <span class="comment">/* move Dest pointer to the next 8 pixels */</span>
+<a name="l06905"></a>06905                                 <span class="comment">/* ---, */</span>
+<a name="l06906"></a>06906                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l06907"></a>06907                                 jnz            L10402           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l06908"></a>06908                                 mov esi, ebx    <span class="comment">/* restore most left current row Src  address */</span>
+<a name="l06909"></a>06909                                 movd edi, mm1           <span class="comment">/* restore most left current row Dest address */</span>
+<a name="l06910"></a>06910                                 add esi, eax    <span class="comment">/* move to the next row in Src */</span>
+<a name="l06911"></a>06911                                 add edi, eax    <span class="comment">/* move to the next row in Dest */</span>
+<a name="l06912"></a>06912                                 dec              edx            <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l06913"></a>06913                                 jnz            L10400           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l06914"></a>06914                                 <span class="comment">/* ---, */</span>
+<a name="l06915"></a>06915                                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l06916"></a>06916                                 popa
+<a name="l06917"></a>06917                 }
+<a name="l06918"></a>06918 <span class="preprocessor">#else</span>
+<a name="l06919"></a>06919 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l06920"></a>06920                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
+<a name="l06921"></a>06921                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
+<a name="l06922"></a>06922                         <span class="comment">/* --- */</span>
+<a name="l06923"></a>06923                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* ESI = Src row 0 address */</span>
+<a name="l06924"></a>06924                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
+<a name="l06925"></a>06925                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* EDI = EDI + columns */</span>
+<a name="l06926"></a>06926                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* 1 byte offset from the left edge */</span>
+<a name="l06927"></a>06927                         <span class="stringliteral">"mov          %2, %%edx \n\t"</span>   <span class="comment">/* initialize ROWS counter */</span>
+<a name="l06928"></a>06928                         <span class="stringliteral">"sub          $2, %%edx \n\t"</span>   <span class="comment">/* do not use first and last rows */</span>
+<a name="l06929"></a>06929                         <span class="comment">/* --- */</span>
+<a name="l06930"></a>06930                         <span class="stringliteral">".L10400:                \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>    <span class="comment">/* initialize COLUMS counter */</span>
+<a name="l06931"></a>06931                         <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* EBX/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l06932"></a>06932                         <span class="stringliteral">"mov       %%esi, %%ebx \n\t"</span>   <span class="comment">/* save ESI in EBX */</span>
+<a name="l06933"></a>06933                         <span class="stringliteral">"movd      %%edi, %%mm1 \n\t"</span>   <span class="comment">/* save EDI in MM1 */</span>
+<a name="l06934"></a>06934                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l06935"></a>06935                         <span class="stringliteral">".L10402:               \n\t"</span>
+<a name="l06936"></a>06936                         <span class="comment">/* --- */</span>
+<a name="l06937"></a>06937                         <span class="stringliteral">"movq    (%%esi), %%mm4 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06938"></a>06938                         <span class="stringliteral">"movq      %%mm4, %%mm5 \n\t"</span>   <span class="comment">/* save MM4 in MM5 */</span>
+<a name="l06939"></a>06939                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l06940"></a>06940                         <span class="stringliteral">"punpcklbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06941"></a>06941                         <span class="stringliteral">"punpckhbw %%mm0, %%mm5 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06942"></a>06942                         <span class="stringliteral">"movq    (%%esi), %%mm6 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06943"></a>06943                         <span class="stringliteral">"movq      %%mm6, %%mm7 \n\t"</span>   <span class="comment">/* save MM6 in MM7 */</span>
+<a name="l06944"></a>06944                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l06945"></a>06945                         <span class="stringliteral">"punpcklbw %%mm0, %%mm6 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06946"></a>06946                         <span class="stringliteral">"punpckhbw %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06947"></a>06947                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row of Src */</span>
+<a name="l06948"></a>06948                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06949"></a>06949                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l06950"></a>06950                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l06951"></a>06951                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06952"></a>06952                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06953"></a>06953                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l06954"></a>06954                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l06955"></a>06955                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l06956"></a>06956                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l06957"></a>06957                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06958"></a>06958                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l06959"></a>06959                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l06960"></a>06960                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06961"></a>06961                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06962"></a>06962                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l06963"></a>06963                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l06964"></a>06964                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l06965"></a>06965                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l06966"></a>06966                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row of Src */</span>
+<a name="l06967"></a>06967                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06968"></a>06968                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l06969"></a>06969                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l06970"></a>06970                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06971"></a>06971                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06972"></a>06972                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l06973"></a>06973                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l06974"></a>06974                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l06975"></a>06975                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l06976"></a>06976                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l06977"></a>06977                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l06978"></a>06978                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l06979"></a>06979                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l06980"></a>06980                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l06981"></a>06981                         <span class="comment">/* --- */</span>
+<a name="l06982"></a>06982                         <span class="stringliteral">"movq      %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* copy MM4 into MM2 */</span>
+<a name="l06983"></a>06983                         <span class="stringliteral">"psrlq       $32, %%mm4 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06984"></a>06984                         <span class="stringliteral">"psubw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* MM4 = MM4 - MM2 */</span>
+<a name="l06985"></a>06985                         <span class="stringliteral">"movq      %%mm6, %%mm3 \n\t"</span>   <span class="comment">/* copy MM6 into MM3 */</span>
+<a name="l06986"></a>06986                         <span class="stringliteral">"psrlq       $32, %%mm6 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06987"></a>06987                         <span class="stringliteral">"psubw     %%mm3, %%mm6 \n\t"</span>   <span class="comment">/* MM6 = MM6 - MM3 */</span>
+<a name="l06988"></a>06988                         <span class="stringliteral">"punpckldq %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* combine 2 words of MM6 and 2 words of MM4 */</span>
+<a name="l06989"></a>06989                         <span class="stringliteral">"movq      %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* copy MM6 into MM2 */</span>
+<a name="l06990"></a>06990                         <span class="stringliteral">"psrlq       $32, %%mm5 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06991"></a>06991                         <span class="stringliteral">"psubw     %%mm2, %%mm5 \n\t"</span>   <span class="comment">/* MM5 = MM5 - MM2 */</span>
+<a name="l06992"></a>06992                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l06993"></a>06993                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l06994"></a>06994                         <span class="stringliteral">"psubw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* MM7 = MM7 - MM3 */</span>
+<a name="l06995"></a>06995                         <span class="stringliteral">"punpckldq %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* combine 2 words of MM7 and 2 words of MM5 */</span>
+<a name="l06996"></a>06996                         <span class="comment">/* Take abs values of MM4 and MM5 */</span>
+<a name="l06997"></a>06997                         <span class="stringliteral">"movq      %%mm4, %%mm6 \n\t"</span>   <span class="comment">/* copy MM4 into MM6 */</span>
+<a name="l06998"></a>06998                         <span class="stringliteral">"movq      %%mm5, %%mm7 \n\t"</span>   <span class="comment">/* copy MM5 into MM7 */</span>
+<a name="l06999"></a>06999                         <span class="stringliteral">"psraw       $15, %%mm6 \n\t"</span>   <span class="comment">/* fill MM6 words with word sign bit */</span>
+<a name="l07000"></a>07000                         <span class="stringliteral">"psraw       $15, %%mm7 \n\t"</span>   <span class="comment">/* fill MM7 words with word sign bit */</span>
+<a name="l07001"></a>07001                         <span class="stringliteral">"pxor      %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l07002"></a>07002                         <span class="stringliteral">"pxor      %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* take 1's compliment of only neg. words */</span>
+<a name="l07003"></a>07003                         <span class="stringliteral">"psubsw    %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l07004"></a>07004                         <span class="stringliteral">"psubsw    %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l07005"></a>07005                         <span class="stringliteral">"packuswb  %%mm5, %%mm4 \n\t"</span>   <span class="comment">/* combine and pack/saturate MM5 and MM4 */</span>
+<a name="l07006"></a>07006                         <span class="stringliteral">"movq    %%mm4, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
+<a name="l07007"></a>07007                         <span class="comment">/* --- */</span>
+<a name="l07008"></a>07008                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the current top row in Src */</span>
+<a name="l07009"></a>07009                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span> <span class="stringliteral">"add $8,          %%esi \n\t"</span>     <span class="comment">/* move Src  pointer to the next 8 pixels */</span>
+<a name="l07010"></a>07010                         <span class="stringliteral">"add $8,          %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next 8 pixels */</span>
+<a name="l07011"></a>07011                         <span class="comment">/* --- */</span>
+<a name="l07012"></a>07012                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l07013"></a>07013                         <span class="stringliteral">"jnz            .L10402 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l07014"></a>07014                         <span class="stringliteral">"mov       %%ebx, %%esi \n\t"</span>   <span class="comment">/* restore most left current row Src  address */</span>
+<a name="l07015"></a>07015                         <span class="stringliteral">"movd      %%mm1, %%edi \n\t"</span>   <span class="comment">/* restore most left current row Dest address */</span>
+<a name="l07016"></a>07016                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
+<a name="l07017"></a>07017                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
+<a name="l07018"></a>07018                         <span class="stringliteral">"dec              %%edx \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l07019"></a>07019                         <span class="stringliteral">"jnz            .L10400 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l07020"></a>07020                         <span class="comment">/* --- */</span>
+<a name="l07021"></a>07021                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
+<a name="l07022"></a>07022                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
+<a name="l07023"></a>07023                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
+<a name="l07024"></a>07024                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
+<a name="l07025"></a>07025                         <span class="stringliteral">"m"</span>(columns)            <span class="comment">/* %3 */</span>
+<a name="l07026"></a>07026                         );
+<a name="l07027"></a>07027 <span class="preprocessor">#endif</span>
+<a name="l07028"></a>07028 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l07029"></a>07029 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
+<a name="l07030"></a>07030         } <span class="keywordflow">else</span> {
+<a name="l07031"></a>07031                 <span class="comment">/* No non-MMX implementation yet */</span>
+<a name="l07032"></a>07032                 <span class="keywordflow">return</span> (-1);
+<a name="l07033"></a>07033         }
+<a name="l07034"></a>07034 }
+<a name="l07035"></a>07035 
+<a name="l07049"></a><a class="code" href="_s_d_l__image_filter_8h.html#ab9cc925cd9b135e245936d718b459032">07049</a> <span class="keywordtype">int</span> <a class="code" href="_s_d_l__image_filter_8c.html#a0d21af83f0183fcd697324cffe3ab3d7" title="Filter using SobelXShiftRight: Dij = saturation255( ... )">SDL_imageFilterSobelXShiftRight</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> *Src, <span class="keywordtype">unsigned</span> <span class="keywordty [...]
+<a name="l07050"></a>07050                                                                         <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> NRightShift)
+<a name="l07051"></a>07051 {
+<a name="l07052"></a>07052         <span class="comment">/* Validate input parameters */</span>
+<a name="l07053"></a>07053         <span class="keywordflow">if</span> ((Src == NULL) || (Dest == NULL))
+<a name="l07054"></a>07054                 <span class="keywordflow">return</span>(-1);
+<a name="l07055"></a>07055         <span class="keywordflow">if</span> ((columns < 8) || (rows < 3) || (NRightShift > 7))
+<a name="l07056"></a>07056                 <span class="keywordflow">return</span> (-1);
+<a name="l07057"></a>07057 
+<a name="l07058"></a>07058         <span class="keywordflow">if</span> ((<a class="code" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917" title="MMX detection routine (with override flag).">SDL_imageFilterMMXdetect</a>())) {
+<a name="l07059"></a>07059 <span class="comment">//#ifdef USE_MMX</span>
+<a name="l07060"></a>07060 <span class="preprocessor">#if defined(USE_MMX) && defined(i386)</span>
+<a name="l07061"></a>07061 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l07062"></a>07062 <span class="preprocessor"></span>                __asm
+<a name="l07063"></a>07063                 {
+<a name="l07064"></a>07064                         pusha
+<a name="l07065"></a>07065                                 pxor mm0, mm0           <span class="comment">/* zero MM0 */</span>
+<a name="l07066"></a>07066                                 mov eax, columns        <span class="comment">/* load columns into EAX */</span>
+<a name="l07067"></a>07067                                 xor ebx, ebx    <span class="comment">/* zero EBX */</span>
+<a name="l07068"></a>07068                                 mov bl, NRightShift     <span class="comment">/* load NRightShift into BL */</span>
+<a name="l07069"></a>07069                                 movd mm1, ebx           <span class="comment">/* copy NRightShift into MM1 */</span>
+<a name="l07070"></a>07070                                 <span class="comment">/* ---, */</span>
+<a name="l07071"></a>07071                                 mov esi, Src    <span class="comment">/* ESI = Src row 0 address */</span>
+<a name="l07072"></a>07072                                 mov edi, Dest           <span class="comment">/* load Dest address to EDI */</span>
+<a name="l07073"></a>07073                                 add edi, eax    <span class="comment">/* EDI = EDI + columns */</span>
+<a name="l07074"></a>07074                                 inc              edi            <span class="comment">/* 1 byte offset from the left edge */</span>
+<a name="l07075"></a>07075                                 <span class="comment">/* initialize ROWS counter */</span>
+<a name="l07076"></a>07076                                 sub rows, 2     <span class="comment">/* do not use first and last rows */</span>
+<a name="l07077"></a>07077                                 <span class="comment">/* ---, */</span>
+<a name="l07078"></a>07078 L10410:
+<a name="l07079"></a>07079                         mov ecx, eax    <span class="comment">/* initialize COLUMS counter */</span>
+<a name="l07080"></a>07080                                 shr ecx, 3      <span class="comment">/* EBX/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l07081"></a>07081                                 mov ebx, esi    <span class="comment">/* save ESI in EBX */</span>
+<a name="l07082"></a>07082                                 mov edx, edi    <span class="comment">/* save EDI in EDX */</span>
+<a name="l07083"></a>07083                                 align 16                        <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l07084"></a>07084 L10412:
+<a name="l07085"></a>07085                         <span class="comment">/* ---, */</span>
+<a name="l07086"></a>07086                         movq mm4, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07087"></a>07087                         movq mm5, mm4           <span class="comment">/* save MM4 in MM5 */</span>
+<a name="l07088"></a>07088                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l07089"></a>07089                                 punpcklbw mm4, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07090"></a>07090                                 punpckhbw mm5, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07091"></a>07091                                 psrlw mm4, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07092"></a>07092                                 psrlw mm5, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07093"></a>07093                                 movq mm6, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07094"></a>07094                         movq mm7, mm6           <span class="comment">/* save MM6 in MM7 */</span>
+<a name="l07095"></a>07095                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l07096"></a>07096                                 punpcklbw mm6, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07097"></a>07097                                 punpckhbw mm7, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07098"></a>07098                                 psrlw mm6, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07099"></a>07099                                 psrlw mm7, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07100"></a>07100                                 add esi, eax    <span class="comment">/* move to the next row of Src */</span>
+<a name="l07101"></a>07101                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07102"></a>07102                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l07103"></a>07103                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l07104"></a>07104                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07105"></a>07105                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07106"></a>07106                                 psrlw mm2, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07107"></a>07107                                 psrlw mm3, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07108"></a>07108                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l07109"></a>07109                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l07110"></a>07110                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l07111"></a>07111                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l07112"></a>07112                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07113"></a>07113                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l07114"></a>07114                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l07115"></a>07115                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07116"></a>07116                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07117"></a>07117                                 psrlw mm2, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07118"></a>07118                                 psrlw mm3, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07119"></a>07119                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l07120"></a>07120                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l07121"></a>07121                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l07122"></a>07122                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l07123"></a>07123                                 add esi, eax    <span class="comment">/* move to the next row of Src */</span>
+<a name="l07124"></a>07124                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07125"></a>07125                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l07126"></a>07126                                 add esi, 2      <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l07127"></a>07127                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07128"></a>07128                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07129"></a>07129                                 psrlw mm2, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07130"></a>07130                                 psrlw mm3, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07131"></a>07131                                 paddw mm4, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l07132"></a>07132                                 paddw mm5, mm3          <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l07133"></a>07133                                 movq mm2, [esi]         <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07134"></a>07134                         movq mm3, mm2           <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l07135"></a>07135                                 sub esi, 2      <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l07136"></a>07136                                 punpcklbw mm2, mm0      <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07137"></a>07137                                 punpckhbw mm3, mm0      <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07138"></a>07138                                 psrlw mm2, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07139"></a>07139                                 psrlw mm3, mm1          <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07140"></a>07140                                 paddw mm6, mm2          <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l07141"></a>07141                                 paddw mm7, mm3          <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l07142"></a>07142                                 <span class="comment">/* ---, */</span>
+<a name="l07143"></a>07143                                 movq mm2, mm4           <span class="comment">/* copy MM4 into MM2 */</span>
+<a name="l07144"></a>07144                                 psrlq mm4, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l07145"></a>07145                                 psubw mm4, mm2          <span class="comment">/* MM4 = MM4 - MM2 */</span>
+<a name="l07146"></a>07146                                 movq mm3, mm6           <span class="comment">/* copy MM6 into MM3 */</span>
+<a name="l07147"></a>07147                                 psrlq mm6, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l07148"></a>07148                                 psubw mm6, mm3          <span class="comment">/* MM6 = MM6 - MM3 */</span>
+<a name="l07149"></a>07149                                 punpckldq mm4, mm6      <span class="comment">/* combine 2 words of MM6 and 2 words of MM4 */</span>
+<a name="l07150"></a>07150                                 movq mm2, mm5           <span class="comment">/* copy MM6 into MM2 */</span>
+<a name="l07151"></a>07151                                 psrlq mm5, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l07152"></a>07152                                 psubw mm5, mm2          <span class="comment">/* MM5 = MM5 - MM2 */</span>
+<a name="l07153"></a>07153                                 movq mm3, mm7           <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l07154"></a>07154                                 psrlq mm7, 32           <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l07155"></a>07155                                 psubw mm7, mm3          <span class="comment">/* MM7 = MM7 - MM3 */</span>
+<a name="l07156"></a>07156                                 punpckldq mm5, mm7      <span class="comment">/* combine 2 words of MM7 and 2 words of MM5 */</span>
+<a name="l07157"></a>07157                                 <span class="comment">/* Take abs values of MM4 and MM5 */</span>
+<a name="l07158"></a>07158                                 movq mm6, mm4           <span class="comment">/* copy MM4 into MM6 */</span>
+<a name="l07159"></a>07159                                 movq mm7, mm5           <span class="comment">/* copy MM5 into MM7 */</span>
+<a name="l07160"></a>07160                                 psraw mm6, 15           <span class="comment">/* fill MM6 words with word sign bit */</span>
+<a name="l07161"></a>07161                                 psraw mm7, 15           <span class="comment">/* fill MM7 words with word sign bit */</span>
+<a name="l07162"></a>07162                                 pxor mm4, mm6           <span class="comment">/* take 1's compliment of only neg words */</span>
+<a name="l07163"></a>07163                                 pxor mm5, mm7           <span class="comment">/* take 1's compliment of only neg words */</span>
+<a name="l07164"></a>07164                                 psubsw mm4, mm6         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
+<a name="l07165"></a>07165                                 psubsw mm5, mm7         <span class="comment">/* add 1 to only neg words, W-(-1) or W-0 */</span>
+<a name="l07166"></a>07166                                 packuswb mm4, mm5       <span class="comment">/* combine and pack/saturate MM5 and MM4 */</span>
+<a name="l07167"></a>07167                                 movq [edi], mm4         <span class="comment">/* store result in Dest */</span>
+<a name="l07168"></a>07168                                 <span class="comment">/* ---, */</span>
+<a name="l07169"></a>07169                                 sub esi, eax    <span class="comment">/* move to the current top row in Src */</span>
+<a name="l07170"></a>07170                                 sub esi, eax
+<a name="l07171"></a>07171                                 add esi, 8      <span class="comment">/* move Src  pointer to the next 8 pixels */</span>
+<a name="l07172"></a>07172                                 add edi, 8      <span class="comment">/* move Dest pointer to the next 8 pixels */</span>
+<a name="l07173"></a>07173                                 <span class="comment">/* ---, */</span>
+<a name="l07174"></a>07174                                 dec              ecx            <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l07175"></a>07175                                 jnz            L10412           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l07176"></a>07176                                 mov esi, ebx    <span class="comment">/* restore most left current row Src  address */</span>
+<a name="l07177"></a>07177                                 mov edi, edx    <span class="comment">/* restore most left current row Dest address */</span>
+<a name="l07178"></a>07178                                 add esi, eax    <span class="comment">/* move to the next row in Src */</span>
+<a name="l07179"></a>07179                                 add edi, eax    <span class="comment">/* move to the next row in Dest */</span>
+<a name="l07180"></a>07180                                 dec rows        <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l07181"></a>07181                                 jnz            L10410           <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l07182"></a>07182                                 <span class="comment">/* ---, */</span>
+<a name="l07183"></a>07183                                 emms                            <span class="comment">/* exit MMX state */</span>
+<a name="l07184"></a>07184                                 popa
+<a name="l07185"></a>07185                 }
+<a name="l07186"></a>07186 <span class="preprocessor">#else</span>
+<a name="l07187"></a>07187 <span class="preprocessor"></span>                <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l07188"></a>07188                         (<span class="stringliteral">"pusha              \n\t"</span> <span class="stringliteral">"pxor      %%mm0, %%mm0 \n\t"</span>        <span class="comment">/* zero MM0 */</span>
+<a name="l07189"></a>07189                         <span class="stringliteral">"mov          %3, %%eax \n\t"</span>   <span class="comment">/* load columns into EAX */</span>
+<a name="l07190"></a>07190                         <span class="stringliteral">"xor       %%ebx, %%ebx \n\t"</span>   <span class="comment">/* zero EBX */</span>
+<a name="l07191"></a>07191                         <span class="stringliteral">"mov           %4, %%bl \n\t"</span>   <span class="comment">/* load NRightShift into BL */</span>
+<a name="l07192"></a>07192                         <span class="stringliteral">"movd      %%ebx, %%mm1 \n\t"</span>   <span class="comment">/* copy NRightShift into MM1 */</span>
+<a name="l07193"></a>07193                         <span class="comment">/* --- */</span>
+<a name="l07194"></a>07194                         <span class="stringliteral">"mov          %1, %%esi \n\t"</span>   <span class="comment">/* ESI = Src row 0 address */</span>
+<a name="l07195"></a>07195                         <span class="stringliteral">"mov          %0, %%edi \n\t"</span>   <span class="comment">/* load Dest address to EDI */</span>
+<a name="l07196"></a>07196                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* EDI = EDI + columns */</span>
+<a name="l07197"></a>07197                         <span class="stringliteral">"inc              %%edi \n\t"</span>   <span class="comment">/* 1 byte offset from the left edge */</span>
+<a name="l07198"></a>07198                         <span class="comment">/* initialize ROWS counter */</span>
+<a name="l07199"></a>07199                         <span class="stringliteral">"subl            $2, %2 \n\t"</span>   <span class="comment">/* do not use first and last rows */</span>
+<a name="l07200"></a>07200                         <span class="comment">/* --- */</span>
+<a name="l07201"></a>07201                         <span class="stringliteral">".L10410:                \n\t"</span> <span class="stringliteral">"mov       %%eax, %%ecx \n\t"</span>    <span class="comment">/* initialize COLUMS counter */</span>
+<a name="l07202"></a>07202                         <span class="stringliteral">"shr          $3, %%ecx \n\t"</span>   <span class="comment">/* EBX/8 (MMX loads 8 bytes at a time) */</span>
+<a name="l07203"></a>07203                         <span class="stringliteral">"mov       %%esi, %%ebx \n\t"</span>   <span class="comment">/* save ESI in EBX */</span>
+<a name="l07204"></a>07204                         <span class="stringliteral">"mov       %%edi, %%edx \n\t"</span>   <span class="comment">/* save EDI in EDX */</span>
+<a name="l07205"></a>07205                         <span class="stringliteral">".align 16              \n\t"</span>   <span class="comment">/* 16 byte alignment of the loop entry */</span>
+<a name="l07206"></a>07206                         <span class="stringliteral">".L10412:               \n\t"</span>
+<a name="l07207"></a>07207                         <span class="comment">/* --- */</span>
+<a name="l07208"></a>07208                         <span class="stringliteral">"movq    (%%esi), %%mm4 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07209"></a>07209                         <span class="stringliteral">"movq      %%mm4, %%mm5 \n\t"</span>   <span class="comment">/* save MM4 in MM5 */</span>
+<a name="l07210"></a>07210                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l07211"></a>07211                         <span class="stringliteral">"punpcklbw %%mm0, %%mm4 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07212"></a>07212                         <span class="stringliteral">"punpckhbw %%mm0, %%mm5 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07213"></a>07213                         <span class="stringliteral">"psrlw     %%mm1, %%mm4 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07214"></a>07214                         <span class="stringliteral">"psrlw     %%mm1, %%mm5 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07215"></a>07215                         <span class="stringliteral">"movq    (%%esi), %%mm6 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07216"></a>07216                         <span class="stringliteral">"movq      %%mm6, %%mm7 \n\t"</span>   <span class="comment">/* save MM6 in MM7 */</span>
+<a name="l07217"></a>07217                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l07218"></a>07218                         <span class="stringliteral">"punpcklbw %%mm0, %%mm6 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07219"></a>07219                         <span class="stringliteral">"punpckhbw %%mm0, %%mm7 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07220"></a>07220                         <span class="stringliteral">"psrlw     %%mm1, %%mm6 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07221"></a>07221                         <span class="stringliteral">"psrlw     %%mm1, %%mm7 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07222"></a>07222                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row of Src */</span>
+<a name="l07223"></a>07223                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07224"></a>07224                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l07225"></a>07225                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l07226"></a>07226                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07227"></a>07227                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07228"></a>07228                         <span class="stringliteral">"psrlw     %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07229"></a>07229                         <span class="stringliteral">"psrlw     %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07230"></a>07230                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l07231"></a>07231                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l07232"></a>07232                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM4 */</span>
+<a name="l07233"></a>07233                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM5 */</span>
+<a name="l07234"></a>07234                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07235"></a>07235                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l07236"></a>07236                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l07237"></a>07237                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07238"></a>07238                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07239"></a>07239                         <span class="stringliteral">"psrlw     %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07240"></a>07240                         <span class="stringliteral">"psrlw     %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07241"></a>07241                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l07242"></a>07242                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l07243"></a>07243                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumolator MM6 */</span>
+<a name="l07244"></a>07244                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumolator MM7 */</span>
+<a name="l07245"></a>07245                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row of Src */</span>
+<a name="l07246"></a>07246                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07247"></a>07247                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l07248"></a>07248                         <span class="stringliteral">"add          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer 2 bytes right */</span>
+<a name="l07249"></a>07249                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07250"></a>07250                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07251"></a>07251                         <span class="stringliteral">"psrlw     %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07252"></a>07252                         <span class="stringliteral">"psrlw     %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07253"></a>07253                         <span class="stringliteral">"paddw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumulator MM4 */</span>
+<a name="l07254"></a>07254                         <span class="stringliteral">"paddw     %%mm3, %%mm5 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumulator MM5 */</span>
+<a name="l07255"></a>07255                         <span class="stringliteral">"movq    (%%esi), %%mm2 \n\t"</span>   <span class="comment">/* load 8 bytes from Src */</span>
+<a name="l07256"></a>07256                         <span class="stringliteral">"movq      %%mm2, %%mm3 \n\t"</span>   <span class="comment">/* save MM2 in MM3 */</span>
+<a name="l07257"></a>07257                         <span class="stringliteral">"sub          $2, %%esi \n\t"</span>   <span class="comment">/* move ESI pointer back 2 bytes left */</span>
+<a name="l07258"></a>07258                         <span class="stringliteral">"punpcklbw %%mm0, %%mm2 \n\t"</span>   <span class="comment">/* unpack 4 low  bytes into words */</span>
+<a name="l07259"></a>07259                         <span class="stringliteral">"punpckhbw %%mm0, %%mm3 \n\t"</span>   <span class="comment">/* unpack 4 high bytes into words */</span>
+<a name="l07260"></a>07260                         <span class="stringliteral">"psrlw     %%mm1, %%mm2 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07261"></a>07261                         <span class="stringliteral">"psrlw     %%mm1, %%mm3 \n\t"</span>   <span class="comment">/* shift right each pixel NshiftRight times */</span>
+<a name="l07262"></a>07262                         <span class="stringliteral">"paddw     %%mm2, %%mm6 \n\t"</span>   <span class="comment">/* add 4 low  bytes to accumulator MM6 */</span>
+<a name="l07263"></a>07263                         <span class="stringliteral">"paddw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* add 4 high bytes to accumulator MM7 */</span>
+<a name="l07264"></a>07264                         <span class="comment">/* --- */</span>
+<a name="l07265"></a>07265                         <span class="stringliteral">"movq      %%mm4, %%mm2 \n\t"</span>   <span class="comment">/* copy MM4 into MM2 */</span>
+<a name="l07266"></a>07266                         <span class="stringliteral">"psrlq       $32, %%mm4 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l07267"></a>07267                         <span class="stringliteral">"psubw     %%mm2, %%mm4 \n\t"</span>   <span class="comment">/* MM4 = MM4 - MM2 */</span>
+<a name="l07268"></a>07268                         <span class="stringliteral">"movq      %%mm6, %%mm3 \n\t"</span>   <span class="comment">/* copy MM6 into MM3 */</span>
+<a name="l07269"></a>07269                         <span class="stringliteral">"psrlq       $32, %%mm6 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l07270"></a>07270                         <span class="stringliteral">"psubw     %%mm3, %%mm6 \n\t"</span>   <span class="comment">/* MM6 = MM6 - MM3 */</span>
+<a name="l07271"></a>07271                         <span class="stringliteral">"punpckldq %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* combine 2 words of MM6 and 2 words of MM4 */</span>
+<a name="l07272"></a>07272                         <span class="stringliteral">"movq      %%mm5, %%mm2 \n\t"</span>   <span class="comment">/* copy MM5 into MM2 */</span>
+<a name="l07273"></a>07273                         <span class="stringliteral">"psrlq       $32, %%mm5 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l07274"></a>07274                         <span class="stringliteral">"psubw     %%mm2, %%mm5 \n\t"</span>   <span class="comment">/* MM5 = MM5 - MM2 */</span>
+<a name="l07275"></a>07275                         <span class="stringliteral">"movq      %%mm7, %%mm3 \n\t"</span>   <span class="comment">/* copy MM7 into MM3 */</span>
+<a name="l07276"></a>07276                         <span class="stringliteral">"psrlq       $32, %%mm7 \n\t"</span>   <span class="comment">/* shift 2 left words to the right */</span>
+<a name="l07277"></a>07277                         <span class="stringliteral">"psubw     %%mm3, %%mm7 \n\t"</span>   <span class="comment">/* MM7 = MM7 - MM3 */</span>
+<a name="l07278"></a>07278                         <span class="stringliteral">"punpckldq %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* combine 2 words of MM7 and 2 words of MM5 */</span>
+<a name="l07279"></a>07279                         <span class="comment">/* Take abs values of MM4 and MM5 */</span>
+<a name="l07280"></a>07280                         <span class="stringliteral">"movq      %%mm4, %%mm6 \n\t"</span>   <span class="comment">/* copy MM4 into MM6 */</span>
+<a name="l07281"></a>07281                         <span class="stringliteral">"movq      %%mm5, %%mm7 \n\t"</span>   <span class="comment">/* copy MM5 into MM7 */</span>
+<a name="l07282"></a>07282                         <span class="stringliteral">"psraw       $15, %%mm6 \n\t"</span>   <span class="comment">/* fill MM6 words with word sign bit */</span>
+<a name="l07283"></a>07283                         <span class="stringliteral">"psraw       $15, %%mm7 \n\t"</span>   <span class="comment">/* fill MM7 words with word sign bit */</span>
+<a name="l07284"></a>07284                         <span class="stringliteral">"pxor      %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* take 1's complement of only neg. words */</span>
+<a name="l07285"></a>07285                         <span class="stringliteral">"pxor      %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* take 1's complement of only neg. words */</span>
+<a name="l07286"></a>07286                         <span class="stringliteral">"psubsw    %%mm6, %%mm4 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l07287"></a>07287                         <span class="stringliteral">"psubsw    %%mm7, %%mm5 \n\t"</span>   <span class="comment">/* add 1 to only neg. words, W-(-1) or W-0 */</span>
+<a name="l07288"></a>07288                         <span class="stringliteral">"packuswb  %%mm5, %%mm4 \n\t"</span>   <span class="comment">/* combine and pack/saturate MM5 and MM4 */</span>
+<a name="l07289"></a>07289                         <span class="stringliteral">"movq    %%mm4, (%%edi) \n\t"</span>   <span class="comment">/* store result in Dest */</span>
+<a name="l07290"></a>07290                         <span class="comment">/* --- */</span>
+<a name="l07291"></a>07291                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the current top row in Src */</span>
+<a name="l07292"></a>07292                         <span class="stringliteral">"sub       %%eax, %%esi \n\t"</span> <span class="stringliteral">"add $8,          %%esi \n\t"</span>     <span class="comment">/* move Src  pointer to the next 8 pixels */</span>
+<a name="l07293"></a>07293                         <span class="stringliteral">"add $8,          %%edi \n\t"</span>   <span class="comment">/* move Dest pointer to the next 8 pixels */</span>
+<a name="l07294"></a>07294                         <span class="comment">/* --- */</span>
+<a name="l07295"></a>07295                         <span class="stringliteral">"dec              %%ecx \n\t"</span>   <span class="comment">/* decrease loop counter COLUMNS */</span>
+<a name="l07296"></a>07296                         <span class="stringliteral">"jnz            .L10412 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l07297"></a>07297                         <span class="stringliteral">"mov       %%ebx, %%esi \n\t"</span>   <span class="comment">/* restore most left current row Src  address */</span>
+<a name="l07298"></a>07298                         <span class="stringliteral">"mov       %%edx, %%edi \n\t"</span>   <span class="comment">/* restore most left current row Dest address */</span>
+<a name="l07299"></a>07299                         <span class="stringliteral">"add       %%eax, %%esi \n\t"</span>   <span class="comment">/* move to the next row in Src */</span>
+<a name="l07300"></a>07300                         <span class="stringliteral">"add       %%eax, %%edi \n\t"</span>   <span class="comment">/* move to the next row in Dest */</span>
+<a name="l07301"></a>07301                         <span class="stringliteral">"decl                %2 \n\t"</span>   <span class="comment">/* decrease loop counter ROWS */</span>
+<a name="l07302"></a>07302                         <span class="stringliteral">"jnz            .L10410 \n\t"</span>   <span class="comment">/* check loop termination, proceed if required */</span>
+<a name="l07303"></a>07303                         <span class="comment">/* --- */</span>
+<a name="l07304"></a>07304                         <span class="stringliteral">"emms                   \n\t"</span>   <span class="comment">/* exit MMX state */</span>
+<a name="l07305"></a>07305                         <span class="stringliteral">"popa                   \n\t"</span>:<span class="stringliteral">"=m"</span> (Dest)       <span class="comment">/* %0 */</span>
+<a name="l07306"></a>07306                         :<span class="stringliteral">"m"</span>(Src),              <span class="comment">/* %1 */</span>
+<a name="l07307"></a>07307                         <span class="stringliteral">"m"</span>(rows),              <span class="comment">/* %2 */</span>
+<a name="l07308"></a>07308                         <span class="stringliteral">"m"</span>(columns),           <span class="comment">/* %3 */</span>
+<a name="l07309"></a>07309                         <span class="stringliteral">"m"</span>(NRightShift)        <span class="comment">/* %4 */</span>
+<a name="l07310"></a>07310                         );
+<a name="l07311"></a>07311 <span class="preprocessor">#endif</span>
+<a name="l07312"></a>07312 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l07313"></a>07313 <span class="preprocessor"></span>                <span class="keywordflow">return</span> (0);
+<a name="l07314"></a>07314         } <span class="keywordflow">else</span> {
+<a name="l07315"></a>07315                 <span class="comment">/* No non-MMX implementation yet */</span>
+<a name="l07316"></a>07316                 <span class="keywordflow">return</span> (-1);
+<a name="l07317"></a>07317         }
+<a name="l07318"></a>07318 }
+<a name="l07319"></a>07319 
+<a name="l07323"></a><a class="code" href="_s_d_l__image_filter_8h.html#a08a45265e9e84bf8beedebba26da947c">07323</a> <span class="keywordtype">void</span> <a class="code" href="_s_d_l__image_filter_8c.html#afbfcc8c03e3d791ac74c955d14a135e4" title="Align stack to 32 byte boundary,.">SDL_imageFilterAlignStack</a>(<span class="keywordtype">void</span>)
+<a name="l07324"></a>07324 {
+<a name="l07325"></a>07325 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l07326"></a>07326 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l07327"></a>07327 <span class="preprocessor"></span>        __asm
+<a name="l07328"></a>07328         {                               <span class="comment">/* --- stack alignment --- */</span>
+<a name="l07329"></a>07329                 mov ebx, esp    <span class="comment">/* load ESP into EBX */</span>
+<a name="l07330"></a>07330                         sub ebx, 4      <span class="comment">/* reserve space on stack for old value of ESP */</span>
+<a name="l07331"></a>07331                         and ebx, -32    <span class="comment">/* align EBX along a 32 byte boundary */</span>
+<a name="l07332"></a>07332                         mov [ebx], esp          <span class="comment">/* save old value of ESP in stack, behind the bndry */</span>
+<a name="l07333"></a>07333                         mov esp, ebx    <span class="comment">/* align ESP along a 32 byte boundary */</span>
+<a name="l07334"></a>07334         }
+<a name="l07335"></a>07335 <span class="preprocessor">#else</span>
+<a name="l07336"></a>07336 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l07337"></a>07337                 (                               <span class="comment">/* --- stack alignment --- */</span>
+<a name="l07338"></a>07338                 <span class="stringliteral">"mov       %%esp, %%ebx \n\t"</span>   <span class="comment">/* load ESP into EBX */</span>
+<a name="l07339"></a>07339                 <span class="stringliteral">"sub          $4, %%ebx \n\t"</span>   <span class="comment">/* reserve space on stack for old value of ESP */</span>
+<a name="l07340"></a>07340                 <span class="stringliteral">"and        $-32, %%ebx \n\t"</span>   <span class="comment">/* align EBX along a 32 byte boundary */</span>
+<a name="l07341"></a>07341                 <span class="stringliteral">"mov     %%esp, (%%ebx) \n\t"</span>   <span class="comment">/* save old value of ESP in stack, behind the bndry */</span>
+<a name="l07342"></a>07342                 <span class="stringliteral">"mov       %%ebx, %%esp \n\t"</span>   <span class="comment">/* align ESP along a 32 byte boundary */</span>
+<a name="l07343"></a>07343                 ::);
+<a name="l07344"></a>07344 <span class="preprocessor">#endif</span>
+<a name="l07345"></a>07345 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l07346"></a>07346 <span class="preprocessor"></span>}
+<a name="l07347"></a>07347 
+<a name="l07351"></a><a class="code" href="_s_d_l__image_filter_8h.html#a84f360601d5e6e017f0e74a2cf83be6c">07351</a> <span class="keywordtype">void</span> <a class="code" href="_s_d_l__image_filter_8c.html#a3147eb5ddd4965d65702f0e533b42974" title="Restore previously aligned stack.">SDL_imageFilterRestoreStack</a>(<span class="keywordtype">void</span>)
+<a name="l07352"></a>07352 {
+<a name="l07353"></a>07353 <span class="preprocessor">#ifdef USE_MMX</span>
+<a name="l07354"></a>07354 <span class="preprocessor"></span><span class="preprocessor">#if !defined(GCC__)</span>
+<a name="l07355"></a>07355 <span class="preprocessor"></span>        __asm
+<a name="l07356"></a>07356         {                               <span class="comment">/* --- restoring old stack --- */</span>
+<a name="l07357"></a>07357                 mov ebx, [esp]          <span class="comment">/* load old value of ESP */</span>
+<a name="l07358"></a>07358                 mov esp, ebx    <span class="comment">/* restore old value of ESP */</span>
+<a name="l07359"></a>07359         }
+<a name="l07360"></a>07360 <span class="preprocessor">#else</span>
+<a name="l07361"></a>07361 <span class="preprocessor"></span>        <span class="keyword">asm</span> <span class="keyword">volatile</span>
+<a name="l07362"></a>07362                 (                               <span class="comment">/* --- restoring old stack --- */</span>
+<a name="l07363"></a>07363                 <span class="stringliteral">"mov     (%%esp), %%ebx \n\t"</span>   <span class="comment">/* load old value of ESP */</span>
+<a name="l07364"></a>07364                 <span class="stringliteral">"mov       %%ebx, %%esp \n\t"</span>   <span class="comment">/* restore old value of ESP */</span>
+<a name="l07365"></a>07365                 ::);
+<a name="l07366"></a>07366 <span class="preprocessor">#endif</span>
+<a name="l07367"></a>07367 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+<a name="l07368"></a>07368 <span class="preprocessor"></span>}
 </pre></div></div><!-- contents -->
 
 
diff --git a/Docs/html/_s_d_l__image_filter_8h.html b/Docs/html/_s_d_l__image_filter_8h.html
index acf74b4..f12cfe9 100644
--- a/Docs/html/_s_d_l__image_filter_8h.html
+++ b/Docs/html/_s_d_l__image_filter_8h.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_imageFilter.h File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_imageFilter.h File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -55,7 +55,7 @@
 <a href="#define-members">Defines</a> |
 <a href="#func-members">Functions</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_imageFilter.h File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_imageFilter.h File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 
@@ -217,7 +217,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00607">607</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00539">539</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -270,7 +270,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00207">207</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00170">170</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -323,7 +323,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01808">1808</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01788">1788</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -376,7 +376,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02115">2115</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02065">2065</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -429,7 +429,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01949">1949</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01916">1916</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -450,7 +450,7 @@ Functions</h2></td></tr>
 
 <p>Align stack to 32 byte boundary,. </p>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07537">7537</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07323">7323</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -503,7 +503,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03717">3717</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03531">3531</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -556,7 +556,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01312">1312</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01275">1275</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -602,7 +602,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01674">1674</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01668">1668</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -655,7 +655,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01434">1434</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01389">1389</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -715,7 +715,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03890">3890</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03688">3688</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -783,7 +783,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04201">4201</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03977">3977</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -851,7 +851,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05595">5595</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05375">5375</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -919,7 +919,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04390">4390</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04167">4167</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -987,7 +987,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05771">5771</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05552">5552</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1055,7 +1055,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04692">4692</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04470">4470</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1123,7 +1123,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l06071">6071</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05853">5853</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1191,7 +1191,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l05048">5048</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04827">4827</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1259,7 +1259,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l06433">6433</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l06216">6216</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1312,7 +1312,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01561">1561</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01546">1546</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1365,7 +1365,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00356">356</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00305">305</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1387,7 +1387,7 @@ Functions</h2></td></tr>
 <p>MMX detection routine (with override flag). </p>
 <dl class="section return"><dt>Returns:</dt><dd>1 of MMX was detected, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00100">100</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00077">77</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1408,7 +1408,7 @@ Functions</h2></td></tr>
 
 <p>Disable MMX check for filter functions and and force to use non-MMX C based code. </p>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00119">119</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00090">90</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1429,7 +1429,7 @@ Functions</h2></td></tr>
 
 <p>Enable MMX check for filter functions and use MMX code if available. </p>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00127">127</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00098">98</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1482,7 +1482,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00766">766</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00726">726</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1535,7 +1535,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02903">2903</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02787">2787</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1588,7 +1588,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01038">1038</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00997">997</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1641,7 +1641,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01189">1189</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l01138">1138</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1694,7 +1694,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00888">888</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00859">859</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1768,7 +1768,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l04130">4130</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03906">3906</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1789,7 +1789,7 @@ Functions</h2></td></tr>
 
 <p>Restore previously aligned stack. </p>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07565">7565</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07351">7351</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1842,7 +1842,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03562">3562</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03390">3390</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1895,7 +1895,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03237">3237</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03090">3090</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -1948,7 +1948,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03364">3364</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03207">3207</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -2001,7 +2001,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02564">2564</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02473">2473</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -2061,7 +2061,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l03074">3074</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02940">2940</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -2114,7 +2114,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02692">2692</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02591">2591</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -2168,7 +2168,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07012">7012</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l06796">6796</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -2229,7 +2229,7 @@ Functions</h2></td></tr>
 <p>Note: Non-MMX implementation not available for this function.</p>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 1 if filter was applied, 0 otherwise. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07264">7264</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l07049">7049</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -2282,7 +2282,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00478">478</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l00419">419</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -2335,7 +2335,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02256">2256</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02193">2193</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
@@ -2388,7 +2388,7 @@ Functions</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns:</dt><dd>Returns 0 for success or -1 for error. </dd></dl>
 
-<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02397">2397</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
+<p>Definition at line <a class="el" href="_s_d_l__image_filter_8c_source.html#l02322">2322</a> of file <a class="el" href="_s_d_l__image_filter_8c_source.html">SDL_imageFilter.c</a>.</p>
 
 </div>
 </div>
diff --git a/Docs/html/_s_d_l__image_filter_8h_source.html b/Docs/html/_s_d_l__image_filter_8h_source.html
index 8f6007e..7fe5af6 100644
--- a/Docs/html/_s_d_l__image_filter_8h_source.html
+++ b/Docs/html/_s_d_l__image_filter_8h_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_imageFilter.h Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_imageFilter.h Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_imageFilter.h</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_imageFilter.h</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__image_filter_8h.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/*</span>
diff --git a/Docs/html/_s_d_l__rotozoom_8c.html b/Docs/html/_s_d_l__rotozoom_8c.html
index 2c2d60b..d227a23 100644
--- a/Docs/html/_s_d_l__rotozoom_8c.html
+++ b/Docs/html/_s_d_l__rotozoom_8c.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_rotozoom.c File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_rotozoom.c File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -57,7 +57,7 @@
 <a href="#typedef-members">Typedefs</a> |
 <a href="#func-members">Functions</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_rotozoom.c File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_rotozoom.c File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 <div class="textblock"><code>#include <stdlib.h></code><br/>
diff --git a/Docs/html/_s_d_l__rotozoom_8c_source.html b/Docs/html/_s_d_l__rotozoom_8c_source.html
index 164717d..72cae77 100644
--- a/Docs/html/_s_d_l__rotozoom_8c_source.html
+++ b/Docs/html/_s_d_l__rotozoom_8c_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_rotozoom.c Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_rotozoom.c Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_rotozoom.c</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_rotozoom.c</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__rotozoom_8c.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/*  </span>
diff --git a/Docs/html/_s_d_l__rotozoom_8h.html b/Docs/html/_s_d_l__rotozoom_8h.html
index 4ecc344..7b0dc13 100644
--- a/Docs/html/_s_d_l__rotozoom_8h.html
+++ b/Docs/html/_s_d_l__rotozoom_8h.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_rotozoom.h File Reference</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_rotozoom.h File Reference</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -55,7 +55,7 @@
 <a href="#define-members">Defines</a> |
 <a href="#func-members">Functions</a>  </div>
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_rotozoom.h File Reference</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_rotozoom.h File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 <div class="textblock"><code>#include <math.h></code><br/>
diff --git a/Docs/html/_s_d_l__rotozoom_8h_source.html b/Docs/html/_s_d_l__rotozoom_8h_source.html
index 9a96034..928de2a 100644
--- a/Docs/html/_s_d_l__rotozoom_8h_source.html
+++ b/Docs/html/_s_d_l__rotozoom_8h_source.html
@@ -3,7 +3,7 @@
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
-<title>SDL_gfx: C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_rotozoom.h Source File</title>
+<title>SDL_gfx: I:/Sources/sdlgfx/SDL_rotozoom.h Source File</title>
 
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -52,7 +52,7 @@
 </div>
 <div class="header">
   <div class="headertitle">
-<div class="title">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/SDL_rotozoom.h</div>  </div>
+<div class="title">I:/Sources/sdlgfx/SDL_rotozoom.h</div>  </div>
 </div><!--header-->
 <div class="contents">
 <a href="_s_d_l__rotozoom_8h.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/*  </span>
diff --git a/Docs/html/annotated.html b/Docs/html/annotated.html
index d64cb49..d6b8a75 100644
--- a/Docs/html/annotated.html
+++ b/Docs/html/annotated.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/classes.html b/Docs/html/classes.html
index ff258a1..38f7d37 100644
--- a/Docs/html/classes.html
+++ b/Docs/html/classes.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/files.html b/Docs/html/files.html
index 21958ec..8373b02 100644
--- a/Docs/html/files.html
+++ b/Docs/html/files.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -56,18 +56,18 @@
 </div><!--header-->
 <div class="contents">
 <div class="textblock">Here is a list of all files with brief descriptions:</div><table>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_r_e_a_d_m_e.html">README</a> <a href="_r_e_a_d_m_e_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__framerate_8c.html">SDL_framerate.c</a> <a href="_s_d_l__framerate_8c_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__framerate_8h.html">SDL_framerate.h</a> <a href="_s_d_l__framerate_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_blit_func_8c.html">SDL_gfxBlitFunc.c</a> <a href="_s_d_l__gfx_blit_func_8c_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_blit_func_8h.html">SDL_gfxBlitFunc.h</a> <a href="_s_d_l__gfx_blit_func_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_primitives_8c.html">SDL_gfxPrimitives.c</a> <a href="_s_d_l__gfx_primitives_8c_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_primitives_8h.html">SDL_gfxPrimitives.h</a> <a href="_s_d_l__gfx_primitives_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_primitives__font_8h.html">SDL_gfxPrimitives_font.h</a> <a href="_s_d_l__gfx_primitives__font_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__image_filter_8c.html">SDL_imageFilter.c</a> <a href="_s_d_l__image_filter_8c_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__image_filter_8h.html">SDL_imageFilter.h</a> <a href="_s_d_l__image_filter_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__rotozoom_8c.html">SDL_rotozoom.c</a> <a href="_s_d_l__rotozoom_8c_source.html">[code]</a></td><td class="indexvalue"></td></tr>
-  <tr><td class="indexkey">C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__rotozoom_8h.html">SDL_rotozoom.h</a> <a href="_s_d_l__rotozoom_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_r_e_a_d_m_e.html">README</a> <a href="_r_e_a_d_m_e_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__framerate_8c.html">SDL_framerate.c</a> <a href="_s_d_l__framerate_8c_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__framerate_8h.html">SDL_framerate.h</a> <a href="_s_d_l__framerate_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_blit_func_8c.html">SDL_gfxBlitFunc.c</a> <a href="_s_d_l__gfx_blit_func_8c_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_blit_func_8h.html">SDL_gfxBlitFunc.h</a> <a href="_s_d_l__gfx_blit_func_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_primitives_8c.html">SDL_gfxPrimitives.c</a> <a href="_s_d_l__gfx_primitives_8c_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_primitives_8h.html">SDL_gfxPrimitives.h</a> <a href="_s_d_l__gfx_primitives_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_primitives__font_8h.html">SDL_gfxPrimitives_font.h</a> <a href="_s_d_l__gfx_primitives__font_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__image_filter_8c.html">SDL_imageFilter.c</a> <a href="_s_d_l__image_filter_8c_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__image_filter_8h.html">SDL_imageFilter.h</a> <a href="_s_d_l__image_filter_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__rotozoom_8c.html">SDL_rotozoom.c</a> <a href="_s_d_l__rotozoom_8c_source.html">[code]</a></td><td class="indexvalue"></td></tr>
+  <tr><td class="indexkey">I:/Sources/sdlgfx/<a class="el" href="_s_d_l__rotozoom_8h.html">SDL_rotozoom.h</a> <a href="_s_d_l__rotozoom_8h_source.html">[code]</a></td><td class="indexvalue"></td></tr>
 </table>
 </div><!-- contents -->
 
diff --git a/Docs/html/functions.html b/Docs/html/functions.html
index f17e46c..551ff05 100644
--- a/Docs/html/functions.html
+++ b/Docs/html/functions.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/functions_vars.html b/Docs/html/functions_vars.html
index 3647b30..2a74e3c 100644
--- a/Docs/html/functions_vars.html
+++ b/Docs/html/functions_vars.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals.html b/Docs/html/globals.html
index a5f3ceb..7ec1179 100644
--- a/Docs/html/globals.html
+++ b/Docs/html/globals.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -96,9 +96,6 @@
 <li>_colorkey()
 : <a class="el" href="_s_d_l__rotozoom_8c.html#afe2a2e4dc6cc462c5fc98a2110b8e1ce">SDL_rotozoom.c</a>
 </li>
-<li>_cpuFlags()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ade15666303ddc71c543f44cf1536d00e">SDL_imageFilter.c</a>
-</li>
 <li>_evaluateBezier()
 : <a class="el" href="_s_d_l__gfx_primitives_8c.html#a888411ec724ddb9ff19cf9ba9fc067df">SDL_gfxPrimitives.c</a>
 </li>
diff --git a/Docs/html/globals_0x61.html b/Docs/html/globals_0x61.html
index 571a3ce..c995b42 100644
--- a/Docs/html/globals_0x61.html
+++ b/Docs/html/globals_0x61.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x62.html b/Docs/html/globals_0x62.html
index 6552c10..cbbbdd1 100644
--- a/Docs/html/globals_0x62.html
+++ b/Docs/html/globals_0x62.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x63.html b/Docs/html/globals_0x63.html
index e4ade5f..bab6d9b 100644
--- a/Docs/html/globals_0x63.html
+++ b/Docs/html/globals_0x63.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x64.html b/Docs/html/globals_0x64.html
index 4933539..ddd7a3c 100644
--- a/Docs/html/globals_0x64.html
+++ b/Docs/html/globals_0x64.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x65.html b/Docs/html/globals_0x65.html
index b2e0bf0..3e7c63f 100644
--- a/Docs/html/globals_0x65.html
+++ b/Docs/html/globals_0x65.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x66.html b/Docs/html/globals_0x66.html
index 90c5399..a1696a2 100644
--- a/Docs/html/globals_0x66.html
+++ b/Docs/html/globals_0x66.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x67.html b/Docs/html/globals_0x67.html
index 9eb4f42..919a30d 100644
--- a/Docs/html/globals_0x67.html
+++ b/Docs/html/globals_0x67.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x68.html b/Docs/html/globals_0x68.html
index e25e749..80c7dd5 100644
--- a/Docs/html/globals_0x68.html
+++ b/Docs/html/globals_0x68.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x6c.html b/Docs/html/globals_0x6c.html
index c4309e8..15d105e 100644
--- a/Docs/html/globals_0x6c.html
+++ b/Docs/html/globals_0x6c.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x6d.html b/Docs/html/globals_0x6d.html
index 4b53a11..5ff7bf5 100644
--- a/Docs/html/globals_0x6d.html
+++ b/Docs/html/globals_0x6d.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x70.html b/Docs/html/globals_0x70.html
index a2d94ef..04fdeed 100644
--- a/Docs/html/globals_0x70.html
+++ b/Docs/html/globals_0x70.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x72.html b/Docs/html/globals_0x72.html
index 0dff71e..121e8bc 100644
--- a/Docs/html/globals_0x72.html
+++ b/Docs/html/globals_0x72.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x73.html b/Docs/html/globals_0x73.html
index 013fbe3..47bac20 100644
--- a/Docs/html/globals_0x73.html
+++ b/Docs/html/globals_0x73.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -133,9 +133,6 @@
 : <a class="el" href="_s_d_l__image_filter_8c.html#a472909f904274255cd6793c520172e48">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a789ce070edcc478ad97a0d7ff90e6aa2">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterAbsDiffMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a601bf863185e51af32c6008ecb0a5095">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterAdd()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a9f06507eb0b63198dbd67495d61c9b20">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a9034268e2f51550d8f1d6084bda45194">SDL_imageFilter.h</a>
@@ -144,26 +141,14 @@
 : <a class="el" href="_s_d_l__image_filter_8c.html#a812cb307cb60ef31f1ffe81a9eee6bb1">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a6be6dccd000eff4baadd33297e5cc419">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterAddByteMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a032e94beee7b3f7dc9e3bc999f51dfb3">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterAddByteToHalf()
 : <a class="el" href="_s_d_l__image_filter_8c.html#ab82db97d129c8cfc36780bcdc6286fcc">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a8cbdffd5dbcab3b5dc9207d57af616b3">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterAddByteToHalfMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ae7c132373eb318713635c4e82f478f9d">SDL_imageFilter.c</a>
-</li>
-<li>SDL_imageFilterAddMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ace0bf40de8d58bbd8d6ff9c3fc04ec6e">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterAddUint()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a660543426c47dfec39a349eb3b8f905b">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#af1a17645dea69e52c7bd560521286765">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterAddUintMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ac337129ad7f11e7e33d73fa39b8239eb">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterAlignStack()
 : <a class="el" href="_s_d_l__image_filter_8c.html#afbfcc8c03e3d791ac74c955d14a135e4">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a08a45265e9e84bf8beedebba26da947c">SDL_imageFilter.h</a>
@@ -172,37 +157,22 @@
 : <a class="el" href="_s_d_l__image_filter_8c.html#a951a062e15df290a137428e1e0f4d5ce">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#ad5bf97d7e39d018d2eeb570e97edf8c0">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterBinarizeUsingThresholdMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a6f06923cb26d510ad72d4b1dd6583284">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterBitAnd()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a85837ce1b5de1f907b6b9053922b5cbc">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a5f67460c0b89dadd49d04832608a345b">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterBitAndMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a8a86c969daeb874fb643347592003484">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterBitNegation()
 : <a class="el" href="_s_d_l__image_filter_8c.html#ac3abfaa8ec2e88c3c4893588c5555856">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#abc3c3fc5f018e271f6393921f3964d31">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterBitNegationMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a1b522e196f9647501c6badd1de727b97">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterBitOr()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a5cf1c477f4e32d02f74ee95d9f7b0021">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a0acf0eabba33f8fa7acbc08dc3015cd3">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterBitOrMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a2cd7db5de491dce5dfcf292fc241031d">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterClipToRange()
 : <a class="el" href="_s_d_l__image_filter_8c.html#ab7224abc4ecc1b8a6f4441ef8379515f">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#ae9d552de9cf5a4a1716d91ee905eafd7">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterClipToRangeMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#adc2b0f3e3a32724df1325a2121e9f96d">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterConvolveKernel3x3Divide()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a8e7e4138a93e26f1912763189d407770">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a7286cd21fa0a0cfb0606806dacfbe121">SDL_imageFilter.h</a>
@@ -239,27 +209,21 @@
 : <a class="el" href="_s_d_l__image_filter_8c.html#a0ea22f01c6a4724bac307da3e5355f58">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#aeb8ed56aa7de3c8b0d0b2aa9163c3e37">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterDivASM()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a95791d257c510c597a2ef542f43d6678">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterMean()
 : <a class="el" href="_s_d_l__image_filter_8c.html#ace072118fef77973210eb04fb4bfc779">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a69cfa83c5d198c8ae4be4ab86e8d3b8f">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterMeanMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ae3a61d6df0940ef96ccc7b48a0fc8966">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterMMXdetect()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917">SDL_imageFilter.c</a>
-, <a class="el" href="_s_d_l__image_filter_8h.html#a5823f6eb23fe8e74764a94f3d78204ef">SDL_imageFilter.h</a>
+: <a class="el" href="_s_d_l__image_filter_8h.html#a5823f6eb23fe8e74764a94f3d78204ef">SDL_imageFilter.h</a>
+, <a class="el" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917">SDL_imageFilter.c</a>
 </li>
 <li>SDL_imageFilterMMXoff()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a5dff661660755161bb4aaf6199cd1384">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a403adc470cb1dd34520f18d55804d4ea">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterMMXon()
-: <a class="el" href="_s_d_l__image_filter_8h.html#a848ce7e9551b25fea19fe1fb739f74fb">SDL_imageFilter.h</a>
-, <a class="el" href="_s_d_l__image_filter_8c.html#a353ee234c3b51b33c4c5c4b30db5832d">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8c.html#a353ee234c3b51b33c4c5c4b30db5832d">SDL_imageFilter.c</a>
+, <a class="el" href="_s_d_l__image_filter_8h.html#a848ce7e9551b25fea19fe1fb739f74fb">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterMult()
 : <a class="el" href="_s_d_l__image_filter_8c.html#af4633031d40a9ea0956a2f3c6c87a384">SDL_imageFilter.c</a>
@@ -269,26 +233,14 @@
 : <a class="el" href="_s_d_l__image_filter_8c.html#a06f7a19d6e2fc89d7b48cc45d715806d">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#add06bb6ea7847fc13a3041ddceb4ac3c">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterMultByByteMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ad18d23ec352f7508f89e47cff9c9a4ea">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterMultDivby2()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a80737f6427c7bdb30d39a92f6524fc14">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#aa19248767b1fd9ffdea4ba69b9f00175">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterMultDivby2MMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a12272cd24ce7f09bc2c35c609e025983">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterMultDivby4()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a30e685653eb1050c7d48feaeb8f801a1">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#aa92bea3946c8081c9656304a7d944fae">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterMultDivby4MMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a1f8bf77328e934701c7a9e4ef51d9b41">SDL_imageFilter.c</a>
-</li>
-<li>SDL_imageFilterMultMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ad565921b533977ad2059d58d3c4a3094">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterMultNor()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a5f3c9fd40426bb46eba5ac167505dcc5">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#ac4f3446d0da18746b48606fe37c26385">SDL_imageFilter.h</a>
@@ -300,87 +252,60 @@
 : <a class="el" href="_s_d_l__image_filter_8h.html#aacb316a18d8cb7999d5d53ee5e7b9750">SDL_imageFilter.h</a>
 , <a class="el" href="_s_d_l__image_filter_8c.html#ab018ace4db884cac953b06b09c00828b">SDL_imageFilter.c</a>
 </li>
-<li>SDL_imageFilterNormalizeLinearMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a2e7631c748eb46544e7be40fa64bc232">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterRestoreStack()
-: <a class="el" href="_s_d_l__image_filter_8h.html#a84f360601d5e6e017f0e74a2cf83be6c">SDL_imageFilter.h</a>
-, <a class="el" href="_s_d_l__image_filter_8c.html#a3147eb5ddd4965d65702f0e533b42974">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8c.html#a3147eb5ddd4965d65702f0e533b42974">SDL_imageFilter.c</a>
+, <a class="el" href="_s_d_l__image_filter_8h.html#a84f360601d5e6e017f0e74a2cf83be6c">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterShiftLeft()
-: <a class="el" href="_s_d_l__image_filter_8h.html#a084f9544f049cc01e7b2f1090534abbf">SDL_imageFilter.h</a>
-, <a class="el" href="_s_d_l__image_filter_8c.html#a98372fea76310903abef7808db10d226">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8c.html#a98372fea76310903abef7808db10d226">SDL_imageFilter.c</a>
+, <a class="el" href="_s_d_l__image_filter_8h.html#a084f9544f049cc01e7b2f1090534abbf">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterShiftLeftByte()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a4561a73b249a26babc4c469ffbdae604">SDL_imageFilter.c</a>
-, <a class="el" href="_s_d_l__image_filter_8h.html#ac32f1ea9acbee51c2db94224ef6f7fd2">SDL_imageFilter.h</a>
-</li>
-<li>SDL_imageFilterShiftLeftByteMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a0d383d58c9a5262dbac636f6ebe26b62">SDL_imageFilter.c</a>
-</li>
-<li>SDL_imageFilterShiftLeftMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a3ea84aa8cf313790dc7468f2f4f29497">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8h.html#ac32f1ea9acbee51c2db94224ef6f7fd2">SDL_imageFilter.h</a>
+, <a class="el" href="_s_d_l__image_filter_8c.html#a4561a73b249a26babc4c469ffbdae604">SDL_imageFilter.c</a>
 </li>
 <li>SDL_imageFilterShiftLeftUint()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a250e796fb2db470da0a78b74b78114e8">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a4fd6d4a9711c13163496587454d9f1a2">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterShiftLeftUintMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a4a4260369d38e7bbcd9e3690bf57b8d4">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterShiftRight()
 : <a class="el" href="_s_d_l__image_filter_8h.html#a931f1232cd03acd2ba90af222625f4ca">SDL_imageFilter.h</a>
 , <a class="el" href="_s_d_l__image_filter_8c.html#a68851aed2dcc5dfd2f3b258236f3b88c">SDL_imageFilter.c</a>
 </li>
 <li>SDL_imageFilterShiftRightAndMultByByte()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a0713d6c267fba9756d6beae81e89f9e4">SDL_imageFilter.c</a>
-, <a class="el" href="_s_d_l__image_filter_8h.html#a40e1e21ede9a7ed1eddac2cdbfd0b079">SDL_imageFilter.h</a>
-</li>
-<li>SDL_imageFilterShiftRightAndMultByByteMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a80d18182b54de0ec1f8d9a79dc5b879a">SDL_imageFilter.c</a>
-</li>
-<li>SDL_imageFilterShiftRightMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a696568e00b153011f0673bdf1297e9fa">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8h.html#a40e1e21ede9a7ed1eddac2cdbfd0b079">SDL_imageFilter.h</a>
+, <a class="el" href="_s_d_l__image_filter_8c.html#a0713d6c267fba9756d6beae81e89f9e4">SDL_imageFilter.c</a>
 </li>
 <li>SDL_imageFilterShiftRightUint()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a540d4625d76bcd03318c2a59ce650fdb">SDL_imageFilter.c</a>
-, <a class="el" href="_s_d_l__image_filter_8h.html#a4ccddf5c575cc4d6074c9a54789240a6">SDL_imageFilter.h</a>
-</li>
-<li>SDL_imageFilterShiftRightUintMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a23430360ee5ce8031158831a44e83d56">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8h.html#a4ccddf5c575cc4d6074c9a54789240a6">SDL_imageFilter.h</a>
+, <a class="el" href="_s_d_l__image_filter_8c.html#a540d4625d76bcd03318c2a59ce650fdb">SDL_imageFilter.c</a>
 </li>
 <li>SDL_imageFilterSobelX()
-: <a class="el" href="_s_d_l__image_filter_8h.html#a2a0e4e259150abbe33bcddb046c367ba">SDL_imageFilter.h</a>
-, <a class="el" href="_s_d_l__image_filter_8c.html#a015fe05161b701162d9ecffb01413f1e">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8c.html#a015fe05161b701162d9ecffb01413f1e">SDL_imageFilter.c</a>
+, <a class="el" href="_s_d_l__image_filter_8h.html#a2a0e4e259150abbe33bcddb046c367ba">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterSobelXShiftRight()
 : <a class="el" href="_s_d_l__image_filter_8h.html#ab9cc925cd9b135e245936d718b459032">SDL_imageFilter.h</a>
 , <a class="el" href="_s_d_l__image_filter_8c.html#a0d21af83f0183fcd697324cffe3ab3d7">SDL_imageFilter.c</a>
 </li>
 <li>SDL_imageFilterSub()
-: <a class="el" href="_s_d_l__image_filter_8h.html#a0e0fb80a3dad33d61a8147c7fb9f529d">SDL_imageFilter.h</a>
-, <a class="el" href="_s_d_l__image_filter_8c.html#a3c01cf8576ea7a0dfc09dbaa953c9287">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8c.html#a3c01cf8576ea7a0dfc09dbaa953c9287">SDL_imageFilter.c</a>
+, <a class="el" href="_s_d_l__image_filter_8h.html#a0e0fb80a3dad33d61a8147c7fb9f529d">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterSubByte()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a387fb6f0d48cc5d08f37f7f9b92d14b2">SDL_imageFilter.c</a>
-, <a class="el" href="_s_d_l__image_filter_8h.html#af8f4ab4050a0661c7696783ba1a1b12b">SDL_imageFilter.h</a>
+: <a class="el" href="_s_d_l__image_filter_8h.html#af8f4ab4050a0661c7696783ba1a1b12b">SDL_imageFilter.h</a>
+, <a class="el" href="_s_d_l__image_filter_8c.html#a387fb6f0d48cc5d08f37f7f9b92d14b2">SDL_imageFilter.c</a>
 </li>
 <li>SDL_imageFilterSubByteMMX()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a657e128016cc448778007d8b6475dd65">SDL_imageFilter.c</a>
 </li>
-<li>SDL_imageFilterSubMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a45d54d410e677d32ef33ef6226e9ea12">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterSubUint()
 : <a class="el" href="_s_d_l__image_filter_8h.html#ae2f3c5992701bded7c2d256bbbfb403f">SDL_imageFilter.h</a>
 , <a class="el" href="_s_d_l__image_filter_8c.html#abb343ef95e22945e1d4d648b2e176e64">SDL_imageFilter.c</a>
 </li>
-<li>SDL_imageFilterSubUintMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#acfb143905b751680650576e75847f9c1">SDL_imageFilter.c</a>
-</li>
 <li>SDL_initFramerate()
-: <a class="el" href="_s_d_l__framerate_8h.html#a3ca69231486837c809fdcbe5b0a10787">SDL_framerate.h</a>
-, <a class="el" href="_s_d_l__framerate_8c.html#a444ebaaaa6b1ceeafa921562bdab1a44">SDL_framerate.c</a>
+: <a class="el" href="_s_d_l__framerate_8c.html#a444ebaaaa6b1ceeafa921562bdab1a44">SDL_framerate.c</a>
+, <a class="el" href="_s_d_l__framerate_8h.html#a3ca69231486837c809fdcbe5b0a10787">SDL_framerate.h</a>
 </li>
 <li>SDL_ROTOZOOM_SCOPE
 : <a class="el" href="_s_d_l__rotozoom_8h.html#a5115250bd769f389e278cd5adfb63179">SDL_rotozoom.h</a>
@@ -400,12 +325,12 @@
 : <a class="el" href="_s_d_l__rotozoom_8h.html#abeb6ae7618fcb315d0399fe65849a2e8">SDL_rotozoom.h</a>
 </li>
 <li>stringColor()
-: <a class="el" href="_s_d_l__gfx_primitives_8c.html#a62d2ba55abc7673f2dfa29e6bbffefdf">SDL_gfxPrimitives.c</a>
-, <a class="el" href="_s_d_l__gfx_primitives_8h.html#a96b6a43c6ef4753996e33bb7fea483bc">SDL_gfxPrimitives.h</a>
+: <a class="el" href="_s_d_l__gfx_primitives_8h.html#a96b6a43c6ef4753996e33bb7fea483bc">SDL_gfxPrimitives.h</a>
+, <a class="el" href="_s_d_l__gfx_primitives_8c.html#a62d2ba55abc7673f2dfa29e6bbffefdf">SDL_gfxPrimitives.c</a>
 </li>
 <li>stringRGBA()
-: <a class="el" href="_s_d_l__gfx_primitives_8c.html#a6ca71826e311bdd9acf13b009256aa1c">SDL_gfxPrimitives.c</a>
-, <a class="el" href="_s_d_l__gfx_primitives_8h.html#a769833ae414222099783a9b69bed4009">SDL_gfxPrimitives.h</a>
+: <a class="el" href="_s_d_l__gfx_primitives_8h.html#a769833ae414222099783a9b69bed4009">SDL_gfxPrimitives.h</a>
+, <a class="el" href="_s_d_l__gfx_primitives_8c.html#a6ca71826e311bdd9acf13b009256aa1c">SDL_gfxPrimitives.c</a>
 </li>
 <li>SWAP_32
 : <a class="el" href="_s_d_l__image_filter_8c.html#a700fb30611761c46a674a45cc28ff561">SDL_imageFilter.c</a>
diff --git a/Docs/html/globals_0x74.html b/Docs/html/globals_0x74.html
index 744b698..cd84fac 100644
--- a/Docs/html/globals_0x74.html
+++ b/Docs/html/globals_0x74.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x76.html b/Docs/html/globals_0x76.html
index a76d739..9e76b30 100644
--- a/Docs/html/globals_0x76.html
+++ b/Docs/html/globals_0x76.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_0x7a.html b/Docs/html/globals_0x7a.html
index bae9538..53877fd 100644
--- a/Docs/html/globals_0x7a.html
+++ b/Docs/html/globals_0x7a.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_defs.html b/Docs/html/globals_defs.html
index cffda60..3bba08d 100644
--- a/Docs/html/globals_defs.html
+++ b/Docs/html/globals_defs.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func.html b/Docs/html/globals_func.html
index 7c9eeff..70f0664 100644
--- a/Docs/html/globals_func.html
+++ b/Docs/html/globals_func.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -94,9 +94,6 @@
 <li>_colorkey()
 : <a class="el" href="_s_d_l__rotozoom_8c.html#afe2a2e4dc6cc462c5fc98a2110b8e1ce">SDL_rotozoom.c</a>
 </li>
-<li>_cpuFlags()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ade15666303ddc71c543f44cf1536d00e">SDL_imageFilter.c</a>
-</li>
 <li>_evaluateBezier()
 : <a class="el" href="_s_d_l__gfx_primitives_8c.html#a888411ec724ddb9ff19cf9ba9fc067df">SDL_gfxPrimitives.c</a>
 </li>
diff --git a/Docs/html/globals_func_0x61.html b/Docs/html/globals_func_0x61.html
index f6a4380..fe059bb 100644
--- a/Docs/html/globals_func_0x61.html
+++ b/Docs/html/globals_func_0x61.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x62.html b/Docs/html/globals_func_0x62.html
index 45587b2..5ceded7 100644
--- a/Docs/html/globals_func_0x62.html
+++ b/Docs/html/globals_func_0x62.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x63.html b/Docs/html/globals_func_0x63.html
index 59c607d..1c9a673 100644
--- a/Docs/html/globals_func_0x63.html
+++ b/Docs/html/globals_func_0x63.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x65.html b/Docs/html/globals_func_0x65.html
index 038ba16..1d51068 100644
--- a/Docs/html/globals_func_0x65.html
+++ b/Docs/html/globals_func_0x65.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x66.html b/Docs/html/globals_func_0x66.html
index a7dc18a..3ee7675 100644
--- a/Docs/html/globals_func_0x66.html
+++ b/Docs/html/globals_func_0x66.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x67.html b/Docs/html/globals_func_0x67.html
index f124d92..b8aa55a 100644
--- a/Docs/html/globals_func_0x67.html
+++ b/Docs/html/globals_func_0x67.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x68.html b/Docs/html/globals_func_0x68.html
index 96ed4f2..8ae8d37 100644
--- a/Docs/html/globals_func_0x68.html
+++ b/Docs/html/globals_func_0x68.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x6c.html b/Docs/html/globals_func_0x6c.html
index 01ed742..1aa80c1 100644
--- a/Docs/html/globals_func_0x6c.html
+++ b/Docs/html/globals_func_0x6c.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x70.html b/Docs/html/globals_func_0x70.html
index b9c9c8f..2dc7695 100644
--- a/Docs/html/globals_func_0x70.html
+++ b/Docs/html/globals_func_0x70.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x72.html b/Docs/html/globals_func_0x72.html
index ac51508..7ea5be2 100644
--- a/Docs/html/globals_func_0x72.html
+++ b/Docs/html/globals_func_0x72.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x73.html b/Docs/html/globals_func_0x73.html
index 9ec1bc6..50b1dd4 100644
--- a/Docs/html/globals_func_0x73.html
+++ b/Docs/html/globals_func_0x73.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -110,37 +110,22 @@
 : <a class="el" href="_s_d_l__image_filter_8c.html#a472909f904274255cd6793c520172e48">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a789ce070edcc478ad97a0d7ff90e6aa2">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterAbsDiffMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a601bf863185e51af32c6008ecb0a5095">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterAdd()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a9f06507eb0b63198dbd67495d61c9b20">SDL_imageFilter.c</a>
-, <a class="el" href="_s_d_l__image_filter_8h.html#a9034268e2f51550d8f1d6084bda45194">SDL_imageFilter.h</a>
+: <a class="el" href="_s_d_l__image_filter_8h.html#a9034268e2f51550d8f1d6084bda45194">SDL_imageFilter.h</a>
+, <a class="el" href="_s_d_l__image_filter_8c.html#a9f06507eb0b63198dbd67495d61c9b20">SDL_imageFilter.c</a>
 </li>
 <li>SDL_imageFilterAddByte()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a812cb307cb60ef31f1ffe81a9eee6bb1">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a6be6dccd000eff4baadd33297e5cc419">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterAddByteMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a032e94beee7b3f7dc9e3bc999f51dfb3">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterAddByteToHalf()
 : <a class="el" href="_s_d_l__image_filter_8c.html#ab82db97d129c8cfc36780bcdc6286fcc">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a8cbdffd5dbcab3b5dc9207d57af616b3">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterAddByteToHalfMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ae7c132373eb318713635c4e82f478f9d">SDL_imageFilter.c</a>
-</li>
-<li>SDL_imageFilterAddMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ace0bf40de8d58bbd8d6ff9c3fc04ec6e">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterAddUint()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a660543426c47dfec39a349eb3b8f905b">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#af1a17645dea69e52c7bd560521286765">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterAddUintMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ac337129ad7f11e7e33d73fa39b8239eb">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterAlignStack()
 : <a class="el" href="_s_d_l__image_filter_8c.html#afbfcc8c03e3d791ac74c955d14a135e4">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a08a45265e9e84bf8beedebba26da947c">SDL_imageFilter.h</a>
@@ -149,37 +134,22 @@
 : <a class="el" href="_s_d_l__image_filter_8c.html#a951a062e15df290a137428e1e0f4d5ce">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#ad5bf97d7e39d018d2eeb570e97edf8c0">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterBinarizeUsingThresholdMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a6f06923cb26d510ad72d4b1dd6583284">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterBitAnd()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a85837ce1b5de1f907b6b9053922b5cbc">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a5f67460c0b89dadd49d04832608a345b">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterBitAndMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a8a86c969daeb874fb643347592003484">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterBitNegation()
 : <a class="el" href="_s_d_l__image_filter_8c.html#ac3abfaa8ec2e88c3c4893588c5555856">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#abc3c3fc5f018e271f6393921f3964d31">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterBitNegationMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a1b522e196f9647501c6badd1de727b97">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterBitOr()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a5cf1c477f4e32d02f74ee95d9f7b0021">SDL_imageFilter.c</a>
-, <a class="el" href="_s_d_l__image_filter_8h.html#a0acf0eabba33f8fa7acbc08dc3015cd3">SDL_imageFilter.h</a>
-</li>
-<li>SDL_imageFilterBitOrMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a2cd7db5de491dce5dfcf292fc241031d">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8h.html#a0acf0eabba33f8fa7acbc08dc3015cd3">SDL_imageFilter.h</a>
+, <a class="el" href="_s_d_l__image_filter_8c.html#a5cf1c477f4e32d02f74ee95d9f7b0021">SDL_imageFilter.c</a>
 </li>
 <li>SDL_imageFilterClipToRange()
 : <a class="el" href="_s_d_l__image_filter_8c.html#ab7224abc4ecc1b8a6f4441ef8379515f">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#ae9d552de9cf5a4a1716d91ee905eafd7">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterClipToRangeMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#adc2b0f3e3a32724df1325a2121e9f96d">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterConvolveKernel3x3Divide()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a8e7e4138a93e26f1912763189d407770">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a7286cd21fa0a0cfb0606806dacfbe121">SDL_imageFilter.h</a>
@@ -216,16 +186,10 @@
 : <a class="el" href="_s_d_l__image_filter_8c.html#a0ea22f01c6a4724bac307da3e5355f58">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#aeb8ed56aa7de3c8b0d0b2aa9163c3e37">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterDivASM()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a95791d257c510c597a2ef542f43d6678">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterMean()
 : <a class="el" href="_s_d_l__image_filter_8c.html#ace072118fef77973210eb04fb4bfc779">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a69cfa83c5d198c8ae4be4ab86e8d3b8f">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterMeanMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ae3a61d6df0940ef96ccc7b48a0fc8966">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterMMXdetect()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a798ce71024ee1a1d1b174fd60fe79917">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a5823f6eb23fe8e74764a94f3d78204ef">SDL_imageFilter.h</a>
@@ -243,29 +207,17 @@
 , <a class="el" href="_s_d_l__image_filter_8h.html#a4657c2a1e1bf55d3241dc737cd618409">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterMultByByte()
-: <a class="el" href="_s_d_l__image_filter_8h.html#add06bb6ea7847fc13a3041ddceb4ac3c">SDL_imageFilter.h</a>
-, <a class="el" href="_s_d_l__image_filter_8c.html#a06f7a19d6e2fc89d7b48cc45d715806d">SDL_imageFilter.c</a>
-</li>
-<li>SDL_imageFilterMultByByteMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ad18d23ec352f7508f89e47cff9c9a4ea">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8c.html#a06f7a19d6e2fc89d7b48cc45d715806d">SDL_imageFilter.c</a>
+, <a class="el" href="_s_d_l__image_filter_8h.html#add06bb6ea7847fc13a3041ddceb4ac3c">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterMultDivby2()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a80737f6427c7bdb30d39a92f6524fc14">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#aa19248767b1fd9ffdea4ba69b9f00175">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterMultDivby2MMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a12272cd24ce7f09bc2c35c609e025983">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterMultDivby4()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a30e685653eb1050c7d48feaeb8f801a1">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#aa92bea3946c8081c9656304a7d944fae">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterMultDivby4MMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a1f8bf77328e934701c7a9e4ef51d9b41">SDL_imageFilter.c</a>
-</li>
-<li>SDL_imageFilterMultMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#ad565921b533977ad2059d58d3c4a3094">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterMultNor()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a5f3c9fd40426bb46eba5ac167505dcc5">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#ac4f3446d0da18746b48606fe37c26385">SDL_imageFilter.h</a>
@@ -277,9 +229,6 @@
 : <a class="el" href="_s_d_l__image_filter_8c.html#ab018ace4db884cac953b06b09c00828b">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#aacb316a18d8cb7999d5d53ee5e7b9750">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterNormalizeLinearMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a2e7631c748eb46544e7be40fa64bc232">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterRestoreStack()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a3147eb5ddd4965d65702f0e533b42974">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a84f360601d5e6e017f0e74a2cf83be6c">SDL_imageFilter.h</a>
@@ -292,69 +241,45 @@
 : <a class="el" href="_s_d_l__image_filter_8h.html#ac32f1ea9acbee51c2db94224ef6f7fd2">SDL_imageFilter.h</a>
 , <a class="el" href="_s_d_l__image_filter_8c.html#a4561a73b249a26babc4c469ffbdae604">SDL_imageFilter.c</a>
 </li>
-<li>SDL_imageFilterShiftLeftByteMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a0d383d58c9a5262dbac636f6ebe26b62">SDL_imageFilter.c</a>
-</li>
-<li>SDL_imageFilterShiftLeftMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a3ea84aa8cf313790dc7468f2f4f29497">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterShiftLeftUint()
 : <a class="el" href="_s_d_l__image_filter_8h.html#a4fd6d4a9711c13163496587454d9f1a2">SDL_imageFilter.h</a>
 , <a class="el" href="_s_d_l__image_filter_8c.html#a250e796fb2db470da0a78b74b78114e8">SDL_imageFilter.c</a>
 </li>
-<li>SDL_imageFilterShiftLeftUintMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a4a4260369d38e7bbcd9e3690bf57b8d4">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterShiftRight()
-: <a class="el" href="_s_d_l__image_filter_8h.html#a931f1232cd03acd2ba90af222625f4ca">SDL_imageFilter.h</a>
-, <a class="el" href="_s_d_l__image_filter_8c.html#a68851aed2dcc5dfd2f3b258236f3b88c">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8c.html#a68851aed2dcc5dfd2f3b258236f3b88c">SDL_imageFilter.c</a>
+, <a class="el" href="_s_d_l__image_filter_8h.html#a931f1232cd03acd2ba90af222625f4ca">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterShiftRightAndMultByByte()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a0713d6c267fba9756d6beae81e89f9e4">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a40e1e21ede9a7ed1eddac2cdbfd0b079">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterShiftRightAndMultByByteMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a80d18182b54de0ec1f8d9a79dc5b879a">SDL_imageFilter.c</a>
-</li>
-<li>SDL_imageFilterShiftRightMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a696568e00b153011f0673bdf1297e9fa">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterShiftRightUint()
 : <a class="el" href="_s_d_l__image_filter_8h.html#a4ccddf5c575cc4d6074c9a54789240a6">SDL_imageFilter.h</a>
 , <a class="el" href="_s_d_l__image_filter_8c.html#a540d4625d76bcd03318c2a59ce650fdb">SDL_imageFilter.c</a>
 </li>
-<li>SDL_imageFilterShiftRightUintMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a23430360ee5ce8031158831a44e83d56">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterSobelX()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a015fe05161b701162d9ecffb01413f1e">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#a2a0e4e259150abbe33bcddb046c367ba">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterSobelXShiftRight()
-: <a class="el" href="_s_d_l__image_filter_8h.html#ab9cc925cd9b135e245936d718b459032">SDL_imageFilter.h</a>
-, <a class="el" href="_s_d_l__image_filter_8c.html#a0d21af83f0183fcd697324cffe3ab3d7">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8c.html#a0d21af83f0183fcd697324cffe3ab3d7">SDL_imageFilter.c</a>
+, <a class="el" href="_s_d_l__image_filter_8h.html#ab9cc925cd9b135e245936d718b459032">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterSub()
-: <a class="el" href="_s_d_l__image_filter_8h.html#a0e0fb80a3dad33d61a8147c7fb9f529d">SDL_imageFilter.h</a>
-, <a class="el" href="_s_d_l__image_filter_8c.html#a3c01cf8576ea7a0dfc09dbaa953c9287">SDL_imageFilter.c</a>
+: <a class="el" href="_s_d_l__image_filter_8c.html#a3c01cf8576ea7a0dfc09dbaa953c9287">SDL_imageFilter.c</a>
+, <a class="el" href="_s_d_l__image_filter_8h.html#a0e0fb80a3dad33d61a8147c7fb9f529d">SDL_imageFilter.h</a>
 </li>
 <li>SDL_imageFilterSubByte()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a387fb6f0d48cc5d08f37f7f9b92d14b2">SDL_imageFilter.c</a>
-, <a class="el" href="_s_d_l__image_filter_8h.html#af8f4ab4050a0661c7696783ba1a1b12b">SDL_imageFilter.h</a>
+: <a class="el" href="_s_d_l__image_filter_8h.html#af8f4ab4050a0661c7696783ba1a1b12b">SDL_imageFilter.h</a>
+, <a class="el" href="_s_d_l__image_filter_8c.html#a387fb6f0d48cc5d08f37f7f9b92d14b2">SDL_imageFilter.c</a>
 </li>
 <li>SDL_imageFilterSubByteMMX()
 : <a class="el" href="_s_d_l__image_filter_8c.html#a657e128016cc448778007d8b6475dd65">SDL_imageFilter.c</a>
 </li>
-<li>SDL_imageFilterSubMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#a45d54d410e677d32ef33ef6226e9ea12">SDL_imageFilter.c</a>
-</li>
 <li>SDL_imageFilterSubUint()
 : <a class="el" href="_s_d_l__image_filter_8c.html#abb343ef95e22945e1d4d648b2e176e64">SDL_imageFilter.c</a>
 , <a class="el" href="_s_d_l__image_filter_8h.html#ae2f3c5992701bded7c2d256bbbfb403f">SDL_imageFilter.h</a>
 </li>
-<li>SDL_imageFilterSubUintMMX()
-: <a class="el" href="_s_d_l__image_filter_8c.html#acfb143905b751680650576e75847f9c1">SDL_imageFilter.c</a>
-</li>
 <li>SDL_initFramerate()
 : <a class="el" href="_s_d_l__framerate_8c.html#a444ebaaaa6b1ceeafa921562bdab1a44">SDL_framerate.c</a>
 , <a class="el" href="_s_d_l__framerate_8h.html#a3ca69231486837c809fdcbe5b0a10787">SDL_framerate.h</a>
@@ -364,16 +289,16 @@
 , <a class="el" href="_s_d_l__framerate_8h.html#a186ef8e6b1ee4ab36e05b162545fb0e4">SDL_framerate.h</a>
 </li>
 <li>shrinkSurface()
-: <a class="el" href="_s_d_l__rotozoom_8h.html#a7a7ac2cc0d79d282b5a4c76143b7e7a9">SDL_rotozoom.h</a>
-, <a class="el" href="_s_d_l__rotozoom_8c.html#aad3bf0cd89cc39ff874ffa778fa1495d">SDL_rotozoom.c</a>
+: <a class="el" href="_s_d_l__rotozoom_8c.html#aad3bf0cd89cc39ff874ffa778fa1495d">SDL_rotozoom.c</a>
+, <a class="el" href="_s_d_l__rotozoom_8h.html#a7a7ac2cc0d79d282b5a4c76143b7e7a9">SDL_rotozoom.h</a>
 </li>
 <li>stringColor()
 : <a class="el" href="_s_d_l__gfx_primitives_8h.html#a96b6a43c6ef4753996e33bb7fea483bc">SDL_gfxPrimitives.h</a>
 , <a class="el" href="_s_d_l__gfx_primitives_8c.html#a62d2ba55abc7673f2dfa29e6bbffefdf">SDL_gfxPrimitives.c</a>
 </li>
 <li>stringRGBA()
-: <a class="el" href="_s_d_l__gfx_primitives_8h.html#a769833ae414222099783a9b69bed4009">SDL_gfxPrimitives.h</a>
-, <a class="el" href="_s_d_l__gfx_primitives_8c.html#a6ca71826e311bdd9acf13b009256aa1c">SDL_gfxPrimitives.c</a>
+: <a class="el" href="_s_d_l__gfx_primitives_8c.html#a6ca71826e311bdd9acf13b009256aa1c">SDL_gfxPrimitives.c</a>
+, <a class="el" href="_s_d_l__gfx_primitives_8h.html#a769833ae414222099783a9b69bed4009">SDL_gfxPrimitives.h</a>
 </li>
 </ul>
 </div><!-- contents -->
diff --git a/Docs/html/globals_func_0x74.html b/Docs/html/globals_func_0x74.html
index 4b016e6..061ff81 100644
--- a/Docs/html/globals_func_0x74.html
+++ b/Docs/html/globals_func_0x74.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x76.html b/Docs/html/globals_func_0x76.html
index 50b7c68..29824fe 100644
--- a/Docs/html/globals_func_0x76.html
+++ b/Docs/html/globals_func_0x76.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_func_0x7a.html b/Docs/html/globals_func_0x7a.html
index 49e1741..0bf598e 100644
--- a/Docs/html/globals_func_0x7a.html
+++ b/Docs/html/globals_func_0x7a.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_type.html b/Docs/html/globals_type.html
index 05aadaf..a0cc5d2 100755
--- a/Docs/html/globals_type.html
+++ b/Docs/html/globals_type.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/globals_vars.html b/Docs/html/globals_vars.html
index 6538bc4..24a4b42 100644
--- a/Docs/html/globals_vars.html
+++ b/Docs/html/globals_vars.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
diff --git a/Docs/html/index.html b/Docs/html/index.html
index a67eb0e..11bd794 100644
--- a/Docs/html/index.html
+++ b/Docs/html/index.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -113,7 +113,7 @@ Unix/Linux</h4>
 <p>The library compiles and is tested for a Linux target (gcc compiler) via the the usual configure;make;make install sequence.</p>
 <h4><a class="anchor" id="platformwindows"></a>
 Windows</h4>
-<p>A Win32 target is available (VisualC6/7/8/9, mingw32, xmingw32 cross-compiler). The SDL_gfx.sln will open VS2010 (the old VS2008 .sln is also still included) including express versions.</p>
+<p>A Win32 target is available (VisualC6/7/8/9, mingw32, xmingw32 cross-compiler). The SDL_gfx_VS2010.sln will open VS2010 (the old VS2008 .sln is also still included) including express versions.</p>
 <p>See "Other Builds" for additional makefiles (may be out of date).</p>
 <p>When using the cross-compiler (available on the author's homepage, very out of date), the build process generates .DLLs. You can use the command line 'LIB.EXE' tool to generate VC6 compatible .LIB files for linking purposes.</p>
 <h4><a class="anchor" id="platformosx"></a>
@@ -168,7 +168,7 @@ No-MMX</h3>
 <div class="fragment"><pre class="fragment">        ./configure --disable-mmx && make
 </pre></div><h3><a class="anchor" id="vs9"></a>
 Windows (VC9, VS2010)</h3>
-<p>Open SDL_gfx.sln solution file and review README.</p>
+<p>Open SDL_gfx_VS2010.sln solution file and review README.</p>
 <h3><a class="anchor" id="vs8"></a>
 Windows (VC8, VS2008)</h3>
 <p>Open SDL_gfx_VS2008.sln solution file and review README.</p>
@@ -378,14 +378,31 @@ Contributors</h2>
 <ul>
 <li>Pixel blend routine patches contributed by mitja at lxnav dot com - thanks Mitja.</li>
 </ul>
+<ul>
+<li>ImageFilter patches contributed by beuc at beuc dot net - thanks Sylvain.</li>
+</ul>
+<ul>
+<li>Bug reports contributed by Yannick dot Guesnet at univ-rouen dot fr -</li>
+<li>thanks Yannick.</li>
+</ul>
 <h2><a class="anchor" id="changelog_sec"></a>
 Change Log</h2>
 <div class="fragment"><pre class="fragment">CHANGES/VERSION
 ===============
 
-Ver 2.0.24 -
+Ver 2.0.25 - Sun, Oct 27, 2013  3:08:15 PM
+* Added patch for 32- and 64-bit GCC-compiled MMX support
+  (contributed by Sylvain Beucler, backported from SDL2_gfx)
+* Fixed bug in _aaline when x1>x2 and dy==0 (found by Yannick 
+  Guesnet - thanks!)
+* Updated documentation.
+* Fixed header in SDL_gfxBlitFunc.h (reported by Jaders77 on
+  sourceforge - thanks!)
+
+Ver 2.0.24 - Sun, Jul 22, 2012  9:27:29 AM
 * Removed some missed LGPL references
 * Fixed thick line swap bug (patch contributed by Thien-Thi)
+  and added accuracy test for thick line
 * Improved interface to SDL_framerateDelay (idea contributed by
   Neil)
 * Fixed dramerate modules dependency on SDL_GetTicks returning a 
diff --git a/Docs/html/struct_f_p_smanager.html b/Docs/html/struct_f_p_smanager.html
index 1544ebc..142b923 100644
--- a/Docs/html/struct_f_p_smanager.html
+++ b/Docs/html/struct_f_p_smanager.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -153,7 +153,7 @@ Data Fields</h2></td></tr>
 </div>
 </div>
 <hr/>The documentation for this struct was generated from the following file:<ul>
-<li>C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__framerate_8h_source.html">SDL_framerate.h</a></li>
+<li>I:/Sources/sdlgfx/<a class="el" href="_s_d_l__framerate_8h_source.html">SDL_framerate.h</a></li>
 </ul>
 </div><!-- contents -->
 
diff --git a/Docs/html/struct_s_d_l__gfx_blit_info.html b/Docs/html/struct_s_d_l__gfx_blit_info.html
index 6b46b9e..1c82d62 100644
--- a/Docs/html/struct_s_d_l__gfx_blit_info.html
+++ b/Docs/html/struct_s_d_l__gfx_blit_info.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -265,7 +265,7 @@ Data Fields</h2></td></tr>
 </div>
 </div>
 <hr/>The documentation for this struct was generated from the following file:<ul>
-<li>C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_blit_func_8h_source.html">SDL_gfxBlitFunc.h</a></li>
+<li>I:/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_blit_func_8h_source.html">SDL_gfxBlitFunc.h</a></li>
 </ul>
 </div><!-- contents -->
 
diff --git a/Docs/html/struct_s_d_l__gfx_bresenham_iterator.html b/Docs/html/struct_s_d_l__gfx_bresenham_iterator.html
index 456b149..2c56893 100644
--- a/Docs/html/struct_s_d_l__gfx_bresenham_iterator.html
+++ b/Docs/html/struct_s_d_l__gfx_bresenham_iterator.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -215,7 +215,7 @@ Data Fields</h2></td></tr>
 </div>
 </div>
 <hr/>The documentation for this struct was generated from the following file:<ul>
-<li>C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_primitives_8c_source.html">SDL_gfxPrimitives.c</a></li>
+<li>I:/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_primitives_8c_source.html">SDL_gfxPrimitives.c</a></li>
 </ul>
 </div><!-- contents -->
 
diff --git a/Docs/html/struct_s_d_l__gfx_murphy_iterator.html b/Docs/html/struct_s_d_l__gfx_murphy_iterator.html
index 7f34adf..a9e92b5 100644
--- a/Docs/html/struct_s_d_l__gfx_murphy_iterator.html
+++ b/Docs/html/struct_s_d_l__gfx_murphy_iterator.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -391,7 +391,7 @@ Data Fields</h2></td></tr>
 </div>
 </div>
 <hr/>The documentation for this struct was generated from the following file:<ul>
-<li>C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_primitives_8c_source.html">SDL_gfxPrimitives.c</a></li>
+<li>I:/Sources/sdlgfx/<a class="el" href="_s_d_l__gfx_primitives_8c_source.html">SDL_gfxPrimitives.c</a></li>
 </ul>
 </div><!-- contents -->
 
diff --git a/Docs/html/structt_color_r_g_b_a.html b/Docs/html/structt_color_r_g_b_a.html
index 573dc17..a599a4f 100644
--- a/Docs/html/structt_color_r_g_b_a.html
+++ b/Docs/html/structt_color_r_g_b_a.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -135,7 +135,7 @@ Data Fields</h2></td></tr>
 </div>
 </div>
 <hr/>The documentation for this struct was generated from the following file:<ul>
-<li>C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__rotozoom_8c_source.html">SDL_rotozoom.c</a></li>
+<li>I:/Sources/sdlgfx/<a class="el" href="_s_d_l__rotozoom_8c_source.html">SDL_rotozoom.c</a></li>
 </ul>
 </div><!-- contents -->
 
diff --git a/Docs/html/structt_color_y.html b/Docs/html/structt_color_y.html
index fd2c29c..dd7fa22 100644
--- a/Docs/html/structt_color_y.html
+++ b/Docs/html/structt_color_y.html
@@ -23,7 +23,7 @@
   
   <td style="padding-left: 0.5em;">
    <div id="projectname">SDL_gfx
-    <span id="projectnumber">2.0.24</span>
+    <span id="projectnumber">2.0.25</span>
    </div>
    
   </td>
@@ -87,7 +87,7 @@ Data Fields</h2></td></tr>
 </div>
 </div>
 <hr/>The documentation for this struct was generated from the following file:<ul>
-<li>C:/Users/Andreas Schiffler/Desktop/Sources/sdlgfx/<a class="el" href="_s_d_l__rotozoom_8c_source.html">SDL_rotozoom.c</a></li>
+<li>I:/Sources/sdlgfx/<a class="el" href="_s_d_l__rotozoom_8c_source.html">SDL_rotozoom.c</a></li>
 </ul>
 </div><!-- contents -->
 
diff --git a/INSTALL b/INSTALL
index 7d1c323..a1e89e1 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,8 +1,8 @@
 Installation Instructions
 *************************
 
-Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
-2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
+Inc.
 
    Copying and distribution of this file, with or without modification,
 are permitted in any medium without royalty provided the copyright
@@ -226,6 +226,11 @@ order to use an ANSI C compiler:
 
 and if that doesn't work, install pre-built binaries of GCC for HP-UX.
 
+   HP-UX `make' updates targets which have the same time stamps as
+their prerequisites, which makes it generally unusable when shipped
+generated files such as `configure' are involved.  Use GNU `make'
+instead.
+
    On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
 parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
 a workaround.  If GNU CC is not installed, it is therefore recommended
diff --git a/Makefile.am b/Makefile.am
index adb49c8..73b8882 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -31,4 +31,5 @@ DISTCLEANFILES = *~ *~c *~h *.cross.cache inc
 
 distclean-local:
 	-rm -rf autom4te.cache
+	-rm -f *.suo *.sdf
 	
diff --git a/Makefile.in b/Makefile.in
index 1928b0f..71a39a9 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
 # This Makefile.in is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
@@ -20,6 +20,23 @@
 
 
 VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
 pkgdatadir = $(datadir)/@PACKAGE@
 pkgincludedir = $(includedir)/@PACKAGE@
 pkglibdir = $(libdir)/@PACKAGE@
@@ -76,6 +93,12 @@ am__nobase_list = $(am__nobase_strip_setup); \
 am__base_list = \
   sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
   sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
 am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgconfigdir)" \
 	"$(DESTDIR)$(libSDL_gfxincludedir)"
 LTLIBRARIES = $(lib_LTLIBRARIES)
@@ -101,6 +124,11 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
 	$(LDFLAGS) -o $@
 SOURCES = $(libSDL_gfx_la_SOURCES)
 DIST_SOURCES = $(libSDL_gfx_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
 DATA = $(pkgconfig_DATA)
 HEADERS = $(libSDL_gfxinclude_HEADERS)
 ETAGS = etags
@@ -109,12 +137,16 @@ DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
 distdir = $(PACKAGE)-$(VERSION)
 top_distdir = $(distdir)
 am__remove_distdir = \
-  { test ! -d "$(distdir)" \
-    || { find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
-         && rm -fr "$(distdir)"; }; }
+  if test -d "$(distdir)"; then \
+    find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
+      && rm -rf "$(distdir)" \
+      || { sleep 5 && rm -rf "$(distdir)"; }; \
+  else :; fi
 DIST_ARCHIVES = $(distdir).tar.gz
 GZIP_ENV = --best
 distuninstallcheck_listfiles = find . -type f -print
+am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
+  | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
 distcleancheck_listfiles = find . -type f -print
 ACLOCAL = @ACLOCAL@
 AMTAR = @AMTAR@
@@ -286,7 +318,7 @@ all: all-am
 
 .SUFFIXES:
 .SUFFIXES: .c .lo .o .obj
-am--refresh:
+am--refresh: Makefile
 	@:
 $(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
 	@for dep in $?; do \
@@ -324,7 +356,6 @@ SDL_gfx.pc: $(top_builddir)/config.status $(srcdir)/SDL_gfx.pc.in
 	cd $(top_builddir) && $(SHELL) ./config.status $@
 install-libLTLIBRARIES: $(lib_LTLIBRARIES)
 	@$(NORMAL_INSTALL)
-	test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
 	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
 	list2=; for p in $$list; do \
 	  if test -f $$p; then \
@@ -332,6 +363,8 @@ install-libLTLIBRARIES: $(lib_LTLIBRARIES)
 	  else :; fi; \
 	done; \
 	test -z "$$list2" || { \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
 	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
 	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
 	}
@@ -353,7 +386,7 @@ clean-libLTLIBRARIES:
 	  echo "rm -f \"$${dir}/so_locations\""; \
 	  rm -f "$${dir}/so_locations"; \
 	done
-libSDL_gfx.la: $(libSDL_gfx_la_OBJECTS) $(libSDL_gfx_la_DEPENDENCIES) 
+libSDL_gfx.la: $(libSDL_gfx_la_OBJECTS) $(libSDL_gfx_la_DEPENDENCIES) $(EXTRA_libSDL_gfx_la_DEPENDENCIES) 
 	$(libSDL_gfx_la_LINK) -rpath $(libdir) $(libSDL_gfx_la_OBJECTS) $(libSDL_gfx_la_LIBADD) $(LIBS)
 
 mostlyclean-compile:
@@ -399,8 +432,11 @@ distclean-libtool:
 	-rm -f libtool config.lt
 install-pkgconfigDATA: $(pkgconfig_DATA)
 	@$(NORMAL_INSTALL)
-	test -z "$(pkgconfigdir)" || $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)"
 	@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \
+	fi; \
 	for p in $$list; do \
 	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
 	  echo "$$d$$p"; \
@@ -414,13 +450,14 @@ uninstall-pkgconfigDATA:
 	@$(NORMAL_UNINSTALL)
 	@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \
 	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
-	test -n "$$files" || exit 0; \
-	echo " ( cd '$(DESTDIR)$(pkgconfigdir)' && rm -f" $$files ")"; \
-	cd "$(DESTDIR)$(pkgconfigdir)" && rm -f $$files
+	dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir)
 install-libSDL_gfxincludeHEADERS: $(libSDL_gfxinclude_HEADERS)
 	@$(NORMAL_INSTALL)
-	test -z "$(libSDL_gfxincludedir)" || $(MKDIR_P) "$(DESTDIR)$(libSDL_gfxincludedir)"
 	@list='$(libSDL_gfxinclude_HEADERS)'; test -n "$(libSDL_gfxincludedir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(libSDL_gfxincludedir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(libSDL_gfxincludedir)" || exit 1; \
+	fi; \
 	for p in $$list; do \
 	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
 	  echo "$$d$$p"; \
@@ -434,9 +471,7 @@ uninstall-libSDL_gfxincludeHEADERS:
 	@$(NORMAL_UNINSTALL)
 	@list='$(libSDL_gfxinclude_HEADERS)'; test -n "$(libSDL_gfxincludedir)" || list=; \
 	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
-	test -n "$$files" || exit 0; \
-	echo " ( cd '$(DESTDIR)$(libSDL_gfxincludedir)' && rm -f" $$files ")"; \
-	cd "$(DESTDIR)$(libSDL_gfxincludedir)" && rm -f $$files
+	dir='$(DESTDIR)$(libSDL_gfxincludedir)'; $(am__uninstall_files_from_dir)
 
 ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
 	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
@@ -534,7 +569,11 @@ dist-gzip: distdir
 	$(am__remove_distdir)
 
 dist-bzip2: distdir
-	tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
+	tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
+	$(am__remove_distdir)
+
+dist-lzip: distdir
+	tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
 	$(am__remove_distdir)
 
 dist-lzma: distdir
@@ -542,7 +581,7 @@ dist-lzma: distdir
 	$(am__remove_distdir)
 
 dist-xz: distdir
-	tardir=$(distdir) && $(am__tar) | xz -c >$(distdir).tar.xz
+	tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
 	$(am__remove_distdir)
 
 dist-tarZ: distdir
@@ -573,6 +612,8 @@ distcheck: dist
 	  bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
 	*.tar.lzma*) \
 	  lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\
+	*.tar.lz*) \
+	  lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
 	*.tar.xz*) \
 	  xz -dc $(distdir).tar.xz | $(am__untar) ;;\
 	*.tar.Z*) \
@@ -582,7 +623,7 @@ distcheck: dist
 	*.zip*) \
 	  unzip $(distdir).zip ;;\
 	esac
-	chmod -R a-w $(distdir); chmod a+w $(distdir)
+	chmod -R a-w $(distdir); chmod u+w $(distdir)
 	mkdir $(distdir)/_build
 	mkdir $(distdir)/_inst
 	chmod a-w $(distdir)
@@ -592,6 +633,7 @@ distcheck: dist
 	  && am__cwd=`pwd` \
 	  && $(am__cd) $(distdir)/_build \
 	  && ../configure --srcdir=.. --prefix="$$dc_install_base" \
+	    $(AM_DISTCHECK_CONFIGURE_FLAGS) \
 	    $(DISTCHECK_CONFIGURE_FLAGS) \
 	  && $(MAKE) $(AM_MAKEFLAGS) \
 	  && $(MAKE) $(AM_MAKEFLAGS) dvi \
@@ -620,8 +662,16 @@ distcheck: dist
 	  list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
 	  sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
 distuninstallcheck:
-	@$(am__cd) '$(distuninstallcheck_dir)' \
-	&& test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
+	@test -n '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: trying to run $@ with an empty' \
+	       '$$(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	$(am__cd) '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
 	   || { echo "ERROR: files left after uninstall:" ; \
 	        if test -n "$(DESTDIR)"; then \
 	          echo "  (check DESTDIR support)"; \
@@ -654,10 +704,15 @@ install-am: all-am
 
 installcheck: installcheck-am
 install-strip:
-	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-	  `test -z '$(STRIP)' || \
-	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
 mostlyclean-generic:
 
 clean-generic:
@@ -750,26 +805,27 @@ uninstall-am: uninstall-libLTLIBRARIES \
 
 .PHONY: CTAGS GTAGS all all-am am--refresh check check-am clean \
 	clean-generic clean-libLTLIBRARIES clean-libtool ctags dist \
-	dist-all dist-bzip2 dist-gzip dist-lzma dist-shar dist-tarZ \
-	dist-xz dist-zip distcheck distclean distclean-compile \
-	distclean-generic distclean-libtool distclean-local \
-	distclean-tags distcleancheck distdir distuninstallcheck dvi \
-	dvi-am html html-am info info-am install install-am \
-	install-data install-data-am install-dvi install-dvi-am \
-	install-exec install-exec-am install-html install-html-am \
-	install-info install-info-am install-libLTLIBRARIES \
-	install-libSDL_gfxincludeHEADERS install-man install-pdf \
-	install-pdf-am install-pkgconfigDATA install-ps install-ps-am \
-	install-strip installcheck installcheck-am installdirs \
-	maintainer-clean maintainer-clean-generic mostlyclean \
-	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
-	pdf pdf-am ps ps-am tags uninstall uninstall-am \
-	uninstall-libLTLIBRARIES uninstall-libSDL_gfxincludeHEADERS \
-	uninstall-pkgconfigDATA
+	dist-all dist-bzip2 dist-gzip dist-lzip dist-lzma dist-shar \
+	dist-tarZ dist-xz dist-zip distcheck distclean \
+	distclean-compile distclean-generic distclean-libtool \
+	distclean-local distclean-tags distcleancheck distdir \
+	distuninstallcheck dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am \
+	install-libLTLIBRARIES install-libSDL_gfxincludeHEADERS \
+	install-man install-pdf install-pdf-am install-pkgconfigDATA \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags uninstall uninstall-am uninstall-libLTLIBRARIES \
+	uninstall-libSDL_gfxincludeHEADERS uninstall-pkgconfigDATA
 
 
 distclean-local:
 	-rm -rf autom4te.cache
+	-rm -f *.suo *.sdf
 
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/README b/README
index cc8409d..6036486 100644
--- a/README
+++ b/README
@@ -133,8 +133,8 @@ the usual configure;make;make install sequence.
 \subsubsection platformwindows Windows
  
 A Win32 target is available (VisualC6/7/8/9, mingw32, xmingw32 cross-compiler).
-The SDL_gfx.sln will open VS2010 (the old VS2008 .sln is also still included) 
-including express versions. 
+The SDL_gfx_VS2010.sln will open VS2010 (the old VS2008 .sln is also still included)
+including express versions.
 
 See "Other Builds" for additional makefiles (may be out of date).
 
@@ -238,7 +238,7 @@ i.e. to build on MacOSX 10.3+ use:
 
 \subsection vs9 Windows (VC9, VS2010)
 
-Open SDL_gfx.sln solution file and review README.
+Open SDL_gfx_VS2010.sln solution file and review README.
 
 \subsection vs8 Windows (VC8, VS2008)
 
@@ -493,6 +493,11 @@ See the source code .c files for some sample code and implementation hints.
 - Pixel blend routine patches contributed by mitja at lxnav dot com -
   thanks Mitja.
 
+- ImageFilter patches contributed by beuc at beuc dot net - thanks Sylvain.
+
+- Bug reports contributed by Yannick dot Guesnet at univ-rouen dot fr -
+- thanks Yannick.
+
 \section changelog_sec Change Log
 
 \verbinclude ChangeLog
diff --git a/SDL_gfx.spec b/SDL_gfx.spec
index 165b531..dde6bba 100644
--- a/SDL_gfx.spec
+++ b/SDL_gfx.spec
@@ -1,5 +1,5 @@
 %define prefix  %{_prefix}
-%define version 2.0.24
+%define version 2.0.25
 %define release 1
 %define _unpackaged_files_terminate_build 0
 
@@ -10,7 +10,7 @@ Release: %{release}
 License: ZLIB
 Group: System Environment/Libraries
 Prefix: %{prefix}
-Source: http://www.ferzkopp.net/Software/SDL_gfx-2.0/SDL_gfx-2.0.24.tar.gz
+Source: http://www.ferzkopp.net/Software/SDL_gfx-2.0/SDL_gfx-2.0.25.tar.gz
 Packager: Danny Sung <dannys at mail.com>
 Vendor: Andreas Schiffler <aschiffler at ferzkopp.net>
 BuildRoot: /tmp/%{name}-root-%{version}
diff --git a/SDL_gfxBlitFunc.h b/SDL_gfxBlitFunc.h
index 9764920..6491aad 100644
--- a/SDL_gfxBlitFunc.h
+++ b/SDL_gfxBlitFunc.h
@@ -38,8 +38,8 @@ extern    "C" {
 #include <stdio.h>
 #include <stdlib.h>
 
-#include <SDL.h>
-#include <SDL_video.h>
+#include "SDL.h"
+#include "SDL_video.h"
 
 
 	extern const unsigned int GFX_ALPHA_ADJUST_ARRAY[256];
diff --git a/SDL_gfxPrimitives.c b/SDL_gfxPrimitives.c
index b59995c..ae8b998 100644
--- a/SDL_gfxPrimitives.c
+++ b/SDL_gfxPrimitives.c
@@ -2659,7 +2659,7 @@ int _aalineColor(SDL_Surface * dst, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2,
 		{
 			return (hlineColor(dst, x1, x2, y1, color));
 		} else {
-			if (dx>0) {
+			if (dx!=0) {
 				return (hlineColor(dst, xx0, xx0+dx, y1, color));
 			} else {
 				return (pixelColor(dst, x1, y1, color));
diff --git a/SDL_gfxPrimitives.h b/SDL_gfxPrimitives.h
index c4ac6dd..c10ce5c 100644
--- a/SDL_gfxPrimitives.h
+++ b/SDL_gfxPrimitives.h
@@ -46,7 +46,7 @@ extern "C" {
 
 #define SDL_GFXPRIMITIVES_MAJOR	2
 #define SDL_GFXPRIMITIVES_MINOR	0
-#define SDL_GFXPRIMITIVES_MICRO	24
+#define SDL_GFXPRIMITIVES_MICRO	25
 
 
 	/* ---- Function Prototypes */
diff --git a/SDL_gfx_VS2008.vcproj b/SDL_gfx_VS2008.vcproj
index 4cc09f9..cb84864 100644
--- a/SDL_gfx_VS2008.vcproj
+++ b/SDL_gfx_VS2008.vcproj
@@ -41,7 +41,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories=""..\SDL-1.2.14\include""
+				AdditionalIncludeDirectories=""..\SDL-1.2.15\include""
 				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;DLL_EXPORT;USE_MMX"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -63,7 +63,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="SDL.lib"
 				LinkIncremental="2"
-				AdditionalLibraryDirectories="..\SDL-1.2.14\VisualC\SDL\Debug"
+				AdditionalLibraryDirectories="..\SDL-1.2.15\VisualC\SDL\Debug"
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				RandomizedBaseAddress="1"
@@ -91,7 +91,7 @@
 			/>
 			<Tool
 				Name="VCPostBuildEventTool"
-				CommandLine="copy "$(TargetPath)" "$(SolutionDir)\Test\$(ConfigurationName)"&#x0D;&#x0A;copy "$(ProjectDir)\..\SDL-1.2.14\VisualC\SDL\Debug\SDL.dll" "$(SolutionDir)\Test\$(ConfigurationName)"&#x0D;&#x0A;"
+				CommandLine="copy "$(TargetPath)" "$(SolutionDir)\Test\$(ConfigurationName)"&#x0D;&#x0A;copy "$(ProjectDir)\..\SDL-1.2.15\VisualC\SDL\Debug\SDL.dll" "$(SolutionDir)\Test\$(ConfigurationName)"&#x0D;&#x0A;"
 			/>
 		</Configuration>
 		<Configuration
diff --git a/SDL_gfx.sln b/SDL_gfx_VS2010.sln
similarity index 83%
rename from SDL_gfx.sln
rename to SDL_gfx_VS2010.sln
index 4b7bf2c..3d3ca95 100644
--- a/SDL_gfx.sln
+++ b/SDL_gfx_VS2010.sln
@@ -3,32 +3,27 @@ Microsoft Visual Studio Solution File, Format Version 11.00
 # Visual Studio 2010
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tests", "Tests", "{0E209CE5-31A3-4039-9437-7F74DC13D8C8}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SDL_gfx", "SDL_gfx.vcxproj", "{AE22EFD3-6E6D-48C0-AF3D-EF190406BEDC}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SDL_gfx", "SDL_gfx_VS2010.vcxproj", "{AE22EFD3-6E6D-48C0-AF3D-EF190406BEDC}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestGfxPrimitives", "Test\TestGfxPrimitives.vcxproj", "{AE22EFD3-6F6D-48C0-AF3D-EF190406BEDC}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestGfxPrimitives", "Test\TestGfxPrimitives_VS2010.vcxproj", "{AE22EFD3-6F6D-48C0-AF3D-EF190406BEDC}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestABGR", "Test\TestABGR.vcxproj", "{AE22EFD3-7F7D-48C0-AF3D-EF190406BEDC}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestABGR", "Test\TestABGR_VS2010.vcxproj", "{AE22EFD3-7F7D-48C0-AF3D-EF190406BEDC}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestFonts", "Test\TestFonts.vcxproj", "{AE33EFD3-6F6D-48C0-AF3D-EF190406BEDC}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestFonts", "Test\TestFonts_VS2010.vcxproj", "{AE33EFD3-6F6D-48C0-AF3D-EF190406BEDC}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestFramerate", "Test\TestFramerate.vcxproj", "{AE22EFD3-6F6D-21C0-AF2D-EF190406BEDC}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestFramerate", "Test\TestFramerate_VS2010.vcxproj", "{AE22EFD3-6F6D-21C0-AF2D-EF190406BEDC}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestGfxBlit", "Test\TestGfxBlit.vcxproj", "{AE22EFD3-6F6D-48C0-AF3D-EF112306BEDC}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestGfxBlit", "Test\TestGfxBlit_VS2010.vcxproj", "{AE22EFD3-6F6D-48C0-AF3D-EF112306BEDC}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestGfxTexture", "Test\TestGfxTexture.vcxproj", "{AE22EFD3-6F6D-32C0-AA3D-EF190406BEDC}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestGfxTexture", "Test\TestGfxTexture_VS2010.vcxproj", "{AE22EFD3-6F6D-32C0-AA3D-EF190406BEDC}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestImageFilter", "Test\TestImageFilter.vcxproj", "{AE22AFD3-6F6D-48C0-AF3D-EF190406AAAA}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestImageFilter", "Test\TestImageFilter_VS2010.vcxproj", "{AE22AFD3-6F6D-48C0-AF3D-EF190406AAAA}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestRotozoom", "Test\TestRotozoom.vcxproj", "{AE98EFD3-6F6D-48C0-AF3D-EF560406BEDC}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestRotozoom", "Test\TestRotozoom_VS2010.vcxproj", "{AE98EFD3-6F6D-48C0-AF3D-EF560406BEDC}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestShrink", "Test\TestShrink.vcxproj", "{AE22EFD3-6161-48C0-123D-EF190406BEDC}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestShrink", "Test\TestShrink_VS2010.vcxproj", "{AE22EFD3-6161-48C0-123D-EF190406BEDC}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LaplaceRelaxation", "Test\LaplaceRelaxation.vcxproj", "{AE9876D3-6F6D-48C0-3DAF-EF578406BEDC}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Docs", "Docs", "{582C0DBA-F50D-4F96-8EB4-07EBA29F9EA3}"
-	ProjectSection(SolutionItems) = preProject
-		Docs\html.doxyfile = Docs\html.doxyfile
-	EndProjectSection
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LaplaceRelaxation", "Test\LaplaceRelaxation_VS2010.vcxproj", "{AE9876D3-6F6D-48C0-3DAF-EF578406BEDC}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
diff --git a/SDL_gfx_VS2010.vcxproj b/SDL_gfx_VS2010.vcxproj
new file mode 100755
index 0000000..766f838
--- /dev/null
+++ b/SDL_gfx_VS2010.vcxproj
@@ -0,0 +1,134 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectName>SDL_gfx</ProjectName>
+    <ProjectGuid>{AE22EFD3-6E6D-48C0-AF3D-EF190406BEDC}</ProjectGuid>
+    <RootNamespace>SDL_gfx</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;DLL_EXPORT;USE_MMX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>SDL.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>..\SDL-1.2.15\VisualC\SDL\Debug;..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <RandomizedBaseAddress>false</RandomizedBaseAddress>
+      <DataExecutionPrevention>
+      </DataExecutionPrevention>
+      <ImportLibrary>.\Debug\SDL_gfx.lib</ImportLibrary>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <PostBuildEvent>
+      <Command>copy "$(TargetPath)" "$(SolutionDir)\Test\$(Configuration)"
+copy "$(ProjectDir)\..\SDL-1.2.15\VisualC\SDL\Debug\SDL.dll" "$(SolutionDir)\Test\$(Configuration)"
+</Command>
+    </PostBuildEvent>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile> <!-- Release build: define NDEBUG (not _DEBUG) to match the non-debug CRT selected by MultiThreadedDLL -->
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;DLL_EXPORT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>SDL.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>..\SDL-1.2.15\VisualC\SDL\Debug;..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="SDL_framerate.c" />
+    <ClCompile Include="SDL_gfxBlitFunc.c" />
+    <ClCompile Include="SDL_gfxPrimitives.c" />
+    <ClCompile Include="SDL_imageFilter.c" />
+    <ClCompile Include="SDL_rotozoom.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="SDL_framerate.h" />
+    <ClInclude Include="SDL_gfxBlitFunc.h" />
+    <ClInclude Include="SDL_gfxPrimitives.h" />
+    <ClInclude Include="SDL_gfxPrimitives_font.h" />
+    <ClInclude Include="SDL_imageFilter.h" />
+    <ClInclude Include="SDL_rotozoom.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuildStep Include="ChangeLog">
+      <FileType>Document</FileType>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+    </CustomBuildStep>
+    <CustomBuildStep Include="README">
+      <FileType>Document</FileType>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+    </CustomBuildStep>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SDL_imageFilter.c b/SDL_imageFilter.c
index 67903e3..f3059c9 100644
--- a/SDL_imageFilter.c
+++ b/SDL_imageFilter.c
@@ -3,6 +3,7 @@
 SDL_imageFilter.c: byte-image "filter" routines
 
 Copyright (C) 2001-2012  Andreas Schiffler
+Copyright (C) 2013  Sylvain Beucler
 
 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
@@ -41,6 +42,14 @@ him for his work.
 #include <stdlib.h>
 #include <string.h>
 
+/* Use GCC intrinsics if available: they support both i386 and x86_64,
+   provide ASM-grade performance, and avoid the PUSHA/POPA issues. */
+#ifdef __GNUC__
+#  ifdef USE_MMX
+#    include <mmintrin.h>
+#  endif
+#endif
+#include <SDL_cpuinfo.h>
 #include "SDL_imageFilter.h"
 
 /*!
@@ -61,56 +70,18 @@ static int SDL_imageFilterUseMMX = 1;
 #endif
 
 /*!
-\brief Internal function returning the CPU flags. 
-
-\returns Flags of system CPU.
-*/
-unsigned int _cpuFlags()
-{
-	unsigned int flags = 0;
-
-#ifdef USE_MMX
-#if !defined(GCC__)
-	__asm
-	{
-		pusha
-			mov eax, 1
-			cpuid	/* get CPU ID flag */
-			mov flags,edx	/* move result to mmx_bit */
-			popa
-	}
-#else
-	asm volatile ("pusha		     \n\t" "mov    %1, %%eax     \n\t"	/* request feature flag */
-		"cpuid                \n\t"	/* get CPU ID flag */
-		"mov    %%edx, %0     \n\t"	/* move result to mmx_bit */
-		"popa		     \n\t":"=m" (flags)	/* %0 */
-		:"i"(0x00000001)	/* %1 */
-		);
-#endif
-#endif
-
-	return (flags);
-}
-
-/*!
 \brief MMX detection routine (with override flag). 
 
 \returns 1 of MMX was detected, 0 otherwise.
 */
 int SDL_imageFilterMMXdetect(void)
 {
-	unsigned int mmx_bit;
-
 	/* Check override flag */
 	if (SDL_imageFilterUseMMX == 0) {
 		return (0);
 	}
 
-	mmx_bit = _cpuFlags();
-	mmx_bit &= 0x00800000;
-	mmx_bit = (mmx_bit && 0x00800000);
-
-	return (int)(mmx_bit);
+        return SDL_HasMMX();
 }
 
 /*!
@@ -141,7 +112,7 @@ void SDL_imageFilterMMXon()
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterAddMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+static int SDL_imageFilterAddMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -167,26 +138,18 @@ L1010:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "mov          %2, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %1, %%ebx \n\t"	/* load Src2 address into ebx */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm1 \n\t"    	/* load 8 bytes from Src1 into mm1 */
-		"paddusb (%%ebx), %%mm1 \n\t"	/* mm1=Src1+Src2 (add 8 bytes with saturation) */
-		"movq    %%mm1, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1, Src2 and Dest  */
-		"add          $8, %%ebx \n\t"	/* register pointers by 8 */
-		"add          $8, %%edi \n\t" "dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz             1b     \n\t"     /* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength)		/* %3 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mSrc2 = (__m64*)Src2;
+	__m64 *mDest = (__m64*)Dest;
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_paddusb(*mSrc1, *mSrc2);	/* Src1+Src2 (add 8 bytes with saturation) */
+		mSrc1++;
+		mSrc2++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -266,7 +229,7 @@ int SDL_imageFilterAdd(unsigned char *Src1, unsigned char *Src2, unsigned char *
 ]
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterMeanMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength,
+static int SDL_imageFilterMeanMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength,
 						   unsigned char *Mask)
 {
 #ifdef USE_MMX
@@ -303,39 +266,25 @@ L21011:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "movl         %4, %%edx \n\t"	/* load Mask address into edx */
-		"movq    (%%edx), %%mm0 \n\t"	/* load Mask into mm0 */
-		"mov          %2, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %1, %%ebx \n\t"	/* load Src2 address into ebx */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1:                      \n\t"
-		"movq    (%%eax), %%mm1 \n\t"	/* load 8 bytes from Src1 into mm1 */
-		"movq    (%%ebx), %%mm2 \n\t"	/* load 8 bytes from Src2 into mm2 */
-		/* --- Byte shift via Word shift --- */
-		"psrlw        $1, %%mm1 \n\t"	/* shift 4 WORDS of mm1 1 bit to the right */
-		"psrlw        $1, %%mm2 \n\t"	/* shift 4 WORDS of mm2 1 bit to the right */
-		/*      "pand      %%mm0, %%mm1 \n\t"    // apply Mask to 8 BYTES of mm1 */
-		".byte     0x0f, 0xdb, 0xc8 \n\t"
-		/*      "pand      %%mm0, %%mm2 \n\t"    // apply Mask to 8 BYTES of mm2 */
-		".byte     0x0f, 0xdb, 0xd0 \n\t" 
-		"paddusb   %%mm2, %%mm1 \n\t"	/* mm1=mm1+mm2 (add 8 bytes with saturation) */
-		"movq    %%mm1, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1, Src2 and Dest  */
-		"add          $8, %%ebx \n\t"	/* register pointers by 8 */
-		"add          $8, %%edi \n\t" 
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"     /* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength),		/* %3 */
-		"m"(Mask)			/* %4 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mSrc2 = (__m64*)Src2;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 *mMask = (__m64*)Mask;
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm1 = *mSrc1,
+		      mm2 = *mSrc2;
+		mm1 = _m_psrlwi(mm1, 1);	/* shift 4 WORDS of mm1 1 bit to the right */
+		mm2 = _m_psrlwi(mm2, 1);	/* shift 4 WORDS of mm2 1 bit to the right */
+		mm1 = _m_pand(mm1, *mMask);	/* apply Mask to 8 BYTES of mm1 */
+		mm2 = _m_pand(mm2, *mMask);	/* apply Mask to 8 BYTES of mm2 */
+		*mDest = _m_paddusb(mm1, mm2);	/* mm1+mm2 (add 8 bytes with saturation) */
+		mSrc1++;
+		mSrc2++;
+		mDest++;
+	}
+	_m_empty();				/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -412,7 +361,7 @@ int SDL_imageFilterMean(unsigned char *Src1, unsigned char *Src2, unsigned char
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterSubMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+static int SDL_imageFilterSubMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -438,26 +387,18 @@ L1012:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "mov %2, %%eax \n\t"	/* load Src1 address into eax */
-		"mov %1, %%ebx \n\t"	/* load Src2 address into ebx */
-		"mov %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16       \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm1 \n\t"     /* load 8 bytes from Src1 into mm1 */
-		"psubusb (%%ebx), %%mm1 \n\t"	/* mm1=Src1-Src2 (sub 8 bytes with saturation) */
-		"movq    %%mm1, (%%edi) \n\t"	/* store result in Dest */
-		"add $8, %%eax \n\t"	/* increase Src1, Src2 and Dest  */
-		"add $8, %%ebx \n\t"	/* register pointers by 8 */
-		"add $8, %%edi \n\t" "dec %%ecx     \n\t"	/* decrease loop counter */
-		"jnz 1b         \n\t"     /* check loop termination, proceed if required */
-		"emms          \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength)		/* %3 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mSrc2 = (__m64*)Src2;
+	__m64 *mDest = (__m64*)Dest;
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_psubusb(*mSrc1, *mSrc2);	/* Src1-Src2 (sub 8 bytes with saturation) */
+		mSrc1++;
+		mSrc2++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -535,7 +476,7 @@ int SDL_imageFilterSub(unsigned char *Src1, unsigned char *Src2, unsigned char *
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterAbsDiffMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+static int SDL_imageFilterAbsDiffMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -564,29 +505,20 @@ L1013:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "mov %2, %%eax \n\t"	/* load Src1 address into eax */
-		"mov %1, %%ebx \n\t"	/* load Src2 address into ebx */
-		"mov %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16       \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm1 \n\t"     /* load 8 bytes from Src1 into mm1 */
-		"movq    (%%ebx), %%mm2 \n\t"	/* load 8 bytes from Src2 into mm2 */
-		"psubusb (%%ebx), %%mm1 \n\t"	/* mm1=Src1-Src2 (sub 8 bytes with saturation) */
-		"psubusb (%%eax), %%mm2 \n\t"	/* mm2=Src2-Src1 (sub 8 bytes with saturation) */
-		"por       %%mm2, %%mm1 \n\t"	/* combine both mm2 and mm1 results */
-		"movq    %%mm1, (%%edi) \n\t"	/* store result in Dest */
-		"add $8, %%eax \n\t"	/* increase Src1, Src2 and Dest  */
-		"add $8, %%ebx \n\t"	/* register pointers by 8 */
-		"add $8, %%edi \n\t" "dec %%ecx     \n\t"	/* decrease loop counter */
-		"jnz 1b        \n\t"      /* check loop termination, proceed if required */
-		"emms          \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength)		/* %3 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mSrc2 = (__m64*)Src2;
+	__m64 *mDest = (__m64*)Dest;
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm1 = _m_psubusb(*mSrc2, *mSrc1);	/* Src2-Src1 (sub 8 bytes with saturation) */
+		__m64 mm2 = _m_psubusb(*mSrc1, *mSrc2);	/* Src1-Src2 (sub 8 bytes with saturation) */
+		*mDest = _m_por(mm1, mm2);		/* combine both mm2 and mm1 results */
+		mSrc1++;
+		mSrc2++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -662,7 +594,7 @@ int SDL_imageFilterAbsDiff(unsigned char *Src1, unsigned char *Src2, unsigned ch
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterMultMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+static int SDL_imageFilterMultMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -707,45 +639,73 @@ L1014:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "mov %2, %%eax \n\t"	/* load Src1 address into eax */
-		"mov %1, %%ebx \n\t"	/* load Src2 address into ebx */
-		"mov %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		"pxor      %%mm0, %%mm0 \n\t"	/* zero mm0 register */
-		".align 16       \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm1 \n\t"     /* load 8 bytes from Src1 into mm1 */
-		"movq    (%%ebx), %%mm3 \n\t"	/* load 8 bytes from Src2 into mm3 */
-		"movq      %%mm1, %%mm2 \n\t"	/* copy mm1 into mm2 */
-		"movq      %%mm3, %%mm4 \n\t"	/* copy mm3 into mm4  */
-		"punpcklbw %%mm0, %%mm1 \n\t"	/* unpack low  bytes of Src1 into words */
-		"punpckhbw %%mm0, %%mm2 \n\t"	/* unpack high bytes of Src1 into words */
-		"punpcklbw %%mm0, %%mm3 \n\t"	/* unpack low  bytes of Src2 into words */
-		"punpckhbw %%mm0, %%mm4 \n\t"	/* unpack high bytes of Src2 into words */
-		"pmullw    %%mm3, %%mm1 \n\t"	/* mul low  bytes of Src1 and Src2  */
-		"pmullw    %%mm4, %%mm2 \n\t"	/* mul high bytes of Src1 and Src2 */
-		/* Take abs value of the results (signed words) */
-		"movq      %%mm1, %%mm5 \n\t"	/* copy mm1 into mm5 */
-		"movq      %%mm2, %%mm6 \n\t"	/* copy mm2 into mm6 */
-		"psraw       $15, %%mm5 \n\t"	/* fill mm5 words with word sign bit */
-		"psraw       $15, %%mm6 \n\t"	/* fill mm6 words with word sign bit */
-		"pxor      %%mm5, %%mm1 \n\t"	/* take 1's compliment of only neg. words */
-		"pxor      %%mm6, %%mm2 \n\t"	/* take 1's compliment of only neg. words */
-		"psubsw    %%mm5, %%mm1 \n\t"	/* add 1 to only neg. words, W-(-1) or W-0 */
-		"psubsw    %%mm6, %%mm2 \n\t"	/* add 1 to only neg. words, W-(-1) or W-0 */
-		"packuswb  %%mm2, %%mm1 \n\t"	/* pack words back into bytes with saturation */
-		"movq    %%mm1, (%%edi) \n\t"	/* store result in Dest */
-		"add $8, %%eax \n\t"	/* increase Src1, Src2 and Dest  */
-		"add $8, %%ebx \n\t"	/* register pointers by 8 */
-		"add $8, %%edi \n\t" "dec %%ecx     \n\t"	/* decrease loop counter */
-		"jnz 1b        \n\t"      /* check loop termination, proceed if required */
-		"emms          \n\t"	/* exit MMX state */
-		"popa \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength)		/* %3 */
-		);
+	/* i386 ASM with constraints: */
+	/* asm volatile ( */
+	/* 	"shr $3, %%ecx \n\t"	/\* counter/8 (MMX loads 8 bytes at a time) *\/ */
+	/* 	"pxor      %%mm0, %%mm0 \n\t"	/\* zero mm0 register *\/ */
+	/* 	".align 16       \n\t"	/\* 16 byte alignment of the loop entry *\/ */
+	/* 	"1: movq (%%eax), %%mm1 \n\t"     /\* load 8 bytes from Src1 into mm1 *\/ */
+	/* 	"movq    (%%ebx), %%mm3 \n\t"	/\* load 8 bytes from Src2 into mm3 *\/ */
+	/* 	"movq      %%mm1, %%mm2 \n\t"	/\* copy mm1 into mm2 *\/ */
+	/* 	"movq      %%mm3, %%mm4 \n\t"	/\* copy mm3 into mm4  *\/ */
+	/* 	"punpcklbw %%mm0, %%mm1 \n\t"	/\* unpack low  bytes of Src1 into words *\/ */
+	/* 	"punpckhbw %%mm0, %%mm2 \n\t"	/\* unpack high bytes of Src1 into words *\/ */
+	/* 	"punpcklbw %%mm0, %%mm3 \n\t"	/\* unpack low  bytes of Src2 into words *\/ */
+	/* 	"punpckhbw %%mm0, %%mm4 \n\t"	/\* unpack high bytes of Src2 into words *\/ */
+	/* 	"pmullw    %%mm3, %%mm1 \n\t"	/\* mul low  bytes of Src1 and Src2  *\/ */
+	/* 	"pmullw    %%mm4, %%mm2 \n\t"	/\* mul high bytes of Src1 and Src2 *\/ */
+	/* 	/\* Take abs value of the results (signed words) *\/ */
+	/* 	"movq      %%mm1, %%mm5 \n\t"	/\* copy mm1 into mm5 *\/ */
+	/* 	"movq      %%mm2, %%mm6 \n\t"	/\* copy mm2 into mm6 *\/ */
+	/* 	"psraw       $15, %%mm5 \n\t"	/\* fill mm5 words with word sign bit *\/ */
+	/* 	"psraw       $15, %%mm6 \n\t"	/\* fill mm6 words with word sign bit *\/ */
+	/* 	"pxor      %%mm5, %%mm1 \n\t"	/\* take 1's complement of only neg. words *\/ */
+	/* 	"pxor      %%mm6, %%mm2 \n\t"	/\* take 1's complement of only neg. words *\/ */
+	/* 	"psubsw    %%mm5, %%mm1 \n\t"	/\* add 1 to only neg. words, W-(-1) or W-0 *\/ */
+	/* 	"psubsw    %%mm6, %%mm2 \n\t"	/\* add 1 to only neg. words, W-(-1) or W-0 *\/ */
+	/* 	"packuswb  %%mm2, %%mm1 \n\t"	/\* pack words back into bytes with saturation *\/ */
+	/* 	"movq    %%mm1, (%%edi) \n\t"	/\* store result in Dest *\/ */
+	/* 	"add $8, %%eax \n\t"	/\* increase Src1, Src2 and Dest  *\/ */
+	/* 	"add $8, %%ebx \n\t"	/\* register pointers by 8 *\/ */
+	/* 	"add $8, %%edi \n\t" */
+	/* 	"dec %%ecx     \n\t"	/\* decrease loop counter *\/ */
+	/* 	"jnz 1b        \n\t"	/\* check loop termination, proceed if required *\/ */
+	/* 	"emms          \n\t"	/\* exit MMX state *\/ */
+	/* 	: "+a" (Src1),		/\* load Src1 address into rax, modified by the loop *\/ */
+	/* 	  "+b" (Src2),		/\* load Src2 address into rbx, modified by the loop *\/ */
+	/* 	  "+c" (SrcLength),	/\* load loop counter (SIZE) into rcx, modified by the loop *\/ */
+	/* 	  "+D" (Dest)		/\* load Dest address into rdi, modified by the loop *\/ */
+	/* 	: */
+	/* 	: "memory",		/\* *Dest is modified *\/ */
+        /*           "mm0","mm1","mm2","mm3","mm4","mm5","mm6"	/\* registers modified *\/ */
+	/* ); */
+
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mSrc2 = (__m64*)Src2;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 mm0 = _m_from_int(0); /* zero mm0 register */
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm1, mm2, mm3, mm4, mm5, mm6;
+		mm1 = _m_punpcklbw(*mSrc1, mm0);	/* unpack low  bytes of Src1 into words */
+		mm2 = _m_punpckhbw(*mSrc1, mm0);	/* unpack high bytes of Src1 into words */
+		mm3 = _m_punpcklbw(*mSrc2, mm0);	/* unpack low  bytes of Src2 into words */
+		mm4 = _m_punpckhbw(*mSrc2, mm0);	/* unpack high bytes of Src2 into words */
+		mm1 = _m_pmullw(mm1, mm3);		/* mul low  bytes of Src1 and Src2  */
+		mm2 = _m_pmullw(mm2, mm4);		/* mul high bytes of Src1 and Src2 */
+		mm5 = _m_psrawi(mm1, 15);		/* fill mm5 words with word sign bit */
+		mm6 = _m_psrawi(mm2, 15);		/* fill mm6 words with word sign bit */
+		mm1 = _m_pxor(mm1, mm5);		/* take 1's complement of only neg. words */
+		mm2 = _m_pxor(mm2, mm6);		/* take 1's complement of only neg. words */
+		mm1 = _m_psubsw(mm1, mm5);		/* add 1 to only neg. words, W-(-1) or W-0 */
+		mm2 = _m_psubsw(mm2, mm6);		/* add 1 to only neg. words, W-(-1) or W-0 */
+		*mDest = _m_packuswb(mm1, mm2);		/* pack words back into bytes with saturation */
+		mSrc1++;
+		mSrc2++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -850,23 +810,34 @@ L10141:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "mov %2, %%edx \n\t"	/* load Src1 address into edx */
-		"mov %1, %%esi \n\t"	/* load Src2 address into esi */
-		"mov %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
+	/* Note: ~5% gain on i386, less efficient than C on x86_64 */
+	/* Also depends on whether this function is static (?!) */
+	asm volatile (
 		".align 16       \n\t"	/* 16 byte alignment of the loop entry */
+#  if defined(i386)
 		"1:mov  (%%edx), %%al \n\t"      /* load a byte from Src1 */
 		"mulb (%%esi)       \n\t"	/* mul with a byte from Src2 */
 		"mov %%al, (%%edi)  \n\t"       /* move a byte result to Dest */
 		"inc %%edx \n\t"		/* increment Src1, Src2, Dest */
 		"inc %%esi \n\t"		/* pointer registers by one */
-		"inc %%edi \n\t" "dec %%ecx      \n\t"	/* decrease loop counter */
-		"jnz 1b         \n\t"     /* check loop termination, proceed if required */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength)		/* %3 */
+		"inc %%edi \n\t"
+		"dec %%ecx      \n\t"	/* decrease loop counter */
+#  elif defined(__x86_64__)
+		"1:mov  (%%rdx), %%al \n\t"      /* load a byte from Src1 */
+		"mulb (%%rsi)       \n\t"	/* mul with a byte from Src2 */
+		"mov %%al, (%%rdi)  \n\t"       /* move a byte result to Dest */
+		"inc %%rdx \n\t"		/* increment Src1, Src2, Dest */
+		"inc %%rsi \n\t"		/* pointer registers by one */
+		"inc %%rdi \n\t"
+		"dec %%rcx      \n\t"	/* decrease loop counter */
+#  endif
+		"jnz 1b         \n\t"	/* check loop termination, proceed if required */
+		: "+d" (Src1),		/* load Src1 address into edx */
+		  "+S" (Src2),		/* load Src2 address into esi */
+		  "+c" (SrcLength),	/* load loop counter (SIZE) into ecx */
+		  "+D" (Dest)		/* load Dest address into edi */
+		:
+		: "memory", "rax"
 		);
 #endif
 	return (0);
@@ -889,7 +860,6 @@ int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned ch
 {
 	unsigned int i, istart;
 	unsigned char *cursrc1, *cursrc2, *curdst;
-	int result;
 
 	/* Validate input parameters */
 	if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
@@ -927,8 +897,7 @@ int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned ch
 
 	/* C routine to process image */
 	for (i = istart; i < length; i++) {
-		result = (int) *cursrc1 * (int) *cursrc2;
-		*curdst = (unsigned char) result;
+		*curdst = (int)*cursrc1 * (int)*cursrc2;  // (int) for efficiency
 		/* Advance pointers */
 		cursrc1++;
 		cursrc2++;
@@ -948,7 +917,7 @@ int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned ch
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterMultDivby2MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+static int SDL_imageFilterMultDivby2MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -986,38 +955,28 @@ L1015:
 			popa
 	}
 #else
-	asm volatile
-		("pusha \n\t" "mov %2, %%eax \n\t"	/* load Src1 address into eax */
-		"mov %1, %%ebx \n\t"	/* load Src2 address into ebx */
-		"mov %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		"pxor      %%mm0, %%mm0 \n\t"	/* zero mm0 register */
-		".align 16       \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm1 \n\t"	/* load 8 bytes from Src1 into mm1 */
-		"movq    (%%ebx), %%mm3 \n\t"	/* load 8 bytes from Src2 into mm3 */
-		"movq      %%mm1, %%mm2 \n\t"	/* copy mm1 into mm2 */
-		"movq      %%mm3, %%mm4 \n\t"	/* copy mm3 into mm4  */
-		"punpcklbw %%mm0, %%mm1 \n\t"	/* unpack low  bytes of Src1 into words */
-		"punpckhbw %%mm0, %%mm2 \n\t"	/* unpack high bytes of Src1 into words */
-		"punpcklbw %%mm0, %%mm3 \n\t"	/* unpack low  bytes of Src2 into words */
-		"punpckhbw %%mm0, %%mm4 \n\t"	/* unpack high bytes of Src2 into words */
-		"psrlw        $1, %%mm1 \n\t"	/* divide mm1 words by 2, Src1 low bytes */
-		"psrlw        $1, %%mm2 \n\t"	/* divide mm2 words by 2, Src1 high bytes */
-		"pmullw    %%mm3, %%mm1 \n\t"	/* mul low  bytes of Src1 and Src2  */
-		"pmullw    %%mm4, %%mm2 \n\t"	/* mul high bytes of Src1 and Src2 */
-		"packuswb  %%mm2, %%mm1 \n\t"	/* pack words back into bytes with saturation */
-		"movq    %%mm1, (%%edi) \n\t"	/* store result in Dest */
-		"add $8, %%eax \n\t"	/* increase Src1, Src2 and Dest  */
-		"add $8, %%ebx \n\t"	/* register pointers by 8 */
-		"add $8, %%edi \n\t" "dec %%ecx     \n\t"	/* decrease loop counter */
-		"jnz 1b        \n\t"	/* check loop termination, proceed if required */
-		"emms          \n\t"	/* exit MMX state */
-		"popa \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength)		/* %3 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mSrc2 = (__m64*)Src2;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 mm0 = _m_from_int(0); /* zero mm0 register */
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm1, mm2, mm3, mm4, mm5, mm6;
+		mm1 = _m_punpcklbw(*mSrc1, mm0);	/* unpack low  bytes of Src1 into words */
+		mm2 = _m_punpckhbw(*mSrc1, mm0);	/* unpack high bytes of Src1 into words */
+		mm3 = _m_punpcklbw(*mSrc2, mm0);	/* unpack low  bytes of Src2 into words */
+		mm4 = _m_punpckhbw(*mSrc2, mm0);	/* unpack high bytes of Src2 into words */
+		mm1 = _m_psrlwi(mm1, 1);		/* divide mm1 words by 2, Src1 low bytes */
+		mm2 = _m_psrlwi(mm2, 1);		/* divide mm2 words by 2, Src1 high bytes */
+		mm1 = _m_pmullw(mm1, mm3);		/* mul low  bytes of Src1 and Src2  */
+		mm2 = _m_pmullw(mm2, mm4);		/* mul high bytes of Src1 and Src2 */
+		*mDest = _m_packuswb(mm1, mm2);		/* pack words back into bytes with saturation */
+		mSrc1++;
+		mSrc2++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -1095,7 +1054,7 @@ int SDL_imageFilterMultDivby2(unsigned char *Src1, unsigned char *Src2, unsigned
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterMultDivby4MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+static int SDL_imageFilterMultDivby4MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -1135,40 +1094,30 @@ L1016:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "mov %2, %%eax \n\t"	/* load Src1 address into eax */
-		"mov %1, %%ebx \n\t"	/* load Src2 address into ebx */
-		"mov %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		"pxor      %%mm0, %%mm0 \n\t"	/* zero mm0 register */
-		".align 16       \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm1 \n\t"	/* load 8 bytes from Src1 into mm1 */
-		"movq    (%%ebx), %%mm3 \n\t"	/* load 8 bytes from Src2 into mm3 */
-		"movq      %%mm1, %%mm2 \n\t"	/* copy mm1 into mm2 */
-		"movq      %%mm3, %%mm4 \n\t"	/* copy mm3 into mm4  */
-		"punpcklbw %%mm0, %%mm1 \n\t"	/* unpack low  bytes of Src1 into words */
-		"punpckhbw %%mm0, %%mm2 \n\t"	/* unpack high bytes of Src1 into words */
-		"punpcklbw %%mm0, %%mm3 \n\t"	/* unpack low  bytes of Src2 into words */
-		"punpckhbw %%mm0, %%mm4 \n\t"	/* unpack high bytes of Src2 into words */
-		"psrlw        $1, %%mm1 \n\t"	/* divide mm1 words by 2, Src1 low bytes */
-		"psrlw        $1, %%mm2 \n\t"	/* divide mm2 words by 2, Src1 high bytes */
-		"psrlw        $1, %%mm3 \n\t"	/* divide mm3 words by 2, Src2 low bytes */
-		"psrlw        $1, %%mm4 \n\t"	/* divide mm4 words by 2, Src2 high bytes */
-		"pmullw    %%mm3, %%mm1 \n\t"	/* mul low  bytes of Src1 and Src2  */
-		"pmullw    %%mm4, %%mm2 \n\t"	/* mul high bytes of Src1 and Src2 */
-		"packuswb  %%mm2, %%mm1 \n\t"	/* pack words back into bytes with saturation */
-		"movq    %%mm1, (%%edi) \n\t"	/* store result in Dest */
-		"add $8, %%eax \n\t"	/* increase Src1, Src2 and Dest  */
-		"add $8, %%ebx \n\t"	/* register pointers by 8 */
-		"add $8, %%edi \n\t" "dec %%ecx     \n\t"	/* decrease loop counter */
-		"jnz 1b        \n\t"	/* check loop termination, proceed if required */
-		"emms          \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength)		/* %3 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mSrc2 = (__m64*)Src2;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 mm0 = _m_from_int(0); /* zero mm0 register */
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm1, mm2, mm3, mm4, mm5, mm6;
+		mm1 = _m_punpcklbw(*mSrc1, mm0);	/* unpack low  bytes of Src1 into words */
+		mm2 = _m_punpckhbw(*mSrc1, mm0);	/* unpack high bytes of Src1 into words */
+		mm3 = _m_punpcklbw(*mSrc2, mm0);	/* unpack low  bytes of Src2 into words */
+		mm4 = _m_punpckhbw(*mSrc2, mm0);	/* unpack high bytes of Src2 into words */
+		mm1 = _m_psrlwi(mm1, 1);		/* divide mm1 words by 2, Src1 low bytes */
+		mm2 = _m_psrlwi(mm2, 1);		/* divide mm2 words by 2, Src1 high bytes */
+		mm3 = _m_psrlwi(mm3, 1);		/* divide mm3 words by 2, Src2 low bytes */
+		mm4 = _m_psrlwi(mm4, 1);		/* divide mm4 words by 2, Src2 high bytes */
+		mm1 = _m_pmullw(mm1, mm3);		/* mul low  bytes of Src1 and Src2  */
+		mm2 = _m_pmullw(mm2, mm4);		/* mul high bytes of Src1 and Src2 */
+		*mDest = _m_packuswb(mm1, mm2);		/* pack words back into bytes with saturation */
+		mSrc1++;
+		mSrc2++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -1246,7 +1195,7 @@ int SDL_imageFilterMultDivby4(unsigned char *Src1, unsigned char *Src2, unsigned
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterBitAndMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+static int SDL_imageFilterBitAndMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -1272,26 +1221,40 @@ L1017:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "mov %2, %%eax \n\t"	/* load Src1 address into eax */
-		"mov %1, %%ebx \n\t"	/* load Src2 address into ebx */
-		"mov %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16       \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm1 \n\t"	/* load 8 bytes from Src1 into mm1 */
-		"pand    (%%ebx), %%mm1 \n\t"	/* mm1=Src1&Src2 */
-		"movq    %%mm1, (%%edi) \n\t"	/* store result in Dest */
-		"add $8, %%eax \n\t"	/* increase Src1, Src2 and Dest  */
-		"add $8, %%ebx \n\t"	/* register pointers by 8 */
-		"add $8, %%edi \n\t" "dec %%ecx     \n\t"	/* decrease loop counter */
-		"jnz 1b        \n\t"	/* check loop termination, proceed if required */
-		"emms          \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength)		/* %3 */
-		);
+	/* x86_64 ASM with constraints: */
+	/* asm volatile ( */
+	/* 	"shr $3, %%rcx \n\t"	/\* counter/8 (MMX loads 8 bytes at a time) *\/ */
+	/* 	".align 16       \n\t"	/\* 16 byte alignment of the loop entry *\/ */
+	/* 	"1: movq (%%rax), %%mm1 \n\t"	/\* load 8 bytes from Src1 into mm1 *\/ */
+	/* 	"pand    (%%rbx), %%mm1 \n\t"	/\* mm1=Src1&Src2 *\/ */
+	/* 	"movq    %%mm1, (%%rdi) \n\t"	/\* store result in Dest *\/ */
+	/* 	"add $8, %%rax \n\t"	/\* increase Src1, Src2 and Dest  *\/ */
+	/* 	"add $8, %%rbx \n\t"	/\* register pointers by 8 *\/ */
+	/* 	"add $8, %%rdi \n\t" */
+	/* 	"dec %%rcx     \n\t"	/\* decrease loop counter *\/ */
+	/* 	"jnz 1b        \n\t"	/\* check loop termination, proceed if required *\/ */
+	/* 	"emms          \n\t"	/\* exit MMX state *\/ */
+	/* 	: "+a" (Src1),		/\* load Src1 address into rax, modified by the loop *\/ */
+	/* 	  "+b" (Src2),		/\* load Src2 address into rbx, modified by the loop *\/ */
+	/* 	  "+c" (SrcLength),	/\* load loop counter (SIZE) into rcx, modified by the loop *\/ */
+	/* 	  "+D" (Dest)		/\* load Dest address into rdi, modified by the loop *\/ */
+	/* 	: */
+	/* 	: "memory",		/\* *Dest is modified *\/ */
+        /*           "mm1"			/\* register mm1 modified *\/ */
+	/* ); */
+
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mSrc2 = (__m64*)Src2;
+	__m64 *mDest = (__m64*)Dest;
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_pand(*mSrc1, *mSrc2);	/* Src1&Src2 */
+		mSrc1++;
+		mSrc2++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -1368,7 +1331,7 @@ int SDL_imageFilterBitAnd(unsigned char *Src1, unsigned char *Src2, unsigned cha
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterBitOrMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+static int SDL_imageFilterBitOrMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -1394,26 +1357,18 @@ L91017:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "mov %2, %%eax \n\t"	/* load Src1 address into eax */
-		"mov %1, %%ebx \n\t"	/* load Src2 address into ebx */
-		"mov %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16       \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm1 \n\t"	/* load 8 bytes from Src1 into mm1 */
-		"por     (%%ebx), %%mm1 \n\t"	/* mm1=Src1|Src2 */
-		"movq    %%mm1, (%%edi) \n\t"	/* store result in Dest */
-		"add $8, %%eax \n\t"	/* increase Src1, Src2 and Dest  */
-		"add $8, %%ebx \n\t"	/* register pointers by 8 */
-		"add $8, %%edi \n\t" "dec %%ecx     \n\t"	/* decrease loop counter */
-		"jnz 1b        \n\t"	/* check loop termination, proceed if required */
-		"emms          \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength)		/* %3 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mSrc2 = (__m64*)Src2;
+	__m64 *mDest = (__m64*)Dest;
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_por(*mSrc1, *mSrc2);	/* Src1|Src2 */
+		mSrc1++;
+		mSrc2++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -1487,7 +1442,7 @@ int SDL_imageFilterBitOr(unsigned char *Src1, unsigned char *Src2, unsigned char
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterDivASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+static int SDL_imageFilterDivASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -1519,27 +1474,57 @@ L10193:
 			popa
 	}
 #else
-	asm volatile
-		("pusha \n\t" "mov %2, %%edx \n\t"	/* load Src1 address into edx */
-		"mov %1, %%esi \n\t"	/* load Src2 address into esi */
-		"mov %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov %3, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		".align 16     \n\t"	/* 16 byte alignment of the loop entry */
+	/* Note: ~15% gain on i386, less efficient than C on x86_64 */
+	/* Also depends on whether the function is static (?!) */
+	/* Also depends on whether we work on malloc() or static char[] */
+	asm volatile (
+#  if defined(i386)
+		"pushl %%ebx \n\t"		/* %ebx may be the PIC register.  */
+		".align 16     \n\t"		/* 16 byte alignment of the loop entry */
 		"1: mov (%%esi), %%bl  \n\t"	/* load a byte from Src2 */
-		"cmp       $0, %%bl  \n\t"	/* check if it zero */
-		"jnz 2f              \n\t" "movb  $255, (%%edi) \n\t"	/* division by zero = 255 !!! */
-		"jmp 3f              \n\t" "2:                  \n\t" "xor   %%ah, %%ah    \n\t"	/* prepare AX, zero AH register */
-		"mov   (%%edx), %%al \n\t"	/* load a byte from Src1 into AL */
-		"div   %%bl          \n\t"	/* divide AL by BL */
-		"mov   %%al, (%%edi) \n\t"	/* move a byte result to Dest */
-		"3: inc %%edx        \n\t"	/* increment Src1, Src2, Dest */
+		"cmp       $0, %%bl    \n\t"	/* check if it is zero */
+		"jnz 2f                \n\t"
+		"movb  $255, (%%edi)   \n\t"	/* division by zero = 255 !!! */
+		"jmp 3f                \n\t"
+		"2: xor %%ah, %%ah     \n\t"	/* prepare AX, zero AH register */
+		"mov   (%%edx), %%al   \n\t"	/* load a byte from Src1 into AL */
+		"div   %%bl            \n\t"	/* divide AL by BL */
+		"mov   %%al, (%%edi)   \n\t"	/* move a byte result to Dest */
+		"3: inc %%edx          \n\t"	/* increment Src1, Src2, Dest */
 		"inc %%esi \n\t"		/* pointer registers by one */
-		"inc %%edi \n\t" "dec %%ecx    \n\t"	/* decrease loop counter */
-		"jnz 1b       \n\t"	/* check loop termination, proceed if required */
-		"popa \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src2),		/* %1 */
-		"m"(Src1),		/* %2 */
-		"m"(SrcLength)		/* %3 */
+		"inc %%edi \n\t"
+		"dec %%ecx \n\t"		/* decrease loop counter */
+		"jnz 1b    \n\t"		/* check loop termination, proceed if required */
+		"popl %%ebx \n\t"		/* restore %ebx */
+		: "+d" (Src1),		/* load Src1 address into edx */
+		  "+S" (Src2),		/* load Src2 address into esi */
+		  "+c" (SrcLength),	/* load loop counter (SIZE) into ecx */
+		  "+D" (Dest)		/* load Dest address into edi */
+		:
+		: "memory", "rax"
+#  elif defined(__x86_64__)
+		".align 16     \n\t"		/* 16 byte alignment of the loop entry */
+		"1: mov (%%rsi), %%bl  \n\t"	/* load a byte from Src2 */
+		"cmp       $0, %%bl    \n\t"	/* check if it is zero */
+		"jnz 2f                \n\t"
+		"movb  $255, (%%rdi)   \n\t"	/* division by zero = 255 !!! */
+		"jmp 3f                \n\t"
+		"2: xor %%ah, %%ah     \n\t"	/* prepare AX, zero AH register */
+		"mov   (%%rdx), %%al   \n\t"	/* load a byte from Src1 into AL */
+		"div   %%bl            \n\t"	/* divide AL by BL */
+		"mov   %%al, (%%rdi)   \n\t"	/* move a byte result to Dest */
+		"3: inc %%rdx          \n\t"	/* increment Src1, Src2, Dest */
+		"inc %%rsi \n\t"		/* pointer registers by one */
+		"inc %%rdi \n\t"
+		"dec %%rcx \n\t"		/* decrease loop counter */
+		"jnz 1b    \n\t"		/* check loop termination, proceed if required */
+		: "+d" (Src1),		/* load Src1 address into rdx */
+		  "+S" (Src2),		/* load Src2 address into rsi */
+		  "+c" (SrcLength),	/* load loop counter (SIZE) into rcx */
+		  "+D" (Dest)		/* load Dest address into rdi */
+		:
+		: "memory", "rax", "rbx"
+#  endif
 		);
 #endif
 	return (0);
@@ -1562,7 +1547,6 @@ int SDL_imageFilterDiv(unsigned char *Src1, unsigned char *Src2, unsigned char *
 {
 	unsigned int i, istart;
 	unsigned char *cursrc1, *cursrc2, *curdst;
-	int result;
 
 	/* Validate input parameters */
 	if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
@@ -1589,9 +1573,24 @@ int SDL_imageFilterDiv(unsigned char *Src1, unsigned char *Src2, unsigned char *
 	curdst = Dest;
 
 	/* C routine to process image */
+	/* for (i = istart; i < length; i++) { */
+	/* 	if (*cursrc2 == 0) { */
+	/* 		*curdst = 255; */
+	/* 	} else { */
+	/* 		result = (int) *cursrc1 / (int) *cursrc2; */
+	/* 		*curdst = (unsigned char) result; */
+	/* 	} */
+	/* 	/\* Advance pointers *\/ */
+	/* 	cursrc1++; */
+	/* 	cursrc2++; */
+	/* 	curdst++; */
+	/* } */
 	for (i = istart; i < length; i++) {
-		result = (int) *cursrc1 / (int) *cursrc2;
-		*curdst = (unsigned char) result;
+		if (*cursrc2 == 0) {
+			*curdst = 255;
+		} else {
+			*curdst = (int)*cursrc1 / (int)*cursrc2;  // (int) for efficiency
+		}
 		/* Advance pointers */
 		cursrc1++;
 		cursrc2++;
@@ -1612,7 +1611,7 @@ int SDL_imageFilterDiv(unsigned char *Src1, unsigned char *Src2, unsigned char *
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterBitNegationMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength)
+static int SDL_imageFilterBitNegationMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -1637,24 +1636,19 @@ L91117:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "pcmpeqb   %%mm1, %%mm1 \n\t"	/* generate all 1's in mm1 */
-		"mov %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16       \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm0 \n\t"	/* load 8 bytes from Src1 into mm1 */
-		"pxor      %%mm1, %%mm0 \n\t"	/* negate mm0 by xoring with mm1 */
-		"movq    %%mm0, (%%edi) \n\t"	/* store result in Dest */
-		"add $8, %%eax \n\t"	/* increase Src1, Src2 and Dest  */
-		"add $8, %%edi \n\t" "dec %%ecx     \n\t"	/* decrease loop counter */
-		"jnz 1b        \n\t"	/* check loop termination, proceed if required */
-		"emms          \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength)		/* %2 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+        __m64 mm1;
+	mm1 = _m_pcmpeqb(mm1, mm1);		/* generate all 1's in mm1 */
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_pxor(*mSrc1, mm1);	/* negate Src1 by xoring with mm1 */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();				/* clean MMX state */
+
 #endif
 	return (0);
 #else
@@ -1724,7 +1718,7 @@ int SDL_imageFilterBitNegation(unsigned char *Src1, unsigned char *Dest, unsigne
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterAddByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
+static int SDL_imageFilterAddByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -1757,36 +1751,22 @@ L1021:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t"
-		/* ** Duplicate C in 8 bytes of MM1 ** */
-		"mov           %3, %%al \n\t"	/* load C into AL */
-		"mov         %%al, %%ah \n\t"	/* copy AL into AH */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm1 \n\t"	/* copy EAX into MM1 */
-		"movd      %%eax, %%mm2 \n\t"	/* copy EAX into MM2 */
-		"punpckldq %%mm2, %%mm1 \n\t"	/* fill higher bytes of MM1 with C */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1:                     \n\t" 
-		"movq    (%%eax), %%mm0 \n\t"	/* load 8 bytes from Src1 into MM0 */
-		"paddusb   %%mm1, %%mm0 \n\t"	/* MM0=SrcDest+C (add 8 bytes with saturation) */
-		"movq    %%mm0, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Dest register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(C)			/* %3 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	/* Duplicate C in 8 bytes of MM1 */
+	int i;
+	memset(&i, C, 4);
+	__m64 mm1 = _m_from_int(i);
+	__m64 mm2 = _m_from_int(i);
+	mm1 = _m_punpckldq(mm1, mm2);			/* fill higher bytes of MM1 with C */
+        //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_paddusb(*mSrc1, mm1);	/* Src1+C (add 8 bytes with saturation) */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -1871,7 +1851,7 @@ int SDL_imageFilterAddByte(unsigned char *Src1, unsigned char *Dest, unsigned in
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterAddUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)
+static int SDL_imageFilterAddUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -1901,34 +1881,21 @@ L11023:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t"
-		/* ** Duplicate (int)C in 8 bytes of MM1 ** */
-		"mov          %3, %%eax \n\t"	/* load C into EAX */
-		"movd      %%eax, %%mm1 \n\t"	/* copy EAX into MM1 */
-		"mov          %4, %%eax \n\t"	/* load D into EAX */
-		"movd      %%eax, %%mm2 \n\t"	/* copy EAX into MM2 */
-		"punpckldq %%mm2, %%mm1 \n\t"	/* fill higher bytes of MM1 with C */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1:                     \n\t" 
-		"movq    (%%eax), %%mm0 \n\t"	/* load 8 bytes from SrcDest into MM0 */
-		"paddusb   %%mm1, %%mm0 \n\t"	/* MM0=SrcDest+C (add 8 bytes with saturation) */
-		"movq    %%mm0, (%%edi) \n\t"	/* store result in SrcDest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(C),			/* %3 */
-		"m"(D)			/* %4 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	/* Duplicate (int)C in 8 bytes of MM1 */
+	__m64 mm1 = _m_from_int(C);
+	__m64 mm2 = _m_from_int(C);
+	mm1 = _m_punpckldq(mm1, mm2);			/* fill higher bytes of MM1 with C */
+        //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_paddusb(*mSrc1, mm1);	/* Src1+C (add 8 bytes with saturation) */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -2020,7 +1987,7 @@ int SDL_imageFilterAddUint(unsigned char *Src1, unsigned char *Dest, unsigned in
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterAddByteToHalfMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C,
+static int SDL_imageFilterAddByteToHalfMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C,
 									unsigned char *Mask)
 {
 #ifdef USE_MMX
@@ -2048,7 +2015,6 @@ L1022:
 		movq mm2, [eax]   	/* load 8 bytes from Src1 into MM2 */
 		psrlw mm2, 1   	/* shift 4 WORDS of MM2 1 bit to the right */
 			pand mm2, mm0        // apply Mask to 8 BYTES of MM2 */
-			/* byte     0x0f, 0xdb, 0xd0 */
 			paddusb mm2,  mm1 	/* MM2=SrcDest+C (add 8 bytes with saturation) */
 			movq [edi], mm2   	/* store result in Dest */
 			add eax, 8   	/* increase Src1 register pointer by 8 */
@@ -2059,42 +2025,26 @@ L1022:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t"
-		/* ** Duplicate C in 8 bytes of MM1 ** */
-		"mov           %3, %%al \n\t"	/* load C into AL */
-		"mov         %%al, %%ah \n\t"	/* copy AL into AH */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm1 \n\t"	/* copy EAX into MM1 */
-		"movd      %%eax, %%mm2 \n\t"	/* copy EAX into MM2 */
-		"punpckldq %%mm2, %%mm1 \n\t"	/* fill higher bytes of MM1 with C */
-		"movl         %4, %%edx \n\t"	/* load Mask address into edx */
-		"movq    (%%edx), %%mm0 \n\t"	/* load Mask into mm0 */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1:                     \n\t" 
-		"movq    (%%eax), %%mm2 \n\t"	/* load 8 bytes from Src1 into MM2 */
-		"psrlw        $1, %%mm2 \n\t"	/* shift 4 WORDS of MM2 1 bit to the right */
-		/*    "pand      %%mm0, %%mm2 \n\t"    // apply Mask to 8 BYTES of MM2 */
-		".byte     0x0f, 0xdb, 0xd0 \n\t" 
-		"paddusb   %%mm1, %%mm2 \n\t"	/* MM2=SrcDest+C (add 8 bytes with saturation) */
-		"movq    %%mm2, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                  1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(C),			/* %3 */
-		"m"(Mask)			/* %4 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 *mMask = (__m64*)Mask;
+	/* Duplicate C in 8 bytes of MM1 */
+	int i;
+	memset(&i, C, 4);
+	__m64 mm1 = _m_from_int(i);
+	__m64 mm2 = _m_from_int(i);
+	mm1 = _m_punpckldq(mm1, mm2);			/* fill higher bytes of MM1 with C */
+        //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm2 = _m_psrlwi(*mSrc1, 1);	/* shift 4 WORDS of MM2 1 bit to the right */
+		mm2 = _m_pand(mm2, *mMask);		/* apply Mask to 8 BYTES of MM2 */
+							/* byte     0x0f, 0xdb, 0xd0 */
+		*mDest = _m_paddusb(mm1, mm2);		/* Src1/2+C (add 8 bytes with saturation) */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -2207,35 +2157,22 @@ L1023:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t"
-		/* ** Duplicate C in 8 bytes of MM1 ** */
-		"mov           %3, %%al \n\t"	/* load C into AL */
-		"mov         %%al, %%ah \n\t"	/* copy AL into AH */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm1 \n\t"	/* copy EAX into MM1 */
-		"movd      %%eax, %%mm2 \n\t"	/* copy EAX into MM2 */
-		"punpckldq %%mm2, %%mm1 \n\t"	/* fill higher bytes of MM1 with C */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm0 \n\t"	/* load 8 bytes from SrcDest into MM0 */
-		"psubusb   %%mm1, %%mm0 \n\t"	/* MM0=SrcDest-C (sub 8 bytes with saturation) */
-		"movq    %%mm0, (%%edi) \n\t"	/* store result in SrcDest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(C)			/* %3 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	/* Duplicate C in 8 bytes of MM1 */
+	int i;
+	memset(&i, C, 4);
+	__m64 mm1 = _m_from_int(i);
+	__m64 mm2 = _m_from_int(i);
+	mm1 = _m_punpckldq(mm1, mm2);			/* fill higher bytes of MM1 with C */
+        //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_psubusb(*mSrc1, mm1);	/* Src1-C (sub 8 bytes with saturation) */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -2320,7 +2257,7 @@ int SDL_imageFilterSubByte(unsigned char *Src1, unsigned char *Dest, unsigned in
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterSubUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)
+static int SDL_imageFilterSubUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -2350,33 +2287,21 @@ L11024:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t"
-		/* ** Duplicate (int)C in 8 bytes of MM1 ** */
-		"mov          %3, %%eax \n\t"	/* load C into EAX */
-		"movd      %%eax, %%mm1 \n\t"	/* copy EAX into MM1 */
-		"mov          %4, %%eax \n\t"	/* load D into EAX */
-		"movd      %%eax, %%mm2 \n\t"	/* copy EAX into MM2 */
-		"punpckldq %%mm2, %%mm1 \n\t"	/* fill higher bytes of MM1 with C */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm0 \n\t"	/* load 8 bytes from SrcDest into MM0 */
-		"psubusb   %%mm1, %%mm0 \n\t"	/* MM0=SrcDest-C (sub 8 bytes with saturation) */
-		"movq    %%mm0, (%%edi) \n\t"	/* store result in SrcDest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                  1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(C),			/* %3 */
-		"m"(D)			/* %4 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	/* Duplicate (int)C in 8 bytes of MM1 */
+	__m64 mm1 = _m_from_int(C);
+	__m64 mm2 = _m_from_int(C);
+	mm1 = _m_punpckldq(mm1, mm2);			/* fill higher bytes of MM1 with C */
+        //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_psubusb(*mSrc1, mm1);	/* Src1-C (sub 8 bytes with saturation) */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -2468,7 +2393,7 @@ int SDL_imageFilterSubUint(unsigned char *Src1, unsigned char *Dest, unsigned in
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterShiftRightMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
+static int SDL_imageFilterShiftRightMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
 								 unsigned char *Mask)
 {
 #ifdef USE_MMX
@@ -2508,42 +2433,26 @@ L10241:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "movl         %4, %%edx \n\t"	/* load Mask address into edx */
-		"movq    (%%edx), %%mm0 \n\t"	/* load Mask into mm0 */
-		"xor       %%ecx, %%ecx \n\t"	/* zero ECX */
-		"mov           %3, %%cl \n\t"	/* load loop counter (N) into CL */
-		"movd      %%ecx, %%mm3 \n\t"	/* copy (N) into MM3  */
-		"pcmpeqb   %%mm1, %%mm1 \n\t"	/* generate all 1's in mm1 */
-		"1:                     \n\t"	/* ** Prepare proper bit-Mask in MM1 ** */
-		"psrlw        $1, %%mm1 \n\t"	/* shift 4 WORDS of MM1 1 bit to the right */
-		/*    "pand      %%mm0, %%mm1 \n\t"    // apply Mask to 8 BYTES of MM1 */
-		".byte     0x0f, 0xdb, 0xc8 \n\t" 
-		"dec               %%cl \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		/* ** Shift all bytes of the image ** */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"2:                     \n\t" 
-		"movq    (%%eax), %%mm0 \n\t"	/* load 8 bytes from SrcDest into MM0 */
-		"psrlw     %%mm3, %%mm0 \n\t"	/* shift 4 WORDS of MM0 (N) bits to the right */
-		/*    "pand      %%mm1, %%mm0 \n\t"    // apply proper bit-Mask to 8 BYTES of MM0 */
-		".byte     0x0f, 0xdb, 0xc1 \n\t" 
-		"movq    %%mm0, (%%edi) \n\t"	/* store result in SrcDest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 2b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(N),			/* %3 */
-		"m"(Mask)			/* %4 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 *mMask = (__m64*)Mask;
+        __m64 mm1;
+	int i;
+	mm1 = _m_pcmpeqb(mm1, mm1);			/* generate all 1's in mm1 */
+	/* Prepare proper bit-Mask in MM1 */
+	for (i = 0; i < N; i++) {
+		mm1 = _m_psrlwi(mm1, 1);		/* shift 4 WORDS of MM1 1 bit to the right */
+		mm1 = _m_pand(mm1, *mMask);		/* apply Mask to 8 BYTES of MM1 */
+	}
+        /* Shift all bytes of the image */
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm0 = _m_psrlwi(*mSrc1, N);	/* shift 4 WORDS of MM0 (N) bits to the right */
+		*mDest = _m_pand(mm0, mm1);		/* apply proper bit-Mask to 8 BYTES of MM0 */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -2628,7 +2537,7 @@ int SDL_imageFilterShiftRight(unsigned char *Src1, unsigned char *Dest, unsigned
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterShiftRightUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
+static int SDL_imageFilterShiftRightUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -2652,26 +2561,16 @@ L13023:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t"
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm0 \n\t"	/* load 8 bytes from SrcDest into MM0 */
-		"psrld   %3, %%mm0 \n\t"
-		"movq    %%mm0, (%%edi) \n\t"	/* store result in SrcDest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(N)			/* %3 */
-		);
+	/* i386 and x86_64: MMX intrinsics path; processes SrcLength/8 groups of 8 bytes */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_psrldi(*mSrc1, N);	/* shift 2 DWORDS of Src1 (N) bits to the right */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -2759,7 +2658,7 @@ int SDL_imageFilterShiftRightUint(unsigned char *Src1, unsigned char *Dest, unsi
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterMultByByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
+static int SDL_imageFilterMultByByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -2825,64 +2724,49 @@ L10252:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t"
-		/* ** Duplicate C in 4 words of MM1 ** */
-		"mov           %3, %%al \n\t"	/* load C into AL */
-		"xor         %%ah, %%ah \n\t"	/* zero AH */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm1 \n\t"	/* copy EAX into MM1 */
-		"movd      %%eax, %%mm2 \n\t"	/* copy EAX into MM2 */
-		"punpckldq %%mm2, %%mm1 \n\t"	/* fill higher words of MM1 with C */
-		"pxor      %%mm0, %%mm0 \n\t"	/* zero MM0 register */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		"cmp         $128, %%al \n\t"	/* if (C <= 128) execute more efficient code */
-		"jg                  2f \n\t" ".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm3 \n\t"	/* load 8 bytes from Src1 into MM3 */
-		"movq      %%mm3, %%mm4 \n\t"	/* copy MM3 into MM4  */
-		"punpcklbw %%mm0, %%mm3 \n\t"	/* unpack low  bytes of SrcDest into words */
-		"punpckhbw %%mm0, %%mm4 \n\t"	/* unpack high bytes of SrcDest into words */
-		"pmullw    %%mm1, %%mm3 \n\t"	/* mul low  bytes of SrcDest and MM1 */
-		"pmullw    %%mm1, %%mm4 \n\t"	/* mul high bytes of SrcDest and MM1 */
-		"packuswb  %%mm4, %%mm3 \n\t"	/* pack words back into bytes with saturation */
-		"movq    %%mm3, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"jmp                 3f \n\t" ".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"2: movq (%%eax), %%mm3 \n\t"	/* load 8 bytes from Src1 into MM3 */
-		"movq      %%mm3, %%mm4 \n\t"	/* copy MM3 into MM4  */
-		"punpcklbw %%mm0, %%mm3 \n\t"	/* unpack low  bytes of SrcDest into words */
-		"punpckhbw %%mm0, %%mm4 \n\t"	/* unpack high bytes of SrcDest into words */
-		"pmullw    %%mm1, %%mm3 \n\t"	/* mul low  bytes of SrcDest and MM1 */
-		"pmullw    %%mm1, %%mm4 \n\t"	/* mul high bytes of SrcDest and MM1 */
-		/* ** Take abs value of the results (signed words) ** */
-		"movq      %%mm3, %%mm5 \n\t"	/* copy mm3 into mm5 */
-		"movq      %%mm4, %%mm6 \n\t"	/* copy mm4 into mm6 */
-		"psraw       $15, %%mm5 \n\t"	/* fill mm5 words with word sign bit */
-		"psraw       $15, %%mm6 \n\t"	/* fill mm6 words with word sign bit */
-		"pxor      %%mm5, %%mm3 \n\t"	/* take 1's compliment of only neg. words */
-		"pxor      %%mm6, %%mm4 \n\t"	/* take 1's compliment of only neg. words */
-		"psubsw    %%mm5, %%mm3 \n\t"	/* add 1 to only neg. words, W-(-1) or W-0 */
-		"psubsw    %%mm6, %%mm4 \n\t"	/* add 1 to only neg. words, W-(-1) or W-0 */
-		"packuswb  %%mm4, %%mm3 \n\t"	/* pack words back into bytes with saturation */
-		"movq    %%mm3, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 2b \n\t"	/* check loop termination, proceed if required */
-		"3: emms               \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(C)			/* %3 */
-		);
+	/* i386 and x86_64: MMX intrinsics path; processes SrcLength/8 groups of 8 bytes */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 mm0 = _m_from_int(0);				/* zero mm0 register */
+	/* Duplicate C in 4 words of MM1 */
+	int i;
+	i = C | C<<16;						/* C in the low word and in the high word of i */
+	__m64 mm1 = _m_from_int(i);
+	__m64 mm2 = _m_from_int(i);
+	mm1 = _m_punpckldq(mm1, mm2);				/* fill higher words of MM1 with C */
+	// long long lli = C | C<<16 | (long long)C<<32 | (long long)C<<48;
+        //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+	if (C <= 128) {						/* if (C <= 128) execute more efficient code */
+		for (i = 0; i < SrcLength/8; i++) {
+			__m64 mm3, mm4;
+			mm3 = _m_punpcklbw(*mSrc1, mm0);	/* unpack low  bytes of Src1 into words */
+			mm4 = _m_punpckhbw(*mSrc1, mm0);	/* unpack high bytes of Src1 into words */
+			mm3 = _m_pmullw(mm3, mm1);		/* mul low  bytes of Src1 and MM1 */
+			mm4 = _m_pmullw(mm4, mm1);		/* mul high bytes of Src1 and MM1 */
+			*mDest = _m_packuswb(mm3, mm4);		/* pack words back into bytes with saturation */
+			mSrc1++;
+			mDest++;
+		}
+	} else {
+		for (i = 0; i < SrcLength/8; i++) {
+			__m64 mm3, mm4, mm5, mm6;
+			mm3 = _m_punpcklbw(*mSrc1, mm0);	/* unpack low  bytes of Src1 into words */
+			mm4 = _m_punpckhbw(*mSrc1, mm0);	/* unpack high bytes of Src1 into words */
+			mm3 = _m_pmullw(mm3, mm1);		/* mul low  bytes of Src1 and MM1 */
+			mm4 = _m_pmullw(mm4, mm1);		/* mul high bytes of Src1 and MM1 */
+			/* Take abs value of the results (signed words) */
+			mm5 = _m_psrawi(mm3, 15);		/* fill mm5 words with word sign bit */
+			mm6 = _m_psrawi(mm4, 15);		/* fill mm6 words with word sign bit */
+			mm3 = _m_pxor(mm3, mm5);		/* take 1's complement of only neg. words */
+			mm4 = _m_pxor(mm4, mm6);		/* take 1's complement of only neg. words */
+			mm3 = _m_psubsw(mm3, mm5);		/* add 1 to only neg. words, W-(-1) or W-0 */
+			mm4 = _m_psubsw(mm4, mm6);		/* add 1 to only neg. words, W-(-1) or W-0 */
+			*mDest = _m_packuswb(mm3, mm4);		/* pack words back into bytes with saturation */
+			mSrc1++;
+			mDest++;
+		}
+	}
+	_m_empty();						/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -2967,7 +2851,7 @@ int SDL_imageFilterMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterShiftRightAndMultByByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
+static int SDL_imageFilterShiftRightAndMultByByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
 											  unsigned char C)
 {
 #ifdef USE_MMX
@@ -3012,47 +2896,29 @@ L1026:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t"
-		/* ** Duplicate C in 4 words of MM1 ** */
-		"mov           %4, %%al \n\t"	/* load C into AL */
-		"xor         %%ah, %%ah \n\t"	/* zero AH */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm1 \n\t"	/* copy EAX into MM1 */
-		"movd      %%eax, %%mm2 \n\t"	/* copy EAX into MM2 */
-		"punpckldq %%mm2, %%mm1 \n\t"	/* fill higher words of MM1 with C */
-		"xor       %%ecx, %%ecx \n\t"	/* zero ECX */
-		"mov           %3, %%cl \n\t"	/* load N into CL */
-		"movd      %%ecx, %%mm7 \n\t"	/* copy N into MM7 */
-		"pxor      %%mm0, %%mm0 \n\t"	/* zero MM0 register */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16             \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm3 \n\t"	/* load 8 bytes from Src1 into MM3 */
-		"movq      %%mm3, %%mm4 \n\t"	/* copy MM3 into MM4  */
-		"punpcklbw %%mm0, %%mm3 \n\t"	/* unpack low  bytes of SrcDest into words */
-		"punpckhbw %%mm0, %%mm4 \n\t"	/* unpack high bytes of SrcDest into words */
-		"psrlw     %%mm7, %%mm3 \n\t"	/* shift 4 WORDS of MM3 (N) bits to the right */
-		"psrlw     %%mm7, %%mm4 \n\t"	/* shift 4 WORDS of MM4 (N) bits to the right */
-		"pmullw    %%mm1, %%mm3 \n\t"	/* mul low  bytes of SrcDest by MM1 */
-		"pmullw    %%mm1, %%mm4 \n\t"	/* mul high bytes of SrcDest by MM1 */
-		"packuswb  %%mm4, %%mm3 \n\t"	/* pack words back into bytes with saturation */
-		"movq    %%mm3, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(N),			/* %3 */
-		"m"(C)			/* %4 */
-		);
+	/* i386 and x86_64: MMX intrinsics path; processes SrcLength/8 groups of 8 bytes */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 mm0 = _m_from_int(0);			/* zero mm0 register */
+	/* Duplicate C in 4 words of MM1 */
+	int i;
+	i = (C<<16)|C;					/* C in the low word and in the high word of i */
+	__m64 mm1 = _m_from_int(i);
+	__m64 mm2 = _m_from_int(i);
+	mm1 = _m_punpckldq(mm1, mm2);			/* fill higher words of MM1 with C */
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm3, mm4, mm5, mm6;		/* NOTE(review): mm5 and mm6 are declared but not used in this loop */
+		mm3 = _m_punpcklbw(*mSrc1, mm0);	/* unpack low  bytes of Src1 into words */
+		mm4 = _m_punpckhbw(*mSrc1, mm0);	/* unpack high bytes of Src1 into words */
+		mm3 = _m_psrlwi(mm3, N);		/* shift 4 WORDS of MM3 (N) bits to the right */
+		mm4 = _m_psrlwi(mm4, N);		/* shift 4 WORDS of MM4 (N) bits to the right */
+		mm3 = _m_pmullw(mm3, mm1);		/* mul low  bytes of Src1 and MM1 */
+		mm4 = _m_pmullw(mm4, mm1);		/* mul high bytes of Src1 and MM1 */
+		*mDest = _m_packuswb(mm3, mm4);		/* pack words back into bytes with saturation */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -3144,7 +3010,7 @@ int SDL_imageFilterShiftRightAndMultByByte(unsigned char *Src1, unsigned char *D
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterShiftLeftByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
+static int SDL_imageFilterShiftLeftByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
 									unsigned char *Mask)
 {
 #ifdef USE_MMX
@@ -3184,39 +3050,26 @@ L10271:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "movl         %4, %%edx \n\t"	/* load Mask address into edx */
-		"movq    (%%edx), %%mm0 \n\t"	/* load Mask into mm0 */
-		"xor       %%ecx, %%ecx \n\t"	/* zero ECX */
-		"mov           %3, %%cl \n\t"	/* load loop counter (N) into CL */
-		"movd      %%ecx, %%mm3 \n\t"	/* copy (N) into MM3  */
-		"pcmpeqb   %%mm1, %%mm1 \n\t"	/* generate all 1's in mm1 */
-		"1:                     \n\t"	/* ** Prepare proper bit-Mask in MM1 ** */
-		"psllw        $1, %%mm1 \n\t"	/* shift 4 WORDS of MM1 1 bit to the left */
-		/*    "pand      %%mm0, %%mm1 \n\t"    // apply Mask to 8 BYTES of MM1 */
-		".byte     0x0f, 0xdb, 0xc8 \n\t" "dec %%cl               \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		/* ** Shift all bytes of the image ** */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load SrcDest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"2: movq (%%eax), %%mm0 \n\t"	/* load 8 bytes from Src1 into MM0 */
-		"psllw     %%mm3, %%mm0 \n\t"	/* shift 4 WORDS of MM0 (N) bits to the left */
-		/*    "pand      %%mm1, %%mm0 \n\t"    // apply proper bit-Mask to 8 BYTES of MM0 */
-		".byte     0x0f, 0xdb, 0xc1 \n\t" "movq    %%mm0, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 2b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(N),			/* %3 */
-		"m"(Mask)			/* %4 */
-		);
+	/* i386 and x86_64: MMX intrinsics path; processes SrcLength/8 groups of 8 bytes */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 *mMask = (__m64*)Mask;
+        __m64 mm1;
+	int i;
+	mm1 = _m_pcmpeqb(mm1, mm1);			/* generate all 1's in mm1 by comparing it with itself; NOTE(review): mm1 is read before it is initialized */
+	/* Prepare proper bit-Mask in MM1 */
+	for (i = 0; i < N; i++) {
+		mm1 = _m_psllwi(mm1, 1);		/* shift 4 WORDS of MM1 1 bit to the left */
+		mm1 = _m_pand(mm1, *mMask);		/* apply Mask to 8 BYTES of MM1 */
+	}
+	/* Shift all bytes of the image */
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm0 = _m_psllwi(*mSrc1, N);	/* shift 4 WORDS of MM0 (N) bits to the left */
+		*mDest = _m_pand(mm0, mm1);		/* apply proper bit-Mask to 8 BYTES of MM0 */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -3300,7 +3153,7 @@ int SDL_imageFilterShiftLeftByte(unsigned char *Src1, unsigned char *Dest, unsig
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterShiftLeftUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
+static int SDL_imageFilterShiftLeftUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -3324,26 +3177,16 @@ L12023:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t"
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm0 \n\t"	/* load 8 bytes from SrcDest into MM0 */
-		"pslld   %3, %%mm0 \n\t"	/* MM0=SrcDest+C (add 8 bytes with saturation) */
-		"movq    %%mm0, (%%edi) \n\t"	/* store result in SrcDest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(N)			/* %3 */
-		);
+	/* i386 and x86_64: MMX intrinsics path; processes SrcLength/8 groups of 8 bytes */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	int i;
+	for (i = 0; i < SrcLength/8; i++) {
+		*mDest = _m_pslldi(*mSrc1, N);	/* shift 2 DWORDS of Src1 (N) bits to the left */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();				/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -3431,7 +3274,7 @@ int SDL_imageFilterShiftLeftUint(unsigned char *Src1, unsigned char *Dest, unsig
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterShiftLeftMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
+static int SDL_imageFilterShiftLeftMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -3454,8 +3297,8 @@ L10280:
 		movq mm4, mm3   	/* copy MM3 into MM4  */
 			punpcklbw mm3, mm0   	/* unpack low  bytes of SrcDest into words */
 			punpckhbw mm4, mm0   	/* unpack high bytes of SrcDest into words */
-			psllw mm3, mm7   	/* shift 4 WORDS of MM3 (N) bits to the right */
-			psllw mm4, mm7   	/* shift 4 WORDS of MM4 (N) bits to the right */
+			psllw mm3, mm7   	/* shift 4 WORDS of MM3 (N) bits to the left */
+			psllw mm4, mm7   	/* shift 4 WORDS of MM4 (N) bits to the left */
 			packuswb mm3, mm4   	/* pack words back into bytes with saturation */
 			movq [edi], mm3   	/* store result in Dest */
 			add eax, 8   	/* increase Src1 register pointer by 8 */
@@ -3469,8 +3312,8 @@ L10281:
 		movq mm4, mm3   	/* copy MM3 into MM4  */
 			punpcklbw mm3, mm0   	/* unpack low  bytes of SrcDest into words */
 			punpckhbw mm4, mm0   	/* unpack high bytes of SrcDest into words */
-			psllw mm3, mm7   	/* shift 4 WORDS of MM3 (N) bits to the right */
-			psllw mm4, mm7   	/* shift 4 WORDS of MM4 (N) bits to the right */
+			psllw mm3, mm7   	/* shift 4 WORDS of MM3 (N) bits to the left */
+			psllw mm4, mm7   	/* shift 4 WORDS of MM4 (N) bits to the left */
 			/* ** Take abs value of the signed words ** */
 			movq mm5, mm3   	/* copy mm3 into mm5 */
 			movq mm6, mm4   	/* copy mm4 into mm6 */
@@ -3491,57 +3334,42 @@ L10282:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "xor       %%eax, %%eax \n\t"	/* zero EAX */
-		"mov           %3, %%al \n\t"	/* load N into AL */
-		"movd      %%eax, %%mm7 \n\t"	/* copy N into MM7 */
-		"pxor      %%mm0, %%mm0 \n\t"	/* zero MM0 register */
-		"mov         %1, %%eax  \n\t"	/* load Src1 address into eax */
-		"mov         %0, %%edi  \n\t"	/* load Dest address into edi */
-		"mov         %2, %%ecx  \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr         $3, %%ecx  \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		"cmp           $7, %%al \n\t"	/* if (N <= 7) execute more efficient code */
-		"jg                  2f \n\t" ".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1: movq (%%eax), %%mm3 \n\t"	/* load 8 bytes from Src1 into MM3 */
-		"movq      %%mm3, %%mm4 \n\t"	/* copy MM3 into MM4  */
-		"punpcklbw %%mm0, %%mm3 \n\t"	/* unpack low  bytes of SrcDest into words */
-		"punpckhbw %%mm0, %%mm4 \n\t"	/* unpack high bytes of SrcDest into words */
-		"psllw     %%mm7, %%mm3 \n\t"	/* shift 4 WORDS of MM3 (N) bits to the right */
-		"psllw     %%mm7, %%mm4 \n\t"	/* shift 4 WORDS of MM4 (N) bits to the right */
-		"packuswb  %%mm4, %%mm3 \n\t"	/* pack words back into bytes with saturation */
-		"movq    %%mm3, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"jmp                 3f \n\t" ".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"2: movq (%%eax), %%mm3 \n\t"	/* load 8 bytes from Src1 into MM3 */
-		"movq      %%mm3, %%mm4 \n\t"	/* copy MM3 into MM4  */
-		"punpcklbw %%mm0, %%mm3 \n\t"	/* unpack low  bytes of SrcDest into words */
-		"punpckhbw %%mm0, %%mm4 \n\t"	/* unpack high bytes of SrcDest into words */
-		"psllw     %%mm7, %%mm3 \n\t"	/* shift 4 WORDS of MM3 (N) bits to the right */
-		"psllw     %%mm7, %%mm4 \n\t"	/* shift 4 WORDS of MM4 (N) bits to the right */
-		/* ** Take abs value of the signed words ** */
-		"movq      %%mm3, %%mm5 \n\t"	/* copy mm3 into mm5 */
-		"movq      %%mm4, %%mm6 \n\t"	/* copy mm4 into mm6 */
-		"psraw       $15, %%mm5 \n\t"	/* fill mm5 words with word sign bit */
-		"psraw       $15, %%mm6 \n\t"	/* fill mm6 words with word sign bit */
-		"pxor      %%mm5, %%mm3 \n\t"	/* take 1's compliment of only neg. words */
-		"pxor      %%mm6, %%mm4 \n\t"	/* take 1's compliment of only neg. words */
-		"psubsw    %%mm5, %%mm3 \n\t"	/* add 1 to only neg. words, W-(-1) or W-0 */
-		"psubsw    %%mm6, %%mm4 \n\t"	/* add 1 to only neg. words, W-(-1) or W-0 */
-		"packuswb  %%mm4, %%mm3 \n\t"	/* pack words back into bytes with saturation */
-		"movq    %%mm3, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 2b \n\t"	/* check loop termination, proceed if required */
-		"3: emms                \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(N)			/* %3 */
-		);
+	/* i386 and x86_64: MMX intrinsics path; processes SrcLength/8 groups of 8 bytes */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 mm0 = _m_from_int(0);				/* zero mm0 register */
+	int i;
+	if (N <= 7) {						/* if (N <= 7) execute more efficient code */
+		for (i = 0; i < SrcLength/8; i++) {
+			__m64 mm3, mm4;
+			mm3 = _m_punpcklbw(*mSrc1, mm0);	/* unpack low  bytes of Src1 into words */
+			mm4 = _m_punpckhbw(*mSrc1, mm0);	/* unpack high bytes of Src1 into words */
+			mm3 = _m_psllwi(mm3, N);		/* shift 4 WORDS of MM3 (N) bits to the left */
+			mm4 = _m_psllwi(mm4, N);		/* shift 4 WORDS of MM4 (N) bits to the left */
+			*mDest = _m_packuswb(mm3, mm4);		/* pack words back into bytes with saturation */
+			mSrc1++;
+			mDest++;
+		}
+	} else {
+		for (i = 0; i < SrcLength/8; i++) {
+			__m64 mm3, mm4, mm5, mm6;
+			mm3 = _m_punpcklbw(*mSrc1, mm0);	/* unpack low  bytes of Src1 into words */
+			mm4 = _m_punpckhbw(*mSrc1, mm0);	/* unpack high bytes of Src1 into words */
+			mm3 = _m_psllwi(mm3, N);		/* shift 4 WORDS of MM3 (N) bits to the left */
+			mm4 = _m_psllwi(mm4, N);		/* shift 4 WORDS of MM4 (N) bits to the left */
+			/* Take abs value of the signed words */
+			mm5 = _m_psrawi(mm3, 15);		/* fill mm5 words with word sign bit */
+			mm6 = _m_psrawi(mm4, 15);		/* fill mm6 words with word sign bit */
+			mm3 = _m_pxor(mm3, mm5);		/* take 1's complement of only neg. words */
+			mm4 = _m_pxor(mm4, mm6);		/* take 1's complement of only neg. words */
+			mm3 = _m_psubsw(mm3, mm5);		/* add 1 to only neg. words, W-(-1) or W-0 */
+			mm4 = _m_psubsw(mm4, mm6);		/* add 1 to only neg. words, W-(-1) or W-0 */
+			*mDest = _m_packuswb(mm3, mm4);		/* pack words back into bytes with saturation */
+			mSrc1++;
+			mDest++;
+		}
+	}
+	_m_empty();						/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -3626,7 +3454,7 @@ int SDL_imageFilterShiftLeft(unsigned char *Src1, unsigned char *Dest, unsigned
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterBinarizeUsingThresholdMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char T)
+static int SDL_imageFilterBinarizeUsingThresholdMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char T)
 {
 #ifdef USE_MMX
 #if !defined(GCC__)
@@ -3663,40 +3491,26 @@ L1029:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t"
-		/* ** Duplicate T in 8 bytes of MM3 ** */
-		"pcmpeqb   %%mm1, %%mm1 \n\t"	/* generate all 1's in mm1 */
-		"pcmpeqb   %%mm2, %%mm2 \n\t"	/* generate all 1's in mm2 */
-		"mov           %3, %%al \n\t"	/* load T into AL */
-		"mov         %%al, %%ah \n\t"	/* copy AL into AH */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm3 \n\t"	/* copy EAX into MM3 */
-		"movd      %%eax, %%mm4 \n\t"	/* copy EAX into MM4 */
-		"punpckldq %%mm4, %%mm3 \n\t"	/* fill higher bytes of MM3 with T */
-		"psubusb   %%mm3, %%mm2 \n\t"	/* store 0xFF - T in MM2 */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1:                     \n\t" 
-		"movq    (%%eax), %%mm0 \n\t"	/* load 8 bytes from SrcDest into MM0 */
-		"paddusb   %%mm2, %%mm0 \n\t"	/* MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation) */
-		"pcmpeqb   %%mm1, %%mm0 \n\t"	/* binarize 255:0, comparing to 255 */
-		"movq    %%mm0, (%%edi) \n\t"	/* store result in SrcDest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(T)			/* %3 */
-		);
+	/* i386 and x86_64: MMX intrinsics path; processes SrcLength/8 groups of 8 bytes */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	/* Build all-1's constants, then duplicate T in 8 bytes of MM3 */
+	__m64 mm1 = _m_pcmpeqb(mm1, mm1);			/* generate all 1's in mm1; NOTE(review): mm1 appears in its own initializer */
+	__m64 mm2 = _m_pcmpeqb(mm2, mm2);			/* generate all 1's in mm2; NOTE(review): mm2 appears in its own initializer */
+	int i;
+	memset(&i, T, 4);				/* set 4 bytes of i to T */
+	__m64 mm3 = _m_from_int(i);
+	__m64 mm4 = _m_from_int(i);
+	mm3 = _m_punpckldq(mm3, mm4);			/* fill higher bytes of MM3 with T */
+	mm2 = _m_psubusb(mm2, mm3);			/* store 0xFF - T in MM2 */
+        //__m64 mm3 = _m_from_int64(lli); // x86_64 only
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm0 = _m_paddusb(*mSrc1, mm2);	/* Src1+(0xFF-T) (add 8 bytes with saturation) */
+		*mDest = _m_pcmpeqb(mm0, mm1);		/* binarize 255:0, comparing to 255 */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -3775,7 +3589,7 @@ int SDL_imageFilterBinarizeUsingThreshold(unsigned char *Src1, unsigned char *De
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterClipToRangeMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char Tmin,
+static int SDL_imageFilterClipToRangeMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char Tmin,
 								  unsigned char Tmax)
 {
 #ifdef USE_MMX
@@ -3824,51 +3638,35 @@ L1030:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "pcmpeqb   %%mm1, %%mm1 \n\t"	/* generate all 1's in mm1 */
-		/* ** Duplicate Tmax in 8 bytes of MM3 ** */
-		"mov           %4, %%al \n\t"	/* load Tmax into AL */
-		"mov         %%al, %%ah \n\t"	/* copy AL into AH */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm3 \n\t"	/* copy EAX into MM3 */
-		"movd      %%eax, %%mm4 \n\t"	/* copy EAX into MM4 */
-		"punpckldq %%mm4, %%mm3 \n\t"	/* fill higher bytes of MM3 with Tmax */
-		"psubusb   %%mm3, %%mm1 \n\t"	/* store 0xFF - Tmax in MM1 */
-		/* ** Duplicate Tmin in 8 bytes of MM5 ** */
-		"mov           %3, %%al \n\t"	/* load Tmin into AL */
-		"mov         %%al, %%ah \n\t"	/* copy AL into AH */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm5 \n\t"	/* copy EAX into MM5 */
-		"movd      %%eax, %%mm4 \n\t"	/* copy EAX into MM4 */
-		"punpckldq %%mm4, %%mm5 \n\t"	/* fill higher bytes of MM5 with Tmin */
-		"movq      %%mm5, %%mm7 \n\t"	/* copy MM5 into MM7 */
-		"paddusb   %%mm1, %%mm7 \n\t"	/* store 0xFF - Tmax + Tmin in MM7 */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1:                     \n\t" 
-		"movq    (%%eax), %%mm0 \n\t"	/* load 8 bytes from Src1 into MM0 */
-		"paddusb   %%mm1, %%mm0 \n\t"	/* MM0=SrcDest+(0xFF-Tmax) */
-		"psubusb   %%mm7, %%mm0 \n\t"	/* MM0=MM0-(0xFF-Tmax+Tmin) */
-		"paddusb   %%mm5, %%mm0 \n\t"	/* MM0=MM0+Tmin */
-		"movq    %%mm0, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(Tmin),		/* %3 */
-		"m"(Tmax)			/* %4 */
-		);
+	/* i386 and x86_64: MMX intrinsics path; processes SrcLength/8 groups of 8 bytes */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 mm1 = _m_pcmpeqb(mm1, mm1);	/* generate all 1's in mm1; NOTE(review): mm1 appears in its own initializer */
+	int i;
+	/* Duplicate Tmax in 8 bytes of MM3 */
+	__m64 mm3, mm4;
+	memset(&i, Tmax, 4);			/* set 4 bytes of i to Tmax */
+	mm3 = _m_from_int(i);
+	mm4 = _m_from_int(i);
+	mm3 = _m_punpckldq(mm3, mm4);		/* fill higher bytes of MM3 with Tmax */
+	mm1 = _m_psubusb(mm1, mm3);		/* store 0xFF - Tmax in MM1 */
+        //__m64 mm3 = _m_from_int64(lli); // x86_64 only
+	/* Duplicate Tmin in 8 bytes of MM5 */
+	__m64 mm5, mm7;
+	memset(&i, Tmin, 4);			/* set 4 bytes of i to Tmin */
+	mm5 = _m_from_int(i);
+	mm4 = _m_from_int(i);
+	mm5 = _m_punpckldq(mm5, mm4);		/* fill higher bytes of MM5 with Tmin */
+	mm7 = _m_paddusb(mm5, mm1);	/* store 0xFF - Tmax + Tmin in MM7 */
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm0;
+		mm0 = _m_paddusb(*mSrc1, mm1);	/* MM0=Src1+(0xFF-Tmax) */
+		mm0 = _m_psubusb(mm0, mm7);	/* MM0=MM0-(0xFF-Tmax+Tmin) */
+		*mDest = _m_paddusb(mm0, mm5);	/* MM0+Tmin */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();				/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -3957,7 +3755,7 @@ int SDL_imageFilterClipToRange(unsigned char *Src1, unsigned char *Dest, unsigne
 
 \return Returns 0 for success or -1 for error.
 */
-int SDL_imageFilterNormalizeLinearMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, int Cmin, int Cmax,
+static int SDL_imageFilterNormalizeLinearMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, int Cmin, int Cmax,
 									  int Nmin, int Nmax)
 {
 #ifdef USE_MMX
@@ -4034,79 +3832,57 @@ L1031:
 			popa
 	}
 #else
-	asm volatile
-		("pusha		     \n\t" "mov           %6, %%ax \n\t"	/* load Nmax in AX */
-		"mov           %4, %%bx \n\t"	/* load Cmax in BX */
-		"sub           %5, %%ax \n\t"	/* AX = Nmax - Nmin */
-		"sub           %3, %%bx \n\t"	/* BX = Cmax - Cmin */
-		"jz                  1f \n\t"	/* check division by zero */
-		"xor         %%dx, %%dx \n\t"	/* prepare for division, zero DX */
-		"div               %%bx \n\t"	/* AX = AX/BX */
-		"jmp                 2f \n\t" "1:                     \n\t" "mov         $255, %%ax \n\t"	/* if div by zero, assume result max. byte value */
-		"2:                    \n\t"	/* ** Duplicate AX in 4 words of MM0 ** */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm0 \n\t"	/* copy EAX into MM0 */
-		"movd      %%eax, %%mm1 \n\t"	/* copy EAX into MM1 */
-		"punpckldq %%mm1, %%mm0 \n\t"	/* fill higher words of MM0 with AX */
-		/* ** Duplicate Cmin in 4 words of MM1 ** */
-		"mov           %3, %%ax \n\t"	/* load Cmin into AX */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm1 \n\t"	/* copy EAX into MM1 */
-		"movd      %%eax, %%mm2 \n\t"	/* copy EAX into MM2 */
-		"punpckldq %%mm2, %%mm1 \n\t"	/* fill higher words of MM1 with Cmin */
-		/* ** Duplicate Nmin in 4 words of MM2 ** */
-		"mov           %5, %%ax \n\t"	/* load Nmin into AX */
-		"mov         %%ax, %%bx \n\t"	/* copy AX into BX */
-		"shl         $16, %%eax \n\t"	/* shift 2 bytes of EAX left */
-		"mov         %%bx, %%ax \n\t"	/* copy BX into AX */
-		"movd      %%eax, %%mm2 \n\t"	/* copy EAX into MM2 */
-		"movd      %%eax, %%mm3 \n\t"	/* copy EAX into MM3 */
-		"punpckldq %%mm3, %%mm2 \n\t"	/* fill higher words of MM2 with Nmin */
-		"pxor      %%mm7, %%mm7 \n\t"	/* zero MM7 register */
-		"mov          %1, %%eax \n\t"	/* load Src1 address into eax */
-		"mov          %0, %%edi \n\t"	/* load Dest address into edi */
-		"mov          %2, %%ecx \n\t"	/* load loop counter (SIZE) into ecx */
-		"shr          $3, %%ecx \n\t"	/* counter/8 (MMX loads 8 bytes at a time) */
-		".align 16              \n\t"	/* 16 byte alignment of the loop entry */
-		"1:                     \n\t" 
-		"movq    (%%eax), %%mm3 \n\t"	/* load 8 bytes from Src1 into MM3 */
-		"movq      %%mm3, %%mm4 \n\t"	/* copy MM3 into MM4  */
-		"punpcklbw %%mm7, %%mm3 \n\t"	/* unpack low  bytes of SrcDest into words */
-		"punpckhbw %%mm7, %%mm4 \n\t"	/* unpack high bytes of SrcDest into words */
-		"psubusb   %%mm1, %%mm3 \n\t"	/* S-Cmin, low  bytes */
-		"psubusb   %%mm1, %%mm4 \n\t"	/* S-Cmin, high bytes */
-		"pmullw    %%mm0, %%mm3 \n\t"	/* MM0*(S-Cmin), low  bytes */
-		"pmullw    %%mm0, %%mm4 \n\t"	/* MM0*(S-Cmin), high bytes */
-		"paddusb   %%mm2, %%mm3 \n\t"	/* MM0*(S-Cmin)+Nmin, low  bytes */
-		"paddusb   %%mm2, %%mm4 \n\t"	/* MM0*(S-Cmin)+Nmin, high bytes */
-		/* ** Take abs value of the signed words ** */
-		"movq      %%mm3, %%mm5 \n\t"	/* copy mm3 into mm5 */
-		"movq      %%mm4, %%mm6 \n\t"	/* copy mm4 into mm6 */
-		"psraw       $15, %%mm5 \n\t"	/* fill mm5 words with word sign bit */
-		"psraw       $15, %%mm6 \n\t"	/* fill mm6 words with word sign bit */
-		"pxor      %%mm5, %%mm3 \n\t"	/* take 1's compliment of only neg. words */
-		"pxor      %%mm6, %%mm4 \n\t"	/* take 1's compliment of only neg. words */
-		"psubsw    %%mm5, %%mm3 \n\t"	/* add 1 to only neg. words, W-(-1) or W-0 */
-		"psubsw    %%mm6, %%mm4 \n\t"	/* add 1 to only neg. words, W-(-1) or W-0 */
-		"packuswb  %%mm4, %%mm3 \n\t"	/* pack words back into bytes with saturation */
-		"movq    %%mm3, (%%edi) \n\t"	/* store result in Dest */
-		"add          $8, %%eax \n\t"	/* increase Src1 register pointer by 8 */
-		"add          $8, %%edi \n\t"	/* increase Dest register pointer by 8 */
-		"dec              %%ecx \n\t"	/* decrease loop counter */
-		"jnz                 1b \n\t"	/* check loop termination, proceed if required */
-		"emms                   \n\t"	/* exit MMX state */
-		"popa                   \n\t":"=m" (Dest)	/* %0 */
-		:"m"(Src1),		/* %1 */
-		"m"(SrcLength),		/* %2 */
-		"m"(Cmin),		/* %3 */
-		"m"(Cmax),		/* %4 */
-		"m"(Nmin),		/* %5 */
-		"m"(Nmax)			/* %6 */
-		);
+	/* i386 and x86_64 */
+	__m64 *mSrc1 = (__m64*)Src1;
+	__m64 *mDest = (__m64*)Dest;
+	__m64 mm0, mm1, mm2, mm3;
+
+	int i;
+	/* Duplicate (Nmax-Nmin)/(Cmax-Cmin) in 4 words of MM0 */
+	unsigned short a = Nmax - Nmin;
+	unsigned short b = Cmax - Cmin;
+	if (b == 0) {
+	    a = 255;
+	} else {
+	    a /= b;
+	}
+	i = (a<<16)|a;
+	mm0 = _m_from_int(i);
+	mm1 = _m_from_int(i);
+	mm0 = _m_punpckldq(mm0, mm1);			/* fill higher words of MM0 with the quotient a */
+	/* Duplicate Cmin in 4 words of MM1 */
+	i = (Cmin<<16)|(short)Cmin;
+	mm1 = _m_from_int(i);
+	mm2 = _m_from_int(i);
+	mm1 = _m_punpckldq(mm1, mm2);			/* fill higher words of MM1 with Cmin */
+	/* Duplicate Nmin in 4 words of MM2 */
+	i = (Nmin<<16)|(short)Nmin;
+	mm2 = _m_from_int(i);
+	mm3 = _m_from_int(i);
+	mm2 = _m_punpckldq(mm2, mm3);			/* fill higher words of MM2 with Nmin */
+	__m64 mm7 = _m_from_int(0);			/* zero mm7 register */
+	for (i = 0; i < SrcLength/8; i++) {
+		__m64 mm3, mm4, mm5, mm6;
+		mm3 = _m_punpcklbw(*mSrc1, mm7);	/* unpack low  bytes of Src1 into words */
+		mm4 = _m_punpckhbw(*mSrc1, mm7);	/* unpack high bytes of Src1 into words */
+		mm3 = _m_psubusb(mm3, mm1);		/* S-Cmin, low	bytes */
+		mm4 = _m_psubusb(mm4, mm1);		/* S-Cmin, high bytes */
+		mm3 = _m_pmullw(mm3, mm0);		/* MM0*(S-Cmin), low  bytes */
+		mm4 = _m_pmullw(mm4, mm0);		/* MM0*(S-Cmin), high bytes */
+		mm3 = _m_paddusb(mm3, mm2);		/* MM0*(S-Cmin)+Nmin, low  bytes */
+		mm4 = _m_paddusb(mm4, mm2);		/* MM0*(S-Cmin)+Nmin, high bytes */
+		/* Take abs value of the signed words */
+		mm5 = _m_psrawi(mm3, 15);		/* fill mm5 words with word sign bit */
+		mm6 = _m_psrawi(mm4, 15);		/* fill mm6 words with word sign bit */
+		mm3 = _m_pxor(mm3, mm5);		/* take 1's complement of only neg. words */
+		mm4 = _m_pxor(mm4, mm6);		/* take 1's complement of only neg. words */
+		mm3 = _m_psubsw(mm3, mm5);		/* add 1 to only neg. words, W-(-1) or W-0 */
+		mm4 = _m_psubsw(mm4, mm6);		/* add 1 to only neg. words, W-(-1) or W-0 */
+		*mDest = _m_packuswb(mm3, mm4);		/* pack words back into bytes with saturation */
+		mSrc1++;
+		mDest++;
+	}
+	_m_empty();					/* clean MMX state */
 #endif
 	return (0);
 #else
@@ -4209,7 +3985,8 @@ int SDL_imageFilterConvolveKernel3x3Divide(unsigned char *Src, unsigned char *De
 		return (-1);
 
 	if ((SDL_imageFilterMMXdetect())) {
-#ifdef USE_MMX
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
 #if !defined(GCC__)
 		__asm
 		{
@@ -4398,7 +4175,8 @@ int SDL_imageFilterConvolveKernel5x5Divide(unsigned char *Src, unsigned char *De
 		return (-1);
 
 	if ((SDL_imageFilterMMXdetect())) {
-#ifdef USE_MMX
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
 #if !defined(GCC__)
 		__asm
 		{
@@ -4700,7 +4478,8 @@ int SDL_imageFilterConvolveKernel7x7Divide(unsigned char *Src, unsigned char *De
 		return (-1);
 
 	if ((SDL_imageFilterMMXdetect())) {
-#ifdef USE_MMX
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
 #if !defined(GCC__)
 		__asm
 		{
@@ -5056,7 +4835,8 @@ int SDL_imageFilterConvolveKernel9x9Divide(unsigned char *Src, unsigned char *De
 		return (-1);
 
 	if ((SDL_imageFilterMMXdetect())) {
-#ifdef USE_MMX
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
 #if !defined(GCC__)
 		__asm
 		{
@@ -5603,7 +5383,8 @@ int SDL_imageFilterConvolveKernel3x3ShiftRight(unsigned char *Src, unsigned char
 		return (-1);
 
 	if ((SDL_imageFilterMMXdetect())) {
-#ifdef USE_MMX
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
 #if !defined(GCC__)
 		__asm
 		{
@@ -5779,7 +5560,8 @@ int SDL_imageFilterConvolveKernel5x5ShiftRight(unsigned char *Src, unsigned char
 		return (-1);
 
 	if ((SDL_imageFilterMMXdetect())) {
-#ifdef USE_MMX
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
 #if !defined(GCC__)
 		__asm
 		{
@@ -6079,7 +5861,8 @@ int SDL_imageFilterConvolveKernel7x7ShiftRight(unsigned char *Src, unsigned char
 		return (-1);
 
 	if ((SDL_imageFilterMMXdetect())) {
-#ifdef USE_MMX
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
 #if !defined(GCC__)
 		__asm
 		{
@@ -6441,7 +6224,8 @@ int SDL_imageFilterConvolveKernel9x9ShiftRight(unsigned char *Src, unsigned char
 		return (-1);
 
 	if ((SDL_imageFilterMMXdetect())) {
-#ifdef USE_MMX
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
 #if !defined(GCC__)
 		__asm
 		{
@@ -7019,7 +6803,8 @@ int SDL_imageFilterSobelX(unsigned char *Src, unsigned char *Dest, int rows, int
 		return (-1);
 
 	if ((SDL_imageFilterMMXdetect())) {
-#ifdef USE_MMX
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
 #if !defined(GCC__)
 		__asm
 		{
@@ -7271,7 +7056,8 @@ int SDL_imageFilterSobelXShiftRight(unsigned char *Src, unsigned char *Dest, int
 		return (-1);
 
 	if ((SDL_imageFilterMMXdetect())) {
-#ifdef USE_MMX
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
 #if !defined(GCC__)
 		__asm
 		{
diff --git a/Test/INSTALL b/Test/INSTALL
index 7d1c323..a1e89e1 100644
--- a/Test/INSTALL
+++ b/Test/INSTALL
@@ -1,8 +1,8 @@
 Installation Instructions
 *************************
 
-Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
-2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
+Inc.
 
    Copying and distribution of this file, with or without modification,
 are permitted in any medium without royalty provided the copyright
@@ -226,6 +226,11 @@ order to use an ANSI C compiler:
 
 and if that doesn't work, install pre-built binaries of GCC for HP-UX.
 
+   HP-UX `make' updates targets which have the same time stamps as
+their prerequisites, which makes it generally unusable when shipped
+generated files such as `configure' are involved.  Use GNU `make'
+instead.
+
    On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
 parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
 a workaround.  If GNU CC is not installed, it is therefore recommended
diff --git a/Test/LaplaceRelaxation_VS2008.vcproj b/Test/LaplaceRelaxation_VS2008.vcproj
index eb59665..4ac05b3 100644
--- a/Test/LaplaceRelaxation_VS2008.vcproj
+++ b/Test/LaplaceRelaxation_VS2008.vcproj
@@ -41,7 +41,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\sdlgfx;"..\..\SDL-1.2.14\include""
+				AdditionalIncludeDirectories="..\..\sdlgfx;"..\..\SDL-1.2.15\include""
 				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS"
 				MinimalRebuild="false"
 				BasicRuntimeChecks="0"
@@ -69,7 +69,7 @@
 				AdditionalDependencies="SDL.lib SDLmain.lib SDL_gfx.lib"
 				OutputFile=".\Debug/LaplaceRelaxation.exe"
 				LinkIncremental="2"
-				AdditionalLibraryDirectories="..\..\sdlgfx\Debug;"..\..\SDL-1.2.14\VisualC\SDLmain\Debug";"..\..\SDL-1.2.14\VisualC\SDL\Debug""
+				AdditionalLibraryDirectories="..\..\sdlgfx\Debug;"..\..\SDL-1.2.15\VisualC\SDLmain\Debug";"..\..\SDL-1.2.15\VisualC\SDL\Debug""
 				GenerateDebugInformation="true"
 				ProgramDatabaseFile="./Debug\LaplaceRelaxation.pdb"
 				SubSystem="2"
@@ -126,7 +126,7 @@
 				Name="VCCLCompilerTool"
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
-				AdditionalIncludeDirectories="..\..\sdlgfx;"..\..\SDL-1.2.14\include""
+				AdditionalIncludeDirectories="..\..\sdlgfx;"..\..\SDL-1.2.15\include""
 				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS"
 				RuntimeLibrary="2"
 				EnableFunctionLevelLinking="false"
@@ -149,7 +149,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="SDL.lib SDLmain.lib SDL_gfx.lib"
 				LinkIncremental="2"
-				AdditionalLibraryDirectories="..\..\sdlgfx\Debug;"..\..\SDL-1.2.14\VisualC\SDLmain\Debug";"..\..\SDL-1.2.14\VisualC\SDL\Debug""
+				AdditionalLibraryDirectories="..\..\sdlgfx\Debug;"..\..\SDL-1.2.15\VisualC\SDLmain\Debug";"..\..\SDL-1.2.15\VisualC\SDL\Debug""
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
diff --git a/Test/LaplaceRelaxation.vcxproj b/Test/LaplaceRelaxation_VS2010.vcxproj
old mode 100644
new mode 100755
similarity index 65%
rename from Test/LaplaceRelaxation.vcxproj
rename to Test/LaplaceRelaxation_VS2010.vcxproj
index 1ffeb33..ce0ec9b
--- a/Test/LaplaceRelaxation.vcxproj
+++ b/Test/LaplaceRelaxation_VS2010.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <ProjectName>LaplaceRelaxation</ProjectName>
     <ProjectGuid>{AE9876D3-6F6D-48C0-3DAF-EF578406BEDC}</ProjectGuid>
     <RootNamespace>LaplaceRelaxation</RootNamespace>
     <Keyword>Win32Proj</Keyword>
@@ -36,63 +37,58 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>false</MinimalRebuild>
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>$(TargetName)\$(Platform)\$(Configuration)\</AssemblerListingLocation>
-      <ObjectFileName>$(TargetName)\$(Platform)\$(Configuration)\</ObjectFileName>
-      <ProgramDataBaseFileName>$(TargetName)\$(Platform)\$(Configuration)\</ProgramDataBaseFileName>
+      <PrecompiledHeaderOutputFile>.\Debug/LaplaceRelaxation.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\Debug/</AssemblerListingLocation>
+      <ObjectFileName>.\Debug/</ObjectFileName>
+      <ProgramDataBaseFileName>.\Debug/</ProgramDataBaseFileName>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration)\;..\..\SDL_gfx\$(Platform)\$(Configuration)\;..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\;..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <OutputFile>.\Debug/LaplaceRelaxation.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <ProgramDatabaseFile>$(TargetDir)$(TargetName).pdb</ProgramDatabaseFile>
+      <ProgramDatabaseFile>./Debug\LaplaceRelaxation.pdb</ProgramDatabaseFile>
       <SubSystem>Windows</SubSystem>
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
-      <IgnoreAllDefaultLibraries>
-      </IgnoreAllDefaultLibraries>
-      <IgnoreSpecificDefaultLibraries>msvcrtd.lib;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
-      <AdditionalOptions>/MACHINE:X86 %(AdditionalOptions)</AdditionalOptions>
     </Link>
     <PostBuildEvent>
-      <Command>
-copy /y "$(ProjectDir)\*.bmp" "$(TargetDir)"
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-    </PostBuildEvent>
-    <PostBuildEvent>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
+      <Command>copy /y "$(ProjectDir)\*.bmp" "$(TargetDir)"</Command>
     </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>false</FunctionLevelLinking>
@@ -105,7 +101,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -117,16 +113,6 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "$(ProjectDir)\*.bmp" "$(TargetDir)"
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-    </PostBuildEvent>
-    <PostBuildEvent>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="LaplaceRelaxation.c" />
@@ -138,7 +124,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </CustomBuildStep>
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\SDL_gfx.vcxproj">
+    <ProjectReference Include="..\SDL_gfx_VS2010.vcxproj">
       <Project>{ae22efd3-6e6d-48c0-af3d-ef190406bedc}</Project>
       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
     </ProjectReference>
diff --git a/Test/Makefile.am b/Test/Makefile.am
index c70be24..ac0a3b3 100644
--- a/Test/Makefile.am
+++ b/Test/Makefile.am
@@ -26,3 +26,5 @@ DISTCLEANFILES = *~ *~c *~h *.cross.cache inc
 
 distclean-local:
 	-rm -rf autom4te.cache
+	-rm -f *.user
+	
diff --git a/Test/Makefile.in b/Test/Makefile.in
index 3dc9deb..3748e61 100644
--- a/Test/Makefile.in
+++ b/Test/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
 # This Makefile.in is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
@@ -16,6 +16,23 @@
 @SET_MAKE@
 
 VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
 pkgdatadir = $(datadir)/@PACKAGE@
 pkgincludedir = $(includedir)/@PACKAGE@
 pkglibdir = $(libdir)/@PACKAGE@
@@ -104,18 +121,27 @@ DIST_SOURCES = $(TestABGR_SOURCES) $(TestFonts_SOURCES) \
 	$(TestGfxPrimitives_SOURCES) $(TestGfxTexture_SOURCES) \
 	$(TestImageFilter_SOURCES) $(TestRotozoom_SOURCES) \
 	$(TestShrink_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
 ETAGS = etags
 CTAGS = ctags
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
 distdir = $(PACKAGE)-$(VERSION)
 top_distdir = $(distdir)
 am__remove_distdir = \
-  { test ! -d "$(distdir)" \
-    || { find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
-         && rm -fr "$(distdir)"; }; }
+  if test -d "$(distdir)"; then \
+    find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
+      && rm -rf "$(distdir)" \
+      || { sleep 5 && rm -rf "$(distdir)"; }; \
+  else :; fi
 DIST_ARCHIVES = $(distdir).tar.gz
 GZIP_ENV = --best
 distuninstallcheck_listfiles = find . -type f -print
+am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
+  | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
 distcleancheck_listfiles = find . -type f -print
 ACLOCAL = @ACLOCAL@
 AMTAR = @AMTAR@
@@ -260,7 +286,7 @@ all: all-am
 
 .SUFFIXES:
 .SUFFIXES: .c .lo .o .obj
-am--refresh:
+am--refresh: Makefile
 	@:
 $(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
 	@for dep in $?; do \
@@ -296,8 +322,11 @@ $(ACLOCAL_M4):  $(am__aclocal_m4_deps)
 $(am__aclocal_m4_deps):
 install-binPROGRAMS: $(bin_PROGRAMS)
 	@$(NORMAL_INSTALL)
-	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
 	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
 	for p in $$list; do echo "$$p $$p"; done | \
 	sed 's/$(EXEEXT)$$//' | \
 	while read p p1; do if test -f $$p || test -f $$p1; \
@@ -337,31 +366,31 @@ clean-binPROGRAMS:
 	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
 	echo " rm -f" $$list; \
 	rm -f $$list
-TestABGR$(EXEEXT): $(TestABGR_OBJECTS) $(TestABGR_DEPENDENCIES) 
+TestABGR$(EXEEXT): $(TestABGR_OBJECTS) $(TestABGR_DEPENDENCIES) $(EXTRA_TestABGR_DEPENDENCIES) 
 	@rm -f TestABGR$(EXEEXT)
 	$(LINK) $(TestABGR_OBJECTS) $(TestABGR_LDADD) $(LIBS)
-TestFonts$(EXEEXT): $(TestFonts_OBJECTS) $(TestFonts_DEPENDENCIES) 
+TestFonts$(EXEEXT): $(TestFonts_OBJECTS) $(TestFonts_DEPENDENCIES) $(EXTRA_TestFonts_DEPENDENCIES) 
 	@rm -f TestFonts$(EXEEXT)
 	$(LINK) $(TestFonts_OBJECTS) $(TestFonts_LDADD) $(LIBS)
-TestFramerate$(EXEEXT): $(TestFramerate_OBJECTS) $(TestFramerate_DEPENDENCIES) 
+TestFramerate$(EXEEXT): $(TestFramerate_OBJECTS) $(TestFramerate_DEPENDENCIES) $(EXTRA_TestFramerate_DEPENDENCIES) 
 	@rm -f TestFramerate$(EXEEXT)
 	$(LINK) $(TestFramerate_OBJECTS) $(TestFramerate_LDADD) $(LIBS)
-TestGfxBlit$(EXEEXT): $(TestGfxBlit_OBJECTS) $(TestGfxBlit_DEPENDENCIES) 
+TestGfxBlit$(EXEEXT): $(TestGfxBlit_OBJECTS) $(TestGfxBlit_DEPENDENCIES) $(EXTRA_TestGfxBlit_DEPENDENCIES) 
 	@rm -f TestGfxBlit$(EXEEXT)
 	$(LINK) $(TestGfxBlit_OBJECTS) $(TestGfxBlit_LDADD) $(LIBS)
-TestGfxPrimitives$(EXEEXT): $(TestGfxPrimitives_OBJECTS) $(TestGfxPrimitives_DEPENDENCIES) 
+TestGfxPrimitives$(EXEEXT): $(TestGfxPrimitives_OBJECTS) $(TestGfxPrimitives_DEPENDENCIES) $(EXTRA_TestGfxPrimitives_DEPENDENCIES) 
 	@rm -f TestGfxPrimitives$(EXEEXT)
 	$(LINK) $(TestGfxPrimitives_OBJECTS) $(TestGfxPrimitives_LDADD) $(LIBS)
-TestGfxTexture$(EXEEXT): $(TestGfxTexture_OBJECTS) $(TestGfxTexture_DEPENDENCIES) 
+TestGfxTexture$(EXEEXT): $(TestGfxTexture_OBJECTS) $(TestGfxTexture_DEPENDENCIES) $(EXTRA_TestGfxTexture_DEPENDENCIES) 
 	@rm -f TestGfxTexture$(EXEEXT)
 	$(LINK) $(TestGfxTexture_OBJECTS) $(TestGfxTexture_LDADD) $(LIBS)
-TestImageFilter$(EXEEXT): $(TestImageFilter_OBJECTS) $(TestImageFilter_DEPENDENCIES) 
+TestImageFilter$(EXEEXT): $(TestImageFilter_OBJECTS) $(TestImageFilter_DEPENDENCIES) $(EXTRA_TestImageFilter_DEPENDENCIES) 
 	@rm -f TestImageFilter$(EXEEXT)
 	$(LINK) $(TestImageFilter_OBJECTS) $(TestImageFilter_LDADD) $(LIBS)
-TestRotozoom$(EXEEXT): $(TestRotozoom_OBJECTS) $(TestRotozoom_DEPENDENCIES) 
+TestRotozoom$(EXEEXT): $(TestRotozoom_OBJECTS) $(TestRotozoom_DEPENDENCIES) $(EXTRA_TestRotozoom_DEPENDENCIES) 
 	@rm -f TestRotozoom$(EXEEXT)
 	$(LINK) $(TestRotozoom_OBJECTS) $(TestRotozoom_LDADD) $(LIBS)
-TestShrink$(EXEEXT): $(TestShrink_OBJECTS) $(TestShrink_DEPENDENCIES) 
+TestShrink$(EXEEXT): $(TestShrink_OBJECTS) $(TestShrink_DEPENDENCIES) $(EXTRA_TestShrink_DEPENDENCIES) 
 	@rm -f TestShrink$(EXEEXT)
 	$(LINK) $(TestShrink_OBJECTS) $(TestShrink_LDADD) $(LIBS)
 
@@ -485,7 +514,11 @@ dist-gzip: distdir
 	$(am__remove_distdir)
 
 dist-bzip2: distdir
-	tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
+	tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
+	$(am__remove_distdir)
+
+dist-lzip: distdir
+	tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
 	$(am__remove_distdir)
 
 dist-lzma: distdir
@@ -493,7 +526,7 @@ dist-lzma: distdir
 	$(am__remove_distdir)
 
 dist-xz: distdir
-	tardir=$(distdir) && $(am__tar) | xz -c >$(distdir).tar.xz
+	tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
 	$(am__remove_distdir)
 
 dist-tarZ: distdir
@@ -524,6 +557,8 @@ distcheck: dist
 	  bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
 	*.tar.lzma*) \
 	  lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\
+	*.tar.lz*) \
+	  lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
 	*.tar.xz*) \
 	  xz -dc $(distdir).tar.xz | $(am__untar) ;;\
 	*.tar.Z*) \
@@ -533,7 +568,7 @@ distcheck: dist
 	*.zip*) \
 	  unzip $(distdir).zip ;;\
 	esac
-	chmod -R a-w $(distdir); chmod a+w $(distdir)
+	chmod -R a-w $(distdir); chmod u+w $(distdir)
 	mkdir $(distdir)/_build
 	mkdir $(distdir)/_inst
 	chmod a-w $(distdir)
@@ -543,6 +578,7 @@ distcheck: dist
 	  && am__cwd=`pwd` \
 	  && $(am__cd) $(distdir)/_build \
 	  && ../configure --srcdir=.. --prefix="$$dc_install_base" \
+	    $(AM_DISTCHECK_CONFIGURE_FLAGS) \
 	    $(DISTCHECK_CONFIGURE_FLAGS) \
 	  && $(MAKE) $(AM_MAKEFLAGS) \
 	  && $(MAKE) $(AM_MAKEFLAGS) dvi \
@@ -571,8 +607,16 @@ distcheck: dist
 	  list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
 	  sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
 distuninstallcheck:
-	@$(am__cd) '$(distuninstallcheck_dir)' \
-	&& test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
+	@test -n '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: trying to run $@ with an empty' \
+	       '$$(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	$(am__cd) '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
 	   || { echo "ERROR: files left after uninstall:" ; \
 	        if test -n "$(DESTDIR)"; then \
 	          echo "  (check DESTDIR support)"; \
@@ -605,10 +649,15 @@ install-am: all-am
 
 installcheck: installcheck-am
 install-strip:
-	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-	  `test -z '$(STRIP)' || \
-	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
 mostlyclean-generic:
 
 clean-generic:
@@ -696,24 +745,25 @@ uninstall-am: uninstall-binPROGRAMS
 
 .PHONY: CTAGS GTAGS all all-am am--refresh check check-am clean \
 	clean-binPROGRAMS clean-generic clean-libtool ctags dist \
-	dist-all dist-bzip2 dist-gzip dist-lzma dist-shar dist-tarZ \
-	dist-xz dist-zip distcheck distclean distclean-compile \
-	distclean-generic distclean-libtool distclean-local \
-	distclean-tags distcleancheck distdir distuninstallcheck dvi \
-	dvi-am html html-am info info-am install install-am \
-	install-binPROGRAMS install-data install-data-am install-dvi \
-	install-dvi-am install-exec install-exec-am install-html \
-	install-html-am install-info install-info-am install-man \
-	install-pdf install-pdf-am install-ps install-ps-am \
-	install-strip installcheck installcheck-am installdirs \
-	maintainer-clean maintainer-clean-generic mostlyclean \
-	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
-	pdf pdf-am ps ps-am tags uninstall uninstall-am \
-	uninstall-binPROGRAMS
+	dist-all dist-bzip2 dist-gzip dist-lzip dist-lzma dist-shar \
+	dist-tarZ dist-xz dist-zip distcheck distclean \
+	distclean-compile distclean-generic distclean-libtool \
+	distclean-local distclean-tags distcleancheck distdir \
+	distuninstallcheck dvi dvi-am html html-am info info-am \
+	install install-am install-binPROGRAMS install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-html install-html-am install-info \
+	install-info-am install-man install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags uninstall uninstall-am uninstall-binPROGRAMS
 
 
 distclean-local:
 	-rm -rf autom4te.cache
+	-rm -f *.user
 
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/Test/TestABGR_VS2008.vcproj b/Test/TestABGR_VS2008.vcproj
index 61d0365..b158a6b 100644
--- a/Test/TestABGR_VS2008.vcproj
+++ b/Test/TestABGR_VS2008.vcproj
@@ -41,7 +41,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\sdlgfx;"..\..\SDL-1.2.14\include""
+				AdditionalIncludeDirectories="..\..\sdlgfx;"..\..\SDL-1.2.15\include""
 				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS"
 				MinimalRebuild="false"
 				BasicRuntimeChecks="0"
@@ -69,7 +69,7 @@
 				AdditionalDependencies="SDL.lib SDLmain.lib SDL_gfx.lib"
 				OutputFile=".\Debug/TestABGR.exe"
 				LinkIncremental="2"
-				AdditionalLibraryDirectories="..\..\sdlgfx\Debug;"..\..\SDL-1.2.14\VisualC\SDLmain\Debug";"..\..\SDL-1.2.14\VisualC\SDL\Debug""
+				AdditionalLibraryDirectories="..\..\sdlgfx\Debug;"..\..\SDL-1.2.15\VisualC\SDLmain\Debug";"..\..\SDL-1.2.15\VisualC\SDL\Debug""
 				GenerateDebugInformation="true"
 				ProgramDatabaseFile="./Debug\TestABGR.pdb"
 				SubSystem="2"
@@ -125,7 +125,7 @@
 				Name="VCCLCompilerTool"
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
-				AdditionalIncludeDirectories="..\..\sdlgfx;"..\..\SDL-1.2.14\include""
+				AdditionalIncludeDirectories="..\..\sdlgfx;"..\..\SDL-1.2.15\include""
 				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS"
 				RuntimeLibrary="2"
 				EnableFunctionLevelLinking="false"
@@ -148,7 +148,7 @@
 				Name="VCLinkerTool"
 				AdditionalDependencies="SDL.lib SDLmain.lib SDL_gfx.lib"
 				LinkIncremental="2"
-				AdditionalLibraryDirectories="..\..\sdlgfx\Debug;"..\..\SDL-1.2.14\VisualC\SDLmain\Debug";"..\..\SDL-1.2.14\VisualC\SDL\Debug""
+				AdditionalLibraryDirectories="..\..\sdlgfx\Debug;"..\..\SDL-1.2.15\VisualC\SDLmain\Debug";"..\..\SDL-1.2.15\VisualC\SDL\Debug""
 				GenerateDebugInformation="true"
 				SubSystem="2"
 				OptimizeReferences="2"
diff --git a/Test/TestABGR.vcxproj b/Test/TestABGR_VS2010.vcxproj
old mode 100644
new mode 100755
similarity index 64%
rename from Test/TestABGR.vcxproj
rename to Test/TestABGR_VS2010.vcxproj
index 451a57e..d29914d
--- a/Test/TestABGR.vcxproj
+++ b/Test/TestABGR_VS2010.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <ProjectName>TestABGR</ProjectName>
     <ProjectGuid>{AE22EFD3-7F7D-48C0-AF3D-EF190406BEDC}</ProjectGuid>
     <RootNamespace>TestABGR</RootNamespace>
     <Keyword>Win32Proj</Keyword>
@@ -36,60 +37,55 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>false</MinimalRebuild>
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>$(TargetName)\$(Platform)\$(Configuration)\</AssemblerListingLocation>
-      <ObjectFileName>$(TargetName)\$(Platform)\$(Configuration)\</ObjectFileName>
-      <ProgramDataBaseFileName>$(TargetName)\$(Platform)\$(Configuration)\</ProgramDataBaseFileName>
+      <PrecompiledHeaderOutputFile>.\Debug/TestABGR.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\Debug/</AssemblerListingLocation>
+      <ObjectFileName>.\Debug/</ObjectFileName>
+      <ProgramDataBaseFileName>.\Debug/</ProgramDataBaseFileName>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <OutputFile>.\Debug/TestABGR.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <ProgramDatabaseFile>$(TargetDir)$(TargetName).pdb</ProgramDatabaseFile>
+      <ProgramDatabaseFile>./Debug\TestABGR.pdb</ProgramDatabaseFile>
       <SubSystem>Windows</SubSystem>
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
-      <IgnoreSpecificDefaultLibraries>msvcrtd.lib;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
-      <AdditionalOptions>/MACHINE:X86 %(AdditionalOptions)</AdditionalOptions>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-    </PostBuildEvent>
-    <PostBuildEvent>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>false</FunctionLevelLinking>
@@ -102,7 +98,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -114,21 +110,12 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-    </PostBuildEvent>
-    <PostBuildEvent>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="TestABGR.c" />
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\SDL_gfx.vcxproj">
+    <ProjectReference Include="..\SDL_gfx_VS2010.vcxproj">
       <Project>{ae22efd3-6e6d-48c0-af3d-ef190406bedc}</Project>
       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
     </ProjectReference>
diff --git a/Test/TestFonts.vcxproj b/Test/TestFonts_VS2010.vcxproj
old mode 100644
new mode 100755
similarity index 67%
rename from Test/TestFonts.vcxproj
rename to Test/TestFonts_VS2010.vcxproj
index 3047b2d..4b5cffc
--- a/Test/TestFonts.vcxproj
+++ b/Test/TestFonts_VS2010.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <ProjectName>TestFonts</ProjectName>
     <ProjectGuid>{AE33EFD3-6F6D-48C0-AF3D-EF190406BEDC}</ProjectGuid>
     <RootNamespace>TestFonts</RootNamespace>
     <Keyword>Win32Proj</Keyword>
@@ -36,58 +37,61 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>false</MinimalRebuild>
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>$(TargetName)\$(Platform)\$(Configuration)\</AssemblerListingLocation>
-      <ObjectFileName>$(TargetName)\$(Platform)\$(Configuration)\</ObjectFileName>
-      <ProgramDataBaseFileName>$(TargetName)\$(Platform)\$(Configuration)\</ProgramDataBaseFileName>
+      <PrecompiledHeaderOutputFile>.\Debug/TestFonts.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\Debug/</AssemblerListingLocation>
+      <ObjectFileName>.\Debug/</ObjectFileName>
+      <ProgramDataBaseFileName>.\Debug/</ProgramDataBaseFileName>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <OutputFile>.\Debug/TestFonts.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <ProgramDatabaseFile>$(TargetDir)$(TargetName).pdb</ProgramDatabaseFile>
+      <ProgramDatabaseFile>./Debug\TestFonts.pdb</ProgramDatabaseFile>
       <SubSystem>Windows</SubSystem>
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
-      <IgnoreSpecificDefaultLibraries>msvcrtd.lib;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
-      <AdditionalOptions>/MACHINE:X86 %(AdditionalOptions)</AdditionalOptions>
     </Link>
     <PostBuildEvent>
+      <Message>
+      </Message>
       <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
     </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>false</FunctionLevelLinking>
@@ -100,7 +104,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -113,18 +117,16 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       <TargetMachine>NotSet</TargetMachine>
     </Link>
     <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
+      <Message>Copy dependent DLLs</Message>
+      <Command>copy $(<ProjectDir>)..\..\sdlgfx\Debug\SDL_gfx.dll $(<TargetDir>)
+copy $(<ProjectDir>)..\..\SDL-1.2.15\VisualC\SDL\Debug\SDL.dll $(<TargetDir>)</Command>
     </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="TestFonts.c" />
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\SDL_gfx.vcxproj">
+    <ProjectReference Include="..\SDL_gfx_VS2010.vcxproj">
       <Project>{ae22efd3-6e6d-48c0-af3d-ef190406bedc}</Project>
       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
     </ProjectReference>
diff --git a/Test/TestFramerate.vcxproj b/Test/TestFramerate_VS2010.vcxproj
old mode 100644
new mode 100755
similarity index 65%
rename from Test/TestFramerate.vcxproj
rename to Test/TestFramerate_VS2010.vcxproj
index dd3bafc..b23cb1a
--- a/Test/TestFramerate.vcxproj
+++ b/Test/TestFramerate_VS2010.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <ProjectName>TestFramerate</ProjectName>
     <ProjectGuid>{AE22EFD3-6F6D-21C0-AF2D-EF190406BEDC}</ProjectGuid>
     <RootNamespace>TestFramerate</RootNamespace>
     <Keyword>Win32Proj</Keyword>
@@ -36,58 +37,55 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>false</MinimalRebuild>
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>$(TargetName)\$(Platform)\$(Configuration)\</AssemblerListingLocation>
-      <ObjectFileName>$(TargetName)\$(Platform)\$(Configuration)\</ObjectFileName>
-      <ProgramDataBaseFileName>$(TargetName)\$(Platform)\$(Configuration)\</ProgramDataBaseFileName>
+      <PrecompiledHeaderOutputFile>.\Debug/TestFramerate.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\Debug/</AssemblerListingLocation>
+      <ObjectFileName>.\Debug/</ObjectFileName>
+      <ProgramDataBaseFileName>.\Debug/</ProgramDataBaseFileName>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <OutputFile>.\Debug/TestFramerate.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <ProgramDatabaseFile>$(TargetDir)$(TargetName).pdb</ProgramDatabaseFile>
+      <ProgramDatabaseFile>./Debug\TestFramerate.pdb</ProgramDatabaseFile>
       <SubSystem>Windows</SubSystem>
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
-      <IgnoreSpecificDefaultLibraries>msvcrtd.lib;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
-      <AdditionalOptions>/MACHINE:X86 %(AdditionalOptions)</AdditionalOptions>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>false</FunctionLevelLinking>
@@ -100,7 +98,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -112,19 +110,12 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="TestFramerate.c" />
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\SDL_gfx.vcxproj">
+    <ProjectReference Include="..\SDL_gfx_VS2010.vcxproj">
       <Project>{ae22efd3-6e6d-48c0-af3d-ef190406bedc}</Project>
       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
     </ProjectReference>
diff --git a/Test/TestGfxBlit.vcxproj b/Test/TestGfxBlit_VS2010.vcxproj
old mode 100644
new mode 100755
similarity index 65%
rename from Test/TestGfxBlit.vcxproj
rename to Test/TestGfxBlit_VS2010.vcxproj
index 8b3cd13..1de5a0f
--- a/Test/TestGfxBlit.vcxproj
+++ b/Test/TestGfxBlit_VS2010.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <ProjectName>TestGfxBlit</ProjectName>
     <ProjectGuid>{AE22EFD3-6F6D-48C0-AF3D-EF112306BEDC}</ProjectGuid>
     <RootNamespace>TestGfxBlit</RootNamespace>
     <Keyword>Win32Proj</Keyword>
@@ -36,58 +37,55 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>false</MinimalRebuild>
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>$(TargetName)\$(Platform)\$(Configuration)\</AssemblerListingLocation>
-      <ObjectFileName>$(TargetName)\$(Platform)\$(Configuration)\</ObjectFileName>
-      <ProgramDataBaseFileName>$(TargetName)\$(Platform)\$(Configuration)\</ProgramDataBaseFileName>
+      <PrecompiledHeaderOutputFile>.\Debug/TestGfxBlit.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\Debug/</AssemblerListingLocation>
+      <ObjectFileName>.\Debug/</ObjectFileName>
+      <ProgramDataBaseFileName>.\Debug/</ProgramDataBaseFileName>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <OutputFile>.\Debug/TestGfxBlit.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <ProgramDatabaseFile>$(TargetDir)$(TargetName).pdb</ProgramDatabaseFile>
+      <ProgramDatabaseFile>./Debug\TestGfxBlit.pdb</ProgramDatabaseFile>
       <SubSystem>Windows</SubSystem>
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
-      <IgnoreSpecificDefaultLibraries>msvcrtd.lib;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
-      <AdditionalOptions>/MACHINE:X86 %(AdditionalOptions)</AdditionalOptions>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>false</FunctionLevelLinking>
@@ -100,7 +98,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -112,19 +110,12 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="TestGfxBlit.c" />
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\SDL_gfx.vcxproj">
+    <ProjectReference Include="..\SDL_gfx_VS2010.vcxproj">
       <Project>{ae22efd3-6e6d-48c0-af3d-ef190406bedc}</Project>
       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
     </ProjectReference>
diff --git a/Test/TestGfxPrimitives.vcxproj b/Test/TestGfxPrimitives_VS2010.vcxproj
old mode 100644
new mode 100755
similarity index 65%
rename from Test/TestGfxPrimitives.vcxproj
rename to Test/TestGfxPrimitives_VS2010.vcxproj
index 1dede41..918fbf0
--- a/Test/TestGfxPrimitives.vcxproj
+++ b/Test/TestGfxPrimitives_VS2010.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <ProjectName>TestGfxPrimitives</ProjectName>
     <ProjectGuid>{AE22EFD3-6F6D-48C0-AF3D-EF190406BEDC}</ProjectGuid>
     <RootNamespace>TestGfxPrimitives</RootNamespace>
     <Keyword>Win32Proj</Keyword>
@@ -36,58 +37,55 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>false</MinimalRebuild>
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>$(TargetName)\$(Platform)\$(Configuration)\</AssemblerListingLocation>
-      <ObjectFileName>$(TargetName)\$(Platform)\$(Configuration)\</ObjectFileName>
-      <ProgramDataBaseFileName>$(TargetName)\$(Platform)\$(Configuration)\</ProgramDataBaseFileName>
+      <PrecompiledHeaderOutputFile>.\Debug/TestGfxPrimitives.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\Debug/</AssemblerListingLocation>
+      <ObjectFileName>.\Debug/</ObjectFileName>
+      <ProgramDataBaseFileName>.\Debug/</ProgramDataBaseFileName>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <OutputFile>.\Debug/TestGfxPrimitives.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <ProgramDatabaseFile>$(TargetDir)$(TargetName).pdb</ProgramDatabaseFile>
+      <ProgramDatabaseFile>./Debug\TestGfxPrimitives.pdb</ProgramDatabaseFile>
       <SubSystem>Windows</SubSystem>
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
-      <IgnoreSpecificDefaultLibraries>msvcrtd.lib;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
-      <AdditionalOptions>/MACHINE:X86 %(AdditionalOptions)</AdditionalOptions>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>false</FunctionLevelLinking>
@@ -100,7 +98,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -112,19 +110,12 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="TestGfxPrimitives.c" />
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\SDL_gfx.vcxproj">
+    <ProjectReference Include="..\SDL_gfx_VS2010.vcxproj">
       <Project>{ae22efd3-6e6d-48c0-af3d-ef190406bedc}</Project>
       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
     </ProjectReference>
diff --git a/Test/TestGfxTexture.vcxproj b/Test/TestGfxTexture_VS2010.vcxproj
old mode 100644
new mode 100755
similarity index 69%
rename from Test/TestGfxTexture.vcxproj
rename to Test/TestGfxTexture_VS2010.vcxproj
index f99beb9..491a238
--- a/Test/TestGfxTexture.vcxproj
+++ b/Test/TestGfxTexture_VS2010.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <ProjectName>TestGfxTexture</ProjectName>
     <ProjectGuid>{AE22EFD3-6F6D-32C0-AA3D-EF190406BEDC}</ProjectGuid>
     <RootNamespace>TestGfxTexture</RootNamespace>
     <Keyword>Win32Proj</Keyword>
@@ -36,58 +37,55 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>false</MinimalRebuild>
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>$(TargetName)\$(Platform)\$(Configuration)\</AssemblerListingLocation>
-      <ObjectFileName>$(TargetName)\$(Platform)\$(Configuration)\</ObjectFileName>
-      <ProgramDataBaseFileName>$(TargetName)\$(Platform)\$(Configuration)\</ProgramDataBaseFileName>
+      <PrecompiledHeaderOutputFile>.\Debug/TestGfxTexture.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\Debug/</AssemblerListingLocation>
+      <ObjectFileName>.\Debug/</ObjectFileName>
+      <ProgramDataBaseFileName>.\Debug/</ProgramDataBaseFileName>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <OutputFile>.\Debug/TestGfxTexture.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <ProgramDatabaseFile>$(TargetDir)$(TargetName).pdb</ProgramDatabaseFile>
+      <ProgramDatabaseFile>./Debug\TestGfxTexture.pdb</ProgramDatabaseFile>
       <SubSystem>Windows</SubSystem>
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
-      <IgnoreSpecificDefaultLibraries>msvcrtd.lib;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
-      <AdditionalOptions>/MACHINE:X86 %(AdditionalOptions)</AdditionalOptions>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>false</FunctionLevelLinking>
@@ -100,7 +98,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -112,13 +110,6 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="TestGfxTexture.c" />
@@ -136,7 +127,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </CustomBuild>
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\SDL_gfx.vcxproj">
+    <ProjectReference Include="..\SDL_gfx_VS2010.vcxproj">
       <Project>{ae22efd3-6e6d-48c0-af3d-ef190406bedc}</Project>
       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
     </ProjectReference>
diff --git a/Test/TestImageFilter.c b/Test/TestImageFilter.c
index 66f4137..8dbc389 100644
--- a/Test/TestImageFilter.c
+++ b/Test/TestImageFilter.c
@@ -3,25 +3,29 @@
 TestImageFilter.c: test program for MMX filter routines
 
 (C) A. Schiffler, 2006, zlib license
+(C) Sylvain Beucler, 2013, zlib license
 
 */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 
 #include "SDL.h"
 
 #ifdef WIN32
 #include <windows.h>
 #include "SDL_imageFilter.h"
-#ifndef bcmp
-#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#ifndef bcmp
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
 #endif
 #else
 #include "SDL/SDL_imageFilter.h"
 #endif
 
+#define SRC_SIZE 23
+
 int total_count = 0;
 int ok_count = 0;
 
@@ -36,6 +40,7 @@ void setup_src(unsigned char *src1, unsigned char *src2)
 	src1[4]=33;
 	for (i=5; i<14; i++) src1[i]=i;
 	src1[14]=8;
+	for (i=15; i<SRC_SIZE; i++) src1[i]=rand();
 
 	src2[0]=1;
 	src2[1]=3;
@@ -44,27 +49,44 @@ void setup_src(unsigned char *src1, unsigned char *src2)
 	src2[4]=44;
 	for (i=5; i<14; i++) src2[i]=14-i;
 	src2[14]=10;
+	for (i=15; i<SRC_SIZE; i++) src2[i]=src1[i];
 }
 
-void print_result(char *label,unsigned char *src1, unsigned char *src2, unsigned char *dst) 
+void print_result(int mmx, char *label, unsigned char *src1, unsigned char *src2, unsigned char *dst) 
 {
 	char blabel[80];
+	int i;
+	memset((void *)blabel, ' ', 80);
+	blabel[strlen(label)+4]=0;
+
+	printf("\n");
+	printf ("%s   pos   ", blabel);
+	for (i = 0; i < SRC_SIZE; i++)
+		printf("%2d ", i);
+	printf("\n");
+
+	printf ("%s   src1  ", blabel);
+	for (i = 0; i < SRC_SIZE; i++)
+		printf("%02x ", src1[i]);
+	printf("\n");
 
-	memset((void *)blabel,(int)' ',80);
-	blabel[strlen(label)]=0;
-	printf ("%s   pos   %2d %2d %2d %2d %2d %2d %2d %2d %2d .. %2d\n",blabel,0,1,2,3,4,5,6,7,8,14);
-	printf ("%s   src1  %02x %02x %02x %02x %02x %02x %02x %02x %02x .. %02x\n",blabel,src1[0],src1[1],src1[2],src1[3],src1[4],src1[5],src1[6],src1[7],src1[8],src1[14]);
 	if (src2) {
-		printf ("%s   src2  %02x %02x %02x %02x %02x %02x %02x %02x %02x .. %02x\n",blabel,src2[0],src2[1],src2[2],src2[3],src2[4],src2[5],src2[6],src2[7],src2[8],src2[14]);
+		printf ("%s   src2  ", blabel);
+		for (i = 0; i < SRC_SIZE; i++)
+			printf("%02x ", src2[i]);
 	}
-	printf ("%s   dest  %02x %02x %02x %02x %02x %02x %02x %02x %02x .. %02x\n",label, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], dst[6], dst[7], dst[8],dst[14]);
-	printf ("\n");
+	printf("\n");
+
+	printf ("%s %s   dest  ",mmx?"MMX":" C ",label);
+	for (i = 0; i < SRC_SIZE; i++)
+		printf("%02x ", dst[i]);
+	printf("\n");
 }
 
 void print_compare(unsigned char *dst1, unsigned char *dst2) 
 { 
 	total_count++;
-	if (bcmp(dst1,dst2,15)==0) {
+	if (bcmp(dst1,dst2,SRC_SIZE)==0) {
 		printf ("OK\n");
 		ok_count++;
 	} else {
@@ -79,440 +101,318 @@ void print_line()
 
 void pause()
 {
-	char ch;
-	do {  
-		ch = getchar();  
-		putchar('.');  
-	} while (ch != '\n');
+	char ch;
+	do {
+		ch = getchar();
+		putchar('.');
+	} while (ch != '\n');
 }
 
 /* ----------- main ---------- */
 
 int main(int argc, char *argv[])
 {
-	unsigned char src1[15],src2[15],dstm[15],dstc[15];
-
-	/* SDL_imageFilter Test */
-
-	printf ("TestImageFilter\n\n");
-	printf ("Testing an array of 15 bytes - first 8 bytes should be processed\n");
-	printf ("by MMX or C code, the last 7 bytes only by C code.\n\n");
-
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2); 
-	SDL_imageFilterBitAnd ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15); 
-	print_result ("MMX BitAnd", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2); 
-	SDL_imageFilterBitAnd ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15); 
-	print_result (" C  BitAnd", src1, src2, dstc);
-
-	print_compare(dstm,dstc);
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterBitOr ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15);
-	print_result ("MMX BitOr", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterBitOr ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15);
-	print_result (" C  BitOr", src1, src2, dstc);
-
-	print_compare(dstm,dstc);
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterAdd ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15);
-	print_result ("MMX Add", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterAdd ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15);
-	print_result (" C  Add", src1, src2, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterAbsDiff ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15);
-	print_result ("MMX AbsDiff", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterAbsDiff ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15);
-	print_result (" C  AbsDiff", src1, src2, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMean ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15);
-	print_result ("MMX Mean", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMean ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15);
-	print_result (" C  Mean", src1, src2, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterSub ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15);
-	print_result ("MMX Sub", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterSub ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15);
-	print_result (" C  Sub", src1, src2, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMult ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15);
-	print_result ("MMX Mult", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMult ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15);
-	print_result (" C  Mult", src1, src2, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMultNor ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15);
-	print_result ("ASM MultNor", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMultNor ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15);
-	print_result (" C  MultNor", src1, src2, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMultDivby2 ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15);
-	print_result ("MMX MultDivby2", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMultDivby2 ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15);
-	print_result (" C  MultDivby2", src1, src2, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMultDivby4 ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15);
-	print_result ("MMX MultDivby4", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMultDivby4 ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15);
-	print_result (" C  MultDivby4", src1, src2, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterDiv ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstm,15);
-	print_result ("ASM Div", src1, src2, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterDiv ((unsigned char *)src1,(unsigned char *)src2,(unsigned char *)dstc,15);
-	print_result (" C  Div", src1, src2, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterBitNegation ((unsigned char *)src1,(unsigned char *)dstm,15);
-	print_result ("MMX BitNegation", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterBitNegation ((unsigned char *)src1,(unsigned char *)dstc,15);
-	print_result (" C  BitNegation", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterAddByte ((unsigned char *)src1,(unsigned char *)dstm,15, 3);
-	print_result ("MMX AddByte(3)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterAddByte ((unsigned char *)src1,(unsigned char *)dstc,15, 3);
-	print_result (" C  AddByte(3)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterAddByteToHalf ((unsigned char *)src1,(unsigned char *)dstm,15, 3);
-	print_result ("MMX AddByteToHalf(3)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterAddByteToHalf ((unsigned char *)src1,(unsigned char *)dstc,15, 3);
-	print_result (" C  AddByteToHalf(3)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterSubByte ((unsigned char *)src1,(unsigned char *)dstm,15, 3);
-	print_result ("MMX SubByte(3)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterSubByte ((unsigned char *)src1,(unsigned char *)dstc,15, 3);
-	print_result (" C  SubByte(3)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftRight ((unsigned char *)src1,(unsigned char *)dstm,15, 1);
-	print_result ("MMX ShiftRight(1)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftRight ((unsigned char *)src1,(unsigned char *)dstc,15, 1);
-	print_result (" C  ShiftRight(1)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMultByByte ((unsigned char *)src1,(unsigned char *)dstm,15, 3);
-	print_result ("MMX MultByByte(3)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterMultByByte ((unsigned char *)src1,(unsigned char *)dstc,15, 3);
-	print_result (" C  MultByByte(3)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftRightAndMultByByte ((unsigned char *)src1,(unsigned char *)dstm,15, 1, 3);
-	print_result ("MMX ShiftRightAndMultByByte(1,3)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftRightAndMultByByte ((unsigned char *)src1,(unsigned char *)dstc,15, 1, 3);
-	print_result (" C  ShuftRightAndMultByByte(1,3)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftLeftByte ((unsigned char *)src1,(unsigned char *)dstm,15, 3);
-	print_result ("MMX ShiftLeftByte(3)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftLeftByte ((unsigned char *)src1,(unsigned char *)dstc,15, 3);
-	print_result (" C  ShiftLeftByte(3)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftLeft ((unsigned char *)src1,(unsigned char *)dstm,15, 3);
-	print_result ("MMX ShiftLeft(3)", src1, NULL, dstm);
+	unsigned char src1[SRC_SIZE], src2[SRC_SIZE], dstm[SRC_SIZE], dstc[SRC_SIZE];
+	int size = 2*1024*1024;
+	unsigned char *t1 = (unsigned char *)malloc(size), *t2 = (unsigned char *)malloc(size), *d = (unsigned char *)malloc(size);
+	int i;
 
-	SDL_imageFilterMMXoff();
+	// Interestingly, C tests are about 4x faster
+	// on malloc(size) than on char[size]
 
-	setup_src(src1, src2);
-	SDL_imageFilterShiftLeft ((unsigned char *)src1,(unsigned char *)dstc,15, 3);
-	print_result (" C  ShiftLeft(3)", src1, NULL, dstc);
+	printf("src1:\t%s (%p)\tsrc2:\t%s (%p)\tdstm:\t%s (%p)\tdstc:\t%s (%p)\n",
+		((long long)src1%8) ? "not aligned" : "aligned", src1,
+		((long long)src2%8) ? "not aligned" : "aligned", src2,
+		((long long)dstm%8) ? "not aligned" : "aligned", dstm,
+		((long long)dstc%8) ? "not aligned" : "aligned", dstc);
 
-	print_compare(dstm,dstc); 
-	print_line();
+	printf("t1:\t%s (%p)\tt2:\t%s (%p)\td:\t%s (%p)\n",
+		((long long)t1%8) ? "not aligned" : "aligned", t1,
+		((long long)t2%8) ? "not aligned" : "aligned", t2,
+		((long long) d%8) ? "not aligned" : "aligned",  d);
 
-	SDL_imageFilterMMXon();
+	{
+		/* Initialize to make valgrind happy */
+		srand((unsigned int)time(NULL));
+		for (i = 0; i < size; i++) {
+			/* use more random lower-order bits (int->char) */
+			t1[i] = rand(); t2[i] = rand(); d[i] = rand();
+		}
+	}
 
-	setup_src(src1, src2);
-	SDL_imageFilterBinarizeUsingThreshold ((unsigned char *)src1,(unsigned char *)dstm,15, 2);
-	print_result ("MMX BinarizeUsingThreshold(2)", src1, NULL, dstm);
+	SDL_Init(SDL_INIT_TIMER);
 
-	SDL_imageFilterMMXoff();
+	/* SDL_imageFilter Test */
 
-	setup_src(src1, src2);
-	SDL_imageFilterBinarizeUsingThreshold ((unsigned char *)src1,(unsigned char *)dstc,15, 2);
-	print_result (" C  BinarizeUsingThreshold(2)", src1, NULL, dstc);
+	printf ("TestImageFilter\n\n");
+	printf ("Testing an array of 23 bytes - first 16 bytes should be processed\n");
+	printf ("by MMX or C code, the last 7 bytes only by C code.\n\n");
 
-	print_compare(dstm,dstc); 
 	print_line();
 
-	SDL_imageFilterMMXon();
 
-	setup_src(src1, src2);
-	SDL_imageFilterClipToRange ((unsigned char *)src1,(unsigned char *)dstm,15, 1,7);
-	print_result ("MMX ClipToRange(1,7)", src1, NULL, dstm);
+#define	TEST_C   0
+#define	TEST_MMX 1
+	{
+#define FUNC(f) { #f, SDL_imageFilter ## f }
+		struct func {
+			char* name;
+			int (*f)(unsigned char*, unsigned char*, unsigned char*, unsigned int);
+		};
+		struct func funcs[] = {
+			FUNC(BitAnd),
+			FUNC(BitOr),
+			FUNC(Add),
+			FUNC(AbsDiff),
+			FUNC(Mean),
+			FUNC(Sub),
+			FUNC(Mult),
+			FUNC(MultNor),
+			FUNC(MultDivby2),
+			FUNC(MultDivby4),
+			FUNC(Div),
+		};
+
+		int k;
+		for (k = 0; k < sizeof(funcs)/sizeof(struct func); k++) {
+			Uint32 start;
+			int i;
+
+			setup_src(src1, src2);
+
+			SDL_imageFilterMMXon();
+			funcs[k].f(src1, src2, dstm, SRC_SIZE);
+			print_result(TEST_MMX, funcs[k].name, src1, src2, dstm);
+			start = SDL_GetTicks();
+			for (i = 0; i < 50; i++) {
+				funcs[k].f(t1, t2, d, size);
+			}
+			printf("MMX %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+			SDL_imageFilterMMXoff();
+			funcs[k].f(src1, src2, dstc, SRC_SIZE);
+			print_result(TEST_C, funcs[k].name, src1, src2, dstc);
+			start = SDL_GetTicks();
+			for (i = 0; i < 50; i++) {
+				funcs[k].f(t1, t2, d, size);
+			}
+			printf(" C  %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+			print_compare(dstm,dstc);
+			print_line();
+		}
+	}
 
-	SDL_imageFilterMMXoff();
+	{
+		Uint32 start;
+		int i;
+		char call[1024];
+		sprintf(call, "BitNegation");
+
+		setup_src(src1, src2);
+
+		SDL_imageFilterMMXon();
+		SDL_imageFilterBitNegation(src1, dstm, SRC_SIZE);
+		print_result(TEST_MMX, call, src1, NULL, dstm);
+		start = SDL_GetTicks();
+		for (i = 0; i < 50; i++) {
+			SDL_imageFilterBitNegation(t1, d, size);
+		}
+		printf("MMX %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+		SDL_imageFilterMMXoff();
+		SDL_imageFilterBitNegation(src1, dstc, SRC_SIZE);
+		print_result(TEST_C, call, src1, NULL, dstc);
+		start = SDL_GetTicks();
+		for (i = 0; i < 50; i++) {
+			SDL_imageFilterBitNegation(t1, d, size);
+		}
+		printf(" C  %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+		print_compare(dstm,dstc);
+		print_line();
+	}
 
-	setup_src(src1, src2);
-	SDL_imageFilterClipToRange ((unsigned char *)src1,(unsigned char *)dstc,15, 1,7);
-	print_result (" C  ClipToRange(1,7)", src1, NULL, dstc);
 
-	print_compare(dstm,dstc); 
-	print_line();
+	{
+#undef FUNC
+#define FUNC(f, c) { #f, SDL_imageFilter ## f, c }
+		struct func {
+			char* name;
+			int (*f)(unsigned char*, unsigned char*, unsigned int, unsigned char);
+			unsigned char arg;
+		};
+		struct func funcs[] = {
+			FUNC(AddByte,                3),
+			FUNC(AddByteToHalf,          3),
+			FUNC(SubByte,                3),
+			FUNC(ShiftRight,             1),
+			FUNC(ShiftRightUint,         4),
+			FUNC(MultByByte,             3),
+			FUNC(ShiftLeftByte,          3),
+			FUNC(ShiftLeft,              3),
+			FUNC(ShiftLeftUint,          4),
+			FUNC(BinarizeUsingThreshold, 9),
+		};
+
+		int k;
+		for (k = 0; k < sizeof(funcs)/sizeof(struct func); k++) {
+			Uint32 start;
+			int i;
+			char call[1024];
+			sprintf(call, "%s(%u)", funcs[k].name, funcs[k].arg);
+
+			setup_src(src1, src2);
+
+			SDL_imageFilterMMXon();
+			funcs[k].f(src1, dstm, SRC_SIZE, funcs[k].arg);
+			print_result(TEST_MMX, call, src1, NULL, dstm);
+			start = SDL_GetTicks();
+			for (i = 0; i < 50; i++) {
+				funcs[k].f(t1, d, size, funcs[k].arg);
+			}
+			printf("MMX %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+			SDL_imageFilterMMXoff();
+			funcs[k].f(src1, dstc, SRC_SIZE, funcs[k].arg);
+			print_result(TEST_C, call, src1, NULL, dstc);
+			start = SDL_GetTicks();
+			for (i = 0; i < 50; i++) {
+				funcs[k].f(t1, d, size, funcs[k].arg);
+			}
+			printf(" C  %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+			print_compare(dstm,dstc);
+			print_line();
+		}
+	}
 
-	SDL_imageFilterMMXon();
 
-	setup_src(src1, src2);
-	SDL_imageFilterNormalizeLinear ((unsigned char *)src1,(unsigned char *)dstm,15, 0,33,0,255);
-	print_result ("MMX NormalizeLinear(0,33,0,255)", src1, NULL, dstm);
+	{
+#undef FUNC
+#define FUNC(f, c1, c2) { #f, SDL_imageFilter ## f, c1, c2 }
+		struct func {
+			char* name;
+			int (*f)(unsigned char*, unsigned char*, unsigned int, unsigned char, unsigned char);
+			unsigned char arg1, arg2;
+		};
+		struct func funcs[] = {
+			FUNC(ShiftRightAndMultByByte, 1, 3),
+			FUNC(ClipToRange, 3, 8),
+		};
+
+		int k;
+		for (k = 0; k < sizeof(funcs)/sizeof(struct func); k++) {
+			Uint32 start;
+			int i;
+			char call[1024];
+			sprintf(call, "%s(%u,%u)", funcs[k].name, funcs[k].arg1, funcs[k].arg2);
+
+			setup_src(src1, src2);
+
+			SDL_imageFilterMMXon();
+			funcs[k].f(src1, dstm, SRC_SIZE, funcs[k].arg1, funcs[k].arg2);
+			print_result(TEST_MMX, call, src1, NULL, dstm);
+			start = SDL_GetTicks();
+			for (i = 0; i < 50; i++) {
+				funcs[k].f(t1, d, size, funcs[k].arg1, funcs[k].arg2);
+			}
+			printf("MMX %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+			SDL_imageFilterMMXoff();
+			funcs[k].f(src1, dstc, SRC_SIZE, funcs[k].arg1, funcs[k].arg2);
+			print_result(TEST_C, call, src1, NULL, dstc);
+			start = SDL_GetTicks();
+			for (i = 0; i < 50; i++) {
+				funcs[k].f(t1, d, size, funcs[k].arg1, funcs[k].arg2);
+			}
+			printf(" C  %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+			print_compare(dstm,dstc);
+			print_line();
+		}
+	}
 
-	SDL_imageFilterMMXoff();
 
-	setup_src(src1, src2);
-	SDL_imageFilterNormalizeLinear ((unsigned char *)src1,(unsigned char *)dstc,15, 0,33,0,255);
-	print_result (" C  NormalizeLinear(0,33,0,255)", src1, NULL, dstc);
+	{
+		Uint32 start;
+		int i;
+		char call[1024];
+		sprintf(call, "NormalizeLinear(0,33,0,255)");
+
+		setup_src(src1, src2);
+
+		SDL_imageFilterMMXon();
+		SDL_imageFilterNormalizeLinear(src1, dstm, SRC_SIZE, 0,33, 0,255);
+		print_result(TEST_MMX, call, src1, NULL, dstm);
+		start = SDL_GetTicks();
+		for (i = 0; i < 50; i++) {
+			SDL_imageFilterNormalizeLinear(t1, d, size, 0,33, 0,255);
+		}
+		printf("MMX %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+		SDL_imageFilterMMXoff();
+		SDL_imageFilterNormalizeLinear(src1, dstc, SRC_SIZE, 0,33, 0,255);
+		print_result(TEST_C, call, src1, NULL, dstc);
+		start = SDL_GetTicks();
+		for (i = 0; i < 50; i++) {
+			SDL_imageFilterNormalizeLinear(t1, d, size, 0,33, 0,255);
+		}
+		printf(" C  %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+		print_compare(dstm,dstc);
+		print_line();
+	}
 
-	print_compare(dstm,dstc); 
-	print_line();
 
 	/* Uint functions */
+	/* Disabled, since broken *//* ??? */
+	{
+#undef FUNC
+#define FUNC(f, c) { #f, SDL_imageFilter ## f, c }
+		struct func {
+			char* name;
+			int (*f)(unsigned char*, unsigned char*, unsigned int, unsigned int);
+			unsigned int arg;
+		};
+		struct func funcs[] = {
+			FUNC(AddUint,       0x01020304),
+			FUNC(SubUint,       0x01020304),
+		};
+
+		int k;
+		for (k = 0; k < sizeof(funcs)/sizeof(struct func); k++) {
+			Uint32 start;
+			int i;
+			char call[1024];
+			sprintf(call, "%s(%u)", funcs[k].name, funcs[k].arg);
+
+			setup_src(src1, src2);
+
+			SDL_imageFilterMMXon();
+			funcs[k].f(src1, dstm, SRC_SIZE, funcs[k].arg);
+			print_result(TEST_MMX, call, src1, NULL, dstm);
+			start = SDL_GetTicks();
+			for (i = 0; i < 50; i++) {
+				funcs[k].f(t1, d, size, funcs[k].arg);
+			}
+			printf("MMX %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+			SDL_imageFilterMMXoff();
+			funcs[k].f(src1, dstc, SRC_SIZE, funcs[k].arg);
+			print_result(TEST_C, call, src1, NULL, dstc);
+			start = SDL_GetTicks();
+			for (i = 0; i < 50; i++) {
+				funcs[k].f(t1, d, size, funcs[k].arg);
+			}
+			printf(" C  %dx%dk: %dms\n", i, size/1024, SDL_GetTicks() - start);
+
+			print_compare(dstm,dstc);
+			print_line();
+		}
+	}
 
-	/* Disabled, since broken */
-	/*
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterAddUint ((unsigned char *)src1,(unsigned char *)dstm,15, 0x01020304);
-	print_result ("MMX AddUint(0x01020304)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterAddUint ((unsigned char *)src1,(unsigned char *)dstc,15, 0x01020304);
-	print_result (" C  AddUint(0x01020304)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterSubUint ((unsigned char *)src1,(unsigned char *)dstm,15, 0x01020304);
-	print_result ("MMX SubUint(0x01020304)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterSubUint ((unsigned char *)src1,(unsigned char *)dstc,15, 0x01020304);
-	print_result (" C  SubUint(0x01020304)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftRightUint ((unsigned char *)src1,(unsigned char *)dstm,15, 4);
-	print_result ("MMX ShiftRightUint(4)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftRightUint ((unsigned char *)src1,(unsigned char *)dstc,15, 4);
-	print_result (" C  ShiftRightUint(4)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	SDL_imageFilterMMXon();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftLeftUint ((unsigned char *)src1,(unsigned char *)dstm,15, 4);
-	print_result ("MMX ShiftLeftUint(4)", src1, NULL, dstm);
-
-	SDL_imageFilterMMXoff();
-
-	setup_src(src1, src2);
-	SDL_imageFilterShiftLeftUint ((unsigned char *)src1,(unsigned char *)dstc,15, 4);
-	print_result (" C  ShiftLeftUint(4)", src1, NULL, dstc);
-
-	print_compare(dstm,dstc); 
-	print_line();
-
-	*/
 
-#ifdef USE_MMX
 	SDL_imageFilterMMXon();
 	if (SDL_imageFilterMMXdetect())
 	{
@@ -522,17 +422,17 @@ int main(int argc, char *argv[])
 	{
 		printf("MMX was NOT detected\n\n");
 	}
-#else
-	printf("MMX support disabled in SDL_gfx.\n\n");
-#endif
 
 	printf ("Result: %i of %i passed OK.\n", ok_count, total_count);
 
 #ifdef WIN32 
-	printf("Press Enter to continue ...");
+	printf("Press Enter to continue ...");
 	pause();
 #endif
 
+	SDL_Quit();
+	free(d);
+	free(t2);
+	free(t1);
 	exit(0);
 }
-
diff --git a/Test/TestImageFilter.vcxproj b/Test/TestImageFilter_VS2010.vcxproj
old mode 100644
new mode 100755
similarity index 65%
rename from Test/TestImageFilter.vcxproj
rename to Test/TestImageFilter_VS2010.vcxproj
index 38da752..d799f8b
--- a/Test/TestImageFilter.vcxproj
+++ b/Test/TestImageFilter_VS2010.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <ProjectName>TestImageFilter</ProjectName>
     <ProjectGuid>{AE22AFD3-6F6D-48C0-AF3D-EF190406AAAA}</ProjectGuid>
     <RootNamespace>TestImageFilter</RootNamespace>
     <Keyword>Win32Proj</Keyword>
@@ -36,58 +37,55 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;USE_MMX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>false</MinimalRebuild>
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>$(TargetName)\$(Platform)\$(Configuration)\</AssemblerListingLocation>
-      <ObjectFileName>$(TargetName)\$(Platform)\$(Configuration)\</ObjectFileName>
-      <ProgramDataBaseFileName>$(TargetName)\$(Platform)\$(Configuration)\</ProgramDataBaseFileName>
+      <PrecompiledHeaderOutputFile>.\Debug/TestImageFilter.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\Debug/</AssemblerListingLocation>
+      <ObjectFileName>.\Debug/</ObjectFileName>
+      <ProgramDataBaseFileName>.\Debug/</ProgramDataBaseFileName>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <OutputFile>.\Debug/TestImageFilter.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <ProgramDatabaseFile>$(TargetDir)$(TargetName).pdb</ProgramDatabaseFile>
+      <ProgramDatabaseFile>./Debug\TestImageFilter.pdb</ProgramDatabaseFile>
       <SubSystem>Console</SubSystem>
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
-      <IgnoreSpecificDefaultLibraries>msvcrtd.lib;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
-      <AdditionalOptions>/MACHINE:X86 %(AdditionalOptions)</AdditionalOptions>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>false</FunctionLevelLinking>
@@ -100,7 +98,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -112,19 +110,12 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="TestImageFilter.c" />
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\SDL_gfx.vcxproj">
+    <ProjectReference Include="..\SDL_gfx_VS2010.vcxproj">
       <Project>{ae22efd3-6e6d-48c0-af3d-ef190406bedc}</Project>
       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
     </ProjectReference>
diff --git a/Test/TestRotozoom.vcxproj b/Test/TestRotozoom_VS2010.vcxproj
old mode 100644
new mode 100755
similarity index 72%
rename from Test/TestRotozoom.vcxproj
rename to Test/TestRotozoom_VS2010.vcxproj
index 4ff607d..d3c402e
--- a/Test/TestRotozoom.vcxproj
+++ b/Test/TestRotozoom_VS2010.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <ProjectName>TestRotozoom</ProjectName>
     <ProjectGuid>{AE98EFD3-6F6D-48C0-AF3D-EF560406BEDC}</ProjectGuid>
     <RootNamespace>TestRotozoom</RootNamespace>
     <Keyword>Win32Proj</Keyword>
@@ -36,59 +37,58 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>false</MinimalRebuild>
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>$(TargetName)\$(Platform)\$(Configuration)\</AssemblerListingLocation>
-      <ObjectFileName>$(TargetName)\$(Platform)\$(Configuration)\</ObjectFileName>
-      <ProgramDataBaseFileName>$(TargetName)\$(Platform)\$(Configuration)\</ProgramDataBaseFileName>
+      <PrecompiledHeaderOutputFile>.\Debug/TestRotozoom.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\Debug/</AssemblerListingLocation>
+      <ObjectFileName>.\Debug/</ObjectFileName>
+      <ProgramDataBaseFileName>.\Debug/</ProgramDataBaseFileName>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <OutputFile>.\Debug/TestRotozoom.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <ProgramDatabaseFile>$(TargetDir)$(TargetName).pdb</ProgramDatabaseFile>
+      <ProgramDatabaseFile>./Debug\TestRotozoom.pdb</ProgramDatabaseFile>
       <SubSystem>Windows</SubSystem>
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
-      <IgnoreSpecificDefaultLibraries>msvcrtd.lib;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
-      <AdditionalOptions>/MACHINE:X86 %(AdditionalOptions)</AdditionalOptions>
     </Link>
     <PostBuildEvent>
-      <Command>
-copy /y "$(ProjectDir)\*.bmp" "$(TargetDir)"
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
+      <Command>copy /y "$(ProjectDir)\*.bmp" "$(TargetDir)"</Command>
     </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>false</FunctionLevelLinking>
@@ -101,7 +101,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -113,14 +113,6 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "$(ProjectDir)\*.bmp" "$(TargetDir)"
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="TestRotozoom.c" />
@@ -156,7 +148,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </CustomBuildStep>
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\SDL_gfx.vcxproj">
+    <ProjectReference Include="..\SDL_gfx_VS2010.vcxproj">
       <Project>{ae22efd3-6e6d-48c0-af3d-ef190406bedc}</Project>
       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
     </ProjectReference>
diff --git a/Test/TestShrink.vcxproj b/Test/TestShrink_VS2010.vcxproj
old mode 100644
new mode 100755
similarity index 69%
rename from Test/TestShrink.vcxproj
rename to Test/TestShrink_VS2010.vcxproj
index aa4c91b..6c262bc
--- a/Test/TestShrink.vcxproj
+++ b/Test/TestShrink_VS2010.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <ProjectName>TestShrink</ProjectName>
     <ProjectGuid>{AE22EFD3-6161-48C0-123D-EF190406BEDC}</ProjectGuid>
     <RootNamespace>TestShrink</RootNamespace>
     <Keyword>Win32Proj</Keyword>
@@ -36,59 +37,58 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\Debug\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>false</MinimalRebuild>
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <PrecompiledHeaderOutputFile>$(IntDir)$(TargetName).pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>$(TargetName)\$(Platform)\$(Configuration)\</AssemblerListingLocation>
-      <ObjectFileName>$(TargetName)\$(Platform)\$(Configuration)\</ObjectFileName>
-      <ProgramDataBaseFileName>$(TargetName)\$(Platform)\$(Configuration)\</ProgramDataBaseFileName>
+      <PrecompiledHeaderOutputFile>.\Debug/TestShrink.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\Debug/</AssemblerListingLocation>
+      <ObjectFileName>.\Debug/</ObjectFileName>
+      <ProgramDataBaseFileName>.\Debug/</ProgramDataBaseFileName>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <OutputFile>.\Debug/TestShrink.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <ProgramDatabaseFile>$(TargetDir)$(TargetName).pdb</ProgramDatabaseFile>
+      <ProgramDatabaseFile>./Debug\TestShrink.pdb</ProgramDatabaseFile>
       <SubSystem>Windows</SubSystem>
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
-      <IgnoreSpecificDefaultLibraries>msvcrtd.lib;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
-      <AdditionalOptions>/MACHINE:X86 %(AdditionalOptions)</AdditionalOptions>
     </Link>
     <PostBuildEvent>
-      <Command>
-copy /y "$(ProjectDir)\*.bmp" "$(TargetDir)"
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
+      <Command>copy /y "$(ProjectDir)\*.bmp" "$(TargetDir)"</Command>
     </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL_gfx;..\..\SDL-1.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\sdlgfx;..\..\SDL-1.2.15\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>false</FunctionLevelLinking>
@@ -101,7 +101,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </ClCompile>
     <Link>
       <AdditionalDependencies>SDL.lib;SDLmain.lib;SDL_gfx.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\..\sdlgfx\$(Platform)\$(Configuration);..\..\SDL_gfx\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDLmain\$(Platform)\$(Configuration);..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\..\sdlgfx\Debug;..\..\SDL-1.2.15\VisualC\SDLmain\Debug;..\..\SDL-1.2.15\VisualC\SDL\Debug;..\..\SDL-1.2.15\VisualC\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -113,14 +113,6 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
       </DataExecutionPrevention>
       <TargetMachine>NotSet</TargetMachine>
     </Link>
-    <PostBuildEvent>
-      <Command>
-copy /y "$(ProjectDir)\*.bmp" "$(TargetDir)"
-copy /y "..\..\SDL-1.2\VisualC\SDL\$(Platform)\$(Configuration)\SDL.dll" "$(TargetDir)\SDL.dll"
-copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
-      </Command>
-      <Message>Copy SDL and SDL_gfx DLLs</Message>
-    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="TestShrink.c" />
@@ -144,7 +136,7 @@ copy /y "..\$(Platform)\$(Configuration)\SDL_gfx.dll" "$(TargetDir)\SDL_gfx.dll"
     </CustomBuildStep>
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\SDL_gfx.vcxproj">
+    <ProjectReference Include="..\SDL_gfx_VS2010.vcxproj">
       <Project>{ae22efd3-6e6d-48c0-af3d-ef190406bedc}</Project>
       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
     </ProjectReference>
diff --git a/Test/aclocal.m4 b/Test/aclocal.m4
index 09b33a2..64820a1 100644
--- a/Test/aclocal.m4
+++ b/Test/aclocal.m4
@@ -1,7 +1,8 @@
-# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
+# generated automatically by aclocal 1.11.6 -*- Autoconf -*-
 
 # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2005, 2006, 2007, 2008, 2009  Free Software Foundation, Inc.
+# 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+# Inc.
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
@@ -13,8 +14,8 @@
 
 m4_ifndef([AC_AUTOCONF_VERSION],
   [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
-m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.68],,
-[m4_warning([this file was generated for autoconf 2.68.
+m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
+[m4_warning([this file was generated for autoconf 2.69.
 You have another version of autoconf.  It may work, but is not guaranteed to.
 If you have problems, you may need to regenerate the build system entirely.
 To do so, use the procedure documented by the package, typically `autoreconf'.])])
@@ -2525,17 +2526,6 @@ freebsd* | dragonfly*)
   esac
   ;;
 
-gnu*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  ;;
-
 haiku*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
@@ -2652,7 +2642,7 @@ linux*oldld* | linux*aout* | linux*coff*)
   ;;
 
 # This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
@@ -2697,6 +2687,18 @@ linux* | k*bsd*-gnu | kopensolaris*-gnu)
   dynamic_linker='GNU/Linux ld.so'
   ;;
 
+netbsdelf*-gnu)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='NetBSD ld.elf_so'
+  ;;
+
 netbsd*)
   version_type=sunos
   need_lib_prefix=no
@@ -3256,10 +3258,6 @@ freebsd* | dragonfly*)
   fi
   ;;
 
-gnu*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
 haiku*)
   lt_cv_deplibs_check_method=pass_all
   ;;
@@ -3298,11 +3296,11 @@ irix5* | irix6* | nonstopux*)
   ;;
 
 # This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
   lt_cv_deplibs_check_method=pass_all
   ;;
 
-netbsd*)
+netbsd* | netbsdelf*-gnu)
   if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
     lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
   else
@@ -4050,7 +4048,7 @@ m4_if([$1], [CXX], [
 	    ;;
 	esac
 	;;
-      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
 	case $cc_basename in
 	  KCC*)
 	    # KAI C++ Compiler
@@ -4114,7 +4112,7 @@ m4_if([$1], [CXX], [
 	    ;;
 	esac
 	;;
-      netbsd*)
+      netbsd* | netbsdelf*-gnu)
 	;;
       *qnx* | *nto*)
         # QNX uses GNU C++, but need to define -shared option too, otherwise
@@ -4349,7 +4347,7 @@ m4_if([$1], [CXX], [
       _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
       ;;
 
-    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+    linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
       case $cc_basename in
       # old Intel for x86_64 which still supported -KPIC.
       ecc*)
@@ -4591,6 +4589,9 @@ m4_if([$1], [CXX], [
       ;;
     esac
     ;;
+  linux* | k*bsd*-gnu | gnu*)
+    _LT_TAGVAR(link_all_deplibs, $1)=no
+    ;;
   *)
     _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
     ;;
@@ -4653,6 +4654,9 @@ dnl Note also adjust exclude_expsyms for C++ above.
   openbsd*)
     with_gnu_ld=no
     ;;
+  linux* | k*bsd*-gnu | gnu*)
+    _LT_TAGVAR(link_all_deplibs, $1)=no
+    ;;
   esac
 
   _LT_TAGVAR(ld_shlibs, $1)=yes
@@ -4874,7 +4878,7 @@ _LT_EOF
       fi
       ;;
 
-    netbsd*)
+    netbsd* | netbsdelf*-gnu)
       if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
 	_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
 	wlarc=
@@ -5051,6 +5055,7 @@ _LT_EOF
 	if test "$aix_use_runtimelinking" = yes; then
 	  shared_flag="$shared_flag "'${wl}-G'
 	fi
+	_LT_TAGVAR(link_all_deplibs, $1)=no
       else
 	# not using gcc
 	if test "$host_cpu" = ia64; then
@@ -5355,7 +5360,7 @@ _LT_EOF
       _LT_TAGVAR(link_all_deplibs, $1)=yes
       ;;
 
-    netbsd*)
+    netbsd* | netbsdelf*-gnu)
       if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
 	_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
       else
@@ -6231,9 +6236,6 @@ if test "$_lt_caught_CXX_error" != yes; then
         _LT_TAGVAR(ld_shlibs, $1)=yes
         ;;
 
-      gnu*)
-        ;;
-
       haiku*)
         _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
         _LT_TAGVAR(link_all_deplibs, $1)=yes
@@ -6395,7 +6397,7 @@ if test "$_lt_caught_CXX_error" != yes; then
         _LT_TAGVAR(inherit_rpath, $1)=yes
         ;;
 
-      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
         case $cc_basename in
           KCC*)
 	    # Kuck and Associates, Inc. (KAI) C++ Compiler
@@ -8606,12 +8608,15 @@ m4_ifndef([_LT_PROG_F77],		[AC_DEFUN([_LT_PROG_F77])])
 m4_ifndef([_LT_PROG_FC],		[AC_DEFUN([_LT_PROG_FC])])
 m4_ifndef([_LT_PROG_CXX],		[AC_DEFUN([_LT_PROG_CXX])])
 
-# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008, 2011 Free Software
+# Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
+# serial 1
+
 # AM_AUTOMAKE_VERSION(VERSION)
 # ----------------------------
 # Automake X.Y traces this macro to ensure aclocal.m4 has been
@@ -8621,7 +8626,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION],
 [am__api_version='1.11'
 dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
 dnl require some minimum version.  Point them to the right macro.
-m4_if([$1], [1.11.1], [],
+m4_if([$1], [1.11.6], [],
       [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
 ])
 
@@ -8637,19 +8642,21 @@ m4_define([_AM_AUTOCONF_VERSION], [])
 # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
 # This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
 AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.11.1])dnl
+[AM_AUTOMAKE_VERSION([1.11.6])dnl
 m4_ifndef([AC_AUTOCONF_VERSION],
   [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
 _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
 
 # AM_AUX_DIR_EXPAND                                         -*- Autoconf -*-
 
-# Copyright (C) 2001, 2003, 2005  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
+# serial 1
+
 # For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
 # $ac_aux_dir to `$srcdir/foo'.  In other projects, it is set to
 # `$srcdir', `$srcdir/..', or `$srcdir/../..'.
@@ -8731,14 +8738,14 @@ AC_CONFIG_COMMANDS_PRE(
 Usually this means the macro was only invoked conditionally.]])
 fi])])
 
-# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009
-# Free Software Foundation, Inc.
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009,
+# 2010, 2011 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
-# serial 10
+# serial 12
 
 # There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
 # written in clear, in which case automake, when reading aclocal.m4,
@@ -8778,6 +8785,7 @@ AC_CACHE_CHECK([dependency style of $depcc],
   # instance it was reported that on HP-UX the gcc test will end up
   # making a dummy file named `D' -- because `-MD' means `put the output
   # in D'.
+  rm -rf conftest.dir
   mkdir conftest.dir
   # Copy depcomp to subdir because otherwise we won't find it if we're
   # using a relative directory.
@@ -8842,7 +8850,7 @@ AC_CACHE_CHECK([dependency style of $depcc],
 	break
       fi
       ;;
-    msvisualcpp | msvcmsys)
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
       # This compiler won't grok `-c -o', but also, the minuso test has
       # not run yet.  These depmodes are late enough in the game, and
       # so weak that their functioning should not be impacted.
@@ -8907,10 +8915,13 @@ AC_DEFUN([AM_DEP_TRACK],
 if test "x$enable_dependency_tracking" != xno; then
   am_depcomp="$ac_aux_dir/depcomp"
   AMDEPBACKSLASH='\'
+  am__nodep='_no'
 fi
 AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
 AC_SUBST([AMDEPBACKSLASH])dnl
 _AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
+AC_SUBST([am__nodep])dnl
+_AM_SUBST_NOTMAKE([am__nodep])dnl
 ])
 
 # Generate code to set up dependency tracking.              -*- Autoconf -*-
@@ -9132,12 +9143,15 @@ for _am_header in $config_headers :; do
 done
 echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
 
-# Copyright (C) 2001, 2003, 2005, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2008, 2011 Free Software Foundation,
+# Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
+# serial 1
+
 # AM_PROG_INSTALL_SH
 # ------------------
 # Define $install_sh.
@@ -9269,12 +9283,15 @@ else
 fi
 ])
 
-# Copyright (C) 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
+# Copyright (C) 2003, 2004, 2005, 2006, 2011 Free Software Foundation,
+# Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
+# serial 1
+
 # AM_PROG_MKDIR_P
 # ---------------
 # Check for `mkdir -p'.
@@ -9297,13 +9314,14 @@ esac
 
 # Helper functions for option handling.                     -*- Autoconf -*-
 
-# Copyright (C) 2001, 2002, 2003, 2005, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2002, 2003, 2005, 2008, 2010 Free Software
+# Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
-# serial 4
+# serial 5
 
 # _AM_MANGLE_OPTION(NAME)
 # -----------------------
@@ -9311,13 +9329,13 @@ AC_DEFUN([_AM_MANGLE_OPTION],
 [[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
 
 # _AM_SET_OPTION(NAME)
-# ------------------------------
+# --------------------
 # Set option NAME.  Presently that only means defining a flag for this option.
 AC_DEFUN([_AM_SET_OPTION],
 [m4_define(_AM_MANGLE_OPTION([$1]), 1)])
 
 # _AM_SET_OPTIONS(OPTIONS)
-# ----------------------------------
+# ------------------------
 # OPTIONS is a space-separated list of Automake options.
 AC_DEFUN([_AM_SET_OPTIONS],
 [m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
@@ -9393,12 +9411,14 @@ Check your system clock])
 fi
 AC_MSG_RESULT(yes)])
 
-# Copyright (C) 2001, 2003, 2005  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
+# serial 1
+
 # AM_PROG_INSTALL_STRIP
 # ---------------------
 # One issue with vendor `install' (even GNU) is that you can't
@@ -9421,13 +9441,13 @@ fi
 INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
 AC_SUBST([INSTALL_STRIP_PROGRAM])])
 
-# Copyright (C) 2006, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2006, 2008, 2010 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
-# serial 2
+# serial 3
 
 # _AM_SUBST_NOTMAKE(VARIABLE)
 # ---------------------------
@@ -9436,13 +9456,13 @@ AC_SUBST([INSTALL_STRIP_PROGRAM])])
 AC_DEFUN([_AM_SUBST_NOTMAKE])
 
 # AM_SUBST_NOTMAKE(VARIABLE)
-# ---------------------------
+# --------------------------
 # Public sister of _AM_SUBST_NOTMAKE.
 AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
 
 # Check how to create a tarball.                            -*- Autoconf -*-
 
-# Copyright (C) 2004, 2005  Free Software Foundation, Inc.
+# Copyright (C) 2004, 2005, 2012 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
@@ -9464,10 +9484,11 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
 # a tarball read from stdin.
 #     $(am__untar) < result.tar
 AC_DEFUN([_AM_PROG_TAR],
-[# Always define AMTAR for backward compatibility.
-AM_MISSING_PROG([AMTAR], [tar])
+[# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
 m4_if([$1], [v7],
-     [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
+     [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
      [m4_case([$1], [ustar],, [pax],,
               [m4_fatal([Unknown tar format])])
 AC_MSG_CHECKING([how to create a $1 tar archive])
diff --git a/Test/config.guess b/Test/config.guess
index dc84c68..d622a44 100755
--- a/Test/config.guess
+++ b/Test/config.guess
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
-#   Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2009-11-20'
+timestamp='2012-02-10'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -17,9 +17,7 @@ timestamp='2009-11-20'
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
@@ -56,8 +54,9 @@ version="\
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -144,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
 case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     *:NetBSD:*:*)
 	# NetBSD (nbsd) targets should (where applicable) match one or
-	# more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
 	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
 	# switched to ELF, *-*-netbsd* would select the old
 	# object file format.  This provides both forward
@@ -180,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		fi
 		;;
 	    *)
-	        os=netbsd
+		os=netbsd
 		;;
 	esac
 	# The OS release
@@ -223,7 +222,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
 		;;
 	*5.*)
-	        UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
 		;;
 	esac
 	# According to Compaq, /usr/sbin/psrinfo has been available on
@@ -269,7 +268,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	# A Xn.n version is an unreleased experimental baselevel.
 	# 1.2 uses "1.2" for uname -r.
 	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-	exit ;;
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	exitcode=$?
+	trap '' 0
+	exit $exitcode ;;
     Alpha\ *:Windows_NT*:*)
 	# How do we know it's Interix rather than the generic POSIX subsystem?
 	# Should we change UNAME_MACHINE based on the output of uname instead
@@ -295,7 +297,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	echo s390-ibm-zvmoe
 	exit ;;
     *:OS400:*:*)
-        echo powerpc-ibm-os400
+	echo powerpc-ibm-os400
 	exit ;;
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
 	echo arm-acorn-riscix${UNAME_RELEASE}
@@ -394,23 +396,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     # MiNT.  But MiNT is downward compatible to TOS, so this should
     # be no problem.
     atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
+	echo m68k-atari-mint${UNAME_RELEASE}
 	exit ;;
     atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
 	echo m68k-atari-mint${UNAME_RELEASE}
-        exit ;;
+	exit ;;
     *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
+	echo m68k-atari-mint${UNAME_RELEASE}
 	exit ;;
     milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
-        echo m68k-milan-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-milan-mint${UNAME_RELEASE}
+	exit ;;
     hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
-        echo m68k-hades-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-hades-mint${UNAME_RELEASE}
+	exit ;;
     *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
-        echo m68k-unknown-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-unknown-mint${UNAME_RELEASE}
+	exit ;;
     m68k:machten:*:*)
 	echo m68k-apple-machten${UNAME_RELEASE}
 	exit ;;
@@ -480,8 +482,8 @@ EOF
 	echo m88k-motorola-sysv3
 	exit ;;
     AViiON:dgux:*:*)
-        # DG/UX returns AViiON for all architectures
-        UNAME_PROCESSOR=`/usr/bin/uname -p`
+	# DG/UX returns AViiON for all architectures
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
 	if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
 	then
 	    if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
@@ -494,7 +496,7 @@ EOF
 	else
 	    echo i586-dg-dgux${UNAME_RELEASE}
 	fi
- 	exit ;;
+	exit ;;
     M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
 	echo m88k-dolphin-sysv3
 	exit ;;
@@ -551,7 +553,7 @@ EOF
 		echo rs6000-ibm-aix3.2
 	fi
 	exit ;;
-    *:AIX:*:[456])
+    *:AIX:*:[4567])
 	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
 	if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
 		IBM_ARCH=rs6000
@@ -594,52 +596,52 @@ EOF
 	    9000/[678][0-9][0-9])
 		if [ -x /usr/bin/getconf ]; then
 		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
-                    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
-                    case "${sc_cpu_version}" in
-                      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
-                      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
-                      532)                      # CPU_PA_RISC2_0
-                        case "${sc_kernel_bits}" in
-                          32) HP_ARCH="hppa2.0n" ;;
-                          64) HP_ARCH="hppa2.0w" ;;
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "${sc_cpu_version}" in
+		      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "${sc_kernel_bits}" in
+			  32) HP_ARCH="hppa2.0n" ;;
+			  64) HP_ARCH="hppa2.0w" ;;
 			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
-                        esac ;;
-                    esac
+			esac ;;
+		    esac
 		fi
 		if [ "${HP_ARCH}" = "" ]; then
 		    eval $set_cc_for_build
-		    sed 's/^              //' << EOF >$dummy.c
+		    sed 's/^		//' << EOF >$dummy.c
 
-              #define _HPUX_SOURCE
-              #include <stdlib.h>
-              #include <unistd.h>
+		#define _HPUX_SOURCE
+		#include <stdlib.h>
+		#include <unistd.h>
 
-              int main ()
-              {
-              #if defined(_SC_KERNEL_BITS)
-                  long bits = sysconf(_SC_KERNEL_BITS);
-              #endif
-                  long cpu  = sysconf (_SC_CPU_VERSION);
+		int main ()
+		{
+		#if defined(_SC_KERNEL_BITS)
+		    long bits = sysconf(_SC_KERNEL_BITS);
+		#endif
+		    long cpu  = sysconf (_SC_CPU_VERSION);
 
-                  switch (cpu)
-              	{
-              	case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
-              	case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
-              	case CPU_PA_RISC2_0:
-              #if defined(_SC_KERNEL_BITS)
-              	    switch (bits)
-              		{
-              		case 64: puts ("hppa2.0w"); break;
-              		case 32: puts ("hppa2.0n"); break;
-              		default: puts ("hppa2.0"); break;
-              		} break;
-              #else  /* !defined(_SC_KERNEL_BITS) */
-              	    puts ("hppa2.0"); break;
-              #endif
-              	default: puts ("hppa1.0"); break;
-              	}
-                  exit (0);
-              }
+		    switch (cpu)
+			{
+			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+			case CPU_PA_RISC2_0:
+		#if defined(_SC_KERNEL_BITS)
+			    switch (bits)
+				{
+				case 64: puts ("hppa2.0w"); break;
+				case 32: puts ("hppa2.0n"); break;
+				default: puts ("hppa2.0"); break;
+				} break;
+		#else  /* !defined(_SC_KERNEL_BITS) */
+			    puts ("hppa2.0"); break;
+		#endif
+			default: puts ("hppa1.0"); break;
+			}
+		    exit (0);
+		}
 EOF
 		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
 		    test -z "$HP_ARCH" && HP_ARCH=hppa
@@ -730,22 +732,22 @@ EOF
 	exit ;;
     C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
 	echo c1-convex-bsd
-        exit ;;
+	exit ;;
     C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
 	if getsysinfo -f scalar_acc
 	then echo c32-convex-bsd
 	else echo c2-convex-bsd
 	fi
-        exit ;;
+	exit ;;
     C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
 	echo c34-convex-bsd
-        exit ;;
+	exit ;;
     C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
 	echo c38-convex-bsd
-        exit ;;
+	exit ;;
     C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
 	echo c4-convex-bsd
-        exit ;;
+	exit ;;
     CRAY*Y-MP:*:*:*)
 	echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
 	exit ;;
@@ -769,14 +771,14 @@ EOF
 	exit ;;
     F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
 	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
-        echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-        exit ;;
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
     5000:UNIX_System_V:4.*:*)
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
-        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
 	exit ;;
     i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
 	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
@@ -788,13 +790,12 @@ EOF
 	echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
 	exit ;;
     *:FreeBSD:*:*)
-	case ${UNAME_MACHINE} in
-	    pc98)
-		echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	case ${UNAME_PROCESSOR} in
 	    amd64)
 		echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	    *)
-		echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+		echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	esac
 	exit ;;
     i*:CYGWIN*:*)
@@ -803,15 +804,18 @@ EOF
     *:MINGW*:*)
 	echo ${UNAME_MACHINE}-pc-mingw32
 	exit ;;
+    i*:MSYS*:*)
+	echo ${UNAME_MACHINE}-pc-msys
+	exit ;;
     i*:windows32*:*)
-    	# uname -m includes "-pc" on this system.
-    	echo ${UNAME_MACHINE}-mingw32
+	# uname -m includes "-pc" on this system.
+	echo ${UNAME_MACHINE}-mingw32
 	exit ;;
     i*:PW*:*)
 	echo ${UNAME_MACHINE}-pc-pw32
 	exit ;;
     *:Interix*:*)
-    	case ${UNAME_MACHINE} in
+	case ${UNAME_MACHINE} in
 	    x86)
 		echo i586-pc-interix${UNAME_RELEASE}
 		exit ;;
@@ -857,6 +861,13 @@ EOF
     i*86:Minix:*:*)
 	echo ${UNAME_MACHINE}-pc-minix
 	exit ;;
+    aarch64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    aarch64_be:Linux:*:*)
+	UNAME_MACHINE=aarch64_be
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
     alpha:Linux:*:*)
 	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
 	  EV5)   UNAME_MACHINE=alphaev5 ;;
@@ -866,7 +877,7 @@ EOF
 	  EV6)   UNAME_MACHINE=alphaev6 ;;
 	  EV67)  UNAME_MACHINE=alphaev67 ;;
 	  EV68*) UNAME_MACHINE=alphaev68 ;;
-        esac
+	esac
 	objdump --private-headers /bin/sh | grep -q ld.so.1
 	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
 	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
@@ -878,20 +889,29 @@ EOF
 	then
 	    echo ${UNAME_MACHINE}-unknown-linux-gnu
 	else
-	    echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+		| grep -q __ARM_PCS_VFP
+	    then
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+	    else
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
+	    fi
 	fi
 	exit ;;
     avr32*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     cris:Linux:*:*)
-	echo cris-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-gnu
 	exit ;;
     crisv32:Linux:*:*)
-	echo crisv32-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-gnu
 	exit ;;
     frv:Linux:*:*)
-    	echo frv-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    hexagon:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     i*86:Linux:*:*)
 	LIBC=gnu
@@ -933,7 +953,7 @@ EOF
 	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
 	;;
     or32:Linux:*:*)
-	echo or32-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     padre:Linux:*:*)
 	echo sparc-unknown-linux-gnu
@@ -959,7 +979,7 @@ EOF
 	echo ${UNAME_MACHINE}-ibm-linux
 	exit ;;
     sh64*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     sh*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -967,14 +987,17 @@ EOF
     sparc:Linux:*:* | sparc64:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
+    tile*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
     vax:Linux:*:*)
 	echo ${UNAME_MACHINE}-dec-linux-gnu
 	exit ;;
     x86_64:Linux:*:*)
-	echo x86_64-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     xtensa*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     i*86:DYNIX/ptx:4*:*)
 	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
@@ -983,11 +1006,11 @@ EOF
 	echo i386-sequent-sysv4
 	exit ;;
     i*86:UNIX_SV:4.2MP:2.*)
-        # Unixware is an offshoot of SVR4, but it has its own version
-        # number series starting with 2...
-        # I am not positive that other SVR4 systems won't match this,
+	# Unixware is an offshoot of SVR4, but it has its own version
+	# number series starting with 2...
+	# I am not positive that other SVR4 systems won't match this,
 	# I just have to hope.  -- rms.
-        # Use sysv4.2uw... so that sysv4* matches it.
+	# Use sysv4.2uw... so that sysv4* matches it.
 	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
 	exit ;;
     i*86:OS/2:*:*)
@@ -1019,7 +1042,7 @@ EOF
 	fi
 	exit ;;
     i*86:*:5:[678]*)
-    	# UnixWare 7.x, OpenUNIX and OpenServer 6.
+	# UnixWare 7.x, OpenUNIX and OpenServer 6.
 	case `/bin/uname -X | grep "^Machine"` in
 	    *486*)	     UNAME_MACHINE=i486 ;;
 	    *Pentium)	     UNAME_MACHINE=i586 ;;
@@ -1047,13 +1070,13 @@ EOF
 	exit ;;
     pc:*:*:*)
 	# Left here for compatibility:
-        # uname -m prints for DJGPP always 'pc', but it prints nothing about
-        # the processor, so we play safe by assuming i586.
+	# uname -m prints for DJGPP always 'pc', but it prints nothing about
+	# the processor, so we play safe by assuming i586.
 	# Note: whatever this is, it MUST be the same as what config.sub
 	# prints for the "djgpp" host, or else GDB configury will decide that
 	# this is a cross-build.
 	echo i586-pc-msdosdjgpp
-        exit ;;
+	exit ;;
     Intel:Mach:3*:*)
 	echo i386-pc-mach3
 	exit ;;
@@ -1088,8 +1111,8 @@ EOF
 	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
 	  && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
     3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
-        /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-          && { echo i486-ncr-sysv4; exit; } ;;
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4; exit; } ;;
     NCR*:*:4.2:* | MPRAS*:*:4.2:*)
 	OS_REL='.3'
 	test -r /etc/.relid \
@@ -1132,10 +1155,10 @@ EOF
 		echo ns32k-sni-sysv
 	fi
 	exit ;;
-    PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-                      # says <Richard.M.Bartel at ccMail.Census.GOV>
-        echo i586-unisys-sysv4
-        exit ;;
+    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+			# says <Richard.M.Bartel at ccMail.Census.GOV>
+	echo i586-unisys-sysv4
+	exit ;;
     *:UNIX_System_V:4*:FTX*)
 	# From Gerald Hewes <hewes at openmarket.com>.
 	# How about differentiating between stratus architectures? -djm
@@ -1161,11 +1184,11 @@ EOF
 	exit ;;
     R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
 	if [ -d /usr/nec ]; then
-	        echo mips-nec-sysv${UNAME_RELEASE}
+		echo mips-nec-sysv${UNAME_RELEASE}
 	else
-	        echo mips-unknown-sysv${UNAME_RELEASE}
+		echo mips-unknown-sysv${UNAME_RELEASE}
 	fi
-        exit ;;
+	exit ;;
     BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
 	echo powerpc-be-beos
 	exit ;;
@@ -1230,6 +1253,9 @@ EOF
     *:QNX:*:4*)
 	echo i386-pc-qnx
 	exit ;;
+    NEO-?:NONSTOP_KERNEL:*:*)
+	echo neo-tandem-nsk${UNAME_RELEASE}
+	exit ;;
     NSE-?:NONSTOP_KERNEL:*:*)
 	echo nse-tandem-nsk${UNAME_RELEASE}
 	exit ;;
@@ -1275,13 +1301,13 @@ EOF
 	echo pdp10-unknown-its
 	exit ;;
     SEI:*:*:SEIUX)
-        echo mips-sei-seiux${UNAME_RELEASE}
+	echo mips-sei-seiux${UNAME_RELEASE}
 	exit ;;
     *:DragonFly:*:*)
 	echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
 	exit ;;
     *:*VMS:*:*)
-    	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
 	case "${UNAME_MACHINE}" in
 	    A*) echo alpha-dec-vms ; exit ;;
 	    I*) echo ia64-dec-vms ; exit ;;
@@ -1299,6 +1325,9 @@ EOF
     i*86:AROS:*:*)
 	echo ${UNAME_MACHINE}-pc-aros
 	exit ;;
+    x86_64:VMkernel:*:*)
+	echo ${UNAME_MACHINE}-unknown-esx
+	exit ;;
 esac
 
 #echo '(No uname command or uname output not recognized.)' 1>&2
@@ -1321,11 +1350,11 @@ main ()
 #include <sys/param.h>
   printf ("m68k-sony-newsos%s\n",
 #ifdef NEWSOS4
-          "4"
+	"4"
 #else
-	  ""
+	""
 #endif
-         ); exit (0);
+	); exit (0);
 #endif
 #endif
 
diff --git a/Test/config.sub b/Test/config.sub
index 2a55a50..6205f84 100755
--- a/Test/config.sub
+++ b/Test/config.sub
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Configuration validation subroutine script.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
-#   Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2009-11-20'
+timestamp='2012-04-18'
 
 # This file is (in principle) common to ALL GNU software.
 # The presence of a machine in this file suggests that SOME GNU software
@@ -21,9 +21,7 @@ timestamp='2009-11-20'
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
@@ -75,8 +73,9 @@ Report bugs and patches to <config-patches at gnu.org>."
 version="\
 GNU config.sub ($timestamp)
 
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -123,13 +122,18 @@ esac
 # Here we must recognize all the valid KERNEL-OS combinations.
 maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
 case $maybe_os in
-  nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
-  uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
+  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+  linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  knetbsd*-gnu* | netbsd*-gnu* | \
   kopensolaris*-gnu* | \
   storm-chaos* | os2-emx* | rtmk-nova*)
     os=-$maybe_os
     basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
     ;;
+  android-linux)
+    os=-linux-android
+    basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+    ;;
   *)
     basic_machine=`echo $1 | sed 's/-[^-]*$//'`
     if [ $basic_machine != $1 ]
@@ -156,8 +160,8 @@ case $os in
 		os=
 		basic_machine=$1
 		;;
-        -bluegene*)
-	        os=-cnk
+	-bluegene*)
+		os=-cnk
 		;;
 	-sim | -cisco | -oki | -wec | -winbond)
 		os=
@@ -173,10 +177,10 @@ case $os in
 		os=-chorusos
 		basic_machine=$1
 		;;
- 	-chorusrdb)
- 		os=-chorusrdb
+	-chorusrdb)
+		os=-chorusrdb
 		basic_machine=$1
- 		;;
+		;;
 	-hiux*)
 		os=-hiuxwe2
 		;;
@@ -221,6 +225,12 @@ case $os in
 	-isc*)
 		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
 		;;
+	-lynx*178)
+		os=-lynxos178
+		;;
+	-lynx*5)
+		os=-lynxos5
+		;;
 	-lynx*)
 		os=-lynxos
 		;;
@@ -245,17 +255,22 @@ case $basic_machine in
 	# Some are omitted here because they have special meanings below.
 	1750a | 580 \
 	| a29k \
+	| aarch64 | aarch64_be \
 	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
 	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
 	| am33_2.0 \
 	| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+        | be32 | be64 \
 	| bfin \
 	| c4x | clipper \
 	| d10v | d30v | dlx | dsp16xx \
+	| epiphany \
 	| fido | fr30 | frv \
 	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+	| hexagon \
 	| i370 | i860 | i960 | ia64 \
 	| ip2k | iq2000 \
+	| le32 | le64 \
 	| lm32 \
 	| m32c | m32r | m32rle | m68000 | m68k | m88k \
 	| maxq | mb | microblaze | mcore | mep | metag \
@@ -281,29 +296,39 @@ case $basic_machine in
 	| moxie \
 	| mt \
 	| msp430 \
+	| nds32 | nds32le | nds32be \
 	| nios | nios2 \
 	| ns16k | ns32k \
+	| open8 \
 	| or32 \
 	| pdp10 | pdp11 | pj | pjl \
-	| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
+	| powerpc | powerpc64 | powerpc64le | powerpcle \
 	| pyramid \
-	| rx \
+	| rl78 | rx \
 	| score \
 	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
 	| sh64 | sh64le \
 	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
 	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
-	| spu | strongarm \
-	| tahoe | thumb | tic4x | tic80 | tron \
+	| spu \
+	| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
 	| ubicom32 \
-	| v850 | v850e \
+	| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
 	| we32k \
-	| x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
+	| x86 | xc16x | xstormy16 | xtensa \
 	| z8k | z80)
 		basic_machine=$basic_machine-unknown
 		;;
-	m6811 | m68hc11 | m6812 | m68hc12 | picochip)
-		# Motorola 68HC11/12.
+	c54x)
+		basic_machine=tic54x-unknown
+		;;
+	c55x)
+		basic_machine=tic55x-unknown
+		;;
+	c6x)
+		basic_machine=tic6x-unknown
+		;;
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
 		basic_machine=$basic_machine-unknown
 		os=-none
 		;;
@@ -313,6 +338,21 @@ case $basic_machine in
 		basic_machine=mt-unknown
 		;;
 
+	strongarm | thumb | xscale)
+		basic_machine=arm-unknown
+		;;
+	xgate)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	xscaleeb)
+		basic_machine=armeb-unknown
+		;;
+
+	xscaleel)
+		basic_machine=armel-unknown
+		;;
+
 	# We use `pc' rather than `unknown'
 	# because (1) that's what they normally are, and
 	# (2) the word "unknown" tends to confuse beginning users.
@@ -327,21 +367,25 @@ case $basic_machine in
 	# Recognize the basic CPU types with company name.
 	580-* \
 	| a29k-* \
+	| aarch64-* | aarch64_be-* \
 	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
 	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
 	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
 	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
 	| avr-* | avr32-* \
+	| be32-* | be64-* \
 	| bfin-* | bs2000-* \
-	| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
+	| c[123]* | c30-* | [cjt]90-* | c4x-* \
 	| clipper-* | craynv-* | cydra-* \
 	| d10v-* | d30v-* | dlx-* \
 	| elxsi-* \
 	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
 	| h8300-* | h8500-* \
 	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+	| hexagon-* \
 	| i*86-* | i860-* | i960-* | ia64-* \
 	| ip2k-* | iq2000-* \
+	| le32-* | le64-* \
 	| lm32-* \
 	| m32c-* | m32r-* | m32rle-* \
 	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
@@ -367,25 +411,29 @@ case $basic_machine in
 	| mmix-* \
 	| mt-* \
 	| msp430-* \
+	| nds32-* | nds32le-* | nds32be-* \
 	| nios-* | nios2-* \
 	| none-* | np1-* | ns16k-* | ns32k-* \
+	| open8-* \
 	| orion-* \
 	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
-	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
+	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
 	| pyramid-* \
-	| romp-* | rs6000-* | rx-* \
+	| rl78-* | romp-* | rs6000-* | rx-* \
 	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
 	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
 	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
 	| sparclite-* \
-	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
-	| tahoe-* | thumb-* \
-	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* | tile-* \
+	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
+	| tahoe-* \
+	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+	| tile*-* \
 	| tron-* \
 	| ubicom32-* \
-	| v850-* | v850e-* | vax-* \
+	| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+	| vax-* \
 	| we32k-* \
-	| x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
+	| x86-* | x86_64-* | xc16x-* | xps100-* \
 	| xstormy16-* | xtensa*-* \
 	| ymp-* \
 	| z8k-* | z80-*)
@@ -410,7 +458,7 @@ case $basic_machine in
 		basic_machine=a29k-amd
 		os=-udi
 		;;
-    	abacus)
+	abacus)
 		basic_machine=abacus-unknown
 		;;
 	adobe68k)
@@ -480,11 +528,20 @@ case $basic_machine in
 		basic_machine=powerpc-ibm
 		os=-cnk
 		;;
+	c54x-*)
+		basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c55x-*)
+		basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c6x-*)
+		basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
 	c90)
 		basic_machine=c90-cray
 		os=-unicos
 		;;
-        cegcc)
+	cegcc)
 		basic_machine=arm-unknown
 		os=-cegcc
 		;;
@@ -516,7 +573,7 @@ case $basic_machine in
 		basic_machine=craynv-cray
 		os=-unicosmp
 		;;
-	cr16)
+	cr16 | cr16-*)
 		basic_machine=cr16-unknown
 		os=-elf
 		;;
@@ -674,7 +731,6 @@ case $basic_machine in
 	i370-ibm* | ibm*)
 		basic_machine=i370-ibm
 		;;
-# I'm not sure what "Sysv32" means.  Should this be sysv3.2?
 	i*86v32)
 		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
 		os=-sysv32
@@ -732,7 +788,7 @@ case $basic_machine in
 		basic_machine=ns32k-utek
 		os=-sysv
 		;;
-        microblaze)
+	microblaze)
 		basic_machine=microblaze-xilinx
 		;;
 	mingw32)
@@ -771,10 +827,18 @@ case $basic_machine in
 	ms1-*)
 		basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
 		;;
+	msys)
+		basic_machine=i386-pc
+		os=-msys
+		;;
 	mvs)
 		basic_machine=i370-ibm
 		os=-mvs
 		;;
+	nacl)
+		basic_machine=le32-unknown
+		os=-nacl
+		;;
 	ncr3000)
 		basic_machine=i486-ncr
 		os=-sysv4
@@ -839,6 +903,12 @@ case $basic_machine in
 	np1)
 		basic_machine=np1-gould
 		;;
+	neo-tandem)
+		basic_machine=neo-tandem
+		;;
+	nse-tandem)
+		basic_machine=nse-tandem
+		;;
 	nsr-tandem)
 		basic_machine=nsr-tandem
 		;;
@@ -921,9 +991,10 @@ case $basic_machine in
 		;;
 	power)	basic_machine=power-ibm
 		;;
-	ppc)	basic_machine=powerpc-unknown
+	ppc | ppcbe)	basic_machine=powerpc-unknown
 		;;
-	ppc-*)	basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+	ppc-* | ppcbe-*)
+		basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
 		;;
 	ppcle | powerpclittle | ppc-le | powerpc-little)
 		basic_machine=powerpcle-unknown
@@ -1017,6 +1088,9 @@ case $basic_machine in
 		basic_machine=i860-stratus
 		os=-sysv4
 		;;
+	strongarm-* | thumb-*)
+		basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
 	sun2)
 		basic_machine=m68000-sun
 		;;
@@ -1073,20 +1147,8 @@ case $basic_machine in
 		basic_machine=t90-cray
 		os=-unicos
 		;;
-	tic54x | c54x*)
-		basic_machine=tic54x-unknown
-		os=-coff
-		;;
-	tic55x | c55x*)
-		basic_machine=tic55x-unknown
-		os=-coff
-		;;
-	tic6x | c6x*)
-		basic_machine=tic6x-unknown
-		os=-coff
-		;;
 	tile*)
-		basic_machine=tile-unknown
+		basic_machine=$basic_machine-unknown
 		os=-linux-gnu
 		;;
 	tx39)
@@ -1156,6 +1218,9 @@ case $basic_machine in
 	xps | xps100)
 		basic_machine=xps100-honeywell
 		;;
+	xscale-* | xscalee[bl]-*)
+		basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
+		;;
 	ymp)
 		basic_machine=ymp-cray
 		os=-unicos
@@ -1253,11 +1318,11 @@ esac
 if [ x"$os" != x"" ]
 then
 case $os in
-        # First match some system type aliases
-        # that might get confused with valid system types.
+	# First match some system type aliases
+	# that might get confused with valid system types.
 	# -solaris* is a basic system type, with this one exception.
-        -auroraux)
-	        os=-auroraux
+	-auroraux)
+		os=-auroraux
 		;;
 	-solaris1 | -solaris1.*)
 		os=`echo $os | sed -e 's|solaris1|sunos4|'`
@@ -1293,8 +1358,9 @@ case $os in
 	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
 	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
 	      | -chorusos* | -chorusrdb* | -cegcc* \
-	      | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-	      | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
+	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+	      | -mingw32* | -linux-gnu* | -linux-android* \
+	      | -linux-newlib* | -linux-uclibc* \
 	      | -uxpv* | -beos* | -mpeix* | -udk* \
 	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
 	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
@@ -1341,7 +1407,7 @@ case $os in
 	-opened*)
 		os=-openedition
 		;;
-        -os400*)
+	-os400*)
 		os=-os400
 		;;
 	-wince*)
@@ -1390,7 +1456,7 @@ case $os in
 	-sinix*)
 		os=-sysv4
 		;;
-        -tpf*)
+	-tpf*)
 		os=-tpf
 		;;
 	-triton*)
@@ -1435,6 +1501,8 @@ case $os in
 	-dicos*)
 		os=-dicos
 		;;
+	-nacl*)
+		;;
 	-none)
 		;;
 	*)
@@ -1457,10 +1525,10 @@ else
 # system, and we'll never get to this point.
 
 case $basic_machine in
-        score-*)
+	score-*)
 		os=-elf
 		;;
-        spu-*)
+	spu-*)
 		os=-elf
 		;;
 	*-acorn)
@@ -1472,8 +1540,20 @@ case $basic_machine in
 	arm*-semi)
 		os=-aout
 		;;
-        c4x-* | tic4x-*)
-        	os=-coff
+	c4x-* | tic4x-*)
+		os=-coff
+		;;
+	hexagon-*)
+		os=-elf
+		;;
+	tic54x-*)
+		os=-coff
+		;;
+	tic55x-*)
+		os=-coff
+		;;
+	tic6x-*)
+		os=-coff
 		;;
 	# This must come before the *-dec entry.
 	pdp10-*)
@@ -1493,14 +1573,11 @@ case $basic_machine in
 		;;
 	m68000-sun)
 		os=-sunos3
-		# This also exists in the configure program, but was not the
-		# default.
-		# os=-sunos4
 		;;
 	m68*-cisco)
 		os=-aout
 		;;
-        mep-*)
+	mep-*)
 		os=-elf
 		;;
 	mips*-cisco)
@@ -1527,7 +1604,7 @@ case $basic_machine in
 	*-ibm)
 		os=-aix
 		;;
-    	*-knuth)
+	*-knuth)
 		os=-mmixware
 		;;
 	*-wec)
diff --git a/Test/configure b/Test/configure
index 90b8f94..d2a69a8 100755
--- a/Test/configure
+++ b/Test/configure
@@ -1,11 +1,9 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.68.
+# Generated by GNU Autoconf 2.69.
 #
 #
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
-# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
-# Foundation, Inc.
+# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
 #
 #
 # This configure script is free software; the Free Software Foundation
@@ -134,6 +132,31 @@ export LANGUAGE
 # CDPATH.
 (unset CDPATH) >/dev/null 2>&1 && unset CDPATH
 
+# Use a proper internal environment variable to ensure we don't fall
+  # into an infinite loop, continuously re-executing ourselves.
+  if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+    _as_can_reexec=no; export _as_can_reexec;
+    # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+as_fn_exit 255
+  fi
+  # We don't want this to propagate to other subprocesses.
+          { _as_can_reexec=; unset _as_can_reexec;}
 if test "x$CONFIG_SHELL" = x; then
   as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
   emulate sh
@@ -167,7 +190,8 @@ if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
 else
   exitcode=1; echo positional parameters were not saved.
 fi
-test x\$exitcode = x0 || exit 1"
+test x\$exitcode = x0 || exit 1
+test -x / || exit 1"
   as_suggested="  as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
   as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
   eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
@@ -220,21 +244,25 @@ IFS=$as_save_IFS
 
 
       if test "x$CONFIG_SHELL" != x; then :
-  # We cannot yet assume a decent shell, so we have to provide a
-	# neutralization value for shells without unset; and this also
-	# works around shells that cannot unset nonexistent variables.
-	# Preserve -v and -x to the replacement shell.
-	BASH_ENV=/dev/null
-	ENV=/dev/null
-	(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
-	export CONFIG_SHELL
-	case $- in # ((((
-	  *v*x* | *x*v* ) as_opts=-vx ;;
-	  *v* ) as_opts=-v ;;
-	  *x* ) as_opts=-x ;;
-	  * ) as_opts= ;;
-	esac
-	exec "$CONFIG_SHELL" $as_opts "$as_myself" ${1+"$@"}
+  export CONFIG_SHELL
+             # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
 fi
 
     if test x$as_have_required = xno; then :
@@ -336,6 +364,14 @@ $as_echo X"$as_dir" |
 
 
 } # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
 # as_fn_append VAR VALUE
 # ----------------------
 # Append the text in VALUE to the end of the definition contained in VAR. Take
@@ -457,6 +493,10 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits
   chmod +x "$as_me.lineno" ||
     { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
 
+  # If we had to re-execute with $CONFIG_SHELL, we're ensured to have
+  # already done that, so ensure we don't try to do so again and fall
+  # in an infinite loop.  This has already happened in practice.
+  _as_can_reexec=no; export _as_can_reexec
   # Don't try to exec as it changes $[0], causing all sort of problems
   # (the dirname of $[0] is not the place where we might find the
   # original and so on.  Autoconf is especially sensitive to this).
@@ -491,16 +531,16 @@ if (echo >conf$$.file) 2>/dev/null; then
     # ... but there are two gotchas:
     # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
     # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
-    # In both cases, we have to default to `cp -p'.
+    # In both cases, we have to default to `cp -pR'.
     ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
-      as_ln_s='cp -p'
+      as_ln_s='cp -pR'
   elif ln conf$$.file conf$$ 2>/dev/null; then
     as_ln_s=ln
   else
-    as_ln_s='cp -p'
+    as_ln_s='cp -pR'
   fi
 else
-  as_ln_s='cp -p'
+  as_ln_s='cp -pR'
 fi
 rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
 rmdir conf$$.dir 2>/dev/null
@@ -512,28 +552,8 @@ else
   as_mkdir_p=false
 fi
 
-if test -x / >/dev/null 2>&1; then
-  as_test_x='test -x'
-else
-  if ls -dL / >/dev/null 2>&1; then
-    as_ls_L_option=L
-  else
-    as_ls_L_option=
-  fi
-  as_test_x='
-    eval sh -c '\''
-      if test -d "$1"; then
-	test -d "$1/.";
-      else
-	case $1 in #(
-	-*)set "./$1";;
-	esac;
-	case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
-	???[sx]*):;;*)false;;esac;fi
-    '\'' sh
-  '
-fi
-as_executable_p=$as_test_x
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
 
 # Sed expression to map a string onto a valid CPP name.
 as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@@ -646,6 +666,7 @@ SED
 am__fastdepCC_FALSE
 am__fastdepCC_TRUE
 CCDEPMODE
+am__nodep
 AMDEPBACKSLASH
 AMDEP_FALSE
 AMDEP_TRUE
@@ -1219,8 +1240,6 @@ target=$target_alias
 if test "x$host_alias" != x; then
   if test "x$build_alias" = x; then
     cross_compiling=maybe
-    $as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host.
-    If a cross compiler is detected then cross compile mode will be used" >&2
   elif test "x$build_alias" != "x$host_alias"; then
     cross_compiling=yes
   fi
@@ -1483,9 +1502,9 @@ test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
 configure
-generated by GNU Autoconf 2.68
+generated by GNU Autoconf 2.69
 
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2012 Free Software Foundation, Inc.
 This configure script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it.
 _ACEOF
@@ -1561,7 +1580,7 @@ $as_echo "$ac_try_echo"; } >&5
 	 test ! -s conftest.err
        } && test -s conftest$ac_exeext && {
 	 test "$cross_compiling" = yes ||
-	 $as_test_x conftest$ac_exeext
+	 test -x conftest$ac_exeext
        }; then :
   ac_retval=0
 else
@@ -1859,7 +1878,7 @@ $as_echo "$ac_try_echo"; } >&5
 	 test ! -s conftest.err
        } && test -s conftest$ac_exeext && {
 	 test "$cross_compiling" = yes ||
-	 $as_test_x conftest$ac_exeext
+	 test -x conftest$ac_exeext
        }; then :
   ac_retval=0
 else
@@ -1882,7 +1901,7 @@ This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
 It was created by $as_me, which was
-generated by GNU Autoconf 2.68.  Invocation command line was
+generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
 
@@ -2410,7 +2429,7 @@ case $as_dir/ in #((
     # by default.
     for ac_prog in ginstall scoinst install; do
       for ac_exec_ext in '' $ac_executable_extensions; do
-	if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then
+	if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
 	  if test $ac_prog = install &&
 	    grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
 	    # AIX install.  It has an incompatible calling convention.
@@ -2579,7 +2598,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_STRIP="${ac_tool_prefix}strip"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -2619,7 +2638,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_STRIP="strip"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -2670,7 +2689,7 @@ do
   test -z "$as_dir" && as_dir=.
     for ac_prog in mkdir gmkdir; do
 	 for ac_exec_ext in '' $ac_executable_extensions; do
-	   { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; } || continue
+	   as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue
 	   case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
 	     'mkdir (GNU coreutils) '* | \
 	     'mkdir (coreutils) '* | \
@@ -2723,7 +2742,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_AWK="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -2839,11 +2858,11 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
 
 # We need awk for the "check" target.  The system "awk" is bad on
 # some platforms.
-# Always define AMTAR for backward compatibility.
+# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AMTAR='$${TAR-tar}'
 
-AMTAR=${AMTAR-"${am_missing_run}tar"}
-
-am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'
+am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
 
 
 
@@ -2873,7 +2892,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_AS="${ac_tool_prefix}as"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -2913,7 +2932,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_AS="as"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -2965,7 +2984,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3005,7 +3024,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_DLLTOOL="dlltool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3057,7 +3076,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3097,7 +3116,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_OBJDUMP="objdump"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3301,6 +3320,7 @@ fi
 if test "x$enable_dependency_tracking" != xno; then
   am_depcomp="$ac_aux_dir/depcomp"
   AMDEPBACKSLASH='\'
+  am__nodep='_no'
 fi
  if test "x$enable_dependency_tracking" != xno; then
   AMDEP_TRUE=
@@ -3333,7 +3353,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="${ac_tool_prefix}gcc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3373,7 +3393,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_CC="gcc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3426,7 +3446,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="${ac_tool_prefix}cc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3467,7 +3487,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
        ac_prog_rejected=yes
        continue
@@ -3525,7 +3545,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3569,7 +3589,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_CC="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -4015,8 +4035,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <stdarg.h>
 #include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
+struct stat;
 /* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
 struct buf { int x; };
 FILE * (*rcsopen) (struct buf *, struct stat *, int);
@@ -4114,6 +4133,7 @@ else
   # instance it was reported that on HP-UX the gcc test will end up
   # making a dummy file named `D' -- because `-MD' means `put the output
   # in D'.
+  rm -rf conftest.dir
   mkdir conftest.dir
   # Copy depcomp to subdir because otherwise we won't find it if we're
   # using a relative directory.
@@ -4173,7 +4193,7 @@ else
 	break
       fi
       ;;
-    msvisualcpp | msvcmsys)
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
       # This compiler won't grok `-c -o', but also, the minuso test has
       # not run yet.  These depmodes are late enough in the game, and
       # so weak that their functioning should not be impacted.
@@ -4250,7 +4270,7 @@ do
     for ac_prog in sed gsed; do
     for ac_exec_ext in '' $ac_executable_extensions; do
       ac_path_SED="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_SED" && $as_test_x "$ac_path_SED"; } || continue
+      as_fn_executable_p "$ac_path_SED" || continue
 # Check for GNU ac_path_SED and select it if it is found.
   # Check for GNU $ac_path_SED
 case `"$ac_path_SED" --version 2>&1` in
@@ -4326,7 +4346,7 @@ do
     for ac_prog in grep ggrep; do
     for ac_exec_ext in '' $ac_executable_extensions; do
       ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+      as_fn_executable_p "$ac_path_GREP" || continue
 # Check for GNU ac_path_GREP and select it if it is found.
   # Check for GNU $ac_path_GREP
 case `"$ac_path_GREP" --version 2>&1` in
@@ -4392,7 +4412,7 @@ do
     for ac_prog in egrep; do
     for ac_exec_ext in '' $ac_executable_extensions; do
       ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+      as_fn_executable_p "$ac_path_EGREP" || continue
 # Check for GNU ac_path_EGREP and select it if it is found.
   # Check for GNU $ac_path_EGREP
 case `"$ac_path_EGREP" --version 2>&1` in
@@ -4459,7 +4479,7 @@ do
     for ac_prog in fgrep; do
     for ac_exec_ext in '' $ac_executable_extensions; do
       ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_FGREP" && $as_test_x "$ac_path_FGREP"; } || continue
+      as_fn_executable_p "$ac_path_FGREP" || continue
 # Check for GNU ac_path_FGREP and select it if it is found.
   # Check for GNU $ac_path_FGREP
 case `"$ac_path_FGREP" --version 2>&1` in
@@ -4715,7 +4735,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -4759,7 +4779,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5183,7 +5203,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5223,7 +5243,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_OBJDUMP="objdump"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5346,10 +5366,6 @@ freebsd* | dragonfly*)
   fi
   ;;
 
-gnu*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
 haiku*)
   lt_cv_deplibs_check_method=pass_all
   ;;
@@ -5388,11 +5404,11 @@ irix5* | irix6* | nonstopux*)
   ;;
 
 # This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
   lt_cv_deplibs_check_method=pass_all
   ;;
 
-netbsd*)
+netbsd* | netbsdelf*-gnu)
   if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
     lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$'
   else
@@ -5526,7 +5542,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5566,7 +5582,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_DLLTOOL="dlltool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5667,7 +5683,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_AR="$ac_tool_prefix$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5711,7 +5727,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_AR="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5836,7 +5852,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_STRIP="${ac_tool_prefix}strip"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5876,7 +5892,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_STRIP="strip"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5935,7 +5951,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5975,7 +5991,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_RANLIB="ranlib"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6624,7 +6640,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6664,7 +6680,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_MANIFEST_TOOL="mt"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6744,7 +6760,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6784,7 +6800,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_DSYMUTIL="dsymutil"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6836,7 +6852,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6876,7 +6892,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_NMEDIT="nmedit"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6928,7 +6944,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_LIPO="${ac_tool_prefix}lipo"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6968,7 +6984,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_LIPO="lipo"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7020,7 +7036,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_OTOOL="${ac_tool_prefix}otool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7060,7 +7076,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_OTOOL="otool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7112,7 +7128,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7152,7 +7168,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_OTOOL64="otool64"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -8306,7 +8322,7 @@ lt_prog_compiler_static=
       lt_prog_compiler_static='-non_shared'
       ;;
 
-    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+    linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
       case $cc_basename in
       # old Intel for x86_64 which still supported -KPIC.
       ecc*)
@@ -8784,6 +8800,9 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
   openbsd*)
     with_gnu_ld=no
     ;;
+  linux* | k*bsd*-gnu | gnu*)
+    link_all_deplibs=no
+    ;;
   esac
 
   ld_shlibs=yes
@@ -9005,7 +9024,7 @@ _LT_EOF
       fi
       ;;
 
-    netbsd*)
+    netbsd* | netbsdelf*-gnu)
       if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
 	archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
 	wlarc=
@@ -9182,6 +9201,7 @@ _LT_EOF
 	if test "$aix_use_runtimelinking" = yes; then
 	  shared_flag="$shared_flag "'${wl}-G'
 	fi
+	link_all_deplibs=no
       else
 	# not using gcc
 	if test "$host_cpu" = ia64; then
@@ -9635,7 +9655,7 @@ $as_echo "$lt_cv_irix_exported_symbol" >&6; }
       link_all_deplibs=yes
       ;;
 
-    netbsd*)
+    netbsd* | netbsdelf*-gnu)
       if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
 	archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
       else
@@ -10472,17 +10492,6 @@ freebsd* | dragonfly*)
   esac
   ;;
 
-gnu*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  ;;
-
 haiku*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
@@ -10599,7 +10608,7 @@ linux*oldld* | linux*aout* | linux*coff*)
   ;;
 
 # This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
@@ -10663,6 +10672,18 @@ fi
   dynamic_linker='GNU/Linux ld.so'
   ;;
 
+netbsdelf*-gnu)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='NetBSD ld.elf_so'
+  ;;
+
 netbsd*)
   version_type=sunos
   need_lib_prefix=no
@@ -11686,7 +11707,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="${ac_tool_prefix}gcc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -11726,7 +11747,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_CC="gcc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -11779,7 +11800,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="${ac_tool_prefix}cc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -11820,7 +11841,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
        ac_prog_rejected=yes
        continue
@@ -11878,7 +11899,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -11922,7 +11943,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_CC="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -12118,8 +12139,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <stdarg.h>
 #include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
+struct stat;
 /* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
 struct buf { int x; };
 FILE * (*rcsopen) (struct buf *, struct stat *, int);
@@ -12217,6 +12237,7 @@ else
   # instance it was reported that on HP-UX the gcc test will end up
   # making a dummy file named `D' -- because `-MD' means `put the output
   # in D'.
+  rm -rf conftest.dir
   mkdir conftest.dir
   # Copy depcomp to subdir because otherwise we won't find it if we're
   # using a relative directory.
@@ -12276,7 +12297,7 @@ else
 	break
       fi
       ;;
-    msvisualcpp | msvcmsys)
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
       # This compiler won't grok `-c -o', but also, the minuso test has
       # not run yet.  These depmodes are late enough in the game, and
       # so weak that their functioning should not be impacted.
@@ -12359,7 +12380,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -12403,7 +12424,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_CXX="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -12601,6 +12622,7 @@ else
   # instance it was reported that on HP-UX the gcc test will end up
   # making a dummy file named `D' -- because `-MD' means `put the output
   # in D'.
+  rm -rf conftest.dir
   mkdir conftest.dir
   # Copy depcomp to subdir because otherwise we won't find it if we're
   # using a relative directory.
@@ -12660,7 +12682,7 @@ else
 	break
       fi
       ;;
-    msvisualcpp | msvcmsys)
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
       # This compiler won't grok `-c -o', but also, the minuso test has
       # not run yet.  These depmodes are late enough in the game, and
       # so weak that their functioning should not be impacted.
@@ -13524,9 +13546,6 @@ fi
         ld_shlibs_CXX=yes
         ;;
 
-      gnu*)
-        ;;
-
       haiku*)
         archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
         link_all_deplibs_CXX=yes
@@ -13688,7 +13707,7 @@ fi
         inherit_rpath_CXX=yes
         ;;
 
-      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
         case $cc_basename in
           KCC*)
 	    # Kuck and Associates, Inc. (KAI) C++ Compiler
@@ -14548,7 +14567,7 @@ lt_prog_compiler_static_CXX=
 	    ;;
 	esac
 	;;
-      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
 	case $cc_basename in
 	  KCC*)
 	    # KAI C++ Compiler
@@ -14612,7 +14631,7 @@ lt_prog_compiler_static_CXX=
 	    ;;
 	esac
 	;;
-      netbsd*)
+      netbsd* | netbsdelf*-gnu)
 	;;
       *qnx* | *nto*)
         # QNX uses GNU C++, but need to define -shared option too, otherwise
@@ -14983,6 +15002,9 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
       ;;
     esac
     ;;
+  linux* | k*bsd*-gnu | gnu*)
+    link_all_deplibs_CXX=no
+    ;;
   *)
     export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
     ;;
@@ -15417,17 +15439,6 @@ freebsd* | dragonfly*)
   esac
   ;;
 
-gnu*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  ;;
-
 haiku*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
@@ -15544,7 +15555,7 @@ linux*oldld* | linux*aout* | linux*coff*)
   ;;
 
 # This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
@@ -15608,6 +15619,18 @@ fi
   dynamic_linker='GNU/Linux ld.so'
   ;;
 
+netbsdelf*-gnu)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='NetBSD ld.elf_so'
+  ;;
+
 netbsd*)
   version_type=sunos
   need_lib_prefix=no
@@ -15927,11 +15950,11 @@ else
 int
 main ()
 {
-/* FIXME: Include the comments suggested by Paul. */
+
 #ifndef __cplusplus
-  /* Ultrix mips cc rejects this.  */
+  /* Ultrix mips cc rejects this sort of thing.  */
   typedef int charset[2];
-  const charset cs;
+  const charset cs = { 0, 0 };
   /* SunOS 4.1.1 cc rejects this.  */
   char const *const *pcpcc;
   char **ppc;
@@ -15948,8 +15971,9 @@ main ()
   ++pcpcc;
   ppc = (char**) pcpcc;
   pcpcc = (char const *const *) ppc;
-  { /* SCO 3.2v4 cc rejects this.  */
-    char *t;
+  { /* SCO 3.2v4 cc rejects this sort of thing.  */
+    char tx;
+    char *t = &tx;
     char const *s = 0 ? (char *) 0 : (char const *) 0;
 
     *t++ = 0;
@@ -15965,10 +15989,10 @@ main ()
     iptr p = 0;
     ++p;
   }
-  { /* AIX XL C 1.02.0.0 rejects this saying
+  { /* AIX XL C 1.02.0.0 rejects this sort of thing, saying
        "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */
-    struct s { int j; const int *ap[3]; };
-    struct s *b; b->j = 5;
+    struct s { int j; const int *ap[3]; } bx;
+    struct s *b = &bx; b->j = 5;
   }
   { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */
     const int foo = 10;
@@ -16098,7 +16122,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_path_SDL_CONFIG="$as_dir/$ac_word$ac_exec_ext"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -16817,16 +16841,16 @@ if (echo >conf$$.file) 2>/dev/null; then
     # ... but there are two gotchas:
     # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
     # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
-    # In both cases, we have to default to `cp -p'.
+    # In both cases, we have to default to `cp -pR'.
     ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
-      as_ln_s='cp -p'
+      as_ln_s='cp -pR'
   elif ln conf$$.file conf$$ 2>/dev/null; then
     as_ln_s=ln
   else
-    as_ln_s='cp -p'
+    as_ln_s='cp -pR'
   fi
 else
-  as_ln_s='cp -p'
+  as_ln_s='cp -pR'
 fi
 rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
 rmdir conf$$.dir 2>/dev/null
@@ -16886,28 +16910,16 @@ else
   as_mkdir_p=false
 fi
 
-if test -x / >/dev/null 2>&1; then
-  as_test_x='test -x'
-else
-  if ls -dL / >/dev/null 2>&1; then
-    as_ls_L_option=L
-  else
-    as_ls_L_option=
-  fi
-  as_test_x='
-    eval sh -c '\''
-      if test -d "$1"; then
-	test -d "$1/.";
-      else
-	case $1 in #(
-	-*)set "./$1";;
-	esac;
-	case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
-	???[sx]*):;;*)false;;esac;fi
-    '\'' sh
-  '
-fi
-as_executable_p=$as_test_x
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
 
 # Sed expression to map a string onto a valid CPP name.
 as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@@ -16929,7 +16941,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # values after options handling.
 ac_log="
 This file was extended by $as_me, which was
-generated by GNU Autoconf 2.68.  Invocation command line was
+generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
   CONFIG_HEADERS  = $CONFIG_HEADERS
@@ -16986,10 +16998,10 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
 config.status
-configured by $0, generated by GNU Autoconf 2.68,
+configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2012 Free Software Foundation, Inc.
 This config.status script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it."
 
@@ -17069,7 +17081,7 @@ fi
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 if \$ac_cs_recheck; then
-  set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+  set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
   shift
   \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
   CONFIG_SHELL='$SHELL'
diff --git a/Test/install-sh b/Test/install-sh
index 6781b98..a9244eb 100755
--- a/Test/install-sh
+++ b/Test/install-sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 # install - install a program, script, or datafile
 
-scriptversion=2009-04-28.21; # UTC
+scriptversion=2011-01-19.21; # UTC
 
 # This originates from X11R5 (mit/util/scripts/install.sh), which was
 # later released in X11R6 (xc/config/util/install.sh) with the
@@ -156,6 +156,10 @@ while test $# -ne 0; do
     -s) stripcmd=$stripprog;;
 
     -t) dst_arg=$2
+	# Protect names problematic for `test' and other utilities.
+	case $dst_arg in
+	  -* | [=\(\)!]) dst_arg=./$dst_arg;;
+	esac
 	shift;;
 
     -T) no_target_directory=true;;
@@ -186,6 +190,10 @@ if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
     fi
     shift # arg
     dst_arg=$arg
+    # Protect names problematic for `test' and other utilities.
+    case $dst_arg in
+      -* | [=\(\)!]) dst_arg=./$dst_arg;;
+    esac
   done
 fi
 
@@ -200,7 +208,11 @@ if test $# -eq 0; then
 fi
 
 if test -z "$dir_arg"; then
-  trap '(exit $?); exit' 1 2 13 15
+  do_exit='(exit $ret); exit $ret'
+  trap "ret=129; $do_exit" 1
+  trap "ret=130; $do_exit" 2
+  trap "ret=141; $do_exit" 13
+  trap "ret=143; $do_exit" 15
 
   # Set umask so as not to create temps with too-generous modes.
   # However, 'strip' requires both read and write access to temps.
@@ -228,9 +240,9 @@ fi
 
 for src
 do
-  # Protect names starting with `-'.
+  # Protect names problematic for `test' and other utilities.
   case $src in
-    -*) src=./$src;;
+    -* | [=\(\)!]) src=./$src;;
   esac
 
   if test -n "$dir_arg"; then
@@ -252,12 +264,7 @@ do
       echo "$0: no destination specified." >&2
       exit 1
     fi
-
     dst=$dst_arg
-    # Protect names starting with `-'.
-    case $dst in
-      -*) dst=./$dst;;
-    esac
 
     # If destination is a directory, append the input filename; won't work
     # if double slashes aren't ignored.
@@ -385,7 +392,7 @@ do
 
       case $dstdir in
 	/*) prefix='/';;
-	-*) prefix='./';;
+	[-=\(\)!]*) prefix='./';;
 	*)  prefix='';;
       esac
 
@@ -403,7 +410,7 @@ do
 
       for d
       do
-	test -z "$d" && continue
+	test X"$d" = X && continue
 
 	prefix=$prefix$d
 	if test -d "$prefix"; then
diff --git a/Test/ltmain.sh b/Test/ltmain.sh
index 63ae69d..33f642a 100644
--- a/Test/ltmain.sh
+++ b/Test/ltmain.sh
@@ -70,7 +70,7 @@
 #         compiler:		$LTCC
 #         compiler flags:		$LTCFLAGS
 #         linker:		$LD (gnu? $with_gnu_ld)
-#         $progname:	(GNU libtool) 2.4.2
+#         $progname:	(GNU libtool) 2.4.2 Debian-2.4.2-1.1
 #         automake:	$automake_version
 #         autoconf:	$autoconf_version
 #
@@ -80,7 +80,7 @@
 
 PROGRAM=libtool
 PACKAGE=libtool
-VERSION=2.4.2
+VERSION="2.4.2 Debian-2.4.2-1.1"
 TIMESTAMP=""
 package_revision=1.3337
 
@@ -6124,7 +6124,10 @@ func_mode_link ()
 	case $pass in
 	dlopen) libs="$dlfiles" ;;
 	dlpreopen) libs="$dlprefiles" ;;
-	link) libs="$deplibs %DEPLIBS% $dependency_libs" ;;
+	link)
+	  libs="$deplibs %DEPLIBS%"
+	  test "X$link_all_deplibs" != Xno && libs="$libs $dependency_libs"
+	  ;;
 	esac
       fi
       if test "$linkmode,$pass" = "lib,dlpreopen"; then
@@ -6444,19 +6447,19 @@ func_mode_link ()
 	    # It is a libtool convenience library, so add in its objects.
 	    func_append convenience " $ladir/$objdir/$old_library"
 	    func_append old_convenience " $ladir/$objdir/$old_library"
+	    tmp_libs=
+	    for deplib in $dependency_libs; do
+	      deplibs="$deplib $deplibs"
+	      if $opt_preserve_dup_deps ; then
+		case "$tmp_libs " in
+		*" $deplib "*) func_append specialdeplibs " $deplib" ;;
+		esac
+	      fi
+	      func_append tmp_libs " $deplib"
+	    done
 	  elif test "$linkmode" != prog && test "$linkmode" != lib; then
 	    func_fatal_error "\`$lib' is not a convenience library"
 	  fi
-	  tmp_libs=
-	  for deplib in $dependency_libs; do
-	    deplibs="$deplib $deplibs"
-	    if $opt_preserve_dup_deps ; then
-	      case "$tmp_libs " in
-	      *" $deplib "*) func_append specialdeplibs " $deplib" ;;
-	      esac
-	    fi
-	    func_append tmp_libs " $deplib"
-	  done
 	  continue
 	fi # $pass = conv
 
@@ -7349,6 +7352,9 @@ func_mode_link ()
 	    revision="$number_minor"
 	    lt_irix_increment=no
 	    ;;
+	  *)
+	    func_fatal_configuration "$modename: unknown library version type \`$version_type'"
+	    ;;
 	  esac
 	  ;;
 	no)
diff --git a/Test/missing b/Test/missing
index 28055d2..86a8fc3 100755
--- a/Test/missing
+++ b/Test/missing
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Common stub for a few missing GNU programs while installing.
 
-scriptversion=2009-04-28.21; # UTC
+scriptversion=2012-01-06.13; # UTC
 
 # Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006,
-# 2008, 2009 Free Software Foundation, Inc.
+# 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
 # Originally by Fran,cois Pinard <pinard at iro.umontreal.ca>, 1996.
 
 # This program is free software; you can redistribute it and/or modify
@@ -84,7 +84,6 @@ Supported PROGRAM values:
   help2man     touch the output file
   lex          create \`lex.yy.c', if possible, from existing .c
   makeinfo     touch the output file
-  tar          try tar, gnutar, gtar, then tar without non-portable flags
   yacc         create \`y.tab.[ch]', if possible, from existing .[ch]
 
 Version suffixes to PROGRAM as well as the prefixes \`gnu-', \`gnu', and
@@ -122,15 +121,6 @@ case $1 in
     # Not GNU programs, they don't have --version.
     ;;
 
-  tar*)
-    if test -n "$run"; then
-       echo 1>&2 "ERROR: \`tar' requires --run"
-       exit 1
-    elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
-       exit 1
-    fi
-    ;;
-
   *)
     if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
        # We have it, but it failed.
@@ -226,7 +216,7 @@ WARNING: \`$1' $msg.  You should only need it if
          \`Bison' from any GNU archive site."
     rm -f y.tab.c y.tab.h
     if test $# -ne 1; then
-        eval LASTARG="\${$#}"
+        eval LASTARG=\${$#}
 	case $LASTARG in
 	*.y)
 	    SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
@@ -256,7 +246,7 @@ WARNING: \`$1' is $msg.  You should only need it if
          \`Flex' from any GNU archive site."
     rm -f lex.yy.c
     if test $# -ne 1; then
-        eval LASTARG="\${$#}"
+        eval LASTARG=\${$#}
 	case $LASTARG in
 	*.l)
 	    SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
@@ -318,41 +308,6 @@ WARNING: \`$1' is $msg.  You should only need it if
     touch $file
     ;;
 
-  tar*)
-    shift
-
-    # We have already tried tar in the generic part.
-    # Look for gnutar/gtar before invocation to avoid ugly error
-    # messages.
-    if (gnutar --version > /dev/null 2>&1); then
-       gnutar "$@" && exit 0
-    fi
-    if (gtar --version > /dev/null 2>&1); then
-       gtar "$@" && exit 0
-    fi
-    firstarg="$1"
-    if shift; then
-	case $firstarg in
-	*o*)
-	    firstarg=`echo "$firstarg" | sed s/o//`
-	    tar "$firstarg" "$@" && exit 0
-	    ;;
-	esac
-	case $firstarg in
-	*h*)
-	    firstarg=`echo "$firstarg" | sed s/h//`
-	    tar "$firstarg" "$@" && exit 0
-	    ;;
-	esac
-    fi
-
-    echo 1>&2 "\
-WARNING: I can't seem to be able to run \`tar' with the given arguments.
-         You may want to install GNU tar or Free paxutils, or check the
-         command line arguments."
-    exit 1
-    ;;
-
   *)
     echo 1>&2 "\
 WARNING: \`$1' is needed, and is $msg.
diff --git a/aclocal.m4 b/aclocal.m4
index 09b33a2..64820a1 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1,7 +1,8 @@
-# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
+# generated automatically by aclocal 1.11.6 -*- Autoconf -*-
 
 # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2005, 2006, 2007, 2008, 2009  Free Software Foundation, Inc.
+# 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+# Inc.
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
@@ -13,8 +14,8 @@
 
 m4_ifndef([AC_AUTOCONF_VERSION],
   [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
-m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.68],,
-[m4_warning([this file was generated for autoconf 2.68.
+m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
+[m4_warning([this file was generated for autoconf 2.69.
 You have another version of autoconf.  It may work, but is not guaranteed to.
 If you have problems, you may need to regenerate the build system entirely.
 To do so, use the procedure documented by the package, typically `autoreconf'.])])
@@ -2525,17 +2526,6 @@ freebsd* | dragonfly*)
   esac
   ;;
 
-gnu*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  ;;
-
 haiku*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
@@ -2652,7 +2642,7 @@ linux*oldld* | linux*aout* | linux*coff*)
   ;;
 
 # This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
@@ -2697,6 +2687,18 @@ linux* | k*bsd*-gnu | kopensolaris*-gnu)
   dynamic_linker='GNU/Linux ld.so'
   ;;
 
+netbsdelf*-gnu)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='NetBSD ld.elf_so'
+  ;;
+
 netbsd*)
   version_type=sunos
   need_lib_prefix=no
@@ -3256,10 +3258,6 @@ freebsd* | dragonfly*)
   fi
   ;;
 
-gnu*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
 haiku*)
   lt_cv_deplibs_check_method=pass_all
   ;;
@@ -3298,11 +3296,11 @@ irix5* | irix6* | nonstopux*)
   ;;
 
 # This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
   lt_cv_deplibs_check_method=pass_all
   ;;
 
-netbsd*)
+netbsd* | netbsdelf*-gnu)
   if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
     lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
   else
@@ -4050,7 +4048,7 @@ m4_if([$1], [CXX], [
 	    ;;
 	esac
 	;;
-      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
 	case $cc_basename in
 	  KCC*)
 	    # KAI C++ Compiler
@@ -4114,7 +4112,7 @@ m4_if([$1], [CXX], [
 	    ;;
 	esac
 	;;
-      netbsd*)
+      netbsd* | netbsdelf*-gnu)
 	;;
       *qnx* | *nto*)
         # QNX uses GNU C++, but need to define -shared option too, otherwise
@@ -4349,7 +4347,7 @@ m4_if([$1], [CXX], [
       _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
       ;;
 
-    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+    linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
       case $cc_basename in
       # old Intel for x86_64 which still supported -KPIC.
       ecc*)
@@ -4591,6 +4589,9 @@ m4_if([$1], [CXX], [
       ;;
     esac
     ;;
+  linux* | k*bsd*-gnu | gnu*)
+    _LT_TAGVAR(link_all_deplibs, $1)=no
+    ;;
   *)
     _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
     ;;
@@ -4653,6 +4654,9 @@ dnl Note also adjust exclude_expsyms for C++ above.
   openbsd*)
     with_gnu_ld=no
     ;;
+  linux* | k*bsd*-gnu | gnu*)
+    _LT_TAGVAR(link_all_deplibs, $1)=no
+    ;;
   esac
 
   _LT_TAGVAR(ld_shlibs, $1)=yes
@@ -4874,7 +4878,7 @@ _LT_EOF
       fi
       ;;
 
-    netbsd*)
+    netbsd* | netbsdelf*-gnu)
       if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
 	_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
 	wlarc=
@@ -5051,6 +5055,7 @@ _LT_EOF
 	if test "$aix_use_runtimelinking" = yes; then
 	  shared_flag="$shared_flag "'${wl}-G'
 	fi
+	_LT_TAGVAR(link_all_deplibs, $1)=no
       else
 	# not using gcc
 	if test "$host_cpu" = ia64; then
@@ -5355,7 +5360,7 @@ _LT_EOF
       _LT_TAGVAR(link_all_deplibs, $1)=yes
       ;;
 
-    netbsd*)
+    netbsd* | netbsdelf*-gnu)
       if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
 	_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
       else
@@ -6231,9 +6236,6 @@ if test "$_lt_caught_CXX_error" != yes; then
         _LT_TAGVAR(ld_shlibs, $1)=yes
         ;;
 
-      gnu*)
-        ;;
-
       haiku*)
         _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
         _LT_TAGVAR(link_all_deplibs, $1)=yes
@@ -6395,7 +6397,7 @@ if test "$_lt_caught_CXX_error" != yes; then
         _LT_TAGVAR(inherit_rpath, $1)=yes
         ;;
 
-      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
         case $cc_basename in
           KCC*)
 	    # Kuck and Associates, Inc. (KAI) C++ Compiler
@@ -8606,12 +8608,15 @@ m4_ifndef([_LT_PROG_F77],		[AC_DEFUN([_LT_PROG_F77])])
 m4_ifndef([_LT_PROG_FC],		[AC_DEFUN([_LT_PROG_FC])])
 m4_ifndef([_LT_PROG_CXX],		[AC_DEFUN([_LT_PROG_CXX])])
 
-# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008, 2011 Free Software
+# Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
+# serial 1
+
 # AM_AUTOMAKE_VERSION(VERSION)
 # ----------------------------
 # Automake X.Y traces this macro to ensure aclocal.m4 has been
@@ -8621,7 +8626,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION],
 [am__api_version='1.11'
 dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
 dnl require some minimum version.  Point them to the right macro.
-m4_if([$1], [1.11.1], [],
+m4_if([$1], [1.11.6], [],
       [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
 ])
 
@@ -8637,19 +8642,21 @@ m4_define([_AM_AUTOCONF_VERSION], [])
 # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
 # This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
 AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.11.1])dnl
+[AM_AUTOMAKE_VERSION([1.11.6])dnl
 m4_ifndef([AC_AUTOCONF_VERSION],
   [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
 _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
 
 # AM_AUX_DIR_EXPAND                                         -*- Autoconf -*-
 
-# Copyright (C) 2001, 2003, 2005  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
+# serial 1
+
 # For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
 # $ac_aux_dir to `$srcdir/foo'.  In other projects, it is set to
 # `$srcdir', `$srcdir/..', or `$srcdir/../..'.
@@ -8731,14 +8738,14 @@ AC_CONFIG_COMMANDS_PRE(
 Usually this means the macro was only invoked conditionally.]])
 fi])])
 
-# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009
-# Free Software Foundation, Inc.
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009,
+# 2010, 2011 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
-# serial 10
+# serial 12
 
 # There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
 # written in clear, in which case automake, when reading aclocal.m4,
@@ -8778,6 +8785,7 @@ AC_CACHE_CHECK([dependency style of $depcc],
   # instance it was reported that on HP-UX the gcc test will end up
   # making a dummy file named `D' -- because `-MD' means `put the output
   # in D'.
+  rm -rf conftest.dir
   mkdir conftest.dir
   # Copy depcomp to subdir because otherwise we won't find it if we're
   # using a relative directory.
@@ -8842,7 +8850,7 @@ AC_CACHE_CHECK([dependency style of $depcc],
 	break
       fi
       ;;
-    msvisualcpp | msvcmsys)
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
       # This compiler won't grok `-c -o', but also, the minuso test has
       # not run yet.  These depmodes are late enough in the game, and
       # so weak that their functioning should not be impacted.
@@ -8907,10 +8915,13 @@ AC_DEFUN([AM_DEP_TRACK],
 if test "x$enable_dependency_tracking" != xno; then
   am_depcomp="$ac_aux_dir/depcomp"
   AMDEPBACKSLASH='\'
+  am__nodep='_no'
 fi
 AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
 AC_SUBST([AMDEPBACKSLASH])dnl
 _AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
+AC_SUBST([am__nodep])dnl
+_AM_SUBST_NOTMAKE([am__nodep])dnl
 ])
 
 # Generate code to set up dependency tracking.              -*- Autoconf -*-
@@ -9132,12 +9143,15 @@ for _am_header in $config_headers :; do
 done
 echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
 
-# Copyright (C) 2001, 2003, 2005, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2008, 2011 Free Software Foundation,
+# Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
+# serial 1
+
 # AM_PROG_INSTALL_SH
 # ------------------
 # Define $install_sh.
@@ -9269,12 +9283,15 @@ else
 fi
 ])
 
-# Copyright (C) 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
+# Copyright (C) 2003, 2004, 2005, 2006, 2011 Free Software Foundation,
+# Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
+# serial 1
+
 # AM_PROG_MKDIR_P
 # ---------------
 # Check for `mkdir -p'.
@@ -9297,13 +9314,14 @@ esac
 
 # Helper functions for option handling.                     -*- Autoconf -*-
 
-# Copyright (C) 2001, 2002, 2003, 2005, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2002, 2003, 2005, 2008, 2010 Free Software
+# Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
-# serial 4
+# serial 5
 
 # _AM_MANGLE_OPTION(NAME)
 # -----------------------
@@ -9311,13 +9329,13 @@ AC_DEFUN([_AM_MANGLE_OPTION],
 [[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
 
 # _AM_SET_OPTION(NAME)
-# ------------------------------
+# --------------------
 # Set option NAME.  Presently that only means defining a flag for this option.
 AC_DEFUN([_AM_SET_OPTION],
 [m4_define(_AM_MANGLE_OPTION([$1]), 1)])
 
 # _AM_SET_OPTIONS(OPTIONS)
-# ----------------------------------
+# ------------------------
 # OPTIONS is a space-separated list of Automake options.
 AC_DEFUN([_AM_SET_OPTIONS],
 [m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
@@ -9393,12 +9411,14 @@ Check your system clock])
 fi
 AC_MSG_RESULT(yes)])
 
-# Copyright (C) 2001, 2003, 2005  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
+# serial 1
+
 # AM_PROG_INSTALL_STRIP
 # ---------------------
 # One issue with vendor `install' (even GNU) is that you can't
@@ -9421,13 +9441,13 @@ fi
 INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
 AC_SUBST([INSTALL_STRIP_PROGRAM])])
 
-# Copyright (C) 2006, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2006, 2008, 2010 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
-# serial 2
+# serial 3
 
 # _AM_SUBST_NOTMAKE(VARIABLE)
 # ---------------------------
@@ -9436,13 +9456,13 @@ AC_SUBST([INSTALL_STRIP_PROGRAM])])
 AC_DEFUN([_AM_SUBST_NOTMAKE])
 
 # AM_SUBST_NOTMAKE(VARIABLE)
-# ---------------------------
+# --------------------------
 # Public sister of _AM_SUBST_NOTMAKE.
 AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
 
 # Check how to create a tarball.                            -*- Autoconf -*-
 
-# Copyright (C) 2004, 2005  Free Software Foundation, Inc.
+# Copyright (C) 2004, 2005, 2012 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
@@ -9464,10 +9484,11 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
 # a tarball read from stdin.
 #     $(am__untar) < result.tar
 AC_DEFUN([_AM_PROG_TAR],
-[# Always define AMTAR for backward compatibility.
-AM_MISSING_PROG([AMTAR], [tar])
+[# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
 m4_if([$1], [v7],
-     [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
+     [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
      [m4_case([$1], [ustar],, [pax],,
               [m4_fatal([Unknown tar format])])
 AC_MSG_CHECKING([how to create a $1 tar archive])
diff --git a/config.guess b/config.guess
index dc84c68..d622a44 100755
--- a/config.guess
+++ b/config.guess
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
-#   Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2009-11-20'
+timestamp='2012-02-10'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -17,9 +17,7 @@ timestamp='2009-11-20'
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
@@ -56,8 +54,9 @@ version="\
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -144,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
 case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     *:NetBSD:*:*)
 	# NetBSD (nbsd) targets should (where applicable) match one or
-	# more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
 	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
 	# switched to ELF, *-*-netbsd* would select the old
 	# object file format.  This provides both forward
@@ -180,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		fi
 		;;
 	    *)
-	        os=netbsd
+		os=netbsd
 		;;
 	esac
 	# The OS release
@@ -223,7 +222,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
 		;;
 	*5.*)
-	        UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
 		;;
 	esac
 	# According to Compaq, /usr/sbin/psrinfo has been available on
@@ -269,7 +268,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	# A Xn.n version is an unreleased experimental baselevel.
 	# 1.2 uses "1.2" for uname -r.
 	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-	exit ;;
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	exitcode=$?
+	trap '' 0
+	exit $exitcode ;;
     Alpha\ *:Windows_NT*:*)
 	# How do we know it's Interix rather than the generic POSIX subsystem?
 	# Should we change UNAME_MACHINE based on the output of uname instead
@@ -295,7 +297,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	echo s390-ibm-zvmoe
 	exit ;;
     *:OS400:*:*)
-        echo powerpc-ibm-os400
+	echo powerpc-ibm-os400
 	exit ;;
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
 	echo arm-acorn-riscix${UNAME_RELEASE}
@@ -394,23 +396,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     # MiNT.  But MiNT is downward compatible to TOS, so this should
     # be no problem.
     atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
+	echo m68k-atari-mint${UNAME_RELEASE}
 	exit ;;
     atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
 	echo m68k-atari-mint${UNAME_RELEASE}
-        exit ;;
+	exit ;;
     *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
+	echo m68k-atari-mint${UNAME_RELEASE}
 	exit ;;
     milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
-        echo m68k-milan-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-milan-mint${UNAME_RELEASE}
+	exit ;;
     hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
-        echo m68k-hades-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-hades-mint${UNAME_RELEASE}
+	exit ;;
     *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
-        echo m68k-unknown-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-unknown-mint${UNAME_RELEASE}
+	exit ;;
     m68k:machten:*:*)
 	echo m68k-apple-machten${UNAME_RELEASE}
 	exit ;;
@@ -480,8 +482,8 @@ EOF
 	echo m88k-motorola-sysv3
 	exit ;;
     AViiON:dgux:*:*)
-        # DG/UX returns AViiON for all architectures
-        UNAME_PROCESSOR=`/usr/bin/uname -p`
+	# DG/UX returns AViiON for all architectures
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
 	if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
 	then
 	    if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
@@ -494,7 +496,7 @@ EOF
 	else
 	    echo i586-dg-dgux${UNAME_RELEASE}
 	fi
- 	exit ;;
+	exit ;;
     M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
 	echo m88k-dolphin-sysv3
 	exit ;;
@@ -551,7 +553,7 @@ EOF
 		echo rs6000-ibm-aix3.2
 	fi
 	exit ;;
-    *:AIX:*:[456])
+    *:AIX:*:[4567])
 	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
 	if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
 		IBM_ARCH=rs6000
@@ -594,52 +596,52 @@ EOF
 	    9000/[678][0-9][0-9])
 		if [ -x /usr/bin/getconf ]; then
 		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
-                    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
-                    case "${sc_cpu_version}" in
-                      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
-                      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
-                      532)                      # CPU_PA_RISC2_0
-                        case "${sc_kernel_bits}" in
-                          32) HP_ARCH="hppa2.0n" ;;
-                          64) HP_ARCH="hppa2.0w" ;;
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "${sc_cpu_version}" in
+		      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "${sc_kernel_bits}" in
+			  32) HP_ARCH="hppa2.0n" ;;
+			  64) HP_ARCH="hppa2.0w" ;;
 			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
-                        esac ;;
-                    esac
+			esac ;;
+		    esac
 		fi
 		if [ "${HP_ARCH}" = "" ]; then
 		    eval $set_cc_for_build
-		    sed 's/^              //' << EOF >$dummy.c
+		    sed 's/^		//' << EOF >$dummy.c
 
-              #define _HPUX_SOURCE
-              #include <stdlib.h>
-              #include <unistd.h>
+		#define _HPUX_SOURCE
+		#include <stdlib.h>
+		#include <unistd.h>
 
-              int main ()
-              {
-              #if defined(_SC_KERNEL_BITS)
-                  long bits = sysconf(_SC_KERNEL_BITS);
-              #endif
-                  long cpu  = sysconf (_SC_CPU_VERSION);
+		int main ()
+		{
+		#if defined(_SC_KERNEL_BITS)
+		    long bits = sysconf(_SC_KERNEL_BITS);
+		#endif
+		    long cpu  = sysconf (_SC_CPU_VERSION);
 
-                  switch (cpu)
-              	{
-              	case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
-              	case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
-              	case CPU_PA_RISC2_0:
-              #if defined(_SC_KERNEL_BITS)
-              	    switch (bits)
-              		{
-              		case 64: puts ("hppa2.0w"); break;
-              		case 32: puts ("hppa2.0n"); break;
-              		default: puts ("hppa2.0"); break;
-              		} break;
-              #else  /* !defined(_SC_KERNEL_BITS) */
-              	    puts ("hppa2.0"); break;
-              #endif
-              	default: puts ("hppa1.0"); break;
-              	}
-                  exit (0);
-              }
+		    switch (cpu)
+			{
+			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+			case CPU_PA_RISC2_0:
+		#if defined(_SC_KERNEL_BITS)
+			    switch (bits)
+				{
+				case 64: puts ("hppa2.0w"); break;
+				case 32: puts ("hppa2.0n"); break;
+				default: puts ("hppa2.0"); break;
+				} break;
+		#else  /* !defined(_SC_KERNEL_BITS) */
+			    puts ("hppa2.0"); break;
+		#endif
+			default: puts ("hppa1.0"); break;
+			}
+		    exit (0);
+		}
 EOF
 		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
 		    test -z "$HP_ARCH" && HP_ARCH=hppa
@@ -730,22 +732,22 @@ EOF
 	exit ;;
     C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
 	echo c1-convex-bsd
-        exit ;;
+	exit ;;
     C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
 	if getsysinfo -f scalar_acc
 	then echo c32-convex-bsd
 	else echo c2-convex-bsd
 	fi
-        exit ;;
+	exit ;;
     C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
 	echo c34-convex-bsd
-        exit ;;
+	exit ;;
     C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
 	echo c38-convex-bsd
-        exit ;;
+	exit ;;
     C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
 	echo c4-convex-bsd
-        exit ;;
+	exit ;;
     CRAY*Y-MP:*:*:*)
 	echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
 	exit ;;
@@ -769,14 +771,14 @@ EOF
 	exit ;;
     F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
 	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
-        echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-        exit ;;
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
     5000:UNIX_System_V:4.*:*)
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
-        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
 	exit ;;
     i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
 	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
@@ -788,13 +790,12 @@ EOF
 	echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
 	exit ;;
     *:FreeBSD:*:*)
-	case ${UNAME_MACHINE} in
-	    pc98)
-		echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	case ${UNAME_PROCESSOR} in
 	    amd64)
 		echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	    *)
-		echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+		echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	esac
 	exit ;;
     i*:CYGWIN*:*)
@@ -803,15 +804,18 @@ EOF
     *:MINGW*:*)
 	echo ${UNAME_MACHINE}-pc-mingw32
 	exit ;;
+    i*:MSYS*:*)
+	echo ${UNAME_MACHINE}-pc-msys
+	exit ;;
     i*:windows32*:*)
-    	# uname -m includes "-pc" on this system.
-    	echo ${UNAME_MACHINE}-mingw32
+	# uname -m includes "-pc" on this system.
+	echo ${UNAME_MACHINE}-mingw32
 	exit ;;
     i*:PW*:*)
 	echo ${UNAME_MACHINE}-pc-pw32
 	exit ;;
     *:Interix*:*)
-    	case ${UNAME_MACHINE} in
+	case ${UNAME_MACHINE} in
 	    x86)
 		echo i586-pc-interix${UNAME_RELEASE}
 		exit ;;
@@ -857,6 +861,13 @@ EOF
     i*86:Minix:*:*)
 	echo ${UNAME_MACHINE}-pc-minix
 	exit ;;
+    aarch64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    aarch64_be:Linux:*:*)
+	UNAME_MACHINE=aarch64_be
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
     alpha:Linux:*:*)
 	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
 	  EV5)   UNAME_MACHINE=alphaev5 ;;
@@ -866,7 +877,7 @@ EOF
 	  EV6)   UNAME_MACHINE=alphaev6 ;;
 	  EV67)  UNAME_MACHINE=alphaev67 ;;
 	  EV68*) UNAME_MACHINE=alphaev68 ;;
-        esac
+	esac
 	objdump --private-headers /bin/sh | grep -q ld.so.1
 	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
 	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
@@ -878,20 +889,29 @@ EOF
 	then
 	    echo ${UNAME_MACHINE}-unknown-linux-gnu
 	else
-	    echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+		| grep -q __ARM_PCS_VFP
+	    then
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+	    else
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
+	    fi
 	fi
 	exit ;;
     avr32*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     cris:Linux:*:*)
-	echo cris-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-gnu
 	exit ;;
     crisv32:Linux:*:*)
-	echo crisv32-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-gnu
 	exit ;;
     frv:Linux:*:*)
-    	echo frv-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    hexagon:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     i*86:Linux:*:*)
 	LIBC=gnu
@@ -933,7 +953,7 @@ EOF
 	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
 	;;
     or32:Linux:*:*)
-	echo or32-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     padre:Linux:*:*)
 	echo sparc-unknown-linux-gnu
@@ -959,7 +979,7 @@ EOF
 	echo ${UNAME_MACHINE}-ibm-linux
 	exit ;;
     sh64*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     sh*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -967,14 +987,17 @@ EOF
     sparc:Linux:*:* | sparc64:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
+    tile*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
     vax:Linux:*:*)
 	echo ${UNAME_MACHINE}-dec-linux-gnu
 	exit ;;
     x86_64:Linux:*:*)
-	echo x86_64-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     xtensa*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     i*86:DYNIX/ptx:4*:*)
 	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
@@ -983,11 +1006,11 @@ EOF
 	echo i386-sequent-sysv4
 	exit ;;
     i*86:UNIX_SV:4.2MP:2.*)
-        # Unixware is an offshoot of SVR4, but it has its own version
-        # number series starting with 2...
-        # I am not positive that other SVR4 systems won't match this,
+	# Unixware is an offshoot of SVR4, but it has its own version
+	# number series starting with 2...
+	# I am not positive that other SVR4 systems won't match this,
 	# I just have to hope.  -- rms.
-        # Use sysv4.2uw... so that sysv4* matches it.
+	# Use sysv4.2uw... so that sysv4* matches it.
 	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
 	exit ;;
     i*86:OS/2:*:*)
@@ -1019,7 +1042,7 @@ EOF
 	fi
 	exit ;;
     i*86:*:5:[678]*)
-    	# UnixWare 7.x, OpenUNIX and OpenServer 6.
+	# UnixWare 7.x, OpenUNIX and OpenServer 6.
 	case `/bin/uname -X | grep "^Machine"` in
 	    *486*)	     UNAME_MACHINE=i486 ;;
 	    *Pentium)	     UNAME_MACHINE=i586 ;;
@@ -1047,13 +1070,13 @@ EOF
 	exit ;;
     pc:*:*:*)
 	# Left here for compatibility:
-        # uname -m prints for DJGPP always 'pc', but it prints nothing about
-        # the processor, so we play safe by assuming i586.
+	# uname -m prints for DJGPP always 'pc', but it prints nothing about
+	# the processor, so we play safe by assuming i586.
 	# Note: whatever this is, it MUST be the same as what config.sub
 	# prints for the "djgpp" host, or else GDB configury will decide that
 	# this is a cross-build.
 	echo i586-pc-msdosdjgpp
-        exit ;;
+	exit ;;
     Intel:Mach:3*:*)
 	echo i386-pc-mach3
 	exit ;;
@@ -1088,8 +1111,8 @@ EOF
 	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
 	  && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
     3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
-        /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-          && { echo i486-ncr-sysv4; exit; } ;;
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4; exit; } ;;
     NCR*:*:4.2:* | MPRAS*:*:4.2:*)
 	OS_REL='.3'
 	test -r /etc/.relid \
@@ -1132,10 +1155,10 @@ EOF
 		echo ns32k-sni-sysv
 	fi
 	exit ;;
-    PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-                      # says <Richard.M.Bartel at ccMail.Census.GOV>
-        echo i586-unisys-sysv4
-        exit ;;
+    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+			# says <Richard.M.Bartel at ccMail.Census.GOV>
+	echo i586-unisys-sysv4
+	exit ;;
     *:UNIX_System_V:4*:FTX*)
 	# From Gerald Hewes <hewes at openmarket.com>.
 	# How about differentiating between stratus architectures? -djm
@@ -1161,11 +1184,11 @@ EOF
 	exit ;;
     R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
 	if [ -d /usr/nec ]; then
-	        echo mips-nec-sysv${UNAME_RELEASE}
+		echo mips-nec-sysv${UNAME_RELEASE}
 	else
-	        echo mips-unknown-sysv${UNAME_RELEASE}
+		echo mips-unknown-sysv${UNAME_RELEASE}
 	fi
-        exit ;;
+	exit ;;
     BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
 	echo powerpc-be-beos
 	exit ;;
@@ -1230,6 +1253,9 @@ EOF
     *:QNX:*:4*)
 	echo i386-pc-qnx
 	exit ;;
+    NEO-?:NONSTOP_KERNEL:*:*)
+	echo neo-tandem-nsk${UNAME_RELEASE}
+	exit ;;
     NSE-?:NONSTOP_KERNEL:*:*)
 	echo nse-tandem-nsk${UNAME_RELEASE}
 	exit ;;
@@ -1275,13 +1301,13 @@ EOF
 	echo pdp10-unknown-its
 	exit ;;
     SEI:*:*:SEIUX)
-        echo mips-sei-seiux${UNAME_RELEASE}
+	echo mips-sei-seiux${UNAME_RELEASE}
 	exit ;;
     *:DragonFly:*:*)
 	echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
 	exit ;;
     *:*VMS:*:*)
-    	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
 	case "${UNAME_MACHINE}" in
 	    A*) echo alpha-dec-vms ; exit ;;
 	    I*) echo ia64-dec-vms ; exit ;;
@@ -1299,6 +1325,9 @@ EOF
     i*86:AROS:*:*)
 	echo ${UNAME_MACHINE}-pc-aros
 	exit ;;
+    x86_64:VMkernel:*:*)
+	echo ${UNAME_MACHINE}-unknown-esx
+	exit ;;
 esac
 
 #echo '(No uname command or uname output not recognized.)' 1>&2
@@ -1321,11 +1350,11 @@ main ()
 #include <sys/param.h>
   printf ("m68k-sony-newsos%s\n",
 #ifdef NEWSOS4
-          "4"
+	"4"
 #else
-	  ""
+	""
 #endif
-         ); exit (0);
+	); exit (0);
 #endif
 #endif
 
diff --git a/config.sub b/config.sub
index 2a55a50..6205f84 100755
--- a/config.sub
+++ b/config.sub
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Configuration validation subroutine script.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
-#   Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2009-11-20'
+timestamp='2012-04-18'
 
 # This file is (in principle) common to ALL GNU software.
 # The presence of a machine in this file suggests that SOME GNU software
@@ -21,9 +21,7 @@ timestamp='2009-11-20'
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
@@ -75,8 +73,9 @@ Report bugs and patches to <config-patches at gnu.org>."
 version="\
 GNU config.sub ($timestamp)
 
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -123,13 +122,18 @@ esac
 # Here we must recognize all the valid KERNEL-OS combinations.
 maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
 case $maybe_os in
-  nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
-  uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
+  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+  linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  knetbsd*-gnu* | netbsd*-gnu* | \
   kopensolaris*-gnu* | \
   storm-chaos* | os2-emx* | rtmk-nova*)
     os=-$maybe_os
     basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
     ;;
+  android-linux)
+    os=-linux-android
+    basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+    ;;
   *)
     basic_machine=`echo $1 | sed 's/-[^-]*$//'`
     if [ $basic_machine != $1 ]
@@ -156,8 +160,8 @@ case $os in
 		os=
 		basic_machine=$1
 		;;
-        -bluegene*)
-	        os=-cnk
+	-bluegene*)
+		os=-cnk
 		;;
 	-sim | -cisco | -oki | -wec | -winbond)
 		os=
@@ -173,10 +177,10 @@ case $os in
 		os=-chorusos
 		basic_machine=$1
 		;;
- 	-chorusrdb)
- 		os=-chorusrdb
+	-chorusrdb)
+		os=-chorusrdb
 		basic_machine=$1
- 		;;
+		;;
 	-hiux*)
 		os=-hiuxwe2
 		;;
@@ -221,6 +225,12 @@ case $os in
 	-isc*)
 		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
 		;;
+	-lynx*178)
+		os=-lynxos178
+		;;
+	-lynx*5)
+		os=-lynxos5
+		;;
 	-lynx*)
 		os=-lynxos
 		;;
@@ -245,17 +255,22 @@ case $basic_machine in
 	# Some are omitted here because they have special meanings below.
 	1750a | 580 \
 	| a29k \
+	| aarch64 | aarch64_be \
 	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
 	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
 	| am33_2.0 \
 	| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+        | be32 | be64 \
 	| bfin \
 	| c4x | clipper \
 	| d10v | d30v | dlx | dsp16xx \
+	| epiphany \
 	| fido | fr30 | frv \
 	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+	| hexagon \
 	| i370 | i860 | i960 | ia64 \
 	| ip2k | iq2000 \
+	| le32 | le64 \
 	| lm32 \
 	| m32c | m32r | m32rle | m68000 | m68k | m88k \
 	| maxq | mb | microblaze | mcore | mep | metag \
@@ -281,29 +296,39 @@ case $basic_machine in
 	| moxie \
 	| mt \
 	| msp430 \
+	| nds32 | nds32le | nds32be \
 	| nios | nios2 \
 	| ns16k | ns32k \
+	| open8 \
 	| or32 \
 	| pdp10 | pdp11 | pj | pjl \
-	| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
+	| powerpc | powerpc64 | powerpc64le | powerpcle \
 	| pyramid \
-	| rx \
+	| rl78 | rx \
 	| score \
 	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
 	| sh64 | sh64le \
 	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
 	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
-	| spu | strongarm \
-	| tahoe | thumb | tic4x | tic80 | tron \
+	| spu \
+	| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
 	| ubicom32 \
-	| v850 | v850e \
+	| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
 	| we32k \
-	| x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
+	| x86 | xc16x | xstormy16 | xtensa \
 	| z8k | z80)
 		basic_machine=$basic_machine-unknown
 		;;
-	m6811 | m68hc11 | m6812 | m68hc12 | picochip)
-		# Motorola 68HC11/12.
+	c54x)
+		basic_machine=tic54x-unknown
+		;;
+	c55x)
+		basic_machine=tic55x-unknown
+		;;
+	c6x)
+		basic_machine=tic6x-unknown
+		;;
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
 		basic_machine=$basic_machine-unknown
 		os=-none
 		;;
@@ -313,6 +338,21 @@ case $basic_machine in
 		basic_machine=mt-unknown
 		;;
 
+	strongarm | thumb | xscale)
+		basic_machine=arm-unknown
+		;;
+	xgate)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	xscaleeb)
+		basic_machine=armeb-unknown
+		;;
+
+	xscaleel)
+		basic_machine=armel-unknown
+		;;
+
 	# We use `pc' rather than `unknown'
 	# because (1) that's what they normally are, and
 	# (2) the word "unknown" tends to confuse beginning users.
@@ -327,21 +367,25 @@ case $basic_machine in
 	# Recognize the basic CPU types with company name.
 	580-* \
 	| a29k-* \
+	| aarch64-* | aarch64_be-* \
 	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
 	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
 	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
 	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
 	| avr-* | avr32-* \
+	| be32-* | be64-* \
 	| bfin-* | bs2000-* \
-	| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
+	| c[123]* | c30-* | [cjt]90-* | c4x-* \
 	| clipper-* | craynv-* | cydra-* \
 	| d10v-* | d30v-* | dlx-* \
 	| elxsi-* \
 	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
 	| h8300-* | h8500-* \
 	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+	| hexagon-* \
 	| i*86-* | i860-* | i960-* | ia64-* \
 	| ip2k-* | iq2000-* \
+	| le32-* | le64-* \
 	| lm32-* \
 	| m32c-* | m32r-* | m32rle-* \
 	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
@@ -367,25 +411,29 @@ case $basic_machine in
 	| mmix-* \
 	| mt-* \
 	| msp430-* \
+	| nds32-* | nds32le-* | nds32be-* \
 	| nios-* | nios2-* \
 	| none-* | np1-* | ns16k-* | ns32k-* \
+	| open8-* \
 	| orion-* \
 	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
-	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
+	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
 	| pyramid-* \
-	| romp-* | rs6000-* | rx-* \
+	| rl78-* | romp-* | rs6000-* | rx-* \
 	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
 	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
 	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
 	| sparclite-* \
-	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
-	| tahoe-* | thumb-* \
-	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* | tile-* \
+	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
+	| tahoe-* \
+	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+	| tile*-* \
 	| tron-* \
 	| ubicom32-* \
-	| v850-* | v850e-* | vax-* \
+	| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+	| vax-* \
 	| we32k-* \
-	| x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
+	| x86-* | x86_64-* | xc16x-* | xps100-* \
 	| xstormy16-* | xtensa*-* \
 	| ymp-* \
 	| z8k-* | z80-*)
@@ -410,7 +458,7 @@ case $basic_machine in
 		basic_machine=a29k-amd
 		os=-udi
 		;;
-    	abacus)
+	abacus)
 		basic_machine=abacus-unknown
 		;;
 	adobe68k)
@@ -480,11 +528,20 @@ case $basic_machine in
 		basic_machine=powerpc-ibm
 		os=-cnk
 		;;
+	c54x-*)
+		basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c55x-*)
+		basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c6x-*)
+		basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
 	c90)
 		basic_machine=c90-cray
 		os=-unicos
 		;;
-        cegcc)
+	cegcc)
 		basic_machine=arm-unknown
 		os=-cegcc
 		;;
@@ -516,7 +573,7 @@ case $basic_machine in
 		basic_machine=craynv-cray
 		os=-unicosmp
 		;;
-	cr16)
+	cr16 | cr16-*)
 		basic_machine=cr16-unknown
 		os=-elf
 		;;
@@ -674,7 +731,6 @@ case $basic_machine in
 	i370-ibm* | ibm*)
 		basic_machine=i370-ibm
 		;;
-# I'm not sure what "Sysv32" means.  Should this be sysv3.2?
 	i*86v32)
 		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
 		os=-sysv32
@@ -732,7 +788,7 @@ case $basic_machine in
 		basic_machine=ns32k-utek
 		os=-sysv
 		;;
-        microblaze)
+	microblaze)
 		basic_machine=microblaze-xilinx
 		;;
 	mingw32)
@@ -771,10 +827,18 @@ case $basic_machine in
 	ms1-*)
 		basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
 		;;
+	msys)
+		basic_machine=i386-pc
+		os=-msys
+		;;
 	mvs)
 		basic_machine=i370-ibm
 		os=-mvs
 		;;
+	nacl)
+		basic_machine=le32-unknown
+		os=-nacl
+		;;
 	ncr3000)
 		basic_machine=i486-ncr
 		os=-sysv4
@@ -839,6 +903,12 @@ case $basic_machine in
 	np1)
 		basic_machine=np1-gould
 		;;
+	neo-tandem)
+		basic_machine=neo-tandem
+		;;
+	nse-tandem)
+		basic_machine=nse-tandem
+		;;
 	nsr-tandem)
 		basic_machine=nsr-tandem
 		;;
@@ -921,9 +991,10 @@ case $basic_machine in
 		;;
 	power)	basic_machine=power-ibm
 		;;
-	ppc)	basic_machine=powerpc-unknown
+	ppc | ppcbe)	basic_machine=powerpc-unknown
 		;;
-	ppc-*)	basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+	ppc-* | ppcbe-*)
+		basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
 		;;
 	ppcle | powerpclittle | ppc-le | powerpc-little)
 		basic_machine=powerpcle-unknown
@@ -1017,6 +1088,9 @@ case $basic_machine in
 		basic_machine=i860-stratus
 		os=-sysv4
 		;;
+	strongarm-* | thumb-*)
+		basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
 	sun2)
 		basic_machine=m68000-sun
 		;;
@@ -1073,20 +1147,8 @@ case $basic_machine in
 		basic_machine=t90-cray
 		os=-unicos
 		;;
-	tic54x | c54x*)
-		basic_machine=tic54x-unknown
-		os=-coff
-		;;
-	tic55x | c55x*)
-		basic_machine=tic55x-unknown
-		os=-coff
-		;;
-	tic6x | c6x*)
-		basic_machine=tic6x-unknown
-		os=-coff
-		;;
 	tile*)
-		basic_machine=tile-unknown
+		basic_machine=$basic_machine-unknown
 		os=-linux-gnu
 		;;
 	tx39)
@@ -1156,6 +1218,9 @@ case $basic_machine in
 	xps | xps100)
 		basic_machine=xps100-honeywell
 		;;
+	xscale-* | xscalee[bl]-*)
+		basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
+		;;
 	ymp)
 		basic_machine=ymp-cray
 		os=-unicos
@@ -1253,11 +1318,11 @@ esac
 if [ x"$os" != x"" ]
 then
 case $os in
-        # First match some system type aliases
-        # that might get confused with valid system types.
+	# First match some system type aliases
+	# that might get confused with valid system types.
 	# -solaris* is a basic system type, with this one exception.
-        -auroraux)
-	        os=-auroraux
+	-auroraux)
+		os=-auroraux
 		;;
 	-solaris1 | -solaris1.*)
 		os=`echo $os | sed -e 's|solaris1|sunos4|'`
@@ -1293,8 +1358,9 @@ case $os in
 	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
 	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
 	      | -chorusos* | -chorusrdb* | -cegcc* \
-	      | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-	      | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
+	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+	      | -mingw32* | -linux-gnu* | -linux-android* \
+	      | -linux-newlib* | -linux-uclibc* \
 	      | -uxpv* | -beos* | -mpeix* | -udk* \
 	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
 	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
@@ -1341,7 +1407,7 @@ case $os in
 	-opened*)
 		os=-openedition
 		;;
-        -os400*)
+	-os400*)
 		os=-os400
 		;;
 	-wince*)
@@ -1390,7 +1456,7 @@ case $os in
 	-sinix*)
 		os=-sysv4
 		;;
-        -tpf*)
+	-tpf*)
 		os=-tpf
 		;;
 	-triton*)
@@ -1435,6 +1501,8 @@ case $os in
 	-dicos*)
 		os=-dicos
 		;;
+	-nacl*)
+		;;
 	-none)
 		;;
 	*)
@@ -1457,10 +1525,10 @@ else
 # system, and we'll never get to this point.
 
 case $basic_machine in
-        score-*)
+	score-*)
 		os=-elf
 		;;
-        spu-*)
+	spu-*)
 		os=-elf
 		;;
 	*-acorn)
@@ -1472,8 +1540,20 @@ case $basic_machine in
 	arm*-semi)
 		os=-aout
 		;;
-        c4x-* | tic4x-*)
-        	os=-coff
+	c4x-* | tic4x-*)
+		os=-coff
+		;;
+	hexagon-*)
+		os=-elf
+		;;
+	tic54x-*)
+		os=-coff
+		;;
+	tic55x-*)
+		os=-coff
+		;;
+	tic6x-*)
+		os=-coff
 		;;
 	# This must come before the *-dec entry.
 	pdp10-*)
@@ -1493,14 +1573,11 @@ case $basic_machine in
 		;;
 	m68000-sun)
 		os=-sunos3
-		# This also exists in the configure program, but was not the
-		# default.
-		# os=-sunos4
 		;;
 	m68*-cisco)
 		os=-aout
 		;;
-        mep-*)
+	mep-*)
 		os=-elf
 		;;
 	mips*-cisco)
@@ -1527,7 +1604,7 @@ case $basic_machine in
 	*-ibm)
 		os=-aix
 		;;
-    	*-knuth)
+	*-knuth)
 		os=-mmixware
 		;;
 	*-wec)
diff --git a/configure b/configure
index 6dd2fbc..b615d95 100755
--- a/configure
+++ b/configure
@@ -1,11 +1,9 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.68.
+# Generated by GNU Autoconf 2.69.
 #
 #
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
-# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
-# Foundation, Inc.
+# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
 #
 #
 # This configure script is free software; the Free Software Foundation
@@ -134,6 +132,31 @@ export LANGUAGE
 # CDPATH.
 (unset CDPATH) >/dev/null 2>&1 && unset CDPATH
 
+# Use a proper internal environment variable to ensure we don't fall
+  # into an infinite loop, continuously re-executing ourselves.
+  if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+    _as_can_reexec=no; export _as_can_reexec;
+    # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+as_fn_exit 255
+  fi
+  # We don't want this to propagate to other subprocesses.
+          { _as_can_reexec=; unset _as_can_reexec;}
 if test "x$CONFIG_SHELL" = x; then
   as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
   emulate sh
@@ -167,7 +190,8 @@ if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
 else
   exitcode=1; echo positional parameters were not saved.
 fi
-test x\$exitcode = x0 || exit 1"
+test x\$exitcode = x0 || exit 1
+test -x / || exit 1"
   as_suggested="  as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
   as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
   eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
@@ -220,21 +244,25 @@ IFS=$as_save_IFS
 
 
       if test "x$CONFIG_SHELL" != x; then :
-  # We cannot yet assume a decent shell, so we have to provide a
-	# neutralization value for shells without unset; and this also
-	# works around shells that cannot unset nonexistent variables.
-	# Preserve -v and -x to the replacement shell.
-	BASH_ENV=/dev/null
-	ENV=/dev/null
-	(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
-	export CONFIG_SHELL
-	case $- in # ((((
-	  *v*x* | *x*v* ) as_opts=-vx ;;
-	  *v* ) as_opts=-v ;;
-	  *x* ) as_opts=-x ;;
-	  * ) as_opts= ;;
-	esac
-	exec "$CONFIG_SHELL" $as_opts "$as_myself" ${1+"$@"}
+  export CONFIG_SHELL
+             # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
 fi
 
     if test x$as_have_required = xno; then :
@@ -336,6 +364,14 @@ $as_echo X"$as_dir" |
 
 
 } # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
 # as_fn_append VAR VALUE
 # ----------------------
 # Append the text in VALUE to the end of the definition contained in VAR. Take
@@ -457,6 +493,10 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits
   chmod +x "$as_me.lineno" ||
     { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
 
+  # If we had to re-execute with $CONFIG_SHELL, we're ensured to have
+  # already done that, so ensure we don't try to do so again and fall
+  # in an infinite loop.  This has already happened in practice.
+  _as_can_reexec=no; export _as_can_reexec
   # Don't try to exec as it changes $[0], causing all sort of problems
   # (the dirname of $[0] is not the place where we might find the
   # original and so on.  Autoconf is especially sensitive to this).
@@ -491,16 +531,16 @@ if (echo >conf$$.file) 2>/dev/null; then
     # ... but there are two gotchas:
     # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
     # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
-    # In both cases, we have to default to `cp -p'.
+    # In both cases, we have to default to `cp -pR'.
     ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
-      as_ln_s='cp -p'
+      as_ln_s='cp -pR'
   elif ln conf$$.file conf$$ 2>/dev/null; then
     as_ln_s=ln
   else
-    as_ln_s='cp -p'
+    as_ln_s='cp -pR'
   fi
 else
-  as_ln_s='cp -p'
+  as_ln_s='cp -pR'
 fi
 rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
 rmdir conf$$.dir 2>/dev/null
@@ -512,28 +552,8 @@ else
   as_mkdir_p=false
 fi
 
-if test -x / >/dev/null 2>&1; then
-  as_test_x='test -x'
-else
-  if ls -dL / >/dev/null 2>&1; then
-    as_ls_L_option=L
-  else
-    as_ls_L_option=
-  fi
-  as_test_x='
-    eval sh -c '\''
-      if test -d "$1"; then
-	test -d "$1/.";
-      else
-	case $1 in #(
-	-*)set "./$1";;
-	esac;
-	case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
-	???[sx]*):;;*)false;;esac;fi
-    '\'' sh
-  '
-fi
-as_executable_p=$as_test_x
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
 
 # Sed expression to map a string onto a valid CPP name.
 as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@@ -651,6 +671,7 @@ SED
 am__fastdepCC_FALSE
 am__fastdepCC_TRUE
 CCDEPMODE
+am__nodep
 AMDEPBACKSLASH
 AMDEP_FALSE
 AMDEP_TRUE
@@ -1237,8 +1258,6 @@ target=$target_alias
 if test "x$host_alias" != x; then
   if test "x$build_alias" = x; then
     cross_compiling=maybe
-    $as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host.
-    If a cross compiler is detected then cross compile mode will be used" >&2
   elif test "x$build_alias" != "x$host_alias"; then
     cross_compiling=yes
   fi
@@ -1508,9 +1527,9 @@ test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
 configure
-generated by GNU Autoconf 2.68
+generated by GNU Autoconf 2.69
 
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2012 Free Software Foundation, Inc.
 This configure script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it.
 _ACEOF
@@ -1586,7 +1605,7 @@ $as_echo "$ac_try_echo"; } >&5
 	 test ! -s conftest.err
        } && test -s conftest$ac_exeext && {
 	 test "$cross_compiling" = yes ||
-	 $as_test_x conftest$ac_exeext
+	 test -x conftest$ac_exeext
        }; then :
   ac_retval=0
 else
@@ -1884,7 +1903,7 @@ $as_echo "$ac_try_echo"; } >&5
 	 test ! -s conftest.err
        } && test -s conftest$ac_exeext && {
 	 test "$cross_compiling" = yes ||
-	 $as_test_x conftest$ac_exeext
+	 test -x conftest$ac_exeext
        }; then :
   ac_retval=0
 else
@@ -1920,7 +1939,8 @@ int
 main ()
 {
 static int test_array [1 - 2 * !(($2) >= 0)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
 
   ;
   return 0;
@@ -1936,7 +1956,8 @@ int
 main ()
 {
 static int test_array [1 - 2 * !(($2) <= $ac_mid)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
 
   ;
   return 0;
@@ -1962,7 +1983,8 @@ int
 main ()
 {
 static int test_array [1 - 2 * !(($2) < 0)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
 
   ;
   return 0;
@@ -1978,7 +2000,8 @@ int
 main ()
 {
 static int test_array [1 - 2 * !(($2) >= $ac_mid)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
 
   ;
   return 0;
@@ -2012,7 +2035,8 @@ int
 main ()
 {
 static int test_array [1 - 2 * !(($2) <= $ac_mid)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
 
   ;
   return 0;
@@ -2085,7 +2109,7 @@ This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
 It was created by $as_me, which was
-generated by GNU Autoconf 2.68.  Invocation command line was
+generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
 
@@ -2449,7 +2473,7 @@ SDL_VERSION=1.2.0
 
 MAJOR_VERSION=2
 MINOR_VERSION=0
-MICRO_VERSION=23
+MICRO_VERSION=25
 INTERFACE_AGE=1
 BINARY_AGE=10
 VERSION=$MAJOR_VERSION.$MINOR_VERSION.$MICRO_VERSION
@@ -2651,7 +2675,7 @@ case $as_dir/ in #((
     # by default.
     for ac_prog in ginstall scoinst install; do
       for ac_exec_ext in '' $ac_executable_extensions; do
-	if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then
+	if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
 	  if test $ac_prog = install &&
 	    grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
 	    # AIX install.  It has an incompatible calling convention.
@@ -2820,7 +2844,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_STRIP="${ac_tool_prefix}strip"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -2860,7 +2884,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_STRIP="strip"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -2911,7 +2935,7 @@ do
   test -z "$as_dir" && as_dir=.
     for ac_prog in mkdir gmkdir; do
 	 for ac_exec_ext in '' $ac_executable_extensions; do
-	   { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; } || continue
+	   as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue
 	   case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
 	     'mkdir (GNU coreutils) '* | \
 	     'mkdir (coreutils) '* | \
@@ -2964,7 +2988,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_AWK="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3080,11 +3104,11 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
 
 # We need awk for the "check" target.  The system "awk" is bad on
 # some platforms.
-# Always define AMTAR for backward compatibility.
+# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AMTAR='$${TAR-tar}'
 
-AMTAR=${AMTAR-"${am_missing_run}tar"}
-
-am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'
+am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
 
 
 
@@ -3112,7 +3136,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_AS="${ac_tool_prefix}as"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3152,7 +3176,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_AS="as"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3204,7 +3228,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3244,7 +3268,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_DLLTOOL="dlltool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3296,7 +3320,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3336,7 +3360,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_OBJDUMP="objdump"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3540,6 +3564,7 @@ fi
 if test "x$enable_dependency_tracking" != xno; then
   am_depcomp="$ac_aux_dir/depcomp"
   AMDEPBACKSLASH='\'
+  am__nodep='_no'
 fi
  if test "x$enable_dependency_tracking" != xno; then
   AMDEP_TRUE=
@@ -3572,7 +3597,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="${ac_tool_prefix}gcc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3612,7 +3637,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_CC="gcc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3665,7 +3690,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="${ac_tool_prefix}cc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3706,7 +3731,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
        ac_prog_rejected=yes
        continue
@@ -3764,7 +3789,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -3808,7 +3833,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_CC="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -4254,8 +4279,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <stdarg.h>
 #include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
+struct stat;
 /* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
 struct buf { int x; };
 FILE * (*rcsopen) (struct buf *, struct stat *, int);
@@ -4353,6 +4377,7 @@ else
   # instance it was reported that on HP-UX the gcc test will end up
   # making a dummy file named `D' -- because `-MD' means `put the output
   # in D'.
+  rm -rf conftest.dir
   mkdir conftest.dir
   # Copy depcomp to subdir because otherwise we won't find it if we're
   # using a relative directory.
@@ -4412,7 +4437,7 @@ else
 	break
       fi
       ;;
-    msvisualcpp | msvcmsys)
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
       # This compiler won't grok `-c -o', but also, the minuso test has
       # not run yet.  These depmodes are late enough in the game, and
       # so weak that their functioning should not be impacted.
@@ -4489,7 +4514,7 @@ do
     for ac_prog in sed gsed; do
     for ac_exec_ext in '' $ac_executable_extensions; do
       ac_path_SED="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_SED" && $as_test_x "$ac_path_SED"; } || continue
+      as_fn_executable_p "$ac_path_SED" || continue
 # Check for GNU ac_path_SED and select it if it is found.
   # Check for GNU $ac_path_SED
 case `"$ac_path_SED" --version 2>&1` in
@@ -4565,7 +4590,7 @@ do
     for ac_prog in grep ggrep; do
     for ac_exec_ext in '' $ac_executable_extensions; do
       ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+      as_fn_executable_p "$ac_path_GREP" || continue
 # Check for GNU ac_path_GREP and select it if it is found.
   # Check for GNU $ac_path_GREP
 case `"$ac_path_GREP" --version 2>&1` in
@@ -4631,7 +4656,7 @@ do
     for ac_prog in egrep; do
     for ac_exec_ext in '' $ac_executable_extensions; do
       ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+      as_fn_executable_p "$ac_path_EGREP" || continue
 # Check for GNU ac_path_EGREP and select it if it is found.
   # Check for GNU $ac_path_EGREP
 case `"$ac_path_EGREP" --version 2>&1` in
@@ -4698,7 +4723,7 @@ do
     for ac_prog in fgrep; do
     for ac_exec_ext in '' $ac_executable_extensions; do
       ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_FGREP" && $as_test_x "$ac_path_FGREP"; } || continue
+      as_fn_executable_p "$ac_path_FGREP" || continue
 # Check for GNU ac_path_FGREP and select it if it is found.
   # Check for GNU $ac_path_FGREP
 case `"$ac_path_FGREP" --version 2>&1` in
@@ -4954,7 +4979,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -4998,7 +5023,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5422,7 +5447,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5462,7 +5487,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_OBJDUMP="objdump"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5585,10 +5610,6 @@ freebsd* | dragonfly*)
   fi
   ;;
 
-gnu*)
-  lt_cv_deplibs_check_method=pass_all
-  ;;
-
 haiku*)
   lt_cv_deplibs_check_method=pass_all
   ;;
@@ -5627,11 +5648,11 @@ irix5* | irix6* | nonstopux*)
   ;;
 
 # This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
   lt_cv_deplibs_check_method=pass_all
   ;;
 
-netbsd*)
+netbsd* | netbsdelf*-gnu)
   if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
     lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$'
   else
@@ -5765,7 +5786,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5805,7 +5826,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_DLLTOOL="dlltool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5906,7 +5927,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_AR="$ac_tool_prefix$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -5950,7 +5971,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_AR="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6075,7 +6096,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_STRIP="${ac_tool_prefix}strip"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6115,7 +6136,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_STRIP="strip"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6174,7 +6195,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6214,7 +6235,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_RANLIB="ranlib"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6863,7 +6884,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6903,7 +6924,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_MANIFEST_TOOL="mt"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -6983,7 +7004,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7023,7 +7044,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_DSYMUTIL="dsymutil"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7075,7 +7096,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7115,7 +7136,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_NMEDIT="nmedit"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7167,7 +7188,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_LIPO="${ac_tool_prefix}lipo"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7207,7 +7228,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_LIPO="lipo"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7259,7 +7280,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_OTOOL="${ac_tool_prefix}otool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7299,7 +7320,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_OTOOL="otool"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7351,7 +7372,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -7391,7 +7412,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_OTOOL64="otool64"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -8545,7 +8566,7 @@ lt_prog_compiler_static=
       lt_prog_compiler_static='-non_shared'
       ;;
 
-    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+    linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
       case $cc_basename in
       # old Intel for x86_64 which still supported -KPIC.
       ecc*)
@@ -9023,6 +9044,9 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
   openbsd*)
     with_gnu_ld=no
     ;;
+  linux* | k*bsd*-gnu | gnu*)
+    link_all_deplibs=no
+    ;;
   esac
 
   ld_shlibs=yes
@@ -9244,7 +9268,7 @@ _LT_EOF
       fi
       ;;
 
-    netbsd*)
+    netbsd* | netbsdelf*-gnu)
       if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
 	archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
 	wlarc=
@@ -9421,6 +9445,7 @@ _LT_EOF
 	if test "$aix_use_runtimelinking" = yes; then
 	  shared_flag="$shared_flag "'${wl}-G'
 	fi
+	link_all_deplibs=no
       else
 	# not using gcc
 	if test "$host_cpu" = ia64; then
@@ -9874,7 +9899,7 @@ $as_echo "$lt_cv_irix_exported_symbol" >&6; }
       link_all_deplibs=yes
       ;;
 
-    netbsd*)
+    netbsd* | netbsdelf*-gnu)
       if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
 	archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
       else
@@ -10711,17 +10736,6 @@ freebsd* | dragonfly*)
   esac
   ;;
 
-gnu*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  ;;
-
 haiku*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
@@ -10838,7 +10852,7 @@ linux*oldld* | linux*aout* | linux*coff*)
   ;;
 
 # This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
@@ -10902,6 +10916,18 @@ fi
   dynamic_linker='GNU/Linux ld.so'
   ;;
 
+netbsdelf*-gnu)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='NetBSD ld.elf_so'
+  ;;
+
 netbsd*)
   version_type=sunos
   need_lib_prefix=no
@@ -11925,7 +11951,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="${ac_tool_prefix}gcc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -11965,7 +11991,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_CC="gcc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -12018,7 +12044,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="${ac_tool_prefix}cc"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -12059,7 +12085,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
        ac_prog_rejected=yes
        continue
@@ -12117,7 +12143,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -12161,7 +12187,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_CC="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -12357,8 +12383,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <stdarg.h>
 #include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
+struct stat;
 /* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
 struct buf { int x; };
 FILE * (*rcsopen) (struct buf *, struct stat *, int);
@@ -12456,6 +12481,7 @@ else
   # instance it was reported that on HP-UX the gcc test will end up
   # making a dummy file named `D' -- because `-MD' means `put the output
   # in D'.
+  rm -rf conftest.dir
   mkdir conftest.dir
   # Copy depcomp to subdir because otherwise we won't find it if we're
   # using a relative directory.
@@ -12515,7 +12541,7 @@ else
 	break
       fi
       ;;
-    msvisualcpp | msvcmsys)
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
       # This compiler won't grok `-c -o', but also, the minuso test has
       # not run yet.  These depmodes are late enough in the game, and
       # so weak that their functioning should not be impacted.
@@ -12598,7 +12624,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -12642,7 +12668,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_prog_ac_ct_CXX="$ac_prog"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -12840,6 +12866,7 @@ else
   # instance it was reported that on HP-UX the gcc test will end up
   # making a dummy file named `D' -- because `-MD' means `put the output
   # in D'.
+  rm -rf conftest.dir
   mkdir conftest.dir
   # Copy depcomp to subdir because otherwise we won't find it if we're
   # using a relative directory.
@@ -12899,7 +12926,7 @@ else
 	break
       fi
       ;;
-    msvisualcpp | msvcmsys)
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
       # This compiler won't grok `-c -o', but also, the minuso test has
       # not run yet.  These depmodes are late enough in the game, and
       # so weak that their functioning should not be impacted.
@@ -13763,9 +13790,6 @@ fi
         ld_shlibs_CXX=yes
         ;;
 
-      gnu*)
-        ;;
-
       haiku*)
         archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
         link_all_deplibs_CXX=yes
@@ -13927,7 +13951,7 @@ fi
         inherit_rpath_CXX=yes
         ;;
 
-      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
         case $cc_basename in
           KCC*)
 	    # Kuck and Associates, Inc. (KAI) C++ Compiler
@@ -14787,7 +14811,7 @@ lt_prog_compiler_static_CXX=
 	    ;;
 	esac
 	;;
-      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
 	case $cc_basename in
 	  KCC*)
 	    # KAI C++ Compiler
@@ -14851,7 +14875,7 @@ lt_prog_compiler_static_CXX=
 	    ;;
 	esac
 	;;
-      netbsd*)
+      netbsd* | netbsdelf*-gnu)
 	;;
       *qnx* | *nto*)
         # QNX uses GNU C++, but need to define -shared option too, otherwise
@@ -15222,6 +15246,9 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
       ;;
     esac
     ;;
+  linux* | k*bsd*-gnu | gnu*)
+    link_all_deplibs_CXX=no
+    ;;
   *)
     export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
     ;;
@@ -15656,17 +15683,6 @@ freebsd* | dragonfly*)
   esac
   ;;
 
-gnu*)
-  version_type=linux # correct to gnu/linux during the next big refactor
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  ;;
-
 haiku*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
@@ -15783,7 +15799,7 @@ linux*oldld* | linux*aout* | linux*coff*)
   ;;
 
 # This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
   version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
@@ -15847,6 +15863,18 @@ fi
   dynamic_linker='GNU/Linux ld.so'
   ;;
 
+netbsdelf*-gnu)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='NetBSD ld.elf_so'
+  ;;
+
 netbsd*)
   version_type=sunos
   need_lib_prefix=no
@@ -16166,11 +16194,11 @@ else
 int
 main ()
 {
-/* FIXME: Include the comments suggested by Paul. */
+
 #ifndef __cplusplus
-  /* Ultrix mips cc rejects this.  */
+  /* Ultrix mips cc rejects this sort of thing.  */
   typedef int charset[2];
-  const charset cs;
+  const charset cs = { 0, 0 };
   /* SunOS 4.1.1 cc rejects this.  */
   char const *const *pcpcc;
   char **ppc;
@@ -16187,8 +16215,9 @@ main ()
   ++pcpcc;
   ppc = (char**) pcpcc;
   pcpcc = (char const *const *) ppc;
-  { /* SCO 3.2v4 cc rejects this.  */
-    char *t;
+  { /* SCO 3.2v4 cc rejects this sort of thing.  */
+    char tx;
+    char *t = &tx;
     char const *s = 0 ? (char *) 0 : (char const *) 0;
 
     *t++ = 0;
@@ -16204,10 +16233,10 @@ main ()
     iptr p = 0;
     ++p;
   }
-  { /* AIX XL C 1.02.0.0 rejects this saying
+  { /* AIX XL C 1.02.0.0 rejects this sort of thing, saying
        "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */
-    struct s { int j; const int *ap[3]; };
-    struct s *b; b->j = 5;
+    struct s { int j; const int *ap[3]; } bx;
+    struct s *b = &bx; b->j = 5;
   }
   { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */
     const int foo = 10;
@@ -16968,7 +16997,9 @@ else
 fi
 
 
-# The cast to long int works around a bug in the HP C Compiler
+if test x$enable_mmx = xyes; then
+  CFLAGS="$CFLAGS -O -DUSE_MMX"
+    # The cast to long int works around a bug in the HP C Compiler
 # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
 # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
 # This bug is HP SR number 8606223364.
@@ -17001,12 +17032,9 @@ cat >>confdefs.h <<_ACEOF
 _ACEOF
 
 
-if test "$ac_cv_sizeof_long" = "8"; then
-       enable_mmx=no
-fi
-
-if test x$enable_mmx = xyes; then
- CFLAGS="$CFLAGS -O -DUSE_MMX"
+  if test "$ac_cv_sizeof_long" = "4"; then
+          CFLAGS="$CFLAGS -mmmx"
+  fi
 fi
 
 
@@ -17068,7 +17096,7 @@ do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
     for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
     ac_cv_path_SDL_CONFIG="$as_dir/$ac_word$ac_exec_ext"
     $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
@@ -17740,16 +17768,16 @@ if (echo >conf$$.file) 2>/dev/null; then
     # ... but there are two gotchas:
     # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
     # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
-    # In both cases, we have to default to `cp -p'.
+    # In both cases, we have to default to `cp -pR'.
     ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
-      as_ln_s='cp -p'
+      as_ln_s='cp -pR'
   elif ln conf$$.file conf$$ 2>/dev/null; then
     as_ln_s=ln
   else
-    as_ln_s='cp -p'
+    as_ln_s='cp -pR'
   fi
 else
-  as_ln_s='cp -p'
+  as_ln_s='cp -pR'
 fi
 rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
 rmdir conf$$.dir 2>/dev/null
@@ -17809,28 +17837,16 @@ else
   as_mkdir_p=false
 fi
 
-if test -x / >/dev/null 2>&1; then
-  as_test_x='test -x'
-else
-  if ls -dL / >/dev/null 2>&1; then
-    as_ls_L_option=L
-  else
-    as_ls_L_option=
-  fi
-  as_test_x='
-    eval sh -c '\''
-      if test -d "$1"; then
-	test -d "$1/.";
-      else
-	case $1 in #(
-	-*)set "./$1";;
-	esac;
-	case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
-	???[sx]*):;;*)false;;esac;fi
-    '\'' sh
-  '
-fi
-as_executable_p=$as_test_x
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
 
 # Sed expression to map a string onto a valid CPP name.
 as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@@ -17852,7 +17868,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # values after options handling.
 ac_log="
 This file was extended by $as_me, which was
-generated by GNU Autoconf 2.68.  Invocation command line was
+generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
   CONFIG_HEADERS  = $CONFIG_HEADERS
@@ -17909,10 +17925,10 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
 config.status
-configured by $0, generated by GNU Autoconf 2.68,
+configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2012 Free Software Foundation, Inc.
 This config.status script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it."
 
@@ -17992,7 +18008,7 @@ fi
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 if \$ac_cs_recheck; then
-  set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+  set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
   shift
   \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
   CONFIG_SHELL='$SHELL'
@@ -19769,3 +19785,7 @@ if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
 $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
 fi
 
+
+echo
+echo "Options summary:"
+echo "* --enable-mmx: $enable_mmx"
diff --git a/configure.in b/configure.in
index 0f39689..357479e 100644
--- a/configure.in
+++ b/configure.in
@@ -20,7 +20,7 @@ dnl Setup for automake
 
 MAJOR_VERSION=2
 MINOR_VERSION=0
-MICRO_VERSION=24
+MICRO_VERSION=25
 INTERFACE_AGE=1
 BINARY_AGE=10
 VERSION=$MAJOR_VERSION.$MINOR_VERSION.$MICRO_VERSION
@@ -105,15 +105,13 @@ LIBS="$LIBS $MATHLIB"
 dnl Check for NASM (for x86 assembly blit routines)
 AC_ARG_ENABLE(mmx,[  --enable-mmx            use mmx assembly on x86 [default=yes], disable this on non-x86 platforms],, enable_mmx=yes)
 
-dnl MMX not used on 64-bit platforms, as `popa' and `pusha' are not supported
-dnl in 64-bit mode.
-AC_CHECK_SIZEOF([long])
-if test "$ac_cv_sizeof_long" = "8"; then
-       enable_mmx=no
-fi
- 
 if test x$enable_mmx = xyes; then
- CFLAGS="$CFLAGS -O -DUSE_MMX"
+  CFLAGS="$CFLAGS -O -DUSE_MMX"
+  dnl MMX enabled by default in GCC on x86_64, but not i386:
+  AC_CHECK_SIZEOF([long])
+  if test "$ac_cv_sizeof_long" = "4"; then
+          CFLAGS="$CFLAGS -mmmx"
+  fi
 fi
 
 dnl Check for SDL
@@ -131,3 +129,7 @@ AC_OUTPUT([
 Makefile
 SDL_gfx.pc
 ])
+
+echo
+echo "Options summary:"
+echo "* --enable-mmx: $enable_mmx"
diff --git a/depcomp b/depcomp
index df8eea7..25a39e6 100755
--- a/depcomp
+++ b/depcomp
@@ -1,10 +1,10 @@
 #! /bin/sh
 # depcomp - compile a program generating dependencies as side-effects
 
-scriptversion=2009-04-28.21; # UTC
+scriptversion=2012-03-27.16; # UTC
 
-# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009 Free
-# Software Foundation, Inc.
+# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009, 2010,
+# 2011, 2012 Free Software Foundation, Inc.
 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -28,7 +28,7 @@ scriptversion=2009-04-28.21; # UTC
 
 case $1 in
   '')
-     echo "$0: No command.  Try \`$0 --help' for more information." 1>&2
+     echo "$0: No command.  Try '$0 --help' for more information." 1>&2
      exit 1;
      ;;
   -h | --h*)
@@ -40,11 +40,11 @@ as side-effects.
 
 Environment variables:
   depmode     Dependency tracking mode.
-  source      Source file read by `PROGRAMS ARGS'.
-  object      Object file output by `PROGRAMS ARGS'.
+  source      Source file read by 'PROGRAMS ARGS'.
+  object      Object file output by 'PROGRAMS ARGS'.
   DEPDIR      directory where to store dependencies.
   depfile     Dependency file to output.
-  tmpdepfile  Temporary file to use when outputing dependencies.
+  tmpdepfile  Temporary file to use when outputting dependencies.
   libtool     Whether libtool is used (yes/no).
 
 Report bugs to <bug-automake at gnu.org>.
@@ -57,6 +57,12 @@ EOF
     ;;
 esac
 
+# A tabulation character.
+tab='	'
+# A newline character.
+nl='
+'
+
 if test -z "$depmode" || test -z "$source" || test -z "$object"; then
   echo "depcomp: Variables source, object and depmode must be set" 1>&2
   exit 1
@@ -90,10 +96,24 @@ if test "$depmode" = msvcmsys; then
    # This is just like msvisualcpp but w/o cygpath translation.
    # Just convert the backslash-escaped backslashes to single forward
    # slashes to satisfy depend.m4
-   cygpath_u="sed s,\\\\\\\\,/,g"
+   cygpath_u='sed s,\\\\,/,g'
    depmode=msvisualcpp
 fi
 
+if test "$depmode" = msvc7msys; then
+   # This is just like msvc7 but w/o cygpath translation.
+   # Just convert the backslash-escaped backslashes to single forward
+   # slashes to satisfy depend.m4
+   cygpath_u='sed s,\\\\,/,g'
+   depmode=msvc7
+fi
+
+if test "$depmode" = xlc; then
+   # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
+   gccflag=-qmakedep=gcc,-MF
+   depmode=gcc
+fi
+
 case "$depmode" in
 gcc3)
 ## gcc 3 implements dependency tracking that does exactly what
@@ -148,20 +168,21 @@ gcc)
 ## The second -e expression handles DOS-style file names with drive letters.
   sed -e 's/^[^:]*: / /' \
       -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
-## This next piece of magic avoids the `deleted header file' problem.
+## This next piece of magic avoids the "deleted header file" problem.
 ## The problem is that when a header file which appears in a .P file
 ## is deleted, the dependency causes make to die (because there is
 ## typically no way to rebuild the header).  We avoid this by adding
 ## dummy dependencies for each header file.  Too bad gcc doesn't do
 ## this for us directly.
-  tr ' ' '
-' < "$tmpdepfile" |
-## Some versions of gcc put a space before the `:'.  On the theory
+  tr ' ' "$nl" < "$tmpdepfile" |
+## Some versions of gcc put a space before the ':'.  On the theory
 ## that the space means something, we add a space to the output as
-## well.
+## well.  hp depmode also adds that space, but also prefixes the VPATH
+## to the object.  Take care to not repeat it in the output.
 ## Some versions of the HPUX 10.20 sed can't process this invocation
 ## correctly.  Breaking it into two sed invocations is a workaround.
-    sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+    sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
+      | sed -e 's/$/ :/' >> "$depfile"
   rm -f "$tmpdepfile"
   ;;
 
@@ -193,18 +214,15 @@ sgi)
     # clever and replace this with sed code, as IRIX sed won't handle
     # lines with more than a fixed number of characters (4096 in
     # IRIX 6.2 sed, 8192 in IRIX 6.5).  We also remove comment lines;
-    # the IRIX cc adds comments like `#:fec' to the end of the
+    # the IRIX cc adds comments like '#:fec' to the end of the
     # dependency line.
-    tr ' ' '
-' < "$tmpdepfile" \
+    tr ' ' "$nl" < "$tmpdepfile" \
     | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
-    tr '
-' ' ' >> "$depfile"
+    tr "$nl" ' ' >> "$depfile"
     echo >> "$depfile"
 
     # The second pass generates a dummy entry for each header file.
-    tr ' ' '
-' < "$tmpdepfile" \
+    tr ' ' "$nl" < "$tmpdepfile" \
    | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
    >> "$depfile"
   else
@@ -216,10 +234,17 @@ sgi)
   rm -f "$tmpdepfile"
   ;;
 
+xlc)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
 aix)
   # The C for AIX Compiler uses -M and outputs the dependencies
   # in a .u file.  In older versions, this file always lives in the
-  # current directory.  Also, the AIX compiler puts `$object:' at the
+  # current directory.  Also, the AIX compiler puts '$object:' at the
   # start of each line; $object doesn't have directory information.
   # Version 6 uses the directory in both cases.
   dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
@@ -249,12 +274,11 @@ aix)
     test -f "$tmpdepfile" && break
   done
   if test -f "$tmpdepfile"; then
-    # Each line is of the form `foo.o: dependent.h'.
+    # Each line is of the form 'foo.o: dependent.h'.
     # Do two passes, one to just change these to
-    # `$object: dependent.h' and one to simply `dependent.h:'.
+    # '$object: dependent.h' and one to simply 'dependent.h:'.
     sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
-    # That's a tab and a space in the [].
-    sed -e 's,^.*\.[a-z]*:[	 ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
+    sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
   else
     # The sourcefile does not contain any dependencies, so just
     # store a dummy comment line, to avoid errors with the Makefile
@@ -265,23 +289,26 @@ aix)
   ;;
 
 icc)
-  # Intel's C compiler understands `-MD -MF file'.  However on
-  #    icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
+  # Intel's C compiler and tcc (Tiny C Compiler) understand '-MD -MF file'.
+  # However on
+  #    $CC -MD -MF foo.d -c -o sub/foo.o sub/foo.c
   # ICC 7.0 will fill foo.d with something like
   #    foo.o: sub/foo.c
   #    foo.o: sub/foo.h
-  # which is wrong.  We want:
+  # which is wrong.  We want
   #    sub/foo.o: sub/foo.c
   #    sub/foo.o: sub/foo.h
   #    sub/foo.c:
   #    sub/foo.h:
   # ICC 7.1 will output
   #    foo.o: sub/foo.c sub/foo.h
-  # and will wrap long lines using \ :
+  # and will wrap long lines using '\':
   #    foo.o: sub/foo.c ... \
   #     sub/foo.h ... \
   #     ...
-
+  # tcc 0.9.26 (FIXME still under development at the moment of writing)
+  # will emit a similar output, but also prepend the continuation lines
+  # with horizontal tabulation characters.
   "$@" -MD -MF "$tmpdepfile"
   stat=$?
   if test $stat -eq 0; then :
@@ -290,15 +317,21 @@ icc)
     exit $stat
   fi
   rm -f "$depfile"
-  # Each line is of the form `foo.o: dependent.h',
-  # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
+  # Each line is of the form 'foo.o: dependent.h',
+  # or 'foo.o: dep1.h dep2.h \', or ' dep3.h dep4.h \'.
   # Do two passes, one to just change these to
-  # `$object: dependent.h' and one to simply `dependent.h:'.
-  sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
-  # Some versions of the HPUX 10.20 sed can't process this invocation
-  # correctly.  Breaking it into two sed invocations is a workaround.
-  sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
-    sed -e 's/$/ :/' >> "$depfile"
+  # '$object: dependent.h' and one to simply 'dependent.h:'.
+  sed -e "s/^[ $tab][ $tab]*/  /" -e "s,^[^:]*:,$object :," \
+    < "$tmpdepfile" > "$depfile"
+  sed '
+    s/[ '"$tab"'][ '"$tab"']*/ /g
+    s/^ *//
+    s/ *\\*$//
+    s/^[^:]*: *//
+    /^$/d
+    /:$/d
+    s/$/ :/
+  ' < "$tmpdepfile" >> "$depfile"
   rm -f "$tmpdepfile"
   ;;
 
@@ -334,7 +367,7 @@ hp2)
   done
   if test -f "$tmpdepfile"; then
     sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
-    # Add `dependent.h:' lines.
+    # Add 'dependent.h:' lines.
     sed -ne '2,${
 	       s/^ *//
 	       s/ \\*$//
@@ -349,9 +382,9 @@ hp2)
 
 tru64)
    # The Tru64 compiler uses -MD to generate dependencies as a side
-   # effect.  `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
+   # effect.  'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
    # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
-   # dependencies in `foo.d' instead, so we check for that too.
+   # dependencies in 'foo.d' instead, so we check for that too.
    # Subdirectories are respected.
    dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
    test "x$dir" = "x$object" && dir=
@@ -397,14 +430,59 @@ tru64)
    done
    if test -f "$tmpdepfile"; then
       sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
-      # That's a tab and a space in the [].
-      sed -e 's,^.*\.[a-z]*:[	 ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
+      sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
    else
       echo "#dummy" > "$depfile"
    fi
    rm -f "$tmpdepfile"
    ;;
 
+msvc7)
+  if test "$libtool" = yes; then
+    showIncludes=-Wc,-showIncludes
+  else
+    showIncludes=-showIncludes
+  fi
+  "$@" $showIncludes > "$tmpdepfile"
+  stat=$?
+  grep -v '^Note: including file: ' "$tmpdepfile"
+  if test "$stat" = 0; then :
+  else
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  # The first sed program below extracts the file names and escapes
+  # backslashes for cygpath.  The second sed program outputs the file
+  # name when reading, but also accumulates all include files in the
+  # hold buffer in order to output them again at the end.  This only
+  # works with sed implementations that can handle large buffers.
+  sed < "$tmpdepfile" -n '
+/^Note: including file:  *\(.*\)/ {
+  s//\1/
+  s/\\/\\\\/g
+  p
+}' | $cygpath_u | sort -u | sed -n '
+s/ /\\ /g
+s/\(.*\)/'"$tab"'\1 \\/p
+s/.\(.*\) \\/\1:/
+H
+$ {
+  s/.*/'"$tab"'/
+  G
+  p
+}' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+msvc7msys)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
 #nosideeffect)
   # This comment above is used by automake to tell side-effect
   # dependency tracking mechanisms from slower ones.
@@ -422,7 +500,7 @@ dashmstdout)
     shift
   fi
 
-  # Remove `-o $object'.
+  # Remove '-o $object'.
   IFS=" "
   for arg
   do
@@ -442,15 +520,14 @@ dashmstdout)
   done
 
   test -z "$dashmflag" && dashmflag=-M
-  # Require at least two characters before searching for `:'
+  # Require at least two characters before searching for ':'
   # in the target name.  This is to cope with DOS-style filenames:
-  # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
+  # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
   "$@" $dashmflag |
-    sed 's:^[  ]*[^: ][^:][^:]*\:[    ]*:'"$object"'\: :' > "$tmpdepfile"
+    sed 's:^['"$tab"' ]*[^:'"$tab"' ][^:][^:]*\:['"$tab"' ]*:'"$object"'\: :' > "$tmpdepfile"
   rm -f "$depfile"
   cat < "$tmpdepfile" > "$depfile"
-  tr ' ' '
-' < "$tmpdepfile" | \
+  tr ' ' "$nl" < "$tmpdepfile" | \
 ## Some versions of the HPUX 10.20 sed can't process this invocation
 ## correctly.  Breaking it into two sed invocations is a workaround.
     sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
@@ -503,9 +580,10 @@ makedepend)
   touch "$tmpdepfile"
   ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
   rm -f "$depfile"
-  cat < "$tmpdepfile" > "$depfile"
-  sed '1,2d' "$tmpdepfile" | tr ' ' '
-' | \
+  # makedepend may prepend the VPATH from the source file name to the object.
+  # No need to regex-escape $object, excess matching of '.' is harmless.
+  sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
+  sed '1,2d' "$tmpdepfile" | tr ' ' "$nl" | \
 ## Some versions of the HPUX 10.20 sed can't process this invocation
 ## correctly.  Breaking it into two sed invocations is a workaround.
     sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
@@ -525,7 +603,7 @@ cpp)
     shift
   fi
 
-  # Remove `-o $object'.
+  # Remove '-o $object'.
   IFS=" "
   for arg
   do
@@ -594,8 +672,8 @@ msvisualcpp)
   sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
   rm -f "$depfile"
   echo "$object : \\" > "$depfile"
-  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::	\1 \\:p' >> "$depfile"
-  echo "	" >> "$depfile"
+  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
+  echo "$tab" >> "$depfile"
   sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
   rm -f "$tmpdepfile"
   ;;
diff --git a/install-sh b/install-sh
index 6781b98..a9244eb 100755
--- a/install-sh
+++ b/install-sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 # install - install a program, script, or datafile
 
-scriptversion=2009-04-28.21; # UTC
+scriptversion=2011-01-19.21; # UTC
 
 # This originates from X11R5 (mit/util/scripts/install.sh), which was
 # later released in X11R6 (xc/config/util/install.sh) with the
@@ -156,6 +156,10 @@ while test $# -ne 0; do
     -s) stripcmd=$stripprog;;
 
     -t) dst_arg=$2
+	# Protect names problematic for `test' and other utilities.
+	case $dst_arg in
+	  -* | [=\(\)!]) dst_arg=./$dst_arg;;
+	esac
 	shift;;
 
     -T) no_target_directory=true;;
@@ -186,6 +190,10 @@ if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
     fi
     shift # arg
     dst_arg=$arg
+    # Protect names problematic for `test' and other utilities.
+    case $dst_arg in
+      -* | [=\(\)!]) dst_arg=./$dst_arg;;
+    esac
   done
 fi
 
@@ -200,7 +208,11 @@ if test $# -eq 0; then
 fi
 
 if test -z "$dir_arg"; then
-  trap '(exit $?); exit' 1 2 13 15
+  do_exit='(exit $ret); exit $ret'
+  trap "ret=129; $do_exit" 1
+  trap "ret=130; $do_exit" 2
+  trap "ret=141; $do_exit" 13
+  trap "ret=143; $do_exit" 15
 
   # Set umask so as not to create temps with too-generous modes.
   # However, 'strip' requires both read and write access to temps.
@@ -228,9 +240,9 @@ fi
 
 for src
 do
-  # Protect names starting with `-'.
+  # Protect names problematic for `test' and other utilities.
   case $src in
-    -*) src=./$src;;
+    -* | [=\(\)!]) src=./$src;;
   esac
 
   if test -n "$dir_arg"; then
@@ -252,12 +264,7 @@ do
       echo "$0: no destination specified." >&2
       exit 1
     fi
-
     dst=$dst_arg
-    # Protect names starting with `-'.
-    case $dst in
-      -*) dst=./$dst;;
-    esac
 
     # If destination is a directory, append the input filename; won't work
     # if double slashes aren't ignored.
@@ -385,7 +392,7 @@ do
 
       case $dstdir in
 	/*) prefix='/';;
-	-*) prefix='./';;
+	[-=\(\)!]*) prefix='./';;
 	*)  prefix='';;
       esac
 
@@ -403,7 +410,7 @@ do
 
       for d
       do
-	test -z "$d" && continue
+	test X"$d" = X && continue
 
 	prefix=$prefix$d
 	if test -d "$prefix"; then
diff --git a/ltmain.sh b/ltmain.sh
index 63ae69d..33f642a 100644
--- a/ltmain.sh
+++ b/ltmain.sh
@@ -70,7 +70,7 @@
 #         compiler:		$LTCC
 #         compiler flags:		$LTCFLAGS
 #         linker:		$LD (gnu? $with_gnu_ld)
-#         $progname:	(GNU libtool) 2.4.2
+#         $progname:	(GNU libtool) 2.4.2 Debian-2.4.2-1.1
 #         automake:	$automake_version
 #         autoconf:	$autoconf_version
 #
@@ -80,7 +80,7 @@
 
 PROGRAM=libtool
 PACKAGE=libtool
-VERSION=2.4.2
+VERSION="2.4.2 Debian-2.4.2-1.1"
 TIMESTAMP=""
 package_revision=1.3337
 
@@ -6124,7 +6124,10 @@ func_mode_link ()
 	case $pass in
 	dlopen) libs="$dlfiles" ;;
 	dlpreopen) libs="$dlprefiles" ;;
-	link) libs="$deplibs %DEPLIBS% $dependency_libs" ;;
+	link)
+	  libs="$deplibs %DEPLIBS%"
+	  test "X$link_all_deplibs" != Xno && libs="$libs $dependency_libs"
+	  ;;
 	esac
       fi
       if test "$linkmode,$pass" = "lib,dlpreopen"; then
@@ -6444,19 +6447,19 @@ func_mode_link ()
 	    # It is a libtool convenience library, so add in its objects.
 	    func_append convenience " $ladir/$objdir/$old_library"
 	    func_append old_convenience " $ladir/$objdir/$old_library"
+	    tmp_libs=
+	    for deplib in $dependency_libs; do
+	      deplibs="$deplib $deplibs"
+	      if $opt_preserve_dup_deps ; then
+		case "$tmp_libs " in
+		*" $deplib "*) func_append specialdeplibs " $deplib" ;;
+		esac
+	      fi
+	      func_append tmp_libs " $deplib"
+	    done
 	  elif test "$linkmode" != prog && test "$linkmode" != lib; then
 	    func_fatal_error "\`$lib' is not a convenience library"
 	  fi
-	  tmp_libs=
-	  for deplib in $dependency_libs; do
-	    deplibs="$deplib $deplibs"
-	    if $opt_preserve_dup_deps ; then
-	      case "$tmp_libs " in
-	      *" $deplib "*) func_append specialdeplibs " $deplib" ;;
-	      esac
-	    fi
-	    func_append tmp_libs " $deplib"
-	  done
 	  continue
 	fi # $pass = conv
 
@@ -7349,6 +7352,9 @@ func_mode_link ()
 	    revision="$number_minor"
 	    lt_irix_increment=no
 	    ;;
+	  *)
+	    func_fatal_configuration "$modename: unknown library version type \`$version_type'"
+	    ;;
 	  esac
 	  ;;
 	no)
diff --git a/missing b/missing
index 28055d2..86a8fc3 100755
--- a/missing
+++ b/missing
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Common stub for a few missing GNU programs while installing.
 
-scriptversion=2009-04-28.21; # UTC
+scriptversion=2012-01-06.13; # UTC
 
 # Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006,
-# 2008, 2009 Free Software Foundation, Inc.
+# 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
 # Originally by Fran,cois Pinard <pinard at iro.umontreal.ca>, 1996.
 
 # This program is free software; you can redistribute it and/or modify
@@ -84,7 +84,6 @@ Supported PROGRAM values:
   help2man     touch the output file
   lex          create \`lex.yy.c', if possible, from existing .c
   makeinfo     touch the output file
-  tar          try tar, gnutar, gtar, then tar without non-portable flags
   yacc         create \`y.tab.[ch]', if possible, from existing .[ch]
 
 Version suffixes to PROGRAM as well as the prefixes \`gnu-', \`gnu', and
@@ -122,15 +121,6 @@ case $1 in
     # Not GNU programs, they don't have --version.
     ;;
 
-  tar*)
-    if test -n "$run"; then
-       echo 1>&2 "ERROR: \`tar' requires --run"
-       exit 1
-    elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
-       exit 1
-    fi
-    ;;
-
   *)
     if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
        # We have it, but it failed.
@@ -226,7 +216,7 @@ WARNING: \`$1' $msg.  You should only need it if
          \`Bison' from any GNU archive site."
     rm -f y.tab.c y.tab.h
     if test $# -ne 1; then
-        eval LASTARG="\${$#}"
+        eval LASTARG=\${$#}
 	case $LASTARG in
 	*.y)
 	    SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
@@ -256,7 +246,7 @@ WARNING: \`$1' is $msg.  You should only need it if
          \`Flex' from any GNU archive site."
     rm -f lex.yy.c
     if test $# -ne 1; then
-        eval LASTARG="\${$#}"
+        eval LASTARG=\${$#}
 	case $LASTARG in
 	*.l)
 	    SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
@@ -318,41 +308,6 @@ WARNING: \`$1' is $msg.  You should only need it if
     touch $file
     ;;
 
-  tar*)
-    shift
-
-    # We have already tried tar in the generic part.
-    # Look for gnutar/gtar before invocation to avoid ugly error
-    # messages.
-    if (gnutar --version > /dev/null 2>&1); then
-       gnutar "$@" && exit 0
-    fi
-    if (gtar --version > /dev/null 2>&1); then
-       gtar "$@" && exit 0
-    fi
-    firstarg="$1"
-    if shift; then
-	case $firstarg in
-	*o*)
-	    firstarg=`echo "$firstarg" | sed s/o//`
-	    tar "$firstarg" "$@" && exit 0
-	    ;;
-	esac
-	case $firstarg in
-	*h*)
-	    firstarg=`echo "$firstarg" | sed s/h//`
-	    tar "$firstarg" "$@" && exit 0
-	    ;;
-	esac
-    fi
-
-    echo 1>&2 "\
-WARNING: I can't seem to be able to run \`tar' with the given arguments.
-         You may want to install GNU tar or Free paxutils, or check the
-         command line arguments."
-    exit 1
-    ;;
-
   *)
     echo 1>&2 "\
 WARNING: \`$1' is needed, and is $msg.

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-sdl/packages/sdlgfx.git



More information about the pkg-sdl-commits mailing list