[mupen64plus] 177/262: Fix wrong high level emulation of rsp on big endian systems

Thu Nov 26 05:59:32 UTC 2015

This is an automated email from the git hooks/post-receive script.

ecsv-guest pushed a commit to branch master
in repository mupen64plus.

commit f183d3721ade9dd85fc095f35d9ab9e776da761f
Author: Sven Eckelmann <sven.eckelmann at gmx.de>
Date:   Mon Apr 26 23:12:18 2010 +0200

    Fix wrong high level emulation of rsp on big endian systems
---
 debian/changelog                       |    2 +
 debian/patches/rsp_hle_bigendian.patch | 1128 ++++++++++++++++++++++++++++++++
 debian/patches/series                  |    1 +
 3 files changed, 1131 insertions(+)

diff --git a/debian/changelog b/debian/changelog
index 470f560..e7b107c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,8 @@ mupen64plus (1.5+dfsg1-10) UNRELEASED; urgency=low
   * debian/patches:
     - Add rsp_ucode2_reset.patch, Reset status of specific ucode2 hacks after
       starting again
+    - Add rsp_hle_bigendian.patch, Fix wrong high level emulation of rsp on big
+      endian systems
 
  -- Sven Eckelmann <sven.eckelmann at gmx.de>  Mon, 26 Apr 2010 23:06:55 +0200
 
diff --git a/debian/patches/rsp_hle_bigendian.patch b/debian/patches/rsp_hle_bigendian.patch
new file mode 100644
index 0000000..e4b09f2
--- /dev/null
+++ b/debian/patches/rsp_hle_bigendian.patch
@@ -0,0 +1,1128 @@
+Description: Fix wrong high level emulation of rsp on big endian systems
+Origin: backport: http://bitbucket.org/richard42/mupen64plus-rsp-hle/changeset/c4309d79d3ba
+Origin: backport: http://bitbucket.org/richard42/mupen64plus-rsp-hle/changeset/199ec8223923
+
+---
+diff --git a/rsp_hle/hle.h b/rsp_hle/hle.h
+index 79ece2577c0cbf7b671257cdb5d20fa9241295d1..9ba7585ca38b4a98b5908e9612b895add3a4d7ce 100644
+--- a/rsp_hle/hle.h
++++ b/rsp_hle/hle.h
+@@ -26,9 +26,11 @@
+ 
+ #ifdef _BIG_ENDIAN
+ #define S 0
++#define S16 0
+ #define S8 0
+ #else
+ #define S 1
++#define S16 2
+ #define S8 3
+ #endif
+ 
+diff --git a/rsp_hle/jpeg.c b/rsp_hle/jpeg.c
+index 5785bea8e30f1c3b0ced1d31bc275e6ce57250f0..ac87bf265ed4815c4f8d460520714b2e9cb00d7e 100644
+--- a/rsp_hle/jpeg.c
++++ b/rsp_hle/jpeg.c
+@@ -404,8 +404,8 @@ void jpg_uncompress(OSTask_t *task)
+                 m[30*8+k] |= m[1*8+6];
+                 m[29*8+k] |= m[1*8+6];
+                 
+-                pic[(i*128+j*32+0+k)^1] = m[30*8+k];
+-                pic[(i*128+j*32+8+k)^1] = m[29*8+k];
++                pic[(i*128+j*32+0+k)^S] = m[30*8+k];
++                pic[(i*128+j*32+8+k)^S] = m[29*8+k];
+                 
+                 m[24*8+k] = m[20*8+k] + m[16*8+k];
+                 m[23*8+k] = m[19*8+k] + m[15*8+k];
+diff --git a/rsp_hle/ucode1.cpp b/rsp_hle/ucode1.cpp
+index c81d677b752b5ea0810663d6846577bcfe59b7bb..d8d47702636f4ad508546d23d5a29014ab384ea0 100644
+--- a/rsp_hle/ucode1.cpp
++++ b/rsp_hle/ucode1.cpp
+@@ -257,12 +257,12 @@ static void ENVMIXER () {
+         }
+ 
+     for (int x = 0; x < 8; x++) {
+-        i1=(int)inp[ptr^1];
+-        o1=(int)out[ptr^1];
+-        a1=(int)aux1[ptr^1];
++        i1=(int)inp[ptr^S];
++        o1=(int)out[ptr^S];
++        a1=(int)aux1[ptr^S];
+         if (AuxIncRate) {
+-            a2=(int)aux2[ptr^1];
+-            a3=(int)aux3[ptr^1];
++            a2=(int)aux2[ptr^S];
++            a3=(int)aux3[ptr^S];
+         }
+         // TODO: here...
+         //LAcc = LTrg;
+@@ -349,8 +349,8 @@ static void ENVMIXER () {
+         if(a1>32767) a1=32767;
+         else if(a1<-32768) a1=-32768;
+ 
+-        out[ptr^1]=o1;
+-        aux1[ptr^1]=a1;
++        out[ptr^S]=o1;
++        aux1[ptr^S]=a1;
+         if (AuxIncRate) {
+             //a2=((s64)(((s64)a2*0xfffe)+((s64)i1*AuxR*2)+0x8000)>>16);
+ 
+@@ -364,8 +364,8 @@ static void ENVMIXER () {
+             if(a3>32767) a3=32767;
+             else if(a3<-32768) a3=-32768;
+ 
+-            aux2[ptr^1]=a2;
+-            aux3[ptr^1]=a3;
++            aux2[ptr^S]=a2;
++            aux3[ptr^S]=a3;
+         }
+         ptr++;
+     }
+@@ -411,17 +411,18 @@ static void RESAMPLE () {
+     if ((Flags & 0x1) == 0) {
+         //memcpy (src+srcPtr, rsp.RDRAM+addy, 0x8);
+         for (int x=0; x < 4; x++)
+-            src[(srcPtr+x)^1] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^1];
++            src[(srcPtr+x)^S] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^S];
+         Accum = *(u16 *)(rsp.RDRAM+addy+10);
+     } else {
+         for (int x=0; x < 4; x++)
+-            src[(srcPtr+x)^1] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
++            src[(srcPtr+x)^S] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
+     }
+ 
+     for(int i=0;i < ((AudioCount+0xf)&0xFFF0)/2;i++)    {
+         //location = (((Accum * 0x40) >> 0x10) * 8);
+-        location = (Accum >> 0xa) << 0x3;
+-        lut = (s16 *)(((u8 *)ResampleLUT) + location);
++       // location is the fractional position between two samples
++        location = (Accum >> 0xa) * 4;
++        lut = (s16*)ResampleLUT + location;
+ 
+         // mov eax, dword ptr [src+srcPtr];
+         // movsx edx, word ptr [lut];
+@@ -435,29 +436,29 @@ static void RESAMPLE () {
+         // and edx, 0f000h
+ 
+         // imul
+-        temp =  ((s32)*(s16*)(src+((srcPtr+0)^1))*((s32)((s16)lut[0])));
++        temp =  ((s32)*(s16*)(src+((srcPtr+0)^S))*((s32)((s16)lut[0])));
+         accum = (s32)(temp >> 15);
+ 
+-        temp = ((s32)*(s16*)(src+((srcPtr+1)^1))*((s32)((s16)lut[1])));
++        temp = ((s32)*(s16*)(src+((srcPtr+1)^S))*((s32)((s16)lut[1])));
+         accum += (s32)(temp >> 15);
+ 
+-        temp = ((s32)*(s16*)(src+((srcPtr+2)^1))*((s32)((s16)lut[2])));
++        temp = ((s32)*(s16*)(src+((srcPtr+2)^S))*((s32)((s16)lut[2])));
+         accum += (s32)(temp >> 15);
+ 
+-        temp = ((s32)*(s16*)(src+((srcPtr+3)^1))*((s32)((s16)lut[3])));
++        temp = ((s32)*(s16*)(src+((srcPtr+3)^S))*((s32)((s16)lut[3])));
+         accum += (s32)(temp >> 15);
+ 
+         if (accum > 32767) accum = 32767;
+         if (accum < -32768) accum = -32768;
+ 
+-        dst[dstPtr^1] = (accum);
++        dst[dstPtr^S] = (accum);
+         dstPtr++;
+         Accum += Pitch;
+         srcPtr += (Accum>>16);
+         Accum&=0xffff;
+     }
+     for (int x=0; x < 4; x++)
+-        ((u16 *)rsp.RDRAM)[((addy/2)+x)^1] = src[(srcPtr+x)^1];
++        ((u16 *)rsp.RDRAM)[((addy/2)+x)^S] = src[(srcPtr+x)^S];
+     //memcpy (RSWORK, src+srcPtr, 0x8);
+     *(u16 *)(rsp.RDRAM+addy+10) = Accum;
+ }
+@@ -489,17 +490,17 @@ static void SETVOL () {
+ //u16 VolRamp_Left; // 0x0012(T8)
+     if(flags & A_LEFT) { // Set the Ramping values Target, Ramp
+         //loopval = (((u32)vol << 0x10) | (u32)voltarg);
+-        VolTrg_Left  = *(s16 *)&inst1;      // m_LeftVol
++        VolTrg_Left  = (s16)inst1;      // m_LeftVol
+         //VolRamp_Left = (s32)inst2;
+-        VolRamp_Left = *(s32 *)&inst2;//(u16)(inst2) | (s32)(s16)(inst2 << 0x10);
++        VolRamp_Left = (s32)inst2;//(u16)(inst2) | (s32)(s16)(inst2 << 0x10);
+         //fprintf (dfile, "Ramp Left: %f\n", (float)VolRamp_Left/65536.0);
+         //fprintf (dfile, "Ramp Left: %08X\n", inst2);
+         //VolRamp_Left = (s16)voltarg;  // m_LeftVolTarget
+         //VolRate_Left = (s16)volrate;  // m_LeftVolRate
+     } else { // A_RIGHT
+-        VolTrg_Right  = *(s16 *)&inst1;     // m_RightVol
++        VolTrg_Right  = (s16)inst1;     // m_RightVol
+         //VolRamp_Right = (s32)inst2;
+-        VolRamp_Right = *(s32 *)&inst2;//(u16)(inst2 >> 0x10) | (s32)(s16)(inst2 << 0x10);
++        VolRamp_Right = (s32)inst2;//(u16)(inst2 >> 0x10) | (s32)(s16)(inst2 << 0x10);
+         //fprintf (dfile, "Ramp Right: %f\n", (float)VolRamp_Right/65536.0);
+         //fprintf (dfile, "Ramp Right: %08X\n", inst2);
+         //VolRamp_Right = (s16)voltarg; // m_RightVolTarget
+@@ -546,8 +547,8 @@ static void ADPCM () { // Work in progress! :)
+         }
+     }
+ 
+-    int l1=out[15];
+-    int l2=out[14];
++    int l1=out[14^S];
++    int l2=out[15^S];
+     int inp1[8];
+     int inp2[8];
+     out+=16;
+@@ -558,7 +559,7 @@ static void ADPCM () { // Work in progress! :)
+                                                     // area of memory in the case of A_LOOP or just
+                                                     // the values we calculated the last time
+ 
+-        code=BufferSpace[(AudioInBuffer+inPtr)^3];
++        code=BufferSpace[(AudioInBuffer+inPtr)^S8];
+         index=code&0xf;
+         index<<=4;                                  // index into the adpcm code table
+         book1=(short *)&adpcmtable[index];
+@@ -576,7 +577,7 @@ static void ADPCM () { // Work in progress! :)
+         while(j<8)                                  // loop of 8, for 8 coded nibbles from 4 bytes
+                                                     // which yields 8 short pcm values
+         {
+-            icode=BufferSpace[(AudioInBuffer+inPtr)^3];
++            icode=BufferSpace[(AudioInBuffer+inPtr)^S8];
+             inPtr++;
+ 
+             inp1[j]=(s16)((icode&0xf0)<<8);         // this will in effect be signed
+@@ -596,7 +597,7 @@ static void ADPCM () { // Work in progress! :)
+         j=0;
+         while(j<8)
+         {
+-            icode=BufferSpace[(AudioInBuffer+inPtr)^3];
++            icode=BufferSpace[(AudioInBuffer+inPtr)^S8];
+             inPtr++;
+ 
+             inp2[j]=(short)((icode&0xf0)<<8);           // this will in effect be signed
+@@ -676,10 +677,10 @@ static void ADPCM () { // Work in progress! :)
+ 
+         for(j=0;j<8;j++)
+         {
+-            a[j^1]>>=11;
+-            if(a[j^1]>32767) a[j^1]=32767;
+-            else if(a[j^1]<-32768) a[j^1]=-32768;
+-            *(out++)=a[j^1];
++            a[j^S]>>=11;
++            if(a[j^S]>32767) a[j^S]=32767;
++            else if(a[j^S]<-32768) a[j^S]=-32768;
++            *(out++)=a[j^S];
+         }
+         l1=a[6];
+         l2=a[7];
+@@ -746,10 +747,10 @@ static void ADPCM () { // Work in progress! :)
+ 
+         for(j=0;j<8;j++)
+         {
+-            a[j^1]>>=11;
+-            if(a[j^1]>32767) a[j^1]=32767;
+-            else if(a[j^1]<-32768) a[j^1]=-32768;
+-            *(out++)=a[j^1];
++            a[j^S]>>=11;
++            if(a[j^S]>32767) a[j^S]=32767;
++            else if(a[j^S]<-32768) a[j^S]=-32768;
++            *(out++)=a[j^S];
+         }
+         l1=a[6];
+         l2=a[7];
+@@ -805,7 +806,7 @@ static void DMEMMOVE () { // Doesn't sound just right?... will fix when HLE is r
+ 
+     //memcpy (BufferSpace+v1, BufferSpace+v0, count-1);
+     for (cnt = 0; cnt < count; cnt++) {
+-        *(u8 *)(BufferSpace+((cnt+v1)^3)) = *(u8 *)(BufferSpace+((cnt+v0)^3));
++        *(u8 *)(BufferSpace+((cnt+v1)^S8)) = *(u8 *)(BufferSpace+((cnt+v0)^S8));
+     }
+ }
+ 
+@@ -818,17 +819,17 @@ static void LOADADPCM () { // Loads an ADPCM table - Works 100% Now 03-13-01
+     //assert ((inst1&0xffff) <= 0x80);
+     u16 *table = (u16 *)(rsp.RDRAM+v0);
+     for (u32 x = 0; x < ((inst1&0xffff)>>0x4); x++) {
+-        adpcmtable[0x1+(x<<3)] = table[0];
+-        adpcmtable[0x0+(x<<3)] = table[1];
++        adpcmtable[0x0+(x<<3)^S] = table[0];
++        adpcmtable[0x1+(x<<3)^S] = table[1];
+ 
+-        adpcmtable[0x3+(x<<3)] = table[2];
+-        adpcmtable[0x2+(x<<3)] = table[3];
++        adpcmtable[0x2+(x<<3)^S] = table[2];
++        adpcmtable[0x3+(x<<3)^S] = table[3];
+ 
+-        adpcmtable[0x5+(x<<3)] = table[4];
+-        adpcmtable[0x4+(x<<3)] = table[5];
++        adpcmtable[0x4+(x<<3)^S] = table[4];
++        adpcmtable[0x5+(x<<3)^S] = table[5];
+ 
+-        adpcmtable[0x7+(x<<3)] = table[6];
+-        adpcmtable[0x6+(x<<3)] = table[7];
++        adpcmtable[0x6+(x<<3)^S] = table[6];
++        adpcmtable[0x7+(x<<3)^S] = table[7];
+         table += 8;
+     }
+ }
+@@ -839,7 +840,7 @@ static void INTERLEAVE () { // Works... - 3-11-01
+     u16 *outbuff = (u16 *)(AudioOutBuffer+BufferSpace);
+     u16 *inSrcR;
+     u16 *inSrcL;
+-    u16 Left, Right;
++    u16 Left, Right, Left2, Right2;
+ 
+     inL = inst2 & 0xFFFF;
+     inR = (inst2 >> 16) & 0xFFFF;
+@@ -850,11 +851,20 @@ static void INTERLEAVE () { // Works... - 3-11-01
+     for (int x = 0; x < (AudioCount/4); x++) {
+         Left=*(inSrcL++);
+         Right=*(inSrcR++);
++        Left2=*(inSrcL++);
++        Right2=*(inSrcR++);
+ 
+-        *(outbuff++)=*(inSrcR++);
+-        *(outbuff++)=*(inSrcL++);
+-        *(outbuff++)=(u16)Right;
+-        *(outbuff++)=(u16)Left;
++#ifdef _BIG_ENDIAN
++        *(outbuff++)=Right;
++        *(outbuff++)=Left;
++        *(outbuff++)=Right2;
++        *(outbuff++)=Left2;
++#else
++        *(outbuff++)=Right2;
++        *(outbuff++)=Left2;
++        *(outbuff++)=Right;
++        *(outbuff++)=Left;
++#endif
+     }
+ }
+ 
+diff --git a/rsp_hle/ucode2.cpp b/rsp_hle/ucode2.cpp
+index aa53d2f298bc32186f8188a752db7825b27a8093..d7b6d7eeeb8fc5e92a74af15c7f0841af5210e90 100644
+--- a/rsp_hle/ucode2.cpp
++++ b/rsp_hle/ucode2.cpp
+@@ -64,17 +64,17 @@ static void LOADADPCM2 () { // Loads an ADPCM table - Works 100% Now 03-13-01
+     u16 *table = (u16 *)(rsp.RDRAM+v0); // Zelda2 Specific...
+ 
+     for (u32 x = 0; x < ((inst1&0xffff)>>0x4); x++) {
+-        adpcmtable[0x1+(x<<3)] = table[0];
+-        adpcmtable[0x0+(x<<3)] = table[1];
++        adpcmtable[0x0+(x<<3)^S] = table[0];
++        adpcmtable[0x1+(x<<3)^S] = table[1];
+ 
+-        adpcmtable[0x3+(x<<3)] = table[2];
+-        adpcmtable[0x2+(x<<3)] = table[3];
++        adpcmtable[0x2+(x<<3)^S] = table[2];
++        adpcmtable[0x3+(x<<3)^S] = table[3];
+ 
+-        adpcmtable[0x5+(x<<3)] = table[4];
+-        adpcmtable[0x4+(x<<3)] = table[5];
++        adpcmtable[0x4+(x<<3)^S] = table[4];
++        adpcmtable[0x5+(x<<3)^S] = table[5];
+ 
+-        adpcmtable[0x7+(x<<3)] = table[6];
+-        adpcmtable[0x6+(x<<3)] = table[7];
++        adpcmtable[0x6+(x<<3)^S] = table[6];
++        adpcmtable[0x7+(x<<3)^S] = table[7];
+         table += 8;
+     }
+ }
+@@ -148,13 +148,13 @@ static void ADPCM2 () { // Verified to be 100% Accurate...
+         }
+     }
+ 
+-    int l1=out[15];
+-    int l2=out[14];
++    int l1=out[14^S];
++    int l2=out[15^S];
+     int inp1[8];
+     int inp2[8];
+     out+=16;
+     while(count>0) {
+-        code=BufferSpace[(AudioInBuffer+inPtr)^3];
++        code=BufferSpace[(AudioInBuffer+inPtr)^S8];
+         index=code&0xf;
+         index<<=4;
+         book1=(short *)&adpcmtable[index];
+@@ -166,7 +166,7 @@ static void ADPCM2 () { // Verified to be 100% Accurate...
+         j=0;
+ 
+         while(j<8) {
+-            icode=BufferSpace[(AudioInBuffer+inPtr)^3];
++            icode=BufferSpace[(AudioInBuffer+inPtr)^S8];
+             inPtr++;
+ 
+             inp1[j]=(s16)((icode&mask1) << 8);          // this will in effect be signed
+@@ -196,7 +196,7 @@ static void ADPCM2 () { // Verified to be 100% Accurate...
+ 
+         j=0;
+         while(j<8) {
+-            icode=BufferSpace[(AudioInBuffer+inPtr)^3];
++            icode=BufferSpace[(AudioInBuffer+inPtr)^S8];
+             inPtr++;
+ 
+             inp2[j]=(s16)((icode&mask1) << 8);
+@@ -284,10 +284,10 @@ static void ADPCM2 () { // Verified to be 100% Accurate...
+ 
+         for(j=0;j<8;j++)
+         {
+-            a[j^1]>>=11;
+-            if(a[j^1]>32767) a[j^1]=32767;
+-            else if(a[j^1]<-32768) a[j^1]=-32768;
+-            *(out++)=a[j^1];
++            a[j^S]>>=11;
++            if(a[j^S]>32767) a[j^S]=32767;
++            else if(a[j^S]<-32768) a[j^S]=-32768;
++            *(out++)=a[j^S];
+         }
+         l1=a[6];
+         l2=a[7];
+@@ -354,10 +354,10 @@ static void ADPCM2 () { // Verified to be 100% Accurate...
+ 
+         for(j=0;j<8;j++)
+         {
+-            a[j^1]>>=11;
+-            if(a[j^1]>32767) a[j^1]=32767;
+-            else if(a[j^1]<-32768) a[j^1]=-32768;
+-            *(out++)=a[j^1];
++            a[j^S]>>=11;
++            if(a[j^S]>32767) a[j^S]=32767;
++            else if(a[j^S]<-32768) a[j^S]=-32768;
++            *(out++)=a[j^S];
+         }
+         l1=a[6];
+         l2=a[7];
+@@ -394,12 +394,12 @@ static void MIXER2 () { // Needs accuracy verification...
+     u16 dmemin  = (u16)(inst2 >> 0x10);
+     u16 dmemout = (u16)(inst2 & 0xFFFF);
+     u32 count   = ((inst1 >> 12) & 0xFF0);
+-    s32 gain    = (s16)(inst1 & 0xFFFF)*2;
++    s32 gain    = (s16)(inst1 & 0xFFFF);
+     s32 temp;
+ 
+     for (unsigned int x=0; x < count; x+=2) { // I think I can do this a lot easier 
+ 
+-        temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 16;
++        temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 15;
+         temp += *(s16 *)(BufferSpace+dmemout+x);
+             
+         if ((s32)temp > 32767) 
+@@ -435,11 +435,11 @@ static void RESAMPLE2 () {
+ 
+     if ((Flags & 0x1) == 0) {   
+         for (int x=0; x < 4; x++) //memcpy (src+srcPtr, rsp.RDRAM+addy, 0x8);
+-            src[(srcPtr+x)^1] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^1];
++            src[(srcPtr+x)^S] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^S];
+         Accum = *(u16 *)(rsp.RDRAM+addy+10);
+     } else {
+         for (int x=0; x < 4; x++)
+-            src[(srcPtr+x)^1] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
++            src[(srcPtr+x)^S] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
+     }
+ 
+     for(int i=0;i < ((AudioCount+0xf)&0xFFF0)/2;i++)    {
+@@ -447,29 +447,29 @@ static void RESAMPLE2 () {
+         //location = (Accum >> 0xa) << 0x3;
+         lut = (s16 *)(((u8 *)ResampleLUT) + location);
+ 
+-        temp =  ((s32)*(s16*)(src+((srcPtr+0)^1))*((s32)((s16)lut[0])));
++        temp =  ((s32)*(s16*)(src+((srcPtr+0)^S))*((s32)((s16)lut[0])));
+         accum = (s32)(temp >> 15);
+ 
+-        temp = ((s32)*(s16*)(src+((srcPtr+1)^1))*((s32)((s16)lut[1])));
++        temp = ((s32)*(s16*)(src+((srcPtr+1)^S))*((s32)((s16)lut[1])));
+         accum += (s32)(temp >> 15);
+ 
+-        temp = ((s32)*(s16*)(src+((srcPtr+2)^1))*((s32)((s16)lut[2])));
++        temp = ((s32)*(s16*)(src+((srcPtr+2)^S))*((s32)((s16)lut[2])));
+         accum += (s32)(temp >> 15);
+         
+-        temp = ((s32)*(s16*)(src+((srcPtr+3)^1))*((s32)((s16)lut[3])));
++        temp = ((s32)*(s16*)(src+((srcPtr+3)^S))*((s32)((s16)lut[3])));
+         accum += (s32)(temp >> 15);
+ 
+         if (accum > 32767) accum = 32767;
+         if (accum < -32768) accum = -32768;
+ 
+-        dst[dstPtr^1] = (s16)(accum);
++        dst[dstPtr^S] = (s16)(accum);
+         dstPtr++;
+         Accum += Pitch;
+         srcPtr += (Accum>>16);
+         Accum&=0xffff;
+     }
+     for (int x=0; x < 4; x++)
+-        ((u16 *)rsp.RDRAM)[((addy/2)+x)^1] = src[(srcPtr+x)^1];
++        ((u16 *)rsp.RDRAM)[((addy/2)+x)^S] = src[(srcPtr+x)^S];
+     *(u16 *)(rsp.RDRAM+addy+10) = (u16)Accum;
+     //memcpy (RSWORK, src+srcPtr, 0x8);
+ }
+@@ -489,7 +489,7 @@ static void DMEMMOVE2 () { // Needs accuracy verification...
+ 
+     //memcpy (dmem+v1, dmem+v0, count-1);
+     for (cnt = 0; cnt < count; cnt++) {
+-        *(u8 *)(BufferSpace+((cnt+v1)^3)) = *(u8 *)(BufferSpace+((cnt+v0)^3));
++        *(u8 *)(BufferSpace+((cnt+v1)^S8)) = *(u8 *)(BufferSpace+((cnt+v0)^S8));
+     }
+ }
+ 
+@@ -564,59 +564,59 @@ static void ENVMIXER2 () {
+     while (count > 0) {
+         int temp, x;
+         for (x=0; x < 0x8; x++) {
+-            vec9  = (s16)(((s32)buffs3[x^1] * (u32)env[0]) >> 0x10) ^ v2[0];
+-            vec10 = (s16)(((s32)buffs3[x^1] * (u32)env[2]) >> 0x10) ^ v2[1];
+-            temp = bufft6[x^1] + vec9;
++            vec9  = (s16)(((s32)buffs3[x^S] * (u32)env[0]) >> 0x10) ^ v2[0];
++            vec10 = (s16)(((s32)buffs3[x^S] * (u32)env[2]) >> 0x10) ^ v2[1];
++            temp = bufft6[x^S] + vec9;
+             if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-            bufft6[x^1] = temp;
+-            temp = bufft7[x^1] + vec10;
++            bufft6[x^S] = temp;
++            temp = bufft7[x^S] + vec10;
+             if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-            bufft7[x^1] = temp;
++            bufft7[x^S] = temp;
+             vec9  = (s16)(((s32)vec9  * (u32)env[4]) >> 0x10) ^ v2[2];
+             vec10 = (s16)(((s32)vec10 * (u32)env[4]) >> 0x10) ^ v2[3];
+             if (inst1 & 0x10) {
+-                temp = buffs0[x^1] + vec10;
++                temp = buffs0[x^S] + vec10;
+                 if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-                buffs0[x^1] = temp;
+-                temp = buffs1[x^1] + vec9;
++                buffs0[x^S] = temp;
++                temp = buffs1[x^S] + vec9;
+                 if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-                buffs1[x^1] = temp;
++                buffs1[x^S] = temp;
+             } else {
+-                temp = buffs0[x^1] + vec9;
++                temp = buffs0[x^S] + vec9;
+                 if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-                buffs0[x^1] = temp;
+-                temp = buffs1[x^1] + vec10;
++                buffs0[x^S] = temp;
++                temp = buffs1[x^S] + vec10;
+                 if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-                buffs1[x^1] = temp;
++                buffs1[x^S] = temp;
+             }
+         }
+ 
+         if (!isMKABI)
+         for (x=0x8; x < 0x10; x++) {
+-            vec9  = (s16)(((s32)buffs3[x^1] * (u32)env[1]) >> 0x10) ^ v2[0];
+-            vec10 = (s16)(((s32)buffs3[x^1] * (u32)env[3]) >> 0x10) ^ v2[1];
+-            temp = bufft6[x^1] + vec9;
++            vec9  = (s16)(((s32)buffs3[x^S] * (u32)env[1]) >> 0x10) ^ v2[0];
++            vec10 = (s16)(((s32)buffs3[x^S] * (u32)env[3]) >> 0x10) ^ v2[1];
++            temp = bufft6[x^S] + vec9;
+             if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-            bufft6[x^1] = temp;
+-            temp = bufft7[x^1] + vec10;
++            bufft6[x^S] = temp;
++           temp = bufft7[x^S] + vec10;
+             if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-            bufft7[x^1] = temp;
++            bufft7[x^S] = temp;
+             vec9  = (s16)(((s32)vec9  * (u32)env[5]) >> 0x10) ^ v2[2];
+             vec10 = (s16)(((s32)vec10 * (u32)env[5]) >> 0x10) ^ v2[3];
+             if (inst1 & 0x10) {
+-                temp = buffs0[x^1] + vec10;
++                temp = buffs0[x^S] + vec10;
+                 if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-                buffs0[x^1] = temp;
+-                temp = buffs1[x^1] + vec9;
++                buffs0[x^S] = temp;
++                temp = buffs1[x^S] + vec9;
+                 if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-                buffs1[x^1] = temp;
++                buffs1[x^S] = temp;
+             } else {
+-                temp = buffs0[x^1] + vec9;
++                temp = buffs0[x^S] + vec9;
+                 if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-                buffs0[x^1] = temp;
+-                temp = buffs1[x^1] + vec10;
++                buffs0[x^S] = temp;
++                temp = buffs1[x^S] + vec10;
+                 if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-                buffs1[x^1] = temp;
++                buffs1[x^S] = temp;
+             }
+         }
+         bufft6 += adder; bufft7 += adder;
+@@ -684,7 +684,7 @@ static void INTERL2 () {
+     src=(BYTE *)(BufferSpace);//[In];
+     dst=(BYTE *)(BufferSpace);//[Out];
+     while(Count) {
+-        *(short *)(dst+(Out^3)) = *(short *)(src+(In^3));
++        *(short *)(dst+(Out^S8)) = *(short *)(src+(In^S8));
+         Out += 2;
+         In  += 4;
+         Count--;
+@@ -696,7 +696,7 @@ static void INTERLEAVE2 () { // Needs accuracy verification...
+     u16 *outbuff;
+     u16 *inSrcR;
+     u16 *inSrcL;
+-    u16 Left, Right;
++    u16 Left, Right, Left2, Right2;
+     u32 count;
+     count   = ((inst1 >> 12) & 0xFF0);
+     if (count == 0) {
+@@ -715,11 +715,20 @@ static void INTERLEAVE2 () { // Needs accuracy verification...
+     for (u32 x = 0; x < (count/4); x++) {
+         Left=*(inSrcL++);
+         Right=*(inSrcR++);
++        Left2=*(inSrcL++);
++        Right2=*(inSrcR++);
+ 
+-        *(outbuff++)=*(inSrcR++);
+-        *(outbuff++)=*(inSrcL++);
+-        *(outbuff++)=(u16)Right;
+-        *(outbuff++)=(u16)Left;
++#ifdef _BIG_ENDIAN
++        *(outbuff++)=Right;
++        *(outbuff++)=Left;
++        *(outbuff++)=Right2;
++        *(outbuff++)=Left2;
++#else
++        *(outbuff++)=Right2;
++        *(outbuff++)=Left2;
++        *(outbuff++)=Right;
++        *(outbuff++)=Left;
++#endif
+     }
+ }
+ 
+@@ -735,7 +744,8 @@ static void ADDMIXER () {
+     for (int cntr = 0; cntr < Count; cntr+=2) {
+         temp = *outp + *inp;
+         if (temp > 32767)  temp = 32767; if (temp < -32768) temp = -32768;
+-        outp++; inp++;
++        *(outp++) = temp;
++        inp++;
+     }
+ }
+ 
+diff --git a/rsp_hle/ucode3.cpp b/rsp_hle/ucode3.cpp
+index 679bb365303266fdd2129c3c062bbbdd6d7f6849..4291ac2d674dd7ee565c8f8856eb9fb23f6f7395 100644
+--- a/rsp_hle/ucode3.cpp
++++ b/rsp_hle/ucode3.cpp
+@@ -83,17 +83,17 @@ static void SETVOL3 () {
+     u8 Flags = (u8)(inst1 >> 0x10);
+     if (Flags & 0x4) { // 288
+         if (Flags & 0x2) { // 290
+-            Vol_Left  = *(s16*)&inst1; // 0x50
+-            Env_Dry     = (s16)(*(s32*)&inst2 >> 0x10); // 0x4E
+-            Env_Wet     = *(s16*)&inst2; // 0x4C
++            Vol_Left  = (s16)inst1; // 0x50
++            Env_Dry   = (s16)(inst2 >> 0x10); // 0x4E
++            Env_Wet   = (s16)inst2; // 0x4C
+         } else {
+-            VolTrg_Right  = *(s16*)&inst1; // 0x46
++            VolTrg_Right  = (s16)inst1; // 0x46
+             //VolRamp_Right = (u16)(inst2 >> 0x10) | (s32)(s16)(inst2 << 0x10);
+-            VolRamp_Right = *(s32*)&inst2; // 0x48/0x4A
++            VolRamp_Right = (s32)inst2; // 0x48/0x4A
+         }
+     } else {
+-        VolTrg_Left  = *(s16*)&inst1; // 0x40
+-        VolRamp_Left = *(s32*)&inst2; // 0x42/0x44
++        VolTrg_Left  = (s16)inst1; // 0x40
++        VolRamp_Left = (s32)inst2; // 0x42/0x44
+     }
+ }
+ 
+@@ -121,7 +121,7 @@ static void ENVMIXER3 () {
+     s16 Wet, Dry;
+     s16 LTrg, RTrg;
+ 
+-    Vol_Right = (*(s16 *)&inst1);
++    Vol_Right = (s16)inst1;
+ 
+     if (flags & A_INIT) {
+         LAdder = VolRamp_Left / 8;
+@@ -197,9 +197,9 @@ static void ENVMIXER3 () {
+         MainL = ((Dry * LVol) + 0x4000) >> 15;
+         MainR = ((Dry * RVol) + 0x4000) >> 15;
+ 
+-        o1 = out [y^1];
+-        a1 = aux1[y^1];
+-        i1 = inp [y^1];
++        o1 = out [y^S];
++        a1 = aux1[y^S];
++        i1 = inp [y^S];
+ 
+         o1+=((i1*MainL)+0x4000)>>15;
+         a1+=((i1*MainR)+0x4000)>>15;
+@@ -214,13 +214,13 @@ static void ENVMIXER3 () {
+ 
+ // ****************************************************************
+ 
+-        out[y^1]=o1;
+-        aux1[y^1]=a1;
++        out[y^S]=o1;
++        aux1[y^S]=a1;
+ 
+ // ****************************************************************
+         //if (!(flags&A_AUX)) {
+-            a2 = aux2[y^1];
+-            a3 = aux3[y^1];
++            a2 = aux2[y^S];
++            a3 = aux3[y^S];
+ 
+             AuxL  = ((Wet * LVol) + 0x4000) >> 15;
+             AuxR  = ((Wet * RVol) + 0x4000) >> 15;
+@@ -234,8 +234,8 @@ static void ENVMIXER3 () {
+             if(a3>32767) a3=32767;
+             else if(a3<-32768) a3=-32768;
+ 
+-            aux2[y^1]=a2;
+-            aux3[y^1]=a3;
++            aux2[y^S]=a2;
++            aux3[y^S]=a3;
+         }
+     //}
+ 
+@@ -288,7 +288,7 @@ static void ENVMIXER3o () {
+     s16 Wet, Dry;
+ 
+     //fprintf (dfile, "\n----------------------------------------------------\n");
+-    Vol_Right = (*(s16 *)&inst1);
++    Vol_Right = inst1;
+     if (flags & A_INIT) {
+         LVol = (((s32)(s16)Vol_Left * VolRamp_Left) - ((s32)(s16)Vol_Left << 16)) >> 3; 
+         RVol = (((s32)(s16)Vol_Right * VolRamp_Right) - ((s32)(s16)Vol_Right << 16)) >> 3;
+@@ -320,12 +320,12 @@ static void ENVMIXER3o () {
+     //fprintf (dfile, "LTrg = %08X, LVol = %08X\n", LTrg, LVol);
+ 
+     for (int x=0; x<(0x170/2); x++) {
+-        i1=(int)inp[x^1];
+-        o1=(int)out[x^1];
+-        a1=(int)aux1[x^1];
++        i1=(int)inp[x^S];
++        o1=(int)out[x^S];
++        a1=(int)aux1[x^S];
+         if (AuxIncRate) {
+-            a2=(int)aux2[x^1];
+-            a3=(int)aux3[x^1];
++            a2=(int)aux2[x^S];
++            a3=(int)aux3[x^S];
+         }
+         // TODO: here...
+         //LAcc = (LTrg << 16);
+@@ -388,8 +388,8 @@ static void ENVMIXER3o () {
+         if(a1>32767) a1=32767;
+         else if(a1<-32768) a1=-32768;
+ 
+-        out[x^1]=o1;
+-        aux1[x^1]=a1;
++        out[x^S]=o1;
++        aux1[x^S]=a1;
+         if (AuxIncRate) {
+             a2+=(/*(a2*0x7fff)+*/(i1*AuxR)+0x4000)>>15;
+             a3+=(/*(a3*0x7fff)+*/(i1*AuxL)+0x4000)>>15;
+@@ -400,8 +400,8 @@ static void ENVMIXER3o () {
+             if(a3>32767) a3=32767;
+             else if(a3<-32768) a3=-32768;
+ 
+-            aux2[x^1]=a2;
+-            aux3[x^1]=a3;
++            aux2[x^S]=a2;
++            aux3[x^S]=a3;
+         }
+     }
+ 
+@@ -505,11 +505,11 @@ static void MIXER3 () { // Needs accuracy verification...
+     u16 dmemin  = (u16)(inst2 >> 0x10)  + 0x4f0;
+     u16 dmemout = (u16)(inst2 & 0xFFFF) + 0x4f0;
+     //u8  flags   = (u8)((inst1 >> 16) & 0xff);
+-    s32 gain    = (s16)(inst1 & 0xFFFF)*2;
++    s32 gain    = (s16)(inst1 & 0xFFFF);
+     s32 temp;
+ 
+-    for (int x=0; x < 0x170; x+=2) { // I think I can do this a lot easier 
+-        temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 16;
++    for (int x=0; x < 0x170; x+=2) { // I think I can do this a lot easier
++        temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 15;
+         temp += *(s16 *)(BufferSpace+dmemout+x);
+             
+         if ((s32)temp > 32767) 
+@@ -544,17 +544,17 @@ static void LOADADPCM3 () { // Loads an ADPCM table - Works 100% Now 03-13-01
+     //assert ((inst1&0xffff) <= 0x80);
+     u16 *table = (u16 *)(rsp.RDRAM+v0);
+     for (u32 x = 0; x < ((inst1&0xffff)>>0x4); x++) {
+-        adpcmtable[0x1+(x<<3)] = table[0];
+-        adpcmtable[0x0+(x<<3)] = table[1];
++        adpcmtable[0x0+(x<<3)^S] = table[0];
++        adpcmtable[0x1+(x<<3)^S] = table[1];
+ 
+-        adpcmtable[0x3+(x<<3)] = table[2];
+-        adpcmtable[0x2+(x<<3)] = table[3];
++        adpcmtable[0x2+(x<<3)^S] = table[2];
++        adpcmtable[0x3+(x<<3)^S] = table[3];
+ 
+-        adpcmtable[0x5+(x<<3)] = table[4];
+-        adpcmtable[0x4+(x<<3)] = table[5];
++        adpcmtable[0x4+(x<<3)^S] = table[4];
++        adpcmtable[0x5+(x<<3)^S] = table[5];
+ 
+-        adpcmtable[0x7+(x<<3)] = table[6];
+-        adpcmtable[0x6+(x<<3)] = table[7];
++        adpcmtable[0x6+(x<<3)^S] = table[6];
++        adpcmtable[0x7+(x<<3)^S] = table[7];
+         table += 8;
+     }
+ }
+@@ -568,7 +568,7 @@ static void DMEMMOVE3 () { // Needs accuracy verification...
+ 
+     //memcpy (dmem+v1, dmem+v0, count-1);
+     for (cnt = 0; cnt < count; cnt++) {
+-        *(u8 *)(BufferSpace+((cnt+v1)^3)) = *(u8 *)(BufferSpace+((cnt+v0)^3));
++        *(u8 *)(BufferSpace+((cnt+v1)^S8)) = *(u8 *)(BufferSpace+((cnt+v0)^S8));
+     }
+ }
+ 
+@@ -615,8 +615,8 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+         }
+     }
+ 
+-    int l1=out[15];
+-    int l2=out[14];
++    int l1=out[14^S];
++    int l2=out[15^S];
+     int inp1[8];
+     int inp2[8];
+     out+=16;
+@@ -627,7 +627,7 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+                                                     // area of memory in the case of A_LOOP or just
+                                                     // the values we calculated the last time
+ 
+-        code=BufferSpace[(0x4f0+inPtr)^3];
++        code=BufferSpace[(0x4f0+inPtr)^S8];
+         index=code&0xf;
+         index<<=4;                                  // index into the adpcm code table
+         book1=(short *)&adpcmtable[index];
+@@ -645,7 +645,7 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+         while(j<8)                                  // loop of 8, for 8 coded nibbles from 4 bytes
+                                                     // which yields 8 short pcm values
+         {
+-            icode=BufferSpace[(0x4f0+inPtr)^3];
++            icode=BufferSpace[(0x4f0+inPtr)^S8];
+             inPtr++;
+ 
+             inp1[j]=(s16)((icode&0xf0)<<8);         // this will in effect be signed
+@@ -665,7 +665,7 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+         j=0;
+         while(j<8)
+         {
+-            icode=BufferSpace[(0x4f0+inPtr)^3];
++            icode=BufferSpace[(0x4f0+inPtr)^S8];
+             inPtr++;
+ 
+             inp2[j]=(short)((icode&0xf0)<<8);           // this will in effect be signed
+@@ -745,11 +745,11 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+ 
+         for(j=0;j<8;j++)
+         {
+-            a[j^1]>>=11;
+-            if(a[j^1]>32767) a[j^1]=32767;
+-            else if(a[j^1]<-32768) a[j^1]=-32768;
+-            *(out++)=a[j^1];
+-            //*(out+j)=a[j^1];
++            a[j^S]>>=11;
++            if(a[j^S]>32767) a[j^S]=32767;
++            else if(a[j^S]<-32768) a[j^S]=-32768;
++            *(out++)=a[j^S];
++            //*(out+j)=a[j^S];
+         }
+         //out += 0x10;
+         l1=a[6];
+@@ -817,11 +817,11 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+ 
+         for(j=0;j<8;j++)
+         {
+-            a[j^1]>>=11;
+-            if(a[j^1]>32767) a[j^1]=32767;
+-            else if(a[j^1]<-32768) a[j^1]=-32768;
+-            *(out++)=a[j^1];
+-            //*(out+j+0x1f8)=a[j^1];
++            a[j^S]>>=11;
++            if(a[j^S]>32767) a[j^S]=32767;
++            else if(a[j^S]<-32768) a[j^S]=-32768;
++            *(out++)=a[j^S];
++            //*(out+j+0x1f8)=a[j^S];
+         }
+         l1=a[6];
+         l2=a[7];
+@@ -861,11 +861,11 @@ static void RESAMPLE3 () {
+ 
+     if ((Flags & 0x1) == 0) {   
+         for (int x=0; x < 4; x++) //memcpy (src+srcPtr, rsp.RDRAM+addy, 0x8);
+-            src[(srcPtr+x)^1] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^1];
++            src[(srcPtr+x)^S] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^S];
+         Accum = *(u16 *)(rsp.RDRAM+addy+10);
+     } else {
+         for (int x=0; x < 4; x++)
+-            src[(srcPtr+x)^1] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
++            src[(srcPtr+x)^S] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
+     }
+ 
+     for(int i=0;i < 0x170/2;i++)    {
+@@ -873,18 +873,18 @@ static void RESAMPLE3 () {
+         //location = (Accum >> 0xa) << 0x3;
+         lut = (s16 *)(((u8 *)ResampleLUT) + location);
+ 
+-        temp =  ((s32)*(s16*)(src+((srcPtr+0)^1))*((s32)((s16)lut[0])));
++        temp =  ((s32)*(s16*)(src+((srcPtr+0)^S))*((s32)((s16)lut[0])));
+         accum = (s32)(temp >> 15);
+ 
+-        temp = ((s32)*(s16*)(src+((srcPtr+1)^1))*((s32)((s16)lut[1])));
++        temp = ((s32)*(s16*)(src+((srcPtr+1)^S))*((s32)((s16)lut[1])));
+         accum += (s32)(temp >> 15);
+ 
+-        temp = ((s32)*(s16*)(src+((srcPtr+2)^1))*((s32)((s16)lut[2])));
++        temp = ((s32)*(s16*)(src+((srcPtr+2)^S))*((s32)((s16)lut[2])));
+         accum += (s32)(temp >> 15);
+         
+-        temp = ((s32)*(s16*)(src+((srcPtr+3)^1))*((s32)((s16)lut[3])));
++        temp = ((s32)*(s16*)(src+((srcPtr+3)^S))*((s32)((s16)lut[3])));
+         accum += (s32)(temp >> 15);
+-/*      temp =  ((s64)*(s16*)(src+((srcPtr+0)^1))*((s64)((s16)lut[0]<<1)));
++/*      temp =  ((s64)*(s16*)(src+((srcPtr+0)^S))*((s64)((s16)lut[0]<<1)));
+         if (temp & 0x8000) temp = (temp^0x8000) + 0x10000;
+         else temp = (temp^0x8000);
+         temp = (s32)(temp >> 16);
+@@ -892,7 +892,7 @@ static void RESAMPLE3 () {
+         if ((s32)temp < -32768) temp = -32768;
+         accum = (s32)(s16)temp;
+ 
+-        temp = ((s64)*(s16*)(src+((srcPtr+1)^1))*((s64)((s16)lut[1]<<1)));
++        temp = ((s64)*(s16*)(src+((srcPtr+1)^S))*((s64)((s16)lut[1]<<1)));
+         if (temp & 0x8000) temp = (temp^0x8000) + 0x10000;
+         else temp = (temp^0x8000);
+         temp = (s32)(temp >> 16);
+@@ -900,7 +900,7 @@ static void RESAMPLE3 () {
+         if ((s32)temp < -32768) temp = -32768;
+         accum += (s32)(s16)temp;
+ 
+-        temp = ((s64)*(s16*)(src+((srcPtr+2)^1))*((s64)((s16)lut[2]<<1)));
++        temp = ((s64)*(s16*)(src+((srcPtr+2)^S))*((s64)((s16)lut[2]<<1)));
+         if (temp & 0x8000) temp = (temp^0x8000) + 0x10000;
+         else temp = (temp^0x8000);
+         temp = (s32)(temp >> 16);
+@@ -908,7 +908,7 @@ static void RESAMPLE3 () {
+         if ((s32)temp < -32768) temp = -32768;
+         accum += (s32)(s16)temp;
+ 
+-        temp = ((s64)*(s16*)(src+((srcPtr+3)^1))*((s64)((s16)lut[3]<<1)));
++        temp = ((s64)*(s16*)(src+((srcPtr+3)^S))*((s64)((s16)lut[3]<<1)));
+         if (temp & 0x8000) temp = (temp^0x8000) + 0x10000;
+         else temp = (temp^0x8000);
+         temp = (s32)(temp >> 16);
+@@ -919,14 +919,14 @@ static void RESAMPLE3 () {
+         if (accum > 32767) accum = 32767;
+         if (accum < -32768) accum = -32768;
+ 
+-        dst[dstPtr^1] = (accum);
++        dst[dstPtr^S] = (accum);
+         dstPtr++;
+         Accum += Pitch;
+         srcPtr += (Accum>>16);
+         Accum&=0xffff;
+     }
+     for (int x=0; x < 4; x++)
+-        ((u16 *)rsp.RDRAM)[((addy/2)+x)^1] = src[(srcPtr+x)^1];
++        ((u16 *)rsp.RDRAM)[((addy/2)+x)^S] = src[(srcPtr+x)^S];
+     *(u16 *)(rsp.RDRAM+addy+10) = Accum;
+ }
+ 
+@@ -935,7 +935,7 @@ static void INTERLEAVE3 () { // Needs accuracy verification...
+     u16 *outbuff = (u16 *)(BufferSpace + 0x4f0);//(u16 *)(AudioOutBuffer+dmem);
+     u16 *inSrcR;
+     u16 *inSrcL;
+-    u16 Left, Right;
++    u16 Left, Right, Left2, Right2;
+ 
+     //inR = inst2 & 0xFFFF;
+     //inL = (inst2 >> 16) & 0xFFFF;
+@@ -946,11 +946,20 @@ static void INTERLEAVE3 () { // Needs accuracy verification...
+     for (int x = 0; x < (0x170/4); x++) {
+         Left=*(inSrcL++);
+         Right=*(inSrcR++);
++        Left2=*(inSrcL++);
++        Right2=*(inSrcR++);
+ 
+-        *(outbuff++)=*(inSrcR++);
+-        *(outbuff++)=*(inSrcL++);
+-        *(outbuff++)=(u16)Right;
+-        *(outbuff++)=(u16)Left;
++#ifdef _BIG_ENDIAN
++        *(outbuff++)=Right;
++        *(outbuff++)=Left;
++        *(outbuff++)=Right2;
++        *(outbuff++)=Left2;
++#else
++        *(outbuff++)=Right2;
++        *(outbuff++)=Left2;
++        *(outbuff++)=Right;
++        *(outbuff++)=Left;
++#endif
+ /*
+         Left=*(inSrcL++);
+         Right=*(inSrcR++);
+diff --git a/rsp_hle/ucode3mp3.cpp b/rsp_hle/ucode3mp3.cpp
+index c5a45a220b79d60ccb9e60b12a90f45ffe39a4b5..015839616096681e2ba4f0728d73f349b8571ee3 100644
+--- a/rsp_hle/ucode3mp3.cpp
++++ b/rsp_hle/ucode3mp3.cpp
+@@ -259,25 +259,25 @@ static void InnerLoop () {
+                 // Part 1: 100% Accurate
+ 
+                 int i;
+-                v[0] = *(s16 *)(mp3data+inPtr+(0x00^2)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^2)); v[0] += v[31];
+-                v[1] = *(s16 *)(mp3data+inPtr+(0x02^2)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^2)); v[1] += v[30];
+-                v[2] = *(s16 *)(mp3data+inPtr+(0x06^2)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^2)); v[2] += v[28];
+-                v[3] = *(s16 *)(mp3data+inPtr+(0x04^2)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^2)); v[3] += v[29];
++                v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] += v[31];
++                v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] += v[30];
++                v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] += v[28];
++                v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] += v[29];
+ 
+-                v[4] = *(s16 *)(mp3data+inPtr+(0x0E^2)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^2)); v[4] += v[24];
+-                v[5] = *(s16 *)(mp3data+inPtr+(0x0C^2)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^2)); v[5] += v[25];
+-                v[6] = *(s16 *)(mp3data+inPtr+(0x08^2)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^2)); v[6] += v[27];
+-                v[7] = *(s16 *)(mp3data+inPtr+(0x0A^2)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^2)); v[7] += v[26];
++                v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] += v[24];
++                v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] += v[25];
++                v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] += v[27];
++                v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] += v[26];
+ 
+-                v[8] = *(s16 *)(mp3data+inPtr+(0x1E^2)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^2)); v[8] += v[16];
+-                v[9] = *(s16 *)(mp3data+inPtr+(0x1C^2)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^2)); v[9] += v[17];
+-                v[10]= *(s16 *)(mp3data+inPtr+(0x18^2)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^2)); v[10]+= v[19];
+-                v[11]= *(s16 *)(mp3data+inPtr+(0x1A^2)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^2)); v[11]+= v[18];
++                v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] += v[16];
++                v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] += v[17];
++                v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]+= v[19];
++                v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]+= v[18];
+ 
+-                v[12]= *(s16 *)(mp3data+inPtr+(0x10^2)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^2)); v[12]+= v[23];
+-                v[13]= *(s16 *)(mp3data+inPtr+(0x12^2)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^2)); v[13]+= v[22];
+-                v[14]= *(s16 *)(mp3data+inPtr+(0x16^2)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^2)); v[14]+= v[20];
+-                v[15]= *(s16 *)(mp3data+inPtr+(0x14^2)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^2)); v[15]+= v[21];
++                v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]+= v[23];
++                v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]+= v[22];
++                v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]+= v[20];
++                v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]+= v[21];
+ 
+                 // Part 2-4
+ 
+@@ -380,25 +380,25 @@ static void InnerLoop () {
+ 
+                 // Part 6 - 100% Accurate
+ 
+-                v[0] = *(s16 *)(mp3data+inPtr+(0x00^2)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^2)); v[0] -= v[31];
+-                v[1] = *(s16 *)(mp3data+inPtr+(0x02^2)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^2)); v[1] -= v[30];
+-                v[2] = *(s16 *)(mp3data+inPtr+(0x06^2)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^2)); v[2] -= v[28];
+-                v[3] = *(s16 *)(mp3data+inPtr+(0x04^2)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^2)); v[3] -= v[29];
++                v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] -= v[31];
++                v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] -= v[30];
++                v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] -= v[28];
++                v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] -= v[29];
+ 
+-                v[4] = *(s16 *)(mp3data+inPtr+(0x0E^2)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^2)); v[4] -= v[24];
+-                v[5] = *(s16 *)(mp3data+inPtr+(0x0C^2)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^2)); v[5] -= v[25];
+-                v[6] = *(s16 *)(mp3data+inPtr+(0x08^2)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^2)); v[6] -= v[27];
+-                v[7] = *(s16 *)(mp3data+inPtr+(0x0A^2)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^2)); v[7] -= v[26];
++                v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] -= v[24];
++                v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] -= v[25];
++                v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] -= v[27];
++                v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] -= v[26];
+ 
+-                v[8] = *(s16 *)(mp3data+inPtr+(0x1E^2)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^2)); v[8] -= v[16];
+-                v[9] = *(s16 *)(mp3data+inPtr+(0x1C^2)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^2)); v[9] -= v[17];
+-                v[10]= *(s16 *)(mp3data+inPtr+(0x18^2)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^2)); v[10]-= v[19];
+-                v[11]= *(s16 *)(mp3data+inPtr+(0x1A^2)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^2)); v[11]-= v[18];
++                v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] -= v[16];
++                v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] -= v[17];
++                v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]-= v[19];
++                v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]-= v[18];
+ 
+-                v[12]= *(s16 *)(mp3data+inPtr+(0x10^2)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^2)); v[12]-= v[23];
+-                v[13]= *(s16 *)(mp3data+inPtr+(0x12^2)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^2)); v[13]-= v[22];
+-                v[14]= *(s16 *)(mp3data+inPtr+(0x16^2)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^2)); v[14]-= v[20];
+-                v[15]= *(s16 *)(mp3data+inPtr+(0x14^2)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^2)); v[15]-= v[21];
++                v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]-= v[23];
++                v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]-= v[22];
++                v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]-= v[20];
++                v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]-= v[21];
+ 
+                 //0, 1, 3, 2, 7, 6, 4, 5, 7, 6, 4, 5, 0, 1, 3, 2
+                 const u16 LUT6[16] = { 0xFFB2, 0xFD3A, 0xF10A, 0xF854,
+@@ -519,8 +519,8 @@ static void InnerLoop () {
+                     //Clamp(v0);
+                     //Clamp(v18);
+                     // clamp???
+-                    *(s16 *)(mp3data+(outPtr^2)) = v0;
+-                    *(s16 *)(mp3data+((outPtr+2)^2)) = v18;
++                    *(s16 *)(mp3data+(outPtr^S16)) = v0;
++                    *(s16 *)(mp3data+((outPtr+2)^S16)) = v18;
+                     outPtr+=4;
+                     addptr += 0x30;
+                     offset += 0x38;
+@@ -540,10 +540,10 @@ static void InnerLoop () {
+                 s32 mult4 = *(s32 *)(mp3data+0xCEC);
+                 if (t4 & 0x2) {
+                     v2 = (v2 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
+-                    *(s16 *)(mp3data+(outPtr^2)) = v2;
++                    *(s16 *)(mp3data+(outPtr^S16)) = v2;
+                 } else {
+                     v4 = (v4 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
+-                    *(s16 *)(mp3data+(outPtr^2)) = v4;
++                    *(s16 *)(mp3data+(outPtr^S16)) = v4;
+                     mult4 = *(u32 *)(mp3data+0xCE8);
+                 }
+                 addptr -= 0x50;
+@@ -569,8 +569,8 @@ static void InnerLoop () {
+                     //Clamp(v0);
+                     //Clamp(v18);
+                     // clamp???
+-                    *(s16 *)(mp3data+((outPtr+2)^2)) = v0;
+-                    *(s16 *)(mp3data+((outPtr+4)^2)) = v18;
++                    *(s16 *)(mp3data+((outPtr+2)^S16)) = v0;
++                    *(s16 *)(mp3data+((outPtr+4)^S16)) = v18;
+                     outPtr+=4;
+                     addptr -= 0x50;
+                 }
+@@ -584,21 +584,21 @@ static void InnerLoop () {
+                 hi1 = (int)hi1 >> 0x10;
+                 for (i = 0; i < 8; i++) {
+                     // v0
+-                    v = (*(s16 *)(mp3data+((tmp-0x40)^2)) * hi0);
++                    v = (*(s16 *)(mp3data+((tmp-0x40)^S16)) * hi0);
+                     if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+-                    *(s16 *)((u8 *)mp3data+((tmp-0x40)^2)) = (s16)v;
++                    *(s16 *)((u8 *)mp3data+((tmp-0x40)^S16)) = (s16)v;
+                     // v17
+-                    v = (*(s16 *)(mp3data+((tmp-0x30)^2)) * hi0);
++                    v = (*(s16 *)(mp3data+((tmp-0x30)^S16)) * hi0);
+                     if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+-                    *(s16 *)((u8 *)mp3data+((tmp-0x30)^2)) = v;
++                    *(s16 *)((u8 *)mp3data+((tmp-0x30)^S16)) = v;
+                     // v2
+-                    v = (*(s16 *)(mp3data+((tmp-0x1E)^2)) * hi1);
++                    v = (*(s16 *)(mp3data+((tmp-0x1E)^S16)) * hi1);
+                     if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+-                    *(s16 *)((u8 *)mp3data+((tmp-0x1E)^2)) = v;
++                    *(s16 *)((u8 *)mp3data+((tmp-0x1E)^S16)) = v;
+                     // v4
+-                    v = (*(s16 *)(mp3data+((tmp-0xE)^2)) * hi1);
++                    v = (*(s16 *)(mp3data+((tmp-0xE)^S16)) * hi1);
+                     if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+-                    *(s16 *)((u8 *)mp3data+((tmp-0xE)^2)) = v;
++                    *(s16 *)((u8 *)mp3data+((tmp-0xE)^S16)) = v;
+                     tmp += 2;
+                 }
+ }
diff --git a/debian/patches/series b/debian/patches/series
index 74f863c..687f8d1 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -38,3 +38,4 @@ rice_fog.patch
 ftbfs-gvariant-type-conflicts.path
 dont-install-unneeded.patch
 rsp_ucode2_reset.patch
+rsp_hle_bigendian.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-games/mupen64plus.git