[mupen64plus] 177/262: Fix wrong high level emulation of rsp on big endian systems
Sven Eckelmann
ecsv-guest at moszumanska.debian.org
Thu Nov 26 05:59:32 UTC 2015
This is an automated email from the git hooks/post-receive script.
ecsv-guest pushed a commit to branch master
in repository mupen64plus.
commit f183d3721ade9dd85fc095f35d9ab9e776da761f
Author: Sven Eckelmann <sven.eckelmann at gmx.de>
Date: Mon Apr 26 23:12:18 2010 +0200
Fix wrong high level emulation of rsp on big endian systems
---
debian/changelog | 2 +
debian/patches/rsp_hle_bigendian.patch | 1128 ++++++++++++++++++++++++++++++++
debian/patches/series | 1 +
3 files changed, 1131 insertions(+)
diff --git a/debian/changelog b/debian/changelog
index 470f560..e7b107c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,8 @@ mupen64plus (1.5+dfsg1-10) UNRELEASED; urgency=low
* debian/patches:
- Add rsp_ucode2_reset.patch, Reset status of specific ucode2 hacks after
starting again
+ - Add rsp_hle_bigendian.patch, Fix wrong high level emulation of rsp on big
+ endian systems
-- Sven Eckelmann <sven.eckelmann at gmx.de> Mon, 26 Apr 2010 23:06:55 +0200
diff --git a/debian/patches/rsp_hle_bigendian.patch b/debian/patches/rsp_hle_bigendian.patch
new file mode 100644
index 0000000..e4b09f2
--- /dev/null
+++ b/debian/patches/rsp_hle_bigendian.patch
@@ -0,0 +1,1128 @@
+Description: Fix wrong high level emulation of rsp on big endian systems
+Origin: backport: http://bitbucket.org/richard42/mupen64plus-rsp-hle/changeset/c4309d79d3ba
+Origin: backport: http://bitbucket.org/richard42/mupen64plus-rsp-hle/changeset/199ec8223923
+
+---
+diff --git a/rsp_hle/hle.h b/rsp_hle/hle.h
+index 79ece2577c0cbf7b671257cdb5d20fa9241295d1..9ba7585ca38b4a98b5908e9612b895add3a4d7ce 100644
+--- a/rsp_hle/hle.h
++++ b/rsp_hle/hle.h
+@@ -26,9 +26,11 @@
+
+ #ifdef _BIG_ENDIAN
+ #define S 0
++#define S16 0
+ #define S8 0
+ #else
+ #define S 1
++#define S16 2
+ #define S8 3
+ #endif
+
+diff --git a/rsp_hle/jpeg.c b/rsp_hle/jpeg.c
+index 5785bea8e30f1c3b0ced1d31bc275e6ce57250f0..ac87bf265ed4815c4f8d460520714b2e9cb00d7e 100644
+--- a/rsp_hle/jpeg.c
++++ b/rsp_hle/jpeg.c
+@@ -404,8 +404,8 @@ void jpg_uncompress(OSTask_t *task)
+ m[30*8+k] |= m[1*8+6];
+ m[29*8+k] |= m[1*8+6];
+
+- pic[(i*128+j*32+0+k)^1] = m[30*8+k];
+- pic[(i*128+j*32+8+k)^1] = m[29*8+k];
++ pic[(i*128+j*32+0+k)^S] = m[30*8+k];
++ pic[(i*128+j*32+8+k)^S] = m[29*8+k];
+
+ m[24*8+k] = m[20*8+k] + m[16*8+k];
+ m[23*8+k] = m[19*8+k] + m[15*8+k];
+diff --git a/rsp_hle/ucode1.cpp b/rsp_hle/ucode1.cpp
+index c81d677b752b5ea0810663d6846577bcfe59b7bb..d8d47702636f4ad508546d23d5a29014ab384ea0 100644
+--- a/rsp_hle/ucode1.cpp
++++ b/rsp_hle/ucode1.cpp
+@@ -257,12 +257,12 @@ static void ENVMIXER () {
+ }
+
+ for (int x = 0; x < 8; x++) {
+- i1=(int)inp[ptr^1];
+- o1=(int)out[ptr^1];
+- a1=(int)aux1[ptr^1];
++ i1=(int)inp[ptr^S];
++ o1=(int)out[ptr^S];
++ a1=(int)aux1[ptr^S];
+ if (AuxIncRate) {
+- a2=(int)aux2[ptr^1];
+- a3=(int)aux3[ptr^1];
++ a2=(int)aux2[ptr^S];
++ a3=(int)aux3[ptr^S];
+ }
+ // TODO: here...
+ //LAcc = LTrg;
+@@ -349,8 +349,8 @@ static void ENVMIXER () {
+ if(a1>32767) a1=32767;
+ else if(a1<-32768) a1=-32768;
+
+- out[ptr^1]=o1;
+- aux1[ptr^1]=a1;
++ out[ptr^S]=o1;
++ aux1[ptr^S]=a1;
+ if (AuxIncRate) {
+ //a2=((s64)(((s64)a2*0xfffe)+((s64)i1*AuxR*2)+0x8000)>>16);
+
+@@ -364,8 +364,8 @@ static void ENVMIXER () {
+ if(a3>32767) a3=32767;
+ else if(a3<-32768) a3=-32768;
+
+- aux2[ptr^1]=a2;
+- aux3[ptr^1]=a3;
++ aux2[ptr^S]=a2;
++ aux3[ptr^S]=a3;
+ }
+ ptr++;
+ }
+@@ -411,17 +411,18 @@ static void RESAMPLE () {
+ if ((Flags & 0x1) == 0) {
+ //memcpy (src+srcPtr, rsp.RDRAM+addy, 0x8);
+ for (int x=0; x < 4; x++)
+- src[(srcPtr+x)^1] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^1];
++ src[(srcPtr+x)^S] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^S];
+ Accum = *(u16 *)(rsp.RDRAM+addy+10);
+ } else {
+ for (int x=0; x < 4; x++)
+- src[(srcPtr+x)^1] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
++ src[(srcPtr+x)^S] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
+ }
+
+ for(int i=0;i < ((AudioCount+0xf)&0xFFF0)/2;i++) {
+ //location = (((Accum * 0x40) >> 0x10) * 8);
+- location = (Accum >> 0xa) << 0x3;
+- lut = (s16 *)(((u8 *)ResampleLUT) + location);
++ // location is the fractional position between two samples
++ location = (Accum >> 0xa) * 4;
++ lut = (s16*)ResampleLUT + location;
+
+ // mov eax, dword ptr [src+srcPtr];
+ // movsx edx, word ptr [lut];
+@@ -435,29 +436,29 @@ static void RESAMPLE () {
+ // and edx, 0f000h
+
+ // imul
+- temp = ((s32)*(s16*)(src+((srcPtr+0)^1))*((s32)((s16)lut[0])));
++ temp = ((s32)*(s16*)(src+((srcPtr+0)^S))*((s32)((s16)lut[0])));
+ accum = (s32)(temp >> 15);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+1)^1))*((s32)((s16)lut[1])));
++ temp = ((s32)*(s16*)(src+((srcPtr+1)^S))*((s32)((s16)lut[1])));
+ accum += (s32)(temp >> 15);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+2)^1))*((s32)((s16)lut[2])));
++ temp = ((s32)*(s16*)(src+((srcPtr+2)^S))*((s32)((s16)lut[2])));
+ accum += (s32)(temp >> 15);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+3)^1))*((s32)((s16)lut[3])));
++ temp = ((s32)*(s16*)(src+((srcPtr+3)^S))*((s32)((s16)lut[3])));
+ accum += (s32)(temp >> 15);
+
+ if (accum > 32767) accum = 32767;
+ if (accum < -32768) accum = -32768;
+
+- dst[dstPtr^1] = (accum);
++ dst[dstPtr^S] = (accum);
+ dstPtr++;
+ Accum += Pitch;
+ srcPtr += (Accum>>16);
+ Accum&=0xffff;
+ }
+ for (int x=0; x < 4; x++)
+- ((u16 *)rsp.RDRAM)[((addy/2)+x)^1] = src[(srcPtr+x)^1];
++ ((u16 *)rsp.RDRAM)[((addy/2)+x)^S] = src[(srcPtr+x)^S];
+ //memcpy (RSWORK, src+srcPtr, 0x8);
+ *(u16 *)(rsp.RDRAM+addy+10) = Accum;
+ }
+@@ -489,17 +490,17 @@ static void SETVOL () {
+ //u16 VolRamp_Left; // 0x0012(T8)
+ if(flags & A_LEFT) { // Set the Ramping values Target, Ramp
+ //loopval = (((u32)vol << 0x10) | (u32)voltarg);
+- VolTrg_Left = *(s16 *)&inst1; // m_LeftVol
++ VolTrg_Left = (s16)inst1; // m_LeftVol
+ //VolRamp_Left = (s32)inst2;
+- VolRamp_Left = *(s32 *)&inst2;//(u16)(inst2) | (s32)(s16)(inst2 << 0x10);
++ VolRamp_Left = (s32)inst2;//(u16)(inst2) | (s32)(s16)(inst2 << 0x10);
+ //fprintf (dfile, "Ramp Left: %f\n", (float)VolRamp_Left/65536.0);
+ //fprintf (dfile, "Ramp Left: %08X\n", inst2);
+ //VolRamp_Left = (s16)voltarg; // m_LeftVolTarget
+ //VolRate_Left = (s16)volrate; // m_LeftVolRate
+ } else { // A_RIGHT
+- VolTrg_Right = *(s16 *)&inst1; // m_RightVol
++ VolTrg_Right = (s16)inst1; // m_RightVol
+ //VolRamp_Right = (s32)inst2;
+- VolRamp_Right = *(s32 *)&inst2;//(u16)(inst2 >> 0x10) | (s32)(s16)(inst2 << 0x10);
++ VolRamp_Right = (s32)inst2;//(u16)(inst2 >> 0x10) | (s32)(s16)(inst2 << 0x10);
+ //fprintf (dfile, "Ramp Right: %f\n", (float)VolRamp_Right/65536.0);
+ //fprintf (dfile, "Ramp Right: %08X\n", inst2);
+ //VolRamp_Right = (s16)voltarg; // m_RightVolTarget
+@@ -546,8 +547,8 @@ static void ADPCM () { // Work in progress! :)
+ }
+ }
+
+- int l1=out[15];
+- int l2=out[14];
++ int l1=out[14^S];
++ int l2=out[15^S];
+ int inp1[8];
+ int inp2[8];
+ out+=16;
+@@ -558,7 +559,7 @@ static void ADPCM () { // Work in progress! :)
+ // area of memory in the case of A_LOOP or just
+ // the values we calculated the last time
+
+- code=BufferSpace[(AudioInBuffer+inPtr)^3];
++ code=BufferSpace[(AudioInBuffer+inPtr)^S8];
+ index=code&0xf;
+ index<<=4; // index into the adpcm code table
+ book1=(short *)&adpcmtable[index];
+@@ -576,7 +577,7 @@ static void ADPCM () { // Work in progress! :)
+ while(j<8) // loop of 8, for 8 coded nibbles from 4 bytes
+ // which yields 8 short pcm values
+ {
+- icode=BufferSpace[(AudioInBuffer+inPtr)^3];
++ icode=BufferSpace[(AudioInBuffer+inPtr)^S8];
+ inPtr++;
+
+ inp1[j]=(s16)((icode&0xf0)<<8); // this will in effect be signed
+@@ -596,7 +597,7 @@ static void ADPCM () { // Work in progress! :)
+ j=0;
+ while(j<8)
+ {
+- icode=BufferSpace[(AudioInBuffer+inPtr)^3];
++ icode=BufferSpace[(AudioInBuffer+inPtr)^S8];
+ inPtr++;
+
+ inp2[j]=(short)((icode&0xf0)<<8); // this will in effect be signed
+@@ -676,10 +677,10 @@ static void ADPCM () { // Work in progress! :)
+
+ for(j=0;j<8;j++)
+ {
+- a[j^1]>>=11;
+- if(a[j^1]>32767) a[j^1]=32767;
+- else if(a[j^1]<-32768) a[j^1]=-32768;
+- *(out++)=a[j^1];
++ a[j^S]>>=11;
++ if(a[j^S]>32767) a[j^S]=32767;
++ else if(a[j^S]<-32768) a[j^S]=-32768;
++ *(out++)=a[j^S];
+ }
+ l1=a[6];
+ l2=a[7];
+@@ -746,10 +747,10 @@ static void ADPCM () { // Work in progress! :)
+
+ for(j=0;j<8;j++)
+ {
+- a[j^1]>>=11;
+- if(a[j^1]>32767) a[j^1]=32767;
+- else if(a[j^1]<-32768) a[j^1]=-32768;
+- *(out++)=a[j^1];
++ a[j^S]>>=11;
++ if(a[j^S]>32767) a[j^S]=32767;
++ else if(a[j^S]<-32768) a[j^S]=-32768;
++ *(out++)=a[j^S];
+ }
+ l1=a[6];
+ l2=a[7];
+@@ -805,7 +806,7 @@ static void DMEMMOVE () { // Doesn't sound just right?... will fix when HLE is r
+
+ //memcpy (BufferSpace+v1, BufferSpace+v0, count-1);
+ for (cnt = 0; cnt < count; cnt++) {
+- *(u8 *)(BufferSpace+((cnt+v1)^3)) = *(u8 *)(BufferSpace+((cnt+v0)^3));
++ *(u8 *)(BufferSpace+((cnt+v1)^S8)) = *(u8 *)(BufferSpace+((cnt+v0)^S8));
+ }
+ }
+
+@@ -818,17 +819,17 @@ static void LOADADPCM () { // Loads an ADPCM table - Works 100% Now 03-13-01
+ //assert ((inst1&0xffff) <= 0x80);
+ u16 *table = (u16 *)(rsp.RDRAM+v0);
+ for (u32 x = 0; x < ((inst1&0xffff)>>0x4); x++) {
+- adpcmtable[0x1+(x<<3)] = table[0];
+- adpcmtable[0x0+(x<<3)] = table[1];
++ adpcmtable[0x0+(x<<3)^S] = table[0];
++ adpcmtable[0x1+(x<<3)^S] = table[1];
+
+- adpcmtable[0x3+(x<<3)] = table[2];
+- adpcmtable[0x2+(x<<3)] = table[3];
++ adpcmtable[0x2+(x<<3)^S] = table[2];
++ adpcmtable[0x3+(x<<3)^S] = table[3];
+
+- adpcmtable[0x5+(x<<3)] = table[4];
+- adpcmtable[0x4+(x<<3)] = table[5];
++ adpcmtable[0x4+(x<<3)^S] = table[4];
++ adpcmtable[0x5+(x<<3)^S] = table[5];
+
+- adpcmtable[0x7+(x<<3)] = table[6];
+- adpcmtable[0x6+(x<<3)] = table[7];
++ adpcmtable[0x6+(x<<3)^S] = table[6];
++ adpcmtable[0x7+(x<<3)^S] = table[7];
+ table += 8;
+ }
+ }
+@@ -839,7 +840,7 @@ static void INTERLEAVE () { // Works... - 3-11-01
+ u16 *outbuff = (u16 *)(AudioOutBuffer+BufferSpace);
+ u16 *inSrcR;
+ u16 *inSrcL;
+- u16 Left, Right;
++ u16 Left, Right, Left2, Right2;
+
+ inL = inst2 & 0xFFFF;
+ inR = (inst2 >> 16) & 0xFFFF;
+@@ -850,11 +851,20 @@ static void INTERLEAVE () { // Works... - 3-11-01
+ for (int x = 0; x < (AudioCount/4); x++) {
+ Left=*(inSrcL++);
+ Right=*(inSrcR++);
++ Left2=*(inSrcL++);
++ Right2=*(inSrcR++);
+
+- *(outbuff++)=*(inSrcR++);
+- *(outbuff++)=*(inSrcL++);
+- *(outbuff++)=(u16)Right;
+- *(outbuff++)=(u16)Left;
++#ifdef _BIG_ENDIAN
++ *(outbuff++)=Right;
++ *(outbuff++)=Left;
++ *(outbuff++)=Right2;
++ *(outbuff++)=Left2;
++#else
++ *(outbuff++)=Right2;
++ *(outbuff++)=Left2;
++ *(outbuff++)=Right;
++ *(outbuff++)=Left;
++#endif
+ }
+ }
+
+diff --git a/rsp_hle/ucode2.cpp b/rsp_hle/ucode2.cpp
+index aa53d2f298bc32186f8188a752db7825b27a8093..d7b6d7eeeb8fc5e92a74af15c7f0841af5210e90 100644
+--- a/rsp_hle/ucode2.cpp
++++ b/rsp_hle/ucode2.cpp
+@@ -64,17 +64,17 @@ static void LOADADPCM2 () { // Loads an ADPCM table - Works 100% Now 03-13-01
+ u16 *table = (u16 *)(rsp.RDRAM+v0); // Zelda2 Specific...
+
+ for (u32 x = 0; x < ((inst1&0xffff)>>0x4); x++) {
+- adpcmtable[0x1+(x<<3)] = table[0];
+- adpcmtable[0x0+(x<<3)] = table[1];
++ adpcmtable[0x0+(x<<3)^S] = table[0];
++ adpcmtable[0x1+(x<<3)^S] = table[1];
+
+- adpcmtable[0x3+(x<<3)] = table[2];
+- adpcmtable[0x2+(x<<3)] = table[3];
++ adpcmtable[0x2+(x<<3)^S] = table[2];
++ adpcmtable[0x3+(x<<3)^S] = table[3];
+
+- adpcmtable[0x5+(x<<3)] = table[4];
+- adpcmtable[0x4+(x<<3)] = table[5];
++ adpcmtable[0x4+(x<<3)^S] = table[4];
++ adpcmtable[0x5+(x<<3)^S] = table[5];
+
+- adpcmtable[0x7+(x<<3)] = table[6];
+- adpcmtable[0x6+(x<<3)] = table[7];
++ adpcmtable[0x6+(x<<3)^S] = table[6];
++ adpcmtable[0x7+(x<<3)^S] = table[7];
+ table += 8;
+ }
+ }
+@@ -148,13 +148,13 @@ static void ADPCM2 () { // Verified to be 100% Accurate...
+ }
+ }
+
+- int l1=out[15];
+- int l2=out[14];
++ int l1=out[14^S];
++ int l2=out[15^S];
+ int inp1[8];
+ int inp2[8];
+ out+=16;
+ while(count>0) {
+- code=BufferSpace[(AudioInBuffer+inPtr)^3];
++ code=BufferSpace[(AudioInBuffer+inPtr)^S8];
+ index=code&0xf;
+ index<<=4;
+ book1=(short *)&adpcmtable[index];
+@@ -166,7 +166,7 @@ static void ADPCM2 () { // Verified to be 100% Accurate...
+ j=0;
+
+ while(j<8) {
+- icode=BufferSpace[(AudioInBuffer+inPtr)^3];
++ icode=BufferSpace[(AudioInBuffer+inPtr)^S8];
+ inPtr++;
+
+ inp1[j]=(s16)((icode&mask1) << 8); // this will in effect be signed
+@@ -196,7 +196,7 @@ static void ADPCM2 () { // Verified to be 100% Accurate...
+
+ j=0;
+ while(j<8) {
+- icode=BufferSpace[(AudioInBuffer+inPtr)^3];
++ icode=BufferSpace[(AudioInBuffer+inPtr)^S8];
+ inPtr++;
+
+ inp2[j]=(s16)((icode&mask1) << 8);
+@@ -284,10 +284,10 @@ static void ADPCM2 () { // Verified to be 100% Accurate...
+
+ for(j=0;j<8;j++)
+ {
+- a[j^1]>>=11;
+- if(a[j^1]>32767) a[j^1]=32767;
+- else if(a[j^1]<-32768) a[j^1]=-32768;
+- *(out++)=a[j^1];
++ a[j^S]>>=11;
++ if(a[j^S]>32767) a[j^S]=32767;
++ else if(a[j^S]<-32768) a[j^S]=-32768;
++ *(out++)=a[j^S];
+ }
+ l1=a[6];
+ l2=a[7];
+@@ -354,10 +354,10 @@ static void ADPCM2 () { // Verified to be 100% Accurate...
+
+ for(j=0;j<8;j++)
+ {
+- a[j^1]>>=11;
+- if(a[j^1]>32767) a[j^1]=32767;
+- else if(a[j^1]<-32768) a[j^1]=-32768;
+- *(out++)=a[j^1];
++ a[j^S]>>=11;
++ if(a[j^S]>32767) a[j^S]=32767;
++ else if(a[j^S]<-32768) a[j^S]=-32768;
++ *(out++)=a[j^S];
+ }
+ l1=a[6];
+ l2=a[7];
+@@ -394,12 +394,12 @@ static void MIXER2 () { // Needs accuracy verification...
+ u16 dmemin = (u16)(inst2 >> 0x10);
+ u16 dmemout = (u16)(inst2 & 0xFFFF);
+ u32 count = ((inst1 >> 12) & 0xFF0);
+- s32 gain = (s16)(inst1 & 0xFFFF)*2;
++ s32 gain = (s16)(inst1 & 0xFFFF);
+ s32 temp;
+
+ for (unsigned int x=0; x < count; x+=2) { // I think I can do this a lot easier
+
+- temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 16;
++ temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 15;
+ temp += *(s16 *)(BufferSpace+dmemout+x);
+
+ if ((s32)temp > 32767)
+@@ -435,11 +435,11 @@ static void RESAMPLE2 () {
+
+ if ((Flags & 0x1) == 0) {
+ for (int x=0; x < 4; x++) //memcpy (src+srcPtr, rsp.RDRAM+addy, 0x8);
+- src[(srcPtr+x)^1] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^1];
++ src[(srcPtr+x)^S] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^S];
+ Accum = *(u16 *)(rsp.RDRAM+addy+10);
+ } else {
+ for (int x=0; x < 4; x++)
+- src[(srcPtr+x)^1] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
++ src[(srcPtr+x)^S] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
+ }
+
+ for(int i=0;i < ((AudioCount+0xf)&0xFFF0)/2;i++) {
+@@ -447,29 +447,29 @@ static void RESAMPLE2 () {
+ //location = (Accum >> 0xa) << 0x3;
+ lut = (s16 *)(((u8 *)ResampleLUT) + location);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+0)^1))*((s32)((s16)lut[0])));
++ temp = ((s32)*(s16*)(src+((srcPtr+0)^S))*((s32)((s16)lut[0])));
+ accum = (s32)(temp >> 15);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+1)^1))*((s32)((s16)lut[1])));
++ temp = ((s32)*(s16*)(src+((srcPtr+1)^S))*((s32)((s16)lut[1])));
+ accum += (s32)(temp >> 15);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+2)^1))*((s32)((s16)lut[2])));
++ temp = ((s32)*(s16*)(src+((srcPtr+2)^S))*((s32)((s16)lut[2])));
+ accum += (s32)(temp >> 15);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+3)^1))*((s32)((s16)lut[3])));
++ temp = ((s32)*(s16*)(src+((srcPtr+3)^S))*((s32)((s16)lut[3])));
+ accum += (s32)(temp >> 15);
+
+ if (accum > 32767) accum = 32767;
+ if (accum < -32768) accum = -32768;
+
+- dst[dstPtr^1] = (s16)(accum);
++ dst[dstPtr^S] = (s16)(accum);
+ dstPtr++;
+ Accum += Pitch;
+ srcPtr += (Accum>>16);
+ Accum&=0xffff;
+ }
+ for (int x=0; x < 4; x++)
+- ((u16 *)rsp.RDRAM)[((addy/2)+x)^1] = src[(srcPtr+x)^1];
++ ((u16 *)rsp.RDRAM)[((addy/2)+x)^S] = src[(srcPtr+x)^S];
+ *(u16 *)(rsp.RDRAM+addy+10) = (u16)Accum;
+ //memcpy (RSWORK, src+srcPtr, 0x8);
+ }
+@@ -489,7 +489,7 @@ static void DMEMMOVE2 () { // Needs accuracy verification...
+
+ //memcpy (dmem+v1, dmem+v0, count-1);
+ for (cnt = 0; cnt < count; cnt++) {
+- *(u8 *)(BufferSpace+((cnt+v1)^3)) = *(u8 *)(BufferSpace+((cnt+v0)^3));
++ *(u8 *)(BufferSpace+((cnt+v1)^S8)) = *(u8 *)(BufferSpace+((cnt+v0)^S8));
+ }
+ }
+
+@@ -564,59 +564,59 @@ static void ENVMIXER2 () {
+ while (count > 0) {
+ int temp, x;
+ for (x=0; x < 0x8; x++) {
+- vec9 = (s16)(((s32)buffs3[x^1] * (u32)env[0]) >> 0x10) ^ v2[0];
+- vec10 = (s16)(((s32)buffs3[x^1] * (u32)env[2]) >> 0x10) ^ v2[1];
+- temp = bufft6[x^1] + vec9;
++ vec9 = (s16)(((s32)buffs3[x^S] * (u32)env[0]) >> 0x10) ^ v2[0];
++ vec10 = (s16)(((s32)buffs3[x^S] * (u32)env[2]) >> 0x10) ^ v2[1];
++ temp = bufft6[x^S] + vec9;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- bufft6[x^1] = temp;
+- temp = bufft7[x^1] + vec10;
++ bufft6[x^S] = temp;
++ temp = bufft7[x^S] + vec10;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- bufft7[x^1] = temp;
++ bufft7[x^S] = temp;
+ vec9 = (s16)(((s32)vec9 * (u32)env[4]) >> 0x10) ^ v2[2];
+ vec10 = (s16)(((s32)vec10 * (u32)env[4]) >> 0x10) ^ v2[3];
+ if (inst1 & 0x10) {
+- temp = buffs0[x^1] + vec10;
++ temp = buffs0[x^S] + vec10;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- buffs0[x^1] = temp;
+- temp = buffs1[x^1] + vec9;
++ buffs0[x^S] = temp;
++ temp = buffs1[x^S] + vec9;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- buffs1[x^1] = temp;
++ buffs1[x^S] = temp;
+ } else {
+- temp = buffs0[x^1] + vec9;
++ temp = buffs0[x^S] + vec9;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- buffs0[x^1] = temp;
+- temp = buffs1[x^1] + vec10;
++ buffs0[x^S] = temp;
++ temp = buffs1[x^S] + vec10;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- buffs1[x^1] = temp;
++ buffs1[x^S] = temp;
+ }
+ }
+
+ if (!isMKABI)
+ for (x=0x8; x < 0x10; x++) {
+- vec9 = (s16)(((s32)buffs3[x^1] * (u32)env[1]) >> 0x10) ^ v2[0];
+- vec10 = (s16)(((s32)buffs3[x^1] * (u32)env[3]) >> 0x10) ^ v2[1];
+- temp = bufft6[x^1] + vec9;
++ vec9 = (s16)(((s32)buffs3[x^S] * (u32)env[1]) >> 0x10) ^ v2[0];
++ vec10 = (s16)(((s32)buffs3[x^S] * (u32)env[3]) >> 0x10) ^ v2[1];
++ temp = bufft6[x^S] + vec9;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- bufft6[x^1] = temp;
+- temp = bufft7[x^1] + vec10;
++ bufft6[x^S] = temp;
++ temp = bufft7[x^S] + vec10;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- bufft7[x^1] = temp;
++ bufft7[x^S] = temp;
+ vec9 = (s16)(((s32)vec9 * (u32)env[5]) >> 0x10) ^ v2[2];
+ vec10 = (s16)(((s32)vec10 * (u32)env[5]) >> 0x10) ^ v2[3];
+ if (inst1 & 0x10) {
+- temp = buffs0[x^1] + vec10;
++ temp = buffs0[x^S] + vec10;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- buffs0[x^1] = temp;
+- temp = buffs1[x^1] + vec9;
++ buffs0[x^S] = temp;
++ temp = buffs1[x^S] + vec9;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- buffs1[x^1] = temp;
++ buffs1[x^S] = temp;
+ } else {
+- temp = buffs0[x^1] + vec9;
++ temp = buffs0[x^S] + vec9;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- buffs0[x^1] = temp;
+- temp = buffs1[x^1] + vec10;
++ buffs0[x^S] = temp;
++ temp = buffs1[x^S] + vec10;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- buffs1[x^1] = temp;
++ buffs1[x^S] = temp;
+ }
+ }
+ bufft6 += adder; bufft7 += adder;
+@@ -684,7 +684,7 @@ static void INTERL2 () {
+ src=(BYTE *)(BufferSpace);//[In];
+ dst=(BYTE *)(BufferSpace);//[Out];
+ while(Count) {
+- *(short *)(dst+(Out^3)) = *(short *)(src+(In^3));
++ *(short *)(dst+(Out^S8)) = *(short *)(src+(In^S8));
+ Out += 2;
+ In += 4;
+ Count--;
+@@ -696,7 +696,7 @@ static void INTERLEAVE2 () { // Needs accuracy verification...
+ u16 *outbuff;
+ u16 *inSrcR;
+ u16 *inSrcL;
+- u16 Left, Right;
++ u16 Left, Right, Left2, Right2;
+ u32 count;
+ count = ((inst1 >> 12) & 0xFF0);
+ if (count == 0) {
+@@ -715,11 +715,20 @@ static void INTERLEAVE2 () { // Needs accuracy verification...
+ for (u32 x = 0; x < (count/4); x++) {
+ Left=*(inSrcL++);
+ Right=*(inSrcR++);
++ Left2=*(inSrcL++);
++ Right2=*(inSrcR++);
+
+- *(outbuff++)=*(inSrcR++);
+- *(outbuff++)=*(inSrcL++);
+- *(outbuff++)=(u16)Right;
+- *(outbuff++)=(u16)Left;
++#ifdef _BIG_ENDIAN
++ *(outbuff++)=Right;
++ *(outbuff++)=Left;
++ *(outbuff++)=Right2;
++ *(outbuff++)=Left2;
++#else
++ *(outbuff++)=Right2;
++ *(outbuff++)=Left2;
++ *(outbuff++)=Right;
++ *(outbuff++)=Left;
++#endif
+ }
+ }
+
+@@ -735,7 +744,8 @@ static void ADDMIXER () {
+ for (int cntr = 0; cntr < Count; cntr+=2) {
+ temp = *outp + *inp;
+ if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768;
+- outp++; inp++;
++ *(outp++) = temp;
++ inp++;
+ }
+ }
+
+diff --git a/rsp_hle/ucode3.cpp b/rsp_hle/ucode3.cpp
+index 679bb365303266fdd2129c3c062bbbdd6d7f6849..4291ac2d674dd7ee565c8f8856eb9fb23f6f7395 100644
+--- a/rsp_hle/ucode3.cpp
++++ b/rsp_hle/ucode3.cpp
+@@ -83,17 +83,17 @@ static void SETVOL3 () {
+ u8 Flags = (u8)(inst1 >> 0x10);
+ if (Flags & 0x4) { // 288
+ if (Flags & 0x2) { // 290
+- Vol_Left = *(s16*)&inst1; // 0x50
+- Env_Dry = (s16)(*(s32*)&inst2 >> 0x10); // 0x4E
+- Env_Wet = *(s16*)&inst2; // 0x4C
++ Vol_Left = (s16)inst1; // 0x50
++ Env_Dry = (s16)(inst2 >> 0x10); // 0x4E
++ Env_Wet = (s16)inst2; // 0x4C
+ } else {
+- VolTrg_Right = *(s16*)&inst1; // 0x46
++ VolTrg_Right = (s16)inst1; // 0x46
+ //VolRamp_Right = (u16)(inst2 >> 0x10) | (s32)(s16)(inst2 << 0x10);
+- VolRamp_Right = *(s32*)&inst2; // 0x48/0x4A
++ VolRamp_Right = (s32)inst2; // 0x48/0x4A
+ }
+ } else {
+- VolTrg_Left = *(s16*)&inst1; // 0x40
+- VolRamp_Left = *(s32*)&inst2; // 0x42/0x44
++ VolTrg_Left = (s16)inst1; // 0x40
++ VolRamp_Left = (s32)inst2; // 0x42/0x44
+ }
+ }
+
+@@ -121,7 +121,7 @@ static void ENVMIXER3 () {
+ s16 Wet, Dry;
+ s16 LTrg, RTrg;
+
+- Vol_Right = (*(s16 *)&inst1);
++ Vol_Right = (s16)inst1;
+
+ if (flags & A_INIT) {
+ LAdder = VolRamp_Left / 8;
+@@ -197,9 +197,9 @@ static void ENVMIXER3 () {
+ MainL = ((Dry * LVol) + 0x4000) >> 15;
+ MainR = ((Dry * RVol) + 0x4000) >> 15;
+
+- o1 = out [y^1];
+- a1 = aux1[y^1];
+- i1 = inp [y^1];
++ o1 = out [y^S];
++ a1 = aux1[y^S];
++ i1 = inp [y^S];
+
+ o1+=((i1*MainL)+0x4000)>>15;
+ a1+=((i1*MainR)+0x4000)>>15;
+@@ -214,13 +214,13 @@ static void ENVMIXER3 () {
+
+ // ****************************************************************
+
+- out[y^1]=o1;
+- aux1[y^1]=a1;
++ out[y^S]=o1;
++ aux1[y^S]=a1;
+
+ // ****************************************************************
+ //if (!(flags&A_AUX)) {
+- a2 = aux2[y^1];
+- a3 = aux3[y^1];
++ a2 = aux2[y^S];
++ a3 = aux3[y^S];
+
+ AuxL = ((Wet * LVol) + 0x4000) >> 15;
+ AuxR = ((Wet * RVol) + 0x4000) >> 15;
+@@ -234,8 +234,8 @@ static void ENVMIXER3 () {
+ if(a3>32767) a3=32767;
+ else if(a3<-32768) a3=-32768;
+
+- aux2[y^1]=a2;
+- aux3[y^1]=a3;
++ aux2[y^S]=a2;
++ aux3[y^S]=a3;
+ }
+ //}
+
+@@ -288,7 +288,7 @@ static void ENVMIXER3o () {
+ s16 Wet, Dry;
+
+ //fprintf (dfile, "\n----------------------------------------------------\n");
+- Vol_Right = (*(s16 *)&inst1);
++ Vol_Right = inst1;
+ if (flags & A_INIT) {
+ LVol = (((s32)(s16)Vol_Left * VolRamp_Left) - ((s32)(s16)Vol_Left << 16)) >> 3;
+ RVol = (((s32)(s16)Vol_Right * VolRamp_Right) - ((s32)(s16)Vol_Right << 16)) >> 3;
+@@ -320,12 +320,12 @@ static void ENVMIXER3o () {
+ //fprintf (dfile, "LTrg = %08X, LVol = %08X\n", LTrg, LVol);
+
+ for (int x=0; x<(0x170/2); x++) {
+- i1=(int)inp[x^1];
+- o1=(int)out[x^1];
+- a1=(int)aux1[x^1];
++ i1=(int)inp[x^S];
++ o1=(int)out[x^S];
++ a1=(int)aux1[x^S];
+ if (AuxIncRate) {
+- a2=(int)aux2[x^1];
+- a3=(int)aux3[x^1];
++ a2=(int)aux2[x^S];
++ a3=(int)aux3[x^S];
+ }
+ // TODO: here...
+ //LAcc = (LTrg << 16);
+@@ -388,8 +388,8 @@ static void ENVMIXER3o () {
+ if(a1>32767) a1=32767;
+ else if(a1<-32768) a1=-32768;
+
+- out[x^1]=o1;
+- aux1[x^1]=a1;
++ out[x^S]=o1;
++ aux1[x^S]=a1;
+ if (AuxIncRate) {
+ a2+=(/*(a2*0x7fff)+*/(i1*AuxR)+0x4000)>>15;
+ a3+=(/*(a3*0x7fff)+*/(i1*AuxL)+0x4000)>>15;
+@@ -400,8 +400,8 @@ static void ENVMIXER3o () {
+ if(a3>32767) a3=32767;
+ else if(a3<-32768) a3=-32768;
+
+- aux2[x^1]=a2;
+- aux3[x^1]=a3;
++ aux2[x^S]=a2;
++ aux3[x^S]=a3;
+ }
+ }
+
+@@ -505,11 +505,11 @@ static void MIXER3 () { // Needs accuracy verification...
+ u16 dmemin = (u16)(inst2 >> 0x10) + 0x4f0;
+ u16 dmemout = (u16)(inst2 & 0xFFFF) + 0x4f0;
+ //u8 flags = (u8)((inst1 >> 16) & 0xff);
+- s32 gain = (s16)(inst1 & 0xFFFF)*2;
++ s32 gain = (s16)(inst1 & 0xFFFF);
+ s32 temp;
+
+- for (int x=0; x < 0x170; x+=2) { // I think I can do this a lot easier
+- temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 16;
++ for (int x=0; x < 0x170; x+=2) { // I think I can do this a lot easier
++ temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 15;
+ temp += *(s16 *)(BufferSpace+dmemout+x);
+
+ if ((s32)temp > 32767)
+@@ -544,17 +544,17 @@ static void LOADADPCM3 () { // Loads an ADPCM table - Works 100% Now 03-13-01
+ //assert ((inst1&0xffff) <= 0x80);
+ u16 *table = (u16 *)(rsp.RDRAM+v0);
+ for (u32 x = 0; x < ((inst1&0xffff)>>0x4); x++) {
+- adpcmtable[0x1+(x<<3)] = table[0];
+- adpcmtable[0x0+(x<<3)] = table[1];
++ adpcmtable[0x0+(x<<3)^S] = table[0];
++ adpcmtable[0x1+(x<<3)^S] = table[1];
+
+- adpcmtable[0x3+(x<<3)] = table[2];
+- adpcmtable[0x2+(x<<3)] = table[3];
++ adpcmtable[0x2+(x<<3)^S] = table[2];
++ adpcmtable[0x3+(x<<3)^S] = table[3];
+
+- adpcmtable[0x5+(x<<3)] = table[4];
+- adpcmtable[0x4+(x<<3)] = table[5];
++ adpcmtable[0x4+(x<<3)^S] = table[4];
++ adpcmtable[0x5+(x<<3)^S] = table[5];
+
+- adpcmtable[0x7+(x<<3)] = table[6];
+- adpcmtable[0x6+(x<<3)] = table[7];
++ adpcmtable[0x6+(x<<3)^S] = table[6];
++ adpcmtable[0x7+(x<<3)^S] = table[7];
+ table += 8;
+ }
+ }
+@@ -568,7 +568,7 @@ static void DMEMMOVE3 () { // Needs accuracy verification...
+
+ //memcpy (dmem+v1, dmem+v0, count-1);
+ for (cnt = 0; cnt < count; cnt++) {
+- *(u8 *)(BufferSpace+((cnt+v1)^3)) = *(u8 *)(BufferSpace+((cnt+v0)^3));
++ *(u8 *)(BufferSpace+((cnt+v1)^S8)) = *(u8 *)(BufferSpace+((cnt+v0)^S8));
+ }
+ }
+
+@@ -615,8 +615,8 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+ }
+ }
+
+- int l1=out[15];
+- int l2=out[14];
++ int l1=out[14^S];
++ int l2=out[15^S];
+ int inp1[8];
+ int inp2[8];
+ out+=16;
+@@ -627,7 +627,7 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+ // area of memory in the case of A_LOOP or just
+ // the values we calculated the last time
+
+- code=BufferSpace[(0x4f0+inPtr)^3];
++ code=BufferSpace[(0x4f0+inPtr)^S8];
+ index=code&0xf;
+ index<<=4; // index into the adpcm code table
+ book1=(short *)&adpcmtable[index];
+@@ -645,7 +645,7 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+ while(j<8) // loop of 8, for 8 coded nibbles from 4 bytes
+ // which yields 8 short pcm values
+ {
+- icode=BufferSpace[(0x4f0+inPtr)^3];
++ icode=BufferSpace[(0x4f0+inPtr)^S8];
+ inPtr++;
+
+ inp1[j]=(s16)((icode&0xf0)<<8); // this will in effect be signed
+@@ -665,7 +665,7 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+ j=0;
+ while(j<8)
+ {
+- icode=BufferSpace[(0x4f0+inPtr)^3];
++ icode=BufferSpace[(0x4f0+inPtr)^S8];
+ inPtr++;
+
+ inp2[j]=(short)((icode&0xf0)<<8); // this will in effect be signed
+@@ -745,11 +745,11 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+
+ for(j=0;j<8;j++)
+ {
+- a[j^1]>>=11;
+- if(a[j^1]>32767) a[j^1]=32767;
+- else if(a[j^1]<-32768) a[j^1]=-32768;
+- *(out++)=a[j^1];
+- //*(out+j)=a[j^1];
++ a[j^S]>>=11;
++ if(a[j^S]>32767) a[j^S]=32767;
++ else if(a[j^S]<-32768) a[j^S]=-32768;
++ *(out++)=a[j^S];
++ //*(out+j)=a[j^S];
+ }
+ //out += 0x10;
+ l1=a[6];
+@@ -817,11 +817,11 @@ static void ADPCM3 () { // Verified to be 100% Accurate...
+
+ for(j=0;j<8;j++)
+ {
+- a[j^1]>>=11;
+- if(a[j^1]>32767) a[j^1]=32767;
+- else if(a[j^1]<-32768) a[j^1]=-32768;
+- *(out++)=a[j^1];
+- //*(out+j+0x1f8)=a[j^1];
++ a[j^S]>>=11;
++ if(a[j^S]>32767) a[j^S]=32767;
++ else if(a[j^S]<-32768) a[j^S]=-32768;
++ *(out++)=a[j^S];
++ //*(out+j+0x1f8)=a[j^S];
+ }
+ l1=a[6];
+ l2=a[7];
+@@ -861,11 +861,11 @@ static void RESAMPLE3 () {
+
+ if ((Flags & 0x1) == 0) {
+ for (int x=0; x < 4; x++) //memcpy (src+srcPtr, rsp.RDRAM+addy, 0x8);
+- src[(srcPtr+x)^1] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^1];
++ src[(srcPtr+x)^S] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^S];
+ Accum = *(u16 *)(rsp.RDRAM+addy+10);
+ } else {
+ for (int x=0; x < 4; x++)
+- src[(srcPtr+x)^1] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
++ src[(srcPtr+x)^S] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2));
+ }
+
+ for(int i=0;i < 0x170/2;i++) {
+@@ -873,18 +873,18 @@ static void RESAMPLE3 () {
+ //location = (Accum >> 0xa) << 0x3;
+ lut = (s16 *)(((u8 *)ResampleLUT) + location);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+0)^1))*((s32)((s16)lut[0])));
++ temp = ((s32)*(s16*)(src+((srcPtr+0)^S))*((s32)((s16)lut[0])));
+ accum = (s32)(temp >> 15);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+1)^1))*((s32)((s16)lut[1])));
++ temp = ((s32)*(s16*)(src+((srcPtr+1)^S))*((s32)((s16)lut[1])));
+ accum += (s32)(temp >> 15);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+2)^1))*((s32)((s16)lut[2])));
++ temp = ((s32)*(s16*)(src+((srcPtr+2)^S))*((s32)((s16)lut[2])));
+ accum += (s32)(temp >> 15);
+
+- temp = ((s32)*(s16*)(src+((srcPtr+3)^1))*((s32)((s16)lut[3])));
++ temp = ((s32)*(s16*)(src+((srcPtr+3)^S))*((s32)((s16)lut[3])));
+ accum += (s32)(temp >> 15);
+-/* temp = ((s64)*(s16*)(src+((srcPtr+0)^1))*((s64)((s16)lut[0]<<1)));
++/* temp = ((s64)*(s16*)(src+((srcPtr+0)^S))*((s64)((s16)lut[0]<<1)));
+ if (temp & 0x8000) temp = (temp^0x8000) + 0x10000;
+ else temp = (temp^0x8000);
+ temp = (s32)(temp >> 16);
+@@ -892,7 +892,7 @@ static void RESAMPLE3 () {
+ if ((s32)temp < -32768) temp = -32768;
+ accum = (s32)(s16)temp;
+
+- temp = ((s64)*(s16*)(src+((srcPtr+1)^1))*((s64)((s16)lut[1]<<1)));
++ temp = ((s64)*(s16*)(src+((srcPtr+1)^S))*((s64)((s16)lut[1]<<1)));
+ if (temp & 0x8000) temp = (temp^0x8000) + 0x10000;
+ else temp = (temp^0x8000);
+ temp = (s32)(temp >> 16);
+@@ -900,7 +900,7 @@ static void RESAMPLE3 () {
+ if ((s32)temp < -32768) temp = -32768;
+ accum += (s32)(s16)temp;
+
+- temp = ((s64)*(s16*)(src+((srcPtr+2)^1))*((s64)((s16)lut[2]<<1)));
++ temp = ((s64)*(s16*)(src+((srcPtr+2)^S))*((s64)((s16)lut[2]<<1)));
+ if (temp & 0x8000) temp = (temp^0x8000) + 0x10000;
+ else temp = (temp^0x8000);
+ temp = (s32)(temp >> 16);
+@@ -908,7 +908,7 @@ static void RESAMPLE3 () {
+ if ((s32)temp < -32768) temp = -32768;
+ accum += (s32)(s16)temp;
+
+- temp = ((s64)*(s16*)(src+((srcPtr+3)^1))*((s64)((s16)lut[3]<<1)));
++ temp = ((s64)*(s16*)(src+((srcPtr+3)^S))*((s64)((s16)lut[3]<<1)));
+ if (temp & 0x8000) temp = (temp^0x8000) + 0x10000;
+ else temp = (temp^0x8000);
+ temp = (s32)(temp >> 16);
+@@ -919,14 +919,14 @@ static void RESAMPLE3 () {
+ if (accum > 32767) accum = 32767;
+ if (accum < -32768) accum = -32768;
+
+- dst[dstPtr^1] = (accum);
++ dst[dstPtr^S] = (accum);
+ dstPtr++;
+ Accum += Pitch;
+ srcPtr += (Accum>>16);
+ Accum&=0xffff;
+ }
+ for (int x=0; x < 4; x++)
+- ((u16 *)rsp.RDRAM)[((addy/2)+x)^1] = src[(srcPtr+x)^1];
++ ((u16 *)rsp.RDRAM)[((addy/2)+x)^S] = src[(srcPtr+x)^S];
+ *(u16 *)(rsp.RDRAM+addy+10) = Accum;
+ }
+
+@@ -935,7 +935,7 @@ static void INTERLEAVE3 () { // Needs accuracy verification...
+ u16 *outbuff = (u16 *)(BufferSpace + 0x4f0);//(u16 *)(AudioOutBuffer+dmem);
+ u16 *inSrcR;
+ u16 *inSrcL;
+- u16 Left, Right;
++ u16 Left, Right, Left2, Right2;
+
+ //inR = inst2 & 0xFFFF;
+ //inL = (inst2 >> 16) & 0xFFFF;
+@@ -946,11 +946,20 @@ static void INTERLEAVE3 () { // Needs accuracy verification...
+ for (int x = 0; x < (0x170/4); x++) {
+ Left=*(inSrcL++);
+ Right=*(inSrcR++);
++ Left2=*(inSrcL++);
++ Right2=*(inSrcR++);
+
+- *(outbuff++)=*(inSrcR++);
+- *(outbuff++)=*(inSrcL++);
+- *(outbuff++)=(u16)Right;
+- *(outbuff++)=(u16)Left;
++#ifdef _BIG_ENDIAN
++ *(outbuff++)=Right;
++ *(outbuff++)=Left;
++ *(outbuff++)=Right2;
++ *(outbuff++)=Left2;
++#else
++ *(outbuff++)=Right2;
++ *(outbuff++)=Left2;
++ *(outbuff++)=Right;
++ *(outbuff++)=Left;
++#endif
+ /*
+ Left=*(inSrcL++);
+ Right=*(inSrcR++);
+diff --git a/rsp_hle/ucode3mp3.cpp b/rsp_hle/ucode3mp3.cpp
+index c5a45a220b79d60ccb9e60b12a90f45ffe39a4b5..015839616096681e2ba4f0728d73f349b8571ee3 100644
+--- a/rsp_hle/ucode3mp3.cpp
++++ b/rsp_hle/ucode3mp3.cpp
+@@ -259,25 +259,25 @@ static void InnerLoop () {
+ // Part 1: 100% Accurate
+
+ int i;
+- v[0] = *(s16 *)(mp3data+inPtr+(0x00^2)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^2)); v[0] += v[31];
+- v[1] = *(s16 *)(mp3data+inPtr+(0x02^2)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^2)); v[1] += v[30];
+- v[2] = *(s16 *)(mp3data+inPtr+(0x06^2)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^2)); v[2] += v[28];
+- v[3] = *(s16 *)(mp3data+inPtr+(0x04^2)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^2)); v[3] += v[29];
++ v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] += v[31];
++ v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] += v[30];
++ v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] += v[28];
++ v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] += v[29];
+
+- v[4] = *(s16 *)(mp3data+inPtr+(0x0E^2)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^2)); v[4] += v[24];
+- v[5] = *(s16 *)(mp3data+inPtr+(0x0C^2)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^2)); v[5] += v[25];
+- v[6] = *(s16 *)(mp3data+inPtr+(0x08^2)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^2)); v[6] += v[27];
+- v[7] = *(s16 *)(mp3data+inPtr+(0x0A^2)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^2)); v[7] += v[26];
++ v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] += v[24];
++ v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] += v[25];
++ v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] += v[27];
++ v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] += v[26];
+
+- v[8] = *(s16 *)(mp3data+inPtr+(0x1E^2)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^2)); v[8] += v[16];
+- v[9] = *(s16 *)(mp3data+inPtr+(0x1C^2)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^2)); v[9] += v[17];
+- v[10]= *(s16 *)(mp3data+inPtr+(0x18^2)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^2)); v[10]+= v[19];
+- v[11]= *(s16 *)(mp3data+inPtr+(0x1A^2)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^2)); v[11]+= v[18];
++ v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] += v[16];
++ v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] += v[17];
++ v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]+= v[19];
++ v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]+= v[18];
+
+- v[12]= *(s16 *)(mp3data+inPtr+(0x10^2)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^2)); v[12]+= v[23];
+- v[13]= *(s16 *)(mp3data+inPtr+(0x12^2)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^2)); v[13]+= v[22];
+- v[14]= *(s16 *)(mp3data+inPtr+(0x16^2)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^2)); v[14]+= v[20];
+- v[15]= *(s16 *)(mp3data+inPtr+(0x14^2)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^2)); v[15]+= v[21];
++ v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]+= v[23];
++ v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]+= v[22];
++ v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]+= v[20];
++ v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]+= v[21];
+
+ // Part 2-4
+
+@@ -380,25 +380,25 @@ static void InnerLoop () {
+
+ // Part 6 - 100% Accurate
+
+- v[0] = *(s16 *)(mp3data+inPtr+(0x00^2)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^2)); v[0] -= v[31];
+- v[1] = *(s16 *)(mp3data+inPtr+(0x02^2)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^2)); v[1] -= v[30];
+- v[2] = *(s16 *)(mp3data+inPtr+(0x06^2)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^2)); v[2] -= v[28];
+- v[3] = *(s16 *)(mp3data+inPtr+(0x04^2)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^2)); v[3] -= v[29];
++ v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] -= v[31];
++ v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] -= v[30];
++ v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] -= v[28];
++ v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] -= v[29];
+
+- v[4] = *(s16 *)(mp3data+inPtr+(0x0E^2)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^2)); v[4] -= v[24];
+- v[5] = *(s16 *)(mp3data+inPtr+(0x0C^2)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^2)); v[5] -= v[25];
+- v[6] = *(s16 *)(mp3data+inPtr+(0x08^2)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^2)); v[6] -= v[27];
+- v[7] = *(s16 *)(mp3data+inPtr+(0x0A^2)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^2)); v[7] -= v[26];
++ v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] -= v[24];
++ v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] -= v[25];
++ v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] -= v[27];
++ v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] -= v[26];
+
+- v[8] = *(s16 *)(mp3data+inPtr+(0x1E^2)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^2)); v[8] -= v[16];
+- v[9] = *(s16 *)(mp3data+inPtr+(0x1C^2)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^2)); v[9] -= v[17];
+- v[10]= *(s16 *)(mp3data+inPtr+(0x18^2)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^2)); v[10]-= v[19];
+- v[11]= *(s16 *)(mp3data+inPtr+(0x1A^2)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^2)); v[11]-= v[18];
++ v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] -= v[16];
++ v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] -= v[17];
++ v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]-= v[19];
++ v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]-= v[18];
+
+- v[12]= *(s16 *)(mp3data+inPtr+(0x10^2)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^2)); v[12]-= v[23];
+- v[13]= *(s16 *)(mp3data+inPtr+(0x12^2)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^2)); v[13]-= v[22];
+- v[14]= *(s16 *)(mp3data+inPtr+(0x16^2)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^2)); v[14]-= v[20];
+- v[15]= *(s16 *)(mp3data+inPtr+(0x14^2)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^2)); v[15]-= v[21];
++ v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]-= v[23];
++ v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]-= v[22];
++ v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]-= v[20];
++ v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]-= v[21];
+
+ //0, 1, 3, 2, 7, 6, 4, 5, 7, 6, 4, 5, 0, 1, 3, 2
+ const u16 LUT6[16] = { 0xFFB2, 0xFD3A, 0xF10A, 0xF854,
+@@ -519,8 +519,8 @@ static void InnerLoop () {
+ //Clamp(v0);
+ //Clamp(v18);
+ // clamp???
+- *(s16 *)(mp3data+(outPtr^2)) = v0;
+- *(s16 *)(mp3data+((outPtr+2)^2)) = v18;
++ *(s16 *)(mp3data+(outPtr^S16)) = v0;
++ *(s16 *)(mp3data+((outPtr+2)^S16)) = v18;
+ outPtr+=4;
+ addptr += 0x30;
+ offset += 0x38;
+@@ -540,10 +540,10 @@ static void InnerLoop () {
+ s32 mult4 = *(s32 *)(mp3data+0xCEC);
+ if (t4 & 0x2) {
+ v2 = (v2 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
+- *(s16 *)(mp3data+(outPtr^2)) = v2;
++ *(s16 *)(mp3data+(outPtr^S16)) = v2;
+ } else {
+ v4 = (v4 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
+- *(s16 *)(mp3data+(outPtr^2)) = v4;
++ *(s16 *)(mp3data+(outPtr^S16)) = v4;
+ mult4 = *(u32 *)(mp3data+0xCE8);
+ }
+ addptr -= 0x50;
+@@ -569,8 +569,8 @@ static void InnerLoop () {
+ //Clamp(v0);
+ //Clamp(v18);
+ // clamp???
+- *(s16 *)(mp3data+((outPtr+2)^2)) = v0;
+- *(s16 *)(mp3data+((outPtr+4)^2)) = v18;
++ *(s16 *)(mp3data+((outPtr+2)^S16)) = v0;
++ *(s16 *)(mp3data+((outPtr+4)^S16)) = v18;
+ outPtr+=4;
+ addptr -= 0x50;
+ }
+@@ -584,21 +584,21 @@ static void InnerLoop () {
+ hi1 = (int)hi1 >> 0x10;
+ for (i = 0; i < 8; i++) {
+ // v0
+- v = (*(s16 *)(mp3data+((tmp-0x40)^2)) * hi0);
++ v = (*(s16 *)(mp3data+((tmp-0x40)^S16)) * hi0);
+ if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+- *(s16 *)((u8 *)mp3data+((tmp-0x40)^2)) = (s16)v;
++ *(s16 *)((u8 *)mp3data+((tmp-0x40)^S16)) = (s16)v;
+ // v17
+- v = (*(s16 *)(mp3data+((tmp-0x30)^2)) * hi0);
++ v = (*(s16 *)(mp3data+((tmp-0x30)^S16)) * hi0);
+ if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+- *(s16 *)((u8 *)mp3data+((tmp-0x30)^2)) = v;
++ *(s16 *)((u8 *)mp3data+((tmp-0x30)^S16)) = v;
+ // v2
+- v = (*(s16 *)(mp3data+((tmp-0x1E)^2)) * hi1);
++ v = (*(s16 *)(mp3data+((tmp-0x1E)^S16)) * hi1);
+ if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+- *(s16 *)((u8 *)mp3data+((tmp-0x1E)^2)) = v;
++ *(s16 *)((u8 *)mp3data+((tmp-0x1E)^S16)) = v;
+ // v4
+- v = (*(s16 *)(mp3data+((tmp-0xE)^2)) * hi1);
++ v = (*(s16 *)(mp3data+((tmp-0xE)^S16)) * hi1);
+ if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+- *(s16 *)((u8 *)mp3data+((tmp-0xE)^2)) = v;
++ *(s16 *)((u8 *)mp3data+((tmp-0xE)^S16)) = v;
+ tmp += 2;
+ }
+ }
diff --git a/debian/patches/series b/debian/patches/series
index 74f863c..687f8d1 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -38,3 +38,4 @@ rice_fog.patch
ftbfs-gvariant-type-conflicts.path
dont-install-unneeded.patch
rsp_ucode2_reset.patch
+rsp_hle_bigendian.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-games/mupen64plus.git
More information about the Pkg-games-commits mailing list