memset_n.h

Go to the documentation of this file.
00001 /*
00002 Copyright (C) 2002-2004 The Pentagram Team
00003 
00004 This program is free software; you can redistribute it and/or
00005 modify it under the terms of the GNU General Public License
00006 as published by the Free Software Foundation; either version 2
00007 of the License, or (at your option) any later version.
00008 
00009 This program is distributed in the hope that it will be useful,
00010 but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 GNU General Public License for more details.
00013 
00014 You should have received a copy of the GNU General Public License
00015 along with this program; if not, write to the Free Software
00016 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00017 */
00018 
00019 //
00020 // memset_n is a set of optimized functions for filling buffers of 
00021 // 16 and 32 bit integers
00022 // 
00023 
00024 #ifndef MEMSET_N_H_INCLUDED
00025 #define MEMSET_N_H_INCLUDED
00026 
00027 namespace Pentagram {
00028 
00029 #if defined(__GNUC__) && defined(i386)
00030 
00031 //
00032 // GNU i386 memset_32
00033 //
00034 // asm version for the memset32 for gcc/x86
00035 //
00036 // borrowed from SDL's src/video/SDL_memops.h (SDL_memset4)
00037 //
00038 // buf should be DWORD aligned
00039 //
00040 inline void memset_32_aligned(void *buf, uint32 val, uint32 dwords)
00041 {
00042         int u0, u1, u2;
00043     __asm__ __volatile__ (                                  \
00044                         "cld\n\t"                                       \
00045             "rep ; stosl\n\t"                               \
00046             : "=&D" (u0), "=&a" (u1), "=&c" (u2)            \
00047             : "0" (buf), "1" (val), "2" (dwords)            \
00048             : "memory" );
00049 }
00050 
00051 #elif defined(_MSC_VER) && defined(_M_IX86) && (_M_IX86 >= 300)
00052 
00053 #ifdef USE_MMX_ASM      // Disabled by default, no obvious speed up
00054 
00055 //
00056 // MSVC MMX memset_32
00057 //
00058 // asm version for the memset32 for MSVC MMX x86 
00059 //
00060 // buf should be DWORD aligned
00061 //
00062 inline void memset_32_aligned(void *buf, uint32 val, uint32 dwords)
00063 {
00064         // Qword align
00065         if ((uint32)(buf) & 4) 
00066         {
00067                 *(reinterpret_cast<uint32*>(buf)) = val;
00068                 buf = (reinterpret_cast<uint32*>(buf))+1;
00069                 dwords--;
00070         }
00071 
00072         if (dwords > 1) 
00073         {
00074                 __asm {
00075                         cld
00076                         mov   edi,  buf
00077                         mov   ecx,  dwords
00078                         shr       ecx,  1
00079                         mov   eax,  val
00080                         movd  mm0,  eax
00081                         movd  mm1,  eax
00082                         psllq mm1,  32
00083                         por   mm0,  mm1
00084                         align 16
00085         repeat:
00086                         movq [edi], mm0
00087                         add   edi,  8
00088                         loop  repeat
00089                         emms
00090                 };
00091         }
00092 
00093         // Final dword
00094         if (dwords & 1) *(reinterpret_cast<uint32*>(buf)) = val;
00095 }
00096 
00097 #else // USE_MMX_ASM
00098 
00099 //
00100 // MSVC i386 memset_32
00101 //
00102 // asm version for the memset32 for MSVC x86 
00103 //
00104 // buf should be DWORD aligned
00105 //
00106 inline void memset_32_aligned(void *buf, uint32 val, uint32 dwords)
00107 {
00108         __asm {
00109                 cld
00110                 mov edi, buf
00111                 mov eax, val
00112                 mov ecx, dwords
00113                 repne stosd
00114         };
00115 }
00116 
00117 #endif // USE_MMX_ASM
00118 
00119 #else
00120 
00121 //
00122 // Generic memset_32
00123 //
00124 // Can be used by all
00125 //
00126 inline void memset_32_aligned(void *buf, uint32 val, uint32 dwords)
00127 {
00128         do
00129         { 
00130                 *reinterpret_cast<uint32*>(buf) = val; 
00131                 buf = (reinterpret_cast<uint32*>(buf))+1;
00132         } 
00133         while (--dwords);
00134 }
00135 
00136 #endif
00137 
00138 //
00139 // memset_32
00140 //
00141 // Can be used by all
00142 //
00143 inline void memset_32(void *buf, uint32 val, uint32 dwords)
00144 {
00145         // Fisrly we should dword Align it
00146         int align = 0;
00147         if (reinterpret_cast<uintptr>(buf) & 3) 
00148         {
00149                 align = 4;
00150                 dwords--;
00151 
00152                 // Ok, shift along by 1 byte
00153                 if ((reinterpret_cast<uintptr>(buf) & 1))
00154                 {
00155                         *reinterpret_cast<uint8*>(buf) = static_cast<uint8>(val&0xFF); 
00156                         buf = (reinterpret_cast<uint8*>(buf))+1;
00157                         val = ((val& 0xFF) << 24) || ((val& 0xFFFFFF00) >> 8);
00158                         align --;
00159                 }
00160 
00161                 // Ok, shift along by 2 bytes
00162                 if ((reinterpret_cast<uintptr>(buf) & 2))
00163                 {
00164                         *reinterpret_cast<uint16*>(buf) = static_cast<uint16>(val&0xFFFF); 
00165                         buf = (reinterpret_cast<uint16*>(buf))+1;
00166                         val = ((val& 0xFFFF) << 16) || ((val& 0xFFFF0000) >> 16);
00167                         align-=2;
00168                 }
00169         }
00170 
00171         // Fill Aligned
00172         memset_32_aligned(buf,val,dwords);
00173 
00174         // Do the unaligned data
00175         if (align)
00176         {
00177                 // Ok, shift along by 1 byte
00178                 if (align == 1)
00179                 {
00180                         *reinterpret_cast<uint8*>(buf) = static_cast<uint8>(val&0xFF); 
00181                 }
00182                 // Ok, shift along by 2 bytes
00183                 else
00184                 {
00185                         *reinterpret_cast<uint16*>(buf) = static_cast<uint16>(val&0xFFFF); 
00186 
00187                         // Ok, shift along by another byte
00188                         if (align & 1) *(reinterpret_cast<uint8*>(buf)+2) = static_cast<uint8>((val>>16)&0xFF); 
00189                 }
00190         }
00191 }
00192 
00193 //
00194 // memset_16
00195 //
00196 // Can be used by all
00197 //
00198 inline void memset_16(void *buf, sint32 val, uint32 words)
00199 {
00200         // Use memset_32
00201         if (words > 1) memset_32(buf,val|val<<16,words>>1);
00202 
00203         // Final word
00204         if (words & 1) *(reinterpret_cast<uint16*>(buf)) = static_cast<uint16>(val&0xFFFF);
00205 }
00206 
00207 }
00208 
00209 #endif //MEMSET_N_H_INCLUDED

Generated on Fri Jul 27 22:27:25 2007 for pentagram by  doxygen 1.4.7