linux_common.c
/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.

This file is part of Quake III Arena source code.

Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.

Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
===========================================================================
*/
/**
 * GAS syntax equivalents of the MSVC asm memory calls in common.c
 *
 * The following changes have been made to the asm:
 *  1. Registers are loaded by the inline asm arguments when possible
 *  2. Labels have been changed to local label format (0,1,etc.) to allow inlining
 *
 * HISTORY:
 *   AH - Created on 08 Dec 2000
 */

#include <unistd.h>	// AH - for size_t
#include <string.h>

// bk001207 - we need something under Linux, too. Mac?
#if 1 // defined(C_ONLY) // bk010102 - dedicated?
void Com_Memcpy (void* dest, const void* src, const size_t count) {
	memcpy(dest, src, count);
}

void Com_Memset (void* dest, const int val, const size_t count) {
	memset(dest, val, count);
}

#else

typedef enum {
	PRE_READ,		// prefetch assuming that buffer is used for reading only
	PRE_WRITE,		// prefetch assuming that buffer is used for writing only
	PRE_READ_WRITE	// prefetch assuming that buffer is used for both reading and writing
} e_prefetch;

void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type);

void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
	// MMX version not used on standard Pentium MMX
	// because the dword version is faster (with
	// proper destination prefetching)
	__asm__ __volatile__ ("\n\
		# eax = constant, ecx = count, edx = dest (loaded by the asm arguments)\n\
		movd	%%eax, %%mm0\n\
		punpckldq	%%mm0, %%mm0\n\
\n\
		# ensure that destination is qword aligned\n\
		testl	$7, %%edx	# qword padding?\n\
		jz	0f\n\
		movl	%%eax, (%%edx)\n\
		decl	%%ecx\n\
		addl	$4, %%edx\n\
\n\
	0:	movl	%%ecx, %%ebx\n\
		andl	$0xfffffff0, %%ecx\n\
		jz	2f\n\
		jmp	1f\n\
		.align 16\n\
\n\
		# funny ordering here to avoid commands\n\
		# that cross 32-byte boundaries (the\n\
		# [edx+0] version has a special 3-byte opcode...)\n\
	1:	movq	%%mm0, 8(%%edx)\n\
		movq	%%mm0, 16(%%edx)\n\
		movq	%%mm0, 24(%%edx)\n\
		movq	%%mm0, 32(%%edx)\n\
		movq	%%mm0, 40(%%edx)\n\
		movq	%%mm0, 48(%%edx)\n\
		movq	%%mm0, 56(%%edx)\n\
		movq	%%mm0, (%%edx)\n\
		addl	$64, %%edx\n\
		subl	$16, %%ecx\n\
		jnz	1b\n\
	2:	movl	%%ebx, %%ecx	# ebx = cnt\n\
		andl	$0xfffffff0, %%ecx	# ecx = cnt&~15\n\
		subl	%%ecx, %%ebx\n\
		jz	6f\n\
		cmpl	$8, %%ebx\n\
		jl	3f\n\
\n\
		movq	%%mm0, (%%edx)\n\
		movq	%%mm0, 8(%%edx)\n\
		movq	%%mm0, 16(%%edx)\n\
		movq	%%mm0, 24(%%edx)\n\
		addl	$32, %%edx\n\
		subl	$8, %%ebx\n\
		jz	6f\n\
\n\
	3:	cmpl	$4, %%ebx\n\
		jl	4f\n\
\n\
		movq	%%mm0, (%%edx)\n\
		movq	%%mm0, 8(%%edx)\n\
		addl	$16, %%edx\n\
		subl	$4, %%ebx\n\
\n\
	4:	cmpl	$2, %%ebx\n\
		jl	5f\n\
		movq	%%mm0, (%%edx)\n\
		addl	$8, %%edx\n\
		subl	$2, %%ebx\n\
\n\
	5:	cmpl	$1, %%ebx\n\
		jl	6f\n\
		movl	%%eax, (%%edx)\n\
	6:\n\
		emms\n\
	"
	: : "a" (constant), "c" (count), "d" (dest)
	: "%ebx", "%edi", "%esi", "cc", "memory");
}
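
/*
 * For reference, a minimal portable C sketch (ours, not part of the original
 * API) of what _copyDWord implements: write one dword to reach 8-byte
 * alignment, then fill the remaining dwords with the pattern. It omits the
 * MMX blocking and instruction scheduling that motivate the asm version.
 */
static void _copyDWord_ref (unsigned int* dest, const unsigned int constant, unsigned int count) {
	if (((unsigned long)dest & 7) && count) {	// qword-align the destination first
		*dest++ = constant;
		count--;
	}
	while (count--)
		*dest++ = constant;
}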

// optimized memory copy routine that handles all alignment
// cases and block sizes efficiently
void Com_Memcpy (void* dest, const void* src, const size_t count) {
	Com_Prefetch (src, count, PRE_READ);
	__asm__ __volatile__ ("\n\
		pushl	%%edi\n\
		pushl	%%esi\n\
		cmpl	$0, %%ecx	# count = 0 check (just to be on the safe side)\n\
		je	6f\n\
		movl	%0, %%ebx	# ebx = src (ecx = count, edx = dest via the asm arguments)\n\
		cmpl	$32, %%ecx	# padding only?\n\
		jl	1f\n\
\n\
		movl	%%ecx, %%edi\n\
		andl	$0xffffffe0, %%edi	# edi = count&~31 (the original mask 0xfffffe00 was a typo)\n\
		subl	$32, %%edi\n\
\n\
		.align 16\n\
	0:\n\
		movl	(%%ebx, %%edi, 1), %%eax\n\
		movl	4(%%ebx, %%edi, 1), %%esi\n\
		movl	%%eax, (%%edx, %%edi, 1)\n\
		movl	%%esi, 4(%%edx, %%edi, 1)\n\
		movl	8(%%ebx, %%edi, 1), %%eax\n\
		movl	12(%%ebx, %%edi, 1), %%esi\n\
		movl	%%eax, 8(%%edx, %%edi, 1)\n\
		movl	%%esi, 12(%%edx, %%edi, 1)\n\
		movl	16(%%ebx, %%edi, 1), %%eax\n\
		movl	20(%%ebx, %%edi, 1), %%esi\n\
		movl	%%eax, 16(%%edx, %%edi, 1)\n\
		movl	%%esi, 20(%%edx, %%edi, 1)\n\
		movl	24(%%ebx, %%edi, 1), %%eax\n\
		movl	28(%%ebx, %%edi, 1), %%esi\n\
		movl	%%eax, 24(%%edx, %%edi, 1)\n\
		movl	%%esi, 28(%%edx, %%edi, 1)\n\
		subl	$32, %%edi\n\
		jge	0b\n\
\n\
		movl	%%ecx, %%edi\n\
		andl	$0xffffffe0, %%edi	# same typo fixed: count&~31\n\
		addl	%%edi, %%ebx	# increase src pointer\n\
		addl	%%edi, %%edx	# increase dst pointer\n\
		andl	$31, %%ecx	# new count\n\
		jz	6f	# if count = 0, get outta here\n\
\n\
	1:\n\
		cmpl	$16, %%ecx\n\
		jl	2f\n\
		movl	(%%ebx), %%eax\n\
		movl	%%eax, (%%edx)\n\
		movl	4(%%ebx), %%eax\n\
		movl	%%eax, 4(%%edx)\n\
		movl	8(%%ebx), %%eax\n\
		movl	%%eax, 8(%%edx)\n\
		movl	12(%%ebx), %%eax\n\
		movl	%%eax, 12(%%edx)\n\
		subl	$16, %%ecx\n\
		addl	$16, %%ebx\n\
		addl	$16, %%edx\n\
	2:\n\
		cmpl	$8, %%ecx\n\
		jl	3f\n\
		movl	(%%ebx), %%eax\n\
		movl	%%eax, (%%edx)\n\
		movl	4(%%ebx), %%eax\n\
		subl	$8, %%ecx\n\
		movl	%%eax, 4(%%edx)\n\
		addl	$8, %%ebx\n\
		addl	$8, %%edx\n\
	3:\n\
		cmpl	$4, %%ecx\n\
		jl	4f\n\
		movl	(%%ebx), %%eax	# here 4-7 bytes\n\
		addl	$4, %%ebx\n\
		subl	$4, %%ecx\n\
		movl	%%eax, (%%edx)\n\
		addl	$4, %%edx\n\
	4:	# 0-3 remaining bytes\n\
		cmpl	$2, %%ecx\n\
		jl	5f\n\
		movw	(%%ebx), %%ax	# two bytes\n\
		cmpl	$3, %%ecx	# less than 3?\n\
		movw	%%ax, (%%edx)\n\
		jl	6f\n\
		movb	2(%%ebx), %%al	# last byte\n\
		movb	%%al, 2(%%edx)\n\
		jmp	6f\n\
	5:\n\
		cmpl	$1, %%ecx\n\
		jl	6f\n\
		movb	(%%ebx), %%al\n\
		movb	%%al, (%%edx)\n\
	6:\n\
		popl	%%esi\n\
		popl	%%edi\n\
	"
	: : "m" (src), "d" (dest), "c" (count)
	: "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
}
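
/*
 * A sketch in plain C (ours, hypothetical) of the copy strategy the asm
 * above uses: move 32-byte blocks in an unrolled main loop, then finish
 * the 0-31 byte tail, which the asm splits into 16/8/4/2/1 byte steps.
 */
static void Com_Memcpy_ref (void* dest, const void* src, size_t count) {
	unsigned char* d = (unsigned char*)dest;
	const unsigned char* s = (const unsigned char*)src;
	size_t i;

	while (count >= 32) {	// main loop: one 32-byte block per iteration
		for (i = 0; i < 32; i++)
			d[i] = s[i];
		d += 32;
		s += 32;
		count -= 32;
	}
	while (count--)	// 0-31 byte tail
		*d++ = *s++;
}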

void Com_Memset (void* dest, const int val, const size_t count)
{
	unsigned int fillval;

	if (count < 8)
	{
		__asm__ __volatile__ ("\n\
		# edx = dest, eax = val, ecx = count (loaded by the asm arguments)\n\
		movb	%%al, %%ah\n\
		movl	%%eax, %%ebx\n\
		andl	$0xffff, %%ebx\n\
		shll	$16, %%eax\n\
		addl	%%ebx, %%eax	# eax now contains pattern\n\
		cmpl	$4, %%ecx\n\
		jl	0f\n\
		movl	%%eax, (%%edx)	# copy first dword\n\
		addl	$4, %%edx\n\
		subl	$4, %%ecx\n\
	0:	cmpl	$2, %%ecx\n\
		jl	1f\n\
		movw	%%ax, (%%edx)	# copy 2 bytes\n\
		addl	$2, %%edx\n\
		subl	$2, %%ecx\n\
	1:	cmpl	$0, %%ecx\n\
		je	2f\n\
		movb	%%al, (%%edx)	# copy single byte\n\
	2:\n\
		"
		: : "d" (dest), "a" (val), "c" (count)
		: "%ebx", "%edi", "%esi", "cc", "memory");

		return;
	}

	fillval = val;

	fillval = fillval|(fillval<<8);
	fillval = fillval|(fillval<<16);	// fill dword with 8-bit pattern

	_copyDWord ((unsigned int*)(dest), fillval, count/4);

	__asm__ __volatile__ ("\n\
		# padding of 0-3 bytes\n\
		movl	%%ecx, %%eax\n\
		andl	$3, %%ecx	# ecx = count&3, the size of the tail\n\
		jz	1f\n\
		andl	$0xfffffffc, %%eax	# eax = count&~3 (the original mask 0xffffff00 was a typo)\n\
		addl	%%eax, %%edx	# edx = dest + bytes already filled by _copyDWord\n\
		movl	%0, %%eax	# eax = fill pattern\n\
		cmpl	$2, %%ecx\n\
		jl	0f\n\
		movw	%%ax, (%%edx)\n\
		cmpl	$2, %%ecx\n\
		je	1f\n\
		movb	%%al, 2(%%edx)\n\
		jmp	1f\n\
	0:\n\
		cmpl	$0, %%ecx\n\
		je	1f\n\
		movb	%%al, (%%edx)\n\
	1:\n\
		"
	: : "m" (fillval), "c" (count), "d" (dest)
	: "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
}

void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
{
	// write buffer prefetching is performed only if
	// the processor benefits from it. Read and read/write
	// prefetching is always performed.

	switch (type)
	{
	case PRE_WRITE: break;
	case PRE_READ:
	case PRE_READ_WRITE:

		__asm__ __volatile__ ("\n\
		# edx = s, ecx = bytes (loaded by the asm arguments)\n\
		cmpl	$4096, %%ecx	# clamp to 4kB\n\
		jle	0f\n\
		movl	$4096, %%ecx\n\
	0:\n\
		addl	$0x1f, %%ecx\n\
		shrl	$5, %%ecx	# number of 32-byte cache lines\n\
		jz	2f\n\
		jmp	1f\n\
\n\
		.align 16\n\
	1:	testb	%%al, (%%edx)	# touch one byte to pull the line into cache\n\
		addl	$32, %%edx\n\
		decl	%%ecx\n\
		jnz	1b\n\
	2:\n\
		"
		: : "d" (s), "c" (bytes)
		: "%eax", "%ebx", "%edi", "%esi", "memory", "cc");

		break;
	}
}

#endif
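
/*
 * A minimal self-test sketch, ours rather than part of the original file:
 * build with -DCOM_MEM_TEST to exercise whichever Com_Memcpy/Com_Memset
 * branch was compiled above, including the 0-3 byte tail paths.
 */
#ifdef COM_MEM_TEST
#include <stdio.h>

int main (void) {
	unsigned char src[67], dst[67];	// odd size exercises the tail handling
	size_t i;

	for (i = 0; i < sizeof(src); i++)
		src[i] = (unsigned char)i;

	Com_Memset (dst, 0xA5, sizeof(dst));
	for (i = 0; i < sizeof(dst); i++)
		if (dst[i] != 0xA5) {
			printf("Com_Memset mismatch at %u\n", (unsigned)i);
			return 1;
		}

	Com_Memcpy (dst, src, sizeof(src));
	for (i = 0; i < sizeof(dst); i++)
		if (dst[i] != src[i]) {
			printf("Com_Memcpy mismatch at %u\n", (unsigned)i);
			return 1;
		}

	printf("Com_Memcpy/Com_Memset OK\n");
	return 0;
}
#endif // COM_MEM_TEST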