Quake III Arena GPL Source Release

linux_common.c


/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.

This file is part of Quake III Arena source code.

Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.

Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
===========================================================================
*/
/**
 * GAS syntax equivalents of the MSVC asm memory calls in common.c
 *
 * The following changes have been made to the asm:
 * 1. Registers are loaded by the inline asm arguments when possible
 * 2. Labels have been changed to local label format (0,1,etc.) to allow inlining
 *
 * HISTORY:
 *	AH - Created on 08 Dec 2000
 */
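
/*
 * The local label form referred to above, for illustration (this is a
 * fact of GAS syntax, not code from the original build): numeric labels
 * may be reused, and a branch names the nearest one with a direction
 * suffix, e.g.
 *
 *	0:	decl	%ecx
 *		jnz	0b		# 'b' = nearest label 0, searching backwards
 *		jz	1f		# 'f' = nearest label 1, searching forwards
 *
 * Because these labels are not unique global symbols, the surrounding
 * asm block can be inlined at several call sites without clashes.
 */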

#include <unistd.h>   // AH - for size_t
#include <string.h>

// bk001207 - we need something under Linux, too. Mac?
#if 1 // defined(C_ONLY) // bk010102 - dedicated?
void Com_Memcpy (void* dest, const void* src, const size_t count) {
  memcpy(dest, src, count);
}

void Com_Memset (void* dest, const int val, const size_t count) {
  memset(dest, val, count);
}

#else

typedef enum {
  PRE_READ,         // prefetch assuming that buffer is used for reading only
  PRE_WRITE,        // prefetch assuming that buffer is used for writing only
  PRE_READ_WRITE    // prefetch assuming that buffer is used for both reading and writing
} e_prefetch;

void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type);

void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
	// MMX version not used on standard Pentium MMX
	// because the dword version is faster (with
	// proper destination prefetching)
		__asm__ __volatile__ (" \n\
			# mov		eax,constant		# eax = val \n\
			# mov		edx,dest		# dest \n\
			# mov		ecx,count \n\
			movd		%%eax, %%mm0 \n\
			punpckldq	%%mm0, %%mm0 \n\
\n\
			# ensure that destination is qword aligned \n\
\n\
			testl		$7, %%edx				# qword padding? \n\
			jz		0f \n\
			movl		%%eax, (%%edx) \n\
			decl		%%ecx \n\
			addl		$4, %%edx \n\
\n\
0:			movl		%%ecx, %%ebx \n\
			andl		$0xfffffff0, %%ecx \n\
			jz		2f \n\
			jmp		1f \n\
			.align 		16 \n\
\n\
			# funny ordering here to avoid commands \n\
			# that cross 32-byte boundaries (the \n\
			# [edx+0] version has a special 3-byte opcode... \n\
1:			movq		%%mm0, 8(%%edx) \n\
			movq		%%mm0, 16(%%edx) \n\
			movq		%%mm0, 24(%%edx) \n\
			movq		%%mm0, 32(%%edx) \n\
			movq		%%mm0, 40(%%edx) \n\
			movq		%%mm0, 48(%%edx) \n\
			movq		%%mm0, 56(%%edx) \n\
			movq		%%mm0, (%%edx) \n\
			addl		$64, %%edx \n\
			subl		$16, %%ecx \n\
			jnz		1b \n\
2: \n\
			movl		%%ebx, %%ecx				# ebx = cnt \n\
			andl		$0xfffffff0, %%ecx			# ecx = cnt&~15 \n\
			subl		%%ecx, %%ebx \n\
			jz		6f \n\
			cmpl		$8, %%ebx \n\
			jl		3f \n\
\n\
			movq		%%mm0, (%%edx) \n\
			movq		%%mm0, 8(%%edx) \n\
			movq		%%mm0, 16(%%edx) \n\
			movq		%%mm0, 24(%%edx) \n\
			addl		$32, %%edx \n\
			subl		$8, %%ebx \n\
			jz		6f \n\
\n\
3:			cmpl		$4, %%ebx \n\
			jl		4f \n\
\n\
			movq		%%mm0, (%%edx) \n\
			movq		%%mm0, 8(%%edx) \n\
			addl		$16, %%edx \n\
			subl		$4, %%ebx \n\
\n\
4:			cmpl		$2, %%ebx \n\
			jl		5f \n\
			movq		%%mm0, (%%edx) \n\
			addl		$8, %%edx \n\
			subl		$2, %%ebx \n\
\n\
5:			cmpl		$1, %%ebx \n\
			jl		6f \n\
			movl		%%eax, (%%edx) \n\
6: \n\
			emms \n\
	"
	: : "a" (constant), "c" (count), "d" (dest)
	: "%ebx", "%edi", "%esi", "cc", "memory");
}
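
/*
 * For reference, a plain C sketch of what _copyDWord computes: fill
 * `count` dwords at `dest` with the 32-bit pattern `constant`. The
 * name is hypothetical and the sketch ignores the alignment and
 * write-combining tricks above; it only documents the asm's contract.
 */
static void _copyDWord_c (unsigned int* dest, const unsigned int constant, const unsigned int count) {
	unsigned int i;
	for (i = 0; i < count; i++)
		dest[i] = constant;	// one 32-bit store per iteration
}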

// optimized memory copy routine that handles all alignment
// cases and block sizes efficiently
void Com_Memcpy (void* dest, const void* src, const size_t count) {
	Com_Prefetch (src, count, PRE_READ);
	__asm__ __volatile__ (" \n\
		pushl		%%edi \n\
		pushl		%%esi \n\
		# mov		ecx,count \n\
		cmpl		$0, %%ecx				# count = 0 check (just to be on the safe side) \n\
		je		6f \n\
		# mov		edx,dest \n\
		movl		%0, %%ebx \n\
		cmpl		$32, %%ecx				# padding only? \n\
		jl		1f \n\
\n\
		movl		%%ecx, %%edi \n\
		andl		$0xffffffe0, %%edi			# edi = count&~31 \n\
		subl		$32, %%edi \n\
\n\
		.align 16 \n\
0: \n\
		movl		(%%ebx, %%edi, 1), %%eax \n\
		movl		4(%%ebx, %%edi, 1), %%esi \n\
		movl		%%eax, (%%edx, %%edi, 1) \n\
		movl		%%esi, 4(%%edx, %%edi, 1) \n\
		movl		8(%%ebx, %%edi, 1), %%eax \n\
		movl		12(%%ebx, %%edi, 1), %%esi \n\
		movl		%%eax, 8(%%edx, %%edi, 1) \n\
		movl		%%esi, 12(%%edx, %%edi, 1) \n\
		movl		16(%%ebx, %%edi, 1), %%eax \n\
		movl		20(%%ebx, %%edi, 1), %%esi \n\
		movl		%%eax, 16(%%edx, %%edi, 1) \n\
		movl		%%esi, 20(%%edx, %%edi, 1) \n\
		movl		24(%%ebx, %%edi, 1), %%eax \n\
		movl		28(%%ebx, %%edi, 1), %%esi \n\
		movl		%%eax, 24(%%edx, %%edi, 1) \n\
		movl		%%esi, 28(%%edx, %%edi, 1) \n\
		subl		$32, %%edi \n\
		jge		0b \n\
\n\
		movl		%%ecx, %%edi \n\
		andl		$0xffffffe0, %%edi \n\
		addl		%%edi, %%ebx				# increase src pointer \n\
		addl		%%edi, %%edx				# increase dst pointer \n\
		andl		$31, %%ecx				# new count \n\
		jz		6f					# if count = 0, get outta here \n\
\n\
1: \n\
		cmpl		$16, %%ecx \n\
		jl		2f \n\
		movl		(%%ebx), %%eax \n\
		movl		%%eax, (%%edx) \n\
		movl		4(%%ebx), %%eax \n\
		movl		%%eax, 4(%%edx) \n\
		movl		8(%%ebx), %%eax \n\
		movl		%%eax, 8(%%edx) \n\
		movl		12(%%ebx), %%eax \n\
		movl		%%eax, 12(%%edx) \n\
		subl		$16, %%ecx \n\
		addl		$16, %%ebx \n\
		addl		$16, %%edx \n\
2: \n\
		cmpl		$8, %%ecx \n\
		jl		3f \n\
		movl		(%%ebx), %%eax \n\
		movl		%%eax, (%%edx) \n\
		movl		4(%%ebx), %%eax \n\
		subl		$8, %%ecx \n\
		movl		%%eax, 4(%%edx) \n\
		addl		$8, %%ebx \n\
		addl		$8, %%edx \n\
3: \n\
		cmpl		$4, %%ecx \n\
		jl		4f \n\
		movl		(%%ebx), %%eax	# here 4-7 bytes \n\
		addl		$4, %%ebx \n\
		subl		$4, %%ecx \n\
		movl		%%eax, (%%edx) \n\
		addl		$4, %%edx \n\
4:							# 0-3 remaining bytes \n\
		cmpl		$2, %%ecx \n\
		jl		5f \n\
		movw		(%%ebx), %%ax	# two bytes \n\
		cmpl		$3, %%ecx				# less than 3? \n\
		movw		%%ax, (%%edx) \n\
		jl		6f \n\
		movb		2(%%ebx), %%al	# last byte \n\
		movb		%%al, 2(%%edx) \n\
		jmp		6f \n\
5: \n\
		cmpl		$1, %%ecx \n\
		jl		6f \n\
		movb		(%%ebx), %%al \n\
		movb		%%al, (%%edx) \n\
6: \n\
		popl		%%esi \n\
		popl		%%edi \n\
	"
	: : "m" (src), "d" (dest), "c" (count)
	: "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
}
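
/*
 * The same strategy in portable C, for illustration (hypothetical
 * helper, not part of the original build): copy the bulk in 32-byte
 * blocks, then finish the 0-31 byte tail, mirroring the 16/8/4/2/1
 * cascade above.
 */
static void Com_Memcpy_c (void* dest, const void* src, const size_t count) {
	unsigned char* d = (unsigned char*) dest;
	const unsigned char* s = (const unsigned char*) src;
	size_t n = count;

	while (n >= 32) {		// main 32-byte blocks
		memcpy (d, s, 32);
		d += 32; s += 32; n -= 32;
	}
	if (n)				// 0-31 remaining bytes
		memcpy (d, s, n);
}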
    236 
    237 void Com_Memset (void* dest, const int val, const size_t count)
    238 {
    239 	unsigned int fillval;
    240 
    241 	if (count < 8)
    242 	{
    243 		__asm__ __volatile__ (" \
    244 			//mov		edx,dest \
    245 			//mov		eax, val \
    246 			movb		%%al, %%ah \
    247 			movl		%%eax, %%ebx \
    248 			andl		$0xffff, %%ebx \
    249 			shll		$16, %%eax \
    250 			addl		%%ebx, %%eax	// eax now contains pattern \
    251 			//mov		ecx,count \
    252 			cmpl		$4, %%ecx \
    253 			jl		0f \
    254 			movl		%%eax, (%%edx)	// copy first dword \
    255 			addl		$4, %%edx \
    256 			subl		$4, %%ecx \
    257 	0:		cmpl		$2, %%ecx \
    258 			jl		1f \
    259 			movw		%%ax, (%%edx)	// copy 2 bytes \
    260 			addl		$2, %%edx \
    261 			subl		$2, %%ecx \
    262 	1:		cmpl		$0, %%ecx \
    263 			je		2f \
    264 			movb		%%al, (%%edx)	// copy single byte \
    265 	2:		 \
    266 		"
    267 		: : "d" (dest), "a" (val), "c" (count)
    268 		: "%ebx", "%edi", "%esi", "cc", "memory");
    269 		
    270 		return;
    271 	}
    272 
    273 	fillval = val;
    274 	
    275 	fillval = fillval|(fillval<<8);
    276 	fillval = fillval|(fillval<<16);		// fill dword with 8-bit pattern
    277 
    278 	_copyDWord ((unsigned int*)(dest),fillval, count/4);
    279 	
    280 	__asm__ __volatile__ ("     		// padding of 0-3 bytes \
    281 		//mov		ecx,count \
    282 		movl		%%ecx, %%eax \
    283 		andl		$3, %%ecx \
    284 		jz		1f \
    285 		andl		$0xffffff00, %%eax \
    286 		//mov		ebx,dest \
    287 		addl		%%eax, %%edx \
    288 		movl		%0, %%eax \
    289 		cmpl		$2, %%ecx \
    290 		jl		0f \
    291 		movw		%%ax, (%%edx) \
    292 		cmpl		$2, %%ecx \
    293 		je		1f					\
    294 		movb		%%al, 2(%%edx)		\
    295 		jmp		1f \
    296 0:		\
    297 		cmpl		$0, %%ecx\
    298 		je		1f\
    299 		movb		%%al, (%%edx)\
    300 1:	\
    301 	"
    302 	: : "m" (fillval), "c" (count), "d" (dest)
    303 	: "%eax", "%ebx", "%edi", "%esi", "cc", "memory");	
    304 }
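
/*
 * The shift-or sequence above replicates an 8-bit value into all four
 * bytes of a dword; equivalently (assuming a 32-bit unsigned int, and
 * taking the low byte of val, as memset semantics require):
 *
 *	fillval = 0x01010101u * (unsigned char) val;
 */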

void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
{
	// write buffer prefetching is performed only if
	// the processor benefits from it. Read and read/write
	// prefetching is always performed.

	switch (type)
	{
		case PRE_WRITE : break;
		case PRE_READ:
		case PRE_READ_WRITE:

		__asm__ __volatile__ (" \n\
			# mov		ebx,s \n\
			# mov		ecx,bytes \n\
			cmpl		$4096, %%ecx				# clamp to 4kB \n\
			jle		0f \n\
			movl		$4096, %%ecx \n\
	0: \n\
			addl		$0x1f, %%ecx \n\
			shrl		$5, %%ecx				# number of cache lines \n\
			jz		2f \n\
			jmp		1f \n\
\n\
			.align 16 \n\
	1:		testb		%%al, (%%edx) \n\
			addl		$32, %%edx \n\
			decl		%%ecx \n\
			jnz		1b \n\
	2: \n\
		"
		: : "d" (s), "c" (bytes)
		: "%eax", "%ebx", "%edi", "%esi", "memory", "cc");

		break;
	}
}
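
/*
 * A modern equivalent, for illustration: GCC and Clang expose the same
 * read prefetch portably via __builtin_prefetch. The helper name is
 * hypothetical; the 32-byte line size and 4kB clamp mirror the asm above.
 */
static void Com_Prefetch_builtin (const void *s, unsigned int bytes) {
	const char *p = (const char *) s;
	unsigned int i;

	if (bytes > 4096)
		bytes = 4096;				// clamp to 4kB
	for (i = 0; i < bytes; i += 32)		// one hint per cache line
		__builtin_prefetch (p + i, 0, 3);	// 0 = read, 3 = high temporal locality
}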

#endif