#include "stdafx.h"
#include "windows.h"
#include "stdio.h"
/*
 * Copy l bytes from s to d, one int at a time, then finish the leftover
 * l % sizeof(int) bytes one char at a time.
 *
 * d, s: destination and source buffers. Both are accessed through int
 *       pointers, so the caller must pass addresses the target can read
 *       and write as int.
 * l:    number of bytes to copy; zero or a negative value copies nothing.
 *
 * The copy only runs forward, so a destination that starts inside
 * [s, s + l) is not handled the way memmove() would handle it.
 */
void mmove_int(char *d, char *s, int l)
{
    int *pd = (int *)d;
    int *ps = (int *)s;
    char *tail_d;
    char *tail_s;
    int i;

    for (i = 0; i < l / (int)sizeof(int); i++)
        *pd++ = *ps++;

    /*
     * The tail must advance one byte per step. Incrementing the int
     * pointers here (as in "*(char *)pd++") would skip sizeof(int) bytes
     * each time and touch memory beyond the requested range.
     */
    tail_d = (char *)pd;
    tail_s = (char *)ps;
    for (i = 0; i < l % (int)sizeof(int); i++)
        *tail_d++ = *tail_s++;
}
/*
 * Copy l bytes from s to d one char at a time, lowest address first.
 * A zero or negative l copies nothing.
 */
void mmove_char(char *d, char *s, int l)
{
    int remaining;

    for (remaining = l; remaining > 0; remaining--)
        *d++ = *s++;
}
/*
 * Copy l bytes from s to d with the x86 string instructions: REP MOVSD for
 * the whole dwords, then REP MOVSB for the remaining l & 3 bytes.
 *
 * The dword count is produced with SHR (a logical shift), so l is in effect
 * treated as an unsigned value here.
 * NOTE(review): nothing in this routine sets or clears the direction flag;
 * presumably it is clear on entry so the copy runs forward - confirm.
 */
void mmove_movsd(char *d, char *s, int l)
{
_asm {
/* esi = source, edi = destination, edx keeps the full byte count */
mov esi, s;
mov edi, d;
mov edx, l;
/* ecx = l / 4: number of dwords moved by rep movsd */
mov ecx, edx;
shr ecx, 2;
rep movsd;
/* ecx = l & 3: leftover bytes moved by rep movsb */
mov ecx, edx;
and ecx,3;
rep movsb;
}
}
/*
 * Copy l bytes from s to d through the MMX registers mm0-mm3, 32 bytes per
 * loop iteration, then finish the remaining l % 32 bytes one char at a time.
 *
 * d, s: destination and source buffers.
 * l:    number of bytes to copy; zero or a negative value copies nothing
 *       (the assembly derives its loop count with an unsigned shift, so a
 *       negative value must never reach it).
 *
 * The copy only runs forward, so overlapping buffers are not handled the
 * way memmove() would handle them.
 */
void mmove_movd(void *d, void *s, int l)
{
    char *tail_d;
    char *tail_s;
    int bulk;
    int i;

    if (l <= 0)
        return;

    _asm {
        mov eax, s
        mov edx, d
        mov ecx, l
        /* ecx = number of whole 32-byte chunks */
        shr ecx, 5
        jz done
    ll:
        /* movq moves all 64 bits of each slot; movd would move only 32 */
        movq mm0, qword ptr [eax]
        movq mm1, qword ptr [eax+8]
        movq mm2, qword ptr [eax+16]
        movq mm3, qword ptr [eax+24]
        movq qword ptr [edx], mm0
        movq qword ptr [edx+8], mm1
        movq qword ptr [edx+16], mm2
        movq qword ptr [edx+24], mm3
        add eax, 32
        add edx, 32
        dec ecx
        jnz ll
        /* MMX aliases the x87 register file; emms makes it usable again */
        emms
    done:
    }

    /* bytes past the last whole 32-byte chunk */
    bulk = l & ~31;
    tail_d = (char *)d + bulk;
    tail_s = (char *)s + bulk;
    for (i = bulk; i < l; i++)
        *tail_d++ = *tail_s++;
}
//
//
//void mmove_movdqu(char *d, char *s, int l)
//{
//
// _asm{
// mov eax,s;
// mov edx,d;
// mov ecx,l;
// shr ecx,5;
// jz done;
//ll:
// movdqu xmm0, [eax];
// movdqu xmm1, [eax+16];
// /// movdqu xmm2, [eax+32];
// // movdqu xmm3, [eax+48];
// /*
// movlpd [edx],xmm0;
// movhpd [edx+8],xmm0;
//
// movlpd [edx+16],xmm1;
// movhpd [edx+24],xmm1;
// */
//
// movdqu [edx],xmm0;
// movdqu [edx+16],xmm1;
// // movdqu [edx+32],xmm2;
// // movdqu [edx+48],xmm3;
//
// add eax,32;
// add edx,32
// dec ecx;
// jnz ll;
//done:
//
// };
//}
//
/*
 * Copy l bytes from s to d with unaligned 16-byte SSE2 moves (MOVDQU)
 * through xmm0-xmm7, 128 bytes per loop iteration, then finish the
 * remaining l % 128 bytes one char at a time.
 *
 * d, s: destination and source buffers; no particular alignment is needed
 *       because only the unaligned move form is used.
 * l:    number of bytes to copy; zero or a negative value copies nothing
 *       (the assembly derives its loop count with an unsigned shift, so a
 *       negative value must never reach it).
 *
 * The copy only runs forward, so overlapping buffers are not handled the
 * way memmove() would handle them.
 */
void mmove_movdqu(char *d, char *s, int l)
{
    char *tail_d;
    char *tail_s;
    int bulk;
    int i;

    if (l <= 0)
        return;

    _asm {
        mov eax, s
        mov edx, d
        mov ecx, l
        /*
         * Each iteration moves 8 * 16 = 128 bytes, so the iteration count
         * is l / 128, i.e. a shift by 7.
         */
        shr ecx, 7
        jz done
    ll:
        movdqu xmm0, [eax]
        movdqu xmm1, [eax+16]
        movdqu xmm2, [eax+32]
        movdqu xmm3, [eax+48]
        movdqu xmm4, [eax+64]
        movdqu xmm5, [eax+80]
        movdqu xmm6, [eax+96]
        movdqu xmm7, [eax+112]
        movdqu [edx], xmm0
        movdqu [edx+16], xmm1
        movdqu [edx+32], xmm2
        movdqu [edx+48], xmm3
        movdqu [edx+64], xmm4
        movdqu [edx+80], xmm5
        movdqu [edx+96], xmm6
        movdqu [edx+112], xmm7
        add eax, 128
        add edx, 128
        dec ecx
        jnz ll
    done:
    }

    /* bytes past the last whole 128-byte chunk */
    bulk = l & ~127;
    tail_d = d + bulk;
    tail_s = s + bulk;
    for (i = bulk; i < l; i++)
        *tail_d++ = *tail_s++;
}
/* Byte count passed to every copy call in the benchmark. */
#define SIZE_TEST (128 * 1024)
/*
 * Round p up to the next multiple of x. The mask arithmetic is only valid
 * when x is a power of two. Both arguments are parenthesized so that
 * expressions such as ALIGN(base + off, 1 << 3) expand as intended; x is
 * evaluated twice, so it must not have side effects.
 *
 * The result is computed in unsigned int, so a pointer wider than
 * unsigned int would be truncated.
 */
#define ALIGN(p,x) (((unsigned)(p) + ((x) - 1)) & ~((x) - 1))
/* Number of times each copy is repeated inside one timed measurement. */
#define BENCH_REPEAT 1000

/*
 * Time BENCH_REPEAT executions of `call` with QueryPerformanceCounter and
 * print the elapsed milliseconds through `fmt`, a printf format that takes
 * one double. A macro is used (rather than a function pointer) so that each
 * routine is still invoked by a direct call, exactly as it is measured.
 * Relies on _tmain's locals i, start, end, interval and freq.
 */
#define RUN_BENCH(fmt, call) \
    do { \
        QueryPerformanceCounter(&start); \
        for (i = 0; i < BENCH_REPEAT; i++) \
            call; \
        QueryPerformanceCounter(&end); \
        interval = (double)(end.QuadPart - start.QuadPart); \
        printf(fmt, interval * 1000 / freq); \
    } while (0)

/*
 * Benchmark driver: allocates a source and a destination buffer, derives
 * 4-, 16- and 64-byte aligned views of both, and prints how long
 * BENCH_REPEAT copies of SIZE_TEST bytes take for every copy routine in
 * this file and for memmove().
 *
 * Returns 0 after the run, or 1 when the buffers cannot be allocated.
 * Waits for a key press (getchar) before releasing the buffers.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    LARGE_INTEGER l, start, end;
    int i;
    double interval;
    double freq;
    /*
     * 128 spare bytes: aligning up moves a pointer by at most 63 bytes, so
     * SIZE_TEST bytes still fit behind every aligned view.
     */
    char *ps = (char*)malloc(SIZE_TEST + 128);
    char *pd = (char*)malloc(SIZE_TEST + 128);
    char *ps4, *pd4;
    char *ps16, *pd16;
    char *ps64, *pd64;

    if (ps == NULL || pd == NULL) {
        printf("failed to allocate the test buffers\n");
        free(ps);
        free(pd);
        return 1;
    }

    /*
     * ALIGN computes in unsigned int, which only matches the pointer width
     * on a 32-bit build (the inline assembly in this file is 32-bit too).
     */
    ps4 = (char *)ALIGN(ps, 4);     /* 4 byte align */
    pd4 = (char *)ALIGN(pd, 4);
    ps16 = (char *)ALIGN(ps, 16);   /* 16 byte align */
    pd16 = (char *)ALIGN(pd, 16);
    ps64 = (char *)ALIGN(ps, 64);   /* 64 byte align (cache line) */
    pd64 = (char *)ALIGN(pd, 64);

    QueryPerformanceFrequency(&l);
    freq = (double)l.QuadPart;

    printf("test mmove_char()....\n");
    RUN_BENCH("move_char() without aglin,time is: %g ms\n",
              mmove_char(pd, ps, SIZE_TEST));
    RUN_BENCH("move_char() with aglin 4 byte,time is: %g ms\n",
              mmove_char(pd4, ps4, SIZE_TEST));
    RUN_BENCH("move_char() with aglin 64 byte, time is: %g ms\n",
              mmove_char(pd64, ps64, SIZE_TEST));

    printf("\n\ntest mmove_int()....\n");
    RUN_BENCH("move_int() without aglin, time is: %g ms\n",
              mmove_int(pd, ps, SIZE_TEST));
    RUN_BENCH("move_int() with aglin 4 byte, time is: %g ms\n",
              mmove_int(pd4, ps4, SIZE_TEST));
    RUN_BENCH("move_int() with aglin 64 byte, time is: %g ms\n",
              mmove_int(pd64, ps64, SIZE_TEST));

    printf("\n\ntest mmove_movsd()....\n");
    RUN_BENCH("move_movsd() without aglin, time is: %g ms\n",
              mmove_movsd(pd, ps, SIZE_TEST));
    RUN_BENCH("move_movsd() with aglin 4 byte, time is: %g ms\n",
              mmove_movsd(pd4, ps4, SIZE_TEST));
    RUN_BENCH("move_movsd() with aglin 64 byte, time is: %g ms\n",
              mmove_movsd(pd64, ps64, SIZE_TEST));

    /*
     * These rows are reported from the same double as every other row;
     * integer millisecond division would truncate them and make them
     * incomparable with the rest of the table.
     */
    printf("\n\ntest mmove_movd()....\n");
    RUN_BENCH("move_movd() without aglin , time is: %g ms\n",
              mmove_movd(pd, ps, SIZE_TEST));
    RUN_BENCH("move_movd() with aglin 4 byte, time is: %g ms\n",
              mmove_movd(pd4, ps4, SIZE_TEST));
    RUN_BENCH("move_movd() with aglin 64 byte, time is: %g ms\n",
              mmove_movd(pd64, ps64, SIZE_TEST));

    printf("\n\ntest mmove_movdqu().....\n");
    RUN_BENCH("move_movdqu() without aglin, time is: %g ms\n",
              mmove_movdqu(pd, ps, SIZE_TEST));
    RUN_BENCH("move_movdqu() with aglin 16 byte, time is: %g ms\n",
              mmove_movdqu(pd16, ps16, SIZE_TEST));
    RUN_BENCH("move_movdqu() with aglin 64 byte, time is: %g ms\n",
              mmove_movdqu(pd64, ps64, SIZE_TEST));

    printf("\n\ntest memmove()....\n");
    RUN_BENCH("memmove() without aglin, time is: %g ms\n",
              memmove(pd, ps, SIZE_TEST));
    RUN_BENCH("memmove() with aglin 32 bit, time is: %g ms\n",
              memmove(pd4, ps4, SIZE_TEST));
    /* pd64/ps64 are aligned to 64 bytes, not 64 bits */
    RUN_BENCH("memove() with aglin 64 byte, time is: %g ms\n",
              memmove(pd64, ps64, SIZE_TEST));

    getchar();
    free(ps);
    free(pd);
    return 0;
}

#undef RUN_BENCH
#undef BENCH_REPEAT