Chinaunix首页 | 论坛 | 博客
  • 博客访问: 2717704
  • 博文数量: 416
  • 博客积分: 10220
  • 博客等级: 上将
  • 技术积分: 4193
  • 用 户 组: 普通用户
  • 注册时间: 2006-12-15 09:47
文章分类

全部博文(416)

文章存档

2022年(1)

2021年(1)

2020年(1)

2019年(5)

2018年(7)

2017年(6)

2016年(7)

2015年(11)

2014年(1)

2012年(5)

2011年(7)

2010年(35)

2009年(64)

2008年(48)

2007年(177)

2006年(40)

我的朋友

分类: C/C++

2010-03-12 17:20:08

#include "stdafx.h"
#include "windows.h"
#include "stdio.h"
/*
 * Copy l bytes from s to d, one int at a time, then finish the
 * remaining (l % sizeof(int)) bytes one byte at a time.
 *
 * d - destination buffer, at least l bytes
 * s - source buffer, at least l bytes
 * l - number of bytes to copy; values <= 0 copy nothing
 *
 * The buffers are accessed through int pointers regardless of their
 * alignment (that is what the benchmark measures), so this relies on the
 * target tolerating unaligned int loads/stores. The copy runs from low to
 * high addresses.
 */
void mmove_int(char *d, char *s, int l)
{
 const int word = (int)sizeof(int);
 int *wd = (int *)d;
 int *ws = (int *)s;
 char *bd;
 char *bs;
 int i;

 /* Bulk part: whole ints. */
 for (i = 0; i < l / word; i++)
  *wd++ = *ws++;

 /*
  * Tail: switch to real char pointers. Advancing the int pointers here
  * (as "*(char *)pd++" does) would step a whole int per byte and copy
  * the wrong bytes.
  */
 bd = (char *)wd;
 bs = (char *)ws;
 for (i = 0; i < l % word; i++)
  *bd++ = *bs++;
}

/*
 * Byte-by-byte copy of l bytes from s to d, low address to high address.
 * Nothing is copied when l is zero or negative.
 */
void mmove_char(char *d, char *s, int l)
{
 int idx;

 for (idx = 0; idx < l; idx++) {
  d[idx] = s[idx];
 }
}
 
/*
 * Copy l bytes from s to d with the x86 string instructions:
 * l / 4 doublewords via "rep movsd", then the remaining l & 3 bytes via
 * "rep movsb".
 *
 * d - destination buffer
 * s - source buffer
 * l - byte count (treated as unsigned by the shift)
 *
 * NOTE(review): the copy direction depends on the CPU direction flag,
 * which this code does not set; it presumably relies on the flag being
 * clear (forward copy) on entry - confirm against the calling convention.
 */
void mmove_movsd(char *d, char *s, int l)
{
 _asm {
  /* esi = source, edi = destination, edx keeps the full byte count */
  mov esi, s;
  mov edi, d;
  mov edx, l;
  /* ecx = l / 4 : number of doublewords to move */
  mov ecx, edx;
  shr ecx, 2;
  rep movsd;
  /* ecx = l & 3 : leftover bytes after the doubleword copy */
  mov ecx, edx;
  and ecx,3;
  rep movsb;
 }
}
 

/*
 * Copy l bytes from s to d through the MMX registers mm0-mm3,
 * 32 bytes (4 x 8) per loop pass; the remaining l % 32 bytes are copied
 * one byte at a time in C.
 *
 * d - destination buffer, at least l bytes
 * s - source buffer, at least l bytes
 * l - number of bytes to copy; values <= 0 copy nothing
 *
 * Notes:
 *  - movq (64-bit) is used for the transfers. movd only moves 32 bits,
 *    which does not match the 8-byte stride between the loads/stores.
 *  - emms is executed after the loop: the MMX registers alias the x87
 *    register stack, and floating-point code run by the caller afterwards
 *    misbehaves unless the MMX state is cleared.
 */
void mmove_movd(void *d, void *s, int l)
{
 char *tail_d;
 char *tail_s;
 int tail;

 if (l <= 0)
  return;

 _asm{
  mov eax,s;
  mov edx,d;
  mov ecx,l;
  shr ecx,5;
  jz done;
ll:
  movq mm0, qword ptr [eax];
  movq mm1, qword ptr [eax+8];
  movq mm2, qword ptr [eax+16];
  movq mm3, qword ptr [eax+24];
  movq qword ptr [edx],mm0;
  movq qword ptr [edx+8], mm1;
  movq qword ptr [edx+16], mm2;
  movq qword ptr [edx+24], mm3;
  add eax,32;
  add edx,32;
  dec ecx;
  jnz ll;
  emms;
done:
 };

 /* Bytes that did not fill a whole 32-byte pass. */
 tail = l & 31;
 tail_d = (char *)d + (l - tail);
 tail_s = (char *)s + (l - tail);
 while (tail-- > 0)
  *tail_d++ = *tail_s++;
}
//
//
//void mmove_movdqu(char *d, char *s, int l)
//{
//
// _asm{
//  mov eax,s;
//  mov edx,d;
//  mov ecx,l;
//  shr ecx,5;
//  jz done;
//ll:               
//  movdqu xmm0, [eax];
//  movdqu xmm1, [eax+16];
//  ///        movdqu xmm2, [eax+32];
//  //        movdqu xmm3, [eax+48];
//  /*
//  movlpd [edx],xmm0;
//  movhpd [edx+8],xmm0;
//
//  movlpd [edx+16],xmm1;
//  movhpd [edx+24],xmm1;
//  */               
//
//  movdqu [edx],xmm0;
//  movdqu [edx+16],xmm1;
//  //                movdqu [edx+32],xmm2;
//  //                movdqu [edx+48],xmm3;
//
//  add eax,32;
//  add edx,32
//   dec ecx;
//  jnz ll;
//done:
//
// };
//}
//
/*
 * Copy l bytes from s to d through the SSE2 registers xmm0-xmm7 using
 * unaligned 16-byte moves (movdqu), 128 bytes (8 x 16) per loop pass; the
 * remaining l % 128 bytes are copied one byte at a time in C.
 *
 * d - destination buffer, at least l bytes
 * s - source buffer, at least l bytes
 * l - number of bytes to copy; values <= 0 copy nothing
 *
 * The loop count is l / 128 (shr by 7) so that it matches the 128 bytes
 * moved per pass; shifting by 14 would copy only 1/128 of the data.
 */
void mmove_movdqu(char *d, char *s, int l)
{
 char *tail_d;
 char *tail_s;
 int tail;

 if (l <= 0)
  return;

 _asm{
  mov eax,s;
  mov edx,d;
  mov ecx,l;
  shr ecx,7;
  jz done;
ll:
  movdqu xmm0, [eax];
  movdqu xmm1, [eax+16];
  movdqu xmm2, [eax+32];
  movdqu xmm3, [eax+48];
  movdqu xmm4, [eax+64];
  movdqu xmm5, [eax+80];
  movdqu xmm6, [eax+96];
  movdqu xmm7, [eax+112];

  movdqu [edx],xmm0;
  movdqu [edx+16],xmm1;
  movdqu [edx+32],xmm2;
  movdqu [edx+48],xmm3;
  movdqu [edx+64],xmm4;
  movdqu [edx+80],xmm5;
  movdqu [edx+96],xmm6;
  movdqu [edx+112],xmm7;
  add eax,128;
  add edx,128;
  dec ecx;
  jnz ll;
done:
 };

 /* Bytes that did not fill a whole 128-byte pass. */
 tail = l & 127;
 tail_d = d + (l - tail);
 tail_s = s + (l - tail);
 while (tail-- > 0)
  *tail_d++ = *tail_s++;
}
/* Number of bytes moved by every benchmarked copy call (128 KiB). */
#define SIZE_TEST  (128 * 1024)
/*
 * Round p up to the next multiple of x, where x is a power of two.
 * The result has type unsigned, so a pointer argument is only handled
 * correctly where pointers fit in an unsigned (32-bit builds).
 * Both arguments are fully parenthesized so expressions such as
 * ALIGN(p, 1 << 4) expand correctly; x is evaluated twice.
 */
#define ALIGN(p,x) (((unsigned)(p) + ((x)-1)) & (~((x)-1)))

/*
 * Benchmark driver.
 *
 * Allocates a source and a destination buffer of SIZE_TEST + 128 bytes,
 * derives 4-, 8-, 16- and 64-byte aligned start addresses inside them, and
 * times 1000 calls of each copy routine (mmove_char, mmove_int,
 * mmove_movsd, mmove_movd, mmove_movdqu, memmove) for several alignments
 * using QueryPerformanceCounter. Each result is printed in milliseconds.
 *
 * Returns 0 on success, 1 if a buffer could not be allocated.
 * argc/argv are not used.
 */
int _tmain(int argc, _TCHAR* argv[])
{
 LARGE_INTEGER l,start,end;
 int i;
 double interval;
 double freq;
 /* 128 spare bytes leave room to round the start address up to 64. */
 char *ps = (char*)malloc(SIZE_TEST + 128);
 char *pd = (char*)malloc(SIZE_TEST + 128);
 /* The (unsigned) casts assume 32-bit pointers. */
 char *ps4 = (char *)(((unsigned)ps + 3) & 0xFFFFFFFC );                /* 4 byte align */
 char *pd4 = (char *)(((unsigned)pd + 3) & 0xFFFFFFFC );
 char *ps8 = (char *)(((unsigned)ps + 7) & 0xFFFFFFF8 );                /* 8 byte align */
 char *pd8 = (char *)(((unsigned)pd + 7) & 0xFFFFFFF8 );
 char *ps16 = (char *)(((unsigned)ps + 15) & 0xFFFFFFF0);    /* 16 byte align */
 char *pd16 = (char *)(((unsigned)pd + 15) & 0xFFFFFFF0);
 char *ps64 = (char *)(((unsigned)ps + 63) & 0xFFFFFFC0);        /* 64 byte align (cache line) */
 char *pd64 = (char *)(((unsigned)pd + 63) & 0xFFFFFFC0);

 /* Every test below dereferences these buffers; bail out if either is missing. */
 if (ps == NULL || pd == NULL) {
  fprintf(stderr, "malloc failed\n");
  free(ps);
  free(pd);
  return 1;
 }
 
 /* Counter ticks per second, used to convert tick deltas to milliseconds. */
 QueryPerformanceFrequency(&l);
 freq = (double)l.QuadPart;

 printf("test mmove_char()....\n");
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_char(pd,ps,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_char() without aglin,time is: %g ms\n", interval*1000/freq);
 /*--------------------------------------*/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_char(pd4,ps4,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_char() with aglin 4 byte,time is: %g ms\n", interval*1000/freq);
 /*********************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_char(pd64,ps64,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_char() with aglin 64 byte, time is: %g ms\n", interval*1000/freq);
 
 /*-----------------------------------------------------*/
 printf("\n\ntest mmove_int()....\n");
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_int(pd,ps,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_int() without aglin, time is: %g ms\n", interval*1000/freq);

 /***************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_int(pd4,ps4,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_int() with aglin 4 byte, time is: %g ms\n", interval*1000/freq);
 /****************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_int(pd64,ps64,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_int() with aglin 64 byte, time is: %g ms\n", interval*1000/freq);

 /*-----------------------------------------------------*/
 printf("\n\ntest mmove_movsd()....\n");
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_movsd(pd,ps,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_movsd() without aglin, time is: %g ms\n", interval*1000/freq);

 /***************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_movsd(pd4,ps4,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_movsd() with aglin 4 byte, time is: %g ms\n", interval*1000/freq);
 /****************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_movsd(pd64,ps64,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_movsd() with aglin 64 byte, time is: %g ms\n", interval*1000/freq);

 /*---------------------------------------*/
 /*
  * The three mmove_movd results are printed with 64-bit integer
  * arithmetic (whole milliseconds) instead of the double-based "%g"
  * used elsewhere.
  * NOTE(review): presumably done because mmove_movd uses MMX registers,
  * which share state with the x87 FPU - confirm before unifying.
  */
 printf("\n\ntest mmove_movd()....\n");
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_movd(pd,ps,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_movd() without aglin , time is: %I64d ms\n", (end.QuadPart-start.QuadPart)*1000/l.QuadPart);
 /************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_movd(pd4,ps4,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_movd() with aglin 4 byte, time is: %I64d ms\n", (end.QuadPart-start.QuadPart)*1000/l.QuadPart);
 /**************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_movd(pd64,ps64,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_movd() with aglin 64 byte, time is: %I64d ms\n", (end.QuadPart-start.QuadPart)*1000/l.QuadPart);
 
 /*-----------------------------------------------------*/
 printf("\n\ntest mmove_movdqu().....\n");
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_movdqu(pd,ps,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_movdqu() without aglin, time is: %g ms\n", interval*1000/freq);

 /**************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_movdqu(pd16,ps16,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_movdqu() with aglin 16 byte, time is: %g ms\n", interval*1000/freq);

 /**************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  mmove_movdqu(pd64,ps64,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("move_movdqu() with aglin 64 byte, time is: %g ms\n", interval*1000/freq);

 /*-----------------------------------------------------*/
 /* Library memmove() as the reference point. */
 printf("\n\ntest memmove()....\n");
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  memmove(pd,ps,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("memmove() without aglin, time is: %g ms\n", interval*1000/freq);

 /***************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  memmove(pd4,ps4,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("memmove() with aglin 32 bit, time is: %g ms\n", interval*1000/freq);

 /****************************************/
 QueryPerformanceCounter(&start);
 for (i = 0; i < 1000; i++)
  memmove(pd64,ps64,SIZE_TEST);
 QueryPerformanceCounter(&end);
 interval = (double)(end.QuadPart - start.QuadPart);
 printf("memove() with aglin 64 bit, time is: %g ms\n", interval*1000/freq);
 /* Keep the console window open until a key is pressed. */
 getchar();
 free(ps);
 free(pd);
 return 0;
}
阅读(1362) | 评论(1) | 转发(0) |
给主人留下些什么吧!~~

chinaunix网友2010-03-15 12:46:10

C++ 编程语言思维导图 : http://bbs.chinaunix.net/thread-1675540-1-1.html