分类: LINUX
2010-10-20 09:23:33
2) less-naive solution: 对方法1)进行优化,x&(x-1)可以将最右端的(rightmost)1转化为0typedef unsigned __int64 uint64
int popcount_naive(uint64 x)
{
int count = 0;
for(; x; x>>1)
++count;
return count;
}
时间复杂度为O(n),n为1的个数。这样在1比较少的时候比第一种方法快很多。int pop_count_lnaive(uint64 x)
{
int count = 0;
for (; x; ++count)
x&=x-1;
return count;
}
3) shift_and_add 时间复杂度O(logN)const uint64 hff = 0xffffffffffffffff;
#define f(y) if ((x &= x-1) == 0) return y;
int popcount_5(uint64 x) {
if (x == 0) return 0;
f( 1) f( 2) f( 3) f( 4) f( 5) f( 6) f( 7) f( 8)
f( 9) f(10) f(11) f(12) f(13) f(14) f(15) f(16)
f(17) f(18) f(19) f(20) f(21) f(22) f(23) f(24)
f(25) f(26) f(27) f(28) f(29) f(30) f(31) f(32)
f(33) f(34) f(35) f(36) f(37) f(38) f(39) f(40)
f(41) f(42) f(43) f(44) f(45) f(46) f(47) f(48)
f(49) f(50) f(51) f(52) f(53) f(54) f(55) f(56)
f(57) f(58) f(59) f(60) f(61) f(62) f(63)
return 64;
}
//Use this instead if most bits in x are 1 instead of 0
#define f(y) if ((x |= x+1) == hff) return 64-y;
[0] [b] [0] [d]其中ef=a+b, gh=c+d。而0b0d = (abcd) & 0101, 0a0c = (abcd)>>1 & 0101
[0] [a] [0] [c]
---------------
[e f] [g h]
[0 0] [g h]其中ijkl = ef + gh,00gh = (efgh) & 0011, 00ef = (abcd)>>2 && 0011。这样得出的ijkl就是最终结果。这样只需要log(N)步。
[e f]
-----------
[i j k l]
总体需要 6次shift, 12次and, 6次add 共24次算术运算。typedef unsigned __int64 uint64; //assume this gives 64-bits
const uint64 m1 = 0x5555555555555555; //binary: 0101...
const uint64 m2 = 0x3333333333333333; //binary: 00110011..
const uint64 m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ...
const uint64 m8 = 0x00ff00ff00ff00ff; //binary: 8 zeros, 8 ones ...
const uint64 m16 = 0x0000ffff0000ffff; //binary: 16 zeros, 16 ones ...
const uint64 m32 = 0x00000000ffffffff; //binary: 32 zeros, 32 ones
int popcount_1(uint64 x) {
x = (x & m1 ) + ((x >> 1) & m1 );
x = (x & m2 ) + ((x >> 2) & m2 );
x = (x & m4 ) + ((x >> 4) & m4 );
x = (x & m8 ) + ((x >> 8) & m8 );
x = (x & m16) + ((x >> 16) & m16);
x = (x & m32) + ((x >> 32) & m32);
return x;
}
共需要 6次shift, 5次and, 5次add, 1次sub,共17次算数运算。下面具体来看这7个与运算是如何省掉的。int popcount_2(uint64 x) {
x -= (x >> 1) & m1;
x = (x & m2) + ((x >> 2) & m2);
x = (x + (x >> 4)) & m4;
x += x >> 8;
x += x >> 16;
x += x >> 32;
return x & 0x7f;
}
[a] [b] [c] [d]我们所关心的结果只是奇数组(d+c)和(b+a)的结果,这里我们称之为有效组; 红色标注的部分(c+b)和(a+0)的结果是没有用的(上例中直接置为0),我们称之为无效组。每一步的操作,实际上是将有效组相加,结果存放在有效组和无效组连接所组成的新组中。
[0] [a] [b] [c]
---------------
[e f] [g h]