Chinaunix首页 | 论坛 | 博客
  • 博客访问: 690520
  • 博文数量: 109
  • 博客积分: 2033
  • 博客等级: 大尉
  • 技术积分: 1454
  • 用 户 组: 普通用户
  • 注册时间: 2011-03-11 13:26
文章分类

全部博文(109)

文章存档

2012年(5)

2011年(104)

分类: C/C++

2011-04-06 08:22:01

/* stringlib: fastsearch implementation */
2
3 #ifndef STRINGLIB_FASTSEARCH_H
4 #define STRINGLIB_FASTSEARCH_H
5
6 /* fast search/count implementation, based on a mix between boyer-
7 moore and horspool, with a few more bells and whistles on the top.
8 for some more background, see: */
9
10 /* note: fastsearch may access s[n], which isn't a problem when using
11 Python's ordinary string types, but may cause problems if you're
12 using this code in other contexts. also, the count mode returns -1
13 if there cannot possible be a match in the target string, and 0 if
14 it has actually checked for matches, but didn't find any. callers
15 beware! */
16
17 #define FAST_COUNT 0
18 #define FAST_SEARCH 1
19 #define FAST_RSEARCH 2
20
21 #if LONG_BIT >= 128
22 #define STRINGLIB_BLOOM_WIDTH 128
23 #elif LONG_BIT >= 64
24 #define STRINGLIB_BLOOM_WIDTH 64
25 #elif LONG_BIT >= 32
26 #define STRINGLIB_BLOOM_WIDTH 32
27 #else
28 #error "LONG_BIT is smaller than 32"
29 #endif
30
31 #define STRINGLIB_BLOOM_ADD(mask, ch) \
32 ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
33 #define STRINGLIB_BLOOM(mask, ch) \
34 ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
35
36 Py_LOCAL_INLINE(Py_ssize_t)
37 fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
38 const STRINGLIB_CHAR* p, Py_ssize_t m,
39 Py_ssize_t maxcount, int mode)
40 {
41 unsigned long mask;
42 Py_ssize_t skip, count = 0;
43 Py_ssize_t i, j, mlast, w;
44
45 w = n - m;
46
47 if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
48 return -1;
49
50 /* look for special cases */
51 if (m <= 1) {
52 if (m <= 0)
53 return -1;
54 /* use special case for 1-character strings */
55 if (mode == FAST_COUNT) {
56 for (i = 0; i < n; i++)
57 if (s[i] == p[0]) {
58 count++;
59 if (count == maxcount)
60 return maxcount;
61 }
62 return count;
63 } else if (mode == FAST_SEARCH) {
64 for (i = 0; i < n; i++)
65 if (s[i] == p[0])
66 return i;
67 } else { /* FAST_RSEARCH */
68 for (i = n - 1; i > -1; i--)
69 if (s[i] == p[0])
70 return i;
71 }
72 return -1;
73 }
74
75 mlast = m - 1;
76 skip = mlast - 1;
77 mask = 0;
78
79 if (mode != FAST_RSEARCH) {
80
81 /* create compressed boyer-moore delta 1 table */
82
83 /* process pattern[:-1] */
84 for (i = 0; i < mlast; i++) {
85 STRINGLIB_BLOOM_ADD(mask, p[i]);
86 if (p[i] == p[mlast])
87 skip = mlast - i - 1;
88 }
89 /* process pattern[-1] outside the loop */
90 STRINGLIB_BLOOM_ADD(mask, p[mlast]);
91
92 for (i = 0; i <= w; i++) {
93 /* note: using mlast in the skip path slows things down on x86 */
94 if (s[i+m-1] == p[m-1]) {
95 /* candidate match */
96 for (j = 0; j < mlast; j++)
97 if (s[i+j] != p[j])
98 break;
99 if (j == mlast) {
100 /* got a match! */
101 if (mode != FAST_COUNT)
102 return i;
103 count++;
104 if (count == maxcount)
105 return maxcount;
106 i = i + mlast;
107 continue;
108 }
109 /* miss: check if next character is part of pattern */
110 if (!STRINGLIB_BLOOM(mask, s[i+m]))
111 i = i + m;
112 else
113 i = i + skip;
114 } else {
115 /* skip: check if next character is part of pattern */
116 if (!STRINGLIB_BLOOM(mask, s[i+m]))
117 i = i + m;
118 }
119 }
120 } else { /* FAST_RSEARCH */
121
122 /* create compressed boyer-moore delta 1 table */
123
124 /* process pattern[0] outside the loop */
125 STRINGLIB_BLOOM_ADD(mask, p[0]);
126 /* process pattern[:0:-1] */
127 for (i = mlast; i > 0; i--) {
128 STRINGLIB_BLOOM_ADD(mask, p[i]);
129 if (p[i] == p[0])
130 skip = i - 1;
131 }
132
133 for (i = w; i >= 0; i--) {
134 if (s[i] == p[0]) {
135 /* candidate match */
136 for (j = mlast; j > 0; j--)
137 if (s[i+j] != p[j])
138 break;
139 if (j == 0)
140 /* got a match! */
141 return i;
142 /* miss: check if previous character is part of pattern */
143 if (!STRINGLIB_BLOOM(mask, s[i-1]))
144 i = i - m;
145 else
146 i = i - skip;
147 } else {
148 /* skip: check if previous character is part of pattern */
149 if (!STRINGLIB_BLOOM(mask, s[i-1]))
150 i = i - m;
151 }
152 }
153 }
154
155 if (mode != FAST_COUNT)
156 return -1;
157 return count;
158 }
159
160 #endif
阅读(2062) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~