kmp算法核心在next函数。就是要计算出一下pattern字符串的位置。
比如:
p r e - a b a b a b c //i = 8
a b a b c //j = 4
当匹配到i=8,j=4时候, 出现了不匹配。需要调整j的位置继续匹配。(就是根据abab这个已经匹配了的子字符串来计算出下一个j的位置),调整后的结果为:
p r e - a b a b a b c //i = 8
a b a b c //j = 2
就是说当j=4时,next j应该是2.
next算法可以通过动态规划算法来实现 。如下:
a b a b c d
0 0 0 0 0 0 0
a 0 1 0 1 0 0 0
b 0 0 2 0 2 0 0
a 0 1 0 3 0 0 0
b 0 0 2 0 4 0 0
c 0 0 0 0 0 5 0
d 0 0 0 0 0 0 6
然后过滤掉无效的数据。生成next数组,就是每列的最大值。
代码如下:
#include <iostream> #include <string>
using namespace std;
int *prepro(const char *pattern);
int kmp(const char *pattern, const char *str);
int kmp(const char *pattern, const char *str) { //预处理,通过动态规划算法生成next数组
int *next = prepro(pattern);
const int str_len = strlen(str); const int pattern_len = strlen(pattern);
int i, j; i = j = 0;
while (i < str_len) { bool flag = false; while (j < pattern_len && pattern[j] == str[i]) { i++; j++; flag = true; }
//匹配成功
if (j == pattern_len) { return i-j; }
if (flag) //部分匹配,计算next
{ j = next[j]; } else //不匹配,继续处理
{ i++; j = 0; } } return -1; }
int *prepro(const char *pattern) { int len = strlen(pattern);
//初始化动态规划数组 arr[len+1][len+1]
int **arr = new int*[len+1]; for (int i=0; i<len+1; i++) { arr[i] = new int[len+1]; }
/* //打印数组 printf("\t\t"); for (int i=0; i printf("\n"); for (int i=0; i { if (i == 0) { printf("\t"); } else { printf("%c\t", pattern[i-1]); } for (int j=0; j { printf("%d\t", arr[i][j]); } printf("\n"); } */
//生成动态规划数组
for (int i=1; i<len+1; i++) { for (int j=1; j<len+1; j++) { if (pattern[i-1] == pattern[j-1]) { arr[i][j] = arr[i-1][j-1] + 1; } else { arr[i][j] = 0; } } }
cout << "------------------------------" << endl; printf("\t\t"); for (int i=0; i<len; i++)printf("%c\t", pattern[i]); printf("\n"); for (int i=0; i<len+1; i++) { if (i == 0) { printf("\t"); } else { printf("%c\t", pattern[i-1]); } for (int j=0; j<len+1; j++) { printf("%d\t", arr[i][j]); } printf("\n"); }
//生成next数组,首先过滤arr数组,next[j]就是arr[*][j]中的最大值
int *next = new int[len+1]; for (int i=1; i<len+1; i++) { for (int j=1; j<len+1; j++) { //匹配的长度最多一半
if (2*i > j) { arr[i][j] = 0; } //必须是从字符串头匹配的才算
if (arr[i][j] != 0 && arr[i][j] != i) { arr[i][j] = 0; }
if (arr[i][j] > next[j]) { next[j] = arr[i][j]; } } }
/* cout << endl << "------------------------------" << endl; printf("\t\t"); for (int i=0; i printf("\n"); for (int i=0; i { if (i == 0) { printf("\t"); } else { printf("%c\t", pattern[i-1]); } for (int j=0; j { printf("%d\t", arr[i][j]); } printf("\n"); } */
//如果子字符串去不相等,更新next
bool *equal = new bool[len+1]; for (int i=0; i<len+1; i++) { equal[i] = false; } equal[0] = equal[1] = true; for (int i=2; i<len+1; i++) { if (pattern[i-1] == pattern[i-2]) { equal[i] = true; } else { break; } }
//printf("next=\t");
for (int i=0; i<len+1; i++) { //printf("%d\t", next[i]);
if (equal[i] && next[i] > 0)next[i] = i-1; //printf("%d\t", next[i]);
} //cout << endl << "---------------------------------------" << endl;
return next; }
int main(int argc, char **argv) { const char *pattern = argv[1]; const char *str = argv[2]; cout << kmp(pattern, str) << endl; return 0; }
|
直接g++编译ok。
发现next函数不用动态规划算法,大财小用了啊.应该是:
#include <iostream> #include <string>
using namespace std;
void fun(const char *str) {
int len = strlen(str); int *p = new int[len];
for (int i=0; i<len; i++) { if (str[i] == str[0]) { p[i] = 1; } }
if (len < 2)return;
for (int i=2; i<len; i++) { if (p[i-1] != 0) { if (i+1 >= 2*(p[i-1]+1) && str[i] == str[p[i-1]]) p[i] = p[i-1]+1; } }
bool equal = true; for (int i=1; i<len; i++) { if (equal) { if (str[i] == str[i-1]) { p[i] = i; } else { equal = false; } } else { break; } }
for (int i=0; i<len; i++) { cout << str[i] << " "; } cout << endl; for (int i=0; i<len; i++) { cout << p[i] << " "; } cout << endl; return; }
int main(int argc, char **argv) { fun(argv[1]); return 0; }
|
阅读(1024) | 评论(0) | 转发(0) |