Chinaunix首页 | 论坛 | 博客
  • 博客访问: 293382
  • 博文数量: 82
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 874
  • 用 户 组: 普通用户
  • 注册时间: 2015-03-21 09:58
个人简介

traveling in cumputer science!!

文章分类

全部博文(82)

文章存档

2016年(13)

2015年(69)

我的朋友

分类: C/C++

2015-03-27 21:08:07

FIRST:KMP--abstract
    It's a algorithm improving from sample matching algorithm (BF algorithm). 
    It's detail description can view in WIKI from below:
    %E2%80%93Morris%E2%80%93Pratt_algorithm

SECOND:the coding in Cpp
          below code is from github!!
     the kmp.h file
        

点击(此处)折叠或打开

  1. /*******************************************************************************
  2.  * ALGORITHM IMPLEMENTAIONS
  3.  *
  4.  * /\ | _ _ ._ o _|_ |_ ._ _ _
  5.  * /--\ | (_| (_) | | |_ | | | | | _>
  6.  * _|
  7.  *
  8.  * KNUTH-MORRIS-PRATT ALGORITHMS
  9.  *
  10.  * Features:
  11.  * Complexity is O(n + k), where n is the target string length,
  12.  * and k is the pattern length
  13.  *
  14.  * http://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
  15.  *
  16.  ******************************************************************************/

  17. #ifndef __KMP_H__
  18. #define __KMP_H__
  19. #include <string.h>

  20. namespace alg {
  21.     static void kmp_table(const char *W, int * T, int len);
  22.     /**
  23.      * S -> the text to be searched
  24.      * W -> the word to search
  25.      * return the position where W is found S
  26.      */
  27.     static int kmp_search(const char * S, const char * W) {
  28.         int LEN_S = strlen(S);
  29.         int LEN_W = strlen(W);

  30.         int m = 0;
  31.         int i = 0;
  32.         int T[LEN_W];

  33.         kmp_table(W,T, LEN_W);

  34.         while (m+i < LEN_S) {
  35.             if (W[i] == S[m+i]) {
  36.                 if (i == LEN_W -1) {
  37.                     return m;
  38.                 }
  39.                 i++;
  40.             } else {
  41.                 m = m+i-T[i];
  42.                 if (T[i] > -1) {
  43.                     i = T[i];
  44.                 } else {
  45.                     i = 0;
  46.                 }
  47.             }
  48.         }
  49.         return -1;
  50.     }

  51.     /**
  52.      * build a table for the word to be searched
  53.      * eg:
  54.      * i        0     1     2     3     4     5     6
  55.      * W[i]     A     B     C     D     A     B     D
  56.      * T[i]     -1     0     0     0     0     1     2
  57.      */
  58.     static void kmp_table(const char *W, int * T, int len) {
  59.         int pos = 2; // the current position we are computing in T
  60.         int cnd = 0; // the next character of the current candidate substring
  61.         T[0] = -1;
  62.         T[1] = 0;

  63.         while (pos < len) {
  64.             // first case: the substring continues
  65.             if (W[pos-1] == W[cnd]) {
  66.                 cnd++;
  67.                 T[pos] = cnd;
  68.                 pos++;
  69.             } else if (cnd >0) { // second case: it doesn't, but we can fall back
  70.                 cnd = T[cnd];
  71.             } else { // third case: we have run out of candidates. Note cnd = 0
  72.                 T[pos] = 0;
  73.                 pos++;
  74.             }
  75.         }
  76.     }
  77. }

  78. #endif //
 the test demo kmp_demo.cpp

点击(此处)折叠或打开

  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #include <time.h>

  5. #include "kmp.h"
  6. using namespace alg;
  7. int main(void)
  8. {
  9.     srand(time(NULL));
  10.     char * S = (char*)malloc(1001);
  11.     char * W = (char*)malloc(5);

  12.     memset(S,0, 1001);
  13.     memset(W,0, 5);

  14.     // random genrate a pattern for A, G, C,T
  15.     const char P[] = {'A', 'G','C','T'};

  16.     for (int i=0;i<1000;i++) {
  17.         int k = rand()%4;
  18.         S[i] = P[k];
  19.     }

  20.     for (int i=0;i<4;i++) {
  21.         int k = rand()%4;
  22.         W[i] = P[k];
  23.     }

  24.     // do a search for W from S
  25.     int pos = kmp_search(S, W);
  26.     printf("to be searched:%s\n", W);

  27.     if (pos > 0) {
  28.         printf("found in pos:%d\n", pos);
  29.         printf("text:\n%.*s", pos, S);
  30.         printf("\033[31m%s\033[0m", W);
  31.         printf("%s\n",&S[pos + strlen(W)]);
  32.     }
  33. }


  
    
阅读(1045) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~