Chinaunix首页 | 论坛 | 博客
  • 博客访问: 17780
  • 博文数量: 11
  • 博客积分: 267
  • 博客等级: 二等列兵
  • 技术积分: 135
  • 用 户 组: 普通用户
  • 注册时间: 2011-10-20 21:56
文章分类

全部博文(11)

文章存档

2011年(11)

我的朋友

分类: C/C++

2011-11-18 20:02:09

Problem:
Aho-Corasick Automation

Code:
  1. #include <stdio.h>
  2. #include <string.h>

  3. #define KEYSIZE 256

  4. char tmp[2060];
  5. int virus[164];
  6. int email[1600];

  7. struct Node {
  8.     Node* fail;
  9.     Node* next[KEYSIZE];
  10.     int count;
  11.     bool visit;
  12.     Node() {
  13.      fail = NULL;
  14.      count = 0;
  15.      visit = false;
  16.      memset(next, 0, sizeof(next));
  17.     }
  18. }*build_ac_automation_que[40000];
  19. Node* query_temp_que[1000];

  20. int map[128];

  21. void base642char(char ch1, char ch2, char ch3, char ch4, int* out) {
  22.     int i1 = map[ch1];
  23.     int i2 = map[ch2];
  24.     int i3 = map[ch3];
  25.     int i4 = map[ch4];
  26.     out[0] = (i1<<2)+(i2>>4);
  27.     out[1] = ((i2%16)<<4)+(i3>>2);
  28.     out[2] = ((i3%4)<<6)+i4;
  29. }

  30. void decode(char* in, int* out, int* size) {
  31.     int i;
  32.     int len = strlen(in);
  33.     for( i=0; i<len-4; i+=4 ) {
  34.     base642char(in[i], in[i+1], in[i+2], in[i+3], out);
  35.     out += 3;
  36.     }
  37.     (*size) += (len-4)/4*3;
  38.     int i1 = map[in[len-4]];
  39.     int i2 = map[in[len-3]];

  40.     out[0] = (i1<<2)+(i2>>4);
  41.     (*size)++;
  42.     
  43.     int i3, i4;
  44.     if( in[len-2] != '=' ) {
  45.     i3 = map[in[len-2]];
  46.     out[1] = ((i2%16)<<4) + (i3>>2);
  47.     (*size)++;
  48.     }
  49.     if( in[len-1] != '=' ) {
  50.     i4 = map[in[len-1]];
  51.     out[2] = ((i3%4)<<6)+i4;
  52.     (*size)++;
  53.     }
  54. }

  55. void build_aho_corasick_automation(Node *root) {
  56.     root->fail = NULL;
  57.     int head, tail;
  58.     head = tail = 0;
  59.     build_ac_automation_que[tail++] = root;
  60.     while( head < tail ) {
  61.     Node *cur = build_ac_automation_que[head++];
  62.     for(int i=0; i<KEYSIZE; i++) {
  63.      if( cur->next[i] == NULL)
  64.         continue;
  65.      if( cur == root )
  66.         cur->next[i]->fail = root;
  67.      else {
  68.         Node *p = cur->fail;
  69.         while( p != NULL ) {
  70.          if( p->next[i] != NULL ) {
  71.             cur->next[i]->fail = p->next[i];
  72.             break;
  73.          }
  74.          p = p->fail;
  75.         }
  76.         if( p == NULL )
  77.          cur->next[i]->fail = root;
  78.      }
  79.      build_ac_automation_que[tail++] = cur->next[i];
  80.     }
  81.     }

  82. }

  83. int query(Node *root, int *str, int num ) {
  84.     int i = 0, cnt = 0;
  85.     int head, tail;
  86.     head = tail = 0;
  87.     Node *p = root;
  88.     for( i=0; i<num; i++ ) {
  89.     int index = str[i];
  90.     while( p->next[index] == NULL && p!=root )
  91.      p = p->fail;
  92.     p = p->next[index];
  93.     p = (p==NULL) ? root : p;
  94.     Node *temp = p;
  95.     while( temp != root && !temp->visit ) {
  96.      cnt += temp->count;
  97.      temp->visit = true;
  98.      query_temp_que[tail++] = temp;
  99.      temp = temp->fail;
  100.     }
  101.     }
  102.     while( head < tail ) {
  103.     Node* cur = query_temp_que[head++];
  104.     cur->visit = false;
  105.     }
  106.     return cnt;
  107. }

  108. void insert(Node *root, int *str, int num ) {
  109.     Node *p = root;
  110.     int i=0;
  111.     for( i=0; i<num; i++ ) {
  112.     int index = str[i];
  113.     if( p->next[index] == NULL ) {
  114.      p->next[index] = new Node();
  115.     }
  116.     p = p->next[index];
  117.     }
  118.     p->count++;
  119. }

  120. void init()
  121. {
  122.     int i;
  123.     for( i='a'; i<='z'; i++ ) {
  124.     map[i] = i-'a'+26;
  125.     }
  126.     for( i='A'; i<='Z'; i++ ) {
  127.     map[i] = i-'A';
  128.     }
  129.     for( i='0'; i<='9'; i++ ) {
  130.     map[i] = i-'0'+52;
  131.     }
  132.     map['+'] = 62;
  133.     map['/'] = 63;
  134. }

  135. int main()
  136. {
  137.     int n, m, i, j;
  138.     Node *root;
  139.     init();

  140.     while( EOF != scanf("%d", &n)) {
  141.     
  142.     int decode_size;
  143.     root = new Node();
  144.     for( i=0; i<n; i++ ) {
  145.      scanf("%s", tmp);
  146.      decode_size = 0;
  147.      decode(tmp, virus, &decode_size);
  148.      insert(root, virus, decode_size);
  149.     }

  150.     build_aho_corasick_automation(root);

  151.     scanf("%d", &m);
  152.     for( i=0; i<m; i++ ) {
  153.      scanf("%s", tmp);
  154.      decode_size = 0;
  155.      decode(tmp, email, &decode_size);

  156.      int res = query(root, email, decode_size);
  157.      printf("%d\n", res);
  158.     }
  159.     printf("\n");
  160.     }

  161.     return 0;
  162. }

阅读(1027) | 评论(0) | 转发(0) |
0

上一篇:poj 1226 Substrings

下一篇:2011年11月26日毅行

给主人留下些什么吧!~~