Chinaunix首页 | 论坛 | 博客
  • 博客访问: 967017
  • 博文数量: 403
  • 博客积分: 27
  • 博客等级: 民兵
  • 技术积分: 165
  • 用 户 组: 普通用户
  • 注册时间: 2011-12-25 22:20
文章分类

全部博文(403)

文章存档

2016年(3)

2015年(16)

2014年(163)

2013年(222)

分类: LINUX

2014-08-13 17:50:37

原文地址:C语言的正则表达式 作者:leonwang202


.编译正则表达式
  1. #include <sys/types.h>
  2. #include <regex.h>

  3. typedef struct{
  4.      int reg_magic;
  5.      size_t re_nsub;
  6.      const char *re_endp;
  7.      struct re_guts *re_g;
  8. } regex_t;

  9. int regcomp(regex_t *preg, const char *pattern, int cflags);

  10. pattern 正则表达式字符串
  11. preg regex_t类型的结构变量, 调用放置编译结果的地方
  12. cflags 下面一个或多个进行"或"操作的位掩码
  13.            REG_EXTENDED 使用POSIX延伸表示语法,否则使用POSIX基本语法
  14.            REG_NOSPEC 禁用所有的元字符,即不认为模式字符具有特殊意义
  15.            REG_ICASE 忽略大小写
  16.            REG_NOSUB 忽略参数nmatch和pmatch
  17.            REG_NEWLINE 把换行符视为行分隔符: . 和 [^...] 不匹配换行符, ^ 匹配紧跟在换行符之后的位置, $ 匹配紧邻换行符之前的位置
  18.            REG_PEND 编译模式,在遇到第一个空字节时,正则表达式不会结束,
  19.                           正则表达式将在preg->re_endp指向的字节之前结束。
  20.                           这就允许正则表达式中引入空字节

.匹配正则表达式
  1. #include <sys/types.h>
  2. #include <regex.h>

  3. /*
  4. * 成功返回0
  5. * 失败返回错误代码
  6. */
  7. int regexec(const regex_t *preg, //指向以前编译过的而且由regcomp(3)函数初始化的正则表达式
  8.             const char *string, //要匹配的字符串
  9.             size_t nmatch, //nmatch和pmatch用于把匹配到的位置返回给调用程序
  10.             regmatch_t pmatch[],
  11.             int eflags); //由0个或多个标志进行"或"操作得到 (REG_NOTBOL, REG_NOTEOL, REG_STARTEND)

  12. typedef struct{
  13.      regoff_t rm_so; //符合条件的开始位置,如果值为-1,则代表此结构并未让regexec()使用
  14.      regoff_t rm_eo; //符合条件的结束位置 end of match offset
  15. } regmatch_t;

  16. 参数eflags有两种可能,可使用OR(|)组合:
  17.       REG_NOTBOL 让特殊字符^无作用
  18.       REG_NOTEOL 让特殊字符$无作用

.正则表达式错误, 用来取得regcomp()或regexec()的错误原因
  1. /**
  2. * errorcode 为由regcomp()或regexec()返回的错误代码
  3. * preg 指向pattern buffer的结构指针
  4. * errbuf 指向欲存放错误字符串的缓冲区, errbuf_size为缓冲区大小
  5. *
  6. * 返回错误字符串的长度
  7. */
  8. #include <sys/types.h>
  9. #include <regex.h>
  10. size_t regerror(int errorcode, const regex_t *preg,
  11.                 char *errbuf, size_t errbuf_size);

.释放正则表达式
  1. #include <sys/types.h>
  2. #include <regex.h>

  3. void regfree(regex_t *preg);
  4. 释放regcomp已经编译的正则表达式

.API示例
  1. int z; //Error code
  2. regex_t reg; //Compiled regexpr
  3. char ebuf[128]; //Error message buffer

  4. z = regcomp(&reg, pattern, REG_EXTENDED);

  5. if(z != 0 ){
  6.     regerror(z, &reg, ebuf, sizeof(ebuf));
  7.     printf("%s: regcomp(3)\n", ebuf);
  8.     exit(1);
  9. }
  10. //.....
  11. regfree(&reg);

示例代码:
  1. #include <stdio.h>
  2. #include <sys/types.h>
  3. #include <regex.h>
  4. #include <string.h>
  5. #include <unistd.h>
  6. #include <stdlib.h>

  /**
   * Compile `pattern` with regcomp() and test `str` against it with regexec().
   *
   * The pattern is compiled with cflags == 0, i.e. as a POSIX basic regular
   * expression. The number of parenthesised subexpressions is printed to
   * stdout; diagnostics are written to stderr.
   *
   * @param str      NUL-terminated string to be matched.
   * @param pattern  NUL-terminated regular expression to compile.
   *
   * @return 0 if `str` matches `pattern`;
   *         1 if it does not match, or if `pattern` fails to compile;
   *         2 if an argument is NULL or regexec() reports any other error.
   */
  int cns_reg(const char *str, const char *pattern)
  {
      int z;                  /* status returned by regcomp()/regexec() */
      int rc;                 /* value returned to the caller */
      int cflags = 0;         /* compile flags: 0 selects basic syntax */
      regex_t reg;            /* compiled regular expression */
      char ebuf[128];         /* buffer for regerror() messages */
      regmatch_t pm[10];      /* match offsets: whole match + groups 1-9 */
      const size_t nmatch = sizeof pm / sizeof pm[0];

      /* regcomp()/regexec() require valid strings; reject NULL up front. */
      if (str == NULL || pattern == NULL) {
          fprintf(stderr, "cns_reg: NULL argument\n");
          return 2;
      }

      /* On success regcomp() returns 0 and fills in `reg`. */
      z = regcomp(&reg, pattern, cflags);
      if (z != 0) {
          regerror(z, &reg, ebuf, sizeof(ebuf));
          fprintf(stderr, "%s: pattern '%s'\n", ebuf, pattern);
          return 1;           /* nothing was compiled, so nothing to free */
      }

      /* Report the number of subexpressions; re_nsub is a size_t. */
      if (!(cflags & REG_NOSUB))
          printf("There were %zu subexpression.\n", reg.re_nsub);

      /*
       * On a successful match, pm[0] holds the offsets of the whole match
       * (str + pm[0].rm_so up to str + pm[0].rm_eo) and pm[1..] hold the
       * offsets of the parenthesised subexpressions; nmatch is the capacity
       * of pm[].
       */
      z = regexec(&reg, str, nmatch, pm, 0);

      if (z == 0) {
          rc = 0;
      } else if (z == REG_NOMATCH) {
          rc = 1;
      } else {
          regerror(z, &reg, ebuf, sizeof(ebuf));
          fprintf(stderr, "%s: regexec('%s')\n", ebuf, str);
          rc = 2;
      }

      /* Release the compiled pattern on every path once regcomp() succeeded. */
      regfree(&reg);
      return rc;
  }

  /**
   * Command-line driver: matches argv[1] (the string) against argv[2] (the
   * pattern) with cns_reg() and prints its return code on stdout.
   *
   * Usage: reg <string> <pattern>
   *
   * @return 0 after printing the result, 2 if too few arguments were given.
   */
  int main(int argc, char **argv)
  {
      /* argv[1] and argv[2] are only valid when at least two arguments exist. */
      if (argc < 3) {
          fprintf(stderr, "usage: %s <string> <pattern>\n",
                  argc > 0 ? argv[0] : "reg");
          return 2;
      }

      printf("%d\n", cns_reg(argv[1], argv[2]));
      return 0;
  }

.运行
  1. [root@localhost sockets]# ./reg "I love you" "^I.*"
  2. There were 0 subexpression.
  3. 0
  4. [root@localhost sockets]# ./reg "I love you" "I.*"
  5. There were 0 subexpression.
  6. 0
  7. [root@localhost sockets]# ./reg "I love you" "dI.*"
  8. There were 0 subexpression.
  9. 1
  10. [root@localhost sockets]# ./reg "I love you" "//"
  11. There were 0 subexpression.
  12. 1
  13. [root@localhost sockets]# ./reg "" "//"
  14. There were 0 subexpression.
  15. 0
  16. [root@localhost sockets]# ./reg "" "http://"
  17. There were 0 subexpression.
  18. 0
  19. [root@localhost sockets]# ./reg "" "fttp://"
  20. There were 0 subexpression.
  21. 1
  22. [root@localhost sockets]# ./reg "" ""
  23. There were 0 subexpression.
  24. 0
  25. [root@localhost sockets]# ./reg "" "."
  26. There were 0 subexpression.
  27. 0
  28. [root@localhost sockets]# ./reg "" ".."
  29. There were 0 subexpression.
  30. 0
  31. [root@localhost sockets]# ./reg "" "\.\."
  32. There were 0 subexpression.
  33. 1
  34. [root@localhost sockets]# ./reg "" ".."
  35. There were 0 subexpression.
  36. 0
  37. [root@localhost sockets]# ./reg "" "..*"
  38. There were 0 subexpression.
  39. 0
  40. [root@localhost sockets]# ./reg "" "[a-z]3"
  41. There were 0 subexpression.
  42. 1
  43. [root@localhost sockets]# ./reg "" "[a-z]3.*"

阅读(1272) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~