.匹配正则表达式
- #include <sys/types.h>
-
#include <regex.h>
-
-
typedef struct{
-
int reg_magic;
-
size_t re_nsub;
-
const char *re_endp;
-
struct re_guts *re_g;
-
} regex_t;
-
-
int regcomp(regex_t *preg, const char *pattern, int cflags)
-
-
pattern 正则表达式字符串
-
preg regex_t类型的结构变量, 调用放置编译结果的地方
-
cflags 由下面一个或多个标志进行"或"操作得到的位掩码
-
REG_EXTENDED 使用POSIX延伸表示语法,否则使用POSIX基本语法
-
REG_NOSPEC 禁用所有的元字符,即不认为模式字符具有特殊意义
-
REG_ICASE 忽略大小写
-
REG_NOSUB 忽略参数nmatch和pmatch
-
REG_NEWLINE 识别换行符: 匹配任意字符的运算符(如 . 和 [^...])不匹配换行符, ^ 和 $ 分别匹配紧跟换行符之后和紧靠换行符之前的位置
-
REG_PEND 编译模式,在遇到第一个空字节时,正则表达式不会结束,
-
正则表达式将在preg->re_endp指向的字节之前结束。
-
这就允许正则表达式中引入空字节
.匹配正则表达式
- #include <sys/types.h>
-
#include <regex.h>
-
-
/*
-
* 成功返回0
-
* 失败返回错误代码
-
*/
-
int regexec(const regex_t *preg, //指向以前编译过的而且由regcomp(3)函数初始化的正则表达式
-
const char *string, //要匹配的字符串
-
size_t nmatch, //pmatch数组的元素个数, 与pmatch一起用于把匹配位置返回给调用程序
-
regmatch_t pmatch[],
-
int eflags); //由0个或多个标志进行"或"操作得到 (REG_NOTBOL, REG_NOTEOL, REG_STARTEND)
-
-
typedef struct{
-
regoff_t rm_so; //符合条件的开始位置,如果值为-1,则代表此结构并未让regexec()使用
-
regoff_t rm_eo; //符合条件的结束位置 end of match offset
-
} regmatch_t;
-
-
参数eflags有两种可能,可使用OR(|)组合:
-
REG_NOTBOL 让特殊字符^无作用
-
REG_NOTEOL 让特殊字符$无作用
.正则表达式错误, 用来取得regcomp()或regexec()的错误原因
- /**
-
* errorcode 为由regcomp()或regexec()返回的错误代码
-
* preg 指向pattern buffer的结构指针
-
* errbuf 指向欲存放错误字符串的缓冲区, errbuf_size为缓冲区大小
-
*
-
* 返回错误字符串的长度
-
*/
-
#include <sys/types.h>
-
#include <regex.h>
-
size_t regerror(int errorcode, const regex_t *preg,
-
char *errbuf, size_t errbuf_size);
.释放正则表达式
- #include <sys/types.h>
-
#include <regex.h>
-
-
void regfree(regex_t *preg);
-
释放regcomp已经编译的正则表达式
.API示例
- int z; //Error code
-
regex_t reg; //Compiled regexpr
-
char ebuf[128]; //Error message buffer
-
-
z = regcomp(&reg, pattern, REG_EXTENDED);
-
-
if(z != 0 ){
-
regerror(z, &reg, ebuf, sizeof(ebuf));
-
printf("%s: regcomp(3)\n", ebuf);
-
exit(1);
-
}
-
//.....
-
regfree(&reg);
示例代码:
- #include <stdio.h>
-
#include <sys/types.h>
-
#include <regex.h>
-
#include <string.h>
-
#include <unistd.h>
-
#include <stdlib.h>
-
-
/**
 * Test whether a string matches a POSIX basic regular expression.
 *
 * The pattern is compiled with regcomp() using no compile flags, the number
 * of parenthesised subexpressions is printed to stdout, and the string is
 * then matched with regexec(). The compiled pattern is released with
 * regfree() on every path that follows a successful regcomp().
 *
 * @param str      NUL-terminated string to be matched.
 * @param pattern  NUL-terminated regular expression to compile.
 *
 * @return 0 if str matches pattern,
 *         1 if pattern fails to compile or str does not match,
 *         2 if an argument is NULL or regexec() reports another error.
 */
int cns_reg(const char *str, const char *pattern)
{
    int z;                      /* status returned by the regex calls */
    int cflags = 0;             /* compile flags: POSIX basic syntax */
    int rc = 0;                 /* value returned to the caller */
    regex_t reg;                /* compiled regular expression */
    char ebuf[128];             /* error message buffer */
    regmatch_t pm[10];          /* pm[0]: whole match, pm[1..9]: subexpressions */
    const size_t nmatch = sizeof pm / sizeof pm[0];

    /* regcomp()/regexec() must not be handed NULL strings. */
    if (str == NULL || pattern == NULL) {
        fprintf(stderr, "cns_reg: NULL argument\n");
        return 2;
    }

    /* Compile the pattern; on success the result is stored in reg. */
    z = regcomp(&reg, pattern, cflags);
    if (z != 0) {
        regerror(z, &reg, ebuf, sizeof(ebuf));
        fprintf(stderr, "%s: pattern '%s'\n", ebuf, pattern);
        /* Nothing was compiled, so there is nothing to regfree(). */
        return 1;
    }

    /* Report the number of subexpressions; re_nsub is a size_t, hence %zu. */
    if (!(cflags & REG_NOSUB)) {
        printf("There were %zu subexpression.\n", reg.re_nsub);
    }

    /*
     * Match str against the compiled pattern. On success,
     * str + pm[0].rm_so .. str + pm[0].rm_eo is the matched substring and
     * pm[i] (i >= 1) holds the offsets of the i-th parenthesised
     * subexpression, or -1 when that subexpression did not participate.
     */
    z = regexec(&reg, str, nmatch, pm, 0);
    if (z == REG_NOMATCH) {
        rc = 1;
    } else if (z != 0) {
        regerror(z, &reg, ebuf, sizeof(ebuf));
        fprintf(stderr, "%s: regexec('%s')\n", ebuf, str);
        rc = 2;
    }

    /* Single release point so that no exit path leaks the compiled regex. */
    regfree(&reg);
    return rc;
}
-
-
/**
 * Command-line driver: matches argv[1] against the pattern in argv[2]
 * and prints the status code returned by cns_reg().
 *
 * @return 0 after printing the result, 2 when fewer than two arguments
 *         were supplied.
 */
int main(int argc, char **argv)
{
    /* Both the string and the pattern are required; without this check
     * argv[1]/argv[2] would be NULL or out of bounds. */
    if (argc < 3) {
        fprintf(stderr, "usage: %s <string> <pattern>\n",
                argc > 0 ? argv[0] : "reg");
        return 2;
    }

    printf("%d\n", cns_reg(argv[1], argv[2]));
    return 0;
}
.运行
- [root@localhost sockets]# ./reg "I love you" "^I.*"
-
There were 0 subexpression.
-
0
-
[root@localhost sockets]# ./reg "I love you" "I.*"
-
There were 0 subexpression.
-
0
-
[root@localhost sockets]# ./reg "I love you" "dI.*"
-
There were 0 subexpression.
-
1
-
[root@localhost sockets]# ./reg "I love you" "//"
-
There were 0 subexpression.
-
1
-
[root@localhost sockets]# ./reg "" "//"
-
There were 0 subexpression.
-
0
-
[root@localhost sockets]# ./reg "" "http://"
-
There were 0 subexpression.
-
0
-
[root@localhost sockets]# ./reg "" "fttp://"
-
There were 0 subexpression.
-
1
-
[root@localhost sockets]# ./reg "" ""
-
There were 0 subexpression.
-
0
-
[root@localhost sockets]# ./reg "" "."
-
There were 0 subexpression.
-
0
-
[root@localhost sockets]# ./reg "" ".."
-
There were 0 subexpression.
-
0
-
[root@localhost sockets]# ./reg "" "\.\."
-
There were 0 subexpression.
-
1
-
[root@localhost sockets]# ./reg "" ".."
-
There were 0 subexpression.
-
0
-
[root@localhost sockets]# ./reg "" "..*"
-
There were 0 subexpression.
-
0
-
[root@localhost sockets]# ./reg "" "[a-z]3"
-
There were 0 subexpression.
-
1
-
[root@localhost sockets]# ./reg "" "[a-z]3.*"
阅读(6282) | 评论(0) | 转发(3) |