Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1994820
  • 博文数量: 606
  • 博客积分: 9991
  • 博客等级: 中将
  • 技术积分: 5725
  • 用 户 组: 普通用户
  • 注册时间: 2008-07-17 19:07
文章分类

全部博文(606)

文章存档

2011年(10)

2010年(67)

2009年(155)

2008年(386)

分类:

2008-08-29 15:45:23

/**
* 获取文章的字数或则字符数
* @author montao
*/
public class StatWordCount {
   
    private final char[] CHS = {',',';','.','!','?',';','+','。','?','!'};  //符号数组
    
    private final char[] CHN = {'\n','\r'}; //转义符数组
    
    //private final char[] CHN = {'\n','\r',''}; //转义符数组
   
    private final char[] SPACE = {' ',' '}; //空格的数组(前半角,后全角)
   
    /**
     * 根据指定条件来筛选文章的字数
     * @param wordContent  文章内容
     * @param compriseInterpunction  是否包含指定字符
     * @param compriseSpace  是否包含空格
     * @return 返回文章经过指定筛选后的长度
     */
    public int getWordCount(String wordContent,boolean compriseInterpunction,boolean compriseSpace)
    {
        if(wordContent==null){
            return 0;
        }else if(wordContent.length()==0){
            return 0;
        }else{
            //既要包含符号又要包含空格
            if(compriseInterpunction && compriseSpace){
                //清除转义符
                String regex = "["+new String(CHN)+"]";
                wordContent = wordContent.replaceAll(regex," ");
                return this.getWordCount(wordContent);
            }
            //不包含符号包含空格
            else if(!compriseInterpunction && compriseSpace){
                //使用正则表达式去掉指定的符号和转义符
                String regex1 = "["+new String(CHN)+"]";
                String regex2 = "["+new String(CHS)+"]";
                wordContent = wordContent.replaceAll(regex1," ");
                wordContent = wordContent.replaceAll(regex2," ");
                return this.getWordCount(wordContent);
            }
            //包含指定符号不包含空格
            else if(compriseInterpunction && !compriseSpace){
                //使用正则表达式去掉空格和转义符
                String regex1 = "["+new String(CHN)+"]";
                String regex2 = "["+new String(SPACE)+"]";
                wordContent = wordContent.replaceAll(regex1," ");
                wordContent = wordContent.replaceAll(regex2," ");
                return this.getWordCount(wordContent);
            }
            //空格和指定符号都不包含
            else{
                //使用正则表达式去掉空格,指定符号和转义符
                String regex1 = "["+new String(CHN)+"]";
                String regex3 = "["+new String(CHS)+"]";
                String regex2 = "["+new String(SPACE)+"]";
                wordContent = wordContent.replaceAll(regex1," ");
                wordContent = wordContent.replaceAll(regex2," ");
                wordContent = wordContent.replaceAll(regex3," ");
                return this.getWordCount(wordContent);
            }
        }
    }
   
    /**
     * 返回文章中的字数
     * @param wordCount 文章内容
     * @return
     */
    @SuppressWarnings("unused")
    private int getWordCount(String wordContent){
        int count = 0;
        if(wordContent==null){ //判断是否为null,如果为null直接返回0
            count = 0;
        }else if(wordContent.length()==0){ //判断是否为空,如果为空直接返回0
            count = 0;
        }else{ //判断获取字数
            wordContent = wordContent.trim(); //清空空格
            //临时变量
            String s4 = "";
            String s3 = "";
            String s1 = "";
            boolean bb = false;
            if(wordContent.length()>0){
                s4 = String.valueOf(wordContent.charAt(wordContent.length()-1));
            }
            for(int i=0;i                s3 = String.valueOf(wordContent.charAt(i));
                int num = s3.getBytes().length;
                if(s3.hashCode()==32||s3.getBytes().length==2){
                    bb=true;
                }if(num==2){
                    count++;
                }else{
                        if(i+11)){
                            s1 = String.valueOf(wordContent.charAt(i+1));
                            if((s1.hashCode()==32||s1.getBytes().length==2)&&(s3.hashCode()!=32)){
                                count++;
                            }
                        }
                }
            }
            if(!bb){
                count++;
            }else{
                if(s4.getBytes().length==1){
                    count++;
                }
            }
        }
        return count;
    }
   
    /**
     * 根据条件来获取文章的字符数
     * @param wordContent 文章内容
     * @param compriseInterpunction 是否包含指定符号
     * @param compriseSpace 是否包含空格
     * @return 返回字符长度
     */
    public int getWordCharacter(String wordContent,boolean compriseInterpunction,boolean compriseSpace)
    {
        //既要包含符号又要包含空格
        if(compriseInterpunction && compriseSpace){
            //清除转义符
            String regex = "["+new String(CHN)+"]";
            wordContent = wordContent.replaceAll(regex," ");
            //首部的空格不算
            wordContent = wordContent.replaceAll("^\\s+","");
            return wordContent.length();
        }//不包含符号包含空格
        else if(!compriseInterpunction && compriseSpace){
            //首部的空格不算
            wordContent = wordContent.replaceAll("^\\s+","");
            //使用正则表达式去掉指定的符号和转义符
            String regex1 = "["+new String(CHN)+"]";
            String regex2 = "["+new String(CHS)+"]";
            wordContent = wordContent.replaceAll(regex1," ");
            wordContent = wordContent.replaceAll(regex2," ");
            return wordContent.length();
        }//包含指定符号不包含空格
        else if(compriseInterpunction && !compriseSpace){
            //使用正则表达式去掉空格和转义符
            return this.getNoSpaceCount(wordContent);
        }//空格和指定符号都不包含
        else{
            //使用正则表达式去掉指定符号
            String regex1 = "["+new String(CHS)+"]";
            wordContent = wordContent.replaceAll(regex1," ");
            return this.getNoSpaceCount(wordContent);
        }
    }
    /**
     * 获取文章中非空格的字符总数
     * @param wordContent 文章内容
     * @return
     */
    private int getNoSpaceCount(String wordContent) {
        int spaceCount = 0;
        if(wordContent==null)
        {
            spaceCount = 0;
        }else if(wordContent.length()==0)
        {
            spaceCount = 0;
        }else
        {
            //替换首部的
            wordContent = wordContent.replaceAll("^\\s+","");
            wordContent = wordContent.replaceAll(" ","");
            //使用正则替换转义符
            String regex = "["+new String(CHN)+"]";
            wordContent = wordContent.replaceAll(regex,"");
            spaceCount = wordContent.length();
        }
        return spaceCount;
    }
}
阅读(2472) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~