Chinaunix首页 | 论坛 | 博客
  • 博客访问: 66065
  • 博文数量: 11
  • 博客积分: 276
  • 博客等级: 二等列兵
  • 技术积分: 145
  • 用 户 组: 普通用户
  • 注册时间: 2012-02-17 11:14
文章分类

全部博文(11)

文章存档

2012年(11)

我的朋友
最近访客

分类: Java

2012-02-17 12:16:54

import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Simple sensitive-word filter for Chinese text.
 *
 * <p>The input is first reduced to its Chinese characters only, then cut into every
 * contiguous segment of length 4, 3 and 2, and each segment is compared against a
 * keyword list. The text is rejected when any segment equals a keyword.
 */
public class keyFilter {
    /** Matches every character outside the range U+4E00..U+9FA5 (kept characters are Chinese). */
    private static final Pattern NON_CHINESE = Pattern.compile("[^\\u4e00-\\u9fa5]");

    /** Segment lengths produced by {@link #cutText(String)}, in output order. */
    private static final int[] SEGMENT_LENGTHS = {4, 3, 2};

    /**
     * Returns the list of forbidden keywords.
     *
     * <p>The keywords are hard-coded here. An earlier revision read them through JDBC
     * from column {@code c_keyword} of table {@code t_keys}; that is the intended
     * extension point if a database-backed list is needed.
     *
     * @return a new mutable list of keywords, never {@code null}
     */
    public List<String> getKeywords() {
        List<String> list = new ArrayList<String>();
        // The empty entry is preserved from the original list; it can never equal a
        // segment, because segments are always 2 to 4 characters long.
        list.add("");
        list.add("你是你练");
        return list;
    }

    /**
     * Strips everything except Chinese characters from the given text: punctuation
     * (Chinese and English), whitespace, Latin letters and digits are all removed.
     *
     * @param str the raw text; {@code null} is treated as empty
     * @return the text with only characters in U+4E00..U+9FA5 kept, in original order
     */
    public String textFormat(String str) {
        if (str == null) {
            return "";
        }
        // A single pass is enough: any character that an ASCII-punctuation or
        // whitespace pre-pass would remove is also outside the kept range.
        return NON_CHINESE.matcher(str).replaceAll("");
    }

    /**
     * Formats the text with {@link #textFormat(String)} and cuts it into all
     * contiguous segments of length 4, then length 3, then length 2.
     *
     * @param text the raw text
     * @return all 4-character segments in order of position, followed by all
     *         3-character and then all 2-character segments; empty when the
     *         formatted text is shorter than 2 characters
     */
    public List<String> cutText(String text) {
        List<String> list = new ArrayList<String>();
        String temp = this.textFormat(text);
        for (int length : SEGMENT_LENGTHS) {
            addSegments(list, temp, length);
        }
        return list;
    }

    /** Appends every contiguous substring of {@code source} with the given length to {@code out}. */
    private static void addSegments(List<String> out, String source, int length) {
        // i + length <= source.length() keeps substring() in bounds and yields
        // no segments when the source is shorter than the requested length.
        for (int i = 0; i + length <= source.length(); i++) {
            out.add(source.substring(i, i + length));
        }
    }

    /**
     * Checks whether any of the given segments is a forbidden keyword.
     *
     * <p>Each match is reported on standard output.
     *
     * @param textlist segments to check, typically the result of {@link #cutText(String)};
     *                 {@code null} is treated as empty
     * @return {@code true} when no segment equals a keyword, {@code false} otherwise
     */
    public boolean checkText(List<String> textlist) {
        boolean success = true;
        if (textlist == null) {
            return success;
        }
        // Load the keyword list once instead of once per check.
        List<String> keylist = this.getKeywords();
        for (String check : textlist) {
            for (String keyword : keylist) {
                if (check != null && check.equals(keyword)) {
                    System.out.println("match sucess!" + keyword);
                    success = false;
                }
            }
        }
        return success;
    }

    /**
     * Demo entry point: formats a sample text, prints every segment and reports
     * whether the text passed the keyword check.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        keyFilter kf = new keyFilter();
        String text = "你是,你练,.?!!!***功.我要  把你干掉,    你这个坏人,让你早点升天。哈哈哈!";
        text = text + "注意,在替代字符串中使用反斜杠和***美元符号 ($) 与将其视为字面值替代字符串所得的结果可能不同;请参阅 Matcher.replaceAll。如有需要,可使用 Matcher.quoteReplacement(java.lang.String) 取消这些字符的特殊含义。";
        text = kf.textFormat(text);
        System.out.print("after format: " + text + "\n");
        List<String> match = kf.cutText(text);
        for (String temp : match) {
            System.out.print(temp + "\n");
        }
        boolean check = kf.checkText(match);

        if (check) {
            System.out.print("\ncheck success!hava not illegal chars\n");
        }
    }
}
阅读(2432) | 评论(0) | 转发(0) |
0

上一篇:没有了

下一篇:java过滤文章中的敏感词(properties)

给主人留下些什么吧!~~