Chinaunix首页 | 论坛 | 博客
  • 博客访问: 30204736
  • 博文数量: 2065
  • 博客积分: 10377
  • 博客等级: 上将
  • 技术积分: 21525
  • 用 户 组: 普通用户
  • 注册时间: 2008-11-04 17:50
文章分类

全部博文(2065)

文章存档

2012年(2)

2011年(19)

2010年(1160)

2009年(969)

2008年(153)

分类: Java

2010-04-18 10:43:06


package cn.ty;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small collection of static helpers for escaping HTML special characters
 * and for stripping HTML tags from a string with regular expressions.
 */
public class HTMLFilter {
    /** Matches any HTML tag, opening or closing. */
    private final static String regexForHtml = "<[^>]*>";
    /**
     * Matches an {@code <img ...>} tag.
     * NOTE(review): the published copy of this constant was truncated to "]*>";
     * it is reconstructed here to mirror the pattern built by filterHtmlTag — confirm.
     */
    private final static String regexForImgTag = "<\\s*img\\s+([^>]*)\\s*>";
    /** Matches a {@code src="..."} attribute. */
    private final static String regxpForImaTagSrc = "src=\"[^\"]+\"";

    /**
     * Replaces the HTML special characters in {@code input} with entities:
     * {@code <} becomes {@code &lt;}, {@code >} becomes {@code &gt;},
     * {@code "} becomes {@code &quot;} and {@code &} becomes {@code &amp;}.
     *
     * @param input the text to escape; may be {@code null}
     * @return the escaped text, or {@code input} itself (possibly {@code null})
     *         when it contains none of the four special characters
     */
    public static String replaceTag(String input) {
        if (!hasSpecialChars(input)) {
            return input;
        }
        StringBuilder filtered = new StringBuilder(input.length() + 16);
        // Iterate over every character, including the last one.
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            switch (c) {
            case '<':
                filtered.append("&lt;");
                break;
            case '>':
                filtered.append("&gt;");
                break;
            case '"':
                filtered.append("&quot;");
                break;
            case '&':
                filtered.append("&amp;");
                break;
            default:
                filtered.append(c);
                break;
            }
        }
        return filtered.toString();
    }

    /**
     * Tells whether {@code input} contains at least one of
     * {@code <}, {@code >}, {@code "} or {@code &}.
     *
     * @param input the text to inspect; may be {@code null}
     * @return {@code true} if a special character is present,
     *         {@code false} for {@code null}, empty or clean input
     */
    private static boolean hasSpecialChars(String input) {
        if (input == null) {
            return false;
        }
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c == '<' || c == '>' || c == '"' || c == '&') {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes every substring of {@code str} that matches {@code regex}.
     *
     * @param str   the text to filter; must not be {@code null}
     * @param regex the regular expression describing what to remove
     * @return {@code str} with all matches deleted
     */
    public static String filterHtml(String str, String regex) {
        return Pattern.compile(regex).matcher(str).replaceAll("");
    }

    /**
     * Removes occurrences of the opening tag named {@code tag} from {@code str}.
     * The pattern requires whitespace after the tag name, so only tags that
     * carry attributes (or trailing whitespace) are matched; closing tags and
     * attribute-less tags such as {@code <b>} are left untouched.
     *
     * @param str the text to filter; must not be {@code null}
     * @param tag the tag name; it is inserted into the regular expression unquoted
     * @return {@code str} with the matching tags deleted
     */
    public static String filterHtmlTag(String str, String tag) {
        String regxp = "<\\s*" + tag + "\\s+([^>])*\\s*>";
        return filterHtml(str, regxp);
    }

    /**
     * Demo entry point: prints the escaped form of a small HTML snippet.
     * NOTE(review): the markup of the original sample string was lost when the
     * listing was published; a simple paragraph is used as a stand-in.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println(replaceTag("<p>kkk</p>"));
    }
}


解析HTML的时候非常有用。整理出来以备将来再用到!
阅读(883) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~