package cn.ty;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers for escaping HTML special characters and for stripping
 * tags out of a string with regular expressions.
 */
public class HTMLFilter {

    /** Matches any tag-like span: {@code <}, any run of characters other than {@code >}, then {@code >}. */
    private static final String HTML_TAG_REGEX = "<[^>]*>";

    /**
     * Matches an {@code <img ...>} tag.
     * NOTE(review): the literal found in the file was truncated to {@code "]*>"};
     * this is the presumed intended pattern. The constant is not used anywhere
     * in this class -- confirm before relying on it.
     */
    private static final String IMG_TAG_REGEX = "<\\s*img\\s+([^>]*)\\s*>";

    /** Matches a double-quoted {@code src="..."} attribute. Not used anywhere in this class. */
    private static final String IMG_SRC_ATTR_REGEX = "src=\"[^\"]+\"";

    /** Precompiled form of {@link #HTML_TAG_REGEX}, shared by {@link #filterHtml(String)}. */
    private static final Pattern HTML_TAG_PATTERN = Pattern.compile(HTML_TAG_REGEX);

    /**
     * Replaces the HTML special characters {@code <}, {@code >}, {@code "} and
     * {@code &} with their entities ({@code &lt;}, {@code &gt;}, {@code &quot;},
     * {@code &amp;}).
     *
     * @param input the text to escape; may be {@code null}
     * @return the escaped text, or {@code input} itself (including {@code null}
     *         or the empty string) when it contains none of the four characters
     */
    public static String replaceTag(String input) {
        if (!hasSpecialChars(input)) {
            return input;
        }
        StringBuilder filtered = new StringBuilder(input.length() + 16);
        // Iterate over every character, including the last one.
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            switch (c) {
                case '<':
                    filtered.append("&lt;");
                    break;
                case '>':
                    filtered.append("&gt;");
                    break;
                case '"':
                    filtered.append("&quot;");
                    break;
                case '&':
                    filtered.append("&amp;");
                    break;
                default:
                    filtered.append(c);
                    break;
            }
        }
        return filtered.toString();
    }

    /**
     * Tells whether the input contains at least one of {@code <}, {@code >},
     * {@code "} or {@code &}.
     *
     * @param input the text to scan; may be {@code null}
     * @return {@code true} if any of the four characters occurs in {@code input};
     *         {@code false} for {@code null} or empty input
     */
    private static boolean hasSpecialChars(String input) {
        if (input == null) {
            return false;
        }
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c == '<' || c == '>' || c == '"' || c == '&') {
                return true; // one hit is enough
            }
        }
        return false;
    }

    /**
     * Removes every tag-like span ({@code <...>}) from the string.
     *
     * @param str the text to filter; must not be {@code null}
     * @return {@code str} with every match of the generic tag pattern removed
     */
    public static String filterHtml(String str) {
        return HTML_TAG_PATTERN.matcher(str).replaceAll("");
    }

    /**
     * Removes every match of the given regular expression from the string.
     *
     * @param str   the text to filter; must not be {@code null}
     * @param regex the regular expression whose matches are removed
     * @return {@code str} with every match of {@code regex} deleted
     * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid pattern
     */
    public static String filterHtml(String str, String regex) {
        Matcher matcher = Pattern.compile(regex).matcher(str);
        return matcher.replaceAll("");
    }

    /**
     * Removes opening (or self-closing) occurrences of the given tag, with or
     * without attributes, e.g. {@code <img src="x">}, {@code <b>} or {@code <br/>}.
     * Closing tags such as {@code </b>} are left untouched.
     *
     * @param str the text to filter; must not be {@code null}
     * @param tag the tag name; it is inserted into the regular expression
     *            unquoted, so regex metacharacters in it are interpreted
     * @return {@code str} with the matching tags removed
     */
    public static String filterHtmlTag(String str, String tag) {
        // The attribute part is optional so that attribute-less tags match too;
        // it must start with whitespace so that "b" does not match "<br>".
        String regxp = "<\\s*(?:" + tag + ")(?:\\s+[^>]*)?\\s*/?>";
        return filterHtml(str, regxp);
    }

    /** Small demo: prints the result of escaping a sample string. */
    public static void main(String[] args) {
        System.out.println(replaceTag(" kkk"));
    }
}
// 解析HTML的时候非常有用。整理出来以备将来再用到!
// 阅读(883) | 评论(0) | 转发(0)