本代码实现的功能是：从文件中的 HTML 代码里提取 URL 信息。目前鲁棒性还不够好，有待改进。
#include "stdafx.h"
#include "stdlib.h"
#include <iostream>
#include <fstream>
#include <iterator>
#include <stack>
#include <string>

// Extracts the href value of every <a ...> tag found in an HTML file and
// prints one value per line on standard output.
//
// This is a plain text scan, not an HTML parser: comments, <script> bodies
// and '>' characters inside attribute values are not treated specially.

// Running total of links printed by getthelink(). Nothing in this file
// reports it; it is kept as a global so other code can still read it.
int count = 0;

// Returns true for the characters HTML allows between attributes.
static bool is_html_space(char c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
}

// ASCII-only lower-casing; bytes outside 'A'..'Z' are returned unchanged.
static char ascii_lower(char c)
{
    return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
}

// Returns the index of the first non-whitespace character at or after `pos`,
// or text.size() when only whitespace remains.
static std::string::size_type skip_html_space(const std::string& text,
                                              std::string::size_type pos)
{
    while (pos < text.size() && is_html_space(text[pos])) {
        ++pos;
    }
    return pos;
}

// Prints the quoted value of the first `href` attribute in `str` and bumps
// `count`.
//
// `str` is expected to be the text of one tag, e.g. `<a href="x.html">`.
// The attribute name is matched case-insensitively and must be a whole
// attribute name: it has to start the string or follow whitespace, and be
// followed (after optional whitespace) by '='. That keeps `hreflang` and
// `data-href` from being mistaken for `href`.
//
// Nothing is printed when there is no such attribute, when its value is not
// wrapped in single or double quotes, or when the closing quote is missing.
void getthelink(const std::string& str)
{
    typedef std::string::size_type size_type;
    static const char kAttr[] = "href";
    const size_type kAttrLen = sizeof(kAttr) - 1;

    // Search a lower-cased copy so HREF/Href match; indices are shared with
    // `str`, which is used for the actual value so its case is preserved.
    std::string lowered(str);
    for (size_type k = 0; k < lowered.size(); ++k) {
        lowered[k] = ascii_lower(lowered[k]);
    }

    size_type from = 0;
    for (;;) {
        const size_type attr = lowered.find(kAttr, from);
        if (attr == std::string::npos) {
            return;
        }
        from = attr + kAttrLen;

        // Reject matches inside a longer name such as "data-href".
        if (attr != 0 && !is_html_space(str[attr - 1])) {
            continue;
        }

        // Reject names that merely start with "href", such as "hreflang".
        size_type pos = skip_html_space(str, from);
        if (pos >= str.size() || str[pos] != '=') {
            continue;
        }

        pos = skip_html_space(str, pos + 1);
        if (pos >= str.size()) {
            return;
        }

        const char quote = str[pos];
        if (quote != '"' && quote != '\'') {
            return;  // unquoted values are not extracted
        }

        const size_type value_begin = pos + 1;
        const size_type value_end = str.find(quote, value_begin);
        if (value_end == std::string::npos) {
            return;  // no closing quote: treat the tag as malformed
        }

        std::cout << str.substr(value_begin, value_end - value_begin) << '\n';
        ++count;
        return;
    }
}

// Inspects one tag (text from '<' through '>') and forwards it to
// getthelink() when it is an anchor tag, i.e. the tag name is exactly
// `a` or `A` followed by whitespace. Closing tags and other elements
// (`<abbr>`, `<area>`, ...) are ignored.
void analyse(const std::string& str)
{
    if (str.empty() || str[0] != '<') {
        return;
    }

    // Tolerate whitespace between '<' and the tag name, as the scan
    // always has.
    const std::string::size_type name = skip_html_space(str, 1);
    if (name + 1 >= str.size()) {
        return;
    }

    const bool is_anchor = (str[name] == 'a' || str[name] == 'A')
                           && is_html_space(str[name + 1]);
    if (is_anchor) {
        getthelink(str);
    }
}

// Reads an HTML file and prints the href of every anchor tag in it.
//
// The file is taken from the first command-line argument when one is given,
// otherwise from the historical default "d:\\1.txt".
// Returns 0 on success and 1 when the file cannot be opened.
int main(int argc, char* argv[])
{
    const char* path = (argc > 1) ? argv[1] : "d:\\1.txt";

    std::ifstream file(path, std::ios::in);
    if (!file) {
        std::cerr << "cannot open " << path << '\n';
        return 1;
    }

    // Read the whole stream into a std::string. This avoids building a
    // string from a raw buffer that is not NUL-terminated, and leaves no
    // allocation to free by hand.
    const std::string str((std::istreambuf_iterator<char>(file)),
                          std::istreambuf_iterator<char>());
    file.close();

    // Only the most recent unmatched '<' matters: each '>' closes the tag
    // that started at the last '<' seen since the previous '>'.
    const std::string::size_type none = std::string::npos;
    std::string::size_type tag_start = none;
    for (std::string::size_type i = 0; i < str.size(); ++i) {
        if (str[i] == '<') {
            tag_start = i;
        } else if (str[i] == '>' && tag_start != none) {
            analyse(str.substr(tag_start, i - tag_start + 1));
            tag_start = none;
        }
    }

    return 0;
}
|
阅读(534) | 评论(1) | 转发(0) |