Chinaunix首页 | 论坛 | 博客
  • 博客访问: 39919
  • 博文数量: 11
  • 博客积分: 530
  • 博客等级: 中士
  • 技术积分: 140
  • 用 户 组: 普通用户
  • 注册时间: 2008-04-22 21:56
文章分类

全部博文(11)

文章存档

2010年(1)

2009年(3)

2008年(7)

我的朋友

分类: C/C++

2008-12-06 00:33:01

本代码实现的功能是:从文件中的 HTML 代码里提取 URL(href)信息。目前感觉鲁棒性不是很好,有待改进。


#include "stdafx.h"
#include "stdlib.h"
#include <iostream>
#include <fstream>
#include <stack>
using namespace std;
// Running total of links printed by getthelink().
int count = 0;

// Returns true for the characters that may separate tokens inside an HTML tag.
static bool is_attr_space(char c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
}

/**
 * Prints the value of the first `href` attribute found in a tag.
 *
 * @param str  Text of a single tag, e.g. `<a class="x" href="page.html">`.
 *
 * The value is written to std::cout followed by a newline and the global
 * `count` is incremented.  Double-quoted, single-quoted and unquoted values
 * are accepted; whitespace around '=' is allowed.  Nothing is printed when
 * the tag has no `href=` attribute or when a quoted value is not terminated.
 * The attribute name is matched case-sensitively ("href" only).
 */
void getthelink(const std::string& str)
{
    static const char attr[] = "href";
    const std::string::size_type attr_len = sizeof(attr) - 1;
    const std::string::size_type len = str.size();

    for (std::string::size_type pos = str.find(attr);
         pos != std::string::npos;
         pos = str.find(attr, pos + attr_len))
    {
        // The attribute name must be followed (after optional blanks) by '='.
        std::string::size_type cur = pos + attr_len;
        while (cur < len && is_attr_space(str[cur]))
            ++cur;
        if (cur >= len || str[cur] != '=')
            continue;               // e.g. "hreflang": keep looking

        ++cur;
        while (cur < len && is_attr_space(str[cur]))
            ++cur;
        if (cur >= len)
            return;                 // "href=" with nothing after it

        std::string::size_type first = cur;
        std::string::size_type last;
        const char quote = str[cur];
        if (quote == '"' || quote == '\'')
        {
            // Quoted value: everything up to the matching closing quote.
            first = cur + 1;
            last = str.find(quote, first);
            if (last == std::string::npos)
                return;             // unterminated quote: malformed, skip
        }
        else
        {
            // Unquoted value: runs until whitespace or the end of the tag.
            last = cur;
            while (last < len && !is_attr_space(str[last]) && str[last] != '>')
                ++last;
            if (last == first)
                return;             // empty unquoted value
        }

        std::cout << str.substr(first, last - first) << '\n';
        // Qualified so it can never be confused with std::count, which the
        // file-level `using namespace std;` may bring into scope.
        ::count += 1;
        return;
    }
}
void analyse(string str )
{
    int i = 1;
     if(str[0] =='<')
     {
         while (str[i] == ' ') {
             i++;
         }
         if(str[i] == 'a' || str[i] == 'A' )
         {
             if (str[++i] == ' ') {
                 getthelink(str);
             }
         }
     }
}
int main(int argc, char* argv[])
{
        ifstream file;
        file.open("d:\\1.txt",ios::in);
        file.seekg(0,ios::beg);
        int begin = file.tellg();
        file.seekg(0, ios::end );
        int theend = file.tellg();
        int length = theend - begin;
        file.seekg(0,ios::beg);
        char* temp = new char[length];
        file.read(temp,length);
        file.close();
        string str(temp);
        int i = 0;
        string ttt="";
        stack<int> st;
        while (i < str.length())
        {
             if(str[i] == '<')
             {
                 st.push(i);
             }

             if(str[i] == '>')
             {
                if(!st.empty())
                {
                    begin = st.top();
                    while(st.size())
                    st.pop();
                    analyse(str.substr(begin,i-begin+1));
                }
             }
             i++;
            
        }
}

阅读(534) | 评论(1) | 转发(0) |
给主人留下些什么吧!~~

chinaunix网友2009-03-14 12:35:27

呵呵,支持你!