Chinaunix首页 | 论坛 | 博客
  • 博客访问: 498043
  • 博文数量: 59
  • 博客积分: 86
  • 博客等级: 民兵
  • 技术积分: 2141
  • 用 户 组: 普通用户
  • 注册时间: 2012-07-30 20:57
个人简介

为今天而努力的人很平凡,为昨天而努力的人叫失败,只有为美好明天而战斗不止才叫精彩!

文章分类

全部博文(59)

文章存档

2015年(1)

2014年(3)

2013年(53)

2012年(2)

我的朋友

分类: LINUX

2013-07-23 17:40:04


//通过Wget来获取网页

  1.  
  // Wraps `arg` in single quotes for /bin/sh, escaping embedded single quotes,
  // so the shell passes it to the command as one literal word.
  static string ShellQuote(const string &arg)
  {
      string quoted = "'";
      for (string::size_type i = 0; i < arg.size(); ++i)
      {
          if (arg[i] == '\'')
          {
              quoted.append("'\\''"); // close quote, escaped quote, reopen quote
          }
          else
          {
              quoted.push_back(arg[i]);
          }
      }
      quoted.push_back('\'');
      return quoted;
  }

  // Downloads `url` with the external `wget` program and returns the page text.
  //
  // The page is saved in the current directory under the text that follows the
  // last '/' of the URL (the whole URL when it contains no '/'), read back into
  // memory and then deleted again.
  //
  // Returns the file contents with the line terminators removed (lines are
  // concatenated directly, as the original implementation did), or "" when the
  // URL ends in '/', wget fails, or the downloaded file cannot be opened.
  string GetHtmlByWget(string url)
  {
      // Derive the local file name from the last path component of the URL.
      string::size_type slash = url.find_last_of("/");
      string fileName = (slash == string::npos) ? url : url.substr(slash + 1);
      if (fileName.empty())
      {
          return "";
      }

      // -q suppresses progress output. -O pins the output name so an existing
      // file of the same name cannot make wget write to "name.1" instead.
      // Both arguments are quoted: URLs routinely contain '&', '?' and ';',
      // which the shell would otherwise interpret.
      string strCom = "wget -q -O ";
      strCom.append(ShellQuote(fileName));
      strCom.append(" -- ");
      strCom.append(ShellQuote(url));

      string strHtml = "";
      if (system(strCom.c_str()) == 0)
      {
          ifstream fin(fileName.c_str());
          if (fin)
          {
              // std::getline on a std::string has no line-length limit; the
              // fixed 1024-byte buffer used before stopped at the first long
              // line and silently dropped the rest of the page.
              string line;
              while (getline(fin, line))
              {
                  strHtml.append(line);
              }
              fin.close();
          }
      }

      // Remove the temporary download (-f: no prompt, no error if missing).
      strCom = "rm -f -- ";
      strCom.append(ShellQuote(fileName));
      system(strCom.c_str());

      return strHtml;
  }

//通过GET获取网页源码

  1.  
  2. string GetHtmlByGet(string url)
  3. {
  4.     string strHtmlContent = "";
  5.     int sockfd;
  6.     struct sockaddr_in addr;
  7.     struct hostent *pURL;
  8.     char text[RECVBUF];
  9.   
  10.     //分析链接
  11.     UrlInfo urlInfo = ParseURL(url);
  12.     string sAccept = "Accept: */*\r\nAccept-Language: zh-cn\r\nAccept-Encoding: gzip, deflate";
  13.     //不同的主机UserAgent不同
  14.     string sUserAgent = "Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";
  15.     //将端口转换为字符串
  16.     char t[6];
  17.     string strPort;
  18.     sprintf(t,"%d", urlInfo.Port);
  19.     strPort = t;
  20.     //构造发送字符串
  21.     string strRequest = "";
  22.     strRequest.append("GET ");
  23.     strRequest.append(urlInfo.File);
  24.     strRequest.append("?");
  25.     strRequest.append(urlInfo.Body);
  26.     strRequest.append(" HTTP/1.1\r\n");
  27.     strRequest.append(sAccept);
  28.     strRequest.append("\r\nUser-Agent:");
  29.     strRequest.append(sUserAgent);
  30.     strRequest.append("\r\nHost:");
  31.     strRequest.append(urlInfo.Host);
  32.     strRequest.append(":");
  33.     strRequest.append(strPort);
  34.     strRequest.append("\r\nConnection: Keep-Alive\r\n\r\n");
  35.   
  36.     char* host = const_cast<char*>(urlInfo.Host.c_str());
  37.     sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); //TCP方式发送
  38.     pURL = gethostbyname(host);
  39.     addr.sin_family = AF_INET;
  40.     addr.sin_addr.s_addr = *((unsigned long*)pURL->h_addr);
  41.     addr.sin_port = htons(80);
  42.   
  43.     //连接
  44.     connect(sockfd,(struct sockaddr *)&addr,sizeof(addr));
  45.     //发送
  46.     send(sockfd, const_cast<char*>(strRequest.c_str()), strRequest.length(), 0);
  47.     //接受
  48.     while(recv(sockfd, text, RECVBUF, 0) > 0)
  49.     {
  50.         strHtmlContent.append(text);
  51.         bzero(text,RECVBUF);
  52.     }
  53.     //关闭socket
  54.     close(sockfd);
  55.     //返回接受结果
  56.     return strHtmlContent;
  57. }


使用libcurl 

  1. #include <stdio.h>
  2.  #include <string.h>
  3.  #include <curl/curl.h>
  4.   
  #define MAX_BUF 65536

  /* Receive buffer shared with main(): wr_buf always holds a NUL-terminated
   * string of wr_index bytes (one extra byte is reserved for the NUL). */
  char wr_buf[MAX_BUF+1];
  int wr_index;

  /*
   * Write-data callback, invoked by libcurl from within curl_easy_perform()
   * each time a piece of the response body arrives.
   *
   * buffer: the received bytes (not NUL-terminated).
   * size, nmemb: the piece is size * nmemb bytes long.
   * userp: the CURLOPT_WRITEDATA pointer; when non-NULL it must point to an
   *        int, which is set to 1 if the data does not fit into wr_buf.
   *
   * Returns the number of bytes stored. Returns 0 without storing anything
   * when the piece would overflow wr_buf, which makes libcurl abort the
   * transfer.
   */
  size_t write_data( void *buffer, size_t size, size_t nmemb, void *userp )
  {
    size_t segsize;
    int fits = 1;

    /* Do the arithmetic in size_t and reject products that wrap around;
     * narrowing size * nmemb to int could turn a huge piece into a small or
     * negative length that slips past the capacity check. */
    if ( wr_index < 0 || wr_index > MAX_BUF ) {
      fits = 0;
    } else if ( size != 0 && nmemb > (size_t)-1 / size ) {
      fits = 0;
    } else if ( size * nmemb > (size_t)( MAX_BUF - wr_index ) ) {
      fits = 0;
    }

    if ( !fits ) {
      /* Report the overflow through the user context, if one was given */
      if ( userp != NULL ) *(int *)userp = 1;
      return 0;
    }

    segsize = size * nmemb;

    /* Copy the data from the curl buffer into our buffer */
    memcpy( &wr_buf[wr_index], buffer, segsize );

    /* Update the write index */
    wr_index += (int)segsize;

    /* Null terminate the buffer */
    wr_buf[wr_index] = 0;

    /* Return the number of bytes received, indicating to curl that all is okay */
    return segsize;
  }
  39.   
  40.   
  41.  /*
  42.  * Simple curl application to read the index.html file from a Web site.
  43.  */
  44.  int main( void )
  45.  {
  46.   CURL *curl;
  47.   CURLcode ret;
  48.   int wr_error;
  49.   
  50.   wr_error = 0;
  51.   wr_index = 0;
  52.   
  53.   /* First step, init curl */
  54.   curl = curl_easy_init();
  55.   if (!curl) {
  56.     printf("couldn't init curl\n");
  57.     return 0;
  58.   }
  59.   
  60.   /* Tell curl the URL of the file we're going to retrieve */
  61.   curl_easy_setopt( curl, CURLOPT_URL, "" );
  62.   
  63.   /* Tell curl that we'll receive data to the function write_data, and
  64.    * also provide it with a context pointer for our error return.
  65.    */
  66.   curl_easy_setopt( curl, CURLOPT_WRITEDATA, (void *)&wr_error );
  67.   curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, write_data );
  68.   
  69.   /* Allow curl to perform the action */
  70.   ret = curl_easy_perform( curl );
  71.   
  72.   printf( "ret = %d (write_error = %d)\n", ret, wr_error );
  73.   
  74.   /* Emit the page if curl indicates that no errors occurred */
  75.   if ( ret == 0 ) printf( "%s\n", wr_buf );
  76.   
  77.   curl_easy_cleanup( curl );
  78.   
  79.   return 0;
  80.  }
  81. http:
















阅读(4678) | 评论(0) | 转发(0) |
0

上一篇:vim 缩进设置

下一篇:Perl 回忆录

给主人留下些什么吧!~~