程序要求:通过 socket 编程实现 HTTP GET 功能;对返回的数据(页面的源代码)进行解析,提取出该页面中的超链接,并保存到文件中。
程序如下:
-
#include <stdio.h>
-
#include <sys/socket.h>
-
#include <sys/types.h>
-
#include <time.h>
-
#include <errno.h>
-
#include <signal.h>
-
#include <stdlib.h>
-
#include <string.h>
-
#include <unistd.h>
-
#include <sys/wait.h>
-
#include <sys/time.h>
-
#include <netdb.h>
-
#include <netinet/in.h>
-
#include <arpa/inet.h>
-
#define PORT 80
-
#define BUFSIZE 8184
-
/* Stream that receives extracted links; main() opens it on "frontier.txt". */
static FILE *frontier;

/*
 * Extract hyperlinks from a chunk of HTML and append them to `frontier`.
 *
 * buf must be a NUL-terminated string. Every occurrence of
 *     a href="http://TARGET"
 * produces one output line containing TARGET (the text between the
 * "http://" prefix and the closing double quote, i.e. without the scheme).
 * Empty targets are skipped. Scanning stops at the first anchor that has
 * no closing quote inside buf, or on the first write error.
 *
 * Does nothing if buf is NULL or frontier has not been opened.
 */
void parse(char *buf)
{
    static const char prefix[] = "a href=\"http://";
    const size_t prefix_len = sizeof prefix - 1;
    char *pts = buf;

    if (buf == NULL || frontier == NULL) {
        return;
    }

    while ((pts = strstr(pts, prefix)) != NULL) {
        char *start = pts + prefix_len;
        char *qts = strchr(start, '"');
        size_t len;

        if (qts == NULL) {
            break;
        }

        /* Measured from the end of the prefix, so it can never go negative. */
        len = (size_t)(qts - start);
        if (len > 0) {
            if (fwrite(start, 1, len, frontier) != len ||
                fputc('\n', frontier) == EOF) {
                break;
            }
        }

        /* Resume the search at the closing quote of this link. */
        pts = qts;
    }

    fflush(frontier);
}
-
// Wrapper function: sends an HTTP GET request to the host and hands the response to parse()
-
int httpget(char *url)
-
{
-
-
FILE *fp;
-
char *host_id;
-
-
struct hostent *host;
-
int sockfd, ret, i, h;
-
struct sockaddr_in servaddr;
-
char str1[4096],buf[8184],*str;
-
socklen_t len;
-
fd_set t_set1;
-
struct timeval tv;
-
-
-
if((host = gethostbyname(url)) == NULL)
-
{
-
-
printf("gethostbyname error");
-
exit(-1);
-
-
}
-
-
host_id = inet_ntoa(*((struct in_addr*)host -> h_addr));
-
printf("ip adress %sn",host_id);
-
-
-
if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0 ) {
-
-
printf("socket error!n");
-
exit(0);
-
}
-
-
bzero(&servaddr, sizeof(servaddr));
-
servaddr.sin_family = AF_INET;
-
servaddr.sin_port = htons(PORT);
-
-
if (inet_pton(AF_INET,host_id,&servaddr.sin_addr) <= 0 ) {
-
-
printf("inet_pton error!n");
-
exit(0);
-
}
-
-
if (connect(sockfd, (struct sockaddr *)&servaddr, sizeof(servaddr)) < 0)
-
{
-
printf("connect error!n");
-
exit(0);
-
}
-
printf("connect success n");
-
-
memset(str1, 0, 4096);
-
-
//初始get请求信息
-
strcat(str1, "GET / HTTP/1.0rn");
-
strcat(str1, "Accept: */*rn");
-
strcat(str1, "Accept-Language: zh-CNrn");
-
strcat(str1, "User-Agent: Mozilla/4.0rn");
-
sprintf(str1,"HOST: %srn",url);
-
strcat(str1,"Connection: Keep-Alivern");
-
strcat(str1, "rnrn");
-
printf("%sn",str1);
-
-
ret = send(sockfd,(void *)str1,strlen(str1),0);
-
if (ret < 0) {
-
printf("send error %d,Error message'%s'n",errno, strerror(errno));
-
exit(0);
-
-
}else{
-
-
printf("send success ,total send %d n", ret);
-
}
-
-
-
while(1){
-
-
sleep(2);
-
printf("******n");
-
tv.tv_sec= 0;
-
tv.tv_usec= 0;
-
-
h= 0;
-
-
FD_ZERO(&t_set1);
-
FD_SET(sockfd, &t_set1);
-
printf("--------------->1n");
-
h= select(sockfd +1, &t_set1, NULL, NULL, &tv);
-
-
printf("--------------->2n");
-
-
if (h == 0) continue;
-
if (h < 0) {
-
close(sockfd);
-
printf("some thing read error!n");
-
return -1;
-
-
};
-
-
if (h > 0){
-
memset(buf, 0, 8184);
-
i= recv(sockfd, (void *)buf, 8184,0);
-
printf("i = %dn",i);
-
if (i==0){
-
-
close(sockfd);
-
printf("read message find error,stop!n");
-
return -1;
-
-
}
-
// fwrite(buf,sizeof(char),strlen(buf),fp);
-
// fflush(fp);
-
parse(buf);
-
printf("%sn", buf);
-
-
-
-
}
-
-
}
-
-
close(sockfd);
-
return 0;
-
-
}
-
/*
 * Entry point: expects exactly one argument, the domain name to fetch.
 *
 * Opens "frontier.txt" (append mode) as the link output stream used by the
 * rest of the program, runs httpget() on the given domain, and closes the
 * files again.
 *
 * Exit status is 0 when httpget() returns 0 and non-zero otherwise.
 */
int main(int argc, char *argv[])
{
    FILE *fp;
    int res;
    int status;

    if (argc != 2)
    {
        fprintf(stderr, "input domain name");
        exit(-1);
    }

    frontier = fopen("frontier.txt", "a+");
    if (frontier == NULL)
    {
        printf("open error");
        return 1;
    }

    /*
     * Nothing in this function writes to 111.txt; it is still opened so the
     * program keeps creating the file as it did before. fopen() reports
     * failure with NULL, never with a negative value.
     */
    fp = fopen("111.txt", "a+");
    if (fp == NULL)
    {
        printf("fopen error");
        fclose(frontier);
        exit(-1);
    }

    res = httpget(argv[1]);
    if (res == 0)
    {
        printf("httpget success\n");
        status = 0;
    }
    else
    {
        fprintf(stderr, "httpget failed\n");
        status = 1;
    }

    fclose(fp);
    /* fclose() flushes pending link output; a failure means data was lost. */
    if (fclose(frontier) != 0)
    {
        status = 1;
    }
    frontier = NULL;

    return status;
}
阅读(5247) | 评论(0) | 转发(2) |