Chinaunix首页 | 论坛 | 博客
  • 博客访问: 456591
  • 博文数量: 56
  • 博客积分: 517
  • 博客等级: 下士
  • 技术积分: 751
  • 用 户 组: 普通用户
  • 注册时间: 2010-11-12 18:16
文章分类

全部博文(56)

文章存档

2015年(2)

2014年(6)

2013年(29)

2012年(17)

2011年(2)

分类: C/C++

2012-11-29 15:09:20

一个简单的 HTTP 网页获取程序：目前只实现了基本的网页下载功能，尚未处理重定向，后续会逐步完善。

/*
 * A simple HTTP client for downloading web pages.
 * For learning the HTTP protocol.
 * Written by linux-person
 * v1.0
 */
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>

#define MAXSIZE 1024

/*
 * Locate the start of the HTML document inside a received buffer.
 *
 * Searches the first len bytes of src for the opening "<html" tag,
 * ignoring letter case. The search is bounded by len, so src does not
 * have to be NUL-terminated (data coming straight from read() is not).
 *
 * Returns a pointer to the '<' of the tag, or NULL when src is NULL,
 * the buffer is shorter than the tag, or the tag is not present.
 */
char *pos_html(const char *src, int len)
{
    static const char tag[] = "<html";
    const int taglen = (int)(sizeof(tag) - 1);
    int i, j;

    if (src == NULL || len < taglen)
        return NULL;

    for (i = 0; i + taglen <= len; i++) {
        for (j = 0; j < taglen; j++) {
            /* tolower() requires a value representable as unsigned char */
            if (tolower((unsigned char)src[i + j]) != tag[j])
                break;
        }
        if (j == taglen)
            return (char *)(src + i);
    }
    return NULL;
}

/*
 * Split a URL of the form [scheme://]host[:port][/page] into its parts.
 *
 * url     NUL-terminated URL string.
 * len     length of url; only used to compute *pgsize.
 * host    receives the NUL-terminated host name (at most hsize - 1 chars).
 * hsize   size of the host buffer in bytes.
 * port    receives the NUL-terminated decimal port; "80" when the URL has
 *         none. May be NULL, in which case the port is parsed but dropped.
 * ptsize  size of the port buffer in bytes (must be >= 3 to hold "80").
 * page    receives a pointer INTO url at the first '/' after the
 *         authority, or NULL when the URL has no path.
 * pgsize  receives len minus the offset of the page within url.
 *
 * Returns 0 on success. Returns -1 (with host and port set to empty
 * strings) on a NULL/too-small argument, an empty host, a host or port
 * that does not fit its buffer, or a non-numeric port.
 */
int parse_url(const char *url, int len, char *host, int hsize, char *port, int ptsize, char **page, int *pgsize)
{
    const char *p;
    int hlen = 0, plen = 0, in_port = 0;

    if (url == NULL || host == NULL || hsize <= 0 || page == NULL || pgsize == NULL)
        return -1;
    if (port != NULL && ptsize < 3)
        return -1;

    *page = NULL;
    *pgsize = 0;

    /*
     * Skip "scheme://" only when it really is a leading scheme; a "://"
     * that appears later (e.g. inside a query string) belongs to the page.
     */
    p = url;
    while (isalnum((unsigned char)*p) || *p == '+' || *p == '-' || *p == '.')
        p++;
    if (p > url && strncmp(p, "://", 3) == 0)
        p += 3;
    else
        p = url;

    /* Authority: everything up to the first '/' (or end of string). */
    for (; *p != '\0' && *p != '/'; p++) {
        if (!in_port) {
            if (*p == ':') {
                /* the separator itself is not part of the port */
                in_port = 1;
                continue;
            }
            if (hlen >= hsize - 1)
                goto fail;
            host[hlen++] = *p;
        } else {
            if (!isdigit((unsigned char)*p))
                goto fail;
            if (port != NULL) {
                if (plen >= ptsize - 1)
                    goto fail;
                port[plen++] = *p;
            }
        }
    }

    if (hlen == 0)
        goto fail;
    host[hlen] = '\0';

    if (port != NULL) {
        /* no port in the URL: fall back to the default HTTP port */
        if (plen == 0)
            memcpy(port, "80", 3);
        else
            port[plen] = '\0';
    }

    if (*p != '\0')
        *page = (char *)p;
    *pgsize = len - (int)(p - url);
    return 0;

fail:
    host[0] = '\0';
    if (port != NULL)
        port[0] = '\0';
    return -1;
}

/* Write exactly len bytes to fd, retrying on short writes and EINTR.
 * Returns 0 on success, -1 on error (errno set by write()). */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0) {
        ssize_t w = write(fd, buf, len);
        if (w < 0) {
            if (errno == EINTR)
                continue;
            return -1;
        }
        buf += w;
        len -= (size_t)w;
    }
    return 0;
}

/* Resolve host/service and connect a TCP socket to the first address that
 * accepts. Returns the connected descriptor, or -1 when nothing worked. */
static int connect_to(const char *host, const char *service)
{
    struct addrinfo hints, *res, *ai;
    int rc, sockfd = -1;

    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;

    if ((rc = getaddrinfo(host, service, &hints, &res)) != 0) {
        fprintf(stderr, "getaddrinfo error: %s\n", gai_strerror(rc));
        return -1;
    }

    for (ai = res; ai != NULL; ai = ai->ai_next) {
        /* a socket whose connect() failed must not be reused: make a new one per address */
        sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (sockfd == -1)
            continue;
        if (connect(sockfd, ai->ai_addr, ai->ai_addrlen) == 0) {
            printf("connected\n");
            break;
        }
        printf("connect error\n");
        close(sockfd);
        sockfd = -1;
    }

    freeaddrinfo(res);
    return sockfd;
}

/* Derive the local output file name from the request path: the last path
 * component (query/fragment removed) when it contains a '.', otherwise
 * "index.html". Names starting with '.' are rejected so that ".", ".."
 * and dotfiles in the working directory are never targeted. */
static void output_name(const char *page, char *out, size_t outsize)
{
    const char *name = "";
    size_t namelen = 0;

    if (page != NULL) {
        size_t pathlen = strcspn(page, "?#");
        size_t i;

        name = page;
        for (i = 0; i < pathlen; i++) {
            if (page[i] == '/')
                name = page + i + 1;
        }
        namelen = (size_t)(page + pathlen - name);
    }

    if (namelen == 0 || namelen >= outsize || name[0] == '.'
        || memchr(name, '.', namelen) == NULL) {
        snprintf(out, outsize, "%s", "index.html");
        return;
    }
    memcpy(out, name, namelen);
    out[namelen] = '\0';
}

/*
 * Fetch the page named by argv[1] over HTTP and save it in the current
 * directory.
 *
 * The response is scanned buffer by buffer with pos_html(); everything
 * before the first match (status line and headers) is discarded, and
 * everything from the match onwards is written to the output file.
 * Redirects and chunked transfer encoding are not interpreted: the bytes
 * are stored as received.
 *
 * Returns 0 after printing the usage text or on success, EXIT_FAILURE on
 * any error.
 */
int main(int argc, char **argv)
{
    int found = 0;
    int fd, sockfd, pgsize, sndlen, default_port;
    int status = EXIT_SUCCESS;
    ssize_t n;
    char recvbuf[MAXSIZE];
    char sndmsg[1024];
    char host[512] = {0};
    char port[8] = {0};
    char fname[256];
    char *page = NULL;
    const char *service;

    if (argc < 2) {
        printf("Usage:%s url\n", argv[0]);
        return 0;
    }

    if (parse_url(argv[1], (int)strlen(argv[1]), host, sizeof(host), port, sizeof(port), &page, &pgsize) != 0
        || host[0] == '\0') {
        fprintf(stderr, "invalid url\n");
        return EXIT_FAILURE;
    }

    /* parse_url may leave the ':' separator at the start of the port buffer; skip it */
    service = (port[0] == ':') ? port + 1 : port;
    if (service[0] == '\0')
        service = "80";
    default_port = (strcmp(service, "80") == 0);

    /* HTTP/1.1 wants the port in the Host header when it is not the default */
    sndlen = snprintf(sndmsg, sizeof(sndmsg),
                      "GET %s HTTP/1.1\r\nAccept-Encoding: identity\r\nHost: %s%s%s\r\nConnection: close\r\nUser-Agent: HttpClient\r\n\r\n",
                      (page ? page : "/"), host,
                      default_port ? "" : ":", default_port ? "" : service);
    if (sndlen < 0 || (size_t)sndlen >= sizeof(sndmsg)) {
        fprintf(stderr, "url too long\n");
        return EXIT_FAILURE;
    }

    /* connect before touching the output file so a failed fetch does not truncate it */
    sockfd = connect_to(host, service);
    if (sockfd == -1) {
        fprintf(stderr, "Unable to connect\n");
        return EXIT_FAILURE;
    }

    output_name(page, fname, sizeof(fname));
    fd = open(fname, O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
    if (fd == -1) {
        fprintf(stderr, "open file failed\n");
        close(sockfd);
        return EXIT_FAILURE;
    }

    if (write_all(sockfd, sndmsg, (size_t)sndlen) != 0) {
        perror("write request");
        status = EXIT_FAILURE;
        goto out;
    }

    /* one byte is reserved so the buffer can be NUL-terminated for string searches */
    while ((n = read(sockfd, recvbuf, sizeof(recvbuf) - 1)) > 0 || (n < 0 && errno == EINTR)) {
        const char *chunk = recvbuf;
        size_t chunklen;

        if (n < 0)
            continue; /* interrupted by a signal: retry the read */
        chunklen = (size_t)n;

        if (!found) {
            char *start;

            recvbuf[n] = '\0';
            start = pos_html(recvbuf, (int)n);
            if (start == NULL)
                continue; /* still inside the response headers */
            found = 1;
            chunk = start;
            chunklen = (size_t)n - (size_t)(start - recvbuf);
        }

        if (write_all(fd, chunk, chunklen) != 0) {
            perror("write file");
            status = EXIT_FAILURE;
            goto out;
        }
    }
    if (n < 0) {
        perror("read");
        status = EXIT_FAILURE;
    }

out:
    if (close(fd) != 0) {
        perror("close");
        status = EXIT_FAILURE;
    }
    close(sockfd);
    return status;
}

阅读(2322) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~