- 1225 url = alloca_array (char *, nurl + 1);/*用alloca分配 nurl + 1 个指针:每个url地址一个,最后多出的一个用来存放结尾的NULL*/
- 1226 for (i = 0; i < nurl; i++, optind++)
- 1227 {
- 1228 char *rewritten = rewrite_shorthand_url (argv[optind]);
- 1229 if (rewritten)
- 1230 url[i] = rewritten;
- 1231 else
- 1232 url[i] = xstrdup (argv[optind]);
- 1233 }
- 1234 url[i] = NULL;
- /*循环结束后,url[]中保存的就是完整形式的地址,例如 http://xxx.xxx.xxx 或者 ftp://xxx.xxx.xxx:21端口;rewrite_shorthand_url 函数的作用是判断简写地址属于哪一种协议并把它补全,返回NULL时说明不需要改写,直接复制原来的参数。*/
- 1237 log_init (opt.lfilename, append_to_log); /*打开设定的日志文件,准备写*/
- opt.output_document /*如果设置了,输出就打印到这个文件*/
- 1330 for (t = url; *t; t++)
- 1331 {
- 1332 char *filename = NULL, *redirected_URL = NULL;
- 1333 int dt, url_err;
- 1337 struct iri *iri = iri_new (); /*iri结构用来保存:远程URI的编码格式、文件内容的编码格式、原始url,以及该url是否需要转成utf-8编码;这里分配并初始化一个新的iri 1、_______--------->*/
- 1338 struct url *url_parsed;
- 1339
- 1340 set_uri_encoding (iri, opt.locale, true);
- 1341 url_parsed = url_parse (*t, &url_err, iri, true); 2、________----------->
-
- 1、________--------------->
- 302 struct iri *
- 303 iri_new (void)
- 304 {
- 305 struct iri *i = xmalloc (sizeof *i); /*分配一个空间*/
- 306 i->uri_encoding = opt.encoding_remote ? xstrdup (opt.encoding_remote) : NULL; /*是否有远程编码格式没有就初始化为NULL*/
- 307 i->content_encoding = NULL; /*内容编码为NULL*/
- 308 i->orig_url = NULL; /*原始url也初始化为NULL*/
- 309 i->utf8_encode = opt.enable_iri; /*是否做utf-8编码转换,取决于是否启用了iri选项*/
- 310 return i;
- 311 }
- <------------_______________
- 1340 set_uri_encoding (iri, opt.locale, true); /*检查是否已经设置了编码,如果没有,就用本地编码进行初始化*/
-
- 2、______________---------->
- [code]
- /*url 解析*/
- 1341 url_parsed = url_parse (*t, &url_err, iri, true); /*t为url,&url_err(int *类型)用来带回错误码,iri储存编码格式,最后一个参数是bool常量true。
- 函数作用是解析一个url地址:成功时返回一个新分配的struct url结构,失败时返回NULL,错误码记录在url_err中。
- 此函数中调用了url_scheme()再一次判断地址类型,这里略去不分析*/
- 698 if (iri && iri->utf8_encode)
- 699 {
- 700 iri->utf8_encode = remote_to_utf8 (iri,
iri->orig_url ? iri->orig_url : url, (const char **)
&new_url); /*&new_url 强制转化为 const char **类型
*这个函数的作用是:如果发现远程的编码格式不是utf-8,就把地址转化成utf-8格式,转化后的地址保存在new_url中。这是在别处定义的外部函数*/
- 701 if (!iri->utf8_encode)
- 702 new_url = NULL;
- 703 else
- 704 iri->orig_url = xstrdup (url); /*转换成功,保存一份原始地址的副本*/
- 705 }
- /* src/url.c */
- 708 if (percent_encode)
- 709 url_encoded = reencode_escapes (new_url ? new_url : url); /*对url重新做百分号(%)转义编码,优先使用转成utf-8之后的new_url*/
- 713 p = url_encoded;
- 718 p += strlen (supported_schemes[scheme].leading_string); /*跳过协议前缀(比如 http://),跳过的长度就是前缀的长度*/
- 719 uname_b = p; /*协议前缀之后的位置,作为用户名(认证信息)部分的起点*/
- 720 p = url_skip_credentials (p); /*跳过 user:password@ 这样的认证信息(如果有的话)*/
- 721 uname_e = p; /*指向认证信息部分的结束位置*/
- scheme://host[:port][/path][;params][?query][#fragment]
- 格式
- 731 path_b = path_e = NULL;
- 732 params_b = params_e = NULL;
- 733 query_b = query_e = NULL;
- 734 fragment_b = fragment_e = NULL;
- 739 seps = init_seps (scheme); /*加入分隔字符,比如ftp 有参数,http和https有请求字符和片段 2、__________------------>*/
- 741 host_b = p;
- 2、__-------------->
- 620 static const char *
- 621 init_seps (enum url_scheme scheme)
- 622 {
- 623 static char seps[8] = ":/"; /*固定以 ":/" 两个字符开头*/
- 624 char *p = seps + 2;
- 625 int flags = supported_schemes[scheme].flags; /*这里看这个scheme支持哪些部分*/
- 626
- 627 if (flags & scm_has_params) /*逐项判断,支持的话就追加对应的分隔符*/
- 628 *p++ = ';';
- 629 if (flags & scm_has_query)
- 630 *p++ = '?';
- 631 if (flags & scm_has_fragment)
- 632 *p++ = '#';
- 633 *p = '\0';
- 634 return seps; /*返回格式*/
- 635 }
- <--------------____________
- /* src/url.c */
- 741 host_b = p; /*主机名从当前位置开始*/
- 742
- 743 if (*p == '[')
- 744 {
- 745 /* Handle IPv6 address inside square brackets. Ideally we'd
- 746 just look for the terminating ']', but rfc2732 mandates
- 747 rejecting invalid IPv6 addresses. */
- 748
- 749 /* The address begins after '['. */
- 750 host_b = p + 1;
- 751 host_e = strchr (host_b, ']'); /*ipv6格式*/
- 752
- 753 if (!host_e)
- 754 {
- 755 error_code = PE_UNTERMINATED_IPV6_ADDRESS;
- 756 goto error;
- 757 }
- /*不说ipv6先*/
- /*后面的一些判断很简单*/
- 798 port = scheme_default_port (scheme); /*先用scheme的默认端口(default_port)来初始化port*/
- 799 if (*p == ':') /*如果主机名后面跟着':',说明url里指定了端口,用port_b和port_e标出它的起止位置*/
- 800 {
- 801 const char *port_b, *port_e, *pp;
- 802
- 803 /* scheme://host:port/tralala */
- 804 /* ^ */
- 805 ++p;
- 806 port_b = p;
- 807 p = strpbrk_or_eos (p, seps);
- 808 port_e = p;
- /*比较简单*/
- /*下面的宏:如果当前字符是指定的分隔符,就把该部分的起止位置记到 var_b / var_e 中,并把p移到下一个分隔符(或字符串末尾);每调用一次seps就前进一位,所以各个部分必须按照顺序依次提取*/
- 837 #define GET_URL_PART(sepchar, var) do { \
- 838 if (*p == sepchar) \
- 839 var##_b = ++p, var##_e = p = strpbrk_or_eos (p, seps); \
- 840 ++seps; \
- 841 } while (0)
- 843 GET_URL_PART ('/', path);
- 844 if (supported_schemes[scheme].flags & scm_has_params)
- 845 GET_URL_PART (';', params);
- 846 if (supported_schemes[scheme].flags & scm_has_query)
- 866 u = xnew0 (struct url);
- 867 u->scheme = scheme;
- 868 u->host = strdupdelim (host_b, host_e);
- 869 u->port = port;
- 870 u->user = user;
- 871 u->passwd = passwd;
- 873 u->path = strdupdelim (path_b, path_e);
- 874 path_modified = path_simplify (scheme, u->path); /*矫正后的path*/
- 875 split_path (u->path, &u->dir, &u->file); /*将路径和文件分别存储,并且用格式化编码*/
- 876
- 877 host_modified = lowercase_str (u->host); /*将主机名称格式化成小写*/
- /*将url结构对应初始化*/
- 847 GET_URL_PART ('?', query);
- 848 if (supported_schemes[scheme].flags & scm_has_fragment)
- 849 GET_URL_PART ('#', fragment);
- 883 if (strchr (u->host, '%')) /*将主机名也格式化*/
- 884 {
- 885 url_unescape (u->host);
- 886 host_modified = true;
- 887
- 888 /* Apply IDNA regardless of iri->utf8_encode status */
- 889 if (opt.enable_iri && iri)
- 890 {
- 891 char *new = idn_encode (iri, u->host);/*采用IDNA格式对主机名进行编码(国际化域名转换成ASCII形式);返回NULL时保留原来的主机名*/
- 892 if (new)
- 893 {
- 894 xfree (u->host);
- 895 u->host = new;
- 896 host_modified = true;
- 897 }
- 898 }
- 899 }
- /*剩下就是其他的格式化*/
- 最后是return u;
- }
- /*失败的话,当然是打印错误,不分析了*/
阅读(1467) | 评论(0) | 转发(0) |