Chinaunix首页 | 论坛 | 博客
  • 博客访问: 577617
  • 博文数量: 158
  • 博客积分: 4380
  • 博客等级: 上校
  • 技术积分: 2356
  • 用 户 组: 普通用户
  • 注册时间: 2006-09-21 10:45
文章分类

全部博文(158)

文章存档

2012年(158)

我的朋友

分类: C/C++

2012-11-16 10:41:22

这是一个post数据编码和解码的函数,为了找到此编码规则,耗费了我大量时间,感叹书上对编码规则的胡说八道,也感谢乾坤一笑在工作中给我的帮助。下一步将对URL编码和解码进行研究。

#pragma warning( disable: 4786 )
#include <cstdlib>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <utility>
using namespace std;

// Encode a byte range as POST form data.
//
// Per-byte rules:
//   ' '                          -> '+'
//   '+', '%', '=', '&'           -> "%2B", "%25", "%3D", "%26"
//   bytes 0x00-0x1F and 0x7F-0xFF -> "%HH" (two upper-case hex digits)
//   every other byte             -> copied unchanged
//
// begin/end delimit the input bytes. The encoded text is returned by value.
const std::string post_encode( const std::string::const_iterator& begin, const std::string::const_iterator& end )
{
    static const char hex_digits[] = "0123456789ABCDEF";

    std::string ret;
    ret.reserve( std::distance(begin,end)*3 );  // worst case: every byte becomes "%HH"
    for( std::string::const_iterator p=begin; p!=end; ++p )
    {
        // Classify the unsigned value so the range tests behave the same
        // whether plain char is signed or unsigned on this platform.
        const unsigned char ch = static_cast<unsigned char>( *p );

        if( ch == ' ' )
        {
            ret += '+';
        }
        else if( ch == '+' || ch == '%' || ch == '=' || ch == '&'
              || ch < 0x21 || ch > 0x7E )  // control bytes, DEL and non-ASCII
        {
            ret += '%';
            ret += hex_digits[ ch >> 4 ];
            ret += hex_digits[ ch & 0x0F ];
        }
        else
        {
            ret += *p;
        }
    }

    return ret;
}
// Convenience overload: encodes all of scr by handing its full iterator
// range to the iterator-based post_encode and returning that result.
const string post_encode( const string& scr )
{
    const string::const_iterator first = scr.begin();
    const string::const_iterator last  = scr.end();
    return post_encode( first, last );
}

// Decode POST form data from a byte range.
//
// Per-byte rules:
//   '+'    -> ' '
//   "%HH"  -> the byte with hex value HH (upper- or lower-case digits)
//   other  -> copied unchanged
//
// Returns (ok, text). ok is false when a '%' is not followed by two hex
// digits (input truncated or digits invalid); in that case text holds only
// the part decoded before the error.
const std::pair<bool,std::string> post_decode( const std::string::const_iterator& begin, const std::string::const_iterator& end )
{
    std::pair<bool,std::string> ret;
    ret.first = false;
    std::string& rs = ret.second;
    rs.reserve( std::distance(begin,end) );  // decoded text is never longer than the input

    for( std::string::const_iterator p=begin; p!=end; ++p )
    {
        if( *p == '+' )
        {
            rs += ' ';
        }
        else if( *p == '%' )
        {
            // Consume exactly two hex digits, high nibble first.
            int value = 0;
            for( int i=0; i<2; ++i )
            {
                ++p;
                if( p == end ) return ret;  // escape cut off by end of input

                const char c = *p;
                int digit;
                if( c >= '0' && c <= '9' )      digit = c - '0';
                else if( c >= 'A' && c <= 'F' ) digit = c - 'A' + 10;
                else if( c >= 'a' && c <= 'f' ) digit = c - 'a' + 10;
                else return ret;                // not a hex digit

                value = (value<<4) | digit;
            }
            rs += static_cast<char>( value );
        }
        else
        {
            rs += *p;
        }
    }
    ret.first = true;
    return ret;
}
const pair post_decode( const string& scr )
{
    return post_decode( scr.begin(), scr.end() );
}

// 对整个post进行解码
const pair > post_decodes( const string::const_iterator& begin, const string::const_iterator& end )
{
    pair > ret;
    ret.first = false;
    map& hs = ret.second;

    string::const_iterator p=begin,p1=begin,p2=begin;
    for( ; p!=end; ++p )
    {
        if( *p == '=' )
        {
            p2 = p;
        }
        else if( *p == '&' )
        {
            if( p1 >= p2 ) // error
                return ret;
            const pair _key = post_decode( p1, p2 );
            if( !_key.first || _key.second.empty() ) return ret;
            ++p2;
            const pair _val = post_decode( p2, p );
            if( !_val.first ) return ret;
            p1 = p;
            ++p1;

            hs[ _key.second ] = _val.second;
        }
    }
    if( p2 != begin )
    {
        if( p1 >= p2 ) // error
                return ret;
        const pair _key = post_decode( p1, p2 );
        if( !_key.first || _key.second.empty() ) return ret;
        ++p2;
        const pair _val = post_decode( p2, p );
        if( !_val.first ) return ret;

        hs[ _key.second ] = _val.second;
    }
    ret.first = true;
    return ret;
}
const pair > post_decodes( const string& scr )
{
    return post_decodes( scr.begin(), scr.end() );
}

// 测试
int main( void )
{
    string str( 255, ' ' );
    for( string::value_type i=0; i    string en = post_encode( str );
    const pair ret = post_decode( en );
    cout << boolalpha << ( ret.first && str==ret.second ) << endl;

    string str2 = "a1=b1&a2=b2&a3=b3&a4=b4%2B";
    const pair > params = post_decodes( str2.begin(), str2.end() );
    cout << boolalpha << params.first << endl;
    for( map::const_iterator itor=params.second.begin(); itor!=params.second.end(); ++itor )
    {
        cout << itor->first << '\t' << itor->second << '\n';
    }

    system( "PAUSE" );
    return 0;
}
阅读(907) | 评论(4) | 转发(0) |
给主人留下些什么吧!~~

网友评论2012-11-16 10:43:25

恐怖分子
不懂ascii?
只有空格转成+
其余都是 转成%HH HH就是 ascii 值

网友评论2012-11-16 10:43:05

无名
URL编码可以参考RFC文档

网友评论2012-11-16 10:42:49

小熊
我想请问一下,用POST方法提交的数据都是采用的url编码吗,我在进行解码的时候,对于中文的处理出现了问题,想请问您一下如何对中文进行处理呢.谢谢.

网友评论2012-11-16 10:42:09

七猫的垃圾箱
UrlEscape Function

--------------------------------------------------------------------------------

Converts characters in a URL that might be altered during transport across the Internet ("unsafe" characters) into their corresponding escape sequences.

Syntax

HRESULT UrlEscape(          LPCTSTR pszURL,
    LPTSTR pszEscaped,
    LPDWORD pcchEs