Chinaunix首页 | 论坛 | 博客
  • 博客访问: 294483
  • 博文数量: 44
  • 博客积分: 2276
  • 博客等级: 大尉
  • 技术积分: 439
  • 用 户 组: 普通用户
  • 注册时间: 2007-03-01 09:07
文章分类

全部博文(44)

文章存档

2010年(1)

2009年(1)

2008年(30)

2007年(12)

分类:

2008-04-14 21:30:29

    以前想利用Kopete的源码将文件名提取出来,但Kopete是用Qt开发的,是基于C++的。而我整个的毕设是基于C的。这样即使提取出来了也不方便使用。而且Kopete源码中对应的部分也较复杂,用到了很多库函数和不熟悉的数据类型,这两天Qt的manual我看的也快头大了,也没将其调试成功。于是马上转变了策略,到sourceforge.net上下载了Pidgin的源码,Pidgin是Gnome环境下的IM类软件,是用GTK开发的,基于C。其中也用到了一些Glib中的函数,于是也下载了Glib2.0的源码。找到其源码中对应于MSN传输文件的文件名等解码的部分,主要涉及到的文件有pidgin-2.4.1/libpurple/util.c(含有purple_base64_decode()函数),glib2.0/glib/gutf8.c中的g_unichar_to_utf8()函数和g_utf16_to_utf8()函数。这些库中都使用了大量的typedef。花了一下午时间改造并且调试成功。我的代码如下:
 
执行后的结果为:
The file name is :"MSN_FILE_TRANSFER_TEST.txt".
The file size is :89

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Upper bound, in bytes, on how much of the UTF-16 file name main() scans
 * (0x226 == 550). The listing says this value is quoted from elsewhere but
 * does not name the source. */
#define MAX_FILE_NAME_LEN 0x226     


/* Decodes a Base64 string into a newly allocated byte buffer and stores the
 * decoded length in *ret_len.
 * Adapted from pidgin-2.4.1/libpurple/util.c. */
unsigned char *purple_base64_decode(const char *str, unsigned int *ret_len);                 

/* Encodes one character code as UTF-8 into outbuf and returns the number of
 * bytes the encoding occupies.
 * Adapted from glib/gutf8.c. */
int g_unichar_to_utf8 (unsigned int c,char *outbuf);                            

/* Converts a UTF-16 string into a newly allocated, 0-terminated UTF-8 string.
 * Adapted from glib/gutf8.c. */
char *g_utf16_to_utf8 (const unsigned short *str,long len,long *items_read,long *items_written);    




int main(){
         
                     
        char *context="fgIAAAMAAABZAAAAAAAAAAEAAABNAFMATgBfAEYASQBMAEUAXwBUAFIAQQBOAFMARgBFAFIAXwBUAEUAUwBUAC4Ad        
        /* File Transfer */
        char *bin;
        unsigned int bin_len;
        unsigned int file_size;
        char *file_name;
        unsigned short *uni_name;

        bin = (char *)purple_base64_decode(context, &bin_len);
        file_size = (unsigned int)(*(unsigned int *)(bin + 8));

        uni_name = (unsigned short *)(bin + 20);
        while(*uni_name != 0 && ((char *)uni_name - (bin + 20)) < MAX_FILE_NAME_LEN) {
                *uni_name = (unsigned short)(*uni_name);
                uni_name++;
        }

        file_name = g_utf16_to_utf8((const unsigned short *)(bin + 20), -1,NULL, NULL);

        printf("The file name is :\"%s\".\n",file_name);
        printf("The file size is :%ld\n",file_size);
                free(file_name);

        return 0;
}

    

/**
 * purple_base64_decode:
 * @str:     0-terminated Base64 text. May be NULL.
 * @ret_len: where to store the number of decoded bytes, or NULL.
 *
 * Decodes Base64 text. Decoding stops at the first '=' (after emitting the
 * one or two bytes of the final padded group) or at the end of @str. CR, LF
 * and any other byte outside the Base64 alphabet are skipped. A trailing
 * group that is neither complete nor followed by '=' is dropped.
 *
 * Return value: a newly allocated buffer holding the decoded bytes followed
 * by one extra 0 byte (not counted in @ret_len); release it with free().
 * Returns NULL, with *@ret_len set to 0, if @str is NULL or memory cannot be
 * allocated.
 */
unsigned char *purple_base64_decode(const char *str, unsigned int *ret_len)
{
    unsigned char *out;
    const char *c;
    unsigned int acc = 0;   /* up to four 6-bit values, most recent lowest */
    int len = 0, n = 0;

    if (ret_len != NULL)
        *ret_len = 0;

    if (str == NULL)
        return NULL;

    /* Every 4 input characters yield at most 3 bytes; a padded tail adds at
     * most 2 more, plus the trailing 0. One allocation covers all cases. */
    out = malloc((strlen(str) / 4) * 3 + 3);
    if (out == NULL)
        return NULL;

    for (c = str; *c != '\0'; c++) {
        unsigned int val;

        if (*c >= 'A' && *c <= 'Z') {
            val = (unsigned int)(*c - 'A');
        } else if (*c >= 'a' && *c <= 'z') {
            val = 26u + (unsigned int)(*c - 'a');
        } else if (*c >= '0' && *c <= '9') {
            val = 52u + (unsigned int)(*c - '0');
        } else if (*c == '+') {
            val = 62;
        } else if (*c == '/') {
            val = 63;
        } else if (*c == '=') {
            /* Padding: flush the whole bytes contained in the partial group. */
            if (n == 3) {
                out[len++] = (unsigned char)((acc >> 10) & 0xff);
                out[len++] = (unsigned char)((acc >> 2) & 0xff);
            } else if (n == 2) {
                out[len++] = (unsigned char)((acc >> 4) & 0xff);
            }
            break;
        } else {
            /* Line breaks and any other non-alphabet byte carry no data. */
            continue;
        }

        acc = (acc << 6) | val;
        if (++n == 4) {
            out[len++] = (unsigned char)((acc >> 16) & 0xff);
            out[len++] = (unsigned char)((acc >> 8) & 0xff);
            out[len++] = (unsigned char)(acc & 0xff);
            acc = 0;
            n = 0;
        }
    }

    out[len] = 0;

    if (ret_len != NULL)
        *ret_len = (unsigned int)len;

    return out;
}
/* unicode_strchr */
/**
 * g_unichar_to_utf8:
 * @c: the character code to encode
 * @outbuf: destination for the encoded bytes, or %NULL. When non-%NULL it
 * must have room for the longest encoding this function produces (6 bytes).
 *
 * Encodes @c as a UTF-8 byte sequence. The sequence length is chosen from
 * the magnitude of @c (1 byte below 0x80, up to 6 bytes at 0x4000000 and
 * above). When @outbuf is %NULL only the length is computed.
 *
 * Return value: the number of bytes the encoding occupies
 **/

int g_unichar_to_utf8 (unsigned int c,char *outbuf)
{
  unsigned int nbytes;
  int lead;   /* marker bits of the first byte for the chosen length */
  int pos;

  /* Pick the sequence length, testing the largest range first. */
  if (c >= 0x4000000)
    {
      lead = 0xfc;
      nbytes = 6;
    }
  else if (c >= 0x200000)
    {
      lead = 0xf8;
      nbytes = 5;
    }
  else if (c >= 0x10000)
    {
      lead = 0xf0;
      nbytes = 4;
    }
  else if (c >= 0x800)
    {
      lead = 0xe0;
      nbytes = 3;
    }
  else if (c >= 0x80)
    {
      lead = 0xc0;
      nbytes = 2;
    }
  else
    {
      lead = 0;
      nbytes = 1;
    }

  if (!outbuf)
    return nbytes;

  /* Fill continuation bytes from the end, 6 payload bits at a time. */
  pos = nbytes - 1;
  while (pos > 0)
    {
      outbuf[pos] = (c & 0x3f) | 0x80;
      c >>= 6;
      pos--;
    }

  /* Whatever bits remain belong in the lead byte. */
  outbuf[0] = c | lead;

  return nbytes;
}
/**
 * g_utf16_to_utf8:
 * @str: a UTF-16 encoded string (one code unit per unsigned short)
 * @len: the maximum number of code units of @str to use. If @len < 0, then
 * the string is terminated with a 0 code unit.
 * @items_read: location to store the number of code units read, or %NULL.
 * If %NULL, a trailing unpaired high surrogate is treated as an error.
 * If an error occurs then the index of the offending input is stored here.
 * @items_written: location to store the number of bytes written, or %NULL.
 * The value stored here does not include the trailing 0 byte. It is only
 * stored on success.
 *
 * Convert a string from UTF-16 to UTF-8. The result is terminated with a
 * 0 byte. This adaptation has no error-reporting parameter: problems in the
 * input are reported with puts() on standard output.
 *
 * Return value: a pointer to a newly allocated UTF-8 string, to be released
 * with free(). %NULL is returned if @str is %NULL, if the input contains an
 * invalid surrogate sequence, or if memory cannot be allocated.
 **/

/* Combines a high surrogate h and a low surrogate l into one code point. */
#define SURROGATE_VALUE(h,l) (((h) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000)
/* Returns val from the enclosing function when expr is false. */
#define g_return_val_if_fail(expr,val) \
  do { if (!(expr)) return (val); } while (0)
/* Number of bytes the UTF-8 encoding of Char occupies. */
#define UTF8_LENGTH(Char) \
  ((Char) < 0x80 ? 1 : \
   ((Char) < 0x800 ? 2 : \
    ((Char) < 0x10000 ? 3 : \
     ((Char) < 0x200000 ? 4 : \
      ((Char) < 0x4000000 ? 5 : 6)))))
//from glib/gutf8.c

char *g_utf16_to_utf8 (const unsigned short *str,long len,long *items_read,long *items_written)
{
  const unsigned short *in;
  char *out;
  char *result = NULL;
  int n_bytes;
  unsigned int high_surrogate;

  g_return_val_if_fail (str != NULL, NULL);

  /* Pass 1: validate the surrogate structure and measure the output. */
  n_bytes = 0;
  in = str;
  high_surrogate = 0;
  while ((len < 0 || in - str < len) && *in)
    {
      unsigned short c = *in;
      unsigned int wc;

      if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
        {
          if (!high_surrogate)
            {
              puts("Flag1:Invalid sequence in conversion input.");
              goto err_out;
            }
          wc = SURROGATE_VALUE (high_surrogate, c);
          high_surrogate = 0;
        }
      else
        {
          /* A pending high surrogate must be followed by a low one. */
          if (high_surrogate)
            {
              puts("Flag2:Invalid sequence in conversion input.");
              goto err_out;
            }

          if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
            {
              /* Nothing to count yet; wait for the matching low half. */
              high_surrogate = c;
              goto next1;
            }

          wc = c;
        }

      n_bytes += UTF8_LENGTH (wc);

    next1:
      in++;
    }

  /* A dangling high surrogate is only tolerated when the caller can learn
   * how much input was consumed through items_read. */
  if (high_surrogate && !items_read)
    {
      puts("Flag3:Partial character sequence at end of input.");
      goto err_out;
    }

  /* At this point, everything is valid, and we just need to convert. */
  result = malloc(n_bytes + 1);
  if (result == NULL)
    goto err_out;

  /* Pass 2: encode. The loop ends once the measured byte count is written,
   * so a trailing unpaired high surrogate is never encoded. */
  high_surrogate = 0;
  out = result;
  in = str;
  while (out < result + n_bytes)
    {
      unsigned short c = *in;
      unsigned int wc;

      if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
        {
          wc = SURROGATE_VALUE (high_surrogate, c);
          high_surrogate = 0;
        }
      else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
        {
          high_surrogate = c;
          goto next2;
        }
      else
        wc = c;

      out += g_unichar_to_utf8 (wc, out);

    next2:
      in++;
    }

  *out = '\0';

  if (items_written)
    *items_written = out - result;

 err_out:
  if (items_read)
    *items_read = in - str;

  return result;
}

阅读(2151) | 评论(2) | 转发(0) |
给主人留下些什么吧!~~

chinaunix网友2009-04-14 14:07:50

咋整的? 好像只能解析英文的? 楼主你试过中文的没有?

chinaunix网友2009-04-14 14:07:37

咋整的? 好像只能解析英文的? 楼主你试过中文的没有?