Chinaunix首页 | 论坛 | 博客
  • 博客访问: 372412
  • 博文数量: 64
  • 博客积分: 2975
  • 博客等级: 少校
  • 技术积分: 831
  • 用 户 组: 普通用户
  • 注册时间: 2007-01-14 10:59
文章存档

2014年(2)

2012年(7)

2010年(40)

2009年(5)

2008年(8)

2007年(2)

分类: WINDOWS

2010-05-21 17:50:55

ASPack压缩算法使用aPLib库,下载地址是

解压缩的代码树中examples\c\appack.c例程演示如何使用该库执行压缩和解压缩,该程序很简单,我们就从它开始分析吧。
C:\Documents and Settings\Administrator\My Documents\下载\aPLib-1.01\examples\c>
appack c 1.doc p1.doc
===============================================================================
aPLib example                   Copyright (c) 1998-2009 by Joergen Ibsen / Jibz
                                                            All Rights Reserved

                                                 
===============================================================================

compressed 110339 -> 36923 bytes (33%) in 0.05 seconds

这里显示压缩率是33%,
注意有可能压缩率会大于100%,文件大小不减反增,比较奇怪
对于已经压缩过的文件测试如下
C:\Documents and Settings\Administrator\My Documents\下载\aPLib-1.01\examples\c>
appack c 1.exe p1.exe
===============================================================================
aPLib example                   Copyright (c) 1998-2009 by Joergen Ibsen / Jibz
                                                            All Rights Reserved

                                                 
===============================================================================

compressed 1545096 -> 1663141 bytes (107%) in 0.56 seconds




/*
 * Show program syntax.
 */
void show_syntax(void)
{
    printf("syntax:\n\n"
           "   compress    :  appack c \n"
           "   decompress  :  appack d \n\n");
}
/*
 * Main.
 */
int main(int argc, char *argv[])
{
    /* show banner */
    printf("===============================================================================\n"
           "aPLib example                   Copyright (c) 1998-2009 by Joergen Ibsen / Jibz\n"
           "                                                            All Rights Reserved\n\n"
           "                                                  \n"
           "===============================================================================\n\n");

    /* check number of arguments */
    if (argc != 4)//参数个数必须为4
    {
        show_syntax();
        return 1;
    }

#ifdef __WATCOMC__
    /* OpenWatcom 1.2 line buffers stdout, so we unbuffer stdout manually
       to make the progress indication in the callback work.
    */
    setbuf(stdout, NULL);
#endif

    /* check first character of first argument to determine action */
    if (argv[1][0] && argv[1][1] == '\0')
    {
        switch (argv[1][0])
        {
        /* compress file */
        case 'c':
        case 'C': return compress_file(argv[2], argv[3]);

        /* decompress file */
        case 'd':
        case 'D': return decompress_file(argv[2], argv[3]);
        }
    }

    /* show program syntax */
    show_syntax();
    return 1;
}

先看压缩算法
/*
 * Unsigned char type.
 */
typedef unsigned char byte;

/*
 * Compute ratio between two numbers.
 */
unsigned int ratio(unsigned int x, unsigned int y)
{
    if (x <= UINT_MAX / 100) x *= 100; else y /= 100;

    if (y == 0) y = 1;

    return x / y;
}

/*
 * Compression callback.
 */
int CB_CALLCONV callback(unsigned int insize, unsigned int inpos, unsigned int outpos, void *cbparam)
{
   printf("\rcompressed %u -> %u bytes (%u%% done)", inpos, outpos, ratio(inpos, insize));//在原行显示压缩进度

   return 1;
}

/*
 * Compress a file.
 */
int compress_file(const char *oldname, const char *packedname)
{
    FILE *oldfile;
    FILE *packedfile;
    unsigned int insize, outsize;
    clock_t clocks;
    byte *data, *packed, *workmem;

    /* open input file */
    if ((oldfile = fopen(oldname, "rb")) == NULL)
    {
        printf("\nERR: unable to open input file\n");
        return 1;
    }

    /* get size of input file 取得输入文件大小,使用stat不是更好,还是因为考虑到兼容性?*/
    fseek(oldfile, 0, SEEK_END);
    insize = (unsigned int) ftell(oldfile);
    fseek(oldfile, 0, SEEK_SET);

    /* allocate memory 分配内存*/
    if ((data    = (byte *) malloc(insize))                     == NULL ||
        (packed  = (byte *) malloc(aP_max_packed_size(insize))) == NULL ||
        (workmem = (byte *) malloc(aP_workmem_size(insize)))    == NULL)
    {
        printf("\nERR: not enough memory\n");
        return 1;
    }

    if (fread(data, 1, insize, oldfile) != insize)//全部读入输入文件
    {
        printf("\nERR: error reading from input file\n");
        return 1;
    }

    clocks = clock();//计时

    /* compress data block 压缩数据 */
    outsize = aPsafe_pack(data, packed, insize, workmem, callback, NULL);

    clocks = clock() - clocks;

    /* check for compression error */
    if (outsize == APLIB_ERROR)
    {
        printf("\nERR: an error occured while compressing\n");
        return 1;
    }

    /* create output file */
    if ((packedfile = fopen(packedname, "wb")) == NULL)
    {
        printf("\nERR: unable to create output file\n");
        return 1;
    }

    fwrite(packed, 1, outsize, packedfile);//写入输出文件

    /* show result */
    printf("\rcompressed %u -> %u bytes (%u%%) in %.2f seconds\n",
           insize, outsize, ratio(outsize, insize),
           (double)clocks / (double)CLOCKS_PER_SEC);//在原行显示报告

    /* close files */
    fclose(packedfile);
    fclose(oldfile);

    /* free memory */
    free(workmem);
    free(packed);
    free(data);

    return 0;
}

逻辑很清晰.回调函数用于显示压缩进度,\r代表回车,即在原行显示。
回调函数原型是
int CB_CALLCONV callback(unsigned int insize, unsigned int inpos, unsigned int outpos, void *cbparam)
主要参数是源文件大小,当前输入位置,当前输出位置.


这里用到的三个函数
aP_max_packed_size
aP_workmem_size
aPsafe_pack
都是aPLib中的.

先看aP_max_packed_size的定义,这个函数没有提供源码,只能用IDA反汇编
aPLib-1.01\lib\dll\aplib.dll
来看
; __stdcall aP_max_packed_size(x)
public _aP_max_packed_size@4
_aP_max_packed_size@4 proc near

arg_0= dword ptr  4

mov     eax, [esp+arg_0] ; _aP_max_packed_size
mov     ecx, eax
shr     ecx, 3
lea     eax, [ecx+eax+40h]
retn    4
_aP_max_packed_size@4 endp


这个函数很简单,返回insize>>3+insize+40h

aP_workmem_size

; __stdcall aP_workmem_size(x)
public _aP_workmem_size@4
_aP_workmem_size@4 proc near
mov     eax, 0A0000h    ; _aP_workmem_size
retn    4
_aP_workmem_size@4 endp

直接返回0a0000h.


aPsafe_pack倒是提供了汇编源码,在aPLib-1.01\src\32bit\spack.asm中

; header format:
;压缩文件头部格式
;  offs  size    data
; --------------------------------------
;    0   dword   tag ('AP32')
;    4   dword   header_size (24 bytes)
;    8   dword   packed_size
;   12   dword   packed_crc
;   16   dword   orig_size
;   20   dword   orig_crc

format MS COFF

public aPsafe_pack as '_aPsafe_pack'

extrn '_aP_pack' as aP_pack
extrn '_aP_crc32' as aP_crc32

; =============================================================

section '.text' code readable executable

aPsafe_pack:
    ; aPsafe_pack(const void *source,
    ;             void *destination,
    ;             unsigned int length,
    ;             void *workmem,
    ;             int (*callback)(unsigned int, unsigned int, void *),
    ;             void *cbparam)

    .ret$  equ 7*4
    .src$  equ 8*4 + 4
    .dst$  equ 8*4 + 8
    .len$  equ 8*4 + 12
    .wmem$ equ 8*4 + 16
    .cb$   equ 8*4 + 20
    .cbp$  equ 8*4 + 24
/*
按顺序EAX、ECX、EDX、EBX、ESP、EBP、ESI 及 EDI,然後將堆疊指標值減去三十二
可见
.ret$ equ 7*4指向压入的eax处
8*4 指向返回地址
.src$  equ 8*4 + 4 指向inbuffer
等等
*/
    pushad

    mov    ebp, esp

    mov    esi, [ebp + .src$] ; esi -> inbuffer
    mov    edi, [ebp + .dst$] ; edi -> outbuffer
    mov    ecx, [ebp + .len$] ; ecx =  length

    or     eax, -1            ; eax = -1,错误值

    test   esi, esi           ; check parameters
    jz     .return_eax        ;
    test   edi, edi           ;
    jz     .return_eax        ;
    test   ecx, ecx           ;
    jz     .return_eax        ;

    mov    ebx, 032335041h  ;压缩文件头部magic,'AP32'
    mov    [edi], ebx         ; set header.tag
    mov    ebx, 24            ;头部大小
    mov    [edi + 4], ebx     ; set header.header_size

    add    ebx, edi           ; ebx -> destination for packed data,ebx指向头部后

    mov    [edi + 16], ecx    ; set header.orig_size,原始文件大小

    push   ecx
    push   esi
    call   aP_crc32 ;调用aP_crc32,计算原始文件crc32值
    add    esp, 8 ;平栈

    mov    [edi + 20], eax    ; set header.orig_crc,原始crc

    push   dword [ebp + .cbp$] ; callback param
    push   dword [ebp + .cb$] ; callback
    push   dword [ebp + .wmem$] ; workmem
    push   ecx                ; length
    push   ebx                ; destination
    push   esi                ; source
    call   aP_pack             ;调用aP_pack,执行压缩
    add    esp, 24             ;平栈

    cmp    eax, -1           ;返回值为错
    je     .return_eax

    mov    [edi + 8], eax     ; set header.packed_size,压缩后的大小

    mov    edx, eax           ; edx = packed size

    push   eax
    push   ebx
    call   aP_crc32 ;压缩后的crc32
    add    esp, 8

    mov    [edi + 12], eax    ; set header.packed_crc

    lea    eax, [edx + 24]    ; eax = packed size + header size ,压缩文件大小,包括头

  .return_eax:
    mov    [esp + .ret$], eax ; return unpacked length in eax存入栈上eax处,popad会弹到eax寄存器中

    popad

    ret

到这里又涉及到两个函数aP_crc32和aP_pack

ap_crc32提供了汇编源码,aP_pack没有提供源码,需要反汇编阅读

aPLib-1.01\src\32bit\crc32.asm

public aP_crc32 as '_aP_crc32'

; =============================================================

macro docrcM
{
    mov    ebx, 0x000000ff
    and    ebx, eax    ;取eax的最低字节,即al
    shr    eax, 8   ;eax右移8位
    xor    eax, [edi+ebx*4] ;用al索引crc_tab取双字,和eax异或
}

macro docrcbyteM
{
    xor    al, [esi]  ;取一字节,和al异或
    inc    esi
    docrcM ;进行一轮计算
}

macro docrcdwordM
{
    xor    eax, [esi],取一个双字
    add    esi, 4
    docrcM ;进行四轮计算
    docrcM
    docrcM
    docrcM
}

; =============================================================

section '.text' code readable executable

aP_crc32:
    ; aP_crc32(const void *source, unsigned int length)

    .ret$  equ 7*4
    .src$  equ 8*4 + 4
    .len$  equ 8*4 + 8

    pushad

    mov    esi, [esp + .src$] ; esi -> buffer
    mov    ecx, [esp + .len$] ; ecx =  length
    mov    edi, aP_crctab     ; edi -> crctab crc表,有255个dword

    sub    eax, eax ; eax清0

    test   esi, esi ;无效,返回0
    jz     .c_exit

    sub    eax, 1 ; eax==-1

    test   ecx, ecx  ;长度为0,返回-1
    jz     .c_done

  .c_align_loop:
    test   esi, 3 ;是否对其到4字节边界
    jz     .c_aligned_now ;esi已经对齐到4字节边界
    docrcbyteM ;否则先处理不对齐的部分
    dec    ecx ;已经处理1字节
    jnz    .c_align_loop ;是否还有未处理的字节

  .c_aligned_now:
    mov    edx, ecx
    and    edx, 7 ;长度对8的余数
    shr    ecx, 3 ;长度对8的商
    jz     .c_LT_eight ;商为0,处理余数部分

  .c_next_eight:
    docrcdwordM;计算一个双字
    docrcdwordM;计算下一个双字
    dec    ecx
    jnz    .c_next_eight

  .c_LT_eight:
    mov    ecx, edx ;余数部分
    test   ecx, ecx
    jz     .c_done ;余数部分处理完

  .c_last_loop:;余数部分未处理完
    docrcbyteM
    dec    ecx
    jnz    .c_last_loop

  .c_done:
    not    eax ;取反

  .c_exit:
    mov    [esp + .ret$], eax ; return crc32 in eax

    popad

    ret

; =============================================================

section '.rdata' data readable

aP_crctab  dd 000000000h, 077073096h, 0ee0e612ch, 0990951bah, 0076dc419h
           dd 0706af48fh, 0e963a535h, 09e6495a3h, 00edb8832h, 079dcb8a4h
           dd 0e0d5e91eh, 097d2d988h, 009b64c2bh, 07eb17cbdh, 0e7b82d07h
           dd 090bf1d91h, 01db71064h, 06ab020f2h, 0f3b97148h, 084be41deh
           dd 01adad47dh, 06ddde4ebh, 0f4d4b551h, 083d385c7h, 0136c9856h
           dd 0646ba8c0h, 0fd62f97ah, 08a65c9ech, 014015c4fh, 063066cd9h
           dd 0fa0f3d63h, 08d080df5h, 03b6e20c8h, 04c69105eh, 0d56041e4h
           dd 0a2677172h, 03c03e4d1h, 04b04d447h, 0d20d85fdh, 0a50ab56bh
           dd 035b5a8fah, 042b2986ch, 0dbbbc9d6h, 0acbcf940h, 032d86ce3h
           dd 045df5c75h, 0dcd60dcfh, 0abd13d59h, 026d930ach, 051de003ah
           dd 0c8d75180h, 0bfd06116h, 021b4f4b5h, 056b3c423h, 0cfba9599h
           dd 0b8bda50fh, 02802b89eh, 05f058808h, 0c60cd9b2h, 0b10be924h
           dd 02f6f7c87h, 058684c11h, 0c1611dabh, 0b6662d3dh, 076dc4190h
           dd 001db7106h, 098d220bch, 0efd5102ah, 071b18589h, 006b6b51fh
           dd 09fbfe4a5h, 0e8b8d433h, 07807c9a2h, 00f00f934h, 09609a88eh
           dd 0e10e9818h, 07f6a0dbbh, 0086d3d2dh, 091646c97h, 0e6635c01h
           dd 06b6b51f4h, 01c6c6162h, 0856530d8h, 0f262004eh, 06c0695edh
           dd 01b01a57bh, 08208f4c1h, 0f50fc457h, 065b0d9c6h, 012b7e950h
           dd 08bbeb8eah, 0fcb9887ch, 062dd1ddfh, 015da2d49h, 08cd37cf3h
           dd 0fbd44c65h, 04db26158h, 03ab551ceh, 0a3bc0074h, 0d4bb30e2h
           dd 04adfa541h, 03dd895d7h, 0a4d1c46dh, 0d3d6f4fbh, 04369e96ah
           dd 0346ed9fch, 0ad678846h, 0da60b8d0h, 044042d73h, 033031de5h
           dd 0aa0a4c5fh, 0dd0d7cc9h, 05005713ch, 0270241aah, 0be0b1010h
           dd 0c90c2086h, 05768b525h, 0206f85b3h, 0b966d409h, 0ce61e49fh
           dd 05edef90eh, 029d9c998h, 0b0d09822h, 0c7d7a8b4h, 059b33d17h
           dd 02eb40d81h, 0b7bd5c3bh, 0c0ba6cadh, 0edb88320h, 09abfb3b6h
           dd 003b6e20ch, 074b1d29ah, 0ead54739h, 09dd277afh, 004db2615h
           dd 073dc1683h, 0e3630b12h, 094643b84h, 00d6d6a3eh, 07a6a5aa8h
           dd 0e40ecf0bh, 09309ff9dh, 00a00ae27h, 07d079eb1h, 0f00f9344h
           dd 08708a3d2h, 01e01f268h, 06906c2feh, 0f762575dh, 0806567cbh
           dd 0196c3671h, 06e6b06e7h, 0fed41b76h, 089d32be0h, 010da7a5ah
           dd 067dd4acch, 0f9b9df6fh, 08ebeeff9h, 017b7be43h, 060b08ed5h
           dd 0d6d6a3e8h, 0a1d1937eh, 038d8c2c4h, 04fdff252h, 0d1bb67f1h
           dd 0a6bc5767h, 03fb506ddh, 048b2364bh, 0d80d2bdah, 0af0a1b4ch
           dd 036034af6h, 041047a60h, 0df60efc3h, 0a867df55h, 0316e8eefh
           dd 04669be79h, 0cb61b38ch, 0bc66831ah, 0256fd2a0h, 05268e236h
           dd 0cc0c7795h, 0bb0b4703h, 0220216b9h, 05505262fh, 0c5ba3bbeh
           dd 0b2bd0b28h, 02bb45a92h, 05cb36a04h, 0c2d7ffa7h, 0b5d0cf31h
           dd 02cd99e8bh, 05bdeae1dh, 09b64c2b0h, 0ec63f226h, 0756aa39ch
           dd 0026d930ah, 09c0906a9h, 0eb0e363fh, 072076785h, 005005713h
           dd 095bf4a82h, 0e2b87a14h, 07bb12baeh, 00cb61b38h, 092d28e9bh
           dd 0e5d5be0dh, 07cdcefb7h, 00bdbdf21h, 086d3d2d4h, 0f1d4e242h
           dd 068ddb3f8h, 01fda836eh, 081be16cdh, 0f6b9265bh, 06fb077e1h
           dd 018b74777h, 088085ae6h, 0ff0f6a70h, 066063bcah, 011010b5ch
           dd 08f659effh, 0f862ae69h, 0616bffd3h, 0166ccf45h, 0a00ae278h
           dd 0d70dd2eeh, 04e048354h, 03903b3c2h, 0a7672661h, 0d06016f7h
           dd 04969474dh, 03e6e77dbh, 0aed16a4ah, 0d9d65adch, 040df0b66h
           dd 037d83bf0h, 0a9bcae53h, 0debb9ec5h, 047b2cf7fh, 030b5ffe9h
           dd 0bdbdf21ch, 0cabac28ah, 053b39330h, 024b4a3a6h, 0bad03605h
           dd 0cdd70693h, 054de5729h, 023d967bfh, 0b3667a2eh, 0c4614ab8h
           dd 05d681b02h, 02a6f2b94h, 0b40bbe37h, 0c30c8ea1h, 05a05df1bh
           dd 02d02ef8dh

因此整个算法很简单,就是进行length轮计算,伪代码如下
unsigned int aP_crc32(unsigned char * src,unsigned int lenght)
{
  unsigned int a=-1;
  unsigned char * pa=&a;
  unsigned int b;
  for(int i=0;i  {
    pa[0]^=src[0];
    b=crc_tab[pa[0]];
    a=a>>8;
    a^=b;
    src++;
  }
  return ~a;
}

aP_pack下一篇再分析

阅读(1937) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~