Chinaunix首页 | 论坛 | 博客
  • 博客访问: 87672
  • 博文数量: 21
  • 博客积分: 1416
  • 博客等级: 上尉
  • 技术积分: 215
  • 用 户 组: 普通用户
  • 注册时间: 2005-09-02 14:45
文章分类

全部博文(21)

文章存档

2010年(2)

2009年(7)

2007年(3)

2006年(2)

2005年(7)

我的朋友

分类: C/C++

2009-12-17 19:44:22

     带 BOM 的 UTF-8 文件以三个字节 0xEF、0xBB、0xBF 开头(BOM 是可选的,并非所有 UTF-8 文件都有);读取时要先跳过这三个字节再解码;写入时要先把这三个字节写到文件头。
    

/* utf8wr.h */
#ifndef UTF8WR_H
#define UTF8WR_H

/*
 * Helpers for converting between code page 936 (Simplified Chinese, GBK)
 * strings and UTF-8, and for writing/reading UTF-8 text files that start
 * with the 3-byte signature EF BB BF.
 *
 * NOTE(review): the implementations hand the TCHAR buffers straight to
 * MultiByteToWideChar/WideCharToMultiByte, which take char strings, so
 * this presumably only builds when TCHAR is char (no UNICODE/_UNICODE).
 */

#include <stdio.h>
#include <malloc.h>
#include <tchar.h>
#include <windows.h>

/*
 * Convert the NUL-terminated code page 936 string lpszBuffer to UTF-8.
 * Pass lpszContext == NULL to query the size; otherwise the result is
 * stored in lpszContext, which the caller must have sized from a prior
 * query.  The returned byte count includes the terminating NUL.
 * Returns 0 when lpszBuffer is NULL.
 */
size_t utf8_encode(const TCHAR *lpszBuffer,TCHAR *lpszContext);

/*
 * Convert the NUL-terminated UTF-8 string lpszBuffer to code page 936.
 * Same calling convention as utf8_encode: NULL lpszContext queries the
 * size, and the returned byte count includes the terminating NUL.
 * Returns 0 when lpszBuffer is NULL.
 */
size_t utf8_decode(const TCHAR *lpszBuffer,TCHAR *lpszContext);

/*
 * Create/truncate lpszFile, write the bytes EF BB BF, then the UTF-8
 * encoding of lpszBuffer.  Returns the number of encoded bytes written
 * (the 3 signature bytes are not counted); 0 if the file cannot be opened.
 */
size_t utf8_write(const char *lpszFile,const TCHAR *lpszBuffer);

/*
 * Read lpszFile, skip the leading EF BB BF signature and decode the rest
 * to code page 936.  Pass lpszBuffer == NULL to query the size needed,
 * then call again with a buffer of at least that many bytes.
 * Returns 0 if the file cannot be opened or is empty.
 */
size_t utf8_read(const char *lpszFile,TCHAR *lpszBuffer);

#endif




/* utf8wr.c */
#include "utf8wr.h"

/*
 * utf8_encode - convert a NUL-terminated code page 936 (GBK) string to UTF-8.
 *
 * lpszBuffer:  source string.  It is passed to MultiByteToWideChar as a
 *              char string, so this only builds when TCHAR is char.
 * lpszContext: destination buffer, or NULL to only query the size.  The
 *              caller must supply at least as many bytes as a previous
 *              size query returned; no length is passed in.
 *
 * Returns the UTF-8 length in bytes INCLUDING the terminating NUL, or 0
 * when lpszBuffer is NULL, a conversion fails, or memory cannot be
 * allocated.  (Returning 0 on allocation failure matters: the UTF-16
 * character count is not a valid byte size for the caller to allocate.)
 */
size_t utf8_encode(const TCHAR *lpszBuffer,TCHAR *lpszContext)
{
    wchar_t *pUnicode=NULL;
    int cchWide=0;      /* UTF-16 length in characters, incl. NUL */
    int cbUtf8=0;       /* UTF-8 length in bytes, incl. NUL */

    if(lpszBuffer==NULL){
        return(0);
    }

    /* Step 1: code page 936 -> UTF-16 (size query first). */
    cchWide=MultiByteToWideChar(936,0,lpszBuffer,-1,NULL,0);
    if(cchWide<=0){
        return(0);
    }
    pUnicode=(wchar_t *)malloc((size_t)cchWide*sizeof(wchar_t));
    if(pUnicode==NULL){
        return(0);
    }

    if(MultiByteToWideChar(936,0,lpszBuffer,-1,pUnicode,cchWide)>0){
        /* Step 2: UTF-16 -> UTF-8; only write when a buffer was given. */
        cbUtf8=WideCharToMultiByte(CP_UTF8,0,pUnicode,-1,NULL,0,NULL,NULL);
        if((cbUtf8>0)&&(lpszContext!=NULL)){
            cbUtf8=WideCharToMultiByte(CP_UTF8,0,pUnicode,-1,lpszContext,cbUtf8,NULL,NULL);
        }
    }

    free(pUnicode);
    return(cbUtf8>0?(size_t)cbUtf8:0);
}

/*
 * utf8_decode - convert a NUL-terminated UTF-8 string to code page 936 (GBK).
 *
 * lpszBuffer:  UTF-8 source string.  It is passed to MultiByteToWideChar
 *              as a char string, so this only builds when TCHAR is char.
 * lpszContext: destination buffer, or NULL to only query the size.  The
 *              caller must supply at least as many bytes as a previous
 *              size query returned; no length is passed in.
 *
 * Returns the code page 936 length in bytes INCLUDING the terminating
 * NUL, or 0 when lpszBuffer is NULL, a conversion fails, or memory cannot
 * be allocated.  (Returning 0 on allocation failure matters: the UTF-16
 * character count is not a valid byte size for the caller to allocate.)
 */
size_t utf8_decode(const TCHAR *lpszBuffer,TCHAR *lpszContext)
{
    wchar_t *pUnicode=NULL;
    int cchWide=0;      /* UTF-16 length in characters, incl. NUL */
    int cbAnsi=0;       /* code page 936 length in bytes, incl. NUL */

    if(lpszBuffer==NULL){
        return(0);
    }

    /* Step 1: UTF-8 -> UTF-16 (size query first). */
    cchWide=MultiByteToWideChar(CP_UTF8,0,lpszBuffer,-1,NULL,0);
    if(cchWide<=0){
        return(0);
    }
    pUnicode=(wchar_t *)malloc((size_t)cchWide*sizeof(wchar_t));
    if(pUnicode==NULL){
        return(0);
    }

    if(MultiByteToWideChar(CP_UTF8,0,lpszBuffer,-1,pUnicode,cchWide)>0){
        /* Step 2: UTF-16 -> code page 936; only write when a buffer was given. */
        cbAnsi=WideCharToMultiByte(936,0,pUnicode,-1,NULL,0,NULL,NULL);
        if((cbAnsi>0)&&(lpszContext!=NULL)){
            cbAnsi=WideCharToMultiByte(936,0,pUnicode,-1,lpszContext,cbAnsi,NULL,NULL);
        }
    }

    free(pUnicode);
    return(cbAnsi>0?(size_t)cbAnsi:0);
}

size_t utf8_write(const char *lpszFile,const TCHAR *lpszBuffer)
{
    size_t size1=0;
    TCHAR utf8header[3];
    TCHAR *psz=NULL;
    FILE *fp=NULL;

    utf8header[0]=(TCHAR)0xef;
    utf8header[1]=(TCHAR)0xbb;
    utf8header[2]=(TCHAR)0xbf;
    fp=fopen(lpszFile,"wb");
    if(fp){
        fwrite(utf8header,sizeof(TCHAR),3,fp);
        size1=utf8_encode(lpszBuffer,NULL);
        if(size1>0){
            psz=(TCHAR *)malloc(size1);
            if(psz){
                size1=utf8_encode(lpszBuffer,psz);
                size1=fwrite(psz,sizeof(TCHAR),size1,fp);
                free(psz);
                psz=NULL;
            }
        }        
        fclose(fp);
    }
    return(size1);
}

/*
 * utf8_read - read a UTF-8 text file and decode it to code page 936.
 *
 * lpszFile:   path of the file to read (opened "rb").
 * lpszBuffer: destination for the decoded, NUL-terminated text, or NULL to
 *             only query the size.  Call once with NULL, allocate the
 *             returned number of bytes, then call again with the buffer.
 *
 * A leading EF BB BF signature is skipped when present; a file without it
 * is decoded from its first byte.  Decoding stops at the first NUL byte in
 * the file, if there is one.
 *
 * Returns the decoded length in bytes including the terminating NUL, or 0
 * when the file cannot be opened or measured, is empty, holds nothing but
 * the signature, or when allocation or decoding fails.
 */
size_t utf8_read(const char *lpszFile,TCHAR *lpszBuffer)
{
    size_t cbDecoded=0;
    size_t cbRead=0;
    size_t skip=0;
    long cbFile=0;
    FILE *fp=NULL;
    TCHAR *psz=NULL;

    if(lpszFile==NULL){
        return(0);
    }
    fp=fopen(lpszFile,"rb");
    if(fp==NULL){
        return(0);
    }

    /* ftell returns -1 on failure; keep it signed so that is detectable. */
    if(fseek(fp,0L,SEEK_END)==0){
        cbFile=ftell(fp);
    }
    rewind(fp);

    if(cbFile>0){
        /* One extra byte: utf8_decode scans for a NUL terminator. */
        psz=(TCHAR *)malloc((size_t)cbFile+1);
        if(psz){
            cbRead=fread(psz,1,(size_t)cbFile,fp);
            psz[cbRead]=0;

            /* Compare as unsigned bytes; plain char may be signed. */
            if((cbRead>=3)
               &&((unsigned char)psz[0]==0xef)
               &&((unsigned char)psz[1]==0xbb)
               &&((unsigned char)psz[2]==0xbf)){
                skip=3;
            }
            if(cbRead>skip){
                /* A NULL lpszBuffer makes utf8_decode a pure size query. */
                cbDecoded=utf8_decode(psz+skip,lpszBuffer);
            }
            free(psz);
            psz=NULL;
        }
    }
    fclose(fp);

    return(cbDecoded);
}

/*
 * Demo: write a sample string to "utf8.txt" as UTF-8, read it back and
 * print the byte counts and the decoded text.
 *
 * NOTE(review): the literal below is fed to a code page 936 -> UTF-8
 * conversion, so this assumes the compiler stores narrow string literals
 * in code page 936 - confirm the source/execution charset of the build.
 */
int main(void)
{
    const TCHAR *pszBuffer=_T("中国人民万岁^_^");
    const char *pszFile="utf8.txt";
    TCHAR *psz=NULL;
    size_t size=0;

    size=utf8_write(pszFile,pszBuffer);
    /* size_t is not int: cast and use a matching conversion specifier. */
    printf("write %lu bytes\n",(unsigned long)size);

    /* First call only asks how many bytes the decoded text needs. */
    size=utf8_read(pszFile,NULL);
    if(size>0){
        psz=(TCHAR *)malloc(size);
        if(psz){
            /* Keep the buffer printable even if the second read fails. */
            psz[0]=0;
            size=utf8_read(pszFile,psz);
            printf("read %lu bytes:\n%s\n",(unsigned long)size,psz);
            free(psz);
            psz=NULL;
        }
    }

    return(0);
}

运行结果:
write 22 bytes
read 16 bytes:
中国人民万岁^_^


源文件下载:
文件:utf8wr.zip
大小:2KB
下载:下载

阅读(3283) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~