UTF8与GB2312之间的转换.docx

资源描述

UTF8与GB2312之间的转换.docx

《UTF8与GB2312之间的转换.docx》由会员分享，可在线阅读，更多相关《UTF8与GB2312之间的转换.docx（8页珍藏版）》请在冰豆网上搜索。

UTF8与GB2312之间的转换.docx

UTF8与GB2312之间的转换

相信一定有不少的程序开发人员时常会遇到字符编码的问题，而这个问题也是非常让人头痛的。

因为这些都是潜在的错误，要找出这些错误也得要有这方面的开发经验才行。

特别是在处理xml文档时，该问题的出现就更加的频繁了，有一次用java写服务器端程序，用vc写客户端与之交互。

交互的协议都是用xml写的。

结果在通讯时老是发现数据接收不正确。

纳闷！

于是用抓取网络数据包工具抓取数据，后来才发现原来是java上xml的头是这样的：<?xml version="1.0" encoding="UTF-8"?>，而vc上默认的是GB2312。

所以一遇到汉字数据就不正确了。

去网上找资料，这方面的文章好像特别少。针对像这样的问题，下面我介绍一下我自己写的一个转换程序。

当然，程序很简单。

如果有画蛇添足的地方，还望各位高手一笑了之。

如果您对UTF-8、Unicode、GB2312等还是很陌生的话，请查看相关资料，我这里就不浪费口舌了。

下面介绍一下WinAPI的两个函数：

WideCharToMultiByte、MultiByteToWideChar。

函数原型：

int WideCharToMultiByte(
    UINT CodePage,            // code page
    DWORD dwFlags,            // performance and mapping flags
    LPCWSTR lpWideCharStr,    // wide-character string
    int cchWideChar,          // number of chars in string
    LPSTR lpMultiByteStr,     // buffer for new string
    int cbMultiByte,          // size of buffer
    LPCSTR lpDefaultChar,     // default for unmappable chars
    LPBOOL lpUsedDefaultChar  // set when default char used
); // 将宽字符转换成多个窄字符

int MultiByteToWideChar(
    UINT CodePage,            // code page
    DWORD dwFlags,            // character-type options
    LPCSTR lpMultiByteStr,    // string to map
    int cbMultiByte,          // number of bytes in string
    LPWSTR lpWideCharStr,     // wide-character buffer
    int cchWideChar           // size of buffer
); // 将多个窄字符转换成宽字符

需要用到的一些函数：

CStringCXmlProcess:

HexToBin（CStringstring）//将16进制数转换成2进制

{

if（string=="0"）return"0000";

if（string=="1"）return"0001";

if（string=="2"）return"0010";

if（string=

="3"）return"0011";

if（string=

="4"）return"0100";

if（string=

="5"）return"0101";

if（string=

="6"）return"0110";

if（string=

="7"）return"0111";

if（string=

="8"）return"1000";

if（string=

="9"）return"1001";

if（string=

="a"）return"1010";

if（string=

="b"）return"1011";

if（string=

="c"）return"1100";

if（string=

="d"）return"1101";

if（string=

="e"）return"1110";

if（string=

="f"）return"1111";

return"";

CStringCXmlProcess:

BinToHex（CStringBinString）//将2进制数转换成16进制

{

if（BinString=="0000"）return"0";

if（BinString=="0001"）return"1";

if（BinString=="0010"）return"2";if（BinString=="0011"）return"3";if（BinString=="0100"）return"4";

if（BinString=

="0101"）return"5";

if（BinString=

="0110"）return"6";

if（BinString=

="0111"）return"7";

if（BinString=

="1000"）return"8";

if（BinString=

="1001"）return"9";

if（BinString=

="1010"）return"a";

if（BinString=

="1011"）return"b";

if（BinString=

="1100"）return"c";

if（BinString=

="1101"）return"d";

if（BinString=

="1110"）return"e";

if（BinString=

="1111"）return"f";

return""

}

进制字符数据转

intCXmlProcess:

BinToInt（CStringstring）//2换成10进制整型

{

intlen=0;

inttempInt=0;

infs=r-nfH0八

fo「（infiHo二Asmng.GaLengfho二++）宀

CDmp-nf丄八

s=r-nfH（inoss.ng.GefAs—48」foanfkHo八kA7—i八k++）

宀

CDmp-nfH2*CDmp-nc

-en+HCDmp-nf*s=r-nc

refum-ep

）UTFOO>旃注GB2312出mUTFOO>旃注Unicode•^可囲瞥Unicode镒ff因選widechaHOMU-HByCDM旃注GB2312

// Decodes one 3-byte UTF-8 sequence into a single UTF-16 code unit.
//
// ustart: points at the first byte of a 3-byte UTF-8 sequence
//         (1110xxxx 10xxxxxx 10xxxxxx). Exactly three bytes are read; 1-, 2-
//         and 4-byte sequences are not handled.
// Returns a pointer to the decoded WCHAR. The storage is a function-local
// static, so the caller must not free it and the value is overwritten by the
// next call (not safe for concurrent use from several threads).
//
// The original returned the address of a local array, which dangles as soon as
// the function returns; static storage keeps the "caller does not free"
// contract while making the pointer valid.
WCHAR *CXmlProcess::UTF_8ToUnicode(char *ustart)
{
    static WCHAR unicode;

    // Work on unsigned bytes so that values >= 0x80 are not sign-extended.
    const unsigned int lead  = (unsigned char)ustart[0];
    const unsigned int mid   = (unsigned char)ustart[1];
    const unsigned int trail = (unsigned char)ustart[2];

    // Drop the 1110 / 10 / 10 marker bits and join the 4 + 6 + 6 payload bits.
    unicode = (WCHAR)(((lead & 0x0Fu) << 12) |
                      ((mid & 0x3Fu) << 6) |
                      (trail & 0x3Fu));
    return &unicode;
}

char*CXmlProcess:

UnicodeToGB2312（unsignedshortuData）

//把Unicode转换成GB2312

{

char*buffer;

buffer=newchar[sizeof（WCHAR）];

WideCharToMultiByte（CP_ACP,NULL,&uData,1,buffer,sizeof（WCHAR）,NULL,NULL）;

returnbuffer;

GB2312转换成UTF-8：

先把GB2312通过函数MultiByteToWideChar转换成Unicode.然后再把Unicode通过拆开Unicode后拼装成UTF-8。

// Converts one double-byte character in the system ANSI code page (CP_ACP)
// into a single UTF-16 code unit via MultiByteToWideChar.
//
// gbBuffer: points at the two bytes of the character; exactly two bytes are
//           read.
// Returns a one-element WCHAR array allocated with new[] that the caller must
// release with delete[]. If the conversion fails the element is 0 rather than
// an indeterminate value.
WCHAR *CXmlProcess::Gb2312ToUnicode(char *gbBuffer)
{
    WCHAR *uniChar = new WCHAR[1];
    uniChar[0] = 0;

    if (MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, 2, uniChar, 1) == 0)
    {
        uniChar[0] = 0;  // conversion failed: do not expose a partial result
    }
    return uniChar;
}

char*CXmlProcess:

UnicodeToUTF_8（WCHAR*UniChar）//Unicode转换成UTF-8

{

char*buffer;

CStringstrOne;

CStringstrTwo;

CStringstrThree;

CStringstrFour;

CStringstrAnd;

buffer=newchar[3];

inthInt,lInt;

hInt=（int）（（*UniChar）/256）;

lInt=（*UniChar）%256;

CStringstring;

string.Format（"%x",hInt）;

strTwo=HexToBin（string.Right

（1））;

string=string.Left（string.GetLength（）-1）;strOne=HexToBin（string.Right

（1））;string.Format（"%x",lInt）;

strFour=HexToBin（string.Right

（1））;string=string.Left（string.GetLength（）-1）;strThree=HexToBin（string.Right

（1））;strAnd=strOne+strTwo+strThree+strFour;strAnd.Insert（0,"1110"）;

strAnd.Insert（8,"10"）;strAnd.Insert（16,"10"）;

strOne=strAnd.Left（8）;

strAnd=strAnd.Right（16）;

strTwo=strAnd.Left（8）;

strThree=strAnd.Right（8）;

*buffer=（char）BinToInt（strOne）;

buffer[1]=（char）BinToInt（strTwo）;

buffer[2]=（char）BinToInt（strThree）;

returnbuffer;

}例子：

将GB2312转换成UTF-8的调用：

char*CXmlProcess:

translateCharToUTF_8（char*xmlStream,intlen）{

intnewCharLen=0;

intoldCharLen=0;

intrevCharLen=len;

char*newCharBuffer;

char*finalCharBuffer;

char*buffer;

CStringstring;

buffer=newchar[sizeof（WCHAR）];

newCharBuffer=newchar[int（1.5*revCharLen）];//设置最大的一个缓冲区

while（oldCharLen=0）

{

*（newCharBuffer+newCharLen）=*（xmlStream

+oldCharLen）;

newCharLen++;oldCharLen++;

}//如果是英文直接复制就可以

else

{

WCHAR*pbuffer=this->Gb2312ToUnicode（xmlStream+oldCharLen）;

buffer=this->UnicodeToUTF_8（pbuffer）;*（newCharBuffer+newCharLen）=*buffer;

*（newCharBuffer+newCharLen+1）=*（buffer+1）;

*（newCharBuffer+newCharLen+2）=*（buffer+2）;newCharLen+=3;

oldCharLen+=2;

}

newCharBuffer[newCharLen]=''\0'';

CStringstring1;

string1.Format（"%s",newCharBuffer）;finalCharBuffer=newchar[newCharLen+1];memcpy（finalCharBuffer,newCharBuffer,newCharLen+1）;returnfinalCharBuffer;

}程序都非常的简单，由于实在太穷。

已经吃了两天的方便面。

所以现在头昏，程序的详细说明就不写了。

程序员到了像我这样的地步也真是少见。

工资低没有办法。

哎！

！

展开阅读全文