分类: C/C++
2008-04-23 22:03:31
JNI中文处理问题小结
作者:大卫的思维空间
由于工作关系,需要利用JNI在C++ 与Java程序之间进行方法调用和数据传递,但以前总是在英文环境下工作,对中文(其他语言编码同理)问题反倒没有太关注,最近抽了点时间研究了一下,将自己的体会整理如下,供大家讨论或参考。
在进一步讨论之前,有几点基础知识需要说明:
Java中文乱码问题在很多情况下都可能发生:不同应用间,不同平台间等等,但以上问题已有大量优秀的文章讨论过,这里不作深入探讨,详见参考2、3、4、5。下面简要总结一下:
/** Prints the value of the "file.encoding" system property. */
public class Encoding {
    public static void main(String[] args) {
        final String encoding = System.getProperty("file.encoding");
        System.out.println(encoding);
    }
}
javac在不指定encoding参数时,如果区域设定不正确,则可能造成编/解码错误,这个问题在编译一个从别的环境传过来的文件时可能发生;
下面重点讨论JNI中在C++ 程序与Java程序间进行数据传递时需要注意的问题。
在JNI中jstring采用的是UCS-2编码,与Java中String的编码方式一致。但是在C++ 中,字符串是用char(8位)或者wchar_t(16位,Unicode编码与jchar一致,但并非所有开发平台上都是Unicode编码,详见参考6)来表示的,下面的程序证明了这一点(编译环境:VC6):
#includeJNI提供了几个方法来实现jstring与char/wchar_t之间的转换。using namespace std; int main() { locale loc( "Chinese-simplified" ); //locale loc( "chs" ); //locale loc( "ZHI" ); //locale loc( ".936" ); wcout.imbue( loc ); wcout << L"中文" << endl; //若没有L,会出问题 wchar_t wch[] = {0x4E2D, 0x6587, 0x0}; //"中文"二字的Unicode编码 wcout << wch << endl; return 0; }
jsize GetStringLength(jstring str)
const jchar *GetStringChars(jstring str, jboolean *isCopy)
void ReleaseStringChars(jstring str, const jchar *chars)
此外,为了便于以UTF-8方式进行传输、存储,JNI还提供了几个操作UTF格式的方法:
jsize GetStringUTFLength(jstring str)
const char* GetStringUTFChars(jstring str, jboolean *isCopy)
void ReleaseStringUTFChars(jstring str, const char* chars)
GetStringChars返回的是Unicode格式的编码串,而GetStringUTFChars返回的是UTF-8格式的编码串。
要创建一个jstring,可以用如下方式:
// Creates a Java string from a NUL-terminated string encoded in the system
// ANSI code page (CP_ACP).
//
//   env - JNI environment used to create the string
//   str - NUL-terminated ANSI string to convert
//
// Returns the new jstring, or 0 when env or str is null. When the input is
// empty or MultiByteToWideChar reports failure (returns 0), the result is a
// Java string of length 0.
//
// NOTE(review): strlen and MultiByteToWideChar take narrow strings, so this
// assumes a build where LPCTSTR is a narrow pointer (non-UNICODE) - confirm.
jstring NewJString( JNIEnv * env, LPCTSTR str )
{
    if (!env || !str)
        return 0;

    const int slen = static_cast<int>(strlen(str));

    // Every wide character consumes at least one input byte, so slen elements
    // are sufficient; the extra element keeps the allocation non-empty for "".
    wchar_t * buffer = new wchar_t[slen + 1];

    // A zero-length input makes MultiByteToWideChar fail, so skip the call.
    const int len = (slen > 0)
        ? MultiByteToWideChar(CP_ACP, 0, str, slen, buffer, slen)
        : 0;

    // NewString takes an explicit length, so no terminator has to be written.
    // The buffer is wchar_t (what the Win32 call expects) and is viewed as
    // jchar here; the two are assumed to have the same 16-bit layout.
    jstring js = env->NewString(reinterpret_cast<const jchar *>(buffer), len);

    delete [] buffer;
    return js;
}
而要将一个jstring对象转为一个char字符串数组,可以:
int JStringToChar( JNIEnv * env, jstring str, LPTSTR desc, int desc_len ) { int len = 0; if (desc == NULL || str == NULL) return -1; // Check buffer size if (env->GetStringLength(str) * 2 1 > desc_len) { return -2; } memset(desc, 0, desc_len); const wchar_t * w_buffer = env->GetStringChars(str, 0); len = WideCharToMultiByte(CP_ACP, 0, w_buffer, wcslen(w_buffer) 1, desc, desc_len, NULL, NULL); env->ReleaseStringChars(str, w_buffer); if (len > 0 && len < desc_len) desc[len] = 0; return strlen(desc); }当然,按照上面的分析,你也可以直接将GetStringChars的返回结果作为wchar_t串来进行操作。或者,如果你愿意,你也可以将GetStringUTFChars的结果通过MultiByteToWideChar转换为UCS2编码串,再通过WideCharToMultiByte转换为多字节串。
const char* pstr = env->GetStringUTFChars(str, false); int nLen = MultiByteToWideChar( CP_UTF8, 0, pstr, -1, NULL, NULL );//得到UTF-8编码的字符串长度 LPWSTR lpwsz = new WCHAR[nLen]; MultiByteToWideChar( CP_UTF8, 0, pstr, -1, lpwsz, nLen );//转换的结果是UCS2格式的编码串 int nLen1 = WideCharToMultiByte( CP_ACP, 0, lpwsz, nLen, NULL, NULL, NULL, NULL ); LPSTR lpsz = new CHAR[nLen1]; WideCharToMultiByte( CP_ACP, 0, lpwsz, nLen, lpsz, nLen1, NULL, NULL );//将UCS2格式的编码串转换为多字节 cout << "Out:" << lpsz << endl; delete [] lpwsz; delete [] lpsz;当然,我相信很少有人想要或者需要这么做。 这里需要注意一点,GetStringChars的返回值是jchar,而GetStringUTFChars的返回值是const char*。 除了上面的办法外,当需要经常在jstring和char*之间进行转换时我们还有一个选择,那就是下面的这个类。这个类本来是一个叫Roger S. Reynolds的老外提供的,想法非常棒,但用起来却不太灵光,因为作者将考虑的重心放在UTF格式串上,但在实际操作中,我们往往使用的却是ACP(ANSI code page)串。下面是原作者的程序:
class UTFString { private: UTFString (); // Default ctor - disallowed public: // Create a new instance from the specified jstring UTFString(JNIEnv* env, const jstring& str) : mEnv (env), mJstr (str), mUtfChars ((char* )mEnv->GetStringUTFChars (mJstr, 0)), mString (mUtfChars) { } // Create a new instance from the specified string UTFString(JNIEnv* env, const string& str) : mEnv (env), mString (str), mJstr (env->NewStringUTF (str.c_str ())), mUtfChars ((char* )mEnv->GetStringUTFChars (mJstr, 0)) { } // Create a new instance as a copy of the specified UTFString UTFString(const UTFString& rhs) : mEnv (rhs.mEnv), mJstr (mEnv->NewStringUTF (rhs.mUtfChars)), mUtfChars ((char* )mEnv->GetStringUTFChars (mJstr, 0)), mString (mUtfChars) { } // Delete the instance and release allocated storage ~UTFString() { mEnv->ReleaseStringUTFChars (mJstr, mUtfChars); } // assign a new value to this instance from the given string UTFString & operator =(const string& rhs) { mEnv->ReleaseStringUTFChars (mJstr, mUtfChars); mJstr = mEnv->NewStringUTF (rhs.c_str ()); mUtfChars = (char* )mEnv->GetStringUTFChars (mJstr, 0); mString = mUtfChars; return *this; } // assign a new value to this instance from the given char* UTFString & operator =(const char* ptr) { mEnv->ReleaseStringUTFChars (mJstr, mUtfChars); mJstr = mEnv->NewStringUTF (ptr); mUtfChars = (char* )mEnv->GetStringUTFChars (mJstr, 0); mString = mUtfChars; return *this; } // Supply operator methods for converting the UTFString to a string // or char*, making it easy to pass UTFString arguments to functions // that require string or char* parameters. string & GetString() { return mString; } operator string() { return mString; } operator const char* () { return mString.c_str (); } operator jstring() { return mJstr; } private: JNIEnv* mEnv; // The enviroment pointer for this native method. 
jstring mJstr; // A copy of the jstring object that this UTFString represents char* mUtfChars; // Pointer to the data returned by GetStringUTFChars string mString; // string buffer for holding the "value" of this instance };我将它改了改:
class JNIString { private: JNIString (); // Default ctor - disallowed public: // Create a new instance from the specified jstring JNIString(JNIEnv* env, const jstring& str) : mEnv (env) { const jchar* w_buffer = env->GetStringChars (str, 0); mJstr = env->NewString (w_buffer, wcslen (w_buffer)); // Deep Copy, in usual case we only need // Shallow Copy as we just need this class to // provide some convenience for handling jstring mChars = new char[wcslen (w_buffer) * 2 1]; WideCharToMultiByte (CP_ACP, 0, w_buffer, wcslen (w_buffer) 1, mChars, wcslen (w_buffer) * 2 1, NULL, NULL); env->ReleaseStringChars (str, w_buffer); mString = mChars; } // Create a new instance from the specified string JNIString(JNIEnv* env, const string& str) : mEnv (env) { int slen = str.length (); jchar* buffer = new jchar[slen]; int len = MultiByteToWideChar (CP_ACP, 0, str.c_str (), str.length (), buffer, slen); if (len > 0 && len < slen) buffer[len] = 0; mJstr = env->NewString (buffer, len); delete [] buffer; mChars = new char[str.length () 1]; strcpy (mChars, str.c_str ()); mString.empty (); mString = str.c_str (); } // Create a new instance as a copy of the specified JNIString JNIString(const JNIString& rhs) : mEnv (rhs.mEnv) { const jchar* wstr = mEnv->GetStringChars (rhs.mJstr, 0); mJstr = mEnv->NewString (wstr, wcslen (wstr)); mEnv->ReleaseStringChars (rhs.mJstr, wstr); mChars = new char[strlen (rhs.mChars) 1]; strcpy (mChars, rhs.mChars); mString = rhs.mString.c_str (); } // Delete the instance and release allocated storage ~JNIString() { delete [] mChars; } // assign a new value to this instance from the given string JNIString & operator =(const string& rhs) { delete [] mChars; int slen = rhs.length (); jchar* buffer = new jchar[slen]; int len = MultiByteToWideChar (CP_ACP, 0, rhs.c_str (), rhs.length (), buffer, slen); if (len > 0 && len < slen) buffer[len] = 0; mJstr = mEnv->NewString (buffer, len); delete [] buffer; mChars = new char[rhs.length () 1]; strcpy (mChars, rhs.c_str 
()); mString = rhs.c_str (); return *this; } // Supply operator methods for converting the JNIString to a string // or char*, making it easy to pass JNIString arguments to functions // that require string or char* parameters. string & GetString() { return mString; } operator string() { return mString; } operator const char* () { return mString.c_str (); } operator jstring() { return mJstr; } private: JNIEnv* mEnv; // The enviroment pointer for this native method. jstring mJstr; // A copy of the jstring object that this JNIString represents char* mChars; // Pointer to a ANSI code page char array string mString; // string buffer for holding the "value" of this instance (ANSI code page) };后者除了将面向UTF编码改成了面向ANSI编码外,还去掉了operator =(const char* ptr)的定义,因为 operator =(const string& rhs)可以在需要的时候替代前者而无需任何额外编码。(因为按照C 规范,const reference可以自动转换,详见本人另一文章“关于 const reference 的几点说明”)
#include参考资料:#include #include #include using namespace std; int main() { int res; JavaVM* jvm; JNIEnv* env; JavaVMInitArgs vm_args; JavaVMOption options[3]; options[0].optionString = "-Djava.compiler=NONE"; options[1].optionString = "-Djava.class.path=.;.."; // .. is specially for this project options[2].optionString = "-verbose:jni"; vm_args.version = JNI_VERSION_1_4; vm_args.nOptions = 3; vm_args.options = options; vm_args.ignoreUnrecognized = JNI_TRUE; res = JNI_CreateJavaVM (& jvm, (void* * )& env, & vm_args); if (res < 0) { fprintf (stderr, "Can''t create Java VM\n"); return 1; } jclass cls = env->FindClass ("jni/test/Demo"); assert (0 != cls); jmethodID mid = env->GetMethodID (cls, " ", "(Ljava/lang/String;)V"); assert (0 != mid); wchar_t* p = L"中国"; jobject obj = env->NewObject (cls, mid, env->NewString (reinterpret_cast (p), wcslen (p))); assert (0 != obj); mid = env->GetMethodID (cls, "getMessage", "()Ljava/lang/String;"); assert (0 != mid); jstring str = (jstring)env->CallObjectMethod (obj, mid); // use JNIString for easier handling. JNIString jnistr (env, str); cout << "JNIString:" << jnistr.GetString () << endl; jnistr = "中文"; cout << jnistr.GetString () << endl; jvm->DestroyJavaVM (); fprintf (stdout, "Java VM destory.\n"); return 0; }