/**
 * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string
 * @ins:	input multibyte string buffer
 * @outs:	on return contains the (allocated) output Unicode string
 * @outs_len:	length of output buffer in Unicode characters
 *
 * Convert the input multibyte string @ins, from the current locale into the
 * corresponding little endian, 2-byte (16-bit) Unicode string.
 *
 * If *@outs is NULL, the function allocates the string and the caller is
 * responsible for calling free(*@outs); when finished with it.
 *
 * If *@outs is not NULL, it must point to a buffer of @outs_len Unicode
 * characters.  That length has to include room for the terminating Unicode
 * NULL character; the buffer is never reallocated by this function.
 *
 * On success the function returns the number of Unicode characters written to
 * the output string *@outs (>= 0), not counting the terminating Unicode NULL
 * character. If the output string buffer was allocated, *@outs is set to it.
 *
 * On error, -1 is returned, and errno is set to the error code. The following
 * error codes can be expected:
 *	EINVAL		Invalid arguments (e.g. @ins or @outs is NULL).
 *	EILSEQ		The input string cannot be represented as a Unicode
 *			string according to the current locale.
 *	ENAMETOOLONG	Destination buffer is too small for input string
 *			(including its terminator).
 *	ENOMEM		Not enough memory to allocate destination buffer.
 */
int ntfs_mbstoucs(const char *ins, ntfschar **outs, int outs_len)
{
	ntfschar *ucs;
	const char *s;
	wchar_t wc;
	int i, o, cnt, ins_len, ucs_len, ins_size;
#ifdef HAVE_MBSINIT
	mbstate_t mbstate;
#endif

	if (!ins || !outs) {
		errno = EINVAL;
		return -1;
	}
	ucs = *outs;
	ucs_len = outs_len;
	/* A caller-supplied buffer of zero length cannot even hold the NULL. */
	if (ucs && !ucs_len) {
		errno = ENAMETOOLONG;
		return -1;
	}
	/* Determine the size of the multi-byte string in bytes. */
	ins_size = strlen(ins);
	/* Determine the length of the multi-byte string. */
	s = ins;
#if defined(HAVE_MBSINIT)
	memset(&mbstate, 0, sizeof(mbstate));
	ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate);
#ifdef __CYGWIN32__
	if (!ins_len && *ins) {
		/* Older Cygwin had broken mbsrtowcs() implementation. */
		ins_len = strlen(ins);
	}
#endif
#elif !defined(DJGPP)
	ins_len = mbstowcs(NULL, s, 0);
#else
	/* Eeek!!! DJGPP has broken mbstowcs() implementation!!! */
	ins_len = strlen(ins);
#endif
	/* The length probe failed; errno was set by the conversion routine. */
	if (ins_len == -1)
		return ins_len;
#ifdef HAVE_MBSINIT
	if ((s != ins) || !mbsinit(&mbstate)) {
#else
	if (s != ins) {
#endif
		errno = EILSEQ;
		return -1;
	}
	/* Add the NULL terminator. */
	ins_len++;
	if (!ucs) {
		/* No caller buffer: allocate one sized for string + terminator. */
		ucs_len = ins_len;
		ucs = ntfs_malloc(ucs_len * sizeof(ntfschar));
		if (!ucs)
			return -1;
	}
	/* Reset the conversion state before the real conversion pass. */
#ifdef HAVE_MBSINIT
	memset(&mbstate, 0, sizeof(mbstate));
#else
	mbtowc(NULL, NULL, 0);
#endif
	for (i = o = cnt = 0; i < ins_size; i += cnt, o++) {
		/* Reallocate memory if necessary or abort. */
		if (o >= ucs_len) {
			ntfschar *tc;

			/* Caller-supplied buffers are never grown. */
			if (ucs == *outs) {
				errno = ENAMETOOLONG;
				return -1;
			}
			/*
			 * We will never get here but hey, it's only a bit of
			 * extra code...
			 */
			/* New size in bytes, rounded to a multiple of 64. */
			ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63;
			tc = realloc(ucs, ucs_len);
			if (!tc)
				goto err_out;
			ucs = tc;
			/* Back from bytes to Unicode characters. */
			ucs_len /= sizeof(ntfschar);
		}
		/* Convert the multibyte character to a wide character. */
#ifdef HAVE_MBSINIT
		cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate);
#else
		cnt = mbtowc(&wc, ins + i, ins_size - i);
#endif
		/* A zero count means the NULL character was converted. */
		if (!cnt)
			break;
		if (cnt == -1)
			goto err_out;
		if (cnt < -1) {
			ntfs_log_trace("Eeek. cnt = %i\n", cnt);
			errno = EINVAL;
			goto err_out;
		}
		/* Make sure we are not overflowing the NTFS Unicode set. */
		if ((unsigned long)wc >= (unsigned long)(1 <<
				(8 * sizeof(ntfschar)))) {
			errno = EILSEQ;
			goto err_out;
		}
		/* Convert the CPU wide character to a LE Unicode character. */
		ucs[o] = cpu_to_le16(wc);
	}
#ifdef HAVE_MBSINIT
	/* Make sure we are back in the initial state. */
	if (!mbsinit(&mbstate)) {
		ntfs_log_trace("Eeek. mbstate not in initial state!\n");
		errno = EILSEQ;
		goto err_out;
	}
#endif
	/*
	 * The loop only checks capacity before storing a character, so @o may
	 * equal @ucs_len here.  Make sure there is a slot for the terminator
	 * instead of writing one element past the end of the buffer.
	 */
	if (o >= ucs_len) {
		ntfschar *tc;

		if (ucs == *outs) {
			errno = ENAMETOOLONG;
			return -1;
		}
		tc = realloc(ucs, ((size_t)o + 1) * sizeof(ntfschar));
		if (!tc)
			goto err_out;
		ucs = tc;
		ucs_len = o + 1;
	}
	/* Now write the NULL character. */
	ucs[o] = cpu_to_le16(L'\0');
	if (*outs != ucs)
		*outs = ucs;
	return o;
err_out:
	/* Only free buffers allocated here; preserve errno across free(). */
	if (ucs != *outs) {
		int eo = errno;
		free(ucs);
		errno = eo;
	}
	return -1;
}
|