NetBSD/OpenBSD msdosfs文件系统的UTF-8补丁-pagx-ChinaUnix博客

小雨看世界

首页　| 　博文目录　| 　关于我

pagx

博客访问： 942055
博文数量： 201
博客积分： 8078
博客等级：中将
技术积分： 2162
用户组：普通用户
注册时间： 2008-05-20 17:22

文章分类

全部博文（201）

android（5）
杂文（17）
隐藏（8）
develop（41）
linux（2）
FreeBSD（27）

用户手册（0）
player（8）
bittorrent（36）
未分配的博文（57）

文章存档

2013年（3）

2012年（11）

2011年（34）

2010年（25）

2009年（51）

2008年（77）

我的朋友

相关博文

NetBSD/OpenBSD msdosfs文件系统的UTF-8补丁

分类： BSD

2010-05-11 21:38:38

该补丁基于 NetBSD 5.0.1 的 i386 版本（对 OpenBSD 同样适用），是参考 FreeBSD 的 msdosfs 补丁编写的。注意：使用该补丁时要求系统的 locale 使用 UTF-8，GBK/GB2312/BIG5 一概不支持。还好 NetBSD/OpenBSD 已经直接支持 NTFS 的中文了。

diff -rdup old/direntry.h new/direntry.h
--- old/direntry.h    2010-05-08 22:10:26.000000000 +0800
+++ new/direntry.h    2010-05-10 10:21:35.000000000 +0800
@@ -133,6 +133,7 @@ int    unix2winfn(const u_char *un, int unl
int    winChkName(const u_char *un, int unlen, struct winentry *wep,
      int chksum);
int    win2unixfn(struct winentry *wep, struct dirent *dp, int chksum);
+int    dosdir_wchar_to_utf8(struct dirent *dp, int chksum);
u_int8_t winChksum(u_int8_t *name);
int    winSlotCnt(const u_char *un, int unlen);
#endif    /* _KERNEL */
diff -rdup old/msdosfs_conv.c new/msdosfs_conv.c
--- old/msdosfs_conv.c    2010-05-08 22:10:26.000000000 +0800
+++ new/msdosfs_conv.c    2010-05-10 10:21:35.000000000 +0800
@@ -91,6 +91,71 @@ u_long lastday;
u_short lastddate;
u_short lastdtime;

+static int
+wput_utf8(char *s, size_t n, u_int16_t wc)
+{
+    if (wc & 0xF800) {
+        if (n < 3)
+            return 0;
+        s[0] = 0xE0 | (wc >> 12);
+        s[1] = 0x80 | ((wc >> 6) & 0x3F);
+        s[2] = 0x80 | (wc & 0x3F);
+        return 3;
+    } else if (wc & 0x0780) {
+        if (n < 2)
+            return 0;
+        s[0] = 0xC0 | (wc >> 6);
+        s[1] = 0x80 | ((wc) & 0x3F);
+        return 2;
+    } else {
+        if (n < 1)
+            return 0;
+        s[0] = wc;
+        return 1;
+    }
+    return 0;
+}
+
+static u_int16_t
+wget_utf8(const char **str, size_t *sz)
+{
+    int c;
+    u_int16_t rune = 0;
+    const char *s = *str;
+    static const int _utf_count[16] = {
+        1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 2, 2, 3, 0,
+    };
+
+    c = _utf_count[(s[0] & 0xF0) >> 4];
+    if (c == 0 || c > *sz) {
+decoding_error:
+        c = 1;
+    }
+
+    switch (c) {
+    case 1:
+        rune = s[0] & 0xFF;
+        break;
+    case 2:
+        if ((s[1] & 0xC0) != 0x80)
+            goto decoding_error;
+        rune = ((s[0] & 0x1F) << 6) | (s[1] & 0x3f);
+        break;
+    case 3:
+        if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
+            goto decoding_error;
+        rune = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) |
+            (s[2] & 0x3F);
+        break;
+    }
+
+    *str += c;
+    *sz -= c;
+    return rune;
+}
+
+
/*
  * Convert the unix version of time to dos's idea of time to be used in
  * file timestamps. The passed in unix time is assumed to be in GMT.
@@ -588,13 +653,30 @@ unix2winfn(un, unlen, wep, cnt, chksum)
     u_int8_t *wcp;
     int i;

+    size_t count1;
+    const char * name;
+    u_char buf[WIN_MAXLEN * 2];
+
     /*
      * Drop trailing blanks and dots
      */
     for (cp = un + unlen; *--cp == ' ' || *cp == '.'; unlen--);

-    un += (cnt - 1) * WIN_CHARS;
-    unlen -= (cnt - 1) * WIN_CHARS;
+    name = un;
+    count1 = 0;
+    while (unlen > 0) {
+        u_int16_t wc;
+        size_t len1 = unlen;
+        wc = wget_utf8(&name, &len1);
+        buf[count1++] = (wc & 0xFF);
+        buf[count1++] = (wc >> 8);
+        unlen = len1;
+    }
+    unlen = count1;
+    un = buf;
+
+    un += (cnt - 1) * WIN_CHARS * 2;
+    unlen -= (cnt - 1) * WIN_CHARS * 2;

     /*
      * Initialize winentry to some useful default
@@ -610,22 +692,25 @@ unix2winfn(un, unlen, wep, cnt, chksum)
      * Now convert the filename parts
      */
     for (wcp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
-        if (--unlen < 0)
+        unlen -= 2;
+        if (unlen < 0)
             goto done;
         *wcp++ = *un++;
-        *wcp++ = 0;
+        *wcp++ = *un++;
     }
     for (wcp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
-        if (--unlen < 0)
+        unlen -= 2;
+        if (unlen < 0)
             goto done;
         *wcp++ = *un++;
-        *wcp++ = 0;
+        *wcp++ = *un++;
     }
     for (wcp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
-        if (--unlen < 0)
+        unlen -= 2;
+        if (unlen < 0)
             goto done;
         *wcp++ = *un++;
-        *wcp++ = 0;
+        *wcp++ = *un++;
     }
     if (!unlen)
         wep->weCnt |= WIN_LAST;
@@ -649,9 +734,14 @@ winChkName(un, unlen, wep, chksum)
     struct winentry *wep;
     int chksum;
{
-    u_int8_t *cp;
+    u_char we_name[2 + sizeof(*wep)];
+    u_int8_t *cp, *we_p;
     int i;

+    size_t count1;
+    const char * name;
+    u_char buf[WIN_MAXLEN * 2];
+
     /*
      * First compare checksums
      */
@@ -662,10 +751,26 @@ winChkName(un, unlen, wep, chksum)
     if (chksum == -1)
         return -1;

+    count1 = 0;
+    name = (const char *)un;
+    while (unlen > 0) {
+        u_int16_t wc;
+        size_t len1 = unlen;
+        /* const char * t_name = name; */
+        wc = wget_utf8(&name, &len1);
+        buf[count1++] = (wc & 0xFF);
+        buf[count1++] = (wc >> 8);
+        /* assert(t_name != name); */
+        unlen = len1;
+    }
+    unlen = count1;
+    un = buf;
+
+
     /*
      * Offset of this entry
      */
-    i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
+    i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS * 2;
     un += i;
     if ((unlen -= i) < 0)
         return -1;
@@ -682,12 +788,13 @@ winChkName(un, unlen, wep, chksum)
             return -1;
     }

-    if ((wep->weCnt&WIN_LAST) && unlen > WIN_CHARS)
+    if ((wep->weCnt&WIN_LAST) && unlen > WIN_CHARS * 2)
         return -1;

     /*
      * Compare the name parts
      */
+#if 0
     for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
         if (--unlen < 0) {
             if (!*cp++ && !*cp)
@@ -715,6 +821,25 @@ winChkName(un, unlen, wep, chksum)
         if (u2l[*cp++] != u2l[*un++] || *cp++)
             return -1;
     }
+#endif
+    cp = we_p = we_name;
+    memcpy(we_p, wep->wePart1, sizeof(wep->wePart1));
+    we_p += sizeof(wep->wePart1);
+    memcpy(we_p, wep->wePart2, sizeof(wep->wePart2));
+    we_p += sizeof(wep->wePart2);
+    memcpy(we_p, wep->wePart3, sizeof(wep->wePart3));
+    we_p += sizeof(wep->wePart3);
+    we_p[0] = we_p[1] = 0;
+
+    while (cp < we_p) {
+        if (--unlen < 0) {
+            if (cp[0] == 0 && cp[1] == 0)
+                return chksum;
+            return -1;
+        }
+        if (*cp++ != *un++)
+            return -1;
+    }
     return chksum;
}

@@ -732,7 +858,7 @@ win2unixfn(wep, dp, chksum)
     u_int8_t *np, *ep = dp->d_name + WIN_MAXLEN;
     int i;

-    if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS)
+    if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS * 2)
      || !(wep->weCnt&WIN_CNT))
         return -1;

@@ -744,7 +870,7 @@ win2unixfn(wep, dp, chksum)
         /*
          * This works even though d_namlen is one byte!
          */
-        dp->d_namlen = (wep->weCnt&WIN_CNT) * WIN_CHARS;
+        dp->d_namlen = (wep->weCnt&WIN_CNT) * WIN_CHARS * 2;
     } else if (chksum != wep->weChksum)
         chksum = -1;
     if (chksum == -1)
@@ -753,9 +879,20 @@ win2unixfn(wep, dp, chksum)
     /*
      * Offset of this entry
      */
-    i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
+    i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS * 2;
     np = (u_int8_t *)dp->d_name + i;

+    memcpy(np, wep->wePart1, sizeof(wep->wePart1));
+    np += sizeof(wep->wePart1);
+    memcpy(np, wep->wePart2, sizeof(wep->wePart2));
+    np += sizeof(wep->wePart2);
+    memcpy(np, wep->wePart3, sizeof(wep->wePart3));
+    np += sizeof(wep->wePart3);
+
+    /* avoid warning */
+    ep = cp = np;
+
+#if 0
     /*
      * Convert the name parts
      */
@@ -822,6 +959,7 @@ win2unixfn(wep, dp, chksum)
         if (*cp++)
             return -1;
     }
+#endif
     return chksum;
}

@@ -848,10 +986,65 @@ winSlotCnt(un, unlen)
     const u_char *un;
     int unlen;
{
-    for (un += unlen; unlen > 0; unlen--)
-        if (*--un != ' ' && *un != '.')
-            break;
-    if (unlen > WIN_MAXLEN)
+    u_int16_t wc;
+    size_t count = 0;
+
+    size_t count1 = 0;
+    const char * name = un;
+    while (unlen > 0) {
+        int len = unlen;
+        wc = wget_utf8(&name, &len);
+        count += 2;
+        if (wc != ' ' && wc != '.')
+            count1 = count;
+        unlen = len;
+    }
+    if (count1 > WIN_MAXLEN * 2)
         return 0;
-    return howmany(unlen, WIN_CHARS);
+    return howmany(count1, WIN_CHARS * 2);
}
+
+int
+dosdir_wchar_to_utf8(dp, chksum)
+struct dirent *dp;
+int chksum;
+{
+    u_int16_t wc;
+    int n, count;
+
+    char *cp, *ep;
+    u_int8_t *np;
+    u_int8_t name[sizeof(dp->d_name)];
+
+    if ((dp->d_namlen & 0x01) ||
+        dp->d_namlen > sizeof(name))
+        return -1;
+
+    memcpy(name, dp->d_name, dp->d_namlen);
+    count = (dp->d_namlen / 2);
+
+    np = name;
+    cp = dp->d_name;
+    ep = dp->d_name + sizeof(dp->d_name);
+    while (count-- > 0 && cp < ep) {
+        wc = *np++;
+        wc |= (*np++ << 8);
+        switch(wc) {
+            case '/':
+                dp->d_namlen = (cp - dp->d_name);
+                *cp = 0;
+                return -1;
+            case 0:
+                dp->d_namlen = (cp - dp->d_name);
+                *cp = 0;
+                return chksum;
+        }
+        n = wput_utf8(cp, ep - cp, wc);
+        if (n == 0)
+            break;
+        cp += n;
+    }
+    dp->d_namlen = (cp < ep)? (cp - dp->d_name): dp->d_namlen;
+    return (cp < ep)? chksum: -1;
+}
+
diff -rdup old/msdosfs_vnops.c new/msdosfs_vnops.c
--- old/msdosfs_vnops.c    2010-05-08 22:10:26.000000000 +0800
+++ new/msdosfs_vnops.c    2010-05-10 10:21:44.000000000 +0800
@@ -1637,6 +1637,7 @@ msdosfs_readdir(v)
                  offset / sizeof(struct direntry);
                 dirbuf->d_type = DT_REG;
             }
+            chksum = dosdir_wchar_to_utf8(dirbuf, chksum);
             if (chksum != winChksum(dentp->deName))
                 dirbuf->d_namlen = dos2unixfn(dentp->deName,
                  (u_char *)dirbuf->d_name,

阅读(1547) | 评论(0) | 转发(0) |

上一篇：[整理]FreeBSD msdosfs/ntfs文件系统补丁

下一篇：windows 下生成 dump 文件

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6