`
winyee
  • 浏览: 53427 次
  • 性别: Icon_minigender_1
  • 来自: 深圳
社区版块
存档分类
最新评论

UTF-8 字节数组（byte[]）与 String 类型的相互转换

阅读更多
/// <summary>
/// Hand-rolled conversion between <see cref="String"/> values and UTF-8 style byte arrays.
/// </summary>
/// <remarks>
/// Only one-, two- and three-byte sequences are produced and accepted. Each UTF-16 code
/// unit is encoded on its own, so surrogate halves become separate three-byte sequences,
/// and U+0000 is written as the two-byte sequence C0 80. Four-byte lead bytes are rejected
/// by the decoders.
/// NOTE(review): this resembles the "modified UTF-8" used by Java's DataInput/DataOutput;
/// confirm before exchanging data with strict UTF-8 consumers.
/// </remarks>
class MyEncoding
    {
        /// <summary>
        /// Decodes a byte array containing one- to three-byte UTF-8 sequences into a string.
        /// </summary>
        /// <param name="buffer">The encoded bytes. Must not be null.</param>
        /// <returns>The decoded string; an empty string for an empty buffer.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
        /// <exception cref="IOException">
        /// The buffer ends in the middle of a sequence, a continuation byte is malformed,
        /// or a lead byte is not a one-, two- or three-byte lead.
        /// </exception>
        public static string EncodeUtf8ByteToString(byte[] buffer)
        {
            if (buffer == null)
            {
                throw new ArgumentNullException("buffer");
            }

            int utfLength = buffer.Length;

            // Every sequence yields exactly one char, so the byte count is an upper bound.
            char[] result = new char[utfLength];
            int count = 0;
            int index = 0;

            while (count < utfLength)
            {
                byte a = buffer[count++];

                if (a < 0x80)
                {
                    /* 0xxxxxxx */
                    result[index++] = (char)a;
                }
                else if ((a & 0xE0) == 0xC0)
                {
                    /* 110x xxxx 10xx xxxx */
                    if (count >= utfLength)
                    {
                        throw new IOException("Invalid UTF-8 encoding found, start of two byte char found at end.");
                    }

                    byte b = buffer[count++];
                    if ((b & 0xC0) != 0x80)
                    {
                        throw new IOException(
                            "Invalid UTF-8 encoding found, byte two does not start with 0x80.");
                    }

                    result[index++] = (char)(((a & 0x1F) << 6) | (b & 0x3F));
                }
                else if ((a & 0xF0) == 0xE0)
                {
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    // Two more bytes are required after the lead byte.
                    if (count + 1 >= utfLength)
                    {
                        throw new IOException(
                            "Invalid UTF-8 encoding found, start of three byte char found at end.");
                    }

                    byte b = buffer[count++];
                    byte c = buffer[count++];
                    if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80))
                    {
                        throw new IOException(
                            "Invalid UTF-8 encoding found, byte two does not start with 0x80.");
                    }

                    result[index++] = (char)(((a & 0x0F) << 12) |
                                              ((b & 0x3F) << 6) | (c & 0x3F));
                }
                else
                {
                    /* 10xx xxxx, 1111 xxxx */
                    throw new IOException("Invalid UTF-8 encoding found, aborting.");
                }
            }

            // Multi-byte sequences make the char count smaller than the byte count.
            return new String(result, 0, index);
        }

        /// <summary>
        /// Decodes a byte array containing one- to three-byte UTF-8 sequences into a string.
        /// </summary>
        /// <param name="bytearr">The encoded bytes, or null.</param>
        /// <returns>
        /// The decoded string, or null when <paramref name="bytearr"/> is null.
        /// </returns>
        /// <exception cref="IOException">
        /// The data is truncated, a continuation byte is malformed, or a lead byte is not
        /// a one-, two- or three-byte lead.
        /// </exception>
        public static String ReadString(byte[] bytearr)
        {
            // The original guard (Length > -1) could never fail, which left the
            // "return null" branch unreachable and made a null argument crash.
            if (bytearr == null)
            {
                return null;
            }

            int utflen = bytearr.Length;
            StringBuilder str = new StringBuilder(utflen);

            int c, char2, char3;
            int count = 0;

            while (count < utflen)
            {
                c = bytearr[count] & 0xff;

                // The high nibble of the lead byte selects the sequence length.
                switch (c >> 4)
                {
                    case 0:
                    case 1:
                    case 2:
                    case 3:
                    case 4:
                    case 5:
                    case 6:
                    case 7:
                        /* 0xxxxxxx */
                        count++;
                        str.Append((char)c);
                        break;
                    case 12:
                    case 13:
                        /* 110x xxxx 10xx xxxx */
                        count += 2;
                        if (count > utflen)
                        {
                            throw new IOException("Invalid UTF-8 encoding found, aborting.");
                        }
                        char2 = bytearr[count - 1];
                        if ((char2 & 0xC0) != 0x80)
                        {
                            throw new IOException("Invalid UTF-8 encoding found, aborting.");
                        }
                        str.Append((char)(((c & 0x1F) << 6) | (char2 & 0x3F)));
                        break;
                    case 14:
                        /* 1110 xxxx 10xx xxxx 10xx xxxx */
                        count += 3;
                        if (count > utflen)
                        {
                            throw new IOException("Invalid UTF-8 encoding found, aborting.");
                        }
                        char2 = bytearr[count - 2];
                        char3 = bytearr[count - 1];
                        if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
                        {
                            throw new IOException("Invalid UTF-8 encoding found, aborting.");
                        }
                        str.Append((char)(((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0)));
                        break;
                    default:
                        /* 10xx xxxx, 1111 xxxx */
                        throw new IOException("Invalid UTF-8 encoding found, aborting.");
                }
            }

            // The number of chars produced may be less than utflen
            return str.ToString();
        }

        /// <summary>
        /// Encodes a string into a byte array, one sequence per UTF-16 code unit.
        /// </summary>
        /// <param name="text">The text to encode, or null.</param>
        /// <returns>
        /// The encoded bytes; an empty array when <paramref name="text"/> is null.
        /// </returns>
        /// <exception cref="IOException">
        /// The encoded form would need more than <see cref="int.MaxValue"/> bytes.
        /// </exception>
        public static byte[] EncodeStringToUtf8Byte(string text)
        {
            if (text == null)
            {
                return new byte[0];
            }

            char[] charr = text.ToCharArray();
            uint utfLength = CountUtf8Bytes(charr);

            if (utfLength > int.MaxValue)
            {
                // Fixed: the two literals used to join as "suppliedstring".
                throw new IOException(
                    String.Format(
                        "Cannot marshall an encoded string longer than: {0} bytes, supplied " +
                        "string requires: {1} characters to encode", int.MaxValue, utfLength));
            }

            byte[] bytearr = new byte[utfLength];
            EncodeUTF8toBuffer(charr, bytearr);

            return bytearr;
        }

        /// <summary>
        /// Computes how many bytes <see cref="EncodeUTF8toBuffer"/> writes for the given chars.
        /// </summary>
        /// <param name="chars">The UTF-16 code units to measure.</param>
        /// <returns>The total encoded length in bytes.</returns>
        private static uint CountUtf8Bytes(char[] chars)
        {
            uint utfLength = 0;

            for (int i = 0; i < chars.Length; i++)
            {
                int c = chars[i];
                if ((c >= 0x0001) && (c <= 0x007F))
                {
                    utfLength++;
                }
                else if (c > 0x07FF)
                {
                    utfLength += 3;
                }
                else
                {
                    // U+0000 and U+0080..U+07FF both take two bytes.
                    utfLength += 2;
                }
            }

            return utfLength;
        }

        /// <summary>
        /// Writes the encoded form of <paramref name="chars"/> into <paramref name="buffer"/>,
        /// starting at index 0.
        /// </summary>
        /// <param name="chars">The UTF-16 code units to encode.</param>
        /// <param name="buffer">
        /// Destination array; the caller sizes it using <see cref="CountUtf8Bytes"/>.
        /// </param>
        private static void EncodeUTF8toBuffer(char[] chars, byte[] buffer)
        {
            int count = 0;

            for (int i = 0; i < chars.Length; i++)
            {
                int c = chars[i];
                if ((c >= 0x0001) && (c <= 0x007F))
                {
                    /* 0xxxxxxx */
                    buffer[count++] = (byte)c;
                }
                else if (c > 0x07FF)
                {
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    buffer[count++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
                    buffer[count++] = (byte)(0x80 | ((c >> 6) & 0x3F));
                    buffer[count++] = (byte)(0x80 | ((c >> 0) & 0x3F));
                }
                else
                {
                    /* 110x xxxx 10xx xxxx (also used for U+0000) */
                    buffer[count++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
                    buffer[count++] = (byte)(0x80 | ((c >> 0) & 0x3F));
                }
            }
        }
    }


Endian helper:

 /// <summary>
 /// Byte-order reversal helpers for primitive values and byte arrays.
 /// </summary>
 /// <remarks>
 /// Every overload returns its input with the byte order reversed; applying an overload
 /// twice yields the original value. The integer overloads use masks and shifts only, so
 /// they behave the same whether or not the assembly is compiled with overflow checking.
 /// </remarks>
 public class EndianSupport
    {

        /// <summary>Swaps the two bytes of a UTF-16 code unit.</summary>
        /// <param name="x">The value to reverse.</param>
        /// <returns><paramref name="x"/> with its bytes exchanged.</returns>
        public static char SwitchEndian(char x)
        {
            // x is promoted to int; the result always fits in 16 bits.
            return (char)(((x & 0xFF) << 8) | ((x >> 8) & 0xFF));
        }

        /// <summary>Swaps the two bytes of a 16-bit signed integer.</summary>
        /// <param name="x">The value to reverse.</param>
        /// <returns><paramref name="x"/> with its bytes exchanged.</returns>
        public static short SwitchEndian(short x)
        {
            // The swapped pattern may have the top bit set, so the narrowing cast
            // must not be overflow-checked.
            return unchecked((short)(((x & 0xFF) << 8) | ((x >> 8) & 0xFF)));
        }

        /// <summary>Reverses the four bytes of a 32-bit signed integer.</summary>
        /// <param name="x">The value to reverse.</param>
        /// <returns><paramref name="x"/> with its byte order reversed.</returns>
        public static int SwitchEndian(int x)
        {
            // Masking after the arithmetic right shifts discards sign-extension bits.
            return
                ((x & 0xFF) << 24) |
                (((x >> 8) & 0xFF) << 16) |
                (((x >> 16) & 0xFF) << 8) |
                ((x >> 24) & 0xFF);
        }

        /// <summary>Reverses the eight bytes of a 64-bit signed integer.</summary>
        /// <param name="x">The value to reverse.</param>
        /// <returns><paramref name="x"/> with its byte order reversed.</returns>
        public static long SwitchEndian(long x)
        {
            return
                ((x & 0xFFL) << 56) |
                (((x >> 8) & 0xFFL) << 48) |
                (((x >> 16) & 0xFFL) << 40) |
                (((x >> 24) & 0xFFL) << 32) |
                (((x >> 32) & 0xFFL) << 24) |
                (((x >> 40) & 0xFFL) << 16) |
                (((x >> 48) & 0xFFL) << 8) |
                ((x >> 56) & 0xFFL);
        }

        /// <summary>Swaps the two bytes of a 16-bit unsigned integer.</summary>
        /// <param name="x">The value to reverse.</param>
        /// <returns><paramref name="x"/> with its bytes exchanged.</returns>
        public static ushort SwitchEndian(ushort x)
        {
            return (ushort)(((x & 0xFF) << 8) | ((x >> 8) & 0xFF));
        }

        /// <summary>Reverses the four bytes of a 32-bit unsigned integer.</summary>
        /// <param name="x">The value to reverse.</param>
        /// <returns><paramref name="x"/> with its byte order reversed.</returns>
        public static uint SwitchEndian(uint x)
        {
            return
                ((x & 0xFFu) << 24) |
                (((x >> 8) & 0xFFu) << 16) |
                (((x >> 16) & 0xFFu) << 8) |
                ((x >> 24) & 0xFFu);
        }

        /// <summary>Reverses the eight bytes of a 64-bit unsigned integer.</summary>
        /// <param name="x">The value to reverse.</param>
        /// <returns><paramref name="x"/> with its byte order reversed.</returns>
        public static ulong SwitchEndian(ulong x)
        {
            return
                ((x & 0xFFUL) << 56) |
                (((x >> 8) & 0xFFUL) << 48) |
                (((x >> 16) & 0xFFUL) << 40) |
                (((x >> 24) & 0xFFUL) << 32) |
                (((x >> 32) & 0xFFUL) << 24) |
                (((x >> 40) & 0xFFUL) << 16) |
                (((x >> 48) & 0xFFUL) << 8) |
                ((x >> 56) & 0xFFUL);
        }

        /// <summary>Reverses the eight bytes of a double's binary representation.</summary>
        /// <param name="x">The value to reverse.</param>
        /// <returns>The double whose bit pattern is the byte-reversed pattern of <paramref name="x"/>.</returns>
        public static double SwitchEndian(double x)
        {
            // Bit-exact reinterpretation; avoids the undisposed stream/reader/writer
            // objects the previous implementation allocated on every call.
            return BitConverter.Int64BitsToDouble(SwitchEndian(BitConverter.DoubleToInt64Bits(x)));
        }


        /// <summary>Reverses the four bytes of a float's binary representation.</summary>
        /// <param name="x">The value to reverse.</param>
        /// <returns>The float whose bit pattern is the byte-reversed pattern of <paramref name="x"/>.</returns>
        public static float SwitchEndian(float x)
        {
            // GetBytes and ToSingle use the same machine byte order, so reversing
            // in between is a pure byte swap on any platform.
            byte[] raw = BitConverter.GetBytes(x);
            Array.Reverse(raw);
            return BitConverter.ToSingle(raw, 0);
        }


        /// <summary>Returns a reversed copy of a byte array.</summary>
        /// <param name="x">The bytes to reverse. Must not be null; it is not modified.</param>
        /// <returns>A new array holding the bytes of <paramref name="x"/> in reverse order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="x"/> is null.</exception>
        public static byte[] SwitchEndian(byte[] x)
        {
            if (x == null)
            {
                throw new ArgumentNullException("x");
            }

            byte[] rc = new byte[x.Length];
            int j = x.Length - 1;
            for (int i = 0; i < x.Length; i++)
            {
                rc[i] = x[j];
                j--;
            }
            return rc;
        }
    }
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics