嗚嗚喔學習筆記: 7月 2023

搜尋此網誌

2023年7月6日 星期四

C# - run-length encoding - RLE - Unity ( For byte or Ushort )

這段CODE只處理連續byte資料 或是連續ushort資料 非string版本
在資料集中情況下壓縮比還不錯 , 依據資料格式類型壓縮到上百倍也不是問題
未來: 可加入平行化處理 加速Encode 跟 Decode

各種演算法參考資料
Run Length Encoding 參考資料

其他壓縮法簡單比較:
CodeTree ( 四元樹演算法 O(n)
LZMA ( 對重複的組合 壓縮比較有效 可平行化 O(n)
RunLen ( 目前使用 實作簡單 可平行化 適合資料單一結構 O(n)
QOI (針對圖片地壓縮格式 純資料可能沒這麼好
Huffman Compression ( 把重複地編碼建表 再來壓縮

Code:


using System;
using System.Collections.Generic;


public class RunLengthCompression
{
    public byte[] EncodeUshort(ushort[] bAy)
    {
        if (bAy == null)
        {
            return null;
        }

        List codeList = new List();
        for (int i = 0; i < 4; i++)
        {
            codeList.Add(0);
        }

        byte count = 1;
        for (int i = 0; i < bAy.Length - 1; i++)
        {
            ushort now = bAy[i];
            ushort next = bAy[i + 1];
            if (now.Equals(next))
            {
                count++;
                if (count == 255 || i >= bAy.Length - 2)
                {
                    AddUshort(codeList, now);
                    codeList.Add(count);
                    count = 0;
                }
            }
            else
            {
                if (count == 255)
                {
                    AddUshort(codeList, now);
                    codeList.Add(count);
                }
                else if (i >= bAy.Length - 2)
                {
                    AddUshort(codeList, now);
                    codeList.Add(count);

                    AddUshort(codeList, next);
                    codeList.Add(1);
                }
                else
                {
                    AddUshort(codeList, now);
                    codeList.Add(count);
                    count = 1;
                }
            }
        }

        byte[] result = codeList.ToArray();
        byte[] grouptCountBytes = BitConverter.GetBytes(bAy.Length);
        for (int i = 0; i < 4; i++)
        {
            result[i] = grouptCountBytes[i];
        }

        return result;
    }
    public ushort[] DecodeUshort(byte[] bAy, ushort[] ogAy = null)
    {
        byte header = 4;
        if (bAy == null || bAy.Length < 4)
        {
            return null;
        }

        int orginLen = BitConverter.ToInt32(bAy, 0);
        ushort[] result = new ushort[orginLen];
        int idx = 0;
        for (int i = header; i < bAy.Length; i += 3)
        {
            ushort val = GetUshort(bAy, i);
            byte count = bAy[i + 2];
            for (int j = 0; j < count; j++)
            {
                if (idx >= orginLen)
                {
                    ErrorLog("over idx ?");
                    break;
                }

                result[idx] = val;
                if (ogAy != null && ogAy[idx] != result[idx])
                {
                    ErrorLog($"not same in {idx}");
                    break;
                }
                idx++;
            }
        }
        return result;
    }
    private void AddUshort(List list, ushort val)
    {
        list.Add((byte)val);
        list.Add((byte)(val >> 8));
    }
    private ushort GetUshort(byte[] bAy, int idx)
    {
        ushort val = (ushort)((bAy[idx]) + (bAy[idx + 1] << 8));
        return val;
    }

    public byte[] Encode(byte[] bAy)
    {
        if (bAy == null)
        {
            return null;
        }

        List codeList = new List();
        for (int i = 0; i < 4; i++)
        {
            codeList.Add(0);
        }

        byte count = 1;
        for (int i = 0; i < bAy.Length - 1; i++)
        {
            byte now = bAy[i];
            byte next = bAy[i + 1];
            if (now == next)
            {
                count++;
                if (count == 255 || i >= bAy.Length - 2)
                {
                    codeList.Add(now);
                    codeList.Add(count);
                    count = 0;
                }
            }
            else
            {
                if (count == 255)
                {
                    codeList.Add(now);
                    codeList.Add(count);
                }
                else if (i >= bAy.Length - 2)
                {
                    codeList.Add(now);
                    codeList.Add(count);

                    codeList.Add(next);
                    codeList.Add(1);
                }
                else
                {
                    codeList.Add(now);
                    codeList.Add(count);
                    count = 1;
                }
            }
        }

        int orginLen = bAy.Length;
        codeList[0] = (byte)orginLen;
        codeList[1] = (byte)(orginLen >> 8);
        codeList[2] = (byte)(orginLen >> 16);
        codeList[3] = (byte)(orginLen >> 24);

        byte[] result = codeList.ToArray();
        return result;
    }
    public byte[] Decode(byte[] bAy, byte[] ogAy = null)
    {
        byte header = 4;
        if (bAy == null || bAy.Length < 4)
        {
            return null;
        }
        int orginLen = (bAy[3] << 24) | (bAy[2] << 16) | (bAy[1] << 8) | bAy[0];

        byte[] result = new byte[orginLen];
        int idx = 0;
        for (int i = header; i < bAy.Length; i += 2)
        {
            byte val = bAy[i];
            byte count = bAy[i + 1];
            for (int j = 0; j < count; j++)
            {
                if (idx >= orginLen)
                {
                    ErrorLog("over idx ?");
                    goto ERROR;
                }

                result[idx] = val;
                if (ogAy != null && ogAy[idx] != result[idx])
                {
                    ErrorLog($"not same in {idx}");
                    goto ERROR;
                }
                idx++;
            }
        }
        return result;

    ERROR:
        {
            return null;
        }
    }
    public static void TestCase(byte[] orgin)
    {
        RunLengthCompression compreesion = new RunLengthCompression();

        byte[] encode = compreesion.Encode(orgin);
        byte[] decode = compreesion.Decode(encode);

        if (orgin.Length != decode.Length)
        {
            ErrorLog("not same len");
        }

        for (int i = 0; i < orgin.Length; i++)
        {
            if (orgin[i] != decode[i])
            {
                ErrorLog($"not same [{i}] ,{orgin[i]},{decode[i]}");
                continue;
            }
        }
    }
    public static void TestCase(ushort[] orgin)
    {
        RunLengthCompression compreesion = new RunLengthCompression();

        byte[] encode = compreesion.EncodeUshort(orgin);
        ushort[] decode = compreesion.DecodeUshort(encode);

        if (orgin.Length != decode.Length)
        {
            ErrorLog("not same len");
        }

        for (int i = 0; i < orgin.Length; i++)
        {
            if (orgin[i] != decode[i])
            {
                ErrorLog($"not same [{i}] ,{orgin[i]},{decode[i]}");
                continue;
            }
        }
    }

    static void ErrorLog(string errorLog, params string[] p)
    {
        UnityEngine.Debug.LogErrorFormat(errorLog, p);
    }
}


Test Code:


    using NUnit.Framework;

public class RunLengthCompressionTester
{
    [Test]
    public void ByteUshort123()
    {
        RunLengthCompression.TestCase(new ushort[] { 1, 2, 3 });
    }

    [Test]
    public void ByteUshort1212()
    {
        RunLengthCompression.TestCase(new ushort[] { 1, 2, 1, 2 });
    }

    [Test]
    public void ByteUshort655_655_1_1()
    {
        RunLengthCompression.TestCase(new ushort[] { 655, 655, 1, 1 });
    }
    [Test]
    public void ByteUshort1111_9999_9999_9999()
    {
        RunLengthCompression.TestCase(new ushort[] { 1111, 9999, 9999, 9999 });
    }

    [Test]
    public void Ushort6666Reapeat257Times()
    {
        ushort[] b = new ushort[257];
        for (int i = 0; i < 257; i++)
        {
            b[i] = 6666;
        }

        RunLengthCompression.TestCase(b);
    }

    [Test]
    public void Ushort6666Reapeat513Times()
    {
        ushort[] b = new ushort[513];
        for (int i = 0; i < 513; i++)
        {
            b[i] = 6666;
        }

        RunLengthCompression.TestCase(b);
    }

    [Test]
    public void Byte123()
    {
        RunLengthCompression.TestCase(new byte[] { 1, 2, 3 });
    }
    [Test]
    public void Byte1212()
    {
        RunLengthCompression.TestCase(new byte[] { 1, 2, 1, 2 });
    }
    [Test]
    public void Byte122()
    {
        RunLengthCompression.TestCase(new byte[] { 1, 2, 2 });
    }
    [Test]
    public void Byte1122()
    {
        RunLengthCompression.TestCase(new byte[] { 1, 1, 2, 2 });
    }
    [Test]
    public void Byte1112()
    {
        RunLengthCompression.TestCase(new byte[] { 1, 1, 1, 2 });
    }
    [Test]
    public void Byte12123()
    {
        RunLengthCompression.TestCase(new byte[] { 1, 2, 1, 2, 3 });
    }
    [Test]
    public void Byte12344()
    {
        RunLengthCompression.TestCase(new byte[] { 1, 2, 3, 4, 4 });
    }
    [Test]
    public void Byte257()
    {
        byte[] b = new byte[257];
        for (int i = 0; i < 256; i++)
        {
            b[i] = 1;
        }

        b[256] = 2;
        RunLengthCompression.TestCase(b);
    }
    [Test]
    public void Byte258()
    {
        byte[] b = new byte[258];
        for (int i = 0; i < 256; i++)
        {
            b[i] = 1;
        }

        b[256] = 2;
        b[257] = 2;
        RunLengthCompression.TestCase(b);
    }
    [Test]
    public void Byte259()
    {
        byte[] b = new byte[259];
        for (int i = 2; i < 259; i++)
        {
            b[i] = 1;
        }

        b[0] = 2;
        b[1] = 2;
        RunLengthCompression.TestCase(b);
    }
    [Test]
    public void Byte515()
    {
        byte[] b = new byte[515];
        for (int i = 2; i < 515; i++)
        {
            b[i] = 1;
        }

        b[0] = 2;
        b[1] = 2;
        RunLengthCompression.TestCase(b);
    }
}