Search code examples
c#performanceazureazure-blob-storageazure-caching

Techniques for storing objects larger than 8MB in Azure cache


Does anyone have any suggestions for how to store objects that are larger than 8 MB in the Azure Cache? In my case I use a byte[] to hold files that come from blob storage. My idea is to split the byte[] into smaller chunks, store each chunk as a partial file, and then merge the chunks again after retrieving them from the cache.

Pseudocode:

Storing

bs <- split file into byte[] that are smaller than 8MB
s <- string[bs.Length]
foreach bs with index i
    s[i] <- name of bs[i]
    Add bs[i] to Azure cache using s[i] as key

Add s to cache

Retrieving:

s <- Get list of byte[] names
bs <- byte[s.Length]
foreach s with index i
   bs[i] <- Get byte[] using s[i]

outBs <- Join bs to one byte[]
  • Are there any performance issues here?

  • Is there any other way that out-performs Azure cache?


Solution

  • After a few hours of work I found that it is possible to split large files into smaller parts and store those parts in the Azure cache. I would like to share the code with you.

    Class for splitting and joining byte[]

        /// <summary>
        /// Splits a byte array into chunks of at most <see cref="kMaxFileSize"/> bytes and
        /// joins such chunks back into one array, so that payloads above the 8 MB cache
        /// limit can be stored piecewise.
        /// </summary>
        public class CacheHelper
        {
            /// <summary>Maximum size, in bytes, of a single chunk produced by <see cref="Split"/>.</summary>
            // Public (was private): Cache.Put reads CacheHelper.kMaxFileSize, which did not
            // compile while the constant was private.
            public const int kMaxFileSize = 8000000;

            // Stored from the constructor; not read by any member of this class.
            private readonly int fFileSize;
            private readonly string fFileName;

            /// <summary>Creates a helper for one named payload.</summary>
            /// <param name="sizeOfFile">Size of the payload in bytes.</param>
            /// <param name="nameOfFile">Name handed to the resulting <see cref="CachingObjectHolder"/>.</param>
            public CacheHelper(int sizeOfFile, string nameOfFile)
            {
                fFileSize = sizeOfFile;
                fFileName = nameOfFile;
            }

            /// <summary>
            /// Cuts <paramref name="file"/> into consecutive chunks of at most
            /// <see cref="kMaxFileSize"/> bytes.
            /// </summary>
            /// <param name="file">The payload to split.</param>
            /// <returns>
            /// A holder whose chunks, concatenated in order, equal <paramref name="file"/>.
            /// An empty payload yields a single empty chunk.
            /// </returns>
            /// <exception cref="System.ArgumentNullException"><paramref name="file"/> is null.</exception>
            public CachingObjectHolder Split(byte[] file)
            {
                if (file == null)
                {
                    throw new System.ArgumentNullException("file");
                }

                var partialList = new List<byte[]>();
                var offset = 0;
                var remaining = file.Length;

                // do/while so that an empty payload still produces one (empty) chunk.
                do
                {
                    var length = remaining > kMaxFileSize ? kMaxFileSize : remaining;
                    var partial = new byte[length];

                    // Block copy instead of a per-byte loop with a modulo on every byte.
                    System.Buffer.BlockCopy(file, offset, partial, 0, length);
                    partialList.Add(partial);

                    offset += length;
                    remaining -= length;
                }
                while (remaining > 0);

                return new CachingObjectHolder(fFileName, partialList);
            }

            /// <summary>Concatenates the chunks of a holder, in list order, into one array.</summary>
            /// <param name="cachingObjectHolder">Holder whose <c>Partials</c> are joined.</param>
            /// <returns>A new array containing every chunk back to back.</returns>
            /// <exception cref="System.ArgumentNullException"><paramref name="cachingObjectHolder"/> is null.</exception>
            /// <exception cref="System.ArgumentException">One of the chunks is null.</exception>
            public static byte[] Join(CachingObjectHolder cachingObjectHolder)
            {
                if (cachingObjectHolder == null)
                {
                    throw new System.ArgumentNullException("cachingObjectHolder");
                }

                var partials = cachingObjectHolder.Partials;
                if (partials.Any(x => x == null))
                {
                    // Previously surfaced as a bare NullReferenceException from the size sum.
                    throw new System.ArgumentException("At least one chunk is missing (null).", "cachingObjectHolder");
                }

                var output = new byte[partials.Sum(x => x.Length)];
                var offset = 0;
                foreach (var partial in partials)
                {
                    System.Buffer.BlockCopy(partial, 0, output, offset, partial.Length);
                    offset += partial.Length;
                }

                return output;
            }

            /// <summary>
            /// Produces a byte array of <paramref name="size"/> bytes by sizing a temporary
            /// file and reading it back.
            /// </summary>
            /// <param name="size">Length of the array to produce.</param>
            /// <returns>The bytes read back from the temporary file.</returns>
            public static byte[] CreateFile(int size)
            {
                var tempFile = Path.GetTempFileName();
                try
                {
                    using (var stream = new FileStream(tempFile, FileMode.OpenOrCreate))
                    {
                        using (var memStream = new MemoryStream())
                        {
                            stream.SetLength(size);
                            stream.CopyTo(memStream);
                            return memStream.ToArray();
                        }
                    }
                }
                finally
                {
                    // The temp file used to be left behind on every call.
                    File.Delete(tempFile);
                }
            }
        }
    

    Here is the code for communication with the Azure Cache

        /// <summary>
        /// Static facade over the Azure <c>DataCache</c> that stores byte arrays, chunking
        /// payloads that <see cref="CacheHelper.Split"/> breaks into more than one part.
        /// An index stored under <c>kFileListName</c> maps each chunked name to its chunk keys.
        /// </summary>
        // NOTE(review): the index is read, modified and written back without any locking;
        // concurrent writers can presumably overwrite each other's index updates - confirm
        // whether callers serialise access.
        public class Cache
        {
            private const string kFileListName = "FileList";

            private static DataCacheFactory fDataCacheFactory;

            /// <summary>
            /// Factory used to obtain the default cache. Created on first use and then reused;
            /// the property previously constructed a new factory on every access.
            /// </summary>
            public static DataCacheFactory DataCacheFactory
            {
                get
                {
                    if (fDataCacheFactory == null)
                    {
                        fDataCacheFactory = new DataCacheFactory();
                    }

                    return fDataCacheFactory;
                }
            }

            private static DataCache fDataCache;

            /// <summary>The default cache, resolved once through <see cref="DataCacheFactory"/>.</summary>
            public static DataCache DataCache
            {
                get
                {
                    if (fDataCache == null)
                    {
                        fDataCache = DataCacheFactory.GetDefaultCache();
                    }

                    return fDataCache;
                }
            }

            /// <summary>Reads the payload stored under <paramref name="name"/>.</summary>
            /// <param name="name">Name the payload was stored under.</param>
            /// <returns>
            /// The payload; for a chunked payload the chunks are fetched and joined in index
            /// order. Returns null when any chunk is no longer in the cache, so a partially
            /// evicted payload is reported as a miss instead of crashing in the join.
            /// </returns>
            public static byte[] Get(string name)
            {
                var dic = GetFileList();
                List<Tuple<string, int>> list;
                if (dic == null || !dic.TryGetValue(name, out list))
                {
                    // Not indexed as chunked: treat it as a directly stored entry.
                    return (byte[])DataCache.Get(name);
                }

                var cache = DataCache;
                var input = new List<byte[]>(list.Count);
                foreach (var entry in list.OrderBy(x => x.Item2))
                {
                    var chunk = cache.Get(entry.Item1) as byte[];
                    if (chunk == null)
                    {
                        return null;
                    }

                    input.Add(chunk);
                }

                return CacheHelper.Join(new CachingObjectHolder(name, input));
            }

            /// <summary>
            /// Stores <paramref name="file"/> under <paramref name="name"/>, replacing any
            /// earlier payload with that name.
            /// </summary>
            /// <param name="name">Name to store the payload under.</param>
            /// <param name="file">Payload bytes.</param>
            /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
            public static void Put(string name, byte[] file)
            {
                if (file == null)
                {
                    throw new ArgumentNullException("file");
                }

                var dic = GetFileList() ?? new Dictionary<string, List<Tuple<string, int>>>();
                var indexChanged = false;

                // Drop chunks left by an earlier chunked Put of the same name; otherwise a
                // shorter replacement would leave orphaned chunks and a stale index entry.
                List<Tuple<string, int>> stale;
                if (dic.TryGetValue(name, out stale))
                {
                    RemoveChunks(stale);
                    dic.Remove(name);
                    indexChanged = true;
                }

                // Let the splitter decide whether chunking is needed (more than one part)
                // instead of comparing against the helper's size constant from here.
                var output = new CacheHelper(file.Length, name).Split(file);
                if (output.Partials.Count > 1)
                {
                    // An earlier un-chunked copy under the plain name would never be read
                    // again once the name is indexed, so remove it.
                    DataCache.Remove(name);

                    var partials = new List<Tuple<string, int>>(output.CachingObjects.Count);
                    foreach (var chunk in output.CachingObjects)
                    {
                        // Put rather than Add, so re-storing an existing key does not fail.
                        DataCache.Put(chunk.Name, output.Partials[chunk.Index]);
                        partials.Add(Tuple.Create(chunk.Name, chunk.Index));
                    }

                    dic[name] = partials;
                    indexChanged = true;
                }
                else
                {
                    DataCache.Put(name, file);
                }

                if (indexChanged)
                {
                    PutFileList(dic);
                }
            }

            /// <summary>
            /// Removes the payload stored under <paramref name="name"/>, including all of its
            /// chunks and its index entry when it was stored chunked.
            /// </summary>
            /// <param name="name">Name the payload was stored under.</param>
            public static void Remove(string name)
            {
                var dic = GetFileList();
                List<Tuple<string, int>> list;
                if (dic != null && dic.TryGetValue(name, out list))
                {
                    RemoveChunks(list);
                    dic.Remove(name);
                    PutFileList(dic);
                    return;
                }

                DataCache.Remove(name);
            }

            // Removes every chunk key listed in one index entry from the cache.
            private static void RemoveChunks(List<Tuple<string, int>> chunks)
            {
                foreach (var chunk in chunks)
                {
                    DataCache.Remove(chunk.Item1);
                }
            }

            // Writes the chunk index back to the cache under kFileListName.
            private static void PutFileList(Dictionary<string, List<Tuple<string, int>>> input)
            {
                DataCache.Put(kFileListName, input);
            }

            // Reads the chunk index; null when nothing usable is stored under kFileListName.
            private static Dictionary<string, List<Tuple<string, int>>> GetFileList()
            {
                return DataCache.Get(kFileListName) as Dictionary<string, List<Tuple<string, int>>>;
            }
        }
    

    And finally, two classes used as data holders

        /// <summary>
        /// Pairs the chunks of one payload with a <see cref="CachingObject"/> descriptor per
        /// chunk. Descriptor i is named "{CacheName}_{i}" and carries index i.
        /// </summary>
        public class CachingObjectHolder
        {
            public readonly List<byte[]> Partials;
            public readonly List<CachingObject> CachingObjects;
            public readonly string CacheName;

            /// <summary>Wraps the given chunks and builds one descriptor for each of them.</summary>
            /// <param name="name">Base name used as the prefix of every chunk name.</param>
            /// <param name="partialList">The chunks, in payload order.</param>
            public CachingObjectHolder(string name, List<byte[]> partialList)
            {
                CacheName = name;
                Partials = partialList;
                CachingObjects = DescribeChunks(name, partialList.Count);
            }

            // Builds the descriptors for chunk positions 0 .. count - 1.
            private static List<CachingObject> DescribeChunks(string baseName, int count)
            {
                var descriptors = new List<CachingObject>();
                var position = 0;
                while (position < count)
                {
                    var chunkName = string.Format("{0}_{1}", baseName, position);
                    descriptors.Add(new CachingObject(chunkName, position));
                    position++;
                }

                return descriptors;
            }
        }
    
        /// <summary>Describes one chunk: the name it is stored under and its position.</summary>
        public class CachingObject
        {
            /// <summary>Creates a descriptor for the chunk at <paramref name="index"/>.</summary>
            /// <param name="name">Name of the chunk.</param>
            /// <param name="index">Position of the chunk.</param>
            public CachingObject(string name, int index)
            {
                this.Name = name;
                this.Index = index;
            }

            /// <summary>Position of the chunk.</summary>
            public int Index { get; set; }

            /// <summary>Name of the chunk.</summary>
            public string Name { get; set; }
        }
    

    Here are the results from testing the solution on the cloud. The R/W times are in ms. Results from live testing