Search code examples
c#hdf5hdf5dotnet

How to make a compound datatype with HDF5DOTNET?


I have problems when I write a struct with arrays in it into an HDF5 dataset. Firstly, the Windows Forms application doesn't even start when the following line is present:

H5T.insert(typeStruct, "string", 0, H5T.create_array(new H5DataTypeId(H5T.H5Type.C_S1), dims2));

The form at least starts without that line, so I think something is wrong with how I define the compound datatype. I've looked into the manuals and many examples, but I still can't fix the problem. Could I get an example of using compound datatypes to write a struct with multiple arrays in C#?

using HDF5DotNet;
using System.Globalization;
using System.IO;
using System.Runtime.InteropServices;
using System.Reflection;


namespace WindowsFormsApplication1
{
    // Main form of the sample application. Its constructor fills a struct_TR with
    // 255 strings and attempts to write it to "weights.h5" as a compound dataset.
    public unsafe partial class Form1 : Form
    {

        // Struct whose only field is a managed string array; the constructor
        // allocates size_currentTime elements for it.
        // NOTE(review): the field carries no MarshalAs/StructLayout attributes, so the
        // unmanaged size and layout used further down (Marshal.SizeOf, writeScalar)
        // presumably do not describe 255 inline strings - confirm.
        public unsafe struct struct_TR
        {
            public string[] arr_currentLong;

            public struct_TR(byte size_currentTime)
            {
                arr_currentLong = new string[size_currentTime];
            }
        }


        // Initializes the form, then builds the sample data and performs the HDF5 write.
        // NOTE(review): none of the HDF5 identifiers created here are closed in this
        // constructor.
        public Form1()
        {
            InitializeComponent();

            long ARRAY_SIZE = 255;
            struct_TR structMade = new struct_TR(255);

            // Fill every slot with the decimal text of its own index ("0" .. "254").
            for (int i = 0; i < 255; i++)
            {
                structMade.arr_currentLong[i] = i.ToString();
            }

            // Make the executable's directory the working directory so the relative
            // file name below resolves next to the application.
            string currentPath = Path.GetDirectoryName(Application.ExecutablePath);     
            Directory.SetCurrentDirectory(currentPath);                                 


            // Create the output file using the ACC_TRUNC create mode.
            H5FileId fileId = H5F.create(@"weights.h5", H5F.CreateMode.ACC_TRUNC);

            // dims1: extent of the dataset's dataspace (rank 1, one element).
            // dims2: dimensions handed to the array datatype (1 x ARRAY_SIZE).
            long[] dims1 = { 1 };
            long[] dims2 = { 1, ARRAY_SIZE };

            H5DataSpaceId myDataSpace = H5S.create_simple(1, dims1);

            // string_type and array_tid1 are created here but not referenced again in
            // this constructor; the insert call below builds its own array type inline.
            H5DataTypeId string_type = H5T.copy(H5T.H5Type.C_S1);
            H5DataTypeId array_tid1 = H5T.create_array(string_type, dims2);

            // Compound type sized from the marshaled size of struct_TR, with a single
            // member named "string" at offset 0.
            // According to the question text, the form fails to start when the
            // H5T.insert line is present.
            H5DataTypeId typeStruct = H5T.create(H5T.CreateClass.COMPOUND, Marshal.SizeOf(typeof(struct_TR)));
            H5T.insert(typeStruct, "string", 0, H5T.create_array(new H5DataTypeId(H5T.H5Type.C_S1), dims2));

            H5DataSetId myDataSet = H5D.create(fileId, "/dset", typeStruct, myDataSpace);

            // Write the single struct instance using the compound type defined above.
            H5D.writeScalar<struct_TR>(myDataSet, typeStruct, ref structMade);

        }
    }
}


Solution

  • The only way I know to save structs with arrays is to give the array a constant (fixed) length. For example, this is a struct with an array of length 4:

        /// <summary>
        /// Sample record with a fixed-length array member. Fields are laid out
        /// sequentially, and the array is marshaled inline as exactly four
        /// <c>short</c> values.
        /// </summary>
        [StructLayout(LayoutKind.Sequential)]
        public struct Responses
        {
            /// <summary>64-bit identifier.</summary>
            public long MCID;

            /// <summary>32-bit panel index.</summary>
            public int PanelIdx;

            /// <summary>Response values; marshaled by value with a constant length of 4.</summary>
            [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)]
            public short[] ResponseValues;
        }
    

    Here an array of 4 structs containing an array is created:

           // Four sample records, each carrying a four-element response array.
           responseList = new[]
           {
               new Responses { MCID = 1, PanelIdx = 5, ResponseValues = new short[] { 1, 2, 3, 4 } },
               new Responses { MCID = 2, PanelIdx = 6, ResponseValues = new short[] { 5, 6, 7, 8 } },
               new Responses { MCID = 3, PanelIdx = 7, ResponseValues = new short[] { 1, 2, 3, 4 } },
               new Responses { MCID = 4, PanelIdx = 8, ResponseValues = new short[] { 5, 6, 7, 8 } },
           };
    

    The following lines of code write an array of structs to a HDF5 file:

            // Create the file, write the structs to "/test", and always close the
            // file handle - even when WriteCompounds throws.
            string filename = "testArrayCompounds.H5";
            var fileId = H5F.create(filename, H5F.ACC_TRUNC);
            int status;
            try
            {
                status = WriteCompounds(fileId, "/test", responseList);
            }
            finally
            {
                H5F.close(fileId);
            }
    

    The WriteCompounds method looks like this:

        /// <summary>
        /// Writes a sequence of structs to a newly created one-dimensional compound dataset.
        /// </summary>
        /// <typeparam name="T">Element type; its HDF5 compound type is built by <c>CreateType</c>.</typeparam>
        /// <param name="groupId">Identifier of the location (file or group) in which the dataset is created.</param>
        /// <param name="name">Name of the dataset to create.</param>
        /// <param name="list">Elements to write. The sequence is enumerated only once.</param>
        /// <returns>The status value returned by <c>H5D.write</c>.</returns>
        public static int WriteCompounds<T>(hid_t groupId, string name, IEnumerable<T> list) //where T : struct
        {
            // Materialize once so a lazy or one-shot sequence is not enumerated
            // repeatedly (count, emptiness check and serialization all need it).
            var items = list as ICollection<T> ?? list.ToList();
            int cnt = items.Count;

            ulong[] dims = new ulong[] { (ulong)cnt };

            var typeId = CreateType(typeof(T));
            try
            {
                // Fewer than 10 elements (including none): keep the default creation
                // property list (0). Otherwise use a chunked list whose chunk length is
                // 10^floor(log10(cnt)), capped at 1000.
                long dcpl = 0;
                if (cnt >= 10)
                {
                    ulong pow = (ulong)Math.Pow(10, (int)Math.Log10(cnt));
                    ulong[] chunk_size = new ulong[] { Math.Min(1000, pow) };
                    dcpl = CreateProperty(chunk_size);
                }

                try
                {
                    // Create dataspace.  Setting maximum size to NULL sets the maximum
                    // size to be the current size.
                    var spaceId = H5S.create_simple(dims.Length, dims, null);
                    try
                    {
                        // Create the dataset and write the compound data to it.
                        var datasetId = H5D.create(groupId, name, typeId, spaceId, H5P.DEFAULT, dcpl);
                        try
                        {
                            // Concatenate the marshaled bytes of every element.
                            byte[] bytes;
                            using (var ms = new MemoryStream())
                            using (var writer = new BinaryWriter(ms))
                            {
                                foreach (var strct in items)
                                {
                                    writer.Write(getBytes(strct));
                                }
                                writer.Flush();
                                bytes = ms.ToArray();
                            }

                            // Pin the managed buffer so its address stays valid
                            // for the duration of the native write.
                            GCHandle hnd = GCHandle.Alloc(bytes, GCHandleType.Pinned);
                            try
                            {
                                return H5D.write(datasetId, typeId, spaceId, H5S.ALL,
                                    H5P.DEFAULT, hnd.AddrOfPinnedObject());
                            }
                            finally
                            {
                                hnd.Free();
                            }
                        }
                        finally
                        {
                            H5D.close(datasetId);
                        }
                    }
                    finally
                    {
                        H5S.close(spaceId);
                    }
                }
                finally
                {
                    // Only close a property list that was actually created above.
                    if (dcpl != 0)
                    {
                        H5P.close(dcpl);
                    }
                }
            }
            finally
            {
                H5T.close(typeId);
            }
        }
    

    Three additional helper functions are needed; two are shown here:

        /// <summary>
        /// Builds an HDF5 compound datatype for <paramref name="t"/>, sized from its
        /// marshaled size, with one member per entry returned by
        /// <c>Hdf5.GetCompoundInfo</c>, each placed at the field's marshaled offset.
        /// </summary>
        /// <param name="t">Type to describe; its fields are looked up by name via <c>Marshal.OffsetOf</c>.</param>
        /// <returns>Identifier of the new compound datatype; the caller is responsible for closing it.</returns>
        private static long CreateType(Type t)
        {
            var size = Marshal.SizeOf(t);
            var typeId = H5T.create(H5T.class_t.COMPOUND, new IntPtr(size));

            try
            {
                foreach (var cmp in Hdf5.GetCompoundInfo(t))
                {
                    H5T.insert(typeId, cmp.name, Marshal.OffsetOf(t, cmp.name), cmp.datatype);
                }
            }
            catch
            {
                // Do not leak the half-built datatype if a member cannot be inserted
                // (e.g. Marshal.OffsetOf rejects the field name).
                H5T.close(typeId);
                throw;
            }

            return typeId;
        }
    
        /// <summary>
        /// Creates a dataset-creation property list configured for chunked layout
        /// with the given chunk dimensions and deflate level 6.
        /// </summary>
        /// <param name="chunk_size">Chunk dimensions; the array length is passed as the chunk rank.</param>
        /// <returns>Identifier of the new property list; the caller is responsible for closing it.</returns>
        private static long CreateProperty(ulong[] chunk_size)
        {
            int rank = chunk_size.Length;
            var propertyListId = H5P.create(H5P.DATASET_CREATE);

            H5P.set_layout(propertyListId, H5D.layout_t.CHUNKED);
            H5P.set_chunk(propertyListId, rank, chunk_size);
            H5P.set_deflate(propertyListId, 6);

            return propertyListId;
        }
    

    I also have a ReadCompounds to read the hdf5 file. The Hdf5.GetCompoundInfo method used in the CreateType method is also very long. So I won't show these methods here.

    So that's quite a lot of code just for writing some structs. I have made a library called HDF5DotnetTools that allows you to read and write classes and structs much more easily. There you can also find the ReadCompounds and GetCompoundInfo methods.

    In the unit tests of HDF5DotnetTools you can also find examples of how to write classes with arrays.