Search code examples
c#cntk

How to "flatten" variable


Often, typically after a convolution layer, a variable has a shape of the form (width, height, depth), where depth is the number of filters produced by the convolution operation.

I want to reproduce the GoogLeNet inception module and "squish" (width, height, depth) to (width, height, f(depth)), where f produces a single scalar value, i.e. the resulting depth is 1.

I know there is CNTKLib.Splice, but that is not exactly what I need. I need a weighted sum of all values along the depth axis at each (x, y) coordinate.

How can that be done with the C# API?

edit: added code sample

    /// <summary>
    /// Writes every dimension of the output shape of <paramref name="source"/> to the console
    /// on a single line, formatted as "dim0: a, dim1: b, ...".
    /// </summary>
    /// <param name="source">Function whose output shape is printed.</param>
    public static void PrintOutputDims(Function source)
    {
        var outputShape = source.Output.Shape;
        var rank = outputShape.Rank;

        // One formatted entry per axis, in axis order.
        var entries = new string[rank];
        var axisIndex = 0;
        while (axisIndex < rank)
        {
            entries[axisIndex] = $"dim{axisIndex}: {outputShape[axisIndex]}";
            axisIndex++;
        }

        var line = string.Join(", ", entries);
        Console.WriteLine(line);
    }

    /// <summary>
    /// Demo entry point: creates a 100 x 100 x 20 float input variable, prints its dimensions,
    /// squishes it, and prints the dimensions of the squished result.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var variable = CNTKLib.InputVariable(NDShape.CreateNDShape(new[] { 100, 100, 20 }), DataType.Float, "source");
        PrintOutputDims(variable); // dim0: 100, dim1: 100, dim2: 20
        var squished = Squish(variable);
        // Print the squished result rather than the original input; printing `variable`
        // again would just repeat the first line and never show the reduced depth.
        PrintOutputDims(squished); // dim0: 100, dim1: 100, dim2: 1
    }

How can the Squish function be implemented?


Solution

  • The answer would be something like this:

        /// <summary>
        /// Collapses the last axis of a rank-3 output into a single value per (dim0, dim1)
        /// position by taking a learnable weighted sum over dim2, turning a
        /// (d0, d1, d2) shape into (d0, d1, 1).
        /// </summary>
        /// <remarks>
        /// The source is reshaped to (d0 * d1, d2) and sliced along axis 0 one position at a time.
        /// Each slice gets its own weight parameter (constant-initialized to 1), so d0 * d1
        /// parameters and slice/times/reduce nodes are created.
        /// </remarks>
        /// <param name="source">Function whose output must have rank 3.</param>
        /// <param name="device">Device on which the weight parameters are created.</param>
        /// <returns>A function whose output is reshaped to (d0, d1, 1).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        /// <exception cref="ArgumentException">The output rank of <paramref name="source"/> is not 3.</exception>
        public static Function SpatialReduceWeightedSum(this Function source, DeviceDescriptor device)
        {
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            var sourceOutput = source.Output;
            var sourceShape = sourceOutput.Shape;
            if (sourceShape.Rank != 3)
            {
                throw new ArgumentException("expected rank = 3 but was: " + sourceShape.Rank, nameof(source));
            }

            var sourceDimensions = sourceShape.Dimensions;
            var blocksCount = sourceDimensions[0] * sourceDimensions[1];

            // Flatten the two leading axes so every (dim0, dim1) position becomes one index on axis 0.
            var temporaryDimensions = new[]
                                          {
                                              blocksCount,
                                              sourceDimensions[2]
                                          };
            var temporaryShape = NDShape.CreateNDShape(temporaryDimensions);
            var reshaped = CNTKLib.Reshape(source, temporaryShape);

            // Weights follow the source's data type so the operands of Times agree
            // (this is DataType.Float for float inputs, as before).
            var parameterDataType = sourceOutput.DataType;
            var initializer = CNTKLib.ConstantInitializer(1d);
            var axis0 = new Axis(0);
            var axis1 = new Axis(1);
            var axisVector = new AxisVector() { axis0 };
            var weightedSums = new Variable[blocksCount];
            for (var i = 0; i < blocksCount; i++)
            {
                // Take the single position i along the flattened axis.
                var beginIndex = new IntVector() { i };
                var endIndex = new IntVector() { i + 1 };
                var block = CNTKLib.Slice(reshaped, axisVector, beginIndex, endIndex);

                // The parameter uses the slice's dimensions in reverse order so that the
                // inner dimensions of the Times product line up.
                var blockShape = NDShape.CreateNDShape(block.Output.Shape.Dimensions.Reverse());
                var blockParameters = new Parameter(blockShape, parameterDataType, initializer, device);

                var weightedBlock = CNTKLib.Times(block, blockParameters);
                weightedSums[i] = CNTKLib.ReduceSum(weightedBlock, axis1);
            }

            // Stack the per-position sums back along axis 0, then restore the two leading axes.
            var combined = CNTKLib.Splice(new VariableVector(weightedSums), axis0);

            var flatShapeDimensions = new[]
                                          {
                                              sourceDimensions[0],
                                              sourceDimensions[1],
                                              1
                                          };
            var flatShape = NDShape.CreateNDShape(flatShapeDimensions);

            return CNTKLib.Reshape(combined, flatShape);
        }