Commit ceb73ad

Merge pull request #77 from Sergio0694/bugfixes
Bug fixes
2 parents 566b05a + 68631fd commit ceb73ad

File tree

13 files changed: +149 additions, −57 deletions

NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs

Lines changed: 1 addition & 0 deletions
```diff
@@ -92,6 +92,7 @@ internal TensorInfo GetForwardOutputTensorInfo(in TensorInfo input, (int X, int
     int
         h = (input.Height - field.X + 2 * VerticalPadding) / VerticalStride + 1,
         w = (input.Width - field.Y + 2 * HorizontalPadding) / HorizontalStride + 1;
+    if (h <= 0 || w <= 0) throw new InvalidOperationException("The input convolution kernels can't be applied to the input tensor shape");
     return new TensorInfo(h, w, kernels);
 }
```

NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs

Lines changed: 1 addition & 0 deletions
```diff
@@ -99,6 +99,7 @@ internal TensorInfo GetForwardOutputTensorInfo(in TensorInfo input)
     int
         h = (input.Height - WindowHeight + 2 * VerticalPadding) / VerticalStride + 1,
         w = (input.Width - WindowWidth + 2 * HorizontalPadding) / HorizontalStride + 1;
+    if (h <= 0 || w <= 0) throw new InvalidOperationException("The input tensor shape is not valid to apply the current pooling operation");
     return new TensorInfo(h, w, input.Channels);
 }
```

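Both checks guard the same output-size arithmetic: a kernel or pooling window larger than the padded input yields a non-positive spatial size. A standalone sketch of that arithmetic, with plain ints in place of the library types:

```csharp
// Output size of a convolution/pooling pass: (size - window + 2 * padding) / stride + 1.
// A 7x7 kernel over an unpadded 5x5 input gives a non-positive height, which the
// new checks surface as an exception instead of an invalid TensorInfo.
int inputHeight = 5, kernelHeight = 7, padding = 0, stride = 1;
int h = (inputHeight - kernelHeight + 2 * padding) / stride + 1; // -1: invalid
System.Console.WriteLine(h <= 0 ? "rejected" : $"output height: {h}");
```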
NeuralNetwork.NET/APIs/Structs/TensorInfo.cs

Lines changed: 11 additions & 3 deletions
```diff
@@ -40,7 +40,6 @@ namespace NeuralNetworkNET.APIs.Structs
     [JsonProperty(nameof(Size), Order = 4)]
     public int Size
     {
-        [Pure]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         get => Height * Width * Channels;
     }
@@ -50,18 +49,27 @@ public int Size
     /// </summary>
     public int SliceSize
     {
-        [Pure]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         get => Height * Width;
     }
 
+    /// <summary>
+    /// Gets whether the current <see cref="Tensor"/> instance is invalid (empty or with invalid parameters)
+    /// </summary>
+    public bool IsEmptyOrInvalid
+    {
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        get => Height <= 0 || Width <= 0 || Channels <= 0;
+    }
+
     #endregion
 
     #region Constructors
 
     internal TensorInfo(int height, int width, int channels)
     {
-        if (height * width <= 0) throw new ArgumentException("The height and width of the kernels must be positive values");
+        if (height <= 0 || width <= 0) throw new ArgumentException("The height and width of the kernels must be positive values");
+        if (channels <= 0) throw new ArgumentException("The number of channels must be positive");
         Height = height;
         Width = width;
         Channels = channels >= 1 ? channels : throw new ArgumentOutOfRangeException(nameof(channels), "The number of channels must be at least equal to 1");
```

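The old `height * width <= 0` guard is the interesting bug here: two negative dimensions multiply to a positive value and slipped straight through. A quick sketch of the new behavior, assuming the public `TensorInfo.Volume` factory (used elsewhere in this commit) forwards to the constructor shown above:

```csharp
using System;
using NeuralNetworkNET.APIs.Structs;

TensorInfo ok = TensorInfo.Volume(28, 28, 1); // e.g. a 28x28 grayscale image
Console.WriteLine(ok.IsEmptyOrInvalid);       // False

// Previously -28 * -28 = 784 > 0 passed the check; each dimension is
// now validated on its own, so this throws an ArgumentException
TensorInfo bad = TensorInfo.Volume(-28, -28, 1);
```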
NeuralNetwork.NET/Networks/Graph/ComputationGraph.cs

Lines changed: 5 additions & 3 deletions
```diff
@@ -156,6 +156,8 @@ void BuildMap(NodeBuilder node, Guid id)
     }
     else
     {
+        if (parents.Skip(1).Any(p => p.Info.Height != parents[0].Info.Height || p.Info.Width != parents[0].Info.Width))
+            throw new ComputationGraphBuildException("The inputs of a depth concatenation node must all have the same height and width");
         shape = TensorInfo.Volume(parents[0].Info.Height, parents[0].Info.Width, parents.Sum(p => p.Info.Channels));
         next = new DepthConcatenationNode(parents.Select(t => t.Node).ToArray());
     }
@@ -309,14 +311,14 @@ int[] GetIndexes(IEnumerable<IComputationGraphNode> nodes, IReadOnlyList<IComput
         !GetIndexes(n1.Children, Nodes).SequenceEqual(GetIndexes(n2.Children, other.Nodes))) return false;
     switch (n1)
     {
-        case DepthConcatenationNode merge:
-            if (!GetIndexes(merge.Parents, Nodes).SequenceEqual(GetIndexes(n2.To<IComputationGraphNode, DepthConcatenationNode>().Parents, other.Nodes))) return false;
+        case MergeNodeBase merge:
+            if (!GetIndexes(merge.Parents, Nodes).SequenceEqual(GetIndexes(n2.To<IComputationGraphNode, MergeNodeBase>().Parents, other.Nodes))) return false;
             break;
         case ProcessingNode processing:
             if (Nodes.IndexOf(processing.Parent) != other.Nodes.IndexOf(n2.To<IComputationGraphNode, ProcessingNode>().Parent)) return false;
             break;
         case TrainingNode split:
-            if (Nodes.IndexOf(split.Parent) != other.Nodes.IndexOf(n2.To<IComputationGraphNode, ProcessingNode>().Parent)) return false;
+            if (Nodes.IndexOf(split.Parent) != other.Nodes.IndexOf(n2.To<IComputationGraphNode, TrainingNode>().Parent)) return false;
             break;
         case InputNode _: break;
         default: throw new InvalidOperationException("The graph contains an invalid node");
```

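Depth concatenation stacks its inputs along the channel axis, so every parent must share the same spatial size; only the channel counts may differ. A self-contained sketch of the new guard's logic, with value tuples standing in for the node info:

```csharp
using System;
using System.Linq;

var parents = new[]
{
    (Height: 12, Width: 12, Channels: 16),
    (Height: 12, Width: 12, Channels: 32),
    (Height: 6,  Width: 6,  Channels: 8)   // spatial mismatch
};
bool mismatch = parents.Skip(1).Any(
    p => p.Height != parents[0].Height || p.Width != parents[0].Width);
// mismatch == true: the graph builder now fails fast instead of
// computing a bogus 12x12x56 volume from incompatible inputs
Console.WriteLine(mismatch);
```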
NeuralNetwork.NET/Networks/Implementations/ComputationGraphNetwork.cs

Lines changed: 18 additions & 7 deletions
```diff
@@ -68,14 +68,13 @@ protected override void Forward(in Tensor x, out Tensor yHat)
     using (TensorMap<IComputationGraphNode> aMap = new TensorMap<IComputationGraphNode> { [Graph.Root] = x })
     {
         // Recursive forward function
-        Tensor xc = x; // Local copy for closure
         void Forward(IComputationGraphNode node)
         {
             switch (node)
             {
                 case ProcessingNode processing:
                 {
-                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(processing.Parent is InputNode ? xc : aMap[processing.Parent], out Tensor z, out Tensor a);
+                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(aMap[processing.Parent], out Tensor z, out Tensor a);
                     z.Free();
                     aMap[processing] = a;
                     if (processing == Graph.OutputNode) return;
@@ -145,7 +144,7 @@ void Forward(IComputationGraphNode node)
             {
                 case ProcessingNode processing:
                 {
-                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(processing.Parent is InputNode ? x : aMap[processing.Parent], out Tensor z, out Tensor a);
+                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(aMap[processing.Parent], out Tensor z, out Tensor a);
                     zMap[processing] = z;
                     aMap[processing] = a;
                     if (processing.Layer.LayerType == LayerType.FullyConnected && dropout > 0)
@@ -217,7 +216,19 @@ void Backward(IComputationGraphNode node)
             bool linked = false;
             if (node.Children.Count == 1)
             {
-                if (node.Type == ComputationGraphNodeType.Processing)
+                if (node.Children[0] is DepthConcatenationNode merge)
+                {
+                    int offset = 0, length = -1;
+                    for (int j = 0; j < merge.Parents.Count; j++)
+                    {
+                        length = aMap[merge.Parents[j]].Length;
+                        if (merge.Parents[j] == node) break;
+                        offset += j == 0 ? 0 : aMap[merge.Parents[j - 1]].Length;
+                    }
+                    Tensor.New(x.Entities, length, out dy);
+                    CpuDnn.DepthConcatenationBackward(dMap[merge], offset, dy);
+                }
+                else if (node.Type == ComputationGraphNodeType.Processing)
                 {
                     dy = dMap[node.Children[0]];
                     linked = true; // Just use a shallow copy, but mark it as non-disposable
@@ -242,9 +253,9 @@ void Backward(IComputationGraphNode node)
                 int offset = 0, length = -1;
                 for (int j = 0; j < merge.Parents.Count; j++)
                 {
+                    length = aMap[merge.Parents[j]].Length;
                     if (merge.Parents[j] == node) break;
                     offset += j == 0 ? 0 : aMap[merge.Parents[j - 1]].Length;
-                    length = aMap[merge.Parents[j]].Length;
                 }
                 Tensor.New(x.Entities, length, out dyt[i]);
                 CpuDnn.DepthConcatenationBackward(dMap[merge], offset, dyt[i]);
@@ -324,7 +335,7 @@ void Backward(IComputationGraphNode node)
                     break;
                 case SumNode sum:
                 {
-                    Tensor.Like(zMap[node], out Tensor dx);
+                    Tensor.Like(dy, out Tensor dx); // Inputs and outputs have the same shape for sum nodes
                     sum.Backpropagate(zMap[node], dy, dx);
                     dy.Free();
                     dMap[node] = dx;
@@ -434,7 +445,7 @@ void Forward(IComputationGraphNode node)
             {
                 case ProcessingNode processing:
                 {
-                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(processing.Parent is InputNode ? xc : aMap[processing.Parent], out Tensor z, out Tensor a);
+                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(aMap[processing.Parent], out Tensor z, out Tensor a);
                     zMap[processing] = z;
                     aMap[processing] = a;
                     break;
```

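The reordered loop in the backward pass fixes an off-by-one: `length` was only assigned after the early `break`, so a node matching the first parent of the merge left `length` at -1. A simplified sketch of the corrected scan, with plain ints in place of `aMap[...].Length` and a straightforward prefix-sum for the offset:

```csharp
// Hypothetical per-parent activation lengths for a depth concatenation node
int[] parentLengths = { 128, 256, 64 };
int target = 0; // index of the parent currently being backpropagated

int offset = 0, length = -1;
for (int j = 0; j < parentLengths.Length; j++)
{
    length = parentLengths[j];  // capture before the early exit (the fix)
    if (j == target) break;
    offset += parentLengths[j]; // columns to skip in the concatenated gradient
}
// target == 0 => offset == 0, length == 128; before the fix, length
// stayed -1 here and the subsequent Tensor.New allocation was invalid
```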
NeuralNetwork.NET/Networks/Layers/Abstract/NetworkLayerBase.cs

Lines changed: 4 additions & 3 deletions
```diff
@@ -1,4 +1,5 @@
-using System.IO;
+using System;
+using System.IO;
 using System.Runtime.CompilerServices;
 using JetBrains.Annotations;
 using NeuralNetworkNET.APIs.Enums;
@@ -58,8 +59,8 @@ public ref readonly TensorInfo OutputInfo
 
     protected NetworkLayerBase(in TensorInfo input, in TensorInfo output, ActivationFunctionType activation)
     {
-        _InputInfo = input;
-        _OutputInfo = output;
+        _InputInfo = input.IsEmptyOrInvalid ? throw new ArgumentException("The layer input info is not valid", nameof(input)) : input;
+        _OutputInfo = output.IsEmptyOrInvalid ? throw new ArgumentException("The layer output info is not valid", nameof(output)) : output;
         ActivationFunctionType = activation;
         ActivationFunctions = ActivationFunctionProvider.GetActivations(activation);
     }
```

NeuralNetwork.NET/Networks/Layers/Abstract/OutputLayerBase.cs

Lines changed: 1 addition & 0 deletions
```diff
@@ -82,6 +82,7 @@ public virtual unsafe void Backpropagate(in Tensor x, in Tensor yHat, in Tensor
     dw.Reshape(1, dw.Size, out dJdw); // Flatten the result
     Tensor.New(1, Biases.Length, out dJdb);
     CpuDnn.FullyConnectedBackwardBias(dy, dJdb);
+    dy.Free();
 }
 
 /// <summary>
```

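Tensors here are manually managed buffers (the `Tensor.New` / `Free` pairs seen throughout this commit), so the missing `Free()` on `dy`, presumably allocated earlier in the same method, leaked memory on every backward pass. A minimal sketch of the ownership rule the fix restores, assuming `Tensor` wraps an unmanaged allocation with no finalizer backing it:

```csharp
using NeuralNetworkNET.APIs.Structs; // assumed namespace, matching TensorInfo

Tensor.New(1, 10, out Tensor t); // unmanaged allocation
try
{
    // ... use t ...
}
finally
{
    t.Free(); // without this, the buffer is never reclaimed
}
```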
NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnFullyConnectedLayer.cs

Lines changed: 1 addition & 1 deletion
```diff
@@ -68,7 +68,7 @@ public override void Backpropagate(in Tensor x, in Tensor y, in Tensor dy, in Te
         // Gradient
         DnnInstance.FullyConnectedBackwardFilter(x.Entities, x.Length, dy.Length, x_gpu.Ptr, dy_gpu.Ptr, w_gpu.Ptr);
         w_gpu.CopyToHost(1, Weights.Length, out dJdw);
-        DnnInstance.FullyConnectedBackwardBias(dy.Entities, dy.Length, dy_gpu.Ptr, dJdb_gpu.Ptr); // Doing this on CPU is generally faster than launching the kernels
+        DnnInstance.FullyConnectedBackwardBias(dy.Entities, dy.Length, dy_gpu.Ptr, dJdb_gpu.Ptr);
         dJdb_gpu.CopyToHost(1, Biases.Length, out dJdb);
     }
 }
```

NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnSoftmaxLayer.cs

Lines changed: 24 additions & 0 deletions
```diff
@@ -68,6 +68,30 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a)
     }
 }
 
+/// <inheritdoc/>
+public override void Backpropagate(in Tensor x, in Tensor yHat, in Tensor y, in Tensor z, in Tensor dx, out Tensor dJdw, out Tensor dJdb)
+{
+    using (DeviceMemory<float>
+        yHat_gpu = DnnInstance.Gpu.AllocateDevice(yHat),
+        dy_gpu = DnnInstance.Gpu.AllocateDevice(y),
+        w_gpu = DnnInstance.Gpu.AllocateDevice(Weights), // Shared for the weights and dJdw, for better efficiency
+        x_gpu = DnnInstance.Gpu.AllocateDevice(x),
+        dx_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Size),
+        dJdb_gpu = DnnInstance.Gpu.AllocateDevice<float>(Biases.Length))
+    {
+        // The derivative is just yHat - y
+        DnnInstance.AddTensor(1, SoftmaxInfo, yHat_gpu.Ptr, -1, SoftmaxInfo, dy_gpu.Ptr);
+        DnnInstance.FullyConnectedBackwardData(y.Entities, InputInfo.Size, OutputInfo.Size, dy_gpu.Ptr, w_gpu.Ptr, dx_gpu.Ptr);
+        dx_gpu.CopyTo(dx);
+
+        // Gradient
+        DnnInstance.FullyConnectedBackwardFilter(x.Entities, x.Length, y.Length, x_gpu.Ptr, dy_gpu.Ptr, w_gpu.Ptr);
+        w_gpu.CopyToHost(1, Weights.Length, out dJdw);
+        DnnInstance.FullyConnectedBackwardBias(y.Entities, y.Length, dy_gpu.Ptr, dJdb_gpu.Ptr); // Doing this on CPU is generally faster than launching the kernels
+        dJdb_gpu.CopyToHost(1, Biases.Length, out dJdb);
+    }
+}
+
 #endregion
 
 /// <summary>
```

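The new GPU backward pass leans on a standard identity: for a softmax output layer trained with cross-entropy, the softmax Jacobian and the loss derivative collapse into dJ/dz = yHat − y, which is what the `AddTensor(1, …, -1, …)` call computes on-device. The same arithmetic on the CPU, as a tiny sketch with made-up values:

```csharp
float[] yHat = { 0.7f, 0.2f, 0.1f }; // softmax output for one sample
float[] y    = { 1f,   0f,   0f   }; // one-hot label
float[] dy   = new float[yHat.Length];
for (int i = 0; i < dy.Length; i++)
    dy[i] = yHat[i] - y[i]; // { -0.3, 0.2, 0.1 }: the error signal per class
```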
NeuralNetwork.NET/cpuDNN/CpuDnn{Convolution}.cs

Lines changed: 0 additions & 3 deletions
```diff
@@ -35,7 +35,6 @@ public static unsafe void ConvolutionForward(
         kSize = kw / wInfo.Channels,
         kHeight = wInfo.Height,
         kWidth = wInfo.Width;
-    if (kHeight < 2 || kWidth < 2) throw new ArgumentException("The kernel must be at least 2x2", nameof(w));
     int
         n = x.Entities,
         l = x.Length,
@@ -130,7 +129,6 @@ public static unsafe void ConvolutionBackwardData(
         kHeight = wInfo.Height,
         kWidth = wInfo.Width,
         kDepth = wInfo.Channels;
-    if (kHeight < 2 || kWidth < 2) throw new ArgumentException("The kernel must be at least 2x2", nameof(w));
    int
         n = dy.Entities,
         l = dy.Length,
@@ -229,7 +227,6 @@ public static unsafe void ConvolutionBackwardFilter(
         kSize = kw / dyInfo.Channels,
         kHeight = dyInfo.Height,
         kWidth = dyInfo.Width;
-    if (kHeight < 2 || kWidth < 2) throw new ArgumentException(nameof(dy), "The kernel must be at least 2x2");
     int
         n = x.Entities,
         l = x.Length,
```

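Dropping the 2x2 minimum means 1x1 kernels (pure channel-mixing convolutions, as used in inception-style blocks) are no longer rejected out of hand, while the new shape check in ConvolutionInfo above still catches genuinely impossible configurations. The output-size arithmetic for the 1x1 case:

```csharp
int H = 12, W = 12, k = 1, padding = 0, stride = 1;
int h = (H - k + 2 * padding) / stride + 1; // 12
int w = (W - k + 2 * padding) / stride + 1; // 12: spatial size is preserved,
// only the channel count changes through the kernel stack
```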