Commit ceb73ad

Merge pull request #77 from Sergio0694/bugfixes
Bug fixes
2 parents 566b05a + 68631fd commit ceb73ad

File tree

13 files changed: +149 additions, −57 deletions

NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs

Lines changed: 1 addition & 0 deletions
```diff
@@ -92,6 +92,7 @@ internal TensorInfo GetForwardOutputTensorInfo(in TensorInfo input, (int X, int
     int
         h = (input.Height - field.X + 2 * VerticalPadding) / VerticalStride + 1,
         w = (input.Width - field.Y + 2 * HorizontalPadding) / HorizontalStride + 1;
+    if (h <= 0 || w <= 0) throw new InvalidOperationException("The input convolution kernels can't be applied to the input tensor shape");
     return new TensorInfo(h, w, kernels);
 }
```

NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs

Lines changed: 1 addition & 0 deletions
```diff
@@ -99,6 +99,7 @@ internal TensorInfo GetForwardOutputTensorInfo(in TensorInfo input)
     int
         h = (input.Height - WindowHeight + 2 * VerticalPadding) / VerticalStride + 1,
         w = (input.Width - WindowWidth + 2 * HorizontalPadding) / HorizontalStride + 1;
+    if (h <= 0 || w <= 0) throw new InvalidOperationException("The input tensor shape is not valid to apply the current pooling operation");
     return new TensorInfo(h, w, input.Channels);
 }
```

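Both checks guard the same output-size arithmetic: a kernel or pooling window larger than the padded input yields a non-positive spatial size. A standalone sketch of that arithmetic, with plain ints in place of the library types:

```csharp
// Output size of a convolution/pooling pass: (size - window + 2 * padding) / stride + 1.
// A 7x7 kernel over an unpadded 5x5 input gives a non-positive height, which the
// new checks surface as an exception instead of an invalid TensorInfo.
int inputHeight = 5, kernelHeight = 7, padding = 0, stride = 1;
int h = (inputHeight - kernelHeight + 2 * padding) / stride + 1; // -1: invalid
System.Console.WriteLine(h <= 0 ? "rejected" : $"output height: {h}");
```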
NeuralNetwork.NET/APIs/Structs/TensorInfo.cs

Lines changed: 11 additions & 3 deletions
```diff
@@ -40,7 +40,6 @@ namespace NeuralNetworkNET.APIs.Structs
     [JsonProperty(nameof(Size), Order = 4)]
     public int Size
     {
-        [Pure]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         get => Height * Width * Channels;
     }
@@ -50,18 +49,27 @@ public int Size
     /// </summary>
     public int SliceSize
     {
-        [Pure]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         get => Height * Width;
     }
 
+    /// <summary>
+    /// Gets whether the current <see cref="Tensor"/> instance is invalid (empty or with invalid parameters)
+    /// </summary>
+    public bool IsEmptyOrInvalid
+    {
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        get => Height <= 0 || Width <= 0 || Channels <= 0;
+    }
+
     #endregion
 
     #region Constructors
 
     internal TensorInfo(int height, int width, int channels)
     {
-        if (height * width <= 0) throw new ArgumentException("The height and width of the kernels must be positive values");
+        if (height <= 0 || width <= 0) throw new ArgumentException("The height and width of the kernels must be positive values");
+        if (channels <= 0) throw new ArgumentException("The number of channels must be positive");
         Height = height;
         Width = width;
         Channels = channels >= 1 ? channels : throw new ArgumentOutOfRangeException(nameof(channels), "The number of channels must be at least equal to 1");
```

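The old `height * width <= 0` guard is the interesting bug here: two negative dimensions multiply to a positive value and slipped straight through. A quick sketch of the new behavior, assuming the public `TensorInfo.Volume` factory (used elsewhere in this commit) forwards to the constructor shown above:

```csharp
using System;
using NeuralNetworkNET.APIs.Structs;

TensorInfo ok = TensorInfo.Volume(28, 28, 1); // e.g. a 28x28 grayscale image
Console.WriteLine(ok.IsEmptyOrInvalid);       // False

// Previously -28 * -28 = 784 > 0 passed the check; each dimension is
// now validated on its own, so this throws an ArgumentException
TensorInfo bad = TensorInfo.Volume(-28, -28, 1);
```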
NeuralNetwork.NET/Networks/Graph/ComputationGraph.cs

Lines changed: 5 additions & 3 deletions
```diff
@@ -156,6 +156,8 @@ void BuildMap(NodeBuilder node, Guid id)
     }
     else
     {
+        if (parents.Skip(1).Any(p => p.Info.Height != parents[0].Info.Height || p.Info.Width != parents[0].Info.Width))
+            throw new ComputationGraphBuildException("The inputs of a depth concatenation node must all have the same height and width");
         shape = TensorInfo.Volume(parents[0].Info.Height, parents[0].Info.Width, parents.Sum(p => p.Info.Channels));
         next = new DepthConcatenationNode(parents.Select(t => t.Node).ToArray());
     }
@@ -309,14 +311,14 @@ int[] GetIndexes(IEnumerable<IComputationGraphNode> nodes, IReadOnlyList<IComput
         !GetIndexes(n1.Children, Nodes).SequenceEqual(GetIndexes(n2.Children, other.Nodes))) return false;
     switch (n1)
     {
-        case DepthConcatenationNode merge:
-            if (!GetIndexes(merge.Parents, Nodes).SequenceEqual(GetIndexes(n2.To<IComputationGraphNode, DepthConcatenationNode>().Parents, other.Nodes))) return false;
+        case MergeNodeBase merge:
+            if (!GetIndexes(merge.Parents, Nodes).SequenceEqual(GetIndexes(n2.To<IComputationGraphNode, MergeNodeBase>().Parents, other.Nodes))) return false;
             break;
         case ProcessingNode processing:
             if (Nodes.IndexOf(processing.Parent) != other.Nodes.IndexOf(n2.To<IComputationGraphNode, ProcessingNode>().Parent)) return false;
             break;
         case TrainingNode split:
-            if (Nodes.IndexOf(split.Parent) != other.Nodes.IndexOf(n2.To<IComputationGraphNode, ProcessingNode>().Parent)) return false;
+            if (Nodes.IndexOf(split.Parent) != other.Nodes.IndexOf(n2.To<IComputationGraphNode, TrainingNode>().Parent)) return false;
             break;
         case InputNode _: break;
         default: throw new InvalidOperationException("The graph contains an invalid node");
```

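Depth concatenation stacks its inputs along the channel axis, so every parent must share the same spatial size; only the channel counts may differ. A self-contained sketch of the new guard's logic, with value tuples standing in for the node info:

```csharp
using System;
using System.Linq;

var parents = new[]
{
    (Height: 12, Width: 12, Channels: 16),
    (Height: 12, Width: 12, Channels: 32),
    (Height: 6,  Width: 6,  Channels: 8)   // spatial mismatch
};
bool mismatch = parents.Skip(1).Any(
    p => p.Height != parents[0].Height || p.Width != parents[0].Width);
// mismatch == true: the graph builder now fails fast instead of
// computing a bogus 12x12x56 volume from incompatible inputs
Console.WriteLine(mismatch);
```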
NeuralNetwork.NET/Networks/Implementations/ComputationGraphNetwork.cs

Lines changed: 18 additions & 7 deletions
```diff
@@ -68,14 +68,13 @@ protected override void Forward(in Tensor x, out Tensor yHat)
     using (TensorMap<IComputationGraphNode> aMap = new TensorMap<IComputationGraphNode> { [Graph.Root] = x })
     {
         // Recursive forward function
-        Tensor xc = x; // Local copy for closure
         void Forward(IComputationGraphNode node)
         {
             switch (node)
             {
                 case ProcessingNode processing:
                 {
-                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(processing.Parent is InputNode ? xc : aMap[processing.Parent], out Tensor z, out Tensor a);
+                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(aMap[processing.Parent], out Tensor z, out Tensor a);
                     z.Free();
                     aMap[processing] = a;
                     if (processing == Graph.OutputNode) return;
@@ -145,7 +144,7 @@ void Forward(IComputationGraphNode node)
             {
                 case ProcessingNode processing:
                 {
-                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(processing.Parent is InputNode ? x : aMap[processing.Parent], out Tensor z, out Tensor a);
+                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(aMap[processing.Parent], out Tensor z, out Tensor a);
                     zMap[processing] = z;
                     aMap[processing] = a;
                     if (processing.Layer.LayerType == LayerType.FullyConnected && dropout > 0)
@@ -217,7 +216,19 @@ void Backward(IComputationGraphNode node)
             bool linked = false;
             if (node.Children.Count == 1)
             {
-                if (node.Type == ComputationGraphNodeType.Processing)
+                if (node.Children[0] is DepthConcatenationNode merge)
+                {
+                    int offset = 0, length = -1;
+                    for (int j = 0; j < merge.Parents.Count; j++)
+                    {
+                        length = aMap[merge.Parents[j]].Length;
+                        if (merge.Parents[j] == node) break;
+                        offset += j == 0 ? 0 : aMap[merge.Parents[j - 1]].Length;
+                    }
+                    Tensor.New(x.Entities, length, out dy);
+                    CpuDnn.DepthConcatenationBackward(dMap[merge], offset, dy);
+                }
+                else if (node.Type == ComputationGraphNodeType.Processing)
                 {
                     dy = dMap[node.Children[0]];
                     linked = true; // Just use a shallow copy, but mark it as non-disposable
@@ -242,9 +253,9 @@ void Backward(IComputationGraphNode node)
                 int offset = 0, length = -1;
                 for (int j = 0; j < merge.Parents.Count; j++)
                 {
+                    length = aMap[merge.Parents[j]].Length;
                     if (merge.Parents[j] == node) break;
                     offset += j == 0 ? 0 : aMap[merge.Parents[j - 1]].Length;
-                    length = aMap[merge.Parents[j]].Length;
                 }
                 Tensor.New(x.Entities, length, out dyt[i]);
                 CpuDnn.DepthConcatenationBackward(dMap[merge], offset, dyt[i]);
@@ -324,7 +335,7 @@ void Backward(IComputationGraphNode node)
                     break;
                 case SumNode sum:
                 {
-                    Tensor.Like(zMap[node], out Tensor dx);
+                    Tensor.Like(dy, out Tensor dx); // Inputs and outputs have the same shape for sum nodes
                     sum.Backpropagate(zMap[node], dy, dx);
                     dy.Free();
                     dMap[node] = dx;
@@ -434,7 +445,7 @@ void Forward(IComputationGraphNode node)
             {
                 case ProcessingNode processing:
                 {
-                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(processing.Parent is InputNode ? xc : aMap[processing.Parent], out Tensor z, out Tensor a);
+                    processing.Layer.To<INetworkLayer, NetworkLayerBase>().Forward(aMap[processing.Parent], out Tensor z, out Tensor a);
                     zMap[processing] = z;
                     aMap[processing] = a;
                     break;
```

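The reordered loop in the backward pass fixes an off-by-one: `length` was only assigned after the early `break`, so a node matching the first parent of the merge left `length` at -1. A simplified sketch of the corrected scan, with plain ints in place of `aMap[...].Length` and a straightforward prefix-sum for the offset:

```csharp
// Hypothetical per-parent activation lengths for a depth concatenation node
int[] parentLengths = { 128, 256, 64 };
int target = 0; // index of the parent currently being backpropagated

int offset = 0, length = -1;
for (int j = 0; j < parentLengths.Length; j++)
{
    length = parentLengths[j];  // capture before the early exit (the fix)
    if (j == target) break;
    offset += parentLengths[j]; // columns to skip in the concatenated gradient
}
// target == 0 => offset == 0, length == 128; before the fix, length
// stayed -1 here and the subsequent Tensor.New allocation was invalid
```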
NeuralNetwork.NET/Networks/Layers/Abstract/NetworkLayerBase.cs

Lines changed: 4 additions & 3 deletions
```diff
@@ -1,4 +1,5 @@
-using System.IO;
+using System;
+using System.IO;
 using System.Runtime.CompilerServices;
 using JetBrains.Annotations;
 using NeuralNetworkNET.APIs.Enums;
@@ -58,8 +59,8 @@ public ref readonly TensorInfo OutputInfo
 
     protected NetworkLayerBase(in TensorInfo input, in TensorInfo output, ActivationFunctionType activation)
     {
-        _InputInfo = input;
-        _OutputInfo = output;
+        _InputInfo = input.IsEmptyOrInvalid ? throw new ArgumentException("The layer input info is not valid", nameof(input)) : input;
+        _OutputInfo = output.IsEmptyOrInvalid ? throw new ArgumentException("The layer output info is not valid", nameof(output)) : output;
         ActivationFunctionType = activation;
         ActivationFunctions = ActivationFunctionProvider.GetActivations(activation);
     }
```

NeuralNetwork.NET/Networks/Layers/Abstract/OutputLayerBase.cs

Lines changed: 1 addition & 0 deletions
```diff
@@ -82,6 +82,7 @@ public virtual unsafe void Backpropagate(in Tensor x, in Tensor yHat, in Tensor
     dw.Reshape(1, dw.Size, out dJdw); // Flatten the result
     Tensor.New(1, Biases.Length, out dJdb);
     CpuDnn.FullyConnectedBackwardBias(dy, dJdb);
+    dy.Free();
 }
 
 /// <summary>
```

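Tensors here are manually managed buffers (the `Tensor.New` / `Free` pairs seen throughout this commit), so the missing `Free()` on `dy`, presumably allocated earlier in the same method, leaked memory on every backward pass. A minimal sketch of the ownership rule the fix restores, assuming `Tensor` wraps an unmanaged allocation with no finalizer backing it:

```csharp
using NeuralNetworkNET.APIs.Structs; // assumed namespace, matching TensorInfo

Tensor.New(1, 10, out Tensor t); // unmanaged allocation
try
{
    // ... use t ...
}
finally
{
    t.Free(); // without this, the buffer is never reclaimed
}
```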
NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnFullyConnectedLayer.cs

Lines changed: 1 addition & 1 deletion
```diff
@@ -68,7 +68,7 @@ public override void Backpropagate(in Tensor x, in Tensor y, in Tensor dy, in Te
         // Gradient
         DnnInstance.FullyConnectedBackwardFilter(x.Entities, x.Length, dy.Length, x_gpu.Ptr, dy_gpu.Ptr, w_gpu.Ptr);
         w_gpu.CopyToHost(1, Weights.Length, out dJdw);
-        DnnInstance.FullyConnectedBackwardBias(dy.Entities, dy.Length, dy_gpu.Ptr, dJdb_gpu.Ptr); // Doing this on CPU is generally faster than launching the kernels
+        DnnInstance.FullyConnectedBackwardBias(dy.Entities, dy.Length, dy_gpu.Ptr, dJdb_gpu.Ptr);
         dJdb_gpu.CopyToHost(1, Biases.Length, out dJdb);
     }
 }
```

NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnSoftmaxLayer.cs

Lines changed: 24 additions & 0 deletions
```diff
@@ -68,6 +68,30 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a)
     }
 }
 
+/// <inheritdoc/>
+public override void Backpropagate(in Tensor x, in Tensor yHat, in Tensor y, in Tensor z, in Tensor dx, out Tensor dJdw, out Tensor dJdb)
+{
+    using (DeviceMemory<float>
+        yHat_gpu = DnnInstance.Gpu.AllocateDevice(yHat),
+        dy_gpu = DnnInstance.Gpu.AllocateDevice(y),
+        w_gpu = DnnInstance.Gpu.AllocateDevice(Weights), // Shared for the weights and dJdw, for better efficiency
+        x_gpu = DnnInstance.Gpu.AllocateDevice(x),
+        dx_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Size),
+        dJdb_gpu = DnnInstance.Gpu.AllocateDevice<float>(Biases.Length))
+    {
+        // The derivative is just yHat - y
+        DnnInstance.AddTensor(1, SoftmaxInfo, yHat_gpu.Ptr, -1, SoftmaxInfo, dy_gpu.Ptr);
+        DnnInstance.FullyConnectedBackwardData(y.Entities, InputInfo.Size, OutputInfo.Size, dy_gpu.Ptr, w_gpu.Ptr, dx_gpu.Ptr);
+        dx_gpu.CopyTo(dx);
+
+        // Gradient
+        DnnInstance.FullyConnectedBackwardFilter(x.Entities, x.Length, y.Length, x_gpu.Ptr, dy_gpu.Ptr, w_gpu.Ptr);
+        w_gpu.CopyToHost(1, Weights.Length, out dJdw);
+        DnnInstance.FullyConnectedBackwardBias(y.Entities, y.Length, dy_gpu.Ptr, dJdb_gpu.Ptr); // Doing this on CPU is generally faster than launching the kernels
+        dJdb_gpu.CopyToHost(1, Biases.Length, out dJdb);
+    }
+}
+
 #endregion
 
 /// <summary>
```

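The new GPU backward pass leans on a standard identity: for a softmax output layer trained with cross-entropy, the softmax Jacobian and the loss derivative collapse into dJ/dz = yHat − y, which is what the `AddTensor(1, …, -1, …)` call computes on-device. The same arithmetic on the CPU, as a tiny sketch with made-up values:

```csharp
float[] yHat = { 0.7f, 0.2f, 0.1f }; // softmax output for one sample
float[] y    = { 1f,   0f,   0f   }; // one-hot label
float[] dy   = new float[yHat.Length];
for (int i = 0; i < dy.Length; i++)
    dy[i] = yHat[i] - y[i]; // { -0.3, 0.2, 0.1 }: the error signal per class
```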
NeuralNetwork.NET/cpuDNN/CpuDnn{Convolution}.cs

Lines changed: 0 additions & 3 deletions
```diff
@@ -35,7 +35,6 @@ public static unsafe void ConvolutionForward(
         kSize = kw / wInfo.Channels,
         kHeight = wInfo.Height,
         kWidth = wInfo.Width;
-    if (kHeight < 2 || kWidth < 2) throw new ArgumentException("The kernel must be at least 2x2", nameof(w));
     int
         n = x.Entities,
         l = x.Length,
@@ -130,7 +129,6 @@ public static unsafe void ConvolutionBackwardData(
         kHeight = wInfo.Height,
         kWidth = wInfo.Width,
         kDepth = wInfo.Channels;
-    if (kHeight < 2 || kWidth < 2) throw new ArgumentException("The kernel must be at least 2x2", nameof(w));
    int
         n = dy.Entities,
         l = dy.Length,
@@ -229,7 +227,6 @@ public static unsafe void ConvolutionBackwardFilter(
         kSize = kw / dyInfo.Channels,
         kHeight = dyInfo.Height,
         kWidth = dyInfo.Width;
-    if (kHeight < 2 || kWidth < 2) throw new ArgumentException(nameof(dy), "The kernel must be at least 2x2");
     int
         n = x.Entities,
         l = x.Length,
```

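Dropping the 2x2 minimum means 1x1 kernels (pure channel-mixing convolutions, as used in inception-style blocks) are no longer rejected out of hand, while the new shape check in ConvolutionInfo above still catches genuinely impossible configurations. The output-size arithmetic for the 1x1 case:

```csharp
int H = 12, W = 12, k = 1, padding = 0, stride = 1;
int h = (H - k + 2 * padding) / stride + 1; // 12
int w = (W - k + 2 * padding) / stride + 1; // 12: spatial size is preserved,
// only the channel count changes through the kernel stack
```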