This repository was archived by the owner on Mar 12, 2021. It is now read-only.

Commit 1d56696
copy_transpose! and setweights!
Parent: d2446ae

File tree: 4 files changed, +11 / -40 lines


src/CuArrays.jl

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ include("broadcast.jl")
6464
include("matmul.jl")
6565
include("mapreduce.jl")
6666
include("accumulate.jl")
67+
include("linalg.jl")
6768

6869
include("gpuarray_interface.jl")
6970

src/array.jl

Lines changed: 0 additions & 39 deletions
@@ -239,45 +239,6 @@ function Base.fill!(A::CuArray{T}, x) where T <: MemsetCompatTypes
 end
 
 
-## generic linear algebra routines
-
-function LinearAlgebra.tril!(A::CuMatrix{T}, d::Integer = 0) where T
-  function kernel!(_A, _d)
-    li = (blockIdx().x - 1) * blockDim().x + threadIdx().x
-    m, n = size(_A)
-    if 0 < li <= m*n
-      i, j = Tuple(CartesianIndices(_A)[li])
-      if i < j - _d
-        _A[i, j] = 0
-      end
-    end
-    return nothing
-  end
-
-  blk, thr = cudims(A)
-  @cuda blocks=blk threads=thr kernel!(A, d)
-  return A
-end
-
-function LinearAlgebra.triu!(A::CuMatrix{T}, d::Integer = 0) where T
-  function kernel!(_A, _d)
-    li = (blockIdx().x - 1) * blockDim().x + threadIdx().x
-    m, n = size(_A)
-    if 0 < li <= m*n
-      i, j = Tuple(CartesianIndices(_A)[li])
-      if j < i + _d
-        _A[i, j] = 0
-      end
-    end
-    return nothing
-  end
-
-  blk, thr = cudims(A)
-  @cuda blocks=blk threads=thr kernel!(A, d)
-  return A
-end
-
-
 ## reversing
 
 function _reverse(input::CuVector{T}, output::CuVector{T}) where {T}
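For reference, the deleted kernels implemented the standard LinearAlgebra semantics on the GPU, one thread per element: tril!(A, d) zeroes every entry above the d-th diagonal and triu!(A, d) zeroes every entry below it. A minimal usage sketch of those semantics (hypothetical example; it assumes a working CUDA setup and that the methods remain available, e.g. via the newly included src/linalg.jl):

    using LinearAlgebra, CuArrays
    A = CuArray(ones(Float32, 4, 4))
    tril!(A)       # zero everything above the main diagonal
    B = CuArray(ones(Float32, 4, 4))
    triu!(B, 1)    # zero the main diagonal and everything below it
    Array(A), Array(B)   # copy back to the host for inspection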

src/dnn/libcudnn.jl

Lines changed: 1 addition & 1 deletion
@@ -639,7 +639,7 @@ function cudnnGetRNNLinLayerBiasParams(handle, rnnDesc, pseudoLayer, xDesc, wDes
 end
 
 function cudnnRNNForwardInference(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes)
-  @check ccall((:cudnnRNNForwardInference, @libcudnn), cudnnStatus_t, (cudnnHandle_t, cudnnRNNDescriptor_t, Cint, Ptr{cudnnTensorDescriptor_t}, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnFilterDescriptor_t, Ptr{Cvoid}, Ptr{cudnnTensorDescriptor_t}, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, CuPtr{Cvoid}, Csize_t), handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes)
+  @check ccall((:cudnnRNNForwardInference, @libcudnn), cudnnStatus_t, (cudnnHandle_t, cudnnRNNDescriptor_t, Cint, Ptr{cudnnTensorDescriptor_t}, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnFilterDescriptor_t, CuPtr{Cvoid}, Ptr{cudnnTensorDescriptor_t}, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, CuPtr{Cvoid}, Csize_t), handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes)
 end
 
 function cudnnRNNForwardTraining(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes)
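The only change in this hunk is the ccall argument type for the packed weight buffer w, from Ptr{Cvoid} to CuPtr{Cvoid}: the buffer is GPU-resident, and CuPtr tells Julia's ccall machinery to pass the device address through rather than attempt a host-pointer conversion. A small illustrative sketch (hypothetical names, assuming a working CuArrays setup):

    using CuArrays
    # The packed RNN parameter blob lives in device memory, e.g.:
    w = CuArrays.zeros(Float32, 16)
    # Declaring the corresponding ccall argument as CuPtr{Cvoid} hands cuDNN the
    # raw device pointer of `w`; Ptr{Cvoid} would instead request a host pointer,
    # which is the wrong address space for this buffer.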

src/dnn/rnn.jl

Lines changed: 9 additions & 0 deletions
@@ -8,6 +8,8 @@
 # GRU: [weight, bias] × [input, hidden] × [reset, update, newmem]
 # LSTM: [weight, bias] × [input, hidden] × [input, forget, newmem, output]
 
+import LinearAlgebra: copy_transpose!
+
 function params(w::CuVector, input, hidden, n = 1)
   slice(offset, shape) = reshape(view(w, offset.+(1:prod(shape))), shape)
   wx = slice(0, (input, hidden*n))
@@ -56,6 +58,13 @@ function RNNDesc{T}(mode::cudnnRNNMode_t, input::Int, hidden::Int; layers = 1) w
   return rd
 end
 
+function setweights!(d::RNNDesc, Wi, Wh, b)
+  copy_transpose!(d.weights[1], Wi)
+  copy_transpose!(d.weights[2], Wh)
+  copy_transpose!(d.bias, b)
+  return
+end
+
 function cudnnGetRNNWorkspaceSize(r::RNNDesc, seqlen, xdesc)
   size = Csize_t[0]
   cudnnGetRNNWorkspaceSize(handle(), r, seqlen, xdesc, size)
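The new setweights! fills a descriptor's packed parameter views with the given input weights Wi, hidden weights Wh, and bias b, using copy_transpose! because the slices produced by params store each matrix in transposed (input × hidden) layout. A hedged usage sketch (the dimensions, the CUDNN_RNN_RELU mode constant, and the random inputs are illustrative assumptions):

    rd = RNNDesc{Float32}(CUDNN_RNN_RELU, 10, 20)   # input size 10, hidden size 20
    Wi = CuArrays.rand(Float32, 20, 10)             # hidden × input
    Wh = CuArrays.rand(Float32, 20, 20)             # hidden × hidden
    b  = CuArrays.rand(Float32, 20)
    setweights!(rd, Wi, Wh, b)   # transpose-copies into rd.weights[1], rd.weights[2], rd.bias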
