This repository was archived by the owner on Mar 12, 2021. It is now read-only.

Commit 1d56696
copy_transpose! and setweights!
Parent: d2446ae

File tree: 4 files changed, +11 / -40 lines


src/CuArrays.jl

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ include("broadcast.jl")
6464
include("matmul.jl")
6565
include("mapreduce.jl")
6666
include("accumulate.jl")
67+
include("linalg.jl")
6768

6869
include("gpuarray_interface.jl")
6970

src/array.jl

Lines changed: 0 additions & 39 deletions
@@ -239,45 +239,6 @@ function Base.fill!(A::CuArray{T}, x) where T <: MemsetCompatTypes
 end
 
 
-## generic linear algebra routines
-
-function LinearAlgebra.tril!(A::CuMatrix{T}, d::Integer = 0) where T
-  function kernel!(_A, _d)
-    li = (blockIdx().x - 1) * blockDim().x + threadIdx().x
-    m, n = size(_A)
-    if 0 < li <= m*n
-      i, j = Tuple(CartesianIndices(_A)[li])
-      if i < j - _d
-        _A[i, j] = 0
-      end
-    end
-    return nothing
-  end
-
-  blk, thr = cudims(A)
-  @cuda blocks=blk threads=thr kernel!(A, d)
-  return A
-end
-
-function LinearAlgebra.triu!(A::CuMatrix{T}, d::Integer = 0) where T
-  function kernel!(_A, _d)
-    li = (blockIdx().x - 1) * blockDim().x + threadIdx().x
-    m, n = size(_A)
-    if 0 < li <= m*n
-      i, j = Tuple(CartesianIndices(_A)[li])
-      if j < i + _d
-        _A[i, j] = 0
-      end
-    end
-    return nothing
-  end
-
-  blk, thr = cudims(A)
-  @cuda blocks=blk threads=thr kernel!(A, d)
-  return A
-end
-
-
 ## reversing
 
 function _reverse(input::CuVector{T}, output::CuVector{T}) where {T}
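For reference, the deleted kernels implemented the standard LinearAlgebra semantics on the GPU, one thread per element: tril!(A, d) zeroes every entry above the d-th diagonal and triu!(A, d) zeroes every entry below it. A minimal usage sketch of those semantics (hypothetical example; it assumes a working CUDA setup and that the methods remain available, e.g. via the newly included src/linalg.jl):

    using LinearAlgebra, CuArrays
    A = CuArray(ones(Float32, 4, 4))
    tril!(A)       # zero everything above the main diagonal
    B = CuArray(ones(Float32, 4, 4))
    triu!(B, 1)    # zero the main diagonal and everything below it
    Array(A), Array(B)   # copy back to the host for inspection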

src/dnn/libcudnn.jl

Lines changed: 1 addition & 1 deletion
@@ -639,7 +639,7 @@ function cudnnGetRNNLinLayerBiasParams(handle, rnnDesc, pseudoLayer, xDesc, wDes
 end
 
 function cudnnRNNForwardInference(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes)
-  @check ccall((:cudnnRNNForwardInference, @libcudnn), cudnnStatus_t, (cudnnHandle_t, cudnnRNNDescriptor_t, Cint, Ptr{cudnnTensorDescriptor_t}, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnFilterDescriptor_t, Ptr{Cvoid}, Ptr{cudnnTensorDescriptor_t}, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, CuPtr{Cvoid}, Csize_t), handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes)
+  @check ccall((:cudnnRNNForwardInference, @libcudnn), cudnnStatus_t, (cudnnHandle_t, cudnnRNNDescriptor_t, Cint, Ptr{cudnnTensorDescriptor_t}, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnFilterDescriptor_t, CuPtr{Cvoid}, Ptr{cudnnTensorDescriptor_t}, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, cudnnTensorDescriptor_t, CuPtr{Cvoid}, CuPtr{Cvoid}, Csize_t), handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes)
 end
 
 function cudnnRNNForwardTraining(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes)
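The only change in this hunk is the ccall argument type for the packed weight buffer w, from Ptr{Cvoid} to CuPtr{Cvoid}: the buffer is GPU-resident, and CuPtr tells Julia's ccall machinery to pass the device address through rather than attempt a host-pointer conversion. A small illustrative sketch (hypothetical names, assuming a working CuArrays setup):

    using CuArrays
    # The packed RNN parameter blob lives in device memory, e.g.:
    w = CuArrays.zeros(Float32, 16)
    # Declaring the corresponding ccall argument as CuPtr{Cvoid} hands cuDNN the
    # raw device pointer of `w`; Ptr{Cvoid} would instead request a host pointer,
    # which is the wrong address space for this buffer.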

src/dnn/rnn.jl

Lines changed: 9 additions & 0 deletions
@@ -8,6 +8,8 @@
 # GRU: [weight, bias] × [input, hidden] × [reset, update, newmem]
 # LSTM: [weight, bias] × [input, hidden] × [input, forget, newmem, output]
 
+import LinearAlgebra: copy_transpose!
+
 function params(w::CuVector, input, hidden, n = 1)
   slice(offset, shape) = reshape(view(w, offset.+(1:prod(shape))), shape)
   wx = slice(0, (input, hidden*n))
@@ -56,6 +58,13 @@ function RNNDesc{T}(mode::cudnnRNNMode_t, input::Int, hidden::Int; layers = 1) w
   return rd
 end
 
+function setweights!(d::RNNDesc, Wi, Wh, b)
+  copy_transpose!(d.weights[1], Wi)
+  copy_transpose!(d.weights[2], Wh)
+  copy_transpose!(d.bias, b)
+  return
+end
+
 function cudnnGetRNNWorkspaceSize(r::RNNDesc, seqlen, xdesc)
   size = Csize_t[0]
   cudnnGetRNNWorkspaceSize(handle(), r, seqlen, xdesc, size)
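The new setweights! fills a descriptor's packed parameter views with the given input weights Wi, hidden weights Wh, and bias b, using copy_transpose! because the slices produced by params store each matrix in transposed (input × hidden) layout. A hedged usage sketch (the dimensions, the CUDNN_RNN_RELU mode constant, and the random inputs are illustrative assumptions):

    rd = RNNDesc{Float32}(CUDNN_RNN_RELU, 10, 20)   # input size 10, hidden size 20
    Wi = CuArrays.rand(Float32, 20, 10)             # hidden × input
    Wh = CuArrays.rand(Float32, 20, 20)             # hidden × hidden
    b  = CuArrays.rand(Float32, 20)
    setweights!(rd, Wi, Wh, b)   # transpose-copies into rd.weights[1], rd.weights[2], rd.bias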
