Closed
Description
Ran into this because invoke
is GPU incompatible and check_top_bit
is used by integer conversions. Nonetheless, it seems bad to have a dynamic dispatch and gcframe in such hot code. It kinda defeats the purpose of the noinline throw_inexacterror
.
Bisected to #31191:
commit 8c445663547791e59f33f9e8d49b276332107614
Author: Jameson Nash <[email protected]>
Date: Fri Mar 29 12:11:33 2019 -0400
internals: better representation for code (#31191)
_ _ _(_)_ | Documentation: https://docs.julialang.org
(_) | (_) (_) |
_ _ _| |_ __ _ | Type "?" for help, "]?" for Pkg help.
| | | | | | |/ _` | |
| | |_| | | | (_| | | Version 1.2.0-DEV.573 (2019-03-29)
_/ |\__'_|_|_|\__'_| | Commit 8c44566354* (26 days old master)
|__/ |
julia> code_llvm(Core.check_top_bit, Tuple{UInt})
; @ boot.jl:573 within `check_top_bit'
define i64 @julia_check_top_bit_10434(i64) {
top:
%1 = alloca %jl_value_t addrspace(10)*, i32 4
%gcframe = alloca %jl_value_t addrspace(10)*, i32 3
%2 = bitcast %jl_value_t addrspace(10)** %gcframe to i8*
call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 24, i32 0, i1 false)
%thread_ptr = call i8* asm "movq %fs:0, $0", "=r"()
%ptls_i8 = getelementptr i8, i8* %thread_ptr, i64 -15560
%ptls = bitcast i8* %ptls_i8 to %jl_value_t***
; @ boot.jl:574 within `check_top_bit'
; ┌ @ boot.jl:564 within `is_top_bit_set'
%3 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 0
%4 = bitcast %jl_value_t addrspace(10)** %3 to i64*
store i64 2, i64* %4
%5 = getelementptr %jl_value_t**, %jl_value_t*** %ptls, i32 0
%6 = load %jl_value_t**, %jl_value_t*** %5
%7 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 1
%8 = bitcast %jl_value_t addrspace(10)** %7 to %jl_value_t***
store %jl_value_t** %6, %jl_value_t*** %8
%9 = bitcast %jl_value_t*** %5 to %jl_value_t addrspace(10)***
store %jl_value_t addrspace(10)** %gcframe, %jl_value_t addrspace(10)*** %9
%10 = icmp sgt i64 %0, -1
; └
br i1 %10, label %L7, label %L5
L5: ; preds = %top
%11 = call %jl_value_t addrspace(10)* @jl_box_uint64(i64 zeroext %0)
%12 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 2
store %jl_value_t addrspace(10)* %11, %jl_value_t addrspace(10)** %12
%13 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, i32 0
store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140226490904560 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %13
%14 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, i32 1
store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140226415235320 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %14
%15 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, i32 2
store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140226418510032 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %15
%16 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, i32 3
store %jl_value_t addrspace(10)* %11, %jl_value_t addrspace(10)** %16
%17 = call nonnull %jl_value_t addrspace(10)* @jl_invoke(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140226491409328 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %1, i32 4)
call void @llvm.trap()
unreachable
L7: ; preds = %top
; @ boot.jl:575 within `check_top_bit'
%18 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 1
%19 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %18
%20 = getelementptr %jl_value_t**, %jl_value_t*** %ptls, i32 0
%21 = bitcast %jl_value_t*** %20 to %jl_value_t addrspace(10)**
store %jl_value_t addrspace(10)* %19, %jl_value_t addrspace(10)** %21
ret i64 %0
}
Latest good commit, right before that:
commit 4b1230277ca50c70ee517578db1ec30483cf9ad9
Author: Kristoffer Carlsson <[email protected]>
Date: Fri Mar 29 16:16:47 2019 +0100
add a nop conversion for some (#31536)
_ _ _(_)_ | Documentation: https://docs.julialang.org
(_) | (_) (_) |
_ _ _| |_ __ _ | Type "?" for help, "]?" for Pkg help.
| | | | | | |/ _` | |
| | |_| | | | (_| | | Version 1.2.0-DEV.572 (2019-03-29)
_/ |\__'_|_|_|\__'_| | Commit 4b1230277c* (26 days old master)
|__/ |
julia> code_llvm(Core.check_top_bit, Tuple{UInt})
; @ boot.jl:570 within `check_top_bit'
define i64 @julia_check_top_bit_8001(i64) {
top:
; @ boot.jl:571 within `check_top_bit'
; ┌ @ boot.jl:561 within `is_top_bit_set'
%1 = icmp sgt i64 %0, -1
; └
br i1 %1, label %L7, label %L5
L5: ; preds = %top
call void @julia_throw_inexacterror_90(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139625077153768 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139625080981712 to %jl_value_t*) to %jl_value_t addrspace(10)*), i64 %0)
call void @llvm.trap()
unreachable
L7: ; preds = %top
; @ boot.jl:572 within `check_top_bit'
ret i64 %0
}
Adding to the 1.2 milestone (are we doing that now we've branched?) since it seems bad to have this in a release, and it breaks lots of GPU code.