Skip to content

[bug][opt][AMDGPU]At the -O2 optimization level, the division computation result for uint64_t type is incorrect #151676

@0oyyo0

Description

@0oyyo0

①I tested the following code on gfx906 and found that the results were not as expected. The core of this code is to check whether the input string can be safely converted to a uint64_t integer.
②When compiled using the following command, the calculation result is incorrect. The cause of the error is located in the incorrect calculation result of the line "a2 / uint64_t{10}".
hipcc demo.cpp -o demo -std=c++17 -O2

Image

③When DivExpand is disabled by adding the "disable-idiv-expansion" option, the calculation result is correct.
hipcc demo.cpp -o demo -std=c++17 -O2 -mllvm -amdgpu-codegenprepare-disable-idiv-expansion=true

Image

④Current analysis: In the amdgpu-codegenprepare pass, udiv is converted to fmul.
⑤Please advise: What is the specific role of the expandDivRem32 function in the amdgpu-codegenprepare pass? Why does using it lead to incorrect computation results?

#include <hip/hip_runtime.h>
#include <iostream>
#include <limits>
#include <string>
#include <type_traits>

__device__ bool check_string(const char* d_str, size_t len) {
    auto iter = d_str;
    auto const iter_end = d_str + len;

    auto const bound_val = std::numeric_limits<uint64_t>::max();
    uint64_t value = 0;

    while (iter != iter_end) {
      auto const chr = *iter++;
      if (chr < '0' || chr > '9') { return false; }

      auto const digit = static_cast<uint64_t>(chr - '0');

      // auto const bound_check = (bound_val - sign * digit) / uint64_t{10} * sign;
      uint64_t const a1 = digit;
      uint64_t const a2 = bound_val - a1;
      uint64_t const bound_check = a2 / uint64_t{10};
      
      printf("bound_val: %llu, digit: %llu, value: %llu, bound_check: %llu\n", bound_val, digit, value, bound_check);

      if (value > bound_check) {
        return false;
      }

      value = value * uint64_t{10} + digit;
    }

    return true;
  
}

__global__ void check_string_kernel(const char* str, size_t len, bool* result) {
    if (threadIdx.x == 0 && blockIdx.x == 0) {
        result[0] = check_string(str, len);
    }
}

int main() {
    std::string input = "9223372036854775806";
    
    char* d_str;
    bool* d_result;
    hipMalloc(&d_str, input.size());
    hipMalloc(&d_result, sizeof(bool));
    
    hipMemcpy(d_str, input.c_str(), input.size(), hipMemcpyHostToDevice);
    
    check_string_kernel<<<1, 256>>>(d_str, input.size(), d_result);
    hipDeviceSynchronize();
    
    bool h_result;
    hipMemcpy(&h_result, d_result, sizeof(bool), hipMemcpyDeviceToHost);
    
    std::cout << (h_result ? "check passed" : "Got 'false' when checking if \"9223372036854775806\" is a uint64_t. Expect 'true'.") << std::endl;
    
    hipFree(d_str);
    hipFree(d_result);
    
    return 0;
}

Metadata

Metadata

Assignees

No one assigned

    Type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions