Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cpp/kernels/fmha_v2/fmha_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import subprocess

import pytest
from cuda import cuda, nvrtc

try:
from cuda.bindings import driver as cuda
from cuda.bindings import nvrtc
except ImportError:
from cuda import cuda, nvrtc


def ASSERT_DRV(err):
Expand Down
13 changes: 8 additions & 5 deletions tensorrt_llm/_ipc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,20 @@
import sys
from typing import List, Tuple

from cuda import cuda, cudart
from cuda.cudart import cudaError_t
try:
from cuda.bindings import driver as cuda
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cuda, cudart

from ._utils import mpi_comm
from .logger import logger
from .mapping import Mapping


def _raise_if_error(error: cudaError_t | cuda.CUresult):
if isinstance(error, cudaError_t):
if error != cudaError_t.cudaSuccess:
def _raise_if_error(error: cudart.cudaError_t | cuda.CUresult):
if isinstance(error, cudart.cudaError_t):
if error != cudart.cudaError_t.cudaSuccess:
raise RuntimeError(f"CUDA Runtime API error: {repr(error)}")
if isinstance(error, cuda.CUresult):
if error != cuda.CUresult.CUDA_SUCCESS:
Expand Down
6 changes: 5 additions & 1 deletion tensorrt_llm/_mnnvl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@

import pynvml
import torch
from cuda import cuda

try:
from cuda.bindings import driver as cuda
except ImportError:
from cuda import cuda

from ._dlpack_utils import pack_strided_memory
from ._utils import mpi_comm
Expand Down
6 changes: 5 additions & 1 deletion tensorrt_llm/_torch/pyexecutor/py_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
from typing import Dict, Iterable, List, Optional, Tuple, Union

import torch
from cuda import cudart

try:
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cudart

from tensorrt_llm._torch.pyexecutor.resource_manager import ResourceManagerType
from tensorrt_llm._torch.pyexecutor.seq_slot_manager import SeqSlotManager
Expand Down
6 changes: 5 additions & 1 deletion tensorrt_llm/auto_parallel/cluster_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@

import pynvml
import torch
from cuda import cudart

try:
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cudart

from tensorrt_llm._utils import DictConversion
from tensorrt_llm.logger import logger
Expand Down
5 changes: 4 additions & 1 deletion tensorrt_llm/runtime/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@
import torch
import tensorrt as trt
# isort: on
from cuda import cudart
try:
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cudart

from tensorrt_llm.runtime.memory_pools.memory_pools_allocator import \
MemoryPoolsAllocator
Expand Down
7 changes: 6 additions & 1 deletion tensorrt_llm/runtime/multimodal_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@
from typing import Optional, Tuple

import torch.nn.functional as F
from cuda import cudart

try:
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cudart

from huggingface_hub import hf_hub_download
from PIL import Image, UnidentifiedImageError
from safetensors import safe_open
Expand Down
6 changes: 5 additions & 1 deletion tests/integration/defs/sysinfo/get_sysinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@

import psutil
import pynvml
from cuda import cuda

try:
from cuda.bindings import driver as cuda
except ImportError:
from cuda import cuda

# Logger
logger = logging.getLogger(__name__)
Expand Down
5 changes: 4 additions & 1 deletion tests/microbenchmarks/all_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
# isort: off
import torch
# isort: on
from cuda import cudart
try:
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cudart

import tensorrt_llm as tllm
from tensorrt_llm import Mapping
Expand Down
6 changes: 5 additions & 1 deletion tests/microbenchmarks/build_time_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
import traceback

import tensorrt as trt
from cuda import cudart

try:
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cudart

import tensorrt_llm
from tensorrt_llm import (AutoConfig, AutoModelForCausalLM, BuildConfig,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@
def run_single_rank(dtype, strategy, message_size):
import numpy as np
import torch
from cuda import cuda
try:
from cuda.bindings import driver as cuda
except ImportError:
from cuda import cuda

import tensorrt_llm
from tensorrt_llm._torch.distributed import AllReduce, AllReduceStrategy
Expand Down
5 changes: 4 additions & 1 deletion tests/unittest/trt/functional/test_allreduce_norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
import torch
# isort: on

from cuda import cudart
try:
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cudart
from parameterized import parameterized
from utils.util import create_session, run_session, unittest_name_func

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
import torch
# isort: on

from cuda import cudart
try:
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cudart
from parameterized import parameterized
from utils.util import create_session, run_session, unittest_name_func

Expand Down
5 changes: 4 additions & 1 deletion tests/unittest/trt/functional/test_nccl.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
import torch
# isort: on

from cuda import cudart
try:
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cudart
from parameterized import parameterized
from utils.util import create_session, run_session, unittest_name_func

Expand Down
5 changes: 4 additions & 1 deletion tests/unittest/trt/functional/test_pp_reduce_scatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
import torch
# isort: on

from cuda import cudart
try:
from cuda.bindings import runtime as cudart
except ImportError:
from cuda import cudart
from parameterized import parameterized
from utils.util import create_session, run_session, unittest_name_func

Expand Down
8 changes: 7 additions & 1 deletion tests/unittest/utils/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
import pytest
import tensorrt as trt
import torch
from cuda import cuda, nvrtc

try:
from cuda.bindings import driver as cuda
from cuda.bindings import nvrtc
except ImportError:
from cuda import cuda, nvrtc

from parameterized import parameterized

import tensorrt_llm
Expand Down