diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml
new file mode 100644
index 0000000000..9b017a9811
--- /dev/null
+++ b/.github/workflows/test-linux-gpu.yml
@@ -0,0 +1,73 @@
+name: Unit-tests on Linux GPU
+
+on:
+  pull_request:
+  push:
+    branches:
+      - nightly
+      - main
+      - release/*
+  workflow_dispatch:
+
+env:
+  CHANNEL: "nightly"
+
+jobs:
+  tests:
+    strategy:
+      matrix:
+        python_version: ["3.8"]
+        cuda_arch_version: ["11.6"]
+      fail-fast: false
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      repository: pytorch/text
+      gpu-arch-type: cuda
+      gpu-arch-version: ${{ matrix.cuda_arch_version }}
+      timeout: 120
+      script: |
+        # Mark Build Directory Safe
+        git config --global --add safe.directory /__w/text/text
+
+        # Set up Environment Variables
+        export PYTHON_VERSION="${{ matrix.python_version }}"
+        export VERSION="${{ matrix.cuda_arch_version }}"
+        export CUDATOOLKIT="pytorch-cuda=${VERSION}"
+
+        # Set CHANNEL
+        if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then
+          export CHANNEL=test
+        else
+          export CHANNEL=nightly
+        fi
+
+        # Create Conda Env
+        conda create --quiet -yp ci_env python="${PYTHON_VERSION}"
+        conda activate /work/ci_env
+        python3 -m pip --quiet install cmake>=3.18.0 ninja
+        conda env update --file ".circleci/unittest/linux/scripts/environment.yml" --prune
+
+        # TorchText-specific Setup
+        printf "* Downloading SpaCy English models\n"
+        python -m spacy download en_core_web_sm
+        printf "* Downloading SpaCy German models\n"
+        python -m spacy download de_core_news_sm
+
+        # Install PyTorch and TorchData
+        set -ex
+        conda install \
+          --yes \
+          --quiet \
+          -c "pytorch-${CHANNEL}" \
+          -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \
+          "${CUDATOOLKIT}"
+        printf "Installing torchdata nightly\n"
+        python3 -m pip install --pre torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu --quiet
+        python3 setup.py develop
+        python3 -m pip install parameterized --quiet
+
+        # Run Tests
+        python3 -m torch.utils.collect_env
+        cd test
+        python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20 torchtext_unittest/models/gpu_tests
diff --git a/test/torchtext_unittest/common/case_utils.py b/test/torchtext_unittest/common/case_utils.py
index 9ed9a1ce62..b4d040547a 100644
--- a/test/torchtext_unittest/common/case_utils.py
+++ b/test/torchtext_unittest/common/case_utils.py
@@ -4,6 +4,7 @@
 import unittest
 from itertools import zip_longest
 
+import torch
 from torchtext._internal.module_utils import is_module_available
 
 
@@ -37,6 +38,17 @@ def get_temp_path(self, *paths):
         return path
 
 
+class TestBaseMixin:
+    """Mixin to provide consistent way to define device/dtype/backend aware TestCase"""
+
+    dtype = None
+    device = None
+
+    def setUp(self):
+        super().setUp()
+        torch.random.manual_seed(2434)
+
+
 def skipIfNoModule(module, display_name=None):
     display_name = display_name or module
     return unittest.skipIf(not is_module_available(module), f'"{display_name}" is not available')
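TestBaseMixin carries no assertions of its own: it only declares the dtype/device class attributes and reseeds the RNG in setUp so every test method starts from the same random state. A minimal sketch of how it is meant to compose with a concrete unittest.TestCase; the names MyOpTestImpl and MyOpCPUTest are illustrative, not part of this patch:

    import unittest

    import torch
    from torchtext_unittest.common.case_utils import TestBaseMixin


    class MyOpTestImpl(TestBaseMixin):
        def test_add(self):
            # hypothetical test body; self.device and self.dtype are
            # supplied by the concrete subclass below
            x = torch.ones(2, device=self.device, dtype=self.dtype)
            self.assertTrue(torch.equal(x + x, x * 2))


    class MyOpCPUTest(MyOpTestImpl, unittest.TestCase):
        dtype = torch.float32
        device = torch.device("cpu")

Because setUp calls torch.random.manual_seed(2434), two runs of the same test draw identical random tensors, which keeps randomly initialized models reproducible from run to run.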
diff --git a/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py
new file mode 100644
index 0000000000..07452b4619
--- /dev/null
+++ b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py
@@ -0,0 +1,11 @@
+import unittest
+
+import torch
+from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
+from torchtext_unittest.models.models_test_impl import BaseTestModels
+
+
+@unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available")
+class TestModels32GPU(BaseTestModels, TorchtextTestCase):
+    dtype = torch.float32
+    device = torch.device("cuda")
diff --git a/test/torchtext_unittest/models/models_cpu_test.py b/test/torchtext_unittest/models/models_cpu_test.py
new file mode 100644
index 0000000000..3bcd3e4eb5
--- /dev/null
+++ b/test/torchtext_unittest/models/models_cpu_test.py
@@ -0,0 +1,9 @@
+import torch
+
+from ..common.torchtext_test_case import TorchtextTestCase
+from .models_test_impl import BaseTestModels
+
+
+class TestModels32CPU(BaseTestModels, TorchtextTestCase):
+    dtype = torch.float32
+    device = torch.device("cpu")
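With the shared test bodies factored out, covering another device/dtype combination is just another thin subclass. A hypothetical half-precision GPU variant following the same pattern (TestModels16GPU is an invented name, not part of this patch):

    import unittest

    import torch
    from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
    from torchtext_unittest.models.models_test_impl import BaseTestModels


    @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available")
    class TestModels16GPU(BaseTestModels, TorchtextTestCase):
        dtype = torch.float16
        device = torch.device("cuda")

Note that the rename below moves the shared bodies into models_test_impl.py, which matches neither of pytest's default test-file patterns (test_*.py, *_test.py), so BaseTestModels is only ever collected through concrete subclasses such as these.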
diff --git a/test/torchtext_unittest/models/test_models.py b/test/torchtext_unittest/models/models_test_impl.py
similarity index 85%
rename from test/torchtext_unittest/models/test_models.py
rename to test/torchtext_unittest/models/models_test_impl.py
index 0e92f6b631..cdfd196268 100644
--- a/test/torchtext_unittest/models/test_models.py
+++ b/test/torchtext_unittest/models/models_test_impl.py
@@ -2,15 +2,27 @@
 from unittest.mock import patch
 
 import torch
-import torchtext
 from torch.nn import functional as torch_F
 
-from ..common.torchtext_test_case import TorchtextTestCase
+from ..common.case_utils import TestBaseMixin
 
 
-class TestModels(TorchtextTestCase):
+class BaseTestModels(TestBaseMixin):
+    def get_model(self, encoder_conf, head=None, freeze_encoder=False, checkpoint=None, override_checkpoint_head=False):
+        from torchtext.models import RobertaBundle
+
+        model = RobertaBundle.build_model(
+            encoder_conf=encoder_conf,
+            head=head,
+            freeze_encoder=freeze_encoder,
+            checkpoint=checkpoint,
+            override_checkpoint_head=override_checkpoint_head,
+        )
+        model.to(device=self.device, dtype=self.dtype)
+        return model
+
     def test_roberta_bundler_build_model(self) -> None:
-        from torchtext.models import RobertaClassificationHead, RobertaEncoderConf, RobertaModel, RobertaBundle
+        from torchtext.models import RobertaClassificationHead, RobertaEncoderConf, RobertaModel
 
         dummy_encoder_conf = RobertaEncoderConf(
             vocab_size=10, embedding_dim=16, ffn_dimension=64, num_attention_heads=2, num_encoder_layers=2
@@ -18,14 +30,14 @@ def test_roberta_bundler_build_model(self) -> None:
 
         # case: user provide encoder checkpoint state dict
         dummy_encoder = RobertaModel(dummy_encoder_conf)
-        model = RobertaBundle.build_model(encoder_conf=dummy_encoder_conf, checkpoint=dummy_encoder.state_dict())
+        model = self.get_model(encoder_conf=dummy_encoder_conf, checkpoint=dummy_encoder.state_dict())
         self.assertEqual(model.state_dict(), dummy_encoder.state_dict())
 
         # case: user provide classifier checkpoint state dict when head is given and override_head is False (by default)
         dummy_classifier_head = RobertaClassificationHead(num_classes=2, input_dim=16)
         another_dummy_classifier_head = RobertaClassificationHead(num_classes=2, input_dim=16)
         dummy_classifier = RobertaModel(dummy_encoder_conf, dummy_classifier_head)
-        model = RobertaBundle.build_model(
+        model = self.get_model(
             encoder_conf=dummy_encoder_conf,
             head=another_dummy_classifier_head,
             checkpoint=dummy_classifier.state_dict(),
@@ -34,7 +46,7 @@ def test_roberta_bundler_build_model(self) -> None:
 
         # case: user provide classifier checkpoint state dict when head is given and override_head is set True
         another_dummy_classifier_head = RobertaClassificationHead(num_classes=2, input_dim=16)
-        model = RobertaBundle.build_model(
+        model = self.get_model(
             encoder_conf=dummy_encoder_conf,
             head=another_dummy_classifier_head,
             checkpoint=dummy_classifier.state_dict(),
@@ -48,13 +60,13 @@ def test_roberta_bundler_build_model(self) -> None:
         encoder_state_dict = {}
         for k, v in dummy_classifier.encoder.state_dict().items():
             encoder_state_dict["encoder." + k] = v
-        model = torchtext.models.RobertaBundle.build_model(
+        model = self.get_model(
             encoder_conf=dummy_encoder_conf, head=dummy_classifier_head, checkpoint=encoder_state_dict
         )
         self.assertEqual(model.state_dict(), dummy_classifier.state_dict())
 
     def test_roberta_bundler_train(self) -> None:
-        from torchtext.models import RobertaClassificationHead, RobertaEncoderConf, RobertaModel, RobertaBundle
+        from torchtext.models import RobertaClassificationHead, RobertaEncoderConf, RobertaModel
 
         dummy_encoder_conf = RobertaEncoderConf(
             vocab_size=10, embedding_dim=16, ffn_dimension=64, num_attention_heads=2, num_encoder_layers=2
@@ -63,8 +75,8 @@ def test_roberta_bundler_train(self) -> None:
 
         def _train(model):
             optim = SGD(model.parameters(), lr=1)
-            model_input = torch.tensor([[0, 1, 2, 3, 4, 5]])
-            target = torch.tensor([0])
+            model_input = torch.tensor([[0, 1, 2, 3, 4, 5]]).to(device=self.device)
+            target = torch.tensor([0]).to(device=self.device)
             logits = model(model_input)
             loss = torch_F.cross_entropy(logits, target)
             loss.backward()
@@ -73,7 +85,7 @@ def _train(model):
         # does not freeze encoder
         dummy_classifier_head = RobertaClassificationHead(num_classes=2, input_dim=16)
         dummy_classifier = RobertaModel(dummy_encoder_conf, dummy_classifier_head)
-        model = RobertaBundle.build_model(
+        model = self.get_model(
             encoder_conf=dummy_encoder_conf,
             head=dummy_classifier_head,
             freeze_encoder=False,
@@ -91,7 +103,7 @@ def _train(model):
         # freeze encoder
         dummy_classifier_head = RobertaClassificationHead(num_classes=2, input_dim=16)
         dummy_classifier = RobertaModel(dummy_encoder_conf, dummy_classifier_head)
-        model = RobertaBundle.build_model(
+        model = self.get_model(
             encoder_conf=dummy_encoder_conf,
             head=dummy_classifier_head,
             freeze_encoder=True,
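The get_model helper ends with Module.to(device=..., dtype=...), which moves parameters and buffers to the target device and casts floating-point parameters to the target dtype, mutating the module in place. A small sketch of that semantics in plain torch, independent of this patch:

    import torch
    from torch import nn

    layer = nn.Linear(4, 2)
    # Module.to mutates the module: parameters move to the device and
    # floating-point parameters are cast to the requested dtype
    layer.to(device=torch.device("cpu"), dtype=torch.float64)
    assert layer.weight.dtype == torch.float64

Integer tensors such as the token indices in _train keep their integer dtype, which is why the diff only adds a .to(device=self.device) move to the inputs rather than a dtype cast.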