diff --git a/.github/pr-labeler-config.yml b/.github/pr-labeler-config.yml
index d77df24f8bd9..f517cecf20b1 100644
--- a/.github/pr-labeler-config.yml
+++ b/.github/pr-labeler-config.yml
@@ -1,55 +1,55 @@
 # Add 'docker' to any changes within 'docker' folder or any subfolders
 docker:
   - changed-files:
-    - any-glob-to-any-file: docker/**
+      - any-glob-to-any-file: docker/**

 # Add 'docs' to any changes within 'docs' folder
 docs:
   - changed-files:
-    - any-glob-to-any-file: docs/**
+      - any-glob-to-any-file: docs/**

 # Add 'ci' to any changes in '.github' folder
 ci:
   - changed-files:
-    - any-glob-to-any-file: .github/**
+      - any-glob-to-any-file: .github/**

 # Add 'examples' to any changes within 'examples' folder
 examples:
   - changed-files:
-    - any-glob-to-any-file: examples/**
+      - any-glob-to-any-file: examples/**

 # Add 'base' to any changes within 'base' folder
 "module: base":
   - changed-files:
-    - any-glob-to-any-file: ignite/base/**/*
+      - any-glob-to-any-file: ignite/base/**/*

 # Add 'contrib' to any changes within 'contrib' folder
 "module: contrib":
   - changed-files:
-    - any-glob-to-any-file: ignite/contrib/**/*
+      - any-glob-to-any-file: ignite/contrib/**/*

 # Add 'distributed' to any changes within 'distributed' folder
 "module: distributed":
   - changed-files:
-    - any-glob-to-any-file: ignite/distributed/**/*
+      - any-glob-to-any-file: ignite/distributed/**/*

 # Add 'engine' to any changes within 'engine' folder
 "module: engine":
   - changed-files:
-    - any-glob-to-any-file: ignite/engine/**/*
+      - any-glob-to-any-file: ignite/engine/**/*

 # Add 'handlers' to any changes within 'handlers' folder
 "module: handlers":
   - changed-files:
-    - any-glob-to-any-file: ignite/handlers/**/*
+      - any-glob-to-any-file: ignite/handlers/**/*

 # Add 'metrics' to any changes within 'metrics' folder
 "module: metrics":
   - changed-files:
-    - any-glob-to-any-file: ignite/metrics/**/*
+      - any-glob-to-any-file: ignite/metrics/**/*

-
 # Add 'utils' to any changes within 'utils' module
 "module: utils":
   - changed-files:
-    - any-glob-to-any-file: ignite/utils.py
+      - any-glob-to-any-file: ignite/utils.py
diff --git a/.github/workflows/discord_issues.yml b/.github/workflows/discord_issues.yml
index 6fd0e46a4d45..81cacad75d94 100644
--- a/.github/workflows/discord_issues.yml
+++ b/.github/workflows/discord_issues.yml
@@ -7,7 +7,7 @@ on:
   workflow_dispatch:
     inputs:
       issue_number:
-        description: 'Issue number'
+        description: "Issue number"
         required: true

 permissions:
@@ -26,5 +26,3 @@ jobs:
           issue_number: ${{ github.event.inputs.issue_number || github.event.issue.number }}
           issue_comment: Hey 👋, I've just created a [thread]($THREAD_LINK$) for this issue on [PyTorch-Ignite Discord](https://pytorch-ignite.ai/chat) where you can quickly talk to the community on the topic.
           discord_message: New issue created in `${{ github.repository }}`:
-
-
diff --git a/.github/workflows/discord_pull_requests.yaml b/.github/workflows/discord_pull_requests.yaml
index caea94ad0734..ffeea5da7cff 100644
--- a/.github/workflows/discord_pull_requests.yaml
+++ b/.github/workflows/discord_pull_requests.yaml
@@ -7,7 +7,7 @@ on:
   workflow_dispatch:
     inputs:
       pull_request_number:
-        description: 'Pull request number'
+        description: "Pull request number"
         required: true

 permissions:
diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
index 81a0914e34c2..0fdaeb9bb852 100644
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -9,14 +9,12 @@ on:
     types: [published]
   workflow_dispatch:

-
 concurrency:
   # --
   group: docker-build-publish-${{ github.ref_name }}-${{ !(github.ref_protected) || github.sha }}
   cancel-in-progress: true

 jobs:
-
   setup:
     name: Setup
     runs-on: ubuntu-latest
@@ -46,7 +44,6 @@ jobs:
           echo "pth_version=$(python -c "import configparser; cfg=configparser.ConfigParser(); cfg.read('docker.cfg'); print(cfg.get('DEFAULT', 'build_docker_image_pytorch_version'))")" >> $GITHUB_OUTPUT
           echo "hvd_version=$(python -c "import configparser; cfg=configparser.ConfigParser(); cfg.read('docker.cfg'); print(cfg.get('DEFAULT', 'build_docker_image_hvd_version'))")" >> $GITHUB_OUTPUT

-
   build-hvd:
     name: Build all Horovod flavoured PyTorch-Ignite images
     needs: setup
@@ -106,7 +103,6 @@ jobs:
           bash push_all.sh hvd-vision
           bash push_all.sh hvd-nlp

-
   build-hvd-apex:
     name: Build all Horovod + Apex flavoured PyTorch-Ignite images
     needs: setup
@@ -166,7 +162,6 @@ jobs:
           bash push_all.sh hvd-apex-vision
           bash push_all.sh hvd-apex-nlp

-
   build-main:
     name: Build all PyTorch-Ignite images
     needs: setup
@@ -226,7 +221,6 @@ jobs:
           bash push_all.sh vision
           bash push_all.sh nlp

-
   build-main-apex:
     name: Build all PyTorch-Ignite images with Apex
     needs: setup
diff --git a/.github/workflows/pytorch-version-tests.yml b/.github/workflows/pytorch-version-tests.yml
index 906f499f3b95..e1841d2682a7 100644
--- a/.github/workflows/pytorch-version-tests.yml
+++ b/.github/workflows/pytorch-version-tests.yml
@@ -16,8 +16,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: [3.9, "3.10", "3.11"]
-        pytorch-version:
-          [2.5.1, 2.4.1, 2.3.1, 2.2.2, 1.13.1, 1.12.1, 1.10.0]
+        pytorch-version: [2.5.1, 2.4.1, 2.3.1, 2.2.2, 1.13.1, 1.12.1, 1.10.0]
         exclude:
           - pytorch-version: 1.10.0
             python-version: "3.10"
diff --git a/.github/workflows/triage.yml b/.github/workflows/triage.yml
index aa4526191701..b60bb8a40ef9 100644
--- a/.github/workflows/triage.yml
+++ b/.github/workflows/triage.yml
@@ -19,7 +19,6 @@ jobs:
       with:
         configuration-path: .github/pr-labeler-config.yml
         repo-token: "${{ secrets.GITHUB_TOKEN }}"
-
       # Turned off due to unexpected behavior on issue opening+labeling? https://github.com/pytorch/ignite/issues/1836
       # - name: Welcome
       #   uses: actions/first-interaction@v1
diff --git a/docker/README.md b/docker/README.md
index bca8f0add8cb..57c83e9f5b85 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -50,6 +50,7 @@ Available Tensor Operations:
 - `docker pull pytorchignite/hvd-apex-nlp:latest`

 **Deprecated images** (no version updates)
+
 - [msdp/Dockerfile.msdp-apex-base](msdp/Dockerfile.msdp-apex): multi-stage MSDeepSpeed build with latest Pytorch, Ignite image with minimal dependencies
   - `docker pull pytorchignite/msdp-apex:latest`
 - [msdp/Dockerfile.msdp-apex-vision](msdp/Dockerfile.msdp-apex-vision): base MSDeepSpeed build with useful computer vision libraries
diff --git a/examples/mnist/README.md b/examples/mnist/README.md
index 3523cd37615a..e630fb0a0020 100644
--- a/examples/mnist/README.md
+++ b/examples/mnist/README.md
@@ -17,7 +17,6 @@ python mnist.py

 Same example with logging using TQDM progress bar

-
 ```bash
 python mnist_with_tqdm_logger.py
 ```
@@ -30,7 +29,6 @@ MNIST example with training and validation monitoring using Tensorboard

 - Tensorboard: `pip install tensorboard`
-
 - Run the example:

 ```bash
diff --git a/examples/references/classification/imagenet/README.md b/examples/references/classification/imagenet/README.md
index 02efeba9dc57..fdaf0151cde2 100644
--- a/examples/references/classification/imagenet/README.md
+++ b/examples/references/classification/imagenet/README.md
@@ -12,9 +12,9 @@ Features:
 | --------- | ----------------------- | ----------------------- | ------------------- | ------------------- |
 | ResNet-50 | 78%                      | 92%                      | 77%                 | 94%                 |

-Experiment | Model | Training Top-1 Accuracy | Training Top-5 Accuracy | Test Top-1 Accuracy | Test Top-5 Accuracy | ClearML Link
----|---|---|---|---|---|---
-configs/???.py |
+| Experiment     | Model | Training Top-1 Accuracy | Training Top-5 Accuracy | Test Top-1 Accuracy | Test Top-5 Accuracy | ClearML Link |
+| -------------- | ----- | ----------------------- | ----------------------- | ------------------- | ------------------- | ------------ |
+| configs/???.py |

 ## Setup
@@ -25,6 +25,7 @@ pip install -r requirements.txt
 ### Docker

 For docker users, you can use the following images to run the example:
+
 ```bash
 docker pull pytorchignite/vision:latest
 ```
@@ -47,9 +48,11 @@ export DATASET_PATH=/path/to/imagenet
 - Adjust batch size for your GPU type in the configuration file: `configs/baseline_resnet50.py` or `configs/baseline_resnet50.py`

 Run the following command:
+
 ```bash
 CUDA_VISIBLE_DEVICES=0 python -u main.py training configs/baseline_resnet50.py
 ```
+
 #### Multiple GPUs

 - Adjust total batch size for your GPUs in the configuration file: `configs/baseline_resnet50.py` or `configs/baseline_resnet50.py`
@@ -58,7 +61,6 @@ CUDA_VISIBLE_DEVICES=0 python -u main.py training configs/baseline_resnet50.py
 OMP_NUM_THREADS=1 torchrun --nproc_per_node=2 main.py training configs/baseline_resnet50.py
 ```

-
 ## Acknowledgements

 Trainings were done using credits provided by [trainml.ai](trainml.ai) platform.
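The ImageNet README changed above only goes as far as `docker pull`. A minimal sketch of actually running the example inside that image is given below; the mount points, the `--shm-size` value, and the assumption that the example sources sit in the current directory are illustrative and not part of the README:

```bash
# Sketch: launch the single-GPU ImageNet training inside the pulled image.
# Assumptions: the NVIDIA Container Toolkit is installed, the dataset lives at
# $DATASET_PATH on the host, and the example code is in the current directory.
docker run --rm -it --gpus all \
  --shm-size=8g \
  -v "$DATASET_PATH":/data -e DATASET_PATH=/data \
  -v "$PWD":/workspace -w /workspace \
  pytorchignite/vision:latest \
  python -u main.py training configs/baseline_resnet50.py
```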
diff --git a/examples/references/segmentation/pascal_voc2012/README.md b/examples/references/segmentation/pascal_voc2012/README.md
index 011ead1f281c..5235b05903fa 100644
--- a/examples/references/segmentation/pascal_voc2012/README.md
+++ b/examples/references/segmentation/pascal_voc2012/README.md
@@ -8,11 +8,10 @@ Features:
 - Distributed training with native automatic mixed precision
 - Experiments tracking with [ClearML](https://github.com/allegroai/clearml)

-Experiment | Model | Dataset | Val Avg IoU | ClearML Link
----|---|---|---|---
-configs/baseline_dplv3_resnet101.py | DeepLabV3 Resnet101 | VOC Only | 0.659161 | [link](https://app.clear.ml/projects/0e9a3a92d3134283b7d5572d516d60c5/experiments/a7254f084a9e47ca9380dfd739f89520/output/execution)
-configs/baseline_dplv3_resnet101_sbd.py | DeepLabV3 Resnet101 | VOC+SBD | 0.6853087 | [link](https://app.clear.ml/projects/0e9a3a92d3134283b7d5572d516d60c5/experiments/dc4cee3377a74d19bc2d0e0e4d638c1f/output/execution)
-
+| Experiment                              | Model               | Dataset  | Val Avg IoU | ClearML Link                                                                                                                         |
+| --------------------------------------- | ------------------- | -------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------ |
+| configs/baseline_dplv3_resnet101.py     | DeepLabV3 Resnet101 | VOC Only | 0.659161    | [link](https://app.clear.ml/projects/0e9a3a92d3134283b7d5572d516d60c5/experiments/a7254f084a9e47ca9380dfd739f89520/output/execution)  |
+| configs/baseline_dplv3_resnet101_sbd.py | DeepLabV3 Resnet101 | VOC+SBD  | 0.6853087   | [link](https://app.clear.ml/projects/0e9a3a92d3134283b7d5572d516d60c5/experiments/dc4cee3377a74d19bc2d0e0e4d638c1f/output/execution)  |

 ## Setup
@@ -23,10 +22,13 @@ pip install -r requirements.txt
 ### Docker

 For docker users, you can use the following images to run the example:
+
 ```bash
 docker pull pytorchignite/vision:latest
 ```
+
 or
+
 ```bash
 docker pull pytorchignite/hvd-vision:latest
 ```
@@ -51,7 +53,6 @@ This script will download and extract the following datasets into `/path/to/data
 - The [Pascal VOC2012](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar) dataset
 - Optionally, the [SBD](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz) evaluation dataset

-
 ## Usage

 Please, export the `DATASET_PATH` environment variable for the Pascal VOC2012 dataset.
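For the dataset step touched above, a manual equivalent of the repository's download script could look like the sketch below; the extraction layout under `/path/to/data` is an assumption, and the script shipped with the example remains the reference:

```bash
# Sketch: fetch the datasets listed above by hand instead of using the script.
# Assumption: extracting directly under /path/to/data matches the expected layout.
DATA_DIR=/path/to/data
mkdir -p "$DATA_DIR"
wget -P "$DATA_DIR" http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
tar -xf "$DATA_DIR/VOCtrainval_11-May-2012.tar" -C "$DATA_DIR"
# Optional SBD evaluation data
wget -P "$DATA_DIR" http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz
tar -xzf "$DATA_DIR/benchmark.tgz" -C "$DATA_DIR"
```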
@@ -75,6 +76,7 @@ export SBD_DATASET_PATH=/path/to/SBD/
 - Adjust batch size for your GPU type in the configuration file: `configs/baseline_dplv3_resnet101_sbd.py` or `configs/baseline_dplv3_resnet101.py`

 Run the following command:
+
 ```bash
 CUDA_VISIBLE_DEVICES=0 python -u main.py training configs/baseline_dplv3_resnet101_sbd.py
 # or without SBD
@@ -121,7 +123,6 @@ torchrun --nproc_per_node=2 main.py eval configs/eval_baseline_dplv3_resnet101_s
 horovodrun -np=2 python -u main.py eval configs/eval_baseline_dplv3_resnet101_sbd.py --backend="horovod"
 ```

-
 ## Acknowledgements

 Trainings were done using credits provided by AWS for open-source development via NumFOCUS
diff --git a/examples/super_resolution/README.md b/examples/super_resolution/README.md
index c778a04234e0..a55dbb8ec4aa 100644
--- a/examples/super_resolution/README.md
+++ b/examples/super_resolution/README.md
@@ -41,9 +41,11 @@ This example trains a super-resolution network on the [Caltech101 dataset](https
 ### Super-resolve example on a Cifar-10 image

 #### Input Image
+
 ![Cifar input image](./images/input_cifar.png)

 #### Output Images
-| Output image from Model | Output from bicubic sampling |
-|-------------------------------|------------------------------------|
-| ![Cifar output image](./images/out_cifar.png) | ![Cifar output from bicubic sampling](./images/bicubic_image_cifar.png)|
+
+| Output image from Model                       | Output from bicubic sampling                                             |
+| --------------------------------------------- | ----------------------------------------------------------------------- |
+| ![Cifar output image](./images/out_cifar.png) | ![Cifar output from bicubic sampling](./images/bicubic_image_cifar.png) |
diff --git a/examples/transformers/README.md b/examples/transformers/README.md
index c1c03049179f..e0a025cd2cb8 100644
--- a/examples/transformers/README.md
+++ b/examples/transformers/README.md
@@ -32,24 +32,24 @@ Run the example on a single GPU:
 ```bash
 python main.py run
 ```
+
 If needed, please, adjust the batch size to your GPU device with `--batch_size` argument.

 The default model is `bert-base-uncased` incase you need to change that use the `--model` argument, for details on which models can be used refer [here](https://huggingface.co/transformers/v3.0.2/model_doc/auto.html#automodelforsequenceclassification)

 Example:
+
 ```bash
 #Using DistilBERT which has 40% less parameters than bert-base-uncased
 python main.py run --model="distilbert-base-uncased"
 ```

-
 For details on accepted arguments:

 ```bash
 python main.py run -- --help
 ```

-
 ### Distributed training

 #### Single node, multiple GPUs
@@ -93,6 +93,7 @@ python -u main.py run --backend="horovod" --nproc_per_node=2
 import os
 assert os.environ['COLAB_TPU_ADDR'], 'Make sure to select TPU from Edit > Notebook settings > Hardware accelerator'
 ```
+
 ```bash
 VERSION = "nightly"
 !curl -q https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
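Tying the transformers example flags together, a combined multi-GPU run might look like the sketch below; only the Horovod form of `--backend`/`--nproc_per_node` appears in the hunks above, so treating the native backend the same way is an assumption rather than something this diff confirms:

```bash
# Sketch: two-GPU run combining the documented flags.
# Assumption: main.py accepts --backend/--nproc_per_node for the native torch
# backend the same way it does for --backend="horovod" shown above.
python -u main.py run \
  --backend="nccl" \
  --nproc_per_node=2 \
  --model="distilbert-base-uncased" \
  --batch_size=16
```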