From 52575a0f0ad574e36188529a2fa8c6f189878480 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Mon, 5 Sep 2022 11:53:17 +0100
Subject: [PATCH 1/3] S3D weight deployment

---
 references/video_classification/README.md | 15 +++++++++++++++
 torchvision/models/video/s3d.py           | 14 ++++++--------
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/references/video_classification/README.md b/references/video_classification/README.md
index 9bd1b9cc285..0bf670a42db 100644
--- a/references/video_classification/README.md
+++ b/references/video_classification/README.md
@@ -97,6 +97,21 @@ Video resnet models:
 --val-crop-size 112 112
 ```
 
+### S3D
+
+The S3D model was trained similarly to the above, but with the following changes to the default configuration:
+```
+--batch-size=12 --lr 0.2 --clip-len 64 --clips-per-video 5 --sync-bn \
+--train-resize-size 256 256 --train-crop-size 224 224 --val-resize-size 256 256 --val-crop-size 224 224
+```
+
+We used 64 GPUs to train the architecture.
+
+To estimate the validation statistics of the model, we ran the reference script with the following configuration:
+```
+--batch-size=16 --test-only --clip-len 128 --clips-per-video 1
+```
+
 ### Additional video modelling resources
 
 - [Video Model Zoo](https://github.com/facebookresearch/VMZ)
diff --git a/torchvision/models/video/s3d.py b/torchvision/models/video/s3d.py
index f80d849683c..2be68f15494 100644
--- a/torchvision/models/video/s3d.py
+++ b/torchvision/models/video/s3d.py
@@ -104,7 +104,7 @@ class S3D(nn.Module):
     def __init__(
         self,
         num_classes: int = 400,
-        dropout: float = 0.0,
+        dropout: float = 0.2,
         norm_layer: Optional[Callable[..., torch.nn.Module]] = None,
     ) -> None:
         super().__init__()
@@ -153,28 +153,26 @@ def forward(self, x):
 
 class S3D_Weights(WeightsEnum):
     KINETICS400_V1 = Weights(
-        url="https://download.pytorch.org/models/s3d-1bd8ae63.pth",
+        url="https://download.pytorch.org/models/s3d-d76dad2f.pth",
         transforms=partial(
             VideoClassification,
             crop_size=(224, 224),
             resize_size=(256, 256),
-            mean=(0.5, 0.5, 0.5),
-            std=(0.5, 0.5, 0.5),
         ),
         meta={
             "min_size": (224, 224),
             "min_temporal_size": 14,
             "categories": _KINETICS400_CATEGORIES,
-            "recipe": "https://github.com/pytorch/vision/pull/6412#issuecomment-1219687434",
+            "recipe": "https://github.com/pytorch/vision/tree/main/references/video_classification#s3d",
             "_docs": (
-                "The weights are ported from a community repository. The accuracies are estimated on clip-level "
+                "The weights aim to approximate the accuracy of the paper. The accuracies are estimated on clip-level "
                 "with parameters `frame_rate=15`, `clips_per_video=1`, and `clip_len=128`."
             ),
             "num_params": 8320048,
             "_metrics": {
                 "Kinetics-400": {
-                    "acc@1": 67.315,
-                    "acc@5": 87.593,
+                    "acc@1": 68.345,
+                    "acc@5": 88.050,
                 }
             },
         },

From 81a153b69343f0b14c0599f2c5daafd719f89d6b Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Mon, 5 Sep 2022 13:04:25 +0100
Subject: [PATCH 2/3] Update accuracies.

---
 torchvision/models/video/s3d.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/models/video/s3d.py b/torchvision/models/video/s3d.py
index 2be68f15494..f7d364c665f 100644
--- a/torchvision/models/video/s3d.py
+++ b/torchvision/models/video/s3d.py
@@ -171,7 +171,7 @@ class S3D_Weights(WeightsEnum):
             "num_params": 8320048,
             "_metrics": {
                 "Kinetics-400": {
-                    "acc@1": 68.345,
+                    "acc@1": 68.368,
                     "acc@5": 88.050,
                 }
             },

From 3ca5ac43e3a955084313f96aa85e74aadd48be68 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Mon, 5 Sep 2022 13:21:04 +0100
Subject: [PATCH 3/3] Address review comments.

---
 references/video_classification/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/references/video_classification/README.md b/references/video_classification/README.md
index 0bf670a42db..cbd303275e5 100644
--- a/references/video_classification/README.md
+++ b/references/video_classification/README.md
@@ -81,6 +81,7 @@ Video resnet models:
 ```
 # number of frames per clip
 --clip_len 16 \ 
+--frame-rate 15 \
 # allow for temporal jittering
 --clips_per_video 5 \
 --batch-size 24 \