 from typing import Any, Callable, List, Iterable, Optional, TypeVar, Dict, IO, Tuple, Iterator
 from urllib.parse import urlparse

+import requests
 import torch
 from torch.utils.model_zoo import tqdm

@@ -199,28 +200,37 @@ def download_file_from_google_drive(file_id: str, root: str, filename: Optional[
         filename (str, optional): Name to save the file under. If None, use the id of the file.
         md5 (str, optional): MD5 checksum of the download. If None, do not check
     """
-    url = f"https://drive.google.com/uc?export=download&id={file_id}"
+    # Based on https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
+
+    url = "https://docs.google.com/uc?export=download"

     root = os.path.expanduser(root)
     if not filename:
         filename = file_id
     fpath = os.path.join(root, filename)

+    os.makedirs(root, exist_ok=True)
+
     if os.path.isfile(fpath) and check_integrity(fpath, md5):
         print("Using downloaded and verified file: " + fpath)
-        return
+    else:
+        session = requests.Session()

-    os.makedirs(root, exist_ok=True)
+        response = session.get(url, params={"id": file_id}, stream=True)
+        token = _get_confirm_token(response)
+
+        if token:
+            params = {"id": file_id, "confirm": token}
+            response = session.get(url, params=params, stream=True)

-    with urllib.request.urlopen(url) as response:
         # Ideally, one would use response.status_code to check for quota limits, but google drive is not consistent
         # with their own API, refer https://github.com/pytorch/vision/issues/2992#issuecomment-730614517.
         # Should this be fixed at some place in future, one could refactor the following to no longer rely on decoding
         # the first_chunk of the payload
-        content = iter(lambda: response.read(32768), b"")
+        response_content_generator = response.iter_content(32768)
         first_chunk = None
         while not first_chunk:  # filter out keep-alive new chunks
-            first_chunk = next(content)
+            first_chunk = next(response_content_generator)

         if _quota_exceeded(first_chunk):
             msg = (
@@ -230,12 +240,21 @@ def download_file_from_google_drive(file_id: str, root: str, filename: Optional[
             )
             raise RuntimeError(msg)

-        _save_response_content(itertools.chain((first_chunk,), content), fpath)
+        _save_response_content(itertools.chain((first_chunk,), response_content_generator), fpath)
+        response.close()
+
+
+def _get_confirm_token(response: requests.models.Response) -> Optional[str]:
+    for key, value in response.cookies.items():
+        if key.startswith("download_warning"):
+            return value
+
+    return None


 def _save_response_content(
     response_gen: Iterator[bytes],
-    destination: str,  # type: ignore[name-defined]
+    destination: str,
 ) -> None:
     with open(destination, "wb") as f:
         pbar = tqdm(total=None)
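For context, a minimal self-contained sketch of the requests-based flow this diff introduces: one GET against the export URL, a check of the response cookies for a download_warning confirm token via _get_confirm_token, a second GET carrying the confirm parameter, and streaming the payload to disk in 32768-byte chunks. The names mirror the diff above, but the fetch_from_google_drive wrapper, its signature, and the plain write loop (instead of _save_response_content/tqdm, check_integrity, and the quota check) are simplifications for illustration, not the library's actual helpers.

import os
from typing import Optional

import requests


def _get_confirm_token(response: requests.models.Response) -> Optional[str]:
    # Google Drive signals "too large to virus-scan" downloads with a download_warning cookie.
    for key, value in response.cookies.items():
        if key.startswith("download_warning"):
            return value
    return None


def fetch_from_google_drive(file_id: str, fpath: str) -> None:
    # Hypothetical simplified stand-in for download_file_from_google_drive
    # (no md5 verification, no quota-exceeded detection).
    url = "https://docs.google.com/uc?export=download"
    session = requests.Session()

    response = session.get(url, params={"id": file_id}, stream=True)
    token = _get_confirm_token(response)
    if token:
        # Re-issue the request with the confirm token so the actual download starts.
        response = session.get(url, params={"id": file_id, "confirm": token}, stream=True)

    os.makedirs(os.path.dirname(fpath) or ".", exist_ok=True)
    with open(fpath, "wb") as f:
        for chunk in response.iter_content(32768):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)
    response.close()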