From 9c6c31308616a144bba295c245606c81a165d27b Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 21 Nov 2024 06:11:07 +0000 Subject: [PATCH 1/6] GH-127090: Fix `urllib.request.addinfourl.url` for `file:` URIs The original `file:` URL that was passed to `urlopen()` is now used as the `url` attribute of the returned `addinfourl` object. The `addinfourl.url` attribute *always* reflects the original `file:`, `data:` or `ftp:` URL now. --- Lib/test/test_urllib.py | 4 ++-- Lib/urllib/request.py | 6 +----- .../Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst | 3 +++ 3 files changed, 6 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index c66b1c49c316e6..e70583596264d3 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -156,7 +156,7 @@ def test_headers(self): self.assertIsInstance(self.returned_obj.headers, email.message.Message) def test_url(self): - self.assertEqual(self.returned_obj.url, "file://" + self.quoted_pathname) + self.assertEqual(self.returned_obj.url, "file:" + self.quoted_pathname) def test_status(self): self.assertIsNone(self.returned_obj.status) @@ -165,7 +165,7 @@ def test_info(self): self.assertIsInstance(self.returned_obj.info(), email.message.Message) def test_geturl(self): - self.assertEqual(self.returned_obj.geturl(), "file://" + self.quoted_pathname) + self.assertEqual(self.returned_obj.geturl(), "file:" + self.quoted_pathname) def test_getcode(self): self.assertIsNone(self.returned_obj.getcode()) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index bcfdcc51fac369..a1e3937a0fe61a 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1488,11 +1488,7 @@ def open_local_file(self, req): host, port = _splitport(host) if not host or \ (not port and _safe_gethostbyname(host) in self.get_names()): - if host: - origurl = 'file://' + host + filename - else: - origurl = 'file://' + filename - return addinfourl(open(localfile, 'rb'), headers, origurl) + return addinfourl(open(localfile, 'rb'), headers, req.full_url) except OSError as exp: raise URLError(exp, exp.filename) raise URLError('file not on local host') diff --git a/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst new file mode 100644 index 00000000000000..bbdb75be9d66ee --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst @@ -0,0 +1,3 @@ +Fix value of :attr:`urllib.request.addinfourl.url` for ``file:`` URIs that +express relative paths and absolute Windows paths. The original URL is now +used. From ab171a39ed16d126fa3bf65e2c2384a0202e06b9 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 21 Nov 2024 06:26:28 +0000 Subject: [PATCH 2/6] Fix news blurb --- .../next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst index bbdb75be9d66ee..8d3817fbb95c4e 100644 --- a/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst +++ b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst @@ -1,3 +1,3 @@ -Fix value of :attr:`urllib.request.addinfourl.url` for ``file:`` URIs that +Fix value of :attr:`urllib.response.addinfourl.url` for ``file:`` URIs that express relative paths and absolute Windows paths. The original URL is now used. From 331bec6f3b13479f0a6557f378d8045abf2612ed Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 25 Nov 2024 20:45:18 +0000 Subject: [PATCH 3/6] Use `pathname2url()` to canonicalise URL --- Lib/test/test_urllib2.py | 23 +++++++------------ Lib/urllib/request.py | 3 ++- ...-11-21-06-03-46.gh-issue-127090.yUYwdh.rst | 6 ++--- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 99ad11cf0552eb..749029d01869a0 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -717,14 +717,6 @@ def test_processors(self): self.assertIsInstance(args[1], MockResponse) -def sanepathname2url(path): - urlpath = urllib.request.pathname2url(path) - if os.name == "nt" and urlpath.startswith("///"): - urlpath = urlpath[2:] - # XXX don't ask me about the mac... - return urlpath - - class HandlerTests(unittest.TestCase): def test_ftp(self): @@ -818,19 +810,20 @@ def test_file(self): o = h.parent = MockOpener() TESTFN = os_helper.TESTFN - urlpath = sanepathname2url(os.path.abspath(TESTFN)) towrite = b"hello, world\n" + canonurl = 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)) + parsed = urlparse(canonurl) urls = [ - "file://localhost%s" % urlpath, - "file://%s" % urlpath, - "file://%s%s" % (socket.gethostbyname('localhost'), urlpath), + canonurl, + parsed._replace(netloc='localhost').geturl(), + parsed._replace(netloc=socket.gethostbyname('localhost')).geturl(), ] try: localaddr = socket.gethostbyname(socket.gethostname()) except socket.gaierror: localaddr = '' if localaddr: - urls.append("file://%s%s" % (localaddr, urlpath)) + urls.append(parsed._replace(netloc=localaddr).geturl()) for url in urls: f = open(TESTFN, "wb") @@ -855,10 +848,10 @@ def test_file(self): self.assertEqual(headers["Content-type"], "text/plain") self.assertEqual(headers["Content-length"], "13") self.assertEqual(headers["Last-modified"], modified) - self.assertEqual(respurl, url) + self.assertEqual(respurl, canonurl) for url in [ - "file://localhost:80%s" % urlpath, + parsed._replace(netloc='localhost:80').geturl(), "file:///file_does_not_exist.txt", "file://not-a-local-host.com//dir/file.txt", "file://%s:80%s/%s" % (socket.gethostbyname('localhost'), diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index eb2a612fa1ab5c..7ef85431b718ad 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1488,7 +1488,8 @@ def open_local_file(self, req): host, port = _splitport(host) if not host or \ (not port and _safe_gethostbyname(host) in self.get_names()): - return addinfourl(open(localfile, 'rb'), headers, req.full_url) + origurl = 'file:' + pathname2url(localfile) + return addinfourl(open(localfile, 'rb'), headers, origurl) except OSError as exp: raise URLError(exp, exp.filename) raise URLError('file not on local host') diff --git a/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst index 8d3817fbb95c4e..8efe563f443774 100644 --- a/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst +++ b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst @@ -1,3 +1,3 @@ -Fix value of :attr:`urllib.response.addinfourl.url` for ``file:`` URIs that -express relative paths and absolute Windows paths. The original URL is now -used. +Fix value of :attr:`urllib.response.addinfourl.url` for ``file:`` URLs that +express relative paths and absolute Windows paths. The canonical URL generated +by :func:`urllib.request.pathname2url` is now used. From 74f5a491e1dff430f17a3ee573f8a144561fd55f Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 25 Nov 2024 21:05:15 +0000 Subject: [PATCH 4/6] Fix tests --- Lib/test/test_urllib2net.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py index f0874d8d3ce463..b84290a7368c29 100644 --- a/Lib/test/test_urllib2net.py +++ b/Lib/test/test_urllib2net.py @@ -4,7 +4,6 @@ from test.support import os_helper from test.support import socket_helper from test.support import ResourceDenied -from test.test_urllib2 import sanepathname2url import os import socket @@ -151,7 +150,7 @@ def test_file(self): f.write('hi there\n') f.close() urls = [ - 'file:' + sanepathname2url(os.path.abspath(TESTFN)), + 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)), ('file:///nonsensename/etc/passwd', None, urllib.error.URLError), ] From 6d7cfdb5fde20f6ff317ab3595d29813543e256b Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 25 Nov 2024 21:21:45 +0000 Subject: [PATCH 5/6] Test tweaks --- Lib/test/test_urllib.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 4b352aa8f4aa33..0feb9f28037868 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -471,11 +471,14 @@ def test_missing_localfile(self): def test_file_notexists(self): fd, tmp_file = tempfile.mkstemp() - tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') + tmp_file_canon_url = 'file:' + urllib.request.pathname2url(tmp_file) + parsed = urllib.parse.urlparse(tmp_file_canon_url) + tmp_fileurl = parsed._replace(netloc='localhost').geturl() try: self.assertTrue(os.path.exists(tmp_file)) with urllib.request.urlopen(tmp_fileurl) as fobj: self.assertTrue(fobj) + self.assertEqual(fobj.url, tmp_file_canon_url) finally: os.close(fd) os.unlink(tmp_file) @@ -609,7 +612,7 @@ def tearDown(self): def constructLocalFileUrl(self, filePath): filePath = os.path.abspath(filePath) - return "file://%s" % urllib.request.pathname2url(filePath) + return "file:" + urllib.request.pathname2url(filePath) def createNewTempFile(self, data=b""): """Creates a new temporary file containing the specified data, From 9810737712e2fc7b7a71ee827f617688f690bc0d Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 5 Dec 2024 20:28:41 +0000 Subject: [PATCH 6/6] Address review feedback --- Lib/test/test_urllib.py | 2 +- Lib/test/test_urllib2.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 0feb9f28037868..042d3b35b77022 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -472,7 +472,7 @@ def test_missing_localfile(self): def test_file_notexists(self): fd, tmp_file = tempfile.mkstemp() tmp_file_canon_url = 'file:' + urllib.request.pathname2url(tmp_file) - parsed = urllib.parse.urlparse(tmp_file_canon_url) + parsed = urllib.parse.urlsplit(tmp_file_canon_url) tmp_fileurl = parsed._replace(netloc='localhost').geturl() try: self.assertTrue(os.path.exists(tmp_file)) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 749029d01869a0..4a9e653515be5b 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -23,7 +23,7 @@ _proxy_bypass_winreg_override, _proxy_bypass_macosx_sysconf, AbstractDigestAuthHandler) -from urllib.parse import urlparse +from urllib.parse import urlsplit import urllib.error import http.client @@ -812,7 +812,9 @@ def test_file(self): TESTFN = os_helper.TESTFN towrite = b"hello, world\n" canonurl = 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)) - parsed = urlparse(canonurl) + parsed = urlsplit(canonurl) + if parsed.netloc: + raise unittest.SkipTest("non-local working directory") urls = [ canonurl, parsed._replace(netloc='localhost').geturl(), @@ -1149,13 +1151,13 @@ def test_full_url_setter(self): r = Request('http://example.com') for url in urls: r.full_url = url - parsed = urlparse(url) + parsed = urlsplit(url) self.assertEqual(r.get_full_url(), url) # full_url setter uses splittag to split into components. # splittag sets the fragment as None while urlparse sets it to '' self.assertEqual(r.fragment or '', parsed.fragment) - self.assertEqual(urlparse(r.get_full_url()).query, parsed.query) + self.assertEqual(urlsplit(r.get_full_url()).query, parsed.query) def test_full_url_deleter(self): r = Request('http://www.example.com')