Skip to content

bpo-45506: Normalize _PyPathConfig.stdlib_dir when calculated. #29040

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Include/internal/pycore_fileutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ extern int _Py_add_relfile(wchar_t *dirname,
const wchar_t *relfile,
size_t bufsize);
extern size_t _Py_find_basename(const wchar_t *filename);
PyAPI_FUNC(int) _Py_normalize_path(const wchar_t *path,
wchar_t *buf, const size_t buf_len);


// Macros to protect CRT calls against instant termination when passed an
// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.
Expand Down
30 changes: 30 additions & 0 deletions Lib/test/test_fileutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Run tests for functions in Python/fileutils.c.

import os
import os.path
import unittest
from test.support import import_helper

# Skip this test if the _testcapi module isn't available.
_testcapi = import_helper.import_module('_testinternalcapi')


class PathTests(unittest.TestCase):

def test_capi_normalize_path(self):
if os.name == 'nt':
raise unittest.SkipTest('Windows has its own helper for this')
else:
from .test_posixpath import PosixPathTest as posixdata
tests = posixdata.NORMPATH_CASES
for filename, expected in tests:
if not os.path.isabs(filename):
continue
with self.subTest(filename):
result = _testcapi.normalize_path(filename)
self.assertEqual(result, expected,
msg=f'input: {filename!r} expected output: {expected!r}')


if __name__ == "__main__":
unittest.main()
62 changes: 44 additions & 18 deletions Lib/test/test_posixpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,25 +304,51 @@ def test_expanduser_pwd(self):
for path in ('~', '~/.local', '~vstinner/'):
self.assertEqual(posixpath.expanduser(path), path)

NORMPATH_CASES = [
("", "."),
("/", "/"),
("/.", "/"),
("/./", "/"),
("/.//.", "/"),
("/foo", "/foo"),
("/foo/bar", "/foo/bar"),
("//", "//"),
("///", "/"),
("///foo/.//bar//", "/foo/bar"),
("///foo/.//bar//.//..//.//baz///", "/foo/baz"),
("///..//./foo/.//bar", "/foo/bar"),
(".", "."),
(".//.", "."),
("..", ".."),
("../", ".."),
("../foo", "../foo"),
("../../foo", "../../foo"),
("../foo/../bar", "../bar"),
("../../foo/../bar/./baz/boom/..", "../../bar/baz"),
("/..", "/"),
("/..", "/"),
("/../", "/"),
("/..//", "/"),
("//..", "//"),
("/../foo", "/foo"),
("/../../foo", "/foo"),
("/../foo/../", "/"),
("/../foo/../bar", "/bar"),
("/../../foo/../bar/./baz/boom/..", "/bar/baz"),
("/../../foo/../bar/./baz/boom/.", "/bar/baz/boom"),
]

def test_normpath(self):
self.assertEqual(posixpath.normpath(""), ".")
self.assertEqual(posixpath.normpath("/"), "/")
self.assertEqual(posixpath.normpath("//"), "//")
self.assertEqual(posixpath.normpath("///"), "/")
self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar")
self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"),
"/foo/baz")
self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar")

self.assertEqual(posixpath.normpath(b""), b".")
self.assertEqual(posixpath.normpath(b"/"), b"/")
self.assertEqual(posixpath.normpath(b"//"), b"//")
self.assertEqual(posixpath.normpath(b"///"), b"/")
self.assertEqual(posixpath.normpath(b"///foo/.//bar//"), b"/foo/bar")
self.assertEqual(posixpath.normpath(b"///foo/.//bar//.//..//.//baz"),
b"/foo/baz")
self.assertEqual(posixpath.normpath(b"///..//./foo/.//bar"),
b"/foo/bar")
for path, expected in self.NORMPATH_CASES:
with self.subTest(path):
result = posixpath.normpath(path)
self.assertEqual(result, expected)

path = path.encode('utf-8')
expected = expected.encode('utf-8')
with self.subTest(path, type=bytes):
result = posixpath.normpath(path)
self.assertEqual(result, expected)

@skip_if_ABSTFN_contains_backslash
def test_realpath_curdir(self):
Expand Down
24 changes: 24 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
#include "Python.h"
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
#include "pycore_bitutils.h" // _Py_bswap32()
#include "pycore_fileutils.h" // _Py_normalize_path
#include "pycore_gc.h" // PyGC_Head
#include "pycore_hashtable.h" // _Py_hashtable_new()
#include "pycore_initconfig.h" // _Py_GetConfigsAsDict()
#include "pycore_interp.h" // _PyInterpreterState_GetConfigCopy()
#include "pycore_pyerrors.h" // _Py_UTF8_Edit_Cost()
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "osdefs.h" // MAXPATHLEN


static PyObject *
Expand Down Expand Up @@ -366,6 +368,27 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args))
}


static PyObject *
normalize_path(PyObject *self, PyObject *filename)
{
Py_ssize_t size = -1;
wchar_t *encoded = PyUnicode_AsWideCharString(filename, &size);
if (encoded == NULL) {
return NULL;
}

wchar_t buf[MAXPATHLEN + 1];
int res = _Py_normalize_path(encoded, buf, Py_ARRAY_LENGTH(buf));
PyMem_Free(encoded);
if (res != 0) {
PyErr_SetString(PyExc_ValueError, "string too long");
return NULL;
}

return PyUnicode_FromWideChar(buf, -1);
}


static PyMethodDef TestMethods[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
Expand All @@ -377,6 +400,7 @@ static PyMethodDef TestMethods[] = {
{"set_config", test_set_config, METH_O},
{"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
{"test_edit_cost", test_edit_cost, METH_NOARGS},
{"normalize_path", normalize_path, METH_O, NULL},
{NULL, NULL} /* sentinel */
};

Expand Down
46 changes: 40 additions & 6 deletions Modules/getpath.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,42 @@ search_for_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig,
}


static PyStatus
calculate_set_stdlib_dir(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
{
// Note that, unlike calculate_set_prefix(), here we allow a negative
// prefix_found. That means the source tree Lib dir gets used.
if (!calculate->prefix_found) {
return _PyStatus_OK();
}
PyStatus status;
wchar_t *prefix = calculate->prefix;
if (!_Py_isabs(prefix)) {
prefix = _PyMem_RawWcsdup(prefix);
if (prefix == NULL) {
return _PyStatus_NO_MEMORY();
}
status = absolutize(&prefix);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
}
wchar_t buf[MAXPATHLEN + 1];
int res = _Py_normalize_path(prefix, buf, Py_ARRAY_LENGTH(buf));
if (prefix != calculate->prefix) {
PyMem_RawFree(prefix);
}
if (res < 0) {
return PATHLEN_ERR();
}
pathconfig->stdlib_dir = _PyMem_RawWcsdup(buf);
if (pathconfig->stdlib_dir == NULL) {
return _PyStatus_NO_MEMORY();
}
return _PyStatus_OK();
}


static PyStatus
calculate_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
{
Expand Down Expand Up @@ -1494,12 +1530,10 @@ calculate_path(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
}

if (pathconfig->stdlib_dir == NULL) {
if (calculate->prefix_found) {
/* This must be done *before* calculate_set_prefix() is called. */
pathconfig->stdlib_dir = _PyMem_RawWcsdup(calculate->prefix);
if (pathconfig->stdlib_dir == NULL) {
return _PyStatus_NO_MEMORY();
}
/* This must be done *before* calculate_set_prefix() is called. */
status = calculate_set_stdlib_dir(calculate, pathconfig);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
}

Expand Down
95 changes: 95 additions & 0 deletions Python/fileutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -2181,6 +2181,101 @@ _Py_find_basename(const wchar_t *filename)
}


/* Remove navigation elements such as "." and "..".

This is mostly a C implementation of posixpath.normpath().
Return 0 on success. Return -1 if "orig" is too big for the buffer. */
int
_Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len)
{
assert(path && *path != L'\0');
assert(*path == SEP); // an absolute path
if (wcslen(path) + 1 >= buf_len) {
return -1;
}

int dots = -1;
int check_leading = 1;
const wchar_t *buf_start = buf;
wchar_t *buf_next = buf;
// The resulting filename will never be longer than path.
for (const wchar_t *remainder = path; *remainder != L'\0'; remainder++) {
wchar_t c = *remainder;
buf_next[0] = c;
buf_next++;
if (c == SEP) {
assert(dots <= 2);
if (dots == 2) {
// Turn "/x/y/../z" into "/x/z".
buf_next -= 4; // "/../"
assert(*buf_next == SEP);
// We cap it off at the root, so "/../spam" becomes "/spam".
if (buf_next == buf_start) {
buf_next++;
}
else {
// Move to the previous SEP in the buffer.
while (*(buf_next - 1) != SEP) {
assert(buf_next != buf_start);
buf_next--;
}
}
}
else if (dots == 1) {
// Turn "/./" into "/".
buf_next -= 2; // "./"
assert(*(buf_next - 1) == SEP);
}
else if (dots == 0) {
// Turn "//" into "/".
buf_next--;
assert(*(buf_next - 1) == SEP);
if (check_leading) {
if (buf_next - 1 == buf && *(remainder + 1) != SEP) {
// Leave a leading "//" alone, unless "///...".
buf_next++;
buf_start++;
}
check_leading = 0;
}
}
dots = 0;
}
else {
check_leading = 0;
if (dots >= 0) {
if (c == L'.' && dots < 2) {
dots++;
}
else {
dots = -1;
}
}
}
}
if (dots >= 0) {
// Strip any trailing dots and trailing slash.
buf_next -= dots + 1; // "/" or "/." or "/.."
assert(*buf_next == SEP);
if (buf_next == buf_start) {
// Leave the leading slash for root.
buf_next++;
}
else {
if (dots == 2) {
// Move to the previous SEP in the buffer.
do {
assert(buf_next != buf_start);
buf_next--;
} while (*(buf_next) != SEP);
}
}
}
*buf_next = L'\0';
return 0;
}


/* Get the current directory. buflen is the buffer size in wide characters
including the null character. Decode the path from the locale encoding.

Expand Down