From 6872edd4264b45f608dda459c6b37997d3c7429c Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Thu, 25 Apr 2024 11:07:38 +0200 Subject: [PATCH 1/5] gh-102511: Speed up os.path.splitroot() with native helpers (GH-118089) --- Include/internal/pycore_fileutils.h | 2 + Lib/ntpath.py | 116 ++++++++------ Lib/posixpath.py | 74 +++++---- Lib/test/test_ntpath.py | 1 + ...-04-19-08-50-48.gh-issue-102511.qDEB66.rst | 1 + Modules/clinic/posixmodule.c.h | 63 +++++++- Modules/posixmodule.c | 44 ++++++ Python/fileutils.c | 147 ++++++++++++++---- 8 files changed, 340 insertions(+), 108 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 7c2b6ec0bffef5..899d3dacd8b04f 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -264,6 +264,8 @@ extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd); #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ +extern void _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize); + // Macros to protect CRT calls against instant termination when passed an // invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler. // Usage: diff --git a/Lib/ntpath.py b/Lib/ntpath.py index df3402d46c9cc6..a9155365283a23 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -176,56 +176,76 @@ def splitdrive(p): return drive, root + tail -def splitroot(p): - """Split a pathname into drive, root and tail. The drive is defined - exactly as in splitdrive(). On Windows, the root may be a single path - separator or an empty string. The tail contains anything after the root. - For example: - - splitroot('//server/share/') == ('//server/share', '/', '') - splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') - splitroot('Windows/notepad') == ('', '', 'Windows/notepad') - """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - unc_prefix = b'\\\\?\\UNC\\' - empty = b'' - else: - sep = '\\' - altsep = '/' - colon = ':' - unc_prefix = '\\\\?\\UNC\\' - empty = '' - normp = p.replace(altsep, sep) - if normp[:1] == sep: - if normp[1:2] == sep: - # UNC drives, e.g. \\server\share or \\?\UNC\server\share - # Device drives, e.g. \\.\device or \\?\device - start = 8 if normp[:8].upper() == unc_prefix else 2 - index = normp.find(sep, start) - if index == -1: - return p, empty, empty - index2 = normp.find(sep, index + 1) - if index2 == -1: - return p, empty, empty - return p[:index2], p[index2:index2 + 1], p[index2 + 1:] +try: + from nt import _path_splitroot_ex +except ImportError: + def splitroot(p): + """Split a pathname into drive, root and tail. The drive is defined + exactly as in splitdrive(). On Windows, the root may be a single path + separator or an empty string. The tail contains anything after the root. + For example: + + splitroot('//server/share/') == ('//server/share', '/', '') + splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') + splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') + splitroot('Windows/notepad') == ('', '', 'Windows/notepad') + """ + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'\\' + altsep = b'/' + colon = b':' + unc_prefix = b'\\\\?\\UNC\\' + empty = b'' else: - # Relative path with root, e.g. \Windows - return empty, p[:1], p[1:] - elif normp[1:2] == colon: - if normp[2:3] == sep: - # Absolute drive-letter path, e.g. X:\Windows - return p[:2], p[2:3], p[3:] + sep = '\\' + altsep = '/' + colon = ':' + unc_prefix = '\\\\?\\UNC\\' + empty = '' + normp = p.replace(altsep, sep) + if normp[:1] == sep: + if normp[1:2] == sep: + # UNC drives, e.g. \\server\share or \\?\UNC\server\share + # Device drives, e.g. \\.\device or \\?\device + start = 8 if normp[:8].upper() == unc_prefix else 2 + index = normp.find(sep, start) + if index == -1: + return p, empty, empty + index2 = normp.find(sep, index + 1) + if index2 == -1: + return p, empty, empty + return p[:index2], p[index2:index2 + 1], p[index2 + 1:] + else: + # Relative path with root, e.g. \Windows + return empty, p[:1], p[1:] + elif normp[1:2] == colon: + if normp[2:3] == sep: + # Absolute drive-letter path, e.g. X:\Windows + return p[:2], p[2:3], p[3:] + else: + # Relative path with drive, e.g. X:Windows + return p[:2], empty, p[2:] else: - # Relative path with drive, e.g. X:Windows - return p[:2], empty, p[2:] - else: - # Relative path, e.g. Windows - return empty, empty, p + # Relative path, e.g. Windows + return empty, empty, p +else: + def splitroot(p): + """Split a pathname into drive, root and tail. The drive is defined + exactly as in splitdrive(). On Windows, the root may be a single path + separator or an empty string. The tail contains anything after the root. + For example: + + splitroot('//server/share/') == ('//server/share', '/', '') + splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') + splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') + splitroot('Windows/notepad') == ('', '', 'Windows/notepad') + """ + p = os.fspath(p) + if isinstance(p, bytes): + drive, root, tail = _path_splitroot_ex(os.fsdecode(p)) + return os.fsencode(drive), os.fsencode(root), os.fsencode(tail) + return _path_splitroot_ex(p) # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index bd81c09bb2bdd0..6b18890fc93af7 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -135,33 +135,53 @@ def splitdrive(p): return p[:0], p -def splitroot(p): - """Split a pathname into drive, root and tail. On Posix, drive is always - empty; the root may be empty, a single slash, or two slashes. The tail - contains anything after the root. For example: - - splitroot('foo/bar') == ('', '', 'foo/bar') - splitroot('/foo/bar') == ('', '/', 'foo/bar') - splitroot('//foo/bar') == ('', '//', 'foo/bar') - splitroot('///foo/bar') == ('', '/', '//foo/bar') - """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'/' - empty = b'' - else: - sep = '/' - empty = '' - if p[:1] != sep: - # Relative path, e.g.: 'foo' - return empty, empty, p - elif p[1:2] != sep or p[2:3] == sep: - # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. - return empty, sep, p[1:] - else: - # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see - # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 - return empty, p[:2], p[2:] +try: + from posix import _path_splitroot_ex +except ImportError: + def splitroot(p): + """Split a pathname into drive, root and tail. On Posix, drive is always + empty; the root may be empty, a single slash, or two slashes. The tail + contains anything after the root. For example: + + splitroot('foo/bar') == ('', '', 'foo/bar') + splitroot('/foo/bar') == ('', '/', 'foo/bar') + splitroot('//foo/bar') == ('', '//', 'foo/bar') + splitroot('///foo/bar') == ('', '/', '//foo/bar') + """ + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'/' + empty = b'' + else: + sep = '/' + empty = '' + if p[:1] != sep: + # Relative path, e.g.: 'foo' + return empty, empty, p + elif p[1:2] != sep or p[2:3] == sep: + # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + return empty, sep, p[1:] + else: + # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + return empty, p[:2], p[2:] +else: + def splitroot(p): + """Split a pathname into drive, root and tail. On Posix, drive is always + empty; the root may be empty, a single slash, or two slashes. The tail + contains anything after the root. For example: + + splitroot('foo/bar') == ('', '', 'foo/bar') + splitroot('/foo/bar') == ('', '/', 'foo/bar') + splitroot('//foo/bar') == ('', '//', 'foo/bar') + splitroot('///foo/bar') == ('', '/', '//foo/bar') + """ + p = os.fspath(p) + if isinstance(p, bytes): + # Optimisation: the drive is always empty + _, root, tail = _path_splitroot_ex(os.fsdecode(p)) + return b'', os.fsencode(root), os.fsencode(tail) + return _path_splitroot_ex(p) # Return the tail (basename) part of a path, same as split(path)[1]. diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index d91dcdfb0c5fac..f4d3f310b6b074 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -354,6 +354,7 @@ def test_normpath(self): tester("ntpath.normpath('\\\\foo\\')", '\\\\foo\\') tester("ntpath.normpath('\\\\foo')", '\\\\foo') tester("ntpath.normpath('\\\\')", '\\\\') + tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\') def test_realpath_curdir(self): expected = ntpath.normpath(os.getcwd()) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst new file mode 100644 index 00000000000000..dfdf250710778e --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst @@ -0,0 +1 @@ +Speed up :func:`os.path.splitroot` with a native implementation. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 3802182143c76e..f6b460bef5b16d 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2208,6 +2208,67 @@ os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj #endif /* defined(MS_WINDOWS) */ +PyDoc_STRVAR(os__path_splitroot_ex__doc__, +"_path_splitroot_ex($module, /, path)\n" +"--\n" +"\n"); + +#define OS__PATH_SPLITROOT_EX_METHODDEF \ + {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__}, + +static PyObject * +os__path_splitroot_ex_impl(PyObject *module, PyObject *path); + +static PyObject * +os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(path), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"path", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_path_splitroot_ex", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *path; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("_path_splitroot_ex", "argument 'path'", "str", args[0]); + goto exit; + } + if (PyUnicode_READY(args[0]) == -1) { + goto exit; + } + path = args[0]; + return_value = os__path_splitroot_ex_impl(module, path); + +exit: + return return_value; +} + PyDoc_STRVAR(os__path_normpath__doc__, "_path_normpath($module, /, path)\n" "--\n" @@ -11999,4 +12060,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=56e83d6b7cac0d58 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a0b0e187a6d1c331 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 23cfcdeca246e1..8d561c7cb4c62e 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5286,6 +5286,49 @@ os__path_islink_impl(PyObject *module, PyObject *path) #endif /* MS_WINDOWS */ +/*[clinic input] +os._path_splitroot_ex + + path: unicode + +[clinic start generated code]*/ + +static PyObject * +os__path_splitroot_ex_impl(PyObject *module, PyObject *path) +/*[clinic end generated code: output=de97403d3dfebc40 input=f1470e12d899f9ac]*/ +{ + Py_ssize_t len, drvsize, rootsize; + PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; + + wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); + if (!buffer) { + goto exit; + } + + _Py_skiproot(buffer, len, &drvsize, &rootsize); + drv = PyUnicode_FromWideChar(buffer, drvsize); + if (drv == NULL) { + goto exit; + } + root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize); + if (root == NULL) { + goto exit; + } + tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], + len - drvsize - rootsize); + if (tail == NULL) { + goto exit; + } + result = Py_BuildValue("(OOO)", drv, root, tail); +exit: + PyMem_Free(buffer); + Py_XDECREF(drv); + Py_XDECREF(root); + Py_XDECREF(tail); + return result; +} + + /*[clinic input] os._path_normpath @@ -16029,6 +16072,7 @@ static PyMethodDef posix_methods[] = { OS__GETFINALPATHNAME_METHODDEF OS__GETVOLUMEPATHNAME_METHODDEF OS__PATH_SPLITROOT_METHODDEF + OS__PATH_SPLITROOT_EX_METHODDEF OS__PATH_NORMPATH_METHODDEF OS_GETLOADAVG_METHODDEF OS_URANDOM_METHODDEF diff --git a/Python/fileutils.c b/Python/fileutils.c index c7521751cd26d1..a715de8907dcd7 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2292,6 +2292,99 @@ PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname, #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ +void +_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, + Py_ssize_t *rootsize) +{ + assert(drvsize); + assert(rootsize); +#ifndef MS_WINDOWS +#define IS_SEP(x) (*(x) == SEP) + *drvsize = 0; + if (!IS_SEP(&path[0])) { + // Relative path, e.g.: 'foo' + *rootsize = 0; + } + else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) { + // Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + *rootsize = 1; + } + else { + // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + *rootsize = 2; + } +#undef IS_SEP +#else + const wchar_t *pEnd = size >= 0 ? &path[size] : NULL; +#define IS_END(x) (pEnd ? (x) == pEnd : !*(x)) +#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP) +#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) + if (IS_SEP(&path[0])) { + if (IS_SEP(&path[1])) { + // Device drives, e.g. \\.\device or \\?\device + // UNC drives, e.g. \\server\share or \\?\UNC\server\share + Py_ssize_t idx; + if (path[2] == L'?' && IS_SEP(&path[3]) && + (path[4] == L'U' || path[4] == L'u') && + (path[5] == L'N' || path[5] == L'n') && + (path[6] == L'C' || path[6] == L'c') && + IS_SEP(&path[7])) + { + idx = 8; + } + else { + idx = 2; + } + while (!SEP_OR_END(&path[idx])) { + idx++; + } + if (IS_END(&path[idx])) { + *drvsize = idx; + *rootsize = 0; + } + else { + idx++; + while (!SEP_OR_END(&path[idx])) { + idx++; + } + *drvsize = idx; + if (IS_END(&path[idx])) { + *rootsize = 0; + } + else { + *rootsize = 1; + } + } + } + else { + // Relative path with root, e.g. \Windows + *drvsize = 0; + *rootsize = 1; + } + } + else if (!IS_END(&path[0]) && path[1] == L':') { + *drvsize = 2; + if (IS_SEP(&path[2])) { + // Absolute drive-letter path, e.g. X:\Windows + *rootsize = 1; + } + else { + // Relative path with drive, e.g. X:Windows + *rootsize = 0; + } + } + else { + // Relative path, e.g. Windows + *drvsize = 0; + *rootsize = 0; + } +#undef SEP_OR_END +#undef IS_SEP +#undef IS_END +#endif +} + // The caller must ensure "buffer" is big enough. static int join_relfile(wchar_t *buffer, size_t bufsize, @@ -2408,49 +2501,39 @@ _Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize) #endif #define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) - // Skip leading '.\' if (p1[0] == L'.' && IS_SEP(&p1[1])) { + // Skip leading '.\' path = &path[2]; - while (IS_SEP(path) && !IS_END(path)) { + while (IS_SEP(path)) { path++; } p1 = p2 = minP2 = path; lastC = SEP; } + else { + Py_ssize_t drvsize, rootsize; + _Py_skiproot(path, size, &drvsize, &rootsize); + if (drvsize || rootsize) { + // Skip past root and update minP2 + p1 = &path[drvsize + rootsize]; +#ifndef ALTSEP + p2 = p1; +#else + for (; p2 < p1; ++p2) { + if (*p2 == ALTSEP) { + *p2 = SEP; + } + } +#endif + minP2 = p2 - 1; + lastC = *minP2; #ifdef MS_WINDOWS - // Skip past drive segment and update minP2 - else if (p1[0] && p1[1] == L':') { - *p2++ = *p1++; - *p2++ = *p1++; - minP2 = p2; - lastC = L':'; - } - // Skip past all \\-prefixed paths, including \\?\, \\.\, - // and network paths, including the first segment. - else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) { - int sepCount = 2; - *p2++ = SEP; - *p2++ = SEP; - p1 += 2; - for (; !IS_END(p1) && sepCount; ++p1) { - if (IS_SEP(p1)) { - --sepCount; - *p2++ = lastC = SEP; - } else { - *p2++ = lastC = *p1; + if (lastC != SEP) { + minP2++; } +#endif } - minP2 = p2 - 1; - } -#else - // Skip past two leading SEPs - else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) { - *p2++ = *p1++; - *p2++ = *p1++; - minP2 = p2 - 1; // Absolute path has SEP at minP2 - lastC = SEP; } -#endif /* MS_WINDOWS */ /* if pEnd is specified, check that. Else, check for null terminator */ for (; !IS_END(p1); ++p1) { From d51eea6b919b8a838e8891b6ba5e659b6c0d6c23 Mon Sep 17 00:00:00 2001 From: Nineteendo Date: Tue, 28 May 2024 12:39:28 +0200 Subject: [PATCH 2/5] Direct C call for `os.path.splitroot()` --- Lib/ntpath.py | 32 ++++------------------------ Lib/posixpath.py | 31 ++++----------------------- Lib/test/test_ntpath.py | 2 ++ Lib/test/test_posixpath.py | 2 ++ Modules/clinic/posixmodule.c.h | 23 ++++++++++---------- Modules/posixmodule.c | 38 ++++++++++++++++++++++------------ 6 files changed, 49 insertions(+), 79 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 87e40738a88d97..fe355b98907373 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -177,19 +177,12 @@ def splitdrive(p): try: - from nt import _path_splitroot_ex + from nt import _path_splitroot_ex as splitroot except ImportError: def splitroot(p): - """Split a pathname into drive, root and tail. The drive is defined - exactly as in splitdrive(). On Windows, the root may be a single path - separator or an empty string. The tail contains anything after the root. - For example: - - splitroot('//server/share/') == ('//server/share', '/', '') - splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') - splitroot('Windows/notepad') == ('', '', 'Windows/notepad') - """ + """Split a pathname into drive, root and tail. + + The tail contains anything after the root.""" p = os.fspath(p) if isinstance(p, bytes): sep = b'\\' @@ -229,23 +222,6 @@ def splitroot(p): else: # Relative path, e.g. Windows return empty, empty, p -else: - def splitroot(p): - """Split a pathname into drive, root and tail. The drive is defined - exactly as in splitdrive(). On Windows, the root may be a single path - separator or an empty string. The tail contains anything after the root. - For example: - - splitroot('//server/share/') == ('//server/share', '/', '') - splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') - splitroot('Windows/notepad') == ('', '', 'Windows/notepad') - """ - p = os.fspath(p) - if isinstance(p, bytes): - drive, root, tail = _path_splitroot_ex(os.fsdecode(p)) - return os.fsencode(drive), os.fsencode(root), os.fsencode(tail) - return _path_splitroot_ex(p) # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 5be9087a7650ad..0a47f96be3fb03 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -136,18 +136,12 @@ def splitdrive(p): try: - from posix import _path_splitroot_ex + from posix import _path_splitroot_ex as splitroot except ImportError: def splitroot(p): - """Split a pathname into drive, root and tail. On Posix, drive is always - empty; the root may be empty, a single slash, or two slashes. The tail - contains anything after the root. For example: - - splitroot('foo/bar') == ('', '', 'foo/bar') - splitroot('/foo/bar') == ('', '/', 'foo/bar') - splitroot('//foo/bar') == ('', '//', 'foo/bar') - splitroot('///foo/bar') == ('', '/', '//foo/bar') - """ + """Split a pathname into drive, root and tail. + + The tail contains anything after the root.""" p = os.fspath(p) if isinstance(p, bytes): sep = b'/' @@ -165,23 +159,6 @@ def splitroot(p): # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 return empty, p[:2], p[2:] -else: - def splitroot(p): - """Split a pathname into drive, root and tail. On Posix, drive is always - empty; the root may be empty, a single slash, or two slashes. The tail - contains anything after the root. For example: - - splitroot('foo/bar') == ('', '', 'foo/bar') - splitroot('/foo/bar') == ('', '/', 'foo/bar') - splitroot('//foo/bar') == ('', '//', 'foo/bar') - splitroot('///foo/bar') == ('', '/', '//foo/bar') - """ - p = os.fspath(p) - if isinstance(p, bytes): - # Optimisation: the drive is always empty - _, root, tail = _path_splitroot_ex(os.fsdecode(p)) - return b'', os.fsencode(root), os.fsencode(tail) - return _path_splitroot_ex(p) # Return the tail (basename) part of a path, same as split(path)[1]. diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index db8376bed7a72b..79aceff4d95f60 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -1011,6 +1011,8 @@ def test_fast_paths_in_use(self): # There are fast paths of these functions implemented in posixmodule.c. # Confirm that they are being used, and not the Python fallbacks in # genericpath.py. + self.assertTrue(os.path.splitroot is nt._path_splitroot_ex) + self.assertFalse(inspect.isfunction(os.path.splitroot)) self.assertTrue(os.path.normpath is nt._path_normpath) self.assertFalse(inspect.isfunction(os.path.normpath)) self.assertTrue(os.path.isdir is nt._path_isdir) diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index 932d8a35d31fa9..7d9787c2328d44 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -279,6 +279,8 @@ def test_isjunction(self): def test_fast_paths_in_use(self): # There are fast paths of these functions implemented in posixmodule.c. # Confirm that they are being used, and not the Python fallbacks + self.assertTrue(os.path.splitroot is posix._path_splitroot_ex) + self.assertFalse(inspect.isfunction(os.path.splitroot)) self.assertTrue(os.path.normpath is posix._path_normpath) self.assertFalse(inspect.isfunction(os.path.normpath)) diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index bdff5ae00fcedb..ca01d4fb36fb01 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2221,13 +2221,16 @@ os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj PyDoc_STRVAR(os__path_splitroot_ex__doc__, "_path_splitroot_ex($module, /, path)\n" "--\n" -"\n"); +"\n" +"Split a pathname into drive, root and tail.\n" +"\n" +"The tail contains anything after the root."); #define OS__PATH_SPLITROOT_EX_METHODDEF \ {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__}, static PyObject * -os__path_splitroot_ex_impl(PyObject *module, PyObject *path); +os__path_splitroot_ex_impl(PyObject *module, path_t *path); static PyObject * os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -2259,23 +2262,21 @@ os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, }; #undef KWTUPLE PyObject *argsbuf[1]; - PyObject *path; + path_t path = PATH_T_INITIALIZE("_path_splitroot_ex", "path", 0, 1, 1, 0, 0); args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { goto exit; } - if (!PyUnicode_Check(args[0])) { - _PyArg_BadArgument("_path_splitroot_ex", "argument 'path'", "str", args[0]); - goto exit; - } - if (PyUnicode_READY(args[0]) == -1) { + if (!path_converter(args[0], &path)) { goto exit; } - path = args[0]; - return_value = os__path_splitroot_ex_impl(module, path); + return_value = os__path_splitroot_ex_impl(module, &path); exit: + /* Cleanup for path */ + path_cleanup(&path); + return return_value; } @@ -12075,4 +12076,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=66b9ec0427c8ff23 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=96bef929fd0eccb4 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index f16395828b6362..45bf272e79fa6f 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5255,23 +5255,22 @@ os__path_islink_impl(PyObject *module, path_t *path) /*[clinic input] os._path_splitroot_ex - path: unicode + path: path_t(make_wide=True, nonstrict=True) + +Split a pathname into drive, root and tail. +The tail contains anything after the root. [clinic start generated code]*/ static PyObject * -os__path_splitroot_ex_impl(PyObject *module, PyObject *path) -/*[clinic end generated code: output=de97403d3dfebc40 input=f1470e12d899f9ac]*/ +os__path_splitroot_ex_impl(PyObject *module, path_t *path) +/*[clinic end generated code: output=4b0072b6cdf4b611 input=6eb76e9173412c92]*/ { - Py_ssize_t len, drvsize, rootsize; + Py_ssize_t drvsize, rootsize; PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; - wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); - if (!buffer) { - goto exit; - } - - _Py_skiproot(buffer, len, &drvsize, &rootsize); + const wchar_t *buffer = path->wide; + _Py_skiproot(buffer, path->length, &drvsize, &rootsize); drv = PyUnicode_FromWideChar(buffer, drvsize); if (drv == NULL) { goto exit; @@ -5281,13 +5280,26 @@ os__path_splitroot_ex_impl(PyObject *module, PyObject *path) goto exit; } tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], - len - drvsize - rootsize); + path->length - drvsize - rootsize); if (tail == NULL) { goto exit; } - result = Py_BuildValue("(OOO)", drv, root, tail); + if (PyBytes_Check(path->object)) { + Py_SETREF(drv, PyUnicode_EncodeFSDefault(drv)); + if (drv == NULL) { + goto exit; + } + Py_SETREF(root, PyUnicode_EncodeFSDefault(root)); + if (root == NULL) { + goto exit; + } + Py_SETREF(tail, PyUnicode_EncodeFSDefault(tail)); + if (tail == NULL) { + goto exit; + } + } + result = PyTuple_Pack(3, drv, root, tail); exit: - PyMem_Free(buffer); Py_XDECREF(drv); Py_XDECREF(root); Py_XDECREF(tail); From 0b9547f6cc38908f2e44b93fe35b2e3343677bf1 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 31 May 2024 14:43:27 +0000 Subject: [PATCH 3/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst new file mode 100644 index 00000000000000..b395bad0c82fb6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst @@ -0,0 +1 @@ +Fix :func:`os.path.normpath` for UNC paths on Windows. From e800e8573bdf0e49deb92ab2c949f853572c54cb Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Fri, 31 May 2024 16:43:49 +0200 Subject: [PATCH 4/5] Delete Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst --- .../2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst deleted file mode 100644 index dfdf250710778e..00000000000000 --- a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst +++ /dev/null @@ -1 +0,0 @@ -Speed up :func:`os.path.splitroot` with a native implementation. From 9b930a5f9cf390d2e761e5c99951ed256a166fe1 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Wed, 19 Jun 2024 13:17:48 +0200 Subject: [PATCH 5/5] Rename 2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst to 2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst --- .../2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/{Core and Builtins => Library}/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst (100%) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst b/Misc/NEWS.d/next/Library/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst similarity index 100% rename from Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst rename to Misc/NEWS.d/next/Library/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst