From d04b5f4fd843ab19a80fa62065ad40ed0aa3fc35 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sat, 1 May 2021 01:25:26 -0400 Subject: [PATCH 1/8] Refactor suggestions --- Lib/test/test_exceptions.py | 4 +- Python/suggestions.c | 213 +++++++++++++++++++++++++++--------- 2 files changed, 163 insertions(+), 54 deletions(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 3810108e356637..db80c65d9002f0 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1486,13 +1486,13 @@ def func(): def test_name_error_suggestions_from_builtins(self): def func(): - print(AttributeErrop) + print(ZeroDivisionErrrrr) try: func() except NameError as exc: with support.captured_stderr() as err: sys.__excepthook__(*sys.exc_info()) - self.assertIn("'AttributeError'?", err.getvalue()) + self.assertIn("'ZeroDivisionError'?", err.getvalue()) def test_name_error_suggestions_do_not_trigger_for_long_names(self): def f(): diff --git a/Python/suggestions.c b/Python/suggestions.c index 2fd6714e847874..5b270f4e638f79 100644 --- a/Python/suggestions.c +++ b/Python/suggestions.c @@ -7,13 +7,39 @@ #define MAX_CANDIDATE_ITEMS 160 #define MAX_STRING_SIZE 25 +#define MOVE_COST 2 +#define CASE_COST 1 + +static inline int +substitution_cost(char a, char b) +{ + if ((a & 31) != (b & 31)) { + // Not the same, not a case flip. + return MOVE_COST; + } + if (a == b) { + return 0; + } + if ('A' <= a && a <= 'Z') { + a += ('a' - 'A'); + } + if ('A' <= b && b <= 'Z') { + b += ('a' - 'A'); + } + if (a == b) { + return CASE_COST; + } + return MOVE_COST; +} + /* Calculate the Levenshtein distance between string1 and string2 */ static Py_ssize_t levenshtein_distance(const char *a, size_t a_size, - const char *b, size_t b_size) { - + const char *b, size_t b_size, + size_t *buffer, size_t max_cost) +{ if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) { - return 0; + return max_cost + 1; } // Both strings are the same (by identity) @@ -21,60 +47,141 @@ levenshtein_distance(const char *a, size_t a_size, return 0; } - // The first string is empty - if (a_size == 0) { - return b_size; + // Trim away common affixes. + while (a_size && b_size && a[0] == b[0]) { + a++; a_size--; + b++; b_size--; + } + while (a_size && b_size && a[a_size-1] == b[b_size-1]) { + a_size--; + b_size--; + } + if (a_size == 0 || b_size == 0) { + return (a_size + b_size) * MOVE_COST; } - // The second string is empty - if (b_size == 0) { - return a_size; + // Prefer shorter buffer + if (b_size < a_size) { + const char *t = a; a = b; b = t; + size_t t_size = a_size; a_size = b_size; b_size = t_size; } - size_t *buffer = PyMem_Calloc(a_size, sizeof(size_t)); - if (buffer == NULL) { - return -1; + // quick fail when a match is impossible. + if ((b_size - a_size)*MOVE_COST > max_cost) { + return max_cost + 1; } + // Instead of producing the whole traditional len(a)-by-len(b) + // matrix, we can update just one row in place. // Initialize the buffer row - size_t index = 0; - while (index < a_size) { - buffer[index] = index + 1; - index++; + for (size_t i = 0; i < a_size; i++) { + // cost from a[:0] to b[:index+1] + buffer[i] = (i + 1) * MOVE_COST; } - size_t b_index = 0; size_t result = 0; - while (b_index < b_size) { + for (size_t b_index = 0; b_index < b_size; b_index++) { char code = b[b_index]; - size_t distance = result = b_index++; - index = SIZE_MAX; - while (++index < a_size) { - size_t b_distance = code == a[index] ? distance : distance + 1; + // cost(b[:b_index], a[:0]) == b_index * MOVE_COST + size_t distance = result = b_index * MOVE_COST; + size_t minimum = SIZE_MAX; + for (size_t index = 0; index < a_size; index++) { + + // cost(b[:b_index+1], a[:index+1]) = min( + // // 1) substitute + // cost(b[:b_index], a[:index]) + // + substitution_cost(b[b_index], a[index]), + // // 2) delete from b + // cost(b[:b_index], a[:index+1]) + MOVE_COST, + // // 3) delete from a + // cost(b[:b_index+1], a[index]) + MOVE_COST + // ) + + // 1) Previous distance in this row is cost(b[:b_index], a[:index]) + size_t substitute = distance + substitution_cost(code, a[index]); + // 2) cost(b[:b_index], a[:index+1]) from previous row distance = buffer[index]; - if (distance > result) { - if (b_distance > result) { - result = result + 1; - } else { - result = b_distance; - } - } else { - if (b_distance > distance) { - result = distance + 1; - } else { - result = b_distance; - } - } + // 3) existing result is cost(b[:b_index+1], a[index]) + + size_t insert_delete = Py_MIN(result, distance) + MOVE_COST; + result = Py_MIN(insert_delete, substitute); + + // cost(b[:b_index+1], a[:index+1]) buffer[index] = result; + if (result < minimum) { + minimum = result; + } + } + if (minimum > max_cost) { + // Everything in this row is too big, so bail early. + return max_cost + 1; } } - PyMem_Free(buffer); return result; } +#define TEST_LEVENSHTEIN 0 +#if TEST_LEVENSHTEIN + +static void +do_test(const char *a, const char *b, size_t d) +{ + static size_t buffer[500]; + size_t a_len = strlen(a); + size_t b_len = strlen(b); + size_t d2 = levenshtein_distance(a, a_len, b, b_len, buffer, 9999); + printf("'%s' -> '%s':", a, b); + if (d != d2) { + printf("\n FAILURE: Expected %d, got %d.\n", (int)d, (int)d2); + } + else { + printf(" passed.\n"); + } + size_t d3 = levenshtein_distance(a, a_len, b, b_len, buffer, d2); + assert(d2 == d3); + size_t d4 = levenshtein_distance(b, b_len, a, a_len, buffer, d2); + assert(d2 == d4); +} + +static void +test_levenshtein_distances() +{ + printf("\n\n--- Testing Levenshtein Distances ---\n\n"); + do_test("", "", 0); + do_test("", "a", 2); + do_test("a", "A", 1); + do_test("Apple", "Aple", 2); + do_test("Banana", "B@n@n@", 6); + do_test("Cherry", "Cherry!", 2); + do_test("abc", "y", 6); + do_test("aa", "bb", 4); + do_test("aaaaa", "AAAAA", 5); + do_test("wxyz", "wXyZ", 2); + do_test("wxyz", "wXyZ123", 8); + do_test("Python", "Java", 12); + do_test("Java", "C#", 8); + do_test("AbstractFoobarManager", "abstract_foobar_manager", 3+2*2); + do_test("CPython", "PyPy", 10); + do_test("CPython", "pypy", 11); + do_test("AttributeError", "AttributeErrop", 2); + do_test("AttributeError", "AttributeErrorTests", 10); + printf("\n\n-------------------------------------\n\n"); +} + +#endif + static inline PyObject * calculate_suggestions(PyObject *dir, - PyObject *name) { + PyObject *name) +{ + #if TEST_LEVENSHTEIN + static int did_it; + if (!did_it) { + did_it = 1; + test_levenshtein_distances(); + } + #endif + assert(!PyErr_Occurred()); assert(PyList_CheckExact(dir)); @@ -90,22 +197,25 @@ calculate_suggestions(PyObject *dir, if (name_str == NULL) { return NULL; } + size_t *work_buffer = PyMem_Calloc(name_size, sizeof(size_t)); + if (work_buffer == NULL) { + return NULL; + } + for (int i = 0; i < dir_size; ++i) { PyObject *item = PyList_GET_ITEM(dir, i); Py_ssize_t item_size; const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size); if (item_str == NULL) { + PyMem_Free(work_buffer); return NULL; } - Py_ssize_t current_distance = levenshtein_distance( - name_str, name_size, item_str, item_size); - if (current_distance == -1) { - return NULL; - } - if (current_distance == 0 || - current_distance > MAX_DISTANCE || - current_distance * 2 > name_size) - { + Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6; + Py_ssize_t current_distance = + levenshtein_distance(name_str, name_size, + item_str, item_size, + work_buffer, max_distance); + if (current_distance > max_distance) { continue; } if (!suggestion || current_distance < suggestion_distance) { @@ -113,15 +223,13 @@ calculate_suggestions(PyObject *dir, suggestion_distance = current_distance; } } - if (!suggestion) { - return NULL; - } - Py_INCREF(suggestion); + Py_XINCREF(suggestion); return suggestion; } static PyObject * -offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc) { +offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc) +{ PyObject *name = exc->name; // borrowed reference PyObject *obj = exc->obj; // borrowed reference @@ -142,7 +250,8 @@ offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc) { static PyObject * -offer_suggestions_for_name_error(PyNameErrorObject *exc) { +offer_suggestions_for_name_error(PyNameErrorObject *exc) +{ PyObject *name = exc->name; // borrowed reference PyTracebackObject *traceback = (PyTracebackObject *) exc->traceback; // borrowed reference // Abort if we don't have a variable name or we have an invalid one From e680014e99e96420b218d55f0bea6be6b1ff3f72 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sat, 1 May 2021 17:42:40 -0400 Subject: [PATCH 2/8] Apply changes from review --- Python/suggestions.c | 91 ++++++++++++++------------------------------ 1 file changed, 28 insertions(+), 63 deletions(-) diff --git a/Python/suggestions.c b/Python/suggestions.c index 5b270f4e638f79..91981c18e3ccde 100644 --- a/Python/suggestions.c +++ b/Python/suggestions.c @@ -10,10 +10,12 @@ #define MOVE_COST 2 #define CASE_COST 1 +#define LEAST_FIVE_BITS(n) ((n) * 31) + static inline int substitution_cost(char a, char b) { - if ((a & 31) != (b & 31)) { + if (LEAST_FIVE_BITS(a) != LEAST_FIVE_BITS(b)) { // Not the same, not a case flip. return MOVE_COST; } @@ -36,8 +38,9 @@ substitution_cost(char a, char b) static Py_ssize_t levenshtein_distance(const char *a, size_t a_size, const char *b, size_t b_size, - size_t *buffer, size_t max_cost) + size_t max_cost) { + static size_t buffer[MAX_STRING_SIZE]; if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) { return max_cost + 1; } @@ -120,68 +123,10 @@ levenshtein_distance(const char *a, size_t a_size, return result; } -#define TEST_LEVENSHTEIN 0 -#if TEST_LEVENSHTEIN - -static void -do_test(const char *a, const char *b, size_t d) -{ - static size_t buffer[500]; - size_t a_len = strlen(a); - size_t b_len = strlen(b); - size_t d2 = levenshtein_distance(a, a_len, b, b_len, buffer, 9999); - printf("'%s' -> '%s':", a, b); - if (d != d2) { - printf("\n FAILURE: Expected %d, got %d.\n", (int)d, (int)d2); - } - else { - printf(" passed.\n"); - } - size_t d3 = levenshtein_distance(a, a_len, b, b_len, buffer, d2); - assert(d2 == d3); - size_t d4 = levenshtein_distance(b, b_len, a, a_len, buffer, d2); - assert(d2 == d4); -} - -static void -test_levenshtein_distances() -{ - printf("\n\n--- Testing Levenshtein Distances ---\n\n"); - do_test("", "", 0); - do_test("", "a", 2); - do_test("a", "A", 1); - do_test("Apple", "Aple", 2); - do_test("Banana", "B@n@n@", 6); - do_test("Cherry", "Cherry!", 2); - do_test("abc", "y", 6); - do_test("aa", "bb", 4); - do_test("aaaaa", "AAAAA", 5); - do_test("wxyz", "wXyZ", 2); - do_test("wxyz", "wXyZ123", 8); - do_test("Python", "Java", 12); - do_test("Java", "C#", 8); - do_test("AbstractFoobarManager", "abstract_foobar_manager", 3+2*2); - do_test("CPython", "PyPy", 10); - do_test("CPython", "pypy", 11); - do_test("AttributeError", "AttributeErrop", 2); - do_test("AttributeError", "AttributeErrorTests", 10); - printf("\n\n-------------------------------------\n\n"); -} - -#endif - static inline PyObject * calculate_suggestions(PyObject *dir, PyObject *name) { - #if TEST_LEVENSHTEIN - static int did_it; - if (!did_it) { - did_it = 1; - test_levenshtein_distances(); - } - #endif - assert(!PyErr_Occurred()); assert(PyList_CheckExact(dir)); @@ -213,8 +158,7 @@ calculate_suggestions(PyObject *dir, Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6; Py_ssize_t current_distance = levenshtein_distance(name_str, name_size, - item_str, item_size, - work_buffer, max_distance); + item_str, item_size, max_distance); if (current_distance > max_distance) { continue; } @@ -303,7 +247,9 @@ offer_suggestions_for_name_error(PyNameErrorObject *exc) // Offer suggestions for a given exception. Returns a python string object containing the // suggestions. This function returns NULL if no suggestion was found or if an exception happened, // users must call PyErr_Occurred() to disambiguate. -PyObject *_Py_Offer_Suggestions(PyObject *exception) { +PyObject * +_Py_Offer_Suggestions(PyObject *exception) +{ PyObject *result = NULL; assert(!PyErr_Occurred()); if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_AttributeError)) { @@ -314,3 +260,22 @@ PyObject *_Py_Offer_Suggestions(PyObject *exception) { return result; } +Py_ssize_t * +_Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost) +{ + assert(PyUnicode_Check(a) && PyUnicode_Check(b)); + Py_ssize_t size_a, size_b; + const char *utf8_a = PyUnicode_AsUTF8AndSize(a, &size_a); + if (utf8_a == NULL) { + return -1; + } + const char *utf8_b = PyUnicode_AsUTF8AndSize(b, &size_b); + if (utf8_b == NULL) { + return -1; + } + if (max_cost == -1) { + max_cost = MOVE_COST * Py_MAX(size_a, size_b); + } + return levenshtein_distance(utf8_a, size_a, utf8_b, size_b, max_cost); +} + From 8e120f3626a3ebc120844ec577a1c739ca814f36 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sat, 1 May 2021 20:44:09 -0400 Subject: [PATCH 3/8] Add tests using _testinternalcapi --- Include/internal/pycore_pyerrors.h | 2 + Modules/_testinternalcapi.c | 79 ++++++++++++++++++++++++++++++ Python/suggestions.c | 4 +- 3 files changed, 83 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h index d1af8e91b3b908..a5e97fe23fb05e 100644 --- a/Include/internal/pycore_pyerrors.h +++ b/Include/internal/pycore_pyerrors.h @@ -87,6 +87,8 @@ PyAPI_FUNC(int) _PyErr_CheckSignalsTstate(PyThreadState *tstate); PyAPI_FUNC(void) _Py_DumpExtensionModules(int fd, PyInterpreterState *interp); extern PyObject* _Py_Offer_Suggestions(PyObject* exception); +PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b, + Py_ssize_t max_cost); #ifdef __cplusplus } diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index ab6c5965d1661b..49ff3362f1e29e 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -18,6 +18,7 @@ #include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_initconfig.h" // _Py_GetConfigsAsDict() #include "pycore_interp.h" // _PyInterpreterState_GetConfigCopy() +#include "pycore_pyerrors.h" // _Py_UTF8_Edit_Cost() static PyObject * @@ -279,6 +280,83 @@ test_atomic_funcs(PyObject *self, PyObject *Py_UNUSED(args)) } +static int +check_edit_cost(const char *a, const char *b, Py_ssize_t expected) +{ + PyObject *a_obj = PyUnicode_FromString(a); + if (a_obj == NULL) { + return -1; + } + PyObject *b_obj = PyUnicode_FromString(b); + if (a_obj == NULL) { + return -1; + } + Py_ssize_t result = _Py_UTF8_Edit_Cost(a_obj, b_obj, -1); + if (result != expected) { + PyErr_Format(PyExc_AssertionError, + "Edit cost from '%s' to '%s' returns %zd, expected %zd", + a, b, result, expected); + return -1; + } + // Check that smaller max_edits thresholds are exceeded. + Py_ssize_t max_edits = result; + while (max_edits > 0) { + max_edits /= 2; + Py_ssize_t result2 = _Py_UTF8_Edit_Cost(a_obj, b_obj, max_edits); + if (result2 <= max_edits) { + PyErr_Format(PyExc_AssertionError, + "Edit cost from '%s' to '%s' (threshold %zd) " + "returns %zd, expected greater than %zd", + a, b, max_edits, result2, max_edits); + return -1; + } + } + // Check that bigger max_edits thresholds don't change anything + Py_ssize_t result3 = _Py_UTF8_Edit_Cost(a_obj, b_obj, result * 2 + 1); + if (result3 != result) { + PyErr_Format(PyExc_AssertionError, + "Edit cost from '%s' to '%s' (threshold %zd) " + "returns %zd, expected %zd", + a, b, result * 2, result3, result); + return -1; + } + return 0; +} + +static PyObject * +test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args)) +{ + #define CHECK(a, b, n) do { \ + if (check_edit_cost(a, b, n) < 0) { \ + return NULL; \ + } \ + } while (0) \ + + CHECK("", "", 0); + CHECK("", "a", 2); + CHECK("a", "A", 1); + CHECK("Apple", "Aple", 2); + CHECK("Banana", "B@n@n@", 6); + CHECK("Cherry", "Cherry!", 2); + CHECK("---0---", "------", 2); + CHECK("abc", "y", 6); + CHECK("aa", "bb", 4); + CHECK("aaaaa", "AAAAA", 5); + CHECK("wxyz", "wXyZ", 2); + CHECK("wxyz", "wXyZ123", 8); + CHECK("Python", "Java", 12); + CHECK("Java", "C#", 8); + CHECK("AbstractFoobarManager", "abstract_foobar_manager", 3+2*2); + CHECK("CPython", "PyPy", 10); + CHECK("CPython", "pypy", 11); + CHECK("AttributeError", "AttributeErrop", 2); + CHECK("AttributeError", "AttributeErrorTests", 10); + + #undef CHECK + Py_RETURN_NONE; +} + + static PyMethodDef TestMethods[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -289,6 +367,7 @@ static PyMethodDef TestMethods[] = { {"get_config", test_get_config, METH_NOARGS}, {"set_config", test_set_config, METH_O}, {"test_atomic_funcs", test_atomic_funcs, METH_NOARGS}, + {"test_edit_cost", test_edit_cost, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Python/suggestions.c b/Python/suggestions.c index 91981c18e3ccde..837547cfe266c3 100644 --- a/Python/suggestions.c +++ b/Python/suggestions.c @@ -10,7 +10,7 @@ #define MOVE_COST 2 #define CASE_COST 1 -#define LEAST_FIVE_BITS(n) ((n) * 31) +#define LEAST_FIVE_BITS(n) ((n) & 31) static inline int substitution_cost(char a, char b) @@ -260,7 +260,7 @@ _Py_Offer_Suggestions(PyObject *exception) return result; } -Py_ssize_t * +Py_ssize_t _Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost) { assert(PyUnicode_Check(a) && PyUnicode_Check(b)); From d7160946eb72a4ba264336d9cb732ee45efcaf4c Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sat, 1 May 2021 20:54:37 -0400 Subject: [PATCH 4/8] Remove unused buffer --- Python/suggestions.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Python/suggestions.c b/Python/suggestions.c index 837547cfe266c3..76797ff1b98fc6 100644 --- a/Python/suggestions.c +++ b/Python/suggestions.c @@ -135,27 +135,23 @@ calculate_suggestions(PyObject *dir, return NULL; } - Py_ssize_t suggestion_distance = PyUnicode_GetLength(name); + Py_ssize_t suggestion_distance = PY_SSIZE_T_MAX; PyObject *suggestion = NULL; Py_ssize_t name_size; const char *name_str = PyUnicode_AsUTF8AndSize(name, &name_size); if (name_str == NULL) { return NULL; } - size_t *work_buffer = PyMem_Calloc(name_size, sizeof(size_t)); - if (work_buffer == NULL) { - return NULL; - } for (int i = 0; i < dir_size; ++i) { PyObject *item = PyList_GET_ITEM(dir, i); Py_ssize_t item_size; const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size); if (item_str == NULL) { - PyMem_Free(work_buffer); return NULL; } Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6; + max_distance = Py_MIN(max_distance, suggestion_distance); Py_ssize_t current_distance = levenshtein_distance(name_str, name_size, item_str, item_size, max_distance); From eff17fbdcfe9af885022d85eb1fe8c548a443ac2 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sun, 2 May 2021 01:09:20 -0400 Subject: [PATCH 5/8] Allow for suggestions on larger dir()s and names --- Lib/test/test_exceptions.py | 98 +++++++++++++++++++++++++++++++------ Python/suggestions.c | 11 ++--- 2 files changed, 87 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index db80c65d9002f0..bc0404ea4b04d5 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1565,21 +1565,87 @@ def test_name_error_bad_suggestions_do_not_trigger_for_small_names(self): def test_name_error_suggestions_do_not_trigger_for_too_many_locals(self): def f(): # Mutating locals() is unreliable, so we need to do it by hand - a1 = a2 = a3 = a4 = a5 = a6 = a7 = a8 = a9 = a10 = a11 = a12 = a13 = \ - a14 = a15 = a16 = a17 = a18 = a19 = a20 = a21 = a22 = a23 = a24 = a25 = \ - a26 = a27 = a28 = a29 = a30 = a31 = a32 = a33 = a34 = a35 = a36 = a37 = \ - a38 = a39 = a40 = a41 = a42 = a43 = a44 = a45 = a46 = a47 = a48 = a49 = \ - a50 = a51 = a52 = a53 = a54 = a55 = a56 = a57 = a58 = a59 = a60 = a61 = \ - a62 = a63 = a64 = a65 = a66 = a67 = a68 = a69 = a70 = a71 = a72 = a73 = \ - a74 = a75 = a76 = a77 = a78 = a79 = a80 = a81 = a82 = a83 = a84 = a85 = \ - a86 = a87 = a88 = a89 = a90 = a91 = a92 = a93 = a94 = a95 = a96 = a97 = \ - a98 = a99 = a100 = a101 = a102 = a103 = a104 = a105 = a106 = a107 = \ - a108 = a109 = a110 = a111 = a112 = a113 = a114 = a115 = a116 = a117 = \ - a118 = a119 = a120 = a121 = a122 = a123 = a124 = a125 = a126 = \ - a127 = a128 = a129 = a130 = a131 = a132 = a133 = a134 = a135 = a136 = \ - a137 = a138 = a139 = a140 = a141 = a142 = a143 = a144 = a145 = \ - a146 = a147 = a148 = a149 = a150 = a151 = a152 = a153 = a154 = a155 = \ - a156 = a157 = a158 = a159 = a160 = a161 = None + a1 = a2 = a3 = a4 = a5 = a6 = a7 = a8 = a9 = a10 = \ + a11 = a12 = a13 = a14 = a15 = a16 = a17 = a18 = a19 = a20 = \ + a21 = a22 = a23 = a24 = a25 = a26 = a27 = a28 = a29 = a30 = \ + a31 = a32 = a33 = a34 = a35 = a36 = a37 = a38 = a39 = a40 = \ + a41 = a42 = a43 = a44 = a45 = a46 = a47 = a48 = a49 = a50 = \ + a51 = a52 = a53 = a54 = a55 = a56 = a57 = a58 = a59 = a60 = \ + a61 = a62 = a63 = a64 = a65 = a66 = a67 = a68 = a69 = a70 = \ + a71 = a72 = a73 = a74 = a75 = a76 = a77 = a78 = a79 = a80 = \ + a81 = a82 = a83 = a84 = a85 = a86 = a87 = a88 = a89 = a90 = \ + a91 = a92 = a93 = a94 = a95 = a96 = a97 = a98 = a99 = a100 = \ + a101 = a102 = a103 = a104 = a105 = a106 = a107 = a108 = a109 = a110 = \ + a111 = a112 = a113 = a114 = a115 = a116 = a117 = a118 = a119 = a120 = \ + a121 = a122 = a123 = a124 = a125 = a126 = a127 = a128 = a129 = a130 = \ + a131 = a132 = a133 = a134 = a135 = a136 = a137 = a138 = a139 = a140 = \ + a141 = a142 = a143 = a144 = a145 = a146 = a147 = a148 = a149 = a150 = \ + a151 = a152 = a153 = a154 = a155 = a156 = a157 = a158 = a159 = a160 = \ + a161 = a162 = a163 = a164 = a165 = a166 = a167 = a168 = a169 = a170 = \ + a171 = a172 = a173 = a174 = a175 = a176 = a177 = a178 = a179 = a180 = \ + a181 = a182 = a183 = a184 = a185 = a186 = a187 = a188 = a189 = a190 = \ + a191 = a192 = a193 = a194 = a195 = a196 = a197 = a198 = a199 = a200 = \ + a201 = a202 = a203 = a204 = a205 = a206 = a207 = a208 = a209 = a210 = \ + a211 = a212 = a213 = a214 = a215 = a216 = a217 = a218 = a219 = a220 = \ + a221 = a222 = a223 = a224 = a225 = a226 = a227 = a228 = a229 = a230 = \ + a231 = a232 = a233 = a234 = a235 = a236 = a237 = a238 = a239 = a240 = \ + a241 = a242 = a243 = a244 = a245 = a246 = a247 = a248 = a249 = a250 = \ + a251 = a252 = a253 = a254 = a255 = a256 = a257 = a258 = a259 = a260 = \ + a261 = a262 = a263 = a264 = a265 = a266 = a267 = a268 = a269 = a270 = \ + a271 = a272 = a273 = a274 = a275 = a276 = a277 = a278 = a279 = a280 = \ + a281 = a282 = a283 = a284 = a285 = a286 = a287 = a288 = a289 = a290 = \ + a291 = a292 = a293 = a294 = a295 = a296 = a297 = a298 = a299 = a300 = \ + a301 = a302 = a303 = a304 = a305 = a306 = a307 = a308 = a309 = a310 = \ + a311 = a312 = a313 = a314 = a315 = a316 = a317 = a318 = a319 = a320 = \ + a321 = a322 = a323 = a324 = a325 = a326 = a327 = a328 = a329 = a330 = \ + a331 = a332 = a333 = a334 = a335 = a336 = a337 = a338 = a339 = a340 = \ + a341 = a342 = a343 = a344 = a345 = a346 = a347 = a348 = a349 = a350 = \ + a351 = a352 = a353 = a354 = a355 = a356 = a357 = a358 = a359 = a360 = \ + a361 = a362 = a363 = a364 = a365 = a366 = a367 = a368 = a369 = a370 = \ + a371 = a372 = a373 = a374 = a375 = a376 = a377 = a378 = a379 = a380 = \ + a381 = a382 = a383 = a384 = a385 = a386 = a387 = a388 = a389 = a390 = \ + a391 = a392 = a393 = a394 = a395 = a396 = a397 = a398 = a399 = a400 = \ + a401 = a402 = a403 = a404 = a405 = a406 = a407 = a408 = a409 = a410 = \ + a411 = a412 = a413 = a414 = a415 = a416 = a417 = a418 = a419 = a420 = \ + a421 = a422 = a423 = a424 = a425 = a426 = a427 = a428 = a429 = a430 = \ + a431 = a432 = a433 = a434 = a435 = a436 = a437 = a438 = a439 = a440 = \ + a441 = a442 = a443 = a444 = a445 = a446 = a447 = a448 = a449 = a450 = \ + a451 = a452 = a453 = a454 = a455 = a456 = a457 = a458 = a459 = a460 = \ + a461 = a462 = a463 = a464 = a465 = a466 = a467 = a468 = a469 = a470 = \ + a471 = a472 = a473 = a474 = a475 = a476 = a477 = a478 = a479 = a480 = \ + a481 = a482 = a483 = a484 = a485 = a486 = a487 = a488 = a489 = a490 = \ + a491 = a492 = a493 = a494 = a495 = a496 = a497 = a498 = a499 = a500 = \ + a501 = a502 = a503 = a504 = a505 = a506 = a507 = a508 = a509 = a510 = \ + a511 = a512 = a513 = a514 = a515 = a516 = a517 = a518 = a519 = a520 = \ + a521 = a522 = a523 = a524 = a525 = a526 = a527 = a528 = a529 = a530 = \ + a531 = a532 = a533 = a534 = a535 = a536 = a537 = a538 = a539 = a540 = \ + a541 = a542 = a543 = a544 = a545 = a546 = a547 = a548 = a549 = a550 = \ + a551 = a552 = a553 = a554 = a555 = a556 = a557 = a558 = a559 = a560 = \ + a561 = a562 = a563 = a564 = a565 = a566 = a567 = a568 = a569 = a570 = \ + a571 = a572 = a573 = a574 = a575 = a576 = a577 = a578 = a579 = a580 = \ + a581 = a582 = a583 = a584 = a585 = a586 = a587 = a588 = a589 = a590 = \ + a591 = a592 = a593 = a594 = a595 = a596 = a597 = a598 = a599 = a600 = \ + a601 = a602 = a603 = a604 = a605 = a606 = a607 = a608 = a609 = a610 = \ + a611 = a612 = a613 = a614 = a615 = a616 = a617 = a618 = a619 = a620 = \ + a621 = a622 = a623 = a624 = a625 = a626 = a627 = a628 = a629 = a630 = \ + a631 = a632 = a633 = a634 = a635 = a636 = a637 = a638 = a639 = a640 = \ + a641 = a642 = a643 = a644 = a645 = a646 = a647 = a648 = a649 = a650 = \ + a651 = a652 = a653 = a654 = a655 = a656 = a657 = a658 = a659 = a660 = \ + a661 = a662 = a663 = a664 = a665 = a666 = a667 = a668 = a669 = a670 = \ + a671 = a672 = a673 = a674 = a675 = a676 = a677 = a678 = a679 = a680 = \ + a681 = a682 = a683 = a684 = a685 = a686 = a687 = a688 = a689 = a690 = \ + a691 = a692 = a693 = a694 = a695 = a696 = a697 = a698 = a699 = a700 = \ + a701 = a702 = a703 = a704 = a705 = a706 = a707 = a708 = a709 = a710 = \ + a711 = a712 = a713 = a714 = a715 = a716 = a717 = a718 = a719 = a720 = \ + a721 = a722 = a723 = a724 = a725 = a726 = a727 = a728 = a729 = a730 = \ + a731 = a732 = a733 = a734 = a735 = a736 = a737 = a738 = a739 = a740 = \ + a741 = a742 = a743 = a744 = a745 = a746 = a747 = a748 = a749 = a750 = \ + a751 = a752 = a753 = a754 = a755 = a756 = a757 = a758 = a759 = a760 = \ + a761 = a762 = a763 = a764 = a765 = a766 = a767 = a768 = a769 = a770 = \ + a771 = a772 = a773 = a774 = a775 = a776 = a777 = a778 = a779 = a780 = \ + a781 = a782 = a783 = a784 = a785 = a786 = a787 = a788 = a789 = a790 = \ + a791 = a792 = a793 = a794 = a795 = a796 = a797 = a798 = a799 = a800 \ + = None print(a0) try: @@ -1778,7 +1844,7 @@ class A: blech = None # A class with a very big __dict__ will not be consider # for suggestions. - for index in range(160): + for index in range(2000): setattr(A, f"index_{index}", None) try: diff --git a/Python/suggestions.c b/Python/suggestions.c index 76797ff1b98fc6..aa67d01703a7b0 100644 --- a/Python/suggestions.c +++ b/Python/suggestions.c @@ -3,9 +3,8 @@ #include "pycore_pyerrors.h" -#define MAX_DISTANCE 3 -#define MAX_CANDIDATE_ITEMS 160 -#define MAX_STRING_SIZE 25 +#define MAX_CANDIDATE_ITEMS 750 +#define MAX_STRING_SIZE 40 #define MOVE_COST 2 #define CASE_COST 1 @@ -41,9 +40,6 @@ levenshtein_distance(const char *a, size_t a_size, size_t max_cost) { static size_t buffer[MAX_STRING_SIZE]; - if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) { - return max_cost + 1; - } // Both strings are the same (by identity) if (a == b) { @@ -62,6 +58,9 @@ levenshtein_distance(const char *a, size_t a_size, if (a_size == 0 || b_size == 0) { return (a_size + b_size) * MOVE_COST; } + if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) { + return max_cost + 1; + } // Prefer shorter buffer if (b_size < a_size) { From e99baf13a70a073886781f0ae434d9297c159b2d Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sun, 2 May 2021 02:12:41 -0400 Subject: [PATCH 6/8] Correct a comment --- Python/suggestions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/suggestions.c b/Python/suggestions.c index aa67d01703a7b0..8e93e3ed8bc9b6 100644 --- a/Python/suggestions.c +++ b/Python/suggestions.c @@ -69,7 +69,7 @@ levenshtein_distance(const char *a, size_t a_size, } // quick fail when a match is impossible. - if ((b_size - a_size)*MOVE_COST > max_cost) { + if ((b_size - a_size) * MOVE_COST > max_cost) { return max_cost + 1; } @@ -77,7 +77,7 @@ levenshtein_distance(const char *a, size_t a_size, // matrix, we can update just one row in place. // Initialize the buffer row for (size_t i = 0; i < a_size; i++) { - // cost from a[:0] to b[:index+1] + // cost from b[:0] to a[:i+1] buffer[i] = (i + 1) * MOVE_COST; } From 3af514a6ff7f66261b5708fc7b5f910b675353a4 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sun, 2 May 2021 07:12:15 -0400 Subject: [PATCH 7/8] Remove one more unneeded computation --- Python/suggestions.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/suggestions.c b/Python/suggestions.c index 8e93e3ed8bc9b6..6fb01f10cd37c9 100644 --- a/Python/suggestions.c +++ b/Python/suggestions.c @@ -149,8 +149,10 @@ calculate_suggestions(PyObject *dir, if (item_str == NULL) { return NULL; } + // No more than 1/3 of the involved characters should need changed. Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6; - max_distance = Py_MIN(max_distance, suggestion_distance); + // Don't take matches we've already beaten. + max_distance = Py_MIN(max_distance, suggestion_distance - 1); Py_ssize_t current_distance = levenshtein_distance(name_str, name_size, item_str, item_size, max_distance); From ce87e15020a8d3ac4e72ced1f67afb1c173a5e37 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 3 May 2021 16:29:00 +0100 Subject: [PATCH 8/8] Fix refleaks --- Modules/_testinternalcapi.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 49ff3362f1e29e..d5616fd59c6e50 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -283,20 +283,24 @@ test_atomic_funcs(PyObject *self, PyObject *Py_UNUSED(args)) static int check_edit_cost(const char *a, const char *b, Py_ssize_t expected) { - PyObject *a_obj = PyUnicode_FromString(a); + int ret = -1; + PyObject *a_obj = NULL; + PyObject *b_obj = NULL; + + a_obj = PyUnicode_FromString(a); if (a_obj == NULL) { - return -1; + goto exit; } - PyObject *b_obj = PyUnicode_FromString(b); + b_obj = PyUnicode_FromString(b); if (a_obj == NULL) { - return -1; + goto exit; } Py_ssize_t result = _Py_UTF8_Edit_Cost(a_obj, b_obj, -1); if (result != expected) { PyErr_Format(PyExc_AssertionError, "Edit cost from '%s' to '%s' returns %zd, expected %zd", a, b, result, expected); - return -1; + goto exit; } // Check that smaller max_edits thresholds are exceeded. Py_ssize_t max_edits = result; @@ -308,7 +312,7 @@ check_edit_cost(const char *a, const char *b, Py_ssize_t expected) "Edit cost from '%s' to '%s' (threshold %zd) " "returns %zd, expected greater than %zd", a, b, max_edits, result2, max_edits); - return -1; + goto exit; } } // Check that bigger max_edits thresholds don't change anything @@ -318,9 +322,13 @@ check_edit_cost(const char *a, const char *b, Py_ssize_t expected) "Edit cost from '%s' to '%s' (threshold %zd) " "returns %zd, expected %zd", a, b, result * 2, result3, result); - return -1; + goto exit; } - return 0; + ret = 0; +exit: + Py_XDECREF(a_obj); + Py_XDECREF(b_obj); + return ret; } static PyObject *