Skip to content

Commit bc73ebb

Browse files
author
Matthew Barnett
committed
Git issue 508: Regex doesn't build using CPython main (3.13.0a0)
Removed usage of _PyBytes_Join and did a little tidying of the code that makes the result string.
1 parent 774dbfd commit bc73ebb

File tree

6 files changed

+120
-70
lines changed

6 files changed

+120
-70
lines changed

changelog.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
1+
Version: 2023.8.8
2+
3+
Git issue 508: Regex doesn't build using CPython main (3.13.0a0)
4+
Removed usage of _PyBytes_Join and did a little tidying of the code that makes the result string.
5+
16
Version: 2023.6.3
27

3-
# Git issue 498: Conditional negative lookahead inside positive lookahead fails to match
8+
Git issue 498: Conditional negative lookahead inside positive lookahead fails to match
49
Conditional node needed an additional member that points to the true branch.
510

611
Version: 2023.5.5

regex_3/_regex.c

Lines changed: 99 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -18706,8 +18706,8 @@ static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index)
1870618706
if (!item)
1870718707
goto error;
1870818708

18709-
/* PyList_SET_ITEM borrows the reference. */
18710-
PyList_SET_ITEM(result, 0, item);
18709+
/* PyList_SetItem steals the reference. */
18710+
PyList_SetItem(result, 0, item);
1871118711

1871218712
return result;
1871318713
}
@@ -18726,8 +18726,8 @@ static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index)
1872618726
if (!item)
1872718727
goto error;
1872818728

18729-
/* PyList_SET_ITEM borrows the reference. */
18730-
PyList_SET_ITEM(result, i, item);
18729+
/* PyList_SetItem steals the reference. */
18730+
PyList_SetItem(result, i, item);
1873118731
}
1873218732

1873318733
return result;
@@ -18786,8 +18786,8 @@ static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) {
1878618786
if (!item)
1878718787
goto error;
1878818788

18789-
/* PyList_SET_ITEM borrows the reference. */
18790-
PyList_SET_ITEM(result, 0, item);
18789+
/* PyList_SetItem steals the reference. */
18790+
PyList_SetItem(result, 0, item);
1879118791

1879218792
return result;
1879318793
}
@@ -18806,8 +18806,8 @@ static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) {
1880618806
if (!item)
1880718807
goto error;
1880818808

18809-
/* PyList_SET_ITEM borrows the reference. */
18810-
PyList_SET_ITEM(result, i, item);
18809+
/* PyList_SetItem steals the reference. */
18810+
PyList_SetItem(result, i, item);
1881118811
}
1881218812

1881318813
return result;
@@ -18867,8 +18867,8 @@ static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index)
1886718867
if (!item)
1886818868
goto error;
1886918869

18870-
/* PyList_SET_ITEM borrows the reference. */
18871-
PyList_SET_ITEM(result, 0, item);
18870+
/* PyList_SetItem steals the reference. */
18871+
PyList_SetItem(result, 0, item);
1887218872

1887318873
return result;
1887418874
}
@@ -18888,8 +18888,8 @@ static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index)
1888818888
if (!item)
1888918889
goto error;
1889018890

18891-
/* PyList_SET_ITEM borrows the reference. */
18892-
PyList_SET_ITEM(result, i, item);
18891+
/* PyList_SetItem steals the reference. */
18892+
PyList_SetItem(result, i, item);
1889318893
}
1889418894

1889518895
return result;
@@ -18923,8 +18923,8 @@ static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t
1892318923
if (!slice)
1892418924
goto error;
1892518925

18926-
/* PyList_SET_ITEM borrows the reference. */
18927-
PyList_SET_ITEM(result, 0, slice);
18926+
/* PyList_SetItem steals the reference. */
18927+
PyList_SetItem(result, 0, slice);
1892818928

1892918929
return result;
1893018930
}
@@ -18945,8 +18945,8 @@ static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t
1894518945
if (!slice)
1894618946
goto error;
1894718947

18948-
/* PyList_SET_ITEM borrows the reference. */
18949-
PyList_SET_ITEM(result, i, slice);
18948+
/* PyList_SetItem steals the reference. */
18949+
PyList_SetItem(result, i, slice);
1895018950
}
1895118951

1895218952
return result;
@@ -19282,13 +19282,13 @@ static PyObject* match_groupdict(MatchObject* self, PyObject* args, PyObject*
1928219282
if (!keys)
1928319283
goto failed;
1928419284

19285-
for (g = 0; g < PyList_GET_SIZE(keys); g++) {
19285+
for (g = 0; g < PyList_Size(keys); g++) {
1928619286
PyObject* key;
1928719287
PyObject* value;
1928819288
int status;
1928919289

19290-
/* PyList_GET_ITEM borrows a reference. */
19291-
key = PyList_GET_ITEM(keys, g);
19290+
/* PyList_GetItem borrows a reference. */
19291+
key = PyList_GetItem(keys, g);
1929219292
if (!key)
1929319293
goto failed;
1929419294

@@ -19326,14 +19326,14 @@ static PyObject* match_capturesdict(MatchObject* self) {
1932619326
if (!keys)
1932719327
goto failed;
1932819328

19329-
for (g = 0; g < PyList_GET_SIZE(keys); g++) {
19329+
for (g = 0; g < PyList_Size(keys); g++) {
1933019330
PyObject* key;
1933119331
Py_ssize_t group;
1933219332
PyObject* captures;
1933319333
int status;
1933419334

19335-
/* PyList_GET_ITEM borrows a reference. */
19336-
key = PyList_GET_ITEM(keys, g);
19335+
/* PyList_GetItem borrows a reference. */
19336+
key = PyList_GetItem(keys, g);
1933719337
if (!key)
1933819338
goto failed;
1933919339

@@ -19509,12 +19509,12 @@ Py_LOCAL_INLINE(int) add_to_join_list(RE_JoinInfo* join_info, PyObject* item) {
1950919509
goto error;
1951019510
}
1951119511

19512-
/* PyList_SET_ITEM borrows the reference. */
19513-
PyList_SET_ITEM(join_info->list, 0, join_info->item);
19512+
/* PyList_SetItem steals the reference. */
19513+
PyList_SetItem(join_info->list, 0, join_info->item);
1951419514
join_info->item = NULL;
1951519515

19516-
/* PyList_SET_ITEM borrows the reference. */
19517-
PyList_SET_ITEM(join_info->list, 1, new_item);
19516+
/* PyList_SetItem steals the reference. */
19517+
PyList_SetItem(join_info->list, 1, new_item);
1951819518
return 0;
1951919519
}
1952019520

@@ -19535,38 +19535,78 @@ Py_LOCAL_INLINE(void) clear_join_list(RE_JoinInfo* join_info) {
1953519535
Py_XDECREF(join_info->item);
1953619536
}
1953719537

19538+
/* Joins a list of bytestrings. */
19539+
Py_LOCAL_INLINE(PyObject*) join_bytestrings(PyObject* list) {
19540+
Py_ssize_t count;
19541+
Py_ssize_t length;
19542+
Py_ssize_t i;
19543+
PyObject *result;
19544+
char* to_bytes;
19545+
19546+
count = PyList_Size(list);
19547+
19548+
/* How long will the result be? */
19549+
length = 0;
19550+
19551+
for (i = 0; i < count; i++)
19552+
length += PyBytes_Size(PyList_GetItem(list, i));
19553+
19554+
/* Create the resulting bytestring, but uninitialised. */
19555+
result = PyBytes_FromStringAndSize(NULL, length);
19556+
if (!result)
19557+
return NULL;
19558+
19559+
/* Fill the resulting bytestring. */
19560+
to_bytes = PyBytes_AsString(result);
19561+
length = 0;
19562+
19563+
for (i = 0; i < count; i++) {
19564+
PyObject* bytestring;
19565+
char* from_bytes;
19566+
Py_ssize_t from_length;
19567+
19568+
bytestring = PyList_GetItem(list, i);
19569+
from_bytes = PyBytes_AsString(bytestring);
19570+
from_length = PyBytes_Size(bytestring);
19571+
memmove(to_bytes + length, from_bytes, from_length);
19572+
length += from_length;
19573+
}
19574+
19575+
return result;
19576+
}
19577+
19578+
/* Joins a list of strings. */
19579+
Py_LOCAL_INLINE(PyObject*) join_strings(PyObject* list) {
19580+
PyObject* joiner;
19581+
PyObject* result;
19582+
19583+
joiner = PyUnicode_FromString("");
19584+
if (!joiner)
19585+
return NULL;
19586+
19587+
result = PyUnicode_Join(joiner, list);
19588+
Py_DECREF(joiner);
19589+
19590+
return result;
19591+
}
19592+
1953819593
/* Joins together a list of strings for pattern_subx. */
1953919594
Py_LOCAL_INLINE(PyObject*) join_list_info(RE_JoinInfo* join_info) {
1954019595
/* If the list already exists then just do the join. */
1954119596
if (join_info->list) {
19542-
PyObject* joiner;
1954319597
PyObject* result;
1954419598

1954519599
if (join_info->reversed)
1954619600
/* The list needs to be reversed before being joined. */
1954719601
PyList_Reverse(join_info->list);
1954819602

19549-
if (join_info->is_unicode) {
19603+
if (join_info->is_unicode)
1955019604
/* Concatenate the Unicode strings. */
19551-
joiner = PyUnicode_New(0, 0);
19552-
if (!joiner) {
19553-
clear_join_list(join_info);
19554-
return NULL;
19555-
}
19556-
19557-
result = PyUnicode_Join(joiner, join_info->list);
19558-
} else {
19559-
joiner = PyBytes_FromString("");
19560-
if (!joiner) {
19561-
clear_join_list(join_info);
19562-
return NULL;
19563-
}
19564-
19605+
result = join_strings(join_info->list);
19606+
else
1956519607
/* Concatenate the bytestrings. */
19566-
result = _PyBytes_Join(joiner, join_info->list);
19567-
}
19608+
result = join_bytestrings(join_info->list);
1956819609

19569-
Py_DECREF(joiner);
1957019610
clear_join_list(join_info);
1957119611

1957219612
return result;
@@ -19651,13 +19691,13 @@ static PyObject* match_expand(MatchObject* self, PyObject* str_template) {
1965119691
init_join_list(&join_info, FALSE, PyUnicode_Check(self->string));
1965219692

1965319693
/* Add each part of the template to the list. */
19654-
size = PyList_GET_SIZE(replacement);
19694+
size = PyList_Size(replacement);
1965519695
for (i = 0; i < size; i++) {
1965619696
PyObject* item;
1965719697
PyObject* str_item;
1965819698

19659-
/* PyList_GET_ITEM borrows a reference. */
19660-
item = PyList_GET_ITEM(replacement, i);
19699+
/* PyList_GetItem borrows a reference. */
19700+
item = PyList_GetItem(replacement, i);
1966119701
str_item = get_match_replacement(self, item, self->group_count);
1966219702
if (!str_item)
1966319703
goto error;
@@ -19727,19 +19767,19 @@ Py_LOCAL_INLINE(PyObject*) make_capture_dict(MatchObject* match, MatchObject**
1972719767
if (!values)
1972819768
goto failed;
1972919769

19730-
for (g = 0; g < PyList_GET_SIZE(keys); g++) {
19770+
for (g = 0; g < PyList_Size(keys); g++) {
1973119771
PyObject* key;
1973219772
PyObject* value;
1973319773
Py_ssize_t v;
1973419774
int status;
1973519775

19736-
/* PyList_GET_ITEM borrows a reference. */
19737-
key = PyList_GET_ITEM(keys, g);
19776+
/* PyList_GetItem borrows a reference. */
19777+
key = PyList_GetItem(keys, g);
1973819778
if (!key)
1973919779
goto failed;
1974019780

19741-
/* PyList_GET_ITEM borrows a reference. */
19742-
value = PyList_GET_ITEM(values, g);
19781+
/* PyList_GetItem borrows a reference. */
19782+
value = PyList_GetItem(values, g);
1974319783
if (!value)
1974419784
goto failed;
1974519785

@@ -21742,7 +21782,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject*
2174221782
Py_ssize_t step;
2174321783

2174421784
/* Add each part of the template to the list. */
21745-
count = PyList_GET_SIZE(replacement);
21785+
count = PyList_Size(replacement);
2174621786
if (join_info.reversed) {
2174721787
/* We're searching backwards, so we'll be reversing the list
2174821788
* when it's complete. Therefore, we need to add the items of
@@ -21761,8 +21801,8 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject*
2176121801
PyObject* item;
2176221802
PyObject* str_item;
2176321803

21764-
/* PyList_GET_ITEM borrows a reference. */
21765-
item = PyList_GET_ITEM(replacement, index);
21804+
/* PyList_GetItem borrows a reference. */
21805+
item = PyList_GetItem(replacement, index);
2176621806
str_item = get_sub_replacement(item, string, &state,
2176721807
self->public_group_count);
2176821808
if (!str_item)
@@ -25719,7 +25759,7 @@ static PyObject* re_compile(PyObject* self_, PyObject* args) {
2571925759
unpacked = FALSE;
2572025760

2572125761
/* Read the regex code. */
25722-
code_len = PyList_GET_SIZE(code_list);
25762+
code_len = PyList_Size(code_list);
2572325763
code = (RE_CODE*)re_alloc((size_t)code_len * sizeof(RE_CODE));
2572425764
if (!code) {
2572525765
if (unpacked) {
@@ -25733,8 +25773,8 @@ static PyObject* re_compile(PyObject* self_, PyObject* args) {
2573325773
PyObject* o;
2573425774
size_t value;
2573525775

25736-
/* PyList_GET_ITEM borrows a reference. */
25737-
o = PyList_GET_ITEM(code_list, i);
25776+
/* PyList_GetItem borrows a reference. */
25777+
o = PyList_GetItem(code_list, i);
2573825778

2573925779
value = PyLong_AsUnsignedLong(o);
2574025780
if ((Py_ssize_t)value == -1 && PyErr_Occurred())

regex_3/_regex_core.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3000,18 +3000,19 @@ def has_simple_start(self):
30003000
def _compile(self, reverse, fuzzy):
30013001
code = []
30023002

3003-
key = self.group, reverse, fuzzy
3004-
ref = self.info.call_refs.get(key)
3005-
if ref is not None:
3006-
code += [(OP.CALL_REF, ref)]
3007-
30083003
public_group = private_group = self.group
30093004
if private_group < 0:
30103005
public_group = self.info.private_groups[private_group]
30113006
private_group = self.info.group_count - private_group
30123007

3013-
code += ([(OP.GROUP, int(not reverse), private_group, public_group)] +
3014-
self.subpattern.compile(reverse, fuzzy) + [(OP.END, )])
3008+
key = self.group, reverse, fuzzy
3009+
ref = self.info.call_refs.get(key)
3010+
if ref is not None:
3011+
code += [(OP.CALL_REF, ref)]
3012+
3013+
code += [(OP.GROUP, int(not reverse), private_group, public_group)]
3014+
code += self.subpattern.compile(reverse, fuzzy)
3015+
code += [(OP.END, )]
30153016

30163017
if ref is not None:
30173018
code += [(OP.END, )]

regex_3/regex.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@
241241
"VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
242242
"__doc__", "RegexFlag"]
243243

244-
__version__ = "2.5.129"
244+
__version__ = "2.5.132"
245245

246246
# --------------------------------------------------------------------
247247
# Public interface.

regex_3/test_regex.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3289,7 +3289,7 @@ def test_hg_bugs(self):
32893289
"female: her dog; male: his cat. asdsasda")], ['female: her dog',
32903290
'male: his cat'])
32913291

3292-
# Hg issue 78: "Captures"doesn't work for recursive calls
3292+
# Hg issue 78: "Captures" doesn't work for recursive calls
32933293
self.assertEqual(regex.search(r'(?<rec>\((?:[^()]++|(?&rec))*\))',
32943294
'aaa(((1+0)+1)+1)bbb').captures('rec'), ['(1+0)', '((1+0)+1)',
32953295
'(((1+0)+1)+1)'])
@@ -3624,6 +3624,9 @@ def test_hg_bugs(self):
36243624
self.assertEqual(regex.search(r'(?(DEFINE)(?<quant>\d+)(?<item>\w+))(?&quant) (?&item)',
36253625
'5 elephants')[0], '5 elephants')
36263626

3627+
self.assertEqual(regex.search(r'(?&routine)(?(DEFINE)(?<routine>.))', 'a').group('routine'), None)
3628+
self.assertEqual(regex.search(r'(?&routine)(?(DEFINE)(?<routine>.))', 'a').captures('routine'), ['a'])
3629+
36273630
# Hg issue 153: Request: (*SKIP).
36283631
self.assertEqual(regex.search(r'12(*FAIL)|3', '123')[0], '3')
36293632
self.assertEqual(regex.search(r'(?r)12(*FAIL)|3', '123')[0], '3')

0 commit comments

Comments
 (0)