Skip to content

Commit 50000c1

Browse files
committed
compile time cleandoc
1 parent ec931fc commit 50000c1

File tree

6 files changed

+231
-28
lines changed

6 files changed

+231
-28
lines changed

Include/internal/pycore_compile.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);
9191

9292
/* Access compiler internals for unit testing */
9393

94+
PyAPI_FUNC(PyObject*) _PyCompile_CleanDoc(PyObject *doc);
95+
9496
PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(
9597
PyObject *ast,
9698
PyObject *filename,

Lib/inspect.py

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -881,29 +881,28 @@ def cleandoc(doc):
881881
882882
Any whitespace that can be uniformly removed from the second line
883883
onwards is removed."""
884-
try:
885-
lines = doc.expandtabs().split('\n')
886-
except UnicodeError:
887-
return None
888-
else:
889-
# Find minimum indentation of any non-blank lines after first line.
890-
margin = sys.maxsize
891-
for line in lines[1:]:
892-
content = len(line.lstrip())
893-
if content:
894-
indent = len(line) - content
895-
margin = min(margin, indent)
896-
# Remove indentation.
897-
if lines:
898-
lines[0] = lines[0].lstrip()
899-
if margin < sys.maxsize:
900-
for i in range(1, len(lines)): lines[i] = lines[i][margin:]
901-
# Remove any trailing or leading blank lines.
902-
while lines and not lines[-1]:
903-
lines.pop()
904-
while lines and not lines[0]:
905-
lines.pop(0)
906-
return '\n'.join(lines)
884+
lines = doc.expandtabs().split('\n')
885+
886+
# Find minimum indentation of any non-blank lines after first line.
887+
margin = sys.maxsize
888+
for line in lines[1:]:
889+
content = len(line.lstrip(' '))
890+
if content:
891+
indent = len(line) - content
892+
margin = min(margin, indent)
893+
# Remove indentation.
894+
if lines:
895+
lines[0] = lines[0].lstrip(' ')
896+
if margin < sys.maxsize:
897+
for i in range(1, len(lines)):
898+
lines[i] = lines[i][margin:]
899+
# Remove any trailing or leading blank lines.
900+
while lines and not lines[-1]:
901+
lines.pop()
902+
while lines and not lines[0]:
903+
lines.pop(0)
904+
return '\n'.join(lines)
905+
907906

908907
def getfile(object):
909908
"""Work out which source or compiled file an object was defined in."""

Lib/test/test_inspect.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -596,9 +596,44 @@ def test_finddoc(self):
596596
self.assertEqual(finddoc(int.from_bytes), int.from_bytes.__doc__)
597597
self.assertEqual(finddoc(int.real), int.real.__doc__)
598598

599+
cleandoc_testdata = [
600+
# first line should have different margin
601+
(' An\n indented\n docstring.', 'An\nindented\n docstring.'),
602+
# trailing whitespace is not removed.
603+
(' An \n \n indented \n docstring. ',
604+
'An \n \nindented \n docstring. '),
605+
# NUL is not termination.
606+
('doc\0string\n\n second\0line\n third\0line\0',
607+
'doc\0string\n\nsecond\0line\nthird\0line\0'),
608+
# first line is lstrip()-ped. other lines are kept when no margin.
609+
(' ', ''),
610+
]
611+
599612
def test_cleandoc(self):
600-
self.assertEqual(inspect.cleandoc('An\n indented\n docstring.'),
601-
'An\nindented\ndocstring.')
613+
func = inspect.cleandoc
614+
testdata = self.cleandoc_testdata + [
615+
# leading and trailing empty lines should be removed
616+
('\n\n\n first paragraph\n\n second paragraph\n\n',
617+
'first paragraph\n\n second paragraph'),
618+
(' \n \n \n ', ' \n \n '),
619+
]
620+
for i, (input, expected) in enumerate(testdata):
621+
with self.subTest(i=i):
622+
self.assertEqual(func(input), expected)
623+
624+
@cpython_only
def test_c_cleandoc(self):
    """Exercise the compiler's C cleandoc through _testinternalcapi.

    Runs the shared cleandoc cases plus cases specific to the C
    implementation, which keeps leading and trailing blank lines.
    """
    import _testinternalcapi
    func = _testinternalcapi.compiler_cleandoc
    testdata = self.cleandoc_testdata + [
        # leading and trailing empty lines are not removed
        # (lines are indented by 2 and 3 spaces, so a 1-space indent
        # survives on the second paragraph once the margin is removed)
        ('\n\n\n  first paragraph\n\n   second paragraph\n\n',
         '\n\n\nfirst paragraph\n\n second paragraph\n\n'),
        # whitespace-only input: only the first line is stripped
        ('   \n \n  \n   ', '\n \n  \n   '),
    ]
    # Iterate the combined list; looping over self.cleandoc_testdata
    # would silently skip the C-specific cases defined above.
    for i, (input, expected) in enumerate(testdata):
        with self.subTest(i=i):
            self.assertEqual(func(input), expected)
602637

603638
def test_getcomments(self):
604639
self.assertEqual(inspect.getcomments(mod), '# line 1\n')

Modules/_testinternalcapi.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include "interpreteridobject.h" // _PyInterpreterID_LookUp()
1515
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
1616
#include "pycore_bitutils.h" // _Py_bswap32()
17-
#include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble
17+
#include "pycore_compile.h" // _PyCompile_CleanDoc, _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble
1818
#include "pycore_ceval.h" // _PyEval_AddPendingCall
1919
#include "pycore_fileutils.h" // _Py_normpath
2020
#include "pycore_frame.h" // _PyInterpreterFrame
@@ -591,6 +591,23 @@ set_eval_frame_record(PyObject *self, PyObject *list)
591591
Py_RETURN_NONE;
592592
}
593593

594+
/*[clinic input]
595+
596+
_testinternalcapi.compiler_cleandoc -> object
597+
598+
doc: unicode
599+
600+
C implementation of inspect.cleandoc().
601+
[clinic start generated code]*/
602+
603+
static PyObject *
604+
_testinternalcapi_compiler_cleandoc_impl(PyObject *module, PyObject *doc)
605+
/*[clinic end generated code: output=2dd203a80feff5bc input=2de03fab931d9cdc]*/
606+
{
607+
return _PyCompile_CleanDoc(doc);
608+
}
609+
610+
594611
/*[clinic input]
595612
596613
_testinternalcapi.compiler_codegen -> object
@@ -1271,6 +1288,8 @@ static PyMethodDef module_functions[] = {
12711288
{"DecodeLocaleEx", decode_locale_ex, METH_VARARGS},
12721289
{"set_eval_frame_default", set_eval_frame_default, METH_NOARGS, NULL},
12731290
{"set_eval_frame_record", set_eval_frame_record, METH_O, NULL},
1291+
_TESTINTERNALCAPI_COMPILER_CLEANDOC_METHODDEF
1292+
_TESTINTERNALCAPI_COMPILER_CODEGEN_METHODDEF
12741293
_TESTINTERNALCAPI_COMPILER_CODEGEN_METHODDEF
12751294
_TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF
12761295
_TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF

Modules/clinic/_testinternalcapi.c.h

Lines changed: 60 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/compile.c

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1672,10 +1672,16 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts)
16721672
if (c->c_optimize < 2) {
16731673
docstring = _PyAST_GetDocString(stmts);
16741674
if (docstring) {
1675+
PyObject *cleandoc = _PyCompile_CleanDoc(docstring);
1676+
if (cleandoc == NULL) {
1677+
return ERROR;
1678+
}
16751679
i = 1;
16761680
st = (stmt_ty)asdl_seq_GET(stmts, 0);
16771681
assert(st->kind == Expr_kind);
1678-
VISIT(c, expr, st->v.Expr.value);
1682+
location loc = LOC(st->v.Expr.value);
1683+
ADDOP_LOAD_CONST(c, loc, cleandoc);
1684+
Py_DECREF(cleandoc);
16791685
RETURN_IF_ERROR(compiler_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store));
16801686
}
16811687
}
@@ -2220,11 +2226,19 @@ compiler_function_body(struct compiler *c, stmt_ty s, int is_async, Py_ssize_t f
22202226
/* if not -OO mode, add docstring */
22212227
if (c->c_optimize < 2) {
22222228
docstring = _PyAST_GetDocString(body);
2229+
if (docstring) {
2230+
docstring = _PyCompile_CleanDoc(docstring);
2231+
if (docstring == NULL) {
2232+
compiler_exit_scope(c);
2233+
return ERROR;
2234+
}
2235+
}
22232236
}
22242237
if (compiler_add_const(c->c_const_cache, c->u, docstring ? docstring : Py_None) < 0) {
22252238
compiler_exit_scope(c);
22262239
return ERROR;
22272240
}
2241+
Py_XDECREF(docstring);
22282242

22292243
c->u->u_metadata.u_argcount = asdl_seq_LEN(args->args);
22302244
c->u->u_metadata.u_posonlyargcount = asdl_seq_LEN(args->posonlyargs);
@@ -7935,6 +7949,81 @@ cfg_to_instructions(cfg_builder *g)
79357949
return NULL;
79367950
}
79377951

7952+
// C implementation of inspect.cleandoc()
7953+
//
7954+
// Difference from inspect.cleandoc():
7955+
// - Do not remove leading and trailing blank lines to keep lineno.
7956+
PyObject *
7957+
_PyCompile_CleanDoc(PyObject *doc)
7958+
{
7959+
doc = PyObject_CallMethod(doc, "expandtabs", NULL);
7960+
if (doc == NULL) {
7961+
return NULL;
7962+
}
7963+
7964+
Py_ssize_t doc_size;
7965+
const char *doc_utf8 = PyUnicode_AsUTF8AndSize(doc, &doc_size);
7966+
if (doc_utf8 == NULL) {
7967+
Py_DECREF(doc);
7968+
return NULL;
7969+
}
7970+
const char *p = doc_utf8;
7971+
const char *pend = p + doc_size;
7972+
7973+
// First pass: find minimum indentation of any non-blank lines
7974+
// after first line.
7975+
while (p < pend && *p++ != '\n') {
7976+
}
7977+
7978+
Py_ssize_t margin = PY_SSIZE_T_MAX;
7979+
while (p < pend) {
7980+
const char *s = p;
7981+
while (*p == ' ') p++;
7982+
if (p < pend && *p != '\n') {
7983+
margin = Py_MIN(margin, p - s);
7984+
}
7985+
while (p < pend && *p++ != '\n') {
7986+
}
7987+
}
7988+
if (margin == PY_SSIZE_T_MAX) {
7989+
margin = 0;
7990+
}
7991+
7992+
// Second pass: write cleandoc into buff.
7993+
char *buff = PyMem_Malloc(doc_size + 1);
7994+
char *w = buff;
7995+
p = doc_utf8;
7996+
7997+
// copy firstline without indent.
7998+
while (*p == ' ') p++;
7999+
while (p < pend) {
8000+
int ch = *w++ = *p++;
8001+
if (ch == '\n') {
8002+
break;
8003+
}
8004+
}
8005+
8006+
// copy subsequent lines without margin.
8007+
while (p < pend) {
8008+
for (ssize_t i = 0; i < margin; i++, p++) {
8009+
if (*p != ' ') {
8010+
assert(*p == '\n' || *p == '\0');
8011+
break;
8012+
}
8013+
}
8014+
while (p < pend) {
8015+
int ch = *w++ = *p++;
8016+
if (ch == '\n') {
8017+
break;
8018+
}
8019+
}
8020+
}
8021+
8022+
Py_DECREF(doc);
8023+
return PyUnicode_FromStringAndSize(buff, w - buff);
8024+
}
8025+
8026+
79388027
PyObject *
79398028
_PyCompile_CodeGen(PyObject *ast, PyObject *filename, PyCompilerFlags *pflags,
79408029
int optimize, int compile_mode)

0 commit comments

Comments
 (0)