From 715f3137526968974a277e3ad32931e8c27e0e68 Mon Sep 17 00:00:00 2001 From: Jayson Reis Date: Mon, 21 Jan 2019 11:54:16 +0100 Subject: [PATCH 1/2] Make b64decode faster when running with validate=True This is done by precompiling the regex that validates that the encoded data only has valid base64 characters. --- Lib/base64.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 2be9c395a96674..f7ffecabc807cc 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -28,7 +28,7 @@ 'urlsafe_b64encode', 'urlsafe_b64decode', ] - +VALID_BASE64_REGEX = re.compile(b'^[A-Za-z0-9+/]*={0,2}$') bytes_types = (bytes, bytearray) # Types acceptable as binary data def _bytes_from_decode_data(s): @@ -82,7 +82,7 @@ def b64decode(s, altchars=None, validate=False): altchars = _bytes_from_decode_data(altchars) assert len(altchars) == 2, repr(altchars) s = s.translate(bytes.maketrans(altchars, b'+/')) - if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s): + if validate and not VALID_BASE64_REGEX.match(s): raise binascii.Error('Non-base64 digit found') return binascii.a2b_base64(s) From 30aa7d400afd0eda6fe89bccef9b439d8c6ceb57 Mon Sep 17 00:00:00 2001 From: Jayson Reis Date: Mon, 21 Jan 2019 17:39:48 +0100 Subject: [PATCH 2/2] Add _get_valid_base64_regex as a proxy function in order to lazily compile VALID_BASE64_REGEX --- Lib/base64.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index f7ffecabc807cc..6996244d8e2eae 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -28,9 +28,17 @@ 'urlsafe_b64encode', 'urlsafe_b64decode', ] -VALID_BASE64_REGEX = re.compile(b'^[A-Za-z0-9+/]*={0,2}$') +VALID_BASE64_REGEX = None bytes_types = (bytes, bytearray) # Types acceptable as binary data +def _get_valid_base64_regex(): + global VALID_BASE64_REGEX + if VALID_BASE64_REGEX: + return VALID_BASE64_REGEX + + VALID_BASE64_REGEX = re.compile(b'^[A-Za-z0-9+/]*={0,2}$') + return VALID_BASE64_REGEX 
+ def _bytes_from_decode_data(s): if isinstance(s, str): try: @@ -82,7 +90,7 @@ def b64decode(s, altchars=None, validate=False): altchars = _bytes_from_decode_data(altchars) assert len(altchars) == 2, repr(altchars) s = s.translate(bytes.maketrans(altchars, b'+/')) - if validate and not VALID_BASE64_REGEX.match(s): + if validate and not _get_valid_base64_regex().match(s): raise binascii.Error('Non-base64 digit found') return binascii.a2b_base64(s)