Skip to content

Commit 2ad77c7

Browse files
authored
Fix: send part's checksum for XML MPU part upload (#1529)
* For an XML MPU part upload, the checksum of the part should be sent to the server using the `x-goog-hash` header. [Docs](https://cloud.google.com/storage/docs/xml-api/put-object-multipart#request_headers) * The server will throw `400 InvalidDigest/BadDigest/CrcMismatch` in different scenarios, but all of them essentially mean the same thing: "The checksum provided by the user didn't match the one calculated by us."
1 parent a8109e0 commit 2ad77c7

File tree

4 files changed

+125
-3
lines changed

4 files changed

+125
-3
lines changed

google/cloud/storage/_media/_upload.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,6 +1387,29 @@ def _process_upload_response(self, response):
13871387
13881388
.. _sans-I/O: https://sans-io.readthedocs.io/
13891389
"""
1390+
# Data corruption errors shouldn't be considered as invalid responses,
1391+
# So we handle them earlier than call to `_helpers.require_status_code`.
1392+
# If the response is 400, we check for data corruption errors.
1393+
if response.status_code == 400:
1394+
root = ElementTree.fromstring(response.text)
1395+
error_code = root.find("Code").text
1396+
error_message = root.find("Message").text
1397+
error_details = root.find("Details").text
1398+
if error_code in ["InvalidDigest", "BadDigest", "CrcMismatch"]:
1399+
raise DataCorruption(
1400+
response,
1401+
(
1402+
"Checksum mismatch: checksum calculated by client and"
1403+
" server did not match. Error code: {error_code},"
1404+
" Error message: {error_message},"
1405+
" Error details: {error_details}"
1406+
).format(
1407+
error_code=error_code,
1408+
error_message=error_message,
1409+
error_details=error_details,
1410+
),
1411+
)
1412+
13901413
_helpers.require_status_code(
13911414
response,
13921415
(http.client.OK,),

google/cloud/storage/_media/requests/upload.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
from google.cloud.storage._media import _upload
2323
from google.cloud.storage._media.requests import _request_helpers
24+
from google.cloud.storage._media import _helpers
2425

2526

2627
class SimpleUpload(_request_helpers.RequestsMixin, _upload.SimpleUpload):
@@ -757,6 +758,14 @@ def upload(
757758
~requests.Response: The HTTP response returned by ``transport``.
758759
"""
759760
method, url, payload, headers = self._prepare_upload_request()
761+
if self._checksum_object is not None:
762+
checksum_digest_in_base64 = _helpers.prepare_checksum_digest(
763+
self._checksum_object.digest()
764+
)
765+
if self._checksum_type == "crc32c":
766+
headers["X-Goog-Hash"] = f"crc32c={checksum_digest_in_base64}"
767+
elif self._checksum_type == "md5":
768+
headers["X-Goog-Hash"] = f"md5={checksum_digest_in_base64}"
760769

761770
# Wrap the request business logic in a function to be retried.
762771
def retriable_request():

tests/resumable_media/unit/requests/test_upload.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@
4242
UPLOAD_ID = "VXBsb2FkIElEIGZvciBlbHZpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA"
4343
PARTS = {1: "39a59594290b0f9a30662a56d695b71d", 2: "00000000290b0f9a30662a56d695b71d"}
4444
FILE_DATA = b"testdata" * 128
45+
_HASH_HEADER = "x-goog-hash"
46+
CRC32C_HASH_OF_FIRST_PART = "8hVqVQ=="
47+
MD5_HASH_OF_FIRST_PART = "gfVZ4+0LdooJwGAkxLrCcg=="
48+
DEFAULT_CONNECT_TIMEOUT = 61
49+
DEFAULT_READ_TIMEOUT = 60
4550

4651

4752
@pytest.fixture(scope="session")
@@ -402,6 +407,66 @@ def test_mpu_part(filename):
402407
assert part.etag == PARTS[1]
403408

404409

410+
def test_mpu_part_with_md5_enabled(filename):
    """Check that an MD5-enabled part upload sends its digest in ``X-Goog-Hash``."""
    # Fake transport whose response echoes the etag and the MD5 hash header.
    response_headers = {
        "etag": PARTS[1],
        _HASH_HEADER: f"md5={MD5_HASH_OF_FIRST_PART}",
    }
    transport = mock.Mock(spec=["request"])
    transport.request.return_value = _make_response(headers=response_headers)

    part = upload_mod.XMLMPUPart(
        EXAMPLE_XML_UPLOAD_URL,
        UPLOAD_ID,
        filename,
        start=0,
        end=128,
        part_number=1,
        checksum="md5",
    )
    part.upload(transport)

    # The request must target this part of this upload and carry the digest.
    expected_url = (
        f"{part.upload_url}?partNumber={part.part_number}&uploadId={UPLOAD_ID}"
    )
    expected_payload = FILE_DATA[part.start : part.end]
    expected_headers = {"X-Goog-Hash": f"md5={MD5_HASH_OF_FIRST_PART}"}
    expected_timeout = (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT)
    transport.request.assert_called_once_with(
        "PUT",
        expected_url,
        data=expected_payload,
        headers=expected_headers,
        timeout=expected_timeout,
    )

    assert part.finished
    assert part.etag == PARTS[1]
438+
439+
440+
def test_mpu_part_with_crc32c_enabled(filename):
    """Check that a CRC32C-enabled part upload sends its digest in ``X-Goog-Hash``."""
    # Fake transport whose response echoes the etag and the CRC32C hash header.
    response_headers = {
        "etag": PARTS[1],
        _HASH_HEADER: f"crc32c={CRC32C_HASH_OF_FIRST_PART}",
    }
    transport = mock.Mock(spec=["request"])
    transport.request.return_value = _make_response(headers=response_headers)

    part = upload_mod.XMLMPUPart(
        EXAMPLE_XML_UPLOAD_URL,
        UPLOAD_ID,
        filename,
        start=0,
        end=128,
        part_number=1,
        checksum="crc32c",
    )
    part.upload(transport)

    # The request must target this part of this upload and carry the digest.
    expected_url = (
        f"{part.upload_url}?partNumber={part.part_number}&uploadId={UPLOAD_ID}"
    )
    expected_payload = FILE_DATA[part.start : part.end]
    expected_headers = {"X-Goog-Hash": f"crc32c={CRC32C_HASH_OF_FIRST_PART}"}
    expected_timeout = (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT)
    transport.request.assert_called_once_with(
        "PUT",
        expected_url,
        data=expected_payload,
        headers=expected_headers,
        timeout=expected_timeout,
    )

    assert part.finished
    assert part.etag == PARTS[1]
468+
469+
405470
def _make_response(status_code=http.client.OK, headers=None, text=None):
406471
headers = headers or {}
407472
return mock.Mock(

tests/resumable_media/unit/test__upload.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,21 @@
4747
UPLOAD_ID = "VXBsb2FkIElEIGZvciBlbHZpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA"
4848
PARTS = {1: "39a59594290b0f9a30662a56d695b71d", 2: "00000000290b0f9a30662a56d695b71d"}
4949
FILE_DATA = b"testdata" * 128
50+
CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE = """<?xml version='1.0' encoding='UTF-8'?>
51+
<Error>
52+
<Code>{ERROR_CODE}</Code>
53+
<Message>The MD5 you specified in Content-MD5 or x-goog-hash was invalid.</Message>
54+
<Details>Invalid MD5 value: dfdfdfd==</Details>
55+
</Error>"""
56+
INVALID_MD5_XML_RESPONSE = CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE.format(
57+
ERROR_CODE="InvalidDigest"
58+
)
59+
INVALID_CRC32C_XML_RESPONSE = CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE.format(
60+
ERROR_CODE="BadDigest"
61+
)
62+
INCORRECT_LENGTH_CRC32C_XML_RESPONSE = CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE.format(
63+
ERROR_CODE="CrcMismatch"
64+
)
5065

5166

5267
@pytest.fixture(scope="session")
@@ -1471,7 +1486,15 @@ def test_xml_mpu_part_invalid_response(filename):
14711486
part._process_upload_response(response)
14721487

14731488

1474-
def test_xml_mpu_part_checksum_failure(filename):
1489+
@pytest.mark.parametrize(
1490+
"error_scenarios",
1491+
[
1492+
INVALID_MD5_XML_RESPONSE,
1493+
INVALID_CRC32C_XML_RESPONSE,
1494+
INCORRECT_LENGTH_CRC32C_XML_RESPONSE,
1495+
],
1496+
)
1497+
def test_xml_mpu_part_checksum_failure(filename, error_scenarios):
14751498
PART_NUMBER = 1
14761499
START = 0
14771500
END = 256
@@ -1490,7 +1513,9 @@ def test_xml_mpu_part_checksum_failure(filename):
14901513
_fix_up_virtual(part)
14911514
part._prepare_upload_request()
14921515
response = _make_xml_response(
1493-
headers={"etag": ETAG, "x-goog-hash": "md5=Ojk9c3dhfxgoKVVHYwFbHQ=="}
1516+
status_code=http.client.BAD_REQUEST,
1517+
headers={"etag": ETAG, "x-goog-hash": "md5=Ojk9c3dhfxgoKVVHYwFbHQ=="},
1518+
text=error_scenarios,
14941519
) # Example md5 checksum but not the correct one
14951520
with pytest.raises(DataCorruption):
14961521
part._process_upload_response(response)
@@ -1555,7 +1580,7 @@ def _make_xml_response(status_code=http.client.OK, headers=None, text=None):
15551580
headers=headers,
15561581
status_code=status_code,
15571582
text=text,
1558-
spec=["headers", "status_code"],
1583+
spec=["headers", "status_code", "text"],
15591584
)
15601585

15611586

0 commit comments

Comments
 (0)