# -*- coding: utf-8 -*- # # Copyright 2021 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Helpers for calculating CRC32C checksums.""" from __future__ import absolute_import from __future__ import division from __future__ import unicode_literals import base64 import warnings import six # pylint: disable=g-import-not-at-top try: # TODO(b/175725675) Make google_crc32c available with Cloud SDK. # Supress missing c extension warnings raised by google-crc32c. This usually # means the user needs to re-install the library. with warnings.catch_warnings(): warnings.simplefilter('ignore') import google_crc32c if google_crc32c.implementation in ('c', 'cffi'): # google-crc32c==1.1.3 changed implementation value to `c`. # We are checking both to ensure this is compatible with older versions. IS_FAST_GOOGLE_CRC32C_AVAILABLE = True else: raise ImportError except ImportError: # TODO(b/194124148) Fall back on pure Python google-crc32c. # Cleans up a lot of this file. import gcloud_crcmod as crcmod IS_FAST_GOOGLE_CRC32C_AVAILABLE = False # pylint: enable=g-import-not-at-top # Castagnoli polynomial and its degree. CASTAGNOLI_POLY = 4812730177 DEGREE = 32 # Table storing polynomial values of x^(2^k) mod CASTAGNOLI_POLY for all k < 31, # where x^(2^k) and CASTAGNOLI_POLY are both considered polynomials. This is # sufficient since x^(2^31) mod CASTAGNOLI_POLY = x. 
X_POW_2K_TABLE = [
    2, 4, 16, 256, 65536, 517762881, 984302966, 408362264, 1503875210,
    2862076957, 3884826397, 1324787473, 621200174, 1758783527, 1416537776,
    1180494764, 648569364, 2521473789, 994858823, 1728245375, 3498467999,
    4059169852, 3345064394, 2828422810, 2429203150, 3336788029, 860151998,
    2102628683, 1033187991, 4243778976, 1123580069
]


def get_crc32c(initial_data=b''):
  """Returns an instance of Hashlib-like helper for CRC32C operations.

  Args:
    initial_data (bytes): The CRC32C object will be initialized with the
      checksum of the data.

  Returns:
    The google_crc32c.Checksum instance
    if google-crc32c (https://github.com/googleapis/python-crc32c) is
    available. If not, returns the predefined.Crc instance from crcmod library.

  Usage:
    # Get the instance.
    crc = get_crc32c()
    # Update the instance with data. If your data is available in chunks,
    # you can update each chunk so that you don't have to keep everything in
    # memory.
    for chunk in chunks:
      crc.update(chunk)
    # Get the digest.
    crc_digest = crc.digest()
  """
  if IS_FAST_GOOGLE_CRC32C_AVAILABLE:
    crc = google_crc32c.Checksum()
  else:
    crc = crcmod.predefined.Crc('crc-32c')

  # Empty initial data is skipped; the fresh object is returned untouched.
  if initial_data:
    crc.update(initial_data)

  return crc


def get_crc32c_from_checksum(checksum):
  """Returns Hashlib-like CRC32C object with a starting checksum.

  Args:
    checksum (int): CRC32C checksum representing the hash of processed data.

  Returns:
    google_crc32c.Checksum if google-crc32c is available or predefined.Crc
    instance from crcmod library. Both set to use initial checksum.
  """
  crc = get_crc32c()
  if IS_FAST_GOOGLE_CRC32C_AVAILABLE:
    # The checksum is written straight into the object's private `_crc`
    # attribute, hence the protected-access suppression.
    # pylint:disable=protected-access
    crc._crc = checksum
    # pylint:enable=protected-access
  else:
    crc.crcValue = checksum
  return crc


def get_crc32c_hash_string_from_checksum(checksum):
  """Returns base64-encoded hash from the checksum.

  Args:
    checksum (int): CRC32C checksum representing the hash of processed data.

  Returns:
    A string representing the base64 encoded CRC32C hash.
  """
  crc_object = get_crc32c_from_checksum(checksum)
  return get_hash(crc_object)


def get_checksum(crc):
  """Gets the integer checksum from a CRC32C object.

  Args:
    crc (google_crc32c.Checksum|predefined.Crc): CRC32C object from
      google-crc32c or crcmod package.

  Returns:
    An int representing the CRC32C checksum of the provided object, obtained
    by parsing the object's hexadecimal digest.
  """
  return int(crc.hexdigest(), 16)


def get_hash(crc):
  """Gets the base64-encoded hash from a CRC32C object.

  Args:
    crc (google_crc32c.Checksum|predefined.Crc): CRC32C object from
      google-crc32c or crcmod package.

  Returns:
    A string representing the base64 encoded CRC32C hash.
  """
  return base64.b64encode(crc.digest()).decode('ascii')


def does_data_match_checksum(data, crc32c_checksum):
  """Checks if checksum for the data matches the supplied checksum.

  Args:
    data (bytes): Bytes over which the checksum should be calculated.
    crc32c_checksum (int): Checksum against which data's checksum will be
      compared.

  Returns:
    True iff both checksums match.
  """
  crc = get_crc32c()
  # six.ensure_binary converts text input to bytes before hashing.
  crc.update(six.ensure_binary(data))
  return get_checksum(crc) == crc32c_checksum


def _reverse_32_bits(crc_checksum):
  """Returns crc_checksum with the order of its 32 bits reversed.

  Args:
    crc_checksum (int): Value to reverse. It is formatted as a zero-padded
      binary string of at least 32 digits before being reversed.

  Returns:
    Int whose binary digits are those of crc_checksum in reverse order.
  """
  return int('{0:032b}'.format(crc_checksum)[::-1], 2)


def _multiply_crc_polynomials(p, q):
  """Multiplies two polynomials together modulo CASTAGNOLI_POLY.

  Args:
    p (int): The first polynomial.
    q (int): The second polynomial.

  Returns:
    Int result of the multiplication.
  """
  result = 0
  top_bit = 1 << DEGREE
  for _ in range(DEGREE):
    # Shift-and-add: each set bit of p contributes the current multiple of q.
    if p & 1:
      result ^= q
    # Multiply q by x, then reduce it if its degree reached DEGREE.
    q <<= 1
    if q & top_bit:
      q ^= CASTAGNOLI_POLY
    p >>= 1
  return result


def _extend_crc32c_checksum_by_zeros(crc_checksum, bit_count):
  """Given crc_checksum representing polynomial P(x), compute P(x)*x^bit_count.

  Args:
    crc_checksum (int): crc representing polynomial P(x).
    bit_count (int): number of zero bits to extend the checksum by, i.e. the
      exponent of x that P(x) is multiplied with. Must not be negative.

  Returns:
    P(x)*x^bit_count (int).

  Raises:
    ValueError: if bit_count is negative. Right-shifting a negative int never
      reaches zero, so the loop below would otherwise never terminate.
  """
  if bit_count < 0:
    raise ValueError(
        'bit_count must be non-negative, got {}'.format(bit_count))

  updated_crc_checksum = _reverse_32_bits(crc_checksum)
  i = 0
  # Walk the binary expansion of bit_count: for every set bit k, multiply by
  # the precomputed x^(2^k). The table index wraps around because the table
  # only holds k < 31 (see the comment above X_POW_2K_TABLE).
  while bit_count != 0:
    if bit_count & 1:
      updated_crc_checksum = _multiply_crc_polynomials(
          updated_crc_checksum, X_POW_2K_TABLE[i % len(X_POW_2K_TABLE)])
    i += 1
    bit_count >>= 1
  updated_crc_checksum = _reverse_32_bits(updated_crc_checksum)
  return updated_crc_checksum


def concat_checksums(crc_a, crc_b, b_byte_count):
  """Computes CRC32C for concat(A, B) given crc(A), crc(B), and len(B).

  An explanation of the algorithm can be found at
  https://code.google.com/archive/p/crcutil/downloads.

  Args:
    crc_a (int): Represents the CRC32C checksum of object A.
    crc_b (int): Represents the CRC32C checksum of object B.
    b_byte_count (int): Length of data covered by crc_b in bytes.

  Returns:
    CRC32C checksum representing the data covered by crc_a and crc_b (int).
    When b_byte_count is zero (or otherwise falsy), crc_a is returned as is.

  Raises:
    ValueError: if b_byte_count is negative.
  """
  if not b_byte_count:
    return crc_a
  b_bit_count = 8 * b_byte_count
  return _extend_crc32c_checksum_by_zeros(crc_a, bit_count=b_bit_count) ^ crc_b