diff --git a/torchao/quantization/README.md b/torchao/quantization/README.md index 16891ba13d..9911c70b2a 100644 --- a/torchao/quantization/README.md +++ b/torchao/quantization/README.md @@ -49,12 +49,14 @@ import pickle import torchao.quantization # After the first forward pass (when quantization was done) +from torchao.quantization.autoquant import AUTOQUANT_CACHE with open("quantization-cache.pkl", "wb") as f: - pickle.dump(torchao.quantization.AUTOQUANT_CACHE) + pickle.dump(AUTOQUANT_CACHE, f) # On load +from torchao.quantization.autoquant import AUTOQUANT_CACHE with open("quantization-cache.pkl", "rb") as f: - torchao.quantization.AUTOQUANT_CACHE.update(pickle.load(f)) + AUTOQUANT_CACHE.update(pickle.load(f)) ``` ## Affine Quantization Affine quantization refers to the type of quantization that maps from floating point numbers to quantized numbers (typically integer) with an affine transformation, i.e.: `quantized_val = float_val / scale + zero_point` where `scale` and `zero_point` are quantization parameters for some granularity and based on some data.