# Source code for qkeras.codebook

# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Clustering based quantizers"""


import keras
import keras.ops.numpy as knp
import numpy as np
from keras import Model, layers
from sklearn.cluster import KMeans
from tqdm import tqdm


def create_in_out_table(km, quantizer):
    """Build the [in, out] lookup tables for a fitted codebook.

    For an n-bit activation value v, in_table[out_table[v]] is the codebook
    value that v is mapped to.

    Arguments:
      km: fitted KMeans model
      quantizer: quantizer whose range() lists the values to be mapped

    Returns:
      in_table: flattened cluster centers, indexed by compressed table index
      out_table: compressed table index predicted for every entry of
        quantizer.range()
    """
    # km.predict is fed one sample per row, hence the single-column reshape.
    levels = quantizer.range()
    column = levels.reshape(-1, 1).astype("float32")
    cluster_ids = km.predict(column)
    return km.cluster_centers_.flatten(), cluster_ids.ravel()
def activation_compression(
    model,
    compile_config,
    activation_indexes,
    bits,
    X_train,
    y_train,
    X_test,
    y_test,
    sample_size=1.0,
):
    """Apply clustering based non-uniform quantization to activations.

    Inspired by https://arxiv.org/pdf/1911.02079.pdf

    The model is split into submodels at the given layer indexes, one KMeans
    codebook is fitted on the training activations leaving each split point,
    and the final submodel is evaluated on test activations that have been
    snapped to the codebooks.

    Arguments:
      model: Keras model
      compile_config: Dictionary of arguments to be passed to model.compile()
        for all submodels
      activation_indexes: Index list of layers to be quantized. This will be
        used to split the model and create submodels
      bits: Number of bits to compress activations to. This will result in
        2**bits codebook values
      X_train: training data used to fit the clustering algorithm
      y_train: training labels (not used by this function)
      X_test, y_test: validation data
      sample_size: fraction of the training samples whose activations are
        used when computing codebook values. Samples are drawn with
        np.random.choice, i.e. with replacement.

    Returns:
      cb_tables: [in, out] tables. See create_in_out_table docs
      models: list of keras submodels
      km_models: list of KMeans fitted models
    """
    assert len(activation_indexes) > 0
    assert 0.0 < sample_size <= 1.0

    # One independent estimator per split point. `[KMeans(...)] * k` would
    # repeat a single instance k times, so every fit would overwrite the
    # previous codebook and evaluation would use the last one everywhere.
    km_models = [KMeans(2**bits) for _ in activation_indexes]
    cb_tables = [[] for _ in activation_indexes]
    models = []

    last_index = len(model.layers) - 1
    x = x_in = model.layers[0].output
    for i, layer in enumerate(model.layers[1:], start=1):
        x = layer(x)
        if i in activation_indexes or i == last_index:
            print("\nCreating submodel...")
            models.append(Model([x_in], [x]))
            # Start the next submodel from a fresh input that mirrors this
            # layer's output (per-sample shape, batch size and dtype).
            x = x_in = layers.Input(
                layer.output[0].shape,
                batch_size=layer.output.shape[0],
                dtype=layer.output.dtype,
            )
            models[-1].compile(**compile_config)
            # summary() prints the table itself; wrapping it in print() only
            # added a stray "None" line.
            models[-1].summary()

    print("\nsample_size: ", sample_size)
    x = X_train
    for i, submodel in enumerate(models[:-1]):
        print(f"fitting km[{i}]...")
        x = submodel.predict(x)
        km = km_models[i]
        if sample_size < 1.0:
            # Pick whole samples first and flatten afterwards. Indexing the
            # already flattened column with sample indices would only ever
            # look at the first x.shape[0] scalar activations.
            idxs = np.random.choice(x.shape[0], size=int(sample_size * x.shape[0]))
            samples = x[idxs].reshape(-1, 1)
        else:
            samples = x.reshape(-1, 1)
        km.fit(samples)

        # Prefer the layer's quantizer; only touch `activation` when needed so
        # that a layer exposing just `quantizer` does not raise.
        last_layer = submodel.layers[-1]
        quantizer = getattr(last_layer, "quantizer", None)
        if quantizer is None:
            quantizer = last_layer.activation

        quantized = keras.ops.convert_to_numpy(quantizer(km.cluster_centers_))
        # Keep the centers in the dtype the estimator was fitted with (same
        # precaution weight_compression takes) and copy so that the in-place
        # sort below never writes into a backend-owned buffer.
        km.cluster_centers_ = np.array(
            quantized, dtype=km.cluster_centers_.dtype, order="C"
        )
        km.cluster_centers_.sort(axis=0)
        cb_tables[i] = create_in_out_table(km, quantizer)

    x = X_test
    for i, submodel in enumerate(models[:-1]):
        x = submodel.predict(x)
        km = km_models[i]
        preds = km.predict(x.reshape(-1, 1))
        # Replace every activation by its codebook value before feeding the
        # next submodel.
        x = km.cluster_centers_[preds].reshape(x.shape)
        n_unique = np.unique(x).shape[0]
        print(f"Number of unique activations: {n_unique}")
        assert n_unique <= 2**bits

    print("\nEvaluating...")
    models[-1].evaluate(x, y_test, verbose=2)
    return cb_tables, models, km_models
def weight_compression(weights, bits, axis=0, quantizer=None):
    """Create an in, out table that maps weight values to their codebook values.

    Based on the idea presented by https://arxiv.org/pdf/1911.02079.pdf

    A separate KMeans codebook with 2**bits entries is fitted for every slice
    of `weights` along `axis`.

    Arguments:
      weights: Numpy array
      bits: Number of bits to compress weights to. This will result in
        2**bits codebook values
      axis: axis to apply quantization by
      quantizer: quantizer function that will be applied to codebook values

    Returns:
      index_table: indices that map to codebook values for all weights, same
        shape as `weights` (built with keras.ops.numpy.concatenate)
      codebook_table: array of shape (weights.shape[axis], 2**bits) holding
        the sorted codebook values of every slice
    """
    assert bits <= 8
    n = 2**bits
    n_slices = weights.shape[axis]
    index_table = []
    codebook_table = np.zeros((n_slices, n))

    # Wrap the list itself (not the enumerate object) so tqdm can read its
    # length and show a total.
    slices = np.split(weights, n_slices, axis)
    for i, w in enumerate(tqdm(slices)):
        original_shape = w.shape
        # One scalar weight per row, as contiguous float64.
        column = np.ascontiguousarray(w.ravel(), dtype=np.float64).reshape(-1, 1)
        km = KMeans(n)
        km.fit(column)
        if quantizer:
            q_output = keras.ops.convert_to_numpy(quantizer(km.cluster_centers_))
            # Copy into contiguous float64 so the centers match the fitted
            # data and the in-place sort below is always allowed.
            km.cluster_centers_ = np.array(q_output, dtype=np.float64, order="C")
        # Sort before predicting so the indices refer to the sorted codebook.
        km.cluster_centers_.sort(axis=0)
        codebook_table[i, :] = km.cluster_centers_.flatten()
        preds = km.predict(column)
        index_table.append(preds.reshape(original_shape))

    index_table = knp.concatenate(index_table, axis)
    return index_table, codebook_table
def two_tier_embedding_compression(embeddings, bits, quantizer=None):
    """Create tables that map embedding values to their codebook values.

    Based on the idea presented by https://arxiv.org/pdf/1911.02079.pdf

    Tier 1 clusters the embedding rows into 2**bits blocks. Tier 2 fits, for
    every block, a codebook of 2**bits scalar values on the entries of the
    rows in that block.

    Arguments:
      embeddings: Numpy array MxN of embedding weights
      bits: Number of bits to compress weights to. This will result in
        2**bits codebook values
      quantizer: quantizer function that will be applied to codebook values

    Returns:
      index_table: MxN array of indices that map to codebook values
      cluster_index_table: length-M array that maps each row to the codebook
        table index
      codebook_table: (2**bits, 2**bits) array of codebook values, one row
        per block
      quantized_embeddings: Numpy array MxN of quantized weights
    """
    assert bits <= 8
    n = 2**bits
    quantized_embeddings = embeddings.copy()
    index_table = np.zeros(embeddings.shape, dtype=np.uint8)
    cluster_index_table = np.zeros(index_table.shape[0], dtype=np.uint8)
    codebook_table = np.zeros((n, n))

    # Tier 1: assign every embedding row to one of n blocks.
    km1 = KMeans(n)
    km1.fit(embeddings)
    tier1 = km1.predict(embeddings)

    block_sizes = [0] * n
    for block_label in tqdm(range(n)):
        mask = block_label == tier1
        block = embeddings[mask]
        block_sizes[block_label] = block.shape[0]
        if block.shape[0] == 0:
            # Nothing was assigned to this block, so there is nothing to
            # cluster; its codebook row stays zero.
            continue

        # Tier 2: cluster the scalar entries of all rows in this block.
        values = block.reshape(-1, 1)
        km2 = KMeans(n)
        km2.fit(values)
        if quantizer:
            q_output = keras.ops.convert_to_numpy(quantizer(km2.cluster_centers_))
            # Keep the centers in the dtype the estimator was fitted with
            # (same precaution weight_compression takes) and copy so the
            # in-place sort below is always allowed.
            km2.cluster_centers_ = np.array(
                q_output, dtype=km2.cluster_centers_.dtype, order="C"
            )
        km2.cluster_centers_.sort(axis=0)

        codebook_table[block_label, :] = km2.cluster_centers_.flatten()
        cluster_index_table[mask] = block_label

        # One predict call for the whole block. Each row keeps its own
        # indices; writing one row's predictions to every row of the block
        # would leave the whole block with the last row's indices.
        preds = km2.predict(values).reshape(block.shape)
        index_table[mask] = preds
        quantized_embeddings[mask] = km2.cluster_centers_[preds, 0]

    print("block_sizes:", block_sizes)
    return index_table, cluster_index_table, codebook_table, quantized_embeddings