|
| 1 | +# Copyright 2024 The KerasNLP Authors |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | +import numpy as np |
| 15 | + |
| 16 | +from keras_nlp.src.utils.preset_utils import HF_CONFIG_FILE |
| 17 | +from keras_nlp.src.utils.preset_utils import jax_memory_cleanup |
| 18 | +from keras_nlp.src.utils.preset_utils import load_config |
| 19 | +from keras_nlp.src.utils.timm.safetensor_utils import SafetensorLoader |
| 20 | + |
| 21 | + |
def convert_backbone_config(timm_config):
    """Translate a timm ResNet config into backbone constructor arguments.

    Args:
        timm_config: mapping whose `"architecture"` entry names the timm
            model (for example `"resnet50"` or `"resnetv2_101"`).

    Returns:
        A dict with the keys `stackwise_num_filters`,
        `stackwise_num_blocks`, `stackwise_num_strides`, `block_type` and
        `use_pre_activation`.

    Raises:
        ValueError: if the architecture is not one of the names listed in
            the table below.
    """
    timm_architecture = timm_config["architecture"]

    # Names containing "resnetv2_" select the pre-activation variant.
    use_pre_activation = "resnetv2_" in timm_architecture

    # (accepted architecture names, blocks per stack, block type)
    known_layouts = (
        (("resnet18",), (2, 2, 2, 2), "basic_block"),
        (("resnet26",), (2, 2, 2, 2), "bottleneck_block"),
        (("resnet34",), (3, 4, 6, 3), "basic_block"),
        (("resnet50", "resnetv2_50"), (3, 4, 6, 3), "bottleneck_block"),
        (("resnet101", "resnetv2_101"), (3, 4, 23, 3), "bottleneck_block"),
        (("resnet152", "resnetv2_152"), (3, 8, 36, 3), "bottleneck_block"),
    )

    for names, blocks_per_stack, block_type in known_layouts:
        if timm_architecture in names:
            return dict(
                stackwise_num_filters=[64, 128, 256, 512],
                # Copy into a list so every call hands back a fresh object.
                stackwise_num_blocks=list(blocks_per_stack),
                stackwise_num_strides=[1, 2, 2, 2],
                block_type=block_type,
                use_pre_activation=use_pre_activation,
            )

    raise ValueError(
        f"Currently, the architecture {timm_architecture} is not supported."
    )
| 60 | + |
| 61 | + |
def convert_weights(backbone, loader, timm_config):
    """Port timm ResNet checkpoint weights into `backbone`.

    Every weight is transferred with `loader.port_weight`, pairing a layer
    attribute fetched via `backbone.get_layer(name)` with a checkpoint key.
    After the stem, residual blocks and (for pre-activation models) the
    final batch norm are ported, the layer named `"normalization"` gets the
    pretrained mean and variance and is rebuilt.

    Args:
        backbone: model exposing `use_pre_activation`, `block_type`,
            `stackwise_num_filters`, `stackwise_num_blocks` and
            `get_layer`.
        loader: object exposing `port_weight(variable, hf_weight_key=...,
            hook_fn=...)`.
        timm_config: mapping with `["pretrained_cfg"]["mean"]` and
            `["pretrained_cfg"]["std"]`.
    """
    # Keras batch-norm attribute -> checkpoint key suffix, in porting order.
    batch_norm_fields = (
        ("gamma", "weight"),
        ("beta", "bias"),
        ("moving_mean", "running_mean"),
        ("moving_variance", "running_var"),
    )

    def to_keras_kernel(x, shape):
        # `shape` is unused; it is kept because this function is handed to
        # the loader as `hook_fn`. Presumably this maps an
        # (out, in, height, width) kernel to (height, width, in, out) --
        # TODO confirm against the loader's contract.
        return np.transpose(x, (2, 3, 1, 0))

    def load_conv(layer_name, checkpoint_prefix):
        loader.port_weight(
            backbone.get_layer(layer_name).kernel,
            hf_weight_key=f"{checkpoint_prefix}.weight",
            hook_fn=to_keras_kernel,
        )

    def load_batch_norm(layer_name, checkpoint_prefix):
        for attribute, suffix in batch_norm_fields:
            loader.port_weight(
                getattr(backbone.get_layer(layer_name), attribute),
                hf_weight_key=f"{checkpoint_prefix}.{suffix}",
            )

    pre_activation = bool(backbone.use_pre_activation)
    is_bottleneck = backbone.block_type == "bottleneck_block"

    def load_v1_block(stack_index, block_idx, has_shortcut_conv):
        keras_name = f"v1_stack{stack_index}_block{block_idx}"
        hf_name = f"layer{stack_index + 1}.{block_idx}"
        if has_shortcut_conv:
            load_conv(f"{keras_name}_0_conv", f"{hf_name}.downsample.0")
            load_batch_norm(f"{keras_name}_0_bn", f"{hf_name}.downsample.1")
        # Basic blocks carry two conv/bn pairs, bottleneck blocks three.
        pair_count = 3 if is_bottleneck else 2
        for position in range(1, pair_count + 1):
            load_conv(
                f"{keras_name}_{position}_conv", f"{hf_name}.conv{position}"
            )
            load_batch_norm(
                f"{keras_name}_{position}_bn", f"{hf_name}.bn{position}"
            )

    def load_v2_block(stack_index, block_idx, has_shortcut_conv):
        keras_name = f"v2_stack{stack_index}_block{block_idx}"
        hf_name = f"stages.{stack_index}.blocks.{block_idx}"
        if has_shortcut_conv:
            # Only a conv is ported for the v2 shortcut, no batch norm.
            load_conv(f"{keras_name}_0_conv", f"{hf_name}.downsample.conv")
        load_batch_norm(f"{keras_name}_pre_activation_bn", f"{hf_name}.norm1")
        load_conv(f"{keras_name}_1_conv", f"{hf_name}.conv1")
        load_batch_norm(f"{keras_name}_1_bn", f"{hf_name}.norm2")
        load_conv(f"{keras_name}_2_conv", f"{hf_name}.conv2")
        if is_bottleneck:
            load_batch_norm(f"{keras_name}_2_bn", f"{hf_name}.norm3")
            load_conv(f"{keras_name}_3_conv", f"{hf_name}.conv3")

    # Stem
    if pre_activation:
        load_conv("conv1_conv", "stem.conv")
    else:
        load_conv("conv1_conv", "conv1")
        load_batch_norm("conv1_bn", "bn1")

    # Stages
    load_block = load_v2_block if pre_activation else load_v1_block
    for stack_index in range(len(backbone.stackwise_num_filters)):
        for block_idx in range(backbone.stackwise_num_blocks[stack_index]):
            # The first block of a stack has a shortcut conv, except in the
            # first stack of a basic-block model.
            has_shortcut_conv = block_idx == 0 and (
                is_bottleneck or stack_index > 0
            )
            load_block(stack_index, block_idx, has_shortcut_conv)

    # Post
    if pre_activation:
        load_batch_norm("post_bn", "norm")

    # Rebuild normalization layer with pretrained mean & std
    pretrained_cfg = timm_config["pretrained_cfg"]
    mean, std = pretrained_cfg["mean"], pretrained_cfg["std"]
    normalization = backbone.get_layer("normalization")
    normalization.input_mean = mean
    normalization.input_variance = [value**2 for value in std]
    normalization.build(normalization._build_input_shape)
| 163 | + |
| 164 | + |
def load_resnet_backbone(cls, preset, load_weights, **kwargs):
    """Instantiate `cls` from a timm preset, optionally porting its weights.

    Args:
        cls: callable invoked with the converted config plus `kwargs` to
            create the backbone.
        preset: preset identifier passed to `load_config` and
            `SafetensorLoader`.
        load_weights: when truthy, checkpoint weights are ported into the
            new backbone; otherwise it is returned as constructed.
        **kwargs: extra keyword arguments forwarded to `cls`.

    Returns:
        The constructed backbone.
    """
    timm_config = load_config(preset, HF_CONFIG_FILE)
    backbone = cls(**convert_backbone_config(timm_config), **kwargs)
    if not load_weights:
        return backbone

    jax_memory_cleanup(backbone)
    with SafetensorLoader(preset) as loader:
        convert_weights(backbone, loader, timm_config)
    return backbone
0 commit comments