|
| 1 | +# Copyright 2023 The KerasNLP Authors |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | +import keras |
| 15 | +from keras import layers |
| 16 | + |
| 17 | +from keras_nlp.src.api_export import keras_nlp_export |
| 18 | +from keras_nlp.src.models.backbone import Backbone |
| 19 | + |
| 20 | + |
@keras_nlp_export("keras_nlp.models.VGGBackbone")
class VGGBackbone(Backbone):
    """VGG backbone network.

    This class implements a VGG backbone as described in [Very Deep
    Convolutional Networks for Large-Scale Image Recognition](
    https://arxiv.org/abs/1409.1556) (ICLR 2015).

    The network is a sequence of VGG blocks. Every block is a run of 3x3
    `Conv2D` layers with ReLU activation and "same" padding, followed by a
    2x2 max pooling layer. One block is built for every entry of
    `stackwise_num_repeats` / `stackwise_num_filters`.

    Args:
        stackwise_num_repeats: list of ints, number of repeated convolutional
            layers per VGG block. For VGG16 this is `[2, 2, 3, 3, 3]` and for
            VGG19 this is `[2, 2, 4, 4, 4]`.
        stackwise_num_filters: list of ints, number of filters of the
            convolutional layers per VGG block. For both VGG16 and VGG19
            this is `[64, 128, 256, 512, 512]`. Must have the same length as
            `stackwise_num_repeats`.
        include_rescaling: bool, whether to rescale the inputs. If set to
            `True`, inputs will be passed through a `Rescaling(1/255.0)`
            layer.
        input_image_shape: tuple, optional shape tuple of the input images.
            Defaults to `(224, 224, 3)`.
        pooling: str or `None`, optional pooling mode applied to the output
            of the last convolutional block. Defaults to `"avg"`.
            - `None` means that the output of the model will be the 4D
              tensor output of the last convolutional block.
            - `"avg"` means that global average pooling will be applied to
              the output of the last convolutional block, and thus the
              output of the model will be a 2D tensor.
            - `"max"` means that global max pooling will be applied.

    Raises:
        ValueError: if `stackwise_num_repeats` and `stackwise_num_filters`
            do not have the same length.

    Examples:
    ```python
    input_data = np.ones((2, 224, 224, 3), dtype="float32")

    # Pretrained VGG backbone.
    model = keras_nlp.models.VGGBackbone.from_preset("vgg16")
    model(input_data)

    # Randomly initialized VGG backbone with a custom config.
    model = keras_nlp.models.VGGBackbone(
        stackwise_num_repeats=[2, 2, 3, 3, 3],
        stackwise_num_filters=[64, 128, 256, 512, 512],
        input_image_shape=(224, 224, 3),
        include_rescaling=False,
        pooling="avg",
    )
    model(input_data)
    ```
    """

    def __init__(
        self,
        stackwise_num_repeats,
        stackwise_num_filters,
        include_rescaling,
        input_image_shape=(224, 224, 3),
        pooling="avg",
        **kwargs,
    ):
        # Both lists describe the same stacks, so a length mismatch is a
        # configuration error; fail early with a clear message instead of
        # silently dropping entries or raising an IndexError mid-build.
        if len(stackwise_num_repeats) != len(stackwise_num_filters):
            raise ValueError(
                "`stackwise_num_repeats` and `stackwise_num_filters` must "
                "have the same length. Received: "
                f"stackwise_num_repeats={stackwise_num_repeats}, "
                f"stackwise_num_filters={stackwise_num_filters}"
            )

        # === Functional Model ===
        img_input = keras.layers.Input(shape=input_image_shape)
        x = img_input

        if include_rescaling:
            x = layers.Rescaling(scale=1 / 255.0)(x)

        # Build one VGG block per configured stack. Every stack is used,
        # including the last one; block names are 1-based
        # ("block1", "block2", ...).
        stack_configs = zip(stackwise_num_repeats, stackwise_num_filters)
        for stack_index, (num_repeats, num_filters) in enumerate(
            stack_configs
        ):
            x = apply_vgg_block(
                x=x,
                num_layers=num_repeats,
                filters=num_filters,
                kernel_size=(3, 3),
                activation="relu",
                padding="same",
                max_pool=True,
                name=f"block{stack_index + 1}",
            )

        # Any other `pooling` value (including `None`) leaves the 4D feature
        # map of the last block as the model output.
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D()(x)

        super().__init__(inputs=img_input, outputs=x, **kwargs)

        # === Config ===
        self.stackwise_num_repeats = stackwise_num_repeats
        self.stackwise_num_filters = stackwise_num_filters
        self.include_rescaling = include_rescaling
        self.input_image_shape = input_image_shape
        self.pooling = pooling

    def get_config(self):
        """Return the constructor arguments stored on this backbone."""
        return {
            "stackwise_num_repeats": self.stackwise_num_repeats,
            "stackwise_num_filters": self.stackwise_num_filters,
            "include_rescaling": self.include_rescaling,
            "input_image_shape": self.input_image_shape,
            "pooling": self.pooling,
        }
| 121 | + |
| 122 | + |
def apply_vgg_block(
    x,
    num_layers,
    filters,
    kernel_size,
    activation,
    padding,
    max_pool,
    name,
):
    """Apply one VGG block to a tensor.

    A block is `num_layers` consecutive `Conv2D` layers, optionally followed
    by a single 2x2 `MaxPooling2D` layer with stride 2.

    Args:
        x: Tensor, input tensor to pass through the block.
        num_layers: int, number of `Conv2D` layers in the block.
        filters: int, `filters` argument given to every `Conv2D` layer.
        kernel_size: int or tuple, `kernel_size` argument given to every
            `Conv2D` layer.
        activation: `activation` argument given to every `Conv2D` layer.
        padding: `padding` argument given to every `Conv2D` layer.
        max_pool: bool, whether to append a `MaxPooling2D` layer at the end
            of the block.
        name: str, name of the block. Convolutions are named
            `"{name}_conv1"`, `"{name}_conv2"`, ... and the pooling layer
            `"{name}_pool"`.

    Returns:
        The output tensor of the block. If `num_layers` is 0 and `max_pool`
        is false, `x` is returned unchanged.
    """
    for conv_index in range(num_layers):
        # Layer names are 1-based: conv1, conv2, ...
        conv = layers.Conv2D(
            filters,
            kernel_size,
            activation=activation,
            padding=padding,
            name=f"{name}_conv{conv_index + 1}",
        )
        x = conv(x)

    if not max_pool:
        return x

    pool = layers.MaxPooling2D((2, 2), (2, 2), name=f"{name}_pool")
    return pool(x)
0 commit comments