8.2.1. Example: timm Model Inference

An application example that retrieves a model from timm and performs inference on the image (beignets-task-guide.png).

beignets-task-guide.png

Fig. 8.2 beignets-task-guide.png

Execution Method (resnet50.a1h_in1k)

$ cd /opt/pfn/pfcomp/codegen/MLSDK/examples/
$ ./run_timm.sh --model_name resnet50.a1h_in1k --batch_size 16

Expected Output (resnet50.a1h_in1k)

MNCore2 top-5 classes:
- espresso (967)
- cup (968)
- chocolate sauce, chocolate syrup (960)
- consomme (925)
- eggnog (969)
Torch top-5 classes:
- espresso (967)
- cup (968)
- chocolate sauce, chocolate syrup (960)
- eggnog (969)
- consomme (925)

Execution Method (mobilenetv3_small_050.lamb_in1k)

$ cd /opt/pfn/pfcomp/codegen/MLSDK/examples/
$ ./run_timm.sh --model_name mobilenetv3_small_050.lamb_in1k --batch_size 16

Expected Output (mobilenetv3_small_050.lamb_in1k)

MNCore2 top-5 classes:
- cup (968)
- trifle (927)
- face powder (551)
- ice cream, icecream (928)
- coffee mug (504)
Torch top-5 classes:
- cup (968)
- trifle (927)
- ice cream, icecream (928)
- face powder (551)
- coffee mug (504)

Scripts

Listing 8.12 /opt/pfn/pfcomp/codegen/MLSDK/examples/run_timm.sh
 1#! /bin/bash
# Bootstrap a virtualenv with pinned timm/huggingface-hub versions, put the
# MLSDK codegen build on PYTHONPATH, and exec the Python example script.
 2set -eux -o pipefail
 3
 4VENVDIR=/tmp/run_timm_venv
 5CURRENT_DIR=$(realpath $(dirname $0))
 6CODEGEN_DIR=$(realpath ${CURRENT_DIR}/../../)
# BUILD_DIR can be overridden from the environment; defaults to the in-tree build.
 7BUILD_DIR=${BUILD_DIR:-${CODEGEN_DIR}/build}
 8
# Create the venv only on the first run; later runs just reactivate it.
 9if [[ ! -d ${VENVDIR} ]]; then
10    python3 -m venv --system-site-packages ${VENVDIR}
11    source ${VENVDIR}/bin/activate
# NOTE(review): listing lines 12-15 are missing from this excerpt — presumably
# pip setup steps elided by the document extraction; verify against the
# original script before relying on this listing.
16    pip3 install timm==1.0.14 huggingface-hub==0.28.1
17else
18    source ${VENVDIR}/bin/activate
19fi
20
# Makes the mlsdk package importable by run_timm.py.
21source "${BUILD_DIR}/codegen_pythonpath.sh"
22
23# Set Hugging Face cache directory to avoid filling up the home directory
24HF_HOME=${HF_HOME:-"/tmp/huggingface"} \
25    exec python3 ${CURRENT_DIR}/run_timm.py "$@"
Listing 8.13 /opt/pfn/pfcomp/codegen/MLSDK/examples/run_timm.py
  1import argparse
  2import os
  3from pathlib import Path
  4from typing import Any, Optional, Union
  5
  6import timm
  7import torch
  8from mlsdk import (
  9    Context,
 10    MNCoreSGD,
 11    MNDevice,
 12    set_buffer_name_in_optimizer,
 13    set_tensor_name_in_module,
 14    storage,
 15)
 16from PIL import Image
 17
 18SAMPLE_IMAGE_PATH = os.path.join(
 19    os.path.dirname(__file__), "./datasets/mncore2_chip.png"
 20)
 21
 22
 23def escape_path(path: str) -> str:
 24    escaped = ""
 25    for c in path:
 26        if c.isalnum() or c in "_-":
 27            escaped += c
 28        else:
 29            escaped += "_"
 30    return escaped
 31
 32
 33def create_model_with_cache(
 34    model_name: str, model_cache_dir: Optional[str] = None, **kwargs: Any
 35) -> Any:
 36    if not model_cache_dir:
 37        return timm.create_model(model_name, **kwargs)
 38    else:
 39        timm_version = "timm_version" + timm.__version__
 40        torch_version = "torch_version" + torch.__version__
 41        cache_dir = os.path.join(
 42            model_cache_dir,
 43            escape_path(f"{torch_version}_{timm_version}_{model_name}"),
 44        )
 45        # Load the model always from the cache to return the same model object always.
 46        # This should also create the cache if it does not exist.
 47        return timm.create_model(model_name, **kwargs, cache_dir=cache_dir)
 48
 49
 50def imagenet_classes() -> list[str]:
 51    script_dir = os.path.dirname(__file__)
 52    imagenet_classes_path = os.path.join(script_dir, "imagenet_classes.txt")
 53    with open(imagenet_classes_path) as f:
 54        return [line.strip() for line in f]
 55
 56
 57def run_inference(
 58    args: argparse.Namespace,
 59) -> None:
 60    img = Image.open(SAMPLE_IMAGE_PATH)
 61    model = create_model_with_cache(
 62        args.model_name,
 63        pretrained=True,
 64        model_cache_dir=args.model_cache_dir,
 65    )
 66    model = model.eval()
 67
 68    def infer(input: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
 69        with torch.no_grad():
 70            x = input["images"]
 71            return {"out": model(x)}
 72
 73    data_config = timm.data.resolve_model_data_config(model)
 74    transforms = timm.data.create_transform(**data_config, is_training=False)
 75    images = transforms(img).unsqueeze(0).expand(args.batch_size, -1, -1, -1)
 76    sample = {"images": images}
 77
 78    device = MNDevice(args.device)
 79    context = Context(device)
 80    Context.switch_context(context)
 81    context.registry.register("model", model)
 82
 83    compile_options: dict[str, str] = {}
 84    if args.option_json is not None:
 85        compile_options = {"option_json": str(args.option_json)}
 86
 87    compiled_infer = context.compile(
 88        infer,
 89        sample,
 90        storage.path(args.outdir) / "infer",
 91        options=compile_options,
 92    )
 93
 94    if args.action == "compile":
 95        context.synchronize()
 96        return
 97
 98    result_as_proxy = compiled_infer(sample)
 99
100    if args.action == "run":
101        context.synchronize()
102        return
103
104    result_on_torch = infer(sample)
105
106    # Tensors obtained via ".cpu()" from TensorProxy exist on GPU in CUDA environments,
107    # so they need to be moved to CPU before the comparison.
108    result = result_as_proxy["out"].cpu()
109    if result.is_cuda:
110        result = result.cpu()
111
112    context.synchronize()
113    torch.allclose(result, result_on_torch["out"], atol=1e-5)
114
115    if "in1k" in args.model_name:
116        classes = imagenet_classes()
117        mncore_top5_classes = torch.topk(result[0], 5).indices.cpu()
118        print("MNCore2 top-5 classes:")
119        for i in mncore_top5_classes:
120            print(f"- {classes[i]} ({i.item()})")
121        torch_top5_classes = torch.topk(result_on_torch["out"][0], 5).indices
122        print("Torch top-5 classes:")
123        for i in torch_top5_classes:
124            print(f"- {classes[i]} ({i.item()})")
125
126
# return mncore.runtime_core._context._function.CompiledFunction
# but this is not directly exposed in the public API, so we use Any here.
def compile_train_step_with_torch_onnx(
    model: Any,
    sample: dict[str, Any],
    context: Context,
    outdir: str,
    option_json: str | None = None,
) -> Any:
    """Compile one SGD training step via the legacy torch->ONNX exporter.

    Returns a callable that injects the learning rate, global step, and
    grad scale factor inputs expected by the compiled graph and advances
    its step counter on every invocation.
    """
    model = model.train()
    context.registry.register("model0", model)

    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    context.registry.register("optimizer0", optimizer)

    def forward_loss(inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        return {"loss": loss_fn(model(inputs["images"]), inputs["labels"])}

    compile_options: dict[str, Union[str, bool]] = {"backprop": True}
    if option_json is not None:
        compile_options["option_json"] = str(option_json)

    compiled_step = context.compile(
        forward_loss,
        sample,
        storage.path(outdir) / "train_step_torch_onnx",
        optimizers=[optimizer],
        options=compile_options,
    )

    def wrapped(inputs: dict[str, Any]) -> Any:
        # The compiled graph expects lr / step / grad-scale as extra inputs.
        inputs["optimizer0@0@mncore_learning_rate"] = torch.tensor(0.1)
        inputs["optimizer0@0@mncore_global_step"] = torch.tensor(wrapped.global_step)  # type: ignore
        inputs["mncore_grad_scale_factor"] = torch.tensor(1)
        wrapped.global_step += 1  # type: ignore
        return compiled_step(inputs)

    # Step counter kept as a function attribute so each call advances it.
    wrapped.global_step = 0  # type: ignore

    return wrapped
167
168
# return mncore.runtime_core._context._function.CompiledFunction
# but this is not directly exposed in the public API, so we use Any here.
def compile_train_step_with_fx2onnx(
    model: Any,
    sample: dict[str, Any],
    context: Context,
    outdir: str,
    option_json: str | None = None,
) -> Any:
    """Compile one SGD training step via the fx2onnx exporter.

    Here the optimizer update is part of the exported graph, so model
    parameters and optimizer buffers are registered with the context and
    the compiled function performs the full forward/backward/step.
    """
    model = model.train()
    set_tensor_name_in_module(model, "model0")
    for param in model.parameters():
        context.register_param(param)
    optimizer = MNCoreSGD(model.parameters(), 0.1, 0.9, 0.0)
    set_buffer_name_in_optimizer(optimizer, "optimizer0")
    context.register_optimizer_buffers(optimizer)
    loss_fn = torch.nn.CrossEntropyLoss()

    def train_step(input: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        optimizer.zero_grad()
        predictions = model(input["images"])
        loss = loss_fn(predictions, input["labels"])
        loss.backward()
        optimizer.step()
        return {"loss": loss}

    compile_options: dict[str, Union[str, bool]] = (
        {} if option_json is None else {"option_json": str(option_json)}
    )

    return context.compile(
        train_step,
        sample,
        storage.path(outdir) / "train_step_fx2onnx",
        options=compile_options,
        export_kwargs={"use_fx2onnx": True},
    )
208
209
def run_training(
    args: argparse.Namespace,
) -> None:
    """Compile a training step for the timm model and check that the loss drops.

    Runs ``args.num_iters`` optimization steps on a single replicated batch
    with random labels. Under action "compile" only compilation happens;
    under "run" one step is executed; under "validate" the final loss is
    asserted to be lower than the first one.
    """
    device = MNDevice(args.device)
    context = Context(device)
    Context.switch_context(context)

    img = Image.open(SAMPLE_IMAGE_PATH)
    model = create_model_with_cache(
        args.model_name,
        pretrained=True,
        num_classes=1000,
        model_cache_dir=args.model_cache_dir,
    )
    data_config = timm.data.resolve_model_data_config(model)
    transforms = timm.data.create_transform(**data_config, is_training=False)
    images = transforms(img).unsqueeze(0).expand(args.batch_size, -1, -1, -1)
    labels = torch.randint(0, 1000, (args.batch_size,))
    sample = {"images": images, "labels": labels}

    # TODO (akirakawata): Should we make this argument?
    # BUGFIX: os.environ.get returns strings, so the default must be the
    # string "0" — the previous bool False default only worked because
    # int(False) happens to be 0.
    use_fx2onnx = not bool(
        int(os.environ.get("MNCORE_USE_LEGACY_ONNX_EXPORTER", "0"))
    )
    if use_fx2onnx:
        # NOTE (puchupala): fx2onnx training needs the optimizer in the
        # exported graph and lr, step, and grad scale factor in the inputs,
        # so it follows a separate code path.
        compiled_train_step = compile_train_step_with_fx2onnx(
            model,
            sample,
            context,
            args.outdir,
            option_json=args.option_json,
        )
    else:
        compiled_train_step = compile_train_step_with_torch_onnx(
            model,
            sample,
            context,
            args.outdir,
            option_json=args.option_json,
        )

    if args.action == "compile":
        context.synchronize()
        return

    first_loss = compiled_train_step(sample)["loss"].cpu()

    if args.action == "run":
        context.synchronize()
        return

    # num_iters steps in total: 1 (first) + (num_iters - 2) + 1 (last).
    for _ in range(args.num_iters - 2):
        compiled_train_step(sample)
    last_loss = compiled_train_step(sample)["loss"].cpu()
    context.synchronize()

    assert last_loss < first_loss
270
271
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--model_name", type=str, required=True)
    parser.add_argument("--outdir", type=str, default="/tmp/mlsdk_timm")
    parser.add_argument("--option_json", type=Path, default=None)
    parser.add_argument("--mode", type=str, default="infer", choices=["infer", "train"])
    parser.add_argument(
        "--device",
        type=str,
        default="mncore2:auto",
        choices=["mncore2:auto", "pfvm:cpu", "pfvm:cuda"],
    )
    parser.add_argument(
        "--model_cache_dir",
        type=str,
        default=None,
        help="Directory to cache the model weights. "
        "If not set, weights are always downloaded from the hub. default: None",
    )
    parser.add_argument(
        "--action",
        type=str,
        default="validate",
        choices=["compile", "run", "validate"],
        help="Whether to only compile, run without validation, "
        "or run with validation (default: validate)",
    )

    train_group = parser.add_argument_group(
        "Training options", "Options for training mode (ignored in inference mode)"
    )
    train_group.add_argument(
        "--num_iters",
        type=int,
        default=12,
        help="Number of training iterations to run (default: 12)",
    )

    args = parser.parse_args()

    # Simple args validation. BUGFIX: use parser.error instead of assert —
    # asserts are stripped under "python -O", and parser.error reports the
    # problem with the usage message and a proper exit status.
    if args.batch_size <= 0:
        parser.error("Batch size must be positive")
    if args.num_iters < 2:
        parser.error(
            "Number of iterations must be at least 2 to observe loss decrease"
        )
    if args.option_json is not None and not args.option_json.is_file():
        parser.error(f"Option JSON file not found: {args.option_json}")

    if args.mode == "train":
        run_training(args)
    elif args.mode == "infer":
        run_inference(args)
    else:
        raise ValueError(f"Unsupported mode: {args.mode}")