8.2.1. Example: timm Model Inference
An application example that retrieves a model from timm and performs inference on the image beignets-task-guide.png.
Fig. 8.2 beignets-task-guide.png
Execution Method (resnet50.a1h_in1k)
$ cd /opt/pfn/pfcomp/codegen/MLSDK/examples/
$ ./run_timm.sh --model_name resnet50.a1h_in1k --batch_size 16
Expected Output (resnet50.a1h_in1k)
MNCore2 top-5 classes:
- espresso (967)
- cup (968)
- chocolate sauce, chocolate syrup (960)
- consomme (925)
- eggnog (969)
Torch top-5 classes:
- espresso (967)
- cup (968)
- chocolate sauce, chocolate syrup (960)
- eggnog (969)
- consomme (925)
Execution Method (mobilenetv3_small_050.lamb_in1k)
$ cd /opt/pfn/pfcomp/codegen/MLSDK/examples/
$ ./run_timm.sh --model_name mobilenetv3_small_050.lamb_in1k --batch_size 16
Expected Output (mobilenetv3_small_050.lamb_in1k)
MNCore2 top-5 classes:
- cup (968)
- trifle (927)
- face powder (551)
- ice cream, icecream (928)
- coffee mug (504)
Torch top-5 classes:
- cup (968)
- trifle (927)
- ice cream, icecream (928)
- face powder (551)
- coffee mug (504)
Scripts
#! /bin/bash
# Launcher for the timm example: prepares an isolated virtualenv with timm,
# sources the codegen Python path, and execs run_timm.py with all CLI
# arguments forwarded.
set -eux -o pipefail

VENVDIR=/tmp/run_timm_venv
# Quote all expansions so the script also works when its path contains
# spaces (ShellCheck SC2086/SC2046).
CURRENT_DIR=$(realpath "$(dirname "$0")")
CODEGEN_DIR=$(realpath "${CURRENT_DIR}/../../")
BUILD_DIR=${BUILD_DIR:-${CODEGEN_DIR}/build}

if [[ ! -d ${VENVDIR} ]]; then
    python3 -m venv --system-site-packages "${VENVDIR}"
    source "${VENVDIR}/bin/activate"
    pip3 install timm==1.0.14 huggingface-hub==0.28.1
else
    source "${VENVDIR}/bin/activate"
fi

source "${BUILD_DIR}/codegen_pythonpath.sh"

# Set Hugging Face cache directory to avoid filling up the home directory
HF_HOME=${HF_HOME:-"/tmp/huggingface"} \
    exec python3 "${CURRENT_DIR}/run_timm.py" "$@"
1import argparse
2import os
3from pathlib import Path
4from typing import Any, Optional, Union
5
6import timm
7import torch
8from mlsdk import (
9 Context,
10 MNCoreSGD,
11 MNDevice,
12 set_buffer_name_in_optimizer,
13 set_tensor_name_in_module,
14 storage,
15)
16from PIL import Image
17
# Sample image bundled with the examples, resolved relative to this script.
SAMPLE_IMAGE_PATH = os.path.join(
    os.path.dirname(__file__), "./datasets/mncore2_chip.png"
)
21
22
def escape_path(path: str) -> str:
    """Sanitize *path* into a filesystem-safe directory name.

    Every character that is not alphanumeric, an underscore, or a hyphen is
    replaced with an underscore.
    """
    return "".join(c if c.isalnum() or c in "_-" else "_" for c in path)
31
32
def create_model_with_cache(
    model_name: str, model_cache_dir: Optional[str] = None, **kwargs: Any
) -> Any:
    """Create a timm model, optionally caching its weights on disk.

    When *model_cache_dir* is falsy the model is fetched directly from the
    hub.  Otherwise the weights live in a subdirectory keyed by the torch
    and timm versions plus the model name, so different library versions
    never share a cache entry.
    """
    if not model_cache_dir:
        return timm.create_model(model_name, **kwargs)

    tag = escape_path(
        f"torch_version{torch.__version__}"
        f"_timm_version{timm.__version__}"
        f"_{model_name}"
    )
    cache_dir = os.path.join(model_cache_dir, tag)
    # Load the model always from the cache to return the same model object
    # always.  This should also create the cache if it does not exist.
    return timm.create_model(model_name, **kwargs, cache_dir=cache_dir)
48
49
def imagenet_classes() -> list[str]:
    """Load the ImageNet class labels shipped next to this script."""
    labels_path = Path(__file__).parent / "imagenet_classes.txt"
    with open(labels_path) as f:
        return [name.strip() for name in f]
55
56
def run_inference(
    args: argparse.Namespace,
) -> None:
    """Compile the model for inference on MNCore2 and validate against torch.

    Behavior depends on ``args.action``:
      * ``"compile"``  - compile only.
      * ``"run"``      - compile and execute, skip validation.
      * ``"validate"`` - additionally compare the device output with a plain
        torch forward pass and, for ImageNet ("in1k") models, print the
        top-5 predicted classes of both backends.
    """
    img = Image.open(SAMPLE_IMAGE_PATH)
    model = create_model_with_cache(
        args.model_name,
        pretrained=True,
        model_cache_dir=args.model_cache_dir,
    )
    model = model.eval()

    def infer(input: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        with torch.no_grad():
            x = input["images"]
            return {"out": model(x)}

    # Preprocess one image and replicate it along the batch dimension.
    data_config = timm.data.resolve_model_data_config(model)
    transforms = timm.data.create_transform(**data_config, is_training=False)
    images = transforms(img).unsqueeze(0).expand(args.batch_size, -1, -1, -1)
    sample = {"images": images}

    device = MNDevice(args.device)
    context = Context(device)
    Context.switch_context(context)
    context.registry.register("model", model)

    compile_options: dict[str, str] = {}
    if args.option_json is not None:
        compile_options = {"option_json": str(args.option_json)}

    compiled_infer = context.compile(
        infer,
        sample,
        storage.path(args.outdir) / "infer",
        options=compile_options,
    )

    if args.action == "compile":
        context.synchronize()
        return

    result_as_proxy = compiled_infer(sample)

    if args.action == "run":
        context.synchronize()
        return

    result_on_torch = infer(sample)

    # Tensors obtained via ".cpu()" from TensorProxy exist on GPU in CUDA
    # environments, so they need to be moved to CPU before the comparison.
    result = result_as_proxy["out"].cpu()
    if result.is_cuda:
        result = result.cpu()

    context.synchronize()
    # Fix: the return value of torch.allclose was previously discarded, so
    # "validate" mode could never actually fail.  Assert it instead.
    assert torch.allclose(
        result, result_on_torch["out"], atol=1e-5
    ), "MNCore2 output does not match torch output (atol=1e-5)"

    if "in1k" in args.model_name:
        classes = imagenet_classes()
        mncore_top5_classes = torch.topk(result[0], 5).indices.cpu()
        print("MNCore2 top-5 classes:")
        for i in mncore_top5_classes:
            print(f"- {classes[i]} ({i.item()})")
        torch_top5_classes = torch.topk(result_on_torch["out"][0], 5).indices
        print("Torch top-5 classes:")
        for i in torch_top5_classes:
            print(f"- {classes[i]} ({i.item()})")
125
126
127# return mncore.runtime_core._context._function.CompiledFunction
128# but this is not directly exposed in the public API, so we use Any here.
# return mncore.runtime_core._context._function.CompiledFunction (wrapped)
# but this is not directly exposed in the public API, so we use Any here.
def compile_train_step_with_torch_onnx(
    model: Any,
    sample: dict[str, Any],
    context: Context,
    outdir: str,
    option_json: str | None = None,
) -> Any:
    """Compile one SGD training step via the legacy torch ONNX exporter.

    Registers the model and an SGD optimizer (lr=0.1, momentum=0.9) with
    *context*, compiles a cross-entropy loss step with backprop enabled,
    and returns a callable that feeds the compiled step the extra
    lr/step/grad-scale tensors the runtime expects.
    """
    model = model.train()
    context.registry.register("model0", model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    context.registry.register("optimizer0", optimizer)
    loss_fn = torch.nn.CrossEntropyLoss()

    def f(inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        return {"loss": loss_fn(model(inputs["images"]), inputs["labels"])}

    compile_options: dict[str, Union[str, bool]] = {"backprop": True}
    if option_json is not None:
        compile_options["option_json"] = str(option_json)

    compiled_train_step = context.compile(
        f,
        sample,
        storage.path(outdir) / "train_step_torch_onnx",
        optimizers=[optimizer],
        options=compile_options,
    )

    # Fix: track the global step in a closure instead of a function
    # attribute (no more "# type: ignore"), and pass an augmented copy of
    # the inputs so the caller's dict is never mutated in place.
    global_step = 0

    def wrapped(inputs: dict[str, Any]) -> Any:
        nonlocal global_step
        step_inputs = dict(inputs)
        step_inputs["optimizer0@0@mncore_learning_rate"] = torch.tensor(0.1)
        step_inputs["optimizer0@0@mncore_global_step"] = torch.tensor(global_step)
        step_inputs["mncore_grad_scale_factor"] = torch.tensor(1)
        global_step += 1
        return compiled_train_step(step_inputs)

    return wrapped
167
168
169# return mncore.runtime_core._context._function.CompiledFunction
170# but this is not directly exposed in the public API, so we use Any here.
# return mncore.runtime_core._context._function.CompiledFunction
# but this is not directly exposed in the public API, so we use Any here.
def compile_train_step_with_fx2onnx(
    model: Any,
    sample: dict[str, Any],
    context: Context,
    outdir: str,
    option_json: str | None = None,
) -> Any:
    """Compile one MNCoreSGD training step through the fx2onnx exporter.

    The whole optimizer step (zero_grad / backward / step) is part of the
    exported graph, so parameters and optimizer buffers are named and
    registered with *context* before compilation.
    """
    model = model.train()
    set_tensor_name_in_module(model, "model0")
    for param in model.parameters():
        context.register_param(param)
    optimizer = MNCoreSGD(model.parameters(), 0.1, 0.9, 0.0)
    set_buffer_name_in_optimizer(optimizer, "optimizer0")
    context.register_optimizer_buffers(optimizer)
    criterion = torch.nn.CrossEntropyLoss()

    def train_step(input: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        optimizer.zero_grad()
        loss = criterion(model(input["images"]), input["labels"])
        loss.backward()
        optimizer.step()
        return {"loss": loss}

    compile_options: dict[str, Union[str, bool]] = {}
    if option_json is not None:
        compile_options["option_json"] = str(option_json)

    return context.compile(
        train_step,
        sample,
        storage.path(outdir) / "train_step_fx2onnx",
        options=compile_options,
        export_kwargs={"use_fx2onnx": True},
    )
208
209
def run_training(
    args: argparse.Namespace,
) -> None:
    """Compile a training step, run it, and check that the loss decreases.

    Behavior depends on ``args.action``:
      * ``"compile"``  - compile only.
      * ``"run"``      - compile and run a single step.
      * ``"validate"`` - run ``args.num_iters`` steps on a fixed batch with
        random labels and assert the last loss is below the first.
    """
    device = MNDevice(args.device)
    context = Context(device)
    Context.switch_context(context)

    img = Image.open(SAMPLE_IMAGE_PATH)
    model = create_model_with_cache(
        args.model_name,
        pretrained=True,
        num_classes=1000,
        model_cache_dir=args.model_cache_dir,
    )
    data_config = timm.data.resolve_model_data_config(model)
    transforms = timm.data.create_transform(**data_config, is_training=False)
    images = transforms(img).unsqueeze(0).expand(args.batch_size, -1, -1, -1)
    labels = torch.randint(0, 1000, (args.batch_size,))
    sample = {"images": images, "labels": labels}

    # TODO (akirakawata): Should we make this argument?
    # Fix: use the string "0" as the default instead of the bool False —
    # os.environ only ever holds strings, and int("") would raise if the
    # bool were ever swapped for an empty string.
    use_fx2onnx = not bool(
        int(os.environ.get("MNCORE_USE_LEGACY_ONNX_EXPORTER", "0"))
    )
    if use_fx2onnx:
        # NOTE (puchupala): fx2onnx training needs the optimizer in the
        # exported graph and lr, step, and grad scale factor in the inputs,
        # so it follows a separate code path.
        compiled_train_step = compile_train_step_with_fx2onnx(
            model,
            sample,
            context,
            args.outdir,
            option_json=args.option_json,
        )
    else:
        compiled_train_step = compile_train_step_with_torch_onnx(
            model,
            sample,
            context,
            args.outdir,
            option_json=args.option_json,
        )

    if args.action == "compile":
        context.synchronize()
        return

    first_loss = compiled_train_step(sample)["loss"].cpu()

    if args.action == "run":
        context.synchronize()
        return

    # num_iters total steps: the first above, num_iters - 2 here, plus one
    # final step whose loss is compared against the first.
    for _ in range(args.num_iters - 2):
        compiled_train_step(sample)
    last_loss = compiled_train_step(sample)["loss"].cpu()
    context.synchronize()

    assert last_loss < first_loss
270
271
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--model_name", type=str, required=True)
    parser.add_argument("--outdir", type=str, default="/tmp/mlsdk_timm")
    parser.add_argument("--option_json", type=Path, default=None)
    parser.add_argument("--mode", type=str, default="infer", choices=["infer", "train"])
    parser.add_argument(
        "--device",
        type=str,
        default="mncore2:auto",
        choices=["mncore2:auto", "pfvm:cpu", "pfvm:cuda"],
    )
    parser.add_argument(
        "--model_cache_dir",
        type=str,
        default=None,
        help="Directory to cache the model weights. "
        "If not set, weights are always downloaded from the hub. default: None",
    )
    parser.add_argument(
        "--action",
        type=str,
        default="validate",
        choices=["compile", "run", "validate"],
        help="Whether to only compile, run without validation, "
        "or run with validation (default: validate)",
    )

    train_group = parser.add_argument_group(
        "Training options", "Options for training mode (ignored in inference mode)"
    )
    train_group.add_argument(
        "--num_iters",
        type=int,
        default=12,
        help="Number of training iterations to run (default: 12)",
    )

    args = parser.parse_args()

    # Simple args validation.  Fix: parser.error() exits with status 2 and a
    # usage message, and unlike "assert" is not stripped under "python -O".
    if args.batch_size <= 0:
        parser.error("Batch size must be positive")
    if args.num_iters < 2:
        parser.error(
            "Number of iterations must be at least 2 to observe loss decrease"
        )
    if args.option_json is not None and not args.option_json.is_file():
        parser.error(f"Option JSON file not found: {args.option_json}")

    if args.mode == "train":
        run_training(args)
    elif args.mode == "infer":
        run_inference(args)
    else:
        raise ValueError(f"Unsupported mode: {args.mode}")