8.2.3. Example: Object Detection using SSD
COCO dataset を対象に SSD-ResNet34 の学習済みモデルを用いた推論を行うサンプルプログラム
注釈
ここで利用している学習済みモデル、及び一部ソースコードは intel / ai-reference-models にあるものを部分的に改変、またはそのまま利用しています。これらのライセンスはいずれも Apache License, Version 2.0 です。
また、COCO dataset は CC BY 4.0 のライセンスで提供されており、その中でも特に CC BY 2.0 で提供されている画像を使用します。
Execution Method
初回実行時には以下の内容のダウンロードが行われます (二回目以降は省略されます)。デフォルトでのダウンロード先は /tmp/mlsdk_ssd_inference/ 以下です。
Python packages
$ cd /opt/pfn/pfcomp/codegen/MLSDK/examples/ssd_inference
$ ./run_ssd_inference.sh /tmp/mlsdk_ssd_inference/coco/val2017/000000363666.jpg --device mncore2:auto
Expected Output
検出結果は、デフォルトでは実行時のカレントディレクトリに out_mncore_ prefix のファイルとして出力されます。同時に out_torch_ prefix のファイルも出力されますが、こちらは結果比較用に PyTorch で実行した結果です。比較には structural similarity index measure (SSIM) アルゴリズムを用います。
Tip
結果ファイルの出力先ディレクトリを指定するには --out_img_dir オプションを使用できます。
検出結果 (
./out_mncore_000000363666.jpg)
図 8.3 Object detection using SSD-ResNet34 on MN-Core 2
ログ出力 (
SSIM scoreは 0.99 以上であれば PyTorch と同等の推論が出来ているとみなします。)
Drawing detection result => /opt/pfn/pfcomp/codegen/MLSDK/examples/ssd_inference/out_mncore_000000363666.jpg
Drawing detection result => /opt/pfn/pfcomp/codegen/MLSDK/examples/ssd_inference/out_torch_000000363666.jpg
SSIM score: 0.9959848160827895
Scripts
1#! /bin/bash
2
3set -eux -o pipefail
4
5EXAMPLE_NAME="mlsdk_ssd_inference"
6VENV_DIR=${VENV_DIR:-"/tmp/${EXAMPLE_NAME}/venv"}
7EXTERNAL_DIR=${EXTERNAL_DIR:-"/tmp/${EXAMPLE_NAME}/external"}
8COCO_DIR=${COCO_DIR:-"/tmp/${EXAMPLE_NAME}/coco"}
9OUT_DIR=${OUT_DIR:-"/tmp/${EXAMPLE_NAME}/out"}
10
11CURRENT_DIR=$(realpath $(dirname $0))
12CODEGEN_DIR=$(realpath ${CURRENT_DIR}/../../../)
13BUILD_DIR="${CODEGEN_DIR}/build"
14
15### Prepare and source venv/
16
17if [[ ! -d ${VENV_DIR} ]]; then
18 python3 -m venv --system-site-packages ${VENV_DIR}
19 source ${VENV_DIR}/bin/activate
20 pip3 install -r ${CURRENT_DIR}/requirements.txt
21else
22 source ${VENV_DIR}/bin/activate
23fi
24
25### Prepare external/ items
26
27mkdir -p ${EXTERNAL_DIR}
28pushd ${EXTERNAL_DIR}
29
30if [[ ! -d ai-reference-models ]]; then
31 git clone 'https://github.com/intel/ai-reference-models.git' ai-reference-models --depth 1 --branch v3.3
32fi
33if [[ ! -f resnet34-ssd1200.pth ]]; then
34 # For downloading the trained model, please refer to this documentation.
35 # Ref: https://github.com/intel/ai-reference-models/blob/main/models_v2/pytorch/ssd-resnet34/inference/cpu/CONTAINER.md
36 wget --no-check-certificate \
37 'https://docs.google.com/uc?export=download&id=13kWgEItsoxbVKUlkQz4ntjl1IZGk6_5Z' \
38 -O resnet34-ssd1200.pth
39fi
40
41popd
42
43### Prepare coco/ items
44
45mkdir -p ${COCO_DIR}
46pushd ${COCO_DIR}
47
48# download the COCO datasets and annotations
49if [[ ! -d val2017 ]]; then
50 curl -O 'http://images.cocodataset.org/zips/val2017.zip'
51 unzip val2017.zip
52 rm -f val2017.zip
53fi
54if [[ ! -d annotations ]]; then
55 curl -O 'http://images.cocodataset.org/annotations/annotations_trainval2017.zip'
56 unzip annotations_trainval2017.zip
57 rm -f annotations_trainval2017.zip
58fi
59# fetch images and annotations for inference
60if [[ ! -f annotations/fetched_annotations_eval.json ]]; then
61 python3 ${CURRENT_DIR}/coco_preparation.py \
62 -a annotations/instances_val2017.json \
63 -i val2017 \
64 -f annotations/fetched_annotations_eval.json
65fi
66
67popd
68
69### Run ssd_inference.py
70
71source "${BUILD_DIR}/codegen_pythonpath.sh"
72
73# Do not split a small h/w image among L1/L2 blocks.
74export CODEGEN_SPATIAL_SPLIT_THRESHOLD_L2B_L1B=32
75
76PYTHONPATH=${PYTHONPATH}:"${EXTERNAL_DIR}/ai-reference-models/models_v2/pytorch/ssd-resnet34/inference/cpu"
77python3 ${CURRENT_DIR}/ssd_inference.py \
78 --model_path "${EXTERNAL_DIR}/resnet34-ssd1200.pth" \
79 --coco_data_path "${COCO_DIR}/val2017" \
80 --coco_annotation_path "${COCO_DIR}/annotations/fetched_annotations_eval.json" \
81 --out_img_dir "${CURRENT_DIR}" \
82 --outdir "${OUT_DIR}" \
83 --option_json "${CODEGEN_DIR}/preset_options/O1.json" \
84 ${@}
1import argparse
2import os
3from pathlib import Path
4from typing import Mapping, Optional
5
6import cv2
7import matplotlib
8import matplotlib.patches as patches
9import matplotlib.pyplot as plt
10import numpy as np
11import torch
12import torch.utils.data
13from PIL import Image, ImageFile
14from skimage.metrics import structural_similarity as ssim
15
16# isort: off
17
18# MLSDK modules
19from mlsdk import (
20 MNDevice,
21 Context,
22 storage,
23 CacheOptions,
24 TensorLike,
25)
26
27# following modules are from:
28# [ai-reference-models repository](https://github.com/intel/ai-reference-models/tree/main/models_v2/pytorch/ssd-resnet34/inference/cpu) # NOQA: B950
29from infer import dboxes_R34_coco
30from ssd_r34 import SSD_R34
31from utils import COCODetection, Encoder, SSDTransformer
32
33# isort: on
34
35# No GUI output
36matplotlib.use("Agg")
37
38
39# label_num = 81 and strides = [3, 3, 2, 2, 2, 2] are from original source in:
40# https://github.com/intel/ai-reference-models/tree/main/models_v2/pytorch/ssd-resnet34/inference/cpu/infer.py
41def load_model(
42 path: str | os.PathLike,
43 label_num: int = 81,
44 strides: tuple[int, ...] = (3, 3, 2, 2, 2, 2),
45) -> torch.nn.Module:
46 # Create class object
47 model = SSD_R34(label_num, strides=strides)
48 # Load pretrained model's parameters
49 model.load_state_dict(
50 torch.load(path, map_location=lambda storage, loc: storage)["model"]
51 )
52 assert isinstance(model, torch.nn.Module)
53 return model
54
55
56# Load an input image given as a path
57def load_image(
58 img_path: str | os.PathLike,
59 img_size: tuple[int, int] = (1200, 1200),
60) -> tuple[ImageFile.ImageFile, torch.Tensor]:
61 # Resize input img for model
62 orig_img = Image.open(img_path)
63
64 # Convert img to tensor and change shape
65 resized_img = orig_img.resize(img_size)
66 converted_img_np = np.array(resized_img).transpose(
67 2, 0, 1
68 ) # (C,H,W), assuming dtype=uint8
69 converted_img = (
70 torch.from_numpy(converted_img_np).contiguous().unsqueeze(dim=0)
71 ) # (1,C,H,W)
72
73 return orig_img, converted_img
74
75
76# Decode a model's output for an input image
77def decode_outputs(
78 out_locs: torch.Tensor,
79 out_labels: torch.Tensor,
80 encoder: Encoder,
81 criteria: float = 0.50,
82 max_output: int = 200,
83 device: int = 0,
84) -> list[tuple[torch.Tensor, torch.Tensor, torch.Tensor]]:
85 try:
86 decoded_outputs = encoder.decode_batch(
87 out_locs,
88 out_labels,
89 criteria=criteria,
90 max_output=max_output,
91 device=device,
92 )
93 except Exception as e:
94 print(f"Error in decode_outputs: {type(e)} - {e}")
95 return []
96
97 results = []
98 idx = 0
99 for i in range(decoded_outputs[3].size(0)):
100 detection_num = decoded_outputs[3][i].item()
101 idx_range = idx + detection_num
102 results.append(
103 (
104 decoded_outputs[0][idx:idx_range],
105 decoded_outputs[1][idx:idx_range],
106 decoded_outputs[2][idx:idx_range],
107 )
108 )
109 idx += detection_num
110
111 return results
112
113
114# This function is originally from:
115# [ai-reference-models](https://github.com/intel/ai-reference-models/blob/main/models_v2/pytorch/ssd-resnet34/inference/cpu/utils.py) # NOQA: B950
116# Drawing bboxes and labels for detected objects
117def draw_patches( # NOQA: CFQ002
118 img: ImageFile.ImageFile,
119 img_path: Path,
120 bboxes: torch.Tensor,
121 labels: torch.Tensor,
122 order: str = "ltrb",
123 label_map: Mapping[int, str] | None = None,
124 bbox_alpha: float = 1.0,
125 bbox_linewidth: int = 1,
126 label_alpha: float = 0.3,
127 label_linewidth: int = 2,
128 text_size: int = 10,
129) -> None:
130 img_np = np.array(img)
131 labels_np = labels.detach().numpy()
132 bboxes_np = bboxes.detach().numpy()
133
134 # From labels_np (ndarray[int]) to labels_list (list[str])
135 if label_map is not None:
136 labels_list = [
137 label_map.get(int(label_i), "(no label)") for label_i in labels_np
138 ]
139 else:
140 # Use an int value as a label str
141 labels_list = [str(int(label_i)) for label_i in labels_np]
142
143 if order == "ltrb":
144 xmin, ymin, xmax, ymax = (
145 bboxes_np[:, 0],
146 bboxes_np[:, 1],
147 bboxes_np[:, 2],
148 bboxes_np[:, 3],
149 )
150 cx, cy, w, h = (xmin + xmax) / 2, (ymin + ymax) / 2, xmax - xmin, ymax - ymin
151 else:
152 cx, cy, w, h = (
153 bboxes_np[:, 0],
154 bboxes_np[:, 1],
155 bboxes_np[:, 2],
156 bboxes_np[:, 3],
157 )
158
159 htot, wtot, _ = img_np.shape
160 cx *= wtot
161 cy *= htot
162 w *= wtot
163 h *= htot
164
165 plt.imshow(img_np)
166 ax = plt.gca()
167 for cx_i, cy_i, w_i, h_i, label_i in zip(cx, cy, w, h, labels_list):
168 if label_i == "background":
169 continue
170 ax.add_patch(
171 patches.Rectangle(
172 (cx_i - 0.5 * w_i, cy_i - 0.5 * h_i),
173 w_i,
174 h_i,
175 fill=False,
176 color="r",
177 alpha=bbox_alpha,
178 linewidth=bbox_linewidth,
179 )
180 )
181 bbox_props = dict(
182 boxstyle="square,pad=0",
183 fc="y",
184 ec="y",
185 alpha=label_alpha,
186 linewidth=label_linewidth,
187 )
188 ax.text(
189 cx_i - 0.5 * w_i,
190 cy_i - 0.5 * h_i,
191 label_i,
192 ha="left",
193 va="bottom",
194 size=text_size,
195 bbox=bbox_props,
196 )
197
198 plt.savefig(img_path)
199 plt.clf()
200 plt.close()
201
202
203# Fetch results satisfying threshold and draw bounding box on the given input image
204def draw_detection_result( # NOQA: CFQ002
205 out_locs: torch.Tensor,
206 out_labels: torch.Tensor,
207 img: ImageFile.ImageFile,
208 img_path: Path,
209 encoder: Encoder,
210 threshold: float,
211 label_info: dict[int, str],
212) -> None:
213
214 decoded_output = decode_outputs(out_locs, out_labels, encoder)
215 if not decoded_output:
216 print("no objects have been detected")
217 return
218
219 bboxes, labels, scores = decoded_output[0]
220 detection_mask = scores > threshold
221 fetched_bboxes = bboxes[detection_mask]
222 fetched_labels = labels[detection_mask]
223 draw_patches(img, img_path, fetched_bboxes, fetched_labels, label_map=label_info)
224
225
226def calc_ssim(
227 lhs_img_path: Path,
228 rhs_img_path: Path,
229) -> float:
230 lhs_img = cv2.imread(lhs_img_path)
231 rhs_img = cv2.imread(rhs_img_path)
232 assert lhs_img.shape == rhs_img.shape
233
234 # Convert to gray scale (only image structure is necessary for SSIM)
235 lhs_img_gray = cv2.cvtColor(lhs_img, cv2.COLOR_BGR2GRAY)
236 rhs_img_gray = cv2.cvtColor(rhs_img, cv2.COLOR_BGR2GRAY)
237
238 return float(ssim(lhs_img_gray, rhs_img_gray)) # type: ignore[no-untyped-call]
239
240
241def run_infer( # NOQA: CFQ002
242 *,
243 img_path: Path,
244 model_path: Path,
245 coco_data_path: Path,
246 coco_annotation_path: Path,
247 out_img_dir: Path,
248 threshold: float,
249 device_name: str,
250 outdir: str,
251 option_json_path: Optional[Path] = None,
252) -> None:
253 # Create Dataset object
254 IMG_SIZE = [1200, 1200]
255 STRIDES = [3, 3, 2, 2, 2, 2]
256 default_boxes = dboxes_R34_coco(IMG_SIZE, STRIDES)
257 transformer = SSDTransformer(default_boxes, tuple(IMG_SIZE), val=True)
258 coco = COCODetection(coco_data_path, coco_annotation_path, transformer)
259
260 # Create encoder for decode process
261 encoder = Encoder(default_boxes)
262
263 # Load and prepare images
264 orig_img, converted_img = load_image(img_path)
265 infer_input = {"image": converted_img}
266 img_name = img_path.name
267
268 # Create pretrained model object
269 model = load_model(model_path)
270 model.eval()
271
272 # Inference function for Context.compile
273 def infer_fn(sample: dict[str, TensorLike]) -> dict[str, TensorLike]:
274 with torch.no_grad():
275 locs, labels = model(sample["image"].float() / 255.0)
276 return {"locs": locs, "labels": labels}
277
278 device = MNDevice(device_name)
279 context = Context(device)
280 Context.switch_context(context)
281
282 context.registry.register("model", model)
283
284 compile_options = {}
285 if option_json_path is not None:
286 compile_options = {"option_json": str(option_json_path)}
287
288 compiled_infer_fn = context.compile(
289 infer_fn,
290 infer_input,
291 storage.path(outdir),
292 options=compile_options,
293 cache_options=CacheOptions(outdir + "/cache"),
294 )
295
296 out_mncore = compiled_infer_fn(infer_input)
297 out_locs_mncore, out_labels_mncore = (
298 out_mncore["locs"].cpu(),
299 out_mncore["labels"].cpu(),
300 )
301
302 mncore_img_path = out_img_dir / ("out_mncore_" + img_name)
303 print(f"Drawing detection result => {mncore_img_path}")
304 draw_detection_result(
305 out_locs_mncore,
306 out_labels_mncore,
307 orig_img,
308 mncore_img_path,
309 encoder,
310 threshold,
311 label_info=coco.label_info,
312 )
313
314 out_torch = infer_fn(infer_input)
315 out_locs_torch, out_labels_torch = (
316 out_torch["locs"].cpu(),
317 out_torch["labels"].cpu(),
318 )
319
320 torch_img_path = out_img_dir / ("out_torch_" + img_name)
321 print(f"Drawing detection result => {torch_img_path}")
322 draw_detection_result(
323 out_locs_torch,
324 out_labels_torch,
325 orig_img,
326 torch_img_path,
327 encoder,
328 threshold,
329 label_info=coco.label_info,
330 )
331
332 score = calc_ssim(mncore_img_path, torch_img_path)
333 print(f"SSIM score: {score}")
334 assert score > 0.99, "Generated images differ."
335
336
337def main() -> None:
338 parser = argparse.ArgumentParser(
339 description="Run SSD-ResNet34-1200 model for input image"
340 )
341 parser.add_argument("img_path", type=Path, help="Path to input image")
342 parser.add_argument(
343 "--model_path",
344 type=Path,
345 required=True,
346 help="Path to trained SSD model (e.g. resnet34-ssd1200.pth)",
347 )
348 parser.add_argument(
349 "--coco_data_path", type=Path, required=True, help="Path to the COCO dataset"
350 )
351 parser.add_argument(
352 "--coco_annotation_path",
353 type=Path,
354 required=True,
355 help="Path to annotation data (JSON) corresponding to the COCO dataset",
356 )
357 parser.add_argument(
358 "--out_img_dir",
359 type=Path,
360 default=Path("."),
361 help="Path to directory to output detection result images",
362 )
363 parser.add_argument(
364 "--threshold",
365 type=float,
366 default=0.4,
367 help="""
368 Detection threshold (0.0-1.0), a smaller threshold make object detecting
369 more sensitive""",
370 )
371 parser.add_argument("--device", type=str, default="mncore2:auto")
372 parser.add_argument("--outdir", type=str, default="/tmp/mlsdk_ssd_inference/out")
373 parser.add_argument(
374 "--option_json",
375 type=Path,
376 default="/opt/pfn/pfcomp/codegen/preset_options/O1.json",
377 )
378 args = parser.parse_args()
379
380 # Validate arguments
381 assert 0 <= args.threshold and args.threshold <= 1.0
382 assert not args.out_img_dir.is_file(), "Please specify directory path."
383
384 args.out_img_dir.mkdir(parents=True, exist_ok=True)
385
386 run_infer(
387 img_path=args.img_path,
388 model_path=args.model_path,
389 coco_data_path=args.coco_data_path,
390 coco_annotation_path=args.coco_annotation_path,
391 out_img_dir=args.out_img_dir,
392 threshold=args.threshold,
393 device_name=args.device,
394 outdir=args.outdir,
395 option_json_path=args.option_json,
396 )
397
398
399if __name__ == "__main__":
400 main()
1matplotlib
2pycocotools
3defusedxml
1import argparse
2import json
3import os
4from typing import Any
5
6
7def fetch_imgs_and_ids(
8 json_obj: dict[str, Any], license_id_list: list[int]
9) -> tuple[list[list[dict[str, int | str]]], list[int]]:
10 img_list = []
11 id_list = []
12 for i in json_obj["images"]:
13 if i["license"] in license_id_list:
14 img_list.append(i)
15 id_list.append(i["id"])
16
17 return img_list, id_list
18
19
20def fetch_annotations(json_obj: dict[str, Any], id_list: list[int]) -> list[Any]:
21 ann_list = []
22 for i in json_obj["annotations"]:
23 if i["image_id"] in id_list:
24 ann_list.append(i)
25
26 return ann_list
27
28
29def remove_wasted_imgs(
30 json_obj: dict[str, Any], fetched_dict: dict[str, Any], img_dir: str | os.PathLike
31) -> None:
32 org_file_names = set([i["file_name"] for i in json_obj["images"]])
33 fetched_file_name = set([i["file_name"] for i in fetched_dict["images"]])
34 diff_set = org_file_names - fetched_file_name
35 for i in diff_set:
36 wasted_img_path = os.path.join(img_dir, i)
37 if os.path.exists(wasted_img_path):
38 os.remove(wasted_img_path)
39
40
41def main() -> None:
42 parser = argparse.ArgumentParser()
43 parser.add_argument(
44 "-a",
45 "--annotation_file",
46 type=str,
47 default="./coco/annotations/instances_val2017.json",
48 )
49 parser.add_argument("-i", "--images_dir", type=str, default="./coco/val2017/")
50 parser.add_argument(
51 "-f",
52 "--fetched_annotation_file",
53 type=str,
54 default="./coco/annotations/fetched_annotations.json",
55 )
56 args = parser.parse_args()
57
58 # fetch images' infomation with no license problems
59 json_obj = None
60 with open(args.annotation_file, mode="r") as f:
61 json_obj = json.load(f)
62 license_id_list = [4] # 4: CC-BY 2.0
63
64 img_list, id_list = fetch_imgs_and_ids(json_obj, license_id_list)
65 ann_list = fetch_annotations(json_obj, id_list)
66 fetched_dict = (
67 {"info": json_obj["info"]}
68 | {"licenses": json_obj["licenses"]}
69 | {"images": img_list}
70 | {"annotations": ann_list}
71 | {"categories": json_obj["categories"]}
72 )
73
74 # dump fetched infomations
75 with open(args.fetched_annotation_file, mode="w") as f:
76 json.dump(fetched_dict, f)
77
78 # remove files unused in inference
79 remove_wasted_imgs(json_obj, fetched_dict, args.images_dir)
80
81
82if __name__ == "__main__":
83 main()