clean and update scripts in web_p

shalenikol 2025-03-12 11:45:34 +03:00
parent c85784f3dc
commit ccc1e2da4b
28 changed files with 1163 additions and 139 deletions

web_p/models_dope.py Executable file

@@ -0,0 +1,196 @@
"""
NVIDIA from jtremblay@gmail.com
"""
# Networks
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.utils.data
import torchvision.models as models
class DopeNetwork(nn.Module):
def __init__(
self,
pretrained=False,
numBeliefMap=9,
numAffinity=16,
stop_at_stage=6, # number of stages to process (if less than total number of stages)
):
super(DopeNetwork, self).__init__()
self.stop_at_stage = stop_at_stage
        vgg_full = models.vgg19(pretrained=pretrained).features  # optionally start from ImageNet weights
self.vgg = nn.Sequential()
for i_layer in range(24):
self.vgg.add_module(str(i_layer), vgg_full[i_layer])
# Add some layers
i_layer = 23
self.vgg.add_module(
str(i_layer), nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
)
self.vgg.add_module(str(i_layer + 1), nn.ReLU(inplace=True))
self.vgg.add_module(
str(i_layer + 2), nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
)
self.vgg.add_module(str(i_layer + 3), nn.ReLU(inplace=True))
# print('---Belief------------------------------------------------')
# _2 are the belief map stages
self.m1_2 = DopeNetwork.create_stage(128, numBeliefMap, True)
self.m2_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m3_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m4_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m5_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m6_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
# print('---Affinity----------------------------------------------')
# _1 are the affinity map stages
self.m1_1 = DopeNetwork.create_stage(128, numAffinity, True)
self.m2_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m3_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m4_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m5_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m6_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
def forward(self, x):
"""Runs inference on the neural network"""
out1 = self.vgg(x)
out1_2 = self.m1_2(out1)
out1_1 = self.m1_1(out1)
if self.stop_at_stage == 1:
return [out1_2], [out1_1]
out2 = torch.cat([out1_2, out1_1, out1], 1)
out2_2 = self.m2_2(out2)
out2_1 = self.m2_1(out2)
if self.stop_at_stage == 2:
return [out1_2, out2_2], [out1_1, out2_1]
out3 = torch.cat([out2_2, out2_1, out1], 1)
out3_2 = self.m3_2(out3)
out3_1 = self.m3_1(out3)
if self.stop_at_stage == 3:
return [out1_2, out2_2, out3_2], [out1_1, out2_1, out3_1]
out4 = torch.cat([out3_2, out3_1, out1], 1)
out4_2 = self.m4_2(out4)
out4_1 = self.m4_1(out4)
if self.stop_at_stage == 4:
return [out1_2, out2_2, out3_2, out4_2], [out1_1, out2_1, out3_1, out4_1]
out5 = torch.cat([out4_2, out4_1, out1], 1)
out5_2 = self.m5_2(out5)
out5_1 = self.m5_1(out5)
if self.stop_at_stage == 5:
return [out1_2, out2_2, out3_2, out4_2, out5_2], [
out1_1,
out2_1,
out3_1,
out4_1,
out5_1,
]
out6 = torch.cat([out5_2, out5_1, out1], 1)
out6_2 = self.m6_2(out6)
out6_1 = self.m6_1(out6)
return [out1_2, out2_2, out3_2, out4_2, out5_2, out6_2], [
out1_1,
out2_1,
out3_1,
out4_1,
out5_1,
out6_1,
]
@staticmethod
def create_stage(in_channels, out_channels, first=False):
"""Create the neural network layers for a single stage."""
model = nn.Sequential()
mid_channels = 128
if first:
padding = 1
kernel = 3
count = 6
final_channels = 512
else:
padding = 3
kernel = 7
count = 10
final_channels = mid_channels
# First convolution
model.add_module(
"0",
nn.Conv2d(
in_channels, mid_channels, kernel_size=kernel, stride=1, padding=padding
),
)
# Middle convolutions
i = 1
while i < count - 1:
model.add_module(str(i), nn.ReLU(inplace=True))
i += 1
model.add_module(
str(i),
nn.Conv2d(
mid_channels,
mid_channels,
kernel_size=kernel,
stride=1,
padding=padding,
),
)
i += 1
# Penultimate convolution
model.add_module(str(i), nn.ReLU(inplace=True))
i += 1
model.add_module(
str(i), nn.Conv2d(mid_channels, final_channels, kernel_size=1, stride=1)
)
i += 1
# Last convolution
model.add_module(str(i), nn.ReLU(inplace=True))
i += 1
model.add_module(
str(i), nn.Conv2d(final_channels, out_channels, kernel_size=1, stride=1)
)
i += 1
return model
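
As a quick orientation (not part of the committed file): with the default constructor arguments, the truncated VGG-19 backbone keeps three max-pool layers, so a 400x400 input is downsampled by a factor of 8 and each stage emits 9 belief maps and 16 affinity channels at 50x50. A minimal usage sketch, assuming the file is importable as web_p.models_dope:

import torch
from web_p.models_dope import DopeNetwork

net = DopeNetwork().eval()
x = torch.randn(1, 3, 400, 400)   # one RGB image
with torch.no_grad():
    beliefs, affinities = net(x)  # one list entry per stage
print(beliefs[-1].shape)          # torch.Size([1, 9, 50, 50])
print(affinities[-1].shape)       # torch.Size([1, 16, 50, 50])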

[4 binary image files removed; not shown]

@@ -1,105 +0,0 @@
task: detect
mode: train
model: /home/shalenikol/fork_work/webservice/server/build/public/4c4f3909-74b0-4206-aec1-fc4acd3a1081/weights/od_w01/yolov8n.pt
data: /home/shalenikol/fork_work/webservice/server/build/public/4c4f3909-74b0-4206-aec1-fc4acd3a1081/weights/od_w01/rbs_train.yaml
epochs: 33
time: null
patience: 50
batch: 16
imgsz: 640
save: true
save_period: -1
cache: false
device: null
workers: 8
project: /home/shalenikol/fork_work/webservice/server/build/public/4c4f3909-74b0-4206-aec1-fc4acd3a1081/weights/od_w01
name: train
exist_ok: false
pretrained: true
optimizer: auto
verbose: true
seed: 0
deterministic: true
single_cls: false
rect: false
cos_lr: false
close_mosaic: 10
resume: false
amp: true
fraction: 1.0
profile: false
freeze: null
multi_scale: false
overlap_mask: true
mask_ratio: 4
dropout: 0.0
val: true
split: val
save_json: false
save_hybrid: false
conf: null
iou: 0.7
max_det: 300
half: false
dnn: false
plots: true
source: null
vid_stride: 1
stream_buffer: false
visualize: false
augment: false
agnostic_nms: false
classes: null
retina_masks: false
embed: null
show: false
save_frames: false
save_txt: false
save_conf: false
save_crop: false
show_labels: true
show_conf: true
show_boxes: true
line_width: null
format: torchscript
keras: false
optimize: false
int8: false
dynamic: false
simplify: false
opset: null
workspace: 4
nms: false
lr0: 0.01
lrf: 0.01
momentum: 0.937
weight_decay: 0.0005
warmup_epochs: 3.0
warmup_momentum: 0.8
warmup_bias_lr: 0.1
box: 7.5
cls: 0.5
dfl: 1.5
pose: 12.0
kobj: 1.0
label_smoothing: 0.0
nbs: 64
hsv_h: 0.015
hsv_s: 0.7
hsv_v: 0.4
degrees: 0.0
translate: 0.1
scale: 0.5
shear: 0.0
perspective: 0.0
flipud: 0.0
fliplr: 0.5
mosaic: 1.0
mixup: 0.0
copy_paste: 0.0
auto_augment: randaugment
erasing: 0.4
crop_fraction: 1.0
cfg: null
tracker: botsort.yaml
save_dir: /home/shalenikol/fork_work/webservice/server/build/public/4c4f3909-74b0-4206-aec1-fc4acd3a1081/weights/od_w01/train
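
This deleted file matches the args.yaml that Ultralytics YOLO writes alongside a training run. As a hedged sketch (paths shortened to their basenames; all values taken from the file above), the run it records could be launched like this:

from ultralytics import YOLO

model = YOLO("yolov8n.pt")          # `model:` above
model.train(
    data="rbs_train.yaml",          # `data:` above, full path shortened
    epochs=33,
    batch=16,
    imgsz=640,
    project="od_w01",               # `project:` above, full path shortened
    name="train",
)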

[4 binary image files removed; not shown]

@@ -1,34 +0,0 @@
epoch, train/box_loss, train/cls_loss, train/dfl_loss, metrics/precision(B), metrics/recall(B), metrics/mAP50(B), metrics/mAP50-95(B), val/box_loss, val/cls_loss, val/dfl_loss, lr/pg0, lr/pg1, lr/pg2
1, 0.62674, 1.281, 0.92555, 0.99239, 0.99448, 0.99323, 0.90966, 0.40212, 0.8264, 0.80447, 0.00066247, 0.00066247, 0.00066247
2, 0.60996, 0.71899, 0.93387, 0.9945, 0.99945, 0.99484, 0.91551, 0.43253, 0.60301, 0.8228, 0.0012893, 0.0012893, 0.0012893
3, 0.58648, 0.54879, 0.92909, 1, 0.98871, 0.99494, 0.9213, 0.40211, 0.39327, 0.81593, 0.0018761, 0.0018761, 0.0018761
4, 0.58195, 0.48301, 0.92375, 0.99087, 0.9337, 0.97172, 0.89393, 0.41614, 0.46785, 0.82069, 0.00182, 0.00182, 0.00182
5, 0.56201, 0.44926, 0.92381, 0.99447, 0.99385, 0.99494, 0.94951, 0.34807, 0.32406, 0.8013, 0.00182, 0.00182, 0.00182
6, 0.52696, 0.40581, 0.9068, 0.95813, 0.98343, 0.99281, 0.94494, 0.33023, 0.48053, 0.79401, 0.00176, 0.00176, 0.00176
7, 0.51017, 0.3952, 0.90752, 0.99889, 1, 0.995, 0.95388, 0.3192, 0.33973, 0.7992, 0.0017, 0.0017, 0.0017
8, 0.50772, 0.37889, 0.90238, 0.98351, 0.98842, 0.98581, 0.94918, 0.30154, 0.28504, 0.79667, 0.00164, 0.00164, 0.00164
9, 0.47737, 0.3576, 0.89251, 0.99946, 0.99448, 0.995, 0.97205, 0.28135, 0.23642, 0.79101, 0.00158, 0.00158, 0.00158
10, 0.46587, 0.34547, 0.89324, 0.99948, 1, 0.995, 0.96897, 0.28021, 0.28522, 0.78694, 0.00152, 0.00152, 0.00152
11, 0.45881, 0.33452, 0.89055, 0.99954, 1, 0.995, 0.97012, 0.26364, 0.21443, 0.7813, 0.00146, 0.00146, 0.00146
12, 0.44939, 0.32887, 0.89206, 0.9996, 1, 0.995, 0.98382, 0.24486, 0.20614, 0.78109, 0.0014, 0.0014, 0.0014
13, 0.44388, 0.32289, 0.88796, 0.99932, 1, 0.995, 0.97195, 0.27681, 0.21443, 0.77933, 0.00134, 0.00134, 0.00134
14, 0.43847, 0.31282, 0.88496, 0.99965, 1, 0.995, 0.98019, 0.25014, 0.20255, 0.7775, 0.00128, 0.00128, 0.00128
15, 0.41585, 0.30067, 0.8774, 0.99943, 1, 0.995, 0.97609, 0.25842, 0.21239, 0.78006, 0.00122, 0.00122, 0.00122
16, 0.41436, 0.29784, 0.87488, 0.99964, 1, 0.995, 0.97823, 0.25499, 0.19837, 0.78004, 0.00116, 0.00116, 0.00116
17, 0.414, 0.29771, 0.87575, 0.99943, 1, 0.995, 0.98746, 0.2251, 0.203, 0.77468, 0.0011, 0.0011, 0.0011
18, 0.39273, 0.29075, 0.86927, 0.99445, 1, 0.995, 0.98597, 0.22693, 0.19648, 0.77208, 0.00104, 0.00104, 0.00104
19, 0.40052, 0.28802, 0.87804, 0.99958, 1, 0.995, 0.98541, 0.22268, 0.18749, 0.77233, 0.00098, 0.00098, 0.00098
20, 0.38066, 0.27951, 0.86666, 0.99969, 1, 0.995, 0.98901, 0.20959, 0.1775, 0.7697, 0.00092, 0.00092, 0.00092
21, 0.38115, 0.27813, 0.8658, 0.99964, 1, 0.995, 0.98895, 0.20699, 0.1779, 0.77073, 0.00086, 0.00086, 0.00086
22, 0.37441, 0.27094, 0.87121, 0.99965, 1, 0.995, 0.98975, 0.20138, 0.17235, 0.76785, 0.0008, 0.0008, 0.0008
23, 0.36808, 0.26148, 0.86426, 0.99965, 1, 0.995, 0.98829, 0.19861, 0.1628, 0.76706, 0.00074, 0.00074, 0.00074
24, 0.25547, 0.199, 0.77555, 0.99955, 1, 0.995, 0.98791, 0.21853, 0.18063, 0.76972, 0.00068, 0.00068, 0.00068
25, 0.24799, 0.1969, 0.78404, 0.99958, 1, 0.995, 0.98812, 0.23069, 0.18178, 0.76985, 0.00062, 0.00062, 0.00062
26, 0.24232, 0.1915, 0.78022, 0.99968, 1, 0.995, 0.99024, 0.20883, 0.16788, 0.76752, 0.00056, 0.00056, 0.00056
27, 0.23288, 0.1839, 0.77463, 0.99968, 1, 0.995, 0.99151, 0.2026, 0.16501, 0.76809, 0.0005, 0.0005, 0.0005
28, 0.23066, 0.18012, 0.77547, 0.99961, 1, 0.995, 0.98912, 0.19388, 0.1534, 0.76246, 0.00044, 0.00044, 0.00044
29, 0.22286, 0.17062, 0.77932, 0.9997, 1, 0.995, 0.99039, 0.20566, 0.14978, 0.76601, 0.00038, 0.00038, 0.00038
30, 0.21427, 0.16357, 0.77529, 0.9997, 1, 0.995, 0.99215, 0.18345, 0.14148, 0.76206, 0.00032, 0.00032, 0.00032
31, 0.20895, 0.16067, 0.77189, 0.9997, 1, 0.995, 0.99187, 0.17027, 0.13746, 0.76124, 0.00026, 0.00026, 0.00026
32, 0.20248, 0.15421, 0.77526, 0.9997, 1, 0.995, 0.99246, 0.17229, 0.13828, 0.76056, 0.0002, 0.0002, 0.0002
33, 0.19494, 0.15005, 0.76361, 0.99971, 1, 0.995, 0.99302, 0.16442, 0.12543, 0.76043, 0.00014, 0.00014, 0.00014
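
The deleted results.csv above is the standard Ultralytics per-epoch log (note the space-padded column names). A small sketch for inspecting such a file, assuming pandas and matplotlib are available:

import pandas as pd

df = pd.read_csv("results.csv", skipinitialspace=True)  # strip padded headers
print(df[["epoch", "metrics/mAP50-95(B)"]].tail())      # mAP50-95 ends near 0.993
df.plot(x="epoch", y=["train/box_loss", "val/box_loss"])  # needs matplotlib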

[15 binary files changed; not shown]

web_p/utils_dope.py Executable file

@@ -0,0 +1,967 @@
"""
NVIDIA from jtremblay@gmail.com
"""
import colorsys
import glob
import io
import json
import math
import os
from math import acos, pi, sqrt
from os.path import basename, exists, join

import albumentations as A
import boto3
import numpy as np
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.utils.data
import torch.utils.data as data
import torchvision.transforms as transforms
from PIL import Image, ImageDraw, ImageEnhance, ImageFont
def default_loader(path):
return Image.open(path).convert("RGB")
def length(v):
return sqrt(v[0] ** 2 + v[1] ** 2)
def dot_product(v, w):
return v[0] * w[0] + v[1] * w[1]
def normalize(v):
norm = np.linalg.norm(v, ord=1)
if norm == 0:
norm = np.finfo(v.dtype).eps
return v / norm
def determinant(v, w):
return v[0] * w[1] - v[1] * w[0]
def inner_angle(v, w):
cosx = dot_product(v, w) / (length(v) * length(w))
rad = acos(cosx) # in radians
return rad * 180 / pi # returns degrees
def py_ang(A, B=(1, 0)):
inner = inner_angle(A, B)
det = determinant(A, B)
if (
det < 0
): # this is a property of the det. If the det < 0 then B is clockwise of A
return inner
else: # if the det > 0 then A is immediately clockwise of B
return 360 - inner
def append_dot(extensions):
res = []
for ext in extensions:
if not ext.startswith("."):
res.append(f".{ext}")
else:
res.append(ext)
return res
def loadimages(root, extensions=["png"]):
imgs = []
extensions = append_dot(extensions)
def add_json_files(
path,
):
for ext in extensions:
for file in os.listdir(path):
imgpath = os.path.join(path, file)
if (
imgpath.endswith(ext)
and exists(imgpath)
and exists(imgpath.replace(ext, ".json"))
):
imgs.append(
(
imgpath,
imgpath.replace(path, "").replace("/", ""),
imgpath.replace(ext, ".json"),
)
)
def explore(path):
if not os.path.isdir(path):
return
folders = [
os.path.join(path, o)
for o in os.listdir(path)
if os.path.isdir(os.path.join(path, o))
]
for path_entry in folders:
explore(path_entry)
add_json_files(path)
explore(root)
return imgs
def loadweights(root):
if root.endswith(".pth") and os.path.isfile(root):
return [root]
else:
weights = [
os.path.join(root, f)
for f in os.listdir(root)
if os.path.isfile(os.path.join(root, f)) and f.endswith(".pth")
]
weights.sort()
return weights
def loadimages_inference(root, extensions):
imgs, imgsname = [], []
extensions = append_dot(extensions)
def add_imgs(
path,
):
for ext in extensions:
for file in os.listdir(path):
imgpath = os.path.join(path, file)
if imgpath.endswith(ext) and exists(imgpath):
imgs.append(imgpath)
imgsname.append(imgpath.replace(root, ""))
def explore(path):
if not os.path.isdir(path):
return
folders = [
os.path.join(path, o)
for o in os.listdir(path)
if os.path.isdir(os.path.join(path, o))
]
for path_entry in folders:
explore(path_entry)
add_imgs(path)
explore(root)
return imgs, imgsname
class CleanVisiiDopeLoader(data.Dataset):
def __init__(
self,
path_dataset,
objects=None,
sigma=1,
output_size=400,
extensions=["png"],
debug=False,
use_s3=False,
buckets=[],
endpoint_url=None,
):
###################
self.path_dataset = path_dataset
self.objects_interest = objects
self.sigma = sigma
self.output_size = output_size
self.extensions = append_dot(extensions)
self.debug = debug
###################
self.imgs = []
self.s3_buckets = {}
self.use_s3 = use_s3
if self.use_s3:
self.session = boto3.Session()
self.s3 = self.session.resource(
service_name="s3", endpoint_url=endpoint_url
)
for bucket_name in buckets:
try:
self.s3_buckets[bucket_name] = self.s3.Bucket(bucket_name)
except Exception as e:
print(
f"Error trying to load bucket {bucket_name} for training data:",
e,
)
for bucket in self.s3_buckets:
bucket_objects = [
str(obj.key) for obj in self.s3_buckets[bucket].objects.all()
]
jsons = set([json for json in bucket_objects if json.endswith(".json")])
imgs = [
img
for img in bucket_objects
if img.endswith(tuple(self.extensions))
]
for ext in self.extensions:
for img in imgs:
# Only add images that have a ground truth file
if img.endswith(ext) and img.replace(ext, ".json") in jsons:
# (img key, bucket name, json key)
self.imgs.append((img, bucket, img.replace(ext, ".json")))
else:
for path_look in path_dataset:
self.imgs += loadimages(path_look, extensions=self.extensions)
# np.random.shuffle(self.imgs)
print("Number of Training Images:", len(self.imgs))
print(self.imgs)
if debug:
print("Debuging will be save in debug/")
if os.path.isdir("debug"):
print(f'folder {"debug"}/ exists')
else:
os.mkdir("debug")
print(f'created folder {"debug"}/')
def __len__(self):
return len(self.imgs)
def __getitem__(self, index):
# load the data
if self.use_s3:
img_key, bucket, json_key = self.imgs[index]
mem_img = io.BytesIO()
object_img = self.s3_buckets[bucket].Object(img_key)
object_img.download_fileobj(mem_img)
img = np.array(Image.open(mem_img).convert("RGB"))
object_json = self.s3_buckets[bucket].Object(json_key)
data_json = json.load(object_json.get()["Body"])
img_name = img_key[:-3]
else:
path_img, img_name, path_json = self.imgs[index]
# load the image
img = np.array(Image.open(path_img).convert("RGB"))
# load the json file
with open(path_json) as f:
data_json = json.load(f)
all_projected_cuboid_keypoints = []
# load the projected cuboid keypoints
for obj in data_json["objects"]:
if (
self.objects_interest is not None
and not obj["class"] in self.objects_interest
):
continue
# load the projected_cuboid_keypoints
# 06.02.2024 @shalenikol
# if obj["visibility_image"] > 0:
if obj["visibility"] > 0:
projected_cuboid_keypoints = obj["projected_cuboid"]
# FAT dataset only has 8 corners for 'projected_cuboid'
if len(projected_cuboid_keypoints) == 8:
projected_cuboid_keypoints.append(obj["projected_cuboid_centroid"])
else:
projected_cuboid_keypoints = [
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
]
all_projected_cuboid_keypoints.append(projected_cuboid_keypoints)
if len(all_projected_cuboid_keypoints) == 0:
all_projected_cuboid_keypoints = [
[
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
[-100, -100],
]
]
# flatten the keypoints
flatten_projected_cuboid = []
for obj in all_projected_cuboid_keypoints:
for p in obj:
flatten_projected_cuboid.append(p)
#######
if self.debug:
img_to_save = Image.fromarray(img)
draw = ImageDraw.Draw(img_to_save)
for ip, p in enumerate(flatten_projected_cuboid):
draw.ellipse(
(int(p[0]) - 2, int(p[1]) - 2, int(p[0]) + 2, int(p[1]) + 2),
fill="green",
)
img_to_save.save(f"debug/{img_name.replace('.png','_original.png')}")
#######
# data augmentation
transform = A.Compose(
[
A.RandomCrop(width=400, height=400),
A.Rotate(limit=180),
A.RandomBrightnessContrast(
brightness_limit=0.2, contrast_limit=0.15, p=1
),
A.GaussNoise(p=1),
],
keypoint_params=A.KeypointParams(format="xy", remove_invisible=False),
)
transformed = transform(image=img, keypoints=flatten_projected_cuboid)
img_transformed = transformed["image"]
flatten_projected_cuboid_transformed = transformed["keypoints"]
#######
# transform to the final output
if not self.output_size == 400:
transform = A.Compose(
[
A.Resize(width=self.output_size, height=self.output_size),
],
keypoint_params=A.KeypointParams(format="xy", remove_invisible=False),
)
transformed = transform(
image=img_transformed, keypoints=flatten_projected_cuboid_transformed
)
img_transformed_output_size = transformed["image"]
flatten_projected_cuboid_transformed_output_size = transformed["keypoints"]
else:
img_transformed_output_size = img_transformed
flatten_projected_cuboid_transformed_output_size = (
flatten_projected_cuboid_transformed
)
#######
if self.debug:
img_transformed_saving = Image.fromarray(img_transformed)
draw = ImageDraw.Draw(img_transformed_saving)
for ip, p in enumerate(flatten_projected_cuboid_transformed):
draw.ellipse(
(int(p[0]) - 2, int(p[1]) - 2, int(p[0]) + 2, int(p[1]) + 2),
fill="green",
)
img_transformed_saving.save(
f"debug/{img_name.replace('.png','_transformed.png')}"
)
#######
# update the keypoints list
# obj x keypoint_id x (x,y)
i_all = 0
for i_obj, obj in enumerate(all_projected_cuboid_keypoints):
for i_p, point in enumerate(obj):
all_projected_cuboid_keypoints[i_obj][
i_p
] = flatten_projected_cuboid_transformed_output_size[i_all]
i_all += 1
# generate the belief maps
beliefs = CreateBeliefMap(
size=int(self.output_size),
pointsBelief=all_projected_cuboid_keypoints,
sigma=self.sigma,
nbpoints=9,
save=False,
)
beliefs = torch.from_numpy(np.array(beliefs))
# generate affinity fields with centroid.
affinities = GenerateMapAffinity(
size=int(self.output_size),
nb_vertex=8,
pointsInterest=all_projected_cuboid_keypoints,
objects_centroid=np.array(all_projected_cuboid_keypoints)[:, -1].tolist(),
scale=1,
)
# prepare for the image tensors
normalize_tensor = transforms.Compose(
[
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
]
)
to_tensor = transforms.Compose(
[
transforms.ToTensor(),
]
)
img_tensor = normalize_tensor(Image.fromarray(img_transformed))
img_original = to_tensor(img_transformed)
########
if self.debug:
imgs = VisualizeBeliefMap(beliefs)
img, grid = save_image(
imgs,
f"debug/{img_name.replace('.png','_beliefs.png')}",
mean=0,
std=1,
nrow=3,
save=True,
)
imgs = VisualizeAffinityMap(affinities)
save_image(
imgs,
f"debug/{img_name.replace('.png','_affinities.png')}",
mean=0,
std=1,
nrow=3,
save=True,
)
########
img_tensor[torch.isnan(img_tensor)] = 0
affinities[torch.isnan(affinities)] = 0
beliefs[torch.isnan(beliefs)] = 0
img_tensor[torch.isinf(img_tensor)] = 0
affinities[torch.isinf(affinities)] = 0
beliefs[torch.isinf(beliefs)] = 0
return {
"img": img_tensor,
"affinities": torch.clamp(affinities, -1, 1),
"beliefs": torch.clamp(beliefs, 0, 1),
"file_name": img_name,
"img_original": img_original,
}
def VisualizeAffinityMap(
tensor,
# tensor of (len(keypoints)*2)xwxh
threshold_norm_vector=0.4,
    # minimum norm a vector must have before it is drawn
points=None,
# list of points to draw in white on top of the image
factor=1.0,
# by how much the image was reduced, scale factor
translation=(0, 0)
# by how much the points were moved
# return len(keypoints)x3xwxh # stack of images
):
images = torch.zeros(tensor.shape[0] // 2, 3, tensor.shape[1], tensor.shape[2])
for i_image in range(0, tensor.shape[0], 2): # could be read as i_keypoint
indices = (
torch.abs(tensor[i_image, :, :]) + torch.abs(tensor[i_image + 1, :, :])
> threshold_norm_vector
).nonzero()
for indice in indices:
i, j = indice
angle_vector = np.array([tensor[i_image, i, j], tensor[i_image + 1, i, j]])
if length(angle_vector) > threshold_norm_vector:
angle = py_ang(angle_vector)
c = colorsys.hsv_to_rgb(angle / 360, 1, 1)
else:
c = [0, 0, 0]
for i_c in range(3):
images[i_image // 2, i_c, i, j] = c[i_c]
        if points is not None:
point = points[i_image // 2]
print(
int(point[1] * factor + translation[1]),
int(point[0] * factor + translation[0]),
)
images[
i_image // 2,
:,
int(point[1] * factor + translation[1])
- 1 : int(point[1] * factor + translation[1])
+ 1,
int(point[0] * factor + translation[0])
- 1 : int(point[0] * factor + translation[0])
+ 1,
] = 1
return images
def VisualizeBeliefMap(
tensor,
# tensor of len(keypoints)xwxh
points=None,
# list of points to draw on top of the image
factor=1.0,
# by how much the image was reduced, scale factor
translation=(0, 0)
# by how much the points were moved
# return len(keypoints)x3xwxh # stack of images in torch tensor
):
images = torch.zeros(tensor.shape[0], 3, tensor.shape[1], tensor.shape[2])
for i_image in range(0, tensor.shape[0]): # could be read as i_keypoint
belief = tensor[i_image].clone()
belief -= float(torch.min(belief).item())
belief /= float(torch.max(belief).item())
belief = torch.clamp(belief, 0, 1)
belief = torch.cat(
[belief.unsqueeze(0), belief.unsqueeze(0), belief.unsqueeze(0)]
).unsqueeze(0)
images[i_image] = belief
return images
def GenerateMapAffinity(
size, nb_vertex, pointsInterest, objects_centroid, scale, save=False
):
# Apply the downscale right now, so the vectors are correct.
img_affinity = Image.new("RGB", (int(size / scale), int(size / scale)), "black")
# create the empty tensors
totensor = transforms.Compose([transforms.ToTensor()])
affinities = []
for i_points in range(nb_vertex):
affinities.append(torch.zeros(2, int(size / scale), int(size / scale)))
for i_pointsImage in range(len(pointsInterest)):
pointsImage = pointsInterest[i_pointsImage]
center = objects_centroid[i_pointsImage]
for i_points in range(nb_vertex):
point = pointsImage[i_points]
affinity_pair, img_affinity = getAfinityCenter(
int(size / scale),
int(size / scale),
tuple((np.array(pointsImage[i_points]) / scale).tolist()),
tuple((np.array(center) / scale).tolist()),
img_affinity=img_affinity,
radius=1,
)
affinities[i_points] = (affinities[i_points] + affinity_pair) / 2
# Normalizing
v = affinities[i_points].numpy()
xvec = v[0]
yvec = v[1]
norms = np.sqrt(xvec * xvec + yvec * yvec)
nonzero = norms > 0
xvec[nonzero] /= norms[nonzero]
yvec[nonzero] /= norms[nonzero]
affinities[i_points] = torch.from_numpy(np.concatenate([[xvec], [yvec]]))
affinities = torch.cat(affinities, 0)
return affinities
def getAfinityCenter(
width, height, point, center, radius=7, tensor=None, img_affinity=None
):
"""
Create the affinity map
"""
if tensor is None:
tensor = torch.zeros(2, height, width).float()
    # create the canvas for the affinity output
imgAffinity = Image.new("RGB", (width, height), "black")
totensor = transforms.Compose([transforms.ToTensor()])
draw = ImageDraw.Draw(imgAffinity)
r1 = radius
p = point
draw.ellipse((p[0] - r1, p[1] - r1, p[0] + r1, p[1] + r1), (255, 255, 255))
del draw
    # compute the array holding the affinity contribution
array = (np.array(imgAffinity) / 255)[:, :, 0]
angle_vector = np.array(center) - np.array(point)
angle_vector = normalize(angle_vector)
affinity = np.concatenate([[array * angle_vector[0]], [array * angle_vector[1]]])
    if img_affinity is not None:
# find the angle vector
if length(angle_vector) > 0:
angle = py_ang(angle_vector)
else:
angle = 0
c = np.array(colorsys.hsv_to_rgb(angle / 360, 1, 1)) * 255
draw = ImageDraw.Draw(img_affinity)
draw.ellipse(
(p[0] - r1, p[1] - r1, p[0] + r1, p[1] + r1),
fill=(int(c[0]), int(c[1]), int(c[2])),
)
del draw
re = torch.from_numpy(affinity).float() + tensor
return re, img_affinity
def CreateBeliefMap(size, pointsBelief, nbpoints, sigma=16, save=False):
    # Create one belief map per keypoint index, with a Gaussian peak at each point
beliefsImg = []
for numb_point in range(nbpoints):
array = np.zeros([size, size])
out = np.zeros([size, size])
for point in pointsBelief:
p = [point[numb_point][1], point[numb_point][0]]
w = int(sigma * 2)
if p[0] - w >= 0 and p[0] + w < size and p[1] - w >= 0 and p[1] + w < size:
for i in range(int(p[0]) - w, int(p[0]) + w + 1):
for j in range(int(p[1]) - w, int(p[1]) + w + 1):
                        # keep the strongest response if another point already wrote here
array[i, j] = max(
np.exp(
-(
((i - p[0]) ** 2 + (j - p[1]) ** 2)
/ (2 * (sigma**2))
)
),
array[i, j],
)
beliefsImg.append(array.copy())
if save:
stack = np.stack([array, array, array], axis=0).transpose(2, 1, 0)
imgBelief = Image.fromarray((stack * 255).astype("uint8"))
imgBelief.save("debug/{}.png".format(numb_point))
return beliefsImg
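# Illustration (not part of the original file): each keypoint writes an
# unnormalized Gaussian, array[i, j] = exp(-((i - p_y)**2 + (j - p_x)**2) / (2 * sigma**2)),
# and overlapping objects keep the per-pixel maximum. A sketch with made-up inputs:
#   maps = CreateBeliefMap(size=100, pointsBelief=[[[50, 50]] * 9], nbpoints=9, sigma=2)
# returns nine 100x100 arrays, each peaking at 1.0 at pixel (50, 50).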
def crop(img, i, j, h, w):
"""Crop the given PIL.Image.
Args:
img (PIL.Image): Image to be cropped.
i: Upper pixel coordinate.
j: Left pixel coordinate.
h: Height of the cropped image.
w: Width of the cropped image.
Returns:
PIL.Image: Cropped image.
"""
return img.crop((j, i, j + w, i + h))
class AddRandomContrast(object):
"""
Apply some random image filters from PIL
"""
def __init__(self, sigma=0.1):
self.sigma = sigma
def __call__(self, im):
contrast = ImageEnhance.Contrast(im)
im = contrast.enhance(np.random.normal(1, self.sigma))
return im
class AddRandomBrightness(object):
"""
Apply some random image filters from PIL
"""
def __init__(self, sigma=0.1):
self.sigma = sigma
def __call__(self, im):
contrast = ImageEnhance.Brightness(im)
im = contrast.enhance(np.random.normal(1, self.sigma))
return im
class AddNoise(object):
"""Given mean: (R, G, B) and std: (R, G, B),
will normalize each channel of the torch.*Tensor, i.e.
channel = (channel - mean) / std
"""
def __init__(self, std=0.1):
self.std = std
def __call__(self, tensor):
# TODO: make efficient
t = torch.FloatTensor(tensor.size()).normal_(0, self.std)
t = tensor.add(t)
        t = torch.clamp(t, -1, 1)  # this is expensive
return t
irange = range  # keep a handle on the builtin; make_grid shadows `range` with a parameter
def make_grid(
tensor,
nrow=8,
padding=2,
normalize=False,
range=None,
scale_each=False,
pad_value=0,
):
"""Make a grid of images.
Args:
tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
or a list of images all of the same size.
nrow (int, optional): Number of images displayed in each row of the grid.
            The final grid size is (B / nrow, nrow). Default is 8.
padding (int, optional): amount of padding. Default is 2.
normalize (bool, optional): If True, shift the image to the range (0, 1),
by subtracting the minimum and dividing by the maximum pixel value.
range (tuple, optional): tuple (min, max) where min and max are numbers,
then these numbers are used to normalize the image. By default, min and max
are computed from the tensor.
scale_each (bool, optional): If True, scale each image in the batch of
images separately rather than the (min, max) over all images.
pad_value (float, optional): Value for the padded pixels.
Example:
See this notebook `here <https://gist.github.com/anonymous/bf16430f7750c023141c562f3e9f2a91>`_
"""
if not (
torch.is_tensor(tensor)
or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))
):
raise TypeError(
"tensor or list of tensors expected, got {}".format(type(tensor))
)
# if list of tensors, convert to a 4D mini-batch Tensor
if isinstance(tensor, list):
tensor = torch.stack(tensor, dim=0)
if tensor.dim() == 2: # single image H x W
tensor = tensor.view(1, tensor.size(0), tensor.size(1))
if tensor.dim() == 3: # single image
if tensor.size(0) == 1: # if single-channel, convert to 3-channel
tensor = torch.cat((tensor, tensor, tensor), 0)
tensor = tensor.view(1, tensor.size(0), tensor.size(1), tensor.size(2))
if tensor.dim() == 4 and tensor.size(1) == 1: # single-channel images
tensor = torch.cat((tensor, tensor, tensor), 1)
if normalize is True:
tensor = tensor.clone() # avoid modifying tensor in-place
if range is not None:
assert isinstance(
range, tuple
), "range has to be a tuple (min, max) if specified. min and max are numbers"
def norm_ip(img, min, max):
img.clamp_(min=min, max=max)
img.add_(-min).div_(max - min + 1e-5)
def norm_range(t, range):
if range is not None:
norm_ip(t, range[0], range[1])
else:
norm_ip(t, float(t.min()), float(t.max()))
if scale_each is True:
for t in tensor: # loop over mini-batch dimension
norm_range(t, range)
else:
norm_range(tensor, range)
if tensor.size(0) == 1:
return tensor.squeeze()
# make the mini-batch of images into a grid
nmaps = tensor.size(0)
xmaps = min(nrow, nmaps)
ymaps = int(math.ceil(float(nmaps) / xmaps))
height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
grid = tensor.new(3, height * ymaps + padding, width * xmaps + padding).fill_(
pad_value
)
k = 0
for y in irange(ymaps):
for x in irange(xmaps):
if k >= nmaps:
break
grid.narrow(1, y * height + padding, height - padding).narrow(
2, x * width + padding, width - padding
).copy_(tensor[k])
k = k + 1
return grid
def save_image(tensor, filename, nrow=4, padding=2, mean=None, std=None, save=True):
"""
Saves a given Tensor into an image file.
If given a mini-batch tensor, will save the tensor as a grid of images.
"""
tensor = tensor.cpu()
grid = make_grid(tensor, nrow=nrow, padding=10, pad_value=1)
    if mean is not None:
# ndarr = grid.mul(std).add(mean).mul(255).byte().transpose(0,2).transpose(0,1).numpy()
ndarr = (
grid.mul(std)
.add(mean)
.mul(255)
.byte()
.transpose(0, 2)
.transpose(0, 1)
.numpy()
)
else:
ndarr = (
grid.mul(0.5)
.add(0.5)
.mul(255)
.byte()
.transpose(0, 2)
.transpose(0, 1)
.numpy()
)
im = Image.fromarray(ndarr)
if save is True:
im.save(filename)
return im, grid
class Draw(object):
"""Drawing helper class to visualize the neural network output"""
def __init__(self, im):
"""
:param im: The image to draw in.
"""
self.draw = ImageDraw.Draw(im)
self.width = im.size[0]
def draw_line(self, point1, point2, line_color, line_width=2):
"""Draws line on image"""
if point1 is not None and point2 is not None:
self.draw.line([point1, point2], fill=line_color, width=line_width)
def draw_dot(self, point, point_color, point_radius):
"""Draws dot (filled circle) on image"""
if point is not None:
xy = [
point[0] - point_radius,
point[1] - point_radius,
point[0] + point_radius,
point[1] + point_radius,
]
self.draw.ellipse(xy, fill=point_color, outline=point_color)
def draw_text(self, point, text, text_color):
"""Draws text on image"""
if point is not None:
self.draw.text(point, text, fill=text_color, font=ImageFont.truetype("misc/arial.ttf", self.width // 50))
def draw_cube(self, points, color=(0, 255, 0)):
"""
Draws cube with a thick solid line across
the front top edge and an X on the top face.
"""
# draw front
self.draw_line(points[0], points[1], color)
self.draw_line(points[1], points[2], color)
self.draw_line(points[3], points[2], color)
self.draw_line(points[3], points[0], color)
# draw back
self.draw_line(points[4], points[5], color)
self.draw_line(points[6], points[5], color)
self.draw_line(points[6], points[7], color)
self.draw_line(points[4], points[7], color)
# draw sides
self.draw_line(points[0], points[4], color)
self.draw_line(points[7], points[3], color)
self.draw_line(points[5], points[1], color)
self.draw_line(points[2], points[6], color)
# draw dots
self.draw_dot(points[0], point_color=color, point_radius=4)
self.draw_dot(points[1], point_color=color, point_radius=4)
# draw x on the top
self.draw_line(points[0], points[5], color)
self.draw_line(points[1], points[4], color)
# Draw center
self.draw_dot(points[8], point_color=color, point_radius=6)
for i in range(9):
self.draw_text(points[i], str(i), (255, 0, 0))
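
For reference, a minimal sketch of how CleanVisiiDopeLoader is typically consumed (module path and dataset location are assumptions):

from torch.utils.data import DataLoader
from web_p.utils_dope import CleanVisiiDopeLoader

dataset = CleanVisiiDopeLoader(
    path_dataset=["/path/to/dataset"],  # folder tree of .png + .json pairs
    sigma=1,
    output_size=400,
)
loader = DataLoader(dataset, batch_size=8, shuffle=True)
batch = next(iter(loader))
# batch["img"]: normalized 3x400x400 image tensors
# batch["beliefs"]: 9 belief maps (8 cuboid corners + centroid)
# batch["affinities"]: 16 channels (8 vertex-to-centroid vectors, x and y)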