Implement TripletMarginLoss
Open
long10024070
opened this issue 9 months ago
•
0 comments
- Added TripletMarginLoss forward and backward operations and kernels.
- Added driver test and gtest for TripletMarginLoss forward and backward operations.
- New API is guarded by MIOPEN_BETA_API macro.
- Compared to ROCm pytorch:
float16
| op_name | dtype | size | contiguous | reduction | model | direction | ROCm pytorch | MIOpen HIP | Improvement |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| TripletMarginLoss | float16 | [65536 1] | contiguous | sum | dlrm | fwd | 65034 | 33852 | 1.92 |
| TripletMarginLoss | float16 | [65536 1] | contiguous | sum | dlrm | bwd | 44971 | 38021 | 1.18 |
| TripletMarginLoss | float16 | [65536 1] | noncontiguous | sum | dlrm | fwd | 62898 | 33858 | 1.86 |
| TripletMarginLoss | float16 | [65536 1] | noncontiguous | sum | dlrm | bwd | 44650 | 38009 | 1.17 |
| TripletMarginLoss | float16 | [65536 1] | contiguous | none | dlrm | fwd | 48480 | 24237 | 2.00 |
| TripletMarginLoss | float16 | [65536 1] | contiguous | none | dlrm | bwd | 47677 | 37775 | 1.26 |
| TripletMarginLoss | float16 | [65536 1] | noncontiguous | none | dlrm | fwd | 48715 | 24257 | 2.01 |
| TripletMarginLoss | float16 | [65536 1] | noncontiguous | none | dlrm | bwd | 47794 | 37756 | 1.27 |
| TripletMarginLoss | float16 | [837 80] | contiguous | sum | yolor | fwd | 53661 | 38085 | 1.41 |
| TripletMarginLoss | float16 | [837 80] | contiguous | sum | yolor | bwd | 59386 | 45335 | 1.31 |
| TripletMarginLoss | float16 | [837 80] | noncontiguous | sum | yolor | fwd | 70712 | 38413 | 1.84 |
| TripletMarginLoss | float16 | [837 80] | noncontiguous | sum | yolor | bwd | 75072 | 46313 | 1.62 |
| TripletMarginLoss | float16 | [837 80] | contiguous | none | yolor | fwd | 49939 | 30816 | 1.62 |
| TripletMarginLoss | float16 | [837 80] | contiguous | none | yolor | bwd | 61778 | 45844 | 1.35 |
| TripletMarginLoss | float16 | [837 80] | noncontiguous | none | yolor | fwd | 58250 | 31171 | 1.87 |
| TripletMarginLoss | float16 | [837 80] | noncontiguous | none | yolor | bwd | 73457 | 47787 | 1.54 |
| TripletMarginLoss | float16 | [800 4077] | noncontiguous | sum | yolov5 | fwd | 434657 | 294023 | 1.48 |
float32
| op_name | dtype | size | contiguous | reduction | model | direction | ROCm pytorch | MIOpen HIP | Improvement |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| TripletMarginLoss | float32 | [65536 1] | contiguous | sum | dlrm | fwd | 61302 | 37046 | 1.65 |
| TripletMarginLoss | float32 | [65536 1] | contiguous | sum | dlrm | bwd | 44412 | 41185 | 1.08 |
| TripletMarginLoss | float32 | [65536 1] | noncontiguous | sum | dlrm | fwd | 59329 | 36716 | 1.62 |
| TripletMarginLoss | float32 | [65536 1] | noncontiguous | sum | dlrm | bwd | 43724 | 40715 | 1.07 |
| TripletMarginLoss | float32 | [65536 1] | contiguous | none | dlrm | fwd | 43890 | 25435 | 1.73 |
| TripletMarginLoss | float32 | [65536 1] | contiguous | none | dlrm | bwd | 42613 | 40474 | 1.05 |
| TripletMarginLoss | float32 | [65536 1] | noncontiguous | none | dlrm | fwd | 43762 | 25906 | 1.69 |
| TripletMarginLoss | float32 | [65536 1] | noncontiguous | none | dlrm | bwd | 42293 | 40117 | 1.05 |
| TripletMarginLoss | float32 | [837 80] | contiguous | sum | yolor | fwd | 50833 | 42107 | 1.21 |
| TripletMarginLoss | float32 | [837 80] | contiguous | sum | yolor | bwd | 61047 | 48437 | 1.26 |
| TripletMarginLoss | float32 | [837 80] | noncontiguous | sum | yolor | fwd | 65593 | 43276 | 1.52 |
| TripletMarginLoss | float32 | [837 80] | noncontiguous | sum | yolor | bwd | 72956 | 49467 | 1.47 |
| TripletMarginLoss | float32 | [837 80] | contiguous | none | yolor | fwd | 42565 | 33001 | 1.29 |
| TripletMarginLoss | float32 | [837 80] | contiguous | none | yolor | bwd | 60714 | 47565 | 1.28 |
| TripletMarginLoss | float32 | [837 80] | noncontiguous | none | yolor | fwd | 57894 | 33285 | 1.74 |
| TripletMarginLoss | float32 | [837 80] | noncontiguous | none | yolor | bwd | 72890 | 48310 | 1.51 |
| TripletMarginLoss | float32 | [800 4077] | noncontiguous | none | yolov5 | fwd | 392713 | 319432 | 1.23 |
bfloat16
| op_name | dtype | size | contiguous | reduction | model | direction | ROCm pytorch | MIOpen HIP | Improvement |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| TripletMarginLoss | bfloat16 | [65536 1] | contiguous | sum | dlrm | fwd | 72253 | 34144 | 2.12 |
| TripletMarginLoss | bfloat16 | [65536 1] | contiguous | sum | dlrm | bwd | 51438 | 38398 | 1.34 |
| TripletMarginLoss | bfloat16 | [65536 1] | noncontiguous | sum | dlrm | fwd | 69197 | 34119 | 2.03 |
| TripletMarginLoss | bfloat16 | [65536 1] | noncontiguous | sum | dlrm | bwd | 50558 | 38416 | 1.32 |
| TripletMarginLoss | bfloat16 | [65536 1] | contiguous | none | dlrm | fwd | 55342 | 24494 | 2.26 |
| TripletMarginLoss | bfloat16 | [65536 1] | contiguous | none | dlrm | bwd | 53934 | 38079 | 1.42 |
| TripletMarginLoss | bfloat16 | [65536 1] | noncontiguous | none | dlrm | fwd | 54701 | 24466 | 2.24 |
| TripletMarginLoss | bfloat16 | [65536 1] | noncontiguous | none | dlrm | bwd | 53966 | 38216 | 1.41 |
| TripletMarginLoss | bfloat16 | [837 80] | contiguous | sum | yolor | fwd | 58366 | 38314 | 1.52 |
| TripletMarginLoss | bfloat16 | [837 80] | contiguous | sum | yolor | bwd | 68077 | 45703 | 1.49 |
| TripletMarginLoss | bfloat16 | [837 80] | noncontiguous | sum | yolor | fwd | 69134 | 38680 | 1.79 |
| TripletMarginLoss | bfloat16 | [837 80] | noncontiguous | sum | yolor | bwd | 81965 | 46648 | 1.76 |
| TripletMarginLoss | bfloat16 | [837 80] | contiguous | none | yolor | fwd | 51070 | 31024 | 1.65 |
| TripletMarginLoss | bfloat16 | [837 80] | contiguous | none | yolor | bwd | 70349 | 47070 | 1.49 |
| TripletMarginLoss | bfloat16 | [837 80] | noncontiguous | none | yolor | fwd | 62910 | 31579 | 1.99 |
| TripletMarginLoss | bfloat16 | [837 80] | noncontiguous | none | yolor | bwd | 84189 | 47273 | 1.78 |
| TripletMarginLoss | bfloat16 | [800 4077] | noncontiguous | none | yolov5 | fwd | 428386 | 290112 | 1.48 |
| type | average |
| --- | --- |
| float16 | 1.74 |
| float32 | 1.56 |
| bfloat16 | 1.86 |