metafile.yaml 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. Models:
  2. - Name: vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512
  3. In Collection: UPerNet
  4. Results:
  5. Task: Semantic Segmentation
  6. Dataset: ADE20K
  7. Metrics:
  8. mIoU: 47.71
  9. mIoU(ms+flip): 49.51
  10. Config: configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py
  11. Metadata:
  12. Training Data: ADE20K
  13. Batch Size: 16
  14. Architecture:
  15. - ViT-B
  16. - UPerNet
  17. Training Resources: 8x V100 GPUs
  18. Memory (GB): 9.2
  19. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/upernet_vit-b16_mln_512x512_80k_ade20k_20210624_130547-0403cee1.pth
  20. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/20210624_130547.log.json
  21. Paper:
  22. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  23. URL: https://arxiv.org/pdf/2010.11929.pdf
  24. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  25. Framework: PyTorch
  26. - Name: vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512
  27. In Collection: UPerNet
  28. Results:
  29. Task: Semantic Segmentation
  30. Dataset: ADE20K
  31. Metrics:
  32. mIoU: 46.75
  33. mIoU(ms+flip): 48.46
  34. Config: configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py
  35. Metadata:
  36. Training Data: ADE20K
  37. Batch Size: 16
  38. Architecture:
  39. - ViT-B
  40. - UPerNet
  41. Training Resources: 8x V100 GPUs
  42. Memory (GB): 9.2
  43. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/upernet_vit-b16_mln_512x512_160k_ade20k_20210624_130547-852fa768.pth
  44. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/20210623_192432.log.json
  45. Paper:
  46. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  47. URL: https://arxiv.org/pdf/2010.11929.pdf
  48. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  49. Framework: PyTorch
  50. - Name: vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512
  51. In Collection: UPerNet
  52. Results:
  53. Task: Semantic Segmentation
  54. Dataset: ADE20K
  55. Metrics:
  56. mIoU: 47.73
  57. mIoU(ms+flip): 49.95
  58. Config: configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
  59. Metadata:
  60. Training Data: ADE20K
  61. Batch Size: 16
  62. Architecture:
  63. - ViT-B
  64. - UPerNet
  65. Training Resources: 8x V100 GPUs
  66. Memory (GB): 9.21
  67. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k_20210621_172828-f444c077.pth
  68. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/20210621_172828.log.json
  69. Paper:
  70. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  71. URL: https://arxiv.org/pdf/2010.11929.pdf
  72. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  73. Framework: PyTorch
  74. - Name: vit_deit-s16_upernet_8xb2-80k_ade20k-512x512
  75. In Collection: UPerNet
  76. Results:
  77. Task: Semantic Segmentation
  78. Dataset: ADE20K
  79. Metrics:
  80. mIoU: 42.96
  81. mIoU(ms+flip): 43.79
  82. Config: configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py
  83. Metadata:
  84. Training Data: ADE20K
  85. Batch Size: 16
  86. Architecture:
  87. - DeiT-S
  88. - UPerNet
  89. Training Resources: 8x V100 GPUs
  90. Memory (GB): 4.68
  91. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k_20210624_095228-afc93ec2.pth
  92. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210624_095228.log.json
  93. Paper:
  94. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  95. URL: https://arxiv.org/pdf/2010.11929.pdf
  96. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  97. Framework: PyTorch
  98. - Name: vit_deit-s16_upernet_8xb2-160k_ade20k-512x512
  99. In Collection: UPerNet
  100. Results:
  101. Task: Semantic Segmentation
  102. Dataset: ADE20K
  103. Metrics:
  104. mIoU: 42.87
  105. mIoU(ms+flip): 43.79
  106. Config: configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py
  107. Metadata:
  108. Training Data: ADE20K
  109. Batch Size: 16
  110. Architecture:
  111. - DeiT-S
  112. - UPerNet
  113. Training Resources: 8x V100 GPUs
  114. Memory (GB): 4.68
  115. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k_20210621_160903-5110d916.pth
  116. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210621_160903.log.json
  117. Paper:
  118. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  119. URL: https://arxiv.org/pdf/2010.11929.pdf
  120. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  121. Framework: PyTorch
  122. - Name: vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512
  123. In Collection: UPerNet
  124. Results:
  125. Task: Semantic Segmentation
  126. Dataset: ADE20K
  127. Metrics:
  128. mIoU: 43.82
  129. mIoU(ms+flip): 45.07
  130. Config: configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py
  131. Metadata:
  132. Training Data: ADE20K
  133. Batch Size: 16
  134. Architecture:
  135. - DeiT-S
  136. - UPerNet
  137. Training Resources: 8x V100 GPUs
  138. Memory (GB): 5.69
  139. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/upernet_deit-s16_mln_512x512_160k_ade20k_20210621_161021-fb9a5dfb.pth
  140. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/20210621_161021.log.json
  141. Paper:
  142. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  143. URL: https://arxiv.org/pdf/2010.11929.pdf
  144. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  145. Framework: PyTorch
  146. - Name: vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512
  147. In Collection: UPerNet
  148. Results:
  149. Task: Semantic Segmentation
  150. Dataset: ADE20K
  151. Metrics:
  152. mIoU: 43.52
  153. mIoU(ms+flip): 45.01
  154. Config: configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
  155. Metadata:
  156. Training Data: ADE20K
  157. Batch Size: 16
  158. Architecture:
  159. - DeiT-S
  160. - UPerNet
  161. Training Resources: 8x V100 GPUs
  162. Memory (GB): 5.69
  163. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k_20210621_161021-c0cd652f.pth
  164. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/20210621_161021.log.json
  165. Paper:
  166. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  167. URL: https://arxiv.org/pdf/2010.11929.pdf
  168. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  169. Framework: PyTorch
  170. - Name: vit_deit-b16_upernet_8xb2-80k_ade20k-512x512
  171. In Collection: UPerNet
  172. Results:
  173. Task: Semantic Segmentation
  174. Dataset: ADE20K
  175. Metrics:
  176. mIoU: 45.24
  177. mIoU(ms+flip): 46.73
  178. Config: configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py
  179. Metadata:
  180. Training Data: ADE20K
  181. Batch Size: 16
  182. Architecture:
  183. - DeiT-B
  184. - UPerNet
  185. Training Resources: 8x V100 GPUs
  186. Memory (GB): 7.75
  187. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k_20210624_130529-1e090789.pth
  188. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210624_130529.log.json
  189. Paper:
  190. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  191. URL: https://arxiv.org/pdf/2010.11929.pdf
  192. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  193. Framework: PyTorch
  194. - Name: vit_deit-b16_upernet_8xb2-160k_ade20k-512x512
  195. In Collection: UPerNet
  196. Results:
  197. Task: Semantic Segmentation
  198. Dataset: ADE20K
  199. Metrics:
  200. mIoU: 45.36
  201. mIoU(ms+flip): 47.16
  202. Config: configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py
  203. Metadata:
  204. Training Data: ADE20K
  205. Batch Size: 16
  206. Architecture:
  207. - DeiT-B
  208. - UPerNet
  209. Training Resources: 8x V100 GPUs
  210. Memory (GB): 7.75
  211. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k_20210621_180100-828705d7.pth
  212. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210621_180100.log.json
  213. Paper:
  214. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  215. URL: https://arxiv.org/pdf/2010.11929.pdf
  216. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  217. Framework: PyTorch
  218. - Name: vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512
  219. In Collection: UPerNet
  220. Results:
  221. Task: Semantic Segmentation
  222. Dataset: ADE20K
  223. Metrics:
  224. mIoU: 45.46
  225. mIoU(ms+flip): 47.16
  226. Config: configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py
  227. Metadata:
  228. Training Data: ADE20K
  229. Batch Size: 16
  230. Architecture:
  231. - DeiT-B
  232. - UPerNet
  233. Training Resources: 8x V100 GPUs
  234. Memory (GB): 9.21
  235. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/upernet_deit-b16_mln_512x512_160k_ade20k_20210621_191949-4e1450f3.pth
  236. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/20210621_191949.log.json
  237. Paper:
  238. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  239. URL: https://arxiv.org/pdf/2010.11929.pdf
  240. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  241. Framework: PyTorch
  242. - Name: vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512
  243. In Collection: UPerNet
  244. Results:
  245. Task: Semantic Segmentation
  246. Dataset: ADE20K
  247. Metrics:
  248. mIoU: 45.37
  249. mIoU(ms+flip): 47.23
  250. Config: configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
  251. Metadata:
  252. Training Data: ADE20K
  253. Batch Size: 16
  254. Architecture:
  255. - DeiT-B
  256. - UPerNet
  257. Training Resources: 8x V100 GPUs
  258. Memory (GB): 9.21
  259. Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k_20210623_153535-8a959c14.pth
  260. Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/20210623_153535.log.json
  261. Paper:
  262. Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale'
  263. URL: https://arxiv.org/pdf/2010.11929.pdf
  264. Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98
  265. Framework: PyTorch