metafile.yml 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. Collections:
  2. - Name: CLIP
  3. Metadata:
  4. Architecture:
  5. - Attention Dropout
  6. - Convolution
  7. - Dense Connections
  8. - Dropout
  9. - GELU
  10. - Layer Normalization
  11. - Multi-Head Attention
  12. - Scaled Dot-Product Attention
  13. - Tanh Activation
  14. Paper:
  15. Title: Learning Transferable Visual Models From Natural Language Supervision
  16. URL: https://arxiv.org/abs/2103.00020
  17. README: configs/clip/README.md
  18. Code:
  19. URL: https://github.com/open-mmlab/mmpretrain/blob/main/mmpretrain/models/backbones/vision_transformer.py
  20. Version: v1.0.0
  21. Models:
  22. - Name: vit-base-p32_clip-openai-pre_3rdparty_in1k
  23. Metadata:
  24. FLOPs: 4364335104
  25. Parameters: 88225000
  26. Training Data:
  27. - OpenAI
  28. - ImageNet-1k
  29. In Collection: CLIP
  30. Results:
  31. - Dataset: ImageNet-1k
  32. Metrics:
  33. Top 1 Accuracy: 81.77
  34. Top 5 Accuracy: 95.89
  35. Task: Image Classification
  36. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p32_openai-pre_3rdparty_in1k_20221220-a0182ba9.pth
  37. Config: configs/clip/vit-base-p32_pt-64xb64_in1k.py
  38. Converted From:
  39. Code: https://github.com/rwightman/pytorch-image-models
  40. Weights: https://huggingface.co/timm/vit_base_patch32_clip_224.openai_ft_in1k
  41. - Name: vit-base-p32_clip-laion2b-pre_3rdparty_in1k
  42. Metadata:
  43. FLOPs: 4364335104
  44. Parameters: 88225000
  45. Training Data:
  46. - LAION-2B
  47. - ImageNet-1k
  48. In Collection: CLIP
  49. Results:
  50. - Dataset: ImageNet-1k
  51. Metrics:
  52. Top 1 Accuracy: 82.46
  53. Top 5 Accuracy: 96.12
  54. Task: Image Classification
  55. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p32_laion2b-pre_3rdparty_in1k_20221220-194df57f.pth
  56. Config: configs/clip/vit-base-p32_pt-64xb64_in1k.py
  57. Converted From:
  58. Code: https://github.com/rwightman/pytorch-image-models
  59. Weights: https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in1k
  60. - Name: vit-base-p32_clip-laion2b-in12k-pre_3rdparty_in1k
  61. Metadata:
  62. FLOPs: 4364335104
  63. Parameters: 88225000
  64. Training Data:
  65. - LAION-2B
  66. - ImageNet-12k
  67. - ImageNet-1k
  68. In Collection: CLIP
  69. Results:
  70. - Dataset: ImageNet-1k
  71. Metrics:
  72. Top 1 Accuracy: 83.06
  73. Top 5 Accuracy: 96.49
  74. Task: Image Classification
  75. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p32_laion2b-in12k-pre_3rdparty_in1k_20221220-b384e830.pth
  76. Config: configs/clip/vit-base-p32_pt-64xb64_in1k.py
  77. Converted From:
  78. Code: https://github.com/rwightman/pytorch-image-models
  79. Weights: https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k
  80. - Name: vit-base-p32_clip-openai-in12k-pre_3rdparty_in1k-384px
  81. Metadata:
  82. FLOPs: 12661054464
  83. Parameters: 88225000
  84. Training Data:
  85. - OpenAI
  86. - ImageNet-12k
  87. - ImageNet-1k
  88. In Collection: CLIP
  89. Results:
  90. - Dataset: ImageNet-1k
  91. Metrics:
  92. Top 1 Accuracy: 85.13
  93. Top 5 Accuracy: 97.42
  94. Task: Image Classification
  95. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p32_openai-in12k-pre_3rdparty_in1k-384px_20221220-dc2e49ea.pth
  96. Config: configs/clip/vit-base-p32_pt-64xb64_in1k-384px.py
  97. Converted From:
  98. Code: https://github.com/rwightman/pytorch-image-models
  99. Weights: https://huggingface.co/timm/vit_base_patch32_clip_384.openai_ft_in12k_in1k
  100. - Name: vit-base-p32_clip-laion2b-in12k-pre_3rdparty_in1k-384px
  101. Metadata:
  102. FLOPs: 12661054464
  103. Parameters: 88225000
  104. Training Data:
  105. - LAION-2B
  106. - ImageNet-12k
  107. - ImageNet-1k
  108. In Collection: CLIP
  109. Results:
  110. - Dataset: ImageNet-1k
  111. Metrics:
  112. Top 1 Accuracy: 85.39
  113. Top 5 Accuracy: 97.67
  114. Task: Image Classification
  115. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p32_laion2b-in12k-pre_3rdparty_in1k-384px_20221220-c7757552.pth
  116. Config: configs/clip/vit-base-p32_pt-64xb64_in1k-384px.py
  117. Converted From:
  118. Code: https://github.com/rwightman/pytorch-image-models
  119. Weights: https://huggingface.co/timm/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k
  120. - Name: vit-base-p16_clip-openai-pre_3rdparty_in1k
  121. Metadata:
  122. FLOPs: 16855600128
  123. Parameters: 86568424
  124. Training Data:
  125. - OpenAI
  126. - ImageNet-1k
  127. In Collection: CLIP
  128. Results:
  129. - Dataset: ImageNet-1k
  130. Metrics:
  131. Top 1 Accuracy: 85.3
  132. Top 5 Accuracy: 97.5
  133. Task: Image Classification
  134. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p16_openai-pre_3rdparty_in1k_20221220-c7d9c899.pth
  135. Config: configs/clip/vit-base-p16_pt-64xb64_in1k.py
  136. Converted From:
  137. Code: https://github.com/rwightman/pytorch-image-models
  138. Weights: https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in1k
  139. - Name: vit-base-p16_clip-laion2b-pre_3rdparty_in1k
  140. Metadata:
  141. FLOPs: 16855600128
  142. Parameters: 86568424
  143. Training Data:
  144. - LAION-2B
  145. - ImageNet-1k
  146. In Collection: CLIP
  147. Results:
  148. - Dataset: ImageNet-1k
  149. Metrics:
  150. Top 1 Accuracy: 85.49
  151. Top 5 Accuracy: 97.59
  152. Task: Image Classification
  153. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p16_laion2b-pre_3rdparty_in1k_20221220-5e24ff58.pth
  154. Config: configs/clip/vit-base-p16_pt-64xb64_in1k.py
  155. Converted From:
  156. Code: https://github.com/rwightman/pytorch-image-models
  157. Weights: https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in1k
  158. - Name: vit-base-p16_clip-openai-in12k-pre_3rdparty_in1k
  159. Metadata:
  160. FLOPs: 16855600128
  161. Parameters: 86568424
  162. Training Data:
  163. - OpenAI
  164. - ImageNet-12k
  165. - ImageNet-1k
  166. In Collection: CLIP
  167. Results:
  168. - Dataset: ImageNet-1k
  169. Metrics:
  170. Top 1 Accuracy: 85.99
  171. Top 5 Accuracy: 97.72
  172. Task: Image Classification
  173. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p16_openai-in12k-pre_3rdparty_in1k_20221220-90d930a8.pth
  174. Config: configs/clip/vit-base-p16_pt-64xb64_in1k.py
  175. Converted From:
  176. Code: https://github.com/rwightman/pytorch-image-models
  177. Weights: https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in12k_in1k
  178. - Name: vit-base-p16_clip-laion2b-in12k-pre_3rdparty_in1k
  179. Metadata:
  180. FLOPs: 16855600128
  181. Parameters: 86568424
  182. Training Data:
  183. - LAION-2B
  184. - ImageNet-12k
  185. - ImageNet-1k
  186. In Collection: CLIP
  187. Results:
  188. - Dataset: ImageNet-1k
  189. Metrics:
  190. Top 1 Accuracy: 86.02
  191. Top 5 Accuracy: 97.76
  192. Task: Image Classification
  193. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p16_laion2b-in12k-pre_3rdparty_in1k_20221220-a5e31f8c.pth
  194. Config: configs/clip/vit-base-p16_pt-64xb64_in1k.py
  195. Converted From:
  196. Code: https://github.com/rwightman/pytorch-image-models
  197. Weights: https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k
  198. - Name: vit-base-p32_clip-laion2b-in12k-pre_3rdparty_in1k-448px
  199. Metadata:
  200. FLOPs: 17202416640
  201. Parameters: 88225000
  202. Training Data:
  203. - LAION-2B
  204. - ImageNet-12k
  205. - ImageNet-1k
  206. In Collection: CLIP
  207. Results:
  208. - Dataset: ImageNet-1k
  209. Metrics:
  210. Top 1 Accuracy: 85.76
  211. Top 5 Accuracy: 97.63
  212. Task: Image Classification
  213. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p32_laion2b-in12k-pre_3rdparty_in1k-448px_20221220-ca404a7d.pth
  214. Config: configs/clip/vit-base-p32_pt-64xb64_in1k-448px.py
  215. Converted From:
  216. Code: https://github.com/rwightman/pytorch-image-models
  217. Weights: https://huggingface.co/timm/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k
  218. - Name: vit-base-p16_clip-openai-pre_3rdparty_in1k-384px
  219. Metadata:
  220. FLOPs: 49370078208
  221. Parameters: 86568424
  222. Training Data:
  223. - OpenAI
  224. - ImageNet-1k
  225. In Collection: CLIP
  226. Results:
  227. - Dataset: ImageNet-1k
  228. Metrics:
  229. Top 1 Accuracy: 86.25
  230. Top 5 Accuracy: 97.9
  231. Task: Image Classification
  232. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p16_openai-pre_3rdparty_in1k-384px_20221220-eb012e87.pth
  233. Config: configs/clip/vit-base-p16_pt-64xb64_in1k-384px.py
  234. Converted From:
  235. Code: https://github.com/rwightman/pytorch-image-models
  236. Weights: https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in1k
  237. - Name: vit-base-p16_clip-laion2b-pre_3rdparty_in1k-384px
  238. Metadata:
  239. FLOPs: 49370078208
  240. Parameters: 86568424
  241. Training Data:
  242. - LAION-2B
  243. - ImageNet-1k
  244. In Collection: CLIP
  245. Results:
  246. - Dataset: ImageNet-1k
  247. Metrics:
  248. Top 1 Accuracy: 86.52
  249. Top 5 Accuracy: 97.97
  250. Task: Image Classification
  251. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p16_laion2b-pre_3rdparty_in1k-384px_20221220-558ed826.pth
  252. Config: configs/clip/vit-base-p16_pt-64xb64_in1k-384px.py
  253. Converted From:
  254. Code: https://github.com/rwightman/pytorch-image-models
  255. Weights: https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in1k
  256. - Name: vit-base-p16_clip-openai-in12k-pre_3rdparty_in1k-384px
  257. Metadata:
  258. FLOPs: 49370078208
  259. Parameters: 86568424
  260. Training Data:
  261. - OpenAI
  262. - ImageNet-12k
  263. - ImageNet-1k
  264. In Collection: CLIP
  265. Results:
  266. - Dataset: ImageNet-1k
  267. Metrics:
  268. Top 1 Accuracy: 86.87
  269. Top 5 Accuracy: 98.05
  270. Task: Image Classification
  271. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p16_openai-in12k-pre_3rdparty_in1k-384px_20221220-8df86b74.pth
  272. Config: configs/clip/vit-base-p16_pt-64xb64_in1k-384px.py
  273. Converted From:
  274. Code: https://github.com/rwightman/pytorch-image-models
  275. Weights: https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in12k_in1k
  276. - Name: vit-base-p16_clip-laion2b-in12k-pre_3rdparty_in1k-384px
  277. Metadata:
  278. FLOPs: 49370078208
  279. Parameters: 86568424
  280. Training Data:
  281. - LAION-2B
  282. - ImageNet-12k
  283. - ImageNet-1k
  284. In Collection: CLIP
  285. Results:
  286. - Dataset: ImageNet-1k
  287. Metrics:
  288. Top 1 Accuracy: 87.17
  289. Top 5 Accuracy: 98.02
  290. Task: Image Classification
  291. Weights: https://download.openmmlab.com/mmclassification/v0/clip/clip-vit-base-p16_laion2b-in12k-pre_3rdparty_in1k-384px_20221220-84ed0cc0.pth
  292. Config: configs/clip/vit-base-p16_pt-64xb64_in1k-384px.py
  293. Converted From:
  294. Code: https://github.com/rwightman/pytorch-image-models
  295. Weights: https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k
  296. - Name: vit-large-p14_clip-openai-pre_3rdparty
  297. Metadata:
  298. FLOPs: 59696580608
  299. Parameters: 303302656
  300. Training Data:
  301. - OpenAI
  302. In Collection: CLIP
  303. Weights: https://download.openmmlab.com/mmclassification/v0/clip/vit-large-p14_clip-openai-pre_3rdparty_20230517-95e2af0b.pth
  304. Config: configs/clip/vit-large-p14_headless.py
  305. Converted From:
  306. Code: https://github.com/mlfoundations/open_clip
  307. Weights: https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt