# metafile.yml
#
# Metadata for the BLIP collection and the models that belong to it.
# "Collections" describes the shared training setup and paper; each entry
# under "Models" points back to it through "In Collection" and lists its
# reported results, weights URL and config path.
---
Collections:
  - Name: BLIP
    Metadata:
      Training Data:
        - COCO
        - VG
        - Conceptual Captions
        - Conceptual 12M
        - SBU captions
      Architecture:
        - Transformer
      Training Resources: 8x A100 GPUs
    Paper:
      # Folded scalar: the title contains ": ", so it cannot be a plain scalar.
      Title: >-
        BLIP: Bootstrapping Language-Image Pre-training for Unified
        Vision-Language Understanding and Generation
      URL: https://arxiv.org/abs/2201.12086
    README: configs/blip/README.md

Models:
  # Visual grounding on RefCOCO.
  - Name: blip-base_8xb16_refcoco
    Metadata:
      FLOPs: null  # no value recorded
      Parameters: 498488636
    In Collection: BLIP
    Results:
      - Task: Visual Grounding
        Dataset: RefCOCO
        Metrics:
          Accuracy (testA): 86.14
          Accuracy (testB): 77.33
    Weights: https://download.openmmlab.com/mmclassification/v1/blip/blip-base_8xb16_refcoco_20230508-d2d10f4c.pth
    Config: configs/blip/blip-base_8xb16_refcoco.py

  # Image captioning on COCO; "Converted From" records the upstream
  # checkpoint and code repository.
  - Name: blip-base_3rdparty_caption
    Metadata:
      FLOPs: null  # no value recorded
      Parameters: 223971644
    In Collection: BLIP
    Results:
      - Dataset: COCO
        Task: Image Caption
        Metrics:
          BLEU-4: 40.12
          CIDER: 132.82
    Weights: https://download.openmmlab.com/mmclassification/v1/blip/blip-base_3rdparty_coco-caption_20230419-a5b71af3.pth
    Config: configs/blip/blip-base_8xb32_caption.py
    Converted From:
      Weights: https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP/blip_coco_caption_base.pth
      Code: https://github.com/salesforce/LAVIS

  # NLVR on NLVR2.
  - Name: blip-base_3rdparty_nlvr
    Metadata:
      FLOPs: null  # no value recorded
      Parameters: 259372034
    In Collection: BLIP
    Results:
      - Task: NLVR
        Dataset: NLVR2
        Metrics:
          Top 1 Accuracy: 82.33
    Weights: https://download.openmmlab.com/mmclassification/v1/blip/blip-base_3rdparty_nlvr_20230427-3b14d33f.pth
    Config: configs/blip/blip-base_8xb32_nlvr.py
    Converted From:
      Weights: https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_nlvr.pth
      Code: https://github.com/salesforce/LAVIS

  # Visual question answering on VQAv2.
  - Name: blip-base_3rdparty_vqa
    Metadata:
      FLOPs: null  # no value recorded
      Parameters: 361478972
    In Collection: BLIP
    Results:
      - Task: Visual Question Answering
        Dataset: VQAv2
        Metrics:
          Accuracy: 78.2
    Weights: https://download.openmmlab.com/mmclassification/v1/blip/blip-base_3rdparty-capflit_vqa_20230505-81488941.pth
    Config: configs/blip/blip-base_8xb32_vqa.py
    Converted From:
      Weights: https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth
      Code: https://github.com/salesforce/LAVIS

  # Retrieval on COCO; one Results entry per direction
  # (image-to-text and text-to-image).
  - Name: blip-base_3rdparty_retrieval
    Metadata:
      FLOPs: null  # no value recorded
      Parameters: 447486979
    In Collection: BLIP
    Results:
      - Task: Image-To-Text Retrieval
        Dataset: COCO
        Metrics:
          Recall@1: 82.52
          Recall@5: 95.34
      - Task: Text-To-Image Retrieval
        Dataset: COCO
        Metrics:
          Recall@1: 64.82
          Recall@5: 86.28
    Weights: https://download.openmmlab.com/mmclassification/v1/blip/blip-base_3rdparty_coco-retrieval_20230419-a1804d2c.pth
    Config: configs/blip/blip-base_8xb32_retrieval.py
    Converted From:
      Weights: https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP/blip_coco_retrieval.pth
      Code: https://github.com/salesforce/LAVIS