# metafile.yml
# Model-index metadata for BLIP-2: one collection entry describing the
# method, followed by one entry per released checkpoint.

Collections:
  - Name: BLIP-2
    Metadata:
      Training Data:
        - COCO
        - VG
        - CC3M
        - CC12M
        - SBU
        - LAION-400M
      Training Resources: 8x A100 GPUs
      Architecture:
        - Transformer
        - Q-Former
    Paper:
      # Quoted because the title contains ': '; the continuation line folds
      # into the same scalar with a single space.
      Title: 'BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image
        Encoders and Large Language Models'
      URL: https://arxiv.org/abs/2301.12597
    README: configs/blip2/README.md

Models:
  # Image-text retrieval checkpoint.
  - Name: blip2_3rdparty_retrieval
    Metadata:
      FLOPs: null
      Parameters: 1173191358
    In Collection: BLIP-2
    Results:
      - Task: Image-To-Text Retrieval
        Dataset: COCO
        Metrics:
          Recall@1: 85.4
      - Task: Text-To-Image Retrieval
        Dataset: COCO
        Metrics:
          Recall@1: 68.3
    Weights: https://download.openmmlab.com/mmclassification/v1/blip2/blip2_3rdparty_pretrain_20230505-f7ef4390.pth
    Config: configs/blip2/blip2_8xb32_retrieval.py
    Converted From:
      # NOTE(review): same upstream URL as the OPT-2.7B entries below —
      # confirm this is the intended source checkpoint for the retrieval model.
      Weights: https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained_opt2.7b.pth
      Code: https://github.com/salesforce/LAVIS

  # Zero-shot visual question answering with the OPT-2.7B language model.
  - Name: blip2-opt2.7b_3rdparty-zeroshot_vqa
    Metadata:
      FLOPs: null
      Parameters: 3770465152
    In Collection: BLIP-2
    Results:
      - Task: Visual Question Answering
        Dataset: VQAv2
        Metrics:
          Accuracy: 53.5
    Weights: https://download.openmmlab.com/mmclassification/v1/blip2/blip2-opt2.7b_3rdparty_pretrain_20230505-b51db4e1.pth
    Config: configs/blip2/blip2-opt2.7b_8xb16_vqa.py
    Converted From:
      Weights: https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained_opt2.7b.pth
      Code: https://github.com/salesforce/LAVIS

  # Zero-shot image captioning; shares its weights file with the VQA entry.
  - Name: blip2-opt2.7b_3rdparty-zeroshot_caption
    Metadata:
      FLOPs: null
      Parameters: 3770465152
    In Collection: BLIP-2
    Results:
      - Task: Image Caption
        Dataset: COCO
        Metrics:
          BLEU-4: 32.90
          CIDER: 111.10
    Weights: https://download.openmmlab.com/mmclassification/v1/blip2/blip2-opt2.7b_3rdparty_pretrain_20230505-b51db4e1.pth
    Config: configs/blip2/blip2-opt2.7b_8xb32_caption.py
    Converted From:
      Weights: https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained_opt2.7b.pth
      Code: https://github.com/salesforce/LAVIS