Vision-Language-Model/requirements.txt at main · Goutam16-Withcode/Vision-Language-Model · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Vision Language Models - Requirements
# Install with: pip install -r requirements.txt

# Core Deep Learning Stack
torch>=2.0.0
torchvision>=0.15.0
torchaudio>=2.0.0

# Transformers and Model Hub
transformers>=4.40.0
huggingface-hub>=0.19.0

# Image Processing
Pillow>=9.0.0
opencv-python>=4.8.0

# Numerical Computing
numpy>=1.24.0
scipy>=1.10.0

# Visualization
matplotlib>=3.7.0
seaborn>=0.12.0

# Data and Utilities
requests>=2.31.0
pyyaml>=6.0

# Jupyter and Notebooks
jupyter>=1.0.0
jupyterlab>=4.0.0
ipython>=8.0.0

# VLM-specific Utilities (Qwen-VL image/video input helpers)
qwen-vl-utils>=0.0.1

# Frontend (Streamlit)
streamlit>=1.28.0
streamlit-option-menu>=0.3.2

# Optional but recommended for performance
# Uncomment these if you have a CUDA-compatible GPU.

# For 8-bit / 4-bit quantization (memory-efficient model loading)
# bitsandbytes>=0.41.0

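# For Flash Attention 2 (faster inference)
# NOTE: flash-attn builds against the installed torch; install it after torch,
# e.g. `pip install flash-attn --no-build-isolation`.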
# flash-attn>=2.3.0

# Development and Debugging (Optional)
# black>=23.0.0
# pylint>=2.17.0
# ipdb>=0.13.0