Models
tenant | namespace | model name | gpu count | vram (GB) | cpu | memory (GB) | standby gpu | standby pageable | standby pinned | state | snapshot nodes | revision |
---|---|---|---|---|---|---|---|---|---|---|---|---|
public | BAAI | Aquila-7B | 2 | 13.0 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 172 |
public | Deci | DeciLM-7B | 2 | 13.0 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 190 |
public | EleutherAI | pythia-12b | 2 | 14.2 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 205 |
public | OpenAssistant | oasst-sft-4-pythia-12b-epoch-3.5 | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 216 |
public | Qwen | Qwen2.5-1.5B | 1 | 8.0 | 12.0 | 18.0 | Blob | Blob | Blob | Normal | ['node2'] | 146 |
public | Qwen | Qwen2.5-7B-Instruct-1M | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 156 |
public | Qwen | Qwen2.5-7B-Instruct-GPTQ-Int8 | 1 | 14.2 | 20.0 | 30.0 | Blob | Blob | Blob | Normal | ['node2'] | 144 |
public | Qwen | Qwen2.5-Coder-1.5B-Instruct | 1 | 6.0 | 12.0 | 18.0 | Blob | Blob | Blob | Normal | ['node2'] | 1208 |
public | Qwen | Qwen2.5-Coder-14B-Instruct-GPTQ-Int8 | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 142 |
public | Qwen | Qwen2.5-Coder-3B | 1 | 10.0 | 12.0 | 18.0 | Blob | Blob | Blob | Normal | ['node2'] | 137 |
public | Qwen | Qwen2.5-Coder-7B-Instruct | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 140 |
public | Qwen | Qwen2.5-Math-1.5B | 1 | 8.0 | 12.0 | 18.0 | Blob | Blob | Blob | Normal | ['node2'] | 150 |
public | Qwen | Qwen2.5-Math-1.5B-Instruct | 1 | 8.0 | 12.0 | 20.0 | Blob | Blob | Blob | Normal | ['node2'] | 825 |
public | Qwen | Qwen2.5-Math-7B | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 154 |
public | Qwen | Qwen2.5-Math-7B-Instruct | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 841 |
public | Salesforce | codegen-2B-multi | 1 | 13.0 | 20.0 | 12.0 | Blob | Blob | Blob | Normal | ['node2'] | 239 |
public | THUDM | chatglm3-6b | 1 | 13.8 | 12.0 | 20.0 | Blob | Blob | Blob | Normal | ['node2'] | 160 |
public | THUDM | chatglm3-6b-128k | 1 | 13.8 | 12.0 | 20.0 | Blob | Blob | Blob | Normal | ['node2'] | 164 |
public | THUDM | chatglm3-6b-32k | 1 | 13.8 | 12.0 | 20.0 | Blob | Blob | Blob | Normal | ['node2'] | 162 |
public | allenai | OLMo-1B-hf | 1 | 14.6 | 12.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 219 |
public | allenai | OLMo-1B-hf_2gpu | 2 | 14.6 | 12.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 221 |
public | allenai | OLMo-7B-hf | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 223 |
public | baichuan-inc | Baichuan-7B | 2 | 13.8 | 20.0 | 60.0 | Blob | Blob | Blob | Normal | ['node2'] | 175 |
public | baichuan-inc | Baichuan2-7B-Chat | 2 | 13.8 | 20.0 | 60.0 | Blob | Blob | Blob | Normal | ['node2'] | 177 |
public | bigcode | starcoder2-3b | 1 | 13.8 | 12.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 242 |
public | bigcode | starcoder2-7b | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 244 |
public | databricks | dolly-v2-12b | 2 | 14.0 | 20.0 | 90.0 | Blob | Blob | Blob | Normal | ['node2'] | 213 |
public | deepseek-ai | DeepSeek-R1-Distill-Llama-8B | 2 | 13.8 | 20.0 | 60.0 | Blob | Blob | Blob | Normal | ['node2'] | 230 |
public | deepseek-ai | DeepSeek-R1-Distill-Qwen-1.5B | 1 | 13.0 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 232 |
public | deepseek-ai | DeepSeek-R1-Distill-Qwen-7B | 2 | 13.8 | 20.0 | 60.0 | Blob | Blob | Blob | Normal | ['node2'] | 234 |
public | deepseek-ai | deepseek-llm-7b-chat | 1 | 14.6 | 20.0 | 60.0 | Blob | Blob | Blob | Normal | ['node2'] | 226 |
public | deepseek-ai | deepseek-llm-7b-chat_2gpu | 2 | 14.2 | 20.0 | 60.0 | Blob | Blob | Blob | Normal | ['node2'] | 228 |
public | deepseek-ai | deepseek-math-7b-instruct | 2 | 13.8 | 20.0 | 60.0 | Blob | Blob | Blob | Normal | ['node2'] | 236 |
public | | opt-iml-max-1.3b | 1 | 3.8 | 12.0 | 15.0 | Mem | File | Mem | Normal | ['node2'] | 127 |
public | microsoft | Phi-3-mini-128k-instruct | 1 | 13.0 | 12.0 | 18.0 | Blob | Blob | Blob | Normal | ['node2'] | 187 |
public | microsoft | Phi-3-mini-4k-instruct | 1 | 13.0 | 12.0 | 18.0 | Blob | Blob | Blob | Normal | ['node2'] | 185 |
public | mosaicml | mpt-7b | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 180 |
public | mosaicml | mpt-7b-storywriter | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 182 |
public | nomic-ai | gpt4all-j | 2 | 13.8 | 20.0 | 50.0 | Blob | Blob | Blob | Normal | ['node2'] | 202 |
public | openai-community | gpt2-xl | 1 | 12.0 | 12.0 | 18.0 | Blob | Blob | Blob | Normal | ['node2'] | 196 |
public | openbmb | MiniCPM-2B-dpo-bf16 | 1 | 13.8 | 12.0 | 28.0 | Blob | Blob | Blob | Normal | ['node2'] | 208 |
public | openbmb | MiniCPM-2B-sft-bf16 | 1 | 9.0 | 12.0 | 24.0 | Blob | Blob | Blob | Normal | ['node2'] | 210 |
public | tiiuae | falcon-rw-7b | 2 | 13.8 | 12.0 | 80.0 | Blob | Blob | Blob | Normal | ['node2'] | 193 |
Summary
Model Count |
43 |
Required GPU Count |
66 |
Required VRAM (GB) |
864.4 GB |
Required CPU Cores |
716.0 |
Required Memory (GB) |
1805.0 GB |