services:
  ollama-intel-arc:
    #image: intelanalytics/ipex-llm-inference-cpp-xpu:latest
    #build:
    #  context: ./ipex-ollama
    #  dockerfile: Dockerfile
    #  args:
    #    IPEXLLM_RELEASE_REPO: ipex-llm/ipex-llm
    #    IPEXLLM_RELEASE_VERSON: v2.3.0
    #    IPEXLLM_PORTABLE_ZIP_FILENAME: ollama-ipex-llm-2.2.0-ubuntu.tgz
    image: intelanalytics/ipex-llm-inference-cpp-xpu:latest
    #image: docker.io/library/ollama-intel-gpu-ollama-intel-gpu
    container_name: ollama-intel-arc
    restart: unless-stopped
    devices:
      #- /dev/dri:/dev/dri
      - /dev/dri/renderD128:/dev/dri/renderD128
    volumes:
      - /tmp/.X11-unix:/tmp/.X11-unix
      - ollama-volume:/root/.ollama
    ports:
      - 11435:11434
    environment:
      - OLLAMA_NUM_PARALLEL=1
      - OLLAMA_DEFAULT_KEEPALIVE="6h"
      - OLLAMA_KEEP_ALIVE=24h
      - OLLAMA_MAX_LOADED_MODELS=2
      - OLLAMA_MAX_QUEUE=512
      - OLLAMA_MAX_VRAM=0
      #- no_proxy=localhost,127.0.0.1
      - OLLAMA_HOST=0.0.0.0
      #- DEVICE=Arc
      #- OLLAMA_INTEL_GPU=true
      - OLLAMA_NUM_GPU=999
      #- ZES_ENABLE_SYSMAN_DEVICE_INFO=1
      - DISPLAY=${DISPLAY-:0}

      # Available low_bit formats include sym_int4, sym_int8, fp16, etc.
      - USE_XETLA=OFF
      - ZES_ENABLE_SYSMAN=1

      # Intel-specific adjustments, see
      # https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/fastchat_quickstart.md
      - SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
      - ENABLE_SDP_FUSION=1

      # [optional] Under most circumstances the following environment variable improves performance,
      # but it can sometimes cause a performance regression instead.
      - SYCL_CACHE_PERSISTENT=1

      # For Intel Core™ Ultra Processors (Series 2) with processor number 2xxK or 2xxH (code name Arrow Lake):
      #- IPEX_LLM_NPU_ARL=1

      # For Intel Core™ Ultra Processors (Series 1) with processor number 1xxH (code name Meteor Lake):
      #- IPEX_LLM_NPU_MTL=1

      # Parallel workers
      #- OMP_NUM_THREADS=48

    command: sh -c 'mkdir -p /llm/ollama && cd /llm/ollama && init-ollama && exec ./ollama serve'
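
  # Optional sanity check (a sketch; assumes the intelanalytics image ships the
  # oneAPI `sycl-ls` tool):
  #   docker compose up -d ollama-intel-arc
  #   docker exec -it ollama-intel-arc sycl-ls   # the Arc GPU should show up as a Level Zero device
  #   docker logs -f ollama-intel-arc            # watch init-ollama run and the server start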

  open-webui:
    image: ghcr.io/open-webui/open-webui:latest
    container_name: open-webui
    volumes:
      - open-webui-volume:/app/backend/data
    depends_on:
      - ollama-intel-arc
    ports:
      - 4040:8080
    environment:
      - OLLAMA_BASE_URL=http://ollama-intel-arc:11434

      - WEBUI_AUTH=False
      #- ENABLE_OPENAI_API=False
      #- ENABLE_OLLAMA_API=True
      - ENABLE_IMAGE_GENERATION=True
      - IMAGE_GENERATION_ENGINE=automatic1111
      - IMAGE_GENERATION_MODEL=dreamshaper_8
      - IMAGE_SIZE=400x400
      - IMAGE_STEPS=8
      - AUTOMATIC1111_BASE_URL=http://sdnext-ipex:7860/
      - AUTOMATIC1111_CFG_SCALE=2
      - AUTOMATIC1111_SAMPLER=DPM++ SDE
      - AUTOMATIC1111_SCHEDULER=Karras
      - ENABLE_RAG_WEB_SEARCH=True

      - SCARF_NO_ANALYTICS=true
      - DO_NOT_TRACK=true
      - ANONYMIZED_TELEMETRY=false
    extra_hosts:
      - host.docker.internal:host-gateway
    restart: unless-stopped

volumes:
  ollama-volume: {}
  open-webui-volume: {}
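
# Minimal usage sketch, assuming this file is saved as docker-compose.yml
# (the model name below is only an example):
#   docker compose up -d
#   curl http://localhost:11435/api/pull -d '{"model": "llama3.2"}'   # Ollama API, published as 11435 on the host
#   # Open WebUI is then reachable at http://localhost:4040 (login disabled via WEBUI_AUTH=False)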