diff --git a/docker-compose.yml b/docker-compose.yml
index 04e2fbc..b897a5c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,21 +1,63 @@
 services:
   ollama-intel-arc:
+    #image: intelanalytics/ipex-llm-inference-cpp-xpu:latest
+    #build:
+    #  context: ./ipex-ollama
+    #  dockerfile: Dockerfile
+    #  args:
+    #    IPEXLLM_RELEASE_REPO: ipex-llm/ipex-llm
+    #    IPEXLLM_RELEASE_VERSON: v2.3.0
+    #    IPEXLLM_PORTABLE_ZIP_FILENAME: ollama-ipex-llm-2.2.0-ubuntu.tgz
     image: intelanalytics/ipex-llm-inference-cpp-xpu:latest
+    #image: docker.io/library/ollama-intel-gpu-ollama-intel-gpu
     container_name: ollama-intel-arc
     restart: unless-stopped
     devices:
-      - /dev/dri:/dev/dri
+      #- /dev/dri:/dev/dri
+      - /dev/dri/renderD128:/dev/dri/renderD128
     volumes:
+      - /tmp/.X11-unix:/tmp/.X11-unix
       - ollama-volume:/root/.ollama
     ports:
-      - 11434:11434
+      - 11435:11434
     environment:
-      - no_proxy=localhost,127.0.0.1
+      - OLLAMA_NUM_PARALLEL=1
+      - OLLAMA_DEFAULT_KEEPALIVE="6h"
+      - OLLAMA_KEEP_ALIVE=24h
+      - OLLAMA_MAX_LOADED_MODELS=2
+      - OLLAMA_MAX_QUEUE=512
+      - OLLAMA_MAX_VRAM=0
+      #- no_proxy=localhost,127.0.0.1
       - OLLAMA_HOST=0.0.0.0
-      - DEVICE=Arc
-      - OLLAMA_INTEL_GPU=true
+      #- DEVICE=Arc
+      #- OLLAMA_INTEL_GPU=true
       - OLLAMA_NUM_GPU=999
+      #- ZES_ENABLE_SYSMAN_DEVICE_INFO=1
+      - DISPLAY=${DISPLAY-:0}
+
+      # # Available low_bit format including sym_int4, sym_int8, fp16 etc.
+      - USE_XETLA=OFF
       - ZES_ENABLE_SYSMAN=1
+
+      # Add some intel specific adjustments
+      # https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/fastchat_quickstart.md
+
+      - SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+      - ENABLE_SDP_FUSION=1
+
+      # [optional] under most circumstances, the following environment variable may improve performance,
+      # but sometimes this may also cause performance degradation
+      - SYCL_CACHE_PERSISTENT=1
+
+      # For Intel Core™ Ultra Processors (Series 2) with processor number 2xxK or 2xxH (code name Arrow Lake):
+      #- IPEX_LLM_NPU_ARL=1
+
+      # For Intel Core™ Ultra Processors (Series 1) with processor number 1xxH (code name Meteor Lake):
+      #- IPEX_LLM_NPU_MTL=1
+
+      # Parallel workers
+      #- OMP_NUM_THREADS=48
+
     command: sh -c 'mkdir -p /llm/ollama && cd /llm/ollama && init-ollama && exec ./ollama serve'

   open-webui:
@@ -28,9 +70,11 @@ services:
     ports:
       - 4040:8080
     environment:
+      - OLLAMA_BASE_URL=http://ollama-intel-arc:11434
+      - WEBUI_AUTH=False
-      - ENABLE_OPENAI_API=False
-      - ENABLE_OLLAMA_API=True
+      #- ENABLE_OPENAI_API=False
+      #- ENABLE_OLLAMA_API=True
       - ENABLE_IMAGE_GENERATION=True
       - IMAGE_GENERATION_ENGINE=automatic1111
       - IMAGE_GENERATION_MODEL=dreamshaper_8
@@ -40,6 +84,11 @@
       - AUTOMATIC1111_CFG_SCALE=2
       - AUTOMATIC1111_SAMPLER=DPM++ SDE
       - AUTOMATIC1111_SCHEDULER=Karras
+      - ENABLE_RAG_WEB_SEARCH=True
+
+      - SCARF_NO_ANALYTICS=true
+      - DO_NOT_TRACK=true
+      - ANONYMIZED_TELEMETRY=false
     extra_hosts:
       - host.docker.internal:host-gateway
     restart: unless-stopped
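
A quick way to sanity-check the result (a minimal sketch, assuming the stack is launched from the directory holding this docker-compose.yml and that the Docker Compose v2 CLI is available):

    # Start both services in the background
    $ docker compose up -d

    # Ollama is now published on host port 11435 (mapped to container port 11434),
    # so host-side checks must use the remapped port
    $ curl http://localhost:11435/api/version

    # Confirm the render node was actually passed through to the container
    $ docker exec ollama-intel-arc ls -l /dev/dri

Note that Open WebUI reaches Ollama over the compose network via OLLAMA_BASE_URL=http://ollama-intel-arc:11434, i.e. the container port; only clients on the host need the 11435 remap.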