diff --git a/Dockerfile b/Dockerfile index 3253c79..cf5fa32 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,32 @@ -FROM intelanalytics/ipex-llm-inference-cpp-xpu:latest +FROM ubuntu:24.04 +ENV DEBIAN_FRONTEND=noninteractive +ENV TZ=America/Los_Angeles -RUN mkdir -p /llm/ollama; \ - cd /llm/ollama; \ - init-ollama; -WORKDIR /llm/ollama +# Base packages +RUN apt update && \ + apt install --no-install-recommends -q -y \ + software-properties-common \ + ca-certificates \ + wget \ + ocl-icd-libopencl1 -COPY commands.sh /llm/ollama/commands.sh -RUN ["chmod", "+x", "/llm/ollama/commands.sh"] -ENTRYPOINT ["/llm/ollama/commands.sh"] +# Intel GPU compute user-space drivers +RUN mkdir -p /tmp/gpu && \ + cd /tmp/gpu && \ + wget https://github.com/oneapi-src/level-zero/releases/download/v1.19.2/level-zero_1.19.2+u24.04_amd64.deb && \ + wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-core-2_2.5.6+18417_amd64.deb && \ + wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-opencl-2_2.5.6+18417_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu_1.6.32224.5_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd_24.52.32224.5_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/libigdgmm12_22.5.5_amd64.deb && \ + dpkg -i *.deb && \ + rm *.deb + +# Install Ollama Portable Zip +RUN cd / && \ + wget https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250218-ubuntu.tgz && \ + tar xvf ollama-0.5.4-ipex-llm-2.2.0b20250218-ubuntu.tgz --strip-components=1 -C / + +ENV OLLAMA_HOST=0.0.0.0:11434 + +ENTRYPOINT ["/bin/bash", "/start-ollama.sh"] diff --git a/README.md b/README.md index cfac50b..322ee59 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,19 @@ # ollama-intel-gpu -This repo illustrates the use of 
Ollama with support for Intel ARC GPU based via ipex-llm. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux or Windows WSL2. +This repo illustrates the use of Ollama with support for Intel ARC GPUs via ipex-llm and the Ollama Portable ZIP. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux. + +## Important Note + +All ipex-llm-based Ollama defects should be reported directly to the ipex-llm project at https://github.com/intel/ipex-llm ## Screenshot ![screenshot](doc/screenshot.png) # Prerequisites -* Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 24.04), or Windows 11 with WSL2 (graphics driver [101.5445](https://www.intel.com/content/www/us/en/download/785597/intel-arc-iris-xe-graphics-windows.html) or newer). -* Installed Docker and Docker-compose tools (for Linux) or Docker Desktop (for Windows) -* Intel ARC series GPU. Tested with Intel ARC A770 16GB, Intel(R) Core(TM) Ultra 5 125H integrated GPU (Meteor Lake), and Intel(R) Core(TM) Intel Ultra 7 258V integrated GPU (Lunar Lake) - +* Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 24.04.2) +* Installed Docker and Docker-compose tools +* Intel ARC series GPU (tested with Intel ARC A770 16GB and Intel(R) Core(TM) Ultra 5 125H integrated GPU) + # Usage The following will build the Ollama with Intel ARC GPU support, and compose those with the public docker image based on OpenWEB UI from https://github.com/open-webui/open-webui @@ -21,16 +25,10 @@ $ cd ollama-intel-gpu $ docker compose up ``` -Windows WSL2: -```bash -$ git clone https://github.com/mattcurf/ollama-intel-gpu -$ cd ollama-intel-gpu -$ docker-compose -f docker-compose-wsl2.yml up -``` - -*Note:* ipex-llm now requires setting the GPU type in the docker-compose.yml file. 
Set variable DEVICE= to 'Arc' or 'iGPU'. See https://github.com/intel/ipex-llm/blob/main/docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md for more information. +*Note:* If you have multiple GPUs installed (like integrated and discrete), set the ONEAPI_DEVICE_SELECTOR environment variable in the docker compose file to select the intended device to use. Then launch your web browser to http://localhost:3000 to launch the web ui. Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3.1:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM # References -* [https://dgpu-docs.intel.com/driver/client/overview.html](https://github.com/intel/ipex-llm/blob/main/docs/mddocs/DockerGuides/README.md) +* https://dgpu-docs.intel.com/driver/client/overview.html +* https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_portablze_zip_quickstart.md diff --git a/docker-compose-wsl2.yml b/docker-compose-wsl2.yml deleted file mode 100644 index 7efe6c6..0000000 --- a/docker-compose-wsl2.yml +++ /dev/null @@ -1,35 +0,0 @@ -version: "3.9" -services: - ollama-intel-gpu: - build: - context: . 
- dockerfile: Dockerfile - container_name: ollama-intel-gpu - image: ollama-intel-gpu:latest - restart: always - devices: - - /dev/dri:/dev/dri - - /dev/dxg:/dev/dxg - volumes: - - /usr/lib/wsl:/usr/lib/wsl - - /tmp/.X11-unix:/tmp/.X11-unix - - ollama-intel-gpu:/root/.ollama - environment: - - DISPLAY=${DISPLAY} - ollama-webui: - image: ghcr.io/open-webui/open-webui - container_name: ollama-webui - volumes: - - ollama-webui:/app/backend/data - depends_on: - - ollama-intel-gpu - ports: - - ${OLLAMA_WEBUI_PORT-3000}:8080 - environment: - - OLLAMA_BASE_URL=http://ollama-intel-gpu:11434 - extra_hosts: - - host.docker.internal:host-gateway - restart: unless-stopped -volumes: - ollama-webui: {} - ollama-intel-gpu: {} diff --git a/docker-compose.yml b/docker-compose.yml index be050d2..117bac2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,7 +10,8 @@ services: volumes: - ollama-intel-gpu:/root/.ollama environment: - - DEVICE=Arc + - ONEAPI_DEVICE_SELECTOR=level_zero:0 + - IPEX_LLM_NUM_CTX=16384 ollama-webui: image: ghcr.io/open-webui/open-webui container_name: ollama-webui