diff --git a/Dockerfile b/Dockerfile
index 69a065d..3253c79 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,12 +1,13 @@
-FROM intelanalytics/ipex-llm-inference-cpp-xpu:2.2.0-SNAPSHOT
-
-ENV ZES_ENABLE_SYSMAN=1
-ENV OLLAMA_HOST=0.0.0.0:11434
+# NOTE(review): ':latest' is unpinned, so rebuilds are not reproducible; pin a release tag or digest once a known-good one is identified.
+FROM intelanalytics/ipex-llm-inference-cpp-xpu:latest
 
 RUN mkdir -p /llm/ollama; \
     cd /llm/ollama; \
     init-ollama;
-
 WORKDIR /llm/ollama
 
-ENTRYPOINT ["./ollama", "serve"]
+# Start through a wrapper script: it sources ipex-llm-init for the GPU type
+# named by the DEVICE environment variable and then launches 'ollama serve'.
+COPY commands.sh /llm/ollama/commands.sh
+RUN ["chmod", "+x", "/llm/ollama/commands.sh"]
+ENTRYPOINT ["/llm/ollama/commands.sh"]
diff --git a/README.md b/README.md
index de0800a..cfac50b 100644
--- a/README.md
+++ b/README.md
@@ -10,8 +10,6 @@ This repo illustrates the use of Ollama with support for Intel ARC GPU based via
 * Installed Docker and Docker-compose tools (for Linux) or Docker Desktop (for Windows)
 * Intel ARC series GPU. Tested with Intel ARC A770 16GB, Intel(R) Core(TM) Ultra 5 125H integrated GPU (Meteor Lake), and Intel(R) Core(TM) Intel Ultra 7 258V integrated GPU (Lunar Lake)
 
-*Note:* This branch uses the upstream ipex container published by Intel. See the alternate branch [alternate_base_image](https://github.com/mattcurf/ollama-intel-gpu/tree/alternate_base_image) for an equivalent Dockerfile which builds everything from the published packages directly.
-
 # Usage
 
 The following will build the Ollama with Intel ARC GPU support, and compose those with the public docker image based on OpenWEB UI from https://github.com/open-webui/open-webui
@@ -30,13 +28,10 @@ $ cd ollama-intel-gpu
 $ docker-compose -f docker-compose-wsl2.yml up
 ```
 
-*Note:* you will see the following message. This is expected and harmless, as the docker image 'ollama-intel-gpu' is built locally.
-```
-ollama-intel-gpu Warning pull access denied for ollama-intel-gpu, repository does not exist or may require 'docker login': denied: requested access to the resource is denied
-```
+*Note:* ipex-llm now requires setting the GPU type in the docker-compose.yml file. Set the `DEVICE` environment variable to `Arc` or `iGPU`. See https://github.com/intel/ipex-llm/blob/main/docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md for more information.
 
 Then launch your web browser to http://localhost:3000 to launch the web ui. Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3.1:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM
 
 # References
 * https://dgpu-docs.intel.com/driver/client/overview.html
-* https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/ollama_quickstart.html
+* https://github.com/intel/ipex-llm/blob/main/docs/mddocs/DockerGuides/README.md
diff --git a/commands.sh b/commands.sh
new file mode 100644
index 0000000..8d7df6d
--- /dev/null
+++ b/commands.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Container entrypoint: source ipex-llm-init for the GPU type named by DEVICE,
+# then start the Ollama server bound to 0.0.0.0:11434.
+
+# DEVICE is supplied by docker-compose.yml ('Arc' or 'iGPU'). Fall back to 'Arc',
+# the value docker-compose.yml sets, so an unset variable cannot leave --device without an argument.
+source ipex-llm-init --gpu --device "${DEVICE:-Arc}"
+export OLLAMA_HOST=0.0.0.0:11434
+cd /llm/ollama || exit 1
+# exec replaces this shell with Ollama so it receives SIGTERM from 'docker stop' directly.
+exec ./ollama serve
diff --git a/docker-compose.yml b/docker-compose.yml
index f06a16d..be050d2 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,19 +1,16 @@
-version: "3.9"
 services:
   ollama-intel-gpu:
     build:
       context: .
       dockerfile: Dockerfile
     container_name: ollama-intel-gpu
-    image: ollama-intel-gpu:latest
     restart: always
     devices:
       - /dev/dri:/dev/dri
     volumes:
-      - /tmp/.X11-unix:/tmp/.X11-unix
       - ollama-intel-gpu:/root/.ollama
     environment:
-      - DISPLAY=${DISPLAY}
+      - DEVICE=Arc
   ollama-webui:
     image: ghcr.io/open-webui/open-webui
     container_name: ollama-webui