Merge pull request #39 from mattcurf/ollama_portable_zip

This commit is contained in:
Matt Curfman
2025-02-21 22:31:41 -08:00
committed by GitHub
4 changed files with 44 additions and 58 deletions
+30 -8
View File
@@ -1,10 +1,32 @@
FROM intelanalytics/ipex-llm-inference-cpp-xpu:latest
FROM ubuntu:24.04
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=america/los_angeles
RUN mkdir -p /llm/ollama; \
cd /llm/ollama; \
init-ollama;
WORKDIR /llm/ollama
# Base packages
# Uses apt-get (apt's CLI is not intended for scripts) and removes the package
# index in the same layer so it does not add to the image size.
RUN apt-get update && \
    apt-get install --no-install-recommends -q -y \
      ca-certificates \
      ocl-icd-libopencl1 \
      software-properties-common \
      wget && \
    rm -rf /var/lib/apt/lists/*
COPY commands.sh /llm/ollama/commands.sh
RUN ["chmod", "+x", "/llm/ollama/commands.sh"]
ENTRYPOINT ["/llm/ollama/commands.sh"]
# Intel GPU compute user-space drivers
# Downloads the pinned Level Zero, Intel Graphics Compiler (IGC) and
# compute-runtime .deb packages into a scratch directory, installs them with
# dpkg, then removes the whole scratch directory in the same layer.
RUN mkdir -p /tmp/gpu && \
    cd /tmp/gpu && \
    wget https://github.com/oneapi-src/level-zero/releases/download/v1.19.2/level-zero_1.19.2+u24.04_amd64.deb && \
    wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-core-2_2.5.6+18417_amd64.deb && \
    wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-opencl-2_2.5.6+18417_amd64.deb && \
    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu_1.6.32224.5_amd64.deb && \
    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd_24.52.32224.5_amd64.deb && \
    wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/libigdgmm12_22.5.5_amd64.deb && \
    # "./" prefix keeps a file name starting with "-" from being read as an option
    dpkg -i ./*.deb && \
    cd / && \
    rm -rf /tmp/gpu
# Install Ollama Portable Zip
# The archive is unpacked into / with its top-level directory stripped, then
# deleted in the same layer so the tarball itself does not stay in the image.
RUN cd / && \
    wget https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250218-ubuntu.tgz && \
    tar xvf ollama-0.5.4-ipex-llm-2.2.0b20250218-ubuntu.tgz --strip-components=1 -C / && \
    rm ollama-0.5.4-ipex-llm-2.2.0b20250218-ubuntu.tgz
# NOTE(review): OLLAMA_HOST presumably sets the address:port the Ollama server
# binds to (all interfaces, port 11434) — confirm against the Ollama docs.
ENV OLLAMA_HOST=0.0.0.0:11434
# Container start command, run through bash. /start-ollama.sh is not created
# in this file; presumably it comes from the archive unpacked into / above —
# TODO confirm.
ENTRYPOINT ["/bin/bash", "/start-ollama.sh"]
+11 -13
View File
@@ -1,14 +1,18 @@
# ollama-intel-gpu
This repo illustrates the use of Ollama with support for Intel ARC GPU based via ipex-llm. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux or Windows WSL2.
This repo illustrates the use of Ollama on Intel ARC GPUs via ipex-llm and its Ollama Portable ZIP support. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux.
## Important Note
All Ollama based ipex-llm defects should be reported directly to the ipex-llm project at https://github.com/intel/ipex-llm
## Screenshot
![screenshot](doc/screenshot.png)
# Prerequisites
* Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 24.04), or Windows 11 with WSL2 (graphics driver [101.5445](https://www.intel.com/content/www/us/en/download/785597/intel-arc-iris-xe-graphics-windows.html) or newer).
* Installed Docker and Docker-compose tools (for Linux) or Docker Desktop (for Windows)
* Intel ARC series GPU. Tested with Intel ARC A770 16GB, Intel(R) Core(TM) Ultra 5 125H integrated GPU (Meteor Lake), and Intel(R) Core(TM) Intel Ultra 7 258V integrated GPU (Lunar Lake)
* Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 24.04.2)
* Installed Docker and Docker-compose tools
* Intel ARC series GPU (tested with Intel ARC A770 16GB and Intel(R) Core(TM) Ultra 5 125H integrated GPU)
# Usage
@@ -21,16 +25,10 @@ $ cd ollama-intel-gpu
$ docker compose up
```
Windows WSL2:
```bash
$ git clone https://github.com/mattcurf/ollama-intel-gpu
$ cd ollama-intel-gpu
$ docker-compose -f docker-compose-wsl2.yml up
```
*Note:* ipex-llm now requires setting the GPU type in the docker-compose.yml file. Set variable DEVICE= to 'Arc' or 'iGPU'. See https://github.com/intel/ipex-llm/blob/main/docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md for more information.
*Note:* If you have multiple GPUs installed (for example, integrated and discrete), set the ONEAPI_DEVICE_SELECTOR environment variable in the docker compose file to select the intended device to use.
Then open your web browser at http://localhost:3000 to launch the web UI. Create a local Open WebUI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3.1:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM.
# References
* [https://dgpu-docs.intel.com/driver/client/overview.html](https://github.com/intel/ipex-llm/blob/main/docs/mddocs/DockerGuides/README.md)
* https://dgpu-docs.intel.com/driver/client/overview.html
* https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_portable_zip_quickstart.md
-35
View File
@@ -1,35 +0,0 @@
# Compose stack for Windows WSL2: an Ollama service built from the local
# Dockerfile plus an Open WebUI front end that talks to it.
version: "3.9"
services:
  ollama-intel-gpu:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: ollama-intel-gpu
    image: ollama-intel-gpu:latest
    restart: always
    # GPU device nodes passed through to the container.
    devices:
      - /dev/dri:/dev/dri
      - /dev/dxg:/dev/dxg
    volumes:
      - /usr/lib/wsl:/usr/lib/wsl
      - /tmp/.X11-unix:/tmp/.X11-unix
      - ollama-intel-gpu:/root/.ollama
    environment:
      - DISPLAY=${DISPLAY}
  ollama-webui:
    image: ghcr.io/open-webui/open-webui
    container_name: ollama-webui
    volumes:
      - ollama-webui:/app/backend/data
    depends_on:
      - ollama-intel-gpu
    # Host port defaults to 3000 when OLLAMA_WEBUI_PORT is unset.
    ports:
      - ${OLLAMA_WEBUI_PORT-3000}:8080
    environment:
      - OLLAMA_BASE_URL=http://ollama-intel-gpu:11434
    extra_hosts:
      - host.docker.internal:host-gateway
    restart: unless-stopped
# Named volumes referenced by the services above.
volumes:
  ollama-webui: {}
  ollama-intel-gpu: {}
+2 -1
View File
@@ -10,7 +10,8 @@ services:
volumes:
- ollama-intel-gpu:/root/.ollama
environment:
- DEVICE=Arc
- ONEAPI_DEVICE_SELECTOR=level_zero:0
- IPEX_LLM_NUM_CTX=16384
ollama-webui:
image: ghcr.io/open-webui/open-webui
container_name: ollama-webui