mirror of
https://github.com/ollama/ollama.git
synced 2026-03-27 02:58:43 +07:00
* prefer rocm v6 on windows Avoid building with v7 - more changes are needed * MLX: add header vendoring and remove go build tag This switches to using a vendoring approach for the mlx-c headers so that Go can build without requiring a cmake first. This enables building the new MLX based code by default. Every time cmake runs, the headers are refreshed, so we can easily keep them in sync when we bump mlx versions. Basic Windows and Linux support are verified. * ci: harden for flaky choco repo servers CI sometimes fails due to choco not actually installing cache. Since it just speeds up the build, we can proceed without. * review comments
302 lines
13 KiB
CMake
302 lines
13 KiB
CMake
# Top-level build script for Ollama's native C/C++ components.
# CMake 3.21 is the declared minimum version for this project.
cmake_minimum_required(VERSION 3.21)

project(Ollama LANGUAGES C CXX)
|
|
|
|
# macOS cross-compilation: CMAKE_SYSTEM_PROCESSOR defaults to the host
# architecture, yet downstream projects (like MLX) read it to detect the
# target architecture. When CMAKE_OSX_ARCHITECTURES names exactly one
# architecture (no ";" list separator) and it differs from
# CMAKE_SYSTEM_PROCESSOR, override CMAKE_SYSTEM_PROCESSOR to match.
if(CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES ";")
    # Only the two architectures below are handled; anything else is left alone.
    foreach(target_arch IN ITEMS x86_64 arm64)
        if(CMAKE_OSX_ARCHITECTURES STREQUAL "${target_arch}"
           AND NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "${target_arch}")
            message(STATUS "Cross-compiling for ${target_arch}: overriding CMAKE_SYSTEM_PROCESSOR from ${CMAKE_SYSTEM_PROCESSOR} to ${target_arch}")
            set(CMAKE_SYSTEM_PROCESSOR "${target_arch}")
            break()
        endif()
    endforeach()
endif()
|
|
|
|
# Helper modules: GNU-style install directory variables and check_language(),
# which is used further down to probe for optional GPU toolchains.
include(GNUInstallDirs)
include(CheckLanguage)

find_package(Threads REQUIRED)

# The build type is set to Release unconditionally, and libraries default to
# shared.
set(BUILD_SHARED_LIBS ON)
set(CMAKE_BUILD_TYPE Release)

# C++17 is mandatory. Compiler extensions stay enabled (gnu++17) because
# recent versions of MLX require them to compile properly.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)
set(CMAKE_CXX_STANDARD 17)
|
|
|
|
# Option defaults for the in-tree ggml sources (added via add_subdirectory
# later in this file).

# General library / backend options.
set(GGML_BUILD ON)
set(GGML_SHARED ON)
set(GGML_BACKEND_SHARED ON)
set(GGML_BACKEND_DL ON)
set(GGML_CCACHE ON)
set(GGML_LLAMAFILE ON)
set(GGML_SCHED_MAX_COPIES 4)

# CUDA-specific options.
set(GGML_CUDA_FA ON)
set(GGML_CUDA_GRAPHS ON)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_CUDA_COMPRESSION_MODE default)
|
|
|
|
# Turn on GGML_CPU_ALL_VARIANTS for non-ARM targets. When
# CMAKE_OSX_ARCHITECTURES is set it decides (anything not mentioning arm64
# qualifies); otherwise CMAKE_SYSTEM_PROCESSOR is inspected.
if(CMAKE_OSX_ARCHITECTURES)
    if(NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
        set(GGML_CPU_ALL_VARIANTS ON)
    endif()
elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+")
    set(GGML_CPU_ALL_VARIANTS ON)
endif()
|
|
|
|
# On Apple platforms both the build-tree and install rpath are set to
# @loader_path, and build-tree binaries are linked with the install rpath.
if(APPLE)
    set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
    foreach(rpath_var IN ITEMS CMAKE_BUILD_RPATH CMAKE_INSTALL_RPATH)
        set(${rpath_var} "@loader_path")
    endforeach()
endif()
|
|
|
|
# Where build artifacts are collected, and where they are installed.
# OLLAMA_RUNNER_DIR is not set in this file; it is expected to be supplied by
# the caller (an empty value installs directly under lib/ollama/).
set(OLLAMA_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib/ollama/${OLLAMA_RUNNER_DIR})
set(OLLAMA_BUILD_DIR ${CMAKE_BINARY_DIR}/lib/ollama)

# Send executables/DLLs (RUNTIME) and shared libraries (LIBRARY) to
# OLLAMA_BUILD_DIR, for the generic setting and for the Debug and Release
# per-configuration settings alike.
foreach(artifact_kind IN ITEMS RUNTIME LIBRARY)
    set(CMAKE_${artifact_kind}_OUTPUT_DIRECTORY ${OLLAMA_BUILD_DIR})
    foreach(config_name IN ITEMS DEBUG RELEASE)
        set(CMAKE_${artifact_kind}_OUTPUT_DIRECTORY_${config_name} ${OLLAMA_BUILD_DIR})
    endforeach()
endforeach()
|
|
|
|
# ggml include paths, kept in a variable and attached per target with
# target_include_directories() later on. A global include_directories() is
# deliberately avoided so the include path of other projects is not polluted
# (e.g. MLX, whose openblas dependency ships its own common.h).
set(GGML_INCLUDE_DIRS "")
foreach(ggml_subdir IN ITEMS src src/include src/ggml-cpu src/ggml-cpu/amx)
    list(APPEND GGML_INCLUDE_DIRS
        ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/${ggml_subdir}
    )
endforeach()
|
|
|
|
# Preprocessor definitions added at directory scope, so they apply to the
# targets created below (including those from add_subdirectory()).
add_compile_definitions(
    NDEBUG
    GGML_VERSION=0x0
    GGML_COMMIT=0x0
)

# GGML version variables used for the shared library SOVERSION; they are
# required by ggml/src/CMakeLists.txt for proper library versioning.
set(GGML_VERSION_MAJOR 0)
set(GGML_VERSION_MINOR 0)
set(GGML_VERSION_PATCH 0)
string(JOIN "." GGML_VERSION
    ${GGML_VERSION_MAJOR}
    ${GGML_VERSION_MINOR}
    ${GGML_VERSION_PATCH}
)
|
|
|
|
# CPU backend: add the in-tree ggml sources and keep the `ggml` target out of
# the default "all" build.
set(GGML_CPU ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
set_target_properties(ggml PROPERTIES EXCLUDE_FROM_ALL TRUE)

# The targets manually added as dependencies of ggml-cpu are treated as the
# CPU variant list; when there are none, ggml-cpu itself is the only variant.
get_target_property(CPU_VARIANTS ggml-cpu MANUALLY_ADDED_DEPENDENCIES)
if(NOT CPU_VARIANTS)
    set(CPU_VARIANTS "ggml-cpu")
endif()

# Apply ggml include directories to ggml targets only (not globally).
target_include_directories(ggml-base PRIVATE ${GGML_INCLUDE_DIRS})
foreach(cpu_target IN LISTS CPU_VARIANTS)
    if(NOT TARGET ${cpu_target})
        continue()
    endif()
    target_include_directories(${cpu_target} PRIVATE ${GGML_INCLUDE_DIRS})
endforeach()

# Install the base library and every CPU variant under the CPU component.
# The ".*" pre-exclude regex filters out every runtime dependency.
install(
    TARGETS ggml-base ${CPU_VARIANTS}
    RUNTIME_DEPENDENCIES
        PRE_EXCLUDE_REGEXES ".*"
    RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CPU
    LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CPU
    FRAMEWORK DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CPU
)
|
|
|
|
# CUDA backend: configured only when check_language() finds a CUDA compiler.
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
    # No architecture requested: fall back to "native". This default is only
    # applied when running CMake 3.24 or newer.
    if(NOT CMAKE_CUDA_ARCHITECTURES)
        if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
            set(CMAKE_CUDA_ARCHITECTURES "native")
        endif()
    endif()

    find_package(CUDAToolkit)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
    target_include_directories(ggml-cuda PRIVATE ${GGML_INCLUDE_DIRS})

    # Bundle only the cublas, cublasLt and cudart runtime libraries; every
    # other runtime dependency is excluded by the ".*" pre-exclude regex.
    install(
        TARGETS ggml-cuda
        RUNTIME_DEPENDENCIES
            DIRECTORIES
                ${CUDAToolkit_BIN_DIR}
                ${CUDAToolkit_BIN_DIR}/x64
                ${CUDAToolkit_LIBRARY_DIR}
            PRE_INCLUDE_REGEXES cublas cublasLt cudart
            PRE_EXCLUDE_REGEXES ".*"
        RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA
        LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA
    )
endif()
|
|
|
|
# Regex of AMDGPU targets to drop on Windows. It is a cache entry so it can be
# overridden from the command line.
set(WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX "^gfx(908|90a|1200|1201):xnack[+-]$"
    CACHE STRING
    "Regular expression describing AMDGPU_TARGETS not supported on Windows. Override to force building these targets. Default \"^gfx(908|90a|1200|1201):xnack[+-]$\"."
)

# HIP (AMD) backend: configured only when check_language() finds a HIP compiler.
check_language(HIP)
if(CMAKE_HIP_COMPILER)
    set(HIP_PLATFORM "amd")

    # No targets requested explicitly: load the hip package and keep only the
    # gfx targets matched by the regex below.
    # NOTE(review): this relies on find_package(hip) populating AMDGPU_TARGETS - confirm.
    if(NOT AMDGPU_TARGETS)
        find_package(hip REQUIRED)
        list(FILTER AMDGPU_TARGETS INCLUDE REGEX "^gfx(94[012]|101[02]|1030|110[012]|120[01])$")
    endif()

    # On Windows, additionally drop the targets matched by the exclude regex
    # (skipped when the regex has been cleared).
    if(WIN32 AND WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX)
        list(FILTER AMDGPU_TARGETS EXCLUDE REGEX ${WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX})
    endif()

    # Build the backend only if at least one target survived the filters.
    if(AMDGPU_TARGETS)
        find_package(hip REQUIRED)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
        target_include_directories(ggml-hip PRIVATE ${GGML_INCLUDE_DIRS})

        target_compile_definitions(ggml-hip PRIVATE GGML_HIP_NO_VMM)
        if(WIN32)
            target_compile_definitions(ggml-hip PRIVATE GGML_CUDA_NO_PEER_COPY)
        endif()

        # The backend library records its runtime dependencies in the "rocm"
        # set, which is installed separately just below.
        install(
            TARGETS ggml-hip
            RUNTIME_DEPENDENCY_SET rocm
            RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
            LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
        )

        # Only libraries matching the include regexes are bundled; anything
        # resolved to a path containing "system32" is dropped afterwards.
        install(
            RUNTIME_DEPENDENCY_SET rocm
            DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}
            PRE_INCLUDE_REGEXES
                hipblas
                rocblas
                amdhip64
                rocsolver
                amd_comgr
                hsa-runtime64
                rocsparse
                tinfo
                rocprofiler-register
                roctx64
                rocroller
                drm
                drm_amdgpu
                numa
                elf
            PRE_EXCLUDE_REGEXES ".*"
            POST_EXCLUDE_REGEXES "system32"
            RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
            LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
        )

        # Install the rocblas directory from the first HIP bin/lib directory
        # that contains one.
        foreach(rocm_dir IN ITEMS ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR})
            if(EXISTS ${rocm_dir}/rocblas)
                install(DIRECTORY ${rocm_dir}/rocblas DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP)
                break()
            endif()
        endforeach()
    endif()
endif()
|
|
|
|
# Vulkan backend: never probed on Apple platforms; elsewhere it is built only
# when find_package() locates Vulkan.
if(NOT APPLE)
    find_package(Vulkan)
endif()

if(Vulkan_FOUND AND NOT APPLE)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-vulkan)
    target_include_directories(ggml-vulkan PRIVATE ${GGML_INCLUDE_DIRS})

    # Bundle only runtime dependencies whose name matches "vulkan".
    install(
        TARGETS ggml-vulkan
        RUNTIME_DEPENDENCIES
            PRE_INCLUDE_REGEXES vulkan
            PRE_EXCLUDE_REGEXES ".*"
        RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan
        LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan
    )
endif()
|
|
|
|
# Optional MLX backend. Off by default; enable with -DMLX_ENGINE=ON.
# Everything installed from this section belongs to the MLX install component.
option(MLX_ENGINE "Enable MLX backend" OFF)
if(MLX_ENGINE)
    message(STATUS "Setting up MLX (this takes a while...)")
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/x/imagegen/mlx)

    # Find the CUDA toolkit in case MLX is built with CUDA support.
    find_package(CUDAToolkit)

    # Build the list of directories searched during runtime dependency
    # resolution. The CUDA toolkit directories are added only when the toolkit
    # was found: with an empty CUDAToolkit_BIN_DIR, "${CUDAToolkit_BIN_DIR}/x64"
    # would otherwise collapse to the bogus absolute path "/x64".
    set(MLX_RUNTIME_DIRS "")
    if(CUDAToolkit_FOUND)
        list(APPEND MLX_RUNTIME_DIRS
            ${CUDAToolkit_BIN_DIR}
            ${CUDAToolkit_BIN_DIR}/x64
            ${CUDAToolkit_LIBRARY_DIR}
        )
    endif()

    # Add cuDNN bin paths for DLLs (Windows MLX CUDA builds).
    # The location is taken from the CUDNN_ROOT_DIR environment variable, which
    # is read at configure time.
    if(DEFINED ENV{CUDNN_ROOT_DIR})
        # cuDNN 9.x has versioned subdirectories under bin/ (e.g., bin/13.0/)
        file(GLOB CUDNN_BIN_SUBDIRS "$ENV{CUDNN_ROOT_DIR}/bin/*")
        list(APPEND MLX_RUNTIME_DIRS ${CUDNN_BIN_SUBDIRS})
    endif()

    # Add the build output directory and the MLX dependency build directories.
    list(APPEND MLX_RUNTIME_DIRS ${OLLAMA_BUILD_DIR})

    # OpenBLAS DLL location (pre-built zip extracts into openblas-src/bin/).
    list(APPEND MLX_RUNTIME_DIRS ${CMAKE_BINARY_DIR}/_deps/openblas-src/bin)

    # NCCL: on Linux, if real NCCL is found, cmake bundles libnccl.so via the
    # regex below. If NCCL is not found, MLX links a static stub (OBJECT lib)
    # so there is no runtime dependency. The path below covers the stub build
    # directory on Windows so that its DLL is included in our dependencies.
    list(APPEND MLX_RUNTIME_DIRS
        ${CMAKE_BINARY_DIR}/_deps/mlx-build/mlx/distributed/nccl/nccl_stub-prefix/src/nccl_stub-build/Release
    )

    # Base regexes for runtime dependencies (cross-platform).
    set(MLX_INCLUDE_REGEXES
        cublas
        cublasLt
        cudart
        cufft
        nvrtc
        nvrtc-builtins
        cudnn
        nccl
        openblas
        gfortran
    )
    # On Windows, also include dl.dll (dlfcn-win32 POSIX emulation layer).
    if(WIN32)
        list(APPEND MLX_INCLUDE_REGEXES "^dl\\.dll$")
    endif()

    # Install the MLX libraries and the runtime dependencies that match the
    # include regexes; everything else is filtered out by ".*".
    install(
        TARGETS mlx mlxc
        RUNTIME_DEPENDENCIES
            DIRECTORIES ${MLX_RUNTIME_DIRS}
            PRE_INCLUDE_REGEXES ${MLX_INCLUDE_REGEXES}
            PRE_EXCLUDE_REGEXES ".*"
        RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
        LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
        FRAMEWORK DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
    )

    # Install the Metal library for macOS arm64 (must be colocated with the
    # binary). The Metal backend is only built for arm64, not x86_64.
    if(APPLE AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
        install(
            FILES ${CMAKE_BINARY_DIR}/_deps/mlx-build/mlx/backend/metal/kernels/mlx.metallib
            DESTINATION ${OLLAMA_INSTALL_DIR}
            COMPONENT MLX
        )
    endif()

    # Install CCCL headers for NVRTC JIT compilation at runtime.
    # MLX's own install rules use the default component so they get skipped by
    # --component MLX. Headers are installed alongside libmlx in
    # OLLAMA_INSTALL_DIR. On Linux, MLX's jit_module.cpp resolves CCCL via
    # current_binary_dir().parent_path() / "include" / "cccl", so we create a
    # symlink from lib/ollama/include -> ${OLLAMA_RUNNER_DIR}/include.
    # This will need refinement if we add multiple CUDA versions for MLX in the
    # future.
    if(EXISTS ${CMAKE_BINARY_DIR}/_deps/cccl-src/include/cuda)
        install(
            DIRECTORY ${CMAKE_BINARY_DIR}/_deps/cccl-src/include/cuda
            DESTINATION ${OLLAMA_INSTALL_DIR}/include/cccl
            COMPONENT MLX
        )
        install(
            DIRECTORY ${CMAKE_BINARY_DIR}/_deps/cccl-src/include/nv
            DESTINATION ${OLLAMA_INSTALL_DIR}/include/cccl
            COMPONENT MLX
        )
        if(NOT WIN32 AND NOT APPLE)
            # The link location honours DESTDIR so that staged installs
            # (DESTDIR=/staging cmake --install ...) create the symlink inside
            # the staging root, next to the files installed by the rules
            # above, rather than in the live prefix. The link target stays
            # relative, so it remains valid after relocation.
            install(CODE "
                set(_link \"\$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/ollama/include\")
                set(_target \"${OLLAMA_RUNNER_DIR}/include\")
                if(NOT EXISTS \"\${_link}\")
                    execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E create_symlink \"\${_target}\" \"\${_link}\")
                endif()
            " COMPONENT MLX)
        endif()
    endif()

    # On Windows, explicitly install dl.dll (dlfcn-win32 POSIX dlopen emulation).
    # RUNTIME_DEPENDENCIES auto-excludes it via POST_EXCLUDE_FILES_STRICT because
    # dlfcn-win32 is a known CMake target with its own install rules (which
    # install to the wrong destination). We must install it explicitly here.
    if(WIN32)
        install(
            FILES ${OLLAMA_BUILD_DIR}/dl.dll
            DESTINATION ${OLLAMA_INSTALL_DIR}
            COMPONENT MLX
        )
    endif()

    # Manually install CUDA runtime libraries that MLX loads via dlopen
    # (not detected by RUNTIME_DEPENDENCIES since they aren't link-time deps).
    # The glob is evaluated at configure time and only matches .so names.
    if(CUDAToolkit_FOUND)
        file(GLOB MLX_CUDA_LIBS
            "${CUDAToolkit_LIBRARY_DIR}/libcudart.so*"
            "${CUDAToolkit_LIBRARY_DIR}/libcublas.so*"
            "${CUDAToolkit_LIBRARY_DIR}/libcublasLt.so*"
            "${CUDAToolkit_LIBRARY_DIR}/libnvrtc.so*"
            "${CUDAToolkit_LIBRARY_DIR}/libnvrtc-builtins.so*"
            "${CUDAToolkit_LIBRARY_DIR}/libcufft.so*"
            "${CUDAToolkit_LIBRARY_DIR}/libcudnn.so*"
        )
        if(MLX_CUDA_LIBS)
            install(
                FILES ${MLX_CUDA_LIBS}
                DESTINATION ${OLLAMA_INSTALL_DIR}
                COMPONENT MLX
            )
        endif()
    endif()
endif()
|