# Copyright 2025 Arniiiii lg3dx6fd@gmail.com and wadewilson619 at discord
# Distributed under the terms of the GNU General Public License v2

EAPI=8

# Must be set before inheriting the rocm eclass.
ROCM_VERSION=6.3

inherit cmake-multilib cuda rocm

DESCRIPTION="Inference of Meta's LLaMA model (and others) in pure C/C++"
HOMEPAGE="https://github.com/ggerganov/llama.cpp"

# The tarball is fetched by tag name: strip the leading "0_pre" from PV and
# prepend "b" to obtain it.
MY_PV="b${PV#0_pre}"
SRC_URI="https://github.com/ggerganov/llama.cpp/archive/refs/tags/${MY_PV}.tar.gz -> llama.cpp-${MY_PV}.tar.gz"

LICENSE="MIT"
SLOT="0"
KEYWORDS="~amd64 ~x86 ~x86-macos ~x64-macos ~arm ~arm64 ~arm64-macos ~riscv ~loong"

# CPU_FLAGS_x86_fma doesn't exist, thus place everything here.
# Flags are grouped per backend; each backend's sub-options follow the flag
# that enables the backend itself.
IUSE="
	static
	lto
	test
	examples
	+server
	curl
	hbm
	android
	msvc
	+accelerate
	blas
	blis
	+llamafile
	cann
	musa
	cuda
	cuda_force_mmq
	cuda_force_cublas
	+cuda_unified_memory
	cuda_f16
	cuda_no_peer_copy
	cuda_no_vmm
	cuda_fa_all_quants
	+cuda_graphs
	hip
	hip_graphs
	+hip_no_vmm
	hip_uma
	vulkan
	vulkan_check_results
	vulkan_debug
	vulkan_memory_debug
	vulkan_shader_debug_info
	vulkan_perf
	vulkan_validate
	vulkan_run_tests
	kompute
	+openmp
	rpc
	opencl
	opencl_profiling
	+opencl_embed_kernels
	+opencl_use_adreno_kernels
	metal
	metal_use_bf16
	metal_ndebug
	metal_shader_debug
	+metal_embed_library
	+cpu
	cpu_native
	cpu_flags_x86_avx
	cpu_flags_x86_avx_vnni
	cpu_flags_x86_avx2
	cpu_flags_x86_avx512
	cpu_flags_x86_avx512_vbmi
	cpu_flags_x86_avx512_vnni
	cpu_flags_x86_avx512_bf16
	cpu_flags_x86_fma
	cpu_flags_x86_f16c
	cpu_flags_x86_amx_tile
	cpu_flags_x86_amx_int8
	cpu_flags_x86_amx_bf16
	cpu_flags_x86_sse
	cpu_flags_x86_sse2
	cpu_flags_x86_sse3
	cpu_flags_x86_sse4
	cpu_flags_x86_sse4a
	cpu_flags_x86_sse41
	cpu_flags_x86_sse42
	cpu_flags_x86_ssse3
	cpu_flags_loong_lasx
	cpu_flags_loong_lsx
	cpu_flags_riscv_rvv
"
# since this is too hard to do
#	sycl
#	sycl_f16
#	sycl_target_nvidia
#	sycl_target_amdgpu
#	sycl_target_intelgpu
#	sycl_via_oneapi
#	sycl_via_onemkl
# "

# in MSVC F16C and FMA is implied with AVX2/AVX512
# MSVC does not seem to support AMX
# android stuff added according to their docs.
# a lot of !flag ( !subflags ) statements placed for binpkg correctness
# i.e. a backend's sub-options may only be enabled together with the backend.
REQUIRED_USE="
	blis? ( blas )
	android? ( !llamafile !openmp )
	msvc? (
		!cpu_flags_x86_fma
		!cpu_flags_x86_f16c
		!cpu_flags_x86_amx_tile
		!cpu_flags_x86_amx_int8
		!cpu_flags_x86_amx_bf16
	)
	!cuda? (
		!cuda_force_mmq
		!cuda_force_cublas
		!cuda_unified_memory
		!cuda_f16
		!cuda_no_peer_copy
		!cuda_no_vmm
		!cuda_fa_all_quants
		!cuda_graphs
	)
	!hip? (
		!hip_graphs
		!hip_no_vmm
		!hip_uma
	)
	!vulkan? (
		!vulkan_check_results
		!vulkan_debug
		!vulkan_memory_debug
		!vulkan_shader_debug_info
		!vulkan_perf
		!vulkan_validate
		!vulkan_run_tests
	)
	!opencl? (
		!opencl_profiling
		!opencl_embed_kernels
		!opencl_use_adreno_kernels
	)
	!cpu? (
		!cpu_flags_x86_avx
		!cpu_flags_x86_avx_vnni
		!cpu_flags_x86_avx2
		!cpu_flags_x86_avx512
		!cpu_flags_x86_avx512_vbmi
		!cpu_flags_x86_avx512_vnni
		!cpu_flags_x86_avx512_bf16
		!cpu_flags_x86_fma
		!cpu_flags_x86_f16c
		!cpu_flags_x86_amx_tile
		!cpu_flags_x86_amx_int8
		!cpu_flags_x86_amx_bf16
		!cpu_flags_x86_sse
		!cpu_flags_x86_sse2
		!cpu_flags_x86_sse3
		!cpu_flags_x86_sse4
		!cpu_flags_x86_sse4a
		!cpu_flags_x86_sse41
		!cpu_flags_x86_sse42
		!cpu_flags_x86_ssse3
		!cpu_flags_loong_lasx
		!cpu_flags_loong_lsx
		!cpu_flags_riscv_rvv
	)
"

DEPEND="
	blas? ( virtual/blas )
	cuda? ( dev-util/nvidia-cuda-toolkit )
	blis? ( sci-libs/blis )
	opencl? ( virtual/opencl )
	curl? ( net-misc/curl )
"
# since this is too hard right now.
#	sycl_target_nvidia? ( dev-util/nvidia-cuda-toolkit )
#	sycl_target_amdgpu? ( dev-util/nvidia-cuda-toolkit )
#	sycl_target_intelgpu? ( dev-util/nvidia-cuda-toolkit )
# "
RDEPEND="${DEPEND}"
BDEPEND="${DEPEND}"

PATCHES=(
	"${FILESDIR}/0000_add_sse_flags.patch"
	"${FILESDIR}/0001_GGML_CANN_option_has_to_do_something.patch"
)

S="${WORKDIR}/llama.cpp-${MY_PV}"

# Run the CUDA eclass preparation only for CUDA builds, then the regular
# cmake preparation.
src_prepare() {
	if use cuda; then
		cuda_src_prepare
	fi
	cmake_src_prepare
}

# Translate USE flags into llama.cpp / ggml CMake options and configure
# every enabled ABI.
src_configure() {
	if use hip; then
		# These must be exported: a plain assignment stays inside the shell
		# and is never seen by the cmake process.
		export HIPCC="$(hipconfig -l)/clang"
		export HIPCXX="$(hipconfig -l)/clang++"
		# export DEVICE_LIB_PATH=${EPREFIX}/usr/lib/amdgcn/bitcode # not sure what to do with that
		export HIP_PATH="$(hipconfig -R)"
	fi

	local mycmakeargs=(
		-DGGML_LTO="$(usex lto ON OFF)"

		# add these via user's /etc/portage/make.conf as i.e.`-fsanitize=address`
		-DLLAMA_SANITIZE_THREAD=OFF
		-DLLAMA_SANITIZE_ADDRESS=OFF
		-DLLAMA_SANITIZE_UNDEFINED=OFF

		-DLLAMA_CURL="$(usex curl ON OFF)"
		-DLLAMA_BUILD_TESTS=$(usex test ON OFF)
		-DLLAMA_BUILD_EXAMPLES=$(usex examples ON OFF)
		-DLLAMA_BUILD_SERVER=$(usex server ON OFF)
		-DLLAMA_BUILD_COMMON=ON
		# -DLLAMA_BUILD_SERVER=OFF # why
		# -DCMAKE_SKIP_BUILD_RPATH=ON # why?
		-DBUILD_NUMBER="${MY_PV}"
		# -DCMAKE_INSTALL_PREFIX=${EPREFIX}/opt/${PN} # why would you need that?
		# -DCMAKE_CUDA_ARCHITECTURES="75" # I guess this should be set by user.

		# USE=static inverts into BUILD_SHARED_LIBS.
		-DBUILD_SHARED_LIBS=$(usex static OFF ON)

		# CPU backend and instruction-set extensions
		-DGGML_CPU=$(usex cpu ON OFF)
		-DGGML_NATIVE=$(usex cpu_native ON OFF)
		-DGGML_CPU_AARCH64=$(usex arm64 ON OFF)
		-DGGML_CPU_HBM=$(usex hbm ON OFF)
		-DGGML_AVX=$(usex cpu_flags_x86_avx ON OFF)
		-DGGML_AVX_VNNI=$(usex cpu_flags_x86_avx_vnni ON OFF)
		-DGGML_AVX2=$(usex cpu_flags_x86_avx2 ON OFF)
		-DGGML_AVX512=$(usex cpu_flags_x86_avx512 ON OFF)
		-DGGML_AVX512_VBMI=$(usex cpu_flags_x86_avx512_vbmi ON OFF)
		-DGGML_AVX512_VNNI=$(usex cpu_flags_x86_avx512_vnni ON OFF)
		-DGGML_AVX512_BF16=$(usex cpu_flags_x86_avx512_bf16 ON OFF)
		-DGGML_FMA=$(usex cpu_flags_x86_fma ON OFF)
		-DGGML_F16C=$(usex cpu_flags_x86_f16c ON OFF)
		-DGGML_AMX_TILE=$(usex cpu_flags_x86_amx_tile ON OFF)
		-DGGML_AMX_INT8=$(usex cpu_flags_x86_amx_int8 ON OFF)
		-DGGML_AMX_BF16=$(usex cpu_flags_x86_amx_bf16 ON OFF)
		-DGGML_SSE=$(usex cpu_flags_x86_sse ON OFF)
		-DGGML_SSE2=$(usex cpu_flags_x86_sse2 ON OFF)
		-DGGML_SSE3=$(usex cpu_flags_x86_sse3 ON OFF)
		-DGGML_SSE4=$(usex cpu_flags_x86_sse4 ON OFF)
		-DGGML_SSE4A=$(usex cpu_flags_x86_sse4a ON OFF)
		-DGGML_SSE41=$(usex cpu_flags_x86_sse41 ON OFF)
		-DGGML_SSE42=$(usex cpu_flags_x86_sse42 ON OFF)
		-DGGML_SSSE3=$(usex cpu_flags_x86_ssse3 ON OFF)
		-DGGML_LASX=$(usex cpu_flags_loong_lasx ON OFF)
		-DGGML_LSX=$(usex cpu_flags_loong_lsx ON OFF)
		-DGGML_RVV=$(usex cpu_flags_riscv_rvv ON OFF)

		-DGGML_ACCELERATE=$(usex accelerate ON OFF)
		-DGGML_BLAS=$(usex blas ON OFF)
		-DGGML_CANN=$(usex cann ON OFF)
		-DGGML_LLAMAFILE=$(usex llamafile ON OFF)
		-DGGML_MUSA=$(usex musa ON OFF)

		# CUDA backend
		-DGGML_CUDA=$(usex cuda ON OFF)
		-DGGML_CUDA_FORCE_MMQ=$(usex cuda_force_mmq ON OFF)
		-DGGML_CUDA_FORCE_CUBLAS=$(usex cuda_force_cublas ON OFF)
		-DGGML_CUDA_F16=$(usex cuda_f16 ON OFF)
		-DGGML_CUDA_NO_PEER_COPY=$(usex cuda_no_peer_copy ON OFF)
		-DGGML_CUDA_NO_VMM=$(usex cuda_no_vmm ON OFF)
		-DGGML_CUDA_FA_ALL_QUANTS=$(usex cuda_fa_all_quants ON OFF)
		-DGGML_CUDA_GRAPHS=$(usex cuda_graphs ON OFF)
		# CPU+GPU Unified Memory
		-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=$(usex cuda_unified_memory 1 0)

		# HIP (ROCm) backend
		-DGGML_HIP=$(usex hip ON OFF)
		-DGGML_HIP_GRAPHS=$(usex hip_graphs ON OFF)
		-DGGML_HIP_NO_VMM=$(usex hip_no_vmm ON OFF)
		-DGGML_HIP_UMA=$(usex hip_uma ON OFF)

		# Vulkan backend
		-DGGML_VULKAN=$(usex vulkan ON OFF)
		-DGGML_VULKAN_CHECK_RESULTS=$(usex vulkan_check_results ON OFF)
		-DGGML_VULKAN_DEBUG=$(usex vulkan_debug ON OFF)
		-DGGML_VULKAN_MEMORY_DEBUG=$(usex vulkan_memory_debug ON OFF)
		-DGGML_VULKAN_SHADER_DEBUG_INFO=$(usex vulkan_shader_debug_info ON OFF)
		-DGGML_VULKAN_PERF=$(usex vulkan_perf ON OFF)
		-DGGML_VULKAN_VALIDATE=$(usex vulkan_validate ON OFF)
		-DGGML_VULKAN_RUN_TESTS=$(usex vulkan_run_tests ON OFF)

		-DGGML_KOMPUTE=$(usex kompute ON OFF)

		# Metal backend
		-DGGML_METAL=$(usex metal ON OFF)
		-DGGML_METAL_USE_BF16=$(usex metal_use_bf16 ON OFF)
		-DGGML_METAL_NDEBUG=$(usex metal_ndebug ON OFF)
		-DGGML_METAL_SHADER_DEBUG=$(usex metal_shader_debug ON OFF)
		-DGGML_METAL_EMBED_LIBRARY=$(usex metal_embed_library ON OFF)

		-DGGML_OPENMP=$(usex openmp ON OFF)
		-DGGML_RPC=$(usex rpc ON OFF)

		-DGGML_SYCL=OFF
		# -DGGML_SYCL=$(usex sycl ON OFF)
		# -DGGML_SYCL_F16=$(usex sycl_f16 ON OFF)

		# OpenCL backend
		-DGGML_OPENCL=$(usex opencl ON OFF)
		-DGGML_OPENCL_PROFILING=$(usex opencl_profiling ON OFF)
		-DGGML_OPENCL_EMBED_KERNELS=$(usex opencl_embed_kernels ON OFF)
		-DGGML_OPENCL_USE_ADRENO_KERNELS=$(usex opencl_use_adreno_kernels ON OFF)

		# -DGGML_BUILD_TESTS=$(usex test ON OFF) # broken option
		# -DGGML_BUILD_EXAMPLES=$(usex examples ON OFF) # broken option

		# Gentoo users enable ccache via e.g. FEATURES=ccache or
		# other means. We don't want the build system to enable it for us.
		-DGGML_CCACHE=OFF

		# defaults aren't so good
		--log-level=DEBUG
		-DFETCHCONTENT_QUIET=OFF
	)

	if use blis; then
		# BLIS is selected through the BLAS vendor name "FLAME".
		mycmakeargs+=( -DGGML_BLAS_VENDOR=FLAME )
	fi

	if use hip; then
		# Quoted so the ';'-separated target list stays one argument.
		mycmakeargs+=( -DAMDGPU_TARGETS="$(get_amdgpu_flags)" )
	fi

	cmake-multilib_src_configure
}

# Open the sandbox for the GPU device nodes the tests touch, make the freshly
# built libraries findable and run the test suite for every enabled ABI.
src_test() {
	if use cuda; then
		addpredict /dev/nvidiactl
		# we need write access to this to run the tests
		addwrite /dev/nvidia0
		addwrite /dev/nvidiactl
		addwrite /dev/nvidia-uvm
		addwrite /dev/nvidia-uvm-tools
	fi

	if use hip; then
		check_amdgpu
	fi

	# cd "${BUILD_DIR}" || die # why this exists?

	# Only insert the ':' separator when LD_LIBRARY_PATH is already set; an
	# empty leading entry would add the current directory to the search path.
	# NOTE(review): BUILD_DIR is read here, outside the per-ABI test run -
	# confirm it is set at this point.
	export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${BUILD_DIR}/bin"

	cmake-multilib_src_test
}

# TODO : Add install functionality for all the binaries in "build/bin" dir
# with install destination being "/opt/llama.cpp/"