Merge branch 'ggerganov:master' into master

commit b97e6fc812

5 changed files with 15 additions and 10 deletions

@@ -24,7 +24,7 @@
     useOpenCL
     useRocm
     useVulkan
-  ],
+  ] && blas.meta.available,
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
   useMpi ? false, # Increases the runtime closure size by ~700M
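
A minimal sketch of the new default logic (the names mirror the hunk; the literal values and the blasAvailable stand-in for blas.meta.available are assumptions for illustration). It can be checked with `nix-instantiate --eval`:

# Sketch: BLAS is now the fallback only when no GPU backend is selected
# AND the resolved blas package is actually available on the host platform.
let
  useCuda = false;
  useMetalKit = false;
  useOpenCL = false;
  useRocm = false;
  useVulkan = false;
  blasAvailable = true; # stand-in for blas.meta.available
in
builtins.all (x: !x) [
  useCuda
  useMetalKit
  useOpenCL
  useRocm
  useVulkan
] && blasAvailable
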
@@ -67,10 +67,15 @@ let
     strings.optionalString (suffices != [ ])
       ", accelerated with ${strings.concatStringsSep ", " suffices}";

+  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
+
   # TODO: package the Python in this repository in a Nix-like way.
   # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
   # is PEP 517-compatible, and ensure the correct .dist-info is generated.
   # https://peps.python.org/pep-0517/
+  #
+  # TODO: Package up each Python script or service appropriately, by making
+  # them into "entrypoints"
   llama-python = python3.withPackages (
     ps: [
       ps.numpy

@@ -159,11 +164,6 @@ effectiveStdenv.mkDerivation (
       --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
     substituteInPlace ./ggml-metal.m \
       --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
-
-    # TODO: Package up each Python script or service appropriately.
-    # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
-    # we could make those *.py into setuptools' entrypoints
-    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
   '';

   # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,

@@ -244,8 +244,8 @@ effectiveStdenv.mkDerivation (
     # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
     # if they haven't been added yet.
     postInstall = ''
-      mv $out/bin/main $out/bin/llama
-      mv $out/bin/server $out/bin/llama-server
+      mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
+      mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
       mkdir -p $out/include
       cp $src/llama.h $out/include/
     '';
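
As a hedged aside (the <nixpkgs> import below is an assumption for illustration, not part of the diff): hostPlatform.extensions.executable is the empty string for native Linux and Darwin builds and ".exe" under the MinGW-w64 cross set, which is why the renames above need ${executableSuffix} once Windows cross builds are exposed in flake.nix below.

# Evaluable sketch of what executableSuffix resolves to per host platform.
let
  pkgs = import <nixpkgs> { };
in
{
  native = pkgs.stdenv.hostPlatform.extensions.executable; # ""
  windows = pkgs.pkgsCross.mingwW64.stdenv.hostPlatform.extensions.executable; # ".exe"
}
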
@@ -6,7 +6,7 @@ for more information, please go to [Meituan-AutoML/MobileVLM](https://github.com

 The implementation is based on llava, and is compatible with llava and mobileVLM. The usage is basically same as llava.

-Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using MobiVLM as an example, the different conversion step will be shown.
+Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using MobileVLM as an example, the different conversion step will be shown.

 ## Usage
 Build with cmake or run `make llava-cli` to build it.

@@ -296,7 +296,9 @@ These options help improve the performance and memory usage of the LLaMA models.

 ### Batch Size

-- `-b N, --batch-size N`: Set the batch size for prompt processing (default: 512). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations.
+- `-b N, --batch-size N`: Set the batch size for prompt processing (default: `2048`). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations.
+
+- `-ub N`, `--ubatch-size N`: physical maximum batch size. This is for pipeline parallelization. Default: `512`.

 ### Prompt Caching
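
Note (hedged, not part of the diff): `-b` bounds the logical batch handed to the library, while the new `-ub` bounds the physical micro-batch actually submitted to the backend in one pass; with the defaults above, a full 2048-token logical batch would be processed as four 512-token micro-batches.
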
@@ -3566,6 +3566,7 @@ int main(int argc, char ** argv) {
         sigemptyset (&sigint_action.sa_mask);
         sigint_action.sa_flags = 0;
         sigaction(SIGINT, &sigint_action, NULL);
+        sigaction(SIGTERM, &sigint_action, NULL);
 #elif defined (_WIN32)
         auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
             return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
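
The added line registers the same handler for SIGTERM as for SIGINT, so the cleanup path also runs when the process is terminated by `kill` or a service manager, not only on Ctrl-C; the Windows branch below is unchanged, since its console control handler only maps CTRL_C_EVENT.
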
@@ -145,6 +145,7 @@
       # the same path you would with an overlay.
       legacyPackages = {
         llamaPackages = pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
+        llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
         llamaPackagesCuda = pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
         llamaPackagesRocm = pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
       };
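
For context (a sketch assuming <nixpkgs> is available; not part of the diff): pkgsCross.mingwW64 is nixpkgs' x86_64 Windows cross-compilation package set, so llamaPackagesWindows instantiates the same scope.nix against a MinGW-w64 toolchain.

# Evaluable sketch: the target triple behind pkgsCross.mingwW64.
let
  pkgs = import <nixpkgs> { };
in
pkgs.pkgsCross.mingwW64.stdenv.hostPlatform.config # "x86_64-w64-mingw32"
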
@@ -155,6 +156,7 @@
         {
           default = config.legacyPackages.llamaPackages.llama-cpp;
           vulkan = config.packages.default.override { useVulkan = true; };
+          windows = config.legacyPackages.llamaPackagesWindows.llama-cpp;
         }
         // lib.optionalAttrs pkgs.stdenv.isLinux {
           opencl = config.packages.default.override { useOpenCL = true; };
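
With both flake.nix hunks applied, the cross-built package should be reachable as an ordinary flake output, e.g. `nix build .#windows` (assuming the surrounding flake wiring exposes these attributes as packages, as the hunk above suggests).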