Add quantize support for Falcon 180B models

countzero · countzero · commit 3d0c97d67e8b · 2023-09-16T12:12:36.000+02:00
diff --git a/rebuild_llama.cpp.ps1 b/rebuild_llama.cpp.ps1
@@ -95,6 +95,15 @@ git submodule update --remote --merge --force
 # of the repository to enable quick debugging.
 git -C ./vendor/llama.cpp checkout $version
 
+# Until https://github.com/ggerganov/llama.cpp/pull/3049 is resolved, download
+# the working Falcon 180B convert script into ./vendor/llama.cpp directly.
+if (-not(Test-Path -Path "./vendor/llama.cpp/convert-falcon180-hf-to-gguf.py")) {
+    # Only fetched when the file is missing; the URL is pinned to a commit SHA.
+    Invoke-WebRequest `
+        -Uri "https://raw.githubusercontent.com/ggerganov/llama.cpp/3a26b3c310ad210d21684f0e222505939eb34259/convert-falcon180-hf-to-gguf.py" `
+        -OutFile "./vendor/llama.cpp/convert-falcon180-hf-to-gguf.py"
+}
+
 $lines = @(
     "# This is a workaround for a CMake bug on Windows to build llama.cpp"
     "# with OpenBLAS. The find_package(BLAS) call fails to find OpenBLAS,"
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 98311c427739e3b06527c3ce6b5c021ab6692740
+Subproject commit e6616cf0db2b63189fc34d0076f654af9adecdf8