Skip to content

Commit 845a05e

Browse files
committed
Add examples for falcon and extended Llama 2 context
1 parent d2382cc commit 845a05e

File tree

4 files changed

+39
-4
lines changed

4 files changed

+39
-4
lines changed

examples/server_falcon_40b.ps1

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
# Open the llama.cpp server web UI in the default browser, then start the
# server itself with the quantized Falcon 40B model. Start-Process returns
# immediately, so the browser launches while the server is still loading.
Start-Process "http://127.0.0.1:8080"

# Collect the server options in an array and splat them onto the native
# executable; this passes exactly the same arguments as separate flags.
$serverArgs = @(
    "--model", "../vendor/llama.cpp/models/falcon-40b/model-quantized-q4_K_M.gguf"
    "--ctx-size", "4096"
    "--threads", "16"
    "--n-gpu-layers", "10"
)
& "../vendor/llama.cpp/build/bin/Release/server" @serverArgs
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
# Open the llama.cpp server web UI in the default browser, then start the
# server with a Llama 2 based model and an extended context window.
Start-Process "http://127.0.0.1:8080"

# We are increasing the context size of a Llama 2 model from 4096 tokens
# to 16384 tokens, which is a ctx_scale of 4.0. The parameter formula is:
#
#   --rope-freq-scale = 1 / ctx_scale
#   --rope-freq-base  = 10000 * ctx_scale
#
../vendor/llama.cpp/build/bin/Release/server `
    --model "../vendor/llama.cpp/models/Phind-CodeLlama-34B-v2/model-quantized-q4_K_M.gguf" `
    --ctx-size 16384 `
    --rope-freq-scale 0.25 `
    --rope-freq-base 40000 `
    --threads 16 `
    --n-gpu-layers 10

rebuild_llama.cpp.ps1

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,15 +127,28 @@ Set-Location -Path "./vendor/llama.cpp/build"
127127
# Configure the llama.cpp build with CMake, selecting the BLAS backend
# requested via $blasAccelerator. Log output is disabled in every
# configuration; -DLLAMA_SERVER_VERBOSE=OFF is applied consistently to all
# three branches (previously only the default branch had it, which looked
# like an oversight rather than an intentional difference).
switch ($blasAccelerator) {

    "OpenBLAS" {
        cmake `
            -DLLAMA_BUILD_SERVER=ON `
            -DLLAMA_DISABLE_LOGS=ON `
            -DLLAMA_SERVER_VERBOSE=OFF `
            -DLLAMA_BLAS=ON `
            -DLLAMA_BLAS_VENDOR=OpenBLAS `
            ..
    }

    "cuBLAS" {
        cmake `
            -DLLAMA_BUILD_SERVER=ON `
            -DLLAMA_DISABLE_LOGS=ON `
            -DLLAMA_SERVER_VERBOSE=OFF `
            -DLLAMA_CUBLAS=ON `
            ..
    }

    default {
        cmake `
            -DLLAMA_BUILD_SERVER=ON `
            -DLLAMA_DISABLE_LOGS=ON `
            -DLLAMA_SERVER_VERBOSE=OFF `
            ..
    }
}
141154

vendor/llama.cpp

0 commit comments

Comments (0)