Merge branch 'master' into concedo_experimental
# Conflicts: # README.md
This commit is contained in:
commit
e01dc631f7
6 changed files with 257 additions and 108 deletions
|
@ -1,9 +1,12 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
cd "$(dirname "$0")/.." || exit
|
||||
|
||||
MODEL="${MODEL:-./models/13B/ggml-model-q4_0.bin}"
|
||||
USER_NAME="${USER_NAME:-User}"
|
||||
PROMPT_TEMPLATE=${PROMPT_TEMPLATE:-./prompts/chat.txt}
|
||||
USER_NAME="${USER_NAME:-USER}"
|
||||
AI_NAME="${AI_NAME:-ChatLLaMa}"
|
||||
|
||||
# Adjust to the number of CPU cores you want to use.
|
||||
|
@ -15,39 +18,24 @@ N_PREDICTS="${N_PREDICTS:-2048}"
|
|||
# For example, override the context size by doing: ./chatLLaMa --ctx_size 1024
|
||||
GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --batch_size 1024 --repeat_penalty 1.17647}"
|
||||
|
||||
DATE_TIME=$(date +%H:%M)
|
||||
DATE_YEAR=$(date +%Y)
|
||||
|
||||
PROMPT_FILE=$(mktemp -t llamacpp_prompt.XXXXXXX.txt)
|
||||
|
||||
sed -e "s/\[\[USER_NAME\]\]/$USER_NAME/g" \
|
||||
-e "s/\[\[AI_NAME\]\]/$AI_NAME/g" \
|
||||
-e "s/\[\[DATE_TIME\]\]/$DATE_TIME/g" \
|
||||
-e "s/\[\[DATE_YEAR\]\]/$DATE_YEAR/g" \
|
||||
$PROMPT_TEMPLATE > $PROMPT_FILE
|
||||
|
||||
# shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS
|
||||
./main $GEN_OPTIONS \
|
||||
--model "$MODEL" \
|
||||
--threads "$N_THREAD" \
|
||||
--n_predict "$N_PREDICTS" \
|
||||
--color --interactive \
|
||||
--file ${PROMPT_FILE} \
|
||||
--reverse-prompt "${USER_NAME}:" \
|
||||
--prompt "
|
||||
Text transcript of a never ending dialog, where ${USER_NAME} interacts with an AI assistant named ${AI_NAME}.
|
||||
${AI_NAME} is helpful, kind, honest, friendly, good at writing and never fails to answer ${USER_NAME}'s requests immediately and with details and precision.
|
||||
There are no annotations like (30 seconds passed...) or (to himself), just what ${USER_NAME} and ${AI_NAME} say aloud to each other.
|
||||
The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long.
|
||||
The transcript only includes text, it does not include markup like HTML and Markdown.
|
||||
|
||||
$USER_NAME: Hello, $AI_NAME!
|
||||
$AI_NAME: Hello $USER_NAME! How may I help you today?
|
||||
$USER_NAME: What year is it?
|
||||
$AI_NAME: We are in $(date +%Y).
|
||||
$USER_NAME: Please tell me the largest city in Europe.
|
||||
$AI_NAME: The largest city in Europe is Moscow, the capital of Russia.
|
||||
$USER_NAME: What can you tell me about Moscow?
|
||||
$AI_NAME: Moscow, on the Moskva River in western Russia, is the nation's cosmopolitan capital. In its historic core is the Kremlin, a complex that's home to the president and tsarist treasures in the Armoury. Outside its walls is Red Square, Russia’s symbolic center.
|
||||
$USER_NAME: What is a cat?
|
||||
$AI_NAME: A cat is a domestic species of small carnivorous mammal. It is the only domesticated species in the family Felidae.
|
||||
$USER_NAME: How do I pass command line arguments to a Node.js program?
|
||||
$AI_NAME: The arguments are stored in process.argv.
|
||||
|
||||
argv[0] is the path to the Node. js executable.
|
||||
argv[1] is the path to the script file.
|
||||
argv[2] is the first argument passed to the script.
|
||||
argv[3] is the second argument passed to the script and so on.
|
||||
$USER_NAME: Name a color.
|
||||
$AI_NAME: Blue.
|
||||
$USER_NAME: What time is it?
|
||||
$AI_NAME: It is $(date +%H:%M).
|
||||
$USER_NAME:" "$@"
|
||||
--in-prefix ' ' \
|
||||
"$@"
|
||||
|
|
120
ggml.c
120
ggml.c
|
@ -1561,15 +1561,135 @@ static void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * r
|
|||
}
|
||||
|
||||
static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
|
||||
assert(QK8_0 == 32);
|
||||
assert(k % QK8_0 == 0);
|
||||
const int nb = k / QK8_0;
|
||||
|
||||
block_q8_0 * restrict y = vy;
|
||||
|
||||
#if defined(__ARM_NEON)
|
||||
for (int i = 0; i < nb; i++) {
|
||||
float32x4_t srcv [8];
|
||||
float32x4_t asrcv[8];
|
||||
float32x4_t amaxv[8];
|
||||
|
||||
for (int l = 0; l < 8; l++) srcv[l] = vld1q_f32(x + i*32 + 4*l);
|
||||
for (int l = 0; l < 8; l++) asrcv[l] = vabsq_f32(srcv[l]);
|
||||
|
||||
for (int l = 0; l < 4; l++) amaxv[2*l] = vmaxq_f32(asrcv[2*l], asrcv[2*l+1]);
|
||||
for (int l = 0; l < 2; l++) amaxv[4*l] = vmaxq_f32(amaxv[4*l], amaxv[4*l+2]);
|
||||
for (int l = 0; l < 1; l++) amaxv[8*l] = vmaxq_f32(amaxv[8*l], amaxv[8*l+4]);
|
||||
|
||||
const float amax = vmaxvq_f32(amaxv[0]);
|
||||
|
||||
const float d = amax / ((1 << 7) - 1);
|
||||
const float id = d ? 1.0f/d : 0.0f;
|
||||
|
||||
y[i].d = d;
|
||||
|
||||
for (int l = 0; l < 8; l++) {
|
||||
const float32x4_t v = vmulq_n_f32(srcv[l], id);
|
||||
const int32x4_t vi = vcvtnq_s32_f32(v);
|
||||
|
||||
y[i].qs[4*l + 0] = vgetq_lane_s32(vi, 0);
|
||||
y[i].qs[4*l + 1] = vgetq_lane_s32(vi, 1);
|
||||
y[i].qs[4*l + 2] = vgetq_lane_s32(vi, 2);
|
||||
y[i].qs[4*l + 3] = vgetq_lane_s32(vi, 3);
|
||||
}
|
||||
}
|
||||
#elif defined(__AVX2__) || defined(__AVX__)
|
||||
for (int i = 0; i < nb; i++) {
|
||||
// Load elements into 4 AVX vectors
|
||||
__m256 v0 = _mm256_loadu_ps( x );
|
||||
__m256 v1 = _mm256_loadu_ps( x + 8 );
|
||||
__m256 v2 = _mm256_loadu_ps( x + 16 );
|
||||
__m256 v3 = _mm256_loadu_ps( x + 24 );
|
||||
x += 32;
|
||||
|
||||
// Compute max(abs(e)) for the block
|
||||
const __m256 signBit = _mm256_set1_ps( -0.0f );
|
||||
__m256 maxAbs = _mm256_andnot_ps( signBit, v0 );
|
||||
maxAbs = _mm256_max_ps( maxAbs, _mm256_andnot_ps( signBit, v1 ) );
|
||||
maxAbs = _mm256_max_ps( maxAbs, _mm256_andnot_ps( signBit, v2 ) );
|
||||
maxAbs = _mm256_max_ps( maxAbs, _mm256_andnot_ps( signBit, v3 ) );
|
||||
|
||||
__m128 max4 = _mm_max_ps( _mm256_extractf128_ps( maxAbs, 1 ), _mm256_castps256_ps128( maxAbs ) );
|
||||
max4 = _mm_max_ps( max4, _mm_movehl_ps( max4, max4 ) );
|
||||
max4 = _mm_max_ss( max4, _mm_movehdup_ps( max4 ) );
|
||||
const float maxScalar = _mm_cvtss_f32( max4 );
|
||||
|
||||
// Quantize these floats
|
||||
const float d = maxScalar / 127.f;
|
||||
y[i].d = d;
|
||||
const float id = ( maxScalar != 0.0f ) ? 127.f / maxScalar : 0.0f;
|
||||
const __m256 mul = _mm256_set1_ps( id );
|
||||
|
||||
// Apply the multiplier
|
||||
v0 = _mm256_mul_ps( v0, mul );
|
||||
v1 = _mm256_mul_ps( v1, mul );
|
||||
v2 = _mm256_mul_ps( v2, mul );
|
||||
v3 = _mm256_mul_ps( v3, mul );
|
||||
|
||||
// Round to nearest integer
|
||||
v0 = _mm256_round_ps( v0, _MM_ROUND_NEAREST );
|
||||
v1 = _mm256_round_ps( v1, _MM_ROUND_NEAREST );
|
||||
v2 = _mm256_round_ps( v2, _MM_ROUND_NEAREST );
|
||||
v3 = _mm256_round_ps( v3, _MM_ROUND_NEAREST );
|
||||
|
||||
// Convert floats to integers
|
||||
__m256i i0 = _mm256_cvtps_epi32( v0 );
|
||||
__m256i i1 = _mm256_cvtps_epi32( v1 );
|
||||
__m256i i2 = _mm256_cvtps_epi32( v2 );
|
||||
__m256i i3 = _mm256_cvtps_epi32( v3 );
|
||||
|
||||
#if defined(__AVX2__)
|
||||
// Convert int32 to int16
|
||||
i0 = _mm256_packs_epi32( i0, i1 ); // 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15
|
||||
i2 = _mm256_packs_epi32( i2, i3 ); // 16, 17, 18, 19, 24, 25, 26, 27, 20, 21, 22, 23, 28, 29, 30, 31
|
||||
// Convert int16 to int8
|
||||
i0 = _mm256_packs_epi16( i0, i2 ); // 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
|
||||
|
||||
// We got our precious signed bytes, but the order is now wrong
|
||||
// These AVX2 pack instructions process 16-byte pieces independently
|
||||
// The following instruction is fixing the order
|
||||
const __m256i perm = _mm256_setr_epi32( 0, 4, 1, 5, 2, 6, 3, 7 );
|
||||
i0 = _mm256_permutevar8x32_epi32( i0, perm );
|
||||
|
||||
_mm256_storeu_si256((__m256i *)y[i].qs, i0);
|
||||
#else
|
||||
// Since we don't have in AVX some necessary functions,
|
||||
// we split the registers in half and call AVX2 analogs from SSE
|
||||
__m128i ni0 = _mm256_castsi256_si128( i0 );
|
||||
__m128i ni1 = _mm256_extractf128_si256( i0, 1);
|
||||
__m128i ni2 = _mm256_castsi256_si128( i1 );
|
||||
__m128i ni3 = _mm256_extractf128_si256( i1, 1);
|
||||
__m128i ni4 = _mm256_castsi256_si128( i2 );
|
||||
__m128i ni5 = _mm256_extractf128_si256( i2, 1);
|
||||
__m128i ni6 = _mm256_castsi256_si128( i3 );
|
||||
__m128i ni7 = _mm256_extractf128_si256( i3, 1);
|
||||
|
||||
// Convert int32 to int16
|
||||
ni0 = _mm_packs_epi32( ni0, ni1 );
|
||||
ni2 = _mm_packs_epi32( ni2, ni3 );
|
||||
ni4 = _mm_packs_epi32( ni4, ni5 );
|
||||
ni6 = _mm_packs_epi32( ni6, ni7 );
|
||||
// Convert int16 to int8
|
||||
ni0 = _mm_packs_epi16( ni0, ni2 );
|
||||
ni4 = _mm_packs_epi16( ni4, ni6 );
|
||||
|
||||
_mm_storeu_si128((__m128i *)(y[i].qs + 0), ni0);
|
||||
_mm_storeu_si128((__m128i *)(y[i].qs + 16), ni4);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
// scalar
|
||||
quantize_row_q8_0_reference(x, y, k);
|
||||
#endif
|
||||
}
|
||||
|
||||
// reference implementation for deterministic creation of model files
|
||||
static void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k) {
|
||||
assert(QK8_1 == 32);
|
||||
assert(k % QK8_1 == 0);
|
||||
const int nb = k / QK8_1;
|
||||
|
||||
|
|
7
prompts/chat-with-vicuna-v0.txt
Normal file
7
prompts/chat-with-vicuna-v0.txt
Normal file
|
@ -0,0 +1,7 @@
|
|||
A chat between a curious human ("[[USER_NAME]]") and an artificial intelligence assistant ("[[AI_NAME]]"). The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
|
||||
### [[USER_NAME]]: Hello, [[AI_NAME]].
|
||||
### [[AI_NAME]]: Hello. How may I help you today?
|
||||
### [[USER_NAME]]: Please tell me the largest city in Europe.
|
||||
### [[AI_NAME]]: Sure. The largest city in Europe is Moscow, the capital of Russia.
|
||||
### [[USER_NAME]]:
|
7
prompts/chat-with-vicuna-v1.txt
Normal file
7
prompts/chat-with-vicuna-v1.txt
Normal file
|
@ -0,0 +1,7 @@
|
|||
A chat between a curious human ("[[USER_NAME]]") and an artificial intelligence assistant ("[[AI_NAME]]"). The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
|
||||
[[USER_NAME]]: Hello, [[AI_NAME]].
|
||||
[[AI_NAME]]: Hello. How may I help you today?
|
||||
[[USER_NAME]]: Please tell me the largest city in Europe.
|
||||
[[AI_NAME]]: Sure. The largest city in Europe is Moscow, the capital of Russia.
|
||||
[[USER_NAME]]:
|
28
prompts/chat.txt
Normal file
28
prompts/chat.txt
Normal file
|
@ -0,0 +1,28 @@
|
|||
Text transcript of a never ending dialog, where [[USER_NAME]] interacts with an AI assistant named [[AI_NAME]].
|
||||
[[AI_NAME]] is helpful, kind, honest, friendly, good at writing and never fails to answer [[USER_NAME]]'s requests immediately and with details and precision.
|
||||
There are no annotations like (30 seconds passed...) or (to himself), just what [[USER_NAME]] and [[AI_NAME]] say aloud to each other.
|
||||
The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long.
|
||||
The transcript only includes text, it does not include markup like HTML and Markdown.
|
||||
|
||||
[[USER_NAME]]: Hello, [[AI_NAME]]!
|
||||
[[AI_NAME]]: Hello [[USER_NAME]]! How may I help you today?
|
||||
[[USER_NAME]]: What year is it?
|
||||
[[AI_NAME]]: We are in [[DATE_YEAR]].
|
||||
[[USER_NAME]]: Please tell me the largest city in Europe.
|
||||
[[AI_NAME]]: The largest city in Europe is Moscow, the capital of Russia.
|
||||
[[USER_NAME]]: What can you tell me about Moscow?
|
||||
[[AI_NAME]]: Moscow, on the Moskva River in western Russia, is the nation's cosmopolitan capital. In its historic core is the Kremlin, a complex that's home to the president and tsarist treasures in the Armoury. Outside its walls is Red Square, Russia’s symbolic center.
|
||||
[[USER_NAME]]: What is a cat?
|
||||
[[AI_NAME]]: A cat is a domestic species of small carnivorous mammal. It is the only domesticated species in the family Felidae.
|
||||
[[USER_NAME]]: How do I pass command line arguments to a Node.js program?
|
||||
[[AI_NAME]]: The arguments are stored in process.argv.
|
||||
|
||||
argv[0] is the path to the Node.js executable.
|
||||
argv[1] is the path to the script file.
|
||||
argv[2] is the first argument passed to the script.
|
||||
argv[3] is the second argument passed to the script and so on.
|
||||
[[USER_NAME]]: Name a color.
|
||||
[[AI_NAME]]: Blue.
|
||||
[[USER_NAME]]: What time is it?
|
||||
[[AI_NAME]]: It is [[DATE_TIME]].
|
||||
[[USER_NAME]]:
|
|
@ -1,78 +1,77 @@
|
|||
import os
|
||||
import hashlib
|
||||
|
||||
def sha256sum(file, block_size=16 * 1024 * 1024):
    """Return the hexadecimal SHA-256 digest of the file at path ``file``.

    The file is read unbuffered into a single reusable buffer, so memory use
    is bounded by ``block_size`` regardless of the file size.

    Args:
        file: Path of the file to hash.
        block_size: Size in bytes of the read buffer (default 16 MB).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``block_size`` is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if block_size < 1:
        # A zero-length buffer would make readinto() return 0 immediately and
        # silently produce the digest of empty input.
        raise ValueError("block_size must be a positive integer")

    b = bytearray(block_size)
    file_hash = hashlib.sha256()
    mv = memoryview(b)  # lets us slice the buffer without copying
    with open(file, 'rb', buffering=0) as f:
        while True:
            n = f.readinto(mv)
            if not n:
                break
            # Only the first n bytes of the buffer are valid for this read
            file_hash.update(mv[:n])

    return file_hash.hexdigest()
|
||||
|
||||
# Verify every file listed in SHA256SUMS (located in the parent folder of this
# script's directory) and print a table showing, per file, whether its
# checksum matches and whether the file is missing.

# Define the path to the llama directory (parent folder of script directory)
llama_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))

# Define the file with the list of hashes and filenames
hash_list_file = os.path.join(llama_path, "SHA256SUMS")

# Check if the hash list file exists
if not os.path.exists(hash_list_file):
    print(f"Hash list file not found: {hash_list_file}")
    # SystemExit works without the site-provided exit() helper
    raise SystemExit(1)

# Read the hash file content and split it into an array of lines
with open(hash_list_file, "r") as f:
    hash_list = f.read().splitlines()

# Create an array to store the results
results = []

# Loop over each line in the hash list
for line in hash_list:
    # Blank lines carry no entry; skip them instead of failing to unpack
    if not line.strip():
        continue

    # Split the line into hash and filename on the first run of whitespace,
    # so both "hash file" and the two-space "hash  file" layouts are accepted
    # and spaces inside the filename are preserved
    hash_value, filename = line.split(maxsplit=1)

    # Get the full path of the file by joining the llama path and the filename
    file_path = os.path.join(llama_path, filename)

    # Informing user of the progress of the integrity check
    print(f"Verifying the checksum of {file_path}")

    # Check if the file exists
    if os.path.exists(file_path):
        # Calculate the SHA256 checksum of the file using hashlib
        file_hash = sha256sum(file_path)

        # Compare the file hash with the expected hash
        # (hexdigest() is lowercase, so normalize the expected value)
        if file_hash == hash_value.lower():
            valid_checksum = "V"
            file_missing = ""
        else:
            valid_checksum = ""
            file_missing = ""
    else:
        valid_checksum = ""
        file_missing = "X"

    # Add the results to the array
    results.append({
        "filename": filename,
        "valid checksum": valid_checksum,
        "file missing": file_missing
    })


# Print column headers for results table
print("\n" + "filename".ljust(40) + "valid checksum".center(20) + "file missing".center(20))
print("-" * 80)

# Output the results as a table
for r in results:
    print(f"{r['filename']:40} {r['valid checksum']:^20} {r['file missing']:^20}")
|
||||
|
||||
import os
|
||||
import hashlib
|
||||
|
||||
def sha256sum(file, block_size=16 * 1024 * 1024):
    """Return the hexadecimal SHA-256 digest of the file at path ``file``.

    The file is read unbuffered into a single reusable buffer, so memory use
    is bounded by ``block_size`` regardless of the file size.

    Args:
        file: Path of the file to hash.
        block_size: Size in bytes of the read buffer (default 16 MB).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``block_size`` is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if block_size < 1:
        # A zero-length buffer would make readinto() return 0 immediately and
        # silently produce the digest of empty input.
        raise ValueError("block_size must be a positive integer")

    b = bytearray(block_size)
    file_hash = hashlib.sha256()
    mv = memoryview(b)  # lets us slice the buffer without copying
    with open(file, 'rb', buffering=0) as f:
        while True:
            n = f.readinto(mv)
            if not n:
                break
            # Only the first n bytes of the buffer are valid for this read
            file_hash.update(mv[:n])

    return file_hash.hexdigest()
|
||||
|
||||
# Verify every file listed in SHA256SUMS (located in the parent folder of this
# script's directory) and print a table showing, per file, whether its
# checksum matches and whether the file is missing.

# Define the path to the llama directory (parent folder of script directory)
llama_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))

# Define the file with the list of hashes and filenames
hash_list_file = os.path.join(llama_path, "SHA256SUMS")

# Check if the hash list file exists
if not os.path.exists(hash_list_file):
    print(f"Hash list file not found: {hash_list_file}")
    # SystemExit works without the site-provided exit() helper
    raise SystemExit(1)

# Read the hash file content and split it into an array of lines
with open(hash_list_file, "r") as f:
    hash_list = f.read().splitlines()

# Create an array to store the results
results = []

# Loop over each line in the hash list
for line in hash_list:
    # Blank lines carry no entry; skip them instead of failing to unpack
    if not line.strip():
        continue

    # Split the line into hash and filename on the first run of whitespace,
    # so both "hash file" and the two-space "hash  file" layouts are accepted
    # and spaces inside the filename are preserved
    hash_value, filename = line.split(maxsplit=1)

    # Get the full path of the file by joining the llama path and the filename
    file_path = os.path.join(llama_path, filename)

    # Informing user of the progress of the integrity check
    print(f"Verifying the checksum of {file_path}")

    # Check if the file exists
    if os.path.exists(file_path):
        # Calculate the SHA256 checksum of the file using hashlib
        file_hash = sha256sum(file_path)

        # Compare the file hash with the expected hash
        # (hexdigest() is lowercase, so normalize the expected value)
        if file_hash == hash_value.lower():
            valid_checksum = "V"
            file_missing = ""
        else:
            valid_checksum = ""
            file_missing = ""
    else:
        valid_checksum = ""
        file_missing = "X"

    # Add the results to the array
    results.append({
        "filename": filename,
        "valid checksum": valid_checksum,
        "file missing": file_missing
    })


# Print column headers for results table
print("\n" + "filename".ljust(40) + "valid checksum".center(20) + "file missing".center(20))
print("-" * 80)

# Output the results as a table
for r in results:
    print(f"{r['filename']:40} {r['valid checksum']:^20} {r['file missing']:^20}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue