Remove extraneous files
parent d532d5b1f7
commit 4089657815
10 changed files with 0 additions and 712 deletions
@@ -1,181 +0,0 @@
# running Mixtral in a loop

# Needs a zsh change to max memory using
# sudo sysctl iogpu.wired_limit_mb=27500 (anything bigger crashes easily)

import os
import subprocess
import re
import psutil
import threading
import time
import queue

def get_pid():
    # Start from this process and walk up the parent chain
    current_pid = os.getpid()
    parent_pid = psutil.Process(current_pid).ppid()

    # Iterate through the parent processes to find the outermost Python process
    while parent_pid is not None:
        try:
            parent_proc = psutil.Process(parent_pid)
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            parent_pid = None
        else:
            if 'python' in parent_proc.name():
                # Remember this ancestor and keep climbing
                current_pid = parent_pid
                parent_pid = parent_proc.ppid()
            else:
                break

    # Print the PID of the running Python script
    print(f"The PID of the running Python script is: {current_pid}")

    return current_pid

def get_cpu_percent():
    cpu_percent = psutil.cpu_percent()  # system-wide CPU usage since the last call
    return cpu_percent

def get_memory_info():
    mem_info = psutil.virtual_memory()
    return {
        'total': mem_info.total,
        'used': mem_info.used,
        'percent': mem_info.percent
    }

def get_threads():
    # Get the PID of the process we want to inspect
    pid = get_pid()

    # Get the process object
    process = psutil.Process(pid)

    # Print the number of threads used by the process
    print("Number of threads:", len(process.threads()))

    # Iterate over the threads and print their attributes
    for thread in process.threads():
        print(f"Thread ID: {thread.id}")
        print(f"Thread system time: {thread.system_time}")
        print(f"Thread user time: {thread.user_time}")

def find_time_and_tokens(string):
    # Regular expressions for the llama.cpp timing line and the expert-used-count line
    # (whitespace-tolerant, since llama.cpp pads its log columns with spaces)
    pattern = r"llama_print_timings: total time =\s*(\d+(\.\d+)?)\s*ms /\s*(\d+)"
    pattern2 = r"llama_model_loader: - kv\s+10:\s+llama\.expert_used_count\s+u32\s+=\s+(\d+)"

    # Search for the patterns in the captured stderr
    match = re.search(pattern, string)
    match2 = re.search(pattern2, string)

    if match:
        # Extract the total time and token count from the matched groups
        total_time = float(match.group(1))
        token_count = int(match.group(3))

        print(f"Total time taken: {total_time} ms")
        print(f"Token consumption count: {token_count}")
    else:
        print("Could not find the total time and token count in the output.")

    if match2:
        # Extract the number of experts used from the matched group
        experts_used = int(match2.group(1))

        print(f"Number of experts used: {experts_used}")
    else:
        print("Could not find the total number of experts used in the process.")

def command_setup(return_queue, prompt="How can I use the python psutil package to calculate CPU and memory usage in a run?"):

    prompt2 = f" [INST] {prompt} [/INST] "
    kv_override = "llama_kv_expert_used_count=int:3"
    command = [
        '/Users/edsilm2/llama.cpp/build/bin/main',
        '-m', '/Users/edsilm2/llama.cpp/models/Mixtral-8x7b-Q2_K/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf',
        '-p', prompt2,
        '-ngl', '99',
        '-c', '4096',
        '-n', '-1',
        '-s', '1',
        '-ctk', 'q8_0',
        '--override-kv', kv_override  # this doesn't have any effect on the LOG, which doesn't reflect kv overrides (they say)
    ]

    response = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # communicate() drains both pipes; calling wait() first can deadlock once a pipe fills
    stdout, stderr = response.communicate()
    exit_code = response.returncode

    # Decode stderr up front so it is available on both branches below
    output_err = stderr.decode('utf-8').strip()

    # Check if the command was successful (exit code 0 usually means success)
    if exit_code == 0:
        print(f"\nUser input: \033[31m{prompt}\033[0m\n")
        # Convert the output bytes to a string and print it
        output_str = stdout.decode('utf-8').strip()
        print(f"Output: \033[33m{output_str}\033[0m\n")
    else:
        # There was an error, print the error message
        print('Error:', output_err)

    find_time_and_tokens(output_err)

    cpu_percent_usage = get_cpu_percent()
    print(f"CPU percentage usage = {cpu_percent_usage}\n")

    get_threads()

    memory_info = get_memory_info()
    print(f"Memory usage: Total = {memory_info['total']} Used = {memory_info['used']} Percentage = {memory_info['percent']}")

    # Put return values on the queue
    return_queue.put((stdout, stderr, exit_code))

def check_response(response):
    start = time.time()
    while time.time() - start < 30:
        if response.poll() is not None:
            break
        time.sleep(1)

    if response.poll() is None:
        print("Killing process")
        response.kill()

if __name__ == "__main__":

    prompt = "Who are you?"
    while prompt != "quit":

        # original user prompt was here

        q = queue.Queue()

        #response, error, code = command_setup(prompt)

        thread = threading.Thread(target=command_setup, args=(q, prompt))
        thread.start()

        # Wait with a timeout so the main loop stays responsive
        thread.join(timeout=5)

        # Get return values from the queue
        if not q.empty():
            stdout, stderr, exit_code = q.get()

        prompt = input("Awaiting the reply from mixtral ... ")
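
A quick way to sanity-check the timing regex in find_time_and_tokens is to feed it a fabricated stderr line; the sample text below is made up to match the pattern, not captured llama.cpp output:

import re
sample = "llama_print_timings: total time =    8413.51 ms /   211 tokens"
m = re.search(r"llama_print_timings: total time =\s*(\d+(\.\d+)?)\s*ms /\s*(\d+)", sample)
if m:
    print(float(m.group(1)), int(m.group(3)))  # 8413.51 211
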
@@ -1,2 +0,0 @@
for i in range(10):
    print(i, i**2)

File diff suppressed because one or more lines are too long
@@ -1,58 +0,0 @@
import os
from ctransformers import AutoModelForCausalLM
# Requires SCIPHI_API_KEY in the environment
from agent_search import SciPhi

def initialise():
    # Never hard-code the API key here; export SCIPHI_API_KEY in the shell instead.
    # os.environ is a mapping, not a callable, so read it with .get()
    if not os.environ.get("SCIPHI_API_KEY"):
        raise RuntimeError("SCIPHI_API_KEY is not set in the environment")
    return SciPhi()

'''def get_chat_completion(
    self, conversation: list[dict], generation_config: GenerationConfig
) -> str:
    self._check_stop_token(generation_config.stop_token)
    prompt = ""
    added_system_prompt = False
    for message in conversation:
        if message["role"] == "system":
            prompt += f"### System:\n{SciPhiLLMInterface.ALPACA_CHAT_SYSTEM_PROMPT}. Further, the assistant is given the following additional instructions - {message['content']}\n\n"
            added_system_prompt = True
        elif message["role"] == "user":
            last_user_message = message["content"]
            prompt += f"### Instruction:\n{last_user_message}\n\n"
        elif message["role"] == "assistant":
            prompt += f"### Response:\n{message['content']}\n\n"

    if not added_system_prompt:
        prompt = f"### System:\n{SciPhiLLMInterface.ALPACA_CHAT_SYSTEM_PROMPT}.\n\n{prompt}"

    context = self.rag_interface.get_contexts([last_user_message])[0]
    prompt += f"### Response:\n{SciPhiFormatter.RETRIEVAL_TOKEN} {SciPhiFormatter.INIT_PARAGRAPH_TOKEN}{context}{SciPhiFormatter.END_PARAGRAPH_TOKEN}"
    latest_completion = self.model.get_instruct_completion(
        prompt, generation_config
    ).strip()

    return SciPhiFormatter.remove_cruft(latest_completion)
'''

def perform_search(client):
    # Perform a search
    search_response = client.search(query='Quantum Field Theory', search_provider='agent-search')
    print(search_response)
    # example: [{ 'score': '.89', 'url': 'https://...', 'metadata': {...} }]

    # Generate a RAG response
    rag_response = client.get_search_rag_response(query='latest news', search_provider='bing', llm_model='SciPhi/Sensei-7B-V1')
    print(rag_response)
    # example: { 'response': '...', 'other_queries': '...', 'search_results': '...' }

if __name__ == "__main__":

    client = initialise()

    # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
    llm = AutoModelForCausalLM.from_pretrained("models/", model_file="sciphi-self-rag-mistral-7b-32k.Q5_K_M.gguf", model_type="mistral", gpu_layers=50)

    print(llm("In 2024 AI is going to"))

    perform_search(client)
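
For interactive use, the same ctransformers model can stream tokens as they are generated rather than returning one block of text; a minimal sketch, assuming the same local model file and a ctransformers version whose call accepts stream=True:

from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "models/",
    model_file="sciphi-self-rag-mistral-7b-32k.Q5_K_M.gguf",
    model_type="mistral",
    gpu_layers=0,  # 0 = stay on the CPU
)
for chunk in llm("In 2024 AI is going to", stream=True):
    print(chunk, end="", flush=True)
print()
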
@@ -1,58 +0,0 @@
import threading
import queue
import requests

def print_dict(data):
    # Walk nested dicts/lists and print any "content" entries
    if isinstance(data, dict):
        for key, value in data.items():
            if key == "content":
                print(f"Key: {key:>30}: {value}")
            else:
                print_dict(value)
    elif isinstance(data, list):
        for entry in data:
            print_dict(entry)

def producer(countries):
    # Generate test requests and add them to the queue
    for i in range(10):  # Adjust for desired load
        request_data = f"What is the capital of {countries[i % len(countries)]}?"
        print(f"Request: {request_data}")
        requests_queue.put(request_data)

def consumer():
    while True:
        request_data = requests_queue.get()
        try:
            print(f"Processing {request_data}")
            response = requests.post("http://localhost:8080", data=request_data)
            # parse the JSON body so print_dict has a structure to walk
            print_dict(response.json())
        except Exception as e:
            print(f"Exception {e}\n")
        finally:
            requests_queue.task_done()

# Define your test request data
requests_queue = queue.Queue()

# number of threads
num_threads = 5

# some test data
country_list = ["France", "Germany", "China", "USA", "Italy", "India",
                "Ukraine", "Japan", "Australia", "New Zealand", "Indonesia", "Nigeria",
                "Saudi Arabia", "Israel", "Egypt", "Kenya", "Chile", "Mexico", "Canada"]

# Create producer and consumer threads; consumers are daemons so they
# don't keep the process alive once the queued work is done
producer_thread = threading.Thread(target=producer, args=(country_list,))
consumer_threads = [threading.Thread(target=consumer, daemon=True) for _ in range(num_threads)]

# Start threads and wait for the queue to drain
producer_thread.start()
for thread in consumer_threads:
    thread.start()

producer_thread.join()
requests_queue.join()  # joining the infinite-loop consumer threads directly would block forever

print("Stress test completed!")
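
Note that if the server on port 8080 is a llama.cpp example server, it expects a JSON body on its /completion endpoint rather than a raw form post; a hedged variant of the consumer's request (endpoint and field names as documented for llama.cpp's server, so verify against your build):

import requests

r = requests.post(
    "http://localhost:8080/completion",
    json={"prompt": "What is the capital of France?", "n_predict": 64},
)
print(r.json().get("content"))
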
@@ -1,40 +0,0 @@
# stock market predictions

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

def prepare_data(df, forecast_col, forecast_out, test_size):
    label = df[forecast_col].shift(-forecast_out)  # new 'label' column; its last forecast_out rows are NaN
    X = np.array(df[[forecast_col]])               # creating the feature array
    X = preprocessing.scale(X)                     # scaling the feature array
    X_lately = X[-forecast_out:]                   # the rows we will predict on later
    X = X[:-forecast_out]                          # X that will contain the training and testing data
    label.dropna(inplace=True)                     # drop the NaN labels
    y = np.array(label)                            # assigning y
    X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=test_size, random_state=0)  # cross-validation split

    response = [X_train, X_test, Y_train, Y_test, X_lately]
    return response

df = pd.read_csv("prices.csv")
df = df[df.symbol == "GOOG"]

forecast_col = 'close'
forecast_out = 5
test_size = 0.2

# calling the method where the cross validation and data preparation are done
X_train, X_test, Y_train, Y_test, X_lately = prepare_data(df, forecast_col, forecast_out, test_size)
learner = LinearRegression()  # initializing the linear regression model

learner.fit(X_train, Y_train)  # training the linear regression model

score = learner.score(X_test, Y_test)  # testing the linear regression model
forecast = learner.predict(X_lately)   # the forecasted data
response = {}                          # creating a results dict
response['test_score'] = score
response['forecast_set'] = forecast

print(response)
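
The shift(-forecast_out) step is the heart of prepare_data: each row's label becomes the close price forecast_out rows later, and the last forecast_out rows end up with NaN labels, which is exactly why they are split off into X_lately. On a toy frame:

import pandas as pd
s = pd.DataFrame({'close': [10, 11, 12, 13, 14, 15]})
s['label'] = s['close'].shift(-2)
print(s)
#    close  label
# 0     10   12.0
# 1     11   13.0
# 2     12   14.0
# 3     13   15.0
# 4     14    NaN
# 5     15    NaN
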
@@ -1,29 +0,0 @@
# A simple illustration of how to represent cache occupancy
# graphically using unicode blocks,
# which are generated using print("\u2588"), print("\u2591")

from time import sleep
import random

CACHE_SIZE = 50
used_blocks = [5, 3, 2, 1, 10, 2, 6, 4, 7, 10]

def visualize_kv_cache(used_blocks, total_size):
    cache_viz = "["
    tot_used = 0
    for i in range(len(used_blocks)):
        cache_viz += "\u2588" * used_blocks[i]                 # filled blocks
        cache_viz += "\u2591" * (total_size - used_blocks[i])  # empty blocks
        cache_viz += f"{used_blocks[i]:3.0f}/{total_size}]\r["
        tot_used += used_blocks[i]

    print(f"\r{cache_viz}] {tot_used}/{len(used_blocks) * total_size}", end="")

while True:
    visualize_kv_cache(used_blocks, CACHE_SIZE)
    sleep(0.5)
    used_blocks = used_blocks[1:] + [random.randint(0, 50)]  # update used blocks
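
The display depends on one terminal trick: a carriage return without a newline moves the cursor back to column 0, so the next print overwrites the same line in place. In isolation:

from time import sleep

for pct in range(0, 101, 10):
    bar = "\u2588" * (pct // 10) + "\u2591" * (10 - pct // 10)
    print(f"\r[{bar}] {pct:3d}%", end="", flush=True)
    sleep(0.2)
print()
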
@@ -1,26 +0,0 @@
// just trying to get the cursor position

#include <cstdio>

struct CursorPos {
    int x;
    int y;
};

static CursorPos getCursorPos() {
    // Querying the text cursor needs a platform API (terminal escape codes or a
    // windowing call); none is assumed here, so return a placeholder position.
    CursorPos pos;
    pos.x = 0;
    pos.y = 0;
    return pos;
}

int main() {
    CursorPos cursor = getCursorPos();
    // %d matches int (%zu is for size_t)
    printf("The x co-ordinate of the cursor is %d; the y co-ordinate of the cursor is %d\n", cursor.x, cursor.y);
}
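
The C++ snippet above leaves the actual platform query unfilled. On an ANSI-capable terminal, one way to get the text cursor position is the Device Status Report escape (ESC [6n), which makes the terminal write the position back on stdin; a sketch in Python, assuming a Unix-like terminal (termios is unavailable on Windows):

import sys, re, termios, tty

def get_cursor_pos():
    fd = sys.stdin.fileno()
    old = termios.tcgetattr(fd)
    try:
        tty.setcbreak(fd)            # read the reply byte-by-byte, unbuffered
        sys.stdout.write("\x1b[6n")  # ask the terminal for the cursor position
        sys.stdout.flush()
        reply = ""
        while not reply.endswith("R"):  # reply looks like ESC[row;colR
            reply += sys.stdin.read(1)
    finally:
        termios.tcsetattr(fd, termios.TCSADRAIN, old)
    m = re.search(r"\[(\d+);(\d+)R", reply)
    return (int(m.group(1)), int(m.group(2))) if m else (0, 0)

print(get_cursor_pos())
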
@@ -1,76 +0,0 @@
/*
    A utility to represent the kv-cache occupancy graphically
    Takes as parameters
    - total cache size (-c)
    - number of simultaneous accesses/slots (-np)
    - a parameter related to the display context (max window width - data display requirements)
    It then uses a trick borrowed from tqdm to display occupancy
    TODO: Show contiguous space and block availability
*/
#include <iostream>
#include <iomanip>
#include <string>
#include <vector>
#include <cstdlib> // for rand()

// a custom function to display graphics of the kv-cache status
// (assumes llama.cpp server types llama_client_slot, PROCESSING/IDLE and
// LOAD_PROMPT/RELEASE/NONE are in scope where this is pasted)
static void show_kvcache(std::vector<std::pair<int, struct llama_client_slot>> used_blocks, int cache_size) {

    int max_length = 128;
    int num_blocks = used_blocks.size();
    std::string slot_symbol1 = "";
    std::string slot_symbol2 = "";
    std::string slot_symbol3 = "";
    // static, so the screen is only cleared on the first call
    static bool cls_flag = true;

    // guard before indexing used_blocks[0] or dividing by num_blocks
    if ((used_blocks.size() == 0) || (used_blocks[0].first == 0)) {
        return;
    }
    int slot_cache_size = cache_size / num_blocks;

    return; // remove when not in debug mode

    // Print visualization
    // Always start at the top left of the window (H means 'move cursor to this position'; 2J = cls)
    // Only clear the screen the first time round
    if (cls_flag) {
        printf("\033[2J");
        cls_flag = false;
    }
    printf("\033[1;0H\033[K**************************\n\033[KKVcache occupancy by slot:\n\033[K**************************\n");
    for (int i = 0; i < num_blocks; i++) {
        printf("\033[K"); // clear the current line
        for (int j = 0; j < max_length; j++) {
            int used = used_blocks[i].first * max_length / slot_cache_size;
            if ((j < max_length / 2) && (j < used)) {
                printf("\033[90m█\033[0m");
            } else if (j < used) {
                printf("\033[94m█\033[0m");
            } else {
                printf("\033[91m█\033[0m");
            }
        }
        if (used_blocks[i].second.state == PROCESSING) {
            slot_symbol1 = "\u23F0"; // clock symbol = processing
        } else if (used_blocks[i].second.state == IDLE) {
            slot_symbol1 = "\u2705"; // check mark = idle
        } else {
            slot_symbol1 = "\u2620"; // skull and crossbones symbol = dead?
        }
        if (used_blocks[i].second.command == LOAD_PROMPT) {
            slot_symbol2 = "\u24C1"; // dingbat L = loading
        } else if (used_blocks[i].second.command == RELEASE) {
            slot_symbol2 = "\u24C7"; // dingbat R = release
        } else if (used_blocks[i].second.command == NONE) {
            slot_symbol2 = "\u24C3"; // dingbat N = none
        }
        if (used_blocks[i].first == slot_cache_size) {
            slot_symbol3 = "\u274E"; // cross mark = slot full
        } else {
            slot_symbol3 = "";
        }
        printf(" %4d/%5d %2d %s %s %s\n", used_blocks[i].first, slot_cache_size, used_blocks[i].second.id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str());
    }
    printf("\n\033[0J"); // erase from the cursor to the end of the screen
}
@@ -1,68 +0,0 @@
/*
    A utility to represent the kv-cache occupancy graphically
    Takes as parameters
    - total cache size (-c)
    - number of simultaneous accesses/slots (-np)
    - a parameter related to the display context (max window width - data display requirements)
    It then uses a trick borrowed from tqdm to display occupancy
    TODO: Show contiguous space and block availability
*/
#include <iostream>
#include <iomanip>
#include <string>
#include <vector>
#include <cstdlib> // for rand()

static void show_kvcache(
    std::vector<int> used_blocks,
    int cache_size,
    int max_length
) {
    int num_blocks = used_blocks.size();
    int slot_cache_size = cache_size / num_blocks;

    while (true) {

        // Print the visualization: one bar per slot
        for (int i = 0; i < num_blocks; i++) {
            for (int j = 0; j < max_length; j++) {
                if (j < used_blocks[i] * max_length / slot_cache_size) {
                    std::cout << "\033[94m█\033[0m"; // used portion (blue)
                } else {
                    std::cout << "\033[91m█\033[0m"; // free portion (red)
                }
            }
            std::cout << " " << std::setw(5) << used_blocks[i] << "/" << std::setw(5) << slot_cache_size << std::endl;
        }
        std::cout << "{";
        std::string upcursor = "\033[K\033[A\033[K";
        for (int i = 0; i < num_blocks; i++) {
            upcursor += "\033[A\033[K"; // one cursor-up + erase-line per bar row
        }

        // Remove the first element
        used_blocks.erase(used_blocks.begin());

        // Add a new random block at the end
        int new_block = rand() % slot_cache_size;
        used_blocks.push_back(new_block);

        // Adjust the cursor so that the display overwrites itself
        upcursor += "\033[A\033[K";
        std::cout << "}" << std::endl;
        std::cin.get(); // press Enter to advance to the next frame
        std::cout << upcursor;
    }
}

int main() {
    std::vector<int> used_blocks = {64, 64, 64, 64, 64, 64, 64, 64, 64, 46, 46, 46, 46, 46, 46, 46, 46, 46};
    int cache_size = 65536;
    int max_length = 128;
    show_kvcache(used_blocks, cache_size, max_length);
}
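
The "trick borrowed from tqdm" that both utilities rely on reduces to two ANSI escapes: ESC[A moves the cursor up one line and ESC[K erases to the end of the line, so a multi-line display can be redrawn in place on every frame. Stripped to essentials in Python:

import time, random

full, empty = "\u2588", "\u2591"
rows = [0, 0, 0]
for frame in range(20):
    if frame:
        print(f"\033[{len(rows)}A", end="")  # jump back up over the previous frame
    for r in rows:
        print(f"\033[K{full * r}{empty * (20 - r)} {r:2d}/20")  # erase, then redraw the row
    time.sleep(0.2)
    rows = [random.randint(0, 20) for _ in rows]
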