@@ -273,9 +277,13 @@
import { createApp, defineComponent, shallowRef, computed, h } from './deps_vue.esm-browser.js';
import { llama } from './completion.js';
+ // utility functions
const isString = (x) => !!x.toLowerCase;
const isNumeric = (n) => !isString(n) && !isNaN(n);
+ const escapeAttr = (str) => str.replace(/>/g, '&gt;').replace(/"/g, '&quot;');
+ const copyStr = (str) => navigator.clipboard.writeText(str);
+ // constants
const BASE_URL = localStorage.getItem('base') // for debugging
|| (new URL('.', document.baseURI).href).toString(); // for production
const CONFIG_DEFAULT = {
@@ -305,7 +313,7 @@
custom: '', // custom json-stringified object
};
const CONFIG_INFO = {
- apiKey: '',
+ apiKey: 'Set the API Key if you are using --api-key option for the server.',
systemMessage: 'The starting message that defines how model should behave.',
samplers: 'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
temperature: 'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
@@ -336,19 +344,28 @@
// markdown support
const VueMarkdown = defineComponent(
(props) => {
- const md = shallowRef(new markdownit(props.options ?? { breaks: true }));
- for (const plugin of props.plugins ?? []) {
- md.value.use(plugin);
- }
+ const md = shallowRef(new markdownit({ breaks: true }));
+ const origFenchRenderer = md.value.renderer.rules.fence;
+ md.value.renderer.rules.fence = (tokens, idx, ...args) => {
+ const content = tokens[idx].content;
+ const origRendered = origFenchRenderer(tokens, idx, ...args);
+ return `
+
+
+
+ ${origRendered}
+
`;
+ };
+ window.copyStr = copyStr;
const content = computed(() => md.value.render(props.source));
return () => h("div", { innerHTML: content.value });
},
- { props: ["source", "options", "plugins"] }
+ { props: ["source"] }
);
// inout field to be used by settings modal
- const SettingsModalNumericInput = defineComponent({
- template: document.getElementById('settings-modal-numeric-input').innerHTML,
+ const SettingsModalShortInput = defineComponent({
+ template: document.getElementById('settings-modal-short-input').innerHTML,
props: ['configKey', 'configDefault', 'configInfo', 'modelValue'],
});
@@ -401,7 +418,11 @@
if (!conv) return;
const msg = conv.messages.pop();
conv.lastModified = Date.now();
- localStorage.setItem(convId, JSON.stringify(conv));
+ if (conv.messages.length === 0) {
+ StorageUtils.remove(convId);
+ } else {
+ localStorage.setItem(convId, JSON.stringify(conv));
+ }
return msg;
},
@@ -442,7 +463,7 @@
const mainApp = createApp({
components: {
VueMarkdown,
- SettingsModalNumericInput,
+ SettingsModalShortInput,
},
data() {
return {
@@ -599,6 +620,7 @@
this.isGenerating = false;
this.stopGeneration = () => {};
this.fetchMessages();
+ chatScrollToBottom();
},
// message actions
@@ -612,7 +634,7 @@
this.generateMessage(currConvId);
},
copyMsg(msg) {
- navigator.clipboard.writeText(msg.content);
+ copyStr(msg.content);
},
editUserMsgAndRegenerate(msg) {
if (this.isGenerating) return;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 09ec01e13..b8e003be9 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -102,6 +102,12 @@ struct server_task_result {
bool error;
};
+struct server_static_file {
+ const unsigned char * data;
+ unsigned int size;
+ const char * mime_type;
+};
+
struct slot_params {
bool stream = true;
bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
@@ -2267,6 +2273,16 @@ int main(int argc, char ** argv) {
LOG_INF("%s\n", common_params_get_system_info(params).c_str());
LOG_INF("\n");
+ // static files
+ std::map<std::string, server_static_file> static_files = {
+ { "/", { index_html, index_html_len, "text/html; charset=utf-8" }},
+ { "/completion.js", { completion_js, completion_js_len, "text/javascript; charset=utf-8" }},
+ { "/deps_daisyui.min.css", { deps_daisyui_min_css, deps_daisyui_min_css_len, "text/css; charset=utf-8" }},
+ { "/deps_markdown-it.js", { deps_markdown_it_js, deps_markdown_it_js_len, "text/javascript; charset=utf-8" }},
+ { "/deps_tailwindcss.js", { deps_tailwindcss_js, deps_tailwindcss_js_len, "text/javascript; charset=utf-8" }},
+ { "/deps_vue.esm-browser.js", { deps_vue_esm_browser_js, deps_vue_esm_browser_js_len, "text/javascript; charset=utf-8" }},
+ };
+
std::unique_ptr<httplib::Server> svr;
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
if (params.ssl_file_key != "" && params.ssl_file_cert != "") {
@@ -2347,7 +2363,7 @@ int main(int argc, char ** argv) {
// Middlewares
//
- auto middleware_validate_api_key = [&params, &res_error](const httplib::Request & req, httplib::Response & res) {
+ auto middleware_validate_api_key = [&params, &res_error, &static_files](const httplib::Request & req, httplib::Response & res) {
static const std::unordered_set<std::string> public_endpoints = {
"/health",
"/models",
@@ -2359,8 +2375,8 @@ int main(int argc, char ** argv) {
return true;
}
- // If path is public, skip validation
- if (public_endpoints.find(req.path) != public_endpoints.end()) {
+ // If path is public or is static file, skip validation
+ if (public_endpoints.find(req.path) != public_endpoints.end() || static_files.find(req.path) != static_files.end()) {
return true;
}
@@ -3104,13 +3120,6 @@ int main(int argc, char ** argv) {
res.status = 200; // HTTP OK
};
- auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
- return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
- res.set_content(reinterpret_cast<const char*>(content), len, mime_type);
- return false;
- };
- };
-
//
// Router
//
@@ -3125,12 +3134,13 @@ int main(int argc, char ** argv) {
}
} else {
// using embedded static files
- svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8"));
- svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8"));
- svr->Get("/deps_daisyui.min.css", handle_static_file(deps_daisyui_min_css, deps_daisyui_min_css_len, "text/css; charset=utf-8"));
- svr->Get("/deps_markdown-it.js", handle_static_file(deps_markdown_it_js, deps_markdown_it_js_len, "text/javascript; charset=utf-8"));
- svr->Get("/deps_tailwindcss.js", handle_static_file(deps_tailwindcss_js, deps_tailwindcss_js_len, "text/javascript; charset=utf-8"));
- svr->Get("/deps_vue.esm-browser.js", handle_static_file(deps_vue_esm_browser_js, deps_vue_esm_browser_js_len, "text/javascript; charset=utf-8"));
+ for (const auto & it : static_files) {
+ const server_static_file & static_file = it.second;
+ svr->Get(it.first.c_str(), [&static_file](const httplib::Request &, httplib::Response & res) {
+ res.set_content(reinterpret_cast<const char*>(static_file.data), static_file.size, static_file.mime_type);
+ return false;
+ });
+ }
}
// register API routes
diff --git a/ggml/src/ggml-cpu/ggml-cpu-quants.c b/ggml/src/ggml-cpu/ggml-cpu-quants.c
index 7fa2897c2..f0e276b69 100644
--- a/ggml/src/ggml-cpu/ggml-cpu-quants.c
+++ b/ggml/src/ggml-cpu/ggml-cpu-quants.c
@@ -150,6 +150,28 @@ static inline __m128i packNibbles( __m256i bytes )
#endif
}
#elif defined(__AVX__)
+static inline __m128i packNibbles( __m128i bytes1, __m128i bytes2 )
+{
+ // Move bits within 16-bit lanes from 0000_abcd_0000_efgh into 0000_0000_abcd_efgh
+ const __m128i lowByte = _mm_set1_epi16( 0xFF );
+ __m128i high = _mm_andnot_si128( lowByte, bytes1 );
+ __m128i low = _mm_and_si128( lowByte, bytes1 );
+ high = _mm_srli_epi16( high, 4 );
+ bytes1 = _mm_or_si128( low, high );
+ high = _mm_andnot_si128( lowByte, bytes2 );
+ low = _mm_and_si128( lowByte, bytes2 );
+ high = _mm_srli_epi16( high, 4 );
+ bytes2 = _mm_or_si128( low, high );
+
+ return _mm_packus_epi16( bytes1, bytes2);
+}
+
+static inline __m128i mul_add_epi8_sse(const __m128i x, const __m128i y) {
+ const __m128i ax = _mm_sign_epi8(x, x);
+ const __m128i sy = _mm_sign_epi8(y, x);
+ return _mm_maddubs_epi16(ax, sy);
+}
+
// spread 32 bits to 32 bytes { 0x00, 0xFF }
static inline __m256i bytes_from_bits_32(const uint8_t * x) {
uint32_t x32;
@@ -217,26 +239,29 @@ static inline __m256 mul_sum_i8_pairs_float(const __m256i x, const __m256i y) {
return sum_i16_pairs_float(doth, dotl);
}
-static inline __m128i packNibbles( __m128i bytes1, __m128i bytes2 )
-{
- // Move bits within 16-bit lanes from 0000_abcd_0000_efgh into 0000_0000_abcd_efgh
- const __m128i lowByte = _mm_set1_epi16( 0xFF );
- __m128i high = _mm_andnot_si128( lowByte, bytes1 );
- __m128i low = _mm_and_si128( lowByte, bytes1 );
- high = _mm_srli_epi16( high, 4 );
- bytes1 = _mm_or_si128( low, high );
- high = _mm_andnot_si128( lowByte, bytes2 );
- low = _mm_and_si128( lowByte, bytes2 );
- high = _mm_srli_epi16( high, 4 );
- bytes2 = _mm_or_si128( low, high );
+// larger version of mul_sum_i8_pairs_float where x and y are each represented by four 128-bit vectors
+static inline __m256 mul_sum_i8_quad_float(const __m128i x_1_0, const __m128i x_1_1, const __m128i x_2_0, const __m128i x_2_1,
+ const __m128i y_1_0, const __m128i y_1_1, const __m128i y_2_0, const __m128i y_2_1) {
+ const __m128i mone = _mm_set1_epi16(1);
- return _mm_packus_epi16( bytes1, bytes2);
+ const __m128i p16_1_0 = mul_add_epi8_sse(x_1_0, y_1_0);
+ const __m128i p16_1_1 = mul_add_epi8_sse(x_1_1, y_1_1);
+ const __m128i p16_2_0 = mul_add_epi8_sse(x_2_0, y_2_0);
+ const __m128i p16_2_1 = mul_add_epi8_sse(x_2_1, y_2_1);
+ const __m128i p_1_0 = _mm_madd_epi16(p16_1_0, mone);
+ const __m128i p_1_1 = _mm_madd_epi16(p16_1_1, mone);
+ const __m128i p_2_0 = _mm_madd_epi16(p16_2_0, mone);
+ const __m128i p_2_1 = _mm_madd_epi16(p16_2_1, mone);
+ const __m128i p_1 = _mm_add_epi32(p_1_0, p_1_1);
+ const __m128i p_2 = _mm_add_epi32(p_2_0, p_2_1);
+ return _mm256_cvtepi32_ps(MM256_SET_M128I(p_2, p_1));
}
-static inline __m128i mul_add_epi8_sse(const __m128i x, const __m128i y) {
- const __m128i ax = _mm_sign_epi8(x, x);
- const __m128i sy = _mm_sign_epi8(y, x);
- return _mm_maddubs_epi16(ax, sy);
+// quad fp16 delta calculation
+static inline __m256 quad_fp16_delta_float(const float x0, const float y0, const float x1, const float y1) {
+ // GGML_FP16_TO_FP32 is faster than Intel F16C
+ return _mm256_set_m128(_mm_set1_ps(GGML_FP16_TO_FP32(x1) * GGML_FP16_TO_FP32(y1)),
+ _mm_set1_ps(GGML_FP16_TO_FP32(x0) * GGML_FP16_TO_FP32(y0)));
}
#endif
#elif defined(__SSSE3__)
@@ -2004,10 +2029,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
sumf = hsum_float_8(acc);
#elif defined(__AVX__)
- const __m128i mone = _mm_set1_epi16(1);
-
- __m256 accum1 = _mm256_setzero_ps();
- __m256 accum2 = _mm256_setzero_ps();
+ __m256 accum = _mm256_setzero_ps();
for (; ib + 1 < nb; ib += 2) {
const __m128i q4bits_1 = _mm_loadu_si128((const __m128i *)x[ib + 0].qs);
const __m128i q4bits_2 = _mm_loadu_si128((const __m128i *)x[ib + 1].qs);
@@ -2020,21 +2042,20 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
const __m128i q4b_1_1 = _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), _mm_srli_epi16(q4bits_1, 4)), _mm_set1_epi8(8));
const __m128i q4b_2_0 = _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), q4bits_2), _mm_set1_epi8(8));
const __m128i q4b_2_1 = _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), _mm_srli_epi16(q4bits_2, 4)), _mm_set1_epi8(8));
+
const __m128i p16_1_0 = mul_add_epi8_sse(q4b_1_0, q8b_1_0);
const __m128i p16_1_1 = mul_add_epi8_sse(q4b_1_1, q8b_1_1);
const __m128i p16_2_0 = mul_add_epi8_sse(q4b_2_0, q8b_2_0);
const __m128i p16_2_1 = mul_add_epi8_sse(q4b_2_1, q8b_2_1);
- const __m128i p_1_0 = _mm_madd_epi16(p16_1_0, mone);
- const __m128i p_1_1 = _mm_madd_epi16(p16_1_1, mone);
- const __m128i p_2_0 = _mm_madd_epi16(p16_2_0, mone);
- const __m128i p_2_1 = _mm_madd_epi16(p16_2_1, mone);
- accum1 = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(GGML_FP16_TO_FP32(y[ib + 0].d)*GGML_FP16_TO_FP32(x[ib + 0].d)),
- _mm256_cvtepi32_ps(MM256_SET_M128I(p_1_1, p_1_0))), accum1);
- accum2 = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(GGML_FP16_TO_FP32(y[ib + 1].d)*GGML_FP16_TO_FP32(x[ib + 1].d)),
- _mm256_cvtepi32_ps(MM256_SET_M128I(p_2_1, p_2_0))), accum2);
+ const __m128i p_1 = _mm_add_epi16(p16_1_0, p16_1_1);
+ const __m128i p_2 = _mm_add_epi16(p16_2_0, p16_2_1);
+ const __m256 p = sum_i16_pairs_float(p_2, p_1);
+
+ const __m256 deltas = quad_fp16_delta_float(x[ib].d, y[ib].d, x[ib + 1].d, y[ib + 1].d);
+ accum = _mm256_add_ps(_mm256_mul_ps(deltas, p), accum);
}
- sumf = hsum_float_8(_mm256_add_ps(accum1, accum2));
+ sumf = hsum_float_8(accum);
#elif defined(__SSSE3__)
// set constants
const __m128i lowMask = _mm_set1_epi8(0xF);
@@ -3535,7 +3556,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
}
sumf = vaddvq_f32(sumv0) + vaddvq_f32(sumv1);
-#elif defined(__AVX2__) || defined(__AVX__)
+#elif defined(__AVX2__)
// Initialize accumulator with zeros
__m256 acc = _mm256_setzero_ps();
@@ -3549,14 +3570,29 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
const __m256 q = mul_sum_i8_pairs_float(qx, qy);
// Multiply q with scale and accumulate
-#if defined(__AVX2__)
acc = _mm256_fmadd_ps( d, q, acc );
-#else
- acc = _mm256_add_ps( _mm256_mul_ps( d, q ), acc );
-#endif
}
sumf = hsum_float_8(acc);
+#elif defined(__AVX__)
+ __m256 accum = _mm256_setzero_ps();
+
+ for (; ib + 1 < nb; ib += 2) {
+ const __m128i qx_1_0 = _mm_loadu_si128((const __m128i *)x[ib].qs);
+ const __m128i qx_1_1 = _mm_loadu_si128((const __m128i *)x[ib].qs + 1);
+ const __m128i qx_2_0 = _mm_loadu_si128((const __m128i *)x[ib + 1].qs);
+ const __m128i qx_2_1 = _mm_loadu_si128((const __m128i *)x[ib + 1].qs + 1);
+ const __m128i qy_1_0 = _mm_loadu_si128((const __m128i *)y[ib].qs);
+ const __m128i qy_1_1 = _mm_loadu_si128((const __m128i *)y[ib].qs + 1);
+ const __m128i qy_2_0 = _mm_loadu_si128((const __m128i *)y[ib + 1].qs);
+ const __m128i qy_2_1 = _mm_loadu_si128((const __m128i *)y[ib + 1].qs + 1);
+
+ const __m256 p = mul_sum_i8_quad_float(qx_1_0, qx_1_1, qx_2_0, qx_2_1, qy_1_0, qy_1_1, qy_2_0, qy_2_1);
+ const __m256 deltas = quad_fp16_delta_float(x[ib].d, y[ib].d, x[ib + 1].d, y[ib + 1].d);
+ accum = _mm256_add_ps(_mm256_mul_ps(deltas, p), accum);
+ }
+
+ sumf = hsum_float_8(accum);
#elif defined(__riscv_v_intrinsic)
size_t vl = __riscv_vsetvl_e8m1(qk);
@@ -10322,10 +10358,8 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
#elif defined __AVX__
const __m128i values128 = _mm_loadu_si128((const __m128i*)kvalues_iq4nl);
const __m128i m4b = _mm_set1_epi8(0x0f);
- const __m128i mone = _mm_set1_epi16(1);
- __m256 accum1 = _mm256_setzero_ps();
- __m256 accum2 = _mm256_setzero_ps();
+ __m256 accum = _mm256_setzero_ps();
for (; ib + 1 < nb; ib += 2) {
const __m128i q4bits_1 = _mm_loadu_si128((const __m128i *)x[ib + 0].qs);
const __m128i q4bits_2 = _mm_loadu_si128((const __m128i *)x[ib + 1].qs);
@@ -10338,21 +10372,13 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
const __m128i q4b_1_1 = _mm_shuffle_epi8(values128, _mm_and_si128(_mm_srli_epi16(q4bits_1, 4), m4b));
const __m128i q4b_2_0 = _mm_shuffle_epi8(values128, _mm_and_si128(q4bits_2, m4b));
const __m128i q4b_2_1 = _mm_shuffle_epi8(values128, _mm_and_si128(_mm_srli_epi16(q4bits_2, 4), m4b));
- const __m128i p16_1_0 = mul_add_epi8_sse(q4b_1_0, q8b_1_0);
- const __m128i p16_1_1 = mul_add_epi8_sse(q4b_1_1, q8b_1_1);
- const __m128i p16_2_0 = mul_add_epi8_sse(q4b_2_0, q8b_2_0);
- const __m128i p16_2_1 = mul_add_epi8_sse(q4b_2_1, q8b_2_1);
- const __m128i p_1_0 = _mm_madd_epi16(p16_1_0, mone);
- const __m128i p_1_1 = _mm_madd_epi16(p16_1_1, mone);
- const __m128i p_2_0 = _mm_madd_epi16(p16_2_0, mone);
- const __m128i p_2_1 = _mm_madd_epi16(p16_2_1, mone);
- accum1 = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(GGML_FP16_TO_FP32(y[ib + 0].d)*GGML_FP16_TO_FP32(x[ib + 0].d)),
- _mm256_cvtepi32_ps(MM256_SET_M128I(p_1_1, p_1_0))), accum1);
- accum2 = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(GGML_FP16_TO_FP32(y[ib + 1].d)*GGML_FP16_TO_FP32(x[ib + 1].d)),
- _mm256_cvtepi32_ps(MM256_SET_M128I(p_2_1, p_2_0))), accum2);
+
+ const __m256 p = mul_sum_i8_quad_float(q4b_1_0, q4b_1_1, q4b_2_0, q4b_2_1, q8b_1_0, q8b_1_1, q8b_2_0, q8b_2_1);
+ const __m256 deltas = quad_fp16_delta_float(x[ib].d, y[ib].d, x[ib + 1].d, y[ib + 1].d);
+ accum = _mm256_add_ps(_mm256_mul_ps(deltas, p), accum);
}
- sumf = hsum_float_8(_mm256_add_ps(accum1, accum2));
+ sumf = hsum_float_8(accum);
#elif defined(__POWER9_VECTOR__)
const vector signed char lowMask = vec_splats((signed char)0xF);
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
index 30b1bf895..61f53cd01 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -1469,8 +1469,12 @@ static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t
sumf += (ggml_float)_mm512_reduce_add_ps(c2);
#undef LOAD
-#elif defined(__AVX2__)
+#elif defined(__AVX2__) || defined(__AVX__)
+#if defined(__AVX2__)
#define LOAD(p) _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_cvtepu16_epi32(_mm_loadu_si128((const __m128i *)(p))), 16))
+#else
+#define LOAD(p) _mm256_castsi256_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(_mm_slli_epi32(_mm_cvtepu16_epi32(_mm_loadu_si128((const __m128i *)(p))), 16)), (_mm_slli_epi32(_mm_cvtepu16_epi32(_mm_bsrli_si128(_mm_loadu_si128((const __m128i *)(p)), 8)), 16)), 1))
+#endif
__m256 c1 = _mm256_setzero_ps();
__m256 c2 = _mm256_setzero_ps();
__m256 c3 = _mm256_setzero_ps();
diff --git a/scripts/sync-ggml-am.sh b/scripts/sync-ggml-am.sh
index 06a04745b..74d6c6c8b 100755
--- a/scripts/sync-ggml-am.sh
+++ b/scripts/sync-ggml-am.sh
@@ -144,17 +144,17 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
-e 's/([[:space:]]|[ab]\/)CMakeLists.txt/\1ggml\/CMakeLists.txt/g' \
-e 's/([[:space:]]|[ab]\/)src\/CMakeLists.txt/\1ggml\/src\/CMakeLists.txt/g' \
-e 's/([[:space:]]|[ab]\/)cmake\/FindSIMD.cmake/\1ggml\/cmake\/FindSIMD.cmake/g' \
- -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.c/\1ggml\/src\/ggml\1.c/g' \
- -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.cpp/\1ggml\/src\/ggml\1.cpp/g' \
- -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.h/\1ggml\/src\/ggml\1.h/g' \
- -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.cu/\1ggml\/src\/ggml\1.cu/g' \
- -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.m/\1ggml\/src\/ggml\1.m/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.c/\1ggml\/src\/ggml\2.c/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.cpp/\1ggml\/src\/ggml\2.cpp/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.h/\1ggml\/src\/ggml\2.h/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.cu/\1ggml\/src\/ggml\2.cu/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.m/\1ggml\/src\/ggml\2.m/g' \
-e 's/([[:space:]]|[ab]\/)src\/ggml-amx\//\1ggml\/src\/ggml-amx\//g' \
-e 's/([[:space:]]|[ab]\/)src\/ggml-cann\//\1ggml\/src\/ggml-cann\//g' \
-e 's/([[:space:]]|[ab]\/)src\/ggml-cuda\//\1ggml\/src\/ggml-cuda\//g' \
-e 's/([[:space:]]|[ab]\/)src\/ggml-sycl\//\1ggml\/src\/ggml-sycl\//g' \
-e 's/([[:space:]]|[ab]\/)src\/vulkan-shaders\//\1ggml\/src\/vulkan-shaders\//g' \
- -e 's/([[:space:]]|[ab]\/)include\/ggml(.*)\.h/\1ggml\/include\/ggml\1.h/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml(.*)\.h/\1ggml\/include\/ggml\2.h/g' \
-e 's/([[:space:]]|[ab]\/)examples\/common\.h/\1examples\/common.h/g' \
-e 's/([[:space:]]|[ab]\/)examples\/common\.cpp/\1examples\/common.cpp/g' \
-e 's/([[:space:]]|[ab]\/)examples\/common-ggml\.h/\1examples\/common-ggml.h/g' \