diff --git a/.github/workflows/close-issue.yml b/.github/workflows/close-issue.yml index f63860d14..276a217d4 100644 --- a/.github/workflows/close-issue.yml +++ b/.github/workflows/close-issue.yml @@ -17,7 +17,7 @@ jobs: steps: - uses: actions/stale@v5 with: - exempt-issue-labels: "refactor,help wanted,good first issue,research,bug" + exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap" days-before-issue-stale: 30 days-before-issue-close: 14 stale-issue-label: "stale" diff --git a/AUTHORS b/AUTHORS index 2eb60806a..6796b2941 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,4 +1,4 @@ -# date: Thu Nov 28 20:46:15 EET 2024 +# date: Tue Feb 4 13:04:05 EET 2025 # this file is auto-generated by scripts/gen-authors.sh 0cc4m @@ -20,6 +20,8 @@ Adithya Balaji AdithyanI Adrian Adrian Hesketh +Adrien Gallouët +Adrien Gallouët Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com> Ahmet Zeer AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> @@ -55,6 +57,7 @@ Ananta Bastola Anas Ahouzi <112881240+aahouzi@users.noreply.github.com> András Salamon Andreas (Andi) Kunar +Andreas Kieslinger <47689530+aendk@users.noreply.github.com> Andrei Andrew Canis Andrew Downing @@ -91,13 +94,17 @@ Ben Siraphob Ben Williams Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com> Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com> +Benson Wong Bernat Vadell +Bernhard M. Wiedemann Bert Wagner +Billel Mokeddem Bingan <70050083+binganao@users.noreply.github.com> Bjarke Viksøe <164612031+bviksoe@users.noreply.github.com> Bodo Graumann Bono Lv Borislav Stanimirov +Borislav Stanimirov Branden Butler Brandon Squizzato <35474886+bsquizz@users.noreply.github.com> Brian @@ -117,6 +124,7 @@ Casey Primozic Casey Primozic CausalLM <148736309+CausalLM@users.noreply.github.com> Cebtenzzre +CentricStorm Chad Brewbaker Changyeon Kim Chao Jiang @@ -131,12 +139,15 @@ Chris Kuehl Christian Demsar Christian Demsar Christian Falch <875252+chrfalch@users.noreply.github.com> +Christian Kastner Christian Kögler Christian Köhnenkamp Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com> +Christopher Nielsen <62156882+mascguy@users.noreply.github.com> Clark Saben <76020733+csaben@users.noreply.github.com> Clint Herron Conrad Kramer +Corentin REGAL CrispStrobe <154636388+CrispStrobe@users.noreply.github.com> Csaba Kecskemeti Cuong Trinh Manh @@ -176,6 +187,7 @@ Dibakar Gope Didzis Gosko Diego Devesa Diogo Teles Sant'Anna +Djip007 <3705339+Djip007@users.noreply.github.com> Djip007 Don Mahurin DooWoong Lee (David) @@ -193,6 +205,7 @@ Edward Taylor Elaine Elbios <141279586+Elbios@users.noreply.github.com> Elton Kola +Emreerdog <34742675+Emreerdog@users.noreply.github.com> Engininja2 <139037756+Engininja2@users.noreply.github.com> Equim Eric Curtin @@ -233,6 +246,7 @@ Fred Douglas <43351173+fredlas@users.noreply.github.com> Frederik Vogel Gabe Goodhart Gabe Goodhart +Gaetan Bisson GainLee Galunid Gary Linscott @@ -249,6 +263,7 @@ Guillaume "Vermeille" Sanchez Guillaume Wenzek Guoliang Hua <32868157+nbcsm@users.noreply.github.com> Guoteng <32697156+SolenoidWGT@users.noreply.github.com> +Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com> Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com> Haggai Nuchi Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com> @@ -259,11 +274,13 @@ Haoxiang Fei Harald Fernengel Hatsune Miku <129688334+at8u@users.noreply.github.com> HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com> +Haus1 Henk Poley Henri Vasserman Henrik Forstén Herman Semenov Hesen Peng +HimariO Hoang Nguyen Hong Bo PENG Hongyu Ouyang <96765450+casavaca@users.noreply.github.com> @@ -280,6 +297,7 @@ Icecream95 Ido S IgnacioFDM Igor Okulist +Ihar Hrachyshka Ikko Eltociear Ashimine Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com> Ionoclast Laboratories @@ -289,12 +307,14 @@ Ivan Ivan Filipov <159561759+vanaka11@users.noreply.github.com> Ivan Komarov Ivan Stepanov +JFLFY2255 JH23X <165871467+JH23X@users.noreply.github.com> Jack Mousseau Jack Mousseau JackJollimore <130917767+JackJollimore@users.noreply.github.com> Jaeden Amero Jaemin Son +Jafar Uruç Jag Chadha Jakub N James A Capozzoli <157492257+jac-jim@users.noreply.github.com> @@ -315,6 +335,7 @@ Jeffrey Morgan Jeffrey Quesnelle Jeroen Mostert Jesse Jojo Johnson +Jett Janiak Jeximo Jhen-Jie Hong Jiahao Li @@ -343,6 +364,7 @@ Josh Ramer Joyce Juan Calderon-Perez <835733+gaby@users.noreply.github.com> Judd +Juk Armstrong <69222624+jukofyork@users.noreply.github.com> Julius Arkenberg Jun Hee Yoo Jun Jie <71215065+junnjiee16@users.noreply.github.com> @@ -357,6 +379,7 @@ Justine Tunney Juuso Alasuutari KASR Kamil Tomšík +Karol Kontny <82021046+kkontny@users.noreply.github.com> Karsten Weiss Karthick Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com> @@ -376,6 +399,7 @@ Kolen Cheung Konstantin Herud Konstantin Zhuravlyov Kunshang Ji +Kyle Bruene Kyle Liang Kyle Mistele Kylin <56434533+KyL0N@users.noreply.github.com> @@ -394,6 +418,7 @@ Liu Jia LoganDark Loïc Carrère LostRuins <39025047+LostRuins@users.noreply.github.com> +LostRuins Concedo <39025047+LostRuins@users.noreply.github.com> Luciano Luo Tian Lyle Dean @@ -423,6 +448,7 @@ MasterYi1024 <39848311+MasterYi1024@users.noreply.github.com> Mateusz Charytoniuk Matheus C. França Matheus Gabriel Alves Silva +Mathieu Baudier Mathieu Geli Mathieu Nayrolles Mathijs Henquet @@ -444,6 +470,7 @@ Meng, Hengyu Mengqing Cao Merrick Christensen Michael Coppola +Michael Engel Michael Francis Michael Hueschen Michael Kesper @@ -452,7 +479,9 @@ Michael Podvitskiy Michael Potter Michael de Gans Michaël de Vries +Michał Moskal Michał Tuszyński +Michelle Tan <41475767+MichelleTanPY@users.noreply.github.com> Mihai Mike Mikko Juola @@ -477,6 +506,7 @@ Neo Zhang <14088817+arthw@users.noreply.github.com> Neo Zhang Neo Zhang Jianyu Neuman Vong +NeverLucky <92274250+nvrxq@users.noreply.github.com> Nexes the Old <124105151+Nexesenex@users.noreply.github.com> Nexesenex <124105151+Nexesenex@users.noreply.github.com> Niall Coates <1349685+Niall-@users.noreply.github.com> @@ -484,11 +514,15 @@ Nicholai Tukanov Nico Bosshard Nicolai Weitkemper Nicolás Pérez +Nicolò Scipione Nigel Bosch +Nikita Sarychev <42014488+sARY77@users.noreply.github.com> Niklas Korz NikolaiLyssogor <59844691+NikolaiLyssogor@users.noreply.github.com> +Nikolaos Pothitos Nikolas <127742645+nneubacher@users.noreply.github.com> Nindaleth +Nuno OSecret <135510162+OLSecret@users.noreply.github.com> Oleksandr Nikitin Oleksii Maryshchenko @@ -504,6 +538,7 @@ Pavel Zloi Pavol Rusnak Paweł Wodnicki <151604+32bitmicro@users.noreply.github.com> Pedro Cuenca +Peter Peter Sugihara Phil H <5756783+phiharri@users.noreply.github.com> Philip Taron @@ -529,9 +564,12 @@ Rand Xie Randall Fitzgerald Random Fly Reinforce-II +Rémy Oudompheng Ren Xuancheng Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com> +Reza Kakhki RhinoDevel +Riccardo Orlando Riceball LEE Rich Dougherty Richard Kiss @@ -544,6 +582,8 @@ Riley Stewart Rinne Rinne Robert Brisita <986796+rbrisita@users.noreply.github.com> +Robert Collins +Robert Ormandi <52251610+ormandi@users.noreply.github.com> Robert Sung-wook Shin Robey Holderith Robyn @@ -559,7 +599,9 @@ Roni Ronny Brendel Ronsor Rowan Hart +Ruan <47767371+ruanych@users.noreply.github.com> Ruchira Hasaranga +Rudi Servo Ruixin Huang <18860020911@163.com> Rune <43761327+Rune-AI@users.noreply.github.com> RunningLeon @@ -623,12 +665,14 @@ Steven Roussey Steward Garcia <57494570+FSSRepo@users.noreply.github.com> StrangeBytesDev <141275258+StrangeBytesDev@users.noreply.github.com> Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com> +Sukriti Sharma SuperUserNameMan Sutou Kouhei Tai Duc Nguyen Taikono-Himazin Tameem <113388789+AhmadTameem@users.noreply.github.com> Tamotsu Takahashi +Tei Home Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com> Thatcher Chamberlin Theia Vogel @@ -640,6 +684,7 @@ Tim Miller Tim Wang Timmy Knight Timothy Cronin <40186632+4imothy@users.noreply.github.com> +Ting Lou Ting Lou Ting Sun Tobias Lütke @@ -661,6 +706,7 @@ Uzo Nweke Vaibhav Srivastav Val Kharitonov Valentin Konovalov +Valentin Mamedov <45292985+Inf1delis@users.noreply.github.com> Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com> Vali Malinoiu <0x4139@gmail.com> Victor Nogueira @@ -673,13 +719,17 @@ Vladimir Malyutin Vladimir Zorin VoidIsVoid <343750470@qq.com> Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com> +Wang Qin <37098874+wangqin0@users.noreply.github.com> +Wang Ran (汪然) WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com> Weird Constructor Welby Seely Wentai Zhang WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com> William Tambellini +William Tambellini Willy Tarreau +Woof Dog <197125663+woof-dog@users.noreply.github.com> Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com> Wu Jian Ping Wu Jian Ping @@ -692,6 +742,7 @@ Xie Yanbo Xingchen Song(宋星辰) Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com> Xuan Son Nguyen +Xuan-Son Nguyen Yaiko Yann Follet <131855179+YannFollet@users.noreply.github.com> Yaroslav @@ -702,7 +753,9 @@ Yoshi Suhara Yoshi Suhara Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com> +Yüg Yui +Yun Dou Yuri Khrustalev Yusuf Kağan Hanoğlu Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com> @@ -714,18 +767,23 @@ Zhang Peiyuan Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com> Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com> Zhiyuan Li +Zhiyuan Li ZhouYuChen Ziad Ben Hadj-Alouane Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com> Zsapi a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com> +a3sh <38979186+A3shTnT@users.noreply.github.com> adel boussaken afrideva <95653597+afrideva@users.noreply.github.com> +ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com> agray3 akawrykow <142945436+akawrykow@users.noreply.github.com> +alek3y <44779186+alek3y@users.noreply.github.com> alexpinel <93524949+alexpinel@users.noreply.github.com> alonfaraj alwqx +amd-dwang amd-lalithnc amritahs-ibm andrijdavid @@ -737,6 +795,7 @@ arch-btw <57669023+arch-btw@users.noreply.github.com> arcrank ardfork <134447697+ardfork@users.noreply.github.com> arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com> +aryantandon01 <80969509+aryantandon01@users.noreply.github.com> at8u <129688334+at8u@users.noreply.github.com> automaticcat awatuna <23447591+awatuna@users.noreply.github.com> @@ -751,12 +810,14 @@ bryanSwk <93190252+bryanSwk@users.noreply.github.com> bsilvereagle bssrdf byte-6174 <88070277+byte-6174@users.noreply.github.com> +cduk <19917266+cduk@users.noreply.github.com> cebtenzzre chaihahaha chiranko <96988916+chiranko@users.noreply.github.com> clibdev <52199778+clibdev@users.noreply.github.com> clyang cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com> +codezjx coezbek comex compilade <113953597+compilade@users.noreply.github.com> @@ -780,14 +841,17 @@ drbh ds5t5 <145942675+ds5t5@users.noreply.github.com> dylan eastriver +ebraminio ebraminio eiery <19350831+eiery@users.noreply.github.com> eric8607242 fairydreaming <166155368+fairydreaming@users.noreply.github.com> fengerhu1 <2748250768@qq.com> +fj-y-saito <85871716+fj-y-saito@users.noreply.github.com> fraxy-v <65565042+fraxy-v@users.noreply.github.com> github-actions[bot] gliptic +gn64 goerch grahameth <96447521+grahameth@users.noreply.github.com> gtygo @@ -812,10 +876,12 @@ icppWorld <124377669+icppWorld@users.noreply.github.com> igarnier intelmatt <61025942+intelmatt@users.noreply.github.com> iohub +issixx <46835150+issixx@users.noreply.github.com> jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com> jaime-m-p <167997752+jaime-m-p@users.noreply.github.com> jameswu2014 <545426914@qq.com> jdomke <28772296+jdomke@users.noreply.github.com> +jiahao su jiez <373447296@qq.com> jneem joecryptotoo <80373433+joecryptotoo@users.noreply.github.com> @@ -828,6 +894,7 @@ junchao-loongson <68935141+junchao-loongson@users.noreply.github.com> jwj7140 <32943891+jwj7140@users.noreply.github.com> k.h.lai kaizau +kallewoof kalomaze <66376113+kalomaze@users.noreply.github.com> kang katsu560 <118887472+katsu560@users.noreply.github.com> @@ -835,6 +902,7 @@ kchro3 <62481661+kchro3@users.noreply.github.com> khimaros kiltyj klosax <131523366+klosax@users.noreply.github.com> +krystiancha kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> kunnis kuronekosaiko @@ -847,6 +915,8 @@ ldwang le.chang leejet leo-pony +lexasub +lhez limitedAtonement liuwei-git <14815172+liuwei-git@users.noreply.github.com> lon <114724657+longregen@users.noreply.github.com> @@ -855,10 +925,13 @@ ltoniazzi <61414566+ltoniazzi@users.noreply.github.com> luoyu-intel m3ndax maddes8cht <55592906+maddes8cht@users.noreply.github.com> +mahorozte <41834471+mahorozte@users.noreply.github.com> makomk manikbhandari maor-ps <154728172+maor-ps@users.noreply.github.com> +mashdragon <122402293+mashdragon@users.noreply.github.com> matiaslin <45382001+matiaslin@users.noreply.github.com> +matt23654 matteo mdrokz mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com> @@ -868,6 +941,7 @@ mmyjona momonga <115213907+mmnga@users.noreply.github.com> momonga <146910567+mmngays@users.noreply.github.com> moritzbrantner <31051084+moritzbrantner@users.noreply.github.com> +musoles <135031143+musoles@users.noreply.github.com> mzcu nanahi <130121847+na-na-hi@users.noreply.github.com> ngc92 <7938269+ngc92@users.noreply.github.com> @@ -885,6 +959,7 @@ oobabooga <112222186+oobabooga@users.noreply.github.com> opparco ostix360 <55257054+ostix360@users.noreply.github.com> pculliton +peidaqi pengxin99 perserk piDack <104877312+piDack@users.noreply.github.com> @@ -892,10 +967,12 @@ pmysl postmasters pudepiedj qingfengfenga <41416092+qingfengfenga@users.noreply.github.com> +qingy1337 qouoq qunash rabidcopy rankaiyx +redbeard rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com> rhuddleston rimoliga <53384203+rimoliga@users.noreply.github.com> @@ -912,6 +989,7 @@ sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com> slaren <2141330+slaren@users.noreply.github.com> slaren snadampal <87143774+snadampal@users.noreply.github.com> +someone13574 <81528246+someone13574@users.noreply.github.com> standby24x7 staviq stduhpf @@ -931,6 +1009,7 @@ uint256_t uint256_t unbounded uvos +uvos valiray <133289098+valiray@users.noreply.github.com> vb vik @@ -951,6 +1030,7 @@ xaedes xctan xloem <0xloem@gmail.com> yangli2 +ymcki <84055651+ymcki@users.noreply.github.com> yuiseki yuri@FreeBSD zakkor @@ -963,4 +1043,5 @@ zrm 杨朱 · Kiki 源文雨 <41315874+fumiama@users.noreply.github.com> 蕭澧邦 <45505768+shou692199@users.noreply.github.com> +谢乃闻 Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com> diff --git a/README.md b/README.md index 7f306d199..d68330d2a 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - Rust (more features): [edgenai/llama_cpp-rs](https://github.com/edgenai/llama_cpp-rs) - Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp) - Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs) +- Rust (automated build from crates.io): [ShelbyJenkins/llm_client](https://github.com/ShelbyJenkins/llm_client) - C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp) - C#/VB.NET (more features - community license): [LM-Kit.NET](https://docs.lm-kit.com/lm-kit-net/index.html) - Scala 3: [donderom/llm4s](https://github.com/donderom/llm4s) diff --git a/common/chat.cpp b/common/chat.cpp index 77cae245b..a72b1a899 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -405,7 +405,7 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_ return data; } static common_chat_msg common_chat_parse_command_r7b(const std::string & input) { - static std::regex response_regex("<\\|START_RESPONSE\\|>(.*?)<\\|END_RESPONSE\\|>"); + static std::regex response_regex("<\\|START_RESPONSE\\|>([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>"); static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>"); std::smatch match; diff --git a/examples/batched.swift/Sources/main.swift b/examples/batched.swift/Sources/main.swift index 371917b2e..55c31166c 100644 --- a/examples/batched.swift/Sources/main.swift +++ b/examples/batched.swift/Sources/main.swift @@ -31,6 +31,11 @@ defer { llama_model_free(model) } +guard let vocab = llama_model_get_vocab(model) else { + print("Failed to get vocab") + exit(1) +} + var tokens = tokenize(text: prompt, add_bos: true) let n_kv_req = UInt32(tokens.count) + UInt32((n_len - Int(tokens.count)) * n_parallel) @@ -41,7 +46,7 @@ context_params.n_batch = UInt32(max(n_len, n_parallel)) context_params.n_threads = 8 context_params.n_threads_batch = 8 -let context = llama_new_context_with_model(model, context_params) +let context = llama_init_from_model(model, context_params) guard context != nil else { print("Failed to initialize context") exit(1) @@ -141,7 +146,7 @@ while n_cur <= n_len { let new_token_id = llama_sampler_sample(smpl, context, i_batch[i]) // is it an end of stream? -> mark the stream as finished - if llama_vocab_is_eog(model, new_token_id) || n_cur == n_len { + if llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_len { i_batch[i] = -1 // print("") if n_parallel > 1 { @@ -207,7 +212,7 @@ private func tokenize(text: String, add_bos: Bool) -> [llama_token] { let utf8Count = text.utf8.count let n_tokens = utf8Count + (add_bos ? 1 : 0) let tokens = UnsafeMutablePointer.allocate(capacity: n_tokens) - let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false) + let tokenCount = llama_tokenize(vocab, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false) var swiftTokens: [llama_token] = [] for i in 0 ..< tokenCount { swiftTokens.append(tokens[Int(i)]) @@ -218,12 +223,12 @@ private func tokenize(text: String, add_bos: Bool) -> [llama_token] { private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String? { var result = [CChar](repeating: 0, count: 8) - let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count), 0, false) + let nTokens = llama_token_to_piece(vocab, token, &result, Int32(result.count), 0, false) if nTokens < 0 { let actualTokensCount = -Int(nTokens) result = .init(repeating: 0, count: actualTokensCount) let check = llama_token_to_piece( - model, + vocab, token, &result, Int32(result.count), diff --git a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift index 477c3e6f2..ee7141a66 100644 --- a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +++ b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift @@ -24,6 +24,7 @@ func llama_batch_add(_ batch: inout llama_batch, _ id: llama_token, _ pos: llama actor LlamaContext { private var model: OpaquePointer private var context: OpaquePointer + private var vocab: OpaquePointer private var sampling: UnsafeMutablePointer private var batch: llama_batch private var tokens_list: [llama_token] @@ -47,6 +48,7 @@ actor LlamaContext { self.sampling = llama_sampler_chain_init(sparams) llama_sampler_chain_add(self.sampling, llama_sampler_init_temp(0.4)) llama_sampler_chain_add(self.sampling, llama_sampler_init_dist(1234)) + vocab = llama_model_get_vocab(model) } deinit { @@ -79,7 +81,7 @@ actor LlamaContext { ctx_params.n_threads = Int32(n_threads) ctx_params.n_threads_batch = Int32(n_threads) - let context = llama_new_context_with_model(model, ctx_params) + let context = llama_init_from_model(model, ctx_params) guard let context else { print("Could not load context!") throw LlamaError.couldNotInitializeContext @@ -151,7 +153,7 @@ actor LlamaContext { new_token_id = llama_sampler_sample(sampling, context, batch.n_tokens - 1) - if llama_vocab_is_eog(model, new_token_id) || n_cur == n_len { + if llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_len { print("\n") is_done = true let new_token_str = String(cString: temporary_invalid_cchars + [0]) @@ -297,7 +299,7 @@ actor LlamaContext { let utf8Count = text.utf8.count let n_tokens = utf8Count + (add_bos ? 1 : 0) + 1 let tokens = UnsafeMutablePointer.allocate(capacity: n_tokens) - let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false) + let tokenCount = llama_tokenize(vocab, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false) var swiftTokens: [llama_token] = [] for i in 0...allocate(capacity: Int(-nTokens)) @@ -324,7 +326,7 @@ actor LlamaContext { defer { newResult.deallocate() } - let nNewTokens = llama_token_to_piece(model, token, newResult, -nTokens, 0, false) + let nNewTokens = llama_token_to_piece(vocab, token, newResult, -nTokens, 0, false) let bufferPointer = UnsafeBufferPointer(start: newResult, count: Int(nNewTokens)) return Array(bufferPointer) } else { diff --git a/examples/server/tests/unit/test_tool_call.py b/examples/server/tests/unit/test_tool_call.py index 87a4a27e0..df10f9a42 100644 --- a/examples/server/tests/unit/test_tool_call.py +++ b/examples/server/tests/unit/test_tool_call.py @@ -275,7 +275,8 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t @pytest.mark.slow @pytest.mark.parametrize("hf_repo,template_override", [ - ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), + ("bartowski/c4ai-command-r7b-12-2024-GGUF:Q4_K_M", ("CohereForAI/c4ai-command-r7b-12-2024", "tool_use")), + ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None), ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"), @@ -299,13 +300,15 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"), + + ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it. ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None), # ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), ]) -def test_weather_tool_call(hf_repo: str, template_override: str | Tuple[str, str | None] | None): +def test_weather(hf_repo: str, template_override: Tuple[str, str | None] | None): global server n_predict = 512 server.n_slots = 1 diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 7c069e420..75b5ea3b4 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -274,22 +274,25 @@ endif() # Generate version info based on git commit. -find_program(GIT_EXE NAMES git git.exe REQUIRED NO_CMAKE_FIND_ROOT_PATH) -execute_process(COMMAND ${GIT_EXE} rev-list --count HEAD - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - OUTPUT_VARIABLE GGML_BUILD_NUMBER - OUTPUT_STRIP_TRAILING_WHITESPACE -) +if(NOT DEFINED GGML_BUILD_NUMBER) + find_program(GIT_EXE NAMES git git.exe REQUIRED NO_CMAKE_FIND_ROOT_PATH) + execute_process(COMMAND ${GIT_EXE} rev-list --count HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE GGML_BUILD_NUMBER + OUTPUT_STRIP_TRAILING_WHITESPACE + ) -if(GGML_BUILD_NUMBER EQUAL 1) - message(WARNING "GGML build version fixed at 1 likely due to a shallow clone.") + if(GGML_BUILD_NUMBER EQUAL 1) + message(WARNING "GGML build version fixed at 1 likely due to a shallow clone.") + endif() + + execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE GGML_BUILD_COMMIT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) endif() -execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - OUTPUT_VARIABLE GGML_BUILD_COMMIT - OUTPUT_STRIP_TRAILING_WHITESPACE -) # Capture variables prefixed with GGML_. diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m index 76f8e4291..9605914ff 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -20,7 +20,10 @@ #define GGML_METAL_MAX_COMMAND_BUFFERS 8 // create residency sets only on macOS >= 15.0 -#if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000 +#if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000 || \ + TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED >= 180000 || \ + TARGET_OS_TV && __TV_OS_VERSION_MAX_ALLOWED >= 180000 || \ + TARGET_OS_VISION && __VISION_OS_VERSION_MAX_ALLOWED >= 200000 #define GGML_METAL_HAS_RESIDENCY_SETS 1 #endif @@ -1071,7 +1074,7 @@ static bool ggml_backend_metal_buffer_rset_init( } #if defined(GGML_METAL_HAS_RESIDENCY_SETS) - if (@available(macOS 15.0, *)) { + if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) { MTLResidencySetDescriptor * desc = [[MTLResidencySetDescriptor alloc] init]; desc.label = @"ggml_backend_metal"; desc.initialCapacity = ctx->n_buffers; @@ -1106,7 +1109,7 @@ static bool ggml_backend_metal_buffer_rset_init( // rset free static void ggml_backend_metal_buffer_rset_free(struct ggml_backend_metal_buffer_context * ctx) { #if defined(GGML_METAL_HAS_RESIDENCY_SETS) - if (@available(macOS 15.0, *)) { + if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) { if (ctx->rset) { [ctx->rset endResidency]; [ctx->rset removeAllAllocations]; diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last index 34f1cbf69..26a105f64 100644 --- a/scripts/sync-ggml.last +++ b/scripts/sync-ggml.last @@ -1 +1 @@ -498e0ecd2c4f9379439fd413805af10e8e9ff349 +694244a6e40dc255f6bb4376fb17431c06633e6c diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 7827ad0e4..1494c2443 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -291,11 +291,11 @@ static void test_template(const common_chat_template & tmpl, const std::vector"); test_template(tmpl, end_tokens, text_message, tools, - "<|START_RESPONSE|>Hello, world!<|END_RESPONSE|>", + "<|START_RESPONSE|>Hello, world!\n" + "What's up?<|END_RESPONSE|>", /* expect_grammar_triggered= */ false); } { @@ -449,7 +450,7 @@ static void test_template_output_parsers() { assert_msg_equals(msg_from_json(text_message), common_chat_parse("{\n" - " \"response\": \"Hello, world!\"\n" + " \"response\": \"Hello, world!\\nWhat's up?\"\n" "}", common_chat_params_init(tmpl, inputs_tools).format)); test_template(tmpl, end_tokens, tool_call_message_with_id, tools, @@ -472,7 +473,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); test_template( tmpl, end_tokens, tool_call_message_with_id, tools, "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]"); @@ -497,7 +498,7 @@ static void test_template_output_parsers() { inputs_tools) .format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "\n" "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" @@ -537,7 +538,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}"); } @@ -549,7 +550,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "{\"arg1\": 1}"); } @@ -563,7 +564,8 @@ static void test_template_output_parsers() { test_template(tmpl, end_tokens, text_message, {}, "all\n" - "Hello, world!", + "Hello, world!\n" + "What's up?", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "special_function\n" @@ -576,7 +578,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]"); } @@ -588,9 +590,9 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); - test_template(tmpl, end_tokens, text_reasoning_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); - assert_msg_equals(msg_from_json(text_reasoning_message), common_chat_parse("I'm thinkingHello, world!", COMMON_CHAT_FORMAT_DEEPSEEK_R1)); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); + test_template(tmpl, end_tokens, text_reasoning_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); + assert_msg_equals(msg_from_json(text_reasoning_message), common_chat_parse("I'm thinkingHello, world!\nWhat's up?", COMMON_CHAT_FORMAT_DEEPSEEK_R1)); // test_template(tmpl, end_tokens, tool_call_message, tools, // "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" // "```json\n" @@ -608,9 +610,9 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); - test_template(tmpl, end_tokens, text_reasoning_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); - assert_msg_equals(msg_from_json(text_reasoning_message), common_chat_parse("I'm thinkingHello, world!", COMMON_CHAT_FORMAT_DEEPSEEK_R1)); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); + test_template(tmpl, end_tokens, text_reasoning_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); + assert_msg_equals(msg_from_json(text_reasoning_message), common_chat_parse("I'm thinkingHello, world!\nWhat's up?", COMMON_CHAT_FORMAT_DEEPSEEK_R1)); assert_msg_equals(msg_from_json(tool_call_reasoning_message), common_chat_parse(