Added httplib support
This commit is contained in:
parent
e216aa0463
commit
f01c6cbc7e
7 changed files with 10606 additions and 0 deletions
|
@ -36,4 +36,5 @@ else()
|
|||
add_subdirectory(embedding)
|
||||
add_subdirectory(save-load-state)
|
||||
add_subdirectory(benchmark)
|
||||
add_subdirectory(server)
|
||||
endif()
|
||||
|
|
5
examples/server/CMakeLists.txt
Normal file
5
examples/server/CMakeLists.txt
Normal file
|
@ -0,0 +1,5 @@
|
|||
set(TARGET server)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
add_executable(${TARGET} server.cpp json11.cpp json11.hpp httplib.h server.h)
|
||||
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
8794
examples/server/httplib.h
Normal file
8794
examples/server/httplib.h
Normal file
File diff suppressed because it is too large
Load diff
790
examples/server/json11.cpp
Normal file
790
examples/server/json11.cpp
Normal file
|
@ -0,0 +1,790 @@
|
|||
/* Copyright (c) 2013 Dropbox, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "json11.hpp"
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <limits>
|
||||
|
||||
namespace json11 {
|
||||
|
||||
static const int max_depth = 200;
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::map;
|
||||
using std::make_shared;
|
||||
using std::initializer_list;
|
||||
using std::move;
|
||||
|
||||
/* Helper for representing null - just a do-nothing struct, plus comparison
|
||||
* operators so the helpers in JsonValue work. We can't use nullptr_t because
|
||||
* it may not be orderable.
|
||||
*/
|
||||
struct NullStruct {
|
||||
bool operator==(NullStruct) const { return true; }
|
||||
bool operator<(NullStruct) const { return false; }
|
||||
};
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * *
|
||||
* Serialization
|
||||
*/
|
||||
|
||||
static void dump(NullStruct, string &out) {
|
||||
out += "null";
|
||||
}
|
||||
|
||||
static void dump(double value, string &out) {
|
||||
if (std::isfinite(value)) {
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof buf, "%.17g", value);
|
||||
out += buf;
|
||||
} else {
|
||||
out += "null";
|
||||
}
|
||||
}
|
||||
|
||||
static void dump(int value, string &out) {
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof buf, "%d", value);
|
||||
out += buf;
|
||||
}
|
||||
|
||||
static void dump(bool value, string &out) {
|
||||
out += value ? "true" : "false";
|
||||
}
|
||||
|
||||
static void dump(const string &value, string &out) {
|
||||
out += '"';
|
||||
for (size_t i = 0; i < value.length(); i++) {
|
||||
const char ch = value[i];
|
||||
if (ch == '\\') {
|
||||
out += "\\\\";
|
||||
} else if (ch == '"') {
|
||||
out += "\\\"";
|
||||
} else if (ch == '\b') {
|
||||
out += "\\b";
|
||||
} else if (ch == '\f') {
|
||||
out += "\\f";
|
||||
} else if (ch == '\n') {
|
||||
out += "\\n";
|
||||
} else if (ch == '\r') {
|
||||
out += "\\r";
|
||||
} else if (ch == '\t') {
|
||||
out += "\\t";
|
||||
} else if (static_cast<uint8_t>(ch) <= 0x1f) {
|
||||
char buf[8];
|
||||
snprintf(buf, sizeof buf, "\\u%04x", ch);
|
||||
out += buf;
|
||||
} else if (static_cast<uint8_t>(ch) == 0xe2 && static_cast<uint8_t>(value[i+1]) == 0x80
|
||||
&& static_cast<uint8_t>(value[i+2]) == 0xa8) {
|
||||
out += "\\u2028";
|
||||
i += 2;
|
||||
} else if (static_cast<uint8_t>(ch) == 0xe2 && static_cast<uint8_t>(value[i+1]) == 0x80
|
||||
&& static_cast<uint8_t>(value[i+2]) == 0xa9) {
|
||||
out += "\\u2029";
|
||||
i += 2;
|
||||
} else {
|
||||
out += ch;
|
||||
}
|
||||
}
|
||||
out += '"';
|
||||
}
|
||||
|
||||
static void dump(const Json::array &values, string &out) {
|
||||
bool first = true;
|
||||
out += "[";
|
||||
for (const auto &value : values) {
|
||||
if (!first)
|
||||
out += ", ";
|
||||
value.dump(out);
|
||||
first = false;
|
||||
}
|
||||
out += "]";
|
||||
}
|
||||
|
||||
static void dump(const Json::object &values, string &out) {
|
||||
bool first = true;
|
||||
out += "{";
|
||||
for (const auto &kv : values) {
|
||||
if (!first)
|
||||
out += ", ";
|
||||
dump(kv.first, out);
|
||||
out += ": ";
|
||||
kv.second.dump(out);
|
||||
first = false;
|
||||
}
|
||||
out += "}";
|
||||
}
|
||||
|
||||
void Json::dump(string &out) const {
|
||||
m_ptr->dump(out);
|
||||
}
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * *
|
||||
* Value wrappers
|
||||
*/
|
||||
|
||||
template <Json::Type tag, typename T>
|
||||
class Value : public JsonValue {
|
||||
protected:
|
||||
|
||||
// Constructors
|
||||
explicit Value(const T &value) : m_value(value) {}
|
||||
explicit Value(T &&value) : m_value(move(value)) {}
|
||||
|
||||
// Get type tag
|
||||
Json::Type type() const override {
|
||||
return tag;
|
||||
}
|
||||
|
||||
// Comparisons
|
||||
bool equals(const JsonValue * other) const override {
|
||||
return m_value == static_cast<const Value<tag, T> *>(other)->m_value;
|
||||
}
|
||||
bool less(const JsonValue * other) const override {
|
||||
return m_value < static_cast<const Value<tag, T> *>(other)->m_value;
|
||||
}
|
||||
|
||||
const T m_value;
|
||||
void dump(string &out) const override { json11::dump(m_value, out); }
|
||||
};
|
||||
|
||||
class JsonDouble final : public Value<Json::NUMBER, double> {
|
||||
double number_value() const override { return m_value; }
|
||||
int int_value() const override { return static_cast<int>(m_value); }
|
||||
bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
|
||||
bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
|
||||
public:
|
||||
explicit JsonDouble(double value) : Value(value) {}
|
||||
};
|
||||
|
||||
class JsonInt final : public Value<Json::NUMBER, int> {
|
||||
double number_value() const override { return m_value; }
|
||||
int int_value() const override { return m_value; }
|
||||
bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
|
||||
bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
|
||||
public:
|
||||
explicit JsonInt(int value) : Value(value) {}
|
||||
};
|
||||
|
||||
class JsonBoolean final : public Value<Json::BOOL, bool> {
|
||||
bool bool_value() const override { return m_value; }
|
||||
public:
|
||||
explicit JsonBoolean(bool value) : Value(value) {}
|
||||
};
|
||||
|
||||
class JsonString final : public Value<Json::STRING, string> {
|
||||
const string &string_value() const override { return m_value; }
|
||||
public:
|
||||
explicit JsonString(const string &value) : Value(value) {}
|
||||
explicit JsonString(string &&value) : Value(move(value)) {}
|
||||
};
|
||||
|
||||
class JsonArray final : public Value<Json::ARRAY, Json::array> {
|
||||
const Json::array &array_items() const override { return m_value; }
|
||||
const Json & operator[](size_t i) const override;
|
||||
public:
|
||||
explicit JsonArray(const Json::array &value) : Value(value) {}
|
||||
explicit JsonArray(Json::array &&value) : Value(move(value)) {}
|
||||
};
|
||||
|
||||
class JsonObject final : public Value<Json::OBJECT, Json::object> {
|
||||
const Json::object &object_items() const override { return m_value; }
|
||||
const Json & operator[](const string &key) const override;
|
||||
public:
|
||||
explicit JsonObject(const Json::object &value) : Value(value) {}
|
||||
explicit JsonObject(Json::object &&value) : Value(move(value)) {}
|
||||
};
|
||||
|
||||
class JsonNull final : public Value<Json::NUL, NullStruct> {
|
||||
public:
|
||||
JsonNull() : Value({}) {}
|
||||
};
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * *
|
||||
* Static globals - static-init-safe
|
||||
*/
|
||||
struct Statics {
|
||||
const std::shared_ptr<JsonValue> null = make_shared<JsonNull>();
|
||||
const std::shared_ptr<JsonValue> t = make_shared<JsonBoolean>(true);
|
||||
const std::shared_ptr<JsonValue> f = make_shared<JsonBoolean>(false);
|
||||
const string empty_string;
|
||||
const vector<Json> empty_vector;
|
||||
const map<string, Json> empty_map;
|
||||
Statics() {}
|
||||
};
|
||||
|
||||
static const Statics & statics() {
|
||||
static const Statics s {};
|
||||
return s;
|
||||
}
|
||||
|
||||
static const Json & static_null() {
|
||||
// This has to be separate, not in Statics, because Json() accesses statics().null.
|
||||
static const Json json_null;
|
||||
return json_null;
|
||||
}
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * *
|
||||
* Constructors
|
||||
*/
|
||||
|
||||
Json::Json() noexcept : m_ptr(statics().null) {}
|
||||
Json::Json(std::nullptr_t) noexcept : m_ptr(statics().null) {}
|
||||
Json::Json(double value) : m_ptr(make_shared<JsonDouble>(value)) {}
|
||||
Json::Json(int value) : m_ptr(make_shared<JsonInt>(value)) {}
|
||||
Json::Json(bool value) : m_ptr(value ? statics().t : statics().f) {}
|
||||
Json::Json(const string &value) : m_ptr(make_shared<JsonString>(value)) {}
|
||||
Json::Json(string &&value) : m_ptr(make_shared<JsonString>(move(value))) {}
|
||||
Json::Json(const char * value) : m_ptr(make_shared<JsonString>(value)) {}
|
||||
Json::Json(const Json::array &values) : m_ptr(make_shared<JsonArray>(values)) {}
|
||||
Json::Json(Json::array &&values) : m_ptr(make_shared<JsonArray>(move(values))) {}
|
||||
Json::Json(const Json::object &values) : m_ptr(make_shared<JsonObject>(values)) {}
|
||||
Json::Json(Json::object &&values) : m_ptr(make_shared<JsonObject>(move(values))) {}
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * *
|
||||
* Accessors
|
||||
*/
|
||||
|
||||
Json::Type Json::type() const { return m_ptr->type(); }
|
||||
double Json::number_value() const { return m_ptr->number_value(); }
|
||||
int Json::int_value() const { return m_ptr->int_value(); }
|
||||
bool Json::bool_value() const { return m_ptr->bool_value(); }
|
||||
const string & Json::string_value() const { return m_ptr->string_value(); }
|
||||
const vector<Json> & Json::array_items() const { return m_ptr->array_items(); }
|
||||
const map<string, Json> & Json::object_items() const { return m_ptr->object_items(); }
|
||||
const Json & Json::operator[] (size_t i) const { return (*m_ptr)[i]; }
|
||||
const Json & Json::operator[] (const string &key) const { return (*m_ptr)[key]; }
|
||||
|
||||
double JsonValue::number_value() const { return 0; }
|
||||
int JsonValue::int_value() const { return 0; }
|
||||
bool JsonValue::bool_value() const { return false; }
|
||||
const string & JsonValue::string_value() const { return statics().empty_string; }
|
||||
const vector<Json> & JsonValue::array_items() const { return statics().empty_vector; }
|
||||
const map<string, Json> & JsonValue::object_items() const { return statics().empty_map; }
|
||||
const Json & JsonValue::operator[] (size_t) const { return static_null(); }
|
||||
const Json & JsonValue::operator[] (const string &) const { return static_null(); }
|
||||
|
||||
const Json & JsonObject::operator[] (const string &key) const {
|
||||
auto iter = m_value.find(key);
|
||||
return (iter == m_value.end()) ? static_null() : iter->second;
|
||||
}
|
||||
const Json & JsonArray::operator[] (size_t i) const {
|
||||
if (i >= m_value.size()) return static_null();
|
||||
else return m_value[i];
|
||||
}
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * *
|
||||
* Comparison
|
||||
*/
|
||||
|
||||
bool Json::operator== (const Json &other) const {
|
||||
if (m_ptr == other.m_ptr)
|
||||
return true;
|
||||
if (m_ptr->type() != other.m_ptr->type())
|
||||
return false;
|
||||
|
||||
return m_ptr->equals(other.m_ptr.get());
|
||||
}
|
||||
|
||||
bool Json::operator< (const Json &other) const {
|
||||
if (m_ptr == other.m_ptr)
|
||||
return false;
|
||||
if (m_ptr->type() != other.m_ptr->type())
|
||||
return m_ptr->type() < other.m_ptr->type();
|
||||
|
||||
return m_ptr->less(other.m_ptr.get());
|
||||
}
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * *
|
||||
* Parsing
|
||||
*/
|
||||
|
||||
/* esc(c)
|
||||
*
|
||||
* Format char c suitable for printing in an error message.
|
||||
*/
|
||||
static inline string esc(char c) {
|
||||
char buf[12];
|
||||
if (static_cast<uint8_t>(c) >= 0x20 && static_cast<uint8_t>(c) <= 0x7f) {
|
||||
snprintf(buf, sizeof buf, "'%c' (%d)", c, c);
|
||||
} else {
|
||||
snprintf(buf, sizeof buf, "(%d)", c);
|
||||
}
|
||||
return string(buf);
|
||||
}
|
||||
|
||||
static inline bool in_range(long x, long lower, long upper) {
|
||||
return (x >= lower && x <= upper);
|
||||
}
|
||||
|
||||
namespace {
|
||||
/* JsonParser
|
||||
*
|
||||
* Object that tracks all state of an in-progress parse.
|
||||
*/
|
||||
struct JsonParser final {
|
||||
|
||||
/* State
|
||||
*/
|
||||
const string &str;
|
||||
size_t i;
|
||||
string &err;
|
||||
bool failed;
|
||||
const JsonParse strategy;
|
||||
|
||||
/* fail(msg, err_ret = Json())
|
||||
*
|
||||
* Mark this parse as failed.
|
||||
*/
|
||||
Json fail(string &&msg) {
|
||||
return fail(move(msg), Json());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T fail(string &&msg, const T err_ret) {
|
||||
if (!failed)
|
||||
err = std::move(msg);
|
||||
failed = true;
|
||||
return err_ret;
|
||||
}
|
||||
|
||||
/* consume_whitespace()
|
||||
*
|
||||
* Advance until the current character is non-whitespace.
|
||||
*/
|
||||
void consume_whitespace() {
|
||||
while (str[i] == ' ' || str[i] == '\r' || str[i] == '\n' || str[i] == '\t')
|
||||
i++;
|
||||
}
|
||||
|
||||
/* consume_comment()
|
||||
*
|
||||
* Advance comments (c-style inline and multiline).
|
||||
*/
|
||||
bool consume_comment() {
|
||||
bool comment_found = false;
|
||||
if (str[i] == '/') {
|
||||
i++;
|
||||
if (i == str.size())
|
||||
return fail("unexpected end of input after start of comment", false);
|
||||
if (str[i] == '/') { // inline comment
|
||||
i++;
|
||||
// advance until next line, or end of input
|
||||
while (i < str.size() && str[i] != '\n') {
|
||||
i++;
|
||||
}
|
||||
comment_found = true;
|
||||
}
|
||||
else if (str[i] == '*') { // multiline comment
|
||||
i++;
|
||||
if (i > str.size()-2)
|
||||
return fail("unexpected end of input inside multi-line comment", false);
|
||||
// advance until closing tokens
|
||||
while (!(str[i] == '*' && str[i+1] == '/')) {
|
||||
i++;
|
||||
if (i > str.size()-2)
|
||||
return fail(
|
||||
"unexpected end of input inside multi-line comment", false);
|
||||
}
|
||||
i += 2;
|
||||
comment_found = true;
|
||||
}
|
||||
else
|
||||
return fail("malformed comment", false);
|
||||
}
|
||||
return comment_found;
|
||||
}
|
||||
|
||||
/* consume_garbage()
|
||||
*
|
||||
* Advance until the current character is non-whitespace and non-comment.
|
||||
*/
|
||||
void consume_garbage() {
|
||||
consume_whitespace();
|
||||
if(strategy == JsonParse::COMMENTS) {
|
||||
bool comment_found = false;
|
||||
do {
|
||||
comment_found = consume_comment();
|
||||
if (failed) return;
|
||||
consume_whitespace();
|
||||
}
|
||||
while(comment_found);
|
||||
}
|
||||
}
|
||||
|
||||
/* get_next_token()
|
||||
*
|
||||
* Return the next non-whitespace character. If the end of the input is reached,
|
||||
* flag an error and return 0.
|
||||
*/
|
||||
char get_next_token() {
|
||||
consume_garbage();
|
||||
if (failed) return static_cast<char>(0);
|
||||
if (i == str.size())
|
||||
return fail("unexpected end of input", static_cast<char>(0));
|
||||
|
||||
return str[i++];
|
||||
}
|
||||
|
||||
/* encode_utf8(pt, out)
|
||||
*
|
||||
* Encode pt as UTF-8 and add it to out.
|
||||
*/
|
||||
void encode_utf8(long pt, string & out) {
|
||||
if (pt < 0)
|
||||
return;
|
||||
|
||||
if (pt < 0x80) {
|
||||
out += static_cast<char>(pt);
|
||||
} else if (pt < 0x800) {
|
||||
out += static_cast<char>((pt >> 6) | 0xC0);
|
||||
out += static_cast<char>((pt & 0x3F) | 0x80);
|
||||
} else if (pt < 0x10000) {
|
||||
out += static_cast<char>((pt >> 12) | 0xE0);
|
||||
out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
|
||||
out += static_cast<char>((pt & 0x3F) | 0x80);
|
||||
} else {
|
||||
out += static_cast<char>((pt >> 18) | 0xF0);
|
||||
out += static_cast<char>(((pt >> 12) & 0x3F) | 0x80);
|
||||
out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
|
||||
out += static_cast<char>((pt & 0x3F) | 0x80);
|
||||
}
|
||||
}
|
||||
|
||||
/* parse_string()
|
||||
*
|
||||
* Parse a string, starting at the current position.
|
||||
*/
|
||||
string parse_string() {
|
||||
string out;
|
||||
long last_escaped_codepoint = -1;
|
||||
while (true) {
|
||||
if (i == str.size())
|
||||
return fail("unexpected end of input in string", "");
|
||||
|
||||
char ch = str[i++];
|
||||
|
||||
if (ch == '"') {
|
||||
encode_utf8(last_escaped_codepoint, out);
|
||||
return out;
|
||||
}
|
||||
|
||||
if (in_range(ch, 0, 0x1f))
|
||||
return fail("unescaped " + esc(ch) + " in string", "");
|
||||
|
||||
// The usual case: non-escaped characters
|
||||
if (ch != '\\') {
|
||||
encode_utf8(last_escaped_codepoint, out);
|
||||
last_escaped_codepoint = -1;
|
||||
out += ch;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle escapes
|
||||
if (i == str.size())
|
||||
return fail("unexpected end of input in string", "");
|
||||
|
||||
ch = str[i++];
|
||||
|
||||
if (ch == 'u') {
|
||||
// Extract 4-byte escape sequence
|
||||
string esc = str.substr(i, 4);
|
||||
// Explicitly check length of the substring. The following loop
|
||||
// relies on std::string returning the terminating NUL when
|
||||
// accessing str[length]. Checking here reduces brittleness.
|
||||
if (esc.length() < 4) {
|
||||
return fail("bad \\u escape: " + esc, "");
|
||||
}
|
||||
for (size_t j = 0; j < 4; j++) {
|
||||
if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F')
|
||||
&& !in_range(esc[j], '0', '9'))
|
||||
return fail("bad \\u escape: " + esc, "");
|
||||
}
|
||||
|
||||
long codepoint = strtol(esc.data(), nullptr, 16);
|
||||
|
||||
// JSON specifies that characters outside the BMP shall be encoded as a pair
|
||||
// of 4-hex-digit \u escapes encoding their surrogate pair components. Check
|
||||
// whether we're in the middle of such a beast: the previous codepoint was an
|
||||
// escaped lead (high) surrogate, and this is a trail (low) surrogate.
|
||||
if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF)
|
||||
&& in_range(codepoint, 0xDC00, 0xDFFF)) {
|
||||
// Reassemble the two surrogate pairs into one astral-plane character, per
|
||||
// the UTF-16 algorithm.
|
||||
encode_utf8((((last_escaped_codepoint - 0xD800) << 10)
|
||||
| (codepoint - 0xDC00)) + 0x10000, out);
|
||||
last_escaped_codepoint = -1;
|
||||
} else {
|
||||
encode_utf8(last_escaped_codepoint, out);
|
||||
last_escaped_codepoint = codepoint;
|
||||
}
|
||||
|
||||
i += 4;
|
||||
continue;
|
||||
}
|
||||
|
||||
encode_utf8(last_escaped_codepoint, out);
|
||||
last_escaped_codepoint = -1;
|
||||
|
||||
if (ch == 'b') {
|
||||
out += '\b';
|
||||
} else if (ch == 'f') {
|
||||
out += '\f';
|
||||
} else if (ch == 'n') {
|
||||
out += '\n';
|
||||
} else if (ch == 'r') {
|
||||
out += '\r';
|
||||
} else if (ch == 't') {
|
||||
out += '\t';
|
||||
} else if (ch == '"' || ch == '\\' || ch == '/') {
|
||||
out += ch;
|
||||
} else {
|
||||
return fail("invalid escape character " + esc(ch), "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* parse_number()
|
||||
*
|
||||
* Parse a double.
|
||||
*/
|
||||
Json parse_number() {
|
||||
size_t start_pos = i;
|
||||
|
||||
if (str[i] == '-')
|
||||
i++;
|
||||
|
||||
// Integer part
|
||||
if (str[i] == '0') {
|
||||
i++;
|
||||
if (in_range(str[i], '0', '9'))
|
||||
return fail("leading 0s not permitted in numbers");
|
||||
} else if (in_range(str[i], '1', '9')) {
|
||||
i++;
|
||||
while (in_range(str[i], '0', '9'))
|
||||
i++;
|
||||
} else {
|
||||
return fail("invalid " + esc(str[i]) + " in number");
|
||||
}
|
||||
|
||||
if (str[i] != '.' && str[i] != 'e' && str[i] != 'E'
|
||||
&& (i - start_pos) <= static_cast<size_t>(std::numeric_limits<int>::digits10)) {
|
||||
return std::atoi(str.c_str() + start_pos);
|
||||
}
|
||||
|
||||
// Decimal part
|
||||
if (str[i] == '.') {
|
||||
i++;
|
||||
if (!in_range(str[i], '0', '9'))
|
||||
return fail("at least one digit required in fractional part");
|
||||
|
||||
while (in_range(str[i], '0', '9'))
|
||||
i++;
|
||||
}
|
||||
|
||||
// Exponent part
|
||||
if (str[i] == 'e' || str[i] == 'E') {
|
||||
i++;
|
||||
|
||||
if (str[i] == '+' || str[i] == '-')
|
||||
i++;
|
||||
|
||||
if (!in_range(str[i], '0', '9'))
|
||||
return fail("at least one digit required in exponent");
|
||||
|
||||
while (in_range(str[i], '0', '9'))
|
||||
i++;
|
||||
}
|
||||
|
||||
return std::strtod(str.c_str() + start_pos, nullptr);
|
||||
}
|
||||
|
||||
/* expect(str, res)
|
||||
*
|
||||
* Expect that 'str' starts at the character that was just read. If it does, advance
|
||||
* the input and return res. If not, flag an error.
|
||||
*/
|
||||
Json expect(const string &expected, Json res) {
|
||||
assert(i != 0);
|
||||
i--;
|
||||
if (str.compare(i, expected.length(), expected) == 0) {
|
||||
i += expected.length();
|
||||
return res;
|
||||
} else {
|
||||
return fail("parse error: expected " + expected + ", got " + str.substr(i, expected.length()));
|
||||
}
|
||||
}
|
||||
|
||||
/* parse_json()
|
||||
*
|
||||
* Parse a JSON object.
|
||||
*/
|
||||
Json parse_json(int depth) {
|
||||
if (depth > max_depth) {
|
||||
return fail("exceeded maximum nesting depth");
|
||||
}
|
||||
|
||||
char ch = get_next_token();
|
||||
if (failed)
|
||||
return Json();
|
||||
|
||||
if (ch == '-' || (ch >= '0' && ch <= '9')) {
|
||||
i--;
|
||||
return parse_number();
|
||||
}
|
||||
|
||||
if (ch == 't')
|
||||
return expect("true", true);
|
||||
|
||||
if (ch == 'f')
|
||||
return expect("false", false);
|
||||
|
||||
if (ch == 'n')
|
||||
return expect("null", Json());
|
||||
|
||||
if (ch == '"')
|
||||
return parse_string();
|
||||
|
||||
if (ch == '{') {
|
||||
map<string, Json> data;
|
||||
ch = get_next_token();
|
||||
if (ch == '}')
|
||||
return data;
|
||||
|
||||
while (1) {
|
||||
if (ch != '"')
|
||||
return fail("expected '\"' in object, got " + esc(ch));
|
||||
|
||||
string key = parse_string();
|
||||
if (failed)
|
||||
return Json();
|
||||
|
||||
ch = get_next_token();
|
||||
if (ch != ':')
|
||||
return fail("expected ':' in object, got " + esc(ch));
|
||||
|
||||
data[std::move(key)] = parse_json(depth + 1);
|
||||
if (failed)
|
||||
return Json();
|
||||
|
||||
ch = get_next_token();
|
||||
if (ch == '}')
|
||||
break;
|
||||
if (ch != ',')
|
||||
return fail("expected ',' in object, got " + esc(ch));
|
||||
|
||||
ch = get_next_token();
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
if (ch == '[') {
|
||||
vector<Json> data;
|
||||
ch = get_next_token();
|
||||
if (ch == ']')
|
||||
return data;
|
||||
|
||||
while (1) {
|
||||
i--;
|
||||
data.push_back(parse_json(depth + 1));
|
||||
if (failed)
|
||||
return Json();
|
||||
|
||||
ch = get_next_token();
|
||||
if (ch == ']')
|
||||
break;
|
||||
if (ch != ',')
|
||||
return fail("expected ',' in list, got " + esc(ch));
|
||||
|
||||
ch = get_next_token();
|
||||
(void)ch;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
return fail("expected value, got " + esc(ch));
|
||||
}
|
||||
};
|
||||
}//namespace {
|
||||
|
||||
Json Json::parse(const string &in, string &err, JsonParse strategy) {
|
||||
JsonParser parser { in, 0, err, false, strategy };
|
||||
Json result = parser.parse_json(0);
|
||||
|
||||
// Check for any trailing garbage
|
||||
parser.consume_garbage();
|
||||
if (parser.failed)
|
||||
return Json();
|
||||
if (parser.i != in.size())
|
||||
return parser.fail("unexpected trailing " + esc(in[parser.i]));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Documented in json11.hpp
|
||||
vector<Json> Json::parse_multi(const string &in,
|
||||
std::string::size_type &parser_stop_pos,
|
||||
string &err,
|
||||
JsonParse strategy) {
|
||||
JsonParser parser { in, 0, err, false, strategy };
|
||||
parser_stop_pos = 0;
|
||||
vector<Json> json_vec;
|
||||
while (parser.i != in.size() && !parser.failed) {
|
||||
json_vec.push_back(parser.parse_json(0));
|
||||
if (parser.failed)
|
||||
break;
|
||||
|
||||
// Check for another object
|
||||
parser.consume_garbage();
|
||||
if (parser.failed)
|
||||
break;
|
||||
parser_stop_pos = parser.i;
|
||||
}
|
||||
return json_vec;
|
||||
}
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * *
|
||||
* Shape-checking
|
||||
*/
|
||||
|
||||
bool Json::has_shape(const shape & types, string & err) const {
|
||||
if (!is_object()) {
|
||||
err = "expected JSON object, got " + dump();
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& obj_items = object_items();
|
||||
for (auto & item : types) {
|
||||
const auto it = obj_items.find(item.first);
|
||||
if (it == obj_items.cend() || it->second.type() != item.second) {
|
||||
err = "bad type for " + item.first + " in " + dump();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace json11
|
232
examples/server/json11.hpp
Normal file
232
examples/server/json11.hpp
Normal file
|
@ -0,0 +1,232 @@
|
|||
/* json11
|
||||
*
|
||||
* json11 is a tiny JSON library for C++11, providing JSON parsing and serialization.
|
||||
*
|
||||
* The core object provided by the library is json11::Json. A Json object represents any JSON
|
||||
* value: null, bool, number (int or double), string (std::string), array (std::vector), or
|
||||
* object (std::map).
|
||||
*
|
||||
* Json objects act like values: they can be assigned, copied, moved, compared for equality or
|
||||
* order, etc. There are also helper methods Json::dump, to serialize a Json to a string, and
|
||||
* Json::parse (static) to parse a std::string as a Json object.
|
||||
*
|
||||
* Internally, the various types of Json object are represented by the JsonValue class
|
||||
* hierarchy.
|
||||
*
|
||||
* A note on numbers - JSON specifies the syntax of number formatting but not its semantics,
|
||||
* so some JSON implementations distinguish between integers and floating-point numbers, while
|
||||
* some don't. In json11, we choose the latter. Because some JSON implementations (namely
|
||||
* Javascript itself) treat all numbers as the same type, distinguishing the two leads
|
||||
* to JSON that will be *silently* changed by a round-trip through those implementations.
|
||||
* Dangerous! To avoid that risk, json11 stores all numbers as double internally, but also
|
||||
* provides integer helpers.
|
||||
*
|
||||
* Fortunately, double-precision IEEE754 ('double') can precisely store any integer in the
|
||||
* range +/-2^53, which includes every 'int' on most systems. (Timestamps often use int64
|
||||
* or long long to avoid the Y2038K problem; a double storing microseconds since some epoch
|
||||
* will be exact for +/- 275 years.)
|
||||
*/
|
||||
|
||||
/* Copyright (c) 2013 Dropbox, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <initializer_list>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#if _MSC_VER <= 1800 // VS 2013
|
||||
#ifndef noexcept
|
||||
#define noexcept throw()
|
||||
#endif
|
||||
|
||||
#ifndef snprintf
|
||||
#define snprintf _snprintf_s
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace json11 {
|
||||
|
||||
enum JsonParse {
|
||||
STANDARD, COMMENTS
|
||||
};
|
||||
|
||||
class JsonValue;
|
||||
|
||||
class Json final {
|
||||
public:
|
||||
// Types
|
||||
enum Type {
|
||||
NUL, NUMBER, BOOL, STRING, ARRAY, OBJECT
|
||||
};
|
||||
|
||||
// Array and object typedefs
|
||||
typedef std::vector<Json> array;
|
||||
typedef std::map<std::string, Json> object;
|
||||
|
||||
// Constructors for the various types of JSON value.
|
||||
Json() noexcept; // NUL
|
||||
Json(std::nullptr_t) noexcept; // NUL
|
||||
Json(double value); // NUMBER
|
||||
Json(int value); // NUMBER
|
||||
Json(bool value); // BOOL
|
||||
Json(const std::string &value); // STRING
|
||||
Json(std::string &&value); // STRING
|
||||
Json(const char * value); // STRING
|
||||
Json(const array &values); // ARRAY
|
||||
Json(array &&values); // ARRAY
|
||||
Json(const object &values); // OBJECT
|
||||
Json(object &&values); // OBJECT
|
||||
|
||||
// Implicit constructor: anything with a to_json() function.
|
||||
template <class T, class = decltype(&T::to_json)>
|
||||
Json(const T & t) : Json(t.to_json()) {}
|
||||
|
||||
// Implicit constructor: map-like objects (std::map, std::unordered_map, etc)
|
||||
template <class M, typename std::enable_if<
|
||||
std::is_constructible<std::string, decltype(std::declval<M>().begin()->first)>::value
|
||||
&& std::is_constructible<Json, decltype(std::declval<M>().begin()->second)>::value,
|
||||
int>::type = 0>
|
||||
Json(const M & m) : Json(object(m.begin(), m.end())) {}
|
||||
|
||||
// Implicit constructor: vector-like objects (std::list, std::vector, std::set, etc)
|
||||
template <class V, typename std::enable_if<
|
||||
std::is_constructible<Json, decltype(*std::declval<V>().begin())>::value,
|
||||
int>::type = 0>
|
||||
Json(const V & v) : Json(array(v.begin(), v.end())) {}
|
||||
|
||||
// This prevents Json(some_pointer) from accidentally producing a bool. Use
|
||||
// Json(bool(some_pointer)) if that behavior is desired.
|
||||
Json(void *) = delete;
|
||||
|
||||
// Accessors
|
||||
Type type() const;
|
||||
|
||||
bool is_null() const { return type() == NUL; }
|
||||
bool is_number() const { return type() == NUMBER; }
|
||||
bool is_bool() const { return type() == BOOL; }
|
||||
bool is_string() const { return type() == STRING; }
|
||||
bool is_array() const { return type() == ARRAY; }
|
||||
bool is_object() const { return type() == OBJECT; }
|
||||
|
||||
// Return the enclosed value if this is a number, 0 otherwise. Note that json11 does not
|
||||
// distinguish between integer and non-integer numbers - number_value() and int_value()
|
||||
// can both be applied to a NUMBER-typed object.
|
||||
double number_value() const;
|
||||
int int_value() const;
|
||||
|
||||
// Return the enclosed value if this is a boolean, false otherwise.
|
||||
bool bool_value() const;
|
||||
// Return the enclosed string if this is a string, "" otherwise.
|
||||
const std::string &string_value() const;
|
||||
// Return the enclosed std::vector if this is an array, or an empty vector otherwise.
|
||||
const array &array_items() const;
|
||||
// Return the enclosed std::map if this is an object, or an empty map otherwise.
|
||||
const object &object_items() const;
|
||||
|
||||
// Return a reference to arr[i] if this is an array, Json() otherwise.
|
||||
const Json & operator[](size_t i) const;
|
||||
// Return a reference to obj[key] if this is an object, Json() otherwise.
|
||||
const Json & operator[](const std::string &key) const;
|
||||
|
||||
// Serialize.
|
||||
void dump(std::string &out) const;
|
||||
std::string dump() const {
|
||||
std::string out;
|
||||
dump(out);
|
||||
return out;
|
||||
}
|
||||
|
||||
// Parse. If parse fails, return Json() and assign an error message to err.
|
||||
static Json parse(const std::string & in,
|
||||
std::string & err,
|
||||
JsonParse strategy = JsonParse::STANDARD);
|
||||
static Json parse(const char * in,
|
||||
std::string & err,
|
||||
JsonParse strategy = JsonParse::STANDARD) {
|
||||
if (in) {
|
||||
return parse(std::string(in), err, strategy);
|
||||
} else {
|
||||
err = "null input";
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
// Parse multiple objects, concatenated or separated by whitespace
|
||||
static std::vector<Json> parse_multi(
|
||||
const std::string & in,
|
||||
std::string::size_type & parser_stop_pos,
|
||||
std::string & err,
|
||||
JsonParse strategy = JsonParse::STANDARD);
|
||||
|
||||
static inline std::vector<Json> parse_multi(
|
||||
const std::string & in,
|
||||
std::string & err,
|
||||
JsonParse strategy = JsonParse::STANDARD) {
|
||||
std::string::size_type parser_stop_pos;
|
||||
return parse_multi(in, parser_stop_pos, err, strategy);
|
||||
}
|
||||
|
||||
bool operator== (const Json &rhs) const;
|
||||
bool operator< (const Json &rhs) const;
|
||||
bool operator!= (const Json &rhs) const { return !(*this == rhs); }
|
||||
bool operator<= (const Json &rhs) const { return !(rhs < *this); }
|
||||
bool operator> (const Json &rhs) const { return (rhs < *this); }
|
||||
bool operator>= (const Json &rhs) const { return !(*this < rhs); }
|
||||
|
||||
/* has_shape(types, err)
|
||||
*
|
||||
* Return true if this is a JSON object and, for each item in types, has a field of
|
||||
* the given type. If not, return false and set err to a descriptive message.
|
||||
*/
|
||||
typedef std::initializer_list<std::pair<std::string, Type>> shape;
|
||||
bool has_shape(const shape & types, std::string & err) const;
|
||||
|
||||
private:
|
||||
std::shared_ptr<JsonValue> m_ptr;
|
||||
};
|
||||
|
||||
// Internal class hierarchy - JsonValue objects are not exposed to users of this API.
|
||||
class JsonValue {
|
||||
protected:
|
||||
friend class Json;
|
||||
friend class JsonInt;
|
||||
friend class JsonDouble;
|
||||
virtual Json::Type type() const = 0;
|
||||
virtual bool equals(const JsonValue * other) const = 0;
|
||||
virtual bool less(const JsonValue * other) const = 0;
|
||||
virtual void dump(std::string &out) const = 0;
|
||||
virtual double number_value() const;
|
||||
virtual int int_value() const;
|
||||
virtual bool bool_value() const;
|
||||
virtual const std::string &string_value() const;
|
||||
virtual const Json::array &array_items() const;
|
||||
virtual const Json &operator[](size_t i) const;
|
||||
virtual const Json::object &object_items() const;
|
||||
virtual const Json &operator[](const std::string &key) const;
|
||||
virtual ~JsonValue() {}
|
||||
};
|
||||
|
||||
} // namespace json11
|
742
examples/server/server.cpp
Normal file
742
examples/server/server.cpp
Normal file
|
@ -0,0 +1,742 @@
|
|||
#include <server.h>
|
||||
|
||||
using namespace httplib;
|
||||
using namespace json11;
|
||||
|
||||
bool Llama::load_context() {
|
||||
// load the model
|
||||
{
|
||||
auto lparams = llama_context_default_params();
|
||||
|
||||
lparams.n_ctx = params.n_ctx;
|
||||
lparams.n_parts = params.n_parts;
|
||||
lparams.seed = params.seed;
|
||||
lparams.f16_kv = params.memory_f16;
|
||||
lparams.use_mmap = params.use_mmap;
|
||||
lparams.use_mlock = params.use_mlock;
|
||||
|
||||
ctx = llama_init_from_file(params.model.c_str(), lparams);
|
||||
|
||||
if (ctx == NULL)
|
||||
{
|
||||
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
n_ctx = llama_n_ctx(ctx);
|
||||
|
||||
|
||||
// enable interactive mode if reverse prompt or interactive start is specified
|
||||
if (params.antiprompt.size() != 0 || params.interactive_first)
|
||||
{
|
||||
params.interactive = true;
|
||||
}
|
||||
|
||||
// determine newline token
|
||||
llama_token_newline = ::llama_tokenize(ctx, "\n", false);
|
||||
|
||||
last_n_tokens.resize(n_ctx);
|
||||
std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);
|
||||
printf("Llama Context loaded\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Llama::prompt_test() {
|
||||
embd_inp = ::llama_tokenize(ctx, params.prompt, true);
|
||||
|
||||
if ((int)embd_inp.size() > n_ctx - 4)
|
||||
{
|
||||
fprintf(stderr, "%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int)embd_inp.size(), n_ctx - 4);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void Llama::setting_context() {
|
||||
n_remain = params.n_predict;
|
||||
|
||||
// number of tokens to keep when resetting context
|
||||
if (params.n_keep < 0 || params.n_keep > (int)embd_inp.size() || params.instruct)
|
||||
{
|
||||
params.n_keep = (int)embd_inp.size();
|
||||
}
|
||||
|
||||
// print system information
|
||||
{
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
|
||||
params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
|
||||
}
|
||||
|
||||
fprintf(stderr, "sampling: repeat_last_n = %d, repeat_penalty = %f, presence_penalty = %f, frequency_penalty = %f, top_k = %d, tfs_z = %f, top_p = %f, typical_p = %f, temp = %f, mirostat = %d, mirostat_lr = %f, mirostat_ent = %f\n",
|
||||
params.repeat_last_n, params.repeat_penalty, params.presence_penalty, params.frequency_penalty, params.top_k, params.tfs_z, params.top_p, params.typical_p, params.temp, params.mirostat, params.mirostat_eta, params.mirostat_tau);
|
||||
fprintf(stderr, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep);
|
||||
fprintf(stderr, "\n\n");
|
||||
|
||||
printf("\rLoading prompt: 0%");
|
||||
|
||||
while(true) {
|
||||
if (embd.size() > 0)
|
||||
{
|
||||
if (n_past + (int)embd.size() > n_ctx)
|
||||
{
|
||||
const int n_left = n_past - params.n_keep;
|
||||
n_past = params.n_keep;
|
||||
embd.insert(embd.begin(), last_n_tokens.begin() + n_ctx - n_left / 2 - embd.size(), last_n_tokens.end() - embd.size());
|
||||
}
|
||||
for (int i = 0; i < (int)embd.size(); i += params.n_batch)
|
||||
{
|
||||
int n_eval = (int)embd.size() - i;
|
||||
if (n_eval > params.n_batch)
|
||||
{
|
||||
n_eval = params.n_batch;
|
||||
}
|
||||
if (llama_eval(ctx, &embd[i], n_eval, n_past, params.n_threads))
|
||||
{
|
||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||
return;
|
||||
}
|
||||
n_past += n_eval;
|
||||
}
|
||||
}
|
||||
embd.clear();
|
||||
if ((int)embd_inp.size() <= n_consumed && !is_interacting)
|
||||
{
|
||||
// out of user input, sample next token
|
||||
const float temp = params.temp;
|
||||
const int32_t top_k = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k;
|
||||
const float top_p = params.top_p;
|
||||
const float tfs_z = params.tfs_z;
|
||||
const float typical_p = params.typical_p;
|
||||
const int32_t repeat_last_n = params.repeat_last_n < 0 ? n_ctx : params.repeat_last_n;
|
||||
const float repeat_penalty = params.repeat_penalty;
|
||||
const float alpha_presence = params.presence_penalty;
|
||||
const float alpha_frequency = params.frequency_penalty;
|
||||
const int mirostat = params.mirostat;
|
||||
const float mirostat_tau = params.mirostat_tau;
|
||||
const float mirostat_eta = params.mirostat_eta;
|
||||
const bool penalize_nl = params.penalize_nl;
|
||||
llama_token id = 0;
|
||||
{
|
||||
auto logits = llama_get_logits(ctx);
|
||||
auto n_vocab = llama_n_vocab(ctx);
|
||||
|
||||
// Apply params.logit_bias map
|
||||
for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++)
|
||||
{
|
||||
logits[it->first] += it->second;
|
||||
}
|
||||
|
||||
std::vector<llama_token_data> candidates;
|
||||
candidates.reserve(n_vocab);
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++)
|
||||
{
|
||||
candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
|
||||
}
|
||||
|
||||
llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};
|
||||
|
||||
// Apply penalties
|
||||
float nl_logit = logits[llama_token_nl()];
|
||||
auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx);
|
||||
llama_sample_repetition_penalty(ctx, &candidates_p,
|
||||
last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
|
||||
last_n_repeat, repeat_penalty);
|
||||
llama_sample_frequency_and_presence_penalties(ctx, &candidates_p,
|
||||
last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
|
||||
last_n_repeat, alpha_frequency, alpha_presence);
|
||||
if (!penalize_nl)
|
||||
{
|
||||
logits[llama_token_nl()] = nl_logit;
|
||||
}
|
||||
|
||||
if (temp <= 0)
|
||||
{
|
||||
// Greedy sampling
|
||||
id = llama_sample_token_greedy(ctx, &candidates_p);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mirostat == 1)
|
||||
{
|
||||
static float mirostat_mu = 2.0f * mirostat_tau;
|
||||
const int mirostat_m = 100;
|
||||
llama_sample_temperature(ctx, &candidates_p, temp);
|
||||
id = llama_sample_token_mirostat(ctx, &candidates_p, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu);
|
||||
}
|
||||
else if (mirostat == 2)
|
||||
{
|
||||
static float mirostat_mu = 2.0f * mirostat_tau;
|
||||
llama_sample_temperature(ctx, &candidates_p, temp);
|
||||
id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Temperature sampling
|
||||
llama_sample_top_k(ctx, &candidates_p, top_k);
|
||||
llama_sample_tail_free(ctx, &candidates_p, tfs_z);
|
||||
llama_sample_typical(ctx, &candidates_p, typical_p);
|
||||
llama_sample_top_p(ctx, &candidates_p, top_p);
|
||||
llama_sample_temperature(ctx, &candidates_p, temp);
|
||||
id = llama_sample_token(ctx, &candidates_p);
|
||||
}
|
||||
}
|
||||
last_n_tokens.erase(last_n_tokens.begin());
|
||||
last_n_tokens.push_back(id);
|
||||
}
|
||||
|
||||
// replace end of text token with newline token when in interactive mode
|
||||
if (id == llama_token_eos() && params.interactive && !params.instruct)
|
||||
{
|
||||
id = llama_token_newline.front();
|
||||
if (params.antiprompt.size() != 0)
|
||||
{
|
||||
// tokenize and inject first reverse prompt
|
||||
const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
|
||||
embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
|
||||
}
|
||||
}
|
||||
// add it to the context
|
||||
embd.push_back(id);
|
||||
// decrement remaining sampling budget
|
||||
--n_remain;
|
||||
}
|
||||
else
|
||||
{
|
||||
// some user input remains from prompt or interaction, forward it to processing
|
||||
while ((int)embd_inp.size() > n_consumed)
|
||||
{
|
||||
embd.push_back(embd_inp[n_consumed]);
|
||||
last_n_tokens.erase(last_n_tokens.begin());
|
||||
last_n_tokens.push_back(embd_inp[n_consumed]);
|
||||
++n_consumed;
|
||||
if ((int)embd.size() >= params.n_batch)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
printf("\rLoading prompt: %d%%", (int)(((float)n_consumed / (int)embd_inp.size()) * 100));
|
||||
}
|
||||
if (params.interactive && (int)embd_inp.size() <= n_consumed) {
|
||||
// check for reverse prompt
|
||||
if (params.antiprompt.size())
|
||||
{
|
||||
std::string last_output;
|
||||
for (auto id : last_n_tokens)
|
||||
{
|
||||
last_output += llama_token_to_str(ctx, id);
|
||||
}
|
||||
is_antiprompt = false;
|
||||
// Check if each of the reverse prompts appears at the end of the output.
|
||||
for (std::string &antiprompt : params.antiprompt)
|
||||
{
|
||||
if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos)
|
||||
{
|
||||
is_interacting = true;
|
||||
is_antiprompt = true;
|
||||
context_config = true;
|
||||
printf("\rContext prompt and behavior configured.\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int Llama::set_message(std::string msg) {
|
||||
if (msg.length() > 1)
|
||||
{
|
||||
auto line_inp = ::llama_tokenize(ctx, msg, false);
|
||||
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
|
||||
n_remain -= line_inp.size();
|
||||
printf("\nRequest completion: %i message tokens \n", line_inp.size());
|
||||
is_antiprompt = false;
|
||||
return line_inp.size();
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
std::string Llama::inference() {
|
||||
std::string result = "";
|
||||
if (embd.size() > 0)
|
||||
{
|
||||
if (n_past + (int)embd.size() > n_ctx)
|
||||
{
|
||||
const int n_left = n_past - params.n_keep;
|
||||
n_past = params.n_keep;
|
||||
embd.insert(embd.begin(), last_n_tokens.begin() + n_ctx - n_left / 2 - embd.size(), last_n_tokens.end() - embd.size());
|
||||
}
|
||||
for (int i = 0; i < (int)embd.size(); i += params.n_batch)
|
||||
{
|
||||
int n_eval = (int)embd.size() - i;
|
||||
if (n_eval > params.n_batch)
|
||||
{
|
||||
n_eval = params.n_batch;
|
||||
}
|
||||
if (llama_eval(ctx, &embd[i], n_eval, n_past, params.n_threads))
|
||||
{
|
||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||
return result;
|
||||
}
|
||||
n_past += n_eval;
|
||||
}
|
||||
}
|
||||
embd.clear();
|
||||
if ((int)embd_inp.size() <= n_consumed && !is_interacting)
|
||||
{
|
||||
// out of user input, sample next token
|
||||
const float temp = params.temp;
|
||||
const int32_t top_k = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k;
|
||||
const float top_p = params.top_p;
|
||||
const float tfs_z = params.tfs_z;
|
||||
const float typical_p = params.typical_p;
|
||||
const int32_t repeat_last_n = params.repeat_last_n < 0 ? n_ctx : params.repeat_last_n;
|
||||
const float repeat_penalty = params.repeat_penalty;
|
||||
const float alpha_presence = params.presence_penalty;
|
||||
const float alpha_frequency = params.frequency_penalty;
|
||||
const int mirostat = params.mirostat;
|
||||
const float mirostat_tau = params.mirostat_tau;
|
||||
const float mirostat_eta = params.mirostat_eta;
|
||||
const bool penalize_nl = params.penalize_nl;
|
||||
llama_token id = 0;
|
||||
{
|
||||
auto logits = llama_get_logits(ctx);
|
||||
auto n_vocab = llama_n_vocab(ctx);
|
||||
|
||||
// Apply params.logit_bias map
|
||||
for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++)
|
||||
{
|
||||
logits[it->first] += it->second;
|
||||
}
|
||||
|
||||
std::vector<llama_token_data> candidates;
|
||||
candidates.reserve(n_vocab);
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++)
|
||||
{
|
||||
candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
|
||||
}
|
||||
|
||||
llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};
|
||||
|
||||
// Apply penalties
|
||||
float nl_logit = logits[llama_token_nl()];
|
||||
auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx);
|
||||
llama_sample_repetition_penalty(ctx, &candidates_p,
|
||||
last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
|
||||
last_n_repeat, repeat_penalty);
|
||||
llama_sample_frequency_and_presence_penalties(ctx, &candidates_p,
|
||||
last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
|
||||
last_n_repeat, alpha_frequency, alpha_presence);
|
||||
if (!penalize_nl)
|
||||
{
|
||||
logits[llama_token_nl()] = nl_logit;
|
||||
}
|
||||
|
||||
if (temp <= 0)
|
||||
{
|
||||
// Greedy sampling
|
||||
id = llama_sample_token_greedy(ctx, &candidates_p);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mirostat == 1)
|
||||
{
|
||||
static float mirostat_mu = 2.0f * mirostat_tau;
|
||||
const int mirostat_m = 100;
|
||||
llama_sample_temperature(ctx, &candidates_p, temp);
|
||||
id = llama_sample_token_mirostat(ctx, &candidates_p, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu);
|
||||
}
|
||||
else if (mirostat == 2)
|
||||
{
|
||||
static float mirostat_mu = 2.0f * mirostat_tau;
|
||||
llama_sample_temperature(ctx, &candidates_p, temp);
|
||||
id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Temperature sampling
|
||||
llama_sample_top_k(ctx, &candidates_p, top_k);
|
||||
llama_sample_tail_free(ctx, &candidates_p, tfs_z);
|
||||
llama_sample_typical(ctx, &candidates_p, typical_p);
|
||||
llama_sample_top_p(ctx, &candidates_p, top_p);
|
||||
llama_sample_temperature(ctx, &candidates_p, temp);
|
||||
id = llama_sample_token(ctx, &candidates_p);
|
||||
}
|
||||
}
|
||||
last_n_tokens.erase(last_n_tokens.begin());
|
||||
last_n_tokens.push_back(id);
|
||||
}
|
||||
|
||||
// replace end of text token with newline token when in interactive mode
|
||||
if (id == llama_token_eos() && params.interactive && !params.instruct)
|
||||
{
|
||||
id = llama_token_newline.front();
|
||||
if (params.antiprompt.size() != 0)
|
||||
{
|
||||
// tokenize and inject first reverse prompt
|
||||
const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
|
||||
embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
|
||||
}
|
||||
}
|
||||
|
||||
// add it to the context
|
||||
embd.push_back(id);
|
||||
for (auto id : embd)
|
||||
{
|
||||
result += llama_token_to_str(ctx, id);
|
||||
tokens_completion++;
|
||||
}
|
||||
// decrement remaining sampling budget
|
||||
--n_remain;
|
||||
}
|
||||
else
|
||||
{
|
||||
// some user input remains from prompt or interaction, forward it to processing
|
||||
while ((int)embd_inp.size() > n_consumed)
|
||||
{
|
||||
embd.push_back(embd_inp[n_consumed]);
|
||||
last_n_tokens.erase(last_n_tokens.begin());
|
||||
last_n_tokens.push_back(embd_inp[n_consumed]);
|
||||
++n_consumed;
|
||||
if ((int)embd.size() >= params.n_batch)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (params.interactive && (int)embd_inp.size() <= n_consumed) {
|
||||
// check for reverse prompt
|
||||
if (params.antiprompt.size())
|
||||
{
|
||||
std::string last_output;
|
||||
for (auto id : last_n_tokens)
|
||||
{
|
||||
last_output += llama_token_to_str(ctx, id);
|
||||
}
|
||||
is_antiprompt = false;
|
||||
// Check if each of the reverse prompts appears at the end of the output.
|
||||
for (std::string &antiprompt : params.antiprompt)
|
||||
{
|
||||
if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos)
|
||||
{
|
||||
is_interacting = true;
|
||||
is_antiprompt = true;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (n_past > 0)
|
||||
{
|
||||
is_interacting = false;
|
||||
}
|
||||
}
|
||||
if (params.interactive && n_remain <= 0 && params.n_predict != -1)
|
||||
{
|
||||
n_remain = params.n_predict;
|
||||
is_interacting = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void server_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
||||
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "options:\n");
|
||||
fprintf(stderr, " -h, --help show this help message and exit\n");
|
||||
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n");
|
||||
fprintf(stderr, " --memory_f32 use f32 instead of f16 for memory key+value\n");
|
||||
fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
|
||||
if (llama_mlock_supported()) {
|
||||
fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
|
||||
}
|
||||
if (llama_mmap_supported()) {
|
||||
fprintf(stderr, " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
|
||||
}
|
||||
fprintf(stderr, " -m FNAME, --model FNAME\n");
|
||||
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
|
||||
fprintf(stderr, " -host ip address to listen (default 0.0.0.0)\n");
|
||||
fprintf(stderr, " -port PORT port to listen (default 8080)\n");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
|
||||
// own arguments required by this example
|
||||
gpt_params default_params;
|
||||
|
||||
gpt_params params;
|
||||
params.model = "ggml-model.bin";
|
||||
|
||||
std::string hostname = "0.0.0.0";
|
||||
int port = 8080;
|
||||
|
||||
std::string arg;
|
||||
bool invalid_param = false;
|
||||
|
||||
for (int i = 1; i < argc; i++)
|
||||
{
|
||||
arg = argv[i];
|
||||
if (arg == "--port")
|
||||
{
|
||||
if (++i >= argc)
|
||||
{
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
port = std::stoi(argv[i]);
|
||||
}
|
||||
else if (arg == "--host")
|
||||
{
|
||||
if (++i >= argc)
|
||||
{
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
hostname = argv[i];
|
||||
}
|
||||
else if (arg == "--keep")
|
||||
{
|
||||
if (++i >= argc)
|
||||
{
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.n_keep = std::stoi(argv[i]);
|
||||
}
|
||||
else if (arg == "-m" || arg == "--model")
|
||||
{
|
||||
if (++i >= argc)
|
||||
{
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.model = argv[i];
|
||||
}
|
||||
else if (arg == "-h" || arg == "--help")
|
||||
{
|
||||
server_print_usage(argc, argv, default_params);
|
||||
exit(0);
|
||||
}
|
||||
else if (arg == "-c" || arg == "--ctx_size")
|
||||
{
|
||||
if (++i >= argc)
|
||||
{
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.n_ctx = std::stoi(argv[i]);
|
||||
}
|
||||
else if (arg == "--memory_f32")
|
||||
{
|
||||
params.memory_f16 = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
server_print_usage(argc, argv, default_params);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (invalid_param)
|
||||
{
|
||||
fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
|
||||
server_print_usage(argc, argv, default_params);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.seed <= 0)
|
||||
{
|
||||
params.seed = time(NULL);
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
|
||||
|
||||
Llama* llama = new Llama(params);
|
||||
|
||||
// load model file
|
||||
if(!llama->load_context()) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
Server svr;
|
||||
|
||||
svr.Get("/", [](const Request &req, Response &res)
|
||||
{
|
||||
res.set_content("<h1>llama.cpp server works</h1>", "text/html");
|
||||
}
|
||||
);
|
||||
|
||||
svr.Post("/setting-context", [&llama](const Request &req, Response &res) {
|
||||
if(!llama->context_config){
|
||||
std::string err;
|
||||
Json body = Json::parse(req.body, err);
|
||||
// seed whould be passed by the request, but seem
|
||||
// the current implementation need it in the load file
|
||||
if (!body["threads"].is_null())
|
||||
{
|
||||
llama->params.n_threads = body["threads"].int_value();
|
||||
}
|
||||
if (!body["n_predict"].is_null())
|
||||
{
|
||||
llama->params.n_predict = body["n_predict"].int_value();
|
||||
}
|
||||
if (!body["top_k"].is_null())
|
||||
{
|
||||
llama->params.top_k = body["top_k"].int_value();
|
||||
}
|
||||
if (!body["top_p"].is_null())
|
||||
{
|
||||
llama->params.top_p = body["top_p"].number_value();
|
||||
}
|
||||
if (!body["temperature"].is_null())
|
||||
{
|
||||
llama->params.temp = body["temperature"].number_value();
|
||||
}
|
||||
if (!body["batch_size"].is_null())
|
||||
{
|
||||
llama->params.n_batch = body["batch_size"].number_value();
|
||||
}
|
||||
if (!body["context"].is_null())
|
||||
{
|
||||
llama->params.prompt = "";
|
||||
Json::array context_messages = body["context"].array_items();
|
||||
// generate prompt for vicuna model v1
|
||||
for (Json ctx_msg : context_messages)
|
||||
{
|
||||
auto role = ctx_msg["role"].string_value();
|
||||
if (role == "system")
|
||||
{
|
||||
llama->params.prompt = ctx_msg["content"].string_value() + "\n\n";
|
||||
}
|
||||
else if (role == "user")
|
||||
{
|
||||
llama->params.prompt += "### Human: " + ctx_msg["content"].string_value() + "\n";
|
||||
}
|
||||
else if (role == "assistant")
|
||||
{
|
||||
llama->params.prompt += "### Assistant: " + ctx_msg["content"].string_value() + "\n";
|
||||
}
|
||||
}
|
||||
llama->params.prompt += "### Human:";
|
||||
}
|
||||
else
|
||||
{
|
||||
Json data = Json::object{
|
||||
{"status", "error"},
|
||||
{"reason", "Missing Context"}};
|
||||
res.set_content(data.dump(), "application/json");
|
||||
res.status = 400;
|
||||
return;
|
||||
}
|
||||
|
||||
if(!llama->prompt_test())
|
||||
{
|
||||
Json data = Json::object{
|
||||
{"status", "error"},
|
||||
{"reason", "Context too long, please be more specific"}};
|
||||
res.set_content(data.dump(), "application/json");
|
||||
res.status = 400;
|
||||
return;
|
||||
}
|
||||
|
||||
// Default configs for interactive with Vicuna model
|
||||
llama->params.interactive = true;
|
||||
llama->params.antiprompt.push_back("### Human:");
|
||||
llama->params.repeat_last_n = 64;
|
||||
llama->params.repeat_penalty = 1.1f;
|
||||
llama->setting_context();
|
||||
}
|
||||
Json data = Json::object {
|
||||
{ "status", "done" }};
|
||||
res.set_content(data.dump(), "application/json");
|
||||
});
|
||||
|
||||
svr.Post("/set-message", [&llama](const Request &req, Response &res) {
|
||||
bool result = false;
|
||||
if (llama->context_config)
|
||||
{
|
||||
std::string err;
|
||||
Json body = Json::parse(req.body, err);
|
||||
result = llama->set_message(body["message"].string_value() + "\n");
|
||||
}
|
||||
Json data = Json::object{
|
||||
{"can_inference", result }};
|
||||
res.set_content(data.dump(), "application/json");
|
||||
});
|
||||
|
||||
svr.Get("/completion", [&llama](const Request &req, Response &res)
|
||||
{
|
||||
bool stream = false;
|
||||
if (req.has_param("stream")) {
|
||||
stream = req.get_param_value("stream") == "true";
|
||||
}
|
||||
if(stream) {
|
||||
// Stream token by token like Chat GPT
|
||||
res.set_content_provider(
|
||||
"application/json",
|
||||
[&llama](size_t offset, DataSink &sink)
|
||||
{
|
||||
int ignore = 0;
|
||||
while(!llama->is_antiprompt) {
|
||||
std::string result = llama->inference();
|
||||
// ignore ### Human: and ### Assistant:
|
||||
if(result == "##") {
|
||||
ignore = 5;
|
||||
}
|
||||
if(ignore == 0) {
|
||||
Json data = Json::object{
|
||||
{"content", result.c_str()},
|
||||
{"tokens_consumed", 1 }};
|
||||
std::string json_data = data.dump();
|
||||
sink.write(json_data.c_str(), json_data.length());
|
||||
} else {
|
||||
ignore--;
|
||||
}
|
||||
printf("\rProcessing: %i tokens processed.", llama->tokens_completion);
|
||||
}
|
||||
sink.done(); // No more data
|
||||
printf("\rCompletion finished: %i tokens predicted.\n", llama->tokens_completion);
|
||||
return true; // return 'false' if you want to cancel the process.
|
||||
});
|
||||
} else {
|
||||
// Send all completion when finish
|
||||
int ignore = 0;
|
||||
std::string completion = "";
|
||||
llama->tokens_completion = 0;
|
||||
while (!llama->is_antiprompt)
|
||||
{
|
||||
std::string result = llama->inference();
|
||||
// ignore ### Human: and ### Assistant:
|
||||
if (result == "##")
|
||||
{
|
||||
ignore = 5;
|
||||
}
|
||||
if (ignore == 0)
|
||||
{
|
||||
completion += result;
|
||||
printf("\rProcessing: %i tokens processed.", llama->tokens_completion);
|
||||
}
|
||||
else
|
||||
{
|
||||
ignore--;
|
||||
}
|
||||
}
|
||||
Json data = Json::object {
|
||||
{ "content", completion.c_str() },
|
||||
{ "total_tokens", llama->tokens_completion }
|
||||
};
|
||||
printf("\nCompletion finished: %i tokens predicted.\n", llama->tokens_completion);
|
||||
res.set_content(data.dump(), "application/json");
|
||||
}
|
||||
});
|
||||
|
||||
printf("llama.cpp HTTP Server Listening at http://%s:%i", hostname, port);
|
||||
|
||||
// change hostname and port
|
||||
svr.listen(hostname, port);
|
||||
}
|
42
examples/server/server.h
Normal file
42
examples/server/server.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
#include <httplib.h>
|
||||
#include <json11.hpp>
|
||||
#include <cstring>
|
||||
#include "common.h"
|
||||
#include "llama.h"
|
||||
|
||||
/*
|
||||
This isn't the best way to do this.
|
||||
|
||||
Missing:
|
||||
- Clean context (insert new prompt for change the behavior,
|
||||
this implies clean kv cache and emb_inp in runtime)
|
||||
- Release context (free memory) after shutdown the server
|
||||
|
||||
*/
|
||||
|
||||
class Llama{
|
||||
public:
|
||||
Llama(gpt_params params_) : params(params_){};
|
||||
bool load_context();
|
||||
bool prompt_test();
|
||||
void setting_context();
|
||||
int set_message(std::string msg);
|
||||
std::string inference();
|
||||
|
||||
bool context_config = false;
|
||||
bool is_antiprompt = false;
|
||||
int tokens_completion = 0;
|
||||
gpt_params params;
|
||||
private:
|
||||
llama_context *ctx;
|
||||
int n_ctx;
|
||||
int n_past = 0;
|
||||
int n_consumed = 0;
|
||||
int n_session_consumed = 0;
|
||||
int n_remain = 0;
|
||||
std::vector<llama_token> embd;
|
||||
std::vector<llama_token> last_n_tokens;
|
||||
bool is_interacting = false;
|
||||
std::vector<int> llama_token_newline;
|
||||
std::vector<int> embd_inp;
|
||||
};
|
Loading…
Add table
Add a link
Reference in a new issue