From 674bb08b20749d8aa866c6106c9c28bb729267df Mon Sep 17 00:00:00 2001 From: Evan Jones Date: Sun, 11 Jun 2023 22:40:01 -0400 Subject: [PATCH] handle & print parser errors --- examples/grammar-parser.cpp | 33 +++++++++++++++++++-------------- examples/main/main.cpp | 11 ++++++----- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/examples/grammar-parser.cpp b/examples/grammar-parser.cpp index 650c3cfc5..bdd48e796 100644 --- a/examples/grammar-parser.cpp +++ b/examples/grammar-parser.cpp @@ -47,7 +47,7 @@ namespace grammar_parser { pos++; } if (pos == src) { - throw std::string("expecting name at ") + src; + throw std::runtime_error(std::string("expecting name at ") + src); } return std::make_pair(pos, parse_space(pos)); } @@ -63,7 +63,7 @@ namespace grammar_parser { return std::make_pair((first << 4) + second, src + 4); } } - throw std::string("expecting \\xNN at ") + src; + throw std::runtime_error(std::string("expecting \\xNN at ") + src); } else if (esc == '"' || esc == '[' || esc == ']') { return std::make_pair(esc, src + 2); } else if (esc == 'r') { @@ -73,11 +73,11 @@ namespace grammar_parser { } else if (esc == 't') { return std::make_pair('\t', src + 2); } - throw std::string("unknown escape at ") + src; + throw std::runtime_error(std::string("unknown escape at ") + src); } else if (*src) { return std::make_pair(*src, src + 1); } - throw std::string("unexpected end of input"); + throw std::runtime_error("unexpected end of input"); } const char * parse_alternates( @@ -151,12 +151,12 @@ namespace grammar_parser { outbuf.push_back(1); outbuf.push_back(sub_rule_id); if (*pos != ')') { - throw std::string("expecting ')' at ") + pos; + throw std::runtime_error(std::string("expecting ')' at ") + pos); } pos = parse_space(pos + 1); } else if (*pos == '*' || *pos == '+' || *pos == '?') { // repetition operator if (outbuf.size() - out_start - 1 == 0) { - throw std::string("expecting preceeding item to */+/? at ") + pos; + throw std::runtime_error(std::string("expecting preceeding item to */+/? at ") + pos); } std::vector & out_grammar = state.out_grammar; @@ -236,7 +236,7 @@ namespace grammar_parser { const std::string name(src, name_len); if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) { - throw std::string("expecting ::= at ") + pos; + throw std::runtime_error(std::string("expecting ::= at ") + pos); } pos = parse_space(pos + 3); @@ -247,19 +247,24 @@ namespace grammar_parser { } else if (*pos == '\n') { pos++; } else if (*pos) { - throw std::string("expecting newline or end at ") + pos; + throw std::runtime_error(std::string("expecting newline or end at ") + pos); } return parse_space(pos); } parse_state parse(const char * src) { - parse_state state; - const char * pos = parse_space(src); - while (*pos) { - pos = parse_rule(state, pos); + try { + parse_state state; + const char * pos = parse_space(src); + while (*pos) { + pos = parse_rule(state, pos); + } + state.out_grammar.push_back(0xffff); + return state; + } catch (const std::exception & err) { + fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what()); + return parse_state(); } - state.out_grammar.push_back(0xffff); - return state; } const uint16_t * print_rule( diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 47b4728e9..8d9371e19 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -296,6 +296,10 @@ int main(int argc, char ** argv) { llama_grammar * grammar = NULL; if (!params.grammar.empty()) { parsed_grammar = grammar_parser::parse(params.grammar.c_str()); + // will be empty (default) if there are parse errors + if (parsed_grammar.out_grammar.empty()) { + return 1; + } fprintf(stderr, "%s: grammar:\n", __func__); grammar_parser::print_grammar(stderr, parsed_grammar); fprintf(stderr, "\n"); @@ -631,11 +635,8 @@ int main(int argc, char ** argv) { if (n_past > 0) { if (is_interacting) { // reset grammar state if we're restarting generation - if (!params.grammar.empty()) { - parsed_grammar = grammar_parser::parse(params.grammar.c_str()); - if (grammar != NULL) { - llama_grammar_free(grammar); - } + if (grammar != NULL) { + llama_grammar_free(grammar); grammar = llama_grammar_init( parsed_grammar.out_grammar.data(), parsed_grammar.symbol_ids.at("root")); }