llama : split unicodedata.cpp from unicode.cpp

This commit is contained in:
Jared Van Bortel 2024-03-25 16:00:03 -04:00
parent b80c0af078
commit e5ddf2fcdd
5 changed files with 1430 additions and 1404 deletions

View file

@ -1165,6 +1165,7 @@ add_library(llama
llama.h llama.h
unicode.h unicode.h
unicode.cpp unicode.cpp
unicodedata.cpp
) )
target_include_directories(llama PUBLIC .) target_include_directories(llama PUBLIC .)

View file

@ -663,7 +663,7 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
$(CC) $(CFLAGS) -c $< -o $@ $(CC) $(CFLAGS) -c $< -o $@
unicode.o: unicode.cpp unicode.h unicode.o: unicode.cpp unicodedata.cpp unicode.h unicodedata.h
$(CXX) $(CXXFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) -c $< -o $@
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o

File diff suppressed because it is too large Load diff

1409
unicodedata.cpp Normal file

File diff suppressed because it is too large Load diff

15
unicodedata.h Normal file
View file

@ -0,0 +1,15 @@
#pragma once
#include <cstdint>
#include <map>
#include <utility>
#include <vector>
// Declarations of the Unicode lookup tables. These are `extern`, so the
// definitions live in another translation unit (presumably unicodedata.cpp,
// whose contents are not visible from this header — confirm).
//
// Each `unicode_ranges_*` table is a list of (uint32_t, uint32_t) pairs, one
// table per character category named in the identifier.
// NOTE(review): each pair presumably holds the first and last codepoint of a
// contiguous range; whether the upper bound is inclusive and whether the list
// is sorted must be verified against the definitions and the lookup code.
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_digit;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_letter;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_whitespace;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_accent_mark;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_punctuation;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_symbol;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_control;
// uint32_t -> uint32_t table; being a multimap, one key may carry several values.
// NOTE(review): presumably maps a codepoint to the codepoint(s) of its NFD
// (canonical decomposition) form — confirm against the definition and callers.
extern const std::multimap<uint32_t, uint32_t> unicode_map_nfd;