From 281dca45d46078bf58042727237bb20682118785 Mon Sep 17 00:00:00 2001
From: mike dupont
Date: Sat, 2 Dec 2023 13:17:27 -0500
Subject: [PATCH] v1

---
Note: the "index" blob ids below predate this revision of the patch; they are
informational for a text patch and can be refreshed with git format-patch.

 bindata.py  | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++
 binread4.py | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 bindata.py
 create mode 100644 binread4.py

diff --git a/bindata.py b/bindata.py
new file mode 100644
index 000000000..78578f600
--- /dev/null
+++ b/bindata.py
@@ -0,0 +1,56 @@
+"""Read length-prefixed float32 records from ``batch*.bin`` files."""
+import numpy as np
+import glob
+import struct
+
+
+def read_blck(fi):
+    """Yield the first 4096 floats of each usable record in *fi*.
+
+    *fi* is a binary file object. Each record is a native-order 4-byte signed
+    count followed by that many 4-byte floats. Records holding 4096 floats or
+    fewer are skipped. Reading stops at end of file, at a truncated or
+    negative count, at a truncated payload, or after 1000 records.
+    """
+    for _ in range(1000):
+        lenb = fi.read(4)
+        if not lenb:
+            # Clean end of file is the normal way out, not an error.
+            return
+        if len(lenb) < 4:
+            print('error')
+            return
+        (count,) = struct.unpack('i', lenb)
+        by = count * 4
+        if by < 0:
+            print("erro", by)
+            return
+        floata = fi.read(by)
+        if len(floata) != by:
+            # A short payload means the stream cannot be resynchronised.
+            print("erro", by, len(floata))
+            return
+        if count > 4096:
+            # Decode only the 4096 floats that are kept; widening to float64
+            # matches what np.array() builds from struct.unpack() output.
+            head = np.frombuffer(floata, dtype=np.float32, count=4096)
+            yield head.astype(np.float64)
+
+
+def fit_generator():
+    """Yield one ``(first, second)`` pair of blocks per ``batch*.bin`` file.
+
+    Files are visited in sorted order. A file without two usable blocks is
+    reported and skipped.
+    """
+    for f in sorted(glob.glob("batch*.bin")):
+        with open(f, "rb") as fi:
+            blocks = read_blck(fi)
+            one = next(blocks, None)
+            two = next(blocks, None)
+        if one is None or two is None:
+            # A bare next() would raise StopIteration here, which becomes a
+            # RuntimeError inside a generator (PEP 479).
+            print("erro", f)
+            continue
+        yield one, two
diff --git a/binread4.py b/binread4.py
new file mode 100644
index 000000000..eccd31a90
--- /dev/null
+++ b/binread4.py
@@ -0,0 +1,58 @@
+import numpy as np
+from keras.preprocessing import sequence
+from keras.models import Sequential
+from keras.layers import Dense, Embedding
+from keras.layers import LSTM
+from keras.datasets import imdb
+
+import bindata
+
+
+def batch_iter(data, labels, batch_size, shuffle=True):
+    """Return ``(steps_per_epoch, batch_generator)`` for *data*.
+
+    The step count is ``len(data) / batch_size`` rounded up (1 for empty
+    data). The batches themselves come from ``bindata.fit_generator()``;
+    *labels* and *shuffle* are accepted but not used.
+    """
+    num_batches_per_epoch = int((len(data) - 1) / batch_size) + 1
+    return num_batches_per_epoch, bindata.fit_generator()
+
+
+def main(mode):
+    """Build the IMDB LSTM classifier and train it for one epoch.
+
+    With ``mode == 'fit'`` the padded in-memory arrays are used; any other
+    value trains from the generators returned by ``batch_iter``.
+    """
+    max_features = 20000
+    maxlen = 80
+    batch_size = 32
+
+    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
+
+    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
+    x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
+
+    model = Sequential()
+    model.add(Embedding(max_features, 128))
+    model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
+    model.add(Dense(1, activation='sigmoid'))
+
+    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+
+    if mode == 'fit':
+        model.fit(x_train, y_train, batch_size=batch_size, epochs=1, validation_data=(x_test, y_test))
+    else:
+        train_steps, train_batches = batch_iter(x_train, y_train, batch_size)
+        valid_steps, valid_batches = batch_iter(x_test, y_test, batch_size)
+        # Model.fit accepts generators directly; fit_generator is deprecated.
+        model.fit(train_batches, steps_per_epoch=train_steps, epochs=1,
+                  validation_data=valid_batches, validation_steps=valid_steps)
+
+
+if __name__ == '__main__':
+    import sys
+    # Optional first argument selects the mode; default stays 'fit'.
+    mode = sys.argv[1] if len(sys.argv) > 1 else 'fit'
+    main(mode)