如果你在加载 TextVectorization 层的配置时,想知道如何得到 dense 张量而不是 ragged 张量,请尝试显式设置 output_mode。该问题与最近的一个错误有关:当 output_mode 来自已保存的配置时,它不会被正确设置。
下面的代码会产生 dense 张量:
# Round-trip a TextVectorization layer through pickle while passing
# output_mode explicitly when rebuilding it.
# NOTE(review): `tf`, `pickle` and `TextVectorization` are assumed to be
# imported before this snippet runs -- confirm against the full file.
text_dataset = tf.data.Dataset.from_tensor_slices([
    "this is some clean text",
    "some more text",
    "even some more text",
])
vectorizer = TextVectorization(
    max_tokens=10, output_mode='int', output_sequence_length=10)
vectorizer.adapt(text_dataset.batch(1024))
print(vectorizer("this"))

# Persist the layer's config and weights. The context manager closes the
# file deterministically, so the data is flushed before it is read back.
with open("tv_layer.pkl", "wb") as pkl_file:
    pickle.dump({'config': vectorizer.get_config(),
                 'weights': vectorizer.get_weights()},
                pkl_file)

with open("tv_layer.pkl", "rb") as pkl_file:
    from_disk = pickle.load(pkl_file)

# output_mode is given as a literal here rather than read from the saved
# config; that is the only difference from the failing variant of this
# example.
new_vectorizer = TextVectorization(
    max_tokens=from_disk['config']['max_tokens'],
    output_mode='int',
    output_sequence_length=from_disk['config']['output_sequence_length'])
# adapt() is called on dummy data before set_weights() -- presumably so the
# layer's internal state exists before the saved weights are loaded.
new_vectorizer.adapt(tf.data.Dataset.from_tensor_slices(["xyz"]))
new_vectorizer.set_weights(from_disk['weights'])
print(new_vectorizer("this"))
tf.Tensor([5 0 0 0 0 0 0 0 0 0], shape=(10,), dtype=int64)
tf.Tensor([5 0 0 0 0 0 0 0 0 0], shape=(10,), dtype=int64)
这会在加载时产生 ragged 张量:
import tensorflow as tf

# Round-trip a TextVectorization layer through pickle, rebuilding it with
# output_mode taken from the saved config. This variant is kept on purpose:
# it reproduces the case where the reloaded layer prints a shape-(1,) tensor
# instead of the shape-(10,) tensor printed before saving.
# NOTE(review): `pickle` and `TextVectorization` are assumed to be imported
# before this snippet runs -- confirm against the full file.
text_dataset = tf.data.Dataset.from_tensor_slices([
    "this is some clean text",
    "some more text",
    "even some more text",
])
vectorizer = TextVectorization(
    max_tokens=10, output_mode='int', output_sequence_length=10)
vectorizer.adapt(text_dataset.batch(1024))
print(vectorizer("this"))

# Persist the layer's config and weights. The context manager closes the
# file deterministically, so the data is flushed before it is read back.
with open("tv_layer.pkl", "wb") as pkl_file:
    pickle.dump({'config': vectorizer.get_config(),
                 'weights': vectorizer.get_weights()},
                pkl_file)

with open("tv_layer.pkl", "rb") as pkl_file:
    from_disk = pickle.load(pkl_file)

# Deliberately read output_mode back from the saved config (do not replace it
# with a literal): this is what triggers the behaviour being demonstrated.
new_vectorizer = TextVectorization(
    max_tokens=from_disk['config']['max_tokens'],
    output_mode=from_disk['config']['output_mode'],
    output_sequence_length=from_disk['config']['output_sequence_length'])
# adapt() is called on dummy data before set_weights() -- presumably so the
# layer's internal state exists before the saved weights are loaded.
new_vectorizer.adapt(tf.data.Dataset.from_tensor_slices(["xyz"]))
new_vectorizer.set_weights(from_disk['weights'])
print(new_vectorizer("this"))
tf.Tensor([5 0 0 0 0 0 0 0 0 0], shape=(10,), dtype=int64)
tf.Tensor([5], shape=(1,), dtype=int64)