[Posted]: 2018-01-30 04:57:22
[Problem description]:
I need to understand how to deploy a model on Google Cloud ML. My first task is to deploy a very simple text classifier on the service. I do it in the following steps (they could perhaps be collapsed into fewer steps; if so, feel free to tell me):
- Define the model with Keras and export it to YAML
- Load the YAML and export it as a TensorFlow SavedModel
- Upload the model to Google Cloud Storage
- Deploy the model from Storage to Google Cloud ML
- Set the uploaded model version as the default on the model's web page
- Run the model with a sample input
I finally got steps 1-5 working, but now I get the strange error below when running the model. Can anyone help? Details on each step are below; hopefully they can also help others who get stuck on one of the earlier steps. My model works fine locally.
I've seen Deploying Keras Models via Google Cloud ML and Export a basic Tensorflow model to Google Cloud ML, but they seem to be stuck on other steps of the process.
The error
Prediction failed: Exception during model execution: AbortionError(code=StatusCode.INVALID_ARGUMENT, details="In[0] is not a matrix
[[Node: MatMul = MatMul[T=DT_FLOAT, _output_shapes=[[-1,64]], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/cpu:0"](Mean, softmax_W/read)]]")
Step 1
# necessary Keras imports
from keras.layers import Input, Embedding, GlobalAveragePooling1D, Dense
from keras.models import Model

# maxlen, nb_tokens and nb_classes are defined earlier in my script
model_input = Input(shape=(maxlen,), dtype='int32')
embed = Embedding(input_dim=nb_tokens,
                  output_dim=256,
                  mask_zero=False,
                  input_length=maxlen,
                  name='embedding')
x = embed(model_input)
x = GlobalAveragePooling1D()(x)
outputs = [Dense(nb_classes, activation='softmax', name='softmax')(x)]
model = Model(input=[model_input], output=outputs, name="fasttext")

# export to YAML..
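
For completeness, the YAML export hinted at by the last comment looks roughly like this (a sketch; the file names are placeholders I picked):

# Serialize the architecture to YAML; Step 2 reads this file back.
with open('fasttext.yaml', 'w') as f:
    f.write(model.to_yaml())
# Weights would normally be saved as well, although Step 2 skips loading them.
model.save_weights('fasttext_weights.h5')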
Step 2
from __future__ import print_function

import sys
import os
import tensorflow as tf
from tensorflow.contrib.session_bundle import exporter
import keras
from keras import backend as K
from keras.models import model_from_config, model_from_yaml
from optparse import OptionParser

EXPORT_VERSION = 1  # for us to keep track of different model versions (integer)


def export_model(model_def, model_weights, export_path):

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        K.set_learning_phase(0)  # all new operations will be in test mode from now on

        yaml_file = open(model_def, 'r')
        yaml_string = yaml_file.read()
        yaml_file.close()
        model = model_from_yaml(yaml_string)

        # force initialization
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam')
        Wsave = model.get_weights()
        model.set_weights(Wsave)

        # weights are not loaded as I'm just testing, not really deploying
        # model.load_weights(model_weights)

        print(model.input)
        print(model.output)

        pred_node_names = output_node_names = 'Softmax:0'
        num_output = 1

        export_path_base = export_path
        export_path = os.path.join(
            tf.compat.as_bytes(export_path_base),
            tf.compat.as_bytes('initial'))
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)

        # Build the signature_def_map.
        x = model.input
        y = model.output

        values, indices = tf.nn.top_k(y, 5)
        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            tf.constant([str(i) for i in xrange(5)]))
        prediction_classes = table.lookup(tf.to_int64(indices))

        classification_inputs = tf.saved_model.utils.build_tensor_info(model.input)
        classification_outputs_classes = tf.saved_model.utils.build_tensor_info(prediction_classes)
        classification_outputs_scores = tf.saved_model.utils.build_tensor_info(values)

        classification_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={tf.saved_model.signature_constants.CLASSIFY_INPUTS: classification_inputs},
                outputs={
                    tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES: classification_outputs_classes,
                    tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: classification_outputs_scores
                },
                method_name=tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME))

        tensor_info_x = tf.saved_model.utils.build_tensor_info(x)
        tensor_info_y = tf.saved_model.utils.build_tensor_info(y)

        prediction_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={'images': tensor_info_x},
                outputs={'scores': tensor_info_y},
                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))

        legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                'predict_images': prediction_signature,
                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: classification_signature,
            },
            legacy_init_op=legacy_init_op)

        builder.save()
        print('Done exporting!')
        raise SystemExit


if __name__ == '__main__':
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    (options, args) = parser.parse_args()
    if len(args) < 3:
        raise ValueError("Too few arguments!")

    model_def = args[0]
    model_weights = args[1]
    export_path = args[2]
    export_model(model_def, model_weights, export_path)
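
Before uploading, I find it useful to load the exported SavedModel back and list its signatures; a minimal sketch (the local path is a placeholder matching the 'initial' subdirectory that the exporter creates):

import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    # Load the SavedModel with the SERVING tag and print the signature names;
    # with the export above this should show 'predict_images' and 'serving_default'.
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], 'fasttext_cloud/initial')
    print(list(meta_graph.signature_def.keys()))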
Step 3
gsutil cp -r fasttext_cloud/ gs://quiet-notch-xyz.appspot.com
Step 4
from __future__ import print_function

from oauth2client.client import GoogleCredentials
from googleapiclient import discovery
from googleapiclient import errors
import time

projectID = 'projects/{}'.format('quiet-notch-xyz')
modelName = 'fasttext'
modelID = '{}/models/{}'.format(projectID, modelName)
versionName = 'Initial'
versionDescription = 'Initial release.'
trainedModelLocation = 'gs://quiet-notch-xyz.appspot.com/fasttext/'

credentials = GoogleCredentials.get_application_default()
ml = discovery.build('ml', 'v1', credentials=credentials)

# Create a dictionary with the fields from the request body.
requestDict = {'name': modelName, 'description': 'Online predictions.'}

# Create a request to call projects.models.create.
request = ml.projects().models().create(parent=projectID, body=requestDict)

# Make the call.
try:
    response = request.execute()
except errors.HttpError as err:
    # Something went wrong, print out some information.
    print('There was an error creating the model.' +
          ' Check the details:')
    print(err._get_reason())
    # Clear the response for next time.
    response = None
    raise

time.sleep(10)

requestDict = {'name': versionName,
               'description': versionDescription,
               'deploymentUri': trainedModelLocation}

# Create a request to call projects.models.versions.create
request = ml.projects().models().versions().create(parent=modelID,
                                                   body=requestDict)

# Make the call.
try:
    print("Creating model setup..", end=' ')
    response = request.execute()

    # Get the operation name.
    operationID = response['name']
    print('Done.')
except errors.HttpError as err:
    # Something went wrong, print out some information.
    print('There was an error creating the version.' +
          ' Check the details:')
    print(err._get_reason())
    raise

done = False
request = ml.projects().operations().get(name=operationID)
print("Adding model from storage..", end=' ')
while not done:
    response = None

    # Wait for 10000 milliseconds.
    time.sleep(10)

    # Make the next call.
    try:
        response = request.execute()

        # Check for finish.
        done = True  # response.get('done', False)
    except errors.HttpError as err:
        # Something went wrong, print out some information.
        print('There was an error getting the operation.' +
              'Check the details:')
        print(err._get_reason())
        done = True
        raise

print("Done.")
Step 5
Done via the website.
Step 6
import googleapiclient.discovery


def predict_json(instances, project='quiet-notch-xyz', model='fasttext', version=None):
    """Send json data to a deployed model for prediction.

    Args:
        project (str): project where the Cloud ML Engine Model is deployed.
        model (str): model name.
        instances ([Mapping[str: Any]]): Keys should be the names of Tensors
            your deployed model expects as inputs. Values should be datatypes
            convertible to Tensors, or (potentially nested) lists of datatypes
            convertible to tensors.
        version: str, version of the model to target.
    Returns:
        Mapping[str: any]: dictionary of prediction results defined by the
            model.
    """
    # Create the ML Engine service object.
    # To authenticate set the environment variable
    # GOOGLE_APPLICATION_CREDENTIALS=<path_to_service_account_file>
    service = googleapiclient.discovery.build('ml', 'v1')
    name = 'projects/{}/models/{}'.format(project, model)

    if version is not None:
        name += '/versions/{}'.format(version)

    response = service.projects().predict(
        name=name,
        body={'instances': instances}
    ).execute()

    if 'error' in response:
        raise RuntimeError(response['error'])

    return response['predictions']
Then I run the function with a test input: predict_json({'inputs':[[18, 87, 13, 589, 0]]})
[Discussion]:
- The problem may go beyond this, but let's start here: CloudML Engine currently only supports serving a single signature (the default signature). Looking at your code, I think prediction_signature is the more likely route to success, but you haven't made it the default. Can you try that? Since deploying to the cloud can take a while, I suggest testing locally with gcloud ml-engine local predict (see the sketch after these comments).
- OK, that sounds reasonable. I must admit I don't really understand the builder.add_meta_graph_and_variables() function. How do I make it the default signature?
- It will get messy, so I'll add it as an answer that I'll keep updating until we've solved the problem. By the way, I've simplified the SavedModel process with a simple_save function that is incubating in contrib and should be available in TF 1.4: github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/…
- The simple_save function looks great! Sadly I can't update my TF version (and I think my current one is too old for what's in contrib), but I look forward to using it once TF 1.4 is released and I've updated!
- Hmm.. I'm trying to run gcloud ml-engine local predict --model-dir=fasttext_cloud/ --json-instances=debug_instance.json, but it fails to load TensorFlow. That's odd, because TensorFlow works fine everywhere else, including the example mentioned: python -c 'import tensorflow'. I'll open a new question for this.
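
Following up on the first comment: the suggestion, as I understand it, is to register prediction_signature under the default key in Step 2, since the online prediction service only looks at the default serving signature. A sketch of that change (not yet confirmed to fix the error):

# Sketch: make the prediction signature the default serving signature.
builder.add_meta_graph_and_variables(
    sess, [tf.saved_model.tag_constants.SERVING],
    signature_def_map={
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            prediction_signature,
    },
    legacy_init_op=legacy_init_op)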
Tags: deployment tensorflow google-cloud-platform keras google-cloud-ml