【发布时间】:2020-07-13 10:56:05
【问题描述】:
我正在使用大型机器将我的完整数据集加载到内存中以使用以下方法进行训练:(使用我的生成器将整个数据加载到 x 和 y 张量中)
training_generator = my_generator(train_data_file, batch_size=TOTAL_SAMPLES)
x_tensor, y_tensor = training_generator.__getitem__(0)
我这样做是为了在训练期间不使用发电机就能更快地训练。我有足够的 RAM 来加载此数据集,但出现以下错误。我不确定为什么在加载数据时使用我的 GPU。如何更改我的方法以不过度使用 GPU 内存?下面是我的 getitem 函数,然后是回溯。
from tensorflow import cast, float16
def __getitem__(self, idx) :
"""
Function for tensorflow to get a Batch of Data
return batch_x:
return batch_y:
"""
# X Data
x_list = []
# Y Data
y_list = []
# Loop Through Batch
for i in range(0, self.batch_size):
x_list.append(cast(self.h5_data[self.batches[idx][i]]['x'][:], dtype=float16))
df = pd.DataFrame(data=self.h5_data[self.batches[idx][i]]['y'][:])
y_list.append(cast(convert_dataframe_to_tensor(df),dtype=float16))
# Stack X List into Tensor
batch_x = np.stack(x_list, axis=0)
# Stack Y List into Tensor
batch_y = np.stack(y_list, axis=0)
return batch_x, batch_y
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-11-ae34a7e4c3fd> in <module>
30
---> 31 x_tensor, y_tensor = training_generator.__getitem__(0)
32
33
~/thirdeye/lib/generators.py in __getitem__(self, idx)
71 df = pd.DataFrame(data=self.h5_data[self.batches[idx][i]]['y'][:])
72
---> 73 y_list.append(cast(convert_dataframe_to_tensor(df),dtype=float16))
74
75 # Stack X List into Tensor
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/util/dispatch.py in wrapper(*args, **kwargs)
178 """Call target, and fall back on dispatchers if there is a TypeError."""
179 try:
--> 180 return target(*args, **kwargs)
181 except (TypeError, ValueError):
182 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/ops/math_ops.py in cast(x, dtype, name)
705 x = ops.convert_to_tensor(x, name="x")
706 if x.dtype.base_dtype != base_type:
--> 707 x = gen_math_ops.cast(x, base_type, name=name)
708 if x.dtype.is_complex and base_type.is_floating:
709 logging.warn("Casting complex to real discards imaginary part.")
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/ops/gen_math_ops.py in cast(x, DstT, Truncate, name)
1969 pass # Add nodes to the TensorFlow graph.
1970 except _core._NotOkStatusException as e:
-> 1971 _ops.raise_from_not_ok_status(e, name)
1972 # Add nodes to the TensorFlow graph.
1973 DstT = _execute.make_type(DstT, "DstT")
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py in raise_from_not_ok_status(e, name)
6604 message = e.message + (" name: " + name if name is not None else "")
6605 # pylint: disable=protected-access
-> 6606 six.raise_from(core._status_to_exception(e.code, message), None)
6607 # pylint: enable=protected-access
6608
/opt/conda/lib/python3.7/site-packages/six.py in raise_from(value, from_value)
InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run Cast: Dst tensor is not initialized. [Op:Cast] name: Cast/
【问题讨论】:
标签: tensorflow machine-learning keras gpu