【问题标题】:Iterating a tf.data.Dataset.from_generator for a keras image generator.flow_from_dir throws errors(迭代包装了 Keras 图像生成器 flow_from_directory 的 tf.data.Dataset.from_generator 时抛出错误)
【发布时间】:2020-06-28 20:26:34
【问题描述】:

场景如下:使用 Keras ImageDataGenerator 的 .flow_from_directory,并用 tf.data.Dataset.from_generator(...) 将其包装。对该数据集的任何迭代尝试都会失败。

错误总结:

InvalidArgumentError: TypeError: endswith first arg must be bytes or a tuple of bytes, not str

代码片段:

import tensorflow as tf   # version 2.1.0

# Download and unpack the flower_photos archive; the value returned by
# get_file is used below as the image root directory.
DATA_URL = 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'
flowers_root_path = tf.keras.utils.get_file(origin=DATA_URL, fname='flower_photos', untar=True)

# Image generator configured with rescale=1/255 and rotation_range=20.
img_gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255, rotation_range=20)
# Directory iterator created eagerly here, with the path passed in directly.
gen = img_gen.flow_from_directory(flowers_root_path)

ds = tf.data.Dataset.from_generator(
  # lambda: gen,            # this works
  # NOTE(review): the traceback for this variant ends inside keras_preprocessing
  # with "endswith first arg must be bytes or a tuple of bytes, not str", i.e.
  # the file names were bytes when flow_from_directory was invoked through
  # `args`. Presumably from_generator hands `args` to the callable as bytes
  # rather than str -- confirm against the tf.data documentation.
  img_gen.flow_from_directory, args=[flowers_root_path],    # this failed.
  output_types=(tf.float32, tf.float32), 
  # NOTE(review): the shapes hard-code a batch of 32 and 5 classes; a smaller
  # final batch would presumably not match -- confirm.
  output_shapes=([32,256,256,3], [32,5])
)

# Pull a single batch; this is where the error surfaces.
it = iter(ds)
batch = next(it)
print(batch)

使用“lambda: gen”可以正常工作。有人知道为什么直接传入 img_gen.flow_from_directory 并配合 args 的写法会失败吗?

全栈跟踪:

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/context.py in execution_mode(mode)
   1896     ctx.executor = executor_new
-> 1897     yield
   1898   finally:

10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/data/ops/iterator_ops.py in _next_internal(self)
    658             output_types=self._flat_output_types,
--> 659             output_shapes=self._flat_output_shapes)
    660 

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gen_dataset_ops.py in iterator_get_next_sync(iterator, output_types, output_shapes, name)
   2478     except _core._NotOkStatusException as e:
-> 2479       _ops.raise_from_not_ok_status(e, name)
   2480   # Add nodes to the TensorFlow graph.

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in raise_from_not_ok_status(e, name)
   6605   # pylint: disable=protected-access
-> 6606   six.raise_from(core._status_to_exception(e.code, message), None)
   6607   # pylint: enable=protected-access

/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)

InvalidArgumentError: TypeError: endswith first arg must be bytes or a tuple of bytes, not str
Traceback (most recent call last):

  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 673, in get_iterator
    return self._iterators[iterator_id]

KeyError: 0


During handling of the above exception, another exception occurred:


Traceback (most recent call last):

  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/script_ops.py", line 236, in __call__
    ret = func(*args)

  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 789, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 675, in get_iterator
    iterator = iter(self._generator(*self._args.pop(iterator_id)))

  File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/image_data_generator.py", line 540, in flow_from_directory
    interpolation=interpolation

  File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/directory_iterator.py", line 126, in __init__
    classes, filenames = res.get()

  File "/usr/lib/python3.6/multiprocessing/pool.py", line 644, in get
    raise self._value

  File "/usr/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))

  File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/utils.py", line 216, in _list_valid_filenames_in_directory
    for root, fname in valid_files:

  File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/utils.py", line 172, in _iter_valid_files
    if fname.lower().endswith('.tiff'):

TypeError: endswith first arg must be bytes or a tuple of bytes, not str


     [[{{node PyFunc}}]] [Op:IteratorGetNextSync]

During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-56-a2623f5ab104> in <module>()
      1 it = iter(ds)
----> 2 batch = next(it)
      3 print(batch)

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/data/ops/iterator_ops.py in __next__(self)
    628 
    629   def __next__(self):  # For Python 3 compatibility
--> 630     return self.next()
    631 
    632   def _next_internal(self):

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/data/ops/iterator_ops.py in next(self)
    672     """Returns a nested structure of `Tensor`s containing the next element."""
    673     try:
--> 674       return self._next_internal()
    675     except errors.OutOfRangeError:
    676       raise StopIteration

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/data/ops/iterator_ops.py in _next_internal(self)
    663         return self._element_spec._from_compatible_tensor_list(ret)  # pylint: disable=protected-access
    664       except AttributeError:
--> 665         return structure.from_compatible_tensor_list(self._element_spec, ret)
    666 
    667   @property

/usr/lib/python3.6/contextlib.py in __exit__(self, type, value, traceback)
     97                 value = type()
     98             try:
---> 99                 self.gen.throw(type, value, traceback)
    100             except StopIteration as exc:
    101                 # Suppress StopIteration *unless* it's the same exception that

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/context.py in execution_mode(mode)
   1898   finally:
   1899     ctx.executor = executor_old
-> 1900     executor_new.wait()
   1901 
   1902 

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/executor.py in wait(self)
     65   def wait(self):
     66     """Waits for ops dispatched in this executor to finish."""
---> 67     pywrap_tensorflow.TFE_ExecutorWaitForAllPendingNodes(self._handle)
     68 
     69   def clear_error(self):

InvalidArgumentError: TypeError: endswith first arg must be bytes or a tuple of bytes, not str
Traceback (most recent call last):

  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 673, in get_iterator
    return self._iterators[iterator_id]

KeyError: 0


During handling of the above exception, another exception occurred:


Traceback (most recent call last):

  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/script_ops.py", line 236, in __call__
    ret = func(*args)

  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 789, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 675, in get_iterator
    iterator = iter(self._generator(*self._args.pop(iterator_id)))

  File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/image_data_generator.py", line 540, in flow_from_directory
    interpolation=interpolation

  File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/directory_iterator.py", line 126, in __init__
    classes, filenames = res.get()

  File "/usr/lib/python3.6/multiprocessing/pool.py", line 644, in get
    raise self._value

  File "/usr/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))

  File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/utils.py", line 216, in _list_valid_filenames_in_directory
    for root, fname in valid_files:

  File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/utils.py", line 172, in _iter_valid_files
    if fname.lower().endswith('.tiff'):

TypeError: endswith first arg must be bytes or a tuple of bytes, not str


     [[{{node PyFunc}}]]

【问题讨论】:

    标签: python-3.x tensorflow tensorflow-datasets tf.keras


    【解决方案1】:

    根据 Stack Overflow 上的一个回答,将下面的第一段代码替换为紧随其后的 Gen 生成器函数,即可使您的代码正常工作:

    # Before: the directory iterator is created eagerly, outside any generator function.
    gen = img_gen.flow_from_directory(flowers_root_path)
    

    def Gen():
      """Yield (x, y) batches from a directory iterator created inside the generator.

      The iterator is built when the generator runs, and the path is passed
      to flow_from_directory directly from the enclosing scope.
      """
      gen = img_gen.flow_from_directory(flowers_root_path)
      for (x,y) in gen:
        yield (x,y)
    

    完整的工作代码如下所示:

    import tensorflow as tf   # version 2.1.0
    
    # Download and unpack the flower_photos archive; the value returned by
    # get_file is used below as the image root directory.
    DATA_URL = 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'
    flowers_root_path = tf.keras.utils.get_file(origin=DATA_URL, fname='flower_photos', untar=True)
    
    # Image generator configured with rescale=1/255 and rotation_range=20.
    img_gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255, rotation_range=20)
    def Gen():
      """Yield (x, y) batches from a directory iterator created inside the generator.

      The iterator is built when the generator runs, and the path is passed
      to flow_from_directory directly from the enclosing scope.
      """
      gen = img_gen.flow_from_directory(flowers_root_path)
      for (x,y) in gen:
        yield (x,y)
    
    # Pass the generator function itself; no `args` are given, so nothing is
    # forwarded to Gen by from_generator.
    # NOTE(review): the shapes hard-code a batch of 32 and 5 classes; a smaller
    # final batch would presumably not match -- confirm.
    ds = tf.data.Dataset.from_generator(  
      Gen,  output_types=(tf.float32, tf.float32),  output_shapes=([32,256,256,3], [32,5]))
    
    # Pull and print a single batch to confirm the dataset can be iterated.
    it = iter(ds)
    batch = next(it)
    print(batch)
    

    另外,可参考附有可运行代码的 GitHub Gist

    【讨论】:

    • 我用 lambda: gen 就可以让它工作(这可能与你的做法类似)。我发帖是想问:为什么另一种写法不起作用?它在 2.1.0 中应该可以正常工作。
    猜你喜欢
    • 2014-06-30
    • 2019-02-26
    • 2017-09-05
    • 2020-10-23
    • 1970-01-01
    • 2021-06-03
    • 2018-04-16
    • 2020-08-11
    • 1970-01-01
    相关资源
    最近更新 更多