【问题标题】:Surprising challenge generating comprehensive list令人惊讶的挑战生成综合列表
【发布时间】:2014-05-25 20:30:21
【问题描述】:

我在 Python 方面面临着一个令人惊讶的挑战。

我是一名物理学家,在光学接口处生成一系列层模拟。模拟的细节并不是特别重要,但关键是我生成了所有可能的情况——在一定厚度和层序范围内的不同材料。

我一直在编写代码来生成一个全面而独特的列表,但我对计算即使是相对简单的系统也需要多长时间感到震惊!当然,Python 和一台合理的计算机应该可以在没有过多压力的情况下处理这个问题。建议将不胜感激。

谢谢

from itertools import permutations, combinations_with_replacement

def join_adjacent_repeated_materials(potential_structure):
    """
    Self-explanitory...
    """
    #print potential_structure

    new_layers = [] # List to hold re-cast structure
    for layer in potential_structure:
        if len(new_layers) > 0: # if not the first item in the list of layers
            last_layer=new_layers[-1] # last element of existing layer list
            if layer[0] == last_layer[0]: # true is the two layers are the same material
                combined_layer = (layer[0], layer[1] + last_layer[1])
                new_layers[len(new_layers)-1] = combined_layer
            else: # adjcent layers are different material so no comibantion is possible
                new_layers.append(layer)
        else: # for the first layer
            new_layers.append(layer)

    return tuple(new_layers)

def calculate_unique_structure_lengths(thicknesses, materials, maximum_number_of_layers,\
                                       maximum_individual_layer_thicknesses, \
                                       maximum_total_material_thicknesses):
    """
    Create a set on all possible multilayer combinations.

    thicknesses : if this contains '0' the total number of layers will vary
                  from 0 to maximum_number_of_layers, otherwise, the
                  number total number layers will always be maximum_number_of_layers
                  e.g. arange(0 , 100, 5)

    materials : list of materials used
                e.g. ['Metal', 'Dielectric']

    maximum_number_of_layers : pretty self-explanitory...
                               e.g. 5

    maximum_individual_layer_thicknesses : filters the created the multilayer structures
                                           preventing the inclusion layers that are too thick
                                           - this is important after the joining of
                                           adjacent materials
                                           e.g. (('Metal',30),('Dielectric',20))

    maximum_total_material_thicknesses : similar to the above but filters structures where the total
                                         amount of a particular material is exceeded
                                         e.g. (('Metal',50),('Dielectric',100))


    """
    # generate all possible thickness combinations and material combinations
    all_possible_thickness_sets = set(permutations(combinations_with_replacement(thicknesses, maximum_number_of_layers)))
    all_possible_layer_material_orders = set(permutations(combinations_with_replacement(materials, maximum_number_of_layers)))


    first_set = set() # Create set object (list of unique elements, no repeats)
    for layer_material_order in all_possible_layer_material_orders:
        for layer_thickness_set in all_possible_thickness_sets:
            potential_structure = [] # list to hold this structure
            for layer, thickness in zip(layer_material_order[0], layer_thickness_set[0]): # combine the layer thickness with its material
                if thickness != 0: # layers of zero thickness are not added to potential_structure
                    potential_structure.append((layer, thickness))
            first_set.add(tuple(potential_structure)) # add this potential_structure to the first_set set

    #print('first_set')
    #for struct in first_set:
    #    print struct

    ## join adjacent repeated materials
    second_set = set() # create new set
    for potential_structure in first_set:
        second_set.add(join_adjacent_repeated_materials(potential_structure))

    ## remove structures where a layer is too thick
    third_set = set()
    for potential_structure in second_set: # check all the structures in the set
        conditions_satisfied=True # default
        for max_condition in maximum_individual_layer_thicknesses: # check this structure using each condition
            for layer in potential_structure: # examine each layer
                if layer[0] == max_condition[0]: # match condition with material
                    if layer[1] > max_condition[1]: # test thickness condition
                        conditions_satisfied=False
        if conditions_satisfied:
            third_set.add(potential_structure)

    ##remove structures that contain too much of a certain material
    fourth_set = set()
    for potential_structure in second_set: # check all the structures in the set
        conditions_satisfied=True # default
        for max_condition in maximum_total_material_thicknesses: # check this structure using each condition
            amount_of_material_in_this_structure = 0 # initialise a counter
            for layer in potential_structure: # examine each layer
                if layer[0] == max_condition[0]: # match condition with material
                    amount_of_material_in_this_structure += layer[1]
                    if amount_of_material_in_this_structure > max_condition[1]: # test thickness condition
                        conditions_satisfied=False
        if conditions_satisfied:
            fourth_set.add(potential_structure)

    return fourth_set

thicknesses = [0,1,2]
materials = ('A', 'B') # Tuple cannot be accidentally appended to later
maximum_number_of_layers = 3
maximum_individual_layer_thicknesses=(('A',30),('B',20))
maximum_total_material_thicknesses=(('A',20),('B',15))

calculate_unique_structure_lengths(thicknesses, materials, maximum_number_of_layers,\
                                   maximum_individual_layer_thicknesses = maximum_individual_layer_thicknesses, \
                                   maximum_total_material_thicknesses = maximum_total_material_thicknesses)

【问题讨论】:

  • 您能否更具体一点:代码的哪些部分很慢,如果可能,我们如何重现缓慢的行为?您是否也尝试过自己分析代码?如果你有,你能给我们提供结果吗?
  • 分析您的代码并查看其大部分时间花在哪里。

标签: python list set unique


【解决方案1】:
all_possible_thickness_sets = set(permutations(combinations_with_replacement(thicknesses, maximum_number_of_layers)))
all_possible_layer_material_orders = set(permutations(combinations_with_replacement(materials, maximum_number_of_layers)))

天哪!这些套装将是巨大的!让我们举个例子。如果thicknesses 有 6 个东西,maximum_number_of_layers 是 3,那么第一组将有大约 2 个 quintillion 的东西。你为什么做这个?如果这些确实是您想要使用的集合,那么您将需要找到一种不需要构建这些集合的算法,因为它永远不会发生。我怀疑这些不是您想要的套装;也许你想要itertools.product

all_possible_thickness_sets = set(product(thicknesses, repeat=maximum_number_of_layers))

下面是 itertools.product 的示例:

>>> for x in product([1, 2, 3], repeat=2):
...     print x
...
(1, 1)
(1, 2)
(1, 3)
(2, 1)
(2, 2)
(2, 3)
(3, 1)
(3, 2)
(3, 3)

这看起来像你需要的吗?

【讨论】:

    【解决方案2】:

    所以你经常做的一件事就是在set 中添加一些东西。如果您查看 Python documentation 中集合的运行时行为,它在底部说,关于最坏的情况“单个操作可能需要非常长的时间,具体取决于容器的历史记录”。我认为如果添加很多元素,内存重新分配可能会咬你一口,因为 Python 无法知道启动时要保留多少内存。

    我看的越多,我认为你保留的内存越多。 third_set 例如甚至不被使用。 second_set 可以替换为 first_set,如果您直接调用 join_adjacent_materials。如果我没看错的话,即使 first_set 也可能消失,你可以在构造 fourth_set 时创建候选对象。

    当然,如果您将所有内容放入一组嵌套循环中,代码的可读性可能会降低。但是,有一些方法可以构建您的代码,而不是为了可读性而创建不必要的对象 - 例如,您可以创建一个生成候选对象并通过 yield 返回每​​个结果的函数。

    【讨论】:

      【解决方案3】:

      FWIW 我检测了您的 code 以启用分析。结果如下:

      输出:

      Sun May 25 16:06:31 2014    surprising-challenge-generating-comprehensive-python-list.stats
      
               348,365,046 function calls in 1,538.413 seconds
      
         Ordered by: cumulative time, internal time
      
         ncalls  tottime  percall  cumtime  percall filename:lineno(function)
              1 1052.933 1052.933 1538.413 1538.413 surprising-challenge-generating-comprehensive-python-list.py:34(calculate_unique_structure_lengths)
       87091200  261.764    0.000  261.764    0.000 {zip}
       87091274  130.492    0.000  130.492    0.000 {method 'add' of 'set' objects}
      174182440   93.223    0.000   93.223    0.000 {method 'append' of 'list' objects}
             30    0.000    0.000    0.000    0.000 surprising-challenge-generating-comprehensive-python-list.py:14(join_adjacent_repeated_materials)
            100    0.000    0.000    0.000    0.000 {len}
              1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
      


      为了更详细地了解代码花费的时间,我在您的代码的几乎逐字副本上使用了line_profiler 模块,并为您的每个函数获得了以下结果:

      > python "C:\Python27\Scripts\kernprof.py" -l -v surprising-challenge-generating-comprehensive-python-list.py
      Wrote profile results to example.py.lprof
      Timer unit: 3.2079e-07 s
      
      File: surprising-challenge-generating-comprehensive-python-list.py
      Function: join_adjacent_repeated_materials at line 3
      Total time: 0.000805183 s
      
      Line #      Hits         Time  Per Hit   % Time  Line Contents
      ==============================================================
           3                                           @profile
           4                                           def join_adjacent_repeated_materials(potential_structure):
           5                                               """
           6                                               Self-explanitory...
           7                                               """
           8                                               #print potential_structure
           9
          10        30          175      5.8      7.0      new_layers = [] # List to hold re-cast structure
          11       100          544      5.4     21.7      for layer in potential_structure:
          12        70          416      5.9     16.6          if len(new_layers) > 0: # if not the first item in the list of layers
          13        41          221      5.4      8.8              last_layer=new_layers[-1] # last element of existing layer list
          14        41          248      6.0      9.9              if layer[0] == last_layer[0]: # true is the two layers are the same material
          15        30          195      6.5      7.8                  combined_layer = (layer[0], layer[1] + last_layer[1])
          16        30          203      6.8      8.1                  new_layers[len(new_layers)-1] = combined_layer
          17                                                       else: # adjcent layers are different material so no comibantion is possible
          18        11           68      6.2      2.7                  new_layers.append(layer)
          19                                                   else: # for the first layer
          20        29          219      7.6      8.7              new_layers.append(layer)
          21
          22        30          221      7.4      8.8      return tuple(new_layers)
      
      File: surprising-challenge-generating-comprehensive-python-list.py
      Function: calculate_unique_structure_lengths at line 24
      Total time: 3767.94 s
      
      Line #      Hits         Time  Per Hit   % Time  Line Contents
      ==============================================================
          24                                           @profile
          25                                           def calculate_unique_structure_lengths(thicknesses, materials, maximum_number_of_layers,\
          26                                                                                  maximum_individual_layer_thicknesses, \
          27                                                                                  maximum_total_material_thicknesses):
          28                                               """
          29                                               Create a set on all possible multilayer combinations.
          30
          31                                               thicknesses : if this contains '0' the total number of layers will vary
          32                                                             from 0 to maximum_number_of_layers, otherwise, the
          33                                                             number total number layers will always be maximum_number_of_layers
          34                                                             e.g. arange(0 , 100, 5)
          35
          36                                               materials : list of materials used
          37                                                           e.g. ['Metal', 'Dielectric']
          38
          39                                               maximum_number_of_layers : pretty self-explanitory...
          40                                                                          e.g. 5
          41
          42                                               maximum_individual_layer_thicknesses : filters the created the multilayer structures
          43                                                                                      preventing the inclusion layers that are too thick
          44                                                                                      - this is important after the joining of
          45                                                                                      adjacent materials
          46                                                                                      e.g. (('Metal',30),('Dielectric',20))
          47
          48                                               maximum_total_material_thicknesses : similar to the above but filters structures where the total
          49                                                                                    amount of a particular material is exceeded
          50                                                                                    e.g. (('Metal',50),('Dielectric',100))
          51
          52
          53                                               """
          54                                               # generate all possible thickness combinations and material combinations
          55         1     20305240 20305240.0    0.2      all_possible_thickness_sets = set(permutations(combinations_with_replacement(thicknesses, maximum_number_of_layers)))
          56         1          245    245.0      0.0      all_possible_layer_material_orders = set(permutations(combinations_with_replacement(materials, maximum_number_of_layers)))
          57
          58
          59         1           13     13.0      0.0      first_set = set() # Create set object (list of unique elements, no repeats)
      
          60        25          235      9.4      0.0      for layer_material_order in all_possible_layer_material_orders:
          61  87091224    896927052     10.3      7.6          for layer_thickness_set in all_possible_thickness_sets:
          62  87091200    920048586     10.6      7.8              potential_structure = [] # list to hold this structure
          63 348364800   4160332176     11.9     35.4              for layer, thickness in zip(layer_material_order[0], layer_thickness_set[0]): # combine the layer thickness with its material
          64 261273600   2334038439      8.9     19.9                  if thickness != 0: # layers of zero thickness are not added to potential_structure
          65 174182400   2003639625     11.5     17.1                      potential_structure.append((layer, thickness))
          66  87091200   1410517427     16.2     12.0              first_set.add(tuple(potential_structure)) # add this potential_structure to the first_set set
          67
          68                                               #print('first_set')
          69                                               #for struct in first_set:
          70                                               #    print struct
          71
          72                                               ## join adjacent repeated materials
          73         1           14     14.0      0.0      second_set = set() # create new set
          74        31          274      8.8      0.0      for potential_structure in first_set:
          75        30         5737    191.2      0.0          second_set.add(join_adjacent_repeated_materials(potential_structure))
          76
          77                                               ## remove structures where a layer is too thick
          78         1           10     10.0      0.0      third_set = set()
          79        23          171      7.4      0.0      for potential_structure in second_set: # check all the structures in the set
          80        22          164      7.5      0.0          conditions_satisfied=True # default
          81        66          472      7.2      0.0          for max_condition in maximum_individual_layer_thicknesses: # check this structure using each condition
          82       104          743      7.1      0.0              for layer in potential_structure: # examine each layer
          83        60          472      7.9      0.0                  if layer[0] == max_condition[0]: # match condition with material
          84        30          239      8.0      0.0                      if layer[1] > max_condition[1]: # test thickness condition
          85                                                                   conditions_satisfied=False
          86        22          149      6.8      0.0          if conditions_satisfied:
          87        22          203      9.2      0.0              third_set.add(potential_structure)
          88
          89                                               ##remove structures that contain too much of a certain material
          90         1           10     10.0      0.0      fourth_set = set()
          91        23          178      7.7      0.0      for potential_structure in second_set: # check all the structures in the set
          92        22          158      7.2      0.0          conditions_satisfied=True # default
          93        66          489      7.4      0.0          for max_condition in maximum_total_material_thicknesses: # check this structure using each condition
          94        44          300      6.8      0.0              amount_of_material_in_this_structure = 0 # initialise a counter
          95       104          850      8.2      0.0              for layer in potential_structure: # examine each layer
          96        60         2961     49.4      0.0                  if layer[0] == max_condition[0]: # match condition with material
          97        30          271      9.0      0.0                      amount_of_material_in_this_structure += layer[1]
          98        30          255      8.5      0.0                      if amount_of_material_in_this_structure > max_condition[1]: # test thickness condition
          99                                                                   conditions_satisfied=False
         100        22          160      7.3      0.0          if conditions_satisfied:
         101        22          259     11.8      0.0              fourth_set.add(potential_structure)
         102
         103         1           16     16.0      0.0      return fourth_set
      

      如您所见,构建first_setincalculate_unique_structure_lengths()是迄今为止最耗时的步骤。

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 1970-01-01
        • 2019-10-05
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2015-10-19
        • 2018-01-09
        • 2011-04-29
        相关资源
        最近更新 更多