我很喜欢 virhilo 的做法,但他测试用的是一组非常特定的数据。对于这类比较,不要只测试函数本身,还要按照您实际使用它们的方式来测试。为此我整理了一套更全面的测试:它会把您指定的每个函数(只需加一个小装饰器)依次放到一系列列表比较场景中运行,计算每个函数的耗时,从而得出它比最快的函数慢多少倍。结论是:如果不进一步了解数据的大小、重叠程度和类型,就很难断定应该使用哪个函数。
这是我的测试程序,下面是输出。
import csv
import functools
import itertools
import random
import sys
from copy import copy
from timeit import Timer
# Registry of the undecorated benchmark candidates, in definition order.
funcs = []


class timeMe(object):
    """Decorator that registers a function as a benchmark candidate.

    The wrapped function is appended to the module-level ``funcs`` list and
    the decorated name is rebound to a ``timeMe`` instance that forwards every
    call to the wrapped function unchanged.
    """

    def __init__(self, f):
        """Register ``f`` in ``funcs`` and remember it for later calls."""
        funcs.append(f)
        self.f = f
        # Copy __name__/__doc__ onto the wrapper so the decorated name can
        # still be introspected like the function it replaces.
        functools.update_wrapper(self, f)

    def __call__(self, *args, **kwargs):
        """Call the wrapped function and return whatever it returns."""
        return self.f(*args, **kwargs)
@timeMe
def extend_list_then_set(input1, input2):
    """Merge two iterables by extending one list with the other, then
    removing duplicates by building a set.

    Both inputs are copied into new lists first, so the caller's data is
    never modified and any iterable (list, tuple, range, ...) is accepted.

    Args:
        input1: First iterable of hashable items.
        input2: Second iterable of hashable items.

    Returns:
        A set holding every distinct item found in either input.
    """
    l1 = list(input1)
    l2 = list(input2)
    l1.extend(l2)
    return set(l1)
@timeMe
def per_element_append_to_list(input1, input2):
    """Merge two iterables by appending each item of the second to a copy of
    the first only when it is not already present.

    The list membership test is the technique this candidate exists to
    measure, so it is deliberately left as a plain ``in`` check on a list.

    Args:
        input1: First iterable; its items (duplicates included) are kept
            in their original order.
        input2: Second iterable; items not yet in the result are appended
            in order of first appearance.

    Returns:
        A new list; neither input is modified.
    """
    l1 = list(input1)
    l2 = list(input2)
    for elem in l2:
        if elem not in l1:
            l1.append(elem)
    return l1
@timeMe
def union_sets(input1, input2):
    """Merge two iterables by turning each into a set and taking the union
    with the ``|`` operator.

    Each input is first copied into a list, matching the setup work done by
    the other benchmark candidates in this file.

    Args:
        input1: First iterable of hashable items.
        input2: Second iterable of hashable items.

    Returns:
        A set holding every distinct item found in either input.
    """
    l1 = list(input1)
    l2 = list(input2)
    return set(l1) | set(l2)
@timeMe
def set_from_one_add_from_two(input1, input2):
    """Merge two iterables by building a set from the first and adding the
    items of the second one at a time.

    Each input is first copied into a list, matching the setup work done by
    the other benchmark candidates in this file.

    Args:
        input1: First iterable of hashable items.
        input2: Second iterable of hashable items.

    Returns:
        A set holding every distinct item found in either input.
    """
    l1 = list(input1)
    l2 = list(input2)
    merged = set(l1)
    for element in l2:
        merged.add(element)
    return merged
@timeMe
def set_from_one_union_two(input1, input2):
    """Merge two iterables by building a set from the first and calling
    ``set.union`` with the second.

    Each input is first copied into a list, matching the setup work done by
    the other benchmark candidates in this file.

    Args:
        input1: First iterable of hashable items.
        input2: Second iterable of hashable items.

    Returns:
        A set holding every distinct item found in either input.
    """
    l1 = list(input1)
    l2 = list(input2)
    return set(l1).union(l2)
@timeMe
def chain_then_set(input1, input2):
    """Merge two iterables by chaining them with ``itertools.chain`` and
    building a single set from the chained stream.

    Each input is first copied into a list, matching the setup work done by
    the other benchmark candidates in this file.

    Args:
        input1: First iterable of hashable items.
        input2: Second iterable of hashable items.

    Returns:
        A set holding every distinct item found in either input.
    """
    l1 = list(input1)
    l2 = list(input2)
    return set(itertools.chain(l1, l2))
def run_results(l1, l2, times, functions=None):
    """Time each benchmark candidate on the same pair of inputs.

    Args:
        l1: First input handed to every candidate.
        l2: Second input handed to every candidate.
        times: How many calls ``Timer.timeit`` makes per candidate.
        functions: Optional iterable of two-argument callables to time.
            When omitted, the module-level ``funcs`` registry is used.

    Yields:
        ``(function_name, seconds)`` tuples, one per candidate, in the order
        the candidates are listed. ``seconds`` is the total time reported by
        ``Timer.timeit`` for all ``times`` calls.
    """
    candidates = funcs if functions is None else functions
    for f in candidates:
        # Time the callable itself instead of a source string: no dependency
        # on __main__ and no need for the inputs to have an eval-able repr.
        timer = Timer(lambda f=f: f(l1, l2))
        yield (f.__name__, timer.timeit(times))
def _random_ints(count, limit):
    """Return ``count`` random integers, each satisfying ``0 <= n < limit``."""
    return [random.randrange(limit) for _ in range(count)]


# Each entry is (description, first list, second list, number of timed calls).
# Inputs are built as real lists so the table is the same on Python 2 and 3,
# and the ranges are chosen so that sizes and bounds match the descriptions.
test_datasets = [
    ('original, small, some overlap',
     list(range(200)), list(range(150, 250)), 10000),
    ('no overlap: l1 = [1], l2 = [2..100]',
     [1], list(range(2, 101)), 10000),
    ('lots of overlap: l1 = [1], l2 = [1]*100',
     [1], [1] * 100, 10000),
    ('50 random ints below 2000 in each',
     _random_ints(50, 2000), _random_ints(50, 2000), 10000),
    ('50 elements in each, no overlap',
     list(range(50)), list(range(50, 100)), 10000),
    ('50 elements in each, total overlap',
     list(range(50)), list(range(50)), 10000),
    ('500 random ints below 500 in each',
     _random_ints(500, 500), _random_ints(500, 500), 1000),
    ('500 random ints below 2000 in each',
     _random_ints(500, 2000), _random_ints(500, 2000), 1000),
    ('500 random ints below 200000 in each',
     _random_ints(500, 200000), _random_ints(500, 200000), 1000),
    ('500 elements in each, no overlap',
     list(range(500)), list(range(500, 1000)), 10000),
    ('500 elements in each, total overlap',
     list(range(500)), list(range(500)), 10000),
    ('10000 random ints below 200000 in each',
     _random_ints(10000, 200000), _random_ints(10000, 200000), 50),
    ('10000 elements in each, no overlap',
     list(range(10000)), list(range(10000, 20000)), 10),
    ('10000 elements in each, total overlap',
     list(range(10000)), list(range(10000)), 10),
    ('original lists 100 times',
     list(range(200)) * 100, list(range(150, 250)) * 100, 10),
]
def rank_results(description, results):
    """Rank one dataset's timings and express each as a multiple of the fastest.

    Args:
        description: Label of the dataset the timings belong to.
        results: List of ``(function_name, seconds)`` tuples.

    Returns:
        A list of ``(description, name, seconds, place, times_slower)``
        tuples in the same order as ``results``. ``place`` is the 1-based
        rank when sorted by time and ``times_slower`` is ``seconds`` divided
        by the smallest time in ``results``.
    """
    ordered = sorted(results, key=lambda item: item[1])
    rows = []
    for name, speed in results:
        place = ordered.index((name, speed)) + 1
        times_slower = speed / ordered[0][1]
        rows.append((description, name, speed, place, times_slower))
    return rows


# Only benchmark when executed as a script; importing the module stays cheap.
if __name__ == '__main__':
    fullresults = []
    for description, l1, l2, times in test_datasets:
        print("Now running %s times: %s" % (times, description))
        results = list(run_results(l1, l2, times))
        for row in rank_results(description, results):
            fullresults.append(row)
            _, name, speed, finish, timesslower = row
            print('\t%s %s %s %s' % (finish,
                                     ('%.2fx' % timesslower).ljust(10),
                                     name.ljust(40),
                                     speed))
        print('')

    # Emit the collected rows as CSV after the human-readable report.
    out = csv.writer(sys.stdout)
    out.writerow(('Test', 'Function', 'Speed', 'Place', 'timesslower'))
    out.writerows(fullresults)
结果
我的本意是鼓励您用自己的数据进行测试,所以不想在细节上过多展开。不过……平均来看,第一个方法 extend_list_then_set(先 extend 再转成 set)最快,但 set_from_one_union_two(x = set(l1).union(l2))也赢了几次。如果您自己运行脚本,可以得到更多细节。
我报告的数字表示该函数比该项测试中最快的函数慢多少倍;如果它本身就是最快的,该值为 1。
Functions
Tests extend_list_then_set per_element_append_to_list set_from_one_add_from_two set_from_one_union_two union_sets chain_then_set
original, small, some overlap 1 25.04 1.53 1.18 1.39 1.08
no overlap: l1 = [1], l2 = [2..100] 1.08 13.31 2.10 1 1.27 1.07
lots of overlap: l1 = [1], l2 = [1]*100 1.10 1.30 2.43 1 1.25 1.05
50 random ints below 2000 in each 1 7.76 1.35 1.20 1.31 1
50 elements in each, no overlap 1 9.00 1.48 1.13 1.18 1.10
50 elements in each, total overlap 1.08 4.07 1.64 1.04 1.41 1
500 random ints below 500 in each 1.16 68.24 1.75 1 1.28 1.03
500 random ints below 2000 in each 1 102.42 1.64 1.43 1.81 1.20
500 random ints below 200000 in each 1.14 118.96 1.99 1.52 1.98 1
500 elements in each, no overlap 1.01 145.84 1.86 1.25 1.53 1
500 elements in each, total overlap 1 53.10 1.95 1.16 1.57 1.05
10000 random ints below 200000 in each 1 2588.99 1.73 1.35 1.88 1.12
10000 elements in each, no overlap 1 3164.01 1.91 1.26 1.65 1.02
10000 elements in each, total overlap 1 1068.67 1.89 1.26 1.70 1.05
original lists 100 times 1.11 2068.06 2.03 1 1.04 1.17
Average 1.04 629.25 1.82 1.19 1.48 1.06
Standard Deviation 0.05 1040.76 0.26 0.15 0.26 0.05
Max 1.16 3164.01 2.43 1.52 1.98 1.20