使用如下代码:
import pprint
import random
from functools import partial
from operator import itemgetter
# Sample records: 'role a' and 'role d' each occur twice, 'role b' once.
data = [
    {'User ID': '111', 'Full Name': 'name a', 'Role: Name': 'role a'},
    {'User ID': '222', 'Full Name': 'name b', 'Role: Name': 'role a'},
    {'User ID': '232', 'Full Name': 'name c', 'Role: Name': 'role b'},
    {'User ID': '223', 'Full Name': 'name d', 'Role: Name': 'role d'},
    {'User ID': '444', 'Full Name': 'name e', 'Role: Name': 'role d'},
]
def optimize(items, quality_function, stop=1000):
    """Randomly swap pairs of elements, keeping arrangements that score higher.

    Args:
        items: List to rearrange. It is not modified; every candidate
            arrangement is built on a copy.
        quality_function: Callable that takes a list and returns a
            comparable score, where a larger value means a better
            arrangement.
        stop: Number of consecutive non-improving swaps after which the
            search ends.

    Returns:
        The best arrangement found: ``items`` itself when no swap improved
        on it, otherwise a rearranged copy.
    """
    # With fewer than two elements no swap can change the order, and
    # random.randint(0, -1) would raise ValueError on an empty list.
    if len(items) < 2 or stop <= 0:
        return items

    # Score the starting arrangement so that a swap is accepted only when it
    # actually beats the input, instead of merely beating a fixed zero.
    best = quality_function(items)
    no_improvement = 0
    last = len(items) - 1
    while no_improvement < stop:
        i = random.randint(0, last)
        j = random.randint(0, last)
        candidate = items[:]
        candidate[i], candidate[j] = candidate[j], candidate[i]
        q = quality_function(candidate)
        if q > best:
            items, best = candidate, q
            no_improvement = 0
        else:
            no_improvement += 1
    return items
def quality_maxmindist(items, key=None):
    """Score how far apart equal elements are spread within ``items``.

    Two elements count as equal when ``key`` returns the same value for
    both. For each group of equal elements the inverse distances between
    consecutive occurrences are summed; the score is the reciprocal of the
    grand total, so wider gaps between equal elements give a higher score.

    Args:
        items: Elements to score, in their current order.
        key: Optional one-argument callable returning a hashable value used
            to compare elements. Defaults to the element itself.

    Returns:
        A float score. ``float('inf')`` is returned when no key value occurs
        more than once (this includes empty input).
    """
    if key is None:
        def identity(e):
            return e
        key = identity

    # Collect the positions of every key value in one pass instead of
    # rescanning the whole sequence once per distinct key.
    positions = {}
    for index, item in enumerate(items):
        positions.setdefault(key(item), []).append(index)

    s = 0
    for indcs in positions.values():
        if len(indcs) > 1:
            s += sum(1. / (b - a) for a, b in zip(indcs, indcs[1:]))

    # Without repeated keys the penalty is zero; return the limiting value
    # of 1/s rather than raising ZeroDivisionError.
    if s == 0:
        return float('inf')
    return 1. / s
# Records count as equal for scoring when their "Role: Name" values match.
quality_fun = partial(
    quality_maxmindist,
    key=itemgetter("Role: Name"),
)
res = optimize(items=data, quality_function=quality_fun)
pprint.pprint(res)
输出
[{'Full Name': 'name a', 'Role: Name': 'role a', 'User ID': '111'},
{'Full Name': 'name d', 'Role: Name': 'role d', 'User ID': '223'},
{'Full Name': 'name c', 'Role: Name': 'role b', 'User ID': '232'},
{'Full Name': 'name b', 'Role: Name': 'role a', 'User ID': '222'},
{'Full Name': 'name e', 'Role: Name': 'role d', 'User ID': '444'}]
基本思路是:为函数 quality_maxmindist(原始实现见原回答中的链接)增加一个 key 参数,该参数用于判定哪些项目应被视为相等。
针对您问题中的具体情况,可以对 "Role: Name" 使用 operator.itemgetter,这样具有相同角色的项目就会被视为相等。请参阅下面带注释的代码改动:
def quality_maxmindist(items, key=None):
    """Score how far apart equal elements are spread within ``items``.

    Two elements count as equal when ``key`` returns the same value for
    both. For each group of equal elements the inverse distances between
    consecutive occurrences are summed; the score is the reciprocal of the
    grand total, so wider gaps between equal elements give a higher score.

    Args:
        items: Elements to score, in their current order.
        key: Optional one-argument callable returning a hashable value used
            to compare elements. Defaults to the element itself.

    Returns:
        A float score. ``float('inf')`` is returned when no key value occurs
        more than once (this includes empty input).
    """
    if key is None:
        def identity(e):
            return e
        key = identity

    # notice that a dictionary keyed by key(item) is used to find the unique
    # key values, together with the positions at which each one occurs
    positions = {}
    for index, item in enumerate(items):
        positions.setdefault(key(item), []).append(index)  # notice: key(item)

    s = 0
    for indcs in positions.values():
        if len(indcs) > 1:
            s += sum(1. / (b - a) for a, b in zip(indcs, indcs[1:]))

    # Without repeated keys the penalty is zero; return the limiting value
    # of 1/s rather than raising ZeroDivisionError.
    if s == 0:
        return float('inf')
    return 1. / s
# Records count as equal for scoring when their "Role: Name" values match.
quality_fun = partial(
    quality_maxmindist,
    key=itemgetter("Role: Name"),
)
res = optimize(items=data, quality_function=quality_fun)