欢迎来到 Stack Overflow。我运行您的代码时没有遇到任何错误;但由于示例数据中没有任何重复项,两个数据集的重复计数结果都是 0。因此我修改了您的输入数据,加入了一些重复的行:
# Sample rows for the iOS data set. Several rows are repeated on purpose so
# that find_duplicates(iOS_list, 2), which reads index 2 of every row, has
# duplicates to report.
iOS_list = [
["Village", "did", "removed", "enjoyed", "explain", "nor", "ham", "saw", "calling", "talking."],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["feelings", "own", "shy.", "Request", "norland", "neither", "mistake", "for", "yet.", "Between"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["unpacked", "delicate.", "Why", "sir", "end", "believe", "uncivil", "respect.", "Always", "get"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["adieus", "nature", "day", "course", "for", "common.", "My", "little", "garret", "repair"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["For", "who", "thoroughly", "her", "boy", "estimating", "conviction.", "Removed", "demands", "expense"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["admire", "in", "giving.", "See", "resolved", "goodness", "felicity", "shy", "civility", "domestic"],
["had", "but.", "Drawings", "offended", "yet", "answered", "jennings", "perceive", "laughing", "six"]
]
# Sample rows for the Play data set. Some rows are repeated on purpose so
# that find_duplicates(play_apps, 0), which reads index 0 of every row, has
# duplicates to report.
play_apps = [
["We", "diminution", "preference", "thoroughly", "if.", "Joy", "deal", "pain", "view", "much"],
["her", "time.", "Led", "young", "gay", "would", "now", "state.", "Pronounce", "we"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["attention", "admitting", "on", "assurance", "of", "suspicion", "conveying.", "That", "his", "west"],
["quit", "had", "met", "till.", "Of", "advantage", "he", "attending", "household", "at"],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["dining", "so", "he", "my", "around", "to.", "Increasing", "impression", "interested", "expression"],
["he", "my", "at.", "Respect", "invited", "request", "charmed", "me", "warrant", "to."],
["Expect", "no", "pretty", "as", "do", "though", "so", "genius", "afraid", "cousin."],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["Girl", "when", "of", "ye", "snug", "poor", "draw.", "Mistake", "totally", "of"],
["in", "chiefly.", "Justice", "visitor", "him", "entered", "for.", "Continue", "delicate", "as"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["being", "style", "house.", "An", "whom", "down", "kept", "lain", "name", "so"],
]
def find_duplicates(listy, index):
    """Print how many repeated app names ``listy`` contains, plus a sample.

    Every occurrence of a name after its first one is recorded, so a name
    that appears five times contributes four entries to ``duplicate_apps``
    (and is stored once in ``unique_apps``).

    Args:
        listy: Iterable of rows; each row must be indexable.
        index: Position of the app name inside each row. Names must be
            hashable (e.g. strings).

    Returns:
        None. The count and up to three sample names are printed.

    Raises:
        IndexError: If a row has no element at ``index``.
    """
    unique_apps = set()  # every distinct name seen so far; set gives O(1) lookups
    duplicate_apps = []  # one entry per repeated occurrence of a name
    for app in listy:
        name = app[index]
        if name in unique_apps:
            duplicate_apps.append(name)
        else:
            unique_apps.add(name)
    print('Count of duplicate apps in data set:', len(duplicate_apps))
    print('\n')
    print('Sample of duplicate apps in data set:', duplicate_apps[:3])
    print('\n')
# Report duplicates for both data sets: the name is read from index 0 of each
# play_apps row and from index 2 of each iOS_list row.
find_duplicates(play_apps, 0)
find_duplicates(iOS_list, 2)
修改数据之后,代码仍然可以正常运行,没有任何问题。所以如果您确实遇到了错误,我认为您需要检查一下输入数据:您的外层列表中可能有某个子列表的元素少于 3 个(因为您访问的是索引 2),这种情况下会引发 IndexError。
不过我也发现了一个可能的问题:在您的代码中,重复的应用名称和唯一的应用名称是分开存放的。但是,如果一个名称出现了 5 次,那么 unique_apps 中会有一份,而 duplicate_apps 中会有四份。我可能理解错了,但我认为您希望该名称在 duplicate_apps 中只出现一次。如果是这样,您只需对代码做最小的改动:把 duplicate_apps 从列表改成 set,这样其中就不会再有重复项。
# Sample rows for the iOS data set. Several rows are repeated on purpose so
# that find_duplicates(iOS_list, 2), which reads index 2 of every row, has
# duplicates to report.
iOS_list = [
["Village", "did", "removed", "enjoyed", "explain", "nor", "ham", "saw", "calling", "talking."],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["feelings", "own", "shy.", "Request", "norland", "neither", "mistake", "for", "yet.", "Between"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["unpacked", "delicate.", "Why", "sir", "end", "believe", "uncivil", "respect.", "Always", "get"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["adieus", "nature", "day", "course", "for", "common.", "My", "little", "garret", "repair"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["For", "who", "thoroughly", "her", "boy", "estimating", "conviction.", "Removed", "demands", "expense"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["admire", "in", "giving.", "See", "resolved", "goodness", "felicity", "shy", "civility", "domestic"],
["had", "but.", "Drawings", "offended", "yet", "answered", "jennings", "perceive", "laughing", "six"]
]
# Sample rows for the Play data set. Some rows are repeated on purpose so
# that find_duplicates(play_apps, 0), which reads index 0 of every row, has
# duplicates to report.
play_apps = [
["We", "diminution", "preference", "thoroughly", "if.", "Joy", "deal", "pain", "view", "much"],
["her", "time.", "Led", "young", "gay", "would", "now", "state.", "Pronounce", "we"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["attention", "admitting", "on", "assurance", "of", "suspicion", "conveying.", "That", "his", "west"],
["quit", "had", "met", "till.", "Of", "advantage", "he", "attending", "household", "at"],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["dining", "so", "he", "my", "around", "to.", "Increasing", "impression", "interested", "expression"],
["he", "my", "at.", "Respect", "invited", "request", "charmed", "me", "warrant", "to."],
["Expect", "no", "pretty", "as", "do", "though", "so", "genius", "afraid", "cousin."],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["Girl", "when", "of", "ye", "snug", "poor", "draw.", "Mistake", "totally", "of"],
["in", "chiefly.", "Justice", "visitor", "him", "entered", "for.", "Continue", "delicate", "as"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["being", "style", "house.", "An", "whom", "down", "kept", "lain", "name", "so"],
]
def find_duplicates(listy, index):
    """Print how many distinct app names occur more than once, plus a sample.

    Unlike a list-based collector, ``duplicate_apps`` is a set, so a name
    that is repeated many times is still counted only once.

    Args:
        listy: Iterable of rows; each row must be indexable.
        index: Position of the app name inside each row. Names must be
            hashable (e.g. strings) because they are stored in sets.

    Returns:
        None. The count and up to three sample names are printed.

    Raises:
        IndexError: If a row has no element at ``index``.
    """
    unique_apps = set()  # every distinct name seen so far; set gives O(1) lookups
    duplicate_apps = set()  # distinct names that were seen more than once
    for app in listy:
        name = app[index]
        if name in unique_apps:
            duplicate_apps.add(name)
        else:
            unique_apps.add(name)
    print('Count of duplicate apps in data set:', len(duplicate_apps))
    print('\n')
    # A set has no defined order, so which three names are shown may differ
    # from run to run.
    print('Sample of duplicate apps in data set:', list(duplicate_apps)[:3])
    print('\n')
# Report duplicates for both data sets: the name is read from index 0 of each
# play_apps row and from index 2 of each iOS_list row.
find_duplicates(play_apps, 0)
find_duplicates(iOS_list, 2)
此外,我们还可以使用 generator expression(生成器表达式)、Counter、list comprehension(列表推导式)和 random.sample() 函数来简化您的代码。
import collections
import random
# Sample rows for the iOS data set. Several rows are repeated on purpose so
# that find_duplicates(iOS_list, 2), which reads index 2 of every row, has
# duplicates to report.
iOS_list = [
["Village", "did", "removed", "enjoyed", "explain", "nor", "ham", "saw", "calling", "talking."],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["feelings", "own", "shy.", "Request", "norland", "neither", "mistake", "for", "yet.", "Between"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["unpacked", "delicate.", "Why", "sir", "end", "believe", "uncivil", "respect.", "Always", "get"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["adieus", "nature", "day", "course", "for", "common.", "My", "little", "garret", "repair"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["For", "who", "thoroughly", "her", "boy", "estimating", "conviction.", "Removed", "demands", "expense"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["admire", "in", "giving.", "See", "resolved", "goodness", "felicity", "shy", "civility", "domestic"],
["had", "but.", "Drawings", "offended", "yet", "answered", "jennings", "perceive", "laughing", "six"]
]
# Sample rows for the Play data set. Some rows are repeated on purpose so
# that find_duplicates(play_apps, 0), which reads index 0 of every row, has
# duplicates to report.
play_apps = [
["We", "diminution", "preference", "thoroughly", "if.", "Joy", "deal", "pain", "view", "much"],
["her", "time.", "Led", "young", "gay", "would", "now", "state.", "Pronounce", "we"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["attention", "admitting", "on", "assurance", "of", "suspicion", "conveying.", "That", "his", "west"],
["quit", "had", "met", "till.", "Of", "advantage", "he", "attending", "household", "at"],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["dining", "so", "he", "my", "around", "to.", "Increasing", "impression", "interested", "expression"],
["he", "my", "at.", "Respect", "invited", "request", "charmed", "me", "warrant", "to."],
["Expect", "no", "pretty", "as", "do", "though", "so", "genius", "afraid", "cousin."],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["Girl", "when", "of", "ye", "snug", "poor", "draw.", "Mistake", "totally", "of"],
["in", "chiefly.", "Justice", "visitor", "him", "entered", "for.", "Continue", "delicate", "as"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["being", "style", "house.", "An", "whom", "down", "kept", "lain", "name", "so"],
]
def find_duplicates(source_list, name_location):
    """Print how many distinct app names occur more than once, plus a sample.

    Args:
        source_list: Iterable of rows; each row must be indexable.
        name_location: Position of the app name inside each row. Names must
            be hashable because they are tallied with ``collections.Counter``.

    Returns:
        None. The count and a random sample of up to three duplicated names
        are printed.

    Raises:
        IndexError: If a row has no element at ``name_location``.
    """
    # Generator expression: names are fed to Counter one at a time without
    # building an intermediate list.
    names = (current_app[name_location] for current_app in source_list)
    counts = collections.Counter(names)
    duplicates = [name for name, count in counts.items() if count > 1]
    duplicates_count = len(duplicates)
    # random.sample raises ValueError when asked for more items than exist,
    # so the sample size is capped at the number of duplicates.
    sample = random.sample(duplicates, min(3, duplicates_count))
    print('Count of duplicate apps in data set:', duplicates_count, '\n')
    print('Sample of duplicate apps in data set:', sample, '\n')
# Report duplicates for both data sets: the name is read from index 0 of each
# play_apps row and from index 2 of each iOS_list row.
find_duplicates(play_apps, 0)
find_duplicates(iOS_list, 2)
这样能满足您的需求吗?