【问题标题】:Clauset-Newman-Moore community detection algorithm in pythonpython中的Clauset-Newman-Moore社区检测算法
【发布时间】:2014-07-07 19:16:28
【问题描述】:

仅供参考:这不是家庭作业

我尝试在 python 中实现 Clauset-Newman-Moore 社区检测算法,当它运行时,它输出的模块化 (Q) 值始终偏离某些因素。完整的代码如下 - 它接受格式为 this 的文本文件。

虽然我不希望任何人提供一个完整的解决方案来修复所有代码,但非常感谢任何关于可能出错的提示!

一些可能很重要的注意事项:

  • heapq 模块中的 python 堆是一个最小堆,我需要一个最大堆用于该算法,因此 Q 的值存储为 -Q。
  • 出于同样的原因,交换 Q 值的算术运算(减法而不是加法等)

#!/usr/bin/env python

'''
Usage:
    python cnm.py <input_file> <output_file>
'''

import heapq
import sys
import time
from pprint import pprint

def read_input(filename):
    ''' Loads the input into a dictionary'''

    output_dict = {}
    with open(filename, 'r') as f:
        for line in f:
            l = line.split('\t')
            key = int(l[0].strip())
            value = int(l[1].strip())

            if key not in output_dict:
                output_dict[key] = []
            if value not in output_dict:
                output_dict[value] = []

            if value in output_dict[key]:
                pass
            else:
                output_dict[key].append(value)

            if key in output_dict[value]:
                pass
            else:
                output_dict[value].append(key)

    return output_dict


def calculate_m(input_dict):
    ''' Gives the total number of edges in the network. '''
    total = 0
    for key in input_dict:
        total += len(input_dict[key])
    return total / 2


def calculate_deltaQ(m, ki, kj):
    ''' Calculates deltaQ for two communities i and j '''
    deltaQ = 1.0/(2.0*m) - float(ki*kj) / ((2*m)**2)
    return deltaQ


def populate_Qtrees(input_dict, m):
    Qtrees = {}
    for i in input_dict:
        community = input_dict[i]
        ki = len(community)
        Qtrees[i] = {}
        for j in community:
            kj = len(input_dict[j])
            Qtrees[i][j] = calculate_deltaQ(m, ki, kj)

    return Qtrees


def populate_Qheaps(input_dict, m):
    Qheaps = {}
    for key in input_dict:
        community = input_dict[key]
        ki = len(community)
        Qheaps[key] = []
        for i in community:
            kj = len(input_dict[i])
            deltaQ = calculate_deltaQ(m, ki, kj)
            # we store the items in the heap as their negative values because
            # python heap is a min-heap
            heapq.heappush(Qheaps[key], (-deltaQ, i, key))
    return Qheaps


def populate_H(Qheaps):
    H = []
    for key in Qheaps:
        if Qheaps[key] == []:
            continue
        else:
            maximum = Qheaps[key][0]
        heapq.heappush(H, maximum)
    return H


def populate_a(input_dict, m):
    a = {}
    for key in input_dict:
        k = len(input_dict[key])
        ai = float(k) / (2.0 * m)
        a[key] = ai
    return a


def select_largest_q(H):
    return heapq.heappop(H)


def update_Qtrees(Qtrees, a, i, j):

    # from equation 10a - summing i into j
    for key in Qtrees[i]:
        if key in Qtrees[j]:
            Qtrees[j][key] = Qtrees[i][key] - Qtrees[j][key]

    # from equations 10b and 10c - update row j
    for key in Qtrees:
        if key in Qtrees[i] and key not in Qtrees[j]:
            Qtrees[j][key] = Qtrees[i][key] + (2 * a[j] * a[key])
        elif key in Qtrees[j] and key not in Qtrees[i]:
            Qtrees[j][key] = Qtrees[j][key] + (2 * a[i] * a[key])

    # remove i key and update j for each row k
    for key in Qtrees:
        if i in Qtrees[key]:
            Qtrees[key].pop(i, None)
        if j in Qtrees[key]:
            Qtrees[key][j] = Qtrees[key][j] + (2 * a[i] * a[key])

    # remove the self-reference (necessary because our tree is a python dict)
    if j in Qtrees[j]:
        Qtrees[j].pop(j, None)

    # remove i
    Qtrees.pop(i, None)

    return Qtrees


def update_Qheaps(Qtrees, Qheaps, i, j):

    # remove the heap i
    Qheaps.pop(i, None)

    # rebuild the jth heap from the jth binary tree in Qtree
    community = Qtrees[j]
    h = [ (community[key], key, j) for key in community ] # list comprehension
    heapq.heapify(h)
    Qheaps[j] = h

    # remove the ith and update the jth element in each heap
    for key in Qheaps:
        heap = Qheaps[key]
        for item in heap[:]:
            if item[1] == i:
                heap.remove(item)
                heapq.heapify(heap)
            elif item[1] == j:
                # we temporarily change the item to a list to perform insertion
                # (tuples are immutable)
                item_copy = list(item)
                heap.remove(item)
                item_copy[0] = Qtrees[key][j]
                heapq.heappush(heap, tuple(item_copy))

    return Qheaps


def update_a(a, i, j):
    a[j] += a[i]
    a[i] = 0
    return a


def main():
    ''' Main loop of the program. '''

    # read command line input
    filename = sys.argv[1]
    maxQ = 0
    max_step = 0
    Q = 0

    input_dict = read_input(filename)
    m = calculate_m(input_dict)
    nodes = len(input_dict)

    Qtrees = populate_Qtrees(input_dict, m)
    Qheaps = populate_Qheaps(input_dict, m)
    H = populate_H(Qheaps)
    a = populate_a(input_dict, m)

    step = 0
    print 'i', '\t', 'j', '\t', 'Q', '\t\t', 'deltaQ', '\t\t', 'step'

    while H:
        deltaQ, i, j = select_largest_q(H)
        Q -= deltaQ

        Qtrees = update_Qtrees(Qtrees, a, i, j)
        Qheaps = update_Qheaps(Qtrees, Qheaps, i, j)
        H = populate_H(Qheaps)
        a = update_a(a, i, j)

        step += 1

        print i, '\t', j, '\t', round(Q, 7), '\t', round(deltaQ, 7), '\t', step

        if deltaQ < 0:
            maxQ = deltaQ
            max_step = step
        else:
            pass

    output_file = sys.argv[2]
    with open(output_file, 'w+') as f:
        f.write(
'''FASTCOMMUNITY_INFERENCE_ALGORITHM in python!
START-----: {0}
---NET_STATS----
NUMNODES--: {1}
NUMEDGES--: {2}
---MODULARITY---
MAXQ------: {3}
STEP------: {4}
EXIT------: {5}'''.format(time.asctime(),
                          nodes,
                          m,
                          maxQ,
                          max_step,
                          time.asctime() ))


if __name__ == '__main__':
    main()

【问题讨论】:

    标签: python algorithm python-2.7 data-structures graph-algorithm


    【解决方案1】:

    我担心的一件事是:

    heap.remove(item)
    item_copy[0] = Qtrees[key][j]
    heapq.heappush(heap, tuple(item_copy))
    

    heap.remove(item) 将从名为 heap 的列表中删除项目 - 并销毁堆不变量。

    换句话说,在这一步之后,您的名为 heap 的变量可能不再是堆。

    也许打电话会有所帮助

    heapq.heapify(heap)
    

    在 heap.remove(item) 之后。

    【讨论】:

    • 谢谢!出于某种原因,我的印象是heapq.heappush(heap, tuple(item_copy)) 会重新堆放堆。悬停,结果还是不行……
    【解决方案2】:

    这看起来像很多堆。解决当前问题后,使用平衡二叉搜索树可能会更好。运行较慢的代码比运行较快的代码要好。

    1. heapq 模块中的 python 堆是一个最小堆,我需要一个最大堆用于该算法,因此 Q 的值存储为 -Q。
    2. 出于同样的原因,交换 Q 值的算术运算(减法而不是加法等)

    第二个问题是由第一个问题引起的。如果 Q 存储为 -Q,则称其为 q。那么 A + q 实际上是 A + (-Q),即 A + Q。这可能是您的问题。最好的解决方法可能是使用 -Q 来撤消符号更改。

    【讨论】:

      【解决方案3】:

      我认为你的代码 -

      删除 i 键并更新每行 k 的 j

      for key in Qtrees:
          if i in Qtrees[key]:
              Qtrees[key].pop(i, None)
          if j in Qtrees[key]:
              Qtrees[key][j] = Qtrees[key][j] + (2 * a[i] * a[key])
      

      更新 Qtree 是个问题,根据论文没有提到要执行此代码 -

      如果 j 在 Qtrees[key] 中:

          Qtrees[key][j] = Qtrees[key][j] + (2 * a[i] * a[key])
      

      您应该删除此代码并尝试。

      【讨论】:

        猜你喜欢
        • 2013-12-24
        • 1970-01-01
        • 1970-01-01
        • 2020-07-07
        • 1970-01-01
        • 2018-08-15
        • 2012-03-17
        • 2015-05-11
        • 2015-06-03
        相关资源
        最近更新 更多