Source code for networkx.algorithms.sparsifiers

"""用于计算图稀疏化器的函数。"""

import math

import networkx as nx
from networkx.utils import not_implemented_for, py_random_state

__all__ = ["spanner"]


[docs] @not_implemented_for("directed") @not_implemented_for("multigraph") @py_random_state(3) @nx._dispatchable(edge_attrs="weight", returns_graph=True) def spanner(G, stretch, weight=None, seed=None): """返回具有给定伸展度的给定图的稀疏图。 图 G = (V, E) 的伸展度为 t 的稀疏图是一个子图 H = (V, E_S),其中 E_S 是 E 的子集,并且 H 中任意一对节点之间的距离最多是 G 中节点之间距离的 t 倍。 Parameters ---------- G : NetworkX 图 一个无向简单图。 stretch : float 稀疏图的伸展度。 weight : 对象 用作距离的边属性。 seed : 整数,random_state,或 None(默认) 随机数生成状态的指示器。 参见 :ref:`Randomness<randomness>` 。 Returns ------- NetworkX 图 具有给定伸展度的给定图的稀疏图。 Raises ------ ValueError 如果给定的伸展度小于 1。 Notes ----- 此函数实现了 Baswana 和 Sen 的稀疏图算法, 参见 [1]。 该算法是一个随机化的拉斯维加斯算法:期望的运行时间是 O(km),其中 k = (stretch + 1) // 2,m 是 G 中的边数。返回的图始终是具有指定伸展度的给定图的稀疏图。对于加权图,稀疏图中的边数是 O(k * n^(1 + 1 / k)),其中 k 如上定义,n 是 G 中的节点数。对于无权图,边数是 O(n^(1 + 1 / k) + kn)。 References ---------- [1] S. Baswana, S. Sen. 一种简单且线性时间随机化算法,用于在加权图中计算稀疏图。 Random Struct. Algorithms 30(4): 532-563 (2007). """ if stretch < 1: raise ValueError("stretch must be at least 1") k = (stretch + 1) // 2 # initialize spanner H with empty edge set H = nx.empty_graph() H.add_nodes_from(G.nodes) # phase 1: forming the clusters # the residual graph has V' from the paper as its node set # and E' from the paper as its edge set residual_graph = _setup_residual_graph(G, weight) # clustering is a dictionary that maps nodes in a cluster to the # cluster center clustering = {v: v for v in G.nodes} sample_prob = math.pow(G.number_of_nodes(), -1 / k) size_limit = 2 * math.pow(G.number_of_nodes(), 1 + 1 / k) i = 0 while i < k - 1: # step 1: sample centers sampled_centers = set() for center in set(clustering.values()): if seed.random() < sample_prob: sampled_centers.add(center) # combined loop for steps 2 and 3 edges_to_add = set() edges_to_remove = set() new_clustering = {} for v in residual_graph.nodes: if clustering[v] in sampled_centers: continue # step 2: find neighboring (sampled) clusters and # lightest edges to them lightest_edge_neighbor, lightest_edge_weight = _lightest_edge_dicts( residual_graph, clustering, v ) neighboring_sampled_centers = ( set(lightest_edge_weight.keys()) & sampled_centers ) # step 3: add edges to spanner if not neighboring_sampled_centers: # connect to each neighboring center via lightest edge for neighbor in lightest_edge_neighbor.values(): edges_to_add.add((v, neighbor)) # remove all incident edges for neighbor in residual_graph.adj[v]: edges_to_remove.add((v, neighbor)) else: # there is a neighboring sampled center closest_center = min( neighboring_sampled_centers, key=lightest_edge_weight.get ) closest_center_weight = lightest_edge_weight[closest_center] closest_center_neighbor = lightest_edge_neighbor[closest_center] edges_to_add.add((v, closest_center_neighbor)) new_clustering[v] = closest_center # connect to centers with edge weight less than # closest_center_weight for center, edge_weight in lightest_edge_weight.items(): if edge_weight < closest_center_weight: neighbor = lightest_edge_neighbor[center] edges_to_add.add((v, neighbor)) # remove edges to centers with edge weight less than # closest_center_weight for neighbor in residual_graph.adj[v]: nbr_cluster = clustering[neighbor] nbr_weight = lightest_edge_weight[nbr_cluster] if ( nbr_cluster == closest_center or nbr_weight < closest_center_weight ): edges_to_remove.add((v, neighbor)) # check whether iteration added too many edges to spanner, # if so repeat if len(edges_to_add) > size_limit: # an iteration is repeated O(1) times on expectation continue # iteration succeeded i = i + 1 # actually add edges to spanner for u, v in edges_to_add: _add_edge_to_spanner(H, residual_graph, u, v, weight) # actually delete edges from residual graph residual_graph.remove_edges_from(edges_to_remove) # copy old clustering data to new_clustering for node, center in clustering.items(): if center in sampled_centers: new_clustering[node] = center clustering = new_clustering # step 4: remove intra-cluster edges for u in residual_graph.nodes: for v in list(residual_graph.adj[u]): if clustering[u] == clustering[v]: residual_graph.remove_edge(u, v) # update residual graph node set for v in list(residual_graph.nodes): if v not in clustering: residual_graph.remove_node(v) # phase 2: vertex-cluster joining for v in residual_graph.nodes: lightest_edge_neighbor, _ = _lightest_edge_dicts(residual_graph, clustering, v) for neighbor in lightest_edge_neighbor.values(): _add_edge_to_spanner(H, residual_graph, v, neighbor, weight) return H
def _setup_residual_graph(G, weight): """设置残余图作为G的副本,具有唯一的边权重。 残余图的节点集对应于Baswana-Sen论文中的集合V',边集对应于论文中的集合E'。 此函数为残余图的边分配不同的权重(即使是无权重的输入图),这是算法所要求的。 Parameters ---------- G : NetworkX图 一个无向简单图。 weight : 对象 用作距离的边属性。 Returns ------- NetworkX图 用于Baswana-Sen算法的残余图。 """ residual_graph = G.copy() # establish unique edge weights, even for unweighted graphs for u, v in G.edges(): if not weight: residual_graph[u][v]["weight"] = (id(u), id(v)) else: residual_graph[u][v]["weight"] = (G[u][v][weight], id(u), id(v)) return residual_graph def _lightest_edge_dicts(residual_graph, clustering, node): """找到每个簇的最轻边。 搜索与给定节点相邻的每个簇的最小权重边。 Parameters ---------- residual_graph : NetworkX 图 由 Baswana-Sen 算法使用的残差图。 clustering : 字典 节点的当前聚类。 node : 节点 搜索起始的节点。 Returns ------- lightest_edge_neighbor, lightest_edge_weight : 字典, 字典 lightest_edge_neighbor 是一个字典,将中心 C 映射到相应簇中的节点 v,使得从给定节点到 v 的边是从给定节点到簇中任何节点的最轻边。lightest_edge_weight 将中心 C 映射到上述边的权重。 Notes ----- 如果在残差图中没有与给定节点相邻的簇节点,则该簇的中心不是返回字典中的键。 """ lightest_edge_neighbor = {} lightest_edge_weight = {} for neighbor in residual_graph.adj[node]: nbr_center = clustering[neighbor] weight = residual_graph[node][neighbor]["weight"] if ( nbr_center not in lightest_edge_weight or weight < lightest_edge_weight[nbr_center] ): lightest_edge_neighbor[nbr_center] = neighbor lightest_edge_weight[nbr_center] = weight return lightest_edge_neighbor, lightest_edge_weight def _add_edge_to_spanner(H, residual_graph, u, v, weight): """将边 {u, v} 添加到扩展图 H 中,并从残余图中获取权重。 Parameters ---------- H : NetworkX 图 正在构建的扩展图。 residual_graph : NetworkX 图 由 Baswana-Sen 算法使用的残余图。边的权重从此图中获取。 u : 节点 边的一个端点。 v : 节点 边的另一个端点。 weight : 对象 用作距离的边属性。 """ H.add_edge(u, v) if weight: H[u][v][weight] = residual_graph[u][v]["weight"][0]