Note
Go to the end to download the full example code.
Unix 邮件
从Unix邮箱创建一个允许多重边和自环的有向图。节点是电子邮件地址,链接从发送者指向接收者。边数据是一个包含所有电子邮件消息数据的Python email.Message对象。
这个例子展示了`MultiDiGraph`能够保存任意Python对象(在这个例子中,每条边保存一个电子邮件消息对象)作为边数据的能力。
示例Unix邮箱文件名为“unix_email.mbox”,可以在以下链接找到:
From: alice@edu To: bob@gov Subject: NetworkX
From: bob@gov To: alice@edu Subject: Re: NetworkX
From: bob@gov To: ted@com Subject: Re: Graph package in Python?
From: ted@com To: bob@gov Subject: Graph package in Python?
From: ted@com To: bob@gov Subject: get together for lunch to discuss Networks?
From: ted@com To: carol@gov Subject: get together for lunch to discuss Networks?
From: ted@com To: alice@edu Subject: get together for lunch to discuss Networks?
from email.utils import getaddresses, parseaddr
import mailbox
import matplotlib.pyplot as plt
import networkx as nx
# Unix mailbox recipe
# See https://docs.python.org/3/library/mailbox.html
def mbox_graph(path="unix_email.mbox"):
    """Build a directed multigraph of who mailed whom from a Unix mbox file.

    Every message contributes one edge per recipient address found in its
    ``To``, ``Cc``, ``Resent-To`` and ``Resent-Cc`` headers.  Each edge points
    from the sender's address to the recipient's address and stores the
    originating message under the ``message`` edge attribute.  Parallel edges
    and self-loops are kept because the graph is a ``MultiDiGraph``.

    Parameters
    ----------
    path : str, optional
        Location of the mbox file.  Defaults to ``"unix_email.mbox"``, the
        file name this example has always used.

    Returns
    -------
    networkx.MultiDiGraph
        Graph whose nodes are e-mail addresses and whose edges carry the
        message objects.
    """
    # NOTE: mailbox.mbox defaults to create=True, so a missing file is
    # created empty instead of being reported.  That default is kept here
    # to preserve the example's existing behaviour.
    mbox = mailbox.mbox(path)  # parse the Unix mailbox
    G = nx.MultiDiGraph()  # start from an empty graph
    try:
        # Parse each message and grow the graph.
        for msg in mbox:  # msg is an email.message.Message (sub)class instance
            # Only the address part identifies the sender node; the display
            # name is discarded.
            _, source_addr = parseaddr(msg["From"])
            # Collect every recipient header.
            # See https://docs.python.org/3/library/email.html
            tos = msg.get_all("to", [])
            ccs = msg.get_all("cc", [])
            resent_tos = msg.get_all("resent-to", [])
            resent_ccs = msg.get_all("resent-cc", [])
            all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
            # Add one edge per recipient of this message.
            for _, target_addr in all_recipients:
                G.add_edge(source_addr, target_addr, message=msg)
    finally:
        # Release the underlying file handle even if parsing fails; the
        # message objects already attached to edges were read into memory.
        mbox.close()
    return G
# Build the sender -> recipient graph from the sample mailbox.
G = mbox_graph()

# Report each edge together with the subject of the message it carries.
for u, v, d in G.edges(data=True):
    print(f"From: {u} To: {v} Subject: {d['message']['Subject']}")

# Drawing settings: invisible nodes, translucent red edges, large labels.
draw_options = {
    "node_size": 0,
    "alpha": 0.4,
    "edge_color": "r",
    "font_size": 16,
    "with_labels": True,
}

# Fixed seed so the spring layout is reproducible between runs.
pos = nx.spring_layout(G, iterations=10, seed=227)
nx.draw(G, pos, **draw_options)

# Add a margin around the drawing before showing the figure.
ax = plt.gca()
ax.margins(0.08)
plt.show()
Total running time of the script: (0 minutes 0.029 seconds)