Source code for networkx.utils.random_sequence

"""
Utilities for generating random numbers, random sequences, and random selections.
"""

import networkx as nx
from networkx.utils import py_random_state

__all__ = [
    "powerlaw_sequence",
    "zipf_rv",
    "cumulative_distribution",
    "discrete_sequence",
    "random_weighted_sample",
    "weighted_choice",
]


# These helpers for choosing random sequences from distributions
# use Python's random module
# https://docs.python.org/3/library/random.html



[docs]
@py_random_state(2)
def powerlaw_sequence(n, exponent=2.0, seed=None):
    """Return a list of ``n`` samples drawn from a power-law distribution.

    Parameters
    ----------
    n : int
        Number of samples to draw.
    exponent : float, optional (default=2.0)
        Exponent of the power law.  Each sample is obtained from
        ``paretovariate(exponent - 1)``.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    list
        The ``n`` sampled values, in the order they were drawn.
    """
    # Inside the body ``seed`` is used as a generator object exposing
    # ``paretovariate``; presumably the ``py_random_state`` decorator on this
    # function performs that conversion -- confirm against its documentation.
    samples = []
    for _ in range(n):
        samples.append(seed.paretovariate(exponent - 1))
    return samples




[docs]
@py_random_state(2)
def zipf_rv(alpha, xmin=1, seed=None):
    r"""Return a random value chosen from the Zipf distribution.

    The return value is an integer drawn from the probability distribution

    .. math::

        p(x)=\frac{x^{-\alpha}}{\zeta(\alpha, x_{\min})},

    where $\zeta(\alpha, x_{\min})$ is the Hurwitz zeta function.

    Parameters
    ----------
    alpha : float
        Exponent value of the distribution.
    xmin : int
        Minimum value.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    x : int
        Random value from the Zipf distribution.

    Raises
    ------
    ValueError:
        If xmin < 1 or
        if alpha <= 1.

    Notes
    -----
    The rejection algorithm generates random values for the power-law
    distribution in uniformly bounded expected time dependent on
    parameters.  See [1]_ for details on its operation.

    Examples
    --------
    >>> nx.utils.zipf_rv(alpha=2, xmin=3, seed=42)
    8

    References
    ----------
    .. [1] Luc Devroye, Non-Uniform Random Variate Generation,
       Springer-Verlag, New York, 1986.
    """
    # Validate the arguments before touching the random number generator.
    if xmin < 1:
        raise ValueError("xmin < 1")
    if alpha <= 1:
        raise ValueError("a <= 1.0")

    shape = alpha - 1.0
    bound = 2**shape
    # Rejection loop: each pass consumes exactly two uniform draws, builds an
    # integer candidate from the first and accepts or rejects it with the
    # second.
    while True:
        u = 1.0 - seed.random()  # u in (0,1], so the negative power is finite
        v = seed.random()  # v in [0,1)
        candidate = int(xmin * u ** -(1.0 / shape))
        ratio = (1.0 + (1.0 / candidate)) ** shape
        if v * candidate * (ratio - 1.0) / (bound - 1.0) <= ratio / bound:
            return candidate




[docs]
def cumulative_distribution(distribution):
    """Return the normalized cumulative distribution of a discrete distribution.

    Parameters
    ----------
    distribution : iterable of numbers
        Histogram of (unnormalized) weights.  Any iterable is accepted,
        including generators and dict views; it is read exactly once.

    Returns
    -------
    list
        A list with one more entry than `distribution`.  The first entry is
        ``0.0`` and entry ``i + 1`` is entry ``i`` plus
        ``distribution[i] / sum(distribution)``.

    Raises
    ------
    ZeroDivisionError
        If the input is non-empty and its plain Python numeric weights sum
        to zero.
    """
    # Materialize once so the input can be both summed and iterated even when
    # it is a one-shot iterator or a non-indexable view.
    weights = list(distribution)
    total = sum(weights)
    cdf = [0.0]
    for weight in weights:
        # Same running sum as indexing cdf[i] + distribution[i] / total.
        cdf.append(cdf[-1] + weight / total)
    return cdf




[docs]
@py_random_state(3)
def discrete_sequence(n, distribution=None, cdistribution=None, seed=None):
    """Return a list of ``n`` samples from a discrete distribution.

    One of the following must be specified:

    distribution = histogram of values, will be normalized

    cdistribution = normalized discrete cumulative distribution

    If both are given, `cdistribution` is used and `distribution` is ignored.

    Parameters
    ----------
    n : int
        Number of samples to draw.
    distribution : sequence of numbers, optional
        Histogram of weights; converted with `cumulative_distribution`.
    cdistribution : sorted sequence of numbers, optional
        Cumulative distribution, expected to start at 0.0 as produced by
        `cumulative_distribution`.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    list of int
        For each uniform draw ``u``, the index ``i`` of the bin with
        ``cdf[i] < u <= cdf[i + 1]``.

    Raises
    ------
    NetworkXError
        If neither `distribution` nor `cdistribution` is given.
    """
    import bisect

    if cdistribution is not None:
        cdf = cdistribution
    elif distribution is not None:
        cdf = cumulative_distribution(distribution)
    else:
        raise nx.NetworkXError(
            "discrete_sequence: distribution or cdistribution missing"
        )

    # Inside the body ``seed`` is used as a generator object exposing
    # ``random``; presumably the ``py_random_state`` decorator on this
    # function performs that conversion -- confirm against its documentation.
    seq = []
    for _ in range(n):
        u = seed.random()  # uniform draw in [0, 1)
        index = bisect.bisect_left(cdf, u) - 1
        if index < 0:
            # u <= cdf[0], which for a CDF starting at 0.0 means u == 0.0.
            # bisect_left would yield the invalid index -1; bisect_right
            # instead skips any leading zero-width bins, and the clamp
            # guarantees a non-negative index.
            index = max(bisect.bisect_right(cdf, u) - 1, 0)
        seq.append(index)
    return seq




[docs]
@py_random_state(2)
def random_weighted_sample(mapping, k, seed=None):
    """Return ``k`` distinct items drawn from a weighted sample.

    Items are drawn one at a time with `weighted_choice`; repeated draws of
    an already selected item are discarded until ``k`` distinct items have
    been collected.

    Parameters
    ----------
    mapping : dict
        Dictionary with items as keys and weights as values.
    k : int
        Number of distinct items to return.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    list
        The selected items.  They are collected in a set, so the order of
        the list is the set's iteration order, not the order of the draws.

    Raises
    ------
    ValueError
        If ``k`` is larger than the number of items in `mapping`, or larger
        than the number of items with a positive weight.
    """
    if k > len(mapping):
        raise ValueError("sample larger than population")
    # Items whose weight is not positive are never returned by the roulette
    # selection, so asking for more distinct items than there are
    # positive-weight entries would make the loop below spin forever.
    selectable = sum(1 for weight in mapping.values() if weight > 0)
    if k > selectable:
        raise ValueError("sample larger than number of items with positive weight")
    sample = set()
    while len(sample) < k:
        sample.add(weighted_choice(mapping, seed))
    return list(sample)




[docs]
@py_random_state(1)
def weighted_choice(mapping, seed=None):
    """Return a single element from a weighted sample.

    Parameters
    ----------
    mapping : dict
        Dictionary with items as keys and weights as values.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    object
        A key of `mapping`, selected by the roulette method.  ``None`` is
        returned only when no item has a positive weight (for example an
        empty mapping).
    """
    # Roulette method: scale one uniform draw by the total weight, then walk
    # the items subtracting each weight until the remainder turns negative.
    # ``seed`` is used as a generator object exposing ``random``; presumably
    # the ``py_random_state`` decorator on this function performs that
    # conversion -- confirm against its documentation.
    remainder = seed.random() * sum(mapping.values())
    fallback = None
    for key, weight in mapping.items():
        remainder -= weight
        if remainder < 0:
            return key
        if weight > 0:
            fallback = key
    # Floating-point rounding can leave the remainder at >= 0 after the last
    # item (e.g. weights 0.1, 0.2, 0.3 with the largest possible draw).  Return
    # the last positive-weight item instead of falling through to None.
    return fallback