Sampling from 2D distribution

# this program follows the pbrt Distribution2D class methods
import numpy
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

def clamp(num, low, high):
    return max(low, min(num, high))

def process_cdf_1d(pdf_data, begin, n, cdf_out, integral_out = None):
    cdf_out[:]
    cdf_out.append(0)
    for i in range(1, n + 1):
        cdf_out.append(cdf_out[i - 1] + pdf_data[begin + i - 1])
    integral = cdf_out[n]
    if integral_out is not None:
        integral_out[0] = integral
    if (integral == 0):
        for i in range(1, n + 1):
            cdf_out[i] = float(i) / float(n)
    else:
        for i in range(1, n + 1):
            cdf_out[i] /= integral

def find_interval(cdf, pred):
    first = 0
    l = len(cdf)
    while(l > 0):
        half = l >> 1
        mid = first + half
        if (pred(mid)):
            first = mid + 1
            l -= half + 1
        else:
            l = half
    return clamp(first - 1, 0, len(cdf) - 2)

def sample_c_1d(cdf, u, off = None):
    offset = find_interval(cdf, lambda idx: cdf[idx] <= u)
    if off is not None:
        off[0] = offset
    du = u - cdf[offset]
    if ((cdf[offset + 1] - cdf[offset]) > 0):
        du /= cdf[offset + 1] - cdf[offset]
    return (offset + du) / (len(cdf) - 1)

################################################################

pdf = [0.01, 0.18, 0.07, 0.13, 0.06,
       0.02, 0.08, 0.25, 0.01, 0.19]
nu = 5 # col
nv = 2 # row

num_samples = 4000

conditional_cdf_v = [] # [nv + 1][nu]
marginal_pdf = [] # [nv]
marginal_cdf = [] # [nv + 1]

# get conditional and marginal
for v in range(0, nv):
    cdf = []
    m = [0]
    process_cdf_1d(pdf, v * nu, nu, cdf, m)
    conditional_cdf_v.append(cdf)
    marginal_pdf.append(m[0])

process_cdf_1d(marginal_pdf, 0, nv, marginal_cdf)

def sample_c_2d(pair):
    v = [0]
    data_v = sample_c_1d(marginal_cdf, pair[1], v)
    data_u = sample_c_1d(conditional_cdf_v[v[0]], pair[0])
    return [data_u, data_v]

# sample
data = []
for i in range(0, num_samples):
    d = sample_c_2d([numpy.random.random(),
                     numpy.random.random()])
    d[0] *= nu
    d[1] *= nv
    data.append(d)

################################################################

fig = plt.figure(figsize=(8,8))
gs = gridspec.GridSpec(2, 2, width_ratios=[3.5, 1.5], height_ratios=[1.5, 1.5])

# plot conditional
ax = fig.add_subplot(gs[0])
ax.tick_params(axis='both', labelsize = 6)
x = range(0, nu + 1)
for i in range(0, nv):
    plt.plot(x, conditional_cdf_v[i], 'o--', label='v = ' + str(i))
plt.xlabel('possible values')
plt.ylabel('probability')
plt.title('conditional CDF')
plt.legend(loc='best', framealpha=1)

# plot marginal pdf
ax = fig.add_subplot(gs[1])
ax.tick_params(axis='both', labelsize = 6)
for i in range(0, nv):
    x = range(i, i + 1)
    plt.plot(x, marginal_pdf[i], 'o', label=('v = ' + str(i)))
plt.xlabel('v')
plt.ylabel('probability')
plt.title('marginal PDF')
plt.legend(framealpha=1)

# plot samples
ax = fig.add_subplot(gs[2])
ax.tick_params(axis='both', labelsize = 6)
ax.scatter([d[0] for d in data],
           [d[1] for d in data], marker='.', color='g', s=1)
plt.xlabel('u')
plt.ylabel('v')
plt.title('sample values')

plt.tight_layout()
plt.show()

Steps:

  • Use the 2 dimensional pdf $p(u, v)$ to solve for an array of conditional CDF $P(u | v)$ (cumulative probability density of $u$ given $v$), each of which provides an integral of probability. The marginal CDF $P(v)$ (average density of $v$ given all $u$) can be solved using these integrals.
  • The mariginal CDF $P(v)$ can then be used to sample $v$, providing a density distribution offset, which is the index of the conditional CDF in the CDF array to be used to sample $u$.

Result: