Sampling from an image

import os, numpy
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import time
numpy.random.seed(seed=int(time.time()))

def clamp(num, low, high):
    return max(low, min(num, high))

# ---------------- 1d methods ---------------- #

def find_interval(cdf, pred):
    first = 0
    l = len(cdf)
    while(l > 0):
        half = l >> 1
        mid = first + half
        if (pred(mid)):
            first = mid + 1
            l -= half + 1
        else:
            l = half
    return clamp(first - 1, 0, len(cdf) - 2)

def sample_c_1d(cdf, u, off = None):
    offset = find_interval(cdf, lambda idx: cdf[idx] <= u)
    if off is not None:
        off[0] = offset
    du = u - cdf[offset]
    if ((cdf[offset + 1] - cdf[offset]) > 0):
        du /= cdf[offset + 1] - cdf[offset]
    return (offset + du) / (len(cdf) - 1)

# ---------------- input ---------------- #

filename = 'dining-room-grey.raw'
img_data = numpy.fromfile(os.path.join(os.getcwd(), filename), dtype='uint8', sep="")
img_h = 3072 # row
img_w = 6144 # col
img_data = img_data.reshape([img_h, img_w]) # (row, col)

gather_l = 72
nu = int(img_w / float(gather_l) ) # col
nv = int(img_h / float(gather_l)) # row
print('func dimension nv(row):', nv, ', nu(col):', nu)

# downscale the image

func = []
g2 = gather_l * gather_l
for i in range(0, nv):
    for j in range(0, nu):
        sum = 0
        for ii in range(0, gather_l):
            for jj in range(0, gather_l):
                sum += img_data[i * gather_l + ii][ j * gather_l + jj]
        func.append(sum / g2)

# ---------------- 2d methods ---------------- #

conditional_cdf_v = []
marginal_func = []
marginal_cdf = []

def process_cdf_1d(func_in, begin, n, cdf_out, integral_out = None):
    cdf_out[:]
    cdf_out.append(0)
    for i in range(1, n + 1):
        cdf_out.append(cdf_out[i - 1] + func_in[begin + i - 1] / n)
    integral = cdf_out[n]
    if integral_out is not None:
        integral_out[0] = integral
    if (integral == 0):
        for i in range(1, n + 1):
            cdf_out[i] = float(i) / float(n)
    else:
        for i in range(1, n + 1):
            cdf_out[i] /= integral

for v in range(0, nv): # for each row
    cdf = []
    m = [0]
    process_cdf_1d(func, v * nu, nu, cdf, m)
    conditional_cdf_v.append(cdf)
    marginal_func.append(m[0])
process_cdf_1d(marginal_func, 0, nv, marginal_cdf)

# ---------------- sampling ---------------- #

num_samples = 80000

def sample_c_2d(pair):
    v = [0]
    data_v = sample_c_1d(marginal_cdf, pair[1], v)
    data_u = sample_c_1d(conditional_cdf_v[v[0]], pair[0])
    return [data_v, data_u] # row, col

data = []
for i in range(0, num_samples):
    d = sample_c_2d([numpy.random.random(),
                     numpy.random.random()])
    d[0] *= nv # row
    d[1] *= nu # col
    data.append(d)

# ---------------- plot ---------------- #

plot_input = True
plot_cdf = True
plot_samples = True

# plot input

if plot_input:
    plt.figure(figsize=(10, 9))
    plt.subplot(211)
    plt.imshow(img_data, cmap='gray')
    plt.xlabel('col')
    plt.ylabel('row')
    plt.title('input image')

    plt.subplot(212)
    plt.imshow(numpy.array(func).reshape(nv, nu), cmap='gray')
    plt.xlabel('u')
    plt.ylabel('v')
    plt.title('downscaled image')

# plot conditional and marginal

if plot_cdf:
    fig = plt.figure(figsize=(12,5))
    gs = gridspec.GridSpec(1, 2, width_ratios=[3.5, 1.5])

    ax = fig.add_subplot(gs[0])
    ax.tick_params(axis='both', labelsize = 6)
    x = range(0, nu + 1)
    for i in range(0, nv):
        plt.plot(x, conditional_cdf_v[i], '.--', label='v = ' + str(i))
    plt.xlabel('possible values')
    plt.ylabel('probability')
    plt.title('conditional CDF')
    plt.legend(framealpha=.2, prop={'size': 6})

    # plot marginal pdf
    ax = fig.add_subplot(gs[1])
    ax.tick_params(axis='both', labelsize = 6)
    for i in range(0, nv):
        x = range(i, i + 1)
        plt.plot(x, marginal_cdf[i], '.', label=('v = ' + str(i)))
    plt.xlabel('v')
    plt.ylabel('probability')
    plt.title('marginal CDF')

if plot_samples:
    plt.figure(figsize=(10, 5))
    plt.tick_params(axis='both', labelsize = 6)
    x = [d[1] for d in data], # x, col
    y = [d[0] for d in data] # y, row
    plt.scatter(x, y, marker='.', color=(.4, .4, .4), s=.5)
    plt.ylim(nv, 0)
    plt.title('sample values (uv coords)')
    plt.xlabel('u')
    plt.ylabel('v')

if plot_input or plot_cdf or plot_samples:
    plt.tight_layout()
    plt.show()

Attribution:

Result:

Brighter areas receive more samples.