这是一种方法,但不是最有效的。
使用binary indexed tree作为前缀和可以提高效率。
最高级别的功能。它迭代k 次,每次调用wchoice()。要从总体中删除当前选定的成员,我只需将其权重设置为 0。
/**
* Produces a weighted sample from `population` of size `k` without replacement.
*
* @param {Object[]} population The population to select from.
* @param {number[]} weights The weighted values of the population.
* @param {number} k The size of the sample to return.
* @returns {[number[], Object[]]} An array of two arrays. The first holds the
* indices of the members in the sample, and
* the second holds the sample members.
*/
function wsample(population, weights, k) {
let sample = [];
let indices = [];
let index = 0;
let choice = null;
let acmwts = accumulate(weights);
for (let i=0; i < k; i++) {
[index, choice] = wchoice(population, acmwts, true);
sample.push(choice);
indices.push(index);
// The below updates the accumulated weights as if the member
// at `index` has a weight of 0, eliminating it from future draws.
// This portion could be optimized. See note below.
let ndecr = weights[index];
for (; index < acmwts.length; index++) {
acmwts[index] -= ndecr;
}
}
return [indices, sample];
}
上面更新累积权重数组的代码部分是算法效率低下的地方。最坏的情况是 O(n - ?) 在每次通过时更新。这里的另一个解决方案遵循与此类似的算法,但使用binary indexed tree 来降低将前缀和更新为O(log n) 操作的成本。
wsample() 调用 wchoice() 从加权列表中选择一个成员。 wchoice() 生成一个累积权重数组,生成一个从 0 到权重总和的随机数(累积权重列表中的最后一项)。然后在累积权重中找到它的插入点;谁是赢家:
/**
* Randomly selects a member of `population` weighting the probability each
* will be selected using `weights`. `accumulated` indicates whether `weights`
* is pre-accumulated, in which case it will skip its accumulation step.
*
* @param {Object[]} population The population to select from.
* @param {number[]} weights The weights of the population.
* @param {boolean} [accumulated] true if weights are pre-accumulated.
* Treated as false if not provided.
* @returns {[number, Object]} An array with the selected member's index and
* the member itself.
*/
function wchoice(population, weights, accumulated) {
let acm = (accumulated) ? weights : accumulate(weights);
let rnd = Math.random() * acm[acm.length - 1];
let idx = bisect_left(acm, rnd);
return [idx, population[idx]];
}
这是我改编自https://en.wikipedia.org/wiki/Binary_search_algorithm的二分搜索算法的JS实现
/**
* Finds the left insertion point for `target` in array `arr`. Uses a binary
* search algorithm.
*
* @param {number[]} arr A sorted ascending array.
* @param {number} target The target value.
* @returns {number} The index in `arr` where `target` can be inserted to
* preserve the order of the array.
*/
function bisect_left(arr, target) {
let n = arr.length;
let l = 0;
let r = n - 1;
while (l <= r) {
let m = Math.floor((l + r) / 2);
if (arr[m] < target) {
l = m + 1;
} else if (arr[m] >= target) {
r = m - 1;
}
}
return l;
}
没有找到现成的 JS 累加器函数,所以我自己写了一个简单的。
/**
* Generates an array of accumulated values for `numbers`.
* e.g.: [1, 5, 2, 1, 5] --> [1, 6, 8, 9, 14]
*
* @param {number[]} numbers The numbers to accumulate.
* @returns {number[]} An array of accumulated values.
*/
function accumulate(numbers) {
let accm = [];
let total = 0;
for (let n of numbers) {
total += n;
accm.push(total)
}
return accm;
}