Source code for giant.utilities.random_combination
"""
Provides an iterator for generating unique random combinations from a population where order doesn't matter.
This is useful for performing RANSAC (Random Sample Consensus) analysis, a method for robust fitting of models
in the presence of outliers. It ensures that the same sample sets are not chosen multiple times, improving
the efficiency of the RANSAC algorithm.
"""
from typing import Union, Iterator, Sequence, Any
from itertools import combinations
from random import sample
from scipy.special import comb
from giant._typing import ARRAY_LIKE, BasicSequenceProtocol
[docs]
class RandomCombinations:
"""
Iterate over `number_of_combos` random combinations of `combo_length` from `population`.
This iterator ensures unique combinations are returned. If more combinations are requested than are possible,
then an exhaustive list is returned. This is particularly useful for RANSAC analysis, where we need to
generate multiple unique subsets of data points to fit models.
The population that combinations are coming from can either be provided directly, or an integer can be provided to
create a range based population.
For example:
>>> from giant.utilities.random_combination import RandomCombination
>>> # works for an integer sequence
>>> for sample in RandomCombination(10, 2, 3): print(sample)
(2, 6)
(1, 7)
(1, 8)
>>> # works for any sequence like
>>> for sample in RandomCombination('abcdefghijk', 2, 3): print(sample)
('c', 'k')
('d', 'j')
('g', 'j')
>>> # returns an exhaustive set if there are more combos requested than can be made uniquely
>>> for sample in RandomCombination(10, 2, 3): print(sample)
(0, 1)
(0, 2)
(1, 2)
Note that the type of whatever is contained in the sequence like object must support ordering.
"""
population: BasicSequenceProtocol
int_population: Sequence[int]
def __init__(self, population: Union[int, BasicSequenceProtocol], combo_length: int, number_of_combos: int):
"""
:param population: The population to choose from. If specified as an integer then the population will be
range(int).
:param combo_length: The length for each combination as an integer
:param number_of_combos: the number of unique combinations you want as an integer
"""
if isinstance(population, int):
n_population = population
self.population = range(population)
self.int_population = self.population
else:
n_population = len(population)
self.int_population = range(n_population)
self.population = population
self.combo_length = combo_length
self.possible_combos = int(comb(n_population, self.combo_length))
self.number_of_combos = number_of_combos
def __iter__(self) -> Iterator[tuple[Any, ...]]:
"""
Generate random combinations.
If the number of requested combinations is greater than or equal to the number of possible combinations,
this method will return all possible combinations. Otherwise, it will generate unique random combinations.
:return: An iterator of tuples, where each tuple is a combination of elements from the population.
"""
if self.number_of_combos >= self.possible_combos:
for combo in combinations(self.population, self.combo_length):
yield combo
else:
used_samples = set()
for _ in range(self.number_of_combos):
new_sample = tuple(sorted(sample(self.int_population, self.combo_length)))
while new_sample in used_samples:
new_sample = tuple(sorted(sample(self.int_population, self.combo_length)))
used_samples.add(new_sample)
yield tuple(self.population[ind] for ind in new_sample)