Back

Genetic Algorithm

Overview

The GeneticAlgorithm class in the df::ml namespace implements a classic evolutionary optimization algorithm. It evolves a population of candidate solutions through selection, crossover, and mutation operators to find optimal or near-optimal solutions to continuous and combinatorial optimization problems.

Configuration Enums

Selection, Crossover, and Mutation Methods

namespace df::ml {

// Selection method for choosing parents
enum class SelectionMethod {
    Tournament,     // Tournament selection (default)
    Roulette,       // Roulette wheel (fitness-proportionate)
    Rank            // Rank-based selection
};

// Crossover method for combining parents
enum class CrossoverMethod {
    SinglePoint,    // Single crossover point
    TwoPoint,       // Two crossover points
    Uniform         // Each gene from random parent
};

// Mutation method for introducing variation
enum class MutationMethod {
    Gaussian,       // Add Gaussian noise (continuous)
    Uniform,        // Replace with uniform random value
    Swap            // Swap two positions (combinatorial)
};

} // namespace df::ml

GeneticAlgorithm Class

Class Definition

namespace df::ml {

class GeneticAlgorithm {
public:
    // Constructor with configuration parameters
    GeneticAlgorithm(
        size_t population_size  = 100,
        double crossover_rate   = 0.8,
        double mutation_rate    = 0.1,
        size_t max_generations  = 1000,
        double elite_fraction   = 0.1,
        SelectionMethod selection   = SelectionMethod::Tournament,
        CrossoverMethod crossover   = CrossoverMethod::SinglePoint,
        MutationMethod  mutation    = MutationMethod::Gaussian
    );

    // Optimize a continuous objective function
    // fitness: function taking a vector of doubles, returning fitness (higher=better)
    // bounds: {min, max} for each dimension
    std::vector<double> optimize(
        std::function<double(const std::vector<double>&)> fitness,
        const std::vector<std::pair<double, double>> &bounds
    );

    // Optimize a combinatorial problem (permutation-based)
    std::vector<size_t> optimize_combinatorial(
        std::function<double(const std::vector<size_t>&)> fitness,
        size_t num_elements
    );

    // Get the history of best fitness per generation
    std::vector<double> get_evolution_history() const;

    // Get population statistics (mean, best, worst fitness)
    struct PopulationMetrics {
        double best_fitness;
        double worst_fitness;
        double mean_fitness;
        double diversity;
    };
    PopulationMetrics get_population_metrics() const;
};

} // namespace df::ml

Factory Functions

Factory Functions

namespace df::ml {

// Create a genetic algorithm with default settings for continuous optimization
GeneticAlgorithm create_genetic_algorithm(
    size_t population_size = 100,
    size_t max_generations = 1000
);

// Create a genetic algorithm configured for combinatorial optimization
GeneticAlgorithm create_genetic_algorithm_combinatorial(
    size_t population_size = 100,
    size_t max_generations = 1000
);

} // namespace df::ml

Examples

Continuous Optimization

Minimize Rosenbrock Function

#include <dataframe/ml/genetic_algorithm.h>
#include <iostream>
#include <cmath>

int main() {
    // Rosenbrock function (minimum at (1,1) with value 0)
    auto rosenbrock = [](const std::vector<double> &x) -> double {
        double a = 1.0 - x[0];
        double b = x[1] - x[0] * x[0];
        double f = a * a + 100.0 * b * b;
        return -f;  // Negate because GA maximizes fitness
    };

    // Create GA
    df::ml::GeneticAlgorithm ga(
        200,    // population size
        0.85,   // crossover rate
        0.05,   // mutation rate
        2000,   // max generations
        0.1,    // elite fraction
        df::ml::SelectionMethod::Tournament,
        df::ml::CrossoverMethod::TwoPoint,
        df::ml::MutationMethod::Gaussian
    );

    // Search bounds: x in [-5, 5], y in [-5, 5]
    std::vector<std::pair<double, double>> bounds{
        {-5.0, 5.0}, {-5.0, 5.0}
    };

    auto best = ga.optimize(rosenbrock, bounds);

    std::cout << "Best solution: (" << best[0] << ", " << best[1] << ")\n";
    std::cout << "Expected: (1.0, 1.0)\n";

    // Check convergence history
    auto history = ga.get_evolution_history();
    std::cout << "Final best fitness: " << history.back() << "\n";

    return 0;
}

Combinatorial Optimization

Traveling Salesman Problem

// Simple TSP with city coordinates
std::vector<std::pair<double,double>> cities = {
    {0,0}, {1,5}, {5,2}, {7,8}, {3,6}, {8,1}, {2,3}, {6,4}
};

auto tsp_fitness = [&](const std::vector<size_t> &route) -> double {
    double total_dist = 0;
    for (size_t i = 0; i < route.size(); ++i) {
        size_t j = (i + 1) % route.size();
        double dx = cities[route[i]].first  - cities[route[j]].first;
        double dy = cities[route[i]].second - cities[route[j]].second;
        total_dist += std::sqrt(dx*dx + dy*dy);
    }
    return -total_dist;  // Negate to maximize (minimize distance)
};

auto ga = df::ml::create_genetic_algorithm_combinatorial(300, 5000);
auto best_route = ga.optimize_combinatorial(tsp_fitness, cities.size());

std::cout << "Best route: ";
for (auto idx : best_route) std::cout << idx << " ";
std::cout << std::endl;