Back

Mathematical Functions

Overview

The DataFrame library provides a rich set of mathematical functions for element-wise arithmetic, normalization, bounds computation, random number generation, weighted sums, and NaN handling. These functions operate on Serie objects and support both scalar and vector types.

Arithmetic Operations

Arithmetic Function Signatures

// Element-wise arithmetic between two Series: each function takes both
// operands by const reference and returns the result as a new Serie<T>.
// NOTE(review): behavior when a and b have different sizes (and for div when
// an element of b is zero) is not stated in this section -- confirm.
template <typename T> Serie<T> add(const Serie<T> &a, const Serie<T> &b);
template <typename T> Serie<T> sub(const Serie<T> &a, const Serie<T> &b);
template <typename T> Serie<T> mult(const Serie<T> &a, const Serie<T> &b);
template <typename T> Serie<T> div(const Serie<T> &a, const Serie<T> &b);

// Operator overloads for two Serie<T> operands,
// e.g. {1, 2, 3} + {4, 5, 6} yields {5, 7, 9}.
// Presumably equivalent to add/sub/mult/div above -- confirm.
template <typename T> Serie<T> operator+(const Serie<T> &a, const Serie<T> &b);
template <typename T> Serie<T> operator-(const Serie<T> &a, const Serie<T> &b);
template <typename T> Serie<T> operator*(const Serie<T> &a, const Serie<T> &b);
template <typename T> Serie<T> operator/(const Serie<T> &a, const Serie<T> &b);

// Negate (unary minus), e.g. -{1, 2, 3} yields {-1, -2, -3}
template <typename T> Serie<T> negate(const Serie<T> &serie);
template <typename T> Serie<T> operator-(const Serie<T> &serie);

// Scale by a scalar (every element multiplied by `factor`) or by another
// Serie (element i multiplied by factors[i]); factors are always double,
// whatever T is.
template <typename T> Serie<T> scale(const Serie<T> &serie, double factor);
template <typename T> Serie<T> scale(const Serie<T> &serie, const Serie<double> &factors);
Arithmetic Example

// Two operands of equal length
df::Serie<double> a{1.0, 2.0, 3.0};
df::Serie<double> b{4.0, 5.0, 6.0};

// Binary operators work element by element and are used unqualified
auto sum  = a + b;              // {5, 7, 9}
auto diff = a - b;              // {-3, -3, -3}
auto prod = a * b;              // {4, 10, 18}
auto quot = b / a;              // {4, 2.5, 2}

// Unary minus, and uniform scaling through the df::math-qualified function
auto neg = -a;                  // {-1, -2, -3}
auto scaled = df::math::scale(a, 2.0);  // {2, 4, 6}

// Scale each element by a different factor (factors[i] multiplies a[i])
df::Serie<double> factors{0.5, 1.0, 2.0};
auto varied = df::math::scale(a, factors);  // {0.5, 2.0, 6.0}

Bounds, Min, Max

Bounds Functions

// Returns a pair {min_value, max_value}.
// For vector element types the minimum and maximum are taken per component.
// NOTE(review): the result for an empty Serie is not stated in this
// section -- confirm.
template <typename T>
std::pair<T, T> bounds(const Serie<T> &serie);

// Individual min and max (the two halves of what bounds() returns in one call)
template <typename T> T min(const Serie<T> &serie);
template <typename T> T max(const Serie<T> &serie);
Bounds Example

df::Serie<double> values{3.0, 1.5, 7.2, 0.8, 5.1};

// bounds() returns a std::pair, unpacked here with structured bindings
auto [lo, hi] = df::math::bounds(values);  // lo=0.8, hi=7.2
double mn = df::math::min(values);          // 0.8
double mx = df::math::max(values);          // 7.2

// Works for vector types too (component-wise): each component is reduced
// independently, so pmin and pmax need not be elements of pts.
df::Serie<Vector3D> pts{{1,2,3}, {4,0,1}, {2,5,2}};
auto [pmin, pmax] = df::math::bounds(pts);
// pmin = {1, 0, 1}, pmax = {4, 5, 3}

Normalize

Normalize Functions

// Configuration for normalization: the output range that normalize() maps
// onto. The defaults describe the range [0, 1]. Being a plain aggregate, it
// can be brace-initialized in member order: {target_min, target_max}.
struct NormalizeConfig {
    double target_min = 0.0;  // Target range minimum
    double target_max = 1.0;  // Target range maximum
};

// Normalize to [0, 1] by default.
// The result is always a Serie<double>, whatever the element type T is.
// NOTE(review): the result for a constant Serie (min == max) is not stated
// in this section -- confirm.
template <typename T>
Serie<double> normalize(const Serie<T> &serie);

// Normalize to a custom range [config.target_min, config.target_max]
template <typename T>
Serie<double> normalize(const Serie<T> &serie, const NormalizeConfig &config);
Normalize Example

df::Serie<double> values{10, 20, 30, 40, 50};

// Normalize to [0, 1]: the smallest value maps to 0, the largest to 1
auto normed = df::math::normalize(values);
// normed = {0.0, 0.25, 0.5, 0.75, 1.0}

// Normalize to custom range [-1, 1]; the braced list initializes a
// NormalizeConfig as {target_min, target_max}
auto custom = df::math::normalize(values, {-1.0, 1.0});
// custom = {-1.0, -0.5, 0.0, 0.5, 1.0}

Random Number Generation

Random Functions

// Distribution type enum; the enumerators mirror the four random_* generators.
// NOTE(review): no function declared in this section takes a DistType, so
// the enum's consumer is not shown here -- confirm where it is used.
enum class DistType {
    Uniform, Normal, Bernoulli, Poisson
};

// NOTE(review): none of these signatures takes a seed or generator, so how
// (or whether) results can be made reproducible is not shown here -- confirm.

// Generate random Serie of `count` values with uniform distribution in [lo, hi]
Serie<double> random_uniform(size_t count, double lo = 0.0, double hi = 1.0);

// Generate random Serie of `count` values with normal distribution
// (defaults: mean 0, standard deviation 1)
Serie<double> random_normal(size_t count, double mean = 0.0, double stddev = 1.0);

// Generate random boolean Serie with Bernoulli distribution
// (presumably `probability` is the chance of each element being true -- confirm)
Serie<bool> random_bernoulli(size_t count, double probability = 0.5);

// Generate random integer Serie with Poisson distribution of parameter `lambda`
Serie<int> random_poisson(size_t count, double lambda = 1.0);

// Random sampling: pick n random elements from a Serie.
// NOTE(review): whether sampling is with or without replacement, and what
// happens when n exceeds the Serie size, is not stated here -- confirm.
template <typename T>
Serie<T> random_sampling(const Serie<T> &serie, size_t n);
Random Example

// 100 uniform random values in [0, 10]
auto uniform = df::math::random_uniform(100, 0.0, 10.0);

// 50 normally distributed values (mean=5, stddev=2)
auto normal = df::math::random_normal(50, 5.0, 2.0);

// 30 coin flips (60% heads)
auto flips = df::math::random_bernoulli(30, 0.6);

// 100 Poisson-distributed event counts (lambda=3.5)
auto events = df::math::random_poisson(100, 3.5);

// Sample 5 random elements from a 10-element Serie
df::Serie<double> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
auto sample = df::math::random_sampling(data, 5);

Weighted Sum

Weighted Sum Functions

// Weighted sum with scalar weights: one weight per input Serie, i.e.
// result = weights[0]*series[0] + weights[1]*series[1] + ...
// NOTE(review): behavior when series and weights differ in length, or when
// the Series differ in size, is not stated in this section -- confirm.
template <typename T>
Serie<T> weightedSum(
    const std::vector<Serie<T>> &series,
    const std::vector<double> &weights
);

// Weighted sum with Serie weights (per-element weights): each input Serie
// is paired with its own Serie<double> of weights.
template <typename T>
Serie<T> weightedSum(
    const std::vector<Serie<T>> &series,
    const std::vector<Serie<double>> &weights
);
Weighted Sum Example

df::Serie<double> s1{1.0, 2.0, 3.0};
df::Serie<double> s2{4.0, 5.0, 6.0};
df::Serie<double> s3{7.0, 8.0, 9.0};

// Scalar weights: result = 0.5*s1 + 0.3*s2 + 0.2*s3
// NOTE(review): against the std::vector-based overloads documented for
// weightedSum, T cannot be deduced from braced lists; this call presumably
// relies on an overload not shown here -- confirm, or spell the call as
// weightedSum<double>(...) with explicit std::vector arguments.
auto ws = df::math::weightedSum(
    {s1, s2, s3},
    {0.5, 0.3, 0.2}
);
// ws = {0.5*1+0.3*4+0.2*7, 0.5*2+0.3*5+0.2*8, 0.5*3+0.3*6+0.2*9}
//    = {3.1, 4.1, 5.1}

NaN Handling

The df::nan namespace provides utilities for detecting, replacing, and interpolating NaN values in a Serie.

NaN Functions

// NaN utilities for Serie<double>. Every function takes its input by const
// reference; the ones that change values return the result as a new Serie.
namespace df::nan {

// Fill methods for interpolation (selected through interpolate()'s `method`)
// NOTE(review): how LINEAR / PREVIOUS / NEXT treat NaNs at the very start or
// end of a Serie (no valid neighbor on one side) is not stated here -- confirm.
enum class FillMethod {
    LINEAR,     // Linear interpolation between valid neighbors
    NEAREST,    // Use nearest valid value
    PREVIOUS,   // Use previous valid value (forward fill)
    NEXT,       // Use next valid value (backward fill)
    MEAN        // Replace with serie mean
};

// Set value at a specific index to NaN
// (out-of-range index handling is not stated here -- confirm)
Serie<double> set_at(const Serie<double> &serie, size_t index);

// Set values to NaN where predicate is true.
// The predicate receives a double and a size_t -- presumably the element
// value and its index; confirm against the implementation.
Serie<double> set_where(const Serie<double> &serie,
                         std::function<bool(double, size_t)> predicate);

// Find indices of NaN values
std::vector<size_t> find(const Serie<double> &serie);

// Count NaN values
size_t count(const Serie<double> &serie);

// Check if serie has any NaN values
bool has(const Serie<double> &serie);

// Interpolate (fill) NaN values using the specified method
// (linear interpolation when `method` is omitted)
Serie<double> interpolate(const Serie<double> &serie,
                           FillMethod method = FillMethod::LINEAR);

} // namespace df::nan
NaN Handling Example

// NaN at indices 2 and 4
df::Serie<double> data{1.0, 2.0, NAN, 4.0, NAN, 6.0};

// Check for NaN
bool has_nan = df::nan::has(data);        // true
size_t nan_count = df::nan::count(data);  // 2

// Find NaN indices
auto indices = df::nan::find(data);  // {2, 4}

// Fill NaN by linear interpolation
auto filled = df::nan::interpolate(data, df::nan::FillMethod::LINEAR);
// filled = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}

// Forward fill: each NaN takes the previous valid value
auto ffill = df::nan::interpolate(data, df::nan::FillMethod::PREVIOUS);
// ffill = {1.0, 2.0, 2.0, 4.0, 4.0, 6.0}

// Replace with mean
// (presumably the mean of the valid values, i.e. 3.25 here -- confirm)
auto mean_fill = df::nan::interpolate(data, df::nan::FillMethod::MEAN);

// Set specific values to NaN; the result is a new Serie, data is passed by
// const reference
auto modified = df::nan::set_at(data, 0);  // NaN at index 0

// Set NaN where value exceeds threshold: the only value above 5.0 is 6.0
// (index 5). Values are replaced by NaN, not clamped to the threshold.
auto clamped = df::nan::set_where(data, [](double v, size_t) {
    return v > 5.0;
});