Back

CSV I/O

Overview

The CSV module provides functions for reading and writing DataFrames in CSV (Comma-Separated Values) format. It supports customizable delimiters, quoting, headers, whitespace trimming, and type handling through the CSVOptions configuration struct.

CSVOptions

CSVOptions Struct

// Configuration shared by the CSV reading and writing functions.
// Every field has a default, so a default-constructed CSVOptions describes
// a comma-delimited file with a header row.
struct CSVOptions {
    // Character that separates fields within a row.
    char delimiter{','};

    // Character used to quote field values.
    char quote_char{'"'};

    // True when the first row holds the column names.
    bool has_header{true};

    // True to force every column to the double type.
    bool all_double{false};

    // True to trim leading/trailing whitespace.
    bool trim_whitespace{true};

    // True to skip blank lines.
    bool skip_empty_lines{true};

    // Text that represents a null/missing value (empty string by default).
    std::string null_value{};

    // Number of rows to skip at the start.
    std::size_t skip_rows{0};
};

The default options work well for standard CSV files with a header row and comma delimiters. Adjust the options for TSV files (delimiter = '\t'), headerless files (has_header = false), or files with custom null representations (for example, null_value = "NA").

Reading CSV

read_csv Function

// Read a CSV file into a Dataframe.
//
// Parameters:
//   filename - path of the CSV file to read.
//   options  - parsing configuration; when omitted, a default-constructed
//              CSVOptions is used.
// Returns:
//   The Dataframe built from the file.
//
// NOTE(review): how an unreadable or malformed file is reported is not shown
// in this declaration - confirm against the implementation.
Dataframe read_csv(const std::string &filename,
                   const CSVOptions &options = {});
Reading Examples

// Read a standard CSV file
auto df = df::io::read_csv("data.csv");

// Access columns
auto names  = df.get<std::string>("name");
auto values = df.get<double>("value");

// Read a TSV (tab-separated) file
df::CSVOptions tsv_opts;
tsv_opts.delimiter = '\t';
auto tsv_data = df::io::read_csv("data.tsv", tsv_opts);

// Read a file with no header
df::CSVOptions no_header;
no_header.has_header = false;
auto raw_data = df::io::read_csv("raw.csv", no_header);
// Columns are named "col_0", "col_1", etc.

// Force all columns to double (useful for numeric-only files)
df::CSVOptions numeric_opts;
numeric_opts.all_double = true;
auto numeric_data = df::io::read_csv("measurements.csv", numeric_opts);

// Skip metadata rows at the top
df::CSVOptions skip_opts;
skip_opts.skip_rows = 3;  // Skip first 3 rows
auto data = df::io::read_csv("report.csv", skip_opts);

// Handle custom null values
df::CSVOptions null_opts;
null_opts.null_value = "NA";
auto data_with_na = df::io::read_csv("survey.csv", null_opts);

Writing CSV

write_csv Functions

// Write a Dataframe to a CSV file.
//
// Parameters:
//   df       - the Dataframe to write.
//   filename - path of the output file.
//   options  - formatting configuration (e.g. the delimiter); when omitted,
//              a default-constructed CSVOptions is used.
//
// NOTE(review): which CSVOptions fields besides the delimiter affect writing,
// and how a failure to open the file is reported, are not shown here -
// confirm against the implementation.
void write_csv(const Dataframe    &df,
               const std::string  &filename,
               const CSVOptions   &options = {});

// Write a Dataframe to an output stream.
//
// Parameters:
//   df      - the Dataframe to write.
//   os      - destination stream; any std::ostream can be passed.
//   options - formatting configuration; when omitted, a default-constructed
//             CSVOptions is used.
//
// NOTE(review): whether the stream is flushed, and how stream errors are
// reported, are not shown here - confirm against the implementation.
void write_csv(const Dataframe    &df,
               std::ostream       &os,
               const CSVOptions   &options = {});
Writing Examples

// Create a Dataframe with two double columns ("x", "y") and one string
// column ("label"), five values each
df::Dataframe dataframe;
dataframe.add("x", df::Serie<double>{1.0, 2.0, 3.0, 4.0, 5.0});
dataframe.add("y", df::Serie<double>{2.1, 4.0, 5.9, 8.1, 10.0});
dataframe.add("label", df::Serie<std::string>{"a", "b", "c", "d", "e"});

// Write to file with default options
df::io::write_csv(dataframe, "output.csv");

// Write as TSV: only the delimiter is changed from the defaults
df::CSVOptions tsv_opts;
tsv_opts.delimiter = '\t';
df::io::write_csv(dataframe, "output.tsv", tsv_opts);

// Write to stdout via the std::ostream overload (std::cout needs <iostream>)
df::io::write_csv(dataframe, std::cout);

// Write to a string stream (std::ostringstream needs <sstream>) and take
// the CSV text out of it as a std::string
std::ostringstream oss;
df::io::write_csv(dataframe, oss);
std::string csv_string = oss.str();

Complete Example

CSV Round-Trip Example

#include <dataframe/Dataframe.h>
#include <dataframe/io/csv.h>
#include <dataframe/stats.h>
#include <iostream>

int main() {
    // Read measurement data
    df::CSVOptions opts;
    opts.all_double      = true;
    opts.trim_whitespace = true;

    auto data = df::io::read_csv("measurements.csv", opts);

    // Process the data
    auto temperatures = data.get<double>("temperature");
    auto pressures    = data.get<double>("pressure");

    std::cout << "Records: " << temperatures.size() << "\n";
    std::cout << "Mean temp: " << df::stats::mean(temperatures) << "\n";
    std::cout << "Mean pressure: " << df::stats::mean(pressures) << "\n";

    // Add computed columns
    auto z_temps = df::stats::z_score(temperatures);

    df::Dataframe results;
    results.add("temperature", temperatures);
    results.add("pressure", pressures);
    results.add("z_temperature", z_temps);

    // Write results
    df::io::write_csv(results, "analysis_results.csv");

    std::cout << "Results written to analysis_results.csv\n";

    return 0;
}