Back
CSV I/O
Overview
The CSV module provides functions for reading and writing Dataframe objects in CSV (Comma-Separated Values) format.
It supports customizable delimiters, quoting, headers, whitespace trimming, null-value markers, row skipping,
and type handling through the CSVOptions configuration struct.
CSVOptions
CSVOptions Struct
/// Configuration for CSV reading and writing; every field has a default.
struct CSVOptions {
    /// Field separator character.
    char delimiter{','};
    /// Quote character for fields.
    char quote_char{'"'};
    /// Whether the first row contains column names.
    bool has_header{true};
    /// Whether to force all columns to the double type.
    bool all_double{false};
    /// Whether to trim leading/trailing whitespace.
    bool trim_whitespace{true};
    /// Whether to skip blank lines.
    bool skip_empty_lines{true};
    /// String that represents a null/missing value.
    std::string null_value{""};
    /// Number of rows to skip at the start.
    size_t skip_rows{0};
};
The default options work well for standard CSV files with a header row and comma delimiters.
Adjust the options for TSV files (delimiter = '\t'), headerless files (has_header = false), or files
with custom null representations (for example, null_value = "NA").
Reading CSV
read_csv Function
// Read a CSV file into a Dataframe.
//   filename: name of the CSV file to read
//   options:  reader configuration; defaults to a default-constructed CSVOptions
// Returns the file's contents as a Dataframe.
Dataframe read_csv(const std::string &filename,
const CSVOptions &options = {});
Reading Examples
// Read a standard CSV file
auto df = df::io::read_csv("data.csv");
// Access columns
auto names = df.get<std::string>("name");
auto values = df.get<double>("value");
// Read a TSV (tab-separated) file
df::CSVOptions tsv_opts;
tsv_opts.delimiter = '\t';
auto tsv_data = df::io::read_csv("data.tsv", tsv_opts);
// Read a file with no header
df::CSVOptions no_header;
no_header.has_header = false;
auto raw_data = df::io::read_csv("raw.csv", no_header);
// Columns are named "col_0", "col_1", etc.
// Force all columns to double (useful for numeric-only files)
df::CSVOptions numeric_opts;
numeric_opts.all_double = true;
auto numeric_data = df::io::read_csv("measurements.csv", numeric_opts);
// Skip metadata rows at the top
df::CSVOptions skip_opts;
skip_opts.skip_rows = 3; // Skip first 3 rows
auto data = df::io::read_csv("report.csv", skip_opts);
// Handle custom null values
df::CSVOptions null_opts;
null_opts.null_value = "NA";
auto data_with_na = df::io::read_csv("survey.csv", null_opts);
Writing CSV
write_csv Functions
// Write a Dataframe to a CSV file.
//   df:       the Dataframe to write
//   filename: name of the output file
//   options:  writer configuration; defaults to a default-constructed CSVOptions
void write_csv(const Dataframe &df,
const std::string &filename,
const CSVOptions &options = {});
// Write a Dataframe to an output stream.
//   df:      the Dataframe to write
//   os:      stream that receives the CSV output
//   options: writer configuration; defaults to a default-constructed CSVOptions
void write_csv(const Dataframe &df,
std::ostream &os,
const CSVOptions &options = {});
Writing Examples
// Create a Dataframe with two double columns and one string column
df::Dataframe dataframe;
dataframe.add("x", df::Serie<double>{1.0, 2.0, 3.0, 4.0, 5.0});
dataframe.add("y", df::Serie<double>{2.1, 4.0, 5.9, 8.1, 10.0});
dataframe.add("label", df::Serie<std::string>{"a", "b", "c", "d", "e"});
// Write to file with default options
df::io::write_csv(dataframe, "output.csv");
// Write as TSV by setting a tab delimiter
df::CSVOptions tsv_opts;
tsv_opts.delimiter = '\t';
df::io::write_csv(dataframe, "output.tsv", tsv_opts);
// Write to stdout using the output-stream overload
df::io::write_csv(dataframe, std::cout);
// Write to a string stream, then take the result as a std::string
std::ostringstream oss;
df::io::write_csv(dataframe, oss);
std::string csv_string = oss.str();
Complete Example
CSV Round-Trip Example
#include <dataframe/Dataframe.h>
#include <dataframe/io/csv.h>
#include <dataframe/stats.h>
#include <iostream>
int main() {
// Read measurement data
df::CSVOptions opts;
opts.all_double = true;
opts.trim_whitespace = true;
auto data = df::io::read_csv("measurements.csv", opts);
// Process the data
auto temperatures = data.get<double>("temperature");
auto pressures = data.get<double>("pressure");
std::cout << "Records: " << temperatures.size() << "\n";
std::cout << "Mean temp: " << df::stats::mean(temperatures) << "\n";
std::cout << "Mean pressure: " << df::stats::mean(pressures) << "\n";
// Add computed columns
auto z_temps = df::stats::z_score(temperatures);
df::Dataframe results;
results.add("temperature", temperatures);
results.add("pressure", pressures);
results.add("z_temperature", z_temps);
// Write results
df::io::write_csv(results, "analysis_results.csv");
std::cout << "Results written to analysis_results.csv\n";
return 0;
}