Back
JSON I/O
Overview
The JSON module provides functions for reading and writing Dataframes in JSON format. The expected JSON structure is an array of objects, where each object represents a row and each key corresponds to a column name. Writing supports both pretty-printed and compact output formats.
JSON Format
The JSON format used by the library represents a Dataframe as an array of row objects:
Expected JSON Structure
// The JSON file should contain an array of objects:
// [
// {"name": "Alice", "age": 30, "score": 95.5},
// {"name": "Bob", "age": 25, "score": 87.2},
// {"name": "Carol", "age": 35, "score": 91.8}
// ]
//
// Each unique key becomes a column in the Dataframe.
// Values are auto-detected as string, integer, or double.
Reading JSON
read_json Function
// Read a JSON file into a Dataframe.
// filename: path of the JSON file to read. The file must contain an array
//           of objects: each object is one row, and each unique key
//           becomes a column.
// Returns the Dataframe built from the file; column values are
// auto-detected as string, integer, or double.
Dataframe read_json(const std::string &filename);
Reading Examples
// Read a JSON file
auto df = df::io::read_json("data.json");
// Access columns by name, specifying the element type of each column
auto names = df.get<std::string>("name");
auto ages = df.get<int>("age");
auto scores = df.get<double>("score");
std::cout << "Records: " << names.size() << std::endl;
std::cout << "First name: " << names[0] << std::endl;
// Read geospatial data
auto geo = df::io::read_json("stations.json");
// JSON: [{"lat": 48.85, "lon": 2.35, "name": "Paris"}, ...]
auto lats = geo.get<double>("lat");
auto lons = geo.get<double>("lon");
// Distinct identifier: `names` is already declared by the first example,
// so reusing it here would be a redefinition in the same scope.
auto station_names = geo.get<std::string>("name");
Writing JSON
write_json Functions
// Write a Dataframe to a JSON file
// df: the Dataframe to write
// filename: path of the JSON file to write
// pretty: if true (the default), output is indented and human-readable;
//         if false, output is compact
void write_json(const Dataframe &df,
const std::string &filename,
bool pretty = true);
// Write a Dataframe to an output stream
// df: the Dataframe to write
// os: destination stream (for example std::cout or a std::ostringstream)
// pretty: if true (the default), output is indented and human-readable;
//         if false, output is compact
void write_json(const Dataframe &df,
std::ostream &os,
bool pretty = true);
Writing Examples
// Create a Dataframe
df::Dataframe dataframe;
dataframe.add("city", df::Serie<std::string>{"Paris", "London", "Berlin"});
dataframe.add("population", df::Serie<double>{2.16e6, 8.98e6, 3.64e6});
dataframe.add("area_km2", df::Serie<double>{105.4, 1572.0, 891.7});
// Write pretty-printed JSON (default)
df::io::write_json(dataframe, "cities.json");
// Output:
// [
// {
// "city": "Paris",
// "population": 2160000.0,
// "area_km2": 105.4
// },
// ...
// ]
// Write compact JSON
df::io::write_json(dataframe, "cities_compact.json", false);
// Output: [{"city":"Paris","population":2160000.0,"area_km2":105.4},...]
// Write to stdout
df::io::write_json(dataframe, std::cout);
// Write to a string stream
std::ostringstream oss;
df::io::write_json(dataframe, oss, true);
std::string json_str = oss.str();
Complete Example
JSON Processing Pipeline
#include <dataframe/Dataframe.h>
#include <dataframe/io/json.h>
#include <dataframe/stats.h>
#include <dataframe/map.h>
#include <iostream>
int main() {
// Read sensor data from JSON
auto sensors = df::io::read_json("sensors.json");
// JSON: [{"id": "S1", "temp": 22.5, "humidity": 65.0}, ...]
auto temps = sensors.get<double>("temp");
auto humidity = sensors.get<double>("humidity");
auto ids = sensors.get<std::string>("id");
// Compute statistics
double mean_temp = df::stats::mean(temps);
double mean_hum = df::stats::mean(humidity);
std::cout << "Sensors: " << ids.size() << "\n";
std::cout << "Mean temperature: " << mean_temp << "\n";
std::cout << "Mean humidity: " << mean_hum << "\n";
// Compute heat index (simplified)
auto heat_index = df::map([](double t, double h, size_t) {
return t + 0.5 * h;
}, temps, humidity);
// Build output Dataframe
df::Dataframe results;
results.add("id", ids);
results.add("temperature", temps);
results.add("humidity", humidity);
results.add("heat_index", heat_index);
// Write results
df::io::write_json(results, "sensor_analysis.json");
std::cout << "Results written to sensor_analysis.json\n";
return 0;
}