Back

JSON I/O

Overview

The JSON module provides functions for reading and writing Dataframes in JSON format: `df::io::read_json` and `df::io::write_json`. The expected JSON structure is an array of objects, where each object represents a row and each key corresponds to a column name. Writing supports both pretty-printed (the default) and compact output formats.

JSON Format

The JSON format used by the library represents a Dataframe as an array of row objects, one object per row:

Expected JSON Structure

// The JSON file should contain an array of objects:
// [
//   {"name": "Alice", "age": 30, "score": 95.5},
//   {"name": "Bob",   "age": 25, "score": 87.2},
//   {"name": "Carol", "age": 35, "score": 91.8}
// ]
//
// Each unique key becomes a column in the Dataframe.
// Values are auto-detected as string, integer, or double.

Reading JSON

read_json Function

// Read a JSON file into a Dataframe.
//
// The file must contain a top-level array of objects: each object is one row,
// and each unique key becomes a column of the returned Dataframe.
//
// filename: the JSON file to read.
// Returns the Dataframe built from the rows found in the file.
// NOTE(review): the behavior for a missing or malformed file is not
// documented here — confirm against the implementation.
Dataframe read_json(const std::string &filename);
Reading Examples

// Read a JSON file
auto df = df::io::read_json("data.json");

// Access columns by name
auto names  = df.get<std::string>("name");
auto ages   = df.get<int>("age");
auto scores = df.get<double>("score");

std::cout << "Records: " << names.size() << std::endl;
std::cout << "First name: " << names[0] << std::endl;

// Read geospatial data
auto geo = df::io::read_json("stations.json");
// JSON: [{"lat": 48.85, "lon": 2.35, "name": "Paris"}, ...]
auto lats  = geo.get<double>("lat");
auto lons  = geo.get<double>("lon");
auto names = geo.get<std::string>("name");

Writing JSON

write_json Functions

// Write a Dataframe to a JSON file.
// df:       the Dataframe to serialize
// filename: the file to write
// pretty:   if true (the default), output is indented and human-readable;
//           if false, output is compact
void write_json(const Dataframe   &df,
                const std::string &filename,
                bool pretty = true);

// Write a Dataframe to an output stream (for example std::cout or a
// std::ostringstream).
// df:     the Dataframe to serialize
// os:     the stream that receives the JSON text
// pretty: if true (the default), output is indented and human-readable;
//         if false, output is compact
void write_json(const Dataframe   &df,
                std::ostream      &os,
                bool pretty = true);
Writing Examples

// Create a Dataframe
df::Dataframe dataframe;
dataframe.add("city", df::Serie<std::string>{"Paris", "London", "Berlin"});
dataframe.add("population", df::Serie<double>{2.16e6, 8.98e6, 3.64e6});
dataframe.add("area_km2", df::Serie<double>{105.4, 1572.0, 891.7});

// Write pretty-printed JSON (default)
df::io::write_json(dataframe, "cities.json");
// Output:
// [
//   {
//     "city": "Paris",
//     "population": 2160000.0,
//     "area_km2": 105.4
//   },
//   ...
// ]

// Write compact JSON
df::io::write_json(dataframe, "cities_compact.json", false);
// Output: [{"city":"Paris","population":2160000.0,"area_km2":105.4},...]

// Write to stdout
df::io::write_json(dataframe, std::cout);

// Write to a string stream
std::ostringstream oss;
df::io::write_json(dataframe, oss, true);
std::string json_str = oss.str();

Complete Example

JSON Processing Pipeline

#include <dataframe/Dataframe.h>
#include <dataframe/io/json.h>
#include <dataframe/stats.h>
#include <dataframe/map.h>
#include <iostream>

int main() {
    // Read sensor data from JSON
    auto sensors = df::io::read_json("sensors.json");
    // JSON: [{"id": "S1", "temp": 22.5, "humidity": 65.0}, ...]

    auto temps     = sensors.get<double>("temp");
    auto humidity  = sensors.get<double>("humidity");
    auto ids       = sensors.get<std::string>("id");

    // Compute statistics
    double mean_temp = df::stats::mean(temps);
    double mean_hum  = df::stats::mean(humidity);

    std::cout << "Sensors: " << ids.size() << "\n";
    std::cout << "Mean temperature: " << mean_temp << "\n";
    std::cout << "Mean humidity: " << mean_hum << "\n";

    // Compute heat index (simplified)
    auto heat_index = df::map([](double t, double h, size_t) {
        return t + 0.5 * h;
    }, temps, humidity);

    // Build output Dataframe
    df::Dataframe results;
    results.add("id", ids);
    results.add("temperature", temps);
    results.add("humidity", humidity);
    results.add("heat_index", heat_index);

    // Write results
    df::io::write_json(results, "sensor_analysis.json");
    std::cout << "Results written to sensor_analysis.json\n";

    return 0;
}