Search code examples
rust · serde · rust-polars

Unable to convert JSON to a DataFrame: polars panicked


use reqwest::blocking::get;
use polars::prelude::*;
use serde::{Deserialize, Serialize};
use std::io::Cursor;


/// One record of the `stock_zh_a_hist` response, as deserialized by serde.
///
/// No serde rename attributes are used, so each field name is expected to
/// match the corresponding JSON key exactly.
#[derive(Serialize, Deserialize, Debug)]
struct StockZhAHist {
    /// Timestamp kept as a raw string, e.g. "1996-12-16T00:00:00.000".
    date: String,
    open: f64,
    close: f64,
    high: f64,
    low: f64,
    /// Typed as `f64` here; the panic message quoted below shows polars
    /// inferring `Int64` for this column from the raw JSON.
    volume: f64,
    turnover: f64,
    amplitude: f64,
    change_rate: f64,
    change_amount: f64,
    turnover_rate: f64,
}

/// Fetches the stock history JSON from a local server, deserializes it with
/// serde to confirm it parses, then feeds the same raw text to polars'
/// `JsonReader` to build a `DataFrame`.
///
/// Any reqwest, serde_json or polars error is propagated to the caller via `?`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let url = "http://127.0.0.1:8080/api/public/stock_zh_a_hist";
    // Blocking GET; the whole response body is read into a `String`.
    let response = get(url)?.text()?;
    // println!("{}", response);
    // Deserialize the response with serde into typed records. This borrows
    // `response`, so the raw text is still available for polars below.
    let stock_zh_a_hist: Vec<StockZhAHist> = serde_json::from_str(&response)?;
    // Print the typed records (this step works according to the report below).
    println!("{:?}", stock_zh_a_hist);
    // Convert the raw JSON text to a polars DataFrame. No JSON format is set
    // on the reader here. According to the report below, the panic occurs in
    // this conversion step rather than surfacing as an `Err`.
    let df = JsonReader::new(Cursor::new(response))
        .infer_schema_len(Some(1000))
        .finish()?;
    println!("{:?}", df);
    Ok(())
}

I have written a script that retrieves raw data from my server, and I was able to print out the deserialized JSON, `stock_zh_a_hist`. However, when I tried to print the DataFrame converted from this JSON, the program panicked with:

thread 'main' panicked at 'Arrow datatype Struct([Field { name: "date", data_type: LargeUtf8, is_nullable: true, metadata: {} }, Field { name: "open", data_type: Float64, is_nullable: true, metadata: {} }, Field { name: "close", data_type: Float64, is_nullable: true, metadata: {} }, Field { name: "high", data_type: Float64, is_nullable: true, metadata: {} }, Field { name: "low", data_type: Float64, is_nullable: true, metadata: {} }, Field { name: "volume", data_type: Int64, is_nullable: true, metadata: {} }, Field { name: "turnover", data_type: Float64, is_nullable: true, metadata: {} }, Field { name: "amplitude", data_type: Float64, is_nullable: true, metadata: {} }, Field { name: "change_rate", data_type: Float64, is_nullable: true, metadata: {} }, Field { name: "change_amount", data_type: Float64, is_nullable: true, metadata: {} }, Field { name: "turnover_rate", data_type: Float64, is_nullable: true, metadata: {} }]) not supported by Polars. You probably need to activate that data-type feature.', /home/arthur/.cargo/registry/src/index.crates.io-6f17d22bba15001f/polars-core-0.30.0/src/datatypes/field.rs:158:19.

I also read the polars source code at that location, which reads:

 #[cfg(feature = "dtype-decimal")]
            ArrowDataType::Decimal(precision, scale) => DataType::Decimal(Some(*precision), Some(*scale)),
            dt => panic!("Arrow datatype {dt:?} not supported by Polars. You probably need to activate that data-type feature."),  

I am assuming polars is having trouble reading the JSON with the correct data types, but how can I solve it?

Update 1: I have added the `dtype-decimal` feature in Cargo.toml, but the same error persists.

Update 2: the content of `stock_zh_a_hist` is as follows:

[StockZhAHist { date: "1996-12-16T00:00:00.000", open: 16.86, close: 16.86, high: 16.86, low: 16.86, volume: 62442.0, turnover: 105277000.0, amplitude: 0.0, change_rate: -10.22, change_amount: -1.92, turnover_rate: 0.87 },
StockZhAHist { date: "1996-12-17T00:00:00.000", open: 15.17, close: 15.17, high: 16.79, low: 15.17, volume: 463675.0, turnover: 718902016.0, amplitude: 9.61, change_rate: -10.02, change_amount: -1.69, turnover_rate: 6.49 },
StockZhAHist { date: "1996-12-18T00:00:00.000", open: 15.28, close: 16.69, high: 16.69, low: 15.18, volume: 445380.0, turnover: 719400000.0, amplitude: 9.95, change_rate: 10.02, change_amount: 1.52, turnover_rate: 6.24 },
StockZhAHist { date: "1996-12-19T00:00:00.000", open: 17.01, close: 16.4, high: 17.9, low: 15.99, volume: 572946.0, turnover: 970124992.0, amplitude: 11.44, change_rate: -1.74, change_amount: -0.29, turnover_rate: 8.03 },
StockZhAHist { date: "1996-12-20T00:00:00.000", open: 16.19, close: 16.39, high: 16.68, low: 15.9, volume: 277758.0, turnover: 451471008.0, amplitude: 4.76, change_rate: -0.06, change_amount: -0.01, turnover_rate: 3.89 },
StockZhAHist { date: "1996-12-23T00:00:00.000", open: 16.58, close: 16.57, high: 16.89, low: 16.4, volume: 155369.0, turnover: 258260992.0, amplitude: 2.99, change_rate: 1.1, change_amount: 0.18, turnover_rate: 2.18 },
StockZhAHist { date: "1996-12-24T00:00:00.000", open: 16.58, close: 15.95, high: 16.73, low: 15.93, volume: 153168.0, turnover: 249324000.0, amplitude: 4.83, change_rate: -3.74, change_amount: -0.62, turnover_rate: 2.15 },
StockZhAHist { date: "1996-12-25T00:00:00.000", open: 16.1, close: 16.47, high: 16.7, low: 15.86, volume: 175391.0, turnover: 285567008.0, amplitude: 5.27, change_rate: 3.26, change_amount: 0.52, turnover_rate: 2.46 },
StockZhAHist { date: "1996-12-26T00:00:00.000", open: 16.68, close: 16.47, high: 16.78, low: 16.3, volume: 51516.0, turnover: 85214000.0, amplitude: 2.91, change_rate: 0.0, change_amount: 0.0, turnover_rate: 0.72 }...]

Solution

  • This seems like a bug — the issue goes away if you enable the `dtype-struct` feature, but you definitely shouldn't have to do that just to read from JSON.

    As a workaround, even without `dtype-struct`, you can get this to work by converting your data to JSON Lines (one JSON object per line) and telling the reader to use `JsonFormat::JsonLines`.

    use polars::prelude::{JsonReader, SerReader};
    use serde::{Deserialize, Serialize};
    use std::io::Cursor;
    
    /// Row type for the sample data; each value is serialized with
    /// `serde_json::to_string` into one JSON object per line below.
    ///
    /// No serde rename attributes are used, so the emitted JSON keys are the
    /// field names, in declaration order.
    #[derive(Serialize, Deserialize, Debug)]
    struct StockZhAHist {
        /// Timestamp kept as a raw string, e.g. "1996-12-16T00:00:00.000".
        date: String,
        open: f64,
        close: f64,
        high: f64,
        low: f64,
        volume: f64,
        turnover: f64,
        amplitude: f64,
        change_rate: f64,
        change_amount: f64,
        turnover_rate: f64,
    }
    
    /// Builds three sample records, serializes them as JSON Lines (one JSON
    /// object per line), prints that text, then reads it back into a polars
    /// `DataFrame` using the JSON Lines reader format and prints the frame.
    ///
    /// Any serde_json or polars error is propagated to the caller via `?`.
    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let records = vec![
            StockZhAHist {
                date: String::from("1996-12-16T00:00:00.000"),
                open: 16.86,
                close: 16.86,
                high: 16.86,
                low: 16.86,
                volume: 62442.0,
                turnover: 105277000.0,
                amplitude: 0.0,
                change_rate: -10.22,
                change_amount: -1.92,
                turnover_rate: 0.87,
            },
            StockZhAHist {
                date: String::from("1996-12-17T00:00:00.000"),
                open: 15.17,
                close: 15.17,
                high: 16.79,
                low: 15.17,
                volume: 463675.0,
                turnover: 718902016.0,
                amplitude: 9.61,
                change_rate: -10.02,
                change_amount: -1.69,
                turnover_rate: 6.49,
            },
            StockZhAHist {
                date: String::from("1996-12-18T00:00:00.000"),
                open: 15.28,
                close: 16.69,
                high: 16.69,
                low: 15.18,
                volume: 445380.0,
                turnover: 719400000.0,
                amplitude: 9.95,
                change_rate: 10.02,
                change_amount: 1.52,
                turnover_rate: 6.24,
            },
        ];

        // JSON Lines: serialize every record onto its own line, bailing out
        // on the first serialization error.
        let mut lines = Vec::with_capacity(records.len());
        for record in &records {
            lines.push(serde_json::to_string(record)?);
        }
        let ndjson = lines.join("\n");
        println!("{}", ndjson);

        // Read the newline-delimited text into a polars DataFrame; the reader
        // is told explicitly that the input is JSON Lines.
        let reader = JsonReader::new(Cursor::new(ndjson))
            .infer_schema_len(Some(1000))
            .with_json_format(polars::prelude::JsonFormat::JsonLines);
        let frame = reader.finish()?;
        println!("{:?}", frame);

        Ok(())
    }
    
    {"date":"1996-12-16T00:00:00.000","open":16.86,"close":16.86,"high":16.86,"low":16.86,"volume":62442.0,"turnover":105277000.0,"amplitude":0.0,"change_rate":-10.22,"change_amount":-1.92,"turnover_rate":0.87}
    {"date":"1996-12-17T00:00:00.000","open":15.17,"close":15.17,"high":16.79,"low":15.17,"volume":463675.0,"turnover":718902016.0,"amplitude":9.61,"change_rate":-10.02,"change_amount":-1.69,"turnover_rate":6.49}
    {"date":"1996-12-18T00:00:00.000","open":15.28,"close":16.69,"high":16.69,"low":15.18,"volume":445380.0,"turnover":719400000.0,"amplitude":9.95,"change_rate":10.02,"change_amount":1.52,"turnover_rate":6.24}
    [/Users/ben/.cargo/registry/src/index.crates.io-6f17d22bba15001f/polars-io-0.30.0/src/ndjson/core.rs:162] &data_type = Struct(
        [
            Field {
                name: "date",
                data_type: LargeUtf8,
                is_nullable: true,
                metadata: {},
            },
            Field {
                name: "open",
                data_type: Float64,
                is_nullable: true,
                metadata: {},
            },
            Field {
                name: "close",
                data_type: Float64,
                is_nullable: true,
                metadata: {},
            },
            Field {
                name: "high",
                data_type: Float64,
                is_nullable: true,
                metadata: {},
            },
            Field {
                name: "low",
                data_type: Float64,
                is_nullable: true,
                metadata: {},
            },
            Field {
                name: "volume",
                data_type: Float64,
                is_nullable: true,
                metadata: {},
            },
            Field {
                name: "turnover",
                data_type: Float64,
                is_nullable: true,
                metadata: {},
            },
            Field {
                name: "amplitude",
                data_type: Float64,
                is_nullable: true,
                metadata: {},
            },
            Field {
                name: "change_rate",
                data_type: Float64,
                is_nullable: true,
                metadata: {},
            },
            Field {
                name: "change_amount",
                data_type: Float64,
                is_nullable: true,
                metadata: {},
            },
            Field {
                name: "turnover_rate",
                data_type: Float64,
                is_nullable: true,
                metadata: {},
            },
        ],
    )
    shape: (3, 11)
    ┌─────────────────────────┬───────┬───────┬───────┬───┬───────────┬─────────────┬───────────────┬───────────────┐
    │ date                    ┆ open  ┆ close ┆ high  ┆ … ┆ amplitude ┆ change_rate ┆ change_amount ┆ turnover_rate │
    │ ---                     ┆ ---   ┆ ---   ┆ ---   ┆   ┆ ---       ┆ ---         ┆ ---           ┆ ---           │
    │ str                     ┆ f64   ┆ f64   ┆ f64   ┆   ┆ f64       ┆ f64         ┆ f64           ┆ f64           │
    ╞═════════════════════════╪═══════╪═══════╪═══════╪═══╪═══════════╪═════════════╪═══════════════╪═══════════════╡
    │ 1996-12-16T00:00:00.000 ┆ 16.86 ┆ 16.86 ┆ 16.86 ┆ … ┆ 0.0       ┆ -10.22      ┆ -1.92         ┆ 0.87          │
    │ 1996-12-17T00:00:00.000 ┆ 15.17 ┆ 15.17 ┆ 16.79 ┆ … ┆ 9.61      ┆ -10.02      ┆ -1.69         ┆ 6.49          │
    │ 1996-12-18T00:00:00.000 ┆ 15.28 ┆ 16.69 ┆ 16.69 ┆ … ┆ 9.95      ┆ 10.02       ┆ 1.52          ┆ 6.24          │
    └─────────────────────────┴───────┴───────┴───────┴───┴───────────┴─────────────┴───────────────┴───────────────┘