Search code examples
rrtf

How can I read an *.rtf file?


I have a large *.rtf file with meteorological data. When I open it using LibreOffice I get the data in the following format:

[ {
  "fecha" : "2022-01-01",
  "indicativo" : "0016A",
  "nombre" : "REUS AEROPUERTO",
  "provincia" : "TARRAGONA",
  "altitud" : "71",
  "tmed" : "12,8",
  "prec" : "0,0",
  "tmin" : "4,6",
  "horatmin" : "07:33",
  "tmax" : "21,0",
  "horatmax" : "14:49",
  "dir" : "99",
  "velmedia" : "1,7",
  "racha" : "3,6",
  "horaracha" : "13:01",
  "sol" : "8,7",
  "presMax" : "1019,0",
  "horaPresMax" : "00",
  "presMin" : "1016,3",
  "horaPresMin" : "14"
}, {
  "fecha" : "2022-01-02",
  "indicativo" : "0016A",
  "nombre" : "REUS AEROPUERTO",
  "provincia" : "TARRAGONA",
  "altitud" : "71",
  "tmed" : "11,0",
  "prec" : "0,0",
  "tmin" : "4,2",
  "horatmin" : "01:13",
  "tmax" : "17,7",
  "horatmax" : "12:09",
  "dir" : "17",
  "velmedia" : "2,2",
  "racha" : "10,8",
  "horaracha" : "11:51",
  "sol" : "7,6",
  "presMax" : "1019,5",
  "horaPresMax" : "Varias",
  "presMin" : "1017,1",
  "horaPresMin" : "14"
}, {
  "fecha" : "2022-01-03",
  "indicativo" : "0016A",
  "nombre" : "REUS AEROPUERTO",
  "provincia" : "TARRAGONA",
  "altitud" : "71",
  "tmed" : "10,4",
  "prec" : "0,0",
  "tmin" : "5,7",
  "horatmin" : "23:54",
  "tmax" : "15,0",
  "horatmax" : "13:13",
  "dir" : "35",
  "velmedia" : "1,4",
  "racha" : "5,8",
  "horaracha" : "19:05",
  "sol" : "4,8",
  "presMax" : "1019,0",
  "horaPresMax" : "00",
  "presMin" : "1009,2",
  "horaPresMin" : "24"
} ]

All I need is to transform this data into columns (variables by date) using R. I tried with striprtf::read_rtf().

Any help would be appreciated. Thanks

Lee


Solution

  • This appears to be JSON, so we can parse it with the jsonlite package. First, store the text in a character object:

    chr <- '[ {
      "fecha" : "2022-01-01",
      "indicativo" : "0016A",
      "nombre" : "REUS AEROPUERTO",
      "provincia" : "TARRAGONA",
      "altitud" : "71",
      "tmed" : "12,8",
      "prec" : "0,0",
      "tmin" : "4,6",
      "horatmin" : "07:33",
      "tmax" : "21,0",
      "horatmax" : "14:49",
      "dir" : "99",
      "velmedia" : "1,7",
      "racha" : "3,6",
      "horaracha" : "13:01",
      "sol" : "8,7",
      "presMax" : "1019,0",
      "horaPresMax" : "00",
      "presMin" : "1016,3",
      "horaPresMin" : "14"
    }, {
      "fecha" : "2022-01-02",
      "indicativo" : "0016A",
      "nombre" : "REUS AEROPUERTO",
      "provincia" : "TARRAGONA",
      "altitud" : "71",
      "tmed" : "11,0",
      "prec" : "0,0",
      "tmin" : "4,2",
      "horatmin" : "01:13",
      "tmax" : "17,7",
      "horatmax" : "12:09",
      "dir" : "17",
      "velmedia" : "2,2",
      "racha" : "10,8",
      "horaracha" : "11:51",
      "sol" : "7,6",
      "presMax" : "1019,5",
      "horaPresMax" : "Varias",
      "presMin" : "1017,1",
      "horaPresMin" : "14"
    }, {
      "fecha" : "2022-01-03",
      "indicativo" : "0016A",
      "nombre" : "REUS AEROPUERTO",
      "provincia" : "TARRAGONA",
      "altitud" : "71",
      "tmed" : "10,4",
      "prec" : "0,0",
      "tmin" : "5,7",
      "horatmin" : "23:54",
      "tmax" : "15,0",
      "horatmax" : "13:13",
      "dir" : "35",
      "velmedia" : "1,4",
      "racha" : "5,8",
      "horaracha" : "19:05",
      "sol" : "4,8",
      "presMax" : "1019,0",
      "horaPresMax" : "00",
      "presMin" : "1009,2",
      "horaPresMin" : "24"
    } ]
    '
    

    We can then parse it into a data frame:

    jsonlite::fromJSON(chr)
    #        fecha indicativo          nombre provincia altitud tmed prec tmin horatmin tmax horatmax dir velmedia racha horaracha sol presMax horaPresMax presMin horaPresMin
    # 1 2022-01-01      0016A REUS AEROPUERTO TARRAGONA      71 12,8  0,0  4,6    07:33 21,0    14:49  99      1,7   3,6     13:01 8,7  1019,0          00  1016,3          14
    # 2 2022-01-02      0016A REUS AEROPUERTO TARRAGONA      71 11,0  0,0  4,2    01:13 17,7    12:09  17      2,2  10,8     11:51 7,6  1019,5      Varias  1017,1          14
    # 3 2022-01-03      0016A REUS AEROPUERTO TARRAGONA      71 10,4  0,0  5,7    23:54 15,0    13:13  35      1,4   5,8     19:05 4,8  1019,0          00  1009,2          24
    

    The numeric values use a comma as the decimal separator, and fecha is a date stored as text. We can fix both with:

    # Parse the JSON text into a data frame and let R infer each column's type;
    # dec = "," makes values such as "12,8" parse as numeric, and as.is = TRUE
    # keeps the remaining text columns as character rather than factor.
    out <- type.convert(jsonlite::fromJSON(chr), as.is = TRUE, dec = ",")
    # type.convert() never produces Date columns, so convert fecha explicitly.
    out[["fecha"]] <- as.Date(out[["fecha"]])
    
    out
    #        fecha indicativo          nombre provincia altitud tmed prec tmin horatmin tmax horatmax dir velmedia racha horaracha sol presMax horaPresMax presMin horaPresMin
    # 1 2022-01-01      0016A REUS AEROPUERTO TARRAGONA      71 12.8    0  4.6    07:33 21.0    14:49  99      1.7   3.6     13:01 8.7  1019.0          00  1016.3          14
    # 2 2022-01-02      0016A REUS AEROPUERTO TARRAGONA      71 11.0    0  4.2    01:13 17.7    12:09  17      2.2  10.8     11:51 7.6  1019.5      Varias  1017.1          14
    # 3 2022-01-03      0016A REUS AEROPUERTO TARRAGONA      71 10.4    0  5.7    23:54 15.0    13:13  35      1.4   5.8     19:05 4.8  1019.0          00  1009.2          24
    
    str(out)
    # 'data.frame': 3 obs. of  20 variables:
    #  $ fecha      : Date, format: "2022-01-01" "2022-01-02" "2022-01-03"
    #  $ indicativo : chr  "0016A" "0016A" "0016A"
    #  $ nombre     : chr  "REUS AEROPUERTO" "REUS AEROPUERTO" "REUS AEROPUERTO"
    #  $ provincia  : chr  "TARRAGONA" "TARRAGONA" "TARRAGONA"
    #  $ altitud    : int  71 71 71
    #  $ tmed       : num  12.8 11 10.4
    #  $ prec       : num  0 0 0
    #  $ tmin       : num  4.6 4.2 5.7
    #  $ horatmin   : chr  "07:33" "01:13" "23:54"
    #  $ tmax       : num  21 17.7 15
    #  $ horatmax   : chr  "14:49" "12:09" "13:13"
    #  $ dir        : int  99 17 35
    #  $ velmedia   : num  1.7 2.2 1.4
    #  $ racha      : num  3.6 10.8 5.8
    #  $ horaracha  : chr  "13:01" "11:51" "19:05"
    #  $ sol        : num  8.7 7.6 4.8
    #  $ presMax    : num  1019 1020 1019
    #  $ horaPresMax: chr  "00" "Varias" "00"
    #  $ presMin    : num  1016 1017 1009
    #  $ horaPresMin: int  14 14 24