import json
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
# Load the BiocPkgTools R package through rpy2.
biocPkgTools = importr('BiocPkgTools')
# Call the R function biocPkgList() and keep the returned R object.
biocPkgList = biocPkgTools.biocPkgList()
# NOTE: this is the line that fails. json.loads() only accepts str, bytes or
# bytearray, but rpy2py() hands it a DataFrame object rather than JSON text.
biocPkgList = json.loads(ro.conversion.rpy2py(biocPkgList))
The dataframe looks great and I'm just trying to convert it to a json object with column names as keys but I receive this error:
Traceback (most recent call last):
File "/bioconductor/bioconductor.py", line 11, in <module>
json = json.loads(ro.conversion.rpy2py(biocPkgList))
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/json/__init__.py", line 339, in loads
raise TypeError(f'the JSON object must be str, bytes or bytearray, '
TypeError: the JSON object must be str, bytes or bytearray, not DataFrame
Another approach I tried was converting it to a pandas dataframe and then to JSON, but that also gives an error. I appreciate any help I can get.
Pandas method:
import rpy2.robjects.numpy2ri as rpyn
import json
import pandas as pd
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
# Load the BiocPkgTools R package and fetch the package list as an R object.
biocPkgTools = importr('BiocPkgTools')
biocPkgList = biocPkgTools.biocPkgList()
# Remember the R column names so they can be re-applied on the pandas side.
columns = list(biocPkgList.colnames)
# Build a pandas DataFrame directly from the rpy2 object; no rpy2 converter
# is activated anywhere in this snippet.
biocPkgList_df = pd.DataFrame(biocPkgList)
# Transpose, presumably because each R column arrives as a row -- TODO confirm.
biocPkgList_df = biocPkgList_df.T
biocPkgList_df.columns = columns
# orient='records' serializes as a JSON array with one object per row.
biocPkgList_json = biocPkgList_df.to_json(orient='records')
print(biocPkgList_json)
I get these R errors:
R[write to console]: Error: unimplemented type 'char' in 'eval'
R[write to console]: Error: cannot have attributes on a CHARSXP
R[write to console]: Fatal error: unable to initialize the JIT
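To convert an R DataFrame to a JSON-formatted Python dict/list structure (which seems to be what you are attempting), you need to either: (a) serialize the data frame to a JSON string in R and then parse that string in Python, or (b) convert the data frame to a pandas DataFrame in Python and then serialize that to JSON.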
For solution (a), I would recommend using the rjson R package:
import json
from rpy2.robjects.packages import importr
# Load the two R packages via rpy2: BiocPkgTools supplies the data and
# rjson does the serialization on the R side.
bioc_pkg_tools = importr('BiocPkgTools')
rjson = importr('rjson')
bioc_pkg_data_frame = bioc_pkg_tools.biocPkgList()
# toJSON() is called in R; its result is indexed with [0] below to pull out
# the single JSON string it holds.
r_json_string_vector = rjson.toJSON(bioc_pkg_data_frame)
py_json_string = r_json_string_vector[0]
# Parse the JSON text into plain Python objects (a dict keyed by column name,
# as the printed keys show).
py_json_structure = json.loads(py_json_string)
print(py_json_structure.keys())
# dict_keys(['Package', 'Version', 'Depends', 'Suggests', 'License', 'MD5sum', 'NeedsCompilation', 'Title', 'Description', 'biocViews', 'Author', 'Maintainer', 'git_url', 'git_branch', 'git_last_commit', 'git_last_commit_date', 'Date/Publication', 'source.ver', 'win.binary.ver', 'mac.binary.ver', 'vignettes', 'vignetteTitles', 'hasREADME', 'hasNEWS', 'hasINSTALL', 'hasLICENSE', 'Rfiles', 'dependencyCount', 'Imports', 'Enhances', 'dependsOnMe', 'VignetteBuilder', 'suggestsMe', 'LinkingTo', 'Archs', 'URL', 'SystemRequirements', 'BugReports', 'importsMe', 'PackageStatus', 'Video', 'linksToMe', 'License_restricts_use', 'organism', 'OS_type', 'License_is_FOSS'])
Now, as for (b) the code would be along these lines:
from rpy2.robjects import pandas2ri
from rpy2.robjects import default_converter
from rpy2.robjects.conversion import localconverter, rpy2py
# R's `base` package provides as.data.frame, which rpy2 exposes as
# `as_data_frame`.
base = importr('base')
# While this converter is active, the R data frame returned by the call is
# handed to the pandas converter instead of staying an rpy2 object.
with localconverter(default_converter + pandas2ri.converter):
    pandas_dataframe = base.as_data_frame(bioc_pkg_data_frame)
py_json_string = pandas_dataframe.to_json()
# Parse the JSON *string* just produced; the original passed
# `py_json_structure`, i.e. the very name being assigned, to json.loads().
py_json_structure = json.loads(py_json_string)
However, it does not work in this case (raising TypeError: 'NULLType' object is not iterable), because the R data frame contains lists (e.g. in the Depends column) and conversion of data frames with embedded lists is not yet supported by rpy2 (https://github.com/rpy2/rpy2/issues/773 and https://github.com/rpy2/rpy2/issues/860).
You can still extract a subset of the data frame that does not include the list columns:
# `get_r_class` is not defined anywhere in the visible snippets; bind it to
# R's own `class` function so each column's R class can be queried.
from rpy2.robjects import r
get_r_class = r['class']

# Names of columns whose R class is 'list' (to be excluded), and the
# positions of every other column (to be kept).
list_columns = []
columns_to_keep = []
# Positions start at 1 because they are later used for R-style indexing.
for column_index, column_name in enumerate(bioc_pkg_data_frame.names, start=1):
    # rx2 is equivalent of `bioc_pkg_data_frame[[column_name]]` in R
    column = bioc_pkg_data_frame.rx2(column_name)
    # class() returns a character vector; only its first entry is inspected.
    r_class = get_r_class(column)[0]
    if r_class == 'list':
        list_columns.append(column_name)
    else:
        columns_to_keep.append(column_index)
# we will exclude these:
print(list_columns)
# Depends, Suggests, biocViews, Author, Maintainer, vignettes, vignetteTitles, Rfiles, Imports, Enhances, dependsOnMe, suggestsMe, LinkingTo, Archs, importsMe, linksToMe
And then get a pandas dataframe and JSON (string/structure) with:
# Build the list-free subset that the conversion below expects; the original
# snippet used `bioc_pkg_data_frame_no_lists` without ever creating it.
# R-style indexing: True selects every row, and the IntVector holds the
# 1-based positions of the non-list columns collected in `columns_to_keep`.
# NOTE(review): reconstructed from the surrounding text -- confirm this is the
# intended subsetting call.
from rpy2.robjects import IntVector
bioc_pkg_data_frame_no_lists = bioc_pkg_data_frame.rx(
    True, IntVector(columns_to_keep)
)
# While this converter is active, the R data frame returned by the call is
# handed to the pandas converter instead of staying an rpy2 object.
with localconverter(default_converter + pandas2ri.converter):
    pandas_dataframe = base.as_data_frame(bioc_pkg_data_frame_no_lists)
py_json_string = pandas_dataframe.to_json()
# Parse the JSON *string* just produced; the original passed
# `py_json_structure`, i.e. the very name being assigned, to json.loads().
py_json_structure = json.loads(py_json_string)
(or you could convert the lists to a concatenated string)