Using reticulate, I'm getting data from a Python API through Interactive Brokers. I'd like to convert the data passed from my Python API to a data frame or a tibble, but I'm completely baffled about how to get it done. I also cannot figure out how to recreate the data in a reprex, so I'm just providing the text as it appears on my R console.
Here's the data that gets passed to me in the openOrders object from the Python API. It looks like this when I enter the object name at the R prompt:
> openOrders
[[1]]
Order(orderId=16, clientId=501, permId=115804563, action='Buy', totalQuantity=1.0)
[[2]]
Order(orderId=17, clientId=501, permId=115804564, action='SELL', totalQuantity=1.0)
[[3]]
Order(orderId=18, clientId=501, permId=115804565, action='SELL', totalQuantity=1.0)
When I dput openOrders and view the file, I get:
> dput(openOrders) %>% print()
list(<environment>, <environment>, <environment>)
[[1]]
Order(orderId=25, clientId=501, permId=306800005, action='Buy', totalQuantity=1.0)
[[2]]
Order(orderId=26, clientId=501, permId=306800006, action='SELL', totalQuantity=1.0)
[[3]]
Order(orderId=27, clientId=501, permId=306800007, action='SELL', totalQuantity=1.0)
R tells me the data type is a list:
> typeof(openOrders)
[1] "list"
I can access individual entries in the list:
> openOrders[1]
[[1]]
Order(orderId=16, clientId=501, permId=115804563, action='Buy', totalQuantity=1.0)
I can access individual data elements by name in the list using the following:
> openOrders[[1]]$orderId
[1] 16
When I ask for the names in the list, there are 134 of them (here's an excerpt):
> names(openOrders[[1]])
[1] "account" "action" "activeStartTime"
...
[133] "volatilityType" "whatIf"
But I cannot figure out how to get the data into a data frame or tibble. Ideally, the data frame or tibble would look like this:
> openOrders
orderId clientId permId action totalQuantity
[1] 16 501 115804563 'Buy' 1.0
[2] 17 501 115804564 'SELL' 1.0
[3] 18 501 115804565. 'SELL' 1.0
I've tried the enframe function, as suggested in another post and I get:
> enframe(openOrders)
# A tibble: 3 x 2
name value
<int> <list>
1 1 <ib_ns..O>
2 2 <ib_ns..O>
3 3 <ib_ns..O>
I also tried the following from another post and got the error:
x <- as.data.frame(do.call(rbind, openOrders))
Warning: Error in <Anonymous>: environments cannot be coerced to other types
Here's the python code:
from ib_insync import *
import pandas as pd
import numpy as np
# Identify open orders
def ibOpenOrders():
orders = ib.openOrders()
ib.sleep(0)
return (orders)
ib = IB()
And here is my code in R:
library (reticulate)
use_python("/usr/local/bin/python3.7")
source_python("iBrokersCallsReprex.py")
openOrders <- ibOpenOrders()
And to get the data for dput, I set up the data in a pandas data frame on the python side before returning it to R. I changed the python code to:
def ibOpenOrders():
orders = ib.openOrders()
ib.sleep(0)
df = util.df(orders)
return (df)
By doing that, dput returns the following:
structure(list(orderId = c(68, 69, 70), clientId = c(500, 500,
500), permId = c(306801738, 306801739, 306801740), action = c("Buy",
"SELL", "SELL"), totalQuantity = c(1, 1, 1), orderType = c("LMT",
"LMT", "STP"), lmtPrice = c(9646.25, 9656.25, 1.79769313486232e+308
), auxPrice = c(1.79769313486232e+308, 1.79769313486232e+308,
9626.25), tif = c("", "", ""), activeStartTime = c("", "", ""
), activeStopTime = c("", "", ""), ocaGroup = c("", "", ""),
ocaType = c(0, 0, 0), orderRef = c("", "", ""), transmit = c(FALSE,
FALSE, TRUE), parentId = c(0, 68, 68), blockOrder = c(FALSE,
FALSE, FALSE), sweepToFill = c(FALSE, FALSE, FALSE), displaySize = c(0,
0, 0), triggerMethod = c(0, 0, 0), outsideRth = c(FALSE,
FALSE, FALSE), hidden = c(FALSE, FALSE, FALSE), goodAfterTime = c("",
"", ""), goodTillDate = c("", "", ""), rule80A = c("", "",
""), allOrNone = c(FALSE, FALSE, FALSE), minQty = c(2147483647,
2147483647, 2147483647), percentOffset = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), overridePercentageConstraints = c(FALSE,
FALSE, FALSE), trailStopPrice = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), trailingPercent = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), faGroup = c("",
"", ""), faProfile = c("", "", ""), faMethod = c("", "",
""), faPercentage = c("", "", ""), designatedLocation = c("",
"", ""), openClose = c("O", "O", "O"), origin = c(0, 0, 0
), shortSaleSlot = c(0, 0, 0), exemptCode = c(-1, -1, -1),
discretionaryAmt = c(0, 0, 0), eTradeOnly = c(TRUE, TRUE,
TRUE), firmQuoteOnly = c(TRUE, TRUE, TRUE), nbboPriceCap = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), optOutSmartRouting = c(FALSE,
FALSE, FALSE), auctionStrategy = c(0, 0, 0), startingPrice = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), stockRefPrice = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), delta = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), stockRangeLower = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), stockRangeUpper = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), randomizePrice = c(FALSE,
FALSE, FALSE), randomizeSize = c(FALSE, FALSE, FALSE), volatility = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), volatilityType = c(2147483647,
2147483647, 2147483647), deltaNeutralOrderType = c("", "",
""), deltaNeutralAuxPrice = c(1.79769313486232e+308, 1.79769313486232e+308,
1.79769313486232e+308), deltaNeutralConId = c(0, 0, 0), deltaNeutralSettlingFirm = c("",
"", ""), deltaNeutralClearingAccount = c("", "", ""), deltaNeutralClearingIntent = c("",
"", ""), deltaNeutralOpenClose = c("", "", ""), deltaNeutralShortSale = c(FALSE,
FALSE, FALSE), deltaNeutralShortSaleSlot = c(0, 0, 0), deltaNeutralDesignatedLocation = c("",
"", ""), continuousUpdate = c(FALSE, FALSE, FALSE), referencePriceType = c(2147483647,
2147483647, 2147483647), basisPoints = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), basisPointsType = c(2147483647,
2147483647, 2147483647), scaleInitLevelSize = c(2147483647,
2147483647, 2147483647), scaleSubsLevelSize = c(2147483647,
2147483647, 2147483647), scalePriceIncrement = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), scalePriceAdjustValue = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), scalePriceAdjustInterval = c(2147483647,
2147483647, 2147483647), scaleProfitOffset = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), scaleAutoReset = c(FALSE,
FALSE, FALSE), scaleInitPosition = c(2147483647, 2147483647,
2147483647), scaleInitFillQty = c(2147483647, 2147483647,
2147483647), scaleRandomPercent = c(FALSE, FALSE, FALSE),
scaleTable = c("", "", ""), hedgeType = c("", "", ""), hedgeParam = c("",
"", ""), account = c("", "", ""), settlingFirm = c("", "",
""), clearingAccount = c("", "", ""), clearingIntent = c("",
"", ""), algoStrategy = c("", "", ""), algoParams = list(
list(), list(), list()), smartComboRoutingParams = list(
list(), list(), list()), algoId = c("", "", ""), whatIf = c(FALSE,
FALSE, FALSE), notHeld = c(FALSE, FALSE, FALSE), solicited = c(FALSE,
FALSE, FALSE), modelCode = c("", "", ""), orderComboLegs = list(
list(), list(), list()), orderMiscOptions = list(list(),
list(), list()), referenceContractId = c(0, 0, 0), peggedChangeAmount = c(0,
0, 0), isPeggedChangeAmountDecrease = c(FALSE, FALSE, FALSE
), referenceChangeAmount = c(0, 0, 0), referenceExchangeId = c("",
"", ""), adjustedOrderType = c("", "", ""), triggerPrice = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), adjustedStopPrice = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), adjustedStopLimitPrice = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), adjustedTrailingAmount = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), adjustableTrailingUnit = c(0,
0, 0), lmtPriceOffset = c(1.79769313486232e+308, 1.79769313486232e+308,
1.79769313486232e+308), conditions = list(list(), list(),
list()), conditionsCancelOrder = c(FALSE, FALSE, FALSE
), conditionsIgnoreRth = c(FALSE, FALSE, FALSE), extOperator = c("",
"", ""), softDollarTier = list(<environment>, <environment>,
<environment>), cashQty = c(1.79769313486232e+308, 1.79769313486232e+308,
1.79769313486232e+308), mifid2DecisionMaker = c("", "", ""
), mifid2DecisionAlgo = c("", "", ""), mifid2ExecutionTrader = c("",
"", ""), mifid2ExecutionAlgo = c("", "", ""), dontUseAutoPriceForHedge = c(FALSE,
FALSE, FALSE), isOmsContainer = c(FALSE, FALSE, FALSE), discretionaryUpToLimitPrice = c(FALSE,
FALSE, FALSE), autoCancelDate = c("", "", ""), filledQuantity = c(1.79769313486232e+308,
1.79769313486232e+308, 1.79769313486232e+308), refFuturesConId = c(0,
0, 0), autoCancelParent = c(FALSE, FALSE, FALSE), shareholder = c("",
"", ""), imbalanceOnly = c(FALSE, FALSE, FALSE), routeMarketableToBbo = c(FALSE,
FALSE, FALSE), parentPermId = c(0, 0, 0), usePriceMgmtAlgo = c(FALSE,
FALSE, FALSE)), class = "data.frame", row.names = c(NA, -3L
), pandas.index = <environment>)
On the R side, here is the structure of openOrders
> str(openOrders)
tibble [9 × 130] (S3: tbl_df/tbl/data.frame)
$ orderId : num [1:9] 140 141 142 133 134 132 148 149 150
$ clientId : num [1:9] 500 500 500 500 500 500 500 500 500
$ permId : num [1:9] 1.78e+09 1.78e+09 1.78e+09 1.78e+09 1.78e+09 ...
$ action : chr [1:9] "BUY" "SELL" "SELL" "SELL" ...
$ isPeggedChangeAmountDecrease : logi [1:9] FALSE FALSE FALSE FALSE FALSE FALSE ...
[list output truncated]
- attr(*, "pandas.index")=RangeIndex(start=0, stop=9, step=1)
Here's what I get when I print the pandas data frame on the python side:
def ibOpenOrders():
openOrders = ib.openOrders()
ib.sleep(0)
#print (openOrders.head())
df = util.df(openOrders)
print (df.head())
orderId clientId ... parentPermId usePriceMgmtAlgo
0 13 400 ... 0 False
1 14 400 ... 0 False
2 12 400 ... 0 False
3 7 400 ... 0 False
4 5 400 ... 0 False
And, here's what I get when I print just the softDollarTier attribute on the python side:
print (openOrders.softDollarTier)
[18 rows x 130 columns]
0 SoftDollarTier(name='', val='', displayName='')
1 SoftDollarTier(name='', val='', displayName='')
2 SoftDollarTier(name='', val='', displayName='')
3 SoftDollarTier(name='', val='', displayName='')
And here's what I get on the R side for that same attribute when I access it directly.
> head(openOrders$softDollarTier)
[[1]]
SoftDollarTier(name='', val='', displayName='')
[[2]]
SoftDollarTier(name='', val='', displayName='')
[[3]]
SoftDollarTier(name='', val='', displayName='')
Any ideas?
As discussed, as long as the Python method returns an actual Pandas data frame, reticulate
will convert to R data frame. To demonstrate with reproducible example of various atomic types:
Python
import numpy as np
import pandas as pd
alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
data_tools = ['sas', 'stata', 'spss', 'python', 'r', 'julia']
### DATA BUILD
def build_py_df():
np.random.seed(6520)
random_df = pd.DataFrame({'group': np.random.choice(data_tools, 500),
'int': np.random.randint(1, 10, 500),
'num': np.random.randn(500),
'char': [''.join(np.random.choice(list(alpha), 3)) for _ in range(500)],
'bool': np.random.choice([True, False], 500),
'date': np.random.choice(pd.date_range('2000-01-01', '2019-05-31'), 500)
})
return random_df
df = build_py_df()
print(df.head(10))
Output
# group int num char bool date
# 0 r 8 -0.604529 eNR True 2008-09-01
# 1 stata 7 0.875878 0G9 True 2004-07-13
# 2 spss 4 -0.857370 mrH False 2017-11-29
# 3 stata 6 -2.144899 MFj True 2003-03-03
# 4 stata 3 -0.408117 Gsh True 2008-11-28
# 5 stata 2 1.324790 gR0 True 2004-04-15
# 6 julia 6 0.682228 jhR True 2004-09-18
# 7 python 6 -0.993106 cqT True 2002-03-27
# 8 julia 5 -0.346687 GfC True 2007-04-30
# 9 r 7 0.925665 d1a True 2006-01-01
R
library (reticulate)
source_python("/path/to/Python/script.py")
py_df <- build_py_df()
head(py_df, 10)
Output
# group int num char bool date
# 1 r 8 -0.6045292 eNR TRUE 2008-09-01
# 2 stata 7 0.8758784 0G9 TRUE 2004-07-13
# 3 spss 4 -0.8573697 mrH FALSE 2017-11-29
# 4 stata 6 -2.1448990 MFj TRUE 2003-03-03
# 5 stata 3 -0.4081175 Gsh TRUE 2008-11-28
# 6 stata 2 1.3247895 gR0 TRUE 2004-04-15
# 7 julia 6 0.6822280 jhR TRUE 2004-09-18
# 8 python 6 -0.9931057 cqT TRUE 2002-03-27
# 9 julia 5 -0.3466866 GfC TRUE 2007-04-30
# 10 r 7 0.9256647 d1a TRUE 2006-01-01
For meta data
str(py_df)
# 'data.frame': 500 obs. of 6 variables:
# $ group: chr "r" "stata" "spss" "stata" ...
# $ int : num 8 7 4 6 3 2 6 6 5 7 ...
# $ num : num -0.605 0.876 -0.857 -2.145 -0.408 ...
# $ char : chr "eNR" "0G9" "mrH" "MFj" ...
# $ bool : logi TRUE TRUE FALSE TRUE TRUE TRUE ...
# $ date : POSIXct, format: "2008-09-01" "2004-07-13" "2017-11-29" "2003-03-03" ...
# - attr(*, "pandas.index")=RangeIndex(start=0, stop=500, step=1)
attributes(py_df)
# $`names`
# [1] "group" "int" "num" "char" "bool" "date"
# $class
# [1] "data.frame"
# $row.names
# [1] 1 2 3 4 5 6
# SHOW ALL ATTRIBUTES
# $pandas.index
# RangeIndex(start=0, stop=500, step=1)
attributes(py_df)$pandas.index
# RangeIndex(start=0, stop=500, step=1)