I have a dump file of an XGBoost tree structure trained in Python. The structure has 377 trees, and the file has approximately 50,000 lines. I would like to convert this structure to MQL4 code, or C code, so to speak. The text file looks something like this:
booster[0]:
0:[inp0<6.85417] yes=1,no=2,missing=1
1:[inp10<1.00054] yes=3,no=4,missing=3
3:[inp21<0.974632] yes=7,no=8,missing=7
7:[inp22<1.01021] yes=15,no=16,missing=15
15:[inp15<0.994931] yes=31,no=32,missing=31
31:[inp12<0.999151] yes=63,no=64,missing=63
63:[inp23<0.957624] yes=111,no=112,missing=111
111:leaf=0.163636
112:leaf=-0.36
64:leaf=0.323077
32:[inp19<0.993949] yes=65,no=66,missing=65
65:[inp23<0.931146] yes=113,no=114,missing=113
113:leaf=-0
114:[inp23<0.972193] yes=161,no=162,missing=161
161:leaf=-0.421782
162:leaf=-0.133333
66:[inp2<61] yes=115,no=116,missing=115
115:leaf=0.381818
116:leaf=-0.388235
16:[inp17<0.985065] yes=33,no=34,missing=33
33:leaf=-0.381818
34:[inp23<0.946341] yes=67,no=68,missing=67
67:leaf=-0.36
68:[inp12<1.00121] yes=117,no=118,missing=117
117:[inp19<0.989751] yes=163,no=164,missing=163
163:leaf=0.367742
164:leaf=-0.0666667
118:[inp0<4.29167] yes=165,no=166,missing=165
165:leaf=-0
166:leaf=-0.3
8:[inp11<0.999875] yes=17,no=18,missing=17
17:[inp7<134] yes=35,no=36,missing=35
35:[inp9<62] yes=69,no=70,missing=69
69:[inp8<26] yes=119,no=120,missing=119
119:[inp23<0.993382] yes=167,no=168,missing=167
167:leaf=-0.211765
168:leaf=0.27
120:[inp21<0.989946] yes=169,no=170,missing=169
169:leaf=-0.392308
170:leaf=-0.161421
70:[inp17<0.997] yes=121,no=122,missing=121
121:[inp13<0.999021] yes=171,no=172,missing=171
171:leaf=-0.0378947
172:leaf=-0.340541
122:[inp11<0.9986] yes=173,no=174,missing=173
173:leaf=-0.2
174:leaf=0.0857143
36:[inp8<154] yes=71,no=72,missing=71
71:[inp8<132] yes=123,no=124,missing=123
123:[inp2<123] yes=175,no=176,missing=175
175:leaf=0.0277635
176:leaf=-0.132584
124:[inp4<170] yes=177,no=178,missing=177
177:leaf=0.269725
178:leaf=0.0618557
72:[inp2<26] yes=125,no=126,missing=125
125:[inp9<123] yes=179,no=180,missing=179
179:leaf=-0.224742
180:leaf=-0
126:[inp2<60] yes=181,no=182,missing=181
181:leaf=0.0330435
182:leaf=-0.0703448
18:[inp15<0.999742] yes=37,no=38,missing=37
37:[inp14<1.00044] yes=73,no=74,missing=73
73:[inp2<73] yes=127,no=128,missing=127
127:[inp16<1.00107] yes=183,no=184,missing=183
183:leaf=-0.36
184:leaf=0.0666667
128:[inp11<0.999936] yes=185,no=186,missing=185
185:leaf=0.4
186:leaf=-0.0666667
74:[inp3<371] yes=129,no=130,missing=129
129:leaf=0.494118
130:leaf=0.12
38:[inp23<0.997023] yes=75,no=76,missing=75
75:[inp20<1.00221] yes=131,no=132,missing=131
131:leaf=0.163636
132:[inp13<1.00015] yes=187,no=188,missing=187
187:leaf=-0.371429
188:leaf=0.0666667
76:leaf=0.3
4:[inp19<0.978746] yes=9,no=10,missing=9
9:[inp6<260] yes=19,no=20,missing=19
19:[inp3<405] yes=39,no=40,missing=39
39:[inp20<0.998759] yes=77,no=78,missing=77
77:leaf=0.0545455
78:[inp2<206] yes=133,no=134,missing=133
133:[inp9<217] yes=189,no=190,missing=189
189:leaf=-0.485714
190:leaf=-0.12
134:[inp0<5.39583] yes=191,no=192,missing=191
191:leaf=-0.24
192:leaf=0.3
40:[inp0<5.875] yes=79,no=80,missing=79
79:leaf=0.36
80:leaf=-0.15
20:[inp9<87] yes=41,no=42,missing=41
41:[inp15<0.99573] yes=81,no=82,missing=81
81:[inp4<272] yes=135,no=136,missing=135
135:leaf=0.381818
136:leaf=-0
82:[inp13<0.999781] yes=137,no=138,missing=137
137:leaf=-0.42
138:leaf=0.0545455
42:[inp3<199] yes=83,no=84,missing=83
83:leaf=0.458824
84:leaf=-0.0666667
10:[inp18<1.01862] yes=21,no=22,missing=21
21:[inp16<1.00397] yes=43,no=44,missing=43
43:[inp22<1.03335] yes=85,no=86,missing=85
85:[inp5<474] yes=139,no=140,missing=139
139:[inp19<0.998419] yes=193,no=194,missing=193
193:leaf=0.0538108
194:leaf=0.190909
140:[inp4<164] yes=195,no=196,missing=195
195:leaf=0.1125
196:leaf=-0.278351
86:[inp17<0.994249] yes=141,no=142,missing=141
141:[inp5<154] yes=197,no=198,missing=197
197:leaf=-0.0568421
198:leaf=-0.377778
142:[inp22<1.03873] yes=199,no=200,missing=199
199:leaf=-0.135484
200:leaf=0.1584
44:[inp15<0.999235] yes=87,no=88,missing=87
87:[inp21<0.9739] yes=143,no=144,missing=143
143:[inp3<106] yes=201,no=202,missing=201
201:leaf=-0.272727
202:leaf=0.410526
144:[inp14<1.00351] yes=203,no=204,missing=203
203:leaf=-0.0146652
204:leaf=0.155556
88:[inp21<0.999884] yes=145,no=146,missing=145
145:[inp22<1.04426] yes=205,no=206,missing=205
205:leaf=-0.0905588
206:leaf=0.105263
146:[inp4<313] yes=207,no=208,missing=207
207:leaf=0.182927
208:leaf=-0.253846
22:[inp18<1.01903] yes=45,no=46,missing=45
45:[inp0<3.60417] yes=89,no=90,missing=89
89:leaf=-0
90:leaf=0.471429
46:[inp18<1.01953] yes=91,no=92,missing=91
91:[inp2<32] yes=147,no=148,missing=147
147:[inp21<0.997154] yes=209,no=210,missing=209
209:leaf=-0.3
210:leaf=0.333333
148:[inp21<0.99536] yes=211,no=212,missing=211
211:leaf=-0.0666667
212:leaf=-0.45
92:[inp16<1.01109] yes=149,no=150,missing=149
149:[inp2<137] yes=213,no=214,missing=213
213:leaf=0.305085
214:leaf=-0.0923077
150:[inp4<117] yes=215,no=216,missing=215
215:leaf=0.294118
216:leaf=-0.0375
2:[inp5<183] yes=5,no=6,missing=5
5:[inp6<187] yes=11,no=12,missing=11
11:[inp13<1.00025] yes=23,no=24,missing=23
23:[inp18<1.0069] yes=47,no=48,missing=47
47:[inp20<1.00403] yes=93,no=94,missing=93
93:[inp23<0.975704] yes=151,no=152,missing=151
151:leaf=-0
152:leaf=-0.45
94:[inp23<0.990095] yes=153,no=154,missing=153
153:[inp2<28] yes=217,no=218,missing=217
217:leaf=-0
218:leaf=0.4
154:leaf=-0.2
48:leaf=-0.485714
24:[inp3<205] yes=49,no=50,missing=49
49:leaf=0.3
50:leaf=-0
12:[inp6<258] yes=25,no=26,missing=25
25:[inp10<1.00079] yes=51,no=52,missing=51
51:[inp22<1.03732] yes=95,no=96,missing=95
95:leaf=0.5
96:leaf=-0
52:leaf=-0.0666667
26:[inp9<52] yes=53,no=54,missing=53
53:leaf=0.375
54:[inp15<0.998562] yes=97,no=98,missing=97
97:leaf=-0.410526
98:[inp9<92] yes=155,no=156,missing=155
155:[inp3<120] yes=219,no=220,missing=219
219:leaf=-0
220:leaf=-0.428571
156:[inp8<275] yes=221,no=222,missing=221
221:leaf=0.44
222:leaf=-0.0545455
6:[inp10<1.00118] yes=13,no=14,missing=13
13:[inp4<366] yes=27,no=28,missing=27
27:[inp23<0.998109] yes=55,no=56,missing=55
55:[inp15<0.999976] yes=99,no=100,missing=99
99:[inp17<0.994571] yes=157,no=158,missing=157
157:[inp12<1.00049] yes=223,no=224,missing=223
223:leaf=-0.458824
224:leaf=-0.128571
158:[inp3<33] yes=225,no=226,missing=225
225:leaf=-0.12
226:leaf=-0.552381
100:[inp11<0.999604] yes=159,no=160,missing=159
159:leaf=0.12
160:leaf=-0.36
56:[inp18<1.00668] yes=101,no=102,missing=101
101:leaf=0.333333
102:leaf=-0.342857
28:[inp7<81] yes=57,no=58,missing=57
57:leaf=0.3
58:[inp9<20] yes=103,no=104,missing=103
103:leaf=0.0666667
104:leaf=-0.388235
14:[inp19<0.992859] yes=29,no=30,missing=29
29:[inp11<0.999532] yes=59,no=60,missing=59
59:leaf=0.415385
60:[inp1<5] yes=105,no=106,missing=105
105:leaf=-0.2
106:leaf=0.15
30:[inp3<227] yes=61,no=62,missing=61
61:[inp2<126] yes=107,no=108,missing=107
107:leaf=-0.461538
108:leaf=-0
62:[inp0<6.9375] yes=109,no=110,missing=109
109:leaf=0.272727
110:leaf=-0.15
booster[1]:
0:[...
There are 24 inputs, named inp0, inp1, ..., inp23.
Depending on whether these inputs are smaller than certain thresholds, a booster[0] probability is set.
There are 377 booster trees (booster[0], ..., booster[376]).
The condition numbers do not follow the same sequence in different boosters.
The "missing" parameter is irrelevant, as I do not have any missing input values.
I would like to get all 377 booster probabilities and sum them up to get an overall sum, and then feed it to a logistic function to calculate the final output probability. The logistic function is:
1 / ( 1 + exp( -sum ) )
The C code I would like to get is something like this:
if ( inp0 < 6.85417 && inp10 < 1.00054 ... ) booster[0] = 0.163636;
if ( ...
Does anyone have any idea how to take this text file and output it as C code as easily as possible, either in Python or C++?
The good news is that it is now possible to convert your sklearn models (including XGBoost) into native code in popular programming languages with m2cgen (Model To Code Generator), a Python library. The advantage here is that you can deploy your ML model on any platform, because it consists only of basic if statements and mathematical operations or functions that are available in every programming language. Platform-independent deployment of ML models is a very strong capability for a data scientist. In our case it is as easy as:
import m2cgen as m2c

# Generate C source for the trained model.
# xgb_model is your trained XGBoost model with .fit method.
code = m2c.export_to_c(xgb_model)

# Write the code to a text file. The `with` block closes the file even if
# the write raises, so the handle is never leaked.
# NOTE(review): the leading "C code for the XGB structure: " text is not C;
# remove it from the file before handing the output to a compiler.
with open("xgb_c.txt", "w") as text_file:
    text_file.write("C code for the XGB structure: %s" % code)
And the converted C code looks something like this:
void score(double * input, double * output) {
double var0;
if ((input[0]) >= (6.8125)) {
if ((input[15]) >= (1.00002789)) {
if ((input[8]) >= (127)) {
if ((input[11]) >= (0.999750614)) {
if ((input[7]) >= (252)) {
var0 = -0.226666674;
.
.
.
} else {
var376 = -0.0343097448;
}
double var377;
var377 = (1) / ((1) + (exp((0) - ((((((((-0.0) + ((((((((((((((((((((((((((((((((((((((((var0) + (var1)) + (var2)) + (var3)) + (var4)) + (var5)) + (var6)) + (var7)) + (var8)) + (var9)) + (var10)) + (var11)) + (var12)) + (var13)) + (var14)) + (var15)) + (var16)) + (var17)) + (var18)) + (var19)) + (var20)) + (var21)) + (var22)) + (var23)) + (var24)) + (var25)) + (var26)) + (var27)) + (var28)) + (var29)) + (var30)) + (var31)) + (var32)) + (var33)) + (var34)) + (var35)) + (var36)) + (var37)) + (var38)) + (var39))) + ((((((((((((((((((((((((((((((((((((((((((var40) + (var41)) + (var42)) + (var43)) + (var44)) + (var45)) + (var46)) + (var47)) + (var48)) + (var49)) + (var50)) + (var51)) + (var52)) + (var53)) + (var54)) + (var55)) + (var56)) + (var57)) + (var58)) + (var59)) + (var60)) + (var61)) + (var62)) + (var63)) + (var64)) + (var65)) + (var66)) + (var67)) + (var68)) + (var69)) + (var70)) + (var71)) + (var72)) + (var73)) + (var74)) + (var75)) + (var76)) + (var77)) + (var78)) + (var79)) + (var80)) + (var81))) + (((((((((((((((((((((((((((((((((((((((((((((((((((((((var82) + (var83)) + (var84)) + (var85)) + (var86)) + (var87)) + (var88)) + (var89)) + (var90)) + (var91)) + (var92)) + (var93)) + (var94)) + (var95)) + (var96)) + (var97)) + (var98)) + (var99)) + (var100)) + (var101)) + (var102)) + (var103)) + (var104)) + (var105)) + (var106)) + (var107)) + (var108)) + (var109)) + (var110)) + (var111)) + (var112)) + (var113)) + (var114)) + (var115)) + (var116)) + (var117)) + (var118)) + (var119)) + (var120)) + (var121)) + (var122)) + (var123)) + (var124)) + (var125)) + (var126)) + (var127)) + (var128)) + (var129)) + (var130)) + (var131)) + (var132)) + (var133)) + (var134)) + (var135)) + (var136))) + ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((var137) + (var138)) + (var139)) + (var140)) + (var141)) + (var142)) + (var143)) + (var144)) + (var145)) + (var146)) + (var147)) + (var148)) + (var149)) + (var150)) + (var151)) + (var152)) + (var153)) + (var154)) + (var155)) 
+ (var156)) + (var157)) + (var158)) + (var159)) + (var160)) + (var161)) + (var162)) + (var163)) + (var164)) + (var165)) + (var166)) + (var167)) + (var168)) + (var169)) + (var170)) + (var171)) + (var172)) + (var173)) + (var174)) + (var175)) + (var176)) + (var177)) + (var178)) + (var179)) + (var180)) + (var181)) + (var182)) + (var183)) + (var184)) + (var185)) + (var186)) + (var187)) + (var188)) + (var189)) + (var190)) + (var191)) + (var192)) + (var193)) + (var194))) + ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((var195) + (var196)) + (var197)) + (var198)) + (var199)) + (var200)) + (var201)) + (var202)) + (var203)) + (var204)) + (var205)) + (var206)) + (var207)) + (var208)) + (var209)) + (var210)) + (var211)) + (var212)) + (var213)) + (var214)) + (var215)) + (var216)) + (var217)) + (var218)) + (var219)) + (var220)) + (var221)) + (var222)) + (var223)) + (var224)) + (var225)) + (var226)) + (var227)) + (var228)) + (var229)) + (var230)) + (var231)) + (var232)) + (var233)) + (var234)) + (var235)) + (var236)) + (var237)) + (var238)) + (var239)) + (var240)) + (var241)) + (var242)) + (var243)) + (var244)) + (var245)) + (var246)) + (var247)) + (var248)) + (var249)) + (var250)) + (var251)) + (var252)) + (var253)) + (var254)) + (var255)) + (var256)) + (var257)) + (var258)) + (var259)) + (var260)) + (var261)) + (var262))) + (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((var263) + (var264)) + (var265)) + (var266)) + (var267)) + (var268)) + (var269)) + (var270)) + (var271)) + (var272)) + (var273)) + (var274)) + (var275)) + (var276)) + (var277)) + (var278)) + (var279)) + (var280)) + (var281)) + (var282)) + (var283)) + (var284)) + (var285)) + (var286)) + (var287)) + (var288)) + (var289)) + (var290)) + (var291)) + (var292)) + (var293)) + (var294)) + (var295)) + (var296)) + (var297)) + (var298)) + (var299)) + (var300)) + (var301)) + (var302)) + (var303)) + (var304)) + (var305)) + (var306)) + (var307)) + (var308)) + (var309)) + 
(var310)) + (var311)) + (var312)) + (var313)) + (var314)) + (var315)) + (var316)) + (var317)) + (var318)) + (var319)) + (var320)) + (var321)) + (var322)) + (var323)) + (var324)) + (var325)) + (var326)) + (var327)) + (var328)) + (var329)) + (var330)) + (var331)) + (var332)) + (var333)) + (var334)) + (var335)) + (var336)) + (var337)) + (var338)) + (var339)) + (var340)) + (var341)) + (var342)) + (var343))) + (((((((((((((((((((((((((((((((((var344) + (var345)) + (var346)) + (var347)) + (var348)) + (var349)) + (var350)) + (var351)) + (var352)) + (var353)) + (var354)) + (var355)) + (var356)) + (var357)) + (var358)) + (var359)) + (var360)) + (var361)) + (var362)) + (var363)) + (var364)) + (var365)) + (var366)) + (var367)) + (var368)) + (var369)) + (var370)) + (var371)) + (var372)) + (var373)) + (var374)) + (var375)) + (var376))))));
memcpy(output, (double[]){(1) - (var377), var377}, 2 * sizeof(double));
}
This can be translated into the MQL4 language fairly easily.