Search code examples
python, c, xgboost, algorithmic-trading, mql4

Converting XGBoost tree structure dump file to MQL4 (C like language) code


I have a dump file of an XGBoost tree structure that was trained in Python. The structure has 377 trees, and the file has approximately 50,000 lines. I would like to convert this structure to MQL4 code (a C-like language), or equivalently to C code. The text file looks something like this:

booster[0]:
0:[inp0<6.85417] yes=1,no=2,missing=1
1:[inp10<1.00054] yes=3,no=4,missing=3
    3:[inp21<0.974632] yes=7,no=8,missing=7
        7:[inp22<1.01021] yes=15,no=16,missing=15
            15:[inp15<0.994931] yes=31,no=32,missing=31
                31:[inp12<0.999151] yes=63,no=64,missing=63
                    63:[inp23<0.957624] yes=111,no=112,missing=111
                        111:leaf=0.163636
                        112:leaf=-0.36
                    64:leaf=0.323077
                32:[inp19<0.993949] yes=65,no=66,missing=65
                    65:[inp23<0.931146] yes=113,no=114,missing=113
                        113:leaf=-0
                        114:[inp23<0.972193] yes=161,no=162,missing=161
                            161:leaf=-0.421782
                            162:leaf=-0.133333
                    66:[inp2<61] yes=115,no=116,missing=115
                        115:leaf=0.381818
                        116:leaf=-0.388235
            16:[inp17<0.985065] yes=33,no=34,missing=33
                33:leaf=-0.381818
                34:[inp23<0.946341] yes=67,no=68,missing=67
                    67:leaf=-0.36
                    68:[inp12<1.00121] yes=117,no=118,missing=117
                        117:[inp19<0.989751] yes=163,no=164,missing=163
                            163:leaf=0.367742
                            164:leaf=-0.0666667
                        118:[inp0<4.29167] yes=165,no=166,missing=165
                            165:leaf=-0
                            166:leaf=-0.3
        8:[inp11<0.999875] yes=17,no=18,missing=17
            17:[inp7<134] yes=35,no=36,missing=35
                35:[inp9<62] yes=69,no=70,missing=69
                    69:[inp8<26] yes=119,no=120,missing=119
                        119:[inp23<0.993382] yes=167,no=168,missing=167
                            167:leaf=-0.211765
                            168:leaf=0.27
                        120:[inp21<0.989946] yes=169,no=170,missing=169
                            169:leaf=-0.392308
                            170:leaf=-0.161421
                    70:[inp17<0.997] yes=121,no=122,missing=121
                        121:[inp13<0.999021] yes=171,no=172,missing=171
                            171:leaf=-0.0378947
                            172:leaf=-0.340541
                        122:[inp11<0.9986] yes=173,no=174,missing=173
                            173:leaf=-0.2
                            174:leaf=0.0857143
                36:[inp8<154] yes=71,no=72,missing=71
                    71:[inp8<132] yes=123,no=124,missing=123
                        123:[inp2<123] yes=175,no=176,missing=175
                            175:leaf=0.0277635
                            176:leaf=-0.132584
                        124:[inp4<170] yes=177,no=178,missing=177
                            177:leaf=0.269725
                            178:leaf=0.0618557
                    72:[inp2<26] yes=125,no=126,missing=125
                        125:[inp9<123] yes=179,no=180,missing=179
                            179:leaf=-0.224742
                            180:leaf=-0
                        126:[inp2<60] yes=181,no=182,missing=181
                            181:leaf=0.0330435
                            182:leaf=-0.0703448
            18:[inp15<0.999742] yes=37,no=38,missing=37
                37:[inp14<1.00044] yes=73,no=74,missing=73
                    73:[inp2<73] yes=127,no=128,missing=127
                        127:[inp16<1.00107] yes=183,no=184,missing=183
                            183:leaf=-0.36
                            184:leaf=0.0666667
                        128:[inp11<0.999936] yes=185,no=186,missing=185
                            185:leaf=0.4
                            186:leaf=-0.0666667
                    74:[inp3<371] yes=129,no=130,missing=129
                        129:leaf=0.494118
                        130:leaf=0.12
                38:[inp23<0.997023] yes=75,no=76,missing=75
                    75:[inp20<1.00221] yes=131,no=132,missing=131
                        131:leaf=0.163636
                        132:[inp13<1.00015] yes=187,no=188,missing=187
                            187:leaf=-0.371429
                            188:leaf=0.0666667
                    76:leaf=0.3
    4:[inp19<0.978746] yes=9,no=10,missing=9
        9:[inp6<260] yes=19,no=20,missing=19
            19:[inp3<405] yes=39,no=40,missing=39
                39:[inp20<0.998759] yes=77,no=78,missing=77
                    77:leaf=0.0545455
                    78:[inp2<206] yes=133,no=134,missing=133
                        133:[inp9<217] yes=189,no=190,missing=189
                            189:leaf=-0.485714
                            190:leaf=-0.12
                        134:[inp0<5.39583] yes=191,no=192,missing=191
                            191:leaf=-0.24
                            192:leaf=0.3
                40:[inp0<5.875] yes=79,no=80,missing=79
                    79:leaf=0.36
                    80:leaf=-0.15
            20:[inp9<87] yes=41,no=42,missing=41
                41:[inp15<0.99573] yes=81,no=82,missing=81
                    81:[inp4<272] yes=135,no=136,missing=135
                        135:leaf=0.381818
                        136:leaf=-0
                    82:[inp13<0.999781] yes=137,no=138,missing=137
                        137:leaf=-0.42
                        138:leaf=0.0545455
                42:[inp3<199] yes=83,no=84,missing=83
                    83:leaf=0.458824
                    84:leaf=-0.0666667
        10:[inp18<1.01862] yes=21,no=22,missing=21
            21:[inp16<1.00397] yes=43,no=44,missing=43
                43:[inp22<1.03335] yes=85,no=86,missing=85
                    85:[inp5<474] yes=139,no=140,missing=139
                        139:[inp19<0.998419] yes=193,no=194,missing=193
                            193:leaf=0.0538108
                            194:leaf=0.190909
                        140:[inp4<164] yes=195,no=196,missing=195
                            195:leaf=0.1125
                            196:leaf=-0.278351
                    86:[inp17<0.994249] yes=141,no=142,missing=141
                        141:[inp5<154] yes=197,no=198,missing=197
                            197:leaf=-0.0568421
                            198:leaf=-0.377778
                        142:[inp22<1.03873] yes=199,no=200,missing=199
                            199:leaf=-0.135484
                            200:leaf=0.1584
                44:[inp15<0.999235] yes=87,no=88,missing=87
                    87:[inp21<0.9739] yes=143,no=144,missing=143
                        143:[inp3<106] yes=201,no=202,missing=201
                            201:leaf=-0.272727
                            202:leaf=0.410526
                        144:[inp14<1.00351] yes=203,no=204,missing=203
                            203:leaf=-0.0146652
                            204:leaf=0.155556
                    88:[inp21<0.999884] yes=145,no=146,missing=145
                        145:[inp22<1.04426] yes=205,no=206,missing=205
                            205:leaf=-0.0905588
                            206:leaf=0.105263
                        146:[inp4<313] yes=207,no=208,missing=207
                            207:leaf=0.182927
                            208:leaf=-0.253846
            22:[inp18<1.01903] yes=45,no=46,missing=45
                45:[inp0<3.60417] yes=89,no=90,missing=89
                    89:leaf=-0
                    90:leaf=0.471429
                46:[inp18<1.01953] yes=91,no=92,missing=91
                    91:[inp2<32] yes=147,no=148,missing=147
                        147:[inp21<0.997154] yes=209,no=210,missing=209
                            209:leaf=-0.3
                            210:leaf=0.333333
                        148:[inp21<0.99536] yes=211,no=212,missing=211
                            211:leaf=-0.0666667
                            212:leaf=-0.45
                    92:[inp16<1.01109] yes=149,no=150,missing=149
                        149:[inp2<137] yes=213,no=214,missing=213
                            213:leaf=0.305085
                            214:leaf=-0.0923077
                        150:[inp4<117] yes=215,no=216,missing=215
                            215:leaf=0.294118
                            216:leaf=-0.0375
2:[inp5<183] yes=5,no=6,missing=5
    5:[inp6<187] yes=11,no=12,missing=11
        11:[inp13<1.00025] yes=23,no=24,missing=23
            23:[inp18<1.0069] yes=47,no=48,missing=47
                47:[inp20<1.00403] yes=93,no=94,missing=93
                    93:[inp23<0.975704] yes=151,no=152,missing=151
                        151:leaf=-0
                        152:leaf=-0.45
                    94:[inp23<0.990095] yes=153,no=154,missing=153
                        153:[inp2<28] yes=217,no=218,missing=217
                            217:leaf=-0
                            218:leaf=0.4
                        154:leaf=-0.2
                48:leaf=-0.485714
            24:[inp3<205] yes=49,no=50,missing=49
                49:leaf=0.3
                50:leaf=-0
        12:[inp6<258] yes=25,no=26,missing=25
            25:[inp10<1.00079] yes=51,no=52,missing=51
                51:[inp22<1.03732] yes=95,no=96,missing=95
                    95:leaf=0.5
                    96:leaf=-0
                52:leaf=-0.0666667
            26:[inp9<52] yes=53,no=54,missing=53
                53:leaf=0.375
                54:[inp15<0.998562] yes=97,no=98,missing=97
                    97:leaf=-0.410526
                    98:[inp9<92] yes=155,no=156,missing=155
                        155:[inp3<120] yes=219,no=220,missing=219
                            219:leaf=-0
                            220:leaf=-0.428571
                        156:[inp8<275] yes=221,no=222,missing=221
                            221:leaf=0.44
                            222:leaf=-0.0545455
    6:[inp10<1.00118] yes=13,no=14,missing=13
        13:[inp4<366] yes=27,no=28,missing=27
            27:[inp23<0.998109] yes=55,no=56,missing=55
                55:[inp15<0.999976] yes=99,no=100,missing=99
                    99:[inp17<0.994571] yes=157,no=158,missing=157
                        157:[inp12<1.00049] yes=223,no=224,missing=223
                            223:leaf=-0.458824
                            224:leaf=-0.128571
                        158:[inp3<33] yes=225,no=226,missing=225
                            225:leaf=-0.12
                            226:leaf=-0.552381
                    100:[inp11<0.999604] yes=159,no=160,missing=159
                        159:leaf=0.12
                        160:leaf=-0.36
                56:[inp18<1.00668] yes=101,no=102,missing=101
                    101:leaf=0.333333
                    102:leaf=-0.342857
            28:[inp7<81] yes=57,no=58,missing=57
                57:leaf=0.3
                58:[inp9<20] yes=103,no=104,missing=103
                    103:leaf=0.0666667
                    104:leaf=-0.388235
        14:[inp19<0.992859] yes=29,no=30,missing=29
            29:[inp11<0.999532] yes=59,no=60,missing=59
                59:leaf=0.415385
                60:[inp1<5] yes=105,no=106,missing=105
                    105:leaf=-0.2
                    106:leaf=0.15
            30:[inp3<227] yes=61,no=62,missing=61
                61:[inp2<126] yes=107,no=108,missing=107
                    107:leaf=-0.461538
                    108:leaf=-0
                62:[inp0<6.9375] yes=109,no=110,missing=109
                    109:leaf=0.272727
                    110:leaf=-0.15
booster[1]:
0:[...

There are 24 inputs as inp0, inp1, ..., inp23.

Depending on whether these inputs are smaller than the given thresholds, a leaf value (the "probability" of booster[0]) is selected.

There are 377 booster trees ( booster[0], ... , booster[376] ).

The condition (node) numbers do not follow the same sequence in different boosters.

The `missing` parameter is irrelevant, as I do not have any missing input values.

I would like to get the values of all 377 boosters, add them up to get an overall sum, and then feed that sum to a logistic function to calculate the final output probability. The logistic function is:

1 / ( 1 + exp( -sum ) )

The C code I would like to get is something like this:

if ( inp0 < 6.85417 && inp10 < 1.00054 ... ) booster[0] = 0.163636;
if ( ...

Does anyone have any idea how to take this text file and output it as C code as easily as possible, either in Python or C++?


Solution

  • The good news is that it is now possible to convert your sklearn models (including XGBoost) into native code in popular programming languages with m2cgen (Model To Code Generator), a Python library. The advantage is that you can deploy your ML model on any platform, because the generated code consists only of basic if statements and mathematical operations or functions that are available in every programming language. Platform-independent deployment of ML models is a very strong capability for a data scientist. In our case it is as easy as:

    import m2cgen as m2c

    # xgb_model is your trained XGBoost model (already fitted with .fit).
    # The exported C source is kept in `code` and written out below.
    code = m2c.export_to_c(xgb_model)

    # Write the code to a text file. The `with` block guarantees the file is
    # closed even if the write raises, which the manual open/close did not.
    with open("xgb_c.txt", "w") as text_file:
        text_file.write("C code for the XGB structure: %s" % code)
    

    And the converted C code looks something like this:

    void score(double * input, double * output) {
        double var0;
        if ((input[0]) >= (6.8125)) {
            if ((input[15]) >= (1.00002789)) {
                if ((input[8]) >= (127)) {
                    if ((input[11]) >= (0.999750614)) {
                        if ((input[7]) >= (252)) {
                            var0 = -0.226666674;
    .
    .
    .
    } else {
            var376 = -0.0343097448;
        }
        double var377;
        var377 = (1) / ((1) + (exp((0) - ((((((((-0.0) + ((((((((((((((((((((((((((((((((((((((((var0) + (var1)) + (var2)) + (var3)) + (var4)) + (var5)) + (var6)) + (var7)) + (var8)) + (var9)) + (var10)) + (var11)) + (var12)) + (var13)) + (var14)) + (var15)) + (var16)) + (var17)) + (var18)) + (var19)) + (var20)) + (var21)) + (var22)) + (var23)) + (var24)) + (var25)) + (var26)) + (var27)) + (var28)) + (var29)) + (var30)) + (var31)) + (var32)) + (var33)) + (var34)) + (var35)) + (var36)) + (var37)) + (var38)) + (var39))) + ((((((((((((((((((((((((((((((((((((((((((var40) + (var41)) + (var42)) + (var43)) + (var44)) + (var45)) + (var46)) + (var47)) + (var48)) + (var49)) + (var50)) + (var51)) + (var52)) + (var53)) + (var54)) + (var55)) + (var56)) + (var57)) + (var58)) + (var59)) + (var60)) + (var61)) + (var62)) + (var63)) + (var64)) + (var65)) + (var66)) + (var67)) + (var68)) + (var69)) + (var70)) + (var71)) + (var72)) + (var73)) + (var74)) + (var75)) + (var76)) + (var77)) + (var78)) + (var79)) + (var80)) + (var81))) + (((((((((((((((((((((((((((((((((((((((((((((((((((((((var82) + (var83)) + (var84)) + (var85)) + (var86)) + (var87)) + (var88)) + (var89)) + (var90)) + (var91)) + (var92)) + (var93)) + (var94)) + (var95)) + (var96)) + (var97)) + (var98)) + (var99)) + (var100)) + (var101)) + (var102)) + (var103)) + (var104)) + (var105)) + (var106)) + (var107)) + (var108)) + (var109)) + (var110)) + (var111)) + (var112)) + (var113)) + (var114)) + (var115)) + (var116)) + (var117)) + (var118)) + (var119)) + (var120)) + (var121)) + (var122)) + (var123)) + (var124)) + (var125)) + (var126)) + (var127)) + (var128)) + (var129)) + (var130)) + (var131)) + (var132)) + (var133)) + (var134)) + (var135)) + (var136))) + ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((var137) + (var138)) + (var139)) + (var140)) + (var141)) + (var142)) + (var143)) + (var144)) + (var145)) + (var146)) + (var147)) + (var148)) + (var149)) + (var150)) + (var151)) + (var152)) + (var153)) + (var154)) + 
(var155)) + (var156)) + (var157)) + (var158)) + (var159)) + (var160)) + (var161)) + (var162)) + (var163)) + (var164)) + (var165)) + (var166)) + (var167)) + (var168)) + (var169)) + (var170)) + (var171)) + (var172)) + (var173)) + (var174)) + (var175)) + (var176)) + (var177)) + (var178)) + (var179)) + (var180)) + (var181)) + (var182)) + (var183)) + (var184)) + (var185)) + (var186)) + (var187)) + (var188)) + (var189)) + (var190)) + (var191)) + (var192)) + (var193)) + (var194))) + ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((var195) + (var196)) + (var197)) + (var198)) + (var199)) + (var200)) + (var201)) + (var202)) + (var203)) + (var204)) + (var205)) + (var206)) + (var207)) + (var208)) + (var209)) + (var210)) + (var211)) + (var212)) + (var213)) + (var214)) + (var215)) + (var216)) + (var217)) + (var218)) + (var219)) + (var220)) + (var221)) + (var222)) + (var223)) + (var224)) + (var225)) + (var226)) + (var227)) + (var228)) + (var229)) + (var230)) + (var231)) + (var232)) + (var233)) + (var234)) + (var235)) + (var236)) + (var237)) + (var238)) + (var239)) + (var240)) + (var241)) + (var242)) + (var243)) + (var244)) + (var245)) + (var246)) + (var247)) + (var248)) + (var249)) + (var250)) + (var251)) + (var252)) + (var253)) + (var254)) + (var255)) + (var256)) + (var257)) + (var258)) + (var259)) + (var260)) + (var261)) + (var262))) + (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((var263) + (var264)) + (var265)) + (var266)) + (var267)) + (var268)) + (var269)) + (var270)) + (var271)) + (var272)) + (var273)) + (var274)) + (var275)) + (var276)) + (var277)) + (var278)) + (var279)) + (var280)) + (var281)) + (var282)) + (var283)) + (var284)) + (var285)) + (var286)) + (var287)) + (var288)) + (var289)) + (var290)) + (var291)) + (var292)) + (var293)) + (var294)) + (var295)) + (var296)) + (var297)) + (var298)) + (var299)) + (var300)) + (var301)) + (var302)) + (var303)) + (var304)) + (var305)) + (var306)) + (var307)) + (var308)) + 
(var309)) + (var310)) + (var311)) + (var312)) + (var313)) + (var314)) + (var315)) + (var316)) + (var317)) + (var318)) + (var319)) + (var320)) + (var321)) + (var322)) + (var323)) + (var324)) + (var325)) + (var326)) + (var327)) + (var328)) + (var329)) + (var330)) + (var331)) + (var332)) + (var333)) + (var334)) + (var335)) + (var336)) + (var337)) + (var338)) + (var339)) + (var340)) + (var341)) + (var342)) + (var343))) + (((((((((((((((((((((((((((((((((var344) + (var345)) + (var346)) + (var347)) + (var348)) + (var349)) + (var350)) + (var351)) + (var352)) + (var353)) + (var354)) + (var355)) + (var356)) + (var357)) + (var358)) + (var359)) + (var360)) + (var361)) + (var362)) + (var363)) + (var364)) + (var365)) + (var366)) + (var367)) + (var368)) + (var369)) + (var370)) + (var371)) + (var372)) + (var373)) + (var374)) + (var375)) + (var376))))));
        memcpy(output, (double[]){(1) - (var377), var377}, 2 * sizeof(double));
    }
    

    This can be translated into the MQL4 language fairly easily.