Search code examples
cpostgresqlhpc

Need a fast method to convert large amount of double to string


I am writing a results output module for a high speed computing program.

My plan is:

  1. My task is to insert the results into database (PostgreSQL) with relative fast speed.
  2. I use [COPY FROM STDIN] of libpq, which I was told as the fastest way.
  3. The method requires converting the results into char* format.

While the results looks like this:

  1. Monthly CashFlow for next 106 years (1272 double in Total).
  2. Around 14 CashFlows for each entiry.
  3. Around 2800 entities (2790 for tested data).

And the table in database looks like this:

  1. Each row of the table contains one entity.
  2. There are some prefix to identify different entities.
  3. CashFlows are double array that follows the prefix (float8[] type in PGSQL).

The following present the code for creating the table in database:

create table AgentCF(
PlanID     int4,
Agent      int4,
Senario    int4,
RM_Prev    float8[], DrvFac_Cur float8[], Prem       float8[],
Comm       float8[], CommOR     float8[], FixExp     float8[],
VarExp     float8[], CIRCFee    float8[], SaftyFund  float8[],
Surr       float8[], Benefit_1  float8[], Benefit_2  float8[],
Benefit_3  float8[], Benefit_4  float8[], Benefit_5  float8[],
Benefit_6  float8[], Benefit_7  float8[], Benefit_8  float8[],
Benefit_9  float8[], Benefit_10 float8[]
);

Presenting code for function that prepare the inserted CashFlow:

void AsmbCF(char *buffer, int size, int ProdNo, int i, int Pos, int LineEnd)
{
    int     j, Step = sizeof(nodecf) / sizeof(double), PosST, Temp;
    double *LoopRate = &AllHeap[ProdNo].Heap.AgentRes[i].CF.NodeCF[0].Prem;
    strcpy_s(buffer, size, "{");
    for (j = 0; j < TOTLEN / 10; j++) {
        PosST = j * 10 * Step + Pos;
        sprintf_s(&buffer[strlen(buffer)], size - strlen(buffer), "%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,",
            LoopRate[PosST],
            LoopRate[PosST + 1 * Step],
            LoopRate[PosST + 2 * Step],
            LoopRate[PosST + 3 * Step],
            LoopRate[PosST + 4 * Step],
            LoopRate[PosST + 5 * Step],
            LoopRate[PosST + 6 * Step],
            LoopRate[PosST + 7 * Step],
            LoopRate[PosST + 8 * Step],
            LoopRate[PosST + 9 * Step]
        );
    }
    Temp = j * 10;
    PosST = Temp * Step + Pos;
    sprintf_s(&buffer[strlen(buffer)], size - strlen(buffer), "%f", LoopRate[PosST]);
    Temp = Temp + 1;
    for (j = Temp; j < TOTLEN; j++) {
        PosST = j * Step + Pos;
        sprintf_s(&buffer[strlen(buffer)], size - strlen(buffer), ",%f", LoopRate[PosST]);
    }
    if (LineEnd) {
        strcat_s(buffer, size, "}\n");
    }
    else {
        strcat_s(buffer, size, "}\t");
    }
}

The following are the code for speed testing:

void ThreadOutP(LPVOID pM)
{
    char       *buffer = malloc(BUFFLEN), sql[SQLLEN];
    int         Status, ProdNo = (int)pM, i, j, ben;
    PGconn     *conn = NULL;
    PGresult   *res;
    clock_t     begin, end;

    fprintf_s(fpOutP, "PlanID %d Start inseting...\n", AllHeap[ProdNo].PlanID);
    begin = clock();
    DBConn(&conn, CONNSTR, fpOutP);

#pragma region General cashflow
    //============================== Data Query ==============================
    //strcpy_s(&sql[0], SQLLEN, "COPY AgentCF(PlanID,Agent,Senario,Prem,Comm,CommOR,CIRCFee,SaftyFund,FixExp,VarExp,Surr");
    //for (ben = 1; ben <= AllHeap[ProdNo].Heap.TotNo.NoBenft; ben++) {
    //  strcat_s(&sql[0], SQLLEN, ",Benefit_");
    //  _itoa_s(ben, &sql[strlen(sql)], sizeof(sql) - strlen(sql), 10);
    //}
    //strcat_s(&sql[0], SQLLEN, ") FROM STDIN;");
    //res = PQexec(conn, &sql[0]);
    //if (PQresultStatus(res) != PGRES_COPY_IN) {
    //  fprintf_s(fpOutP, "Not in COPY_IN mode\n");
    //}
    //PQclear(res);
    //============================== Data Apply ==============================
    for (i = 0; i < AllHeap[ProdNo].MaxAgntPos + AllHeap[ProdNo].Heap.TotNo.NoSensi; i++) {
        sprintf_s(buffer, BUFFLEN, "%d\t%d\t%d\t", AllHeap[ProdNo].PlanID, AllHeap[ProdNo].Heap.AgentRes[i].Agent, AllHeap[ProdNo].Heap.AgentRes[i].Sensi);
        //Status = PQputCopyData(conn, buffer, (int)strlen(buffer));
        //if (1 != Status) {
        //  fprintf_s(fpOutP, "PlanID %d inserting error for agent %d\n", AllHeap[ProdNo].PlanID, AllHeap[ProdNo].Heap.AgentRes[i].Agent);
        //}
        for (j = 0; j < 8 + AllHeap[ProdNo].Heap.TotNo.NoBenft; j++) {
            if (j == 7 + AllHeap[ProdNo].Heap.TotNo.NoBenft) {
                AsmbCF(buffer, BUFFLEN, ProdNo, i, j, 1);
            }
            else {
                AsmbCF(buffer, BUFFLEN, ProdNo, i, j, 0);
            }
            //Status = PQputCopyData(conn, buffer, (int)strlen(buffer));
            //if (1 != Status) {
            //  fprintf_s(fpOutP, "PlanID %d inserting error for agent %d\n", AllHeap[ProdNo].PlanID, AllHeap[ProdNo].Heap.AgentRes[i].Agent);
            //}
        }
    }
    //Status = PQputCopyEnd(conn, NULL);
#pragma endregion

#pragma region K cashflow

#pragma endregion

    PQfinish(conn);
    FreeProd(ProdNo);
    free(buffer);
    end = clock();
    fprintf_s(fpOutP, "PlanID %d inserted, total %d rows inserted, %d millisecond cost\n", AllHeap[ProdNo].PlanID, i, end - begin);
    AllHeap[ProdNo].Printed = 1;
}

Please note that I disable the code that involving inserting.

The tested results are:

  1. Cost of only assembling the string is 45930 millisecond.
  2. Cost of assembling the string and insert is 54829 millisecond.

So the most of the cost lie on converting double to char.

Therefore I would like to ask if there is a faster way of converting series of double into string, because compare to the calculation cost, the bottleneck actually is the outputting of results.

By the way, my platform is Windows 10, PostgreSQL 11, Visual Studio 2017.

Thanks a lot!


Solution

  • fast method to convert large amount of double to string

    For full double range application, use sprintf(buf, "%a", some_double). If decimal output required, use "%e".

    Any other code will only be faster if it is comprises accuracy or allowable input range somehow.

    The usual approach is to convert double x to some wide scaled integer and convert that to a string. This implies limitations on x not clearly expressed yet by OP.

    Even if some other approaches appears faster, it may not be faster as code evolves or is ported.


    What OP needs to post is speed test code for objective performance assessment.