Search code examples
c++trie

concatenate string record of trie path


I'd like to concatenate a record of the path a struct takes through a trie - only recording when a non-matching path is used, here is my trie search function:

void Trie::suggestCorrectionsHelper(nodeT *w, int index, string record, string final, double edits, Trie::MatchesT &matchSet, string gap_record, int level)
{   
    if (record.empty() || w == NULL ) {//tried everyChar
        matchSet.candidate = final+record;
        if ( matchSet.candidate.length() == record_delimiterFormat.size() ) { //check length and record exists
            updateSet(w, matchSet, edits,  gap_record );
        }
    } else {//look for end of node paths
        if(index == w->alpha.size() ) { return; //no more paths here
        } else {            
            //CASE A: self
            if (record[0] == w->alpha[index].token_char ) {//replace char with self so no edit distance
                suggestCorrectionsHelper(w->alpha[index].next, 0, record.substr(1), final+w->alpha[index].token_char, edits, matchSet, gap_record, level+1); //follow char, no edit

            //CASE B: follow new path
            } else {
                //CASE B1: wilds
                if (record[0] == '*' && (final.length() + record.length() ) != record_delimiterFormat.size() ) { 
                    suggestCorrectionsHelper(w->alpha[index].next, 0, '*'+record.substr(1), final+w->alpha[index].token_char, edits+1, matchSet, gap_record+=(IntegerToString(level)+","), level+1); //wilds, keep adding chars

                //CASE B2: no wilds
                } else {
                    suggestCorrectionsHelper(w->alpha[index].next, 0, record.substr(1), final+w->alpha[index].token_char, edits+1, matchSet, gap_record+=(IntegerToString(level)+","), level+1);//follow path
                }
            }
            //CASE C: try next path - place outside else to allow finding alternative char even when self path is valid
            suggestCorrectionsHelper(w, index+1, record, final, edits, matchSet, gap_record, level);//increment and check next path
        }
    }
}

Once a path is completed it is added to:

//write to struct and then adds to struct vector 
void Trie::updateSet(nodeT *w, Trie::MatchesT & matchSet, double &edits, string gap_record)
{   
    CorrectionT cs; // struct with attributes of match n
    cs.suggestedRecord = matchSet.candidate; // add attributes to struct
    cs.editDistance = edits;
    cs.gap = gap_record;
    matchSet.candidateStack.push_back(cs);
}

I expected an out put looking like 0,1,3,5,12

But I get 0,0,0,0,0,0,2,2,2,3,3,4,12

It cannot go through the same level more than one, so through the recursion I am recording the paths, I want to record each path individually; how?

The trie class looks like this:

void Trie::AddFirstNode(){ // run-once, initial condition of first node
    nodeT *tempNode = new nodeT;
    root = tempNode;
}

void Trie::InsertNode(nodeT *w, ParseT & packet, string codeSoFar) // add new char
{
    for (unsigned int i = 0; i <= w->alpha.size(); i++) { // loop and insert tokens in sorted vector
        if (i == w->alpha.size() || codeSoFar[0] < w->alpha[i].token_char) { //position 

            //create new TokenT
            tokenT *tempChar = new tokenT;
            tempChar->next = NULL;
            tempChar->token_char = codeSoFar[0];

            nodeT *tempLeaf = new nodeT; //create new nodeT
            tempChar->next = tempLeaf; //link TokenT with its nodeT

            AddRecord(tempLeaf, packet, codeSoFar.substr(1)); //add next char in record, if last char AddRecord will terminate

            w->alpha.insert(w->alpha.begin()+i, *tempChar);
            return; 
        }
    }
}


//function to add record to trie
void Trie::AddRecord(nodeT *w, ParseT & packet, string codeSoFar)
{
    if (codeSoFar.empty() ) {   //condition 0: record's last char
        return;

    } else { //keep parsing down record path
        for (unsigned int i = 0; i < w->alpha.size(); i++) {    
            if (codeSoFar[0] == w->alpha[i].token_char) {
                return AddRecord(w->alpha[i].next, packet, codeSoFar.substr(1)); // condition 2: char exists
            }
        }
        InsertNode(w, packet, codeSoFar); //condition 3: no existing char
    }
}

The struct

struct MatchesT {
    string candidate;

    double testDistance; //try edit distance

    list<CorrectionT> candidateStack; //
};

the output

void PrintCorrections(Trie & tri, Trie::MatchesT & matchSet, ofstream& myfile)
{
while (!matchSet.candidateStack.empty() ) {
count++;
Trie::CorrectionT corr = matchSet.candidateStack.back();
matchSet.candidateStack.pop_back();

myfile << corr.suggestedRecord << ","  << fixed << setprecision(2) << corr.editDistance << "," << corr.gap << "\n";
}

Solution

  • i found a better solution, rather than add the concatenate as += then pass as a parameter, just pass the parameter as gap_record+ ; the the current node memory is preserved and the next node on the path has the concatenate.

    void Trie::suggestCorrectionsHelper(nodeT *w, int index, string record, string final, double edits, Trie::MatchesT &matchSet, string gap_record, int level)
    {   
        if (record.empty() || w == NULL ) {//tried everyChar
            matchSet.candidate = final+record;
            if ( matchSet.candidate.length() == record_delimiterFormat.size() ) { //check length and record exists
                updateSet(w, matchSet, edits,  gap_record );
            }
        } else {//look for end of node paths
            if(index == w->alpha.size() ) { return; //no more paths here
            } else {            
                //CASE A: self
                if (record[0] == w->alpha[index].token_char ) {//replace char with self so no edit distance
                    suggestCorrectionsHelper(w->alpha[index].next, 0, record.substr(1), final+w->alpha[index].token_char, edits, matchSet, gap_record, level+1); //follow char, no edit
    
                //CASE B: follow new path
                } else {
                    //CASE B1: wilds
                    if (record[0] == '*' && (final.length() + record.length() ) != record_delimiterFormat.size() ) { 
                        suggestCorrectionsHelper(w->alpha[index].next, 0, '*'+record.substr(1), final+w->alpha[index].token_char, edits+1, matchSet, gap_record+(IntegerToString(level)+","), level+1); //wilds, keep adding chars
    
                    //CASE B2: no wilds
                    } else {
                        suggestCorrectionsHelper(w->alpha[index].next, 0, record.substr(1), final+w->alpha[index].token_char, edits+1, matchSet, gap_record+(IntegerToString(level)+","), level+1);//follow path
                    }
                }
                //CASE C: try next path - place outside else to allow finding alternative char even when self path is valid
                suggestCorrectionsHelper(w, index+1, record, final, edits, matchSet, gap_record, level);//increment and check next path
            }
        }
    }