Search code examples
javascript, regex, node.js, promise, esprima

How to identify the following code patterns


I have a pattern of JS promise code that I want to identify by several keywords

For example if I put code like:

var deferred = Q.defer();

And in the file I have also the following respective value

deferred.reject(err);
deferred.resolve();
return deferred.promise;

The complete code

EXAMPLE 1

/**
 * Writes an error message to the file "errors.log" and reports the outcome
 * through a deferred object obtained from Q.defer().
 *
 * @param errMessage - Data handed to fs.writeFile as the file content.
 * @returns The `promise` property of the deferred created at the top of the
 *   function.
 */
function writeError(errMessage) {
    // The deferred is settled later, from inside the fs.writeFile callback.
    var deferred = Q.defer();
    fs.writeFile("errors.log", errMessage, function (err) {
        if (err) {
            // The write reported an error: pass it on through the deferred.
            deferred.reject(err);
        } else {
            // No error was reported: resolve without a value.
            deferred.resolve();
        }
    });
    return deferred.promise;
}

What I want is this: if I provide a large code file (as a string), find out whether this file contains the pattern

Another example

var d = Q.defer(); /* or $q.defer */

And in the file you have also the following respective value

d.resolve(val);
d.reject(err); 
return d.promise;

Complete EXAMPLE 2

function getStuffDone(param) {           
    var d = Q.defer(); /* or $q.defer */ 

    Promise(function(resolve, reject) {
        // or = new $.Deferred() etc.        
        myPromiseFn(param+1)                 
        .then(function(val) { /* or .done */ 
            d.resolve(val);                  
        }).catch(function(err) { /* .fail */ 
            d.reject(err);                   
        });                                  
        return d.promise; /* or promise() */ 

}                  

Is there any open source that can be used to do such analysis (provide a pattern and it will find it...)?

There are some more complex patterns with childProcess, but for now this is OK :)


Solution

  • UPDATE: I made one correction to the code, i.e. changed set[2] to set[set.length - 1] to accommodate query sets of any size. I then applied the exact same algorithm to your two examples.

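    The solution I provide follows some rules that I think are reasonable for the type of search you are proposing. Assume you are looking for four lines, ABCD (the letter notation is case insensitive, i.e. upper and lower case just mark different occurrences of a line matching the same query, so it will find ABCD or abcd or aBcD; the regex's themselves are compiled without the "i" flag and are therefore case sensitive):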

    • Multiple match sets can be found in a single file, i.e. it will find two sets in ABCDabcd.
    • Regex's are used for individual lines, meaning that variations can be included. (As only one consequence of this, it won't matter if you have a comment at the end of a matching line in your code.)
    • The patterns sought must always be on different lines, e.g. A and B can't be on the same line.
    • The matched set must be complete, e.g. it will not find ABC or ABD.
    • The matched set must be uninterrupted, i.e. it will not find anything in ABCaD. (Importantly, this also means that it will not find anything in overlapping sets, e.g. ABCaDbcd. You could argue that this is too limiting. However, in this example, which should be found, ABCD or abcd? The answer is arbitrary, and arbitrariness is difficult to code. Moreover, based on the examples you showed, such overlapping would not typically be expected, so this edge case seems unlikely, making this limitation reasonable.)
    • The matched set must be internally non-repeating, e.g. it will not find ABbCD. However, with AaBCD, it will find a set, i.e. it will find aBCD.
    • Embedded sets are allowed, but only the internal one will be found, e.g. with ABabcdCD, only abcd will be found.

    The code snippet below shows an example search. It does not demonstrate all of the edge cases. However, it does show the overall functionality.

    // One regex source string per sought line, in the order the lines must
    //   appear; each accepts "like" or "adore" with an optional " really".
    var queryRegexStrs = ["strawberry", "chocolate", "vanilla"].map(function(flavour) {
      return "I( really)? (like|adore) " + flavour;
    });

    // allow for different types of end-of-line characters or character sequences
    var endOfLineStr = "\n";

    // The text being searched, written one array entry per line; every line,
    //   including the last one, is terminated by the end-of-line string.
    var codeStr = [
      "....",
      "Most people would say 'I like vanilla'",
      "....",
      "....",
      "....",
      "....",
      "Amir's taste profile:",
      "....",
      "I like strawberry",
      "....",
      "....",
      "I told Billy that I really adore chocolate a lot",
      "....",
      "I like vanilla most of the time",
      "....",
      "Let me emphasize that I like strawberry",
      "....",
      "....",
      "....",
      "....",
      "Juanita's taste profile:",
      "....",
      "I really adore strawberry",
      "I like vanilla",
      "....",
      "....",
      "....",
      "....",
      "Rachel's taste profile:",
      "I adore strawberry",
      "....",
      "Sometimes I like chocolate, I guess",
      "....",
      "I adore vanilla",
      "....",
      "....",
      "....",
      "...."
    ].join(endOfLineStr) + endOfLineStr;

    // run the search; the result holds one array of line numbers per match set
    var matchSets = search(queryRegexStrs, codeStr, endOfLineStr);
    
    
    
    
    
    /**
     * Finds every complete, ordered run of query-line matches in a text.
     *
     * The text is split into lines and each line is tested against every
     * query regex.  Queries must be matched in the order given: a match of
     * the first query always (re)starts a candidate set, and a match of any
     * other query that is not the one expected next discards the candidate.
     * Lines that match no query are ignored and may sit between matched lines.
     *
     * @param {string[]} queryRegexStrs - Regex source strings, one per sought
     *   line, in the required order.  They are compiled without flags.
     * @param {string} codeStr - The text to search.
     * @param {string} [endOfLineStr] - Separator handed to
     *   String.prototype.split.  When omitted (undefined or null) the text is
     *   split on "\r\n", "\r" or "\n".
     * @returns {number[][]} One array per match set found, holding the 0-based
     *   line numbers of the matched lines in query order.
     * @throws {SyntaxError} If a query string is not a valid regular expression.
     */
    function search(queryRegexStrs, codeStr, endOfLineStr) {

      // With no separator given, split(undefined) would hand back the whole
      //   text as a single "line"; fall back to the common line endings instead.
      var separator = (endOfLineStr === undefined || endOfLineStr === null) ?
        /\r\n|\r|\n/ :
        endOfLineStr;

      // break the large code string into an array of line strings
      var codeLines = codeStr.split(separator);

      // remember the number of lines being sought
      var numQueryLines = queryRegexStrs.length;

      // convert the input regex strings into actual regex's in a parallel array
      var queryRegexs = queryRegexStrs.map(function(queryRegexStr) {
        return new RegExp(queryRegexStr);
      });

      // potentially multiple match sets can be found in the same text
      var matchSets = [];

      // line numbers collected so far for the set currently being assembled
      var currMatchSet = [];

      // index of the query that has to match next
      var idxOfCurrQuery = 0;

      // start looking for the first query again and forget any partial set
      var resetCurrQuery = function() {
        idxOfCurrQuery = 0;
        currMatchSet = [];
      };

      // check each line of code against each query line
      codeLines.forEach(function(codeLine, codeLineNum) {
        queryRegexs.forEach(function(regex, regexNum) {

          if (!regex.test(codeLine)) {
            return;
          }

          if (regexNum === 0) {
            // a match of the first query always begins a fresh candidate set
            resetCurrQuery();
          } else if (regexNum !== idxOfCurrQuery) {
            // an out-of-sequence match interrupts the candidate set
            resetCurrQuery();
            return;
          }

          // this match is the one expected next: record its line number
          currMatchSet.push(codeLineNum);
          idxOfCurrQuery += 1;

          // a whole query set has been found: keep it and start over
          if (idxOfCurrQuery === numQueryLines) {
            matchSets.push(currMatchSet);
            resetCurrQuery();
          }
        });
      });

      return matchSets;

    }
    
    
    
    
    // report the results
    document.write("<b>The code lines being sought:</b>");
    document.write("<pre>" + JSON.stringify(queryRegexStrs, null, 2) + "</pre>");
    document.write("<b>The code being searched:</b>");
    document.write(
      "<pre><ol start='0'><li>" +
      codeStr.replace(new RegExp("\n", "g"), "</li><li>") +
      "</li></ol></pre>"
    );
    document.write("<b>The code line numbers of query 'hits', grouped by query set:</b>");
    document.write("<pre>" + JSON.stringify(matchSets) + "</pre>");
    document.write("<b>One possible formatted output:</b>");
    
    var str = "<p>(Note that line numbers are 0-based...easily changed to 1-based if desired)</p>";
    str += "<pre>";
    matchSets.forEach(function(set, setNum, arr) {
      str += "Matching code block #" + (setNum + 1) + ": lines " + set[0] + "-" + set[set.length - 1] + "<br />";
    });
    str += "</pre>";
    document.write(str);

    Here is the exact same algorithm, just using your original examples 1 and 2, starting with example 1. Note a couple of things. First of all, anything that needs escaping in the regex strings actually needs double-escaping, e.g. in order to find a literal opening parenthesis you need to include "\\(" not just "\(". Also, the regex's perhaps seem a little complex. I have two comments about this. First: a lot of that is just finding the literal periods and parentheses. However, second, and importantly: the ability to use complex regex's is part of the power (read "flexibility") of this entire approach. e.g. The examples you provided required some alternation where, e.g., "a|b" means "find a OR b".

    // allow for different types of end-of-line characters or character sequences
    var endOfLineStr = "\n";

    // The four sought lines, in order; literal dots and parentheses are
    //   double-escaped because these are string literals, not regex literals.
    var queryRegexStrs = [
      "var deferred = Q\\.defer\\(\\);",
      "deferred\\.reject\\(err\\);",
      "deferred\\.resolve\\(\\);",
      "return deferred\\.promise;"
    ];

    // The code being searched, one array entry per line; every line,
    //   including the last one, is terminated by the end-of-line string.
    var codeStr = [
      'function writeError(errMessage) {',
      '    var deferred = Q.defer();',
      '    fs.writeFile("errors.log", errMessage, function (err) {',
      '        if (err) {',
      '            deferred.reject(err);',
      '        } else {',
      '            deferred.resolve();',
      '        }',
      '    });',
      '    return deferred.promise;',
      '}'
    ].join(endOfLineStr) + endOfLineStr;

    // run the search; the result holds one array of line numbers per match set
    var matchSets = search(queryRegexStrs, codeStr, endOfLineStr);
    
    
    
    
    
    /**
     * Finds every complete, ordered run of query-line matches in a text.
     *
     * The text is split into lines and each line is tested against every
     * query regex.  Queries must be matched in the order given: a match of
     * the first query always (re)starts a candidate set, and a match of any
     * other query that is not the one expected next discards the candidate.
     * Lines that match no query are ignored and may sit between matched lines.
     *
     * @param {string[]} queryRegexStrs - Regex source strings, one per sought
     *   line, in the required order.  They are compiled without flags.
     * @param {string} codeStr - The text to search.
     * @param {string} [endOfLineStr] - Separator handed to
     *   String.prototype.split.  When omitted (undefined or null) the text is
     *   split on "\r\n", "\r" or "\n".
     * @returns {number[][]} One array per match set found, holding the 0-based
     *   line numbers of the matched lines in query order.
     * @throws {SyntaxError} If a query string is not a valid regular expression.
     */
    function search(queryRegexStrs, codeStr, endOfLineStr) {

      // With no separator given, split(undefined) would hand back the whole
      //   text as a single "line"; fall back to the common line endings instead.
      var separator = (endOfLineStr === undefined || endOfLineStr === null) ?
        /\r\n|\r|\n/ :
        endOfLineStr;

      // break the large code string into an array of line strings
      var codeLines = codeStr.split(separator);

      // remember the number of lines being sought
      var numQueryLines = queryRegexStrs.length;

      // convert the input regex strings into actual regex's in a parallel array
      var queryRegexs = queryRegexStrs.map(function(queryRegexStr) {
        return new RegExp(queryRegexStr);
      });

      // potentially multiple match sets can be found in the same text
      var matchSets = [];

      // line numbers collected so far for the set currently being assembled
      var currMatchSet = [];

      // index of the query that has to match next
      var idxOfCurrQuery = 0;

      // start looking for the first query again and forget any partial set
      var resetCurrQuery = function() {
        idxOfCurrQuery = 0;
        currMatchSet = [];
      };

      // check each line of code against each query line
      codeLines.forEach(function(codeLine, codeLineNum) {
        queryRegexs.forEach(function(regex, regexNum) {

          if (!regex.test(codeLine)) {
            return;
          }

          if (regexNum === 0) {
            // a match of the first query always begins a fresh candidate set
            resetCurrQuery();
          } else if (regexNum !== idxOfCurrQuery) {
            // an out-of-sequence match interrupts the candidate set
            resetCurrQuery();
            return;
          }

          // this match is the one expected next: record its line number
          currMatchSet.push(codeLineNum);
          idxOfCurrQuery += 1;

          // a whole query set has been found: keep it and start over
          if (idxOfCurrQuery === numQueryLines) {
            matchSets.push(currMatchSet);
            resetCurrQuery();
          }
        });
      });

      return matchSets;

    }
    
    
    
    
    // report the results
    document.write("<b>The code lines being sought:</b>");
    document.write("<pre>" + JSON.stringify(queryRegexStrs, null, 2) + "</pre>");
    document.write("<b>The code being searched:</b>");
    document.write(
      "<pre><ol start='0'><li>" +
      codeStr.replace(new RegExp("\n", "g"), "</li><li>") +
      "</li></ol></pre>"
    );
    document.write("<b>The code line numbers of query 'hits', grouped by query set:</b>");
    document.write("<pre>" + JSON.stringify(matchSets) + "</pre>");
    document.write("<b>One possible formatted output:</b>");
    
    var str = "<p>(Note that line numbers are 0-based...easily changed to 1-based if desired)</p>";
    str += "<pre>";
    matchSets.forEach(function(set, setNum, arr) {
      str += "Matching code block #" + (setNum + 1) + ": lines " + set[0] + "-" + set[set.length - 1] + "<br />";
    });
    str += "</pre>";
    document.write(str);

    Here is the exact same algorithm, just using your original example 2:

    // allow for different types of end-of-line characters or character sequences
    var endOfLineStr = "\n";

    // The four sought lines, in order; the alternation and the optional group
    //   cover the "Q.defer()" / "$q.defer" and "promise" / "promise()" variants.
    var queryRegexStrs = [
      "var d = (Q\\.defer\\(\\)|\\$q\\.defer);",
      "d\\.resolve\\(val\\);",
      "d\\.reject\\(err\\);",
      "return d\\.promise(\\(\\))?;"
    ];

    // The code being searched, one array entry per line: two variants of the
    //   same function separated by filler lines.  Every line, including the
    //   last one, is terminated by the end-of-line string.
    var codeStr = [
      "....",
      "....",
      "....",
      "function getStuffDone(param) {",
      "    var d = Q.defer();",
      "",
      "    Promise(function(resolve, reject) {",
      "        // or = new $.Deferred() etc.",
      "        myPromiseFn(param+1)",
      "        .then(function(val) { /* or .done */",
      "            d.resolve(val);",
      "        }).catch(function(err) { /* .fail */",
      "            d.reject(err);",
      "        });",
      "        return d.promise;",
      "",
      "}",
      "....",
      "....",
      "....",
      "function getStuffDone(param) {",
      "    var d = $q.defer;",
      "",
      "    Promise(function(resolve, reject) {",
      "        // or = new $.Deferred() etc.",
      "        myPromiseFn(param+1)",
      "        .then(function(val) { /* or .done */",
      "            d.resolve(val);",
      "        }).catch(function(err) { /* .fail */",
      "            d.reject(err);",
      "        });",
      "        return d.promise();",
      "",
      "}",
      "....",
      "....",
      "...."
    ].join(endOfLineStr) + endOfLineStr;

    // run the search; the result holds one array of line numbers per match set
    var matchSets = search(queryRegexStrs, codeStr, endOfLineStr);
    
    
    
    
    
    /**
     * Finds every complete, ordered run of query-line matches in a text.
     *
     * The text is split into lines and each line is tested against every
     * query regex.  Queries must be matched in the order given: a match of
     * the first query always (re)starts a candidate set, and a match of any
     * other query that is not the one expected next discards the candidate.
     * Lines that match no query are ignored and may sit between matched lines.
     *
     * @param {string[]} queryRegexStrs - Regex source strings, one per sought
     *   line, in the required order.  They are compiled without flags.
     * @param {string} codeStr - The text to search.
     * @param {string} [endOfLineStr] - Separator handed to
     *   String.prototype.split.  When omitted (undefined or null) the text is
     *   split on "\r\n", "\r" or "\n".
     * @returns {number[][]} One array per match set found, holding the 0-based
     *   line numbers of the matched lines in query order.
     * @throws {SyntaxError} If a query string is not a valid regular expression.
     */
    function search(queryRegexStrs, codeStr, endOfLineStr) {

      // With no separator given, split(undefined) would hand back the whole
      //   text as a single "line"; fall back to the common line endings instead.
      var separator = (endOfLineStr === undefined || endOfLineStr === null) ?
        /\r\n|\r|\n/ :
        endOfLineStr;

      // break the large code string into an array of line strings
      var codeLines = codeStr.split(separator);

      // remember the number of lines being sought
      var numQueryLines = queryRegexStrs.length;

      // convert the input regex strings into actual regex's in a parallel array
      var queryRegexs = queryRegexStrs.map(function(queryRegexStr) {
        return new RegExp(queryRegexStr);
      });

      // potentially multiple match sets can be found in the same text
      var matchSets = [];

      // line numbers collected so far for the set currently being assembled
      var currMatchSet = [];

      // index of the query that has to match next
      var idxOfCurrQuery = 0;

      // start looking for the first query again and forget any partial set
      var resetCurrQuery = function() {
        idxOfCurrQuery = 0;
        currMatchSet = [];
      };

      // check each line of code against each query line
      codeLines.forEach(function(codeLine, codeLineNum) {
        queryRegexs.forEach(function(regex, regexNum) {

          if (!regex.test(codeLine)) {
            return;
          }

          if (regexNum === 0) {
            // a match of the first query always begins a fresh candidate set
            resetCurrQuery();
          } else if (regexNum !== idxOfCurrQuery) {
            // an out-of-sequence match interrupts the candidate set
            resetCurrQuery();
            return;
          }

          // this match is the one expected next: record its line number
          currMatchSet.push(codeLineNum);
          idxOfCurrQuery += 1;

          // a whole query set has been found: keep it and start over
          if (idxOfCurrQuery === numQueryLines) {
            matchSets.push(currMatchSet);
            resetCurrQuery();
          }
        });
      });

      return matchSets;

    }
    
    
    
    
    // report the results
    document.write("<b>The code lines being sought:</b>");
    document.write("<pre>" + JSON.stringify(queryRegexStrs, null, 2) + "</pre>");
    document.write("<b>The code being searched:</b>");
    document.write(
      "<pre><ol start='0'><li>" +
      codeStr.replace(new RegExp("\n", "g"), "</li><li>") +
      "</li></ol></pre>"
    );
    document.write("<b>The code line numbers of query 'hits', grouped by query set:</b>");
    document.write("<pre>" + JSON.stringify(matchSets) + "</pre>");
    document.write("<b>One possible formatted output:</b>");
    
    var str = "<p>(Note that line numbers are 0-based...easily changed to 1-based if desired)</p>";
    str += "<pre>";
    matchSets.forEach(function(set, setNum, arr) {
      str += "Matching code block #" + (setNum + 1) + ": lines " + set[0] + "-" + set[set.length - 1] + "<br />";
    });
    str += "</pre>";
    document.write(str);