I am trying to reproduce a C++
code that I found here about the LevenshteinDistance
.
More precisely, I am trying to reproduce the part starting by
static int LevenshteinDistance(string s, string t)
until
return d[n, m];
}
However, I am experiencing an error with Math.Min(Math.Min(
. I am not sure what is the translation in rcpp
. I tried with min
but it doesn't seem to work. Any clue ?
// [[Rcpp::export]]
NumericVector LevenshteinDistance(NumericVector s, NumericVector t) {
int n = s.size();
int m = t.size();
NumericMatrix d(n+1, m+1);
if (n == 0)
{
return m;
}
if (m == 0){
return n;
}
for (int i = 0; i < n; i++)
d(i, 0) = i;
for (int j = 0; j < m; j++)
d(0, j) = j;
for (int j = 1; j < m; j++)
for (int i = 1; i < n; i++)
if (s(i - 1) == t(j - 1))
d(i, j) = d(i - 1, j - 1); //no operation
else
d(i, j) = min(
d(i - 1, j) + 1, //a deletion
d(i, j - 1) + 1), //an insertion
d(i - 1, j - 1) + 1 //a substitution
);
return d(n, m);
}
So with
d(i, j) = min(
I get this error message : no matching function
.
Three issues really:
You have a typo
d(i, j) = min(
d(i - 1, j) + 1,
d(i, j - 1) + 1), //typo here with )
d(i - 1, j - 1) + 1
)
The use of std::min()
is defined to be between two values e.g. std::min(obj1,obj2)
whereas you are aiming use min()
in an R
way.
So,
min(c(1,2,3,4))
But, C++ definition of std::min requires it to be written as:
std::min(1, std::min(2, std::min(3,4)));
string
notion of LevenshteinDistance
, then you need to switch how the values are entering your function away from the NumericVector
type to any one of the following: std::string
, std::vector<std::string>
(see Gallery Post: Strings with Rcpp), Rcpp::StringVector
(see Gallery Post: Working with Rcpp StringVector), or Rcpp::CharacterVector
. The following should be sufficient:
#include <Rcpp.h>
// [[Rcpp::export]]
int LevenshteinDistance(std::string s,
std::string t) {
// Number of elements
int n = s.size();
int m = t.size();
Rcpp::IntegerMatrix d(n+1, m+1);
Rcpp::Rcout << "n:" << n << ", m:" << m << std::endl;
if (n == 0){
return m;
}
if (m == 0){
return n;
}
for (int i = 0; i <= n; i++){
d(i, 0) = i;
}
// No sense to revisit the (0,0) coordinate
for (int j = 1; j <= m; j++){
d(0, j) = j;
}
for (int j = 1; j <= m; j++){
for (int i = 1; i <= n; i++){
if (s[i - 1] == t[j - 1]){
d(i, j) = d(i - 1, j - 1); // no operation
} else {
d(i, j) = std::min(d(i - 1, j) + 1, //a deletion
std::min(d(i, j - 1) + 1, //an insertion
d(i - 1, j - 1) + 1)); //a substitution
} // end if
} // end inner for
} // end outer for
return d(n, m);
}
With this function being written, we can opt to write a quick wrapper to process different dimensions of comparisons such multiple sources to a target and so on...
// [[Rcpp::export]]
Rcpp::IntegerVector LevenshteinDistance_Vector(std::vector<std::string> s,
std::string t){
// Number of Sources
unsigned int sn = s.size();
Rcpp::IntegerVector o(sn); // populates automatically with 0.
for(unsigned int i = 0; i < sn; i++){
o(i) = LevenshteinDistance(s[i],t);
}
return o;
}