I am new to CUDA and I am getting a strange error. I want to print a string from an object passed to a kernel, but I get the error "calling host function from global function is not allowed" and I don't know why. If I print an integer instead (changing the get method to return sk1), everything works fine. Here is the code:
// Record type from the question: one string field plus two numeric fields.
// NOTE(review): `string` is presumably std::string (this snippet shows no
// includes). Its members are host library code, so the __device__ qualifiers
// below do not make the string operations usable in device code.
class Duomenys {
private:
string simb;
int sk1;
double sk2;
public:
// Builds an empty record; initialising `simb` runs a string constructor.
__device__ __host__ Duomenys(void): simb(""), sk1(0), sk2(0.0) {}
// The body is empty, but destroying the object still destroys the `simb` member.
__device__ __host__~Duomenys() {}
// Builds a record from the three field values.
// NOTE(review): the extra `Duomenys::` qualifier on an in-class declaration is
// not standard C++; some compilers accept it as an extension.
__device__ __host__ Duomenys::Duomenys(string simb1, int sk11, double sk21)
: simb(simb1), sk1(sk11), sk2(sk21) {}
// Returns a copy of the stored string (a `string` object, not a char pointer).
__device__ __host__ string Duomenys::get(){
return simb;
}
};
And here I am calling Duomenys::get from a __global__ function:
// Kernel from the question: tries to print the string held by `a`.
// NOTE(review): a.get() yields a `string` object, whereas printf's %s expects
// a pointer to a NUL-terminated char array.
__global__ void Vec_add(Duomenys a) {
printf(" %s \n",a.get());
}
EDIT: I am trying to read data from a file and print it in a global function. In this code I am trying to read all the data and print just one object to see if everything works. This is the error I'm getting:
calling a __host__ function("std::basic_string<char, std::char_traits<char>, std::allocator<char> >::~basic_string") from a __global__ function("Vec_add") is not allowed
Code:
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>
#include <sstream>
using namespace std;
// Record type from the question's full reproducer: a std::string field plus
// two numeric fields. The std::string member is what the quoted compiler error
// (~basic_string called from Vec_add) refers to.
class Duomenys {
private:
string simb;
int sk1;
double sk2;
public:
// Builds an empty record; initialising `simb` runs a std::string constructor.
__device__ __host__ Duomenys(void): simb(""), sk1(0), sk2(0.0) {}
// The body is empty, but destroying the object still destroys the `simb` member.
__device__ __host__~Duomenys() {}
// Builds a record from the three field values.
// NOTE(review): the extra `Duomenys::` qualifier on an in-class declaration is
// not standard C++; some compilers accept it as an extension.
__device__ __host__ Duomenys::Duomenys(string simb1, int sk11, double sk21)
: simb(simb1), sk1(sk11), sk2(sk21) {}
// Formats the three fields as left-aligned, 10-character-wide columns and
// returns the result as a std::string.
// NOTE(review): stringstream and the stream manipulators are host library
// code as well, so presumably this body cannot be compiled for the device.
__device__ __host__ string Duomenys::print()
{
stringstream ss;
ss << left << setw(10) << simb << setw(10) << sk1 << setw(10) << sk2;
return ss.str();
}
};
// Kernel from the question: tries to print the formatted record.
// NOTE(review): a.print() yields a std::string temporary, whereas printf's %s
// expects a pointer to a NUL-terminated char array; the temporary is also
// destroyed inside the kernel, which needs the string destructor.
__global__ void Vec_add(Duomenys a) {
printf(" %s \n",a.print());
}
/* Host code */
// Entry point of the question's reproducer: allocates device storage, launches
// Vec_add on one record, then copies records to the device.
int main(int argc, char* argv[]) {
setlocale (LC_ALL,"");
vector<Duomenys> vienas;
vector<vector<Duomenys>> visi;
//data reading to vector "vienas" (it works without any errors)
// NOTE(review): nothing visible here fills `visi`; the elided reading code is
// presumably expected to, otherwise visi[0] below refers to a missing element.
Duomenys *darr;
// Byte count for two Duomenys objects.
const size_t sz = size_t(2) * sizeof(Duomenys);
cudaMalloc((void**)&darr, sz);
// One block of one thread; the first record is passed to the kernel by value.
Vec_add<<<1, 1>>>(visi[0].at(0));
cudaDeviceSynchronize();
// Copies sz bytes starting at the first record into darr. This runs after the
// kernel, and darr is not used again in this snippet.
cudaMemcpy(darr, &visi[0].at(0), sz, cudaMemcpyHostToDevice);
return 0;
}
Why would you pass a `string` object to `printf` when the `%s` format specifier is expecting something else (a pointer to a NUL-terminated `char` array)? When I try to do that in ordinary host code, I get warnings about "passing non-POD types through ellipsis (call will abort at runtime)". Note that this problem has nothing to do with CUDA.
But beyond that issue, presumably you're getting `string` from the C++ standard library. (It's better if you show a complete reproducer; then I don't have to guess at where you're getting things or what you are including.)
If I get `string` as follows:
#include <string>
using namespace std;
Then I am using a class defined in the C++ Standard Library. CUDA supports the C++ language (mostly) but does not necessarily support usage of C++ libraries (or C libraries, for that matter) in device code. Libraries are (usually) composed of (at least some) compiled code (such as allocators, in this case), and this code has been compiled for CPUs, not for the GPU. When you try to use such a CPU-compiled routine (e.g. an allocator associated with the `string` class) in device code, the compiler will bark at you. If you include the complete error message in the question, it will be more obvious which (compiled-for-the-host) function is actually the issue.
Use a standard C-style string instead (i.e. `char[]`) and you will be able to use it directly in `printf`.
EDIT: In response to a question in the comments, here is a modified version of the code posted that demonstrates how to use an ordinary C-style string (i.e. `char[]`) and print from it in device code.
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>
#include <sstream>
#define STRSZ 32
using namespace std;
/**
 * Plain-data record usable in both host and device code.
 *
 * The label lives in a fixed-size char array inside the object, so the record
 * needs no host-only library code (no std::string, no allocator). It can be
 * passed by value to a kernel and its label printed with device-side printf.
 *
 * Invariant: `simb` always holds a NUL-terminated string of at most
 * STRSZ - 1 characters, and the unused tail is zero-filled.
 */
class Duomenys {
private:
    char simb[STRSZ];   // label, always NUL-terminated
    int sk1;
    double sk2;
public:
    /** Builds an empty record: empty label, zero numeric fields. */
    __device__ __host__ Duomenys(void) : sk1(0), sk2(0.0) { store_str(""); }

    __device__ __host__ ~Duomenys() {}

    /**
     * Builds a record from a label and two numeric values.
     *
     * @param simb1 NUL-terminated label (may be null, treated as empty);
     *              truncated to STRSZ - 1 characters.
     * @param sk11  value for the integer field.
     * @param sk21  value for the floating-point field.
     */
    __device__ __host__ Duomenys(const char *simb1, int sk11, double sk21)
        : sk1(sk11), sk2(sk21) { store_str(simb1); }

    /** Returns a pointer to the internal NUL-terminated label, suitable for printf("%s"). */
    __device__ __host__ char * print()
    {
        return simb;
    }

    /**
     * Replaces the label with a copy of `str`.
     *
     * Copying stops at the source's terminating NUL, so the source is never
     * read past its end, or after STRSZ - 1 characters, whichever comes first.
     *
     * @param str NUL-terminated source string; null is treated as empty.
     */
    __device__ __host__ void store_str(const char *str)
    {
        int i = 0;
        if (str) {
            for (; i < STRSZ - 1 && str[i] != '\0'; i++)
                simb[i] = str[i];
        }
        // Zero-fill the tail: this guarantees termination and leaves no
        // indeterminate bytes when the object is copied to the device.
        for (; i < STRSZ; i++)
            simb[i] = '\0';
    }
};
// Kernel: prints the label of the record passed by value.
// Each launched thread performs the same printf, so a <<<1, 1>>> launch
// prints exactly one line. Device-side printf is meant for debugging.
__global__ void Vec_add(Duomenys a) {
    const char *label = a.print();
    printf(" %s \n", label);
}
/* Reports a failed CUDA runtime call on stderr; returns true when `status` is cudaSuccess. */
static bool cuda_ok(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/* Host code */
/*
 * Builds a small host-side data set, prints the first record's label from a
 * one-thread kernel, then copies two records into device memory.
 * Returns 0 on success and 1 if any CUDA call failed.
 */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    string host_string("hello\n");
    setlocale (LC_ALL,"");

    // Hand store_str a zero-padded buffer of exactly STRSZ bytes so that no
    // read can run past the end of the source string. string::copy does not
    // append a NUL; the zero-initialised buffer supplies it.
    char label[STRSZ] = {0};
    host_string.copy(label, STRSZ - 1);

    vector<Duomenys> vienas(3);
    vienas[0].store_str(label);
    vector<vector<Duomenys> > visi(3);
    visi[0] = vienas;
    //data reading to vector "vienas" (it works without any errors)

    Duomenys *darr = NULL;
    // Room for two records; visi[0] holds three, so the copy below stays in bounds.
    const size_t sz = size_t(2) * sizeof(Duomenys);
    if (!cuda_ok(cudaMalloc((void**)&darr, sz), "cudaMalloc"))
        return 1;

    // The record is passed by value, so it travels with the kernel arguments.
    Vec_add<<<1, 1>>>(visi[0].at(0));
    // A launch returns no status itself: configuration errors surface through
    // cudaGetLastError, execution errors at the next synchronising call.
    bool ok = cuda_ok(cudaGetLastError(), "Vec_add launch");
    ok = cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize") && ok;

    ok = cuda_ok(cudaMemcpy(darr, &(visi[0].at(0)), sz, cudaMemcpyHostToDevice),
                 "cudaMemcpy") && ok;

    // Release the device allocation on every path that reached cudaMalloc.
    ok = cuda_ok(cudaFree(darr), "cudaFree") && ok;
    return ok ? 0 : 1;
}
Note that I didn't try to understand your code or fix everything that looked strange to me. However this should demonstrate one possible approach.