I am writing a Python library in C++ using the Python C API. It has about 25 functions that all accept two strings. Python may store a string as utf8/16/32 (the moment one char requires a bigger size, the whole string will use the bigger size), so every function has to cope with each combination of the two. When checking which kind a string is, you get an enum value between 0 and 4: 0 and 4 should be handled as utf32, 1 as utf8 and 2 as utf16. So I currently have a nested switch for each combination:
The following example shows how the strings are handled in my code. `random_func`
is different for each of my functions and is a template that accepts a string_view of any character type. Writing the code this way results in about 100 lines of boilerplate for each function that accepts two strings.
Is there a way to handle all these cases without this immense code duplication and without sacrificing performance?
double result = 0;
Py_ssize_t len_s1 = PyUnicode_GET_LENGTH(py_s1);
void* s1 = PyUnicode_DATA(py_s1);
Py_ssize_t len_s2 = PyUnicode_GET_LENGTH(py_s2);
void* s2 = PyUnicode_DATA(py_s2);
int s1_kind = PyUnicode_KIND(py_s1);
int s2_kind = PyUnicode_KIND(py_s2);
switch (s1_kind) {
case PyUnicode_1BYTE_KIND:
switch (s2_kind) {
case PyUnicode_1BYTE_KIND:
result = random_func(
basic_string_view<char>(static_cast<char*>(s1), len_s1),
basic_string_view<char>(static_cast<char*>(s2), len_s2));
break;
case PyUnicode_2BYTE_KIND:
result = random_func(
basic_string_view<char>(static_cast<char*>(s1), len_s1),
basic_string_view<char16_t>(static_cast<char16_t*>(s2), len_s2));
break;
default:
result = random_func(
basic_string_view<char>(static_cast<char*>(s1), len_s1),
basic_string_view<char32_t>(static_cast<char32_t*>(s2), len_s2));
break;
}
break;
case PyUnicode_2BYTE_KIND:
switch (s2_kind) {
case PyUnicode_1BYTE_KIND:
result = random_func(
basic_string_view<char16_t>(static_cast<char16_t*>(s1), len_s1),
basic_string_view<char>(static_cast<char*>(s2), len_s2));
break;
case PyUnicode_2BYTE_KIND:
result = random_func(
basic_string_view<char16_t>(static_cast<char16_t*>(s1), len_s1),
basic_string_view<char16_t>(static_cast<char16_t*>(s2), len_s2));
break;
default:
result = random_func(
basic_string_view<char16_t>(static_cast<char16_t*>(s1), len_s1),
basic_string_view<char32_t>(static_cast<char32_t*>(s2), len_s2));
break;
}
break;
default:
switch (s2_kind) {
case PyUnicode_1BYTE_KIND:
result = random_func(
basic_string_view<char32_t>(static_cast<char32_t*>(s1), len_s1),
basic_string_view<char>(static_cast<char*>(s2), len_s2));
break;
case PyUnicode_2BYTE_KIND:
result = random_func(
basic_string_view<char32_t>(static_cast<char32_t*>(s1), len_s1),
basic_string_view<char16_t>(static_cast<char16_t*>(s2), len_s2));
break;
default:
result = random_func(
basic_string_view<char32_t>(static_cast<char32_t*>(s1), len_s1),
basic_string_view<char32_t>(static_cast<char32_t*>(s2), len_s2));
break;
}
break;
}
Hide the complexity in a single function by using `std::variant`:
/// A non-owning view over a Python string's character buffer, holding
/// whichever of the three supported character widths the string uses.
/// Alternative order (index 0, 1, 2): char, char16_t, char32_t.
using python_string_view = std::variant<std::basic_string_view<char>,
                                        std::basic_string_view<char16_t>,
                                        std::basic_string_view<char32_t>>;
/// Wraps the character buffer of a Python string in the string_view type
/// that matches its storage kind.
///
/// @param py_str  The Python string object to inspect.
/// @return A variant holding a view of char (1-byte kind), char16_t
///         (2-byte kind) or char32_t (any other kind). The view does not
///         own the characters; it points into the buffer reported by
///         PyUnicode_DATA for py_str.
python_string_view decode_python_string(python_string py_str)
{
    Py_ssize_t len_s = PyUnicode_GET_LENGTH(py_str);
    void* s = PyUnicode_DATA(py_str);
    int s_kind = PyUnicode_KIND(py_str);
    switch (s_kind) {
    case PyUnicode_1BYTE_KIND:
        return std::basic_string_view<char>(static_cast<char*>(s), len_s);
    case PyUnicode_2BYTE_KIND:
        return std::basic_string_view<char16_t>(static_cast<char16_t*>(s), len_s);
    default:
        // Every remaining kind value is treated as 4-byte storage.
        return std::basic_string_view<char32_t>(static_cast<char32_t*>(s), len_s);
    }
}
// Usage sketch: decode both strings once, then let std::visit pick the
// matching random_func instantiation for the pair of view types.
int main()
{
    python_string s1 = ..., s2 = ...;
    auto v1 = decode_python_string(s1);
    auto v2 = decode_python_string(s2);
    // The explicit `-> double` gives every visitor instantiation the same
    // return type, which std::visit requires, and keeps the computed value
    // instead of discarding it.
    double result = std::visit([](auto&& val1, auto&& val2) -> double {
        return random_func(val1, val2);
    }, v1, v2);
    (void)result; // placeholder: use the result here
}
I'm unsure about the performance though.