I am trying to create a dictionary from a piece of structured text, but I can't wrap my head around the correct syntax.
text = 'english (fluently), spanish (poorly)'
# desired output:
{'english': 'fluently', 'spanish': 'poorly'}
# one of my many attempts:
dict((language,proficiency.strip('\(\)')) for language,proficiency in lp.split(' ') for lp in text.split(', '))
# but resulting error:
NameError: name 'lp' is not defined
I guess that `lp` in `lp.split(' ')` is not yet defined at that point, but I can't figure out how to rework the syntax to get the desired result.
In reality, the scenario is more complex. I have a dataframe, and I am aiming to eventually use a function to tidy the aforementioned data into one column per language and one column per corresponding proficiency, something like the example below (although it can probably be done far more efficiently):
# pandas dataframe
pd.DataFrame({'language': ['english, spanish (poorly)', 'turkish']})
# desired output:
pd.DataFrame({'Language: English': [True, False], 'Language proficiency: English': ['average', pd.NA], 'Language: Spanish': [True, False], 'Language proficiency: Spanish': ['poorly', pd.NA], 'Language: Turkish': [False, True], 'Language proficiency: Turkish': [pd.NA, 'average']})
# my attempt
def tidy(content):
    """Parse one raw 'language' cell into a ``{language: proficiency}`` dict.

    ``content`` is expected to look like ``'english, spanish (poorly)'``:
    comma-separated languages, each optionally followed by a proficiency in
    parentheses. A language listed without a proficiency is recorded as
    ``'average'``, matching the desired output of the question.

    Returns ``None`` for a missing cell (anything ``pd.isna`` reports as
    missing), so downstream ``pd.isna`` checks keep working.
    """
    if pd.isna(content):
        return None

    parsed = {}
    for entry in content.split(','):
        # Everything before the first "(" is the language name; the text up
        # to the first ")" after it is the proficiency.
        language, _, remainder = entry.partition('(')
        language = language.strip()
        if not language:
            # Skip empty fragments such as those produced by a trailing comma.
            continue
        proficiency = remainder.split(')', 1)[0].strip()
        parsed[language] = proficiency or 'average'
    return parsed
def tidy_language(language, content):
    """Report whether ``language`` is listed in a parsed language cell.

    ``content`` is either a missing value or a mapping keyed by language
    name. Returns ``pd.NA`` when the cell is missing, otherwise ``True`` or
    ``False`` depending on whether ``language`` is one of the keys.
    """
    if pd.isna(content):
        return pd.NA
    # Membership on a mapping already tests its keys.
    return language in content
def tidy_proficiency(language, content):
    """Return the proficiency recorded for ``language`` in a parsed cell.

    ``content`` is either a missing value or a mapping of language name to
    proficiency. Returns ``pd.NA`` when the cell is missing or when
    ``language`` is not one of its keys.
    """
    if pd.isna(content):
        return pd.NA
    # Look the language up by key: ``content.language`` would be attribute
    # access on the dict and always raise AttributeError.
    return content.get(language, pd.NA)
# Languages that each get a presence column and a proficiency column.
languages = ['english', 'spanish', 'turkish']

# Replace every raw text cell with its parsed representation.
df['language'] = df['language'].map(tidy)

for language in languages:
    label = language.capitalize()
    # Each lambda is consumed by ``map`` within the same iteration, so it
    # sees the current value of ``language``.
    df['Language: {}'.format(label)] = df['language'].map(
        lambda content: tidy_language(language, content)
    )
    df['Language proficiency: {}'.format(label)] = df['language'].map(
        lambda content: tidy_proficiency(language, content)
    )
Here is a quick solution: pass the text to the `text_to_dict` function below, and it returns the corresponding dictionary.
def text_to_dict(text, default=None):
    """Convert ``'english (fluently), spanish (poorly)'`` into a dict.

    The text is treated as a comma-separated list of entries. In each entry
    the part before the first ``(`` is the key (surrounding whitespace
    removed) and the part between that ``(`` and the next ``)`` is the value.

    Args:
        text: The structured text to parse. An empty or blank string
            yields an empty dict.
        default: Value stored for an entry that has no parenthesised part
            (or an empty one), e.g. ``'english'`` in
            ``'english, spanish (poorly)'``.

    Returns:
        A dict mapping each name to its parenthesised value. If a name
        occurs more than once, the last occurrence wins.
    """
    result = {}
    for entry in text.split(","):
        name, _, remainder = entry.partition("(")
        name = name.strip()
        if not name:
            # Blank fragment (empty input, doubled or trailing comma).
            continue
        value = remainder.split(")", 1)[0].strip()
        result[name] = value or default
    return result
if __name__ == "__main__":
    # Demo: parse a sample string and print the resulting dictionary.
    text = "english (fluently), spanish (poorly), bangla (fluently)"
    print(text_to_dict(text))