I am trying to create a pie chart for every column and compare it between two users, and when I run my code I get the error which is:
ValueError: x must be 1D
I am not sure what it really means, I tried to look up it but could not find anything useful, Any guidance on how can I solve this problem. Thank you!!!!
# Aggregate dataframe
summarized_df = df.groupby(['user']).agg({ 'favorite_counts': 'sum', # Tweets overview
'retweet_counts': 'sum',
'is_positive': 'sum',
'is_negative': 'sum',
'tweets_length': 'sum', # Tweets writing style
'tweets_uppercase': 'sum',
'tweets_punctuations': 'sum',
'tweets_questionmark': 'sum',
'is_norp': 'sum', # Tweets detailed writing style
'is_time': 'sum',
'is_org': 'sum',
'is_gpe': 'sum',
'is_loc': 'sum',
'is_product': 'sum',
'is_workart': 'sum',
'is_fac': 'sum',
'is_noun': 'sum',
'is_pron': 'sum',
'is_adv': 'sum',
'is_propn': 'sum',
'is_verb': 'sum',
'is_intj': 'sum' }).reset_index()
# Seperate dataframe
Tim_summarized_df = summarized_df[summarized_df.user == 'Timothy Caulfield'].drop('user', axis = 1).copy()
Keran_summarized_df = summarized_df[summarized_df.user == 'Dr. Karen James'].drop('user', axis = 1).copy()
# Get columns
summarized_cols = Tim_summarized_df.columns
# Create features for each round in list format
round_1_cols = summarized_cols[:5]
round_2_cols = summarized_cols[5:10]
round_3_cols = summarized_cols[10:15]
round_4_cols = summarized_cols[15:20]
round_5_cols = summarized_cols[20:22]
# Combine all lists into one list
temp_list = [round_1_cols, round_2_cols, round_3_cols, round_4_cols, round_5_cols]
# Dictionary to rename the titles
rep_title_dict = { 'favorite_counts': 'Tweets Likes',
'retweet_counts': 'Re-Tweets',
'is_positive': 'Positivity',
'is_negative': 'Negativity',
'tweets_length': 'Length of Tweets',
'tweets_uppercase': 'Uppercase Characters Used',
'tweets_punctuations': 'Punctuations Used',
'tweets_questionmark': 'Questionmark Used',
'is_norp': 'Nationalities | Religious | Political Groups',
'is_time': 'Mentioned Time Related',
'is_org': 'Corporate | Governmental',
'is_gpe': 'Countries | Cities | States',
'is_loc': 'Location Mentioned',
'is_product': 'Objects | Vehicles | Foods',
'is_workart': 'Books | Songs',
'is_fac': 'Buildings | Airports | Highways',
'is_noun': 'Noun Used',
'is_pron': 'Pronoun Used',
'is_adv': 'Adverb Used',
'is_propn': 'Propn (like Apple, UK, US)',
'is_verb': 'Verb Used',
'is_intj': 'Bravo | Hello | Ouch' }
# Create function to plot summarized details
def summarized_donut_plot(data_1, data_2, indx_list):#, plot_title):
if indx_list == 4:
fig, ax = plt.subplots(ncols = 2, figsize= (13,6))
else:
fig, ax = plt.subplots(ncols = 5, figsize= (35,6))
for indx, target in enumerate(temp_list[indx_list]):
total = df[target].sum()
# Data preproccessing
x = round(float(data_1[target] / total * 100), 1)
y = round(float(data_2[target] / total * 100), 1)
#print(total, x, y)
user_list = ['Timothy', 'Karen']
results_list = [x, y]
x = pd.DataFrame(data = results_list, index = user_list)
ax[indx].pie(x, autopct='%1.1f%%', textprops = {'fontsize': 10, 'color': 'w', 'weight': 'bold'})
ax[indx].add_patch(plt.Circle((0,0), 0.35, fc = 'white'))
ax[indx].legend(user_list, loc = 2)
ax[indx].set_title(rep_title_dict[target], size = 10)
plt.show()
for indx, sublist in enumerate(temp_list):
summarized_donut_plot( data_1 = Tim_summarized_df,
data_2 = Keran_summarized_df,
indx_list = indx )
The following is the error that I get.
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Input In [170], in <cell line: 1>()
1 for indx, sublist in enumerate(temp_list):
----> 2 summarized_donut_plot( data_1 = Tim_summarized_df,
3 data_2 = Keran_summarized_df,
4 indx_list = indx )
Input In [169], in summarized_donut_plot(data_1, data_2, indx_list)
86 results_list = [x, y]
88 x = pd.DataFrame(data = results_list, index = user_list)
---> 90 ax[indx].pie(x, autopct='%1.1f%%', textprops = {'fontsize': 10, 'color': 'w', 'weight': 'bold'})
91 ax[indx].add_patch(plt.Circle((0,0), 0.35, fc = 'white'))
93 ax[indx].legend(user_list, loc = 2)
File ~/.local/lib/python3.8/site-packages/matplotlib/__init__.py:1412, in _preprocess_data.<locals>.inner(ax, data, *args, **kwargs) 1409 @functools.wraps(func) 1410 def inner(ax, *args, data=None,
**kwargs): 1411 if data is None:
-> 1412 return func(ax, *map(sanitize_sequence, args), **kwargs) 1414 bound = new_sig.bind(ax, *args, **kwargs) 1415 auto_label = (bound.arguments.get(label_namer) 1416 or bound.kwargs.get(label_namer))
File ~/.local/lib/python3.8/site-packages/matplotlib/axes/_axes.py:3044, in Axes.pie(self, x, explode, labels, colors, autopct, pctdistance, shadow, labeldistance, startangle, radius, counterclock, wedgeprops, textprops, center, frame, rotatelabels, normalize) 3042 x = np.asarray(x, np.float32) 3043 if x.ndim > 1:
-> 3044 raise ValueError("x must be 1D") 3046 if np.any(x < 0): 3047 raise ValueError("Wedge sizes 'x' must be non negative values")
ValueError: x must be 1D
1D here means 1 dimensional. When you call the pie function, x must be a 1D sequence of values. Looks to me like you're assigning a DataFrame
to x, which is essentially a table and therefore it is 2D, even if it has only one row. If you add a print(x.ndim)
right before calling pie it should print 2 indicating this.
I believe just replacing x with x[0] to get only the row with the data in it instead of the table should fix your error.