sns.barplot ValueError: Length of values (9) does not match length of index (363)

Question

我想统计某一列中各个特定值（I、s、a、x 等）出现的次数。但提取之后，我得到的是一组计数值，而我的图表无法读取它。

# Crashes and news frequency (I s a x)

# occurance.sort()

# For each crash name, count its matches in every row of df.Crashes and sum
# the per-row counts into a single total for the whole column.
# NOTE(review): pandas documents the str.count argument as a regex pattern —
# confirm none of these names contain regex metacharacters.
qqq= df.Crashes.str.count("Wall Street Crash of 1929").sum()
www= df.Crashes.str.count("Russian financial crisis of 1998").sum()
eee= df.Crashes.str.count("Dot-com bubble of 2000").sum()
rrr= df.Crashes.str.count("Financial crisis of 2007–08").sum()
ttt= df.Crashes.str.count("Cryptocurrency crash of 2018").sum()
yyy= df.Crashes.str.count("Chinese stock bubble of 2007").sum()
uuu= df.Crashes.str.count("March Covid-19 crash of 2020").sum()
iii= df.Crashes.str.count("Other").sum()
ooo= df.Crashes.str.count("I do not know any").sum()


# Nine totals, one per crash name above, in the same order.
occurance11 = [qqq,www,eee,rrr,ttt,yyy,uuu,iii,ooo]


plt.figure(figsize=(8,6))
# This is the call that raises the ValueError: x is the full
# df["News_frequency"] column (363 rows according to the traceback) while y
# holds only the 9 totals, so the two cannot be paired element by element.
sns.barplot(df["News_frequency"], y=occurance11,)
# NOTE(review): the title and axis labels mention education, but y holds crash
# counts — they look copied from another plot; confirm the intended wording.
plt.title('Correlation between Frequency of Following Financial News and Highest Education from Russians Investors', fontsize=14)
plt.xlabel("Frequency of Following Financial (1: Never, 4: Always)")
plt.ylabel("Type of Education");

# occurance.sort()

# plt.figure(figsize=(10,8))


# New_Colors = ['green','blue','purple','brown','teal','black','orange']
# plt.bar(Investment__goal, occurance,color=New_Colors)
# plt.title('Known Financial Crashes by Russians ', fontsize=14)
# plt.xlabel('Crashes', fontsize=14)
# plt.ylabel('Occurrence', fontsize=14)
# plt.grid(True)
# plt.xticks(
#     rotation=45, 
#     horizontalalignment='right',
#     fontweight='light',
#     fontsize='x-large')

# for index,data in enumerate(occurance):
#     plt.text(x=index , y =data+1 , s=f"{data}" , fontdict=dict(fontsize=12))
# plt.tight_layout()

# plt.show()
# print (len(df.Crashes))

ValueError                                Traceback (most recent call last)
/var/folders/q8/qn3d11d90fbbz0j6kllhpn9h0000gn/T/ipykernel_34081/161240386.py in <module>
     22 
     23 plt.figure(figsize=(8,6))
---> 24 sns.barplot(df["News_frequency"], y=occurance11,)
     25 plt.title('Correlation between Frequency of Following Financial News and Highest Education from Russians Investors', fontsize=14)
     26 plt.xlabel("Frequency of Following Financial (1: Never, 4: Always)")

~/opt/anaconda3/lib/python3.9/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
     44             )
     45         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46         return f(**kwargs)
     47     return inner_f
     48 

~/opt/anaconda3/lib/python3.9/site-packages/seaborn/categorical.py in barplot(x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge, ax, **kwargs)
   3180 ):
   3181 
-> 3182     plotter = _BarPlotter(x, y, hue, data, order, hue_order,
   3183                           estimator, ci, n_boot, units, seed,
   3184                           orient, color, palette, saturation,

~/opt/anaconda3/lib/python3.9/site-packages/seaborn/categorical.py in __init__(self, x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge)
   1582                  errwidth, capsize, dodge):
   1583         """Initialize the plotter."""
-> 1584         self.establish_variables(x, y, hue, data, orient,
   1585                                  order, hue_order, units)
   1586         self.establish_colors(color, palette, saturation)

~/opt/anaconda3/lib/python3.9/site-packages/seaborn/categorical.py in establish_variables(self, x, y, hue, data, orient, order, hue_order, units)
    204 
    205                 # Group the numeric data
--> 206                 plot_data, value_label = self._group_longform(vals, groups,
    207                                                               group_names)
    208 

~/opt/anaconda3/lib/python3.9/site-packages/seaborn/categorical.py in _group_longform(self, vals, grouper, order)
    248             else:
    249                 index = None
--> 250             vals = pd.Series(vals, index=index)
    251 
    252         # Group the val data

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
    428                 index = ibase.default_index(len(data))
    429             elif is_list_like(data):
--> 430                 com.require_length_match(data, index)
    431 
    432             # create/copy the manager

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/common.py in require_length_match(data, index)
    529     """
    530     if len(data) != len(index):
--> 531         raise ValueError(
    532             "Length of values "
    533             f"({len(data)}) "

ValueError: Length of values (9) does not match length of index (363)

<Figure size 576x432 with 0 Axes>

Answer 1

您可以把所有要统计的词条存放在一个列表中。这样就可以通过循环生成出现次数的列表。这些词条还可以用作条形图的标签；由于它们较长，可以插入换行符，使其分多行显示。

将列表转换为 numpy 数组后，可以用 np.argsort() 得到按值升序排列的索引；加上 [::-1] 可将其反转为降序，然后用这些索引对数组重新排序。

下面是一些示例代码，其中包含显示其工作原理的虚拟数据：

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Placeholder: replace with the survey DataFrame; the code below reads its
# "Crashes" column through the .str accessor.
df = ...

# Crash names to look for; they also serve as the bar labels.
terms = [
    "Wall Street Crash of 1929",
    "Russian financial crisis of 1998",
    "Dot-com bubble of 2000",
    "Financial crisis of 2007–08",
    "Cryptocurrency crash of 2018",
    "Chinese stock bubble of 2007",
    "March Covid-19 crash of 2020",
    "Other",
    "I do not know any",
]


def wrap_label(term):
    """Return *term* with spaces turned into line breaks for a compact tick label.

    The breaks after "of" and between "I" and "do" are undone again so that
    those word pairs stay on one line.
    """
    wrapped = term.replace(' ', '\n')
    wrapped = wrapped.replace('of\n', 'of ')
    return wrapped.replace('I\ndo', 'I do')


# One total per term: matches are counted per row and summed over the column.
occurance11 = np.array(
    [df.Crashes.str.count(term).sum() for term in terms]
)

# argsort yields ascending positions; reversing them gives largest-first.
ordering = np.argsort(occurance11)[::-1]

# As an array the labels can be reordered with the same index array.
terms_with_newlines = np.array([wrap_label(term) for term in terms])

fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(
    x=terms_with_newlines[ordering],
    y=occurance11[ordering],
    palette='flare',
    ax=ax,
)
sns.despine()
# Hide the tick marks and keep the multi-line labels horizontal.
ax.tick_params(axis='x', length=0, labelrotation=0)
fig.tight_layout()
plt.show()