Pandas Seaborn Swarmplot 不绘制
Pandas Seaborn Swarmplot doesn't plot
我正在尝试绘制一个 seaborn swarmplot,其中 col[2] 是频率,col[3] 是要分组的 类。下面给出了输入和代码。
输入
tweetcricscore,51,high active
tweetcricscore,46,event based
tweetcricscore,12,event based
tweetcricscore,46,event based
tweetcricscore,1,viewers
tweetcricscore,178,viewers
tweetcricscore,46,situational
tweetcricscore,23,situational
tweetcricscore,1,situational
tweetcricscore,8,situational
tweetcricscore,56,situational
代码:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# Load the header-less CSV and label its three columns.
df = pd.read_csv('input.csv', header=None)
df.columns = ['keyword', 'freq', 'class']

# Apply the white-grid theme (with color_codes turned on) before drawing.
sns.set(color_codes=True, style="whitegrid")
# One swarm of points per "class" value, positioned by "freq" on the y-axis.
ax = sns.swarmplot(data=df, y="freq", x="class")
plt.show()
代码不会绘制也不会给出任何错误。有什么优化代码的建议吗?
我想你首先需要用 read_csv 读取数据,然后把 class1 和 class2 两列拼接成新的 class 列(缺失值先用 fillna 填充为空字符串),最后用 strip 去掉首尾多余的空格:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import io
# Sample data: whitespace-separated, so a two-word class label spans two fields.
temp=u"""tweetcricscore 51 high active
tweetcricscore 46 event based
tweetcricscore 12 event based
tweetcricscore 46 event based
tweetcricscore 1 viewers
tweetcricscore 178 viewers
tweetcricscore 46 situational
tweetcricscore 23 situational
tweetcricscore 1 situational
tweetcricscore 8 situational
tweetcricscore 56 situational"""
# After testing, replace io.StringIO(temp) with the path to the real file.
df = pd.read_csv(
    io.StringIO(temp),
    sep=r"\s+",  # raw string: the separator is a regex for arbitrary whitespace
    names=['keyword', 'freq', 'class1', 'class2'],  # set new column names
)
# Join the two label parts; rows with a one-word label have NaN in class2,
# so fill it with '' and strip the trailing space the join leaves behind.
df['class'] = (df['class1'] + ' ' + df['class2'].fillna('')).str.strip()
# print() as a function so the snippet also runs on Python 3.
print(df)
keyword freq class1 class2 class
0 tweetcricscore 51 high active high active
1 tweetcricscore 46 event based event based
2 tweetcricscore 12 event based event based
3 tweetcricscore 46 event based event based
4 tweetcricscore 1 viewers NaN viewers
5 tweetcricscore 178 viewers NaN viewers
6 tweetcricscore 46 situational NaN situational
7 tweetcricscore 23 situational NaN situational
8 tweetcricscore 1 situational NaN situational
9 tweetcricscore 8 situational NaN situational
10 tweetcricscore 56 situational NaN situational
# Apply the white-grid theme (with color_codes turned on) before drawing.
sns.set(color_codes=True, style="whitegrid")
# One swarm of points per "class" value, positioned by "freq" on the y-axis.
ax = sns.swarmplot(data=df, y="freq", x="class")
plt.show()
如果 class 列本身不包含空格,可以使用下面的解决方案:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import io
# Sample data: whitespace-separated, every class label is a single token.
temp=u"""tweetcricscore 51 highactive
tweetcricscore 46 eventbased
tweetcricscore 12 eventbased
tweetcricscore 46 eventbased
tweetcricscore 1 viewers
tweetcricscore 178 viewers
tweetcricscore 46 situational
tweetcricscore 23 situational
tweetcricscore 1 situational
tweetcricscore 8 situational
tweetcricscore 56 situational"""
# After testing, replace io.StringIO(temp) with the path to the real file.
df = pd.read_csv(
    io.StringIO(temp),
    sep=r"\s+",  # raw string: the separator is a regex for arbitrary whitespace
    names=['keyword', 'freq', 'class'],  # set new column names
)
# print() as a function so the snippet also runs on Python 3.
print(df)
keyword freq class
0 tweetcricscore 51 highactive
1 tweetcricscore 46 eventbased
2 tweetcricscore 12 eventbased
3 tweetcricscore 46 eventbased
4 tweetcricscore 1 viewers
5 tweetcricscore 178 viewers
6 tweetcricscore 46 situational
7 tweetcricscore 23 situational
8 tweetcricscore 1 situational
9 tweetcricscore 8 situational
10 tweetcricscore 56 situational
# Apply the white-grid theme (with color_codes turned on) before drawing.
sns.set(color_codes=True, style="whitegrid")
# One swarm of points per "class" value, positioned by "freq" on the y-axis.
ax = sns.swarmplot(data=df, y="freq", x="class")
plt.show()
编辑 2:
如果分隔符是 ,
使用:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import io
# Sample data: comma-separated, so a class label may contain spaces.
temp=u"""tweetcricscore,51,high active
tweetcricscore,46,event based
tweetcricscore,12,event based
tweetcricscore,46,event based
tweetcricscore,1,viewers
tweetcricscore,178,viewers
tweetcricscore,46,situational
tweetcricscore,23,situational
tweetcricscore,1,situational
tweetcricscore,8,situational
tweetcricscore,56,situational"""
# After testing, replace io.StringIO(temp) with the path to the real file.
# The default separator is ',', so only the column names need to be supplied.
df = pd.read_csv(io.StringIO(temp), names=['keyword', 'freq', 'class'])
# print() as a function so the snippet also runs on Python 3.
print(df)
keyword freq class
0 tweetcricscore 51 high active
1 tweetcricscore 46 event based
2 tweetcricscore 12 event based
3 tweetcricscore 46 event based
4 tweetcricscore 1 viewers
5 tweetcricscore 178 viewers
6 tweetcricscore 46 situational
7 tweetcricscore 23 situational
8 tweetcricscore 1 situational
9 tweetcricscore 8 situational
10 tweetcricscore 56 situational
# Apply the white-grid theme (with color_codes turned on) before drawing.
sns.set(color_codes=True, style="whitegrid")
# One swarm of points per "class" value, positioned by "freq" on the y-axis.
ax = sns.swarmplot(data=df, y="freq", x="class")
plt.show()
在对超过 8-10k 行的数据集多次尝试 swarmplot,并在 jezreal 不断的帮助和建议下,我们得出的结论是:seaborn 的类别绘图 swarmplot 无法像教程文档中提到的其他 seaborn 绘图那样扩展到大数据集。因此,我将绘图方式改为 bokeh 散点图,其中 y 轴使用数值,x 轴使用分组的类别名称,这在一定程度上解决了我按类别绘制数据的问题。
import numpy as np
import matplotlib.pyplot as plt
from pylab import*
import math
from matplotlib.ticker import LogLocator
import pandas as pd
from bokeh.models import BoxSelectTool, BoxZoomTool, LassoSelectTool
from bokeh.charts import Scatter, output_file, show
from bokeh.plotting import figure, hplot, vplot
from bokeh.models import LinearAxis
# Load the header-less CSV and name its three columns.
df = pd.read_csv('input.csv', header = None)
df.columns = ['user','freq','class']
# Scatter chart with 'class' on the x-axis and 'freq' on the y-axis; colour and
# marker are both keyed on 'class', and the legend is switched off.
# NOTE(review): Scatter is imported from bokeh.charts, a legacy API — confirm
# that the installed bokeh version still provides it.
scatter = Scatter( df, x='class', y='freq', color='class', marker='class', title=' User classification', legend=False)
# Direct the output to output.html, then render the chart.
output_file('output.html', title='output')
show(scatter)
这允许按 class
列分组,并根据组分配颜色和标记。 freq
沿 y 轴绘制。
注意:这可能是意外起作用的,因为数据是自然离散的。
我正在尝试绘制一个 seaborn swarmplot,其中 col[2] 是频率,col[3] 是要分组的 类。下面给出了输入和代码。 输入
tweetcricscore,51,high active
tweetcricscore,46,event based
tweetcricscore,12,event based
tweetcricscore,46,event based
tweetcricscore,1,viewers
tweetcricscore,178,viewers
tweetcricscore,46,situational
tweetcricscore,23,situational
tweetcricscore,1,situational
tweetcricscore,8,situational
tweetcricscore,56,situational
代码:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# Load the header-less CSV and label its three columns.
df = pd.read_csv('input.csv', header=None)
df.columns = ['keyword', 'freq', 'class']

# Apply the white-grid theme (with color_codes turned on) before drawing.
sns.set(color_codes=True, style="whitegrid")
# One swarm of points per "class" value, positioned by "freq" on the y-axis.
ax = sns.swarmplot(data=df, y="freq", x="class")
plt.show()
代码不会绘制也不会给出任何错误。有什么优化代码的建议吗?
我想你首先需要用 read_csv 读取数据,然后把 class1 和 class2 两列拼接成新的 class 列(缺失值先用 fillna 填充为空字符串),最后用 strip 去掉首尾多余的空格:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import io
# Sample data: whitespace-separated, so a two-word class label spans two fields.
temp=u"""tweetcricscore 51 high active
tweetcricscore 46 event based
tweetcricscore 12 event based
tweetcricscore 46 event based
tweetcricscore 1 viewers
tweetcricscore 178 viewers
tweetcricscore 46 situational
tweetcricscore 23 situational
tweetcricscore 1 situational
tweetcricscore 8 situational
tweetcricscore 56 situational"""
# After testing, replace io.StringIO(temp) with the path to the real file.
df = pd.read_csv(
    io.StringIO(temp),
    sep=r"\s+",  # raw string: the separator is a regex for arbitrary whitespace
    names=['keyword', 'freq', 'class1', 'class2'],  # set new column names
)
# Join the two label parts; rows with a one-word label have NaN in class2,
# so fill it with '' and strip the trailing space the join leaves behind.
df['class'] = (df['class1'] + ' ' + df['class2'].fillna('')).str.strip()
# print() as a function so the snippet also runs on Python 3.
print(df)
keyword freq class1 class2 class
0 tweetcricscore 51 high active high active
1 tweetcricscore 46 event based event based
2 tweetcricscore 12 event based event based
3 tweetcricscore 46 event based event based
4 tweetcricscore 1 viewers NaN viewers
5 tweetcricscore 178 viewers NaN viewers
6 tweetcricscore 46 situational NaN situational
7 tweetcricscore 23 situational NaN situational
8 tweetcricscore 1 situational NaN situational
9 tweetcricscore 8 situational NaN situational
10 tweetcricscore 56 situational NaN situational
# Apply the white-grid theme (with color_codes turned on) before drawing.
sns.set(color_codes=True, style="whitegrid")
# One swarm of points per "class" value, positioned by "freq" on the y-axis.
ax = sns.swarmplot(data=df, y="freq", x="class")
plt.show()
如果 class 列本身不包含空格,可以使用下面的解决方案:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import io
# Sample data: whitespace-separated, every class label is a single token.
temp=u"""tweetcricscore 51 highactive
tweetcricscore 46 eventbased
tweetcricscore 12 eventbased
tweetcricscore 46 eventbased
tweetcricscore 1 viewers
tweetcricscore 178 viewers
tweetcricscore 46 situational
tweetcricscore 23 situational
tweetcricscore 1 situational
tweetcricscore 8 situational
tweetcricscore 56 situational"""
# After testing, replace io.StringIO(temp) with the path to the real file.
df = pd.read_csv(
    io.StringIO(temp),
    sep=r"\s+",  # raw string: the separator is a regex for arbitrary whitespace
    names=['keyword', 'freq', 'class'],  # set new column names
)
# print() as a function so the snippet also runs on Python 3.
print(df)
keyword freq class
0 tweetcricscore 51 highactive
1 tweetcricscore 46 eventbased
2 tweetcricscore 12 eventbased
3 tweetcricscore 46 eventbased
4 tweetcricscore 1 viewers
5 tweetcricscore 178 viewers
6 tweetcricscore 46 situational
7 tweetcricscore 23 situational
8 tweetcricscore 1 situational
9 tweetcricscore 8 situational
10 tweetcricscore 56 situational
# Apply the white-grid theme (with color_codes turned on) before drawing.
sns.set(color_codes=True, style="whitegrid")
# One swarm of points per "class" value, positioned by "freq" on the y-axis.
ax = sns.swarmplot(data=df, y="freq", x="class")
plt.show()
编辑 2:
如果分隔符是 ,
使用:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import io
# Sample data: comma-separated, so a class label may contain spaces.
temp=u"""tweetcricscore,51,high active
tweetcricscore,46,event based
tweetcricscore,12,event based
tweetcricscore,46,event based
tweetcricscore,1,viewers
tweetcricscore,178,viewers
tweetcricscore,46,situational
tweetcricscore,23,situational
tweetcricscore,1,situational
tweetcricscore,8,situational
tweetcricscore,56,situational"""
# After testing, replace io.StringIO(temp) with the path to the real file.
# The default separator is ',', so only the column names need to be supplied.
df = pd.read_csv(io.StringIO(temp), names=['keyword', 'freq', 'class'])
# print() as a function so the snippet also runs on Python 3.
print(df)
keyword freq class
0 tweetcricscore 51 high active
1 tweetcricscore 46 event based
2 tweetcricscore 12 event based
3 tweetcricscore 46 event based
4 tweetcricscore 1 viewers
5 tweetcricscore 178 viewers
6 tweetcricscore 46 situational
7 tweetcricscore 23 situational
8 tweetcricscore 1 situational
9 tweetcricscore 8 situational
10 tweetcricscore 56 situational
# Apply the white-grid theme (with color_codes turned on) before drawing.
sns.set(color_codes=True, style="whitegrid")
# One swarm of points per "class" value, positioned by "freq" on the y-axis.
ax = sns.swarmplot(data=df, y="freq", x="class")
plt.show()
在对超过 8-10k 行的数据集多次尝试 swarmplot,并在 jezreal 不断的帮助和建议下,我们得出的结论是:seaborn 的类别绘图 swarmplot 无法像教程文档中提到的其他 seaborn 绘图那样扩展到大数据集。因此,我将绘图方式改为 bokeh 散点图,其中 y 轴使用数值,x 轴使用分组的类别名称,这在一定程度上解决了我按类别绘制数据的问题。
import numpy as np
import matplotlib.pyplot as plt
from pylab import*
import math
from matplotlib.ticker import LogLocator
import pandas as pd
from bokeh.models import BoxSelectTool, BoxZoomTool, LassoSelectTool
from bokeh.charts import Scatter, output_file, show
from bokeh.plotting import figure, hplot, vplot
from bokeh.models import LinearAxis
# Load the header-less CSV and name its three columns.
df = pd.read_csv('input.csv', header = None)
df.columns = ['user','freq','class']
# Scatter chart with 'class' on the x-axis and 'freq' on the y-axis; colour and
# marker are both keyed on 'class', and the legend is switched off.
# NOTE(review): Scatter is imported from bokeh.charts, a legacy API — confirm
# that the installed bokeh version still provides it.
scatter = Scatter( df, x='class', y='freq', color='class', marker='class', title=' User classification', legend=False)
# Direct the output to output.html, then render the chart.
output_file('output.html', title='output')
show(scatter)
这允许按 class
列分组,并根据组分配颜色和标记。 freq
沿 y 轴绘制。
注意:这可能是意外起作用的,因为数据是自然离散的。