교과서 Chapter 8

bigdata
Author

tiger

Published

July 16, 2024

read_csv

import pandas as pd
import matplotlib.pyplot as plt    
import seaborn as sns    
# Load the mpg dataset from the local CSV file.
mpg = pd.read_csv(filepath_or_buffer='data/mpg.csv')
# Preview the first five rows.
mpg.head(n=5)
manufacturer model displ year cyl trans drv cty hwy fl category
0 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
1 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
2 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
3 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
4 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact

scatterplot

  • seaborn을 사용한 산점도
# Scatter of highway mileage (hwy) against displacement (displ), with one
# color per drive type (drv).
ax = sns.scatterplot(data=mpg, x="displ", y="hwy", hue="drv")
# Zoom in: x-axis limited to 3-6, y-axis limited to 10-30.
ax.set(xlim=[3, 6], ylim=[10, 30])
# NOTE: to change the figure size, call plt.figure(figsize=(8, 7)) before
# the scatterplot call above; it is intentionally left disabled here.
plt.show()

  • plotly를 사용한 산점도
import plotly.express as px

# Interactive scatter of city mileage (cty) against highway mileage (hwy),
# colored by drive type (drv). The figure is left as the final expression
# so the notebook renders it.
px.scatter(
    mpg,
    x='cty',
    y='hwy',
    color='drv',
)

barplot

# Mean highway mileage per drive type, kept as a regular column
# (as_index=False) so "drv" can be used directly as the x variable.
df_mpg = (
    mpg.groupby("drv", as_index=False)
       .agg(mean_hwy=('hwy', 'mean'))
)
# One bar per drive type, colored by drive type.
sns.barplot(
    data=df_mpg,
    x="drv",
    y="mean_hwy",
    hue="drv",
)

groupby & barplot

# Order the per-drive means from highest to lowest highway mileage.
# NOTE(review): this sorted frame is overwritten by the count aggregation
# below before anything plots it -- confirm whether a bar plot of the sorted
# means was meant to go here.
df_mpg = df_mpg.sort_values("mean_hwy", ascending=False)
# page 208
# Count the number of rows for each drive type (stored in column "n").
df_mpg = (
    mpg.groupby("drv", as_index=False)
       .agg(n=("drv", "count"))
)
# Pass the frame with the data= keyword, matching the earlier barplot call;
# the keyword form does not depend on seaborn's positional-argument order.
sns.barplot(data=df_mpg, x="drv", y="n", hue="drv")

countplot

# Bar chart of row counts per drive type, drawn in the fixed order 4, f, r.
# The frame is passed with the data= keyword, matching the barplot calls;
# the keyword form does not depend on seaborn's positional-argument order.
sns.countplot(data=mpg, x='drv', order=['4', 'f', 'r'])