import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd


# Load seaborn's "tips" example dataset into a DataFrame and preview
# the first three rows.
tips_df = sns.load_dataset(name='tips')
tips_df.head(n=3)


# Print a structural summary of the DataFrame: index range, per-column
# non-null counts and dtypes, and memory usage.
tips_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB


# include='all' makes describe() summarize every column, so the
# categorical variables are reported alongside the numeric ones.
tips_df.describe(include='all')


# countplot: categorical variable on the x-axis, number of observations
# in each category on the y-axis.
sns.countplot(x='day', data=tips_df)

<AxesSubplot:xlabel='day', ylabel='count'>


# barplot: categorical variable on the x-axis, continuous value on the
# y-axis.
sns.barplot(
    x='sex',
    y='tip',
    data=tips_df,
)

<AxesSubplot:xlabel='sex', ylabel='tip'>


# barplot: categorical variable on the x-axis, continuous value on the
# y-axis.
# The estimator defaults to the mean; to aggregate with something other
# than the average, pass that function instead (here: the total tip).
sns.barplot(
    x='sex',
    y='tip',
    data=tips_df,
    estimator=sum,
)

<AxesSubplot:xlabel='sex', ylabel='tip'>


# boxplot: distribution of total_bill for each value of time.
sns.boxplot(x='time', y='total_bill', data=tips_df)

<AxesSubplot:xlabel='time', ylabel='total_bill'>


# histplot: histogram of the total_bill column.
sns.histplot(x='total_bill', data=tips_df)

<AxesSubplot:xlabel='total_bill', ylabel='Count'>


# Two pandas histogram APIs applied to the same column.
# Called back to back without an explicit ``ax`` they both draw on the
# current axes and overlap, so each one gets its own subplot here to keep
# the side-by-side comparison visible.
_, (hist_ax, plot_hist_ax) = plt.subplots(ncols=2, figsize=(10, 4))
tips_df['total_bill'].hist(ax=hist_ax)
tips_df['total_bill'].plot.hist(ax=plot_hist_ax)

<AxesSubplot:ylabel='Frequency'>


# scatterplot: numeric variable on the x-axis, numeric variable on the
# y-axis.
sns.scatterplot(
    x='total_bill',
    y='tip',
    data=tips_df,
)

<AxesSubplot:xlabel='total_bill', ylabel='tip'>


# pairplot: grid of pairwise plots for the variables in tips_df.
sns.pairplot(tips_df)

<seaborn.axisgrid.PairGrid at 0x1f91283db20>

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3

	total_bill	tip	sex	smoker	day	time	size
count	244.000000	244.000000	244	244	244	244	244.000000
unique	NaN	NaN	2	2	4	2	NaN
top	NaN	NaN	Male	No	Sat	Dinner	NaN
freq	NaN	NaN	157	151	87	176	NaN
mean	19.785943	2.998279	NaN	NaN	NaN	NaN	2.569672
std	8.902412	1.383638	NaN	NaN	NaN	NaN	0.951100
min	3.070000	1.000000	NaN	NaN	NaN	NaN	1.000000
25%	13.347500	2.000000	NaN	NaN	NaN	NaN	2.000000
50%	17.795000	2.900000	NaN	NaN	NaN	NaN	2.000000
75%	24.127500	3.562500	NaN	NaN	NaN	NaN	3.000000
max	50.810000	10.000000	NaN	NaN	NaN	NaN	6.000000

# EDA