{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "05d8c8be-6f1d-496f-a71d-66e8228aea2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 패키지 로딩하기 : 패키지를 메모리(RAM)에 올리는 기능\n",
    "import pandas      as pd\n",
    "import scipy.stats as stats\n",
    "import numpy       as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "69165ad7-9833-46d0-a48d-a8d824355e04",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>money</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  money\n",
       "0   1     20\n",
       "1   2    100\n",
       "2   3     70\n",
       "3   4     30\n",
       "4   5     40\n",
       "5   6     55\n",
       "6   7     60"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the income data: first worksheet, column names taken from the first row\n",
    "income = pd.read_excel(\"./income.xlsx\", sheet_name=0, header=0)\n",
    "\n",
    "income"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "f88a8b13-d155-4bdf-9330-40586a8e366c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the data as an Excel workbook\n",
    "income.to_excel(\"./income_2024_0221_1702.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "30852528-2ff1-4fb5-ada5-32ac31e8e7e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the data as a pickle file\n",
    "income.to_pickle(\"./income_2024_0221_1704.pickle\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "c72c39cb-de77-4d36-a7e5-2a61c838db53",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>money</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  money\n",
       "0   1     20\n",
       "1   2    100\n",
       "2   3     70\n",
       "3   4     30\n",
       "4   5     40\n",
       "5   6     55\n",
       "6   7     60"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the pickled frame back in and display it\n",
    "income2 = pd.read_pickle(\"./income_2024_0221_1704.pickle\")\n",
    "income2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dff857fd-2930-4e76-8b37-3570d483b48f",
   "metadata": {},
   "source": [
    "### 1. 일표본 검정(One sample test)\n",
    "- 일표본 t검정(One sample t-test)\n",
    "- 윌콕슨의 부호 순위 검정(Wilcoxon's signed rank test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e1c9a5bb-b30b-4440-9db1-675ed88e6599",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 귀무가설 : 성인들의 용돈의 평균은 50만원이다.\n",
    "# 대립가설 : 성인들의 용돈의 평균은 50만원보다 많다."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8357c87f-9518-4a6c-81bb-92339ad04144",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1단계 : 정규성 검정(Normality Test)\n",
    "# 귀무가설(Null Hypothesis)        : 정규분포를 따른다.\n",
    "# 대립가설(Alternative Hypothesis) : 정규분포를 따르지 않는다.\n",
    "\n",
    "# n < 5000  : Shapiro-Wilk Normality Test     : scipy.stats.shapiro(data.variable)\n",
    "# n >= 5000 : Anderson-Darling Normality Test : scipy.stats.anderson(data.variable)\n",
    "\n",
    "# n = 7 < 5000 : Shapiro-Wilk Normality Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "37351b1f-8048-4a27-a4b9-eb2c07066c40",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ShapiroResult(statistic=0.9682345063181119, pvalue=0.8854483279544569)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shapiro-Wilk normality test on the money column\n",
    "stats.shapiro(income[\"money\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2858f793-1e6e-459f-abeb-c5558871ef6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 유의수준(alpha)     : 0.05\n",
    "# statistic = 0.968  : 검정통계량(W)\n",
    "# p-value    = 0.885 : 유의확률\n",
    "\n",
    "# p-value(0.885) > alpha(0.05) : 귀무가설 : 정규분포를 따른다."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "e4a539ca-4681-44ef-829f-4efeaa88b0d8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "TtestResult(statistic=0.35150886809475873, pvalue=0.3686107540123118, df=6)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Step 2: one-sample t-test\n",
    "# Usage: stats.ttest_1samp(data.variable, popmean=..., alternative=...)\n",
    "\n",
    "stats.ttest_1samp(income[\"money\"], popmean=50, alternative=\"greater\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "394e7d2d-d848-4b46-8aa6-0f040ce78e82",
   "metadata": {},
   "outputs": [],
   "source": [
    "# statistic = 0.352 : 검정통계량(t)\n",
    "# p-value   = 0.369 : 유의확률\n",
    "# df        = 6     : 자유도(degree of freedom) = n-1 = 7-1\n",
    "\n",
    "# p-value(0.369) > alpha(0.05) : 귀무가설\n",
    "# 성인들의 용돈의 평균은 50만원이다.\n",
    "# 유의확률이 0.369이므로 유의수준 0.05에서\n",
    "# 성인들의 용돈에 통계적으로 유의한 변화는 없는 것으로 나타났다.\n",
    "\n",
    "# 참고\n",
    "# popmean     : 귀무가설의 모집단의 평균\n",
    "# alternative : 대립가설 : \"greater\", \"less\", \"two-sided\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "42e407d5-de9e-4bdd-8a86-02c1f413b05d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "53.57142857142857"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# For reference: sample mean of the money column\n",
    "income[\"money\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "7d011981-9235-424b-8d18-1d0efe3607b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 귀무가설에서 모집단의 평균 : 50만원\n",
    "# 표본에서 관찰된 표본 평균  : 53만 6천원\n",
    "# 약 3만 6천원 정도가 증가되었음. 이것이 우연에 의한 것인지 아니면 필연에 의한 것인지"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "a32ab29c-0482-4a33-b2a1-94f8c65a4805",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "26.881574500371954"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sample standard deviation S (ddof=1 is the pandas default, written out explicitly)\n",
    "income[\"money\"].std(ddof=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "f934fc1b-74d7-4289-b157-2081edf45e7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.35150886809475873"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Manual check of the one-sample t statistic: t = (x_bar - mu0) / (S / sqrt(n))\n",
    "# x_bar, S and n are computed from the data rather than pasted from earlier outputs,\n",
    "# so the check stays valid if income changes; mu0 = 50 is the popmean used in the t-test call.\n",
    "(income[\"money\"].mean() - 50) / (income[\"money\"].std() / np.sqrt(income[\"money\"].count()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7089ef2-6955-4d76-a224-d9158aff2fda",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0bfc0b69-db51-4610-8f39-9b46da531c4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 귀무가설 : 다이아몬드의 가격의 평균은 4000달러이다.\n",
    "# 대립가설 : 다이아몬드의 가격의 평균은 4000달러보다 적다."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "e7120483-a9f3-41de-b6e8-b36af86450ff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>carat</th>\n",
       "      <th>cut</th>\n",
       "      <th>color</th>\n",
       "      <th>clarity</th>\n",
       "      <th>depth</th>\n",
       "      <th>table</th>\n",
       "      <th>price</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.23</td>\n",
       "      <td>Ideal</td>\n",
       "      <td>E</td>\n",
       "      <td>SI2</td>\n",
       "      <td>61.5</td>\n",
       "      <td>55.0</td>\n",
       "      <td>326</td>\n",
       "      <td>3.95</td>\n",
       "      <td>3.98</td>\n",
       "      <td>2.43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0.21</td>\n",
       "      <td>Premium</td>\n",
       "      <td>E</td>\n",
       "      <td>SI1</td>\n",
       "      <td>59.8</td>\n",
       "      <td>61.0</td>\n",
       "      <td>326</td>\n",
       "      <td>3.89</td>\n",
       "      <td>3.84</td>\n",
       "      <td>2.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0.23</td>\n",
       "      <td>Good</td>\n",
       "      <td>E</td>\n",
       "      <td>VS1</td>\n",
       "      <td>56.9</td>\n",
       "      <td>65.0</td>\n",
       "      <td>327</td>\n",
       "      <td>4.05</td>\n",
       "      <td>4.07</td>\n",
       "      <td>2.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0.29</td>\n",
       "      <td>Premium</td>\n",
       "      <td>I</td>\n",
       "      <td>VS2</td>\n",
       "      <td>62.4</td>\n",
       "      <td>58.0</td>\n",
       "      <td>334</td>\n",
       "      <td>4.20</td>\n",
       "      <td>4.23</td>\n",
       "      <td>2.63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.31</td>\n",
       "      <td>Good</td>\n",
       "      <td>J</td>\n",
       "      <td>SI2</td>\n",
       "      <td>63.3</td>\n",
       "      <td>58.0</td>\n",
       "      <td>335</td>\n",
       "      <td>4.34</td>\n",
       "      <td>4.35</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  carat      cut color clarity  depth  table  price     x     y     z\n",
       "0   1   0.23    Ideal     E     SI2   61.5   55.0    326  3.95  3.98  2.43\n",
       "1   2   0.21  Premium     E     SI1   59.8   61.0    326  3.89  3.84  2.31\n",
       "2   3   0.23     Good     E     VS1   56.9   65.0    327  4.05  4.07  2.31\n",
       "3   4   0.29  Premium     I     VS2   62.4   58.0    334  4.20  4.23  2.63\n",
       "4   5   0.31     Good     J     SI2   63.3   58.0    335  4.34  4.35  2.75"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the \"data\" worksheet of the diamonds workbook and preview the first rows\n",
    "diamonds = pd.read_excel(\"./diamonds.xlsx\", sheet_name=\"data\", header=0)\n",
    "diamonds.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "16ddbb26-51e9-4445-97c4-0aa8272d5698",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1단계 : 정규성 검정(Normality Test)\n",
    "# 귀무가설 : 정규분포를 따른다.\n",
    "# 대립가설 : 정규분포를 따르지 않는다."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "389fee48-5784-4cef-a1c6-8d5a3d00fc9b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(53940, 11)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (number of rows, number of columns) of the diamonds frame\n",
    "diamonds.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "7806a32f-d6aa-4a2b-80d8-9fca2f092f04",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 53940 : 행의 개수 : n = 53,940 > 5000 : Anderson-Darling Normality Test\n",
    "# 11    : 열의 개수"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "68216efe-0c4a-41e7-a12f-1c2cd3ab5bb5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AndersonResult(statistic=3474.0163510249404, critical_values=array([0.576, 0.656, 0.787, 0.918, 1.092]), significance_level=array([15. , 10. ,  5. ,  2.5,  1. ]), fit_result=  params: FitParams(loc=3932.799721913237, scale=3989.439738146379)\n",
       " success: True\n",
       " message: '`anderson` successfully fit the distribution to the data.')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Anderson-Darling test of the price column against the normal distribution\n",
    "stats.anderson(diamonds[\"price\"], dist=\"norm\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "38f84abb-abf2-4f18-b7f8-d4ac7fa0c04a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# statistic = 3474.016 : 검정통계량(A)\n",
    "# critical_values      : 임계값(0.787)\n",
    "# significance_level   : 유의수준(0.05, 5%)\n",
    "\n",
    "# 검정통계량(3,474.016) > 임계값(0.787) : 대립가설 : 정규분포를 따르지 않는다.\n",
    "# 검정통계량 < 임계값                   : 귀무가설"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "70b21ad1-ce6b-463f-9899-0e1d03b5a96d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "WilcoxonResult(statistic=564679024.5, pvalue=0.0)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Step 2: Wilcoxon signed-rank test\n",
    "# Usage: stats.wilcoxon(data.variable - mu, alternative=...)\n",
    "\n",
    "stats.wilcoxon(diamonds[\"price\"] - 4000, alternative=\"less\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "7a8e1697-52d0-40b1-9e02-d23a812d1e94",
   "metadata": {},
   "outputs": [],
   "source": [
    "# statistic = 564679024.5 : 검정통계량(W+)\n",
    "# p-value   = 0.000       : 유의확률\n",
    "\n",
    "# p-value(0.000) < alpha(0.05) : 대립가설 : 다이아몬드 가격의 중위수는 4000달러보다 적다.\n",
    "# (윌콕슨의 부호 순위 검정은 평균이 아니라 중위수(위치)에 대한 검정이다.)\n",
    "# 유의확률이 0.000 이므로 유의수준 0.05에서\n",
    "# 다이아몬드의 가격은 통계적으로 유의하게 적어진 것으로 나타났다."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "072c8ee4-932a-4ea9-8102-1767a5a69448",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    53940.000000\n",
       "mean      3932.799722\n",
       "std       3989.439738\n",
       "min        326.000000\n",
       "25%        950.000000\n",
       "50%       2401.000000\n",
       "75%       5324.250000\n",
       "max      18823.000000\n",
       "Name: price, dtype: float64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# For reference: summary statistics of the price column\n",
    "diamonds[\"price\"].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "526b1100-23a5-4c19-920a-880704229de3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}