import matplotlib.pyplot as plt
import pandas as pd
!pip install pmdarima
from pmdarima.datasets import load_lynx
# Load the lynx trapping dataset shipped in pmdarima.datasets.
lynx_data = load_lynx()

# Visualize the raw time series on a single 10x5 inch axes.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(lynx_data)
ax.set_title('Lynx Trappings Time Series')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Lynx Trapped')
ax.grid(True)
plt.show()
Defaulting to user installation because normal site-packages is not writeable Requirement already satisfied: pmdarima in c:\users\glska\appdata\roaming\python\python39\site-packages (2.0.4) Requirement already satisfied: pandas>=0.19 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.4.2) Requirement already satisfied: packaging>=17.1 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (21.3) Requirement already satisfied: Cython!=0.29.18,!=0.29.31,>=0.29 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (0.29.28) Requirement already satisfied: urllib3 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.26.9) Requirement already satisfied: numpy>=1.21.2 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.21.5) Requirement already satisfied: statsmodels>=0.13.2 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (0.13.2) Requirement already satisfied: scipy>=1.3.2 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.7.3) Requirement already satisfied: joblib>=0.11 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.1.0) Requirement already satisfied: scikit-learn>=0.22 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.0.2) Requirement already satisfied: setuptools!=50.0.0,>=38.6.0 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (61.2.0) Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in c:\programdata\anaconda3\lib\site-packages (from packaging>=17.1->pmdarima) (3.0.4) Requirement already satisfied: python-dateutil>=2.8.1 in c:\programdata\anaconda3\lib\site-packages (from pandas>=0.19->pmdarima) (2.8.2) Requirement already satisfied: pytz>=2020.1 in c:\programdata\anaconda3\lib\site-packages (from pandas>=0.19->pmdarima) (2021.3) Requirement already satisfied: six>=1.5 in c:\programdata\anaconda3\lib\site-packages (from python-dateutil>=2.8.1->pandas>=0.19->pmdarima) (1.16.0) Requirement already satisfied: threadpoolctl>=2.0.0 in 
c:\programdata\anaconda3\lib\site-packages (from scikit-learn>=0.22->pmdarima) (2.2.0) Requirement already satisfied: patsy>=0.5.2 in c:\programdata\anaconda3\lib\site-packages (from statsmodels>=0.13.2->pmdarima) (0.5.2)
# Wrap the observations in a single-column DataFrame and name the index 'Year'.
# NOTE(review): the index keeps its default 0..N-1 labels (see the displayed
# table), so 'Year' is an offset from the first observation rather than a
# calendar year — confirm this is intended.
lynx_df = pd.DataFrame({'Number of Lynx Trapped': lynx_data}).rename_axis('Year')
lynx_df
Number of Lynx Trapped | |
---|---|
Year | |
0 | 269.0 |
1 | 321.0 |
2 | 585.0 |
3 | 871.0 |
4 | 1475.0 |
... | ... |
109 | 662.0 |
110 | 1000.0 |
111 | 1590.0 |
112 | 2657.0 |
113 | 3396.0 |
114 rows × 1 columns
# Compute the rolling mean of the trapping counts.
window_size = 10  # window length used for both the rolling mean and the rolling variance
trapped_counts = lynx_df['Number of Lynx Trapped']
lynx_df['Rolling Mean'] = trapped_counts.rolling(window=window_size).mean()

# Plot the raw series with its rolling mean overlaid in red.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(lynx_df.index, trapped_counts, label='Number of Lynx Trapped')
ax.plot(
    lynx_df.index,
    lynx_df['Rolling Mean'],
    color='red',
    label=f'Rolling Mean (window={window_size})',
)
ax.set_title('Lynx Trappings and Rolling Mean')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Lynx Trapped')
ax.legend()
ax.grid(True)
plt.show()
# Check whether the variance is constant over time by computing a rolling
# variance with the same window as the rolling mean.
lynx_df['Rolling Variance'] = lynx_df['Number of Lynx Trapped'].rolling(window=window_size).var()

plt.figure(figsize=(10, 5))
# Build the legend label from window_size so it always reports the window
# actually used instead of a hard-coded 10.
plt.plot(
    lynx_df.index,
    lynx_df['Rolling Variance'],
    label=f'Rolling Variance (window={window_size})',
    color='green',
)
plt.title('Rolling Variance of Lynx Trappings')
plt.xlabel('Year')
plt.ylabel('Variance')
plt.legend()
plt.grid(True)
plt.show()
import numpy as np
# Build two aligned series separated by a lag of h = 10 observations.
lag = 10
counts = lynx_df['Number of Lynx Trapped']
# Drop the last `lag` values for y_t and the first `lag` values for y_{t+10},
# then reset both indexes so row i pairs y_i with y_{i+lag}.
y_t = counts.iloc[:-lag].reset_index(drop=True)
y_t_plus_10 = counts.iloc[lag:].reset_index(drop=True)

# Combine y_t and y_t+10 into a single DataFrame.
yt_yt_plus_10_df = pd.DataFrame({'y_t': y_t, 'y_t+10': y_t_plus_10})

# Covariance between y_t and y_t+10: the off-diagonal entry of the 2x2 matrix
# returned by np.cov.
cov_matrix = np.cov(y_t, y_t_plus_10)
covariance_10 = cov_matrix[0, 1]

display(yt_yt_plus_10_df.head(10))
print(f'공분산은 {covariance_10:.2f} 입니다.')
y_t | y_t+10 | |
---|---|---|
0 | 269.0 | 523.0 |
1 | 321.0 | 98.0 |
2 | 585.0 | 184.0 |
3 | 871.0 | 279.0 |
4 | 1475.0 | 409.0 |
5 | 2821.0 | 2285.0 |
6 | 3928.0 | 2685.0 |
7 | 5943.0 | 3409.0 |
8 | 4950.0 | 1824.0 |
9 | 2577.0 | 409.0 |
공분산은 1415574.72 입니다.