In [1]:
import matplotlib.pyplot as plt
import pandas as pd
!pip install pmdarima
from pmdarima.datasets import load_lynx

# 데이터 로드
lynx_data = load_lynx()

# 시계열 데이터 시각화
plt.figure(figsize = (10, 5))
plt.plot(lynx_data)
plt.title('Lynx Trappings Time Series')
plt.xlabel('Year')
plt.ylabel('Number of Lynx Trapped')
plt.grid(True)
plt.show()
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: pmdarima in c:\users\glska\appdata\roaming\python\python39\site-packages (2.0.4)
Requirement already satisfied: pandas>=0.19 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.4.2)
Requirement already satisfied: packaging>=17.1 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (21.3)
Requirement already satisfied: Cython!=0.29.18,!=0.29.31,>=0.29 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (0.29.28)
Requirement already satisfied: urllib3 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.26.9)
Requirement already satisfied: numpy>=1.21.2 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.21.5)
Requirement already satisfied: statsmodels>=0.13.2 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (0.13.2)
Requirement already satisfied: scipy>=1.3.2 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.7.3)
Requirement already satisfied: joblib>=0.11 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.1.0)
Requirement already satisfied: scikit-learn>=0.22 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.0.2)
Requirement already satisfied: setuptools!=50.0.0,>=38.6.0 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (61.2.0)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in c:\programdata\anaconda3\lib\site-packages (from packaging>=17.1->pmdarima) (3.0.4)
Requirement already satisfied: python-dateutil>=2.8.1 in c:\programdata\anaconda3\lib\site-packages (from pandas>=0.19->pmdarima) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\programdata\anaconda3\lib\site-packages (from pandas>=0.19->pmdarima) (2021.3)
Requirement already satisfied: six>=1.5 in c:\programdata\anaconda3\lib\site-packages (from python-dateutil>=2.8.1->pandas>=0.19->pmdarima) (1.16.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn>=0.22->pmdarima) (2.2.0)
Requirement already satisfied: patsy>=0.5.2 in c:\programdata\anaconda3\lib\site-packages (from statsmodels>=0.13.2->pmdarima) (0.5.2)
In [2]:
lynx_df = pd.DataFrame(lynx_data, columns = ['Number of Lynx Trapped'])

# 인덱스를 Year로 설정
lynx_df.index.name = 'Year'
lynx_df
Out[2]:
Number of Lynx Trapped
Year
0 269.0
1 321.0
2 585.0
3 871.0
4 1475.0
... ...
109 662.0
110 1000.0
111 1590.0
112 2657.0
113 3396.0

114 rows × 1 columns

In [3]:
# 이동 평균 계산
window_size = 10  # 이동 평균 및 이동 분산에 사용할 윈도우 크기
lynx_df['Rolling Mean'] = lynx_df['Number of Lynx Trapped'].rolling(window=window_size).mean()

# 시각화
plt.figure(figsize = (10, 5))
plt.plot(lynx_df.index, lynx_df['Number of Lynx Trapped'], label = 'Number of Lynx Trapped')
plt.plot(lynx_df.index, lynx_df['Rolling Mean'], label = f'Rolling Mean (window={window_size})', color = 'red')
plt.title('Lynx Trappings and Rolling Mean')
plt.xlabel('Year')
plt.ylabel('Number of Lynx Trapped')
plt.legend()
plt.grid(True)
plt.show()
In [4]:
# 2. 분산이 일정한지 확인 (이동 분산 계산)
lynx_df['Rolling Variance'] = lynx_df['Number of Lynx Trapped'].rolling(window = window_size).var()

plt.figure(figsize = (10, 5))
plt.plot(lynx_df.index, lynx_df['Rolling Variance'], label = 'Rolling Variance (window=10)', color = 'green')
plt.title('Rolling Variance of Lynx Trappings')
plt.xlabel('Year')
plt.ylabel('Variance')
plt.legend()
plt.grid(True)
plt.show()
In [5]:
import numpy as np
# 시차 h=10으로 데이터 생성
lag = 10
y_t = lynx_df['Number of Lynx Trapped'][:-lag].reset_index(drop=True)
y_t_plus_10 = lynx_df['Number of Lynx Trapped'][lag:].reset_index(drop=True)

# y_t와 y_t+10를 하나의 데이터프레임으로 결합
yt_yt_plus_10_df = pd.DataFrame({
    'y_t': y_t,
    'y_t+10': y_t_plus_10
})

# y_t와 y_t+10의 공분산 계산
covariance_10 = np.cov(yt_yt_plus_10_df['y_t'], yt_yt_plus_10_df['y_t+10'])[0, 1]

display(yt_yt_plus_10_df.head(10))
print(f'공분산은 {covariance_10:.2f} 입니다.')
y_t y_t+10
0 269.0 523.0
1 321.0 98.0
2 585.0 184.0
3 871.0 279.0
4 1475.0 409.0
5 2821.0 2285.0
6 3928.0 2685.0
7 5943.0 3409.0
8 4950.0 1824.0
9 2577.0 409.0
공분산은 1415574.72 입니다.
In [ ]: