{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Untitled22.ipynb의 사본","provenance":[{"file_id":"1pVTE1tidKD-A_m88Eq3OeZFSmi-tiUD3","timestamp":1658459249897}],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":null,"metadata":{"id":"t71SAVRfAJfQ"},"outputs":[],"source":["# 패키지 <- pandas \"파이썬에서 엑셀처럼 데이터를 다루게 도와주는 툴\"\n","\n","import pandas as pd #"]},{"cell_type":"code","source":[""],"metadata":{"id":"17fa_kRB3lMg"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["dataset = pd.read_csv(\"발전량데이터.csv\", encoding=\"cp949\")"],"metadata":{"id":"mS1fPO7MArdY"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["dataset.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"Q7xxHq6pA3tD","executionInfo":{"status":"ok","timestamp":1658458211533,"user_tz":-540,"elapsed":18,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"a7eb599b-5c86-477b-b691-ebd4c67d2319"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" 일자 일기상태 미세먼지 운량 최고기온 습도 강수량 시정거리 일사량 발전량\n","0 2017-01-26 맑음 62.47 0.00 8.6 51.0 0.0 10.4 13.6 2230.4\n","1 2017-01-27 맑음 44.53 2.41 8.1 44.6 0.0 14.7 13.2 2326.8\n","2 2017-01-28 맑음 38.88 0.35 10.8 37.0 0.0 10.7 13.6 2477.7\n","3 2017-01-29 흐림 37.18 7.35 6.7 99.0 4.0 9.4 2.4 241.4\n","4 2017-01-30 맑음 35.82 2.41 3.5 54.6 0.0 13.4 12.2 2768.2"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
일자일기상태미세먼지운량최고기온습도강수량시정거리일사량발전량
02017-01-26맑음62.470.008.651.00.010.413.62230.4
12017-01-27맑음44.532.418.144.60.014.713.22326.8
22017-01-28맑음38.880.3510.837.00.010.713.62477.7
32017-01-29흐림37.187.356.799.04.09.42.4241.4
42017-01-30맑음35.822.413.554.60.013.412.22768.2
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":5}]},{"cell_type":"code","source":["dataset.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CnZlqTLECTM2","executionInfo":{"status":"ok","timestamp":1658458211534,"user_tz":-540,"elapsed":14,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"b118e7dc-ba50-4671-c144-5420279bd80c"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","RangeIndex: 1355 entries, 0 to 1354\n","Data columns (total 10 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 일자 1355 non-null object \n"," 1 일기상태 1355 non-null object \n"," 2 미세먼지 1355 non-null float64\n"," 3 운량 1355 non-null float64\n"," 4 최고기온 1355 non-null float64\n"," 5 습도 1355 non-null float64\n"," 6 강수량 1355 non-null float64\n"," 7 시정거리 1355 non-null float64\n"," 8 일사량 1355 non-null float64\n"," 9 발전량 1355 non-null float64\n","dtypes: float64(8), object(2)\n","memory usage: 106.0+ KB\n"]}]},{"cell_type":"code","source":["nu_dataset = dataset.select_dtypes(exclude=\"object\")\n","nu_dataset.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"4V-hzTbyCds6","executionInfo":{"status":"ok","timestamp":1658458211534,"user_tz":-540,"elapsed":11,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"6391d52a-2d17-4f3a-cb58-59d0c3e537cb"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" 미세먼지 운량 최고기온 습도 강수량 시정거리 일사량 발전량\n","0 62.47 0.00 8.6 51.0 0.0 10.4 13.6 2230.4\n","1 44.53 2.41 8.1 44.6 0.0 14.7 13.2 2326.8\n","2 38.88 0.35 10.8 37.0 0.0 10.7 13.6 2477.7\n","3 37.18 7.35 6.7 99.0 4.0 9.4 2.4 241.4\n","4 35.82 2.41 3.5 54.6 0.0 13.4 12.2 2768.2"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
미세먼지운량최고기온습도강수량시정거리일사량발전량
062.470.008.651.00.010.413.62230.4
144.532.418.144.60.014.713.22326.8
238.880.3510.837.00.010.713.62477.7
337.187.356.799.04.09.42.4241.4
435.822.413.554.60.013.412.22768.2
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":7}]},{"cell_type":"code","source":["nu_dataset.corr() #상관관계 분석."],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":300},"id":"ilQ59KJdCdwZ","executionInfo":{"status":"ok","timestamp":1658458211873,"user_tz":-540,"elapsed":348,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"563c5db5-ef4b-43c9-bae1-d4d53c543e87"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" 미세먼지 운량 최고기온 습도 강수량 시정거리 일사량 \\\n","미세먼지 1.000000 -0.305240 0.103233 -0.206933 -0.183921 -0.096737 0.311603 \n","운량 -0.305240 1.000000 0.007060 0.697639 0.380827 -0.077140 -0.692041 \n","최고기온 0.103233 0.007060 1.000000 0.330682 0.026661 0.297134 0.454338 \n","습도 -0.206933 0.697639 0.330682 1.000000 0.401392 -0.021677 -0.503302 \n","강수량 -0.183921 0.380827 0.026661 0.401392 1.000000 -0.151036 -0.354614 \n","시정거리 -0.096737 -0.077140 0.297134 -0.021677 -0.151036 1.000000 0.257025 \n","일사량 0.311603 -0.692041 0.454338 -0.503302 -0.354614 0.257025 1.000000 \n","발전량 0.272666 -0.750304 0.248556 -0.608955 -0.350552 0.179736 0.901092 \n","\n"," 발전량 \n","미세먼지 0.272666 \n","운량 -0.750304 \n","최고기온 0.248556 \n","습도 -0.608955 \n","강수량 -0.350552 \n","시정거리 0.179736 \n","일사량 0.901092 \n","발전량 1.000000 "],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
미세먼지운량최고기온습도강수량시정거리일사량발전량
미세먼지1.000000-0.3052400.103233-0.206933-0.183921-0.0967370.3116030.272666
운량-0.3052401.0000000.0070600.6976390.380827-0.077140-0.692041-0.750304
최고기온0.1032330.0070601.0000000.3306820.0266610.2971340.4543380.248556
습도-0.2069330.6976390.3306821.0000000.401392-0.021677-0.503302-0.608955
강수량-0.1839210.3808270.0266610.4013921.000000-0.151036-0.354614-0.350552
시정거리-0.096737-0.0771400.297134-0.021677-0.1510361.0000000.2570250.179736
일사량0.311603-0.6920410.454338-0.503302-0.3546140.2570251.0000000.901092
발전량0.272666-0.7503040.248556-0.608955-0.3505520.1797360.9010921.000000
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":8}]},{"cell_type":"code","source":["#범주형 변수 데이터 레이블링 진행 (모델에 input 시키기 위함)\n","\n","from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()"],"metadata":{"id":"7nS5jbjECjwF"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["dataset[\"일기상태\"] = le.fit_transform(dataset[\"일기상태\"])"],"metadata":{"id":"tzByMAOECpV5"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# 독립변수와 종속변수 분리\n","\n","v_name = ['일기상태', '미세먼지', '운량', '최고기온', '습도', '강수량', '시정거리', '일사량'] #독립변수 8개\n","target_name = '발전량' # 1개 \n","\n","target = dataset[target_name]\n","input_data = dataset[v_name]"],"metadata":{"id":"TmFcdl_AC2ur"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["target.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fewteT8HDCiY","executionInfo":{"status":"ok","timestamp":1658458212224,"user_tz":-540,"elapsed":23,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"a8dcbbf8-de2f-40fe-f6aa-27effef04447"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 2230.4\n","1 2326.8\n","2 2477.7\n","3 241.4\n","4 2768.2\n","Name: 발전량, dtype: float64"]},"metadata":{},"execution_count":12}]},{"cell_type":"code","source":["input_data.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"niVqfI3mDNcM","executionInfo":{"status":"ok","timestamp":1658458212225,"user_tz":-540,"elapsed":19,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"d89a13c4-b1dc-4fb8-f8da-c43161ca3a94"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" 일기상태 미세먼지 운량 최고기온 습도 강수량 시정거리 일사량\n","0 2 62.47 0.00 8.6 51.0 0.0 10.4 13.6\n","1 2 44.53 2.41 8.1 44.6 0.0 14.7 13.2\n","2 2 38.88 0.35 10.8 37.0 0.0 10.7 13.6\n","3 4 37.18 7.35 6.7 99.0 4.0 9.4 2.4\n","4 2 35.82 2.41 3.5 54.6 0.0 13.4 12.2"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
일기상태미세먼지운량최고기온습도강수량시정거리일사량
0262.470.008.651.00.010.413.6
1244.532.418.144.60.014.713.2
2238.880.3510.837.00.010.713.6
3437.187.356.799.04.09.42.4
4235.822.413.554.60.013.412.2
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":13}]},{"cell_type":"code","source":["# train-validation 분리\n","\n","from sklearn.model_selection import train_test_split\n","\n","train_x, test_x, train_y, test_y = train_test_split(input_data, target, test_size=0.3, random_state = 1)"],"metadata":{"id":"Q2FdrxsTDPFn"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train_x.shape, test_x.shape, train_y.shape, test_y.shape"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"9eGswuSTDWjz","executionInfo":{"status":"ok","timestamp":1658458212226,"user_tz":-540,"elapsed":14,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"abd1f8f0-0894-4d34-d05e-57db82a7c28a"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((948, 8), (407, 8), (948,), (407,))"]},"metadata":{},"execution_count":15}]},{"cell_type":"code","source":["from sklearn.linear_model import LinearRegression #회귀분석모델\n","from xgboost import XGBRegressor #XGB 모델\n","\n","from sklearn.metrics import r2_score"],"metadata":{"id":"XqHoj0iDDYG3"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# model = LinearRegression()\n","model = XGBRegressor(max_depth=3, random_state=3)"],"metadata":{"id":"WEFq4XUFDbMb"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["model.fit(train_x, train_y) "],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"mkkFa_TyDapq","executionInfo":{"status":"ok","timestamp":1658458212553,"user_tz":-540,"elapsed":5,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"f71d3852-56bd-45ff-d9f3-c8a2f8e575d6"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[02:50:11] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of 
reg:squarederror.\n"]},{"output_type":"execute_result","data":{"text/plain":["XGBRegressor(random_state=3)"]},"metadata":{},"execution_count":18}]},{"cell_type":"code","source":["pred_y=model.predict(test_x)\n","r2=r2_score(test_y, pred_y)\n","\n","r2"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"KwYIJ9XTED5-","executionInfo":{"status":"ok","timestamp":1658458212554,"user_tz":-540,"elapsed":5,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"83fb8265-783a-456f-b526-61a46d4a93dc"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.8462919983806847"]},"metadata":{},"execution_count":19}]},{"cell_type":"code","source":[""],"metadata":{"id":"FPu0ftVe3mcG"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["https://colab.research.google.com/drive/1Evz1DF1esLSjnOoK-XXuky1U3K3sWupi"],"metadata":{"id":"Gv2os9Md3mnb"}}]}