{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Untitled22.ipynb의 사본","provenance":[{"file_id":"1pVTE1tidKD-A_m88Eq3OeZFSmi-tiUD3","timestamp":1658459249897}],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":null,"metadata":{"id":"t71SAVRfAJfQ"},"outputs":[],"source":["# pandas: a package for handling tabular data in Python, much like a spreadsheet\n","\n","import pandas as pd"]},{"cell_type":"code","source":["# Load the power-generation CSV; the file is read with the cp949 encoding\n","dataset = pd.read_csv(\"발전량데이터.csv\", encoding=\"cp949\")"],"metadata":{"id":"mS1fPO7MArdY"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["dataset.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"Q7xxHq6pA3tD","executionInfo":{"status":"ok","timestamp":1658458211533,"user_tz":-540,"elapsed":18,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"a7eb599b-5c86-477b-b691-ebd4c67d2319"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" 일자 일기상태 미세먼지 운량 최고기온 습도 강수량 시정거리 일사량 발전량\n","0 2017-01-26 맑음 62.47 0.00 8.6 51.0 0.0 10.4 13.6 2230.4\n","1 2017-01-27 맑음 44.53 2.41 8.1 44.6 0.0 14.7 13.2 2326.8\n","2 2017-01-28 맑음 38.88 0.35 10.8 37.0 0.0 10.7 13.6 2477.7\n","3 2017-01-29 흐림 37.18 7.35 6.7 99.0 4.0 9.4 2.4 241.4\n","4 2017-01-30 맑음 35.82 2.41 3.5 54.6 0.0 13.4 12.2 2768.2"],"text/html":["\n","
\n","
\n","
\n","\n","
\n"," \n"," \n"," | \n"," 일자 | \n"," 일기상태 | \n"," 미세먼지 | \n"," 운량 | \n"," 최고기온 | \n"," 습도 | \n"," 강수량 | \n"," 시정거리 | \n"," 일사량 | \n"," 발전량 | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 2017-01-26 | \n"," 맑음 | \n"," 62.47 | \n"," 0.00 | \n"," 8.6 | \n"," 51.0 | \n"," 0.0 | \n"," 10.4 | \n"," 13.6 | \n"," 2230.4 | \n","
\n"," \n"," 1 | \n"," 2017-01-27 | \n"," 맑음 | \n"," 44.53 | \n"," 2.41 | \n"," 8.1 | \n"," 44.6 | \n"," 0.0 | \n"," 14.7 | \n"," 13.2 | \n"," 2326.8 | \n","
\n"," \n"," 2 | \n"," 2017-01-28 | \n"," 맑음 | \n"," 38.88 | \n"," 0.35 | \n"," 10.8 | \n"," 37.0 | \n"," 0.0 | \n"," 10.7 | \n"," 13.6 | \n"," 2477.7 | \n","
\n"," \n"," 3 | \n"," 2017-01-29 | \n"," 흐림 | \n"," 37.18 | \n"," 7.35 | \n"," 6.7 | \n"," 99.0 | \n"," 4.0 | \n"," 9.4 | \n"," 2.4 | \n"," 241.4 | \n","
\n"," \n"," 4 | \n"," 2017-01-30 | \n"," 맑음 | \n"," 35.82 | \n"," 2.41 | \n"," 3.5 | \n"," 54.6 | \n"," 0.0 | \n"," 13.4 | \n"," 12.2 | \n"," 2768.2 | \n","
\n"," \n","
\n","
\n","
\n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":5}]},{"cell_type":"code","source":["dataset.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CnZlqTLECTM2","executionInfo":{"status":"ok","timestamp":1658458211534,"user_tz":-540,"elapsed":14,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"b118e7dc-ba50-4671-c144-5420279bd80c"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","RangeIndex: 1355 entries, 0 to 1354\n","Data columns (total 10 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 일자 1355 non-null object \n"," 1 일기상태 1355 non-null object \n"," 2 미세먼지 1355 non-null float64\n"," 3 운량 1355 non-null float64\n"," 4 최고기온 1355 non-null float64\n"," 5 습도 1355 non-null float64\n"," 6 강수량 1355 non-null float64\n"," 7 시정거리 1355 non-null float64\n"," 8 일사량 1355 non-null float64\n"," 9 발전량 1355 non-null float64\n","dtypes: float64(8), object(2)\n","memory usage: 106.0+ KB\n"]}]},{"cell_type":"code","source":["# Exclude the object-dtype columns before the correlation analysis\n","nu_dataset = dataset.select_dtypes(exclude=[\"object\"])\n","nu_dataset.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"4V-hzTbyCds6","executionInfo":{"status":"ok","timestamp":1658458211534,"user_tz":-540,"elapsed":11,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"6391d52a-2d17-4f3a-cb58-59d0c3e537cb"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" 미세먼지 운량 최고기온 습도 강수량 시정거리 일사량 발전량\n","0 62.47 0.00 8.6 51.0 0.0 10.4 13.6 2230.4\n","1 44.53 2.41 8.1 44.6 0.0 14.7 13.2 2326.8\n","2 38.88 0.35 10.8 37.0 0.0 10.7 13.6 2477.7\n","3 37.18 7.35 6.7 99.0 4.0 9.4 2.4 241.4\n","4 35.82 2.41 3.5 54.6 0.0 13.4 12.2 2768.2"],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," | \n"," 미세먼지 | \n"," 운량 | \n"," 최고기온 | \n"," 습도 | \n"," 강수량 | \n"," 시정거리 | \n"," 일사량 | \n"," 발전량 | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 62.47 | \n"," 0.00 | \n"," 8.6 | \n"," 51.0 | \n"," 0.0 | \n"," 10.4 | \n"," 13.6 | \n"," 2230.4 | \n","
\n"," \n"," 1 | \n"," 44.53 | \n"," 2.41 | \n"," 8.1 | \n"," 44.6 | \n"," 0.0 | \n"," 14.7 | \n"," 13.2 | \n"," 2326.8 | \n","
\n"," \n"," 2 | \n"," 38.88 | \n"," 0.35 | \n"," 10.8 | \n"," 37.0 | \n"," 0.0 | \n"," 10.7 | \n"," 13.6 | \n"," 2477.7 | \n","
\n"," \n"," 3 | \n"," 37.18 | \n"," 7.35 | \n"," 6.7 | \n"," 99.0 | \n"," 4.0 | \n"," 9.4 | \n"," 2.4 | \n"," 241.4 | \n","
\n"," \n"," 4 | \n"," 35.82 | \n"," 2.41 | \n"," 3.5 | \n"," 54.6 | \n"," 0.0 | \n"," 13.4 | \n"," 12.2 | \n"," 2768.2 | \n","
\n"," \n","
\n","
\n","
\n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":7}]},{"cell_type":"code","source":["# Correlation analysis: pairwise Pearson correlation of the numeric columns\n","nu_dataset.corr(method=\"pearson\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":300},"id":"ilQ59KJdCdwZ","executionInfo":{"status":"ok","timestamp":1658458211873,"user_tz":-540,"elapsed":348,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"563c5db5-ef4b-43c9-bae1-d4d53c543e87"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" 미세먼지 운량 최고기온 습도 강수량 시정거리 일사량 \\\n","미세먼지 1.000000 -0.305240 0.103233 -0.206933 -0.183921 -0.096737 0.311603 \n","운량 -0.305240 1.000000 0.007060 0.697639 0.380827 -0.077140 -0.692041 \n","최고기온 0.103233 0.007060 1.000000 0.330682 0.026661 0.297134 0.454338 \n","습도 -0.206933 0.697639 0.330682 1.000000 0.401392 -0.021677 -0.503302 \n","강수량 -0.183921 0.380827 0.026661 0.401392 1.000000 -0.151036 -0.354614 \n","시정거리 -0.096737 -0.077140 0.297134 -0.021677 -0.151036 1.000000 0.257025 \n","일사량 0.311603 -0.692041 0.454338 -0.503302 -0.354614 0.257025 1.000000 \n","발전량 0.272666 -0.750304 0.248556 -0.608955 -0.350552 0.179736 0.901092 \n","\n"," 발전량 \n","미세먼지 0.272666 \n","운량 -0.750304 \n","최고기온 0.248556 \n","습도 -0.608955 \n","강수량 -0.350552 \n","시정거리 0.179736 \n","일사량 0.901092 \n","발전량 1.000000 "],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," | \n"," 미세먼지 | \n"," 운량 | \n"," 최고기온 | \n"," 습도 | \n"," 강수량 | \n"," 시정거리 | \n"," 일사량 | \n"," 발전량 | \n","
\n"," \n"," \n"," \n"," 미세먼지 | \n"," 1.000000 | \n"," -0.305240 | \n"," 0.103233 | \n"," -0.206933 | \n"," -0.183921 | \n"," -0.096737 | \n"," 0.311603 | \n"," 0.272666 | \n","
\n"," \n"," 운량 | \n"," -0.305240 | \n"," 1.000000 | \n"," 0.007060 | \n"," 0.697639 | \n"," 0.380827 | \n"," -0.077140 | \n"," -0.692041 | \n"," -0.750304 | \n","
\n"," \n"," 최고기온 | \n"," 0.103233 | \n"," 0.007060 | \n"," 1.000000 | \n"," 0.330682 | \n"," 0.026661 | \n"," 0.297134 | \n"," 0.454338 | \n"," 0.248556 | \n","
\n"," \n"," 습도 | \n"," -0.206933 | \n"," 0.697639 | \n"," 0.330682 | \n"," 1.000000 | \n"," 0.401392 | \n"," -0.021677 | \n"," -0.503302 | \n"," -0.608955 | \n","
\n"," \n"," 강수량 | \n"," -0.183921 | \n"," 0.380827 | \n"," 0.026661 | \n"," 0.401392 | \n"," 1.000000 | \n"," -0.151036 | \n"," -0.354614 | \n"," -0.350552 | \n","
\n"," \n"," 시정거리 | \n"," -0.096737 | \n"," -0.077140 | \n"," 0.297134 | \n"," -0.021677 | \n"," -0.151036 | \n"," 1.000000 | \n"," 0.257025 | \n"," 0.179736 | \n","
\n"," \n"," 일사량 | \n"," 0.311603 | \n"," -0.692041 | \n"," 0.454338 | \n"," -0.503302 | \n"," -0.354614 | \n"," 0.257025 | \n"," 1.000000 | \n"," 0.901092 | \n","
\n"," \n"," 발전량 | \n"," 0.272666 | \n"," -0.750304 | \n"," 0.248556 | \n"," -0.608955 | \n"," -0.350552 | \n"," 0.179736 | \n"," 0.901092 | \n"," 1.000000 | \n","
\n"," \n","
\n","
\n","
\n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":8}]},{"cell_type":"code","source":["# Label-encode the categorical variable so that it can be fed to the model\n","\n","from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()"],"metadata":{"id":"7nS5jbjECjwF"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Write the encoded column into a new frame instead of overwriting `dataset`:\n","# the raw labels stay available, and re-running this cell never refits the\n","# encoder on already-encoded integers (which would lose the label mapping).\n","encoded_dataset = dataset.assign(**{\"일기상태\": le.fit_transform(dataset[\"일기상태\"])})"],"metadata":{"id":"tzByMAOECpV5"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Separate the independent variables (features) from the dependent variable (target)\n","\n","v_name = ['일기상태', '미세먼지', '운량', '최고기온', '습도', '강수량', '시정거리', '일사량'] # 8 independent variables\n","target_name = '발전량' # 1 dependent variable\n","\n","target = encoded_dataset[target_name]\n","input_data = encoded_dataset[v_name]"],"metadata":{"id":"TmFcdl_AC2ur"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["target.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fewteT8HDCiY","executionInfo":{"status":"ok","timestamp":1658458212224,"user_tz":-540,"elapsed":23,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"a8dcbbf8-de2f-40fe-f6aa-27effef04447"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 2230.4\n","1 2326.8\n","2 2477.7\n","3 241.4\n","4 2768.2\n","Name: 발전량, dtype: float64"]},"metadata":{},"execution_count":12}]},{"cell_type":"code","source":["input_data.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"niVqfI3mDNcM","executionInfo":{"status":"ok","timestamp":1658458212225,"user_tz":-540,"elapsed":19,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"d89a13c4-b1dc-4fb8-f8da-c43161ca3a94"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" 일기상태 미세먼지 운량 최고기온 습도 강수량 시정거리 일사량\n","0 2 62.47 0.00 8.6 51.0 0.0 10.4 13.6\n","1 2 44.53 2.41 8.1 44.6 0.0 14.7 13.2\n","2 2 38.88 0.35 10.8 37.0 0.0 10.7 13.6\n","3 4 37.18 7.35 6.7 99.0 4.0 9.4 2.4\n","4 2 35.82 2.41 3.5 54.6 0.0 13.4 12.2"],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," | \n"," 일기상태 | \n"," 미세먼지 | \n"," 운량 | \n"," 최고기온 | \n"," 습도 | \n"," 강수량 | \n"," 시정거리 | \n"," 일사량 | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 2 | \n"," 62.47 | \n"," 0.00 | \n"," 8.6 | \n"," 51.0 | \n"," 0.0 | \n"," 10.4 | \n"," 13.6 | \n","
\n"," \n"," 1 | \n"," 2 | \n"," 44.53 | \n"," 2.41 | \n"," 8.1 | \n"," 44.6 | \n"," 0.0 | \n"," 14.7 | \n"," 13.2 | \n","
\n"," \n"," 2 | \n"," 2 | \n"," 38.88 | \n"," 0.35 | \n"," 10.8 | \n"," 37.0 | \n"," 0.0 | \n"," 10.7 | \n"," 13.6 | \n","
\n"," \n"," 3 | \n"," 4 | \n"," 37.18 | \n"," 7.35 | \n"," 6.7 | \n"," 99.0 | \n"," 4.0 | \n"," 9.4 | \n"," 2.4 | \n","
\n"," \n"," 4 | \n"," 2 | \n"," 35.82 | \n"," 2.41 | \n"," 3.5 | \n"," 54.6 | \n"," 0.0 | \n"," 13.4 | \n"," 12.2 | \n","
\n"," \n","
\n","
\n","
\n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":13}]},{"cell_type":"code","source":["# Split into a training set and a held-out test set (30% held out)\n","\n","from sklearn.model_selection import train_test_split\n","\n","train_x, test_x, train_y, test_y = train_test_split(input_data, target, test_size=0.3, random_state = 1)"],"metadata":{"id":"Q2FdrxsTDPFn"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train_x.shape, test_x.shape, train_y.shape, test_y.shape"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"9eGswuSTDWjz","executionInfo":{"status":"ok","timestamp":1658458212226,"user_tz":-540,"elapsed":14,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"abd1f8f0-0894-4d34-d05e-57db82a7c28a"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((948, 8), (407, 8), (948,), (407,))"]},"metadata":{},"execution_count":15}]},{"cell_type":"code","source":["from sklearn.linear_model import LinearRegression # linear regression model\n","from xgboost import XGBRegressor # XGBoost regression model\n","\n","from sklearn.metrics import r2_score"],"metadata":{"id":"XqHoj0iDDYG3"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Alternative baseline: model = LinearRegression()\n","# The objective is named explicitly because the old default, reg:linear, is\n","# deprecated in favor of reg:squarederror and logs a warning on every fit.\n","model = XGBRegressor(objective=\"reg:squarederror\", max_depth=3, random_state=3)"],"metadata":{"id":"WEFq4XUFDbMb"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["model.fit(train_x, train_y)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"mkkFa_TyDapq","executionInfo":{"status":"ok","timestamp":1658458212553,"user_tz":-540,"elapsed":5,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"f71d3852-56bd-45ff-d9f3-c8a2f8e575d6"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Score the fitted model on the held-out test set with R^2\n","pred_y = model.predict(test_x)\n","r2 = r2_score(test_y, pred_y)\n","\n","r2"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"KwYIJ9XTED5-","executionInfo":{"status":"ok","timestamp":1658458212554,"user_tz":-540,"elapsed":5,"user":{"displayName":"최우석","userId":"07578836508955032439"}},"outputId":"83fb8265-783a-456f-b526-61a46d4a93dc"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.8462919983806847"]},"metadata":{},"execution_count":19}]},{"cell_type":"markdown","source":["https://colab.research.google.com/drive/1Evz1DF1esLSjnOoK-XXuky1U3K3sWupi"],"metadata":{"id":"Gv2os9Md3mnb"}}]}