{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "파일 다운로드 완료\n", "====================\n", "\n", "데이터셋: 사용자이탈예측\n", "파일경로: data\\class.csv\n", "\n", "====================\n", "파일 다운로드 완료\n", "====================\n", "\n", "데이터셋: 사용자이탈예측\n", "파일경로: data\\event.csv\n", "\n", "====================\n", "파일 다운로드 완료\n", "====================\n", "\n", "데이터셋: 사용자이탈예측\n", "파일경로: data\\logs.csv\n", "\n", "====================\n", "파일 다운로드 완료\n", "====================\n", "\n", "데이터셋: 사용자이탈예측\n", "파일경로: data\\user_info.csv\n", "\n", "====================\n" ] } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "# Use the %pip magic rather than !pip: %pip targets the environment of the\n", "# running kernel, whereas !pip runs whatever pip is first on the shell PATH.\n", "%pip install --upgrade mySUNI -q\n", "from mySUNI import cds\n", "\n", "# Download the dataset's CSV files (class, event, logs, user_info) into data/.\n", "cds.download_data('사용자이탈예측')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
loguseriddate
0L00000049012330AS0093732018-04-01
1L00000049012331AS0153152018-04-01
2L00000049012332AS0408412018-04-01
3L00000049012333AS0465942018-04-01
4L00000049012334AS0732852018-04-01
............
197423L00000049209753TS9777032019-03-31
197424L00000049209754TS9795502019-03-31
197425L00000049209755TS9952992019-03-31
197426L00000049209756TS9958532019-03-31
197427L00000049209757TS9990792019-03-31
\n", "

197428 rows × 3 columns

\n", "
" ], "text/plain": [ " log userid date\n", "0 L00000049012330 AS009373 2018-04-01\n", "1 L00000049012331 AS015315 2018-04-01\n", "2 L00000049012332 AS040841 2018-04-01\n", "3 L00000049012333 AS046594 2018-04-01\n", "4 L00000049012334 AS073285 2018-04-01\n", "... ... ... ...\n", "197423 L00000049209753 TS977703 2019-03-31\n", "197424 L00000049209754 TS979550 2019-03-31\n", "197425 L00000049209755 TS995299 2019-03-31\n", "197426 L00000049209756 TS995853 2019-03-31\n", "197427 L00000049209757 TS999079 2019-03-31\n", "\n", "[197428 rows x 3 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# logs.csv 파일을 읽으세요.\n", "df_logs = pd.read_csv('data/logs.csv')\n", "df_logs" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deleted
0OA832399C01F2015-05-01 0:00NaNCA10
1PL270116C01M2015-05-01 0:00NaNCA10
2OA974876C01M2015-05-01 0:00NaNCA10
3HD024127C01F2015-05-01 0:00NaNCA1-1
4HD661448C03F2015-05-01 0:00NaNCA1-1
........................
4187HD676663C01M2019-03-14 0:00NaNCA10
4188HD246549C01F2019-03-14 0:00NaNCA10
4189GD037007C03M2019-03-14 0:00NaNCA10
4190OA953150C01M2019-03-14 0:00NaNCA1-1
4191IK692635C02F2019-03-15 0:00NaNCA10
\n", "

4192 rows × 7 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted\n", "0 OA832399 C01 F 2015-05-01 0:00 NaN CA1 0\n", "1 PL270116 C01 M 2015-05-01 0:00 NaN CA1 0\n", "2 OA974876 C01 M 2015-05-01 0:00 NaN CA1 0\n", "3 HD024127 C01 F 2015-05-01 0:00 NaN CA1 -1\n", "4 HD661448 C03 F 2015-05-01 0:00 NaN CA1 -1\n", "... ... ... ... ... ... ... ...\n", "4187 HD676663 C01 M 2019-03-14 0:00 NaN CA1 0\n", "4188 HD246549 C01 F 2019-03-14 0:00 NaN CA1 0\n", "4189 GD037007 C03 M 2019-03-14 0:00 NaN CA1 0\n", "4190 OA953150 C01 M 2019-03-14 0:00 NaN CA1 -1\n", "4191 IK692635 C02 F 2019-03-15 0:00 NaN CA1 0\n", "\n", "[4192 rows x 7 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# user_info.csv 파일을 읽으세요.\n", "df_user_info = pd.read_csv('data/user_info.csv')\n", "df_user_info" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
classclass_nameprice
0C01all10000
1C02day7000
2C03night5000
\n", "
" ], "text/plain": [ " class class_name price\n", "0 C01 all 10000\n", "1 C02 day 7000\n", "2 C03 night 5000" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# class.csv 파일을 읽으세요.\n", "df_class = pd.read_csv('data/class.csv')\n", "df_class" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_type\\tevent_name
0CA1\\tNone
1CA2\\tSale
2CA3\\tFree
\n", "
" ], "text/plain": [ " event_type\\tevent_name\n", "0 CA1\\tNone\n", "1 CA2\\tSale\n", "2 CA3\\tFree" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# event.csv 파일을 읽으세요.\n", "df_event = pd.read_csv('data/event.csv')\n", "df_event" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_typeevent_name
0CA1None
1CA2Sale
2CA3Free
\n", "
" ], "text/plain": [ " event_type event_name\n", "0 CA1 None\n", "1 CA2 Sale\n", "2 CA3 Free" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# event.csv 파일을 다시 읽으세요.\n", "# 구분자는 \\t 입니다.\n", "df_event = pd.read_csv('data/event.csv', sep='\\t')\n", "df_event" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_nameprice
0OA832399C01F2015-05-01 0:00NaNCA10all10000
1PL270116C01M2015-05-01 0:00NaNCA10all10000
2OA974876C01M2015-05-01 0:00NaNCA10all10000
3HD024127C01F2015-05-01 0:00NaNCA1-1all10000
4IK271057C01M2015-05-01 0:00NaNCA10all10000
..............................
4187IK947853C02F2019-03-09 0:00NaNCA10day7000
4188OA301090C02F2019-03-12 0:00NaNCA10day7000
4189OA643695C02F2019-03-12 0:00NaNCA10day7000
4190HD522396C02F2019-03-13 0:00NaNCA10day7000
4191IK692635C02F2019-03-15 0:00NaNCA10day7000
\n", "

4192 rows × 9 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted \\\n", "0 OA832399 C01 F 2015-05-01 0:00 NaN CA1 0 \n", "1 PL270116 C01 M 2015-05-01 0:00 NaN CA1 0 \n", "2 OA974876 C01 M 2015-05-01 0:00 NaN CA1 0 \n", "3 HD024127 C01 F 2015-05-01 0:00 NaN CA1 -1 \n", "4 IK271057 C01 M 2015-05-01 0:00 NaN CA1 0 \n", "... ... ... ... ... ... ... ... \n", "4187 IK947853 C02 F 2019-03-09 0:00 NaN CA1 0 \n", "4188 OA301090 C02 F 2019-03-12 0:00 NaN CA1 0 \n", "4189 OA643695 C02 F 2019-03-12 0:00 NaN CA1 0 \n", "4190 HD522396 C02 F 2019-03-13 0:00 NaN CA1 0 \n", "4191 IK692635 C02 F 2019-03-15 0:00 NaN CA1 0 \n", "\n", " class_name price \n", "0 all 10000 \n", "1 all 10000 \n", "2 all 10000 \n", "3 all 10000 \n", "4 all 10000 \n", "... ... ... \n", "4187 day 7000 \n", "4188 day 7000 \n", "4189 day 7000 \n", "4190 day 7000 \n", "4191 day 7000 \n", "\n", "[4192 rows x 9 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# 파편화된 데이터를 병합 합니다.\n", "# 1. df_user_info와 df_class을 class를 기준으로 병합하세요. \n", "user = pd.merge(df_user_info, df_class, on='class')\n", "user" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_namepriceevent_name
0OA832399C01F2015-05-01 0:00NaNCA10all10000None
1PL270116C01M2015-05-01 0:00NaNCA10all10000None
2OA974876C01M2015-05-01 0:00NaNCA10all10000None
3HD024127C01F2015-05-01 0:00NaNCA1-1all10000None
4IK271057C01M2015-05-01 0:00NaNCA10all10000None
.................................
4187IK562610C02F2018-12-13 0:002019-02-28 0:00CA31day7000Free
4188HI599354C02M2018-12-13 0:00NaNCA30day7000Free
4189GD796859C02F2018-12-14 0:00NaNCA30day7000Free
4190GD082270C02F2018-12-14 0:002019-02-28 0:00CA31day7000Free
4191OA426023C02F2018-12-14 0:00NaNCA30day7000Free
\n", "

4192 rows × 10 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type \\\n", "0 OA832399 C01 F 2015-05-01 0:00 NaN CA1 \n", "1 PL270116 C01 M 2015-05-01 0:00 NaN CA1 \n", "2 OA974876 C01 M 2015-05-01 0:00 NaN CA1 \n", "3 HD024127 C01 F 2015-05-01 0:00 NaN CA1 \n", "4 IK271057 C01 M 2015-05-01 0:00 NaN CA1 \n", "... ... ... ... ... ... ... \n", "4187 IK562610 C02 F 2018-12-13 0:00 2019-02-28 0:00 CA3 \n", "4188 HI599354 C02 M 2018-12-13 0:00 NaN CA3 \n", "4189 GD796859 C02 F 2018-12-14 0:00 NaN CA3 \n", "4190 GD082270 C02 F 2018-12-14 0:00 2019-02-28 0:00 CA3 \n", "4191 OA426023 C02 F 2018-12-14 0:00 NaN CA3 \n", "\n", " is_deleted class_name price event_name \n", "0 0 all 10000 None \n", "1 0 all 10000 None \n", "2 0 all 10000 None \n", "3 -1 all 10000 None \n", "4 0 all 10000 None \n", "... ... ... ... ... \n", "4187 1 day 7000 Free \n", "4188 0 day 7000 Free \n", "4189 0 day 7000 Free \n", "4190 1 day 7000 Free \n", "4191 0 day 7000 Free \n", "\n", "[4192 rows x 10 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# 2. 
Merge user with df_event on event_type.\n", "# This cell reads and overwrites `user`, so a plain merge is not re-runnable:\n", "# a second run would produce event_name_x / event_name_y. Dropping df_event's\n", "# non-key columns first makes the cell idempotent.\n", "event_cols = df_event.columns.drop('event_type')\n", "user = pd.merge(\n", "    user.drop(columns=event_cols, errors='ignore'),\n", "    df_event,\n", "    on='event_type',\n", "    validate='many_to_one',  # fail loudly if df_event repeats an event_type\n", ")\n", "user" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 4192 entries, 0 to 4191\n", "Data columns (total 10 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 userid 4192 non-null object\n", " 1 class 4192 non-null object\n", " 2 gender 4192 non-null object\n", " 3 start_date 4192 non-null object\n", " 4 end_date 1350 non-null object\n", " 5 event_type 4192 non-null object\n", " 6 is_deleted 4192 non-null int64 \n", " 7 class_name 4192 non-null object\n", " 8 price 4192 non-null int64 \n", " 9 event_name 4192 non-null object\n", "dtypes: int64(2), object(8)\n", "memory usage: 360.2+ KB\n" ] } ], "source": [ "# Enter your code below\n", "# Show summary information (dtypes, non-null counts) for the user DataFrame.\n", "user.info()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "userid 0\n", "class 0\n", "gender 0\n", "start_date 0\n", "end_date 2842\n", "event_type 0\n", "is_deleted 0\n", "class_name 0\n", "price 0\n", "event_name 0\n", "dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Enter your code below\n", "# Count the missing values in each column.\n", "user.isna().sum()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_namepriceevent_name
0OA832399C01F2015-05-01 0:00NaNCA10all10000None
1PL270116C01M2015-05-01 0:00NaNCA10all10000None
2OA974876C01M2015-05-01 0:00NaNCA10all10000None
3HD024127C01F2015-05-01 0:00NaNCA1-1all10000None
4IK271057C01M2015-05-01 0:00NaNCA10all10000None
.................................
4184AS310166C02F2018-12-12 0:00NaNCA30day7000Free
4186HD071461C02F2018-12-13 0:00NaNCA30day7000Free
4188HI599354C02M2018-12-13 0:00NaNCA30day7000Free
4189GD796859C02F2018-12-14 0:00NaNCA30day7000Free
4191OA426023C02F2018-12-14 0:00NaNCA30day7000Free
\n", "

2842 rows × 10 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted \\\n", "0 OA832399 C01 F 2015-05-01 0:00 NaN CA1 0 \n", "1 PL270116 C01 M 2015-05-01 0:00 NaN CA1 0 \n", "2 OA974876 C01 M 2015-05-01 0:00 NaN CA1 0 \n", "3 HD024127 C01 F 2015-05-01 0:00 NaN CA1 -1 \n", "4 IK271057 C01 M 2015-05-01 0:00 NaN CA1 0 \n", "... ... ... ... ... ... ... ... \n", "4184 AS310166 C02 F 2018-12-12 0:00 NaN CA3 0 \n", "4186 HD071461 C02 F 2018-12-13 0:00 NaN CA3 0 \n", "4188 HI599354 C02 M 2018-12-13 0:00 NaN CA3 0 \n", "4189 GD796859 C02 F 2018-12-14 0:00 NaN CA3 0 \n", "4191 OA426023 C02 F 2018-12-14 0:00 NaN CA3 0 \n", "\n", " class_name price event_name \n", "0 all 10000 None \n", "1 all 10000 None \n", "2 all 10000 None \n", "3 all 10000 None \n", "4 all 10000 None \n", "... ... ... ... \n", "4184 day 7000 Free \n", "4186 day 7000 Free \n", "4188 day 7000 Free \n", "4189 day 7000 Free \n", "4191 day 7000 Free \n", "\n", "[2842 rows x 10 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Enter your code below\n", "# Select and display only the rows whose end_date is missing.\n", "user[user['end_date'].isna()]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " 0 2131\n", "-1 711\n", "Name: is_deleted, dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Enter your code below\n", "# Count each is_deleted value among the rows whose end_date is missing.\n", "end_date_missing = user['end_date'].isna()\n", "user.loc[end_date_missing, 'is_deleted'].value_counts()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " 1 1013\n", "-1 337\n", "Name: is_deleted, dtype: int64" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Enter your code below\n", "# Count each is_deleted value among the rows whose end_date is present.\n", "user.loc[user['end_date'].notna(), 'is_deleted'].value_counts()" ] }, { "cell_type": 
"code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "class_name\n", "all 2045\n", "day 1019\n", "night 1128\n", "Name: userid, dtype: int64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# class_name을 기준으로 그룹을 생성하고 userid의 개수를 조회하세요.\n", "user.groupby('class_name')['userid'].count()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "event_name\n", "Free 492\n", "None 3050\n", "Sale 650\n", "Name: userid, dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# event_name을 기준으로 그룹을 생성하고 userid의 개수를 조회하세요.\n", "user.groupby('event_name')['userid'].count()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "gender\n", "F 1983\n", "M 2209\n", "Name: userid, dtype: int64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# gender를 기준으로 그룹을 생성하고 userid의 개수를 조회하세요.\n", "user.groupby('gender')['userid'].count()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "is_deleted\n", "-1 1048\n", " 0 2131\n", " 1 1013\n", "Name: userid, dtype: int64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# is_deleted를 기준으로 그룹을 생성하고 userid의 개수를 조회하세요.\n", "user.groupby('is_deleted')['userid'].count()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# 코드를 입력해 주세요\n", "# start_date 컬럼과 end_date 컬럼을 datetime 형태로 변환하세요\n", "user['start_date'] = pd.to_datetime(user['start_date'])\n", "user['end_date'] = pd.to_datetime(user['end_date'])" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", 
"Int64Index: 4192 entries, 0 to 4191\n", "Data columns (total 10 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 userid 4192 non-null object \n", " 1 class 4192 non-null object \n", " 2 gender 4192 non-null object \n", " 3 start_date 4192 non-null datetime64[ns]\n", " 4 end_date 1350 non-null datetime64[ns]\n", " 5 event_type 4192 non-null object \n", " 6 is_deleted 4192 non-null int64 \n", " 7 class_name 4192 non-null object \n", " 8 price 4192 non-null int64 \n", " 9 event_name 4192 non-null object \n", "dtypes: datetime64[ns](2), int64(2), object(6)\n", "memory usage: 360.2+ KB\n" ] } ], "source": [ "# 코드를 입력해 주세요\n", "# datetime으로 변경을 확인하기 위해 정보를 조회하세요\n", "user.info()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_namepriceevent_namestart_yearstart_monthstart_day
0OA832399C01F2015-05-01NaTCA10all10000None201551
1PL270116C01M2015-05-01NaTCA10all10000None201551
2OA974876C01M2015-05-01NaTCA10all10000None201551
3HD024127C01F2015-05-01NaTCA1-1all10000None201551
4IK271057C01M2015-05-01NaTCA10all10000None201551
..........................................
4187IK562610C02F2018-12-132019-02-28CA31day7000Free20181213
4188HI599354C02M2018-12-13NaTCA30day7000Free20181213
4189GD796859C02F2018-12-14NaTCA30day7000Free20181214
4190GD082270C02F2018-12-142019-02-28CA31day7000Free20181214
4191OA426023C02F2018-12-14NaTCA30day7000Free20181214
\n", "

4192 rows × 13 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted \\\n", "0 OA832399 C01 F 2015-05-01 NaT CA1 0 \n", "1 PL270116 C01 M 2015-05-01 NaT CA1 0 \n", "2 OA974876 C01 M 2015-05-01 NaT CA1 0 \n", "3 HD024127 C01 F 2015-05-01 NaT CA1 -1 \n", "4 IK271057 C01 M 2015-05-01 NaT CA1 0 \n", "... ... ... ... ... ... ... ... \n", "4187 IK562610 C02 F 2018-12-13 2019-02-28 CA3 1 \n", "4188 HI599354 C02 M 2018-12-13 NaT CA3 0 \n", "4189 GD796859 C02 F 2018-12-14 NaT CA3 0 \n", "4190 GD082270 C02 F 2018-12-14 2019-02-28 CA3 1 \n", "4191 OA426023 C02 F 2018-12-14 NaT CA3 0 \n", "\n", " class_name price event_name start_year start_month start_day \n", "0 all 10000 None 2015 5 1 \n", "1 all 10000 None 2015 5 1 \n", "2 all 10000 None 2015 5 1 \n", "3 all 10000 None 2015 5 1 \n", "4 all 10000 None 2015 5 1 \n", "... ... ... ... ... ... ... \n", "4187 day 7000 Free 2018 12 13 \n", "4188 day 7000 Free 2018 12 13 \n", "4189 day 7000 Free 2018 12 14 \n", "4190 day 7000 Free 2018 12 14 \n", "4191 day 7000 Free 2018 12 14 \n", "\n", "[4192 rows x 13 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# start_date 컬럼을 기반으로 start_year, start_month, start_day 컬럼을 추가하세요\n", "# dt 사용\n", "user['start_year'] = user['start_date'].dt.year\n", "user['start_month'] = user['start_date'].dt.month\n", "user['start_day'] = user['start_date'].dt.day\n", "user" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_namepriceevent_namestart_yearstart_monthstart_dayend_yearend_monthend_day
0OA832399C01F2015-05-01NaTCA10all10000None201551NaNNaNNaN
1PL270116C01M2015-05-01NaTCA10all10000None201551NaNNaNNaN
2OA974876C01M2015-05-01NaTCA10all10000None201551NaNNaNNaN
3HD024127C01F2015-05-01NaTCA1-1all10000None201551NaNNaNNaN
4IK271057C01M2015-05-01NaTCA10all10000None201551NaNNaNNaN
...................................................
4187IK562610C02F2018-12-132019-02-28CA31day7000Free201812132019.02.028.0
4188HI599354C02M2018-12-13NaTCA30day7000Free20181213NaNNaNNaN
4189GD796859C02F2018-12-14NaTCA30day7000Free20181214NaNNaNNaN
4190GD082270C02F2018-12-142019-02-28CA31day7000Free201812142019.02.028.0
4191OA426023C02F2018-12-14NaTCA30day7000Free20181214NaNNaNNaN
\n", "

4192 rows × 16 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted \\\n", "0 OA832399 C01 F 2015-05-01 NaT CA1 0 \n", "1 PL270116 C01 M 2015-05-01 NaT CA1 0 \n", "2 OA974876 C01 M 2015-05-01 NaT CA1 0 \n", "3 HD024127 C01 F 2015-05-01 NaT CA1 -1 \n", "4 IK271057 C01 M 2015-05-01 NaT CA1 0 \n", "... ... ... ... ... ... ... ... \n", "4187 IK562610 C02 F 2018-12-13 2019-02-28 CA3 1 \n", "4188 HI599354 C02 M 2018-12-13 NaT CA3 0 \n", "4189 GD796859 C02 F 2018-12-14 NaT CA3 0 \n", "4190 GD082270 C02 F 2018-12-14 2019-02-28 CA3 1 \n", "4191 OA426023 C02 F 2018-12-14 NaT CA3 0 \n", "\n", " class_name price event_name start_year start_month start_day \\\n", "0 all 10000 None 2015 5 1 \n", "1 all 10000 None 2015 5 1 \n", "2 all 10000 None 2015 5 1 \n", "3 all 10000 None 2015 5 1 \n", "4 all 10000 None 2015 5 1 \n", "... ... ... ... ... ... ... \n", "4187 day 7000 Free 2018 12 13 \n", "4188 day 7000 Free 2018 12 13 \n", "4189 day 7000 Free 2018 12 14 \n", "4190 day 7000 Free 2018 12 14 \n", "4191 day 7000 Free 2018 12 14 \n", "\n", " end_year end_month end_day \n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "... ... ... ... 
\n", "4187 2019.0 2.0 28.0 \n", "4188 NaN NaN NaN \n", "4189 NaN NaN NaN \n", "4190 2019.0 2.0 28.0 \n", "4191 NaN NaN NaN \n", "\n", "[4192 rows x 16 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# end_date 컬럼을 기반으로 end_year, end_month, end_day 컬럼을 추가하세요\n", "# dt 사용\n", "user['end_year'] = user['end_date'].dt.year\n", "user['end_month'] = user['end_date'].dt.month\n", "user['end_day'] = user['end_date'].dt.day\n", "user" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "start_year start_month\n", "2015 5 62\n", " 6 69\n", " 7 53\n", " 8 53\n", " 9 71\n", " 10 54\n", " 11 63\n", " 12 67\n", "2016 1 56\n", " 2 48\n", " 3 55\n", " 4 57\n", " 5 56\n", " 6 76\n", " 7 69\n", " 8 77\n", " 9 75\n", " 10 71\n", " 11 65\n", " 12 76\n", "2017 1 75\n", " 2 74\n", " 3 72\n", " 4 99\n", " 5 105\n", " 6 95\n", " 7 100\n", " 8 122\n", " 9 115\n", " 10 98\n", " 11 108\n", " 12 160\n", "2018 1 122\n", " 2 104\n", " 3 94\n", " 4 175\n", " 5 193\n", " 6 166\n", " 7 135\n", " 8 102\n", " 9 88\n", " 10 66\n", " 11 65\n", " 12 157\n", "2019 1 90\n", " 2 74\n", " 3 65\n", "Name: userid, dtype: int64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# start_year와 start_month 컬럼을 이용하여 월별 신규 회원의 수를 조회하세요\n", "user.groupby(['start_year', 'start_month'])['userid'].count()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Enter your code below\n", "# Visualize the monthly new sign-up counts computed above.\n", "ax = user.groupby(['start_year', 'start_month'])['userid'].count().plot()\n", "# Title and axis labels let the figure be read without the surrounding cells.\n", "ax.set(title='New sign-ups per month', xlabel='(start_year, start_month)', ylabel='Number of users')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "end_year end_month\n", "2018.0 4.0 121\n", " 5.0 125\n", " 6.0 106\n", " 7.0 102\n", " 8.0 113\n", " 9.0 104\n", " 10.0 90\n", " 11.0 89\n", " 12.0 111\n", "2019.0 1.0 129\n", " 2.0 149\n", " 3.0 111\n", "Name: userid, dtype: int64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Enter your code below\n", "# Using the end_year and end_month columns, count the members who left in each month.\n", "user.groupby(['end_year', 'end_month'])['userid'].count()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Enter your code below\n", "# Visualize the monthly cancellation counts computed above.\n", "ax = user.groupby(['end_year', 'end_month'])['userid'].count().plot()\n", "ax.set(title='Cancellations per month', xlabel='(end_year, end_month)', ylabel='Number of users')\n", "# End with plt.show(), like the sign-up plot cell, so the Axes repr is not\n", "# emitted as an extra cell output.\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "start_year start_month\n", "2018 5 193\n", " 4 175\n", " 6 166\n", "2017 12 160\n", "2018 12 157\n", "Name: userid, dtype: int64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Enter your code below\n", "# Show the 5 months with the most new sign-ups.\n", "user.groupby(['start_year', 'start_month'])['userid'].count().sort_values(ascending=False).head(5)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_namepriceevent_namestart_yearstart_monthstart_dayend_yearend_monthend_day
3204HD805503C01M2018-05-01NaTCA2-1all10000Sale201851NaNNaNNaN
3205IK000432C01F2018-05-012019-03-31CA21all10000Sale2018512019.03.031.0
3206AS247467C01M2018-05-012018-11-30CA2-1all10000Sale2018512018.011.030.0
3207OA777947C01M2018-05-012018-12-31CA2-1all10000Sale2018512018.012.031.0
3208HI630247C01M2018-05-01NaTCA20all10000Sale201851NaNNaNNaN
...................................................
4153PL850297C02F2018-04-152018-05-31CA31day7000Free20184152018.05.031.0
4154HD104614C02F2018-04-152019-02-28CA31day7000Free20184152019.02.028.0
4155AS628722C02F2018-04-152019-02-28CA31day7000Free20184152019.02.028.0
4156GD716223C02F2018-04-15NaTCA30day7000Free2018415NaNNaNNaN
4157HD981197C02F2018-04-152018-12-31CA31day7000Free20184152018.012.031.0
\n", "

534 rows × 16 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted \\\n", "3204 HD805503 C01 M 2018-05-01 NaT CA2 -1 \n", "3205 IK000432 C01 F 2018-05-01 2019-03-31 CA2 1 \n", "3206 AS247467 C01 M 2018-05-01 2018-11-30 CA2 -1 \n", "3207 OA777947 C01 M 2018-05-01 2018-12-31 CA2 -1 \n", "3208 HI630247 C01 M 2018-05-01 NaT CA2 0 \n", "... ... ... ... ... ... ... ... \n", "4153 PL850297 C02 F 2018-04-15 2018-05-31 CA3 1 \n", "4154 HD104614 C02 F 2018-04-15 2019-02-28 CA3 1 \n", "4155 AS628722 C02 F 2018-04-15 2019-02-28 CA3 1 \n", "4156 GD716223 C02 F 2018-04-15 NaT CA3 0 \n", "4157 HD981197 C02 F 2018-04-15 2018-12-31 CA3 1 \n", "\n", " class_name price event_name start_year start_month start_day \\\n", "3204 all 10000 Sale 2018 5 1 \n", "3205 all 10000 Sale 2018 5 1 \n", "3206 all 10000 Sale 2018 5 1 \n", "3207 all 10000 Sale 2018 5 1 \n", "3208 all 10000 Sale 2018 5 1 \n", "... ... ... ... ... ... ... \n", "4153 day 7000 Free 2018 4 15 \n", "4154 day 7000 Free 2018 4 15 \n", "4155 day 7000 Free 2018 4 15 \n", "4156 day 7000 Free 2018 4 15 \n", "4157 day 7000 Free 2018 4 15 \n", "\n", " end_year end_month end_day \n", "3204 NaN NaN NaN \n", "3205 2019.0 3.0 31.0 \n", "3206 2018.0 11.0 30.0 \n", "3207 2018.0 12.0 31.0 \n", "3208 NaN NaN NaN \n", "... ... ... ... \n", "4153 2018.0 5.0 31.0 \n", "4154 2019.0 2.0 28.0 \n", "4155 2019.0 2.0 28.0 \n", "4156 NaN NaN NaN \n", "4157 2018.0 12.0 31.0 \n", "\n", "[534 rows x 16 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# 2018년 4~6월 데이터만 추출하세요. 
결과를 top3 변수에 저장하세요.\n", "top3 = user.loc[ (user['start_year'] == 2018) & (user['start_month'].isin([4,5,6])), : ]\n", "top3" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Sale 359\n", "Free 175\n", "Name: event_name, dtype: int64" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# top3 데이터 프레임의 event_name 컬럼의 고유값별 개수를 조회하세요\n", "top3['event_name'].value_counts()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "all 272\n", "night 140\n", "day 122\n", "Name: class_name, dtype: int64" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# top3 데이터 프레임의 class_name 컬럼의 고유값별 개수를 조회하세요\n", "top3['class_name'].value_counts()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
loguseriddate
0L00000049012330AS0093732018-04-01
1L00000049012331AS0153152018-04-01
2L00000049012332AS0408412018-04-01
3L00000049012333AS0465942018-04-01
4L00000049012334AS0732852018-04-01
............
197423L00000049209753TS9777032019-03-31
197424L00000049209754TS9795502019-03-31
197425L00000049209755TS9952992019-03-31
197426L00000049209756TS9958532019-03-31
197427L00000049209757TS9990792019-03-31
\n", "

197428 rows × 3 columns

\n", "
" ], "text/plain": [ " log userid date\n", "0 L00000049012330 AS009373 2018-04-01\n", "1 L00000049012331 AS015315 2018-04-01\n", "2 L00000049012332 AS040841 2018-04-01\n", "3 L00000049012333 AS046594 2018-04-01\n", "4 L00000049012334 AS073285 2018-04-01\n", "... ... ... ...\n", "197423 L00000049209753 TS977703 2019-03-31\n", "197424 L00000049209754 TS979550 2019-03-31\n", "197425 L00000049209755 TS995299 2019-03-31\n", "197426 L00000049209756 TS995853 2019-03-31\n", "197427 L00000049209757 TS999079 2019-03-31\n", "\n", "[197428 rows x 3 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# df_logs의 date 컬럼을 datetime 형식으로 변경하세요.\n", "df_logs['date'] = pd.to_datetime(df_logs['date'])\n", "df_logs" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
loguseriddatemonth
0L00000049012330AS0093732018-04-01201804
1L00000049012331AS0153152018-04-01201804
2L00000049012332AS0408412018-04-01201804
3L00000049012333AS0465942018-04-01201804
4L00000049012334AS0732852018-04-01201804
...............
197423L00000049209753TS9777032019-03-31201903
197424L00000049209754TS9795502019-03-31201903
197425L00000049209755TS9952992019-03-31201903
197426L00000049209756TS9958532019-03-31201903
197427L00000049209757TS9990792019-03-31201903
\n", "

197428 rows × 4 columns

\n", "
" ], "text/plain": [ " log userid date month\n", "0 L00000049012330 AS009373 2018-04-01 201804\n", "1 L00000049012331 AS015315 2018-04-01 201804\n", "2 L00000049012332 AS040841 2018-04-01 201804\n", "3 L00000049012333 AS046594 2018-04-01 201804\n", "4 L00000049012334 AS073285 2018-04-01 201804\n", "... ... ... ... ...\n", "197423 L00000049209753 TS977703 2019-03-31 201903\n", "197424 L00000049209754 TS979550 2019-03-31 201903\n", "197425 L00000049209755 TS995299 2019-03-31 201903\n", "197426 L00000049209756 TS995853 2019-03-31 201903\n", "197427 L00000049209757 TS999079 2019-03-31 201903\n", "\n", "[197428 rows x 4 columns]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df_logs의 date 컬럼을 년월 형태로 표현하세요.\n", "# dt와 strftime 함수 이용\n", "df_logs['month'] = df_logs['date'].dt.strftime('%Y%m')\n", "df_logs" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "month userid \n", "201804 AS002855 4\n", " AS009013 2\n", " AS009373 3\n", " AS015315 6\n", " AS015739 7\n", " ..\n", "201903 TS995853 8\n", " TS998593 8\n", " TS999079 3\n", " TS999231 6\n", " TS999855 4\n", "Name: log, Length: 36842, dtype: int64" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# df_logs에서 month와 userid를 기준으로 그룹을 생성 후 log의 개수를 카운트하세요.\n", "df_logs_month = df_logs.groupby(['month','userid'])['log'].count()\n", "df_logs_month" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
monthuseridlog
0201804AS0028554
1201804AS0090132
2201804AS0093733
3201804AS0153156
4201804AS0157397
............
36837201903TS9958538
36838201903TS9985938
36839201903TS9990793
36840201903TS9992316
36841201903TS9998554
\n", "

36842 rows × 3 columns

\n", "
" ], "text/plain": [ " month userid log\n", "0 201804 AS002855 4\n", "1 201804 AS009013 2\n", "2 201804 AS009373 3\n", "3 201804 AS015315 6\n", "4 201804 AS015739 7\n", "... ... ... ...\n", "36837 201903 TS995853 8\n", "36838 201903 TS998593 8\n", "36839 201903 TS999079 3\n", "36840 201903 TS999231 6\n", "36841 201903 TS999855 4\n", "\n", "[36842 rows x 3 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# df_logs_month 데이터 프레임의 인덱스를 초기화 하세요.\n", "df_logs_month = df_logs_month.reset_index()\n", "df_logs_month" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
monthuseridfreq
0201804AS0028554
1201804AS0090132
2201804AS0093733
3201804AS0153156
4201804AS0157397
............
36837201903TS9958538
36838201903TS9985938
36839201903TS9990793
36840201903TS9992316
36841201903TS9998554
\n", "

36842 rows × 3 columns

\n", "
" ], "text/plain": [ " month userid freq\n", "0 201804 AS002855 4\n", "1 201804 AS009013 2\n", "2 201804 AS009373 3\n", "3 201804 AS015315 6\n", "4 201804 AS015739 7\n", "... ... ... ...\n", "36837 201903 TS995853 8\n", "36838 201903 TS998593 8\n", "36839 201903 TS999079 3\n", "36840 201903 TS999231 6\n", "36841 201903 TS999855 4\n", "\n", "[36842 rows x 3 columns]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# df_logs_month 데이터 프레임의 log 컬럼의 이름을 freq로 변경하세요\n", "df_logs_month.rename(columns={'log':'freq'}, inplace=True)\n", "df_logs_month" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
minmedianmeanmaxsum
userid
AS00285525.04.500000754
AS00880514.04.000000840
AS00901322.02.00000022
AS00937335.05.083333761
AS01523347.07.5454551183
..................
TS99585389.59.5000001119
TS99859378.08.142857957
TS99907925.54.916667959
TS99923115.04.666667856
TS99985534.04.583333755
\n", "

4192 rows × 5 columns

\n", "
" ], "text/plain": [ " min median mean max sum\n", "userid \n", "AS002855 2 5.0 4.500000 7 54\n", "AS008805 1 4.0 4.000000 8 40\n", "AS009013 2 2.0 2.000000 2 2\n", "AS009373 3 5.0 5.083333 7 61\n", "AS015233 4 7.0 7.545455 11 83\n", "... ... ... ... ... ...\n", "TS995853 8 9.5 9.500000 11 19\n", "TS998593 7 8.0 8.142857 9 57\n", "TS999079 2 5.5 4.916667 9 59\n", "TS999231 1 5.0 4.666667 8 56\n", "TS999855 3 4.0 4.583333 7 55\n", "\n", "[4192 rows x 5 columns]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# df_logs_month 데이터 프레임의 userid를 이용하여 freq 컬럼의 최소, 중앙값, 평균, 최대, 합계를 계산하세요.\n", "user_log = df_logs_month.groupby('userid')['freq'].agg(['min', 'median', 'mean', 'max', 'sum'])\n", "user_log" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridminmedianmeanmaxsum
0AS00285525.04.500000754
1AS00880514.04.000000840
2AS00901322.02.00000022
3AS00937335.05.083333761
4AS01523347.07.5454551183
.....................
4187TS99585389.59.5000001119
4188TS99859378.08.142857957
4189TS99907925.54.916667959
4190TS99923115.04.666667856
4191TS99985534.04.583333755
\n", "

4192 rows × 6 columns

\n", "
" ], "text/plain": [ " userid min median mean max sum\n", "0 AS002855 2 5.0 4.500000 7 54\n", "1 AS008805 1 4.0 4.000000 8 40\n", "2 AS009013 2 2.0 2.000000 2 2\n", "3 AS009373 3 5.0 5.083333 7 61\n", "4 AS015233 4 7.0 7.545455 11 83\n", "... ... ... ... ... ... ...\n", "4187 TS995853 8 9.5 9.500000 11 19\n", "4188 TS998593 7 8.0 8.142857 9 57\n", "4189 TS999079 2 5.5 4.916667 9 59\n", "4190 TS999231 1 5.0 4.666667 8 56\n", "4191 TS999855 3 4.0 4.583333 7 55\n", "\n", "[4192 rows x 6 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# user_log 데이터 프레임의 인덱스를 초기화 하세요.\n", "user_log.reset_index(inplace=True)\n", "user_log" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
loguseriddatemonthwday
0L00000049012330AS0093732018-04-012018046
1L00000049012331AS0153152018-04-012018046
2L00000049012332AS0408412018-04-012018046
3L00000049012333AS0465942018-04-012018046
4L00000049012334AS0732852018-04-012018046
..................
197423L00000049209753TS9777032019-03-312019036
197424L00000049209754TS9795502019-03-312019036
197425L00000049209755TS9952992019-03-312019036
197426L00000049209756TS9958532019-03-312019036
197427L00000049209757TS9990792019-03-312019036
\n", "

197428 rows × 5 columns

\n", "
" ], "text/plain": [ " log userid date month wday\n", "0 L00000049012330 AS009373 2018-04-01 201804 6\n", "1 L00000049012331 AS015315 2018-04-01 201804 6\n", "2 L00000049012332 AS040841 2018-04-01 201804 6\n", "3 L00000049012333 AS046594 2018-04-01 201804 6\n", "4 L00000049012334 AS073285 2018-04-01 201804 6\n", "... ... ... ... ... ...\n", "197423 L00000049209753 TS977703 2019-03-31 201903 6\n", "197424 L00000049209754 TS979550 2019-03-31 201903 6\n", "197425 L00000049209755 TS995299 2019-03-31 201903 6\n", "197426 L00000049209756 TS995853 2019-03-31 201903 6\n", "197427 L00000049209757 TS999079 2019-03-31 201903 6\n", "\n", "[197428 rows x 5 columns]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# df_logs 데이터 프레임을 이용하여 요일(weekday) 정보를 추출하세요\n", "df_logs[\"wday\"] = df_logs[\"date\"].dt.weekday\n", "df_logs" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
count
useridmonthwday
AS00285520180454
20180521
54
20180655
20180711
............
TS99985520190111
54
61
20190254
20190354
\n", "

93333 rows × 1 columns

\n", "
" ], "text/plain": [ " count\n", "userid month wday \n", "AS002855 201804 5 4\n", " 201805 2 1\n", " 5 4\n", " 201806 5 5\n", " 201807 1 1\n", "... ...\n", "TS999855 201901 1 1\n", " 5 4\n", " 6 1\n", " 201902 5 4\n", " 201903 5 4\n", "\n", "[93333 rows x 1 columns]" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# df_logs 데이터 프레임의 userid, month, wday 컬럼을 그룹으로 log 컬럼의 개수를 카운트하세요.\n", "df_logs_weekly = df_logs.groupby(['userid','month','wday'])['log'].agg(['count'])\n", "df_logs_weekly" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridmonthwdaycount
0AS00285520180454
1AS00285520180521
2AS00285520180554
3AS00285520180655
4AS00285520180711
...............
93328TS99985520190111
93329TS99985520190154
93330TS99985520190161
93331TS99985520190254
93332TS99985520190354
\n", "

93333 rows × 4 columns

\n", "
" ], "text/plain": [ " userid month wday count\n", "0 AS002855 201804 5 4\n", "1 AS002855 201805 2 1\n", "2 AS002855 201805 5 4\n", "3 AS002855 201806 5 5\n", "4 AS002855 201807 1 1\n", "... ... ... ... ...\n", "93328 TS999855 201901 1 1\n", "93329 TS999855 201901 5 4\n", "93330 TS999855 201901 6 1\n", "93331 TS999855 201902 5 4\n", "93332 TS999855 201903 5 4\n", "\n", "[93333 rows x 4 columns]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# df_logs_weekly의 인덱스를 초기화 하세요.\n", "df_logs_weekly.reset_index(inplace=True)\n", "df_logs_weekly" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
monthwdaycount
userid
AS00285520190365
AS00880520190364
AS00901320180402
AS00937320190365
AS01523320190365
............
TS99585320190365
TS99859320190365
TS99907920190365
TS99923120190365
TS99985520190365
\n", "

4192 rows × 3 columns

\n", "
" ], "text/plain": [ " month wday count\n", "userid \n", "AS002855 201903 6 5\n", "AS008805 201903 6 4\n", "AS009013 201804 0 2\n", "AS009373 201903 6 5\n", "AS015233 201903 6 5\n", "... ... ... ...\n", "TS995853 201903 6 5\n", "TS998593 201903 6 5\n", "TS999079 201903 6 5\n", "TS999231 201903 6 5\n", "TS999855 201903 6 5\n", "\n", "[4192 rows x 3 columns]" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# 매주 방문하는 사람을 찾기 위해 df_logs_weekly 데이터 프레임의 userid 컬럼을 그룹으로 최대값을 계산하세요\n", "df_logs_weekly_routine = df_logs_weekly.groupby('userid').max()\n", "df_logs_weekly_routine" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
monthwdaycountroutine
userid
AS002855201903650
AS008805201903640
AS009013201804020
AS009373201903650
AS015233201903650
...............
TS995853201903650
TS998593201903650
TS999079201903650
TS999231201903650
TS999855201903650
\n", "

4192 rows × 4 columns

\n", "
" ], "text/plain": [ " month wday count routine\n", "userid \n", "AS002855 201903 6 5 0\n", "AS008805 201903 6 4 0\n", "AS009013 201804 0 2 0\n", "AS009373 201903 6 5 0\n", "AS015233 201903 6 5 0\n", "... ... ... ... ...\n", "TS995853 201903 6 5 0\n", "TS998593 201903 6 5 0\n", "TS999079 201903 6 5 0\n", "TS999231 201903 6 5 0\n", "TS999855 201903 6 5 0\n", "\n", "[4192 rows x 4 columns]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# df_logs_weekly_routine 데이터 프레임에 0의 값을 가지는 routine 컬럼을 추가하세요.\n", "df_logs_weekly_routine['routine'] = 0\n", "df_logs_weekly_routine" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
monthwdaycountroutine
userid
AS002855201903651
AS008805201903641
AS009013201804020
AS009373201903651
AS015233201903651
...............
TS995853201903651
TS998593201903651
TS999079201903651
TS999231201903651
TS999855201903651
\n", "

4192 rows × 4 columns

\n", "
" ], "text/plain": [ " month wday count routine\n", "userid \n", "AS002855 201903 6 5 1\n", "AS008805 201903 6 4 1\n", "AS009013 201804 0 2 0\n", "AS009373 201903 6 5 1\n", "AS015233 201903 6 5 1\n", "... ... ... ... ...\n", "TS995853 201903 6 5 1\n", "TS998593 201903 6 5 1\n", "TS999079 201903 6 5 1\n", "TS999231 201903 6 5 1\n", "TS999855 201903 6 5 1\n", "\n", "[4192 rows x 4 columns]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# count 컬럼의 값이 4 이상인 경우 routine 컬럼의 값을 1로 변경하세요\n", "df_logs_weekly_routine.loc[df_logs_weekly_routine['count'] >= 4, 'routine'] = 1\n", "df_logs_weekly_routine" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridroutine
0AS0028551
1AS0088051
2AS0090130
3AS0093731
4AS0152331
.........
4187TS9958531
4188TS9985931
4189TS9990791
4190TS9992311
4191TS9998551
\n", "

4192 rows × 2 columns

\n", "
" ], "text/plain": [ " userid routine\n", "0 AS002855 1\n", "1 AS008805 1\n", "2 AS009013 0\n", "3 AS009373 1\n", "4 AS015233 1\n", "... ... ...\n", "4187 TS995853 1\n", "4188 TS998593 1\n", "4189 TS999079 1\n", "4190 TS999231 1\n", "4191 TS999855 1\n", "\n", "[4192 rows x 2 columns]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# df_logs_weekly_routine 데이터 프레임의 인덱스를 초기화하고 userid 컬럼과 routine 컬럼만 추출하세요.\n", "routine = df_logs_weekly_routine.reset_index()[['userid', 'routine']]\n", "routine" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_namepriceevent_name...start_monthstart_dayend_yearend_monthend_dayminmedianmeanmaxsum
0OA832399C01F2015-05-01NaTCA10all10000None...51NaNNaNNaN25.04.833333858
1PL270116C01M2015-05-01NaTCA10all10000None...51NaNNaNNaN35.05.083333761
2OA974876C01M2015-05-01NaTCA10all10000None...51NaNNaNNaN35.04.583333655
3HD024127C01F2015-05-01NaTCA1-1all10000None...51NaNNaNNaN24.54.833333758
4IK271057C01M2015-05-01NaTCA10all10000None...51NaNNaNNaN23.53.750000545
..................................................................
4187IK562610C02F2018-12-132019-02-28CA31day7000Free...12132019.02.028.034.05.6666671017
4188HI599354C02M2018-12-13NaTCA30day7000Free...1213NaNNaNNaN57.57.5000001030
4189GD796859C02F2018-12-14NaTCA30day7000Free...1214NaNNaNNaN67.57.250000829
4190GD082270C02F2018-12-142019-02-28CA31day7000Free...12142019.02.028.035.06.0000001018
4191OA426023C02F2018-12-14NaTCA30day7000Free...1214NaNNaNNaN89.09.5000001238
\n", "

4192 rows × 21 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted \\\n", "0 OA832399 C01 F 2015-05-01 NaT CA1 0 \n", "1 PL270116 C01 M 2015-05-01 NaT CA1 0 \n", "2 OA974876 C01 M 2015-05-01 NaT CA1 0 \n", "3 HD024127 C01 F 2015-05-01 NaT CA1 -1 \n", "4 IK271057 C01 M 2015-05-01 NaT CA1 0 \n", "... ... ... ... ... ... ... ... \n", "4187 IK562610 C02 F 2018-12-13 2019-02-28 CA3 1 \n", "4188 HI599354 C02 M 2018-12-13 NaT CA3 0 \n", "4189 GD796859 C02 F 2018-12-14 NaT CA3 0 \n", "4190 GD082270 C02 F 2018-12-14 2019-02-28 CA3 1 \n", "4191 OA426023 C02 F 2018-12-14 NaT CA3 0 \n", "\n", " class_name price event_name ... start_month start_day end_year \\\n", "0 all 10000 None ... 5 1 NaN \n", "1 all 10000 None ... 5 1 NaN \n", "2 all 10000 None ... 5 1 NaN \n", "3 all 10000 None ... 5 1 NaN \n", "4 all 10000 None ... 5 1 NaN \n", "... ... ... ... ... ... ... ... \n", "4187 day 7000 Free ... 12 13 2019.0 \n", "4188 day 7000 Free ... 12 13 NaN \n", "4189 day 7000 Free ... 12 14 NaN \n", "4190 day 7000 Free ... 12 14 2019.0 \n", "4191 day 7000 Free ... 12 14 NaN \n", "\n", " end_month end_day min median mean max sum \n", "0 NaN NaN 2 5.0 4.833333 8 58 \n", "1 NaN NaN 3 5.0 5.083333 7 61 \n", "2 NaN NaN 3 5.0 4.583333 6 55 \n", "3 NaN NaN 2 4.5 4.833333 7 58 \n", "4 NaN NaN 2 3.5 3.750000 5 45 \n", "... ... ... ... ... ... ... ... \n", "4187 2.0 28.0 3 4.0 5.666667 10 17 \n", "4188 NaN NaN 5 7.5 7.500000 10 30 \n", "4189 NaN NaN 6 7.5 7.250000 8 29 \n", "4190 2.0 28.0 3 5.0 6.000000 10 18 \n", "4191 NaN NaN 8 9.0 9.500000 12 38 \n", "\n", "[4192 rows x 21 columns]" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# user 데이터 프레임과 user_log 데이터 프레임을 userid를 기준으로 병합하세요\n", "user = pd.merge(user, user_log, on='userid')\n", "user" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_namepriceevent_name...start_dayend_yearend_monthend_dayminmedianmeanmaxsumroutine
0OA832399C01F2015-05-01NaTCA10all10000None...1NaNNaNNaN25.04.8333338581
1PL270116C01M2015-05-01NaTCA10all10000None...1NaNNaNNaN35.05.0833337611
2OA974876C01M2015-05-01NaTCA10all10000None...1NaNNaNNaN35.04.5833336551
3HD024127C01F2015-05-01NaTCA1-1all10000None...1NaNNaNNaN24.54.8333337581
4IK271057C01M2015-05-01NaTCA10all10000None...1NaNNaNNaN23.53.7500005451
..................................................................
4187IK562610C02F2018-12-132019-02-28CA31day7000Free...132019.02.028.034.05.66666710170
4188HI599354C02M2018-12-13NaTCA30day7000Free...13NaNNaNNaN57.57.50000010301
4189GD796859C02F2018-12-14NaTCA30day7000Free...14NaNNaNNaN67.57.2500008291
4190GD082270C02F2018-12-142019-02-28CA31day7000Free...142019.02.028.035.06.00000010180
4191OA426023C02F2018-12-14NaTCA30day7000Free...14NaNNaNNaN89.09.50000012381
\n", "

4192 rows × 22 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted \\\n", "0 OA832399 C01 F 2015-05-01 NaT CA1 0 \n", "1 PL270116 C01 M 2015-05-01 NaT CA1 0 \n", "2 OA974876 C01 M 2015-05-01 NaT CA1 0 \n", "3 HD024127 C01 F 2015-05-01 NaT CA1 -1 \n", "4 IK271057 C01 M 2015-05-01 NaT CA1 0 \n", "... ... ... ... ... ... ... ... \n", "4187 IK562610 C02 F 2018-12-13 2019-02-28 CA3 1 \n", "4188 HI599354 C02 M 2018-12-13 NaT CA3 0 \n", "4189 GD796859 C02 F 2018-12-14 NaT CA3 0 \n", "4190 GD082270 C02 F 2018-12-14 2019-02-28 CA3 1 \n", "4191 OA426023 C02 F 2018-12-14 NaT CA3 0 \n", "\n", " class_name price event_name ... start_day end_year end_month \\\n", "0 all 10000 None ... 1 NaN NaN \n", "1 all 10000 None ... 1 NaN NaN \n", "2 all 10000 None ... 1 NaN NaN \n", "3 all 10000 None ... 1 NaN NaN \n", "4 all 10000 None ... 1 NaN NaN \n", "... ... ... ... ... ... ... ... \n", "4187 day 7000 Free ... 13 2019.0 2.0 \n", "4188 day 7000 Free ... 13 NaN NaN \n", "4189 day 7000 Free ... 14 NaN NaN \n", "4190 day 7000 Free ... 14 2019.0 2.0 \n", "4191 day 7000 Free ... 14 NaN NaN \n", "\n", " end_day min median mean max sum routine \n", "0 NaN 2 5.0 4.833333 8 58 1 \n", "1 NaN 3 5.0 5.083333 7 61 1 \n", "2 NaN 3 5.0 4.583333 6 55 1 \n", "3 NaN 2 4.5 4.833333 7 58 1 \n", "4 NaN 2 3.5 3.750000 5 45 1 \n", "... ... ... ... ... ... ... ... \n", "4187 28.0 3 4.0 5.666667 10 17 0 \n", "4188 NaN 5 7.5 7.500000 10 30 1 \n", "4189 NaN 6 7.5 7.250000 8 29 1 \n", "4190 28.0 3 5.0 6.000000 10 18 0 \n", "4191 NaN 8 9.0 9.500000 12 38 1 \n", "\n", "[4192 rows x 22 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# user 데이터 프레임과 routine 데이터 프레임을 userid를 기준으로 병합하세요\n", "user = pd.merge(user, routine, on='userid')\n", "user" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_namepriceevent_name...end_yearend_monthend_dayminmedianmeanmaxsumroutineperiod
0OA832399C01F2015-05-01NaTCA10all10000None...NaNNaNNaN25.04.8333338581NaT
1PL270116C01M2015-05-01NaTCA10all10000None...NaNNaNNaN35.05.0833337611NaT
2OA974876C01M2015-05-01NaTCA10all10000None...NaNNaNNaN35.04.5833336551NaT
3HD024127C01F2015-05-01NaTCA1-1all10000None...NaNNaNNaN24.54.8333337581NaT
4IK271057C01M2015-05-01NaTCA10all10000None...NaNNaNNaN23.53.7500005451NaT
..................................................................
4187IK562610C02F2018-12-132019-02-28CA31day7000Free...2019.02.028.034.05.6666671017077 days
4188HI599354C02M2018-12-13NaTCA30day7000Free...NaNNaNNaN57.57.50000010301NaT
4189GD796859C02F2018-12-14NaTCA30day7000Free...NaNNaNNaN67.57.2500008291NaT
4190GD082270C02F2018-12-142019-02-28CA31day7000Free...2019.02.028.035.06.0000001018076 days
4191OA426023C02F2018-12-14NaTCA30day7000Free...NaNNaNNaN89.09.50000012381NaT
\n", "

4192 rows × 23 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted \\\n", "0 OA832399 C01 F 2015-05-01 NaT CA1 0 \n", "1 PL270116 C01 M 2015-05-01 NaT CA1 0 \n", "2 OA974876 C01 M 2015-05-01 NaT CA1 0 \n", "3 HD024127 C01 F 2015-05-01 NaT CA1 -1 \n", "4 IK271057 C01 M 2015-05-01 NaT CA1 0 \n", "... ... ... ... ... ... ... ... \n", "4187 IK562610 C02 F 2018-12-13 2019-02-28 CA3 1 \n", "4188 HI599354 C02 M 2018-12-13 NaT CA3 0 \n", "4189 GD796859 C02 F 2018-12-14 NaT CA3 0 \n", "4190 GD082270 C02 F 2018-12-14 2019-02-28 CA3 1 \n", "4191 OA426023 C02 F 2018-12-14 NaT CA3 0 \n", "\n", " class_name price event_name ... end_year end_month end_day min \\\n", "0 all 10000 None ... NaN NaN NaN 2 \n", "1 all 10000 None ... NaN NaN NaN 3 \n", "2 all 10000 None ... NaN NaN NaN 3 \n", "3 all 10000 None ... NaN NaN NaN 2 \n", "4 all 10000 None ... NaN NaN NaN 2 \n", "... ... ... ... ... ... ... ... ... \n", "4187 day 7000 Free ... 2019.0 2.0 28.0 3 \n", "4188 day 7000 Free ... NaN NaN NaN 5 \n", "4189 day 7000 Free ... NaN NaN NaN 6 \n", "4190 day 7000 Free ... 2019.0 2.0 28.0 3 \n", "4191 day 7000 Free ... NaN NaN NaN 8 \n", "\n", " median mean max sum routine period \n", "0 5.0 4.833333 8 58 1 NaT \n", "1 5.0 5.083333 7 61 1 NaT \n", "2 5.0 4.583333 6 55 1 NaT \n", "3 4.5 4.833333 7 58 1 NaT \n", "4 3.5 3.750000 5 45 1 NaT \n", "... ... ... ... ... ... ... \n", "4187 4.0 5.666667 10 17 0 77 days \n", "4188 7.5 7.500000 10 30 1 NaT \n", "4189 7.5 7.250000 8 29 1 NaT \n", "4190 5.0 6.000000 10 18 0 76 days \n", "4191 9.0 9.500000 12 38 1 NaT \n", "\n", "[4192 rows x 23 columns]" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# end_date 컬럼와 start_date 컬럼의 차이를 이용하여 회원 기간을 계산하세요\n", "user['period'] = user['end_date'] - user['start_date']\n", "user" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_namepriceevent_name...end_yearend_monthend_dayminmedianmeanmaxsumroutineperiod
0OA832399C01F2015-05-01NaTCA10all10000None...NaNNaNNaN25.04.83333385811460 days
1PL270116C01M2015-05-01NaTCA10all10000None...NaNNaNNaN35.05.08333376111460 days
2OA974876C01M2015-05-01NaTCA10all10000None...NaNNaNNaN35.04.58333365511460 days
3HD024127C01F2015-05-01NaTCA1-1all10000None...NaNNaNNaN24.54.83333375811460 days
4IK271057C01M2015-05-01NaTCA10all10000None...NaNNaNNaN23.53.75000054511460 days
..................................................................
4187IK562610C02F2018-12-132019-02-28CA31day7000Free...2019.02.028.034.05.6666671017077 days
4188HI599354C02M2018-12-13NaTCA30day7000Free...NaNNaNNaN57.57.50000010301138 days
4189GD796859C02F2018-12-14NaTCA30day7000Free...NaNNaNNaN67.57.2500008291137 days
4190GD082270C02F2018-12-142019-02-28CA31day7000Free...2019.02.028.035.06.0000001018076 days
4191OA426023C02F2018-12-14NaTCA30day7000Free...NaNNaNNaN89.09.50000012381137 days
\n", "

4192 rows × 23 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted \\\n", "0 OA832399 C01 F 2015-05-01 NaT CA1 0 \n", "1 PL270116 C01 M 2015-05-01 NaT CA1 0 \n", "2 OA974876 C01 M 2015-05-01 NaT CA1 0 \n", "3 HD024127 C01 F 2015-05-01 NaT CA1 -1 \n", "4 IK271057 C01 M 2015-05-01 NaT CA1 0 \n", "... ... ... ... ... ... ... ... \n", "4187 IK562610 C02 F 2018-12-13 2019-02-28 CA3 1 \n", "4188 HI599354 C02 M 2018-12-13 NaT CA3 0 \n", "4189 GD796859 C02 F 2018-12-14 NaT CA3 0 \n", "4190 GD082270 C02 F 2018-12-14 2019-02-28 CA3 1 \n", "4191 OA426023 C02 F 2018-12-14 NaT CA3 0 \n", "\n", " class_name price event_name ... end_year end_month end_day min \\\n", "0 all 10000 None ... NaN NaN NaN 2 \n", "1 all 10000 None ... NaN NaN NaN 3 \n", "2 all 10000 None ... NaN NaN NaN 3 \n", "3 all 10000 None ... NaN NaN NaN 2 \n", "4 all 10000 None ... NaN NaN NaN 2 \n", "... ... ... ... ... ... ... ... ... \n", "4187 day 7000 Free ... 2019.0 2.0 28.0 3 \n", "4188 day 7000 Free ... NaN NaN NaN 5 \n", "4189 day 7000 Free ... NaN NaN NaN 6 \n", "4190 day 7000 Free ... 2019.0 2.0 28.0 3 \n", "4191 day 7000 Free ... NaN NaN NaN 8 \n", "\n", " median mean max sum routine period \n", "0 5.0 4.833333 8 58 1 1460 days \n", "1 5.0 5.083333 7 61 1 1460 days \n", "2 5.0 4.583333 6 55 1 1460 days \n", "3 4.5 4.833333 7 58 1 1460 days \n", "4 3.5 3.750000 5 45 1 1460 days \n", "... ... ... ... ... ... ... 
\n", "4187 4.0 5.666667 10 17 0 77 days \n", "4188 7.5 7.500000 10 30 1 138 days \n", "4189 7.5 7.250000 8 29 1 137 days \n", "4190 5.0 6.000000 10 18 0 76 days \n", "4191 9.0 9.500000 12 38 1 137 days \n", "\n", "[4192 rows x 23 columns]" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# period 컬럼이 NaT인 경우 아직 탈퇴하지않은 회원으로 2019년04월30일을 기준으로 차이를 계산합니다.\n", "user.loc[user['period'].isna(), 'period'] = pd.to_datetime(\"20190430\") - user['start_date']\n", "user" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
minmedianmeanmaxsum
count4192.0000004192.0000004192.0000004192.0000004192.000000
mean3.0412695.2505965.3331277.82395047.096374
std1.9515651.8748741.7775332.16895924.218124
min1.0000001.0000001.0000001.0000001.000000
25%2.0000004.0000004.2500007.00000026.000000
50%3.0000005.0000005.0000008.00000053.000000
75%4.0000006.5000006.4166679.00000065.000000
max12.00000012.00000012.00000014.000000105.000000
\n", "
" ], "text/plain": [ " min median mean max sum\n", "count 4192.000000 4192.000000 4192.000000 4192.000000 4192.000000\n", "mean 3.041269 5.250596 5.333127 7.823950 47.096374\n", "std 1.951565 1.874874 1.777533 2.168959 24.218124\n", "min 1.000000 1.000000 1.000000 1.000000 1.000000\n", "25% 2.000000 4.000000 4.250000 7.000000 26.000000\n", "50% 3.000000 5.000000 5.000000 8.000000 53.000000\n", "75% 4.000000 6.500000 6.416667 9.000000 65.000000\n", "max 12.000000 12.000000 12.000000 14.000000 105.000000" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# user 데이터 프레임의 min, median, mean, max, sum 컬럼의 기술통계요약정보를 출력하세요.\n", "user[['min', 'median', 'mean', 'max', 'sum']].describe()" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 3413\n", "0 779\n", "Name: routine, dtype: int64" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# user 데이터 프레임의 routine 컬럼의 고유값별 개수를 계산하세요\n", "user['routine'].value_counts()" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXWElEQVR4nO3df4ycd4Hf8ffnHBKcLMTrC9n6bKv2tS6tg0XAqxSO6rR7phdDEI6qRjIKV6fNyf0jdw1Xn4pdpKL7w6ppG9qrQtquMK3vDFm5hjRWcrni+rJCSCQ+HAKOY1wv2OfYDjbHxYENkWHTT/+YJzDsznrGu/PsTr73eUmreeb7fJ95PjNef2b2mV+yTURElOWXFjpARER0X8o9IqJAKfeIiAKl3CMiCpRyj4go0DULHQDgpptu8qpVq6aNv/LKK9xwww3zH2gWkrUeyVqPZK3HfGc9cuTIX9p+W8uVthf8Z/369W7lySefbDnei5K1Hslaj2Stx3xnBb7uGXo1h2UiIgqUco+IKFDKPSKiQCn3iIgCpdwjIgqUco+IKFDKPSKiQCn3iIgCpdwjIgrUEx8/MFertj++IPs9veuOBdlvREQ7eeQeEVGglHtERIFS7hERBUq5R0QUKOUeEVGglHtERIFS7hERBUq5R0QUKOUeEVGgjspd0u9JOibpOUkPS3qzpKWSDko6WZ32N83fIWlc0glJt9cXPyIiWmlb7pKWA/8CGLT9DmARsBnYDhyyvQY4VJ1H0tpq/S3ARuAhSYvqiR8REa10eljmGmCxpGuA64HzwCZgT7V+D3BntbwJGLV92fYpYBy4rWuJIyKiLdluP0m6H9gJvAp82fbdki7ZXtI05yXb/ZIeBJ6yvbca3w08YXv/lMvcCmwFGBgYWD86OjptvxMTE/T19bXNd/Tcy23n1GHd8ht/ttxp1l6QrPVI1nok68yGh4eP2B5sta7tp0JWx9I3AauBS8D/lPTRK23SYmzaPYjtEWAEYHBw0ENDQ9M2Ghsbo9X4VPcs1KdC3j30s+VOs/aCZK1HstYjWWenk8My7wdO2f6+7Z8CXwJ+DbggaRlAdXqxmn8WWNm0/Qoah3EiImKedFLuZ4D3SLpekoANwHHgALClmrMFeLRaPgBslnSdpNXAGuBwd2NHRMSVtD0sY/tpSfuBZ4BJ4Bs0Dqf0Afsk3UvjDuCuav4xSfuA56v599l+rab8ERHRQkffxGT7k8AnpwxfpvEovtX8nTSegI2IiAWQd6hGRBQo5R4RUaCUe0REgVLuEREFSrlHRBQo5R4RUaCUe0REgVLuEREFSrlHRBQo5R4RUaCUe0REgVLuEREFSrlHRBQo5R4RUaCUe0REgdqWu6S3S3q26eeHkj4maamkg5JOVqf9TdvskDQu6YSk2+u9ChERMVXbcrd9wvattm8F1gM/Bh4BtgOHbK8BDlXnkbQW2AzcAmwEHpK0qJ74ERHRytUeltkAfMf2XwCbgD3V+B7gzmp5EzBq+7LtU8A4cFsXskZERIeuttw3Aw9XywO2XwSoTm+uxpcDLzRtc7Yai4iIeSLbnU2UrgXOA7fYviDpku0lTetfst0v6TPA12zvrcZ3A39i+4tTLm8rsBVgYGBg/ejo6LR9TkxM0NfX1zbb0XMvd3Qdum3d8ht/ttxp1l6QrPVI1nok68yGh4eP2B5sta6jL8iufAB4xvaF6vwFSctsvyhpGXCxGj8LrGzabgWNO4VfYHsEGAEYHBz00NDQtB2OjY3Ranyqe7Y/3vm16Kajr/xscdu613jgq69cYXL3nN51x5y27/R27QXJWo9krUcvZb2awzIf4eeHZAAOAFuq5S3Ao03jmyVdJ2k1sAY4PNegERHRuY4euUu6HviHwD9vGt4F7JN0L3AGuAvA9jFJ+4DngUngPtuvdTV1RERcUUflbvvHwC9PGfsBjVfPtJq/E9g553QRETEreYdqRESBUu4REQVKuUdEFCjlHhFRoJR7RESBUu4
REQVKuUdEFCjlHhFRoJR7RESBUu4REQVKuUdEFCjlHhFRoJR7RESBUu4REQVKuUdEFCjlHhFRoI7KXdISSfslfVvScUnvlbRU0kFJJ6vT/qb5OySNSzoh6fb64kdERCudPnL/Q+BPbf9d4J3AcWA7cMj2GuBQdR5Ja4HNwC3ARuAhSYu6HTwiImbWttwlvRX4dWA3gO2f2L4EbAL2VNP2AHdWy5uAUduXbZ8CxoHbuhs7IiKuRLavPEG6FRih8YXX7wSOAPcD52wvaZr3ku1+SQ8CT9neW43vBp6wvX/K5W4FtgIMDAysHx0dnbbviYkJ+vr62l6Jo+debjunbgOL4cKr87OvdctvnNP2nd6uvSBZ65Gs9ZjvrMPDw0dsD7Za18kXZF8DvBv4XdtPS/pDqkMwM1CLsWn3ILZHaNxpMDg46KGhoWkbjY2N0Wp8qnu2P952Tt22rZvkgaMdfd/4nJ2+e2hO23d6u/aCZK1Hstajl7J2csz9LHDW9tPV+f00yv6CpGUA1enFpvkrm7ZfAZzvTtyIiOhE23K3/T3gBUlvr4Y20DhEcwDYUo1tAR6tlg8AmyVdJ2k1sAY43NXUERFxRZ0eR/hd4POSrgW+C/xTGncM+yTdC5wB7gKwfUzSPhp3AJPAfbZf63ryiIiYUUflbvtZoNVB+w0zzN8J7Jx9rIiImIu8QzUiokAp94iIAqXcIyIKlHKPiChQyj0iokAp94iIAqXcIyIKlHKPiChQyj0iokAp94iIAqXcIyIKlHKPiChQyj0iokAp94iIAqXcIyIK1FG5Szot6aikZyV9vRpbKumgpJPVaX/T/B2SxiWdkHR7XeEjIqK1q3nkPmz71qZv2t4OHLK9BjhUnUfSWmAzcAuwEXhI0qIuZo6IiDbmclhmE7CnWt4D3Nk0Pmr7su1TwDhw2xz2ExERV0m220+STgEvAQb+m+0RSZdsL2ma85LtfkkPAk/Z3luN7waesL1/ymVuBbYCDAwMrB8dHZ2234mJCfr6+trmO3ru5bZz6jawGC68Oj/7Wrf8xjlt3+nt2guStR7JWo/5zjo8PHyk6WjKL+j0C7LfZ/u8pJuBg5K+fYW5ajE27R7E9ggwAjA4OOihoaFpG42NjdFqfKp7tj/edk7dtq2b5IGjnd6cc3P67qE5bd/p7doLkrUeyVqPXsra0WEZ2+er04vAIzQOs1yQtAygOr1YTT8LrGzafAVwvluBIyKivbblLukGSW95fRn4TeA54ACwpZq2BXi0Wj4AbJZ0naTVwBrgcLeDR0TEzDo5jjAAPCLp9flfsP2nkv4c2CfpXuAMcBeA7WOS9gHPA5PAfbZfqyV9RES01LbcbX8XeGeL8R8AG2bYZiewc87pIiJiVubnGcDoqlVzfAJ527rJWT8JfXrXHXPad0TMj3z8QEREgVLuEREFSrlHRBQo5R4RUaCUe0REgVLuEREFSrlHRBQo5R4RUaCUe0REgVLuEREFSrlHRBQo5R4RUaCUe0REgVLuEREF6rjcJS2S9A1Jj1Xnl0o6KOlkddrfNHeHpHFJJyTdXkfwiIiY2dU8cr8fON50fjtwyPYa4FB1Hklrgc3ALcBG4CFJi7oTNyIiOtFRuUtaAdwBfLZpeBOwp1reA9zZND5q+7LtU8A4jS/UjoiIeSLb7SdJ+4F/C7wF+H3bH5J0yfaSpjkv2e6X9CDwlO291fhu4Anb+6dc5lZgK8DAwMD60dHRafudmJigr6+vbb6j515uO6duA4vhwqsLnaIzc8m6bvmN3Q3TRqe/A70gWeuRrDMbHh4+Ynuw1bq2X7Mn6UPARdtHJA11sD+1GJt2D2J7BBgBGBwc9NDQ9IseGxuj1fhUs/3KuG7atm6SB46+Mb61cC5ZT9891N0wbXT6O9ALkrUeyTo7nfwPfx/wYUkfBN4MvFXSXuCCpGW2X5S0DLhYzT8LrGzafgVwvpuhIyLiytoec7e9w/YK26toPFH6Z7Y/ChwAtlTTtgC
PVssHgM2SrpO0GlgDHO568oiImNFcjiPsAvZJuhc4A9wFYPuYpH3A88AkcJ/t1+acNCIiOnZV5W57DBirln8AbJhh3k5g5xyzRUTELOUdqhERBUq5R0QU6I3x2r3oGavm+WWn29ZNcs/2xzm964553W/EG10euUdEFCjlHhFRoJR7RESBUu4REQVKuUdEFCjlHhFRoJR7RESBUu4REQXKm5gi2uj0jVuvv+GqW/LGrZiLPHKPiChQHrlH9Kg6P+qh3V8Z+avhjS+P3CMiCtS23CW9WdJhSd+UdEzSH1TjSyUdlHSyOu1v2maHpHFJJyTdXucViIiI6To5LHMZ+A3bE5LeBHxV0hPAPwIO2d4laTuwHfi4pLU0vo7vFuBXgP8j6e/k25hiLub70yjjr5du/X7N5kn1ug6BdfIdqrY9UZ19U/VjYBOwpxrfA9xZLW8CRm1ftn0KGAdu62boiIi4MtluP0laBBwB/jbwGdsfl3TJ9pKmOS/Z7pf0IPCU7b3V+G7gCdv7p1zmVmArwMDAwPrR0dFp+52YmKCvr69tvqPnXm47p24Di+HCqwudojPJWo+Ssq5bfuP8hWmj0x6Yi251yGx+B+ZyWw8PDx+xPdhqXUevlqkOqdwqaQnwiKR3XGG6Wl1Ei8scAUYABgcHPTQ0NG2jsbExWo1P1c3XFs/WtnWTPHD0jfHio2StR0lZT989NH9h2ui0B+aiWx0ym9+Bum7rq3q1jO1LNL4geyNwQdIygOr0YjXtLLCyabMVwPm5Bo2IiM61vYuR9Dbgp7YvSVoMvB/4FHAA2ALsqk4frTY5AHxB0qdpPKG6BjhcQ/aIKEyrJza7/c7fvy46+fthGbCnOu7+S8A+249J+hqwT9K9wBngLgDbxyTtA54HJoH78kqZiIj51bbcbX8LeFeL8R8AG2bYZiewc87pIiJiVvIO1YiIAqXcIyIKlHKPiChQyj0iokAp94iIAqXcIyIKlHKPiChQyj0iokAp94iIAqXcIyIK9Mb4fNKImFf55qs3vjxyj4goUMo9IqJAKfeIiAKl3CMiCtS23CWtlPSkpOOSjkm6vxpfKumgpJPVaX/TNjskjUs6Ien2Oq9ARERM18kj90lgm+2/B7wHuE/SWmA7cMj2GuBQdZ5q3WbgFhrftfpQ9S1OERExT9qWu+0XbT9TLf8IOA4sBzYBe6ppe4A7q+VNwKjty7ZPAePAbV3OHRERVyDbnU+WVgFfAd4BnLG9pGndS7b7JT0IPGV7bzW+G3jC9v4pl7UV2AowMDCwfnR0dNr+JiYm6Ovra5vr6LmXO74OdRlYDBdeXegUnUnWeiRrPUrPum75jbPe3/Dw8BHbg63WdfwmJkl9wBeBj9n+oaQZp7YYm3YPYnsEGAEYHBz00NDQtI3GxsZoNT5VL3wz+rZ1kzxw9I3xnrBkrUey1qP0rKfvHqolS0evlpH0JhrF/nnbX6qGL0haVq1fBlysxs8CK5s2XwGc707ciIjoRCevlhGwGzhu+9NNqw4AW6rlLcCjTeObJV0naTWwBjjcvcgREdFOJ38/vA/4LeCopGersX8N7AL2SboXOAPcBWD7mKR9wPM0Xmlzn+3Xuh08IiJm1rbcbX+V1sfRATbMsM1OYOccckVExBzkHaoREQVKuUdEFCjlHhFRoJR7RESBUu4REQVKuUdEFCjlHhFRoJR7RESBUu4REQVKuUdEFCjlHhFRoJR7RESBUu4REQVKuUdEFCjlHhFRoE6+ielzki5Keq5pbKmkg5JOVqf9Tet2SBqXdELS7XUFj4iImXXyyP1/ABunjG0HDtleAxyqziNpLbAZuKXa5iFJi7qWNiIiOtK23G1/BfirKcObgD3V8h7gzqbxUduXbZ8CxoHbuhM1IiI6Ndtj7gO2XwSoTm+uxpcDLzTNO1uNRUTEPJLt9pOkVcBjtt9Rnb9ke0nT+pds90v6DPA123ur8d3An9j+YovL3ApsBRgYGFg/Ojo6bb8TExP
09fW1zXf03Mtt59RtYDFceHWhU3QmWeuRrPUoPeu65TfOen/Dw8NHbA+2Wtf2C7JncEHSMtsvSloGXKzGzwIrm+atAM63ugDbI8AIwODgoIeGhqbNGRsbo9X4VPdsf/xqstdi27pJHjg625tzfiVrPZK1HqVnPX33UC1ZZntY5gCwpVreAjzaNL5Z0nWSVgNrgMNzixgREVer7V2MpIeBIeAmSWeBTwK7gH2S7gXOAHcB2D4maR/wPDAJ3Gf7tZqyR0TEDNqWu+2PzLBqwwzzdwI75xIqIiLmJu9QjYgoUMo9IqJAKfeIiAKl3CMiCpRyj4goUMo9IqJAKfeIiAKl3CMiCpRyj4goUMo9IqJAKfeIiAKl3CMiCpRyj4goUMo9IqJAKfeIiAKl3CMiClRbuUvaKOmEpHFJ2+vaT0RETFdLuUtaBHwG+ACwFviIpLV17CsiIqar65H7bcC47e/a/gkwCmyqaV8RETGFbHf/QqV/DGy0/dvV+d8C/r7t32masxXYWp19O3CixUXdBPxl1wPWI1nrkaz1SNZ6zHfWv2n7ba1WtP2C7FlSi7FfuBexPQKMXPFCpK/bHuxmsLokaz2StR7JWo9eylrXYZmzwMqm8yuA8zXtKyIipqir3P8cWCNptaRrgc3AgZr2FRERU9RyWMb2pKTfAf43sAj4nO1js7ioKx626THJWo9krUey1qNnstbyhGpERCysvEM1IqJAKfeIiAL1bLn30scXSFop6UlJxyUdk3R/Nb5U0kFJJ6vT/qZtdlTZT0i6fQEyL5L0DUmP9XJWSUsk7Zf07er2fW8PZ/296t//OUkPS3pzr2SV9DlJFyU91zR21dkkrZd0tFr3nyW1ellzHVn/ffU78C1Jj0ha0qtZm9b9viRLuqkXsk5ju+d+aDwJ+x3gV4FrgW8CaxcwzzLg3dXyW4D/S+NjFf4dsL0a3w58qlpeW2W+DlhdXZdF85z5XwJfAB6rzvdkVmAP8NvV8rXAkl7MCiwHTgGLq/P7gHt6JSvw68C7geeaxq46G3AYeC+N96o8AXxgnrL+JnBNtfypXs5aja+k8YKRvwBu6oWsU3969ZF7T318ge0XbT9TLf8IOE7jP/smGuVEdXpntbwJGLV92fYpYJzGdZoXklYAdwCfbRruuayS3krjP89uANs/sX2pF7NWrgEWS7oGuJ7Gezd6IqvtrwB/NWX4qrJJWga81fbX3GikP2raptastr9se7I6+xSN98b0ZNbKfwT+Fb/45swFzTpVr5b7cuCFpvNnq7EFJ2kV8C7gaWDA9ovQuAMAbq6mLXT+/0TjF+//NY31YtZfBb4P/PfqENJnJd3Qi1ltnwP+A3AGeBF42faXezFrk6vNtrxanjo+3/4ZjUe30INZJX0YOGf7m1NW9VTWXi33th9fsBAk9QFfBD5m+4dXmtpibF7yS/oQcNH2kU43aTE2X7f1NTT+5P0vtt8FvELj8MFMFvJ27afxyGw18CvADZI+eqVNWowt+O9wZaZsC55Z0ieASeDzrw+1mLZgWSVdD3wC+DetVrcYW7CsvVruPffxBZLeRKPYP2/7S9XwhepPLqrTi9X4QuZ/H/BhSadpHM76DUl7ezTrWeCs7aer8/tplH0vZn0/cMr2923/FPgS8Gs9mvV1V5vtLD8/HNI8Pi8kbQE+BNxdHb6A3sv6t2jcwX+z+j+2AnhG0t/otay9Wu499fEF1TPbu4Hjtj/dtOoAsKVa3gI82jS+WdJ1klYDa2g8oVI72ztsr7C9isbt9me2P9qjWb8HvCDp7dXQBuD5XsxK43DMeyRdX/0+bKDx3EsvZn3dVWWrDt38SNJ7quv4T5q2qZWkjcDHgQ/b/vGU69AzWW0ftX2z7VXV/7GzNF5s8b1ey1r7qwxm+wN8kMarUr4DfGKBs/wDGn9GfQt4tvr5IPDLwCHgZHW6tGmbT1TZTzAPz4zPkHuIn79apiezArcCX69u2/8F9Pdw1j8Avg08B/w
xjVdF9ERW4GEazwX8lEbh3DubbMBgdf2+AzxI9S72ecg6TuN49ev/v/5rr2adsv401atlFjrr1J98/EBERIF69bBMRETMQco9IqJAKfeIiAKl3CMiCpRyj4goUMo9IqJAKfeIiAL9fx+OjyVQSZm4AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# 코드를 입력해 주세요\n", "# user 데이터 프레임의 period_day 컬럼의 histogram을 그려보세요.\n", "user['period_day'] = user['period'].dt.days\n", "user['period_day'].hist()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# ML (Classification)" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridclassgenderstart_dateend_dateevent_typeis_deletedclass_namepriceevent_name...end_monthend_dayminmedianmeanmaxsumroutineperiodperiod_day
0OA832399C01F2015-05-01NaTCA10all10000None...NaNNaN25.04.83333385811460 days1460
1PL270116C01M2015-05-01NaTCA10all10000None...NaNNaN35.05.08333376111460 days1460
2OA974876C01M2015-05-01NaTCA10all10000None...NaNNaN35.04.58333365511460 days1460
3HD024127C01F2015-05-01NaTCA1-1all10000None...NaNNaN24.54.83333375811460 days1460
4IK271057C01M2015-05-01NaTCA10all10000None...NaNNaN23.53.75000054511460 days1460
..................................................................
4187IK562610C02F2018-12-132019-02-28CA31day7000Free...2.028.034.05.6666671017077 days77
4188HI599354C02M2018-12-13NaTCA30day7000Free...NaNNaN57.57.50000010301138 days138
4189GD796859C02F2018-12-14NaTCA30day7000Free...NaNNaN67.57.2500008291137 days137
4190GD082270C02F2018-12-142019-02-28CA31day7000Free...2.028.035.06.0000001018076 days76
4191OA426023C02F2018-12-14NaTCA30day7000Free...NaNNaN89.09.50000012381137 days137
\n", "

4192 rows × 24 columns

\n", "
" ], "text/plain": [ " userid class gender start_date end_date event_type is_deleted \\\n", "0 OA832399 C01 F 2015-05-01 NaT CA1 0 \n", "1 PL270116 C01 M 2015-05-01 NaT CA1 0 \n", "2 OA974876 C01 M 2015-05-01 NaT CA1 0 \n", "3 HD024127 C01 F 2015-05-01 NaT CA1 -1 \n", "4 IK271057 C01 M 2015-05-01 NaT CA1 0 \n", "... ... ... ... ... ... ... ... \n", "4187 IK562610 C02 F 2018-12-13 2019-02-28 CA3 1 \n", "4188 HI599354 C02 M 2018-12-13 NaT CA3 0 \n", "4189 GD796859 C02 F 2018-12-14 NaT CA3 0 \n", "4190 GD082270 C02 F 2018-12-14 2019-02-28 CA3 1 \n", "4191 OA426023 C02 F 2018-12-14 NaT CA3 0 \n", "\n", " class_name price event_name ... end_month end_day min median \\\n", "0 all 10000 None ... NaN NaN 2 5.0 \n", "1 all 10000 None ... NaN NaN 3 5.0 \n", "2 all 10000 None ... NaN NaN 3 5.0 \n", "3 all 10000 None ... NaN NaN 2 4.5 \n", "4 all 10000 None ... NaN NaN 2 3.5 \n", "... ... ... ... ... ... ... ... ... \n", "4187 day 7000 Free ... 2.0 28.0 3 4.0 \n", "4188 day 7000 Free ... NaN NaN 5 7.5 \n", "4189 day 7000 Free ... NaN NaN 6 7.5 \n", "4190 day 7000 Free ... 2.0 28.0 3 5.0 \n", "4191 day 7000 Free ... NaN NaN 8 9.0 \n", "\n", " mean max sum routine period period_day \n", "0 4.833333 8 58 1 1460 days 1460 \n", "1 5.083333 7 61 1 1460 days 1460 \n", "2 4.583333 6 55 1 1460 days 1460 \n", "3 4.833333 7 58 1 1460 days 1460 \n", "4 3.750000 5 45 1 1460 days 1460 \n", "... ... ... ... ... ... ... \n", "4187 5.666667 10 17 0 77 days 77 \n", "4188 7.500000 10 30 1 138 days 138 \n", "4189 7.250000 8 29 1 137 days 137 \n", "4190 6.000000 10 18 0 76 days 76 \n", "4191 9.500000 12 38 1 137 days 137 \n", "\n", "[4192 rows x 24 columns]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# user 데이터 프레임을 조회하세요.\n", "user" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderis_deletedclass_namepriceevent_nameminmaxmedianmaxsumroutineperiod_day
0F0all10000None285.085811460
1M0all10000None375.076111460
2M0all10000None365.065511460
3F-1all10000None274.575811460
4M0all10000None253.554511460
.......................................
4187F1day7000Free3104.01017077
4188M0day7000Free5107.510301138
4189F0day7000Free687.58291137
4190F1day7000Free3105.01018076
4191F0day7000Free8129.012381137
\n", "

4192 rows × 12 columns

\n", "
" ], "text/plain": [ " gender is_deleted class_name price event_name min max median max \\\n", "0 F 0 all 10000 None 2 8 5.0 8 \n", "1 M 0 all 10000 None 3 7 5.0 7 \n", "2 M 0 all 10000 None 3 6 5.0 6 \n", "3 F -1 all 10000 None 2 7 4.5 7 \n", "4 M 0 all 10000 None 2 5 3.5 5 \n", "... ... ... ... ... ... ... ... ... ... \n", "4187 F 1 day 7000 Free 3 10 4.0 10 \n", "4188 M 0 day 7000 Free 5 10 7.5 10 \n", "4189 F 0 day 7000 Free 6 8 7.5 8 \n", "4190 F 1 day 7000 Free 3 10 5.0 10 \n", "4191 F 0 day 7000 Free 8 12 9.0 12 \n", "\n", " sum routine period_day \n", "0 58 1 1460 \n", "1 61 1 1460 \n", "2 55 1 1460 \n", "3 58 1 1460 \n", "4 45 1 1460 \n", "... ... ... ... \n", "4187 17 0 77 \n", "4188 30 1 138 \n", "4189 29 1 137 \n", "4190 18 0 76 \n", "4191 38 1 137 \n", "\n", "[4192 rows x 12 columns]" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# user 데이터 프레임에서 다음의 결과와 같이 컬럼을 선택 후 df 변수에 저장하세요.\n", "df = user[['gender', 'is_deleted', 'class_name', 'price', 'event_name', 'min', 'max', 'median', 'max', 'sum', 'routine', 'period_day']]\n", "df" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "# 코드를 입력해 주세요\n", "# df에서 is_deleted 컬럼을 Y로 나머지를 X로 설정하세요\n", "X = df.loc[:, df.columns != 'is_deleted']\n", "Y = df['is_deleted']" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderclass_namepriceevent_nameminmaxmedianmaxsumroutineperiod_day
0Fall10000None285.085811460
1Mall10000None375.076111460
2Mall10000None365.065511460
3Fall10000None274.575811460
4Mall10000None253.554511460
....................................
4187Fday7000Free3104.01017077
4188Mday7000Free5107.510301138
4189Fday7000Free687.58291137
4190Fday7000Free3105.01018076
4191Fday7000Free8129.012381137
\n", "

4192 rows × 11 columns

\n", "
" ], "text/plain": [ " gender class_name price event_name min max median max sum routine \\\n", "0 F all 10000 None 2 8 5.0 8 58 1 \n", "1 M all 10000 None 3 7 5.0 7 61 1 \n", "2 M all 10000 None 3 6 5.0 6 55 1 \n", "3 F all 10000 None 2 7 4.5 7 58 1 \n", "4 M all 10000 None 2 5 3.5 5 45 1 \n", "... ... ... ... ... ... ... ... ... ... ... \n", "4187 F day 7000 Free 3 10 4.0 10 17 0 \n", "4188 M day 7000 Free 5 10 7.5 10 30 1 \n", "4189 F day 7000 Free 6 8 7.5 8 29 1 \n", "4190 F day 7000 Free 3 10 5.0 10 18 0 \n", "4191 F day 7000 Free 8 12 9.0 12 38 1 \n", "\n", " period_day \n", "0 1460 \n", "1 1460 \n", "2 1460 \n", "3 1460 \n", "4 1460 \n", "... ... \n", "4187 77 \n", "4188 138 \n", "4189 137 \n", "4190 76 \n", "4191 137 \n", "\n", "[4192 rows x 11 columns]" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# X를 출력하세요.\n", "X" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0\n", "1 0\n", "2 0\n", "3 -1\n", "4 0\n", " ..\n", "4187 1\n", "4188 0\n", "4189 0\n", "4190 1\n", "4191 0\n", "Name: is_deleted, Length: 4192, dtype: int64" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 코드를 입력해 주세요\n", "# Y를 출력하세요.\n", "Y" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
priceminmaxmedianmaxsumroutineperiod_daygender_Fgender_Mclass_name_allclass_name_dayclass_name_nightevent_name_Freeevent_name_Noneevent_name_Sale
010000285.08581146010100010
110000375.07611146001100010
210000365.06551146001100010
310000274.57581146010100010
410000253.55451146001100010
...................................................
418770003104.0101707710010100
418870005107.51030113801010100
41897000687.5829113710010100
419070003105.0101807610010100
419170008129.01238113710010100
\n", "

4192 rows × 16 columns

\n", "
" ], "text/plain": [ " price min max median max sum routine period_day gender_F \\\n", "0 10000 2 8 5.0 8 58 1 1460 1 \n", "1 10000 3 7 5.0 7 61 1 1460 0 \n", "2 10000 3 6 5.0 6 55 1 1460 0 \n", "3 10000 2 7 4.5 7 58 1 1460 1 \n", "4 10000 2 5 3.5 5 45 1 1460 0 \n", "... ... ... ... ... ... ... ... ... ... \n", "4187 7000 3 10 4.0 10 17 0 77 1 \n", "4188 7000 5 10 7.5 10 30 1 138 0 \n", "4189 7000 6 8 7.5 8 29 1 137 1 \n", "4190 7000 3 10 5.0 10 18 0 76 1 \n", "4191 7000 8 12 9.0 12 38 1 137 1 \n", "\n", " gender_M class_name_all class_name_day class_name_night \\\n", "0 0 1 0 0 \n", "1 1 1 0 0 \n", "2 1 1 0 0 \n", "3 0 1 0 0 \n", "4 1 1 0 0 \n", "... ... ... ... ... \n", "4187 0 0 1 0 \n", "4188 1 0 1 0 \n", "4189 0 0 1 0 \n", "4190 0 0 1 0 \n", "4191 0 0 1 0 \n", "\n", " event_name_Free event_name_None event_name_Sale \n", "0 0 1 0 \n", "1 0 1 0 \n", "2 0 1 0 \n", "3 0 1 0 \n", "4 0 1 0 \n", "... ... ... ... \n", "4187 1 0 0 \n", "4188 1 0 0 \n", "4189 1 0 0 \n", "4190 1 0 0 \n", "4191 1 0 0 \n", "\n", "[4192 rows x 16 columns]" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One-hot encode the categorical columns of X.\n", "X = pd.get_dummies(X)\n", "X" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "# Split X and Y into train and test sets (stratified on Y, fixed seed for reproducibility).\n", "from sklearn.model_selection import train_test_split\n", "x_train, x_test, y_train, y_test = train_test_split(X, Y, stratify=Y, random_state=0)" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.8110687022900763, 0.6688931297709924)" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Classify with the KNN algorithm.\n", "# KNN is distance-based, so the features are standardised first; otherwise large-valued\n", "# columns such as price and period_day dominate the distance over the 0/1 dummy columns.\n", "# The scaler is fitted inside the pipeline on the training data only.\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=3)).fit(x_train, y_train)\n", "model.score(x_train, y_train), model.score(x_test, y_test)" ] }, { "cell_type": "code", 
"execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.7232824427480916, 0.7146946564885496)" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Classify with the LogisticRegression algorithm.\n", "# max_iter is raised well above the default to give the solver room to converge.\n", "from sklearn.linear_model import LogisticRegression\n", "model = LogisticRegression(max_iter=10000).fit(x_train, y_train)\n", "model.score(x_train, y_train), model.score(x_test, y_test)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.7487277353689568, 0.7309160305343512)" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Classify with the DecisionTree algorithm.\n", "# random_state is fixed (same seed as train_test_split) because the tree breaks ties between\n", "# equally good splits at random, so scores and importances would otherwise vary between runs.\n", "from sklearn.tree import DecisionTreeClassifier\n", "model = DecisionTreeClassifier(max_depth=5, random_state=0).fit(x_train, y_train)\n", "model.score(x_train, y_train), model.score(x_test, y_test)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "price 0.004176\n", "min 0.021206\n", "max 0.005352\n", "median 0.341665\n", "max 0.009488\n", "sum 0.548655\n", "routine 0.000000\n", "period_day 0.067465\n", "gender_F 0.000000\n", "gender_M 0.000000\n", "class_name_all 0.000000\n", "class_name_day 0.000000\n", "class_name_night 0.000000\n", "event_name_Free 0.000000\n", "event_name_None 0.000000\n", "event_name_Sale 0.001994\n", "dtype: float64" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Convert feature_importances_ of the fitted decision tree into a Series labelled by feature name.\n", "fi = pd.Series(model.feature_importances_, index=X.columns)\n", "fi" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Visualise the feature importances as a bar chart, most important first.\n", "# A title and axis labels are set so the figure can be read on its own.\n", "ax = fi.sort_values(ascending=False).plot(kind='bar')\n", "ax.set(title='Decision tree feature importances', xlabel='feature', ylabel='importance')\n", "plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 4 }