{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "3875b458", "metadata": {}, "outputs": [], "source": [ "import os\n", "from pathlib import Path\n", "\n", "import pandas as pd\n", "import numpy as np\n", "from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n", "from sklearn.compose import ColumnTransformer\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.model_selection import train_test_split, GridSearchCV\n", "from sklearn.svm import SVC\n", "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score" ] }, { "cell_type": "code", "execution_count": 4, "id": "bee27b63", "metadata": {}, "outputs": [], "source": [ "# Directory that holds the train/test CSV files. Set the TITANIC_DATA_DIR\n", "# environment variable to point somewhere else without editing the notebook;\n", "# the fallback is the location the notebook was originally run from.\n", "DATA_DIR = Path(os.environ.get(\"TITANIC_DATA_DIR\", \"C:/Users/User/Downloads\"))\n", "\n", "df_train = pd.read_csv(DATA_DIR / \"train.csv\", encoding=\"UTF-8\")\n", "df_test = pd.read_csv(DATA_DIR / \"test (1).csv\", encoding=\"UTF-8\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "7e5f5019", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 891 entries, 0 to 890\n", "Data columns (total 12 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 PassengerId 891 non-null int64 \n", " 1 Survived 891 non-null int64 \n", " 2 Pclass 891 non-null int64 \n", " 3 Name 891 non-null object \n", " 4 Sex 891 non-null object \n", " 5 Age 714 non-null float64\n", " 6 SibSp 891 non-null int64 \n", " 7 Parch 891 non-null int64 \n", " 8 Ticket 891 non-null object \n", " 9 Fare 891 non-null float64\n", " 10 Cabin 204 non-null object \n", " 11 Embarked 889 non-null object \n", "dtypes: float64(2), int64(5), object(5)\n", "memory usage: 83.7+ KB\n" ] } ], "source": [ "df_train.info()" ] }, { "cell_type": "code", "execution_count": 8, "id": "d019aad7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 418 entries, 0 to 417\n", "Data columns (total 11 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- 
\n", " 0 PassengerId 418 non-null int64 \n", " 1 Pclass 418 non-null int64 \n", " 2 Name 418 non-null object \n", " 3 Sex 418 non-null object \n", " 4 Age 332 non-null float64\n", " 5 SibSp 418 non-null int64 \n", " 6 Parch 418 non-null int64 \n", " 7 Ticket 418 non-null object \n", " 8 Fare 417 non-null float64\n", " 9 Cabin 91 non-null object \n", " 10 Embarked 418 non-null object \n", "dtypes: float64(2), int64(4), object(5)\n", "memory usage: 36.0+ KB\n" ] } ], "source": [ "df_test.info()" ] }, { "cell_type": "code", "execution_count": 9, "id": "3a3cec61", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
.......................................
88688702Montvila, Rev. Juozasmale27.00021153613.0000NaNS
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S
88888903Johnston, Miss. Catherine Helen \"Carrie\"femaleNaN12W./C. 660723.4500NaNS
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C
89089103Dooley, Mr. Patrickmale32.0003703767.7500NaNQ
\n", "

891 rows × 12 columns

\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", ".. ... ... ... \n", "886 887 0 2 \n", "887 888 1 1 \n", "888 889 0 3 \n", "889 890 1 1 \n", "890 891 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", ".. ... ... ... ... \n", "886 Montvila, Rev. Juozas male 27.0 0 \n", "887 Graham, Miss. Margaret Edith female 19.0 0 \n", "888 Johnston, Miss. Catherine Helen \"Carrie\" female NaN 1 \n", "889 Behr, Mr. Karl Howell male 26.0 0 \n", "890 Dooley, Mr. Patrick male 32.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S \n", ".. ... ... ... ... ... \n", "886 0 211536 13.0000 NaN S \n", "887 0 112053 30.0000 B42 S \n", "888 2 W./C. 
6607 23.4500 NaN S \n", "889 0 111369 30.0000 C148 C \n", "890 0 370376 7.7500 NaN Q \n", "\n", "[891 rows x 12 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train" ] }, { "cell_type": "code", "execution_count": 10, "id": "07697498", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId 0\n", "Survived 0\n", "Pclass 0\n", "Name 0\n", "Sex 0\n", "Age 177\n", "SibSp 0\n", "Parch 0\n", "Ticket 0\n", "Fare 0\n", "Cabin 687\n", "Embarked 2\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 16, "id": "075aac37", "metadata": {}, "outputs": [], "source": [ "# Learn the fill values from the training set only and reuse them for the test\n", "# set, so both frames are imputed with identical constants (previously the\n", "# test set was filled with its own mean/mode).\n", "fill_values = {\n", "    'Age': df_train['Age'].mean(),\n", "    'Fare': df_train['Fare'].mean(),\n", "    'Embarked': df_train['Embarked'].mode().iloc[0],\n", "}\n", "\n", "# fillna with a dict only touches the listed columns; Cabin keeps its NaNs.\n", "df_train = df_train.fillna(value=fill_values)\n", "df_test = df_test.fillna(value=fill_values)" ] }, { "cell_type": "code", "execution_count": 17, "id": "5ef89440", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.00000010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.00000010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.00000000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.0000001011380353.1000C123S
4503Allen, Mr. William Henrymale35.000000003734508.0500NaNS
.......................................
88688702Montvila, Rev. Juozasmale27.0000000021153613.0000NaNS
88788811Graham, Miss. Margaret Edithfemale19.0000000011205330.0000B42S
88888903Johnston, Miss. Catherine Helen \"Carrie\"female29.69911812W./C. 660723.4500NaNS
88989011Behr, Mr. Karl Howellmale26.0000000011136930.0000C148C
89089103Dooley, Mr. Patrickmale32.000000003703767.7500NaNQ
\n", "

891 rows × 12 columns

\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", ".. ... ... ... \n", "886 887 0 2 \n", "887 888 1 1 \n", "888 889 0 3 \n", "889 890 1 1 \n", "890 891 0 3 \n", "\n", " Name Sex Age \\\n", "0 Braund, Mr. Owen Harris male 22.000000 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.000000 \n", "2 Heikkinen, Miss. Laina female 26.000000 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.000000 \n", "4 Allen, Mr. William Henry male 35.000000 \n", ".. ... ... ... \n", "886 Montvila, Rev. Juozas male 27.000000 \n", "887 Graham, Miss. Margaret Edith female 19.000000 \n", "888 Johnston, Miss. Catherine Helen \"Carrie\" female 29.699118 \n", "889 Behr, Mr. Karl Howell male 26.000000 \n", "890 Dooley, Mr. Patrick male 32.000000 \n", "\n", " SibSp Parch Ticket Fare Cabin Embarked \n", "0 1 0 A/5 21171 7.2500 NaN S \n", "1 1 0 PC 17599 71.2833 C85 C \n", "2 0 0 STON/O2. 3101282 7.9250 NaN S \n", "3 1 0 113803 53.1000 C123 S \n", "4 0 0 373450 8.0500 NaN S \n", ".. ... ... ... ... ... ... \n", "886 0 0 211536 13.0000 NaN S \n", "887 0 0 112053 30.0000 B42 S \n", "888 1 2 W./C. 
6607 23.4500 NaN S \n", "889 0 0 111369 30.0000 C148 C \n", "890 0 0 370376 7.7500 NaN Q \n", "\n", "[891 rows x 12 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train" ] }, { "cell_type": "code", "execution_count": 18, "id": "3c2cebc2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId 0\n", "Survived 0\n", "Pclass 0\n", "Name 0\n", "Sex 0\n", "Age 0\n", "SibSp 0\n", "Parch 0\n", "Ticket 0\n", "Fare 0\n", "Cabin 687\n", "Embarked 0\n", "dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 19, "id": "351e9183", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId 0\n", "Pclass 0\n", "Name 0\n", "Sex 0\n", "Age 0\n", "SibSp 0\n", "Parch 0\n", "Ticket 0\n", "Fare 0\n", "Cabin 327\n", "Embarked 0\n", "dtype: int64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_test.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 20, "id": "606b71fa", "metadata": {}, "outputs": [], "source": [ "# Round Age and Fare to whole numbers. DataFrame.round is vectorised and leaves\n", "# NaN untouched, so the deprecated element-wise applymap + lambda is not needed.\n", "rounded_columns = ['Age', 'Fare']\n", "df_train[rounded_columns] = df_train[rounded_columns].round()\n", "df_test[rounded_columns] = df_test[rounded_columns].round()\n" ] }, { "cell_type": "code", "execution_count": 24, "id": "6a43c8ac", "metadata": {}, "outputs": [], "source": [ "# Encode the text categories as integer codes, using the same mapping for both\n", "# frames. Series.map turns any value missing from the mapping into NaN.\n", "sex_codes = {'male': 0, 'female': 1}\n", "embarked_codes = {'S': 0, 'C': 1, 'Q': 2}\n", "for frame in (df_train, df_test):\n", "    frame['Sex'] = frame['Sex'].map(sex_codes)\n", "    frame['Embarked'] = frame['Embarked'].map(embarked_codes)" ] }, { "cell_type": "code", "execution_count": 25, "id": "29c92fc6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harris022.010A/5 211717.0NaN0
1211Cumings, Mrs. John Bradley (Florence Briggs Th...138.010PC 1759971.0C851
2313Heikkinen, Miss. Laina126.000STON/O2. 31012828.0NaN0
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)135.01011380353.0C1230
4503Allen, Mr. William Henry035.0003734508.0NaN0
.......................................
88688702Montvila, Rev. Juozas027.00021153613.0NaN0
88788811Graham, Miss. Margaret Edith119.00011205330.0B420
88888903Johnston, Miss. Catherine Helen \"Carrie\"130.012W./C. 660723.0NaN0
88989011Behr, Mr. Karl Howell026.00011136930.0C1481
89089103Dooley, Mr. Patrick032.0003703768.0NaN2
\n", "

891 rows × 12 columns

\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", ".. ... ... ... \n", "886 887 0 2 \n", "887 888 1 1 \n", "888 889 0 3 \n", "889 890 1 1 \n", "890 891 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris 0 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... 1 38.0 1 \n", "2 Heikkinen, Miss. Laina 1 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 35.0 1 \n", "4 Allen, Mr. William Henry 0 35.0 0 \n", ".. ... ... ... ... \n", "886 Montvila, Rev. Juozas 0 27.0 0 \n", "887 Graham, Miss. Margaret Edith 1 19.0 0 \n", "888 Johnston, Miss. Catherine Helen \"Carrie\" 1 30.0 1 \n", "889 Behr, Mr. Karl Howell 0 26.0 0 \n", "890 Dooley, Mr. Patrick 0 32.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.0 NaN 0 \n", "1 0 PC 17599 71.0 C85 1 \n", "2 0 STON/O2. 3101282 8.0 NaN 0 \n", "3 0 113803 53.0 C123 0 \n", "4 0 373450 8.0 NaN 0 \n", ".. ... ... ... ... ... \n", "886 0 211536 13.0 NaN 0 \n", "887 0 112053 30.0 B42 0 \n", "888 2 W./C. 
6607 23.0 NaN 0 \n", "889 0 111369 30.0 C148 1 \n", "890 0 370376 8.0 NaN 2 \n", "\n", "[891 rows x 12 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train" ] }, { "cell_type": "code", "execution_count": 26, "id": "1c2cc7cd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 891 entries, 0 to 890\n", "Data columns (total 12 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 PassengerId 891 non-null int64 \n", " 1 Survived 891 non-null int64 \n", " 2 Pclass 891 non-null int64 \n", " 3 Name 891 non-null object \n", " 4 Sex 891 non-null int64 \n", " 5 Age 891 non-null float64\n", " 6 SibSp 891 non-null int64 \n", " 7 Parch 891 non-null int64 \n", " 8 Ticket 891 non-null object \n", " 9 Fare 891 non-null float64\n", " 10 Cabin 204 non-null object \n", " 11 Embarked 891 non-null int64 \n", "dtypes: float64(2), int64(7), object(3)\n", "memory usage: 83.7+ KB\n" ] } ], "source": [ "df_train.info()" ] }, { "cell_type": "code", "execution_count": 30, "id": "6589eecf", "metadata": {}, "outputs": [], "source": [ "# One-hot encode the training data. get_dummies with `columns=` drops the\n", "# listed columns and appends <column>_<value> indicator columns at the end.\n", "onehot_columns = ['Pclass', 'Embarked']\n", "df_train = pd.get_dummies(df_train, columns=onehot_columns)\n", "\n", "# Encode the test data the same way, then align it to the training layout:\n", "# an indicator for a category absent from the test set is added as all zeros,\n", "# and an indicator for a category never seen in training is dropped.\n", "df_test = pd.get_dummies(df_test, columns=onehot_columns)\n", "df_test = df_test.reindex(columns=df_train.columns.drop('Survived'), fill_value=0)" ] }, { "cell_type": "code", "execution_count": 31, "id": "598733f7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedNameSexAgeSibSpParchTicketFareCabinPclass_1Pclass_2Pclass_3Embarked_0Embarked_1Embarked_2
010Braund, Mr. Owen Harris022.010A/5 211717.0NaN001100
121Cumings, Mrs. John Bradley (Florence Briggs Th...138.010PC 1759971.0C85100010
231Heikkinen, Miss. Laina126.000STON/O2. 31012828.0NaN001100
341Futrelle, Mrs. Jacques Heath (Lily May Peel)135.01011380353.0C123100100
450Allen, Mr. William Henry035.0003734508.0NaN001100
...................................................
8868870Montvila, Rev. Juozas027.00021153613.0NaN010100
8878881Graham, Miss. Margaret Edith119.00011205330.0B42100100
8888890Johnston, Miss. Catherine Helen \"Carrie\"130.012W./C. 660723.0NaN001100
8898901Behr, Mr. Karl Howell026.00011136930.0C148100010
8908910Dooley, Mr. Patrick032.0003703768.0NaN001001
\n", "

891 rows × 16 columns

\n", "
" ], "text/plain": [ " PassengerId Survived Name \\\n", "0 1 0 Braund, Mr. Owen Harris \n", "1 2 1 Cumings, Mrs. John Bradley (Florence Briggs Th... \n", "2 3 1 Heikkinen, Miss. Laina \n", "3 4 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) \n", "4 5 0 Allen, Mr. William Henry \n", ".. ... ... ... \n", "886 887 0 Montvila, Rev. Juozas \n", "887 888 1 Graham, Miss. Margaret Edith \n", "888 889 0 Johnston, Miss. Catherine Helen \"Carrie\" \n", "889 890 1 Behr, Mr. Karl Howell \n", "890 891 0 Dooley, Mr. Patrick \n", "\n", " Sex Age SibSp Parch Ticket Fare Cabin Pclass_1 \\\n", "0 0 22.0 1 0 A/5 21171 7.0 NaN 0 \n", "1 1 38.0 1 0 PC 17599 71.0 C85 1 \n", "2 1 26.0 0 0 STON/O2. 3101282 8.0 NaN 0 \n", "3 1 35.0 1 0 113803 53.0 C123 1 \n", "4 0 35.0 0 0 373450 8.0 NaN 0 \n", ".. ... ... ... ... ... ... ... ... \n", "886 0 27.0 0 0 211536 13.0 NaN 0 \n", "887 1 19.0 0 0 112053 30.0 B42 1 \n", "888 1 30.0 1 2 W./C. 6607 23.0 NaN 0 \n", "889 0 26.0 0 0 111369 30.0 C148 1 \n", "890 0 32.0 0 0 370376 8.0 NaN 0 \n", "\n", " Pclass_2 Pclass_3 Embarked_0 Embarked_1 Embarked_2 \n", "0 0 1 1 0 0 \n", "1 0 0 0 1 0 \n", "2 0 1 1 0 0 \n", "3 0 0 1 0 0 \n", "4 0 1 1 0 0 \n", ".. ... ... ... ... ... 
\n", "886 1 0 1 0 0 \n", "887 0 0 1 0 0 \n", "888 0 1 1 0 0 \n", "889 0 0 0 1 0 \n", "890 0 1 0 0 1 \n", "\n", "[891 rows x 16 columns]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train" ] }, { "cell_type": "code", "execution_count": 145, "id": "cd39362a", "metadata": {}, "outputs": [], "source": [ "high_sibsp_threshold = 3\n", "# New indicator column: 1 when SibSp is below the threshold (3), 0 when it is\n", "# 3 or more. NOTE: despite the name, High_SibSp == 1 marks the LOW-SibSp rows.\n", "for frame in (df_train, df_test):\n", "    frame['High_SibSp'] = frame['SibSp'].lt(high_sibsp_threshold).astype(int)\n" ] }, { "cell_type": "code", "execution_count": 146, "id": "f63c5804", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " High_SibSp Survived\n", "High_SibSp 1.000000 0.111145\n", "Survived 0.111145 1.000000\n" ] } ], "source": [ "# The two columns to compare: the engineered flag and the target\n", "columns_of_interest = ['High_SibSp', 'Survived']\n", "\n", "# Pairwise correlation between those columns on the training data\n", "correlation_matrix = df_train.loc[:, columns_of_interest].corr()\n", "\n", "# Show the resulting 2x2 correlation matrix\n", "print(correlation_matrix)" ] }, { "cell_type": "code", "execution_count": 147, "id": "a260708f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedNameSexAgeSibSpParchTicketFareCabinPclass_1Pclass_2Pclass_3Embarked_0Embarked_1Embarked_2High_SibSp
010Braund, Mr. Owen Harris022.010A/5 211717.0NaN0011001
121Cumings, Mrs. John Bradley (Florence Briggs Th...138.010PC 1759971.0C851000101
231Heikkinen, Miss. Laina126.000STON/O2. 31012828.0NaN0011001
341Futrelle, Mrs. Jacques Heath (Lily May Peel)135.01011380353.0C1231001001
450Allen, Mr. William Henry035.0003734508.0NaN0011001
......................................................
8868870Montvila, Rev. Juozas027.00021153613.0NaN0101001
8878881Graham, Miss. Margaret Edith119.00011205330.0B421001001
8888890Johnston, Miss. Catherine Helen \"Carrie\"130.012W./C. 660723.0NaN0011001
8898901Behr, Mr. Karl Howell026.00011136930.0C1481000101
8908910Dooley, Mr. Patrick032.0003703768.0NaN0010011
\n", "

891 rows × 17 columns

\n", "
" ], "text/plain": [ " PassengerId Survived Name \\\n", "0 1 0 Braund, Mr. Owen Harris \n", "1 2 1 Cumings, Mrs. John Bradley (Florence Briggs Th... \n", "2 3 1 Heikkinen, Miss. Laina \n", "3 4 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) \n", "4 5 0 Allen, Mr. William Henry \n", ".. ... ... ... \n", "886 887 0 Montvila, Rev. Juozas \n", "887 888 1 Graham, Miss. Margaret Edith \n", "888 889 0 Johnston, Miss. Catherine Helen \"Carrie\" \n", "889 890 1 Behr, Mr. Karl Howell \n", "890 891 0 Dooley, Mr. Patrick \n", "\n", " Sex Age SibSp Parch Ticket Fare Cabin Pclass_1 \\\n", "0 0 22.0 1 0 A/5 21171 7.0 NaN 0 \n", "1 1 38.0 1 0 PC 17599 71.0 C85 1 \n", "2 1 26.0 0 0 STON/O2. 3101282 8.0 NaN 0 \n", "3 1 35.0 1 0 113803 53.0 C123 1 \n", "4 0 35.0 0 0 373450 8.0 NaN 0 \n", ".. ... ... ... ... ... ... ... ... \n", "886 0 27.0 0 0 211536 13.0 NaN 0 \n", "887 1 19.0 0 0 112053 30.0 B42 1 \n", "888 1 30.0 1 2 W./C. 6607 23.0 NaN 0 \n", "889 0 26.0 0 0 111369 30.0 C148 1 \n", "890 0 32.0 0 0 370376 8.0 NaN 0 \n", "\n", " Pclass_2 Pclass_3 Embarked_0 Embarked_1 Embarked_2 High_SibSp \n", "0 0 1 1 0 0 1 \n", "1 0 0 0 1 0 1 \n", "2 0 1 1 0 0 1 \n", "3 0 0 1 0 0 1 \n", "4 0 1 1 0 0 1 \n", ".. ... ... ... ... ... ... \n", "886 1 0 1 0 0 1 \n", "887 0 0 1 0 0 1 \n", "888 0 1 1 0 0 1 \n", "889 0 0 0 1 0 1 \n", "890 0 1 0 0 1 1 \n", "\n", "[891 rows x 17 columns]" ] }, "execution_count": 147, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train" ] }, { "cell_type": "code", "execution_count": 151, "id": "631886a5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdNameSexAgeSibSpParchTicketFareCabinPclass_1Pclass_2Pclass_3Embarked_0Embarked_1Embarked_2High_SibSp
0892Kelly, Mr. James034.0003309118.0NaN0010011
1893Wilkes, Mrs. James (Ellen Needs)147.0103632727.0NaN0011001
2894Myles, Mr. Thomas Francis062.00024027610.0NaN0100011
3895Wirz, Mr. Albert027.0003151549.0NaN0011001
4896Hirvonen, Mrs. Alexander (Helga E Lindqvist)122.011310129812.0NaN0011001
...................................................
4131305Spector, Mr. Woolf030.000A.5. 32368.0NaN0011001
4141306Oliva y Ocana, Dona. Fermina139.000PC 17758109.0C1051000101
4151307Saether, Mr. Simon Sivertsen038.000SOTON/O.Q. 31012627.0NaN0011001
4161308Ware, Mr. Frederick030.0003593098.0NaN0011001
4171309Peter, Master. Michael J030.011266822.0NaN0010101
\n", "

418 rows × 16 columns

\n", "
" ], "text/plain": [ " PassengerId Name Sex Age \\\n", "0 892 Kelly, Mr. James 0 34.0 \n", "1 893 Wilkes, Mrs. James (Ellen Needs) 1 47.0 \n", "2 894 Myles, Mr. Thomas Francis 0 62.0 \n", "3 895 Wirz, Mr. Albert 0 27.0 \n", "4 896 Hirvonen, Mrs. Alexander (Helga E Lindqvist) 1 22.0 \n", ".. ... ... ... ... \n", "413 1305 Spector, Mr. Woolf 0 30.0 \n", "414 1306 Oliva y Ocana, Dona. Fermina 1 39.0 \n", "415 1307 Saether, Mr. Simon Sivertsen 0 38.0 \n", "416 1308 Ware, Mr. Frederick 0 30.0 \n", "417 1309 Peter, Master. Michael J 0 30.0 \n", "\n", " SibSp Parch Ticket Fare Cabin Pclass_1 Pclass_2 \\\n", "0 0 0 330911 8.0 NaN 0 0 \n", "1 1 0 363272 7.0 NaN 0 0 \n", "2 0 0 240276 10.0 NaN 0 1 \n", "3 0 0 315154 9.0 NaN 0 0 \n", "4 1 1 3101298 12.0 NaN 0 0 \n", ".. ... ... ... ... ... ... ... \n", "413 0 0 A.5. 3236 8.0 NaN 0 0 \n", "414 0 0 PC 17758 109.0 C105 1 0 \n", "415 0 0 SOTON/O.Q. 3101262 7.0 NaN 0 0 \n", "416 0 0 359309 8.0 NaN 0 0 \n", "417 1 1 2668 22.0 NaN 0 0 \n", "\n", " Pclass_3 Embarked_0 Embarked_1 Embarked_2 High_SibSp \n", "0 1 0 0 1 1 \n", "1 1 1 0 0 1 \n", "2 0 0 0 1 1 \n", "3 1 1 0 0 1 \n", "4 1 1 0 0 1 \n", ".. ... ... ... ... ... \n", "413 1 1 0 0 1 \n", "414 0 0 1 0 1 \n", "415 1 1 0 0 1 \n", "416 1 1 0 0 1 \n", "417 1 0 1 0 1 \n", "\n", "[418 rows x 16 columns]" ] }, "execution_count": 151, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_test" ] }, { "cell_type": "code", "execution_count": 155, "id": "de0ca58e", "metadata": {}, "outputs": [], "source": [ "# Every column except the ones listed here becomes a model feature;\n", "# Survived is the prediction target and is kept separately as y_train.\n", "non_feature_columns = ['PassengerId', 'Survived', 'Name', 'SibSp', 'Cabin', 'Ticket']\n", "x_train = df_train.drop(columns=non_feature_columns)\n", "y_train = df_train['Survived']\n" ] }, { "cell_type": "code", "execution_count": 156, "id": "9cf56151", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SexAgeParchFarePclass_1Pclass_2Pclass_3Embarked_0Embarked_1Embarked_2High_SibSp
0022.007.00011001
1138.0071.01000101
2126.008.00011001
3135.0053.01001001
4035.008.00011001
....................................
886027.0013.00101001
887119.0030.01001001
888130.0223.00011001
889026.0030.01000101
890032.008.00010011
\n", "

891 rows × 11 columns

\n", "
" ], "text/plain": [ " Sex Age Parch Fare Pclass_1 Pclass_2 Pclass_3 Embarked_0 \\\n", "0 0 22.0 0 7.0 0 0 1 1 \n", "1 1 38.0 0 71.0 1 0 0 0 \n", "2 1 26.0 0 8.0 0 0 1 1 \n", "3 1 35.0 0 53.0 1 0 0 1 \n", "4 0 35.0 0 8.0 0 0 1 1 \n", ".. ... ... ... ... ... ... ... ... \n", "886 0 27.0 0 13.0 0 1 0 1 \n", "887 1 19.0 0 30.0 1 0 0 1 \n", "888 1 30.0 2 23.0 0 0 1 1 \n", "889 0 26.0 0 30.0 1 0 0 0 \n", "890 0 32.0 0 8.0 0 0 1 0 \n", "\n", " Embarked_1 Embarked_2 High_SibSp \n", "0 0 0 1 \n", "1 1 0 1 \n", "2 0 0 1 \n", "3 0 0 1 \n", "4 0 0 1 \n", ".. ... ... ... \n", "886 0 0 1 \n", "887 0 0 1 \n", "888 0 0 1 \n", "889 1 0 1 \n", "890 0 1 1 \n", "\n", "[891 rows x 11 columns]" ] }, "execution_count": 156, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_train" ] }, { "cell_type": "code", "execution_count": 157, "id": "dae9dca0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0\n", "1 1\n", "2 1\n", "3 1\n", "4 0\n", " ..\n", "886 0\n", "887 1\n", "888 0\n", "889 1\n", "890 0\n", "Name: Survived, Length: 891, dtype: int64" ] }, "execution_count": 157, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_train" ] }, { "cell_type": "code", "execution_count": 158, "id": "1d7aeefe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
SVC()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "SVC()" ] }, "execution_count": 158, "metadata": {}, "output_type": "execute_result" } ], "source": [ "svm_model = SVC()\n", "\n", "svm_model.fit(x_train, y_train)" ] }, { "cell_type": "code", "execution_count": 160, "id": "f944189c", "metadata": {}, "outputs": [], "source": [ "# Build the test matrix from exactly the columns the model was fitted on,\n", "# in the same order. Deriving the selection from x_train (instead of keeping\n", "# a second hand-written drop list) means the two feature sets cannot drift;\n", "# a feature missing from df_test raises KeyError instead of mis-predicting.\n", "x_test = df_test[x_train.columns]" ] }, { "cell_type": "code", "execution_count": 162, "id": "b5694767", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SexAgeParchFarePclass_1Pclass_2Pclass_3Embarked_0Embarked_1Embarked_2High_SibSp
0022.007.00011001
1138.0071.01000101
2126.008.00011001
3135.0053.01001001
4035.008.00011001
....................................
886027.0013.00101001
887119.0030.01001001
888130.0223.00011001
889026.0030.01000101
890032.008.00010011
\n", "

891 rows × 11 columns

\n", "
" ], "text/plain": [ " Sex Age Parch Fare Pclass_1 Pclass_2 Pclass_3 Embarked_0 \\\n", "0 0 22.0 0 7.0 0 0 1 1 \n", "1 1 38.0 0 71.0 1 0 0 0 \n", "2 1 26.0 0 8.0 0 0 1 1 \n", "3 1 35.0 0 53.0 1 0 0 1 \n", "4 0 35.0 0 8.0 0 0 1 1 \n", ".. ... ... ... ... ... ... ... ... \n", "886 0 27.0 0 13.0 0 1 0 1 \n", "887 1 19.0 0 30.0 1 0 0 1 \n", "888 1 30.0 2 23.0 0 0 1 1 \n", "889 0 26.0 0 30.0 1 0 0 0 \n", "890 0 32.0 0 8.0 0 0 1 0 \n", "\n", " Embarked_1 Embarked_2 High_SibSp \n", "0 0 0 1 \n", "1 1 0 1 \n", "2 0 0 1 \n", "3 0 0 1 \n", "4 0 0 1 \n", ".. ... ... ... \n", "886 0 0 1 \n", "887 0 0 1 \n", "888 0 0 1 \n", "889 1 0 1 \n", "890 0 1 1 \n", "\n", "[891 rows x 11 columns]" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_train" ] }, { "cell_type": "code", "execution_count": 163, "id": "afe7479b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SexAgeParchFarePclass_1Pclass_2Pclass_3Embarked_0Embarked_1Embarked_2High_SibSp
0034.008.00010011
1147.007.00011001
2062.0010.00100011
3027.009.00011001
4122.0112.00011001
....................................
413030.008.00011001
414139.00109.01000101
415038.007.00011001
416030.008.00011001
417030.0122.00010101
\n", "

418 rows × 11 columns

\n", "
" ], "text/plain": [ " Sex Age Parch Fare Pclass_1 Pclass_2 Pclass_3 Embarked_0 \\\n", "0 0 34.0 0 8.0 0 0 1 0 \n", "1 1 47.0 0 7.0 0 0 1 1 \n", "2 0 62.0 0 10.0 0 1 0 0 \n", "3 0 27.0 0 9.0 0 0 1 1 \n", "4 1 22.0 1 12.0 0 0 1 1 \n", ".. ... ... ... ... ... ... ... ... \n", "413 0 30.0 0 8.0 0 0 1 1 \n", "414 1 39.0 0 109.0 1 0 0 0 \n", "415 0 38.0 0 7.0 0 0 1 1 \n", "416 0 30.0 0 8.0 0 0 1 1 \n", "417 0 30.0 1 22.0 0 0 1 0 \n", "\n", " Embarked_1 Embarked_2 High_SibSp \n", "0 0 1 1 \n", "1 0 0 1 \n", "2 0 1 1 \n", "3 0 0 1 \n", "4 0 0 1 \n", ".. ... ... ... \n", "413 0 0 1 \n", "414 1 0 1 \n", "415 0 0 1 \n", "416 0 0 1 \n", "417 1 0 1 \n", "\n", "[418 rows x 11 columns]" ] }, "execution_count": 163, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_test" ] }, { "cell_type": "code", "execution_count": 164, "id": "c50ea3eb", "metadata": {}, "outputs": [], "source": [ "y_pred = svm_model.predict(x_test)" ] }, { "cell_type": "code", "execution_count": 165, "id": "79b53695", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0,\n", " 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,\n", " 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", " 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n", " 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,\n", " 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0,\n", " 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n", " 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,\n", " 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,\n", " 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,\n", " 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],\n", " dtype=int64)" ] }, "execution_count": 165, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_pred" ] }, { "cell_type": "code", "execution_count": 173, "id": "a3b2c656", "metadata": {}, "outputs": [], "source": [ "# Assemble the submission table in one step.\n", "# PassengerId is attached by position (.to_numpy() discards df_test's index),\n", "# so a filtered or re-indexed df_test cannot produce NaN ids through index\n", "# alignment; a length mismatch with y_pred raises ValueError instead.\n", "predictions_df = pd.DataFrame({\n", "    'PassengerId': df_test['PassengerId'].to_numpy(),\n", "    'Survived': y_pred,\n", "})\n", "\n", "# Written relative to the notebook's working directory, without the index column.\n", "predictions_df.to_csv(\"predicted_survived_with_passengerid.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "baa439d1", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "ml", "language": "python", "name": "ml" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.15" } }, "nbformat": 4, "nbformat_minor": 5 }