{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "c383c15b", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "id": "f8f4aee1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
.......................................
88688702Montvila, Rev. Juozasmale27.00021153613.0000NaNS
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S
88888903Johnston, Miss. Catherine Helen \"Carrie\"femaleNaN12W./C. 660723.4500NaNS
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C
89089103Dooley, Mr. Patrickmale32.0003703767.7500NaNQ
\n", "

891 rows × 12 columns

\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", ".. ... ... ... \n", "886 887 0 2 \n", "887 888 1 1 \n", "888 889 0 3 \n", "889 890 1 1 \n", "890 891 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", ".. ... ... ... ... \n", "886 Montvila, Rev. Juozas male 27.0 0 \n", "887 Graham, Miss. Margaret Edith female 19.0 0 \n", "888 Johnston, Miss. Catherine Helen \"Carrie\" female NaN 1 \n", "889 Behr, Mr. Karl Howell male 26.0 0 \n", "890 Dooley, Mr. Patrick male 32.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S \n", ".. ... ... ... ... ... \n", "886 0 211536 13.0000 NaN S \n", "887 0 112053 30.0000 B42 S \n", "888 2 W./C. 6607 23.4500 NaN S \n", "889 0 111369 30.0000 C148 C \n", "890 0 370376 7.7500 NaN Q \n", "\n", "[891 rows x 12 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data=pd.read_csv('C:/Users/User/Downloads/train.csv')\n", "data" ] }, { "cell_type": "code", "execution_count": 3, "id": "0b086e18", "metadata": {}, "outputs": [], "source": [ "data = data.drop(['PassengerId','Name','Ticket', 'Cabin'] , axis = 1)" ] }, { "cell_type": "code", "execution_count": 4, "id": "dcb80c0d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassSexAgeSibSpParchFareEmbarked
003male22.0107.2500S
111female38.01071.2833C
213female26.0007.9250S
311female35.01053.1000S
403male35.0008.0500S
...........................
88602male27.00013.0000S
88711female19.00030.0000S
88803femaleNaN1223.4500S
88911male26.00030.0000C
89003male32.0007.7500Q
\n", "

891 rows × 8 columns

\n", "
" ], "text/plain": [ " Survived Pclass Sex Age SibSp Parch Fare Embarked\n", "0 0 3 male 22.0 1 0 7.2500 S\n", "1 1 1 female 38.0 1 0 71.2833 C\n", "2 1 3 female 26.0 0 0 7.9250 S\n", "3 1 1 female 35.0 1 0 53.1000 S\n", "4 0 3 male 35.0 0 0 8.0500 S\n", ".. ... ... ... ... ... ... ... ...\n", "886 0 2 male 27.0 0 0 13.0000 S\n", "887 1 1 female 19.0 0 0 30.0000 S\n", "888 0 3 female NaN 1 2 23.4500 S\n", "889 1 1 male 26.0 0 0 30.0000 C\n", "890 0 3 male 32.0 0 0 7.7500 Q\n", "\n", "[891 rows x 8 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data " ] }, { "cell_type": "code", "execution_count": 5, "id": "5d47e602", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Survived 0\n", "Pclass 0\n", "Sex 0\n", "Age 177\n", "SibSp 0\n", "Parch 0\n", "Fare 0\n", "Embarked 2\n", "dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 6, "id": "0e1456f4", "metadata": {}, "outputs": [], "source": [ "data['Embarked'].fillna(value = data[\"Embarked\"].mode().to_string(index=False), inplace = True)" ] }, { "cell_type": "code", "execution_count": 7, "id": "4e13a1ff", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassSexAgeSibSpParchFareEmbarked
003male22.0107.2500S
111female38.01071.2833C
213female26.0007.9250S
311female35.01053.1000S
403male35.0008.0500S
...........................
88602male27.00013.0000S
88711female19.00030.0000S
88803femaleNaN1223.4500S
88911male26.00030.0000C
89003male32.0007.7500Q
\n", "

891 rows × 8 columns

\n", "
" ], "text/plain": [ " Survived Pclass Sex Age SibSp Parch Fare Embarked\n", "0 0 3 male 22.0 1 0 7.2500 S\n", "1 1 1 female 38.0 1 0 71.2833 C\n", "2 1 3 female 26.0 0 0 7.9250 S\n", "3 1 1 female 35.0 1 0 53.1000 S\n", "4 0 3 male 35.0 0 0 8.0500 S\n", ".. ... ... ... ... ... ... ... ...\n", "886 0 2 male 27.0 0 0 13.0000 S\n", "887 1 1 female 19.0 0 0 30.0000 S\n", "888 0 3 female NaN 1 2 23.4500 S\n", "889 1 1 male 26.0 0 0 30.0000 C\n", "890 0 3 male 32.0 0 0 7.7500 Q\n", "\n", "[891 rows x 8 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 8, "id": "f225e386", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Survived 0\n", "Pclass 0\n", "Sex 0\n", "Age 177\n", "SibSp 0\n", "Parch 0\n", "Fare 0\n", "Embarked 0\n", "dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 9, "id": "83d957c0", "metadata": {}, "outputs": [], "source": [ "data[\"Age\"].fillna(data[\"Age\"].mean() , inplace=True)" ] }, { "cell_type": "code", "execution_count": 10, "id": "247ac6eb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Survived 0\n", "Pclass 0\n", "Sex 0\n", "Age 0\n", "SibSp 0\n", "Parch 0\n", "Fare 0\n", "Embarked 0\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 11, "id": "be4ad71a", "metadata": {}, "outputs": [], "source": [ "sex_dummies = pd.get_dummies(data['Sex'])\n", "\n", "sex_dummies.columns = ['Female', 'Male']\n", "\n", "data.drop(['Sex'], axis=1, inplace=True)\n", "\n", "data = data.join(sex_dummies)\n" ] }, { "cell_type": "code", "execution_count": 12, "id": "fbbe1bc6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassAgeSibSpParchFareEmbarkedFemaleMale
00322.000000107.2500S01
11138.0000001071.2833C10
21326.000000007.9250S10
31135.0000001053.1000S10
40335.000000008.0500S01
..............................
8860227.0000000013.0000S01
8871119.0000000030.0000S10
8880329.6991181223.4500S10
8891126.0000000030.0000C01
8900332.000000007.7500Q01
\n", "

891 rows × 9 columns

\n", "
" ], "text/plain": [ " Survived Pclass Age SibSp Parch Fare Embarked Female Male\n", "0 0 3 22.000000 1 0 7.2500 S 0 1\n", "1 1 1 38.000000 1 0 71.2833 C 1 0\n", "2 1 3 26.000000 0 0 7.9250 S 1 0\n", "3 1 1 35.000000 1 0 53.1000 S 1 0\n", "4 0 3 35.000000 0 0 8.0500 S 0 1\n", ".. ... ... ... ... ... ... ... ... ...\n", "886 0 2 27.000000 0 0 13.0000 S 0 1\n", "887 1 1 19.000000 0 0 30.0000 S 1 0\n", "888 0 3 29.699118 1 2 23.4500 S 1 0\n", "889 1 1 26.000000 0 0 30.0000 C 0 1\n", "890 0 3 32.000000 0 0 7.7500 Q 0 1\n", "\n", "[891 rows x 9 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 13, "id": "0091cd03", "metadata": {}, "outputs": [], "source": [ "embarked_dummies = pd.get_dummies(data['Embarked'])\n", "\n", "embarked_dummies.columns = ['S', 'C', 'Q']\n", "\n", "data.drop(['Embarked'], axis=1, inplace=True)\n", "\n", "data = data.join(embarked_dummies)" ] }, { "cell_type": "code", "execution_count": 14, "id": "6dc601da", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassAgeSibSpParchFareFemaleMaleSCQ
00322.000000107.250001001
11138.0000001071.283310100
21326.000000007.925010001
31135.0000001053.100010001
40335.000000008.050001001
....................................
8860227.0000000013.000001001
8871119.0000000030.000010001
8880329.6991181223.450010001
8891126.0000000030.000001100
8900332.000000007.750001010
\n", "

891 rows × 11 columns

\n", "
" ], "text/plain": [ " Survived Pclass Age SibSp Parch Fare Female Male S C Q\n", "0 0 3 22.000000 1 0 7.2500 0 1 0 0 1\n", "1 1 1 38.000000 1 0 71.2833 1 0 1 0 0\n", "2 1 3 26.000000 0 0 7.9250 1 0 0 0 1\n", "3 1 1 35.000000 1 0 53.1000 1 0 0 0 1\n", "4 0 3 35.000000 0 0 8.0500 0 1 0 0 1\n", ".. ... ... ... ... ... ... ... ... .. .. ..\n", "886 0 2 27.000000 0 0 13.0000 0 1 0 0 1\n", "887 1 1 19.000000 0 0 30.0000 1 0 0 0 1\n", "888 0 3 29.699118 1 2 23.4500 1 0 0 0 1\n", "889 1 1 26.000000 0 0 30.0000 0 1 1 0 0\n", "890 0 3 32.000000 0 0 7.7500 0 1 0 1 0\n", "\n", "[891 rows x 11 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 15, "id": "0a89cf30", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassAgeSibSpParchFareFemaleMaleSCQ
00322.0107.250001001
11138.01071.283310100
21326.0007.925010001
31135.01053.100010001
40335.0008.050001001
\n", "
" ], "text/plain": [ " Survived Pclass Age SibSp Parch Fare Female Male S C Q\n", "0 0 3 22.0 1 0 7.2500 0 1 0 0 1\n", "1 1 1 38.0 1 0 71.2833 1 0 1 0 0\n", "2 1 3 26.0 0 0 7.9250 1 0 0 0 1\n", "3 1 1 35.0 1 0 53.1000 1 0 0 0 1\n", "4 0 3 35.0 0 0 8.0500 0 1 0 0 1" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head()" ] }, { "cell_type": "code", "execution_count": 16, "id": "42236c31", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassAgeSibSpParchFareFemaleMaleSCQ
00322.000000107.250001001
11138.0000001071.283310100
21326.000000007.925010001
31135.0000001053.100010001
40335.000000008.050001001
....................................
8860227.0000000013.000001001
8871119.0000000030.000010001
8880329.6991181223.450010001
8891126.0000000030.000001100
8900332.000000007.750001010
\n", "

891 rows × 11 columns

\n", "
" ], "text/plain": [ " Survived Pclass Age SibSp Parch Fare Female Male S C Q\n", "0 0 3 22.000000 1 0 7.2500 0 1 0 0 1\n", "1 1 1 38.000000 1 0 71.2833 1 0 1 0 0\n", "2 1 3 26.000000 0 0 7.9250 1 0 0 0 1\n", "3 1 1 35.000000 1 0 53.1000 1 0 0 0 1\n", "4 0 3 35.000000 0 0 8.0500 0 1 0 0 1\n", ".. ... ... ... ... ... ... ... ... .. .. ..\n", "886 0 2 27.000000 0 0 13.0000 0 1 0 0 1\n", "887 1 1 19.000000 0 0 30.0000 1 0 0 0 1\n", "888 0 3 29.699118 1 2 23.4500 1 0 0 0 1\n", "889 1 1 26.000000 0 0 30.0000 0 1 1 0 0\n", "890 0 3 32.000000 0 0 7.7500 0 1 0 1 0\n", "\n", "[891 rows x 11 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 17, "id": "d21d6273", "metadata": {}, "outputs": [], "source": [ "data['Age'] = round(data['Age'].apply(np.ceil))\n", "data['Fare'] = round(data['Fare'].apply(np.ceil))" ] }, { "cell_type": "code", "execution_count": 18, "id": "5fc1f952", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassAgeSibSpParchFareFemaleMaleSCQ
00322.0108.001001
11138.01072.010100
21326.0008.010001
31135.01054.010001
40335.0009.001001
....................................
8860227.00013.001001
8871119.00030.010001
8880330.01224.010001
8891126.00030.001100
8900332.0008.001010
\n", "

891 rows × 11 columns

\n", "
" ], "text/plain": [ " Survived Pclass Age SibSp Parch Fare Female Male S C Q\n", "0 0 3 22.0 1 0 8.0 0 1 0 0 1\n", "1 1 1 38.0 1 0 72.0 1 0 1 0 0\n", "2 1 3 26.0 0 0 8.0 1 0 0 0 1\n", "3 1 1 35.0 1 0 54.0 1 0 0 0 1\n", "4 0 3 35.0 0 0 9.0 0 1 0 0 1\n", ".. ... ... ... ... ... ... ... ... .. .. ..\n", "886 0 2 27.0 0 0 13.0 0 1 0 0 1\n", "887 1 1 19.0 0 0 30.0 1 0 0 0 1\n", "888 0 3 30.0 1 2 24.0 1 0 0 0 1\n", "889 1 1 26.0 0 0 30.0 0 1 1 0 0\n", "890 0 3 32.0 0 0 8.0 0 1 0 1 0\n", "\n", "[891 rows x 11 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 19, "id": "22f53441", "metadata": {}, "outputs": [], "source": [ "x=data.drop(columns=['Survived'])\n", "y=data['Survived']" ] }, { "cell_type": "code", "execution_count": 20, "id": "5edf3054", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PclassAgeSibSpParchFareFemaleMaleSCQ
0322.0108.001001
1138.01072.010100
2326.0008.010001
3135.01054.010001
4335.0009.001001
.................................
886227.00013.001001
887119.00030.010001
888330.01224.010001
889126.00030.001100
890332.0008.001010
\n", "

891 rows × 10 columns

\n", "
" ], "text/plain": [ " Pclass Age SibSp Parch Fare Female Male S C Q\n", "0 3 22.0 1 0 8.0 0 1 0 0 1\n", "1 1 38.0 1 0 72.0 1 0 1 0 0\n", "2 3 26.0 0 0 8.0 1 0 0 0 1\n", "3 1 35.0 1 0 54.0 1 0 0 0 1\n", "4 3 35.0 0 0 9.0 0 1 0 0 1\n", ".. ... ... ... ... ... ... ... .. .. ..\n", "886 2 27.0 0 0 13.0 0 1 0 0 1\n", "887 1 19.0 0 0 30.0 1 0 0 0 1\n", "888 3 30.0 1 2 24.0 1 0 0 0 1\n", "889 1 26.0 0 0 30.0 0 1 1 0 0\n", "890 3 32.0 0 0 8.0 0 1 0 1 0\n", "\n", "[891 rows x 10 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x" ] }, { "cell_type": "code", "execution_count": 21, "id": "322d43c0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0\n", "1 1\n", "2 1\n", "3 1\n", "4 0\n", " ..\n", "886 0\n", "887 1\n", "888 0\n", "889 1\n", "890 0\n", "Name: Survived, Length: 891, dtype: int64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y" ] }, { "cell_type": "code", "execution_count": 22, "id": "0b65ae1f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Pclass -0.630548\n", "Age 0.422911\n", "SibSp 3.695352\n", "Parch 2.749117\n", "Fare 4.790179\n", "Female 0.618921\n", "Male -0.618921\n", "S 1.595149\n", "C 2.948778\n", "Q -1.009665\n", "dtype: float64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.skew()" ] }, { "cell_type": "code", "execution_count": 23, "id": "4917c787", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)" ] }, { "cell_type": "code", "execution_count": 24, "id": "c4011b3b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PclassAgeSibSpParchFareFemaleMaleSCQ
604135.00027.001100
242229.00011.001001
664320.0108.001001
34022.01126.001001
626257.00013.001010
.................................
702318.00115.010100
746316.01121.001001
447134.00027.001001
209140.00031.001100
80322.0009.001001
\n", "

623 rows × 10 columns

\n", "
" ], "text/plain": [ " Pclass Age SibSp Parch Fare Female Male S C Q\n", "604 1 35.0 0 0 27.0 0 1 1 0 0\n", "242 2 29.0 0 0 11.0 0 1 0 0 1\n", "664 3 20.0 1 0 8.0 0 1 0 0 1\n", "340 2 2.0 1 1 26.0 0 1 0 0 1\n", "626 2 57.0 0 0 13.0 0 1 0 1 0\n", ".. ... ... ... ... ... ... ... .. .. ..\n", "702 3 18.0 0 1 15.0 1 0 1 0 0\n", "746 3 16.0 1 1 21.0 0 1 0 0 1\n", "447 1 34.0 0 0 27.0 0 1 0 0 1\n", "209 1 40.0 0 0 31.0 0 1 1 0 0\n", "80 3 22.0 0 0 9.0 0 1 0 0 1\n", "\n", "[623 rows x 10 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_train" ] }, { "cell_type": "code", "execution_count": 40, "id": "1d810ca5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
KNeighborsClassifier(n_neighbors=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "KNeighborsClassifier(n_neighbors=3)" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.neighbors import KNeighborsClassifier\n", "model= KNeighborsClassifier(n_neighbors=3)\n", "model.fit(x_train,y_train)" ] }, { "cell_type": "code", "execution_count": 41, "id": "eb5ae8f9", "metadata": {}, "outputs": [], "source": [ "y_pred=model.predict(x_test)" ] }, { "cell_type": "code", "execution_count": 42, "id": "e009b08a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n", " 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n", " 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n", " 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,\n", " 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1,\n", " 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,\n", " 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,\n", " 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0,\n", " 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0,\n", " 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,\n", " 1, 0, 1, 0], dtype=int64)" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_pred" ] }, { "cell_type": "code", "execution_count": 43, "id": "8a38e9a0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "780 1\n", "23 1\n", "875 1\n", "794 0\n", "884 0\n", "231 0\n", "507 1\n", "493 0\n", "503 0\n", "375 1\n", "42 0\n", "285 0\n", "101 0\n", "707 1\n", "872 0\n", "315 1\n", "717 1\n", "290 1\n", "777 1\n", "200 0\n", "Name: Survived, dtype: int64" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_test.head(20)" ] }, { "cell_type": "code", "execution_count": 54, "id": "dcd0eed9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7089552238805971" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.score(x_test,y_test)" ] }, { "cell_type": "code", "execution_count": null, "id": "3ef57ffe", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "ml", "language": "python", "name": "ml" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.15" } }, "nbformat": 4, "nbformat_minor": 5 }