{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 머신러닝 기본 개념\n", "" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 분류 (Classification)\n", "- 어떤 대상을 정해진 범주에 구분해 넣는 작업\n", "- **주어진 특성에 따라 어떤 대상을 범주(target)로 구분하는 방법**\n", "    - 레이블 또는 타겟이 범주형일 때 사용\n", "- 범주가 2개인 경우 **이진 분류 (Binary Classification)**\n", "- 범주가 세 개 이상인 경우 **다중 분류 (MultiClass Classification)**" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | mean radius | \n", "mean texture | \n", "mean perimeter | \n", "mean area | \n", "mean smoothness | \n", "mean compactness | \n", "mean concavity | \n", "mean concave points | \n", "mean symmetry | \n", "mean fractal dimension | \n", "... | \n", "worst texture | \n", "worst perimeter | \n", "worst area | \n", "worst smoothness | \n", "worst compactness | \n", "worst concavity | \n", "worst concave points | \n", "worst symmetry | \n", "worst fractal dimension | \n", "target | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "17.99 | \n", "10.38 | \n", "122.80 | \n", "1001.0 | \n", "0.11840 | \n", "0.27760 | \n", "0.30010 | \n", "0.14710 | \n", "0.2419 | \n", "0.07871 | \n", "... | \n", "17.33 | \n", "184.60 | \n", "2019.0 | \n", "0.16220 | \n", "0.66560 | \n", "0.7119 | \n", "0.2654 | \n", "0.4601 | \n", "0.11890 | \n", "0 | \n", "
1 | \n", "20.57 | \n", "17.77 | \n", "132.90 | \n", "1326.0 | \n", "0.08474 | \n", "0.07864 | \n", "0.08690 | \n", "0.07017 | \n", "0.1812 | \n", "0.05667 | \n", "... | \n", "23.41 | \n", "158.80 | \n", "1956.0 | \n", "0.12380 | \n", "0.18660 | \n", "0.2416 | \n", "0.1860 | \n", "0.2750 | \n", "0.08902 | \n", "0 | \n", "
2 | \n", "19.69 | \n", "21.25 | \n", "130.00 | \n", "1203.0 | \n", "0.10960 | \n", "0.15990 | \n", "0.19740 | \n", "0.12790 | \n", "0.2069 | \n", "0.05999 | \n", "... | \n", "25.53 | \n", "152.50 | \n", "1709.0 | \n", "0.14440 | \n", "0.42450 | \n", "0.4504 | \n", "0.2430 | \n", "0.3613 | \n", "0.08758 | \n", "0 | \n", "
3 | \n", "11.42 | \n", "20.38 | \n", "77.58 | \n", "386.1 | \n", "0.14250 | \n", "0.28390 | \n", "0.24140 | \n", "0.10520 | \n", "0.2597 | \n", "0.09744 | \n", "... | \n", "26.50 | \n", "98.87 | \n", "567.7 | \n", "0.20980 | \n", "0.86630 | \n", "0.6869 | \n", "0.2575 | \n", "0.6638 | \n", "0.17300 | \n", "0 | \n", "
4 | \n", "20.29 | \n", "14.34 | \n", "135.10 | \n", "1297.0 | \n", "0.10030 | \n", "0.13280 | \n", "0.19800 | \n", "0.10430 | \n", "0.1809 | \n", "0.05883 | \n", "... | \n", "16.67 | \n", "152.20 | \n", "1575.0 | \n", "0.13740 | \n", "0.20500 | \n", "0.4000 | \n", "0.1625 | \n", "0.2364 | \n", "0.07678 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
564 | \n", "21.56 | \n", "22.39 | \n", "142.00 | \n", "1479.0 | \n", "0.11100 | \n", "0.11590 | \n", "0.24390 | \n", "0.13890 | \n", "0.1726 | \n", "0.05623 | \n", "... | \n", "26.40 | \n", "166.10 | \n", "2027.0 | \n", "0.14100 | \n", "0.21130 | \n", "0.4107 | \n", "0.2216 | \n", "0.2060 | \n", "0.07115 | \n", "0 | \n", "
565 | \n", "20.13 | \n", "28.25 | \n", "131.20 | \n", "1261.0 | \n", "0.09780 | \n", "0.10340 | \n", "0.14400 | \n", "0.09791 | \n", "0.1752 | \n", "0.05533 | \n", "... | \n", "38.25 | \n", "155.00 | \n", "1731.0 | \n", "0.11660 | \n", "0.19220 | \n", "0.3215 | \n", "0.1628 | \n", "0.2572 | \n", "0.06637 | \n", "0 | \n", "
566 | \n", "16.60 | \n", "28.08 | \n", "108.30 | \n", "858.1 | \n", "0.08455 | \n", "0.10230 | \n", "0.09251 | \n", "0.05302 | \n", "0.1590 | \n", "0.05648 | \n", "... | \n", "34.12 | \n", "126.70 | \n", "1124.0 | \n", "0.11390 | \n", "0.30940 | \n", "0.3403 | \n", "0.1418 | \n", "0.2218 | \n", "0.07820 | \n", "0 | \n", "
567 | \n", "20.60 | \n", "29.33 | \n", "140.10 | \n", "1265.0 | \n", "0.11780 | \n", "0.27700 | \n", "0.35140 | \n", "0.15200 | \n", "0.2397 | \n", "0.07016 | \n", "... | \n", "39.42 | \n", "184.60 | \n", "1821.0 | \n", "0.16500 | \n", "0.86810 | \n", "0.9387 | \n", "0.2650 | \n", "0.4087 | \n", "0.12400 | \n", "0 | \n", "
568 | \n", "7.76 | \n", "24.54 | \n", "47.92 | \n", "181.0 | \n", "0.05263 | \n", "0.04362 | \n", "0.00000 | \n", "0.00000 | \n", "0.1587 | \n", "0.05884 | \n", "... | \n", "30.37 | \n", "59.16 | \n", "268.6 | \n", "0.08996 | \n", "0.06444 | \n", "0.0000 | \n", "0.0000 | \n", "0.2871 | \n", "0.07039 | \n", "1 | \n", "
569 rows × 31 columns
\n", "\n", " | mean radius | \n", "mean texture | \n", "mean perimeter | \n", "mean area | \n", "mean smoothness | \n", "mean compactness | \n", "mean concavity | \n", "mean concave points | \n", "mean symmetry | \n", "mean fractal dimension | \n", "... | \n", "worst radius | \n", "worst texture | \n", "worst perimeter | \n", "worst area | \n", "worst smoothness | \n", "worst compactness | \n", "worst concavity | \n", "worst concave points | \n", "worst symmetry | \n", "worst fractal dimension | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "17.99 | \n", "10.38 | \n", "122.80 | \n", "1001.0 | \n", "0.11840 | \n", "0.27760 | \n", "0.30010 | \n", "0.14710 | \n", "0.2419 | \n", "0.07871 | \n", "... | \n", "25.380 | \n", "17.33 | \n", "184.60 | \n", "2019.0 | \n", "0.16220 | \n", "0.66560 | \n", "0.7119 | \n", "0.2654 | \n", "0.4601 | \n", "0.11890 | \n", "
1 | \n", "20.57 | \n", "17.77 | \n", "132.90 | \n", "1326.0 | \n", "0.08474 | \n", "0.07864 | \n", "0.08690 | \n", "0.07017 | \n", "0.1812 | \n", "0.05667 | \n", "... | \n", "24.990 | \n", "23.41 | \n", "158.80 | \n", "1956.0 | \n", "0.12380 | \n", "0.18660 | \n", "0.2416 | \n", "0.1860 | \n", "0.2750 | \n", "0.08902 | \n", "
2 | \n", "19.69 | \n", "21.25 | \n", "130.00 | \n", "1203.0 | \n", "0.10960 | \n", "0.15990 | \n", "0.19740 | \n", "0.12790 | \n", "0.2069 | \n", "0.05999 | \n", "... | \n", "23.570 | \n", "25.53 | \n", "152.50 | \n", "1709.0 | \n", "0.14440 | \n", "0.42450 | \n", "0.4504 | \n", "0.2430 | \n", "0.3613 | \n", "0.08758 | \n", "
3 | \n", "11.42 | \n", "20.38 | \n", "77.58 | \n", "386.1 | \n", "0.14250 | \n", "0.28390 | \n", "0.24140 | \n", "0.10520 | \n", "0.2597 | \n", "0.09744 | \n", "... | \n", "14.910 | \n", "26.50 | \n", "98.87 | \n", "567.7 | \n", "0.20980 | \n", "0.86630 | \n", "0.6869 | \n", "0.2575 | \n", "0.6638 | \n", "0.17300 | \n", "
4 | \n", "20.29 | \n", "14.34 | \n", "135.10 | \n", "1297.0 | \n", "0.10030 | \n", "0.13280 | \n", "0.19800 | \n", "0.10430 | \n", "0.1809 | \n", "0.05883 | \n", "... | \n", "22.540 | \n", "16.67 | \n", "152.20 | \n", "1575.0 | \n", "0.13740 | \n", "0.20500 | \n", "0.4000 | \n", "0.1625 | \n", "0.2364 | \n", "0.07678 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
564 | \n", "21.56 | \n", "22.39 | \n", "142.00 | \n", "1479.0 | \n", "0.11100 | \n", "0.11590 | \n", "0.24390 | \n", "0.13890 | \n", "0.1726 | \n", "0.05623 | \n", "... | \n", "25.450 | \n", "26.40 | \n", "166.10 | \n", "2027.0 | \n", "0.14100 | \n", "0.21130 | \n", "0.4107 | \n", "0.2216 | \n", "0.2060 | \n", "0.07115 | \n", "
565 | \n", "20.13 | \n", "28.25 | \n", "131.20 | \n", "1261.0 | \n", "0.09780 | \n", "0.10340 | \n", "0.14400 | \n", "0.09791 | \n", "0.1752 | \n", "0.05533 | \n", "... | \n", "23.690 | \n", "38.25 | \n", "155.00 | \n", "1731.0 | \n", "0.11660 | \n", "0.19220 | \n", "0.3215 | \n", "0.1628 | \n", "0.2572 | \n", "0.06637 | \n", "
566 | \n", "16.60 | \n", "28.08 | \n", "108.30 | \n", "858.1 | \n", "0.08455 | \n", "0.10230 | \n", "0.09251 | \n", "0.05302 | \n", "0.1590 | \n", "0.05648 | \n", "... | \n", "18.980 | \n", "34.12 | \n", "126.70 | \n", "1124.0 | \n", "0.11390 | \n", "0.30940 | \n", "0.3403 | \n", "0.1418 | \n", "0.2218 | \n", "0.07820 | \n", "
567 | \n", "20.60 | \n", "29.33 | \n", "140.10 | \n", "1265.0 | \n", "0.11780 | \n", "0.27700 | \n", "0.35140 | \n", "0.15200 | \n", "0.2397 | \n", "0.07016 | \n", "... | \n", "25.740 | \n", "39.42 | \n", "184.60 | \n", "1821.0 | \n", "0.16500 | \n", "0.86810 | \n", "0.9387 | \n", "0.2650 | \n", "0.4087 | \n", "0.12400 | \n", "
568 | \n", "7.76 | \n", "24.54 | \n", "47.92 | \n", "181.0 | \n", "0.05263 | \n", "0.04362 | \n", "0.00000 | \n", "0.00000 | \n", "0.1587 | \n", "0.05884 | \n", "... | \n", "9.456 | \n", "30.37 | \n", "59.16 | \n", "268.6 | \n", "0.08996 | \n", "0.06444 | \n", "0.0000 | \n", "0.0000 | \n", "0.2871 | \n", "0.07039 | \n", "
569 rows × 30 columns
\n", "\n", " | CRIM | \n", "ZN | \n", "INDUS | \n", "CHAS | \n", "NOX | \n", "RM | \n", "AGE | \n", "DIS | \n", "RAD | \n", "TAX | \n", "PTRATIO | \n", "B | \n", "LSTAT | \n", "target | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.00632 | \n", "18.0 | \n", "2.31 | \n", "0.0 | \n", "0.538 | \n", "6.575 | \n", "65.2 | \n", "4.0900 | \n", "1.0 | \n", "296.0 | \n", "15.3 | \n", "396.90 | \n", "4.98 | \n", "24.0 | \n", "
1 | \n", "0.02731 | \n", "0.0 | \n", "7.07 | \n", "0.0 | \n", "0.469 | \n", "6.421 | \n", "78.9 | \n", "4.9671 | \n", "2.0 | \n", "242.0 | \n", "17.8 | \n", "396.90 | \n", "9.14 | \n", "21.6 | \n", "
2 | \n", "0.02729 | \n", "0.0 | \n", "7.07 | \n", "0.0 | \n", "0.469 | \n", "7.185 | \n", "61.1 | \n", "4.9671 | \n", "2.0 | \n", "242.0 | \n", "17.8 | \n", "392.83 | \n", "4.03 | \n", "34.7 | \n", "
3 | \n", "0.03237 | \n", "0.0 | \n", "2.18 | \n", "0.0 | \n", "0.458 | \n", "6.998 | \n", "45.8 | \n", "6.0622 | \n", "3.0 | \n", "222.0 | \n", "18.7 | \n", "394.63 | \n", "2.94 | \n", "33.4 | \n", "
4 | \n", "0.06905 | \n", "0.0 | \n", "2.18 | \n", "0.0 | \n", "0.458 | \n", "7.147 | \n", "54.2 | \n", "6.0622 | \n", "3.0 | \n", "222.0 | \n", "18.7 | \n", "396.90 | \n", "5.33 | \n", "36.2 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
501 | \n", "0.06263 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.593 | \n", "69.1 | \n", "2.4786 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "391.99 | \n", "9.67 | \n", "22.4 | \n", "
502 | \n", "0.04527 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.120 | \n", "76.7 | \n", "2.2875 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "396.90 | \n", "9.08 | \n", "20.6 | \n", "
503 | \n", "0.06076 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.976 | \n", "91.0 | \n", "2.1675 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "396.90 | \n", "5.64 | \n", "23.9 | \n", "
504 | \n", "0.10959 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.794 | \n", "89.3 | \n", "2.3889 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "393.45 | \n", "6.48 | \n", "22.0 | \n", "
505 | \n", "0.04741 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.030 | \n", "80.8 | \n", "2.5050 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "396.90 | \n", "7.88 | \n", "11.9 | \n", "
506 rows × 14 columns
\n", "\n", " | 바나나 | \n", "복숭아 | \n", "사과 | \n", "키위 | \n", "
---|---|---|---|---|
0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
2 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
3 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
4 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
5 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
6 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
7 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "