Commit d5d3be26 authored by Allen Bose(UST's avatar Allen Bose(UST

Commit message for file2

parent 68ac38da
GEMINI_API_KEY="<your-gemini-api-key>"
\ No newline at end of file
# Default ignored files
/shelf/
/workspace.xml
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.13 (week_1_assaingmnets)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/week_1_assaingmnets.iml" filepath="$PROJECT_DIR$/.idea/week_1_assaingmnets.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="$PROJECT_DIR$/next-100-gen-ai-programme" vcs="Git" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.13" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="py.test" />
</component>
</module>
\ No newline at end of file
{"do_lower_case": true, "model_max_length": 512}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
{"model_max_length": 1024}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:58.113003Z",
"start_time": "2025-09-21T07:51:57.627074Z"
}
},
"cell_type": "code",
"source": "import pandas as pd",
"id": "1d16d3cd5fc60a5e",
"outputs": [],
"execution_count": 1
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Reading csv file",
"id": "702437040fee0797"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:58.147785Z",
"start_time": "2025-09-21T07:51:58.119581Z"
}
},
"cell_type": "code",
"source": "churn_data=pd.read_csv(\"Telco-Customer-Churn.csv\")",
"id": "227974751e33b85e",
"outputs": [],
"execution_count": 2
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Displaying First 5 rows",
"id": "5ab1a0ef3de84c11"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:58.181377Z",
"start_time": "2025-09-21T07:51:58.156590Z"
}
},
"cell_type": "code",
"source": "churn_data.head()",
"id": "e185fa78d4ddd856",
"outputs": [
{
"data": {
"text/plain": [
" customerID gender SeniorCitizen Partner Dependents tenure PhoneService \\\n",
"0 7590-VHVEG Female 0 Yes No 1 No \n",
"1 5575-GNVDE Male 0 No No 34 Yes \n",
"2 3668-QPYBK Male 0 No No 2 Yes \n",
"3 7795-CFOCW Male 0 No No 45 No \n",
"4 9237-HQITU Female 0 No No 2 Yes \n",
"\n",
" MultipleLines InternetService OnlineSecurity ... DeviceProtection \\\n",
"0 No phone service DSL No ... No \n",
"1 No DSL Yes ... Yes \n",
"2 No DSL Yes ... No \n",
"3 No phone service DSL Yes ... Yes \n",
"4 No Fiber optic No ... No \n",
"\n",
" TechSupport StreamingTV StreamingMovies Contract PaperlessBilling \\\n",
"0 No No No Month-to-month Yes \n",
"1 No No No One year No \n",
"2 No No No Month-to-month Yes \n",
"3 Yes No No One year No \n",
"4 No No No Month-to-month Yes \n",
"\n",
" PaymentMethod MonthlyCharges TotalCharges Churn \n",
"0 Electronic check 29.85 29.85 No \n",
"1 Mailed check 56.95 1889.5 No \n",
"2 Mailed check 53.85 108.15 Yes \n",
"3 Bank transfer (automatic) 42.30 1840.75 No \n",
"4 Electronic check 70.70 151.65 Yes \n",
"\n",
"[5 rows x 21 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customerID</th>\n",
" <th>gender</th>\n",
" <th>SeniorCitizen</th>\n",
" <th>Partner</th>\n",
" <th>Dependents</th>\n",
" <th>tenure</th>\n",
" <th>PhoneService</th>\n",
" <th>MultipleLines</th>\n",
" <th>InternetService</th>\n",
" <th>OnlineSecurity</th>\n",
" <th>...</th>\n",
" <th>DeviceProtection</th>\n",
" <th>TechSupport</th>\n",
" <th>StreamingTV</th>\n",
" <th>StreamingMovies</th>\n",
" <th>Contract</th>\n",
" <th>PaperlessBilling</th>\n",
" <th>PaymentMethod</th>\n",
" <th>MonthlyCharges</th>\n",
" <th>TotalCharges</th>\n",
" <th>Churn</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7590-VHVEG</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>No</td>\n",
" <td>No phone service</td>\n",
" <td>DSL</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Month-to-month</td>\n",
" <td>Yes</td>\n",
" <td>Electronic check</td>\n",
" <td>29.85</td>\n",
" <td>29.85</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5575-GNVDE</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>34</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>DSL</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>One year</td>\n",
" <td>No</td>\n",
" <td>Mailed check</td>\n",
" <td>56.95</td>\n",
" <td>1889.5</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3668-QPYBK</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>2</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>DSL</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Month-to-month</td>\n",
" <td>Yes</td>\n",
" <td>Mailed check</td>\n",
" <td>53.85</td>\n",
" <td>108.15</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7795-CFOCW</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>45</td>\n",
" <td>No</td>\n",
" <td>No phone service</td>\n",
" <td>DSL</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>One year</td>\n",
" <td>No</td>\n",
" <td>Bank transfer (automatic)</td>\n",
" <td>42.30</td>\n",
" <td>1840.75</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>9237-HQITU</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>2</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Fiber optic</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Month-to-month</td>\n",
" <td>Yes</td>\n",
" <td>Electronic check</td>\n",
" <td>70.70</td>\n",
" <td>151.65</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 3
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### Check whether the data is clean",
"id": "d7cfdd6b8f0ca43b"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:58.209768Z",
"start_time": "2025-09-21T07:51:58.191441Z"
}
},
"cell_type": "code",
"source": "churn_data.info()",
"id": "908d8b7da652899",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 7043 entries, 0 to 7042\n",
"Data columns (total 21 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 customerID 7043 non-null object \n",
" 1 gender 7043 non-null object \n",
" 2 SeniorCitizen 7043 non-null int64 \n",
" 3 Partner 7043 non-null object \n",
" 4 Dependents 7043 non-null object \n",
" 5 tenure 7043 non-null int64 \n",
" 6 PhoneService 7043 non-null object \n",
" 7 MultipleLines 7043 non-null object \n",
" 8 InternetService 7043 non-null object \n",
" 9 OnlineSecurity 7043 non-null object \n",
" 10 OnlineBackup 7043 non-null object \n",
" 11 DeviceProtection 7043 non-null object \n",
" 12 TechSupport 7043 non-null object \n",
" 13 StreamingTV 7043 non-null object \n",
" 14 StreamingMovies 7043 non-null object \n",
" 15 Contract 7043 non-null object \n",
" 16 PaperlessBilling 7043 non-null object \n",
" 17 PaymentMethod 7043 non-null object \n",
" 18 MonthlyCharges 7043 non-null float64\n",
" 19 TotalCharges 7043 non-null object \n",
" 20 Churn 7043 non-null object \n",
"dtypes: float64(1), int64(2), object(18)\n",
"memory usage: 1.1+ MB\n"
]
}
],
"execution_count": 4
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### Summary statistics of the numeric columns",
"id": "84939b8e4889985e"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:58.234080Z",
"start_time": "2025-09-21T07:51:58.217847Z"
}
},
"cell_type": "code",
"source": "churn_data.describe()",
"id": "e52259622789d631",
"outputs": [
{
"data": {
"text/plain": [
" SeniorCitizen tenure MonthlyCharges\n",
"count 7043.000000 7043.000000 7043.000000\n",
"mean 0.162147 32.371149 64.761692\n",
"std 0.368612 24.559481 30.090047\n",
"min 0.000000 0.000000 18.250000\n",
"25% 0.000000 9.000000 35.500000\n",
"50% 0.000000 29.000000 70.350000\n",
"75% 0.000000 55.000000 89.850000\n",
"max 1.000000 72.000000 118.750000"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SeniorCitizen</th>\n",
" <th>tenure</th>\n",
" <th>MonthlyCharges</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>7043.000000</td>\n",
" <td>7043.000000</td>\n",
" <td>7043.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.162147</td>\n",
" <td>32.371149</td>\n",
" <td>64.761692</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.368612</td>\n",
" <td>24.559481</td>\n",
" <td>30.090047</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>18.250000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.000000</td>\n",
" <td>9.000000</td>\n",
" <td>35.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.000000</td>\n",
" <td>29.000000</td>\n",
" <td>70.350000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>0.000000</td>\n",
" <td>55.000000</td>\n",
" <td>89.850000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.000000</td>\n",
" <td>72.000000</td>\n",
" <td>118.750000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 5
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:58.251347Z",
"start_time": "2025-09-21T07:51:58.245545Z"
}
},
"cell_type": "code",
"source": [
"# Convert churn column from yes/no to 1/0 and cast to int\n",
"churn_data['Churn'] = churn_data['Churn'].apply(lambda x: 1 if x == 'Yes' else 0).astype('int')"
],
"id": "f5389a65fde53eed",
"outputs": [],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:58.291983Z",
"start_time": "2025-09-21T07:51:58.272497Z"
}
},
"cell_type": "code",
"source": "churn_data.head()",
"id": "c03e3641e3bb4408",
"outputs": [
{
"data": {
"text/plain": [
" customerID gender SeniorCitizen Partner Dependents tenure PhoneService \\\n",
"0 7590-VHVEG Female 0 Yes No 1 No \n",
"1 5575-GNVDE Male 0 No No 34 Yes \n",
"2 3668-QPYBK Male 0 No No 2 Yes \n",
"3 7795-CFOCW Male 0 No No 45 No \n",
"4 9237-HQITU Female 0 No No 2 Yes \n",
"\n",
" MultipleLines InternetService OnlineSecurity ... DeviceProtection \\\n",
"0 No phone service DSL No ... No \n",
"1 No DSL Yes ... Yes \n",
"2 No DSL Yes ... No \n",
"3 No phone service DSL Yes ... Yes \n",
"4 No Fiber optic No ... No \n",
"\n",
" TechSupport StreamingTV StreamingMovies Contract PaperlessBilling \\\n",
"0 No No No Month-to-month Yes \n",
"1 No No No One year No \n",
"2 No No No Month-to-month Yes \n",
"3 Yes No No One year No \n",
"4 No No No Month-to-month Yes \n",
"\n",
" PaymentMethod MonthlyCharges TotalCharges Churn \n",
"0 Electronic check 29.85 29.85 0 \n",
"1 Mailed check 56.95 1889.5 0 \n",
"2 Mailed check 53.85 108.15 1 \n",
"3 Bank transfer (automatic) 42.30 1840.75 0 \n",
"4 Electronic check 70.70 151.65 1 \n",
"\n",
"[5 rows x 21 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customerID</th>\n",
" <th>gender</th>\n",
" <th>SeniorCitizen</th>\n",
" <th>Partner</th>\n",
" <th>Dependents</th>\n",
" <th>tenure</th>\n",
" <th>PhoneService</th>\n",
" <th>MultipleLines</th>\n",
" <th>InternetService</th>\n",
" <th>OnlineSecurity</th>\n",
" <th>...</th>\n",
" <th>DeviceProtection</th>\n",
" <th>TechSupport</th>\n",
" <th>StreamingTV</th>\n",
" <th>StreamingMovies</th>\n",
" <th>Contract</th>\n",
" <th>PaperlessBilling</th>\n",
" <th>PaymentMethod</th>\n",
" <th>MonthlyCharges</th>\n",
" <th>TotalCharges</th>\n",
" <th>Churn</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7590-VHVEG</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>No</td>\n",
" <td>No phone service</td>\n",
" <td>DSL</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Month-to-month</td>\n",
" <td>Yes</td>\n",
" <td>Electronic check</td>\n",
" <td>29.85</td>\n",
" <td>29.85</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5575-GNVDE</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>34</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>DSL</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>One year</td>\n",
" <td>No</td>\n",
" <td>Mailed check</td>\n",
" <td>56.95</td>\n",
" <td>1889.5</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3668-QPYBK</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>2</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>DSL</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Month-to-month</td>\n",
" <td>Yes</td>\n",
" <td>Mailed check</td>\n",
" <td>53.85</td>\n",
" <td>108.15</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7795-CFOCW</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>45</td>\n",
" <td>No</td>\n",
" <td>No phone service</td>\n",
" <td>DSL</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>One year</td>\n",
" <td>No</td>\n",
" <td>Bank transfer (automatic)</td>\n",
" <td>42.30</td>\n",
" <td>1840.75</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>9237-HQITU</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>2</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Fiber optic</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Month-to-month</td>\n",
" <td>Yes</td>\n",
" <td>Electronic check</td>\n",
" <td>70.70</td>\n",
" <td>151.65</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 7
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### Drop the ID column and convert categorical columns to integer codes",
"id": "d66c3bef301ac592"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:58.316188Z",
"start_time": "2025-09-21T07:51:58.309014Z"
}
},
"cell_type": "code",
"source": "churn_data.drop('customerID', axis=1,inplace=True)",
"id": "bbe521ebd5d0e342",
"outputs": [],
"execution_count": 8
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.379105Z",
"start_time": "2025-09-21T07:51:58.327535Z"
}
},
"cell_type": "code",
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Get categorical columns\n",
"categorical_cols = churn_data.select_dtypes(include=['object']).columns\n",
"\n",
"# Apply label encoding to all categorical columns\n",
"for col in categorical_cols:\n",
" le = LabelEncoder()\n",
" churn_data[col] = le.fit_transform(churn_data[col])\n",
"\n"
],
"id": "716cc9d0ba146c58",
"outputs": [],
"execution_count": 9
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.401377Z",
"start_time": "2025-09-21T07:51:59.389452Z"
}
},
"cell_type": "code",
"source": "churn_data.head()",
"id": "56b33b787138b343",
"outputs": [
{
"data": {
"text/plain": [
" gender SeniorCitizen Partner Dependents tenure PhoneService \\\n",
"0 0 0 1 0 1 0 \n",
"1 1 0 0 0 34 1 \n",
"2 1 0 0 0 2 1 \n",
"3 1 0 0 0 45 0 \n",
"4 0 0 0 0 2 1 \n",
"\n",
" MultipleLines InternetService OnlineSecurity OnlineBackup \\\n",
"0 1 0 0 2 \n",
"1 0 0 2 0 \n",
"2 0 0 2 2 \n",
"3 1 0 2 0 \n",
"4 0 1 0 0 \n",
"\n",
" DeviceProtection TechSupport StreamingTV StreamingMovies Contract \\\n",
"0 0 0 0 0 0 \n",
"1 2 0 0 0 1 \n",
"2 0 0 0 0 0 \n",
"3 2 2 0 0 1 \n",
"4 0 0 0 0 0 \n",
"\n",
" PaperlessBilling PaymentMethod MonthlyCharges TotalCharges Churn \n",
"0 1 2 29.85 2505 0 \n",
"1 0 3 56.95 1466 0 \n",
"2 1 3 53.85 157 1 \n",
"3 0 0 42.30 1400 0 \n",
"4 1 2 70.70 925 1 "
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>SeniorCitizen</th>\n",
" <th>Partner</th>\n",
" <th>Dependents</th>\n",
" <th>tenure</th>\n",
" <th>PhoneService</th>\n",
" <th>MultipleLines</th>\n",
" <th>InternetService</th>\n",
" <th>OnlineSecurity</th>\n",
" <th>OnlineBackup</th>\n",
" <th>DeviceProtection</th>\n",
" <th>TechSupport</th>\n",
" <th>StreamingTV</th>\n",
" <th>StreamingMovies</th>\n",
" <th>Contract</th>\n",
" <th>PaperlessBilling</th>\n",
" <th>PaymentMethod</th>\n",
" <th>MonthlyCharges</th>\n",
" <th>TotalCharges</th>\n",
" <th>Churn</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>29.85</td>\n",
" <td>2505</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>34</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>56.95</td>\n",
" <td>1466</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>53.85</td>\n",
" <td>157</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>45</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>42.30</td>\n",
" <td>1400</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>70.70</td>\n",
" <td>925</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 10
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.438612Z",
"start_time": "2025-09-21T07:51:59.422336Z"
}
},
"cell_type": "code",
"source": "churn_data.corr()['Churn']",
"id": "b81d6c63c92d6d47",
"outputs": [
{
"data": {
"text/plain": [
"gender -0.008612\n",
"SeniorCitizen 0.150889\n",
"Partner -0.150448\n",
"Dependents -0.164221\n",
"tenure -0.352229\n",
"PhoneService 0.011942\n",
"MultipleLines 0.038037\n",
"InternetService -0.047291\n",
"OnlineSecurity -0.289309\n",
"OnlineBackup -0.195525\n",
"DeviceProtection -0.178134\n",
"TechSupport -0.282492\n",
"StreamingTV -0.036581\n",
"StreamingMovies -0.038492\n",
"Contract -0.396713\n",
"PaperlessBilling 0.191825\n",
"PaymentMethod 0.107062\n",
"MonthlyCharges 0.193356\n",
"TotalCharges 0.014479\n",
"Churn 1.000000\n",
"Name: Churn, dtype: float64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 11
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"#### 1. Contract type is the strongest predictor of churn - focus on getting customers into longer contracts\n",
"#### 2. Add-on services (OnlineSecurity, TechSupport, OnlineBackup) are associated with noticeably lower churn\n",
"#### 3. Monthly charges matter - higher prices are associated with higher churn risk\n",
"#### 4. Services like StreamingTV/StreamingMovies have minimal impact on churn"
],
"id": "d9e0a8f16c87a702"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Selected best features: ['Contract', 'OnlineSecurity', 'TechSupport', 'MonthlyCharges','PaperlessBilling', 'OnlineBackup', 'DeviceProtection']",
"id": "ffb488811796db2"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.452206Z",
"start_time": "2025-09-21T07:51:59.448874Z"
}
},
"cell_type": "code",
"source": [
"best_features = ['Contract', 'OnlineSecurity', 'TechSupport', 'MonthlyCharges',\n",
" 'PaperlessBilling', 'OnlineBackup', 'DeviceProtection']"
],
"id": "202e86d9a1fe9c7",
"outputs": [],
"execution_count": 12
},
{
"metadata": {},
"cell_type": "markdown",
"source": "#### Split the dataset into the target column and the feature columns",
"id": "3c8bc4c2b7433c91"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.478052Z",
"start_time": "2025-09-21T07:51:59.470533Z"
}
},
"cell_type": "code",
"source": [
"X = churn_data.drop(['Churn'], axis=1) # Drop target\n",
"y = churn_data['Churn']\n",
"\n",
"# 2. Check shapes\n",
"print(f\"Feature matrix shape: {X.shape}\")\n",
"print(f\"Target vector shape: {y.shape}\")"
],
"id": "70692bf39df01132",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature matrix shape: (7043, 19)\n",
"Target vector shape: (7043,)\n"
]
}
],
"execution_count": 13
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.630037Z",
"start_time": "2025-09-21T07:51:59.499063Z"
}
},
"cell_type": "code",
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y,\n",
" test_size=0.2, # 20% for testing, 80% for training\n",
" random_state=42 # For reproducible results\n",
")"
],
"id": "a08b36c5bab72e00",
"outputs": [],
"execution_count": 14
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.876045Z",
"start_time": "2025-09-21T07:51:59.639683Z"
}
},
"cell_type": "code",
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"# Create and train the model\n",
"# model = LogisticRegression()\n",
"model = LogisticRegression(class_weight='balanced')\n",
"model.fit(X_train, y_train)\n"
],
"id": "a9a1b7354e2a3cbe",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\281879\\Documents\\AI\\week_1_assaingmnets\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:473: ConvergenceWarning: lbfgs failed to converge after 100 iteration(s) (status=1):\n",
"STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT\n",
"\n",
"Increase the number of iterations to improve the convergence (max_iter=100).\n",
"You might also want to scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
},
{
"data": {
"text/plain": [
"LogisticRegression(class_weight='balanced')"
],
"text/html": [
"<style>#sk-container-id-1 {\n",
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: #000;\n",
" --sklearn-color-text-muted: #666;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
"#sk-container-id-1 {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"#sk-container-id-1 pre {\n",
" padding: 0;\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-hidden--visually {\n",
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-container {\n",
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item::after {\n",
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel {\n",
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item {\n",
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
"#sk-container-id-1 div.sk-serial {\n",
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
"#sk-container-id-1 div.sk-toggleable {\n",
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
"#sk-container-id-1 label.sk-toggleable__label {\n",
" cursor: pointer;\n",
" display: flex;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
" align-items: start;\n",
" justify-content: space-between;\n",
" gap: 0.5em;\n",
"}\n",
"\n",
"#sk-container-id-1 label.sk-toggleable__label .caption {\n",
" font-size: 0.6rem;\n",
" font-weight: lighter;\n",
" color: var(--sklearn-color-text-muted);\n",
"}\n",
"\n",
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content {\n",
" display: none;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
" /* Expand drop-down */\n",
" display: block;\n",
" width: 100%;\n",
" overflow: visible;\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-1 div.sk-label label {\n",
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
"#sk-container-id-1 div.sk-label label {\n",
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label-container {\n",
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
"#sk-container-id-1 div.sk-estimator {\n",
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
"#sk-container-id-1 div.sk-estimator:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 0.5em;\n",
" text-align: center;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link {\n",
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".estimator-table summary {\n",
" padding: .5rem;\n",
" font-family: monospace;\n",
" cursor: pointer;\n",
"}\n",
"\n",
".estimator-table details[open] {\n",
" padding-left: 0.1rem;\n",
" padding-right: 0.1rem;\n",
" padding-bottom: 0.3rem;\n",
"}\n",
"\n",
".estimator-table .parameters-table {\n",
" margin-left: auto !important;\n",
" margin-right: auto !important;\n",
"}\n",
"\n",
".estimator-table .parameters-table tr:nth-child(odd) {\n",
" background-color: #fff;\n",
"}\n",
"\n",
".estimator-table .parameters-table tr:nth-child(even) {\n",
" background-color: #f6f6f6;\n",
"}\n",
"\n",
".estimator-table .parameters-table tr:hover {\n",
" background-color: #e0e0e0;\n",
"}\n",
"\n",
".estimator-table table td {\n",
" border: 1px solid rgba(106, 105, 104, 0.232);\n",
"}\n",
"\n",
".user-set td {\n",
" color:rgb(255, 94, 0);\n",
" text-align: left;\n",
"}\n",
"\n",
".user-set td.value pre {\n",
" color:rgb(255, 94, 0) !important;\n",
" background-color: transparent !important;\n",
"}\n",
"\n",
".default td {\n",
" color: black;\n",
" text-align: left;\n",
"}\n",
"\n",
".user-set td i,\n",
".default td i {\n",
" color: black;\n",
"}\n",
"\n",
".copy-paste-icon {\n",
" background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0NDggNTEyIj48IS0tIUZvbnQgQXdlc29tZSBGcmVlIDYuNy4yIGJ5IEBmb250YXdlc29tZSAtIGh0dHBzOi8vZm9udGF3ZXNvbWUuY29tIExpY2Vuc2UgLSBodHRwczovL2ZvbnRhd2Vzb21lLmNvbS9saWNlbnNlL2ZyZWUgQ29weXJpZ2h0IDIwMjUgRm9udGljb25zLCBJbmMuLS0+PHBhdGggZD0iTTIwOCAwTDMzMi4xIDBjMTIuNyAwIDI0LjkgNS4xIDMzLjkgMTQuMWw2Ny45IDY3LjljOSA5IDE0LjEgMjEuMiAxNC4xIDMzLjlMNDQ4IDMzNmMwIDI2LjUtMjEuNSA0OC00OCA0OGwtMTkyIDBjLTI2LjUgMC00OC0yMS41LTQ4LTQ4bDAtMjg4YzAtMjYuNSAyMS41LTQ4IDQ4LTQ4ek00OCAxMjhsODAgMCAwIDY0LTY0IDAgMCAyNTYgMTkyIDAgMC0zMiA2NCAwIDAgNDhjMCAyNi41LTIxLjUgNDgtNDggNDhMNDggNTEyYy0yNi41IDAtNDgtMjEuNS00OC00OEwwIDE3NmMwLTI2LjUgMjEuNS00OCA0OC00OHoiLz48L3N2Zz4=);\n",
" background-repeat: no-repeat;\n",
" background-size: 14px 14px;\n",
" background-position: 0;\n",
" display: inline-block;\n",
" width: 14px;\n",
" height: 14px;\n",
" cursor: pointer;\n",
"}\n",
"</style><body><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression(class_weight=&#x27;balanced&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>LogisticRegression</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.7/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\" data-param-prefix=\"\">\n",
" <div class=\"estimator-table\">\n",
" <details>\n",
" <summary>Parameters</summary>\n",
" <table class=\"parameters-table\">\n",
" <tbody>\n",
" \n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('penalty',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">penalty&nbsp;</td>\n",
" <td class=\"value\">&#x27;l2&#x27;</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('dual',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">dual&nbsp;</td>\n",
" <td class=\"value\">False</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('tol',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">tol&nbsp;</td>\n",
" <td class=\"value\">0.0001</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('C',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">C&nbsp;</td>\n",
" <td class=\"value\">1.0</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('fit_intercept',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">fit_intercept&nbsp;</td>\n",
" <td class=\"value\">True</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('intercept_scaling',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">intercept_scaling&nbsp;</td>\n",
" <td class=\"value\">1</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"user-set\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('class_weight',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">class_weight&nbsp;</td>\n",
" <td class=\"value\">&#x27;balanced&#x27;</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('random_state',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">random_state&nbsp;</td>\n",
" <td class=\"value\">None</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('solver',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">solver&nbsp;</td>\n",
" <td class=\"value\">&#x27;lbfgs&#x27;</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('max_iter',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">max_iter&nbsp;</td>\n",
" <td class=\"value\">100</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('multi_class',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">multi_class&nbsp;</td>\n",
" <td class=\"value\">&#x27;deprecated&#x27;</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('verbose',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">verbose&nbsp;</td>\n",
" <td class=\"value\">0</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('warm_start',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">warm_start&nbsp;</td>\n",
" <td class=\"value\">False</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('n_jobs',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">n_jobs&nbsp;</td>\n",
" <td class=\"value\">None</td>\n",
" </tr>\n",
" \n",
"\n",
" <tr class=\"default\">\n",
" <td><i class=\"copy-paste-icon\"\n",
" onclick=\"copyToClipboard('l1_ratio',\n",
" this.parentElement.nextElementSibling)\"\n",
" ></i></td>\n",
" <td class=\"param\">l1_ratio&nbsp;</td>\n",
" <td class=\"value\">None</td>\n",
" </tr>\n",
" \n",
" </tbody>\n",
" </table>\n",
" </details>\n",
" </div>\n",
" </div></div></div></div></div><script>function copyToClipboard(text, element) {\n",
" // Get the parameter prefix from the closest toggleable content\n",
" const toggleableContent = element.closest('.sk-toggleable__content');\n",
" const paramPrefix = toggleableContent ? toggleableContent.dataset.paramPrefix : '';\n",
" const fullParamName = paramPrefix ? `${paramPrefix}${text}` : text;\n",
"\n",
" const originalStyle = element.style;\n",
" const computedStyle = window.getComputedStyle(element);\n",
" const originalWidth = computedStyle.width;\n",
" const originalHTML = element.innerHTML.replace('Copied!', '');\n",
"\n",
" navigator.clipboard.writeText(fullParamName)\n",
" .then(() => {\n",
" element.style.width = originalWidth;\n",
" element.style.color = 'green';\n",
" element.innerHTML = \"Copied!\";\n",
"\n",
" setTimeout(() => {\n",
" element.innerHTML = originalHTML;\n",
" element.style = originalStyle;\n",
" }, 2000);\n",
" })\n",
" .catch(err => {\n",
" console.error('Failed to copy:', err);\n",
" element.style.color = 'red';\n",
" element.innerHTML = \"Failed!\";\n",
" setTimeout(() => {\n",
" element.innerHTML = originalHTML;\n",
" element.style = originalStyle;\n",
" }, 2000);\n",
" });\n",
" return false;\n",
"}\n",
"\n",
"document.querySelectorAll('.fa-regular.fa-copy').forEach(function(element) {\n",
" const toggleableContent = element.closest('.sk-toggleable__content');\n",
" const paramPrefix = toggleableContent ? toggleableContent.dataset.paramPrefix : '';\n",
" const paramName = element.parentElement.nextElementSibling.textContent.trim();\n",
" const fullParamName = paramPrefix ? `${paramPrefix}${paramName}` : paramName;\n",
"\n",
" element.setAttribute('title', fullParamName);\n",
"});\n",
"</script></body>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 15
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.892241Z",
"start_time": "2025-09-21T07:51:59.884892Z"
}
},
"cell_type": "code",
"source": [
"# Make predictions\n",
"y_pred = model.predict(X_test)"
],
"id": "15510ca63305d3a8",
"outputs": [],
"execution_count": 16
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.909894Z",
"start_time": "2025-09-21T07:51:59.903069Z"
}
},
"cell_type": "code",
"source": [
"from sklearn.metrics import accuracy_score\n",
"\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
"print(f\"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)\")"
],
"id": "7f06c5a38d67361b",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.7473 (74.73%)\n"
]
}
],
"execution_count": 17
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.941200Z",
"start_time": "2025-09-21T07:51:59.922375Z"
}
},
"cell_type": "code",
"source": [
"from sklearn.metrics import classification_report,confusion_matrix, accuracy_score\n",
"\n",
"print(\"Classification Report:\")\n",
"print(classification_report(y_test, y_pred))\n",
"print(\"\\nConfusion Matrix:\")\n",
"print(confusion_matrix(y_test, y_pred))\n",
"# Accuracy\n"
],
"id": "17e1612821887886",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" 0 0.92 0.72 0.81 1036\n",
" 1 0.51 0.82 0.63 373\n",
"\n",
" accuracy 0.75 1409\n",
" macro avg 0.72 0.77 0.72 1409\n",
"weighted avg 0.81 0.75 0.76 1409\n",
"\n",
"\n",
"Confusion Matrix:\n",
"[[749 287]\n",
" [ 69 304]]\n"
]
}
],
"execution_count": 18
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-21T07:51:59.968133Z",
"start_time": "2025-09-21T07:51:59.955198Z"
}
},
"cell_type": "code",
"source": [
"# Quick check\n",
"train_acc = model.score(X_train, y_train)\n",
"test_acc = model.score(X_test, y_test)\n",
"\n",
"print(f\"Training Accuracy: {train_acc:.4f}\")\n",
"print(f\"Test Accuracy: {test_acc:.4f}\")\n",
"print(f\"Difference: {train_acc - test_acc:.4f}\")\n",
"\n",
"if train_acc - test_acc > 0.1: # 10% difference\n",
" print(\"OVERFITTING: Training accuracy much higher than test!\")\n",
"elif train_acc - test_acc > 0.05: # 5% difference\n",
" print(\"Possible overfitting\")\n",
"else:\n",
" print(\"Good generalization\")"
],
"id": "6733660e55b07d6a",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training Accuracy: 0.7448\n",
"Test Accuracy: 0.7473\n",
"Difference: -0.0026\n",
"Good generalization\n"
]
}
],
"execution_count": 19
},
{
"metadata": {},
"cell_type": "markdown",
"source": "##### No Churn: 0.72 → The model correctly identifies 72% of customers who actually didn't churn. Churn: 0.82 → The model correctly identifies 82% of customers who actually churned. These figures are the recall values, which is the metric this model currently needs most, so I gave recall higher priority than precision. Precision for churned customers is only 51%, which means about half of our churn predictions are false alarms raised on loyal customers; but from a business perspective the 82% recall means we are able to find most of the customers who are going to churn.",
"id": "45ee6471491777de"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "##### Precision - of everything I predicted, how much actually happened; i.e., if I predicted 100 customers would churn and 51 of them actually churned, my precision is 51%.",
"id": "b071e7356fcbce1b"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "##### Recall - of everything that actually happened, how much I predicted; i.e., if 100 customers actually churned and I predicted 82 of them, my recall is 82%.",
"id": "b4a0bf637e18cc7e"
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"\n",
"##### So from my perspective, recall is more important for this model because it identifies 82% of the churned customers\n"
],
"id": "695ecdd9397942da"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "##### The model is very good at predicting the churned customers, so we can target them with retention campaigns.",
"id": "851da7895fe038d8"
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"#### Overall accuracy of the model is 75%, i.e., our model correctly predicts 75% of all customers. For non-churning customers: Precision: 92% - when we predict \"won't churn\", we are almost always right. Recall: 72% - we catch 72% of customers who actually stay. Our model gives strong performance for identifying loyal customers. For churned customers: Precision: 51% - only about half of our churn predictions are correct. Recall: 82% - our model catches 82% of customers who actually churn. Good at finding churners, but with many false alarms.\n",
"#### Strong areas of our model are\n",
"##### 1) Excellent churn detection (82% recall) - we won't miss many leaving customers\n",
"##### 2) High confidence in loyal customers (92% precision for non-churn)\n",
"##### 3) Actionable for retention campaigns\n"
],
"id": "ada9ea863b911e24"
}
],
"metadata": {
"language_info": {
"name": "python"
},
"kernelspec": {
"name": "python3",
"language": "python",
"display_name": "Python 3 (ipykernel)"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
This source diff could not be displayed because it is too large. You can view the blob instead.
next-100-gen-ai-programme @ f6dbd759
Subproject commit f6dbd759e6e418b8f0d875b0e92c6c078a111776
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment