diff --git a/Regression_model.ipynb b/Regression_model.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..71a3782f39339a75f7ca22ead132664506594d37 --- /dev/null +++ b/Regression_model.ipynb @@ -0,0 +1,1961 @@ +{ + "cells": [ + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:58.113003Z", + "start_time": "2025-09-21T07:51:57.627074Z" + } + }, + "cell_type": "code", + "source": "import pandas as pd", + "id": "1d16d3cd5fc60a5e", + "outputs": [], + "execution_count": 1 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "# Reading csv file", + "id": "702437040fee0797" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:58.147785Z", + "start_time": "2025-09-21T07:51:58.119581Z" + } + }, + "cell_type": "code", + "source": "churn_data=pd.read_csv(\"Telco-Customer-Churn.csv\")", + "id": "227974751e33b85e", + "outputs": [], + "execution_count": 2 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "# Displaying First 5 rows", + "id": "5ab1a0ef3de84c11" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:58.181377Z", + "start_time": "2025-09-21T07:51:58.156590Z" + } + }, + "cell_type": "code", + "source": "churn_data.head()", + "id": "e185fa78d4ddd856", + "outputs": [ + { + "data": { + "text/plain": [ + " customerID gender SeniorCitizen Partner Dependents tenure PhoneService \\\n", + "0 7590-VHVEG Female 0 Yes No 1 No \n", + "1 5575-GNVDE Male 0 No No 34 Yes \n", + "2 3668-QPYBK Male 0 No No 2 Yes \n", + "3 7795-CFOCW Male 0 No No 45 No \n", + "4 9237-HQITU Female 0 No No 2 Yes \n", + "\n", + " MultipleLines InternetService OnlineSecurity ... DeviceProtection \\\n", + "0 No phone service DSL No ... No \n", + "1 No DSL Yes ... Yes \n", + "2 No DSL Yes ... No \n", + "3 No phone service DSL Yes ... Yes \n", + "4 No Fiber optic No ... 
No \n", + "\n", + " TechSupport StreamingTV StreamingMovies Contract PaperlessBilling \\\n", + "0 No No No Month-to-month Yes \n", + "1 No No No One year No \n", + "2 No No No Month-to-month Yes \n", + "3 Yes No No One year No \n", + "4 No No No Month-to-month Yes \n", + "\n", + " PaymentMethod MonthlyCharges TotalCharges Churn \n", + "0 Electronic check 29.85 29.85 No \n", + "1 Mailed check 56.95 1889.5 No \n", + "2 Mailed check 53.85 108.15 Yes \n", + "3 Bank transfer (automatic) 42.30 1840.75 No \n", + "4 Electronic check 70.70 151.65 Yes \n", + "\n", + "[5 rows x 21 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerIDgenderSeniorCitizenPartnerDependentstenurePhoneServiceMultipleLinesInternetServiceOnlineSecurity...DeviceProtectionTechSupportStreamingTVStreamingMoviesContractPaperlessBillingPaymentMethodMonthlyChargesTotalChargesChurn
07590-VHVEGFemale0YesNo1NoNo phone serviceDSLNo...NoNoNoNoMonth-to-monthYesElectronic check29.8529.85No
15575-GNVDEMale0NoNo34YesNoDSLYes...YesNoNoNoOne yearNoMailed check56.951889.5No
23668-QPYBKMale0NoNo2YesNoDSLYes...NoNoNoNoMonth-to-monthYesMailed check53.85108.15Yes
37795-CFOCWMale0NoNo45NoNo phone serviceDSLYes...YesYesNoNoOne yearNoBank transfer (automatic)42.301840.75No
49237-HQITUFemale0NoNo2YesNoFiber opticNo...NoNoNoNoMonth-to-monthYesElectronic check70.70151.65Yes
\n", + "

5 rows × 21 columns

\n", + "
" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 3 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Find out is data clean", + "id": "d7cfdd6b8f0ca43b" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:58.209768Z", + "start_time": "2025-09-21T07:51:58.191441Z" + } + }, + "cell_type": "code", + "source": "churn_data.info()", + "id": "908d8b7da652899", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 7043 entries, 0 to 7042\n", + "Data columns (total 21 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 customerID 7043 non-null object \n", + " 1 gender 7043 non-null object \n", + " 2 SeniorCitizen 7043 non-null int64 \n", + " 3 Partner 7043 non-null object \n", + " 4 Dependents 7043 non-null object \n", + " 5 tenure 7043 non-null int64 \n", + " 6 PhoneService 7043 non-null object \n", + " 7 MultipleLines 7043 non-null object \n", + " 8 InternetService 7043 non-null object \n", + " 9 OnlineSecurity 7043 non-null object \n", + " 10 OnlineBackup 7043 non-null object \n", + " 11 DeviceProtection 7043 non-null object \n", + " 12 TechSupport 7043 non-null object \n", + " 13 StreamingTV 7043 non-null object \n", + " 14 StreamingMovies 7043 non-null object \n", + " 15 Contract 7043 non-null object \n", + " 16 PaperlessBilling 7043 non-null object \n", + " 17 PaymentMethod 7043 non-null object \n", + " 18 MonthlyCharges 7043 non-null float64\n", + " 19 TotalCharges 7043 non-null object \n", + " 20 Churn 7043 non-null object \n", + "dtypes: float64(1), int64(2), object(18)\n", + "memory usage: 1.1+ MB\n" + ] + } + ], + "execution_count": 4 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Overall view of data", + "id": "84939b8e4889985e" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:58.234080Z", + "start_time": 
"2025-09-21T07:51:58.217847Z" + } + }, + "cell_type": "code", + "source": "churn_data.describe()", + "id": "e52259622789d631", + "outputs": [ + { + "data": { + "text/plain": [ + " SeniorCitizen tenure MonthlyCharges\n", + "count 7043.000000 7043.000000 7043.000000\n", + "mean 0.162147 32.371149 64.761692\n", + "std 0.368612 24.559481 30.090047\n", + "min 0.000000 0.000000 18.250000\n", + "25% 0.000000 9.000000 35.500000\n", + "50% 0.000000 29.000000 70.350000\n", + "75% 0.000000 55.000000 89.850000\n", + "max 1.000000 72.000000 118.750000" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SeniorCitizentenureMonthlyCharges
count7043.0000007043.0000007043.000000
mean0.16214732.37114964.761692
std0.36861224.55948130.090047
min0.0000000.00000018.250000
25%0.0000009.00000035.500000
50%0.00000029.00000070.350000
75%0.00000055.00000089.850000
max1.00000072.000000118.750000
\n", + "
" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 5 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:58.251347Z", + "start_time": "2025-09-21T07:51:58.245545Z" + } + }, + "cell_type": "code", + "source": [ + "# Convert churn column from yes/no to 1/0 and cast to int\n", + "churn_data['Churn'] = churn_data['Churn'].apply(lambda x: 1 if x == 'Yes' else 0).astype('int')" + ], + "id": "f5389a65fde53eed", + "outputs": [], + "execution_count": 6 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:58.291983Z", + "start_time": "2025-09-21T07:51:58.272497Z" + } + }, + "cell_type": "code", + "source": "churn_data.head()", + "id": "c03e3641e3bb4408", + "outputs": [ + { + "data": { + "text/plain": [ + " customerID gender SeniorCitizen Partner Dependents tenure PhoneService \\\n", + "0 7590-VHVEG Female 0 Yes No 1 No \n", + "1 5575-GNVDE Male 0 No No 34 Yes \n", + "2 3668-QPYBK Male 0 No No 2 Yes \n", + "3 7795-CFOCW Male 0 No No 45 No \n", + "4 9237-HQITU Female 0 No No 2 Yes \n", + "\n", + " MultipleLines InternetService OnlineSecurity ... DeviceProtection \\\n", + "0 No phone service DSL No ... No \n", + "1 No DSL Yes ... Yes \n", + "2 No DSL Yes ... No \n", + "3 No phone service DSL Yes ... Yes \n", + "4 No Fiber optic No ... No \n", + "\n", + " TechSupport StreamingTV StreamingMovies Contract PaperlessBilling \\\n", + "0 No No No Month-to-month Yes \n", + "1 No No No One year No \n", + "2 No No No Month-to-month Yes \n", + "3 Yes No No One year No \n", + "4 No No No Month-to-month Yes \n", + "\n", + " PaymentMethod MonthlyCharges TotalCharges Churn \n", + "0 Electronic check 29.85 29.85 0 \n", + "1 Mailed check 56.95 1889.5 0 \n", + "2 Mailed check 53.85 108.15 1 \n", + "3 Bank transfer (automatic) 42.30 1840.75 0 \n", + "4 Electronic check 70.70 151.65 1 \n", + "\n", + "[5 rows x 21 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerIDgenderSeniorCitizenPartnerDependentstenurePhoneServiceMultipleLinesInternetServiceOnlineSecurity...DeviceProtectionTechSupportStreamingTVStreamingMoviesContractPaperlessBillingPaymentMethodMonthlyChargesTotalChargesChurn
07590-VHVEGFemale0YesNo1NoNo phone serviceDSLNo...NoNoNoNoMonth-to-monthYesElectronic check29.8529.850
15575-GNVDEMale0NoNo34YesNoDSLYes...YesNoNoNoOne yearNoMailed check56.951889.50
23668-QPYBKMale0NoNo2YesNoDSLYes...NoNoNoNoMonth-to-monthYesMailed check53.85108.151
37795-CFOCWMale0NoNo45NoNo phone serviceDSLYes...YesYesNoNoOne yearNoBank transfer (automatic)42.301840.750
49237-HQITUFemale0NoNo2YesNoFiber opticNo...NoNoNoNoMonth-to-monthYesElectronic check70.70151.651
\n", + "

5 rows × 21 columns

\n", + "
" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 7 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### convert categorical columns to 0,1 pattern", + "id": "d66c3bef301ac592" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:58.316188Z", + "start_time": "2025-09-21T07:51:58.309014Z" + } + }, + "cell_type": "code", + "source": "churn_data.drop('customerID', axis=1,inplace=True)", + "id": "bbe521ebd5d0e342", + "outputs": [], + "execution_count": 8 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:59.379105Z", + "start_time": "2025-09-21T07:51:58.327535Z" + } + }, + "cell_type": "code", + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "# Get categorical columns\n", + "categorical_cols = churn_data.select_dtypes(include=['object']).columns\n", + "\n", + "# Apply label encoding to all categorical columns\n", + "for col in categorical_cols:\n", + " le = LabelEncoder()\n", + " churn_data[col] = le.fit_transform(churn_data[col])\n", + "\n" + ], + "id": "716cc9d0ba146c58", + "outputs": [], + "execution_count": 9 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:59.401377Z", + "start_time": "2025-09-21T07:51:59.389452Z" + } + }, + "cell_type": "code", + "source": "churn_data.head()", + "id": "56b33b787138b343", + "outputs": [ + { + "data": { + "text/plain": [ + " gender SeniorCitizen Partner Dependents tenure PhoneService \\\n", + "0 0 0 1 0 1 0 \n", + "1 1 0 0 0 34 1 \n", + "2 1 0 0 0 2 1 \n", + "3 1 0 0 0 45 0 \n", + "4 0 0 0 0 2 1 \n", + "\n", + " MultipleLines InternetService OnlineSecurity OnlineBackup \\\n", + "0 1 0 0 2 \n", + "1 0 0 2 0 \n", + "2 0 0 2 2 \n", + "3 1 0 2 0 \n", + "4 0 1 0 0 \n", + "\n", + " DeviceProtection TechSupport StreamingTV StreamingMovies Contract \\\n", + "0 0 0 0 0 0 \n", + "1 2 0 0 0 1 \n", + "2 0 0 0 0 0 \n", + "3 2 2 0 0 1 \n", + "4 0 0 0 0 0 \n", 
+ "\n", + " PaperlessBilling PaymentMethod MonthlyCharges TotalCharges Churn \n", + "0 1 2 29.85 2505 0 \n", + "1 0 3 56.95 1466 0 \n", + "2 1 3 53.85 157 1 \n", + "3 0 0 42.30 1400 0 \n", + "4 1 2 70.70 925 1 " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
genderSeniorCitizenPartnerDependentstenurePhoneServiceMultipleLinesInternetServiceOnlineSecurityOnlineBackupDeviceProtectionTechSupportStreamingTVStreamingMoviesContractPaperlessBillingPaymentMethodMonthlyChargesTotalChargesChurn
00010101002000001229.8525050
110003410020200010356.9514660
21000210022000001353.851571
310004501020220010042.3014000
40000210100000001270.709251
\n", + "
" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 10 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:59.438612Z", + "start_time": "2025-09-21T07:51:59.422336Z" + } + }, + "cell_type": "code", + "source": "churn_data.corr()['Churn']", + "id": "b81d6c63c92d6d47", + "outputs": [ + { + "data": { + "text/plain": [ + "gender -0.008612\n", + "SeniorCitizen 0.150889\n", + "Partner -0.150448\n", + "Dependents -0.164221\n", + "tenure -0.352229\n", + "PhoneService 0.011942\n", + "MultipleLines 0.038037\n", + "InternetService -0.047291\n", + "OnlineSecurity -0.289309\n", + "OnlineBackup -0.195525\n", + "DeviceProtection -0.178134\n", + "TechSupport -0.282492\n", + "StreamingTV -0.036581\n", + "StreamingMovies -0.038492\n", + "Contract -0.396713\n", + "PaperlessBilling 0.191825\n", + "PaymentMethod 0.107062\n", + "MonthlyCharges 0.193356\n", + "TotalCharges 0.014479\n", + "Churn 1.000000\n", + "Name: Churn, dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 11 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### 1).Contract type is your strongest predictor - focus on getting customers into longer contracts\n", + "#### 2).Add-on services (OnlineSecurity, TechSupport, OnlineBackup) significantly reduce churn\n", + "#### 3).Monthly charges matter - higher prices increase churn risk\n", + "#### 4).Services like StreamingTV/Movies have minimal impact on churn" + ], + "id": "d9e0a8f16c87a702" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## best_features are ['Contract', 'OnlineSecurity', 'TechSupport', 'MonthlyCharges','PaperlessBilling', 'OnlineBackup', 'DeviceProtection']", + "id": "ffb488811796db2" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:59.452206Z", + "start_time": "2025-09-21T07:51:59.448874Z" + } + }, + "cell_type": "code", 
+ "source": [ + "best_features = ['Contract', 'OnlineSecurity', 'TechSupport', 'MonthlyCharges',\n", + " 'PaperlessBilling', 'OnlineBackup', 'DeviceProtection']" + ], + "id": "202e86d9a1fe9c7", + "outputs": [], + "execution_count": 12 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "#### Splittted dataset with like target and other columns", + "id": "3c8bc4c2b7433c91" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:59.478052Z", + "start_time": "2025-09-21T07:51:59.470533Z" + } + }, + "cell_type": "code", + "source": [ + "X = churn_data.drop(['Churn'], axis=1) # Drop target\n", + "y = churn_data['Churn']\n", + "\n", + "# 2. Check shapes\n", + "print(f\"Feature matrix shape: {X.shape}\")\n", + "print(f\"Target vector shape: {y.shape}\")" + ], + "id": "70692bf39df01132", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature matrix shape: (7043, 19)\n", + "Target vector shape: (7043,)\n" + ] + } + ], + "execution_count": 13 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:59.630037Z", + "start_time": "2025-09-21T07:51:59.499063Z" + } + }, + "cell_type": "code", + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y,\n", + " test_size=0.2, # 20% for testing, 80% for training\n", + " random_state=42 # For reproducible results\n", + ")" + ], + "id": "a08b36c5bab72e00", + "outputs": [], + "execution_count": 14 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:59.876045Z", + "start_time": "2025-09-21T07:51:59.639683Z" + } + }, + "cell_type": "code", + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "# Create and train the model\n", + "# model = LogisticRegression()\n", + "model = LogisticRegression(class_weight='balanced')\n", + "model.fit(X_train, y_train)\n" + ], + "id": "a9a1b7354e2a3cbe", + "outputs": [ + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\281879\\Documents\\AI\\week_1_assaingmnets\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:473: ConvergenceWarning: lbfgs failed to converge after 100 iteration(s) (status=1):\n", + "STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT\n", + "\n", + "Increase the number of iterations to improve the convergence (max_iter=100).\n", + "You might also want to scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + }, + { + "data": { + "text/plain": [ + "LogisticRegression(class_weight='balanced')" + ], + "text/html": [ + "
LogisticRegression(class_weight='balanced')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 15 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:59.892241Z", + "start_time": "2025-09-21T07:51:59.884892Z" + } + }, + "cell_type": "code", + "source": [ + "# Make predictions\n", + "y_pred = model.predict(X_test)" + ], + "id": "15510ca63305d3a8", + "outputs": [], + "execution_count": 16 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:59.909894Z", + "start_time": "2025-09-21T07:51:59.903069Z" + } + }, + "cell_type": "code", + "source": [ + "from sklearn.metrics import accuracy_score\n", + "\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "print(f\"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)\")" + ], + "id": "7f06c5a38d67361b", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.7473 (74.73%)\n" + ] + } + ], + "execution_count": 17 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-09-21T07:51:59.941200Z", + "start_time": "2025-09-21T07:51:59.922375Z" + } + }, + "cell_type": "code", + "source": [ + "from sklearn.metrics import classification_report,confusion_matrix, accuracy_score\n", + "\n", + "print(\"Classification Report:\")\n", + "print(classification_report(y_test, y_pred))\n", + "print(\"\\nConfusion Matrix:\")\n", + "print(confusion_matrix(y_test, y_pred))\n", + "# Accuracy\n" + ], + "id": "17e1612821887886", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 0.92 0.72 0.81 1036\n", + " 1 0.51 0.82 0.63 373\n", + "\n", + " accuracy 0.75 1409\n", + " macro avg 0.72 0.77 0.72 1409\n", + "weighted avg 0.81 0.75 0.76 1409\n", + "\n", + "\n", + "Confusion Matrix:\n", + "[[749 287]\n", + " [ 69 304]]\n" + ] + } + ], + "execution_count": 18 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": 
"2025-09-21T07:51:59.968133Z", + "start_time": "2025-09-21T07:51:59.955198Z" + } + }, + "cell_type": "code", + "source": [ + "# Quick check\n", + "train_acc = model.score(X_train, y_train)\n", + "test_acc = model.score(X_test, y_test)\n", + "\n", + "print(f\"Training Accuracy: {train_acc:.4f}\")\n", + "print(f\"Test Accuracy: {test_acc:.4f}\")\n", + "print(f\"Difference: {train_acc - test_acc:.4f}\")\n", + "\n", + "if train_acc - test_acc > 0.1: # 10% difference\n", + " print(\"OVERFITTING: Training accuracy much higher than test!\")\n", + "elif train_acc - test_acc > 0.05: # 5% difference\n", + " print(\"Possible overfitting\")\n", + "else:\n", + " print(\"Good generalization\")" + ], + "id": "6733660e55b07d6a", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training Accuracy: 0.7448\n", + "Test Accuracy: 0.7473\n", + "Difference: -0.0026\n", + "Good generalization\n" + ] + } + ], + "execution_count": 19 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "##### No Churn: 0.72 → Model correctly identifies 72% of customers who actually didn't churn. 
Churn: 0.82 → Model correctly identifies 82% of customers who actually churned. These figures are the recall values, which is the metric this model needs most, so I gave recall higher priority than precision. Precision for churned customers is only 51%, which means we raise false alarms on loyal customers, but from a business standpoint a recall of 82% means we are able to find most of the customers who are going to churn.", + "id": "45ee6471491777de" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "##### Precision: of everything I predicted as churn, how much actually churned, i.e., if I predicted 100 churners and 51 of them were correct, my precision is 51%", + "id": "b071e7356fcbce1b" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "##### Recall: of everything that actually churned, how much I predicted, i.e., if 100 customers actually churned and I predicted 82 of them, my recall is 82%", + "id": "b4a0bf637e18cc7e" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "\n", + "##### So, from my perspective, recall is more important for this model because it identifies 82% of the churned customers\n" + ], + "id": "695ecdd9397942da" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "##### The model is very good at predicting the churned customers, so that we can target them with retention campaigns before they leave", + "id": "851da7895fe038d8" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### Overall accuracy of the model is 75%, i.e., our model correctly predicts 75% of all customers. For non-churning customers: Precision: 92% - when we predict \"won't churn\", we are almost always right. Recall: 72% - we catch 72% of customers who actually stay. Our model gives strong performance in identifying loyal customers. For churned customers: Precision: 51% - only half of our churn predictions are correct. Recall: 82% - our model catches 82% of customers who actually churn. Good at finding churners, but many false alarms.\n", + "#### Strong areas of our
model are\n", + "##### 1). Excellent churn detection (82% recall) - we won't miss many leaving customers\n", + "##### 2). High confidence in loyal customers (92% precision for non-churn)\n", + "##### 3). Actionable for retention campaigns\n" + ], + "id": "ada9ea863b911e24" + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "kernelspec": { + "name": "python3", + "language": "python", + "display_name": "Python 3 (ipykernel)" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}