Add notebook

6504744f · Allen Bose(UST · 6504744f
Commit 6504744f authored Oct 08, 2025 by Allen Bose(UST
Show whitespace changes
Inline Side-by-side

Showing with 438 additions and 0 deletions

Customer_Sentimental_Analysis.ipynb Customer_Sentimental_Analysis.ipynb +438 -0

No files found.
--- a/Customer_Sentimental_Analysis.ipynb
+++ b/Customer_Sentimental_Analysis.ipynb
+{
+ "cells": [
+  {
+   "metadata": {},
+   "cell_type": "raw",
+   "source": "",
+   "id": "924893c1049cf0c7"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-10-08T13:32:13.291888Z",
+     "start_time": "2025-10-08T13:32:13.282373Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "reviews = [\n",
+    "    # Review 1: Positive\n",
+    "    \"I absolutely love the new QuantumX Pro camera! The picture quality is stellar and the battery life is amazing. Shipped super fast too. A++!\",\n",
+    "\n",
+    "    # Review 2: Negative with specific issue\n",
+    "    \"The SonicWave earbuds have a serious design flaw. The left earbud stopped charging after just one week. I expected better for the price. Very disappointed.\",\n",
+    "\n",
+    "    # Review 3: Mixed with a question\n",
+    "    \"The Titan smartwatch is decent. The screen is bright and the features are good, but the step counter seems inaccurate. It's off by at least 20%. Is there a way to calibrate it?\",\n",
+    "\n",
+    "    # Review 4: Negative with multiple issues\n",
+    "    \"My order for the AeroDrone was a disaster. It arrived with a broken propeller and the battery was completely dead on arrival. Customer service has been unresponsive for 3 days.\",\n",
+    "\n",
+    "    # Review 5: Positive but mentions a minor issue\n",
+    "    \"Overall, I'm happy with the PureGlow Air Purifier. It's quiet and effective. My only complaint is that the replacement filters are a bit expensive.\"\n",
+    "]"
+   ],
+   "id": "fd312978b3783683",
+   "outputs": [],
+   "execution_count": 1
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-10-08T13:33:27.500868100Z",
+     "start_time": "2025-10-08T13:32:38.700941Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "# Compare how the GPT-2 and BERT tokenizers split the same review.\n",
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "from transformers import BertTokenizer, GPT2Tokenizer\n",
+    "\n",
+    "# Folder that holds the locally saved tokenizer files. Set the MODEL_DIR\n",
+    "# environment variable to point at it instead of hard-coding a per-user path;\n",
+    "# by default the folders are looked up next to the notebook.\n",
+    "MODEL_DIR = Path(os.getenv(\"MODEL_DIR\", \".\"))\n",
+    "\n",
+    "# Load tokenizers from the local folders\n",
+    "gpt2_tokenizer = GPT2Tokenizer.from_pretrained(str(MODEL_DIR / \"Gpt-2\"))\n",
+    "bert_tokenizer = BertTokenizer.from_pretrained(str(MODEL_DIR / \"BERT-Certi\"))\n",
+    "\n",
+    "# Review 3 (index 2) is the mixed review that ends with a question.\n",
+    "sample_review = reviews[2]\n",
+    "\n",
+    "print(\"Review:\", sample_review)\n",
+    "print(\"\\n\" + \"=\"*50)\n",
+    "\n",
+    "# Tokenize with GPT-2\n",
+    "gpt2_tokens = gpt2_tokenizer.tokenize(sample_review)\n",
+    "print(f\"\\nGPT-2 Tokens ({len(gpt2_tokens)} tokens):\")\n",
+    "print(gpt2_tokens)\n",
+    "\n",
+    "# Tokenize with BERT\n",
+    "bert_tokens = bert_tokenizer.tokenize(sample_review)\n",
+    "print(f\"\\nBERT Tokens ({len(bert_tokens)} tokens):\")\n",
+    "print(bert_tokens)"
+   ],
+   "id": "c9812515ebc6678f",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\281879\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Review: The Titan smartwatch is decent. The screen is bright and the features are good, but the step counter seems inaccurate. It's off by at least 20%. Is there a way to calibrate it?\n",
+      "\n",
+      "==================================================\n",
+      "\n",
+      "GPT-2 Tokens (41 tokens):\n",
+      "['The', 'ĠTitan', 'Ġsmart', 'watch', 'Ġis', 'Ġdecent', '.', 'ĠThe', 'Ġscreen', 'Ġis', 'Ġbright', 'Ġand', 'Ġthe', 'Ġfeatures', 'Ġare', 'Ġgood', ',', 'Ġbut', 'Ġthe', 'Ġstep', 'Ġcounter', 'Ġseems', 'Ġinaccurate', '.', 'ĠIt', \"'s\", 'Ġoff', 'Ġby', 'Ġat', 'Ġleast', 'Ġ20', '%.', 'ĠIs', 'Ġthere', 'Ġa', 'Ġway', 'Ġto', 'Ġcalibr', 'ate', 'Ġit', '?']\n",
+      "\n",
+      "BERT Tokens (44 tokens):\n",
+      "['the', 'titan', 'smart', '##watch', 'is', 'decent', '.', 'the', 'screen', 'is', 'bright', 'and', 'the', 'features', 'are', 'good', ',', 'but', 'the', 'step', 'counter', 'seems', 'inaccurate', '.', 'it', \"'\", 's', 'off', 'by', 'at', 'least', '20', '%', '.', 'is', 'there', 'a', 'way', 'to', 'cal', '##ib', '##rate', 'it', '?']\n"
+     ]
+    }
+   ],
+   "execution_count": 2
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "##### The two token lists are not identical: GPT-2 produces 41 tokens and BERT produces 44 tokens, and the splitting patterns also differ.\n",
+    "##### GPT-2 uses the symbol Ġ to mark a token that is preceded by a space, while BERT uses the ## prefix to mark a token that continues the previous word (one word split into several pieces), e.g. 'Ġsmart' + 'watch' versus 'smart' + '##watch'.\n",
+    "##### The models split text differently because they use different tokenization algorithms (byte-level BPE for GPT-2, WordPiece for BERT) and were trained on different data; each one balances vocabulary size (how many words it can represent) against how fast it can work."
+   ],
+   "id": "b547dc499b4309f9"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-10-08T13:35:36.200411Z",
+     "start_time": "2025-10-08T13:35:36.163158Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import os\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "# Load environment variables from the .env file\n",
+    "load_dotenv()\n",
+    "api_key = os.getenv(\"GEMINI_API_KEY\")\n",
+    "\n",
+    "# Fail fast with a clear message instead of letting a later API call fail on a missing key.\n",
+    "if not api_key:\n",
+    "    raise RuntimeError(\"GEMINI_API_KEY is not set; add it to your .env file or environment.\")\n"
+   ],
+   "id": "10dbb1049b5d1966",
+   "outputs": [],
+   "execution_count": 7
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-10-08T13:36:03.447262Z",
+     "start_time": "2025-10-08T13:36:01.686080Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from google import genai\n",
+    "\n",
+    "# List the models available to this API key, keeping only those that can\n",
+    "# serve generateContent requests (embedding, image and video models are skipped).\n",
+    "client = genai.Client(api_key=api_key)\n",
+    "for m in client.models.list():\n",
+    "    # supported_actions may be missing for some entries, so treat it as empty.\n",
+    "    if \"generateContent\" in (m.supported_actions or []):\n",
+    "        print(m.name, m.supported_actions)\n"
+   ],
+   "id": "cee160daa377d0d2",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "models/embedding-gecko-001 ['embedText', 'countTextTokens']\n",
+      "models/gemini-2.5-pro-preview-03-25 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.5-flash-preview-05-20 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.5-flash ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.5-flash-lite-preview-06-17 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.5-pro-preview-05-06 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.5-pro-preview-06-05 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.5-pro ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-flash-exp ['generateContent', 'countTokens', 'bidiGenerateContent']\n",
+      "models/gemini-2.0-flash ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-flash-001 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-flash-exp-image-generation ['generateContent', 'countTokens', 'bidiGenerateContent']\n",
+      "models/gemini-2.0-flash-lite-001 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-flash-lite ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-flash-preview-image-generation ['generateContent', 'countTokens', 'batchGenerateContent']\n",
+      "models/gemini-2.0-flash-lite-preview-02-05 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-flash-lite-preview ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-pro-exp ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-pro-exp-02-05 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-exp-1206 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-flash-thinking-exp-01-21 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-flash-thinking-exp ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.0-flash-thinking-exp-1219 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.5-flash-preview-tts ['countTokens', 'generateContent']\n",
+      "models/gemini-2.5-pro-preview-tts ['countTokens', 'generateContent']\n",
+      "models/learnlm-2.0-flash-experimental ['generateContent', 'countTokens']\n",
+      "models/gemma-3-1b-it ['generateContent', 'countTokens']\n",
+      "models/gemma-3-4b-it ['generateContent', 'countTokens']\n",
+      "models/gemma-3-12b-it ['generateContent', 'countTokens']\n",
+      "models/gemma-3-27b-it ['generateContent', 'countTokens']\n",
+      "models/gemma-3n-e4b-it ['generateContent', 'countTokens']\n",
+      "models/gemma-3n-e2b-it ['generateContent', 'countTokens']\n",
+      "models/gemini-flash-latest ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-flash-lite-latest ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-pro-latest ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.5-flash-lite ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.5-flash-image-preview ['generateContent', 'countTokens']\n",
+      "models/gemini-2.5-flash-image ['generateContent', 'countTokens']\n",
+      "models/gemini-2.5-flash-preview-09-2025 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-2.5-flash-lite-preview-09-2025 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']\n",
+      "models/gemini-robotics-er-1.5-preview ['generateContent', 'countTokens']\n",
+      "models/gemini-2.5-computer-use-preview-10-2025 ['generateContent', 'countTokens']\n",
+      "models/embedding-001 ['embedContent']\n",
+      "models/text-embedding-004 ['embedContent']\n",
+      "models/gemini-embedding-exp-03-07 ['embedContent', 'countTextTokens', 'countTokens']\n",
+      "models/gemini-embedding-exp ['embedContent', 'countTextTokens', 'countTokens']\n",
+      "models/gemini-embedding-001 ['embedContent', 'countTextTokens', 'countTokens', 'asyncBatchEmbedContent']\n",
+      "models/aqa ['generateAnswer']\n",
+      "models/imagen-3.0-generate-002 ['predict']\n",
+      "models/imagen-4.0-generate-preview-06-06 ['predict']\n",
+      "models/imagen-4.0-ultra-generate-preview-06-06 ['predict']\n",
+      "models/imagen-4.0-generate-001 ['predict']\n",
+      "models/imagen-4.0-ultra-generate-001 ['predict']\n",
+      "models/imagen-4.0-fast-generate-001 ['predict']\n",
+      "models/veo-2.0-generate-001 ['predictLongRunning']\n",
+      "models/veo-3.0-generate-preview ['predictLongRunning']\n",
+      "models/veo-3.0-fast-generate-preview ['predictLongRunning']\n",
+      "models/veo-3.0-generate-001 ['predictLongRunning']\n",
+      "models/veo-3.0-fast-generate-001 ['predictLongRunning']\n",
+      "models/gemini-2.5-flash-preview-native-audio-dialog ['countTokens', 'bidiGenerateContent']\n",
+      "models/gemini-2.5-flash-exp-native-audio-thinking-dialog ['countTokens', 'bidiGenerateContent']\n",
+      "models/gemini-2.0-flash-live-001 ['bidiGenerateContent', 'countTokens']\n",
+      "models/gemini-live-2.5-flash-preview ['bidiGenerateContent', 'countTokens']\n",
+      "models/gemini-2.5-flash-live-preview ['bidiGenerateContent', 'countTokens']\n",
+      "models/gemini-2.5-flash-native-audio-latest ['countTokens', 'bidiGenerateContent']\n",
+      "models/gemini-2.5-flash-native-audio-preview-09-2025 ['countTokens', 'bidiGenerateContent']\n"
+     ]
+    }
+   ],
+   "execution_count": 9
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-10-08T13:36:12.466937Z",
+     "start_time": "2025-10-08T13:36:10.142939Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from google.generativeai import configure, GenerativeModel\n",
+    "\n",
+    "# Step 1: Configure the SDK with the API key loaded earlier\n",
+    "configure(api_key=api_key)\n",
+    "\n",
+    "# Step 2: Create the model (reused by the later prompting cells)\n",
+    "model = GenerativeModel(model_name=\"gemini-2.0-flash-exp\")\n",
+    "\n",
+    "# Step 3: Number the reviews so each one can be labelled separately,\n",
+    "# rather than pasting the raw Python list repr into the prompt.\n",
+    "numbered_reviews = \"\\n\".join(f\"{i}. {text}\" for i, text in enumerate(reviews, start=1))\n",
+    "\n",
+    "# Step 4: Few-shot prompt with one consistent Input/Output pair per label\n",
+    "sentiment_prompt = f\"\"\"Classify each customer review below as Positive, Negative or Mixed.\n",
+    "\n",
+    "Example 1:\n",
+    "Input: the movie is amazing\n",
+    "Output: Positive\n",
+    "\n",
+    "Example 2:\n",
+    "Input: service was terrible and slow\n",
+    "Output: Negative\n",
+    "\n",
+    "Example 3:\n",
+    "Input: product is okay nothing special\n",
+    "Output: Mixed\n",
+    "\n",
+    "Reviews:\n",
+    "{numbered_reviews}\n",
+    "\n",
+    "Answer with one line per review in the form <number>: <label>\"\"\"\n",
+    "\n",
+    "# Step 5: Generate content and print the result\n",
+    "response = model.generate_content(sentiment_prompt)\n",
+    "print(response.text)\n"
+   ],
+   "id": "59e929211ee96fa6",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Here's the classification of each review from your input, based on your provided examples:\n",
+      "\n",
+      "1.  **Input:** 'I absolutely love the new QuantumX Pro camera! The picture quality is stellar and the battery life is amazing. Shipped super fast too. A++!'\n",
+      "    **Output:** Positive\n",
+      "\n",
+      "2.  **Input:** 'The SonicWave earbuds have a serious design flaw. The left earbud stopped charging after just one week. I expected better for the price. Very disappointed.'\n",
+      "    **Output:** Negative\n",
+      "\n",
+      "3.  **Input:** \"The Titan smartwatch is decent. The screen is bright and the features are good, but the step counter seems inaccurate. It's off by at least 20%. Is there a way to calibrate it?\"\n",
+      "    **Output:** Mixed\n",
+      "\n",
+      "4.  **Input:** 'My order for the AeroDrone was a disaster. It arrived with a broken propeller and the battery was completely dead on arrival. Customer service has been unresponsive for 3 days.'\n",
+      "    **Output:** Negative\n",
+      "\n",
+      "5.  **Input:** \"Overall, I'm happy with the PureGlow Air Purifier. It's quiet and effective. My only complaint is that the replacement filters are a bit expensive.\"\n",
+      "    **Output:** Mixed\n",
+      "\n"
+     ]
+    }
+   ],
+   "execution_count": 10
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-10-08T13:36:25.269513Z",
+     "start_time": "2025-10-08T13:36:22.731210Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "# Few-shot JSON extraction: the examples map ONE review to ONE JSON object,\n",
+    "# so send each review in its own request instead of the whole list at once.\n",
+    "# Doubled braces ({{ }}) are literal braces inside the f-string.\n",
+    "for review in reviews:\n",
+    "    response = model.generate_content(f\"\"\"From this review, I want the product_name, issue_summary and sentiment (positive, negative or mixed) in a JSON format.\n",
+    "\n",
+    "Examples:\n",
+    "Input: \"The Sony Speaker is so good, but battery drains so quickly\"\n",
+    "Output:\n",
+    "{{\n",
+    "\"product_name\": \"Sony Speaker\",\n",
+    "\"issue_summary\": \"battery drains so quickly\",\n",
+    "\"sentiment\": \"Mixed\"\n",
+    "}}\n",
+    "\n",
+    "Input: \"Movie is so good\"\n",
+    "Output:\n",
+    "{{\n",
+    "\"product_name\": \"N/A\",\n",
+    "\"issue_summary\": \"N/A\",\n",
+    "\"sentiment\": \"Positive\"\n",
+    "}}\n",
+    "\n",
+    "Now analyze this review:\n",
+    "Input: \"{review}\"\n",
+    "Output:\"\"\")\n",
+    "\n",
+    "    # Print the JSON returned for this review\n",
+    "    print(response.text)\n"
+   ],
+   "id": "eeccac983f0b013f",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "```json\n",
+      "[\n",
+      "  {\n",
+      "    \"product_name\": \"QuantumX Pro camera\",\n",
+      "    \"issue_summary\": \"N/A\",\n",
+      "    \"sentiment\": \"Positive\"\n",
+      "  },\n",
+      "  {\n",
+      "    \"product_name\": \"SonicWave earbuds\",\n",
+      "    \"issue_summary\": \"left earbud stopped charging after just one week\",\n",
+      "    \"sentiment\": \"Negative\"\n",
+      "  },\n",
+      "  {\n",
+      "    \"product_name\": \"Titan smartwatch\",\n",
+      "    \"issue_summary\": \"step counter seems inaccurate\",\n",
+      "    \"sentiment\": \"Mixed\"\n",
+      "  },\n",
+      "  {\n",
+      "    \"product_name\": \"AeroDrone\",\n",
+      "    \"issue_summary\": \"arrived with a broken propeller and the battery was completely dead on arrival. Customer service has been unresponsive\",\n",
+      "    \"sentiment\": \"Negative\"\n",
+      "  },\n",
+      "  {\n",
+      "    \"product_name\": \"PureGlow Air Purifier\",\n",
+      "    \"issue_summary\": \"replacement filters are a bit expensive\",\n",
+      "    \"sentiment\": \"Mixed\"\n",
+      "  }\n",
+      "]\n",
+      "```\n"
+     ]
+    }
+   ],
+   "execution_count": 11
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-10-08T13:48:51.613267Z",
+     "start_time": "2025-10-08T13:48:49.018569Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "# Step-by-step root-cause prompt. The wording addresses a single review,\n",
+    "# so each review is analysed in its own request rather than as one list.\n",
+    "for review in reviews:\n",
+    "    response = model.generate_content(f\"\"\"Analyze the following customer review to identify the root cause of their issue. First, state the main problem. Second, explain your reasoning in a single sentence. Let's think step by step.\n",
+    "from \"{review}\" i need to get like first the product name then state the main problem and then the reasoning\n",
+    "\n",
+    "\n",
+    "Analysis:\n",
+    "step 1 - : find main problem of that product\n",
+    "step 2 - : find the reasoning part\"\"\")\n",
+    "\n",
+    "    # Print the analysis for this review\n",
+    "    print(response.text)\n"
+   ],
+   "id": "bab82e5965d4830a",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Here's the breakdown of each review, identifying the product, main problem, and reasoning:\n",
+      "\n",
+      "*   **Product:** SonicWave earbuds\n",
+      "    *   **Main Problem:** Left earbud stopped charging after one week.\n",
+      "    *   **Reasoning:** The customer states the earbud failed after only a week, expressing disappointment given the price, indicating a quality control or design flaw.\n",
+      "\n",
+      "*   **Product:** Titan smartwatch\n",
+      "    *   **Main Problem:** Inaccurate step counter.\n",
+      "    *   **Reasoning:** The customer states the step counter is off by at least 20%, suggesting a calibration or sensor issue.\n",
+      "\n",
+      "*   **Product:** AeroDrone\n",
+      "    *   **Main Problem:** Arrived damaged and non-functional; unresponsive customer service.\n",
+      "    *   **Reasoning:** The drone arrived with a broken propeller and a dead battery, implying damage during shipping or a faulty product, compounded by the lack of customer service, indicating a problem with both product quality and support.\n",
+      "\n",
+      "*   **Product:** PureGlow Air Purifier\n",
+      "    *   **Main Problem:** Replacement filters are expensive.\n",
+      "    *   **Reasoning:** The customer expresses a complaint about the high cost of replacement filters, implying that the ongoing cost of maintaining the product is a concern.\n",
+      "\n"
+     ]
+    }
+   ],
+   "execution_count": 17
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": "",
+   "id": "23855b3226facfaf"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}