5. Created a menu-driven function for users

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from faker import Faker
import random

# ==================== Initialize Faker & Set Seed ====================
# Two generators are seeded: `random` supplies the numeric fields and the
# Faker instance supplies names and countries, so reruns give the same data.
fake = Faker()
random.seed(42)
fake.seed_instance(42)

# ==================== Generate Fake User Data ====================
# Twenty synthetic users with IDs PWR101..PWR120. Dict values are evaluated
# top to bottom, which fixes the order of draws from each generator.
users = [
    {
        "user_id": f"PWR{user_number}",
        "name": fake.name(),
        "age": random.randint(18, 60),
        "country": fake.country(),
        "group": random.choice(["A", "B"]),
        "clicks": random.randint(0, 20),
        "converted": random.choice([0, 1]),
    }
    for user_number in range(101, 121)
]

# Build the DataFrame from the generated user records.
df = pd.DataFrame(users)

# Number the rows 1..N instead of the default 0..N-1.
df.index = pd.RangeIndex(start=1, stop=len(df) + 1)

# Write the dataset to disk; index=False keeps the row index out of the file.
df.to_csv("ab_testing_data.csv", index=False)

print("=" * 50)
print("Wohoo! Data saved successfully, 1/3rd of your project is complete!")

# ==================== Load Dataset for Analysis ====================
print(
    "",
    "=" * 50,
    "Loading and Analyzing Dataset with Pandas and NumPy",
    "=" * 50,
    sep="\n",
)

# Re-read the CSV written earlier so the analysis runs on the saved file.
df = pd.read_csv("ab_testing_data.csv")

# ==================== Summary Statistics ====================
print("", "=" * 50, "Summary Stats (Numerical Data)", sep="\n")
print(df.describe())
print("=" * 50)

# ==================== NumPy-Based Statistical Analysis ====================
print("", "=" * 50, "NumPy-Based Statistical Analysis", "=" * 50, sep="\n")

# Work on the raw NumPy array behind the "clicks" column.
clicks = df["clicks"].to_numpy()

# std/var use NumPy's default ddof=0, i.e. the population form.
print(f"Average Clicks per User: {clicks.mean():.2f}")
print(f"Median Clicks per User: {np.median(clicks)}")
print(f"Click Standard Deviation: {clicks.std():.2f}")
print(f"Click Variance: {clicks.var():.2f}")
print(f"Min Clicks: {clicks.min()}, Max Clicks: {clicks.max()}")
print(f"25th Percentile Clicks: {np.percentile(clicks, 25)}")
print(f"75th Percentile Clicks: {np.percentile(clicks, 75)}")
print("=" * 50)

# ==================== A/B Test Analysis ====================
print(
    "",
    "=" * 50,
    "A/B Test Analysis (Average Clicks & Conversion Rates)",
    "=" * 50,
    sep="\n",
)

# Per-group means: average clicks, and the mean of the 0/1 "converted" flag
# (which is the group's conversion rate).
group_analysis = df.groupby("group")[["clicks", "converted"]].mean()
print(group_analysis)
print("=" * 50)

# ==================== Data Visualization ====================

# Select Seaborn's "whitegrid" style. This runs at module level, before any
# of the plotting functions defined below are called from the menu.
sns.set_style("whitegrid")

# 1️⃣ Histogram of Clicks
def histogram_of_clicks():
    """Plot ``df["clicks"]`` as a 10-bin histogram with a KDE curve overlaid.

    Reads the module-level ``df`` and displays the figure via ``plt.show()``.
    """
    plt.figure(figsize=(8, 5))
    axes = sns.histplot(df["clicks"], bins=10, kde=True, color="skyblue")
    axes.set_title("Distribution of Clicks per User")
    axes.set_xlabel("Number of Clicks")
    axes.set_ylabel("Frequency")
    plt.show()

# 2️⃣ Box Plot for Clicks
def box_plot_for_clicks():
    """Draw a horizontal box plot of per-user click counts.

    Reads the module-level ``df`` and displays the figure via ``plt.show()``.
    """
    plt.figure(figsize=(6, 5))
    click_counts = df["clicks"]
    axes = sns.boxplot(x=click_counts, color="orange")
    axes.set(title="Box Plot of Clicks per User", xlabel="Number of Clicks")
    plt.show()

# 3️⃣ Bar Chart for A/B Group Clicks
def bar_chart_for_group_clicks():
    """Draw one bar per A/B group showing that group's mean click count.

    Reads the module-level ``df`` and displays the figure via ``plt.show()``.
    """
    plt.figure(figsize=(6, 5))
    # NOTE(review): newer seaborn releases warn when `palette` is passed
    # without `hue` -- confirm against the installed version.
    axes = sns.barplot(
        x=df["group"],
        y=df["clicks"],
        estimator=np.mean,
        palette="viridis",
    )
    axes.set_title("Average Clicks by A/B Test Group")
    axes.set_xlabel("Test Group")
    axes.set_ylabel("Average Clicks")
    plt.show()

# 4️⃣ Pie Chart for Conversion Rates
def pie_chart_for_conversion_rates():
    """Show the share of converted vs. non-converted users as a pie chart.

    Reads the module-level ``df`` and displays the figure via ``plt.show()``.

    ``value_counts()`` orders its result by frequency, so a fixed
    ``["Not Converted", "Converted"]`` label list is only right when
    non-converted users are the majority. Labels and colours are therefore
    looked up from each wedge's own value (0 or 1). This also keeps the chart
    valid when only one of the two outcomes is present.
    """
    # sort_index() gives a stable 0-then-1 wedge order regardless of which
    # outcome is more common.
    conversion_counts = df["converted"].value_counts().sort_index()

    label_by_value = {0: "Not Converted", 1: "Converted"}
    color_by_value = {0: "red", 1: "green"}
    # Fall back to the raw value / a neutral colour for anything other than 0/1.
    wedge_labels = [label_by_value.get(v, str(v)) for v in conversion_counts.index]
    wedge_colors = [color_by_value.get(v, "gray") for v in conversion_counts.index]

    plt.figure(figsize=(6, 6))
    plt.pie(
        conversion_counts,
        labels=wedge_labels,
        autopct="%1.1f%%",
        colors=wedge_colors,
    )
    plt.title("Conversion Rate Distribution")
    plt.show()

# 5️⃣ Scatter Plot (Clicks vs Age)
def scatter_plot():
    """Scatter clicks against age, colouring each point by its A/B group.

    Reads the module-level ``df`` and displays the figure via ``plt.show()``.
    """
    plt.figure(figsize=(8, 5))
    axes = sns.scatterplot(
        x=df["age"],
        y=df["clicks"],
        hue=df["group"],
        palette="coolwarm",
    )
    axes.set(
        title="Clicks vs Age (A/B Groups)",
        xlabel="Age",
        ylabel="Number of Clicks",
    )
    plt.show()

# ==================== Save Cleaned Data ====================
# Write the DataFrame to a second CSV, again without the row index. The frame
# is written as loaded; the script applies no transformation to it beforehand.
df.to_csv("cleaned_ab_testing_data_numpy.csv", index=False)

print(
    "",
    "=" * 50,
    "Wohoo! You just learned how to use NumPy, Pandas & Matplotlib together and completed 3/3rd of your overall project! 🚀",
    "=" * 50,
    sep="\n",
)

# ==================== Menu for user interaction ====================
# Menu number -> (status message, plotting function to call).
menu_actions = {
    "1": ("Calling Histogram of Clicks", histogram_of_clicks),
    "2": ("Calling Box Plot for Clicks", box_plot_for_clicks),
    "3": ("Calling Bar Chart for Group Clicks", bar_chart_for_group_clicks),
    "4": ("Calling Pie Chart for Conversion Rates", pie_chart_for_conversion_rates),
    "5": ("Calling Scatter Plot", scatter_plot),
}

# Keep showing the menu until the user explicitly chooses option 6.
while True:
    print("\n1. Histogram of Clicks")
    print("2. Box Plot for Clicks")
    print("3. Bar Chart for A/B Group Clicks")
    print("4. Pie Chart for Conversion Rates")
    print("5. Scatter Plot (Clicks vs Age)")
    print("6. Exit the program")

    # input() already returns a str; strip stray whitespace so " 3 " still
    # selects option 3.
    choice = input("Choose an option (1/2/3/4/5/6): ").strip()

    print(f"User entered: {choice}")
    if choice == "6":
        # Only the exit option leaves the loop (previously the `break` sat
        # under option 5, so exit never exited and the scatter plot did).
        print("Exiting program. Goodbye!")
        break

    if choice in menu_actions:
        message, show_plot = menu_actions[choice]
        print(message)
        show_plot()
    else:
        print("Invalid choice! Please enter 1,2,3,4,5 or 6.")

Last updated