In [None]:
%pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.5.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.5.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.5.0 scikit-optimize-0.10.2


In [3]:
# Colab-only cell: prompt for a file upload so the dataset is available on the
# runtime. The saved output of this cell shows the file stored as `archive.zip`.
from google.colab import files
# NOTE(review): `uploaded` is not referenced by any later cell visible here;
# the data is read back from disk by file name instead.
uploaded = files.upload()

Saving archive.zip to archive.zip


In [5]:
%pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.5.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.5.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.5.0 scikit-optimize-0.10.2


In [15]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from skopt import BayesSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# 1️⃣ Load data
# The CSV is read directly from the uploaded zip archive by file name.
df = pd.read_csv('archive.zip')

# Preview the raw table: banner, first rows, then a blank line and the shape.
print("Original Data", df.head(), sep="\n")
print()
print("Shape:", df.shape)


# 2️⃣ Preprocess
# Fill missing Age with median
df['Age'] = df['Age'].fillna(df['Age'].median())
# Fill Embarked with most frequent
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
# Encode Sex and Embarked as integer codes (a fresh encoder per column)
for col in ['Sex', 'Embarked']:
    df[col] = LabelEncoder().fit_transform(df[col])

# Report once, after the loop has finished. The banner previously sat inside
# the loop body and was therefore printed once per encoded column.
print("\n After Preprocessing")
print(df.head())
print("\nShape:", df.shape)


# 3️⃣ Select features and target
# Column names are spelled exactly as they appear in the CSV header
# (note the lowercase 'sibsp' and the target column '2urvived').
features = ['Pclass', 'Sex', 'Age', 'sibsp', 'Parch', 'Fare', 'Embarked']
X = df[features]
y = df['2urvived']

# 4️⃣ Split data
# Hold out 20% of the rows for the final test. stratify=y keeps the class
# ratio the same in the train and test parts, so the reported accuracy is not
# skewed by an unlucky split. random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 5️⃣ Define LightGBM and hyperparameter search space
# subsample_freq=1 switches row bagging on. With LightGBM's default of 0 no
# bagging is performed, so the 'subsample' values tried by the search below
# would have no effect on the model.
lgbm = lgb.LGBMClassifier(
    objective='binary',
    n_estimators=200,
    subsample_freq=1,
    random_state=42,
)
# Each entry is (low, high[, prior]): integer bounds define an integer range,
# float bounds a real-valued range sampled with the given prior.
search_spaces = {
    'num_leaves': (20, 100),
    'max_depth': (3, 12),
    'learning_rate': (0.01, 0.3, 'log-uniform'),
    'min_child_samples': (5, 100),
    'subsample': (0.5, 1.0, 'uniform'),
    'colsample_bytree': (0.5, 1.0, 'uniform')
}

# 6️⃣ Setup BayesSearchCV
# Search settings are gathered in one mapping and unpacked into the
# constructor together with the estimator and its search space.
search_settings = dict(
    n_iter=30,            # number of parameter settings to evaluate
    cv=3,                 # folds used to score each setting
    scoring='accuracy',
    random_state=42,
    n_jobs=-1,            # speed up with parallel jobs
)
opt = BayesSearchCV(
    estimator=lgbm,
    search_spaces=search_spaces,
    **search_settings,
)

# 7️⃣ Fit optimizer
# Run the hyperparameter search on the training split, then keep a handle on
# the estimator the search selected.
opt.fit(X_train, y_train)
best_model = opt.best_estimator_

# 8️⃣ Evaluate & display results
# Cross-validated score of the winning setting, followed by the setting itself.
print(
    " Best CV Accuracy:",
    round(opt.best_score_, 4),
)
print(
    " Best Hyperparameters:",
    opt.best_params_,
)

# 9️⃣ Final evaluation on test set
# Score the selected model on the held-out rows that the search never saw.
y_pred = best_model.predict(X_test)
test_acc = accuracy_score(y_test, y_pred)
print(
    " Test Set Accuracy:",
    round(test_acc, 4),
)

Original Data
   Passengerid   Age     Fare  Sex  sibsp  zero  zero.1  zero.2  zero.3  \
0            1  22.0   7.2500    0      1     0       0       0       0   
1            2  38.0  71.2833    1      1     0       0       0       0   
2            3  26.0   7.9250    1      0     0       0       0       0   
3            4  35.0  53.1000    1      1     0       0       0       0   
4            5  35.0   8.0500    0      0     0       0       0       0   

   zero.4  ...  zero.12  zero.13  zero.14  Pclass  zero.15  zero.16  Embarked  \
0       0  ...        0        0        0       3        0        0       2.0   
1       0  ...        0        0        0       1        0        0       0.0   
2       0  ...        0        0        0       3        0        0       2.0   
3       0  ...        0        0        0       1        0        0       2.0   
4       0  ...        0        0        0       3        0        0       2.0   

   zero.17  zero.18  2urvived  
0        0      