"""K-means clustering of penguins by flipper length and culmen length.

The script:

1. loads the two feature columns from ``penguins.csv``,
2. replaces missing values with the respective column mean,
3. standardizes the features,
4. plots the k-means inertia for k = 2..6 (elbow plot),
5. fits a final 3-cluster model and prints its silhouette score.
"""

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans

# Fixed seed and explicit number of restarts so that the elbow plot and the
# silhouette score are reproducible between runs and do not depend on the
# library's default for ``n_init``.
RANDOM_STATE = 42
N_INIT = 10

features = ["Flipper Length (mm)", "Culmen Length (mm)"]
penguins = pd.read_csv("penguins.csv", usecols=features)

# Optional exploratory analysis (disabled):
# print(penguins.head())
# print(penguins.info())
# print(penguins.describe())
# sns.pairplot(penguins.dropna())
# plt.show()

# Are there missing values?
# print(penguins.isnull().sum())
# Impute missing entries with the mean of their column.
penguins = penguins.fillna(penguins.mean(numeric_only=True))
# print(penguins.isnull().sum())

X = penguins

# Standardize the features before clustering.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Elbow method: record the inertia for each candidate number of clusters.
inertias = []
k_values = range(2, 7)

for k in k_values:
    model = KMeans(n_clusters=k, n_init=N_INIT, random_state=RANDOM_STATE)
    model.fit(X_scaled)
    inertias.append(model.inertia_)

df_inertias = pd.DataFrame({"x": list(k_values), "y": inertias})
df_inertias.plot(
    x="x",
    y="y",
    xticks=list(k_values),
    grid=True,
    figsize=(8, 6),
    xlabel="k",
    ylabel="inertia",
)
plt.show()

# Final model with k = 3, evaluated with the silhouette score.
kmeans = KMeans(n_clusters=3, n_init=N_INIT, random_state=RANDOM_STATE)
clusters = kmeans.fit_predict(X_scaled)

silhouette_kmeans = silhouette_score(X_scaled, clusters)
print("KMEans Silhouette", silhouette_kmeans)