import math import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np from sklearn.svm import SVC from sklearn.neighbors import KNeighborsClassifier from sklearn.multiclass import OneVsOneClassifier from sklearn.multiclass import OneVsRestClassifier from sklearn.datasets import fetch_openml from sklearn.linear_model import SGDClassifier from sklearn.model_selection import cross_val_predict from sklearn.metrics import precision_score, recall_score, f1_score # Datensatz herunterladen print("✅ Datensatz herunterladen") mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto') mnist.keys() X, y = mnist["data"], mnist["target"] X.shape # Ziffer aus dem Datensatz: 5 print("✅ Ziffer aus dem Datensatz: 5") some_digit = X[0] some_digit_image = some_digit.reshape(28, 28) plt.imshow(some_digit_image, cmap=mpl.cm.binary) plt.show() # Ziffer aus dem Datensatz: 0 print("✅ Ziffer aus dem Datensatz: 0") some_other_digit = X[1] some_other_digit_image = some_other_digit.reshape(28, 28) plt.imshow(some_other_digit_image, cmap=mpl.cm.binary) plt.show() # Label print("✅ Label") print(y[0]) y = y.astype(np.uint8) # Zahlen Matrix print("✅ Zahlen Matrix") i = 1 for number in some_digit: #28 Spalten if i < 28: if number > 0: print("\x1b[31m{:03d}".format(math.trunc(number.item())), end = '\x1b[0m ') else: print("{:03d}".format(math.trunc(number.item())), end = ' ') else: print("{:03d}".format(math.trunc(number.item()))) i = 0 i = i+1 # Train-Test-Split print("✅ Train-Test-Split") X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:] # Testdaten vorbereiten für die Klassifikation der Ziffer 5 print("✅ Testdaten vorbereiten für die Klassifikation der Ziffer 5") y_train_5 = (y_train == 5) y_test_5 = (y_test == 5) print(y_train_5) # Logistische Regression zur binären Klassifikation (Ziffer aus dem Datensatz: 5) print("✅ Logistische Regression zur binären Klassifikation") model_log = SGDClassifier(loss="log_loss", max_iter=1000, tol=1e-3, random_state=42) model_log.fit(X_train, y_train_5) model_log.predict([some_digit]) # Support Vector Machine zur binären Klassifikation (Ziffer aus dem Datensatz: 0) print("✅ Support Vector Machine zur binären Klassifikation") model_hinge = SGDClassifier(loss="hinge", max_iter=1000, tol=1e-3, random_state=42) model_hinge.fit(X_train, y_train_5) model_hinge.predict([some_other_digit]) # Evaluation print("✅ Evaluation") model = model_hinge y_train_pred = cross_val_predict(model_hinge, X_train, y_train_5, cv=3) y_test_pred = cross_val_predict(model_hinge, X_test, y_test_5, cv=3) #precision_score(y_train_5, y_train_pred) precision_score(y_test_5, y_test_pred) #recall_score(y_train_5, y_train_pred) recall_score(y_test_5, y_test_pred) #f1_score(y_train_5, y_train_pred) f1_score(y_test_5, y_test_pred) # One-versus-One (OvO) print("✅ One-versus-One (OvO)") model_ovo = OneVsOneClassifier(SVC(gamma="auto", random_state=42)) model_ovo.fit(X_train[:100], y_train[:100]) model_ovo.predict([some_digit]) # One-versus-the-Rest (OvR) print("✅ One-versus-the-Rest (OvR)") model_ovr = OneVsRestClassifier(SVC(gamma="auto", random_state=42)) model_ovr.fit(X_train[:100], y_train[:100]) model_ovr.predict([some_digit]) # Multilabel Classification print("✅ Multilabel Classification") y_train_large = (y_train >= 7) # grosse ziffern (7,8,9) y_train_odd = (y_train % 2 == 1) # ungerade = true, gerade = false y_multilabel = np.c_[y_train_large, y_train_odd] # 1-D array als spalte in a 2-D array konvertieren model_knn = KNeighborsClassifier() model_knn.fit(X_train, y_multilabel) # Multiclass Multioutput Classification print("✅ Multiclass Multioutput Classification") model_svc = SVC(gamma="auto", random_state=42) model_svc.fit(X_train[:1000], y_train[:1000]) # y_train, not y_train_5 model_svc.predict([some_digit]) model_svc.classes_