added folder classification and file classification_mnist_demo.py added folder datasets and updated README.md to explain what python libraries need to be installed for running all the scripts
This commit is contained in:
parent
fe9b12b374
commit
cb5d0149f7
@ -0,0 +1,8 @@
|
|||||||
|
This reposetory is used for storing my datasets and scripts for data science.
|
||||||
|
|
||||||
|
Install this Python libraries in your virtual environment. Use (uv) pip install ...
|
||||||
|
|
||||||
|
numpy
|
||||||
|
matplotlib
|
||||||
|
openpyxl
|
||||||
|
scikit-learn
|
||||||
119
classification/classification_mnist_demo.py
Normal file
119
classification/classification_mnist_demo.py
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
import math
|
||||||
|
import matplotlib as mpl
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.svm import SVC
|
||||||
|
from sklearn.neighbors import KNeighborsClassifier
|
||||||
|
from sklearn.multiclass import OneVsOneClassifier
|
||||||
|
from sklearn.multiclass import OneVsRestClassifier
|
||||||
|
from sklearn.datasets import fetch_openml
|
||||||
|
from sklearn.linear_model import SGDClassifier
|
||||||
|
from sklearn.model_selection import cross_val_predict
|
||||||
|
from sklearn.metrics import precision_score, recall_score, f1_score
|
||||||
|
|
||||||
|
|
||||||
|
# Datensatz herunterladen
|
||||||
|
print("✅ Datensatz herunterladen")
|
||||||
|
mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')
|
||||||
|
mnist.keys()
|
||||||
|
|
||||||
|
X, y = mnist["data"], mnist["target"]
|
||||||
|
X.shape
|
||||||
|
|
||||||
|
# Ziffer aus dem Datensatz: 5
|
||||||
|
print("✅ Ziffer aus dem Datensatz: 5")
|
||||||
|
some_digit = X[0]
|
||||||
|
some_digit_image = some_digit.reshape(28, 28)
|
||||||
|
plt.imshow(some_digit_image, cmap=mpl.cm.binary)
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
# Ziffer aus dem Datensatz: 0
|
||||||
|
print("✅ Ziffer aus dem Datensatz: 0")
|
||||||
|
some_other_digit = X[1]
|
||||||
|
some_other_digit_image = some_other_digit.reshape(28, 28)
|
||||||
|
plt.imshow(some_other_digit_image, cmap=mpl.cm.binary)
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
# Label
|
||||||
|
print("✅ Label")
|
||||||
|
print(y[0])
|
||||||
|
y = y.astype(np.uint8)
|
||||||
|
|
||||||
|
# Zahlen Matrix
|
||||||
|
print("✅ Zahlen Matrix")
|
||||||
|
i = 1
|
||||||
|
for number in some_digit:
|
||||||
|
#28 Spalten
|
||||||
|
if i < 28:
|
||||||
|
if number > 0:
|
||||||
|
print("\x1b[31m{:03d}".format(math.trunc(number.item())), end = '\x1b[0m ')
|
||||||
|
else:
|
||||||
|
print("{:03d}".format(math.trunc(number.item())), end = ' ')
|
||||||
|
else:
|
||||||
|
print("{:03d}".format(math.trunc(number.item())))
|
||||||
|
i = 0
|
||||||
|
i = i+1
|
||||||
|
|
||||||
|
# Train-Test-Split
|
||||||
|
print("✅ Train-Test-Split")
|
||||||
|
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
|
||||||
|
|
||||||
|
# Testdaten vorbereiten für die Klassifikation der Ziffer 5
|
||||||
|
print("✅ Testdaten vorbereiten für die Klassifikation der Ziffer 5")
|
||||||
|
|
||||||
|
y_train_5 = (y_train == 5)
|
||||||
|
y_test_5 = (y_test == 5)
|
||||||
|
|
||||||
|
print(y_train_5)
|
||||||
|
|
||||||
|
# Logistische Regression zur binären Klassifikation (Ziffer aus dem Datensatz: 5)
|
||||||
|
print("✅ Logistische Regression zur binären Klassifikation")
|
||||||
|
model_log = SGDClassifier(loss="log_loss", max_iter=1000, tol=1e-3, random_state=42)
|
||||||
|
model_log.fit(X_train, y_train_5)
|
||||||
|
model_log.predict([some_digit])
|
||||||
|
|
||||||
|
# Support Vector Machine zur binären Klassifikation (Ziffer aus dem Datensatz: 0)
|
||||||
|
print("✅ Support Vector Machine zur binären Klassifikation")
|
||||||
|
model_hinge = SGDClassifier(loss="hinge", max_iter=1000, tol=1e-3, random_state=42)
|
||||||
|
model_hinge.fit(X_train, y_train_5)
|
||||||
|
model_hinge.predict([some_other_digit])
|
||||||
|
|
||||||
|
# Evaluation
|
||||||
|
print("✅ Evaluation")
|
||||||
|
model = model_hinge
|
||||||
|
y_train_pred = cross_val_predict(model_hinge, X_train, y_train_5, cv=3)
|
||||||
|
y_test_pred = cross_val_predict(model_hinge, X_test, y_test_5, cv=3)
|
||||||
|
#precision_score(y_train_5, y_train_pred)
|
||||||
|
precision_score(y_test_5, y_test_pred)
|
||||||
|
#recall_score(y_train_5, y_train_pred)
|
||||||
|
recall_score(y_test_5, y_test_pred)
|
||||||
|
#f1_score(y_train_5, y_train_pred)
|
||||||
|
f1_score(y_test_5, y_test_pred)
|
||||||
|
|
||||||
|
# One-versus-One (OvO)
|
||||||
|
print("✅ One-versus-One (OvO)")
|
||||||
|
model_ovo = OneVsOneClassifier(SVC(gamma="auto", random_state=42))
|
||||||
|
model_ovo.fit(X_train[:100], y_train[:100])
|
||||||
|
model_ovo.predict([some_digit])
|
||||||
|
|
||||||
|
# One-versus-the-Rest (OvR)
|
||||||
|
print("✅ One-versus-the-Rest (OvR)")
|
||||||
|
model_ovr = OneVsRestClassifier(SVC(gamma="auto", random_state=42))
|
||||||
|
model_ovr.fit(X_train[:100], y_train[:100])
|
||||||
|
model_ovr.predict([some_digit])
|
||||||
|
|
||||||
|
# Multilabel Classification
|
||||||
|
print("✅ Multilabel Classification")
|
||||||
|
y_train_large = (y_train >= 7) # grosse ziffern (7,8,9)
|
||||||
|
y_train_odd = (y_train % 2 == 1) # ungerade = true, gerade = false
|
||||||
|
y_multilabel = np.c_[y_train_large, y_train_odd] # 1-D array als spalte in a 2-D array konvertieren
|
||||||
|
|
||||||
|
model_knn = KNeighborsClassifier()
|
||||||
|
model_knn.fit(X_train, y_multilabel)
|
||||||
|
|
||||||
|
# Multiclass Multioutput Classification
|
||||||
|
print("✅ Multiclass Multioutput Classification")
|
||||||
|
model_svc = SVC(gamma="auto", random_state=42)
|
||||||
|
model_svc.fit(X_train[:1000], y_train[:1000]) # y_train, not y_train_5
|
||||||
|
model_svc.predict([some_digit])
|
||||||
|
model_svc.classes_
|
||||||
BIN
datasets/hardrock100_results_2022.xlsx
Normal file
BIN
datasets/hardrock100_results_2022.xlsx
Normal file
Binary file not shown.
BIN
datasets/hardrock100_results_2022_full.xlsx
Normal file
BIN
datasets/hardrock100_results_2022_full.xlsx
Normal file
Binary file not shown.
@ -56,6 +56,7 @@ def main():
|
|||||||
|
|
||||||
df["finish_seconds"] = df["finish"].apply(time_str_to_seconds)
|
df["finish_seconds"] = df["finish"].apply(time_str_to_seconds)
|
||||||
|
|
||||||
|
# create boxplot
|
||||||
df.boxplot(column=["finish_seconds"])
|
df.boxplot(column=["finish_seconds"])
|
||||||
plt.title("Verteilung der Zielzeiten in Sekunden")
|
plt.title("Verteilung der Zielzeiten in Sekunden")
|
||||||
plt.ylabel("Sekunden")
|
plt.ylabel("Sekunden")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user