# cds1011-ls2/py/cartesian.py

from py.my_functions import *

# create dataframe from csv and drop any row with null values
def load_dataframe(file_path):
    """Read the FEATURES columns of a CSV file and drop rows with null values.

    Args:
        file_path: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        A DataFrame restricted to the columns listed in ``FEATURES`` with
        every row that contains a null value removed.

    Raises:
        SystemExit: If the file does not exist. The error text is written to
            stderr and the process exit status is non-zero.
    """
    # Local import keeps this block self-contained; the module's top-level
    # imports are not all visible from here.
    import sys

    # NOTE(review): FEATURES is presumably supplied by the star import at the
    # top of the file -- confirm.
    try:
        df = pd.read_csv(file_path, usecols=FEATURES)
    except FileNotFoundError as error:
        # quit() is a helper of the interactive shell (it is missing when the
        # interpreter runs without the site module) and exits with status 0.
        # sys.exit() with a message reports on stderr and signals failure.
        sys.exit(str(error))
    return df.dropna()

def get_score_from_cli():
    """Ask the user for an x and a y value on the command line.

    Returns:
        A NumPy array of shape (1, 2) holding ``[[x, y]]`` as floats, or
        ``None`` when one of the entered values cannot be parsed as a float
        (a hint is printed in that case).
    """
    coordinates = []
    for prompt in ("x: ", "y: "):
        raw_value = input(prompt)
        try:
            coordinates.append(float(raw_value))
        except ValueError:
            # Stop at the first non-numeric entry; the remaining prompt is skipped.
            print("Invalid input. Please enter numeric values.")
            return None
    # A single row with one column per coordinate.
    return np.array(coordinates).reshape(1, -1)

def _print_dataframe_info(df):
    """Print summary statistics, the first rows and the column overview of df."""
    print(df.describe())
    print(df.head())
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would only add a stray "None" line.
    df.info()


def _show_graphs(df):
    """Show a count plot, a correlation heatmap and an x/y scatter plot of df."""
    sns.countplot(x=df["points"])
    plt.show()

    sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
    plt.show()

    sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
    plt.show()


def cartesian(file_path, inf, graph):
    """Train three classifiers on x/y coordinates and predict "points" for a user-entered pair.

    The data is loaded with ``load_dataframe``, split 60/40 into training and
    test sets (``random_state=0``), and used to fit a random forest, a
    decision tree and a k-nearest-neighbours classifier. For every model the
    macro F1 score on the test set is printed twice: once computed by
    ``calc_f1_macro`` and once by ``f1_score``. Afterwards the user is asked
    for an x/y pair and each model's predicted "points" label is printed.

    Args:
        file_path: Path of the CSV file passed to ``load_dataframe``.
        inf: If truthy, print descriptive information about the DataFrame.
        graph: If truthy, display the exploratory plots.

    Returns:
        None. Results are printed. If the user enters a non-numeric
        coordinate, the function returns without predicting.
    """
    df = load_dataframe(file_path)

    if inf:
        _print_dataframe_info(df)

    if graph:
        _show_graphs(df)

    X = df[["x", "y"]]

    # One indicator column per distinct "points" value; the column labels are
    # used further down to translate a prediction back into its label.
    y = pd.get_dummies(df['points'])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

    models = {
        "Random Forest Classifier": RandomForestClassifier(n_estimators=700, random_state=0),
        "Decision Tree Classifier": DecisionTreeClassifier(random_state=0),
        "K-Neighbors": KNeighborsClassifier(n_neighbors=5),
    }

    for model in models.values():
        model.fit(X_train.values, y_train.values)

    for name, model in models.items():
        pred = model.predict(X_test.values)

        my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
        print(f'My F1 score of {name} is {my_f1_macro_score}')

        f1_sklearn = f1_score(y_test.values, pred, average='macro')
        print(f'Sklearn F1 score of {name} is {f1_sklearn}')

    score = get_score_from_cli()
    if score is None:
        # get_score_from_cli() has already told the user about the invalid
        # input; passing None on to predict() would only raise.
        return

    for name, model in models.items():
        pred = model.predict(score)
        # Position of the largest entry in the single predicted indicator row.
        column_position = pd.DataFrame(pred).idxmax(axis=1).iloc[0]
        # Read the label from the training target's own columns, so the
        # mapping matches what the models were fitted on and df is not
        # modified.
        points = y.columns[column_position]
        print(f"{name}: {points} Punkte")