{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data Processing\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Data Visualization\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Training / Evaluation\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.metrics import f1_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loading Data\n",
    "# Path is relative to the notebook's working directory; the trailing slash matters\n",
    "# because the cells below build file paths by plain string concatenation.\n",
    "dataset_path = 'datasets/UCI HAR Dataset/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load feature names (whitespace-separated columns: feature_id, feature_name)\n",
    "# Raw string for the regex separator: '\\s' is an invalid escape in a normal string literal.\n",
    "features = pd.read_csv(f'{dataset_path}features.txt', sep=r'\\s+', names=['feature_id', 'feature_name'])\n",
    "feature_names = features['feature_name']\n",
    "\n",
    "# Creating a unique name for \"duplicate\" feature names (example: fBodyAcc-bandsEnergy()-1,8 )\n",
    "# The first occurrence keeps its original name; each repeat gets a numeric suffix\n",
    "# (_1, _2, ...). The resulting list is used as `names=` for read_csv below,\n",
    "# which does not accept duplicate column labels.\n",
    "name_count = {}  # feature name -> number of repeats seen so far (0 after first occurrence)\n",
    "unique_feature_names = []\n",
    "\n",
    "for name in feature_names:\n",
    "    if name in name_count:\n",
    "        name_count[name] += 1\n",
    "        unique_feature_names.append(f\"{name}_{name_count[name]}\")\n",
    "    else:\n",
    "        name_count[name] = 0\n",
    "        unique_feature_names.append(name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loading training data\n",
    "X_train = pd.read_csv(f'{dataset_path}train/X_train.txt', sep=r'\\s+', names=unique_feature_names)\n",
    "y_train = pd.read_csv(f'{dataset_path}train/y_train.txt', sep=r'\\s+', names=['Activity'])\n",
    "\n",
    "# Loading testing data\n",
    "X_test = pd.read_csv(f'{dataset_path}test/X_test.txt', sep=r'\\s+', names=unique_feature_names)\n",
    "# NOTE(review): the label column is 'activity' here but 'Activity' for y_train -\n",
    "# confirm how later cells index y_test before unifying the two names.\n",
    "y_test = pd.read_csv(f'{dataset_path}test/y_test.txt', sep=r'\\s+', names=['activity'])"
   ]
  },
  { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Activity | \n", "
---|---|
0 | \n", "5 | \n", "
1 | \n", "5 | \n", "
2 | \n", "5 | \n", "
3 | \n", "5 | \n", "
4 | \n", "5 | \n", "
\n", " | tBodyAcc-mean()-X | \n", "tBodyAcc-mean()-Y | \n", "tBodyAcc-mean()-Z | \n", "tBodyAcc-std()-X | \n", "tBodyAcc-std()-Y | \n", "tBodyAcc-std()-Z | \n", "tBodyAcc-mad()-X | \n", "tBodyAcc-mad()-Y | \n", "tBodyAcc-mad()-Z | \n", "tBodyAcc-max()-X | \n", "... | \n", "fBodyBodyGyroJerkMag-meanFreq() | \n", "fBodyBodyGyroJerkMag-skewness() | \n", "fBodyBodyGyroJerkMag-kurtosis() | \n", "angle(tBodyAccMean,gravity) | \n", "angle(tBodyAccJerkMean),gravityMean) | \n", "angle(tBodyGyroMean,gravityMean) | \n", "angle(tBodyGyroJerkMean,gravityMean) | \n", "angle(X,gravityMean) | \n", "angle(Y,gravityMean) | \n", "angle(Z,gravityMean) | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.288585 | \n", "-0.020294 | \n", "-0.132905 | \n", "-0.995279 | \n", "-0.983111 | \n", "-0.913526 | \n", "-0.995112 | \n", "-0.983185 | \n", "-0.923527 | \n", "-0.934724 | \n", "... | \n", "-0.074323 | \n", "-0.298676 | \n", "-0.710304 | \n", "-0.112754 | \n", "0.030400 | \n", "-0.464761 | \n", "-0.018446 | \n", "-0.841247 | \n", "0.179941 | \n", "-0.058627 | \n", "
1 | \n", "0.278419 | \n", "-0.016411 | \n", "-0.123520 | \n", "-0.998245 | \n", "-0.975300 | \n", "-0.960322 | \n", "-0.998807 | \n", "-0.974914 | \n", "-0.957686 | \n", "-0.943068 | \n", "... | \n", "0.158075 | \n", "-0.595051 | \n", "-0.861499 | \n", "0.053477 | \n", "-0.007435 | \n", "-0.732626 | \n", "0.703511 | \n", "-0.844788 | \n", "0.180289 | \n", "-0.054317 | \n", "
2 | \n", "0.279653 | \n", "-0.019467 | \n", "-0.113462 | \n", "-0.995380 | \n", "-0.967187 | \n", "-0.978944 | \n", "-0.996520 | \n", "-0.963668 | \n", "-0.977469 | \n", "-0.938692 | \n", "... | \n", "0.414503 | \n", "-0.390748 | \n", "-0.760104 | \n", "-0.118559 | \n", "0.177899 | \n", "0.100699 | \n", "0.808529 | \n", "-0.848933 | \n", "0.180637 | \n", "-0.049118 | \n", "
3 | \n", "0.279174 | \n", "-0.026201 | \n", "-0.123283 | \n", "-0.996091 | \n", "-0.983403 | \n", "-0.990675 | \n", "-0.997099 | \n", "-0.982750 | \n", "-0.989302 | \n", "-0.938692 | \n", "... | \n", "0.404573 | \n", "-0.117290 | \n", "-0.482845 | \n", "-0.036788 | \n", "-0.012892 | \n", "0.640011 | \n", "-0.485366 | \n", "-0.848649 | \n", "0.181935 | \n", "-0.047663 | \n", "
4 | \n", "0.276629 | \n", "-0.016570 | \n", "-0.115362 | \n", "-0.998139 | \n", "-0.980817 | \n", "-0.990482 | \n", "-0.998321 | \n", "-0.979672 | \n", "-0.990441 | \n", "-0.942469 | \n", "... | \n", "0.087753 | \n", "-0.351471 | \n", "-0.699205 | \n", "0.123320 | \n", "0.122542 | \n", "0.693578 | \n", "-0.615971 | \n", "-0.847865 | \n", "0.185151 | \n", "-0.043892 | \n", "
5 rows × 561 columns
\n", "