316 lines
7.4 KiB
Plaintext
316 lines
7.4 KiB
Plaintext
|
---
|
||
|
title: "Quantitative Methods HS22"
|
||
|
author: "Marc Gauch"
|
||
|
date: "`r Sys.Date()`"
|
||
|
output:
|
||
|
html_document:
|
||
|
toc: true
|
||
|
toc_depth: 2
|
||
|
toc_float: true
|
||
|
number_sections: true
|
||
|
---
|
||
|
|
||
|
```{r setup, include=FALSE}
|
||
|
# Default chunk option: show the R source of each chunk in the knitted output.
knitr::opts_chunk$set(echo = TRUE)
|
||
|
```
|
||
|
|
||
|
# Preparation
|
||
|
|
||
|
```{r, message=FALSE}
|
||
|
# Make sure the tidyverse is installed, then attach it.
# requireNamespace() only checks availability (it does not attach anything),
# so the package is attached exactly once by library(), which stops with an
# error if the installation did not succeed.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
|
||
|
```
|
||
|
|
||
|
# Load Data
|
||
|
## Load from CSV
|
||
|
``` {r loadData}
|
||
|
# Read the survey export without printing the column-type message and pass
# the result through as_tibble().
litdata <- "DataLit_R.csv" %>%
  read_csv(show_col_types = FALSE) %>%
  as_tibble()
|
||
|
```
|
||
|
|
||
|
## First inspection of data {.tabset}
|
||
|
### Summary
|
||
|
``` {r}
|
||
|
# Per-column summary of the raw data as it was read from the CSV file.
summary(litdata)
|
||
|
```
|
||
|
### Glimpse
|
||
|
``` {r}
|
||
|
# Compact overview of the raw data: one line per column with its type and
# first values.
glimpse(litdata)
|
||
|
```
|
||
|
### Print
|
||
|
``` {r}
|
||
|
# Default tibble printout of the raw data.
print(litdata)
|
||
|
```
|
||
|
### Head
|
||
|
``` {r}
|
||
|
# First rows of the raw data (head() shows six rows by default).
head(litdata)
|
||
|
```
|
||
|
|
||
|
# Data cleaning
|
||
|
## Converting strings to numbers and *Keine Antwort* to *NaN*
|
||
|
``` {r}
|
||
|
# Recode the two labelled scale end points and the "no answer" marker.
#
# Only character columns can contain these labels, so the recoding is limited
# to them; columns of any other type are left untouched and are never compared
# with text.
#
# NOTE: the affected columns are still character at this point, so the
# replacement values are stored as the strings "5", "1" and "NaN". They only
# turn into real numbers (and a real NaN) once a column is converted with
# as.numeric().
litdata <- litdata %>%
  mutate(across(
    where(is.character),
    function(x) {
      x <- replace(x, x == "Stimme voll zu5", 5)
      x <- replace(x, x == "Stimme überhaupt nicht zu1", 1)
      replace(x, x == "Keine Antwort-", NaN)
    }
  ))
|
||
|
```
|
||
|
|
||
|
## Make it numeric
|
||
|
The following code is **not** run. It shows a way to convert columns automatically in a loop. The approach works, but it cannot be applied to every column because some columns are not numeric.
|
||
|
```{r, eval=FALSE}
|
||
|
# Names of every column currently present in the data.
litdataColnames <- names(litdata)

# Columns that must keep their current type.
litdataNonNumericCols <- c(
  "submitdate", "startlanguage", "startdate",
  "datestamp", "lastpage", "seed"
)

# Every column that is not in the exclusion list above.
litdataColsToMakeNumeric <- litdataColnames[
  is.na(match(litdataColnames, litdataNonNumericCols))
]
print(litdataColsToMakeNumeric)

# The computed list is replaced by the single column "R1" before converting.
litdataColsToMakeNumeric <- "R1"

# Convert each selected column in place.
for (col in litdataColsToMakeNumeric) {
  litdata[[col]] <- as.numeric(litdata[[col]])
}
|
||
|
```
|
||
|
|
||
|
First we rename all the columns
|
||
|
```{r}
|
||
|
# Replace the survey export codes with short analysis names.
# The lookup vector reads new_name = "old_name"; rename() keeps every column
# at its position and only changes the label.
litdata <- litdata %>%
  rename(all_of(c(
    # Block A: W001-W009
    A1 = "W001", A2 = "W002", A3 = "W003",
    A4 = "W004", A5 = "W005", A6 = "W006",
    A7 = "W007", A8 = "W008", A9 = "W009",
    # Block B: K001-K009
    B1 = "K001", B2 = "K002", B3 = "K003",
    B4 = "K004", B5 = "K005", B6 = "K006",
    B7 = "K007", B8 = "K008", B9 = "K009",
    # Block C: six questions (TK001-TK006) with four sub-items each
    C1_1 = "TK001_01", C1_2 = "TK001_02", C1_3 = "TK001_03", C1_4 = "TK001_04",
    C2_1 = "TK002_01", C2_2 = "TK002_02", C2_3 = "TK002_03", C2_4 = "TK002_04",
    C3_1 = "TK003_01", C3_2 = "TK003_02", C3_3 = "TK003_03", C3_4 = "TK003_04",
    C4_1 = "TK004_01", C4_2 = "TK004_02", C4_3 = "TK004_03", C4_4 = "TK004_04",
    C5_1 = "TK005_01", C5_2 = "TK005_02", C5_3 = "TK005_03", C5_4 = "TK005_04",
    C6_1 = "TK006_01", C6_2 = "TK006_02", C6_3 = "TK006_03", C6_4 = "TK006_04",
    # Block D: H001 has seven sub-items, H004/H005 have an extra "other" field
    D1_1 = "H001_001", D1_2 = "H001_002", D1_3 = "H001_003", D1_4 = "H001_004",
    D1_5 = "H001_005", D1_6 = "H001_006", D1_7 = "H001_007",
    D2 = "H002",
    D3 = "H003",
    D4 = "H004", D4_comment = "H004_other",
    D5 = "H005", D5_comment = "H005_other",
    D6 = "H006",
    D7 = "H007",
    D8 = "H008",
    # Block E
    E1 = "R1"
  )))
|
||
|
```
|
||
|
Then we change the datatype and fix the values
|
||
|
```{r}
|
||
|
# Convert all scale items to numeric in one step:
# A1-A9, B1-B9 and C1_1-C6_4 (six questions with four sub-items each).
litdata <- litdata %>%
  mutate(across(
    all_of(c(
      paste0("A", 1:9),
      paste0("B", 1:9),
      paste0("C", rep(1:6, each = 4), "_", 1:4)
    )),
    as.numeric
  ))
|
||
|
|
||
|
# Turn the yes/no style answers into logical columns.
# D1_1-D1_7: "Ja" becomes TRUE and "Nicht Gewählt" becomes FALSE.
# D2:        "Ja" becomes TRUE and "Nein" becomes FALSE.
# Any other value is passed to as.logical() unchanged, so text that
# as.logical() does not recognise ends up as NA.
litdata <- litdata %>%
  mutate(
    across(
      all_of(paste0("D1_", 1:7)),
      function(x) {
        as.logical(
          ifelse(x == "Ja", TRUE, ifelse(x == "Nicht Gewählt", FALSE, x))
        )
      }
    ),
    D2 = as.logical(
      ifelse(D2 == "Ja", TRUE, ifelse(D2 == "Nein", FALSE, D2))
    )
  )
|
||
|
|
||
|
# Remaining columns of blocks D and E:
# - D3, D4_comment, D5_comment and E1 are free text and are left as they are.
# - D4, D5 and D8 are converted to factors.
# - D6 is converted to a factor as well: it cannot be a number because one of
#   its answer options is "2010 or earlier".
# - D7 is converted to numeric.
litdata <- litdata %>%
  mutate(
    across(all_of(c("D4", "D5", "D6", "D8")), as.factor),
    D7 = as.numeric(D7)
  )
|
||
|
|
||
|
```
|
||
|
|
||
|
|
||
|
## Second inspection of data {.tabset}
|
||
|
### Summary
|
||
|
``` {r}
|
||
|
# Per-column summary after renaming and type conversion.
summary(litdata)
|
||
|
```
|
||
|
### Glimpse
|
||
|
``` {r}
|
||
|
# Compact overview after cleaning: one line per column with its type and
# first values.
glimpse(litdata)
|
||
|
```
|
||
|
### Print
|
||
|
``` {r}
|
||
|
# Default tibble printout of the cleaned data.
print(litdata)
|
||
|
```
|
||
|
### Head
|
||
|
``` {r}
|
||
|
# First rows of the cleaned data (head() shows six rows by default).
head(litdata)
|
||
|
```
|
||
|
|
||
|
# Selbststudium 1
|
||
|
*Berechnen Sie die Häufigkeiten für die Variablen W003, K003, H001_001, H005, H007 und H008.*
|
||
|
|
||
|
## Data {.tabset}
|
||
|
```{r}
|
||
|
#' Print a frequency table for one column and return a bar chart of it
#'
#' Counts how often each distinct value of `column` occurs, adds the share of
#' each value in percent, prints that table (up to 100 rows) and returns a
#' ggplot bar chart of the absolute counts with x-axis labels rotated by 45
#' degrees.
#'
#' @param table A data frame or tibble containing `column`.
#' @param column Name of the column to tabulate, given as a string.
#' @return A ggplot object with one bar per distinct value.
displayFunction1 <- function(table, column) {
  # Keep only the requested column and give it the fixed name `value` so the
  # rest of the function does not depend on the column's real name.
  frequencies <- table %>%
    select(value = all_of(column)) %>%
    count(value) %>%
    mutate(percentage = prop.table(n) * 100)

  print(frequencies, n = 100)

  # geom_col() draws the bar heights directly from the counts in `n`.
  ggplot(frequencies, aes(x = value, y = n)) +
    geom_col() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
|
||
|
```
|
||
|
### A3 (W003) {-}
|
||
|
```{r}
|
||
|
displayFunction1(litdata, "A3")
|
||
|
```
|
||
|
|
||
|
### B3 (K003) {-}
|
||
|
```{r}
|
||
|
displayFunction1(litdata, "B3")
|
||
|
```
|
||
|
|
||
|
### D1_1 (H001_001) {-}
|
||
|
```{r}
|
||
|
displayFunction1(litdata, "D1_1")
|
||
|
```
|
||
|
|
||
|
### D5 (H005) {-}
|
||
|
```{r}
|
||
|
displayFunction1(litdata, "D5")
|
||
|
```
|
||
|
|
||
|
### D7 (H007) {-}
|
||
|
```{r}
|
||
|
displayFunction1(litdata, "D7")
|
||
|
```
|
||
|
|
||
|
Die Warnung resultiert daraus, dass es sehr viele *NA* gibt.
|
||
|
|
||
|
### D8 (H008) {-}
|
||
|
```{r}
|
||
|
displayFunction1(litdata, "D8")
|
||
|
```
|