1 Preparation

if (!require(tidyverse)) {
  install.packages("tidyverse")
  library(tidyverse)
}
if (!require(e1071)) {
  install.packages("e1071")
  library(e1071)
}

2 Own functions

2.1 Frequency Table ordered from wish.com

freq <- function(data, rounded_digits = 2) {
  # Builds and prints a frequency table for `data`.
  #
  # data:           vector of observations; missing values are counted
  #                 separately and reported in their own row.
  # rounded_digits: number of decimals used for all percentage columns.
  #
  # Returns (invisibly, via print) a matrix with one row per observed value
  # plus the summary rows Valid_Total, NAs and Total, and the columns
  # frequency, percent, valid_percent and cumulative_percent.

  # Observation counts: everything, missing, and non-missing.
  n_all <- length(data)
  n_missing <- sum(is.na(data))
  n_valid <- n_all - n_missing

  # table() leaves missing values out, so these are counts per observed value.
  counts <- table(data)
  valid_share <- prop.table(counts)
  # Share of each value relative to ALL observations (missing ones included).
  overall_share <- counts / n_all * 100

  per_value <- cbind(
    frequency = counts,
    percent = round(overall_share, digits = rounded_digits),
    valid_percent = round(valid_share * 100, digits = rounded_digits),
    cumulative_percent = round(cumsum(valid_share) * 100, digits = rounded_digits)
  )

  # Summary rows; NaN marks cells that have no meaningful value.
  print(rbind(
    per_value,
    Valid_Total = c(
      n_valid,
      round(sum(overall_share), digits = rounded_digits),
      100,
      NaN
    ),
    NAs = c(
      n_missing,
      round(n_missing / n_all * 100, digits = rounded_digits),
      NaN,
      NaN
    ),
    Total = c(n_all, 100, NaN, NaN)
  ))
}

Adapted from: https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret

2.2 Mode

getmode <- function(v) {
  # Returns the most frequent value(s) of `v`; ties yield every tied value,
  # in order of first appearance.
  candidates <- unique(v)
  # Number of times each distinct value occurs in v.
  occurrences <- tabulate(match(v, candidates))
  candidates[occurrences == max(occurrences)]
}

2.3 Remove NA

removeNA <- function(d) {
  # Returns `d` with all missing entries dropped.
  is_present <- !is.na(d)
  d[is_present]
}

3 Load Data

3.1 Load from CSV

# Read the survey export and keep it as a tibble. show_col_types = FALSE is
# passed so that readr does not print the guessed column types.
litdata <- as_tibble(
  read_csv("DataLit_R.csv", show_col_types = FALSE)
)

3.2 First inspection of data

3.2.1 Summary

# Column-by-column summary of the freshly imported data.
summary(litdata)
##        id       submitdate           lastpage      startlanguage     
##  Min.   :  1   Length:313         Min.   :-1.000   Length:313        
##  1st Qu.: 81   Class :character   1st Qu.: 2.000   Class :character  
##  Median :162   Mode  :character   Median : 5.000   Mode  :character  
##  Mean   :163                      Mean   : 3.556                     
##  3rd Qu.:245                      3rd Qu.: 5.000                     
##  Max.   :327                      Max.   : 5.000                     
##                                   NA's   :108                        
##       seed            startdate          datestamp             W001          
##  Min.   :5.647e+06   Length:313         Length:313         Length:313        
##  1st Qu.:5.568e+08   Class :character   Class :character   Class :character  
##  Median :1.086e+09   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :1.081e+09                                                           
##  3rd Qu.:1.637e+09                                                           
##  Max.   :2.147e+09                                                           
##                                                                              
##      W002               W003               W004               W005          
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      W006               W007               W008               W009          
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      K001               K002               K003               K004          
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      K005               K006               K007               K008          
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      K009             TK001_01           TK001_02           TK001_03        
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    TK001_04           TK002_01           TK002_02           TK002_03        
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    TK002_04           TK003_01           TK003_02           TK003_03        
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    TK003_04           TK004_01           TK004_02           TK004_03        
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    TK004_04           TK005_01           TK005_02           TK005_03        
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    TK005_04           TK006_01           TK006_02           TK006_03        
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    TK006_04           H001_001           H001_002           H001_003        
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    H001_004           H001_005           H001_006           H001_007        
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      H002               H003               H004            H004_other       
##  Length:313         Length:313         Length:313         Length:313        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      H005           H005_other          H006           H007     
##  Length:313         Mode:logical   Min.   :2012   Min.   :1971  
##  Class :character   NA's:313       1st Qu.:2018   1st Qu.:1991  
##  Mode  :character                  Median :2020   Median :1995  
##                                    Mean   :2019   Mean   :1993  
##                                    3rd Qu.:2020   3rd Qu.:1998  
##                                    Max.   :2021   Max.   :2002  
##                                    NA's   :201    NA's   :207   
##      H008                R1           
##  Length:313         Length:313        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 

3.2.2 Glimpse

# Compact overview: one line per column with its type and first values.
glimpse(litdata)
## Rows: 313
## Columns: 66
## $ id            <dbl> 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, …
## $ submitdate    <chr> "10/25/2021 11:07:44", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lastpage      <dbl> 5, NA, NA, 2, 2, NA, NA, NA, 2, 1, NA, NA, 5, NA, NA, NA…
## $ startlanguage <chr> "en", "de", "de", "de", "en", "de", "de", "de", "en", "d…
## $ seed          <dbl> 664891087, 334145431, 683903577, 2082427237, 438283320, …
## $ startdate     <chr> "10/25/2021 11:07:40", "10/25/2021 11:33:06", "10/25/202…
## $ datestamp     <chr> "10/25/2021 11:07:44", "10/25/2021 11:33:06", "10/25/202…
## $ W001          <chr> NA, NA, NA, "Stimme voll zu5", "3", NA, NA, NA, "Stimme …
## $ W002          <chr> NA, NA, NA, "Stimme voll zu5", "3", NA, NA, NA, "4", "2"…
## $ W003          <chr> NA, NA, NA, "4", "3", NA, NA, NA, "4", "4", NA, NA, "Sti…
## $ W004          <chr> NA, NA, NA, "Stimme voll zu5", "Stimme voll zu5", NA, NA…
## $ W005          <chr> NA, NA, NA, "Stimme voll zu5", "Stimme voll zu5", NA, NA…
## $ W006          <chr> NA, NA, NA, "4", "4", NA, NA, NA, "4", "4", NA, NA, "4",…
## $ W007          <chr> NA, NA, NA, "4", "Stimme voll zu5", NA, NA, NA, "Stimme …
## $ W008          <chr> NA, NA, NA, "Stimme voll zu5", "4", NA, NA, NA, "Stimme …
## $ W009          <chr> NA, NA, NA, "Stimme voll zu5", "Stimme voll zu5", NA, NA…
## $ K001          <chr> NA, NA, NA, "4", "4", NA, NA, NA, "4", NA, NA, NA, "3", …
## $ K002          <chr> NA, NA, NA, "3", "3", NA, NA, NA, "4", NA, NA, NA, "4", …
## $ K003          <chr> NA, NA, NA, "3", "Stimme voll zu5", NA, NA, NA, "3", NA,…
## $ K004          <chr> NA, NA, NA, "3", "4", NA, NA, NA, "4", NA, NA, NA, "3", …
## $ K005          <chr> NA, NA, NA, "4", "3", NA, NA, NA, "4", NA, NA, NA, "3", …
## $ K006          <chr> NA, NA, NA, "4", "4", NA, NA, NA, "4", NA, NA, NA, "4", …
## $ K007          <chr> NA, NA, NA, "4", "3", NA, NA, NA, "3", NA, NA, NA, "4", …
## $ K008          <chr> NA, NA, NA, "4", "3", NA, NA, NA, "4", NA, NA, NA, "Stim…
## $ K009          <chr> NA, NA, NA, "4", "2", NA, NA, NA, "4", NA, NA, NA, "Stim…
## $ TK001_01      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK001_02      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK001_03      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK001_04      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK002_01      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK002_02      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK002_03      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK002_04      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK003_01      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "3", NA,…
## $ TK003_02      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK003_03      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK003_04      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK004_01      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK004_02      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK004_03      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK004_04      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK005_01      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK005_02      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK005_03      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK005_04      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK006_01      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK006_02      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK006_03      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK006_04      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ H001_001      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Ja", NA…
## $ H001_002      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Nicht G…
## $ H001_003      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Ja", NA…
## $ H001_004      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Nicht G…
## $ H001_005      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Nicht G…
## $ H001_006      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Nicht G…
## $ H001_007      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Nicht G…
## $ H002          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Ja", NA…
## $ H003          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Bibliot…
## $ H004          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Gesundh…
## $ H004_other    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ H005          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Bachelo…
## $ H005_other    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ H006          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ H007          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ H008          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Weiblic…
## $ R1            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Die Fra…

3.2.3 Print

# Print the tibble (first rows only; columns that do not fit are listed below).
print(litdata)
## # A tibble: 313 × 66
##       id submit…¹ lastp…² start…³   seed start…⁴ dates…⁵ W001  W002  W003  W004 
##    <dbl> <chr>      <dbl> <chr>    <dbl> <chr>   <chr>   <chr> <chr> <chr> <chr>
##  1     1 10/25/2…       5 en      6.65e8 10/25/… 10/25/… <NA>  <NA>  <NA>  <NA> 
##  2     2 <NA>          NA de      3.34e8 10/25/… 10/25/… <NA>  <NA>  <NA>  <NA> 
##  3     3 <NA>          NA de      6.84e8 10/25/… 10/25/… <NA>  <NA>  <NA>  <NA> 
##  4     5 <NA>           2 de      2.08e9 10/25/… 10/25/… Stim… Stim… 4     Stim…
##  5     6 <NA>           2 en      4.38e8 10/25/… 10/25/… 3     3     3     Stim…
##  6     7 <NA>          NA de      2.15e9 10/25/… 10/25/… <NA>  <NA>  <NA>  <NA> 
##  7     8 <NA>          NA de      1.74e9 10/25/… 10/25/… <NA>  <NA>  <NA>  <NA> 
##  8     9 <NA>          NA de      4.64e8 10/25/… 10/25/… <NA>  <NA>  <NA>  <NA> 
##  9    10 <NA>           2 en      1.09e9 10/25/… 10/25/… Stim… 4     4     4    
## 10    11 <NA>           1 de      1.55e9 10/25/… 10/25/… 3     2     4     4    
## # … with 303 more rows, 55 more variables: W005 <chr>, W006 <chr>, W007 <chr>,
## #   W008 <chr>, W009 <chr>, K001 <chr>, K002 <chr>, K003 <chr>, K004 <chr>,
## #   K005 <chr>, K006 <chr>, K007 <chr>, K008 <chr>, K009 <chr>, TK001_01 <chr>,
## #   TK001_02 <chr>, TK001_03 <chr>, TK001_04 <chr>, TK002_01 <chr>,
## #   TK002_02 <chr>, TK002_03 <chr>, TK002_04 <chr>, TK003_01 <chr>,
## #   TK003_02 <chr>, TK003_03 <chr>, TK003_04 <chr>, TK004_01 <chr>,
## #   TK004_02 <chr>, TK004_03 <chr>, TK004_04 <chr>, TK005_01 <chr>, …

4 Data cleaning

4.1 Converting strings to numbers and "Keine Antwort" to missing values

# Recode the labelled end points of the answer scales and the explicit
# "no answer" marker. Only character columns are touched:
# - the two scale labels become the strings "5" and "1", so the columns can be
#   converted with as.numeric() afterwards;
# - "Keine Antwort-" becomes a real missing value. Assigning NaN into a
#   character vector would store the literal string "NaN" instead.
# Restricting the recode to character columns also keeps numeric and logical
# columns from being coerced by the replacement values.
litdata <- litdata %>%
  mutate(across(
    where(is.character),
    ~ case_when(
      .x == "Stimme voll zu5" ~ "5",
      .x == "Stimme überhaupt nicht zu1" ~ "1",
      .x == "Keine Antwort-" ~ NA_character_,
      # Everything else (including existing NAs) is kept unchanged.
      TRUE ~ .x
    )
  ))

4.2 Make it numeric

The following code will NOT be run. The idea is to show a way to convert all columns automatically. It works, but some columns are NOT numeric.

# Every column name currently present in the data set
litdataColnames <- colnames(litdata)
# Columns that must be left untouched
litdataNonNumericCols <- c(
  "submitdate", "startlanguage", "startdate",
  "datestamp", "lastpage", "seed"
)
# Candidates for conversion: all columns except the excluded ones
litdataColsToMakeNumeric <- setdiff(litdataColnames, litdataNonNumericCols)
print(litdataColsToMakeNumeric)
# The candidate list is overridden here, so the loop below only touches R1
litdataColsToMakeNumeric <- c("R1")
for (col in litdataColsToMakeNumeric) {
  litdata[[col]] <- as.numeric(litdata[[col]])
}

First we rename all the columns

# Give every questionnaire column a short name of the form
# <section letter><question number>[_<item number>] (new name = old name).
litdata <- litdata %>%
  rename(
    # Section A (W001-W009)
    A1 = W001,
    A2 = W002,
    A3 = W003,
    A4 = W004,
    A5 = W005,
    A6 = W006,
    A7 = W007,
    A8 = W008,
    A9 = W009,
    # Section B (K001-K009)
    B1 = K001,
    B2 = K002,
    B3 = K003,
    B4 = K004,
    B5 = K005,
    B6 = K006,
    B7 = K007,
    B8 = K008,
    B9 = K009,
    # Section C (TK001-TK006, four items each)
    C1_1 = TK001_01,
    C1_2 = TK001_02,
    C1_3 = TK001_03,
    C1_4 = TK001_04,
    C2_1 = TK002_01,
    C2_2 = TK002_02,
    C2_3 = TK002_03,
    C2_4 = TK002_04,
    C3_1 = TK003_01,
    C3_2 = TK003_02,
    C3_3 = TK003_03,
    C3_4 = TK003_04,
    C4_1 = TK004_01,
    C4_2 = TK004_02,
    C4_3 = TK004_03,
    C4_4 = TK004_04,
    C5_1 = TK005_01,
    C5_2 = TK005_02,
    C5_3 = TK005_03,
    C5_4 = TK005_04,
    C6_1 = TK006_01,
    C6_2 = TK006_02,
    C6_3 = TK006_03,
    C6_4 = TK006_04,
    # Section D (H001-H008)
    D1_1 = H001_001,
    D1_2 = H001_002,
    D1_3 = H001_003,
    D1_4 = H001_004,
    D1_5 = H001_005,
    D1_6 = H001_006,
    D1_7 = H001_007,
    D2 = H002,
    D3 = H003,
    D4 = H004,
    D4_comment = H004_other,
    D5 = H005,
    D5_comment = H005_other,
    D6 = H006,
    D7 = H007,
    D8 = H008,
    # Section E (R1)
    E1 = R1
  )

Then we change the datatype and fix the values

# Convert all rating items to numeric in one pass:
# A1-A9, B1-B9 and C1_1-C6_4 (six C questions with four items each).
litdata <- litdata %>%
  mutate(across(
    all_of(c(
      paste0("A", 1:9),
      paste0("B", 1:9),
      # rep(..., each = 4) pairs every question number with items 1 to 4.
      paste0("C", rep(1:6, each = 4), "_", 1:4)
    )),
    as.numeric
  ))

# Turn the yes/no style answers into logical columns.
# D1_1-D1_7: "Ja" -> TRUE, "Nicht Gewählt" -> FALSE.
# D2:        "Ja" -> TRUE, "Nein" -> FALSE.
# Any other value goes through as.logical(); missing values stay missing.
litdata <- litdata %>%
  mutate(
    across(
      all_of(paste0("D1_", 1:7)),
      ~ case_when(
        .x == "Ja" ~ TRUE,
        .x == "Nicht Gewählt" ~ FALSE,
        TRUE ~ as.logical(.x)
      )
    ),
    D2 = case_when(
      D2 == "Ja" ~ TRUE,
      D2 == "Nein" ~ FALSE,
      TRUE ~ as.logical(D2)
    )
  )

# Free-text columns are skipped: D3, D4_comment, D5_comment and E1.

# D4, D5, D6 and D8 become factors. D6 can't be a number because there is a
# "2010 or earlier" option. D7 is converted to numeric.
litdata <- litdata %>%
  mutate(
    across(all_of(c("D4", "D5", "D6", "D8")), as.factor),
    D7 = as.numeric(D7)
  )

4.3 Second inspection of data

4.3.1 Summary

# Summarise the data again to check the renamed and converted columns.
summary(litdata)
##        id       submitdate           lastpage      startlanguage     
##  Min.   :  1   Length:313         Min.   :-1.000   Length:313        
##  1st Qu.: 81   Class :character   1st Qu.: 2.000   Class :character  
##  Median :162   Mode  :character   Median : 5.000   Mode  :character  
##  Mean   :163                      Mean   : 3.556                     
##  3rd Qu.:245                      3rd Qu.: 5.000                     
##  Max.   :327                      Max.   : 5.000                     
##                                   NA's   :108                        
##       seed            startdate          datestamp               A1       
##  Min.   :5.647e+06   Length:313         Length:313         Min.   :2.000  
##  1st Qu.:5.568e+08   Class :character   Class :character   1st Qu.:4.000  
##  Median :1.086e+09   Mode  :character   Mode  :character   Median :5.000  
##  Mean   :1.081e+09                                         Mean   :4.515  
##  3rd Qu.:1.637e+09                                         3rd Qu.:5.000  
##  Max.   :2.147e+09                                         Max.   :5.000  
##                                                            NA's   :117    
##        A2              A3              A4              A5       
##  Min.   :1.000   Min.   :2.000   Min.   :1.000   Min.   :2.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.000  
##  Median :4.000   Median :4.000   Median :5.000   Median :5.000  
##  Mean   :3.955   Mean   :4.246   Mean   :4.523   Mean   :4.411  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  NA's   :114     NA's   :114     NA's   :114     NA's   :116    
##        A6              A7              A8              A9       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :4.000   Median :5.000   Median :4.000   Median :4.000  
##  Mean   :4.347   Mean   :4.824   Mean   :3.581   Mean   :3.663  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  NA's   :117     NA's   :114     NA's   :122     NA's   :120    
##        B1              B2              B3              B4       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:3.000  
##  Median :3.000   Median :3.000   Median :3.000   Median :3.000  
##  Mean   :3.293   Mean   :2.851   Mean   :2.863   Mean   :3.221  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  NA's   :132     NA's   :132     NA's   :131     NA's   :132    
##        B5              B6              B7              B8             B9       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:3.000   1st Qu.:2.00   1st Qu.:2.000  
##  Median :3.000   Median :3.000   Median :4.000   Median :3.00   Median :3.000  
##  Mean   :3.409   Mean   :2.956   Mean   :3.522   Mean   :2.72   Mean   :2.657  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:3.75   3rd Qu.:3.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.00   Max.   :5.000  
##  NA's   :132     NA's   :132     NA's   :131     NA's   :131    NA's   :132    
##       C1_1            C1_2            C1_3            C1_4      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:2.000  
##  Median :4.000   Median :4.000   Median :5.000   Median :3.000  
##  Mean   :3.606   Mean   :4.153   Mean   :4.336   Mean   :2.956  
##  3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  NA's   :204     NA's   :202     NA's   :200     NA's   :200    
##       C2_1            C2_2            C2_3            C2_4      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:2.000  
##  Median :3.000   Median :4.000   Median :5.000   Median :3.000  
##  Mean   :3.409   Mean   :4.055   Mean   :4.279   Mean   :3.071  
##  3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  NA's   :203     NA's   :204     NA's   :202     NA's   :201    
##       C3_1            C3_2            C3_3           C3_4            C4_1      
##  Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.00   1st Qu.:3.000   1st Qu.:3.000  
##  Median :3.000   Median :4.000   Median :5.00   Median :3.000   Median :4.000  
##  Mean   :3.495   Mean   :4.189   Mean   :4.42   Mean   :3.321   Mean   :3.759  
##  3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:5.00   3rd Qu.:4.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.00   Max.   :5.000   Max.   :5.000  
##  NA's   :202     NA's   :202     NA's   :201    NA's   :201     NA's   :201    
##       C4_2            C4_3            C4_4           C5_1            C5_2      
##  Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.00   1st Qu.:3.000   1st Qu.:4.000  
##  Median :5.000   Median :5.000   Median :3.00   Median :4.000   Median :4.000  
##  Mean   :4.279   Mean   :4.396   Mean   :3.33   Mean   :3.727   Mean   :4.183  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:4.00   3rd Qu.:5.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.00   Max.   :5.000   Max.   :5.000  
##  NA's   :202     NA's   :202     NA's   :201    NA's   :203     NA's   :204    
##       C5_3            C5_4            C6_1            C6_2      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:4.000  
##  Median :5.000   Median :3.000   Median :4.000   Median :4.000  
##  Mean   :4.369   Mean   :3.255   Mean   :3.609   Mean   :4.136  
##  3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  NA's   :202     NA's   :203     NA's   :203     NA's   :203    
##       C6_3            C6_4          D1_1            D1_2        
##  Min.   :1.000   Min.   :1.000   Mode :logical   Mode :logical  
##  1st Qu.:4.000   1st Qu.:3.000   FALSE:64        FALSE:69       
##  Median :5.000   Median :3.000   TRUE :51        TRUE :46       
##  Mean   :4.245   Mean   :3.191   NA's :198       NA's :198      
##  3rd Qu.:5.000   3rd Qu.:4.000                                  
##  Max.   :5.000   Max.   :5.000                                  
##  NA's   :203     NA's   :203                                    
##     D1_3            D1_4            D1_5            D1_6        
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:30        FALSE:81        FALSE:108       FALSE:115      
##  TRUE :85        TRUE :34        TRUE :7         NA's :198      
##  NA's :198       NA's :198       NA's :198                      
##                                                                 
##                                                                 
##                                                                 
##     D1_7             D2               D3           
##  Mode :logical   Mode :logical   Length:313        
##  FALSE:115       FALSE:12        Class :character  
##  NA's :198       TRUE :103       Mode  :character  
##                  NA's :198                         
##                                                    
##                                                    
##                                                    
##                                                                                                        D4     
##  Berufe des Managements und der Administration, des Bank- und Versicherungsgewerbes und des Rechtswesens: 29  
##  Gesundheits-, Lehr- und Kulturberufe, Wissenschaftler                                                  : 24  
##  Technische Berufe sowie Informatikberufe                                                               : 24  
##  Berufe des Gastgewerbes und Berufe zur Erbringung persönlicher Dienstleistungens- und Verkehrsberufe   : 13  
##  -                                                                                                      :  8  
##  (Other)                                                                                                :  5  
##  NA's                                                                                                   :210  
##   D4_comment                                             D5        D5_comment 
##  Length:313         Bachelor Information Science          : 40   Min.   : NA  
##  Class :character   Master Information and Data Management: 10   1st Qu.: NA  
##  Mode  :character   Bachelor Multimedia Production        :  8   Median : NA  
##                     Bachelor Tourismus                    :  8   Mean   :NaN  
##                     Bachelor Betriebsökonomie             :  7   3rd Qu.: NA  
##                     (Other)                               : 40   Max.   : NA  
##                     NA's                                  :200   NA's   :313  
##        D6            D7              D8           E1           
##  2020   : 34   Min.   :1971   männlich: 39   Length:313        
##  2018   : 26   1st Qu.:1991   Weiblich: 75   Class :character  
##  2021   : 25   Median :1995   NA's    :199   Mode  :character  
##  2019   : 20   Mean   :1993                                    
##  2017   :  4   3rd Qu.:1998                                    
##  (Other):  3   Max.   :2002                                    
##  NA's   :201   NA's   :207

4.3.2 Glimpse

# Transposed overview: one line per column with its type and first values.
glimpse(litdata)
## Rows: 313
## Columns: 66
## $ id            <dbl> 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, …
## $ submitdate    <chr> "10/25/2021 11:07:44", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lastpage      <dbl> 5, NA, NA, 2, 2, NA, NA, NA, 2, 1, NA, NA, 5, NA, NA, NA…
## $ startlanguage <chr> "en", "de", "de", "de", "en", "de", "de", "de", "en", "d…
## $ seed          <dbl> 664891087, 334145431, 683903577, 2082427237, 438283320, …
## $ startdate     <chr> "10/25/2021 11:07:40", "10/25/2021 11:33:06", "10/25/202…
## $ datestamp     <chr> "10/25/2021 11:07:44", "10/25/2021 11:33:06", "10/25/202…
## $ A1            <dbl> NA, NA, NA, 5, 3, NA, NA, NA, 5, 3, NA, NA, 5, NA, NA, N…
## $ A2            <dbl> NA, NA, NA, 5, 3, NA, NA, NA, 4, 2, NA, NA, 5, NA, NA, N…
## $ A3            <dbl> NA, NA, NA, 4, 3, NA, NA, NA, 4, 4, NA, NA, 5, NA, NA, N…
## $ A4            <dbl> NA, NA, NA, 5, 5, NA, NA, NA, 4, 4, NA, NA, 5, NA, NA, N…
## $ A5            <dbl> NA, NA, NA, 5, 5, NA, NA, NA, 4, 4, NA, NA, 4, NA, NA, N…
## $ A6            <dbl> NA, NA, NA, 4, 4, NA, NA, NA, 4, 4, NA, NA, 4, NA, NA, N…
## $ A7            <dbl> NA, NA, NA, 4, 5, NA, NA, NA, 5, 3, NA, NA, 5, NA, NA, N…
## $ A8            <dbl> NA, NA, NA, 5, 4, NA, NA, NA, 5, 4, NA, NA, 5, NA, NA, N…
## $ A9            <dbl> NA, NA, NA, 5, 5, NA, NA, NA, 5, 5, NA, NA, 5, NA, NA, N…
## $ B1            <dbl> NA, NA, NA, 4, 4, NA, NA, NA, 4, NA, NA, NA, 3, NA, NA, …
## $ B2            <dbl> NA, NA, NA, 3, 3, NA, NA, NA, 4, NA, NA, NA, 4, NA, NA, …
## $ B3            <dbl> NA, NA, NA, 3, 5, NA, NA, NA, 3, NA, NA, NA, 4, NA, NA, …
## $ B4            <dbl> NA, NA, NA, 3, 4, NA, NA, NA, 4, NA, NA, NA, 3, NA, NA, …
## $ B5            <dbl> NA, NA, NA, 4, 3, NA, NA, NA, 4, NA, NA, NA, 3, NA, NA, …
## $ B6            <dbl> NA, NA, NA, 4, 4, NA, NA, NA, 4, NA, NA, NA, 4, NA, NA, …
## $ B7            <dbl> NA, NA, NA, 4, 3, NA, NA, NA, 3, NA, NA, NA, 4, NA, NA, …
## $ B8            <dbl> NA, NA, NA, 4, 3, NA, NA, NA, 4, NA, NA, NA, 5, NA, NA, …
## $ B9            <dbl> NA, NA, NA, 4, 2, NA, NA, NA, 4, NA, NA, NA, 5, NA, NA, …
## $ C1_1          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C1_2          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C1_3          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C1_4          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C2_1          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C2_2          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C2_3          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C2_4          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C3_1          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3, NA, N…
## $ C3_2          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C3_3          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C3_4          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C4_1          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C4_2          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C4_3          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C4_4          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C5_1          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C5_2          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C5_3          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C5_4          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C6_1          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C6_2          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C6_3          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C6_4          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ D1_1          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, TRUE, NA…
## $ D1_2          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, FALSE, N…
## $ D1_3          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, TRUE, NA…
## $ D1_4          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, FALSE, N…
## $ D1_5          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, FALSE, N…
## $ D1_6          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, FALSE, N…
## $ D1_7          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, FALSE, N…
## $ D2            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, TRUE, NA…
## $ D3            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Bibliot…
## $ D4            <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Gesundh…
## $ D4_comment    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ D5            <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, Bachelor…
## $ D5_comment    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ D6            <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ D7            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ D8            <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, Weiblich…
## $ E1            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Die Fra…

4.3.4 Head

# First six rows; the tibble printer only shows the columns that fit the width.
head(litdata)
## # A tibble: 6 × 66
##      id submitd…¹ lastp…² start…³   seed start…⁴ dates…⁵    A1    A2    A3    A4
##   <dbl> <chr>       <dbl> <chr>    <dbl> <chr>   <chr>   <dbl> <dbl> <dbl> <dbl>
## 1     1 10/25/20…       5 en      6.65e8 10/25/… 10/25/…    NA    NA    NA    NA
## 2     2 <NA>           NA de      3.34e8 10/25/… 10/25/…    NA    NA    NA    NA
## 3     3 <NA>           NA de      6.84e8 10/25/… 10/25/…    NA    NA    NA    NA
## 4     5 <NA>            2 de      2.08e9 10/25/… 10/25/…     5     5     4     5
## 5     6 <NA>            2 en      4.38e8 10/25/… 10/25/…     3     3     3     5
## 6     7 <NA>           NA de      2.15e9 10/25/… 10/25/…    NA    NA    NA    NA
## # … with 55 more variables: A5 <dbl>, A6 <dbl>, A7 <dbl>, A8 <dbl>, A9 <dbl>,
## #   B1 <dbl>, B2 <dbl>, B3 <dbl>, B4 <dbl>, B5 <dbl>, B6 <dbl>, B7 <dbl>,
## #   B8 <dbl>, B9 <dbl>, C1_1 <dbl>, C1_2 <dbl>, C1_3 <dbl>, C1_4 <dbl>,
## #   C2_1 <dbl>, C2_2 <dbl>, C2_3 <dbl>, C2_4 <dbl>, C3_1 <dbl>, C3_2 <dbl>,
## #   C3_3 <dbl>, C3_4 <dbl>, C4_1 <dbl>, C4_2 <dbl>, C4_3 <dbl>, C4_4 <dbl>,
## #   C5_1 <dbl>, C5_2 <dbl>, C5_3 <dbl>, C5_4 <dbl>, C6_1 <dbl>, C6_2 <dbl>,
## #   C6_3 <dbl>, C6_4 <dbl>, D1_1 <lgl>, D1_2 <lgl>, D1_3 <lgl>, D1_4 <lgl>, …

5 Selbststudium 1

Berechnen Sie die Häufigkeiten für die Variablen W003, K003, H001_001, H005, H007 und H008.

5.1 Data

# Print a frequency table for one column and return a bar chart of the counts.
#
# Args:
#   table:  data frame / tibble holding the data.
#   column: name of the column to tabulate, given as a single string.
#
# Returns:
#   A ggplot object (bar chart with one bar per distinct value). The frequency
#   table (columns value, n, percentage) is printed as a side effect.
#
# Note: missing values are counted as rows of their own, so `percentage` is
# relative to all rows of `table`, not only to the valid answers.
displayFunction1 <- function(table, column) {
  # Fail early with a clear message instead of an obscure tidyselect error.
  stopifnot(
    is.data.frame(table),
    is.character(column),
    length(column) == 1L,
    column %in% names(table)
  )

  counts <- table[column] %>%
    rename(value = all_of(column)) %>%
    count(value) %>%
    mutate(percentage = prop.table(n) * 100)

  # n = 100 so that longer tables are printed in full rather than truncated.
  print(counts, n = 100)

  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  ggplot(counts, aes(x = value, y = n)) +
    geom_col() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

A3 (W003)

# Item A3 (W003). Missing answers show up as separate NA and NaN rows.
displayFunction1(litdata, "A3")
## # A tibble: 6 × 3
##   value     n percentage
##   <dbl> <int>      <dbl>
## 1     2     6      1.92 
## 2     3    29      9.27 
## 3     4    74     23.6  
## 4     5    90     28.8  
## 5    NA   112     35.8  
## 6   NaN     2      0.639
## Warning: Removed 2 rows containing missing values (`position_stack()`).

B3 (K003)

# Item B3 (K003). Missing answers show up as separate NA and NaN rows.
displayFunction1(litdata, "B3")
## # A tibble: 7 × 3
##   value     n percentage
##   <dbl> <int>      <dbl>
## 1     1    16      5.11 
## 2     2    47     15.0  
## 3     3    71     22.7  
## 4     4    42     13.4  
## 5     5     6      1.92 
## 6    NA   130     41.5  
## 7   NaN     1      0.319
## Warning: Removed 2 rows containing missing values (`position_stack()`).

D1_1 (H001_001)

# Item D1_1 (H001_001), a logical column (TRUE / FALSE / NA).
displayFunction1(litdata, "D1_1")
## # A tibble: 3 × 3
##   value     n percentage
##   <lgl> <int>      <dbl>
## 1 FALSE    64       20.4
## 2 TRUE     51       16.3
## 3 NA      198       63.3

D5 (H005)

# Item D5 (H005), a factor column.
displayFunction1(litdata, "D5")
## # A tibble: 18 × 3
##    value                                       n percentage
##    <fct>                                   <int>      <dbl>
##  1 Bachelor Architektur                        1      0.319
##  2 Bachelor Bauingenieurwesen                  2      0.639
##  3 Bachelor Betriebsökonomie                   7      2.24 
##  4 Bachelor Computational and Data Science     2      0.639
##  5 Bachelor Digital Business Management        6      1.92 
##  6 Bachelor Information Science               40     12.8  
##  7 Bachelor Mobile Robotics                    1      0.319
##  8 Bachelor Multimedia Production              8      2.56 
##  9 Bachelor Photonics                          3      0.958
## 10 Bachelor Service Innovation and Design      5      1.60 
## 11 Bachelor Sport Management                   7      2.24 
## 12 Bachelor Tourismus                          8      2.56 
## 13 CAS Sport Management 4.0                    1      0.319
## 14 MAS Information Science                     1      0.319
## 15 Master Information and Data Management     10      3.19 
## 16 Master New Business                         7      2.24 
## 17 Master Tourism and Change                   4      1.28 
## 18 <NA>                                      200     63.9

D7 (H007)

# Item D7 (H007): numeric years, used further below as birth years for age.
displayFunction1(litdata, "D7")
## # A tibble: 24 × 3
##    value     n percentage
##    <dbl> <int>      <dbl>
##  1  1971     1      0.319
##  2  1972     1      0.319
##  3  1973     1      0.319
##  4  1974     3      0.958
##  5  1980     1      0.319
##  6  1984     1      0.319
##  7  1985     4      1.28 
##  8  1987     5      1.60 
##  9  1988     3      0.958
## 10  1989     2      0.639
## 11  1990     4      1.28 
## 12  1991     5      1.60 
## 13  1992     6      1.92 
## 14  1993     8      2.56 
## 15  1994     2      0.639
## 16  1995    10      3.19 
## 17  1996     3      0.958
## 18  1997    14      4.47 
## 19  1998    15      4.79 
## 20  1999     7      2.24 
## 21  2000     7      2.24 
## 22  2001     2      0.639
## 23  2002     1      0.319
## 24    NA   207     66.1
## Warning: Removed 1 rows containing missing values (`position_stack()`).

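Die Warnung resultiert daraus, dass die Häufigkeitstabelle eine Zeile mit dem Wert NA enthält (207 fehlende Angaben). Diese Zeile kann auf der numerischen x-Achse nicht dargestellt werden und wird deshalb aus dem Plot entfernt.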

D8 (H008)

# Item D8 (H008), a factor with the levels "männlich" and "Weiblich".
displayFunction1(litdata, "D8")
## # A tibble: 3 × 3
##   value        n percentage
##   <fct>    <int>      <dbl>
## 1 männlich    39       12.5
## 2 Weiblich    75       24.0
## 3 <NA>       199       63.6

6 Selbststudium 2.1

We have the year 2021.

## Preparation

# Keep only the reported birth years (drop NA), then turn them into ages
# relative to the survey year 2021.
has_birthyear <- !is.na(litdata$D7)
birthyears <- litdata$D7[has_birthyear]
age <- 2021 - birthyears

6.1 Frequency

# Frequency table of age. NAs were removed beforehand, so percent and
# valid_percent coincide here.
freq(age)
##             frequency percent valid_percent cumulative_percent
## 19                  1    0.94          0.94               0.94
## 20                  2    1.89          1.89               2.83
## 21                  7    6.60          6.60               9.43
## 22                  7    6.60          6.60              16.04
## 23                 15   14.15         14.15              30.19
## 24                 14   13.21         13.21              43.40
## 25                  3    2.83          2.83              46.23
## 26                 10    9.43          9.43              55.66
## 27                  2    1.89          1.89              57.55
## 28                  8    7.55          7.55              65.09
## 29                  6    5.66          5.66              70.75
## 30                  5    4.72          4.72              75.47
## 31                  4    3.77          3.77              79.25
## 32                  2    1.89          1.89              81.13
## 33                  3    2.83          2.83              83.96
## 34                  5    4.72          4.72              88.68
## 36                  4    3.77          3.77              92.45
## 37                  1    0.94          0.94              93.40
## 41                  1    0.94          0.94              94.34
## 47                  3    2.83          2.83              97.17
## 48                  1    0.94          0.94              98.11
## 49                  1    0.94          0.94              99.06
## 50                  1    0.94          0.94             100.00
## Valid_Total       106  100.00        100.00                NaN
## NAs                 0    0.00           NaN                NaN
## Total             106  100.00           NaN                NaN
# Same table with the missing birth years kept in: `percent` is then relative
# to all respondents, `valid_percent` only to the valid answers.
freq(2021 - litdata$D7)
##             frequency percent valid_percent cumulative_percent
## 19                  1    0.32          0.94               0.94
## 20                  2    0.64          1.89               2.83
## 21                  7    2.24          6.60               9.43
## 22                  7    2.24          6.60              16.04
## 23                 15    4.79         14.15              30.19
## 24                 14    4.47         13.21              43.40
## 25                  3    0.96          2.83              46.23
## 26                 10    3.19          9.43              55.66
## 27                  2    0.64          1.89              57.55
## 28                  8    2.56          7.55              65.09
## 29                  6    1.92          5.66              70.75
## 30                  5    1.60          4.72              75.47
## 31                  4    1.28          3.77              79.25
## 32                  2    0.64          1.89              81.13
## 33                  3    0.96          2.83              83.96
## 34                  5    1.60          4.72              88.68
## 36                  4    1.28          3.77              92.45
## 37                  1    0.32          0.94              93.40
## 41                  1    0.32          0.94              94.34
## 47                  3    0.96          2.83              97.17
## 48                  1    0.32          0.94              98.11
## 49                  1    0.32          0.94              99.06
## 50                  1    0.32          0.94             100.00
## Valid_Total       106   33.87        100.00                NaN
## NAs               207   66.13           NaN                NaN
## Total             313  100.00           NaN                NaN

6.2 Selbststudium 2

https://www.beratung-statistik.de/statistik-beratung-infos/r-tutorial/deskriptive-statistik-r/

6.2.1 Modalwert

# getmode() is the helper defined earlier in this file (base R's mode()
# returns the storage mode, not the statistical mode). It returns every value
# that shares the highest count.
getmode(age)
## [1] 23

6.2.2 Median

# age holds no NAs (they were removed when it was built), so na.rm is not needed.
median(age)
## [1] 26

6.2.3 Arithmetischer Mittelwert

# Arithmetic mean of the NA-free age vector.
mean(age)
## [1] 27.78302

6.2.4 Spannweite

# Range (Spannweite): distance between the smallest and the largest age.
diff(range(age))
## [1] 31

6.2.5 Quartilsabstand

# Interquartile range: 3rd quartile minus 1st quartile.
IQR(age)
## [1] 7
# Not required by the exercise: five-number summary plus mean, which shows the
# quartiles behind the IQR above.
summary(age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   23.00   26.00   27.78   30.00   50.00

6.2.6 Varianz

# Sample variance (denominator n - 1).
var(age)
## [1] 45.04771

6.2.7 Standardabweichung

# Sample standard deviation, i.e. the square root of var(age).
sd(age)
## [1] 6.711759

6.2.8 Schiefe

# skewness() comes from the e1071 package and is called with its default
# `type`. NOTE(review): the kurtosis further down passes type = 2 explicitly;
# confirm whether the skewness should use the same type for comparability.
skewness(age)
## [1] 1.550033

Die Kennzahl Schiefe wird Null bei einer perfekt symmetrischen Verteilung, größer als Null bei einer rechtsschiefen und kleiner als Null bei einer linksschiefen Verteilung. Quelle: https://www.beratung-statistik.de/statistik-beratung-infos/r-tutorial/deskriptive-statistik-r/

6.2.9 Kurtosis

# kurtosis() comes from e1071; per the package documentation type = 2 is the
# formula used by SAS and SPSS. NOTE(review): e1071 appears to report excess
# kurtosis (normal distribution = 0) for every type; confirm before comparing
# the result against 3.
kurtosis(age, type = 2)
## [1] 2.55861

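SPSS berechnet die Kurtosis mit einer anderen Formel als die Standardeinstellung von e1071; type = 2 entspricht laut Paketdokumentation der SAS-/SPSS-Formel. Achtung: kurtosis() aus e1071 liefert den Exzess (Kurtosis − 3); der obige Wert ist daher mit 0 und nicht mit 3 zu vergleichen.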

Eine weitere bekannte Kennzahl ist die Kurtosis. Um eine Vorstellung von der Bedeutung der Kurtosis zu erhalten, betrachten Sie nachfolgende Graphik.
In dieser Graphik sind eine Normalverteilung sowie eine steilgipflige (aka leptokurtische) und eine flachgipflige (aka platykurtische) Verteilung dargestellt. Die steilgipflige Verteilung ist in der Mitte spitzer als die Normalverteilung und an den Rändern breiter. Bei der flachgipfligen Verteilung ist es andersherum. Die Kurtosis ist nun eine Kennzahl, mit der untersucht wird, ob eine Verteilung im Vergleich zur Normalverteilung flachgipflig oder steilgipflig ist:
- Für eine Normalverteilung nimmt die Kurtosis genau den Wert 3 an.
- Eine steilgipflige Verteilung hat eine Kurtosis, die größer als 3 ist.
- Für eine flachgipflige Verteilung ist die Kurtosis kleiner als 3.
- Beachten Sie: Anstatt der Kurtosis wird häufig auch der sogenannte Exzess verwendet. Dies ist eine weitere Kennzahl, die definiert ist durch die Formel: Exzess = Kurtosis - 3.
- Der Exzess ist somit größer als Null, wenn die Verteilung steilgipflig ist, und kleiner als Null bei einer flachgipfligen Verteilung.
Abbildung Kurtosis
Frech kopiert von: https://www.beratung-statistik.de/statistik-beratung-infos/r-tutorial/deskriptive-statistik-r/

6.2.10 QQ-Plot

# Normal Q-Q plot of age; qqline() adds the reference line (by default through
# the first and third quartiles). Points far off the line indicate a departure
# from normality.
qqnorm(age)
qqline(age)

6.2.11 Histogram for age

Density

# Density-scaled histogram (freq = FALSE) so that the kernel density estimate
# can be drawn on the same y-axis. FALSE is spelled out because F is an
# ordinary variable that can be reassigned.
hist(age, freq = FALSE)
lines(density(age), lwd = 2, col = "black")

Auto Breaks

# Default binning (hist() uses breaks = "Sturges" when nothing is specified).
hist(age)

3 Breaks

# breaks = 3 is a suggestion only: hist() picks "pretty" breakpoints, so the
# number of bins drawn may differ.
hist(age, breaks = 3)

5 Breaks

# breaks = 5 is a suggestion only: hist() picks "pretty" breakpoints, so the
# number of bins drawn may differ.
hist(age, breaks = 5)

7 Breaks

# breaks = 7 is a suggestion only: hist() picks "pretty" breakpoints, so the
# number of bins drawn may differ.
hist(age, breaks = 7)

10 Breaks

# breaks = 10 is a suggestion only: hist() picks "pretty" breakpoints, so the
# number of bins drawn may differ.
hist(age, breaks = 10)

15 Breaks

# breaks = 15 is a suggestion only: hist() picks "pretty" breakpoints, so the
# number of bins drawn may differ.
hist(age, breaks = 15)

20 Breaks

# breaks = 20 is a suggestion only: hist() picks "pretty" breakpoints, so the
# number of bins drawn may differ.
hist(age, breaks = 20)

30 Breaks

# breaks = 30 is a suggestion only: hist() picks "pretty" breakpoints, so the
# number of bins drawn may differ.
hist(age, breaks = 30)

7 Selbststudium 2.2

Auf der Grundlage von Daten aus einer Schweizer Schülererhebung wird aus verschiedenen Variablen (z.B. Angaben zum Beruf der Eltern, zur Elternausbildung sowie zur Anzahl von Büchern zu Hause) ein Index zur sozialen Herkunft erstellt. Dieser Index erscheint in einer neu gebildeten numerischen Variable im Datensatz, gibt also für jeden Fall in diesem Datensatz einen Skalenwert zur sozialen Herkunft an. Die neu gebildete Skala läuft von 0 (Wert mit der geringsten Ausprägung) bis 10 (Wert mit der höchsten Ausprägung).

Für die gesamte Schweiz liegt der arithmetische Mittelwert auf dieser Skala bei 5.6 und die Standardabweichung beträgt 1.8 (Zahlen sind von mir frei erfunden!). Die Verteilung entspricht einer Normalverteilung.

Der Mittelwert der Verteilung der Bündner Schüler liegt etwas tiefer als in der Gesamtschweiz, nämlich bei 5.1 mit einer Standardabweichung von 2.

7.1 Aufgabenstellung 1

Wo in der Verteilung der Schweiz liegt der Bündner Mittelwert, bzw. wie viele Schweizer Schüler haben bzgl. der sozialen Herkunft einen tieferen Wert als der typische Bündner Schüler?

# P(X <= 5.1) for X ~ N(mean = 5.6, sd = 1.8): share of the Swiss distribution
# that lies below the Graubuenden mean of 5.1.
pnorm(5.1, mean = 5.6, sd = 1.8)
## [1] 0.3905915

7.2 Aufgabenstellung 2

In einem Bündner Ort beträgt der Mittelwert auf der Skala zur sozialen Herkunft 5.6, er ist also genauso hoch wie in der Gesamtschweiz. Was ist zur Lage dieses Wertes bezogen auf die Verteilung in Graubünden zu sagen? Also: Wie viele Schüler in GR liegen mit ihrem Wert darunter?

# P(X <= 5.6) for X ~ N(mean = 5.1, sd = 2.0): share of the Graubuenden
# distribution that lies below the local mean of 5.6.
pnorm(5.6, mean = 5.1, sd = 2.0)
## [1] 0.5987063