if (!require(tidyverse)){
install.packages("tidyverse")
library(tidyverse)
}
if (!require(moments)){
install.packages("moments")
library(moments)
}
# Print a frequency table for a discrete vector plus a valid/NA summary.
#
# Args:
#   data: a vector; NA entries are counted as missing values.
#
# Side effects:
#   Prints a matrix with the columns frequency, percent, frequency_sum and
#   percent_sum (absolute, relative, cumulative and cumulative relative
#   frequencies of the non-missing values), followed by a data frame with the
#   count and percentage of valid, NA and all observations.
#
# Returns:
#   The valid/NA summary data frame, invisibly.
freq <- function(data) {
  n_total <- length(data)
  na_count <- sum(is.na(data))
  valid_count <- n_total - na_count

  # table() drops NA by default, so every share in the frequency table is
  # relative to the valid observations only.
  frequency <- table(data)
  p <- prop.table(frequency)
  percent <- round(p * 100, digits = 2)
  frequency_sum <- cumsum(frequency)
  percent_sum <- round(cumsum(p) * 100, digits = 2)
  # cbind() derives the column names from the variable names above.
  freq_table <- cbind(frequency, percent, frequency_sum, percent_sum)
  print(freq_table)

  # Shares of valid and missing observations relative to all observations.
  valid_percent <- round(valid_count / n_total * 100, digits = 2)
  na_percent <- round(na_count / n_total * 100, digits = 2)
  df <- data.frame(
    count = c(valid_count, na_count, n_total),
    percent = c(valid_percent, na_percent, valid_percent + na_percent),
    row.names = c("valid", "NA", "Total")
  )
  print(df)
  invisible(df)
}
Source (adapted): https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret
# Return the most frequent value(s) of a vector.
#
# Args:
#   v: a vector.
#
# Returns:
#   The distinct value(s) of `v` that occur most often; ties are all returned,
#   in order of first appearance.
getmode <- function(v) {
  distinct_values <- unique(v)
  # Number of occurrences of each distinct value, aligned with distinct_values.
  occurrences <- tabulate(match(v, distinct_values))
  highest <- max(occurrences)
  distinct_values[which(occurrences == highest)]
}
# Read the survey CSV (without the column-type message), make sure the result
# is a tibble, and show a per-column summary of the raw data.
litdata <- read_csv("DataLit_R.csv", show_col_types = FALSE) %>%
  as_tibble()
summary(litdata)
## id submitdate lastpage startlanguage
## Min. : 1 Length:313 Min. :-1.000 Length:313
## 1st Qu.: 81 Class :character 1st Qu.: 2.000 Class :character
## Median :162 Mode :character Median : 5.000 Mode :character
## Mean :163 Mean : 3.556
## 3rd Qu.:245 3rd Qu.: 5.000
## Max. :327 Max. : 5.000
## NA's :108
## seed startdate datestamp W001
## Min. :5.647e+06 Length:313 Length:313 Length:313
## 1st Qu.:5.568e+08 Class :character Class :character Class :character
## Median :1.086e+09 Mode :character Mode :character Mode :character
## Mean :1.081e+09
## 3rd Qu.:1.637e+09
## Max. :2.147e+09
##
## W002 W003 W004 W005
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## W006 W007 W008 W009
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## K001 K002 K003 K004
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## K005 K006 K007 K008
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## K009 TK001_01 TK001_02 TK001_03
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## TK001_04 TK002_01 TK002_02 TK002_03
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## TK002_04 TK003_01 TK003_02 TK003_03
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## TK003_04 TK004_01 TK004_02 TK004_03
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## TK004_04 TK005_01 TK005_02 TK005_03
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## TK005_04 TK006_01 TK006_02 TK006_03
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## TK006_04 H001_001 H001_002 H001_003
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## H001_004 H001_005 H001_006 H001_007
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## H002 H003 H004 H004_other
## Length:313 Length:313 Length:313 Length:313
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## H005 H005_other H006 H007
## Length:313 Mode:logical Min. :2012 Min. :1971
## Class :character NA's:313 1st Qu.:2018 1st Qu.:1991
## Mode :character Median :2020 Median :1995
## Mean :2019 Mean :1993
## 3rd Qu.:2020 3rd Qu.:1998
## Max. :2021 Max. :2002
## NA's :201 NA's :207
## H008 R1
## Length:313 Length:313
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Column-wise overview of the raw import: one line per column with its type
# and the first few values.
glimpse(litdata)
## Rows: 313
## Columns: 66
## $ id <dbl> 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, …
## $ submitdate <chr> "10/25/2021 11:07:44", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lastpage <dbl> 5, NA, NA, 2, 2, NA, NA, NA, 2, 1, NA, NA, 5, NA, NA, NA…
## $ startlanguage <chr> "en", "de", "de", "de", "en", "de", "de", "de", "en", "d…
## $ seed <dbl> 664891087, 334145431, 683903577, 2082427237, 438283320, …
## $ startdate <chr> "10/25/2021 11:07:40", "10/25/2021 11:33:06", "10/25/202…
## $ datestamp <chr> "10/25/2021 11:07:44", "10/25/2021 11:33:06", "10/25/202…
## $ W001 <chr> NA, NA, NA, "Stimme voll zu5", "3", NA, NA, NA, "Stimme …
## $ W002 <chr> NA, NA, NA, "Stimme voll zu5", "3", NA, NA, NA, "4", "2"…
## $ W003 <chr> NA, NA, NA, "4", "3", NA, NA, NA, "4", "4", NA, NA, "Sti…
## $ W004 <chr> NA, NA, NA, "Stimme voll zu5", "Stimme voll zu5", NA, NA…
## $ W005 <chr> NA, NA, NA, "Stimme voll zu5", "Stimme voll zu5", NA, NA…
## $ W006 <chr> NA, NA, NA, "4", "4", NA, NA, NA, "4", "4", NA, NA, "4",…
## $ W007 <chr> NA, NA, NA, "4", "Stimme voll zu5", NA, NA, NA, "Stimme …
## $ W008 <chr> NA, NA, NA, "Stimme voll zu5", "4", NA, NA, NA, "Stimme …
## $ W009 <chr> NA, NA, NA, "Stimme voll zu5", "Stimme voll zu5", NA, NA…
## $ K001 <chr> NA, NA, NA, "4", "4", NA, NA, NA, "4", NA, NA, NA, "3", …
## $ K002 <chr> NA, NA, NA, "3", "3", NA, NA, NA, "4", NA, NA, NA, "4", …
## $ K003 <chr> NA, NA, NA, "3", "Stimme voll zu5", NA, NA, NA, "3", NA,…
## $ K004 <chr> NA, NA, NA, "3", "4", NA, NA, NA, "4", NA, NA, NA, "3", …
## $ K005 <chr> NA, NA, NA, "4", "3", NA, NA, NA, "4", NA, NA, NA, "3", …
## $ K006 <chr> NA, NA, NA, "4", "4", NA, NA, NA, "4", NA, NA, NA, "4", …
## $ K007 <chr> NA, NA, NA, "4", "3", NA, NA, NA, "3", NA, NA, NA, "4", …
## $ K008 <chr> NA, NA, NA, "4", "3", NA, NA, NA, "4", NA, NA, NA, "Stim…
## $ K009 <chr> NA, NA, NA, "4", "2", NA, NA, NA, "4", NA, NA, NA, "Stim…
## $ TK001_01 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK001_02 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK001_03 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK001_04 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK002_01 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK002_02 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK002_03 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK002_04 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK003_01 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "3", NA,…
## $ TK003_02 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK003_03 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK003_04 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK004_01 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK004_02 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK004_03 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK004_04 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ TK005_01 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK005_02 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK005_03 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK005_04 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK006_01 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK006_02 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK006_03 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Stimme …
## $ TK006_04 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4", NA,…
## $ H001_001 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Ja", NA…
## $ H001_002 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Nicht G…
## $ H001_003 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Ja", NA…
## $ H001_004 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Nicht G…
## $ H001_005 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Nicht G…
## $ H001_006 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Nicht G…
## $ H001_007 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Nicht G…
## $ H002 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Ja", NA…
## $ H003 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Bibliot…
## $ H004 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Gesundh…
## $ H004_other <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ H005 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Bachelo…
## $ H005_other <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ H006 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ H007 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ H008 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Weiblic…
## $ R1 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Die Fra…
# Print the raw tibble (only the first rows and the columns that fit are shown).
print(litdata)
## # A tibble: 313 × 66
## id submit…¹ lastp…² start…³ seed start…⁴ dates…⁵ W001 W002 W003 W004
## <dbl> <chr> <dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 10/25/2… 5 en 6.65e8 10/25/… 10/25/… <NA> <NA> <NA> <NA>
## 2 2 <NA> NA de 3.34e8 10/25/… 10/25/… <NA> <NA> <NA> <NA>
## 3 3 <NA> NA de 6.84e8 10/25/… 10/25/… <NA> <NA> <NA> <NA>
## 4 5 <NA> 2 de 2.08e9 10/25/… 10/25/… Stim… Stim… 4 Stim…
## 5 6 <NA> 2 en 4.38e8 10/25/… 10/25/… 3 3 3 Stim…
## 6 7 <NA> NA de 2.15e9 10/25/… 10/25/… <NA> <NA> <NA> <NA>
## 7 8 <NA> NA de 1.74e9 10/25/… 10/25/… <NA> <NA> <NA> <NA>
## 8 9 <NA> NA de 4.64e8 10/25/… 10/25/… <NA> <NA> <NA> <NA>
## 9 10 <NA> 2 en 1.09e9 10/25/… 10/25/… Stim… 4 4 4
## 10 11 <NA> 1 de 1.55e9 10/25/… 10/25/… 3 2 4 4
## # … with 303 more rows, 55 more variables: W005 <chr>, W006 <chr>, W007 <chr>,
## # W008 <chr>, W009 <chr>, K001 <chr>, K002 <chr>, K003 <chr>, K004 <chr>,
## # K005 <chr>, K006 <chr>, K007 <chr>, K008 <chr>, K009 <chr>, TK001_01 <chr>,
## # TK001_02 <chr>, TK001_03 <chr>, TK001_04 <chr>, TK002_01 <chr>,
## # TK002_02 <chr>, TK002_03 <chr>, TK002_04 <chr>, TK003_01 <chr>,
## # TK003_02 <chr>, TK003_03 <chr>, TK003_04 <chr>, TK004_01 <chr>,
## # TK004_02 <chr>, TK004_03 <chr>, TK004_04 <chr>, TK005_01 <chr>, …
# Show the first six rows of the raw data.
head(litdata)
## # A tibble: 6 × 66
## id submitd…¹ lastp…² start…³ seed start…⁴ dates…⁵ W001 W002 W003 W004
## <dbl> <chr> <dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 10/25/20… 5 en 6.65e8 10/25/… 10/25/… <NA> <NA> <NA> <NA>
## 2 2 <NA> NA de 3.34e8 10/25/… 10/25/… <NA> <NA> <NA> <NA>
## 3 3 <NA> NA de 6.84e8 10/25/… 10/25/… <NA> <NA> <NA> <NA>
## 4 5 <NA> 2 de 2.08e9 10/25/… 10/25/… Stim… Stim… 4 Stim…
## 5 6 <NA> 2 en 4.38e8 10/25/… 10/25/… 3 3 3 Stim…
## 6 7 <NA> NA de 2.15e9 10/25/… 10/25/… <NA> <NA> <NA> <NA>
## # … with 55 more variables: W005 <chr>, W006 <chr>, W007 <chr>, W008 <chr>,
## # W009 <chr>, K001 <chr>, K002 <chr>, K003 <chr>, K004 <chr>, K005 <chr>,
## # K006 <chr>, K007 <chr>, K008 <chr>, K009 <chr>, TK001_01 <chr>,
## # TK001_02 <chr>, TK001_03 <chr>, TK001_04 <chr>, TK002_01 <chr>,
## # TK002_02 <chr>, TK002_03 <chr>, TK002_04 <chr>, TK003_01 <chr>,
## # TK003_02 <chr>, TK003_03 <chr>, TK003_04 <chr>, TK004_01 <chr>,
## # TK004_02 <chr>, TK004_03 <chr>, TK004_04 <chr>, TK005_01 <chr>, …
# Recode the labelled answer options to plain codes, in character columns only.
# Restricting the recode to character columns keeps numeric and logical columns
# untouched; applying replace() to every column silently turned the all-NA
# logical column H005_other into a double column.
# "Keine Antwort-" becomes the string "NaN", which as.numeric() later converts
# to NaN, so it stays distinguishable from NA in the frequency tables.
litdata <- litdata %>%
  mutate(across(
    where(is.character),
    ~ case_when(
      .x == "Stimme voll zu5" ~ "5",
      .x == "Stimme überhaupt nicht zu1" ~ "1",
      .x == "Keine Antwort-" ~ "NaN",
      TRUE ~ .x
    )
  ))
The following code is NOT run. The idea is to show a way to convert all columns automatically. It works, but some of the columns are NOT numeric.
# Every column name in the data set
litdataColnames <- colnames(litdata)
# Columns that must be left untouched
litdataNonNumericCols <- c(
  "submitdate", "startlanguage", "startdate", "datestamp", "lastpage", "seed"
)
# All remaining columns are candidates for the numeric conversion
litdataColsToMakeNumeric <- setdiff(litdataColnames, litdataNonNumericCols)
print(litdataColsToMakeNumeric)
# NOTE(review): the candidate list is overwritten here, so the loop below only
# converts "R1" — presumably a leftover from trying the loop on one column.
litdataColsToMakeNumeric <- c("R1")
# Convert each selected column in place
for (col in litdataColsToMakeNumeric) {
  litdata[[col]] <- as.numeric(litdata[[col]])
}
First we rename all the columns
# Give every question column a short, section-based name:
# W items -> A, K items -> B, TK items -> C, H items -> D, R1 -> E1.
litdata <- litdata %>%
  rename(
    # W001..W009
    A1 = W001, A2 = W002, A3 = W003,
    A4 = W004, A5 = W005, A6 = W006,
    A7 = W007, A8 = W008, A9 = W009,
    # K001..K009
    B1 = K001, B2 = K002, B3 = K003,
    B4 = K004, B5 = K005, B6 = K006,
    B7 = K007, B8 = K008, B9 = K009,
    # TK001..TK006, four sub-items each
    C1_1 = TK001_01, C1_2 = TK001_02, C1_3 = TK001_03, C1_4 = TK001_04,
    C2_1 = TK002_01, C2_2 = TK002_02, C2_3 = TK002_03, C2_4 = TK002_04,
    C3_1 = TK003_01, C3_2 = TK003_02, C3_3 = TK003_03, C3_4 = TK003_04,
    C4_1 = TK004_01, C4_2 = TK004_02, C4_3 = TK004_03, C4_4 = TK004_04,
    C5_1 = TK005_01, C5_2 = TK005_02, C5_3 = TK005_03, C5_4 = TK005_04,
    C6_1 = TK006_01, C6_2 = TK006_02, C6_3 = TK006_03, C6_4 = TK006_04,
    # H001 sub-items
    D1_1 = H001_001, D1_2 = H001_002, D1_3 = H001_003, D1_4 = H001_004,
    D1_5 = H001_005, D1_6 = H001_006, D1_7 = H001_007,
    # H002..H008 including the "other" text fields
    D2 = H002,
    D3 = H003,
    D4 = H004, D4_comment = H004_other,
    D5 = H005, D5_comment = H005_other,
    D6 = H006,
    D7 = H007,
    D8 = H008,
    # R1
    E1 = R1
  )
Then we change the datatype and fix the values
# Convert the renamed columns to their analysis types in one mutate() call
# instead of one hand-written statement per column.
litdata <- litdata %>%
  mutate(
    # A1..A9, B1..B9 and C1_1..C6_4: character answers -> numeric.
    across(
      all_of(c(
        paste0("A", 1:9),
        paste0("B", 1:9),
        paste0("C", rep(1:6, each = 4), "_", 1:4)
      )),
      as.numeric
    ),
    # D1_1..D1_7: "Ja" -> TRUE, "Nicht Gewählt" -> FALSE; any other value is
    # passed through to as.logical().
    across(
      all_of(paste0("D1_", 1:7)),
      ~ as.logical(
        ifelse(.x == "Ja", TRUE, ifelse(.x == "Nicht Gewählt", FALSE, .x))
      )
    ),
    # D2: "Ja" -> TRUE, "Nein" -> FALSE.
    D2 = as.logical(ifelse(D2 == "Ja", TRUE, ifelse(D2 == "Nein", FALSE, D2))),
    # D3, D4_comment, D5_comment and E1 are free text and stay as they are.
    # D6 can't be a number as there is a "2010 or earlier" option.
    across(all_of(c("D4", "D5", "D6", "D8")), as.factor),
    D7 = as.numeric(D7)
  )
# Per-column summary after the conversion.
summary(litdata)
## id submitdate lastpage startlanguage
## Min. : 1 Length:313 Min. :-1.000 Length:313
## 1st Qu.: 81 Class :character 1st Qu.: 2.000 Class :character
## Median :162 Mode :character Median : 5.000 Mode :character
## Mean :163 Mean : 3.556
## 3rd Qu.:245 3rd Qu.: 5.000
## Max. :327 Max. : 5.000
## NA's :108
## seed startdate datestamp A1
## Min. :5.647e+06 Length:313 Length:313 Min. :2.000
## 1st Qu.:5.568e+08 Class :character Class :character 1st Qu.:4.000
## Median :1.086e+09 Mode :character Mode :character Median :5.000
## Mean :1.081e+09 Mean :4.515
## 3rd Qu.:1.637e+09 3rd Qu.:5.000
## Max. :2.147e+09 Max. :5.000
## NA's :117
## A2 A3 A4 A5
## Min. :1.000 Min. :2.000 Min. :1.000 Min. :2.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Median :4.000 Median :4.000 Median :5.000 Median :5.000
## Mean :3.955 Mean :4.246 Mean :4.523 Mean :4.411
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## NA's :114 NA's :114 NA's :114 NA's :116
## A6 A7 A8 A9
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :5.000 Median :4.000 Median :4.000
## Mean :4.347 Mean :4.824 Mean :3.581 Mean :3.663
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## NA's :117 NA's :114 NA's :122 NA's :120
## B1 B2 B3 B4
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:3.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.000
## Mean :3.293 Mean :2.851 Mean :2.863 Mean :3.221
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## NA's :132 NA's :132 NA's :131 NA's :132
## B5 B6 B7 B8 B9
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:2.00 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :4.000 Median :3.00 Median :3.000
## Mean :3.409 Mean :2.956 Mean :3.522 Mean :2.72 Mean :2.657
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.75 3rd Qu.:3.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000
## NA's :132 NA's :132 NA's :131 NA's :131 NA's :132
## C1_1 C1_2 C1_3 C1_4
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:2.000
## Median :4.000 Median :4.000 Median :5.000 Median :3.000
## Mean :3.606 Mean :4.153 Mean :4.336 Mean :2.956
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## NA's :204 NA's :202 NA's :200 NA's :200
## C2_1 C2_2 C2_3 C2_4
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:2.000
## Median :3.000 Median :4.000 Median :5.000 Median :3.000
## Mean :3.409 Mean :4.055 Mean :4.279 Mean :3.071
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## NA's :203 NA's :204 NA's :202 NA's :201
## C3_1 C3_2 C3_3 C3_4 C4_1
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:3.000 1st Qu.:3.000
## Median :3.000 Median :4.000 Median :5.00 Median :3.000 Median :4.000
## Mean :3.495 Mean :4.189 Mean :4.42 Mean :3.321 Mean :3.759
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:5.00 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.000
## NA's :202 NA's :202 NA's :201 NA's :201 NA's :201
## C4_2 C4_3 C4_4 C5_1 C5_2
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.00 1st Qu.:3.000 1st Qu.:4.000
## Median :5.000 Median :5.000 Median :3.00 Median :4.000 Median :4.000
## Mean :4.279 Mean :4.396 Mean :3.33 Mean :3.727 Mean :4.183
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:4.00 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.000
## NA's :202 NA's :202 NA's :201 NA's :203 NA's :204
## C5_3 C5_4 C6_1 C6_2
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:4.000
## Median :5.000 Median :3.000 Median :4.000 Median :4.000
## Mean :4.369 Mean :3.255 Mean :3.609 Mean :4.136
## 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## NA's :202 NA's :203 NA's :203 NA's :203
## C6_3 C6_4 D1_1 D1_2
## Min. :1.000 Min. :1.000 Mode :logical Mode :logical
## 1st Qu.:4.000 1st Qu.:3.000 FALSE:64 FALSE:69
## Median :5.000 Median :3.000 TRUE :51 TRUE :46
## Mean :4.245 Mean :3.191 NA's :198 NA's :198
## 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000
## NA's :203 NA's :203
## D1_3 D1_4 D1_5 D1_6
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:30 FALSE:81 FALSE:108 FALSE:115
## TRUE :85 TRUE :34 TRUE :7 NA's :198
## NA's :198 NA's :198 NA's :198
##
##
##
## D1_7 D2 D3
## Mode :logical Mode :logical Length:313
## FALSE:115 FALSE:12 Class :character
## NA's :198 TRUE :103 Mode :character
## NA's :198
##
##
##
## D4
## Berufe des Managements und der Administration, des Bank- und Versicherungsgewerbes und des Rechtswesens: 29
## Gesundheits-, Lehr- und Kulturberufe, Wissenschaftler : 24
## Technische Berufe sowie Informatikberufe : 24
## Berufe des Gastgewerbes und Berufe zur Erbringung persönlicher Dienstleistungens- und Verkehrsberufe : 13
## - : 8
## (Other) : 5
## NA's :210
## D4_comment D5 D5_comment
## Length:313 Bachelor Information Science : 40 Min. : NA
## Class :character Master Information and Data Management: 10 1st Qu.: NA
## Mode :character Bachelor Multimedia Production : 8 Median : NA
## Bachelor Tourismus : 8 Mean :NaN
## Bachelor Betriebsökonomie : 7 3rd Qu.: NA
## (Other) : 40 Max. : NA
## NA's :200 NA's :313
## D6 D7 D8 E1
## 2020 : 34 Min. :1971 männlich: 39 Length:313
## 2018 : 26 1st Qu.:1991 Weiblich: 75 Class :character
## 2021 : 25 Median :1995 NA's :199 Mode :character
## 2019 : 20 Mean :1993
## 2017 : 4 3rd Qu.:1998
## (Other): 3 Max. :2002
## NA's :201 NA's :207
# Column-wise overview after renaming and type conversion.
glimpse(litdata)
## Rows: 313
## Columns: 66
## $ id <dbl> 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, …
## $ submitdate <chr> "10/25/2021 11:07:44", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lastpage <dbl> 5, NA, NA, 2, 2, NA, NA, NA, 2, 1, NA, NA, 5, NA, NA, NA…
## $ startlanguage <chr> "en", "de", "de", "de", "en", "de", "de", "de", "en", "d…
## $ seed <dbl> 664891087, 334145431, 683903577, 2082427237, 438283320, …
## $ startdate <chr> "10/25/2021 11:07:40", "10/25/2021 11:33:06", "10/25/202…
## $ datestamp <chr> "10/25/2021 11:07:44", "10/25/2021 11:33:06", "10/25/202…
## $ A1 <dbl> NA, NA, NA, 5, 3, NA, NA, NA, 5, 3, NA, NA, 5, NA, NA, N…
## $ A2 <dbl> NA, NA, NA, 5, 3, NA, NA, NA, 4, 2, NA, NA, 5, NA, NA, N…
## $ A3 <dbl> NA, NA, NA, 4, 3, NA, NA, NA, 4, 4, NA, NA, 5, NA, NA, N…
## $ A4 <dbl> NA, NA, NA, 5, 5, NA, NA, NA, 4, 4, NA, NA, 5, NA, NA, N…
## $ A5 <dbl> NA, NA, NA, 5, 5, NA, NA, NA, 4, 4, NA, NA, 4, NA, NA, N…
## $ A6 <dbl> NA, NA, NA, 4, 4, NA, NA, NA, 4, 4, NA, NA, 4, NA, NA, N…
## $ A7 <dbl> NA, NA, NA, 4, 5, NA, NA, NA, 5, 3, NA, NA, 5, NA, NA, N…
## $ A8 <dbl> NA, NA, NA, 5, 4, NA, NA, NA, 5, 4, NA, NA, 5, NA, NA, N…
## $ A9 <dbl> NA, NA, NA, 5, 5, NA, NA, NA, 5, 5, NA, NA, 5, NA, NA, N…
## $ B1 <dbl> NA, NA, NA, 4, 4, NA, NA, NA, 4, NA, NA, NA, 3, NA, NA, …
## $ B2 <dbl> NA, NA, NA, 3, 3, NA, NA, NA, 4, NA, NA, NA, 4, NA, NA, …
## $ B3 <dbl> NA, NA, NA, 3, 5, NA, NA, NA, 3, NA, NA, NA, 4, NA, NA, …
## $ B4 <dbl> NA, NA, NA, 3, 4, NA, NA, NA, 4, NA, NA, NA, 3, NA, NA, …
## $ B5 <dbl> NA, NA, NA, 4, 3, NA, NA, NA, 4, NA, NA, NA, 3, NA, NA, …
## $ B6 <dbl> NA, NA, NA, 4, 4, NA, NA, NA, 4, NA, NA, NA, 4, NA, NA, …
## $ B7 <dbl> NA, NA, NA, 4, 3, NA, NA, NA, 3, NA, NA, NA, 4, NA, NA, …
## $ B8 <dbl> NA, NA, NA, 4, 3, NA, NA, NA, 4, NA, NA, NA, 5, NA, NA, …
## $ B9 <dbl> NA, NA, NA, 4, 2, NA, NA, NA, 4, NA, NA, NA, 5, NA, NA, …
## $ C1_1 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C1_2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C1_3 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C1_4 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C2_1 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C2_2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C2_3 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C2_4 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C3_1 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3, NA, N…
## $ C3_2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C3_3 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C3_4 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C4_1 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C4_2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C4_3 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C4_4 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ C5_1 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C5_2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C5_3 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C5_4 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C6_1 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C6_2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C6_3 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, N…
## $ C6_4 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, NA, N…
## $ D1_1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, TRUE, NA…
## $ D1_2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, FALSE, N…
## $ D1_3 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, TRUE, NA…
## $ D1_4 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, FALSE, N…
## $ D1_5 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, FALSE, N…
## $ D1_6 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, FALSE, N…
## $ D1_7 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, FALSE, N…
## $ D2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, TRUE, NA…
## $ D3 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Bibliot…
## $ D4 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Gesundh…
## $ D4_comment <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ D5 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, Bachelor…
## $ D5_comment <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ D6 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ D7 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ D8 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, Weiblich…
## $ E1 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Die Fra…
# Print the converted tibble (only the first rows and the columns that fit).
print(litdata)
## # A tibble: 313 × 66
## id submit…¹ lastp…² start…³ seed start…⁴ dates…⁵ A1 A2 A3 A4
## <dbl> <chr> <dbl> <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 10/25/2… 5 en 6.65e8 10/25/… 10/25/… NA NA NA NA
## 2 2 <NA> NA de 3.34e8 10/25/… 10/25/… NA NA NA NA
## 3 3 <NA> NA de 6.84e8 10/25/… 10/25/… NA NA NA NA
## 4 5 <NA> 2 de 2.08e9 10/25/… 10/25/… 5 5 4 5
## 5 6 <NA> 2 en 4.38e8 10/25/… 10/25/… 3 3 3 5
## 6 7 <NA> NA de 2.15e9 10/25/… 10/25/… NA NA NA NA
## 7 8 <NA> NA de 1.74e9 10/25/… 10/25/… NA NA NA NA
## 8 9 <NA> NA de 4.64e8 10/25/… 10/25/… NA NA NA NA
## 9 10 <NA> 2 en 1.09e9 10/25/… 10/25/… 5 4 4 4
## 10 11 <NA> 1 de 1.55e9 10/25/… 10/25/… 3 2 4 4
## # … with 303 more rows, 55 more variables: A5 <dbl>, A6 <dbl>, A7 <dbl>,
## # A8 <dbl>, A9 <dbl>, B1 <dbl>, B2 <dbl>, B3 <dbl>, B4 <dbl>, B5 <dbl>,
## # B6 <dbl>, B7 <dbl>, B8 <dbl>, B9 <dbl>, C1_1 <dbl>, C1_2 <dbl>, C1_3 <dbl>,
## # C1_4 <dbl>, C2_1 <dbl>, C2_2 <dbl>, C2_3 <dbl>, C2_4 <dbl>, C3_1 <dbl>,
## # C3_2 <dbl>, C3_3 <dbl>, C3_4 <dbl>, C4_1 <dbl>, C4_2 <dbl>, C4_3 <dbl>,
## # C4_4 <dbl>, C5_1 <dbl>, C5_2 <dbl>, C5_3 <dbl>, C5_4 <dbl>, C6_1 <dbl>,
## # C6_2 <dbl>, C6_3 <dbl>, C6_4 <dbl>, D1_1 <lgl>, D1_2 <lgl>, D1_3 <lgl>, …
# Show the first six rows of the converted data.
head(litdata)
## # A tibble: 6 × 66
## id submitd…¹ lastp…² start…³ seed start…⁴ dates…⁵ A1 A2 A3 A4
## <dbl> <chr> <dbl> <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 10/25/20… 5 en 6.65e8 10/25/… 10/25/… NA NA NA NA
## 2 2 <NA> NA de 3.34e8 10/25/… 10/25/… NA NA NA NA
## 3 3 <NA> NA de 6.84e8 10/25/… 10/25/… NA NA NA NA
## 4 5 <NA> 2 de 2.08e9 10/25/… 10/25/… 5 5 4 5
## 5 6 <NA> 2 en 4.38e8 10/25/… 10/25/… 3 3 3 5
## 6 7 <NA> NA de 2.15e9 10/25/… 10/25/… NA NA NA NA
## # … with 55 more variables: A5 <dbl>, A6 <dbl>, A7 <dbl>, A8 <dbl>, A9 <dbl>,
## # B1 <dbl>, B2 <dbl>, B3 <dbl>, B4 <dbl>, B5 <dbl>, B6 <dbl>, B7 <dbl>,
## # B8 <dbl>, B9 <dbl>, C1_1 <dbl>, C1_2 <dbl>, C1_3 <dbl>, C1_4 <dbl>,
## # C2_1 <dbl>, C2_2 <dbl>, C2_3 <dbl>, C2_4 <dbl>, C3_1 <dbl>, C3_2 <dbl>,
## # C3_3 <dbl>, C3_4 <dbl>, C4_1 <dbl>, C4_2 <dbl>, C4_3 <dbl>, C4_4 <dbl>,
## # C5_1 <dbl>, C5_2 <dbl>, C5_3 <dbl>, C5_4 <dbl>, C6_1 <dbl>, C6_2 <dbl>,
## # C6_3 <dbl>, C6_4 <dbl>, D1_1 <lgl>, D1_2 <lgl>, D1_3 <lgl>, D1_4 <lgl>, …
Berechnen Sie die Häufigkeiten für die Variablen W003, K003, H001_001, H005, H007 und H008.
# Print a frequency table for one column and return a bar chart of it.
#
# Args:
#   table:  a data frame / tibble.
#   column: name of the column to tabulate, as a string.
#
# Side effects:
#   Prints one row per distinct value (NA included) with its count `n` and its
#   share of all rows in percent, showing up to 100 rows.
#
# Returns:
#   A ggplot bar chart of the counts with x-axis labels rotated by 45 degrees.
displayFunction1 <- function(table, column) {
  # Select the column and give it the fixed name `value` for the steps below.
  value_counts <- table[column] %>%
    rename(value = all_of(column)) %>%
    count(value) %>%
    mutate(percentage = prop.table(n) * 100)
  print(value_counts, n = 100)
  ggplot(value_counts, aes(x = value, y = n)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
# Frequencies for A3 (originally W003).
displayFunction1(litdata, "A3")
## # A tibble: 6 × 3
## value n percentage
## <dbl> <int> <dbl>
## 1 2 6 1.92
## 2 3 29 9.27
## 3 4 74 23.6
## 4 5 90 28.8
## 5 NA 112 35.8
## 6 NaN 2 0.639
## Warning: Removed 2 rows containing missing values (`position_stack()`).
# Frequencies for B3 (originally K003).
displayFunction1(litdata, "B3")
## # A tibble: 7 × 3
## value n percentage
## <dbl> <int> <dbl>
## 1 1 16 5.11
## 2 2 47 15.0
## 3 3 71 22.7
## 4 4 42 13.4
## 5 5 6 1.92
## 6 NA 130 41.5
## 7 NaN 1 0.319
## Warning: Removed 2 rows containing missing values (`position_stack()`).
# Frequency table and bar chart for the logical item D1_1
displayFunction1(table = litdata, column = "D1_1")
## # A tibble: 3 × 3
## value n percentage
## <lgl> <int> <dbl>
## 1 FALSE 64 20.4
## 2 TRUE 51 16.3
## 3 NA 198 63.3
# Frequency table and bar chart for the categorical item D5
# (its levels look like study programmes)
displayFunction1(table = litdata, column = "D5")
## # A tibble: 18 × 3
## value n percentage
## <fct> <int> <dbl>
## 1 Bachelor Architektur 1 0.319
## 2 Bachelor Bauingenieurwesen 2 0.639
## 3 Bachelor Betriebsökonomie 7 2.24
## 4 Bachelor Computational and Data Science 2 0.639
## 5 Bachelor Digital Business Management 6 1.92
## 6 Bachelor Information Science 40 12.8
## 7 Bachelor Mobile Robotics 1 0.319
## 8 Bachelor Multimedia Production 8 2.56
## 9 Bachelor Photonics 3 0.958
## 10 Bachelor Service Innovation and Design 5 1.60
## 11 Bachelor Sport Management 7 2.24
## 12 Bachelor Tourismus 8 2.56
## 13 CAS Sport Management 4.0 1 0.319
## 14 MAS Information Science 1 0.319
## 15 Master Information and Data Management 10 3.19
## 16 Master New Business 7 2.24
## 17 Master Tourism and Change 4 1.28
## 18 <NA> 200 63.9
# Frequency table and bar chart for D7 (year of birth)
displayFunction1(table = litdata, column = "D7")
## # A tibble: 24 × 3
## value n percentage
## <dbl> <int> <dbl>
## 1 1971 1 0.319
## 2 1972 1 0.319
## 3 1973 1 0.319
## 4 1974 3 0.958
## 5 1980 1 0.319
## 6 1984 1 0.319
## 7 1985 4 1.28
## 8 1987 5 1.60
## 9 1988 3 0.958
## 10 1989 2 0.639
## 11 1990 4 1.28
## 12 1991 5 1.60
## 13 1992 6 1.92
## 14 1993 8 2.56
## 15 1994 2 0.639
## 16 1995 10 3.19
## 17 1996 3 0.958
## 18 1997 14 4.47
## 19 1998 15 4.79
## 20 1999 7 2.24
## 21 2000 7 2.24
## 22 2001 2 0.639
## 23 2002 1 0.319
## 24 NA 207 66.1
## Warning: Removed 1 rows containing missing values (`position_stack()`).
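Die Warnung resultiert daraus, dass die Zeile mit dem Wert NA (fehlende Angaben) keine Position auf der numerischen x-Achse hat und deshalb von ggplot2 nicht gezeichnet wird; sie hängt nicht von der Anzahl der NA ab.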
# Frequency table and bar chart for the categorical item D8
# (levels "männlich" / "Weiblich")
displayFunction1(table = litdata, column = "D8")
## # A tibble: 3 × 3
## value n percentage
## <fct> <int> <dbl>
## 1 männlich 39 12.5
## 2 Weiblich 75 24.0
## 3 <NA> 199 63.6
We have the year 2021, so age is computed as 2021 minus the year of birth.

## Preparation
# Years of birth with the missing entries dropped
birthyears <- litdata$D7[!is.na(litdata$D7)]
# Age relative to the year 2021
age <- 2021 - birthyears
# Frequency table of the ages (no NA left at this point)
freq(age)
## frequency percent frequency_sum percent_sum
## 19 1 0.94 1 0.94
## 20 2 1.89 3 2.83
## 21 7 6.60 10 9.43
## 22 7 6.60 17 16.04
## 23 15 14.15 32 30.19
## 24 14 13.21 46 43.40
## 25 3 2.83 49 46.23
## 26 10 9.43 59 55.66
## 27 2 1.89 61 57.55
## 28 8 7.55 69 65.09
## 29 6 5.66 75 70.75
## 30 5 4.72 80 75.47
## 31 4 3.77 84 79.25
## 32 2 1.89 86 81.13
## 33 3 2.83 89 83.96
## 34 5 4.72 94 88.68
## 36 4 3.77 98 92.45
## 37 1 0.94 99 93.40
## 41 1 0.94 100 94.34
## 47 3 2.83 103 97.17
## 48 1 0.94 104 98.11
## 49 1 0.94 105 99.06
## 50 1 0.94 106 100.00
## count percent
## valid 106 100
## NA 0 0
## Total 106 100
# Same table on the unfiltered column, so the share of NA is reported too
freq(data = 2021 - litdata$D7)
## frequency percent frequency_sum percent_sum
## 19 1 0.94 1 0.94
## 20 2 1.89 3 2.83
## 21 7 6.60 10 9.43
## 22 7 6.60 17 16.04
## 23 15 14.15 32 30.19
## 24 14 13.21 46 43.40
## 25 3 2.83 49 46.23
## 26 10 9.43 59 55.66
## 27 2 1.89 61 57.55
## 28 8 7.55 69 65.09
## 29 6 5.66 75 70.75
## 30 5 4.72 80 75.47
## 31 4 3.77 84 79.25
## 32 2 1.89 86 81.13
## 33 3 2.83 89 83.96
## 34 5 4.72 94 88.68
## 36 4 3.77 98 92.45
## 37 1 0.94 99 93.40
## 41 1 0.94 100 94.34
## 47 3 2.83 103 97.17
## 48 1 0.94 104 98.11
## 49 1 0.94 105 99.06
## 50 1 0.94 106 100.00
## count percent
## valid 106 33.87
## NA 207 66.13
## Total 313 100.00
https://www.beratung-statistik.de/statistik-beratung-infos/r-tutorial/deskriptive-statistik-r/
# Mode, computed with the getmode() helper defined earlier in this file
getmode(age)
## [1] 23
# Median
median(age)
## [1] 26
# Arithmetic mean
mean(age)
## [1] 27.78302
# Range: distance between the largest and the smallest age
max(age)-min(age)
## [1] 31
# Interquartile range
IQR(age)
## [1] 7
# Minimum, quartiles, mean and maximum in a single call
summary(age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.00 23.00 26.00 27.78 30.00 50.00
# Variance
var(age)
## [1] 45.04771
# Standard deviation
sd(age)
## [1] 6.711759
# skewness() is provided by the moments package
library(moments)
skewness(age)
## [1] 1.572229
## [1] 1.572229
Die Kennzahl Schiefe ist Null bei einer perfekt symmetrischen Verteilung, größer als Null bei einer rechtsschiefen und kleiner als Null bei einer linksschiefen Verteilung. Quelle: https://www.beratung-statistik.de/statistik-beratung-infos/r-tutorial/deskriptive-statistik-r/
# Kurtosis of the ages via the moments package
moments::kurtosis(age)
## [1] 5.383429
Eine weitere bekannte Kennzahl ist die Kurtosis. Um eine Vorstellung von der Bedeutung der Kurtosis zu erhalten, betrachten Sie nachfolgende Graphik.
In dieser Graphik sind eine Normalverteilung sowie eine steilgipflige (aka leptokurtische) und eine flachgipflige (aka platykurtische) Verteilung dargestellt. Die steilgipflige Verteilung ist in der Mitte spitzer als die Normalverteilung und an den Rändern breiter. Bei der flachgipfligen Verteilung ist es anders herum. Die Kurtosis ist nun eine Kennzahl, mit der untersucht wird, ob eine Verteilung im Vergleich zur Normalverteilung flachgipflig oder steilgipflig ist:
- Für eine Normalverteilung nimmt die Kurtosis genau den Wert 3 an.
- Eine steilgipflige Verteilung hat eine Kurtosis, die größer als 3 ist.
- Für eine flachgipflige Verteilung ist die Kurtosis kleiner als 3.
- Beachten Sie: Anstatt der Kurtosis wird häufig auch der sogenannte Exzess verwendet. Dies ist eine weitere Kennzahl, die definiert ist durch die Formel: Exzess = Kurtosis - 3.
- Der Exzess ist somit größer als Null, wenn die Verteilung steilgipflig ist, und kleiner als Null bei einer flachgipfligen Verteilung.
Frech kopiert von: https://www.beratung-statistik.de/statistik-beratung-infos/r-tutorial/deskriptive-statistik-r/
# Normal Q-Q plot with reference line for the ages
qqnorm(age)
qqline(age)
# Histogram with the default binning, then one histogram per requested
# number of breaks to show how the bin count changes the picture
hist(age)
for (n_breaks in c(3, 5, 7, 10, 15, 20, 30)) {
  hist(age, breaks = n_breaks)
}
Auf der Grundlage von Daten aus einer Schweizer Schülererhebung wird aus verschiedenen Variablen (z.B. Angaben zum Beruf der Eltern, zur Elternausbildung sowie zur Anzahl von Büchern zu Hause) ein Index zur sozialen Herkunft erstellt. Dieser Index erscheint in einer neu gebildeten numerischen Variable im Datensatz, gibt also für jeden Fall in diesem Datensatz einen Skalenwert zur sozialen Herkunft an. Die neu gebildete Skala läuft von 0 (Wert mit der geringsten Ausprägung) bis 10 (Wert mit der höchsten Ausprägung).
Für die gesamte Schweiz liegt der arithmetische Mittelwert auf dieser Skala bei 5.6 und die Standardabweichung beträgt 1.8 (Zahlen sind von mir frei erfunden!). Die Verteilung entspricht einer Normalverteilung.
Der Mittelwert der Verteilung der Bündner Schüler liegt etwas tiefer als in der Gesamtschweiz, nämlich bei 5.1 mit einer Standardabweichung von 2.
Wo in der Verteilung der Schweiz liegt der Bündner Mittelwert, bzw. wie viele Schweizer Schüler haben bzgl. der sozialen Herkunft einen tieferen Wert als der typische Bündner Schüler?
# P(X < 5.1) under the Swiss distribution (mean 5.6, sd 1.8): the share of
# Swiss pupils below the Graubünden mean
pnorm(q = 5.1, mean = 5.6, sd = 1.8, lower.tail = TRUE)
## [1] 0.3905915
In einem Bündner Ort beträgt der Mittelwert auf der Skala zur sozialen Herkunft 5.6, er ist also genauso hoch wie in der Gesamtschweiz. Was ist zur Lage dieses Wertes bezogen auf die Verteilung in Graubünden zu sagen? Also: Wie viele Schüler in GR liegen mit ihrem Wert darunter?
# P(X < 5.6) under the Graubünden distribution (mean 5.1, sd 2.0): the share
# of Graubünden pupils below the value 5.6
pnorm(q = 5.6, mean = 5.1, sd = 2.0, lower.tail = TRUE)
## [1] 0.5987063