From 1871414592acb8d0e5fc4e6acad92386e80cd2e4 Mon Sep 17 00:00:00 2001 From: Marc Gauch <34353267+marcgauch@users.noreply.github.com> Date: Fri, 25 Nov 2022 16:31:02 +0100 Subject: [PATCH] A bit improved --- report.html | 151 +++++++++++++++++++++++++++------------------------- report.rmd | 31 +++++------ 2 files changed, 94 insertions(+), 88 deletions(-) diff --git a/report.html b/report.html index a93971f..063db62 100644 --- a/report.html +++ b/report.html @@ -11,7 +11,7 @@ - + Quantitative Methods HS22 @@ -1487,7 +1487,7 @@ border-radius: 0px;

Quantitative Methods HS22

Marc Gauch

-

2022-11-18

+

2022-11-25

@@ -1508,26 +1508,27 @@ if (!require(moments)) {

2.1 Frequency Table ordered from wish.com

-
freq <- function(data) {
+
freq <- function(data, rounded_digits = 2) {
+  # counts
+  total_count <- length(data)
   na_count <- length(data[is.na(data)])
-  valid_count <- length(data) - na_count
+  valid_count <- total_count - na_count
+  
   frequency <- table(data)
   p <- prop.table(frequency)
-  percent <- round(p * 100, digits = 2)
-  frequency_sum <- cumsum(frequency)
-  hkum <- cumsum(p)
-  percent_sum <- round(hkum * 100, digits = 2)
-  freq_table <- cbind(frequency, percent, frequency_sum, percent_sum)
-  valid_percent <- round(valid_count / length(data) * 100, digits = 2)
-  na_percent <- round(na_count / length(data) * 100, digits = 2)
+  valid_percent <- round(p * 100, digits = rounded_digits)
+  na_percent <- round(na_count / length(data) * 100, digits = rounded_digits)
+  
+  percent <- round(frequency/total_count*100, digits = rounded_digits)
+  cumulative_percent <- round(cumsum(p) * 100, digits = rounded_digits)
+  freq_table <- cbind(frequency, percent, valid_percent, cumulative_percent)
 
+  valid_percent_sum <- sum(as.data.frame(freq_table)$percent)
+  Valid_Total <- c(valid_count, valid_percent_sum, 100, NaN)
 
-  print(freq_table)
-
-  count <- c(valid_count, na_count, valid_count + na_count)
-  percent <- c(valid_percent, na_percent, valid_percent + na_percent)
-  df <- data.frame(count, percent, row.names = c("valid", "NA", "Total"))
-  print(df)
+  NAs <- c(na_count, na_percent, NaN, NaN)
+  Total <- c(total_count, 100, NaN, NaN)
+  print(rbind(freq_table, Valid_Total, NAs, Total))
 }

Source: https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret and adapted

@@ -2395,64 +2396,62 @@ age <- 2021 - birthyears

6.1 Frequency

freq(age)
-
##    frequency percent frequency_sum percent_sum
-## 19         1    0.94             1        0.94
-## 20         2    1.89             3        2.83
-## 21         7    6.60            10        9.43
-## 22         7    6.60            17       16.04
-## 23        15   14.15            32       30.19
-## 24        14   13.21            46       43.40
-## 25         3    2.83            49       46.23
-## 26        10    9.43            59       55.66
-## 27         2    1.89            61       57.55
-## 28         8    7.55            69       65.09
-## 29         6    5.66            75       70.75
-## 30         5    4.72            80       75.47
-## 31         4    3.77            84       79.25
-## 32         2    1.89            86       81.13
-## 33         3    2.83            89       83.96
-## 34         5    4.72            94       88.68
-## 36         4    3.77            98       92.45
-## 37         1    0.94            99       93.40
-## 41         1    0.94           100       94.34
-## 47         3    2.83           103       97.17
-## 48         1    0.94           104       98.11
-## 49         1    0.94           105       99.06
-## 50         1    0.94           106      100.00
-##       count percent
-## valid   106     100
-## NA        0       0
-## Total   106     100
+
##             frequency percent valid_percent cumulative_percent
+## 19                  1    0.94          0.94               0.94
+## 20                  2    1.89          1.89               2.83
+## 21                  7    6.60          6.60               9.43
+## 22                  7    6.60          6.60              16.04
+## 23                 15   14.15         14.15              30.19
+## 24                 14   13.21         13.21              43.40
+## 25                  3    2.83          2.83              46.23
+## 26                 10    9.43          9.43              55.66
+## 27                  2    1.89          1.89              57.55
+## 28                  8    7.55          7.55              65.09
+## 29                  6    5.66          5.66              70.75
+## 30                  5    4.72          4.72              75.47
+## 31                  4    3.77          3.77              79.25
+## 32                  2    1.89          1.89              81.13
+## 33                  3    2.83          2.83              83.96
+## 34                  5    4.72          4.72              88.68
+## 36                  4    3.77          3.77              92.45
+## 37                  1    0.94          0.94              93.40
+## 41                  1    0.94          0.94              94.34
+## 47                  3    2.83          2.83              97.17
+## 48                  1    0.94          0.94              98.11
+## 49                  1    0.94          0.94              99.06
+## 50                  1    0.94          0.94             100.00
+## Valid_Total       106   99.98        100.00                NaN
+## NAs                 0    0.00           NaN                NaN
+## Total             106  100.00           NaN                NaN
# with NA
 freq(2021 - litdata$D7)
-
##    frequency percent frequency_sum percent_sum
-## 19         1    0.94             1        0.94
-## 20         2    1.89             3        2.83
-## 21         7    6.60            10        9.43
-## 22         7    6.60            17       16.04
-## 23        15   14.15            32       30.19
-## 24        14   13.21            46       43.40
-## 25         3    2.83            49       46.23
-## 26        10    9.43            59       55.66
-## 27         2    1.89            61       57.55
-## 28         8    7.55            69       65.09
-## 29         6    5.66            75       70.75
-## 30         5    4.72            80       75.47
-## 31         4    3.77            84       79.25
-## 32         2    1.89            86       81.13
-## 33         3    2.83            89       83.96
-## 34         5    4.72            94       88.68
-## 36         4    3.77            98       92.45
-## 37         1    0.94            99       93.40
-## 41         1    0.94           100       94.34
-## 47         3    2.83           103       97.17
-## 48         1    0.94           104       98.11
-## 49         1    0.94           105       99.06
-## 50         1    0.94           106      100.00
-##       count percent
-## valid   106   33.87
-## NA      207   66.13
-## Total   313  100.00
+
##             frequency percent valid_percent cumulative_percent
+## 19                  1    0.32          0.94               0.94
+## 20                  2    0.64          1.89               2.83
+## 21                  7    2.24          6.60               9.43
+## 22                  7    2.24          6.60              16.04
+## 23                 15    4.79         14.15              30.19
+## 24                 14    4.47         13.21              43.40
+## 25                  3    0.96          2.83              46.23
+## 26                 10    3.19          9.43              55.66
+## 27                  2    0.64          1.89              57.55
+## 28                  8    2.56          7.55              65.09
+## 29                  6    1.92          5.66              70.75
+## 30                  5    1.60          4.72              75.47
+## 31                  4    1.28          3.77              79.25
+## 32                  2    0.64          1.89              81.13
+## 33                  3    0.96          2.83              83.96
+## 34                  5    1.60          4.72              88.68
+## 36                  4    1.28          3.77              92.45
+## 37                  1    0.32          0.94              93.40
+## 41                  1    0.32          0.94              94.34
+## 47                  3    0.96          2.83              97.17
+## 48                  1    0.32          0.94              98.11
+## 49                  1    0.32          0.94              99.06
+## 50                  1    0.32          0.94             100.00
+## Valid_Total       106   33.89        100.00                NaN
+## NAs               207   66.13           NaN                NaN
+## Total             313  100.00           NaN                NaN

6.2 Selbststudium 2

@@ -2550,6 +2549,12 @@ qqline(age)

6.2.11 Histogram for age

+
+

Frequency

+
hist(age, freq = F)
+lines(density(age), lwd = 2, col = "black")
+

+

Auto Breaks

hist(age)
diff --git a/report.rmd b/report.rmd index 854b98f..00d1c7e 100644 --- a/report.rmd +++ b/report.rmd @@ -34,26 +34,27 @@ if (!require(moments)) { ## Frequency Table ordered from wish.com ```{r} -freq <- function(data) { +freq <- function(data, rounded_digits = 2) { + # counts + total_count <- length(data) na_count <- length(data[is.na(data)]) - valid_count <- length(data) - na_count + valid_count <- total_count - na_count + frequency <- table(data) p <- prop.table(frequency) - percent <- round(p * 100, digits = 2) - frequency_sum <- cumsum(frequency) - hkum <- cumsum(p) - percent_sum <- round(hkum * 100, digits = 2) - freq_table <- cbind(frequency, percent, frequency_sum, percent_sum) - valid_percent <- round(valid_count / length(data) * 100, digits = 2) - na_percent <- round(na_count / length(data) * 100, digits = 2) + valid_percent <- round(p * 100, digits = rounded_digits) + na_percent <- round(na_count / length(data) * 100, digits = rounded_digits) + + percent <- round(frequency/total_count*100, digits = rounded_digits) + cumulative_percent <- round(cumsum(p) * 100, digits = rounded_digits) + freq_table <- cbind(frequency, percent, valid_percent, cumulative_percent) + valid_percent_sum <- sum(as.data.frame(freq_table)$percent) + Valid_Total <- c(valid_count, valid_percent_sum, 100, NaN) - print(freq_table) - - count <- c(valid_count, na_count, valid_count + na_count) - percent <- c(valid_percent, na_percent, valid_percent + na_percent) - df <- data.frame(count, percent, row.names = c("valid", "NA", "Total")) - print(df) + NAs <- c(na_count, na_percent, NaN, NaN) + Total <- c(total_count, 100, NaN, NaN) + print(rbind(freq_table, Valid_Total, NAs, Total)) } ``` *Source: https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret and adapted*