From 1871414592acb8d0e5fc4e6acad92386e80cd2e4 Mon Sep 17 00:00:00 2001 From: Marc Gauch <34353267+marcgauch@users.noreply.github.com> Date: Fri, 25 Nov 2022 16:31:02 +0100 Subject: [PATCH] A bit improved --- report.html | 151 +++++++++++++++++++++++++++------------------------- report.rmd | 31 +++++------ 2 files changed, 94 insertions(+), 88 deletions(-) diff --git a/report.html b/report.html index a93971f..063db62 100644 --- a/report.html +++ b/report.html @@ -11,7 +11,7 @@ - +
freq <- function(data) {
+freq <- function(data, rounded_digits = 2) { # prints a frequency table for `data`; percentages are rounded to `rounded_digits` digits
+ # counts: all observations, the missing (NA) ones, and the non-missing remainder
+ total_count <- length(data)
na_count <- length(data[is.na(data)])
- valid_count <- length(data) - na_count
+ valid_count <- total_count - na_count
+
frequency <- table(data)
p <- prop.table(frequency)
- percent <- round(p * 100, digits = 2)
- frequency_sum <- cumsum(frequency)
- hkum <- cumsum(p)
- percent_sum <- round(hkum * 100, digits = 2)
- freq_table <- cbind(frequency, percent, frequency_sum, percent_sum)
- valid_percent <- round(valid_count / length(data) * 100, digits = 2)
- na_percent <- round(na_count / length(data) * 100, digits = 2)
+ valid_percent <- round(p * 100, digits = rounded_digits) # share among non-missing values only: table() leaves NA out by default
+ na_percent <- round(na_count / length(data) * 100, digits = rounded_digits) # share of NAs among all observations; length(data) is the same value as total_count
+
+ percent <- round(frequency/total_count*100, digits = rounded_digits) # share among all observations, so NAs count in the denominator
+ cumulative_percent <- round(cumsum(p) * 100, digits = rounded_digits) # running total of the non-missing shares, rounded after accumulating
+ freq_table <- cbind(frequency, percent, valid_percent, cumulative_percent) # one row per distinct value, one column per statistic
+ valid_percent_sum <- sum(as.data.frame(freq_table)$percent) # NOTE(review): adds up already-rounded row values, so rounding error accumulates (the rendered report shows 99.98 and 33.89); round(valid_count / total_count * 100, digits = rounded_digits) looks like the intended value - confirm
+ Valid_Total <- c(valid_count, valid_percent_sum, 100, NaN) # summary rows follow the column order of freq_table; NaN fills cells that have no meaningful value
- print(freq_table)
-
- count <- c(valid_count, na_count, valid_count + na_count)
- percent <- c(valid_percent, na_percent, valid_percent + na_percent)
- df <- data.frame(count, percent, row.names = c("valid", "NA", "Total"))
- print(df)
+ NAs <- c(na_count, na_percent, NaN, NaN) # missing values: count and share of all observations
+ Total <- c(total_count, 100, NaN, NaN) # all observations; the percent cell is the constant 100
+ print(rbind(freq_table, Valid_Total, NAs, Total)) # per-value rows first, then the three summary rows
}
Source: https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret
and adapted
@@ -2395,64 +2396,62 @@ age <- 2021 - birthyears
freq(age)
-## frequency percent frequency_sum percent_sum
-## 19 1 0.94 1 0.94
-## 20 2 1.89 3 2.83
-## 21 7 6.60 10 9.43
-## 22 7 6.60 17 16.04
-## 23 15 14.15 32 30.19
-## 24 14 13.21 46 43.40
-## 25 3 2.83 49 46.23
-## 26 10 9.43 59 55.66
-## 27 2 1.89 61 57.55
-## 28 8 7.55 69 65.09
-## 29 6 5.66 75 70.75
-## 30 5 4.72 80 75.47
-## 31 4 3.77 84 79.25
-## 32 2 1.89 86 81.13
-## 33 3 2.83 89 83.96
-## 34 5 4.72 94 88.68
-## 36 4 3.77 98 92.45
-## 37 1 0.94 99 93.40
-## 41 1 0.94 100 94.34
-## 47 3 2.83 103 97.17
-## 48 1 0.94 104 98.11
-## 49 1 0.94 105 99.06
-## 50 1 0.94 106 100.00
-## count percent
-## valid 106 100
-## NA 0 0
-## Total 106 100
+## frequency percent valid_percent cumulative_percent
+## 19 1 0.94 0.94 0.94
+## 20 2 1.89 1.89 2.83
+## 21 7 6.60 6.60 9.43
+## 22 7 6.60 6.60 16.04
+## 23 15 14.15 14.15 30.19
+## 24 14 13.21 13.21 43.40
+## 25 3 2.83 2.83 46.23
+## 26 10 9.43 9.43 55.66
+## 27 2 1.89 1.89 57.55
+## 28 8 7.55 7.55 65.09
+## 29 6 5.66 5.66 70.75
+## 30 5 4.72 4.72 75.47
+## 31 4 3.77 3.77 79.25
+## 32 2 1.89 1.89 81.13
+## 33 3 2.83 2.83 83.96
+## 34 5 4.72 4.72 88.68
+## 36 4 3.77 3.77 92.45
+## 37 1 0.94 0.94 93.40
+## 41 1 0.94 0.94 94.34
+## 47 3 2.83 2.83 97.17
+## 48 1 0.94 0.94 98.11
+## 49 1 0.94 0.94 99.06
+## 50 1 0.94 0.94 100.00
+## Valid_Total 106 99.98 100.00 NaN
+## NAs 0 0.00 NaN NaN
+## Total 106 100.00 NaN NaN
# with NA
freq(2021 - litdata$D7)
-## frequency percent frequency_sum percent_sum
-## 19 1 0.94 1 0.94
-## 20 2 1.89 3 2.83
-## 21 7 6.60 10 9.43
-## 22 7 6.60 17 16.04
-## 23 15 14.15 32 30.19
-## 24 14 13.21 46 43.40
-## 25 3 2.83 49 46.23
-## 26 10 9.43 59 55.66
-## 27 2 1.89 61 57.55
-## 28 8 7.55 69 65.09
-## 29 6 5.66 75 70.75
-## 30 5 4.72 80 75.47
-## 31 4 3.77 84 79.25
-## 32 2 1.89 86 81.13
-## 33 3 2.83 89 83.96
-## 34 5 4.72 94 88.68
-## 36 4 3.77 98 92.45
-## 37 1 0.94 99 93.40
-## 41 1 0.94 100 94.34
-## 47 3 2.83 103 97.17
-## 48 1 0.94 104 98.11
-## 49 1 0.94 105 99.06
-## 50 1 0.94 106 100.00
-## count percent
-## valid 106 33.87
-## NA 207 66.13
-## Total 313 100.00
+## frequency percent valid_percent cumulative_percent
+## 19 1 0.32 0.94 0.94
+## 20 2 0.64 1.89 2.83
+## 21 7 2.24 6.60 9.43
+## 22 7 2.24 6.60 16.04
+## 23 15 4.79 14.15 30.19
+## 24 14 4.47 13.21 43.40
+## 25 3 0.96 2.83 46.23
+## 26 10 3.19 9.43 55.66
+## 27 2 0.64 1.89 57.55
+## 28 8 2.56 7.55 65.09
+## 29 6 1.92 5.66 70.75
+## 30 5 1.60 4.72 75.47
+## 31 4 1.28 3.77 79.25
+## 32 2 0.64 1.89 81.13
+## 33 3 0.96 2.83 83.96
+## 34 5 1.60 4.72 88.68
+## 36 4 1.28 3.77 92.45
+## 37 1 0.32 0.94 93.40
+## 41 1 0.32 0.94 94.34
+## 47 3 0.96 2.83 97.17
+## 48 1 0.32 0.94 98.11
+## 49 1 0.32 0.94 99.06
+## 50 1 0.32 0.94 100.00
+## Valid_Total 106 33.89 100.00 NaN
+## NAs 207 66.13 NaN NaN
+## Total 313 100.00 NaN NaN
hist(age, freq = F)
+lines(density(age), lwd = 2, col = "black")
+
+hist(age)
diff --git a/report.rmd b/report.rmd
index 854b98f..00d1c7e 100644
--- a/report.rmd
+++ b/report.rmd
@@ -34,26 +34,27 @@ if (!require(moments)) {
## Frequency Table ordered from wish.com
```{r}
-freq <- function(data) {
+freq <- function(data, rounded_digits = 2) { # prints a frequency table for `data`; percentages are rounded to `rounded_digits` digits
+ # counts: all observations, the missing (NA) ones, and the non-missing remainder
+ total_count <- length(data)
na_count <- length(data[is.na(data)])
- valid_count <- length(data) - na_count
+ valid_count <- total_count - na_count
+
frequency <- table(data)
p <- prop.table(frequency)
- percent <- round(p * 100, digits = 2)
- frequency_sum <- cumsum(frequency)
- hkum <- cumsum(p)
- percent_sum <- round(hkum * 100, digits = 2)
- freq_table <- cbind(frequency, percent, frequency_sum, percent_sum)
- valid_percent <- round(valid_count / length(data) * 100, digits = 2)
- na_percent <- round(na_count / length(data) * 100, digits = 2)
+ valid_percent <- round(p * 100, digits = rounded_digits) # share among non-missing values only: table() leaves NA out by default
+ na_percent <- round(na_count / length(data) * 100, digits = rounded_digits) # share of NAs among all observations; length(data) is the same value as total_count
+
+ percent <- round(frequency/total_count*100, digits = rounded_digits) # share among all observations, so NAs count in the denominator
+ cumulative_percent <- round(cumsum(p) * 100, digits = rounded_digits) # running total of the non-missing shares, rounded after accumulating
+ freq_table <- cbind(frequency, percent, valid_percent, cumulative_percent) # one row per distinct value, one column per statistic
+ valid_percent_sum <- sum(as.data.frame(freq_table)$percent) # NOTE(review): adds up already-rounded row values, so rounding error accumulates (the rendered report shows 99.98 and 33.89); round(valid_count / total_count * 100, digits = rounded_digits) looks like the intended value - confirm
+ Valid_Total <- c(valid_count, valid_percent_sum, 100, NaN) # summary rows follow the column order of freq_table; NaN fills cells that have no meaningful value
- print(freq_table)
-
- count <- c(valid_count, na_count, valid_count + na_count)
- percent <- c(valid_percent, na_percent, valid_percent + na_percent)
- df <- data.frame(count, percent, row.names = c("valid", "NA", "Total"))
- print(df)
+ NAs <- c(na_count, na_percent, NaN, NaN) # missing values: count and share of all observations
+ Total <- c(total_count, 100, NaN, NaN) # all observations; the percent cell is the constant 100
+ print(rbind(freq_table, Valid_Total, NAs, Total)) # per-value rows first, then the three summary rows
}
```
*Source: https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret and adapted*