Linted
parent
afd7d1babf
commit
51cd711da8
106
report.html
106
report.html
|
@ -1494,11 +1494,11 @@ border-radius: 0px;
|
||||||
|
|
||||||
<div id="preparation" class="section level1" number="1">
|
<div id="preparation" class="section level1" number="1">
|
||||||
<h1><span class="header-section-number">1</span> Preparation</h1>
|
<h1><span class="header-section-number">1</span> Preparation</h1>
|
||||||
<pre class="r"><code>if (!require(tidyverse)){
|
<pre class="r"><code>if (!require(tidyverse)) {
|
||||||
install.packages("tidyverse")
|
install.packages("tidyverse")
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
}
|
}
|
||||||
if (!require(moments)){
|
if (!require(moments)) {
|
||||||
install.packages("moments")
|
install.packages("moments")
|
||||||
library(moments)
|
library(moments)
|
||||||
}</code></pre>
|
}</code></pre>
|
||||||
|
@ -1508,28 +1508,26 @@ if (!require(moments)){
|
||||||
<div id="frequency-table-ordered-from-wish.com" class="section level2" number="2.1">
|
<div id="frequency-table-ordered-from-wish.com" class="section level2" number="2.1">
|
||||||
<h2><span class="header-section-number">2.1</span> Frequency Table
|
<h2><span class="header-section-number">2.1</span> Frequency Table
|
||||||
ordered from wish.com</h2>
|
ordered from wish.com</h2>
|
||||||
<pre class="r"><code>freq <- function(data){
|
<pre class="r"><code>freq <- function(data) {
|
||||||
na_count = length(data[is.na(data)])
|
na_count <- length(data[is.na(data)])
|
||||||
valid_count = length(data)-na_count
|
valid_count <- length(data) - na_count
|
||||||
frequency <- table(data)
|
frequency <- table(data)
|
||||||
p <- prop.table(frequency)
|
p <- prop.table(frequency)
|
||||||
percent <- round(p*100, digits = 2)
|
percent <- round(p * 100, digits = 2)
|
||||||
frequency_sum <- cumsum(frequency)
|
frequency_sum <- cumsum(frequency)
|
||||||
hkum <- cumsum(p)
|
hkum <- cumsum(p)
|
||||||
percent_sum <- round(hkum*100, digits = 2)
|
percent_sum <- round(hkum * 100, digits = 2)
|
||||||
freq_table <- cbind(frequency, percent, frequency_sum, percent_sum)
|
freq_table <- cbind(frequency, percent, frequency_sum, percent_sum)
|
||||||
valid_percent <- round(valid_count / length(data)*100, digits = 2)
|
valid_percent <- round(valid_count / length(data) * 100, digits = 2)
|
||||||
na_percent <- round(na_count / length(data)*100, digits = 2)
|
na_percent <- round(na_count / length(data) * 100, digits = 2)
|
||||||
|
|
||||||
|
|
||||||
print(freq_table)
|
print(freq_table)
|
||||||
|
|
||||||
count <- c(valid_count, na_count, valid_count+na_count)
|
count <- c(valid_count, na_count, valid_count + na_count)
|
||||||
percent <- c(valid_percent, na_percent, valid_percent+na_percent)
|
percent <- c(valid_percent, na_percent, valid_percent + na_percent)
|
||||||
totall <- c(valid_count+na_count, valid_percent+na_percent)
|
|
||||||
df <- data.frame(count, percent, row.names = c("valid", "NA", "Total"))
|
df <- data.frame(count, percent, row.names = c("valid", "NA", "Total"))
|
||||||
print(df)
|
print(df)
|
||||||
|
|
||||||
}</code></pre>
|
}</code></pre>
|
||||||
<p><em>Source: <a href="https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret" class="uri">https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret</a>
|
<p><em>Source: <a href="https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret" class="uri">https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret</a>
|
||||||
and adapted</em></p>
|
and adapted</em></p>
|
||||||
|
@ -1537,9 +1535,9 @@ and adapted</em></p>
|
||||||
<div id="modus" class="section level2" number="2.2">
|
<div id="modus" class="section level2" number="2.2">
|
||||||
<h2><span class="header-section-number">2.2</span> Modus</h2>
|
<h2><span class="header-section-number">2.2</span> Modus</h2>
|
||||||
<pre class="r"><code>getmode <- function(v) {
|
<pre class="r"><code>getmode <- function(v) {
|
||||||
uniqv <- unique(v)
|
uniqv <- unique(v)
|
||||||
x <- tabulate(match(v, uniqv))
|
x <- tabulate(match(v, uniqv))
|
||||||
uniqv[which(x==max(x))]
|
uniqv[which(x == max(x))]
|
||||||
}</code></pre>
|
}</code></pre>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -1816,7 +1814,7 @@ inspection of data</h2>
|
||||||
<div id="converting-strings-to-numbers-and-keine-antwort-zu-nan" class="section level2" number="4.1">
|
<div id="converting-strings-to-numbers-and-keine-antwort-zu-nan" class="section level2" number="4.1">
|
||||||
<h2><span class="header-section-number">4.1</span> Converting Strings to
|
<h2><span class="header-section-number">4.1</span> Converting Strings to
|
||||||
numbers and <em>Keine Antwort</em> zu <em>NaN</em></h2>
|
numbers and <em>Keine Antwort</em> zu <em>NaN</em></h2>
|
||||||
<pre class="r"><code>litdata <- litdata %>%
|
<pre class="r"><code>litdata <- litdata %>%
|
||||||
mutate_all(~ replace(., . == "Stimme voll zu5", 5)) %>%
|
mutate_all(~ replace(., . == "Stimme voll zu5", 5)) %>%
|
||||||
mutate_all(~ replace(., . == "Stimme überhaupt nicht zu1", 1)) %>%
|
mutate_all(~ replace(., . == "Stimme überhaupt nicht zu1", 1)) %>%
|
||||||
mutate_all(~ replace(., . == "Keine Antwort-", NaN))</code></pre>
|
mutate_all(~ replace(., . == "Keine Antwort-", NaN))</code></pre>
|
||||||
|
@ -1826,17 +1824,17 @@ numbers and <em>Keine Antwort</em> zu <em>NaN</em></h2>
|
||||||
<p>The following code will <strong>NOT</strong> be run. The Idea is to
|
<p>The following code will <strong>NOT</strong> be run. The Idea is to
|
||||||
show a way to automatically edit all columns. It works but some columns
|
show a way to automatically edit all columns. It works but some columns
|
||||||
are NOT numeric.</p>
|
are NOT numeric.</p>
|
||||||
<pre class="r"><code> # All colnames that exist
|
<pre class="r"><code># All colnames that exist
|
||||||
litdataColnames <- colnames(litdata)
|
litdataColnames <- colnames(litdata)
|
||||||
# the ones we don't want to change
|
# the ones we don't want to change
|
||||||
litdataNonNumericCols <- c("submitdate", "startlanguage", "startdate", "datestamp", "lastpage", "seed")
|
litdataNonNumericCols <- c("submitdate", "startlanguage", "startdate", "datestamp", "lastpage", "seed")
|
||||||
# the colnames that should be changed
|
# the colnames that should be changed
|
||||||
litdataColsToMakeNumeric <- litdataColnames[!(litdataColnames %in% litdataNonNumericCols)]
|
litdataColsToMakeNumeric <- litdataColnames[!(litdataColnames %in% litdataNonNumericCols)]
|
||||||
print(litdataColsToMakeNumeric)
|
print(litdataColsToMakeNumeric)
|
||||||
litdataColsToMakeNumeric <- c("R1")
|
litdataColsToMakeNumeric <- c("R1")
|
||||||
for (col in litdataColsToMakeNumeric) {
|
for (col in litdataColsToMakeNumeric) {
|
||||||
litdata[[col]] <- as.numeric(litdata[[col]])
|
litdata[[col]] <- as.numeric(litdata[[col]])
|
||||||
}</code></pre>
|
}</code></pre>
|
||||||
<p>First we rename all the columns</p>
|
<p>First we rename all the columns</p>
|
||||||
<pre class="r"><code>litdata <- litdata %>% rename(
|
<pre class="r"><code>litdata <- litdata %>% rename(
|
||||||
"A1" = "W001",
|
"A1" = "W001",
|
||||||
|
@ -1848,7 +1846,6 @@ are NOT numeric.</p>
|
||||||
"A7" = "W007",
|
"A7" = "W007",
|
||||||
"A8" = "W008",
|
"A8" = "W008",
|
||||||
"A9" = "W009",
|
"A9" = "W009",
|
||||||
|
|
||||||
"B1" = "K001",
|
"B1" = "K001",
|
||||||
"B2" = "K002",
|
"B2" = "K002",
|
||||||
"B3" = "K003",
|
"B3" = "K003",
|
||||||
|
@ -1858,37 +1855,30 @@ are NOT numeric.</p>
|
||||||
"B7" = "K007",
|
"B7" = "K007",
|
||||||
"B8" = "K008",
|
"B8" = "K008",
|
||||||
"B9" = "K009",
|
"B9" = "K009",
|
||||||
|
|
||||||
"C1_1" = "TK001_01",
|
"C1_1" = "TK001_01",
|
||||||
"C1_2" = "TK001_02",
|
"C1_2" = "TK001_02",
|
||||||
"C1_3" = "TK001_03",
|
"C1_3" = "TK001_03",
|
||||||
"C1_4" = "TK001_04",
|
"C1_4" = "TK001_04",
|
||||||
|
|
||||||
"C2_1" = "TK002_01",
|
"C2_1" = "TK002_01",
|
||||||
"C2_2" = "TK002_02",
|
"C2_2" = "TK002_02",
|
||||||
"C2_3" = "TK002_03",
|
"C2_3" = "TK002_03",
|
||||||
"C2_4" = "TK002_04",
|
"C2_4" = "TK002_04",
|
||||||
|
|
||||||
"C3_1" = "TK003_01",
|
"C3_1" = "TK003_01",
|
||||||
"C3_2" = "TK003_02",
|
"C3_2" = "TK003_02",
|
||||||
"C3_3" = "TK003_03",
|
"C3_3" = "TK003_03",
|
||||||
"C3_4" = "TK003_04",
|
"C3_4" = "TK003_04",
|
||||||
|
|
||||||
"C4_1" = "TK004_01",
|
"C4_1" = "TK004_01",
|
||||||
"C4_2" = "TK004_02",
|
"C4_2" = "TK004_02",
|
||||||
"C4_3" = "TK004_03",
|
"C4_3" = "TK004_03",
|
||||||
"C4_4" = "TK004_04",
|
"C4_4" = "TK004_04",
|
||||||
|
|
||||||
"C5_1" = "TK005_01",
|
"C5_1" = "TK005_01",
|
||||||
"C5_2" = "TK005_02",
|
"C5_2" = "TK005_02",
|
||||||
"C5_3" = "TK005_03",
|
"C5_3" = "TK005_03",
|
||||||
"C5_4" = "TK005_04",
|
"C5_4" = "TK005_04",
|
||||||
|
|
||||||
"C6_1" = "TK006_01",
|
"C6_1" = "TK006_01",
|
||||||
"C6_2" = "TK006_02",
|
"C6_2" = "TK006_02",
|
||||||
"C6_3" = "TK006_03",
|
"C6_3" = "TK006_03",
|
||||||
"C6_4" = "TK006_04",
|
"C6_4" = "TK006_04",
|
||||||
|
|
||||||
"D1_1" = "H001_001",
|
"D1_1" = "H001_001",
|
||||||
"D1_2" = "H001_002",
|
"D1_2" = "H001_002",
|
||||||
"D1_3" = "H001_003",
|
"D1_3" = "H001_003",
|
||||||
|
@ -1896,23 +1886,15 @@ are NOT numeric.</p>
|
||||||
"D1_5" = "H001_005",
|
"D1_5" = "H001_005",
|
||||||
"D1_6" = "H001_006",
|
"D1_6" = "H001_006",
|
||||||
"D1_7" = "H001_007",
|
"D1_7" = "H001_007",
|
||||||
|
|
||||||
"D2" = "H002",
|
"D2" = "H002",
|
||||||
|
|
||||||
"D3" = "H003",
|
"D3" = "H003",
|
||||||
|
|
||||||
"D4" = "H004",
|
"D4" = "H004",
|
||||||
"D4_comment" = "H004_other",
|
"D4_comment" = "H004_other",
|
||||||
|
|
||||||
"D5" = "H005",
|
"D5" = "H005",
|
||||||
"D5_comment" = "H005_other",
|
"D5_comment" = "H005_other",
|
||||||
|
|
||||||
"D6" = "H006",
|
"D6" = "H006",
|
||||||
|
|
||||||
"D7" = "H007",
|
"D7" = "H007",
|
||||||
|
|
||||||
"D8" = "H008",
|
"D8" = "H008",
|
||||||
|
|
||||||
"E1" = "R1"
|
"E1" = "R1"
|
||||||
)</code></pre>
|
)</code></pre>
|
||||||
<p>Then we change the datatype and fix the values</p>
|
<p>Then we change the datatype and fix the values</p>
|
||||||
|
@ -2278,16 +2260,18 @@ Data</h2>
|
||||||
tmp <- rename(tmp, value = all_of(column))
|
tmp <- rename(tmp, value = all_of(column))
|
||||||
tmp <- tmp %>%
|
tmp <- tmp %>%
|
||||||
count(value) %>%
|
count(value) %>%
|
||||||
mutate(percentage = prop.table(n)*100)
|
mutate(percentage = prop.table(n) * 100)
|
||||||
print(tmp, n = 100)
|
print(tmp, n = 100)
|
||||||
ggplot(tmp,
|
ggplot(
|
||||||
aes(x = value, y=n)) +
|
tmp,
|
||||||
geom_bar(stat = "identity") +
|
aes(x = value, y = n)
|
||||||
theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
) +
|
||||||
|
geom_bar(stat = "identity") +
|
||||||
|
theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
||||||
}</code></pre>
|
}</code></pre>
|
||||||
<div id="a3-w003" class="section level3 unnumbered">
|
<div id="a3-w003" class="section level3 unnumbered">
|
||||||
<h3 class="unnumbered">A3 (W003)</h3>
|
<h3 class="unnumbered">A3 (W003)</h3>
|
||||||
<pre class="r"><code> displayFunction1(litdata, "A3")</code></pre>
|
<pre class="r"><code>displayFunction1(litdata, "A3")</code></pre>
|
||||||
<pre><code>## # A tibble: 6 × 3
|
<pre><code>## # A tibble: 6 × 3
|
||||||
## value n percentage
|
## value n percentage
|
||||||
## <dbl> <int> <dbl>
|
## <dbl> <int> <dbl>
|
||||||
|
@ -2302,7 +2286,7 @@ Data</h2>
|
||||||
</div>
|
</div>
|
||||||
<div id="b3-k003" class="section level3 unnumbered">
|
<div id="b3-k003" class="section level3 unnumbered">
|
||||||
<h3 class="unnumbered">B3 (K003)</h3>
|
<h3 class="unnumbered">B3 (K003)</h3>
|
||||||
<pre class="r"><code> displayFunction1(litdata, "B3")</code></pre>
|
<pre class="r"><code>displayFunction1(litdata, "B3")</code></pre>
|
||||||
<pre><code>## # A tibble: 7 × 3
|
<pre><code>## # A tibble: 7 × 3
|
||||||
## value n percentage
|
## value n percentage
|
||||||
## <dbl> <int> <dbl>
|
## <dbl> <int> <dbl>
|
||||||
|
@ -2318,7 +2302,7 @@ Data</h2>
|
||||||
</div>
|
</div>
|
||||||
<div id="d1_1-h001_001" class="section level3 unnumbered">
|
<div id="d1_1-h001_001" class="section level3 unnumbered">
|
||||||
<h3 class="unnumbered">D1_1 (H001_001)</h3>
|
<h3 class="unnumbered">D1_1 (H001_001)</h3>
|
||||||
<pre class="r"><code> displayFunction1(litdata, "D1_1")</code></pre>
|
<pre class="r"><code>displayFunction1(litdata, "D1_1")</code></pre>
|
||||||
<pre><code>## # A tibble: 3 × 3
|
<pre><code>## # A tibble: 3 × 3
|
||||||
## value n percentage
|
## value n percentage
|
||||||
## <lgl> <int> <dbl>
|
## <lgl> <int> <dbl>
|
||||||
|
@ -2329,7 +2313,7 @@ Data</h2>
|
||||||
</div>
|
</div>
|
||||||
<div id="d5-h005" class="section level3 unnumbered">
|
<div id="d5-h005" class="section level3 unnumbered">
|
||||||
<h3 class="unnumbered">D5 (H005)</h3>
|
<h3 class="unnumbered">D5 (H005)</h3>
|
||||||
<pre class="r"><code> displayFunction1(litdata, "D5")</code></pre>
|
<pre class="r"><code>displayFunction1(litdata, "D5")</code></pre>
|
||||||
<pre><code>## # A tibble: 18 × 3
|
<pre><code>## # A tibble: 18 × 3
|
||||||
## value n percentage
|
## value n percentage
|
||||||
## <fct> <int> <dbl>
|
## <fct> <int> <dbl>
|
||||||
|
@ -2355,7 +2339,7 @@ Data</h2>
|
||||||
</div>
|
</div>
|
||||||
<div id="d7-h007" class="section level3 unnumbered">
|
<div id="d7-h007" class="section level3 unnumbered">
|
||||||
<h3 class="unnumbered">D7 (H007)</h3>
|
<h3 class="unnumbered">D7 (H007)</h3>
|
||||||
<pre class="r"><code> displayFunction1(litdata, "D7")</code></pre>
|
<pre class="r"><code>displayFunction1(litdata, "D7")</code></pre>
|
||||||
<pre><code>## # A tibble: 24 × 3
|
<pre><code>## # A tibble: 24 × 3
|
||||||
## value n percentage
|
## value n percentage
|
||||||
## <dbl> <int> <dbl>
|
## <dbl> <int> <dbl>
|
||||||
|
@ -2390,7 +2374,7 @@ gibt.</p>
|
||||||
</div>
|
</div>
|
||||||
<div id="d8-h008" class="section level3 unnumbered">
|
<div id="d8-h008" class="section level3 unnumbered">
|
||||||
<h3 class="unnumbered">D8 (H008)</h3>
|
<h3 class="unnumbered">D8 (H008)</h3>
|
||||||
<pre class="r"><code> displayFunction1(litdata, "D8")</code></pre>
|
<pre class="r"><code>displayFunction1(litdata, "D8")</code></pre>
|
||||||
<pre><code>## # A tibble: 3 × 3
|
<pre><code>## # A tibble: 3 × 3
|
||||||
## value n percentage
|
## value n percentage
|
||||||
## <fct> <int> <dbl>
|
## <fct> <int> <dbl>
|
||||||
|
@ -2407,7 +2391,7 @@ gibt.</p>
|
||||||
<pre class="r"><code>birthyears <- litdata$D7
|
<pre class="r"><code>birthyears <- litdata$D7
|
||||||
# remove NAs
|
# remove NAs
|
||||||
birthyears <- birthyears[!is.na(birthyears)]
|
birthyears <- birthyears[!is.na(birthyears)]
|
||||||
age <- 2021-birthyears</code></pre>
|
age <- 2021 - birthyears</code></pre>
|
||||||
<div id="frequency" class="section level2" number="6.1">
|
<div id="frequency" class="section level2" number="6.1">
|
||||||
<h2><span class="header-section-number">6.1</span> Frequency</h2>
|
<h2><span class="header-section-number">6.1</span> Frequency</h2>
|
||||||
<pre class="r"><code>freq(age)</code></pre>
|
<pre class="r"><code>freq(age)</code></pre>
|
||||||
|
@ -2440,7 +2424,7 @@ age <- 2021-birthyears</code></pre>
|
||||||
## NA 0 0
|
## NA 0 0
|
||||||
## Total 106 100</code></pre>
|
## Total 106 100</code></pre>
|
||||||
<pre class="r"><code># with NA
|
<pre class="r"><code># with NA
|
||||||
freq(2021-litdata$D7)</code></pre>
|
freq(2021 - litdata$D7)</code></pre>
|
||||||
<pre><code>## frequency percent frequency_sum percent_sum
|
<pre><code>## frequency percent frequency_sum percent_sum
|
||||||
## 19 1 0.94 1 0.94
|
## 19 1 0.94 1 0.94
|
||||||
## 20 2 1.89 3 2.83
|
## 20 2 1.89 3 2.83
|
||||||
|
@ -2492,7 +2476,7 @@ Mittelwert</h3>
|
||||||
</div>
|
</div>
|
||||||
<div id="spannweite" class="section level3" number="6.2.4">
|
<div id="spannweite" class="section level3" number="6.2.4">
|
||||||
<h3><span class="header-section-number">6.2.4</span> Spannweite</h3>
|
<h3><span class="header-section-number">6.2.4</span> Spannweite</h3>
|
||||||
<pre class="r"><code>max(age)-min(age)</code></pre>
|
<pre class="r"><code>max(age) - min(age)</code></pre>
|
||||||
<pre><code>## [1] 31</code></pre>
|
<pre><code>## [1] 31</code></pre>
|
||||||
</div>
|
</div>
|
||||||
<div id="quartilsabstand" class="section level3" number="6.2.5">
|
<div id="quartilsabstand" class="section level3" number="6.2.5">
|
||||||
|
@ -2643,7 +2627,7 @@ sozialen Herkunft 5.6, er ist also genauso hoch wie in der
|
||||||
Gesamtschweiz. Was ist zur Lage dieses Wertes bezogen auf die Verteilung
|
Gesamtschweiz. Was ist zur Lage dieses Wertes bezogen auf die Verteilung
|
||||||
in Graubünden zu sagen? Also: Wie viele Schüler in GR liegen mit ihrem
|
in Graubünden zu sagen? Also: Wie viele Schüler in GR liegen mit ihrem
|
||||||
Wert darunter?</p>
|
Wert darunter?</p>
|
||||||
<pre class="r"><code>pnorm(5.6, mean = 5.1, sd=2.0)</code></pre>
|
<pre class="r"><code>pnorm(5.6, mean = 5.1, sd = 2.0)</code></pre>
|
||||||
<pre><code>## [1] 0.5987063</code></pre>
|
<pre><code>## [1] 0.5987063</code></pre>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
107
report.rmd
107
report.rmd
|
@ -20,11 +20,11 @@ knitr::opts_chunk$set(echo = TRUE)
|
||||||
# Preparation
|
# Preparation
|
||||||
|
|
||||||
```{r, message=FALSE}
|
```{r, message=FALSE}
|
||||||
if (!require(tidyverse)){
|
if (!require(tidyverse)) {
|
||||||
install.packages("tidyverse")
|
install.packages("tidyverse")
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
}
|
}
|
||||||
if (!require(moments)){
|
if (!require(moments)) {
|
||||||
install.packages("moments")
|
install.packages("moments")
|
||||||
library(moments)
|
library(moments)
|
||||||
}
|
}
|
||||||
|
@ -34,28 +34,26 @@ if (!require(moments)){
|
||||||
|
|
||||||
## Frequency Table ordered from wish.com
|
## Frequency Table ordered from wish.com
|
||||||
```{r}
|
```{r}
|
||||||
freq <- function(data){
|
freq <- function(data) {
|
||||||
na_count = length(data[is.na(data)])
|
na_count <- length(data[is.na(data)])
|
||||||
valid_count = length(data)-na_count
|
valid_count <- length(data) - na_count
|
||||||
frequency <- table(data)
|
frequency <- table(data)
|
||||||
p <- prop.table(frequency)
|
p <- prop.table(frequency)
|
||||||
percent <- round(p*100, digits = 2)
|
percent <- round(p * 100, digits = 2)
|
||||||
frequency_sum <- cumsum(frequency)
|
frequency_sum <- cumsum(frequency)
|
||||||
hkum <- cumsum(p)
|
hkum <- cumsum(p)
|
||||||
percent_sum <- round(hkum*100, digits = 2)
|
percent_sum <- round(hkum * 100, digits = 2)
|
||||||
freq_table <- cbind(frequency, percent, frequency_sum, percent_sum)
|
freq_table <- cbind(frequency, percent, frequency_sum, percent_sum)
|
||||||
valid_percent <- round(valid_count / length(data)*100, digits = 2)
|
valid_percent <- round(valid_count / length(data) * 100, digits = 2)
|
||||||
na_percent <- round(na_count / length(data)*100, digits = 2)
|
na_percent <- round(na_count / length(data) * 100, digits = 2)
|
||||||
|
|
||||||
|
|
||||||
print(freq_table)
|
print(freq_table)
|
||||||
|
|
||||||
count <- c(valid_count, na_count, valid_count+na_count)
|
count <- c(valid_count, na_count, valid_count + na_count)
|
||||||
percent <- c(valid_percent, na_percent, valid_percent+na_percent)
|
percent <- c(valid_percent, na_percent, valid_percent + na_percent)
|
||||||
totall <- c(valid_count+na_count, valid_percent+na_percent)
|
|
||||||
df <- data.frame(count, percent, row.names = c("valid", "NA", "Total"))
|
df <- data.frame(count, percent, row.names = c("valid", "NA", "Total"))
|
||||||
print(df)
|
print(df)
|
||||||
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
*Source: https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret and adapted*
|
*Source: https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret and adapted*
|
||||||
|
@ -63,9 +61,9 @@ freq <- function(data){
|
||||||
## Modus
|
## Modus
|
||||||
```{r}
|
```{r}
|
||||||
getmode <- function(v) {
|
getmode <- function(v) {
|
||||||
uniqv <- unique(v)
|
uniqv <- unique(v)
|
||||||
x <- tabulate(match(v, uniqv))
|
x <- tabulate(match(v, uniqv))
|
||||||
uniqv[which(x==max(x))]
|
uniqv[which(x == max(x))]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -97,7 +95,7 @@ head(litdata)
|
||||||
# Data cleaning
|
# Data cleaning
|
||||||
## Converting Strings to numbers and *Keine Antwort* zu *NaN*
|
## Converting Strings to numbers and *Keine Antwort* zu *NaN*
|
||||||
``` {r}
|
``` {r}
|
||||||
litdata <- litdata %>%
|
litdata <- litdata %>%
|
||||||
mutate_all(~ replace(., . == "Stimme voll zu5", 5)) %>%
|
mutate_all(~ replace(., . == "Stimme voll zu5", 5)) %>%
|
||||||
mutate_all(~ replace(., . == "Stimme überhaupt nicht zu1", 1)) %>%
|
mutate_all(~ replace(., . == "Stimme überhaupt nicht zu1", 1)) %>%
|
||||||
mutate_all(~ replace(., . == "Keine Antwort-", NaN))
|
mutate_all(~ replace(., . == "Keine Antwort-", NaN))
|
||||||
|
@ -106,17 +104,17 @@ litdata <- litdata %>%
|
||||||
## Make it numeric
|
## Make it numeric
|
||||||
The following code will **NOT** be run. The Idea is to show a way to automatically edit all columns. It works but some columns are NOT numeric.
|
The following code will **NOT** be run. The Idea is to show a way to automatically edit all columns. It works but some columns are NOT numeric.
|
||||||
```{r, eval=FALSE}
|
```{r, eval=FALSE}
|
||||||
# All colnames that exist
|
# All colnames that exist
|
||||||
litdataColnames <- colnames(litdata)
|
litdataColnames <- colnames(litdata)
|
||||||
# the ones we don't want to change
|
# the ones we don't want to change
|
||||||
litdataNonNumericCols <- c("submitdate", "startlanguage", "startdate", "datestamp", "lastpage", "seed")
|
litdataNonNumericCols <- c("submitdate", "startlanguage", "startdate", "datestamp", "lastpage", "seed")
|
||||||
# the colnames that should be changed
|
# the colnames that should be changed
|
||||||
litdataColsToMakeNumeric <- litdataColnames[!(litdataColnames %in% litdataNonNumericCols)]
|
litdataColsToMakeNumeric <- litdataColnames[!(litdataColnames %in% litdataNonNumericCols)]
|
||||||
print(litdataColsToMakeNumeric)
|
print(litdataColsToMakeNumeric)
|
||||||
litdataColsToMakeNumeric <- c("R1")
|
litdataColsToMakeNumeric <- c("R1")
|
||||||
for (col in litdataColsToMakeNumeric) {
|
for (col in litdataColsToMakeNumeric) {
|
||||||
litdata[[col]] <- as.numeric(litdata[[col]])
|
litdata[[col]] <- as.numeric(litdata[[col]])
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
First we rename all the columns
|
First we rename all the columns
|
||||||
|
@ -131,7 +129,6 @@ litdata <- litdata %>% rename(
|
||||||
"A7" = "W007",
|
"A7" = "W007",
|
||||||
"A8" = "W008",
|
"A8" = "W008",
|
||||||
"A9" = "W009",
|
"A9" = "W009",
|
||||||
|
|
||||||
"B1" = "K001",
|
"B1" = "K001",
|
||||||
"B2" = "K002",
|
"B2" = "K002",
|
||||||
"B3" = "K003",
|
"B3" = "K003",
|
||||||
|
@ -141,37 +138,30 @@ litdata <- litdata %>% rename(
|
||||||
"B7" = "K007",
|
"B7" = "K007",
|
||||||
"B8" = "K008",
|
"B8" = "K008",
|
||||||
"B9" = "K009",
|
"B9" = "K009",
|
||||||
|
|
||||||
"C1_1" = "TK001_01",
|
"C1_1" = "TK001_01",
|
||||||
"C1_2" = "TK001_02",
|
"C1_2" = "TK001_02",
|
||||||
"C1_3" = "TK001_03",
|
"C1_3" = "TK001_03",
|
||||||
"C1_4" = "TK001_04",
|
"C1_4" = "TK001_04",
|
||||||
|
|
||||||
"C2_1" = "TK002_01",
|
"C2_1" = "TK002_01",
|
||||||
"C2_2" = "TK002_02",
|
"C2_2" = "TK002_02",
|
||||||
"C2_3" = "TK002_03",
|
"C2_3" = "TK002_03",
|
||||||
"C2_4" = "TK002_04",
|
"C2_4" = "TK002_04",
|
||||||
|
|
||||||
"C3_1" = "TK003_01",
|
"C3_1" = "TK003_01",
|
||||||
"C3_2" = "TK003_02",
|
"C3_2" = "TK003_02",
|
||||||
"C3_3" = "TK003_03",
|
"C3_3" = "TK003_03",
|
||||||
"C3_4" = "TK003_04",
|
"C3_4" = "TK003_04",
|
||||||
|
|
||||||
"C4_1" = "TK004_01",
|
"C4_1" = "TK004_01",
|
||||||
"C4_2" = "TK004_02",
|
"C4_2" = "TK004_02",
|
||||||
"C4_3" = "TK004_03",
|
"C4_3" = "TK004_03",
|
||||||
"C4_4" = "TK004_04",
|
"C4_4" = "TK004_04",
|
||||||
|
|
||||||
"C5_1" = "TK005_01",
|
"C5_1" = "TK005_01",
|
||||||
"C5_2" = "TK005_02",
|
"C5_2" = "TK005_02",
|
||||||
"C5_3" = "TK005_03",
|
"C5_3" = "TK005_03",
|
||||||
"C5_4" = "TK005_04",
|
"C5_4" = "TK005_04",
|
||||||
|
|
||||||
"C6_1" = "TK006_01",
|
"C6_1" = "TK006_01",
|
||||||
"C6_2" = "TK006_02",
|
"C6_2" = "TK006_02",
|
||||||
"C6_3" = "TK006_03",
|
"C6_3" = "TK006_03",
|
||||||
"C6_4" = "TK006_04",
|
"C6_4" = "TK006_04",
|
||||||
|
|
||||||
"D1_1" = "H001_001",
|
"D1_1" = "H001_001",
|
||||||
"D1_2" = "H001_002",
|
"D1_2" = "H001_002",
|
||||||
"D1_3" = "H001_003",
|
"D1_3" = "H001_003",
|
||||||
|
@ -179,23 +169,15 @@ litdata <- litdata %>% rename(
|
||||||
"D1_5" = "H001_005",
|
"D1_5" = "H001_005",
|
||||||
"D1_6" = "H001_006",
|
"D1_6" = "H001_006",
|
||||||
"D1_7" = "H001_007",
|
"D1_7" = "H001_007",
|
||||||
|
|
||||||
"D2" = "H002",
|
"D2" = "H002",
|
||||||
|
|
||||||
"D3" = "H003",
|
"D3" = "H003",
|
||||||
|
|
||||||
"D4" = "H004",
|
"D4" = "H004",
|
||||||
"D4_comment" = "H004_other",
|
"D4_comment" = "H004_other",
|
||||||
|
|
||||||
"D5" = "H005",
|
"D5" = "H005",
|
||||||
"D5_comment" = "H005_other",
|
"D5_comment" = "H005_other",
|
||||||
|
|
||||||
"D6" = "H006",
|
"D6" = "H006",
|
||||||
|
|
||||||
"D7" = "H007",
|
"D7" = "H007",
|
||||||
|
|
||||||
"D8" = "H008",
|
"D8" = "H008",
|
||||||
|
|
||||||
"E1" = "R1"
|
"E1" = "R1"
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
@ -289,7 +271,6 @@ litdata$D7 <- as.numeric(litdata$D7)
|
||||||
litdata$D8 <- as.factor(litdata$D8)
|
litdata$D8 <- as.factor(litdata$D8)
|
||||||
|
|
||||||
# skipping E1 because it's a free text
|
# skipping E1 because it's a free text
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
@ -321,44 +302,46 @@ displayFunction1 <- function(table, column) {
|
||||||
tmp <- rename(tmp, value = all_of(column))
|
tmp <- rename(tmp, value = all_of(column))
|
||||||
tmp <- tmp %>%
|
tmp <- tmp %>%
|
||||||
count(value) %>%
|
count(value) %>%
|
||||||
mutate(percentage = prop.table(n)*100)
|
mutate(percentage = prop.table(n) * 100)
|
||||||
print(tmp, n = 100)
|
print(tmp, n = 100)
|
||||||
ggplot(tmp,
|
ggplot(
|
||||||
aes(x = value, y=n)) +
|
tmp,
|
||||||
geom_bar(stat = "identity") +
|
aes(x = value, y = n)
|
||||||
theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
) +
|
||||||
|
geom_bar(stat = "identity") +
|
||||||
|
theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
### A3 (W003) {-}
|
### A3 (W003) {-}
|
||||||
```{r}
|
```{r}
|
||||||
displayFunction1(litdata, "A3")
|
displayFunction1(litdata, "A3")
|
||||||
```
|
```
|
||||||
|
|
||||||
### B3 (K003) {-}
|
### B3 (K003) {-}
|
||||||
```{r}
|
```{r}
|
||||||
displayFunction1(litdata, "B3")
|
displayFunction1(litdata, "B3")
|
||||||
```
|
```
|
||||||
|
|
||||||
### D1_1 (H001_001) {-}
|
### D1_1 (H001_001) {-}
|
||||||
```{r}
|
```{r}
|
||||||
displayFunction1(litdata, "D1_1")
|
displayFunction1(litdata, "D1_1")
|
||||||
```
|
```
|
||||||
|
|
||||||
### D5 (H005) {-}
|
### D5 (H005) {-}
|
||||||
```{r}
|
```{r}
|
||||||
displayFunction1(litdata, "D5")
|
displayFunction1(litdata, "D5")
|
||||||
```
|
```
|
||||||
|
|
||||||
### D7 (H007) {-}
|
### D7 (H007) {-}
|
||||||
```{r}
|
```{r}
|
||||||
displayFunction1(litdata, "D7")
|
displayFunction1(litdata, "D7")
|
||||||
```
|
```
|
||||||
|
|
||||||
Die Warnung resultiert daraus, dass es sehr viele *NA* gibt.
|
Die Warnung resultiert daraus, dass es sehr viele *NA* gibt.
|
||||||
|
|
||||||
### D8 (H008) {-}
|
### D8 (H008) {-}
|
||||||
```{r}
|
```{r}
|
||||||
displayFunction1(litdata, "D8")
|
displayFunction1(litdata, "D8")
|
||||||
```
|
```
|
||||||
|
|
||||||
# Selbststudium 2.1
|
# Selbststudium 2.1
|
||||||
|
@ -368,14 +351,14 @@ We have the year 2021
|
||||||
birthyears <- litdata$D7
|
birthyears <- litdata$D7
|
||||||
# remove NAs
|
# remove NAs
|
||||||
birthyears <- birthyears[!is.na(birthyears)]
|
birthyears <- birthyears[!is.na(birthyears)]
|
||||||
age <- 2021-birthyears
|
age <- 2021 - birthyears
|
||||||
```
|
```
|
||||||
|
|
||||||
## Frequency
|
## Frequency
|
||||||
```{r}
|
```{r}
|
||||||
freq(age)
|
freq(age)
|
||||||
# with NA
|
# with NA
|
||||||
freq(2021-litdata$D7)
|
freq(2021 - litdata$D7)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Selbststudium 2
|
## Selbststudium 2
|
||||||
|
@ -399,7 +382,7 @@ mean(age)
|
||||||
|
|
||||||
### Spannweite
|
### Spannweite
|
||||||
```{r}
|
```{r}
|
||||||
max(age)-min(age)
|
max(age) - min(age)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Quartilsabstand
|
### Quartilsabstand
|
||||||
|
@ -511,6 +494,6 @@ pnorm(5.1, mean = 5.6, sd = 1.8)
|
||||||
In einem Bündner Ort beträgt der Mittelwert auf der Skala zur sozialen Herkunft 5.6, er ist also genauso hoch wie in der Gesamtschweiz. Was ist zur Lage dieses Wertes bezogen auf die Verteilung in Graubünden zu sagen? Also: Wie viele Schüler in GR liegen mit ihrem Wert darunter?
|
In einem Bündner Ort beträgt der Mittelwert auf der Skala zur sozialen Herkunft 5.6, er ist also genauso hoch wie in der Gesamtschweiz. Was ist zur Lage dieses Wertes bezogen auf die Verteilung in Graubünden zu sagen? Also: Wie viele Schüler in GR liegen mit ihrem Wert darunter?
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
pnorm(5.6, mean = 5.1, sd=2.0)
|
pnorm(5.6, mean = 5.1, sd = 2.0)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue