master
marcgauch 2022-11-18 21:10:07 +01:00
parent afd7d1babf
commit 51cd711da8
2 changed files with 90 additions and 123 deletions

View File

@ -1494,11 +1494,11 @@ border-radius: 0px;
<div id="preparation" class="section level1" number="1"> <div id="preparation" class="section level1" number="1">
<h1><span class="header-section-number">1</span> Preparation</h1> <h1><span class="header-section-number">1</span> Preparation</h1>
<pre class="r"><code>if (!require(tidyverse)){ <pre class="r"><code>if (!require(tidyverse)) {
install.packages(&quot;tidyverse&quot;) install.packages(&quot;tidyverse&quot;)
library(tidyverse) library(tidyverse)
} }
if (!require(moments)){ if (!require(moments)) {
install.packages(&quot;moments&quot;) install.packages(&quot;moments&quot;)
library(moments) library(moments)
}</code></pre> }</code></pre>
@ -1508,28 +1508,26 @@ if (!require(moments)){
<div id="frequency-table-ordered-from-wish.com" class="section level2" number="2.1"> <div id="frequency-table-ordered-from-wish.com" class="section level2" number="2.1">
<h2><span class="header-section-number">2.1</span> Frequency Table <h2><span class="header-section-number">2.1</span> Frequency Table
ordered from wish.com</h2> ordered from wish.com</h2>
<pre class="r"><code>freq &lt;- function(data){ <pre class="r"><code>freq &lt;- function(data) {
na_count = length(data[is.na(data)]) na_count &lt;- length(data[is.na(data)])
valid_count = length(data)-na_count valid_count &lt;- length(data) - na_count
frequency &lt;- table(data) frequency &lt;- table(data)
p &lt;- prop.table(frequency) p &lt;- prop.table(frequency)
percent &lt;- round(p*100, digits = 2) percent &lt;- round(p * 100, digits = 2)
frequency_sum &lt;- cumsum(frequency) frequency_sum &lt;- cumsum(frequency)
hkum &lt;- cumsum(p) hkum &lt;- cumsum(p)
percent_sum &lt;- round(hkum*100, digits = 2) percent_sum &lt;- round(hkum * 100, digits = 2)
freq_table &lt;- cbind(frequency, percent, frequency_sum, percent_sum) freq_table &lt;- cbind(frequency, percent, frequency_sum, percent_sum)
valid_percent &lt;- round(valid_count / length(data)*100, digits = 2) valid_percent &lt;- round(valid_count / length(data) * 100, digits = 2)
na_percent &lt;- round(na_count / length(data)*100, digits = 2) na_percent &lt;- round(na_count / length(data) * 100, digits = 2)
print(freq_table) print(freq_table)
count &lt;- c(valid_count, na_count, valid_count+na_count) count &lt;- c(valid_count, na_count, valid_count + na_count)
percent &lt;- c(valid_percent, na_percent, valid_percent+na_percent) percent &lt;- c(valid_percent, na_percent, valid_percent + na_percent)
totall &lt;- c(valid_count+na_count, valid_percent+na_percent)
df &lt;- data.frame(count, percent, row.names = c(&quot;valid&quot;, &quot;NA&quot;, &quot;Total&quot;)) df &lt;- data.frame(count, percent, row.names = c(&quot;valid&quot;, &quot;NA&quot;, &quot;Total&quot;))
print(df) print(df)
}</code></pre> }</code></pre>
<p><em>Source: <a href="https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret" class="uri">https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret</a> <p><em>Source: <a href="https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret" class="uri">https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret</a>
and adapted</em></p> and adapted</em></p>
@ -1537,9 +1535,9 @@ and adapted</em></p>
<div id="modus" class="section level2" number="2.2"> <div id="modus" class="section level2" number="2.2">
<h2><span class="header-section-number">2.2</span> Modus</h2> <h2><span class="header-section-number">2.2</span> Modus</h2>
<pre class="r"><code>getmode &lt;- function(v) { <pre class="r"><code>getmode &lt;- function(v) {
uniqv &lt;- unique(v) uniqv &lt;- unique(v)
x &lt;- tabulate(match(v, uniqv)) x &lt;- tabulate(match(v, uniqv))
uniqv[which(x==max(x))] uniqv[which(x == max(x))]
}</code></pre> }</code></pre>
</div> </div>
</div> </div>
@ -1816,7 +1814,7 @@ inspection of data</h2>
<div id="converting-strings-to-numbers-and-keine-antwort-zu-nan" class="section level2" number="4.1"> <div id="converting-strings-to-numbers-and-keine-antwort-zu-nan" class="section level2" number="4.1">
<h2><span class="header-section-number">4.1</span> Converting Strings to <h2><span class="header-section-number">4.1</span> Converting Strings to
numbers and <em>Keine Antwort</em> zu <em>NaN</em></h2> numbers and <em>Keine Antwort</em> zu <em>NaN</em></h2>
<pre class="r"><code>litdata &lt;- litdata %&gt;% <pre class="r"><code>litdata &lt;- litdata %&gt;%
mutate_all(~ replace(., . == &quot;Stimme voll zu5&quot;, 5)) %&gt;% mutate_all(~ replace(., . == &quot;Stimme voll zu5&quot;, 5)) %&gt;%
mutate_all(~ replace(., . == &quot;Stimme überhaupt nicht zu1&quot;, 1)) %&gt;% mutate_all(~ replace(., . == &quot;Stimme überhaupt nicht zu1&quot;, 1)) %&gt;%
mutate_all(~ replace(., . == &quot;Keine Antwort-&quot;, NaN))</code></pre> mutate_all(~ replace(., . == &quot;Keine Antwort-&quot;, NaN))</code></pre>
@ -1826,17 +1824,17 @@ numbers and <em>Keine Antwort</em> zu <em>NaN</em></h2>
<p>The following code will <strong>NOT</strong> be run. The Idea is to <p>The following code will <strong>NOT</strong> be run. The Idea is to
show a way to automatically edit all columns. It works but some columns show a way to automatically edit all columns. It works but some columns
are NOT numeric.</p> are NOT numeric.</p>
<pre class="r"><code> # All colnames that exist <pre class="r"><code># All colnames that exist
litdataColnames &lt;- colnames(litdata) litdataColnames &lt;- colnames(litdata)
# the ones we don&#39;t want to change # the ones we don&#39;t want to change
litdataNonNumericCols &lt;- c(&quot;submitdate&quot;, &quot;startlanguage&quot;, &quot;startdate&quot;, &quot;datestamp&quot;, &quot;lastpage&quot;, &quot;seed&quot;) litdataNonNumericCols &lt;- c(&quot;submitdate&quot;, &quot;startlanguage&quot;, &quot;startdate&quot;, &quot;datestamp&quot;, &quot;lastpage&quot;, &quot;seed&quot;)
# the colnames that should be changed # the colnames that should be changed
litdataColsToMakeNumeric &lt;- litdataColnames[!(litdataColnames %in% litdataNonNumericCols)] litdataColsToMakeNumeric &lt;- litdataColnames[!(litdataColnames %in% litdataNonNumericCols)]
print(litdataColsToMakeNumeric) print(litdataColsToMakeNumeric)
litdataColsToMakeNumeric &lt;- c(&quot;R1&quot;) litdataColsToMakeNumeric &lt;- c(&quot;R1&quot;)
for (col in litdataColsToMakeNumeric) { for (col in litdataColsToMakeNumeric) {
litdata[[col]] &lt;- as.numeric(litdata[[col]]) litdata[[col]] &lt;- as.numeric(litdata[[col]])
}</code></pre> }</code></pre>
<p>First we rename all the columns</p> <p>First we rename all the columns</p>
<pre class="r"><code>litdata &lt;- litdata %&gt;% rename( <pre class="r"><code>litdata &lt;- litdata %&gt;% rename(
&quot;A1&quot; = &quot;W001&quot;, &quot;A1&quot; = &quot;W001&quot;,
@ -1848,7 +1846,6 @@ are NOT numeric.</p>
&quot;A7&quot; = &quot;W007&quot;, &quot;A7&quot; = &quot;W007&quot;,
&quot;A8&quot; = &quot;W008&quot;, &quot;A8&quot; = &quot;W008&quot;,
&quot;A9&quot; = &quot;W009&quot;, &quot;A9&quot; = &quot;W009&quot;,
&quot;B1&quot; = &quot;K001&quot;, &quot;B1&quot; = &quot;K001&quot;,
&quot;B2&quot; = &quot;K002&quot;, &quot;B2&quot; = &quot;K002&quot;,
&quot;B3&quot; = &quot;K003&quot;, &quot;B3&quot; = &quot;K003&quot;,
@ -1858,37 +1855,30 @@ are NOT numeric.</p>
&quot;B7&quot; = &quot;K007&quot;, &quot;B7&quot; = &quot;K007&quot;,
&quot;B8&quot; = &quot;K008&quot;, &quot;B8&quot; = &quot;K008&quot;,
&quot;B9&quot; = &quot;K009&quot;, &quot;B9&quot; = &quot;K009&quot;,
&quot;C1_1&quot; = &quot;TK001_01&quot;, &quot;C1_1&quot; = &quot;TK001_01&quot;,
&quot;C1_2&quot; = &quot;TK001_02&quot;, &quot;C1_2&quot; = &quot;TK001_02&quot;,
&quot;C1_3&quot; = &quot;TK001_03&quot;, &quot;C1_3&quot; = &quot;TK001_03&quot;,
&quot;C1_4&quot; = &quot;TK001_04&quot;, &quot;C1_4&quot; = &quot;TK001_04&quot;,
&quot;C2_1&quot; = &quot;TK002_01&quot;, &quot;C2_1&quot; = &quot;TK002_01&quot;,
&quot;C2_2&quot; = &quot;TK002_02&quot;, &quot;C2_2&quot; = &quot;TK002_02&quot;,
&quot;C2_3&quot; = &quot;TK002_03&quot;, &quot;C2_3&quot; = &quot;TK002_03&quot;,
&quot;C2_4&quot; = &quot;TK002_04&quot;, &quot;C2_4&quot; = &quot;TK002_04&quot;,
&quot;C3_1&quot; = &quot;TK003_01&quot;, &quot;C3_1&quot; = &quot;TK003_01&quot;,
&quot;C3_2&quot; = &quot;TK003_02&quot;, &quot;C3_2&quot; = &quot;TK003_02&quot;,
&quot;C3_3&quot; = &quot;TK003_03&quot;, &quot;C3_3&quot; = &quot;TK003_03&quot;,
&quot;C3_4&quot; = &quot;TK003_04&quot;, &quot;C3_4&quot; = &quot;TK003_04&quot;,
&quot;C4_1&quot; = &quot;TK004_01&quot;, &quot;C4_1&quot; = &quot;TK004_01&quot;,
&quot;C4_2&quot; = &quot;TK004_02&quot;, &quot;C4_2&quot; = &quot;TK004_02&quot;,
&quot;C4_3&quot; = &quot;TK004_03&quot;, &quot;C4_3&quot; = &quot;TK004_03&quot;,
&quot;C4_4&quot; = &quot;TK004_04&quot;, &quot;C4_4&quot; = &quot;TK004_04&quot;,
&quot;C5_1&quot; = &quot;TK005_01&quot;, &quot;C5_1&quot; = &quot;TK005_01&quot;,
&quot;C5_2&quot; = &quot;TK005_02&quot;, &quot;C5_2&quot; = &quot;TK005_02&quot;,
&quot;C5_3&quot; = &quot;TK005_03&quot;, &quot;C5_3&quot; = &quot;TK005_03&quot;,
&quot;C5_4&quot; = &quot;TK005_04&quot;, &quot;C5_4&quot; = &quot;TK005_04&quot;,
&quot;C6_1&quot; = &quot;TK006_01&quot;, &quot;C6_1&quot; = &quot;TK006_01&quot;,
&quot;C6_2&quot; = &quot;TK006_02&quot;, &quot;C6_2&quot; = &quot;TK006_02&quot;,
&quot;C6_3&quot; = &quot;TK006_03&quot;, &quot;C6_3&quot; = &quot;TK006_03&quot;,
&quot;C6_4&quot; = &quot;TK006_04&quot;, &quot;C6_4&quot; = &quot;TK006_04&quot;,
&quot;D1_1&quot; = &quot;H001_001&quot;, &quot;D1_1&quot; = &quot;H001_001&quot;,
&quot;D1_2&quot; = &quot;H001_002&quot;, &quot;D1_2&quot; = &quot;H001_002&quot;,
&quot;D1_3&quot; = &quot;H001_003&quot;, &quot;D1_3&quot; = &quot;H001_003&quot;,
@ -1896,23 +1886,15 @@ are NOT numeric.</p>
&quot;D1_5&quot; = &quot;H001_005&quot;, &quot;D1_5&quot; = &quot;H001_005&quot;,
&quot;D1_6&quot; = &quot;H001_006&quot;, &quot;D1_6&quot; = &quot;H001_006&quot;,
&quot;D1_7&quot; = &quot;H001_007&quot;, &quot;D1_7&quot; = &quot;H001_007&quot;,
&quot;D2&quot; = &quot;H002&quot;, &quot;D2&quot; = &quot;H002&quot;,
&quot;D3&quot; = &quot;H003&quot;, &quot;D3&quot; = &quot;H003&quot;,
&quot;D4&quot; = &quot;H004&quot;, &quot;D4&quot; = &quot;H004&quot;,
&quot;D4_comment&quot; = &quot;H004_other&quot;, &quot;D4_comment&quot; = &quot;H004_other&quot;,
&quot;D5&quot; = &quot;H005&quot;, &quot;D5&quot; = &quot;H005&quot;,
&quot;D5_comment&quot; = &quot;H005_other&quot;, &quot;D5_comment&quot; = &quot;H005_other&quot;,
&quot;D6&quot; = &quot;H006&quot;, &quot;D6&quot; = &quot;H006&quot;,
&quot;D7&quot; = &quot;H007&quot;, &quot;D7&quot; = &quot;H007&quot;,
&quot;D8&quot; = &quot;H008&quot;, &quot;D8&quot; = &quot;H008&quot;,
&quot;E1&quot; = &quot;R1&quot; &quot;E1&quot; = &quot;R1&quot;
)</code></pre> )</code></pre>
<p>Then we change the datatype and fix the values</p> <p>Then we change the datatype and fix the values</p>
@ -2278,16 +2260,18 @@ Data</h2>
tmp &lt;- rename(tmp, value = all_of(column)) tmp &lt;- rename(tmp, value = all_of(column))
tmp &lt;- tmp %&gt;% tmp &lt;- tmp %&gt;%
count(value) %&gt;% count(value) %&gt;%
mutate(percentage = prop.table(n)*100) mutate(percentage = prop.table(n) * 100)
print(tmp, n = 100) print(tmp, n = 100)
ggplot(tmp, ggplot(
aes(x = value, y=n)) + tmp,
geom_bar(stat = &quot;identity&quot;) + aes(x = value, y = n)
theme(axis.text.x = element_text(angle = 45, hjust = 1)) ) +
geom_bar(stat = &quot;identity&quot;) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
}</code></pre> }</code></pre>
<div id="a3-w003" class="section level3 unnumbered"> <div id="a3-w003" class="section level3 unnumbered">
<h3 class="unnumbered">A3 (W003)</h3> <h3 class="unnumbered">A3 (W003)</h3>
<pre class="r"><code> displayFunction1(litdata, &quot;A3&quot;)</code></pre> <pre class="r"><code>displayFunction1(litdata, &quot;A3&quot;)</code></pre>
<pre><code>## # A tibble: 6 × 3 <pre><code>## # A tibble: 6 × 3
## value n percentage ## value n percentage
## &lt;dbl&gt; &lt;int&gt; &lt;dbl&gt; ## &lt;dbl&gt; &lt;int&gt; &lt;dbl&gt;
@ -2302,7 +2286,7 @@ Data</h2>
</div> </div>
<div id="b3-k003" class="section level3 unnumbered"> <div id="b3-k003" class="section level3 unnumbered">
<h3 class="unnumbered">B3 (K003)</h3> <h3 class="unnumbered">B3 (K003)</h3>
<pre class="r"><code> displayFunction1(litdata, &quot;B3&quot;)</code></pre> <pre class="r"><code>displayFunction1(litdata, &quot;B3&quot;)</code></pre>
<pre><code>## # A tibble: 7 × 3 <pre><code>## # A tibble: 7 × 3
## value n percentage ## value n percentage
## &lt;dbl&gt; &lt;int&gt; &lt;dbl&gt; ## &lt;dbl&gt; &lt;int&gt; &lt;dbl&gt;
@ -2318,7 +2302,7 @@ Data</h2>
</div> </div>
<div id="d1_1-h001_001" class="section level3 unnumbered"> <div id="d1_1-h001_001" class="section level3 unnumbered">
<h3 class="unnumbered">D1_1 (H001_001)</h3> <h3 class="unnumbered">D1_1 (H001_001)</h3>
<pre class="r"><code> displayFunction1(litdata, &quot;D1_1&quot;)</code></pre> <pre class="r"><code>displayFunction1(litdata, &quot;D1_1&quot;)</code></pre>
<pre><code>## # A tibble: 3 × 3 <pre><code>## # A tibble: 3 × 3
## value n percentage ## value n percentage
## &lt;lgl&gt; &lt;int&gt; &lt;dbl&gt; ## &lt;lgl&gt; &lt;int&gt; &lt;dbl&gt;
@ -2329,7 +2313,7 @@ Data</h2>
</div> </div>
<div id="d5-h005" class="section level3 unnumbered"> <div id="d5-h005" class="section level3 unnumbered">
<h3 class="unnumbered">D5 (H005)</h3> <h3 class="unnumbered">D5 (H005)</h3>
<pre class="r"><code> displayFunction1(litdata, &quot;D5&quot;)</code></pre> <pre class="r"><code>displayFunction1(litdata, &quot;D5&quot;)</code></pre>
<pre><code>## # A tibble: 18 × 3 <pre><code>## # A tibble: 18 × 3
## value n percentage ## value n percentage
## &lt;fct&gt; &lt;int&gt; &lt;dbl&gt; ## &lt;fct&gt; &lt;int&gt; &lt;dbl&gt;
@ -2355,7 +2339,7 @@ Data</h2>
</div> </div>
<div id="d7-h007" class="section level3 unnumbered"> <div id="d7-h007" class="section level3 unnumbered">
<h3 class="unnumbered">D7 (H007)</h3> <h3 class="unnumbered">D7 (H007)</h3>
<pre class="r"><code> displayFunction1(litdata, &quot;D7&quot;)</code></pre> <pre class="r"><code>displayFunction1(litdata, &quot;D7&quot;)</code></pre>
<pre><code>## # A tibble: 24 × 3 <pre><code>## # A tibble: 24 × 3
## value n percentage ## value n percentage
## &lt;dbl&gt; &lt;int&gt; &lt;dbl&gt; ## &lt;dbl&gt; &lt;int&gt; &lt;dbl&gt;
@ -2390,7 +2374,7 @@ gibt.</p>
</div> </div>
<div id="d8-h008" class="section level3 unnumbered"> <div id="d8-h008" class="section level3 unnumbered">
<h3 class="unnumbered">D8 (H008)</h3> <h3 class="unnumbered">D8 (H008)</h3>
<pre class="r"><code> displayFunction1(litdata, &quot;D8&quot;)</code></pre> <pre class="r"><code>displayFunction1(litdata, &quot;D8&quot;)</code></pre>
<pre><code>## # A tibble: 3 × 3 <pre><code>## # A tibble: 3 × 3
## value n percentage ## value n percentage
## &lt;fct&gt; &lt;int&gt; &lt;dbl&gt; ## &lt;fct&gt; &lt;int&gt; &lt;dbl&gt;
@ -2407,7 +2391,7 @@ gibt.</p>
<pre class="r"><code>birthyears &lt;- litdata$D7 <pre class="r"><code>birthyears &lt;- litdata$D7
# remove NAs # remove NAs
birthyears &lt;- birthyears[!is.na(birthyears)] birthyears &lt;- birthyears[!is.na(birthyears)]
age &lt;- 2021-birthyears</code></pre> age &lt;- 2021 - birthyears</code></pre>
<div id="frequency" class="section level2" number="6.1"> <div id="frequency" class="section level2" number="6.1">
<h2><span class="header-section-number">6.1</span> Frequency</h2> <h2><span class="header-section-number">6.1</span> Frequency</h2>
<pre class="r"><code>freq(age)</code></pre> <pre class="r"><code>freq(age)</code></pre>
@ -2440,7 +2424,7 @@ age &lt;- 2021-birthyears</code></pre>
## NA 0 0 ## NA 0 0
## Total 106 100</code></pre> ## Total 106 100</code></pre>
<pre class="r"><code># with NA <pre class="r"><code># with NA
freq(2021-litdata$D7)</code></pre> freq(2021 - litdata$D7)</code></pre>
<pre><code>## frequency percent frequency_sum percent_sum <pre><code>## frequency percent frequency_sum percent_sum
## 19 1 0.94 1 0.94 ## 19 1 0.94 1 0.94
## 20 2 1.89 3 2.83 ## 20 2 1.89 3 2.83
@ -2492,7 +2476,7 @@ Mittelwert</h3>
</div> </div>
<div id="spannweite" class="section level3" number="6.2.4"> <div id="spannweite" class="section level3" number="6.2.4">
<h3><span class="header-section-number">6.2.4</span> Spannweite</h3> <h3><span class="header-section-number">6.2.4</span> Spannweite</h3>
<pre class="r"><code>max(age)-min(age)</code></pre> <pre class="r"><code>max(age) - min(age)</code></pre>
<pre><code>## [1] 31</code></pre> <pre><code>## [1] 31</code></pre>
</div> </div>
<div id="quartilsabstand" class="section level3" number="6.2.5"> <div id="quartilsabstand" class="section level3" number="6.2.5">
@ -2643,7 +2627,7 @@ sozialen Herkunft 5.6, er ist also genauso hoch wie in der
Gesamtschweiz. Was ist zur Lage dieses Wertes bezogen auf die Verteilung Gesamtschweiz. Was ist zur Lage dieses Wertes bezogen auf die Verteilung
in Graubünden zu sagen? Also: Wie viele Schüler in GR liegen mit ihrem in Graubünden zu sagen? Also: Wie viele Schüler in GR liegen mit ihrem
Wert darunter?</p> Wert darunter?</p>
<pre class="r"><code>pnorm(5.6, mean = 5.1, sd=2.0)</code></pre> <pre class="r"><code>pnorm(5.6, mean = 5.1, sd = 2.0)</code></pre>
<pre><code>## [1] 0.5987063</code></pre> <pre><code>## [1] 0.5987063</code></pre>
</div> </div>
</div> </div>

View File

@ -20,11 +20,11 @@ knitr::opts_chunk$set(echo = TRUE)
# Preparation # Preparation
```{r, message=FALSE} ```{r, message=FALSE}
if (!require(tidyverse)){ if (!require(tidyverse)) {
install.packages("tidyverse") install.packages("tidyverse")
library(tidyverse) library(tidyverse)
} }
if (!require(moments)){ if (!require(moments)) {
install.packages("moments") install.packages("moments")
library(moments) library(moments)
} }
@ -34,28 +34,26 @@ if (!require(moments)){
## Frequency Table ordered from wish.com ## Frequency Table ordered from wish.com
```{r} ```{r}
freq <- function(data){ freq <- function(data) {
na_count = length(data[is.na(data)]) na_count <- length(data[is.na(data)])
valid_count = length(data)-na_count valid_count <- length(data) - na_count
frequency <- table(data) frequency <- table(data)
p <- prop.table(frequency) p <- prop.table(frequency)
percent <- round(p*100, digits = 2) percent <- round(p * 100, digits = 2)
frequency_sum <- cumsum(frequency) frequency_sum <- cumsum(frequency)
hkum <- cumsum(p) hkum <- cumsum(p)
percent_sum <- round(hkum*100, digits = 2) percent_sum <- round(hkum * 100, digits = 2)
freq_table <- cbind(frequency, percent, frequency_sum, percent_sum) freq_table <- cbind(frequency, percent, frequency_sum, percent_sum)
valid_percent <- round(valid_count / length(data)*100, digits = 2) valid_percent <- round(valid_count / length(data) * 100, digits = 2)
na_percent <- round(na_count / length(data)*100, digits = 2) na_percent <- round(na_count / length(data) * 100, digits = 2)
print(freq_table) print(freq_table)
count <- c(valid_count, na_count, valid_count+na_count) count <- c(valid_count, na_count, valid_count + na_count)
percent <- c(valid_percent, na_percent, valid_percent+na_percent) percent <- c(valid_percent, na_percent, valid_percent + na_percent)
totall <- c(valid_count+na_count, valid_percent+na_percent)
df <- data.frame(count, percent, row.names = c("valid", "NA", "Total")) df <- data.frame(count, percent, row.names = c("valid", "NA", "Total"))
print(df) print(df)
} }
``` ```
*Source: https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret and adapted* *Source: https://tellmi.psy.lmu.de/tutorials/deskriptive-statistiken-und-grafiken.html#haeufigkeiten-diskret and adapted*
@ -63,9 +61,9 @@ freq <- function(data){
## Modus ## Modus
```{r} ```{r}
getmode <- function(v) { getmode <- function(v) {
uniqv <- unique(v) uniqv <- unique(v)
x <- tabulate(match(v, uniqv)) x <- tabulate(match(v, uniqv))
uniqv[which(x==max(x))] uniqv[which(x == max(x))]
} }
``` ```
@ -97,7 +95,7 @@ head(litdata)
# Data cleaning # Data cleaning
## Converting Strings to numbers and *Keine Antwort* zu *NaN* ## Converting Strings to numbers and *Keine Antwort* zu *NaN*
``` {r} ``` {r}
litdata <- litdata %>% litdata <- litdata %>%
mutate_all(~ replace(., . == "Stimme voll zu5", 5)) %>% mutate_all(~ replace(., . == "Stimme voll zu5", 5)) %>%
mutate_all(~ replace(., . == "Stimme überhaupt nicht zu1", 1)) %>% mutate_all(~ replace(., . == "Stimme überhaupt nicht zu1", 1)) %>%
mutate_all(~ replace(., . == "Keine Antwort-", NaN)) mutate_all(~ replace(., . == "Keine Antwort-", NaN))
@ -106,17 +104,17 @@ litdata <- litdata %>%
## Make it numeric ## Make it numeric
The following code will **NOT** be run. The Idea is to show a way to automatically edit all columns. It works but some columns are NOT numeric. The following code will **NOT** be run. The Idea is to show a way to automatically edit all columns. It works but some columns are NOT numeric.
```{r, eval=FALSE} ```{r, eval=FALSE}
# All colnames that exist # All colnames that exist
litdataColnames <- colnames(litdata) litdataColnames <- colnames(litdata)
# the ones we don't want to change # the ones we don't want to change
litdataNonNumericCols <- c("submitdate", "startlanguage", "startdate", "datestamp", "lastpage", "seed") litdataNonNumericCols <- c("submitdate", "startlanguage", "startdate", "datestamp", "lastpage", "seed")
# the colnames that should be changed # the colnames that should be changed
litdataColsToMakeNumeric <- litdataColnames[!(litdataColnames %in% litdataNonNumericCols)] litdataColsToMakeNumeric <- litdataColnames[!(litdataColnames %in% litdataNonNumericCols)]
print(litdataColsToMakeNumeric) print(litdataColsToMakeNumeric)
litdataColsToMakeNumeric <- c("R1") litdataColsToMakeNumeric <- c("R1")
for (col in litdataColsToMakeNumeric) { for (col in litdataColsToMakeNumeric) {
litdata[[col]] <- as.numeric(litdata[[col]]) litdata[[col]] <- as.numeric(litdata[[col]])
} }
``` ```
First we rename all the columns First we rename all the columns
@ -131,7 +129,6 @@ litdata <- litdata %>% rename(
"A7" = "W007", "A7" = "W007",
"A8" = "W008", "A8" = "W008",
"A9" = "W009", "A9" = "W009",
"B1" = "K001", "B1" = "K001",
"B2" = "K002", "B2" = "K002",
"B3" = "K003", "B3" = "K003",
@ -141,37 +138,30 @@ litdata <- litdata %>% rename(
"B7" = "K007", "B7" = "K007",
"B8" = "K008", "B8" = "K008",
"B9" = "K009", "B9" = "K009",
"C1_1" = "TK001_01", "C1_1" = "TK001_01",
"C1_2" = "TK001_02", "C1_2" = "TK001_02",
"C1_3" = "TK001_03", "C1_3" = "TK001_03",
"C1_4" = "TK001_04", "C1_4" = "TK001_04",
"C2_1" = "TK002_01", "C2_1" = "TK002_01",
"C2_2" = "TK002_02", "C2_2" = "TK002_02",
"C2_3" = "TK002_03", "C2_3" = "TK002_03",
"C2_4" = "TK002_04", "C2_4" = "TK002_04",
"C3_1" = "TK003_01", "C3_1" = "TK003_01",
"C3_2" = "TK003_02", "C3_2" = "TK003_02",
"C3_3" = "TK003_03", "C3_3" = "TK003_03",
"C3_4" = "TK003_04", "C3_4" = "TK003_04",
"C4_1" = "TK004_01", "C4_1" = "TK004_01",
"C4_2" = "TK004_02", "C4_2" = "TK004_02",
"C4_3" = "TK004_03", "C4_3" = "TK004_03",
"C4_4" = "TK004_04", "C4_4" = "TK004_04",
"C5_1" = "TK005_01", "C5_1" = "TK005_01",
"C5_2" = "TK005_02", "C5_2" = "TK005_02",
"C5_3" = "TK005_03", "C5_3" = "TK005_03",
"C5_4" = "TK005_04", "C5_4" = "TK005_04",
"C6_1" = "TK006_01", "C6_1" = "TK006_01",
"C6_2" = "TK006_02", "C6_2" = "TK006_02",
"C6_3" = "TK006_03", "C6_3" = "TK006_03",
"C6_4" = "TK006_04", "C6_4" = "TK006_04",
"D1_1" = "H001_001", "D1_1" = "H001_001",
"D1_2" = "H001_002", "D1_2" = "H001_002",
"D1_3" = "H001_003", "D1_3" = "H001_003",
@ -179,23 +169,15 @@ litdata <- litdata %>% rename(
"D1_5" = "H001_005", "D1_5" = "H001_005",
"D1_6" = "H001_006", "D1_6" = "H001_006",
"D1_7" = "H001_007", "D1_7" = "H001_007",
"D2" = "H002", "D2" = "H002",
"D3" = "H003", "D3" = "H003",
"D4" = "H004", "D4" = "H004",
"D4_comment" = "H004_other", "D4_comment" = "H004_other",
"D5" = "H005", "D5" = "H005",
"D5_comment" = "H005_other", "D5_comment" = "H005_other",
"D6" = "H006", "D6" = "H006",
"D7" = "H007", "D7" = "H007",
"D8" = "H008", "D8" = "H008",
"E1" = "R1" "E1" = "R1"
) )
``` ```
@ -289,7 +271,6 @@ litdata$D7 <- as.numeric(litdata$D7)
litdata$D8 <- as.factor(litdata$D8) litdata$D8 <- as.factor(litdata$D8)
# skipping E1 because it's a free text # skipping E1 because it's a free text
``` ```
@ -321,44 +302,46 @@ displayFunction1 <- function(table, column) {
tmp <- rename(tmp, value = all_of(column)) tmp <- rename(tmp, value = all_of(column))
tmp <- tmp %>% tmp <- tmp %>%
count(value) %>% count(value) %>%
mutate(percentage = prop.table(n)*100) mutate(percentage = prop.table(n) * 100)
print(tmp, n = 100) print(tmp, n = 100)
ggplot(tmp, ggplot(
aes(x = value, y=n)) + tmp,
geom_bar(stat = "identity") + aes(x = value, y = n)
theme(axis.text.x = element_text(angle = 45, hjust = 1)) ) +
geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
} }
``` ```
### A3 (W003) {-} ### A3 (W003) {-}
```{r} ```{r}
displayFunction1(litdata, "A3") displayFunction1(litdata, "A3")
``` ```
### B3 (K003) {-} ### B3 (K003) {-}
```{r} ```{r}
displayFunction1(litdata, "B3") displayFunction1(litdata, "B3")
``` ```
### D1_1 (H001_001) {-} ### D1_1 (H001_001) {-}
```{r} ```{r}
displayFunction1(litdata, "D1_1") displayFunction1(litdata, "D1_1")
``` ```
### D5 (H005) {-} ### D5 (H005) {-}
```{r} ```{r}
displayFunction1(litdata, "D5") displayFunction1(litdata, "D5")
``` ```
### D7 (H007) {-} ### D7 (H007) {-}
```{r} ```{r}
displayFunction1(litdata, "D7") displayFunction1(litdata, "D7")
``` ```
Die Warnung resultiert daraus, dass es sehr viele *NA* gibt. Die Warnung resultiert daraus, dass es sehr viele *NA* gibt.
### D8 (H008) {-} ### D8 (H008) {-}
```{r} ```{r}
displayFunction1(litdata, "D8") displayFunction1(litdata, "D8")
``` ```
# Selbststudium 2.1 # Selbststudium 2.1
@ -368,14 +351,14 @@ We have the year 2021
birthyears <- litdata$D7 birthyears <- litdata$D7
# remove NAs # remove NAs
birthyears <- birthyears[!is.na(birthyears)] birthyears <- birthyears[!is.na(birthyears)]
age <- 2021-birthyears age <- 2021 - birthyears
``` ```
## Frequency ## Frequency
```{r} ```{r}
freq(age) freq(age)
# with NA # with NA
freq(2021-litdata$D7) freq(2021 - litdata$D7)
``` ```
## Selbststudium 2 ## Selbststudium 2
@ -399,7 +382,7 @@ mean(age)
### Spannweite ### Spannweite
```{r} ```{r}
max(age)-min(age) max(age) - min(age)
``` ```
### Quartilsabstand ### Quartilsabstand
@ -511,6 +494,6 @@ pnorm(5.1, mean = 5.6, sd = 1.8)
In einem Bündner Ort beträgt der Mittelwert auf der Skala zur sozialen Herkunft 5.6, er ist also genauso hoch wie in der Gesamtschweiz. Was ist zur Lage dieses Wertes bezogen auf die Verteilung in Graubünden zu sagen? Also: Wie viele Schüler in GR liegen mit ihrem Wert darunter? In einem Bündner Ort beträgt der Mittelwert auf der Skala zur sozialen Herkunft 5.6, er ist also genauso hoch wie in der Gesamtschweiz. Was ist zur Lage dieses Wertes bezogen auf die Verteilung in Graubünden zu sagen? Also: Wie viele Schüler in GR liegen mit ihrem Wert darunter?
```{r} ```{r}
pnorm(5.6, mean = 5.1, sd=2.0) pnorm(5.6, mean = 5.1, sd = 2.0)
``` ```