-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2-Data.qmd
108 lines (90 loc) · 2.61 KB
/
2-Data.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Data
Regression von AfD-Wahlergebnissen und Mindestlohnbeziehern je Kreis
## Import der Daten
```{r}
library(tidyverse)
```
```{r}
# Read the two raw inputs. read_csv2() expects ";" as the field separator and
# "," as the decimal mark.
wahlergebnisse <- read_csv2(file = "data/Wahlergebnisse.csv")
mindestlohn <- read_csv2(file = "data/WSI_mindestlohn.csv")
```
Clean up the data
```{r}
# Tidy the raw election table: give the needed columns readable names, drop
# the leading metadata rows, convert the counts to numbers and derive the
# AfD and Linke vote shares per district.
wahl <- wahlergebnisse %>%
  rename(
    "kreis" = "...2",
    "waehler" = "...5",
    "afd_stimmen" = "...57",
    "linke_stimmen" = "...59",
    "name" = "...3",
    "land" = "© Der Bundeswahlleiter, Wiesbaden 2022"
  ) %>%
  # Remove the first five rows (metadata and the Germany-wide total) before
  # any numeric conversion, so header text is never passed to as.numeric().
  # A negative index is also safe for short tables, unlike 6:nrow(.), which
  # counts downwards when fewer than six rows exist.
  slice(-seq_len(5)) %>%
  filter(!is.na(kreis)) %>%
  mutate(
    # convert each count once, then reuse the numeric columns for the shares
    waehler = as.numeric(waehler),
    afd_stimmen = as.numeric(afd_stimmen),
    linke_stimmen = as.numeric(linke_stimmen),
    afd_prozent = afd_stimmen / waehler,
    linke_prozent = linke_stimmen / waehler
  ) %>%
  select(kreis, name, land, waehler, afd_stimmen, afd_prozent, linke_prozent)
# Combine the two Berlin rows (east "11100" and west "11200") into a single
# row with key "11000", then drop the two partial rows.
berlin_teile <- wahl %>%
  filter(kreis %in% c("11100", "11200"))

# Fail loudly instead of silently adding an empty or half-filled Berlin row.
if (nrow(berlin_teile) != 2) {
  stop(
    "Expected exactly two Berlin rows (11100 and 11200), found ",
    nrow(berlin_teile),
    call. = FALSE
  )
}

wahl <- wahl %>%
  add_row(
    kreis = "11000",
    name = "Berlin",
    land = "BE",
    waehler = sum(berlin_teile$waehler),
    afd_stimmen = sum(berlin_teile$afd_stimmen),
    afd_prozent = sum(berlin_teile$afd_stimmen) / sum(berlin_teile$waehler),
    # The Linke vote count is not a column of `wahl`, so rebuild the combined
    # share as the voter-weighted mean of the two partial shares. Without
    # this the Berlin row would carry NA for linke_prozent.
    linke_prozent = sum(berlin_teile$linke_prozent * berlin_teile$waehler) /
      sum(berlin_teile$waehler)
  ) %>%
  filter(!kreis %in% c("11100", "11200")) # remove east and west Berlin
```
```{r}
# Tidy the minimum-wage table: keep the district key, the region name and the
# share of employees entitled to the minimum wage, restricted to rows whose
# key has 4 or 5 characters, and normalise the key to five characters.
lohn <- mindestlohn %>%
  rename(
    "kreis" = "...1",
    "name" = "Region",
    "lohn_prozent" = "Anteil in Prozent der Beschäftigten mit Mindestlohn-anspruch"
  ) %>%
  select(kreis, name, lohn_prozent) %>%
  # Make the key a character vector explicitly, so the result type does not
  # depend on which key lengths happen to be present in the data.
  mutate(kreis = as.character(kreis)) %>%
  # keep only keys with 4 or 5 characters
  filter(nchar(kreis) %in% c(4, 5)) %>%
  # restore the leading zero on 4-character keys
  mutate(kreis = str_pad(kreis, width = 5, side = "left", pad = "0"))
```
Show the districts in the election data that have no match in the wage data
```{r}
# List the election districts whose key does not occur in the wage data,
# ordered by key.
wahl %>%
  filter(is.na(match(kreis, lohn$kreis))) %>%
  arrange(kreis) %>%
  select(kreis, name)
```
Hamburg and Berlin (as a whole, key 11000) have no entry in the wage data; add their values manually from the WSI website
```{r}
# Append the two manually looked-up wage shares (Hamburg, then Berlin).
lohn <- add_row(
  lohn,
  kreis = c("02000", "11000"),
  name = c("Hamburg", "Berlin"),
  lohn_prozent = c(14.7, 17.8)
)
```
Merge the two datasets
```{r}
# Attach the wage share to every election district. Both tables carry a
# `name` column, so the join suffixes them; keep the election table's name
# (name.x) and rescale the wage share from percent to a fraction.
wahl_lohn <- left_join(wahl, lohn, by = "kreis") %>%
  select(
    kreis,
    name = name.x,
    land,
    afd_prozent,
    lohn_prozent,
    linke_prozent
  ) %>%
  mutate(lohn_prozent = lohn_prozent / 100)
```
The regression is done in the next file; save the merged data frame
```{r}
# Store as RDS so the district keys keep their leading zeros on re-import.
saveRDS(object = wahl_lohn, file = "data/wahl_lohn.rds")
```