
## ------------------------------------------------------------------------
library(mosaic)
library(tidyverse)
library(stringr)
library(car)

# Load the example data sets used in this script, each one exactly once.
data(stats_test, package = "pradadata")
data(profiles, package = "okcupiddata")
data(flights, package = "nycflights13")
data(airlines, package = "nycflights13")

## ----filter-ex, eval = FALSE---------------------------------------------
# Row filtering with filter(), written as pipelines.
df_frauen <- profiles %>% filter(sex == "f")  # women only
df_alt <- profiles %>% filter(age > 70)  # people older than 70 only
# women older than 70, i.e. both conditions must hold (logical AND):
df_alte_frauen <- profiles %>% filter(age > 70 & sex == "f")
# ages from 35 to 60, both bounds included:
df_mittelalt <- profiles %>% filter(age >= 35, age <= 60)
# everyone who is a non-smoker *or* a non-drinker:
df_nosmoke_nodrinks <- profiles %>%
  filter(smokes == "no" | drinks == "not at all")

## ----eval = FALSE--------------------------------------------------------
# dplyr: rows for which the condition evaluates to NA are dropped
filter(profiles, age > 70, sex == "f", drugs == "sometimes")

# base R: a logical index containing NA would return rows filled with NA,
# so which() is used to keep only the rows where the condition is TRUE
profiles[which(profiles$age > 70 & profiles$sex == "f" &
                 profiles$drugs == "sometimes"), ]


## ----eval = FALSE--------------------------------------------------------
# keep the profiles whose body type is one of the listed values
profiles %>%
  filter(body_type %in% c("a little extra", "average"))

## ----eval = FALSE--------------------------------------------------------
# keep the profiles whose `pets` entry contains the text "cats"
profiles %>%
  filter(str_detect(pets, "cats"))

## ----eval = FALSE--------------------------------------------------------
# remove every row that has a missing value in any column
profiles_keine_nas <- profiles %>% drop_na()

## ----eval = FALSE--------------------------------------------------------
# remove only the rows with a missing value in `income` or `sex`
profiles_keine_nas2 <- profiles %>% drop_na(income, sex)
# filter() without a condition keeps all rows
profiles_keine_nas2 %>% filter()


## ----eval = FALSE--------------------------------------------------------
stats_test %>% select(score)  # pick the column `score`
# pick the columns `score` and `study_time`
stats_test %>% select(score, study_time)

# every column from `score` through `study_time`
# (the same selection as above if the two columns are adjacent)
stats_test %>% select(score:study_time)
stats_test %>% select(5:6)  # pick columns 5 through 6 by position

## ----select-one-of, eval = FALSE-----------------------------------------
# column names held in a character vector
vars <- c("score", "study_time")
stats_test %>% select(one_of(vars))

## `select()` wählt *Spalten* aus.

## ----arrange-demo, eval = FALSE------------------------------------------

stats_test %>% arrange(score)  # ascending: lowest scores first
stats_test %>% arrange(desc(score))  # descending: highest scores first
stats_test %>% arrange(interest, score)  # two sort keys


## ----arrange-demo2, echo = FALSE-----------------------------------------
# the two rows with the highest scores, without `row_number` and `date_time`
stats_test %>%
  select(-row_number, -date_time) %>%
  arrange(desc(score)) %>%
  head(n = 2)
# the two rows with the lowest scores
stats_test %>%
  arrange(score) %>%
  head(n = 2)


## ----demo-groupby-no-eval, eval = FALSE----------------------------------
# group the rows by the values of `interest`
test_gruppiert <- stats_test %>% group_by(interest)
test_gruppiert

## ----demo-groupby, eval = TRUE-------------------------------------------
test_gruppiert <- stats_test %>% group_by(interest)
# first four rows of three columns of the grouped table
test_gruppiert %>%
  select(study_time, interest, score) %>%
  head(n = 4)




## ------------------------------------------------------------------------
# mean of `score` over the whole table
# (no `na.rm`, so the result is NA if any score is missing)
stats_test %>% summarise(mean(score))

## ------------------------------------------------------------------------
# mean of `score` within each `interest` group, ignoring missing values
test_gruppiert <- stats_test %>% group_by(interest)
test_gruppiert %>% summarise(mean(score, na.rm = TRUE))

## ------------------------------------------------------------------------
# the same summary with an explicit name for the result column
test_gruppiert <- stats_test %>% group_by(interest)
test_gruppiert %>% summarise(mw_pro_gruppe = mean(score, na.rm = TRUE))

## Merke: Mit summarise kann man eine Spalte eines Dataframes zu einem Wert zusammenfassen.

## ------------------------------------------------------------------------
stats_test %>% summarise(n())  # number of rows in the whole table
test_gruppiert %>% summarise(n())  # number of rows per group
nrow(stats_test)  # base R row count of the whole table

## ------------------------------------------------------------------------
# count() tallies the rows for each value of `interest`
stats_test %>% dplyr::count(interest)

## ----eval = FALSE--------------------------------------------------------
stats_test %>% dplyr::count(study_time, sort = TRUE)  # largest counts first
stats_test %>% dplyr::count(interest, study_time)  # one row per combination

## ----eval = FALSE--------------------------------------------------------
# oh dear: nested calls have to be read from the inside out
filter(
  summarise(
    group_by(filter(stats_test, !is.na(score)), interest),
    mw = mean(score)
  ),
  mw > 30
)

## ------------------------------------------------------------------------
# oh yes: as a pipeline the steps read from top to bottom
stats_test %>%
  drop_na(score) %>%  # remove the rows with a missing `score`
  group_by(interest) %>%
  summarise(mw = mean(score)) %>%
  filter(mw > 30)  # keep the groups whose mean score exceeds 30


## ------------------------------------------------------------------------
# add a logical column marking scores above 38, then show the first rows
stats_test %>%
  mutate(Streber = score > 38) %>%
  select(bestanden, interest, score, Streber) %>%
  head()


## ----ungetuem, eval = FALSE----------------------------------------------
# Deliberately nested: drop two columns, keep the rows with
# bestanden == "ja", group by `interest`, then compute the mean score and
# the row count per group. The outermost filter() has no condition and
# therefore keeps every row.
bestanden_gruppen <- filter(
  summarise(
    group_by(
      filter(
        select(stats_test, -row_number, -date_time),
        bestanden == "ja"
      ),
      interest
    ),
    Punkte = mean(score),
    n = n()
  )
)

## ----ungetuem-easy, eval = FALSE-----------------------------------------
# Pipeline: drop two columns, keep the rows with bestanden == "ja",
# group by `interest`, then compute the mean score and row count per group.
stats_test %>%
  select(-c(row_number, date_time)) %>%
  filter(bestanden == "ja") %>%
  group_by(interest) %>%
  summarise(Punkte = mean(score), n = n())


## ----eval = FALSE--------------------------------------------------------
# Standard deviation of `score`, built step by step: deviations from the
# mean, their squares, the mean of the squares, and finally the square root.
# mean() divides by n here, not by n - 1 as sd() does.
stats_test %>%
  select(score) %>%
  mutate(
    score_delta = score - mean(score),
    score_delta_squared = score_delta^2
  ) %>%
  summarise(score_var = mean(score_delta_squared)) %>%
  summarise(sqrt(score_var))

## ----suffix-if-----------------------------------------------------------
# mean of every numeric column, ignoring missing values
summarise_if(
  stats_test,
  .predicate = is.numeric,
  .funs = mean,
  na.rm = TRUE
)

## Nimm die Tabelle "stats_test" UND DANN

## ----suffix-all----------------------------------------------------------
# Number of missing values in each column. A formula lambda replaces the
# deprecated funs() helper; `.` stands for the current column.
stats_test %>%
  summarise_all(~ sum(is.na(.)))

## Nimm die Tabelle "stats_test" UND DANN

## ------------------------------------------------------------------------
# maximum of `study_time` and `self_eval`, ignoring missing values
stats_test %>%
  select(-date_time) %>%
  summarise_at(vars(study_time, self_eval), max, na.rm = TRUE) %>%
  head(n = 3)

## ------------------------------------------------------------------------
# Express three columns as a proportion of their respective maximum.
# A named list of lambdas replaces the deprecated funs() helper; the name
# `prop` is appended to each column name (e.g. `study_time_prop`), which is
# what the final select() relies on.
stats_test %>%
  drop_na() %>%
  mutate_at(.vars = vars(study_time, self_eval, interest),
            .funs = list(prop = ~ . / max(.))) %>%
  select(contains("_prop"))



## ------------------------------------------------------------------------
# first three values of the `carrier` column in `flights`
head(select(flights, carrier), n = 3)

## ------------------------------------------------------------------------
# first three rows of `airlines`
airlines %>% head(n = 3)

## ------------------------------------------------------------------------
# Attach the matching `airlines` columns to every flight via `carrier`;
# inner_join() keeps only the flights whose carrier appears in `airlines`.
flights_joined <- flights %>%
  inner_join(airlines, by = "carrier")

# `name` comes from `airlines`
head(flights_joined$name)


