

## ----libs-flights--------------------------------------------------------
library(tidyverse)
library(viridis)
library(GGally)
library(corrr)

data(flights, package = "nycflights13")

## ------------------------------------------------------------------------
# Print all flights ordered by arrival delay, smallest value first.
arrange(flights, arr_delay)

## ------------------------------------------------------------------------
# Same ascending sort by arrival delay, reduced to a handful of columns so
# the printed table stays readable.
flights %>%
  arrange(arr_delay) %>%
  select(
    arr_delay, carrier, month, day,
    dep_time, flight, dest
  )

## ----max-arr-delay-stats-------------------------------------------------
# The ten flights with the largest arrival delay: sort descending (negating
# the numeric column reverses the order), keep the identifying columns and
# take the first ten rows.
flights %>%
  arrange(-arr_delay) %>%
  select(
    arr_delay, carrier, month, day,
    dep_time, tailnum, flight, dest
  ) %>%
  head(10)

## ----max-arr-delay-stats-per-carrier-------------------------------------
# The three most delayed flights of every carrier. Because the rows are
# already sorted by descending delay, the per-group row number acts as a
# rank within each carrier.
flights %>%
  arrange(-arr_delay) %>%
  select(
    arr_delay, carrier, month, day,
    dep_time, tailnum, flight, dest
  ) %>%
  group_by(carrier) %>%
  filter(row_number() <= 3)

## ----max-arr-delay-stats-per-carrier2------------------------------------
# As before (three most delayed flights per carrier), but the result is
# additionally sorted by carrier so each carrier's rows appear together.
flights %>%
  arrange(-arr_delay) %>%
  select(
    arr_delay, carrier, month, day,
    dep_time, tailnum, flight, dest
  ) %>%
  group_by(carrier) %>%
  filter(row_number() <= 3) %>%
  arrange(carrier)

## ----mean-arr-delay------------------------------------------------------
# Mean arrival delay per carrier (missing delays are ignored), sorted from
# the highest mean downwards; only the first rows are shown.
flights %>%
  select(
    arr_delay, carrier, month, day,
    dep_time, tailnum, flight, dest
  ) %>%
  group_by(carrier) %>%
  summarise(delay_mean = mean(arr_delay, na.rm = TRUE)) %>%
  arrange(-delay_mean) %>%
  head()

## ----cor-overall---------------------------------------------------------
# Correlation between arrival delay and distance over all flights, using
# only rows in which both values are present.
with(flights, cor(arr_delay, distance, use = "complete.obs"))

## ----warning = FALSE-----------------------------------------------------
# Correlation between arrival delay and distance computed separately for
# each carrier; only carriers with an absolute correlation above 0.2 are
# kept (rows whose correlation is NA are dropped by filter()).
flights %>%
  group_by(carrier) %>%
  summarise(
    cor_delay_dist = cor(arr_delay, distance, use = "complete.obs")
  ) %>%
  filter(abs(cor_delay_dist) > 0.2)

## ------------------------------------------------------------------------
# Correlate every numeric column (except `year`, which is removed first)
# with each other, narrow the result to the correlations involving
# `arr_delay`, and keep those with an absolute value above 0.2.
flights %>%
  select(-year) %>%
  select_if(is.numeric) %>%
  correlate() %>%
  focus(arr_delay) %>%
  filter(abs(arr_delay) > 0.2)

## ----max-arr-delay-stats-bar-plot-no-eval, eval = FALSE------------------
# Bar chart of the ten largest arrival delays, one bar per aircraft
# (tail number), ordered by delay and coloured by delay.
flights %>%
  arrange(-arr_delay) %>%
  select(
    arr_delay, carrier, month, day,
    dep_time, tailnum, flight, dest
  ) %>%
  filter(row_number() <= 10) %>%
  ggplot(aes(
    x = reorder(tailnum, -arr_delay),
    y = arr_delay,
    fill = arr_delay
  )) +
  geom_col() +
  scale_fill_viridis(direction = -1)


## ----max-arr-delay-stats-bar-plot, eval = TRUE, echo = FALSE, fig.cap = "Verspätung von Flügen", out.width = "100%", cache = FALSE----
library(tidyverse)
library(viridis)

# Left panel: horizontal bar chart of the ten largest arrival delays, one
# bar per aircraft (tail number), ordered and coloured by delay. Title and
# axis text are shrunk so the plot fits next to a second panel.
p_delay1 <- flights %>%
  arrange(-arr_delay) %>%
  select(
    arr_delay, carrier, month, day,
    dep_time, tailnum, flight, dest
  ) %>%
  filter(row_number() <= 10) %>%
  ggplot(aes(
    x = reorder(tailnum, -arr_delay),
    y = arr_delay,
    fill = arr_delay
  )) +
  geom_col() +
  coord_flip() +
  scale_fill_viridis(direction = -1) +
  labs(
    x = "Flugzeug",
    y = "Verspätung",
    title = "Top-10 der verspäteten Flugzeuge"
  ) +
  theme(
    title = element_text(size = rel(0.5)),
    axis.text = element_text(size = rel(0.5))
  )

# Right panel: dot plot of the ten carriers with the highest mean arrival
# delay (missing delays are ignored), ordered from highest to lowest mean.
# No `fill` label is set here: the plot maps no fill aesthetic, so such a
# label would never be displayed.
p_delay2 <- flights %>%
  select(arr_delay, carrier, month, day, dep_time, tailnum, flight, dest) %>%
  group_by(carrier) %>%
  summarise(delay_mean = mean(arr_delay, na.rm = TRUE)) %>%
  arrange(-delay_mean) %>%
  filter(between(row_number(), 1, 10)) %>%
  ggplot() +
  aes(x = reorder(carrier, -delay_mean), y = delay_mean) +
  geom_point() +
  labs(x = "Fluggesellschaft",
       y = "Verspätung",
       title = "Top-10 der Verspätungs-Airlines") +
  # Shrink title and axis text so both panels fit side by side.
  theme(title = element_text(size = rel(0.5)),
        axis.text = element_text(size = rel(0.5)))

# Draw both panels next to each other in a single row.
gridExtra::grid.arrange(p_delay1, p_delay2, nrow = 1)

## ------------------------------------------------------------------------
# Per-carrier summary of arrival delay: median, mean, standard deviation,
# first and third quartile (all ignoring missing delays), plus the number
# of rows per carrier (which includes flights with a missing delay).
flights_summary <- flights %>%
  select(
    arr_delay, carrier, month, day,
    dep_time, tailnum, flight, dest
  ) %>%
  group_by(carrier) %>%
  summarise(
    delay_md = median(arr_delay, na.rm = TRUE),
    delay_mean = mean(arr_delay, na.rm = TRUE),
    delay_sd = sd(arr_delay, na.rm = TRUE),
    delay_iqr_lower = quantile(arr_delay, probs = 0.25, na.rm = TRUE),
    delay_iqr_upper = quantile(arr_delay, probs = 0.75, na.rm = TRUE),
    delay_count = n()
  ) %>%
  ungroup()

## ----mean-arr-delay-plot2-no-eval, eval = FALSE--------------------------
# Horizontal dot plot of the mean arrival delay per carrier, ordered by the
# mean; the plot object is stored as `p_delay2`.
p_delay2 <- flights_summary %>%
  ggplot(aes(x = reorder(carrier, -delay_mean), y = delay_mean)) +
  geom_point() +
  labs(
    title = "Verspätung nach Fluggesellschaft",
    x = "Fluggesellschaft",
    y = "Verspätung"
  ) +
  coord_flip() +
  theme(title = element_text(size = rel(0.7)))

## ----mean-arr-delay-plot3------------------------------------------------
# Median arrival delay per carrier, ordered by the median. Point size and
# colour encode the number of flights; the grey error bars span the first
# to the third quartile of the delay.
p_delay3 <- flights_summary %>%
  ggplot(aes(x = reorder(carrier, -delay_md), y = delay_md)) +
  geom_point(aes(size = delay_count, color = delay_count)) +
  geom_errorbar(
    aes(ymin = delay_iqr_lower, ymax = delay_iqr_upper),
    color = "grey60"
  ) +
  labs(
    title = "Verspätung nach Fluggesellschaft",
    x = "Fluggesellschaft",
    y = "Median der Verspätung",
    caption = paste0("Größe und Farbe der Punkte",
                     " spiegeln die Anzahl der Flüge wider")
  ) +
  theme(title = element_text(size = rel(0.7)))

## ----ggpairs-flights, fig.cap = "Streudiagramm-Matrix", fig.asp = .7, out.width="100%"----
# Scatterplot matrix of delay, air time, distance and departure time for
# the two carriers "F9" and "AS", coloured by carrier.
flights %>%
  select(arr_delay, air_time, distance, dep_time, carrier) %>%
  filter(carrier %in% c("F9", "AS")) %>%
  ggpairs(mapping = aes(color = carrier, fill = carrier))

## ------------------------------------------------------------------------
# Finish with a call to praise() from the praise package, referenced via
# `::` so the package does not need to be attached.
praise::praise()


