Skip to contents

Counts

A first step in many transportation safety analyses involves counting the number of relevant crashes, fatalities, or people involved. counts() lets users specify what to count, where to count them (rural/urban and/or in specified states or regions), who to include, the interval over which to count (annually or monthly), and factors involved in the crashes. It returns a simple tibble that can be easily piped into ggplot() to quickly visualize counts.

Generating Counts

First we load the required libraries:

Then pull a year of FARS data for Virginia:

myFARS <- get_fars(years = 2021, states = "VA", proceed = T)
## ✓ 2021 data downloaded
## Preparing raw data files...
## ✓ Accident file processed
## ✓ Vehicle file processed
## ✓ Person file processed
## ✓ Weather file(s) processed
## ✓ Crash risk factors file processed
## ✓ Vehicle-level files processed
## ✓ PBtype file processed
## ✓ SafetyEq file processed
## ✓ Person-level files processed
## ✓ Flat file constructed
## ✓ Multi_acc file constructed
## ✓ Multi_veh file constructed
## ✓ Multi_per file constructed
## ✓ SOE file constructed
## ✓ Prepared files saved in C:/Users/STEVEJ~1/AppData/Local/Temp/RtmpGmtyS1/FARS data/prepd/2021
## ✓ Codebook file saved in C:/Users/STEVEJ~1/AppData/Local/Temp/RtmpGmtyS1/FARS data/prepd/

Then we can use counts() to reduce the data to desired counts.

Here we count crashes:

my_counts <- counts(
  myFARS,
  what = "crashes",
  interval = c("month")
  )

This returns the following dataframe:

knitr::kable(my_counts, format = "html")
month date n what states region urb who
April 2021-04-01 83 crashes all all all all
August 2021-08-01 77 crashes all all all all
December 2021-12-01 74 crashes all all all all
February 2021-02-01 47 crashes all all all all
January 2021-01-01 50 crashes all all all all
July 2021-07-01 100 crashes all all all all
June 2021-06-01 65 crashes all all all all
March 2021-03-01 57 crashes all all all all
May 2021-05-01 87 crashes all all all all
November 2021-11-01 79 crashes all all all all
October 2021-10-01 108 crashes all all all all
September 2021-09-01 79 crashes all all all all

Which we can graph:

my_counts %>%
  ggplot(aes(x=date, y=n, label=scales::comma(n))) + 
    geom_col() + 
    geom_label(vjust=1.2) +
    labs(x=NULL, y=NULL, title = "Fatal Crashes in Virginia")

We could alternatively count fatalities:

counts(
  myFARS,
  what = "fatalities",
  interval = c("month")
  ) %>%
  ggplot(aes(x=date, y=n, label=scales::comma(n))) + 
    geom_col() + 
    geom_label(vjust=1.2) +
    labs(x=NULL, y=NULL, title = "Fatalities in Virginia")

Or fatalities involving speeding:

counts(myFARS,
       what = "fatalities",
       interval = c("month"),
       involved = "speeding"
       ) %>%
  ggplot(aes(x=date, y=n, label=scales::comma(n))) + 
    geom_col() + 
    geom_label(vjust=1.2) +
    labs(x=NULL, y=NULL, title = "Speeding-Related Fatalities in Virginia")

Or fatalities involving speeding in rural areas:

counts(myFARS,
       what = "fatalities",
       where = list(urb="rural"),
       interval = c("month"),
       involved = "speeding"
       ) %>%
  ggplot(aes(x=date, y=n, label=scales::comma(n))) + 
    geom_col() + 
    geom_label(vjust=1.2) +
    labs(x=NULL, y=NULL, title = "Speeding-Related Fatalities in Rural Virginia")

Comparing Counts

We can use compare_counts() to quickly produce comparison graphs.

Here we compare speeding-related fatalities in rural and urban areas:

compare_counts(
  df = myFARS,
  interval = "month",
  involved = "speeding",
  what = "fatalities",
  where = list(urb="rural"),
  where2 = list(urb="urban")
  ) %>%
  ggplot(aes(x=date, y=n, label=scales::comma(n))) + 
    geom_col() + 
    geom_label(vjust=1.2) +
    facet_wrap(.~urb) +
    labs(x=NULL, y=NULL, title = "Speeding-Related Fatalities in Virginia", fill=NULL)

And here we compare speeding-related crashes to those related to distraction:

compare_counts(
  df = myFARS,
  interval = "month",
  involved = "speeding",
  involved2 = "distracted driver",
  what = "crashes",
  ) %>%
  ggplot(aes(x=date, y=n, label=scales::comma(n))) + 
    geom_col() + 
    geom_label(vjust=1.2) +
    facet_wrap(.~involved) +
    labs(x=NULL, y=NULL, title = "Speeding- and Distraction-Related Crashes in Virginia", fill=NULL)

annual_counts data table

rfars includes annual_counts, a table of annual crash counts and related metrics:

rfars::annual_counts %>%
  filter(involved != "general") %>%
  replace_na(list(type="")) %>%
  unite(type, what, col="what2", sep = " ", remove = T) %>%
  mutate_at(c("involved", "what2"), stringr::str_to_sentence) %>%

  ggplot(aes(x=year, y=n)) +
  geom_line() +
  facet_grid(what2~involved, scales="free_y", labeller = label_wrap_gen(width = 10)) +
  scale_y_continuous(label=scales::comma, expand = expansion()) +
  scale_x_continuous(labels = seq(2014, 2023, 2), breaks = seq(2014, 2023, 2), expand = expansion()) +
  labs(x=NULL, y=NULL, title = "Crash Types Over Time") +
  theme(
    strip.text.y.right = element_text(angle=0),
    axis.ticks = element_blank(),
    axis.text.x.bottom = element_text(angle=45, hjust=1, size=8)
    )

rfars::annual_counts %>%
  filter(involved == "general") %>%
  replace_na(list(type="")) %>%
  unite(type, what, col="what2", sep = " ", remove = T) %>%
  mutate_at(c("involved", "what2"), stringr::str_to_sentence) %>%

  ggplot(aes(x=year, y=n)) +
  geom_line() +
  facet_grid(what2~., scales="free_y", labeller = label_wrap_gen(width = 10)) +
  scale_y_continuous(label=scales::comma, expand = expansion()) +
  scale_x_continuous(labels = seq(2014, 2023, 2), breaks = seq(2014, 2023, 2), expand = expansion()) +
  labs(x=NULL, y=NULL, title = "Crash Types Over Time") +
  theme(
    strip.text.y.right = element_text(angle=0),
    axis.ticks = element_blank()
  )

rfars::annual_counts %>%
  filter(involved %in% c("pedalcyclist", "bicyclist", "pedestrian", "pedbike")) %>%
  replace_na(list(type="")) %>%
  unite(type, what, col="what2", sep = " ", remove = T) %>%
  mutate_at(c("involved", "what2"), stringr::str_to_sentence) %>%

  ggplot(aes(x=year, y=n)) +
  geom_line() +
  facet_grid(what2~involved, scales="free_y", labeller = label_wrap_gen(width = 10)) +
  scale_y_continuous(label=scales::comma, expand = expansion()) +
  scale_x_continuous(labels = seq(2014, 2023, 2), breaks = seq(2014, 2023, 2), expand = expansion()) +
  labs(x=NULL, y=NULL, title = "Crash Types Over Time") +
  theme(
    strip.text.y.right = element_text(angle=0),
    axis.ticks = element_blank(),
    axis.text.x.bottom = element_text(angle=45, hjust=1, size=8)
  )