Plotting vowels with ggplot2 in R

2025-05-18

Slides:

🔗 kazmierski-ylmp-2025-vowels.netlify.app

Online exercises:

(free posit cloud account necessary)

🔗 posit.cloud/content/4139172

Code for slides (index.qmd) and offline exercises:

🔗 osf.io/h5yn2

Single-point measurements

word vowel f1 f2
blue uː 475 1740

What to expect today

  • Main tool: ggplot2 in R
  • When extra package used: indicated
  • Specialized packages: vowels and phonR, not covered

# Read the single-point measurement table from disk, then print it to
# inspect its columns.
mass_small <- "data/mass_ymlp_small.rds" |>
  read_rds()
mass_small
# A tibble: 41,667 × 6
   speaker word      f1    f2 vowel environment
   <chr>   <chr>  <dbl> <dbl> <chr> <chr>      
 1 mass01  MOVED   332. 1286. uː    elsewhere  
 2 mass01  YEAH    792. 1841. æ     elsewhere  
 3 mass01  SCHOOL  450.  903. uː    pre-l      
 4 mass01  WENT    580  1865. e     <NA>       
 5 mass01  TO      418. 2268. uː    elsewhere  
 6 mass01  ITS     456. 2149. ɪ     <NA>       
 7 mass01  SCHOOL  419.  710. uː    pre-l      
 8 mass01  AREA    585. 2056. e     <NA>       
 9 mass01  DROVE   561  1310. oʊ    elsewhere  
10 mass01  YOUNG   674. 1528. ʌ     <NA>       
# ℹ 41,657 more rows

Plot all tokens as dots (1st attempt)

# One dot per token: F2 on the x axis, F1 on the y axis (default axis
# directions, no reversal yet).
mass_small |>
  ggplot(aes(x = f2, y = f1)) +
  geom_point()

Plot all tokens as dots (1st attempt)

Plot all tokens as dots (2nd attempt)

# Same scatter as before, but with both axes reversed.
mass_small |>
  ggplot(aes(x = f2, y = f1)) +
  geom_point() +
  scale_x_reverse() +
  scale_y_reverse()

Plot all tokens as dots (2nd attempt)

Calculate and plot means

# Create a table with means: one row per vowel holding its mean F1 and F2.
# summarise() is the verb for exactly-one-row-per-group results, and
# na.rm = TRUE keeps a single missing measurement from turning a whole
# vowel's mean into NA (the trajectory summaries further down do the same).
vowel_means <- mass_small |>
  summarise(
    mean_f1 = mean(f1, na.rm = TRUE),
    mean_f2 = mean(f2, na.rm = TRUE),
    .by = vowel
  )

# Plot means as dots: one point per vowel, both axes reversed
vowel_means |>
  ggplot(aes(x = mean_f2, y = mean_f1)) +
  geom_point() +
  scale_x_reverse() +
  scale_y_reverse()

Calculate and plot means

Means as IPA symbols

# Replace the dots with text: each mean is drawn as its vowel symbol.
vowel_means |>
  ggplot(aes(x = mean_f2, y = mean_f1, label = vowel)) +
  geom_text(size = 6) +
  scale_x_reverse() +
  scale_y_reverse()

Means as IPA symbols

Means as IPA symbols in proper font

# Register the imported fonts with the Windows graphics device so that
# family = "Charis SIL" can be resolved. The "win" device is specific to
# Windows, so the call is skipped elsewhere (e.g. Posit Cloud) rather than
# stopping the script; there the font must be available to the graphics
# device by other means.
if (.Platform$OS.type == "windows") {
  extrafont::loadfonts(device = "win", quiet = TRUE)
}

# Vowel means drawn as IPA symbols in the Charis SIL typeface.
ggplot(vowel_means, aes(mean_f2, mean_f1)) +
  geom_text(
    aes(label = vowel),
    family = "Charis SIL",
    size = 6) +
  scale_x_reverse() +
  scale_y_reverse()

Means as IPA symbols in proper font

IPA + dots (1st attempt)

# Text layer first, point layer second: label and dot share the same
# coordinates, so they are drawn on top of each other.
vowel_means |>
  ggplot(aes(x = mean_f2, y = mean_f1)) +
  geom_text(
    aes(label = vowel),
    size = 6,
    family = "Charis SIL"
  ) +
  geom_point() +
  scale_x_reverse() +
  scale_y_reverse()

IPA + dots (1st attempt)

IPA + dots (2nd attempt)

# As above, but the labels are offset by 25 data units along the y (F1)
# axis so they no longer sit on the dots.
vowel_means |>
  ggplot(aes(x = mean_f2, y = mean_f1)) +
  geom_text(
    aes(label = vowel),
    nudge_y = 25,
    size = 6,
    family = "Charis SIL"
  ) +
  geom_point() +
  scale_x_reverse() +
  scale_y_reverse()

IPA + dots (2nd attempt)

Pre-l retraction

Pre-l retraction

Use case: Pre-l retraction

Connect the dots :: 1st attempt

# geom_path() joins the means in the row order of vowel_means, which has
# not been arranged in any particular way here.
vowel_means |>
  ggplot(aes(x = mean_f2, y = mean_f1)) +
  geom_path() +
  geom_text(
    aes(label = vowel),
    size = 6,
    family = "Charis SIL"
  ) +
  scale_x_reverse() +
  scale_y_reverse()

Connect the dots :: 1st attempt

Connect the dots :: 2nd attempt

# Turn vowel into a factor whose level order is the order in which the
# path should visit the vowels, sort the rows by it, and only then draw
# the path and the labels.
vowel_means |>
  mutate(
    vowel = vowel |>
      factor(levels = c(
        "iː", "ɪ", "eɪ", "e", "æ", "ʌ",
        "ɑː", "ɔː", "oʊ", "ʊ", "uː"
      ))
  ) |>
  arrange(vowel) |>
  ggplot(aes(x = mean_f2, y = mean_f1)) +
  geom_path() +
  geom_text(
    aes(label = vowel),
    size = 6,
    family = "Charis SIL"
  ) +
  scale_x_reverse() +
  scale_y_reverse()

Connect the dots :: 2nd attempt

Connect the dots :: 3rd attempt

# Same ordered path, but the symbols are drawn with geom_label() instead of
# geom_text(), and the reversed y axis is given explicit limits from 800
# down to 300.
vowel_means |>
  mutate(
    vowel = vowel |>
      factor(levels = c(
        "iː", "ɪ", "eɪ", "e", "æ", "ʌ",
        "ɑː", "ɔː", "oʊ", "ʊ", "uː"
      ))
  ) |>
  arrange(vowel) |>
  ggplot(aes(x = mean_f2, y = mean_f1)) +
  geom_path() +
  geom_label(
    aes(label = vowel),
    size = 6,
    family = "Charis SIL"
  ) +
  scale_x_reverse() +
  scale_y_reverse(limits = c(800, 300))

Connect the dots :: 3rd attempt

Confidence ellipses

# One ellipse per vowel (colour is mapped to vowel), drawn at level 0.4.
mass_small |>
  ggplot(aes(x = f2, y = f1, colour = vowel)) +
  stat_ellipse(level = 0.4) +
  scale_x_reverse() +
  scale_y_reverse() +
  # switch the colour legend off
  guides(colour = "none")

Confidence ellipses

Ellipses + means

# Ellipses come from the token-level table; the labels come from a second
# table (vowel_means) that is passed to the text layer only.
mass_small |>
  ggplot(aes(x = f2, y = f1, colour = vowel)) +
  stat_ellipse(level = 0.4) +
  geom_text(
    data = vowel_means, # this layer reads the means, not the tokens
    aes(x = mean_f2, y = mean_f1, label = vowel),
    size = 6,
    family = "Charis SIL"
  ) +
  scale_x_reverse() +
  scale_y_reverse() +
  # switch the colour legend off
  guides(colour = "none")

Ellipses + means

Alternative 1: geom_density_2d()

# 2D density contours per vowel instead of ellipses; legend hidden through
# the theme.
mass_small |>
  ggplot(aes(x = f2, y = f1, colour = vowel)) +
  geom_density_2d() +
  scale_x_reverse() +
  scale_y_reverse() +
  theme(legend.position = "none")

Alternative 1: geom_density_2d()

stat_ellipse() vs geom_density_2d()

Alt. 2: ggdensity::stat_hdr()

# ggdensity supplies stat_hdr()
library(ggdensity)

# Filled regions per vowel for probs 0.7, 0.5 and 0.2, with the vowel means
# printed on top as white IPA symbols taken from vowel_means.
mass_small |>
  ggplot(aes(x = f2, y = f1, fill = vowel)) +
  stat_hdr(probs = c(0.7, 0.5, 0.2)) +
  geom_text(
    data = vowel_means,
    aes(x = mean_f2, y = mean_f1, label = vowel),
    colour = "white",
    size = 6,
    family = "Charis SIL"
  ) +
  scale_x_reverse() +
  scale_y_reverse() +
  theme(legend.position = "none")

Alt. 2: ggdensity::stat_hdr()

Zooming in on TRAP /æ/

# Keep only the /æ/ tokens and draw their regions for probs 0.7, 0.5, 0.2;
# the alpha legend is switched off.
mass_small |>
  filter(vowel == "æ") |>
  ggplot(aes(x = f2, y = f1)) +
  stat_hdr(probs = c(0.7, 0.5, 0.2)) +
  scale_x_reverse() +
  scale_y_reverse() +
  guides(alpha = "none")

Pre-nasal TRAP-tensing

Pre-nasal TRAP-tensing

Use case: TRAP-tensing

# /æ/ tokens only, with fill mapped to environment and two manually chosen
# fill colours; the fill legend goes on top and the alpha legend is hidden.
mass_small |>
  filter(vowel == "æ") |>
  ggplot(aes(x = f2, y = f1, fill = environment)) +
  stat_hdr(probs = c(0.7, 0.5, 0.2)) +
  scale_x_reverse() +
  scale_y_reverse() +
  scale_fill_manual(values = c("darkmagenta", "seagreen")) +
  guides(alpha = "none") +
  theme(legend.position = "top")

Use case: TRAP-tensing

Formant trajectories

# Read the trajectory table from disk and show its first five rows.
mass <- "data/mass_ymlp.rds" |>
  read_rds()
mass[seq_len(5), ]
# A tibble: 5 × 8
  speaker word  vowel environment measurement_point    f1    f2 token
  <chr>   <chr> <chr> <chr>                   <int> <dbl> <dbl> <int>
1 mass01  MOVED uː    elsewhere                   1  343. 1384.     1
2 mass01  MOVED uː    elsewhere                   2  343. 1384.     1
3 mass01  MOVED uː    elsewhere                   3  323. 1108.     1
4 mass01  MOVED uː    elsewhere                   4  331. 1156.     1
5 mass01  MOVED uː    elsewhere                   5  331. 1197.     1

Single token of /uː/

# The first five rows of mass, plotted against measurement_point:
# F1 in pink and F2 in sky blue, each as points joined by a path.
ggplot(mass[1:5, ], aes(x = measurement_point)) +
  geom_point(aes(y = f1), colour = "pink", size = 3) +
  geom_path(aes(y = f1), colour = "pink") +
  geom_point(aes(y = f2), colour = "skyblue", size = 3) +
  geom_path(aes(y = f2), colour = "skyblue") +
  labs(y = "Hz")

Single token of /uː/

Each /uː/ token as a line

# All /uː/ rows, one very transparent line per token (group = token) for
# each formant, so that overlapping lines add up visually.
ggplot(
  filter(mass, vowel == "uː"),
  aes(x = measurement_point)
) +
  geom_path(
    aes(y = f1, group = token),
    colour = "pink", alpha = 0.05
  ) +
  geom_path(
    aes(y = f2, group = token),
    colour = "skyblue", alpha = 0.05
  ) +
  labs(y = "Hz")

Each /uː/ token as a line

/uː/ broken down by environment

# Same per-token lines as before, split into one panel per environment.
ggplot(
  filter(mass, vowel == "uː"),
  aes(x = measurement_point)
) +
  geom_path(
    aes(y = f1, group = token),
    colour = "pink", alpha = 0.05
  ) +
  geom_path(
    aes(y = f2, group = token),
    colour = "skyblue", alpha = 0.05
  ) +
  facet_wrap(~environment) +
  labs(y = "Hz")

/uː/ broken down by environment

Smoother: ‘typical’ /uː/ formants

# Faint per-token lines in the background, plus one loess smoother per
# formant and panel (no confidence band); vertical major grid lines are
# removed.
ggplot(
  filter(mass, vowel == "uː"),
  aes(x = measurement_point)
) +
  geom_path(
    aes(y = f1, group = token),
    colour = "pink", alpha = 0.04
  ) +
  geom_path(
    aes(y = f2, group = token),
    colour = "skyblue", alpha = 0.04
  ) +
  geom_smooth(
    aes(y = f1),
    method = "loess", se = FALSE,
    colour = "pink2", linewidth = 1
  ) +
  geom_smooth(
    aes(y = f2),
    method = "loess", se = FALSE,
    colour = "skyblue2", linewidth = 1
  ) +
  labs(y = "Hz") +
  facet_wrap(~environment) +
  theme(panel.grid.major.x = element_blank())

Smoother

PRICE-raising

wide [waɪd]

white [wʌɪt]

Two formants :: PRICE-raising

# Mean F1 and F2 of /aɪ/ at each measurement point, one line per
# environment and formant. group_by() is kept so that the rows come out
# sorted by environment and measurement_point, which is the order in
# which geom_path() joins them.
mass |> 
  filter(vowel == "aɪ") |> 
  group_by(environment, measurement_point) |> 
  reframe(mean_f1 = mean(f1, na.rm = TRUE),
          mean_f2 = mean(f2, na.rm = TRUE)) |> 
  ggplot(aes(
    x = measurement_point, 
    col = environment)) +
  geom_path(aes(y = mean_f2), linewidth = 2) +
  geom_path(aes(y = mean_f1), linewidth = 2) +
  scale_color_manual(values = c("seagreen", "pink")) +
  # Both formants share the y axis, so label it with the unit rather than
  # the default "mean_f2" taken from the first layer.
  labs(y = "Hz")

Two formants :: PRICE-raising

Trajectories inside a vowel chart

# Mean /aɪ/ trajectory per environment, drawn in F2 x F1 space with both
# axes reversed. Rows are summarised per environment and measurement point
# and returned ungrouped, sorted by those keys, so each path is joined in
# measurement-point order.
mass |>
  filter(vowel == "aɪ") |>
  group_by(environment, measurement_point) |>
  summarise(
    mean_f1 = mean(f1, na.rm = TRUE),
    mean_f2 = mean(f2, na.rm = TRUE),
    .groups = "drop"
  ) |>
  ggplot(aes(x = mean_f2, y = mean_f1, colour = environment)) +
  geom_path() +
  scale_x_reverse() +
  scale_y_reverse()

Trajectories inside a vowel chart

Themes

Not shown, but applied throughout this presentation

# hrbrthemes supplies theme_ipsum(); make it the default theme for all
# subsequent plots, with Titillium Web as the base font.
library(hrbrthemes)
theme_ipsum(base_family = "Titillium Web") |>
  theme_set()

Saving: plot, save, inspect, repeat

# Write a plot to a PNG file at 5 x 5 and 300 dpi. No plot object is
# passed, so ggsave() falls back to its default (the last plot displayed).
ggsave(
  filename = "plots/price_raising.png",
  width = 5,
  height = 5,
  dpi = 300
)

RStudio: only a preview ❌

PNG made with ggsave()

Resources

☝️ Chapters 9-11

Thank you!

Slides:

🔗 kazmierski-ylmp-2025-vowels.netlify.app

Online exercises:

(free posit cloud account necessary)

🔗 posit.cloud/content/4139172

Code for slides (index.qmd) and offline exercises:

🔗 osf.io/h5yn2