6  Clean Water and Sanitation

Ensure availability and sustainable management of water and sanitation for all

Setup Code
# Load the tidyverse; the chunks below rely on it for import (read_csv),
# reshaping (pivot_longer), wrangling (filter, mutate) and plotting (ggplot).
library(tidyverse)
# Make theme_bw() the default ggplot2 theme for every plot that follows.
theme_set(theme_bw())

6.1 Original Data

WHO/UNICEF Joint Monitoring Programme for Water Supply, Sanitation and Hygiene (JMP) (2024) – with major processing by Our World in Data

  • Entity: Country
  • Code: 3-letter ISO country code
  • Year
  • Share of the population using safely managed drinking water services: safe
  • Share of the population using only basic drinking water services: basic
  • Share of the population using limited drinking water services: limited
  • Share of the population using unimproved drinking water services: unimproved
  • Share of the population using surface water as a primary source of drinking water: surface

6.2 Simplified Subsets

6.2.1 Long Countries

A data set with long structure and short column names consistent with other data sets in this book, limited to individual countries.

Code
# Drinking-water service levels, listed in the order of the source columns.
# The same vector names the value columns on import and fixes the factor
# order of `access` (the default order would be alphabetical).
access_levels <- c("safe", "basic", "limited", "unimproved", "surface")

# Country-level water access in long form: one row per country, year and
# access level, with the share of the population in `pcnt`.
water_countries <-
  read_csv(
    file = "data/06/access-drinking-water/access-drinking-water-stacked.csv",
    col_names = c("country_name", "country_code", "year", access_levels),
    skip = 1, # skip the original header row; short names are supplied above
    show_col_types = FALSE
  ) |>
  # rows without a code are aggregates (regions), not individual countries
  drop_na(country_code) |>
  pivot_longer(
    cols = all_of(access_levels),
    names_to = "access",
    values_to = "pcnt"
  ) |>
  mutate(
    # strip the "OWID_" prefix that some codes carry
    # NOTE(review): any OWID_-coded aggregate would also pass as a country
    # here; confirm against the source file
    country_code = str_remove_all(country_code, "OWID_"),
    pcnt = round(pcnt, digits = 2),
    access = factor(access, levels = access_levels)
  )

This set is good for practicing with categorical columns that have a clear order which differs from the default (alphabetical) order on import.

Table 6.1: The first rows of water_countries
country_name country_code year access pcnt
Afghanistan AFG 2000 safe 11.09
Afghanistan AFG 2000 basic 16.35
Afghanistan AFG 2000 limited 3.30
Afghanistan AFG 2000 unimproved 43.86
Afghanistan AFG 2000 surface 25.40
Afghanistan AFG 2001 safe 11.11
Code
# Fill colours for the five access levels, in factor-level order
# (safe, basic, limited, unimproved, surface).
access_col <- c("#26BDE2", "#4C9F38", "#DDA63A", "#FD6925", "#DD1367")

# Figure 6.1: one stacked horizontal bar per country for 2022, spread over
# four side-by-side panels so that the country labels stay legible.
water_countries |>
  # narrow to the most recent year
  filter(year == 2022) |>
  # optional: keep only countries whose shares add up to (roughly) 100
  #filter(sum(pcnt) > 99, .by = c(country_name)) |>
  # Assign panels per *country*, not per row: the data are long (one row per
  # access level), so cutting on row_number() can tear a country's stacked
  # bar across two panels whenever the number of countries is not a multiple
  # of four.
  mutate(country_idx = match(country_code, unique(country_code)),
         panel = ceiling(4 * country_idx / n_distinct(country_code))) |>
  ggplot(aes(x = fct_rev(country_code), y = pcnt, fill = access)) +
  # free scales so each panel lists only its own countries
  facet_wrap(~panel, scales = "free", ncol = 4) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = access_col) +
  labs(x = NULL, y = NULL) +
  # the panel index carries no meaning for the reader, so hide the strips
  theme(legend.position = "top",
        strip.background = element_blank(),
        strip.text = element_blank())
Figure 6.1: Water access by country for the most recent year in the data set (2022)

6.2.2 Water Regions

The data are also grouped by WHO and SDG regions, and by development levels. This set includes just those regions, with region_type as the grouping (WHO, SDG, DEV).

Code
# Drinking-water service levels in source-column order; also used as the
# factor order of `access`.
access_levels <- c("safe", "basic", "limited", "unimproved", "surface")

# Region-level water access in long form: one row per region, year and
# access level. `region_type` holds the grouping scheme (WHO, SDG or DEV).
water_regions <-
  read_csv(
    file = "data/06/access-drinking-water/access-drinking-water-stacked.csv",
    col_names = c("region_name", "country_code", "year", access_levels),
    skip = 1, # skip the original header row; short names are supplied above
    show_col_types = FALSE
  ) |>
  # aggregates are the rows without a country code
  # NOTE(review): Bonaire, Sint Eustatius and Saba presumably lacks a code in
  # the source even though it is not a region; confirm
  filter(
    is.na(country_code) &
      region_name != "Bonaire, Sint Eustatius and Saba"
  ) |>
  select(!country_code) |>
  pivot_longer(
    cols = safe:surface,
    names_to = "access",
    values_to = "pcnt"
  ) |>
  mutate(
    pcnt = round(pcnt, digits = 2),
    access = factor(access, levels = access_levels)
  ) |>
  # "Africa (WHO)" -> region "Africa ", region_type "WHO"; the empty piece
  # after ")" is dropped, and names without parentheses get an NA type
  separate(
    col = region_name,
    into = c("region", "region_type"),
    sep = "\\(|\\)",
    extra = "drop",
    fill = "right"
  ) |>
  mutate(
    region = trimws(region),
    # names without a "(TYPE)" suffix are labelled as development levels
    region_type = coalesce(region_type, "DEV")
  )
Table 6.2: The first rows of water_regions
region region_type year access pcnt
Africa WHO 2000 safe 19.31
Africa WHO 2000 basic 28.48
Africa WHO 2000 limited 8.81
Africa WHO 2000 unimproved 25.89
Africa WHO 2000 surface 17.50
Africa WHO 2001 safe 19.61
Code
# Income groups as they are spelled in the data, in the order the facets
# should appear, with the short labels shown on the facet strips.
region_names <- c("Low-income countries",
                  "Lower-middle-income countries",
                  "Upper-middle-income countries",
                  "High-income countries")
region_labels <- c("low", "lower-middle", "upper-middle", "high")
# Fill colours for the five access levels, in factor-level order.
access_col <- c("#26BDE2", "#4C9F38", "#DDA63A", "#FD6925", "#DD1367")

# Figure 6.2: stacked area chart of water access over time, one facet per
# income group.
water_regions |>
  # keep only the income-group aggregates
  filter(str_detect(region, "income")) |>
  # order the facets by income level and shorten the strip labels
  mutate(
    region = factor(region, levels = region_names, labels = region_labels)
  ) |>
  ggplot(mapping = aes(x = year, y = pcnt, fill = access)) +
  geom_area() +
  facet_wrap(vars(region), ncol = 4) +
  scale_fill_manual(values = access_col) +
  scale_x_continuous(breaks = seq(2000, 2020, by = 10)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  # draw the legend inside the plotting area
  guides(fill = guide_legend(position = "inside")) +
  theme(
    axis.text.x = element_text(angle = -45, hjust = 0),
    legend.position.inside = c(.87, .75),
    legend.background = element_blank(),
    # white bold labels with no legend background
    legend.text = element_text(color = "white", face = "bold")
  )
Figure 6.2: Water access by development level from 2000 to 2022

6.3 Resources