An Analysis of Global and U.S. Democracy

Last updated on Apr 2, 2024 1 min read software

I gave a one-hour presentation at the 5th annual international symposium at Colorado State University. The topic of my talk was a global assessment of democracy with a deep dive into the U.S. case. We examined global trends in democratization and democratic backsliding as well as trends in the U.S.

There was a special focus on media, Congress, parties, voters, and states. Below is code that replicates some of the figures I generated with publicly available data.

You can check out the code on GitHub Gist or examine it below. Let me know if you have any questions or suggestions!

You can learn more about the amazing work of the International Program on their website!

	## This R Script has code for some of the visualizations I made for my talk
	## at the CSU International Symposium on Democracy


	# Package setup ----
	# pacman installs and attaches CRAN packages in one call; bootstrap it first.
	# requireNamespace() only checks availability, so a failed install surfaces
	# as an error from library() instead of being silently ignored.
	if (!requireNamespace("pacman", quietly = TRUE)) {
	  install.packages("pacman")
	}
	library(pacman)

	# CRAN packages used below. mapproj is never called directly, but
	# ggplot2::coord_map() (used for the world map) needs it to be installed.
	p_load(devtools, tidyverse, scales, haven, sjlabelled,
	       ggrepel, viridis, ggthemes, maps, mapproj, countrycode,
	       install = TRUE)

	# The V-Dem and ANES data packages are only distributed via GitHub. Install
	# them only when missing so re-running the script does not hit the network
	# (or change the data version) on every run.
	if (!requireNamespace("vdemdata", quietly = TRUE)) {
	  devtools::install_github("vdeminstitute/vdemdata")
	}
	if (!requireNamespace("anesr", quietly = TRUE)) {
	  devtools::install_github("jamesmartherus/anesr")
	}

	library(vdemdata)
	library(anesr)


	# World map of the change in V-Dem polyarchy between 2002 and 2022 ----

	# One row per country: its 2022 scores plus the 2022 - 2002 polyarchy change.
	# lag() is ordered by year explicitly so the difference does not depend on
	# the row order of `vdem`; countries without a 2002 row get NA.
	df_vdem_map <- vdem |>
	  dplyr::select(country_name, country_id, year,
	                v2x_polyarchy, v2x_libdem, v2x_partipdem, v2x_egaldem) |>
	  filter(year %in% c(2002, 2022)) |>
	  group_by(country_name) |>
	  mutate(
	    delta_polyarchy = v2x_polyarchy - dplyr::lag(v2x_polyarchy, order_by = year)
	  ) |>
	  ungroup() |>
	  filter(year == 2022)

	# Map polygons. Vertices east of 180 degrees longitude are dropped
	# (presumably to avoid polygons wrapping around the projected map - confirm).
	world_map <- map_data("world") |>
	  filter(!long > 180)

	# Translate the map's region names into V-Dem country ids for the join.
	world_map$country_id <- countrycode(world_map$region,
	                                    origin = "country.name",
	                                    destination = "vdem")

	# country_id is the only column the two tables share; naming it makes the
	# join key explicit and silences the "Joining with" message.
	df_democracy <- world_map |>
	  left_join(df_vdem_map, by = "country_id")

	# One row per map region with its change score (geom_map() draws the
	# polygons from `world_map`, so the plot data only needs the fill values).
	countries <- df_democracy |>
	  distinct(region, delta_polyarchy)

	countries |>
	  filter(region != "Antarctica") |>
	  ggplot(aes(fill = delta_polyarchy, map_id = region)) +
	  geom_map(map = world_map) +
	  expand_limits(x = world_map$long, y = world_map$lat) +
	  scale_fill_viridis(option = "inferno") +
	  labs(x = "Longitude", y = "Latitude",
	       fill = "Change in Democracy",
	       title = "Democracy around the world",
	       subtitle = "V-Dem's Polyarchy between 2002 and 2022") +
	  coord_map("moll") +
	  # theme_map() is a complete theme and replaces any theme settings added
	  # before it, so theme tweaks must come after it. The earlier theme_bw() and
	  # legend.position = "bottom" calls had no effect for that reason and were
	  # dropped; add legend.position to the theme() call below to move the legend.
	  theme_map() +
	  theme(text = element_text(size = 15))


	# Delta plot of democracy scores for each country in 2002 and 2022 ----
	# x = 2002 score, y = 2022 score. Countries above the dashed 45-degree line
	# improved, countries below it declined; colour encodes the same split.
	vdem |>
	  dplyr::select(country_name, year, v2x_polyarchy) |>
	  filter(year %in% c(2002, 2022)) |>
	  group_by(country_name) |>
	  mutate(
	    # Previous (2002) value within each country; ordered by year explicitly
	    # so the result does not depend on the row order of `vdem`.
	    lag_polyarchy = dplyr::lag(v2x_polyarchy, order_by = year),
	    delta_polyarchy = v2x_polyarchy - lag_polyarchy,
	    pos_dummy = ifelse(delta_polyarchy >= 0, "positive", "negative")
	  ) |>
	  ungroup() |>
	  # Drops the 2002 rows (no lag) and countries missing a score in either year.
	  drop_na() |>
	  filter(year == 2022) |>
	  ggplot(aes(x = lag_polyarchy, y = v2x_polyarchy,
	             color = pos_dummy, label = country_name)) +
	  geom_point(alpha = 1 / 2) +
	  geom_abline(intercept = 0, slope = 1, color = "black", linetype = "dashed") +
	  geom_text_repel() +
	  xlim(0, 1) +
	  ylim(0, 1) +
	  labs(title = "Changes in Polyarchy between 2002 and 2022",
	       caption = "Note: Analysis based on V-Dem Version 14",
	       y = "Polyarchy, 2022",
	       x = "Polyarchy, 2002") +
	  theme_bw() +
	  theme(legend.position = "none", text = element_text(size = 15))

	# Global democracy average since 1789 ----
	# Unweighted mean of v2x_polyarchy across all country rows in each year
	# (missing scores are ignored).
	vdem |>
	  group_by(year) |>
	  summarize(mean_polyarchy = mean(v2x_polyarchy, na.rm = TRUE)) |>
	  ggplot(aes(x = year, y = mean_polyarchy)) +
	  geom_line() +
	  # Shaded box over 2008-2024 (presumably to highlight the recent period
	  # discussed in the talk - confirm).
	  annotate("rect", xmin = 2008, xmax = 2024, ymin = 0.4, ymax = 0.62,
	           alpha = 0.2, fill = "red") +
	  ylim(0, 1) +
	  labs(title = "Average Democracy in the World",
	       caption = "Note: Analysis based on V-Dem Version 14",
	       x = "Year",
	       y = "V-Dem Polyarchy, global average") +
	  theme_bw() +
	  theme(text = element_text(size = 15))


	## Examining attitudes of American voters ----
	data(timeseries_2020)
	data(timeseries_cum) # Load Time Series Cumulative File (1948-2016)

	# 2020 is missing from the cumulative file. The code below prepares 2020 for a
	# merge with the cumulative file by producing the same column names and
	# codings that the cumulative-file pipeline produces.
	df_anes_2020 <- timeseries_2020 |>
	  remove_all_labels() |>
	  dplyr::select(V201156, V201157, V201228, V201229, V201230, V201231x,
	                V200010a) |>
	  rename(
	    therm_dem = V201156, # thermometer towards the Democratic party
	    therm_rep = V201157, # thermometer towards the Republican party
	    # 7-point party ID: 1 Strong Democrat, 2 Weak Democrat,
	    # 3 Independent - Democrat, 4 Independent - Independent,
	    # 5 Independent - Republican, 6 Weak Republican, 7 Strong Republican
	    pid_7 = V201231x,
	    weight = V200010a
	  ) |>
	  mutate(
	    year = 2020, # Year of response
	    # 3-category party ID with leaners (3, 5) counted as partisans. Built from
	    # the numeric pid_7 codes, so it must run before pid_7 is relabelled below.
	    pid_3 = case_when(
	      pid_7 %in% c(1, 2, 3) ~ "Democrat",
	      pid_7 %in% c(5, 6, 7) ~ "Republican",
	      pid_7 == 4 ~ "Independent"
	    )
	  ) |>
	  mutate(
	    # Values outside 0-100 are treated as missing. 97-100 are then collapsed
	    # to 97 so 2020 matches the cumulative file, whose top category is
	    # "97: 97-100".
	    therm_dem = ifelse(therm_dem < 0 | therm_dem > 100, NA, therm_dem),
	    therm_dem = ifelse(therm_dem %in% c(97, 98, 99, 100), 97, therm_dem),
	    therm_rep = ifelse(therm_rep < 0 | therm_rep > 100, NA, therm_rep),
	    therm_rep = ifelse(therm_rep %in% c(97, 98, 99, 100), 97, therm_rep),
	    strong_partisan = if_else(pid_7 == 1 | pid_7 == 7, 1, 0),
	    pid_7_num = as.numeric(pid_7),
	    pid_7 = recode(pid_7,
	                   "1" = "Strong Democrat",
	                   "2" = "Weak Democrat",
	                   "3" = "Independent - Democrat",
	                   "4" = "Independent - Independent",
	                   "5" = "Independent - Republican",
	                   "6" = "Weak Republican",
	                   "7" = "Strong Republican"),
	    pid_7 = reorder(pid_7, pid_7_num),
	    # pid_3 is already text here, so derive its numeric code from the label
	    # using the cumulative file's coding (1 Democrat, 2 Independent,
	    # 3 Republican). as.numeric() on the labels would yield only NA.
	    pid_3_num = as.numeric(
	      match(pid_3, c("Democrat", "Independent", "Republican"))
	    ),
	    pid_3 = reorder(pid_3, pid_3_num),
	    # Unlike pid_3, leaners (3, 5) are counted as Independents here.
	    pid_3_sort = factor(recode(pid_7_num,
	                               "1" = "Democrat",
	                               "2" = "Democrat",
	                               "3" = "Independent",
	                               "4" = "Independent",
	                               "5" = "Independent",
	                               "6" = "Republican",
	                               "7" = "Republican"),
	                        levels = c("Democrat",
	                                   "Independent",
	                                   "Republican")),
	    # Better just to filter(pid_3_sort != "Independent"), but these are used
	    # to build other variables.
	    pid_2_sort = na_if(pid_3_sort, "Independent"),
	    pid_2 = na_if(pid_3, "Independent"),
	    # Partisans: rating of their own party. Independents: mean of both
	    # ratings, so their in- and out-party values are equal.
	    therm_inparty = if_else(pid_3 == "Democrat" | pid_3 == "Republican",
	                            if_else(pid_3 == "Democrat", therm_dem, therm_rep),
	                            (therm_dem + therm_rep) / 2),
	    therm_inparty = na_if(therm_inparty, -9),
	    # Partisans: rating of the other party.
	    therm_outparty = if_else(pid_3 == "Democrat" | pid_3 == "Republican",
	                             if_else(pid_3 == "Democrat", therm_rep, therm_dem),
	                             (therm_dem + therm_rep) / 2),
	    npa_party = therm_inparty - therm_outparty,
	    therm_parties_mean = (therm_dem + therm_rep) / 2
	  )

	# The cumulative file ----
	# Renames the ANES cumulative-file variables, turns their numeric codes into
	# labelled factors, derives in-/out-party thermometer measures, and finally
	# appends the 2020 rows held in `df_anes_2020`.
	df_anes <- timeseries_cum |>
	  remove_all_labels() |>
	  dplyr::select(VCF0004, VCF0301, VCF0303, VCF0305, VCF0311, VCF0312, VCF0803,
	                VCF0218, VCF0224, VCF0201, VCF0202, VCF0009z) |>
	  rename(
	    # Year of response
	    year = VCF0004,
	    # 7-point party ID: 1 Strong Democrat, 2 Weak Democrat,
	    # 3 Independent - Democrat, 4 Independent - Independent,
	    # 5 Independent - Republican, 6 Weak Republican, 7 Strong Republican
	    pid_7 = VCF0301,
	    # 3-category party ID (Democrat / Independent / Republican; Dem/Rep
	    # include leaners)
	    pid_3 = VCF0303,
	    # Party ID strength: 1 Independent, 2 Leaning Independent,
	    # 3 Weak Partisan, 4 Strong Partisan
	    pid_str = VCF0305,
	    # How much do you care which party wins the presidency?
	    # 1 Don't care very much or DK, pro-con, depends, and other;
	    # 2 Care a great deal
	    win_care_pres = VCF0311,
	    # Same question for Congress (only asked through 2008)
	    win_care_cong = VCF0312,
	    # Liberal-conservative scale: 1 (extremely liberal) to
	    # 7 (extremely conservative); 9 DK / haven't thought much about it
	    respondent_ideo = VCF0803,
	    # Party thermometers: 00-96 cold-warm as coded; 97: 97-100; 98: DK; 99: NA
	    therm_dem = VCF0218,
	    therm_rep = VCF0224,
	    # NOTE(review): presumably an older pair of Democrat/Republican
	    # thermometer items - confirm against the ANES codebook.
	    therm_dem_old = VCF0201,
	    therm_rep_old = VCF0202,
	    weight = VCF0009z
	  ) |>
	  mutate(
	    # 98 (DK) and 99 (NA) are missing-value codes, not ratings.
	    therm_dem = na_if(therm_dem, 98),
	    therm_dem = na_if(therm_dem, 99),
	    therm_rep = na_if(therm_rep, 98),
	    therm_rep = na_if(therm_rep, 99),
	    respondent_ideo = na_if(respondent_ideo, 9),
	    strong_partisan = if_else(pid_7 == 1 | pid_7 == 7, 1, 0),
	    # ANES uses numerical values to represent factors. For each of the next
	    # four variables: keep the numeric code in *_num, recode() the codes to
	    # labels, then reorder() the labels by the numeric code.
	    pid_7_num = as.numeric(pid_7),
	    pid_7 = recode(pid_7,
	                   "1" = "Strong Democrat",
	                   "2" = "Weak Democrat",
	                   "3" = "Independent - Democrat",
	                   "4" = "Independent - Independent",
	                   "5" = "Independent - Republican",
	                   "6" = "Weak Republican",
	                   "7" = "Strong Republican"),
	    pid_7 = reorder(pid_7, pid_7_num),
	    pid_3_num = as.numeric(pid_3),
	    pid_3 = recode(pid_3,
	                   "1" = "Democrat",
	                   "2" = "Independent",
	                   "3" = "Republican"),
	    pid_3 = reorder(pid_3, pid_3_num),
	    pid_str_num = as.numeric(pid_str),
	    pid_str = recode(pid_str,
	                     "1" = "Independent",
	                     "2" = "Leaning Independent",
	                     "3" = "Weak Partisan",
	                     "4" = "Strong Partisan"),
	    pid_str = reorder(pid_str, pid_str_num),
	    respondent_ideo_num = as.numeric(respondent_ideo),
	    respondent_ideo = recode(respondent_ideo,
	                             "1" = "Extremely Liberal",
	                             "2" = "Liberal",
	                             "3" = "Somewhat Liberal",
	                             "4" = "Moderate",
	                             "5" = "Somewhat Conservative",
	                             "6" = "Conservative",
	                             "7" = "Extremely Conservative"),
	    respondent_ideo = reorder(respondent_ideo, respondent_ideo_num),
	    # Unlike pid_3, leaners (3, 5) are counted as Independents here.
	    pid_3_sort = factor(recode(pid_7_num,
	                               "1" = "Democrat",
	                               "2" = "Democrat",
	                               "3" = "Independent",
	                               "4" = "Independent",
	                               "5" = "Independent",
	                               "6" = "Republican",
	                               "7" = "Republican"),
	                        levels = c("Democrat",
	                                   "Independent",
	                                   "Republican")),
	    # Better just to filter(pid_3_sort != "Independent"), but these are used
	    # to build other variables.
	    pid_2_sort = na_if(pid_3_sort, "Independent"),
	    pid_2 = na_if(pid_3, "Independent"),
	    # Partisans: rating of their own party. Independents: mean of both
	    # ratings, so their in- and out-party values are equal.
	    therm_inparty = if_else(pid_3 == "Democrat" | pid_3 == "Republican",
	                            if_else(pid_3 == "Democrat", therm_dem, therm_rep),
	                            (therm_dem + therm_rep) / 2),
	    therm_inparty = na_if(therm_inparty, -9),
	    # Partisans: rating of the other party.
	    therm_outparty = if_else(pid_3 == "Democrat" | pid_3 == "Republican",
	                             if_else(pid_3 == "Democrat", therm_rep, therm_dem),
	                             (therm_dem + therm_rep) / 2),
	    npa_party = therm_inparty - therm_outparty,
	    therm_parties_mean = (therm_dem + therm_rep) / 2,
	    therm_dem_old = na_if(therm_dem_old, 98),
	    therm_dem_old = na_if(therm_dem_old, 99),
	    therm_rep_old = na_if(therm_rep_old, 98),
	    therm_rep_old = na_if(therm_rep_old, 99),
	    # Same in/out logic on the *_old items, for non-leaning partisans only
	    # (pid_2_sort is NA for Independents and leaners).
	    therm_party_ingroup = if_else(pid_2_sort == "Democrat",
	                                  therm_dem_old, therm_rep_old),
	    therm_party_outgroup = if_else(pid_2_sort == "Democrat",
	                                   therm_rep_old, therm_dem_old),
	    npa_partisans = therm_party_ingroup - therm_party_outgroup
	  ) |>
	  bind_rows(df_anes_2020)

	# Prepare a data frame for visualizations ----
	# Yearly mean in-party and out-party thermometer ratings for Democrats and
	# Republicans (leaners coded as partisans), 1978 onwards. One row per
	# year x party x in/out combination.
	fig_1_df <- df_anes |>
	  mutate(
	    # Rebuild the 3-category party ID from the 7-point labels so leaners are
	    # moved into the partisan coding. Only "Independent - Independent"
	    # contains "Independent" twice and stays Independent.
	    independent_count = str_count(pid_7, "Independent"),
	    pid_3 = case_when(
	      str_detect(pid_7, "Republican") ~ "Republican",
	      str_detect(pid_7, "Democrat") ~ "Democrat",
	      independent_count == 2 ~ "Independent"
	    )
	  ) |>
	  # Also drops rows whose party ID is missing.
	  filter(pid_3 != "Independent") |>
	  # filter(year <= 2008) |> # un/comment this line for the original data.
	  filter(year >= 1978) |>
	  dplyr::select(year, weight, pid_3, therm_inparty, therm_outparty) |>
	  pivot_longer(therm_inparty:therm_outparty,
	               names_to = "ft_towards", values_to = "ft") |>
	  unite("group", pid_3:ft_towards) |>
	  mutate(group = recode(group,
	                        "Democrat_therm_inparty" = "Democrat - In Party",
	                        "Democrat_therm_outparty" = "Democrat - Out Party",
	                        "Republican_therm_inparty" = "Republican - In Party",
	                        "Republican_therm_outparty" = "Republican - Out Party")) |>
	  group_by(year, group) |>
	  # The mean is survey-weighted; sd is the unweighted standard deviation.
	  # NOTE(review): weighted.mean(na.rm = TRUE) only drops missing ratings; a
	  # missing weight would still make the group mean NA - confirm weights are
	  # complete.
	  summarize(mean = weighted.mean(ft, weight, na.rm = TRUE),
	            sd = sd(ft, na.rm = TRUE),
	            n = n(),
	            .groups = "drop") |>
	  separate(group, into = c("party", "group_type"), sep = " - ", remove = FALSE)

	# In and out party affect over time ----
	fig_1_df |>
	  filter(year >= 1978) |>
	  # Blanks the out-party means, so only the in-party series is drawn.
	  # NOTE(review): the title and caption describe both series; remove this
	  # mutate() to show the out-party lines as well.
	  mutate(mean = ifelse(str_detect(group_type, "Out"), NA, mean)) |>
	  ggplot(aes(x = year, y = mean)) +
	  geom_point(aes(shape = group_type, color = party)) +
	  geom_smooth(aes(linetype = group_type, color = party), se = FALSE) +
	  # Named values tie each colour to its party instead of relying on the
	  # alphabetical order of the party labels.
	  scale_color_manual(values = c("Democrat" = "blue", "Republican" = "red")) +
	  scale_linetype_manual(values = c("In Party" = "solid",
	                                   "Out Party" = "dashed")) +
	  scale_shape_manual(values = c("In Party" = 3,
	                                "Out Party" = 2)) +
	  scale_x_continuous(breaks = seq(1976, 2020, by = 4)) +
	  scale_y_continuous(limits = c(10, 80)) +
	  labs(title = "Partisan In and Outgroup Feeling Thermometer",
	       caption = "Solid lines: In group, Dashed lines: Out group, Leaners coded as partisans",
	       y = "Mean Thermometer Ratings of Partisans",
	       x = "Year") +
	  theme_bw() +
	  theme(legend.position = "none", text = element_text(size = 15))

	# The difference between in and out party affect ----
	# Survey-weighted yearly mean of (in-party - out-party) thermometer ratings
	# across all partisans, with a smoothed trend and its confidence band.
	df_anes |>
	  dplyr::select(year, pid_3, therm_inparty, therm_outparty, weight) |>
	  # Keeps partisans only; filter() also drops rows where pid_3 is missing.
	  filter(pid_3 != "Independent") |>
	  mutate(delta_therm = therm_inparty - therm_outparty) |>
	  group_by(year) |>
	  summarize(mean = weighted.mean(delta_therm, weight, na.rm = TRUE),
	            sd = sd(delta_therm, na.rm = TRUE),
	            n = n()) |>
	  filter(year >= 1978) |>
	  ggplot(aes(x = year, y = mean)) +
	  geom_point() +
	  geom_smooth(color = "gray50", se = TRUE) +
	  scale_x_continuous(breaks = seq(1976, 2020, by = 4)) +
	  labs(title = "Affective Polarization",
	       caption = "Note: Analysis based on American National Election Study data.",
	       y = "Average Difference between In and Outgroup Affect",
	       x = "Year") +
	  theme_bw() +
	  theme(legend.position = "none", text = element_text(size = 15))

	# The difference between in and out party affect by partisans ----
	# Same measure as the overall affective-polarization plot, split by party.
	df_anes |>
	  dplyr::select(year, pid_3, therm_inparty, therm_outparty, weight) |>
	  # Keeps partisans only; filter() also drops rows where pid_3 is missing.
	  filter(pid_3 != "Independent") |>
	  mutate(delta_therm = therm_inparty - therm_outparty) |>
	  group_by(year, pid_3) |>
	  summarize(mean = weighted.mean(delta_therm, weight, na.rm = TRUE),
	            sd = sd(delta_therm, na.rm = TRUE),
	            n = n(),
	            .groups = "drop") |>
	  filter(year >= 1978) |>
	  ggplot(aes(x = year, y = mean, group = pid_3, color = pid_3)) +
	  geom_point() +
	  geom_smooth(aes(color = pid_3), se = FALSE) +
	  scale_x_continuous(breaks = seq(1976, 2020, by = 4)) +
	  # Colours are named per party. The previous unnamed c("red", "blue") was
	  # matched by level order and drew Democrats in red and Republicans in blue,
	  # the reverse of every other figure in this script, with no legend to
	  # reveal it.
	  scale_color_manual(values = c("Democrat" = "blue", "Republican" = "red")) +
	  labs(title = "Affective Polarization",
	       caption = "Note: Analysis based on American National Election Study data.",
	       y = "Average Difference between In and Outgroup Affect",
	       x = "Year") +
	  theme_bw() +
	  theme(legend.position = "none", text = element_text(size = 15))


	# US Party Analysis ----
	# Loading in the V-Dem VParty data set.
	# The four indicators are rescaled to [0, 1] BEFORE subsetting to the U.S.,
	# so each U.S. value is relative to the range observed across the whole
	# V-Party data set, not just the U.S. parties.
	df_vparty <- vparty |>
	  mutate(across(c(v2paopresp, v2xpa_antiplural, v2xpa_popul, v2paviol),
	                scales::rescale)) |>
	  filter(country_name == "United States of America") |>
	  dplyr::select(country_name, year, v2paenname, v2paopresp, v2xpa_antiplural,
	                v2xpa_popul, v2paviol)

	## Plotting rejection of political violence by U.S. political parties over time
	df_vparty |>
	  drop_na(v2paviol) |>
	  ggplot(aes(x = year, y = v2paviol, color = v2paenname)) +
	  geom_line() +
	  # Colours are assigned in the sort order of the party names.
	  # NOTE(review): assumes the Democratic party sorts first - verify against
	  # the v2paenname values.
	  scale_color_manual(values = c("blue", "red")) +
	  ylim(0, 1) +
	  # The caption line break is written as an explicit \n so the source
	  # indentation does not leak into the rendered text.
	  labs(title = "Rejection of political violence by U.S. political parties",
	       subtitle = "Analysis based on V-Party Version 2",
	       x = "Year",
	       y = "Rejection of political violence",
	       color = "Party:",
	       caption = paste0(
	         "Question: To what extent does the leadership of this party ",
	         "explicitly discourage the use of violence\n",
	         "against domestic political opponents?"
	       )) +
	  theme_bw() +
	  theme(legend.position = "bottom", text = element_text(size = 15))

	## Plotting attacks against political opponents by U.S. political parties over time
	df_vparty |>
	  drop_na(v2paopresp) |>
	  ggplot(aes(x = year, y = v2paopresp, color = v2paenname)) +
	  geom_line() +
	  # Colours are assigned in the sort order of the party names.
	  # NOTE(review): assumes the Democratic party sorts first - verify against
	  # the v2paenname values.
	  scale_color_manual(values = c("blue", "red")) +
	  ylim(0, 1) +
	  # The caption line break is written as an explicit \n so the source
	  # indentation does not leak into the rendered text.
	  labs(title = "Attacks against political opponents by U.S. political parties",
	       subtitle = "Analysis based on V-Party Version 2",
	       x = "Year",
	       y = "Political opponents",
	       color = "Party:",
	       caption = paste0(
	         "Question: Prior to this election, have leaders of this party used ",
	         "severe personal attacks or tactics of\n",
	         "demonization against their opponents?"
	       )) +
	  theme_bw() +
	  theme(legend.position = "bottom", text = element_text(size = 15))

	## Plotting populism by U.S. political parties over time
	df_vparty |>
	  drop_na(v2xpa_popul) |>
	  ggplot(aes(x = year, y = v2xpa_popul, color = v2paenname)) +
	  geom_line() +
	  # Colours are assigned in the sort order of the party names.
	  # NOTE(review): assumes the Democratic party sorts first - verify against
	  # the v2paenname values.
	  scale_color_manual(values = c("blue", "red")) +
	  ylim(0, 1) +
	  labs(title = "Populism in U.S. political parties",
	       subtitle = "Analysis based on V-Party Version 2",
	       x = "Year",
	       y = "Populism Index",
	       color = "Party:",
	       caption = "Question: To what extent do representatives of the party use populist rhetoric?") +
	  theme_bw() +
	  theme(legend.position = "bottom", text = element_text(size = 15))


	## Plotting anti-pluralism by U.S. political parties over time
	df_vparty |>
	  drop_na(v2xpa_antiplural) |>
	  ggplot(aes(x = year, y = v2xpa_antiplural, color = v2paenname)) +
	  geom_line() +
	  # Colours are assigned in the sort order of the party names.
	  # NOTE(review): assumes the Democratic party sorts first - verify against
	  # the v2paenname values.
	  scale_color_manual(values = c("blue", "red")) +
	  ylim(0, 1) +
	  # Caption uses "Question:" like the other V-Party figures, and an explicit
	  # \n so the source indentation does not leak into the rendered text.
	  labs(title = "Anti-pluralism in U.S. political parties",
	       subtitle = "Analysis based on V-Party Version 2",
	       x = "Year",
	       y = "Anti-Pluralism Index",
	       color = "Party:",
	       caption = paste0(
	         "Question: To what extent does the party show a lacking commitment ",
	         "to democratic norms prior to\n",
	         "elections?"
	       )) +
	  theme_bw() +
	  theme(legend.position = "bottom", text = element_text(size = 15))

view raw democracy_presentation.R hosted with ❤ by GitHub