Classwork 3

Panel Data Models

Author

Byeong-Hak Choe

Published

January 28, 2026

Modified

January 26, 2026

library(tidyverse)
library(plm)
library(lmtest)
library(sandwich)
library(stargazer)

co2 <- read_csv("https://bcdanl.github.io/data/owid-co2-data.csv")
codebook <- read_csv("https://bcdanl.github.io/data/owid-co2-codebook.csv")

Build a clean country-year panel

df <- co2 |>
  # Keep only country observations (drop regions / aggregates)
  # OWID country entries typically have a 3-letter ISO code
  filter(!is.na(iso_code), nchar(iso_code) == 3) |>
  # Keep useful sample years (adjust as you want)
  filter(year >= 1990) |>
  # Select a simple EKC-style set of variables
  mutate(
    gdp_per_capita = gdp / population, 
    .after = gdp
  ) |> 
  select(
    iso_code, country, year,
    co2_per_capita,
    gdp_per_capita,
    population,
    energy_per_capita,         # optional control
    primary_energy_consumption # optional control (can be NA a lot)
  ) |>
  # Drop missing core variables
  filter(!is.na(co2_per_capita), !is.na(gdp_per_capita)) |>
  # Create logs and squared log GDP per capita
  mutate(
    log_co2pc = log(co2_per_capita),
    log_gdppc = log(gdp_per_capita),
    log_gdppc2 = log_gdppc^2,
    log_pop = log(population),
    log_energy_pc = ifelse(!is.na(energy_per_capita) & energy_per_capita > 0,
                          log(energy_per_capita), 
                          NA)
  ) |>
  # Keep rows with finite log outcomes/predictors
  filter(is.finite(log_co2pc), 
         is.finite(log_gdppc), 
         is.finite(log_gdppc2), 
         is.finite(log_pop))

# Convert to pdata.frame (panel format for plm)
df_panel <- pdata.frame(df, index = c("iso_code", "year"))

Specify models

# Baseline (no optional controls)
f_base <- log_co2pc ~ log_gdppc + log_gdppc2

# Extended model (adds energy per capita; will drop rows with missing log_energy_pc)
f_ext  <- log_co2pc ~ log_gdppc + log_gdppc2 + log_energy_pc

# --- Models (BASE) ---
m1_pool <- plm(f_base, data = df_panel, model = "pooling")                 # pooled OLS
m2_fe   <- plm(f_base, data = df_panel, model = "within", effect = "individual")  # country FE
m3_twfe <- plm(f_base, data = df_panel, model = "within", effect = "twoways")     # country FE + year FE
m4_re   <- plm(f_base, data = df_panel, model = "random", effect = "individual") # random effects

# --- Models (EXTENDED) ---
# (Optional) include energy per capita as a control
m1_pool_ext <- plm(f_ext, data = df_panel, model = "pooling")
m2_fe_ext   <- plm(f_ext, data = df_panel, model = "within", effect = "individual")
m3_twfe_ext <- plm(f_ext, data = df_panel, model = "within", effect = "twoways")
m4_re_ext   <- plm(f_ext, data = df_panel, model = "random", effect = "individual")

Robust / clustered standard errors

# Function to compute clustered SE (country-clustered)
se_cluster_country_safe <- function(model) {
  V <- vcovHC(model, type = "HC1", cluster = "group")
  se <- sqrt(diag(V))
  se[is.na(se)] <- 0              # βœ… replace NA SEs so stargazer won't crash
  return(se)
}

# Clustered SEs for BASE models
se_m1 <- se_cluster_country_safe(m1_pool)
se_m2 <- se_cluster_country_safe(m2_fe)
se_m3 <- se_cluster_country_safe(m3_twfe)
se_m4 <- se_cluster_country_safe(m4_re)

# Clustered SEs for EXTENDED models
se_m1e <- se_cluster_country_safe(m1_pool_ext)
se_m2e <- se_cluster_country_safe(m2_fe_ext)
se_m3e <- se_cluster_country_safe(m3_twfe_ext)
se_m4e <- se_cluster_country_safe(m4_re_ext)

Panel diagnostics

# FE vs pooled OLS (is panel structure important?)
pFtest(m2_fe, m1_pool)

    F test for individual effects

data:  f_base
F = 193.79, df1 = 163, df2 = 5243, p-value < 2.2e-16
alternative hypothesis: significant effects
# RE vs pooled OLS (Breusch-Pagan LM test)
plmtest(m1_pool, type = "bp")

    Lagrange Multiplier Test - (Breusch-Pagan)

data:  f_base
chisq = 42925, df = 1, p-value < 2.2e-16
alternative hypothesis: significant effects
# FE vs RE (Hausman test)
phtest(m2_fe, m4_re)

    Hausman Test

data:  f_base
chisq = 9819.7, df = 2, p-value < 2.2e-16
alternative hypothesis: one model is inconsistent

Results

# --- Table A: BASE models ---
# Table (omit intercept so pooled/re match FE models cleanly)
stargazer(
  m1_pool, m2_fe, m3_twfe, m4_re,
  type = "html",
  se = list(se_m1, se_m2, se_m3, se_m4),
  title = "Panel Models (OWID CO2): Pooled OLS vs FE vs TWFE vs RE",
  column.labels = c("Pooled OLS", "Country FE", "Two-way FE", "Random Effects"),
  dep.var.labels = "log(CO2 per capita)",
  omit = "Intercept",                      # βœ… avoids intercept mismatch across models
  omit.stat = c("f", "ser"),
  digits = 3,
  notes = c("Standard errors clustered by country (iso_code).",
            "Country FE controls for time-invariant differences across countries.",
            "Two-way FE adds year fixed effects to absorb global shocks.")
)
Panel Models (OWID CO2): Pooled OLS vs FE vs TWFE vs RE
Dependent variable:
log(CO2 per capita)
Pooled OLS Country FE Two-way FE Random Effects
(1) (2) (3) (4)
log_gdppc 4.009*** 3.440*** 3.453*** 3.539***
(0.470) (0.444) (0.445) (0.437)
log_gdppc2 -0.157*** -0.168*** -0.167*** -0.170***
(0.027) (0.023) (0.023) (0.023)
Constant -22.396*** -17.176***
(2.057) (2.055)
Observations 5,409 5,409 5,409 5,409
R2 0.837 0.357 0.281 0.386
Adjusted R2 0.837 0.337 0.254 0.386
Note: p<0.1; p<0.05; p<0.01
Standard errors clustered by country (iso
Country FE controls for time-invariant differences across countries.
Two-way FE adds year fixed effects to absorb global shocks.
# --- Table B: EXTENDED models (adds energy control) ---
stargazer(
  m1_pool_ext, m2_fe_ext, m3_twfe_ext, m4_re_ext,
  type = "html",
  se = list(se_m1, se_m2, se_m3, se_m4),
  title = "Panel Models (OWID CO2): With Energy Control",
  column.labels = c("Pooled OLS", "Country FE", "Two-way FE", "Random Effects"),
  dep.var.labels = "log(CO2 per capita)",
  omit = "Intercept",                      # βœ… avoids intercept mismatch across models
  omit.stat = c("f", "ser"),
  digits = 3,
  notes = c("Standard errors clustered by country (iso_code).",
            "Adding energy per capita often reduces the GDP effect because energy use is a major driver of CO2.")
)
Panel Models (OWID CO2): With Energy Control
Dependent variable:
log(CO2 per capita)
Pooled OLS Country FE Two-way FE Random Effects
(1) (2) (3) (4)
log_gdppc 1.378*** 1.903*** 1.919*** 1.832***
(0.470) (0.444) (0.445) (0.437)
log_gdppc2 -0.072*** -0.100*** -0.099*** -0.095***
(0.027) (0.023) (0.023) (0.023)
log_energy_pc 0.869 0.621 0.621 0.672
Constant -13.726*** -14.103***
(2.057) (2.055)
Observations 5,358 5,358 5,358 5,358
R2 0.948 0.628 0.582 0.695
Adjusted R2 0.948 0.616 0.566 0.695
Note: p<0.1; p<0.05; p<0.01
Standard errors clustered by country (iso
Adding energy per capita often reduces the GDP effect because energy use is a major driver of CO2.
Back to top