df <- co2 |># Keep only country observations (drop regions / aggregates)# OWID country entries typically have a 3-letter ISO codefilter(!is.na(iso_code), nchar(iso_code) ==3) |># Keep useful sample years (adjust as you want)filter(year >=1990) |># Select a simple EKC-style set of variablesmutate(gdp_per_capita = gdp / population, .after = gdp ) |>select( iso_code, country, year, co2_per_capita, gdp_per_capita, population, energy_per_capita, # optional control primary_energy_consumption # optional control (can be NA a lot) ) |># Drop missing core variablesfilter(!is.na(co2_per_capita), !is.na(gdp_per_capita)) |># Create logs and squared log GDP per capitamutate(log_co2pc =log(co2_per_capita),log_gdppc =log(gdp_per_capita),log_gdppc2 = log_gdppc^2,log_pop =log(population),log_energy_pc =ifelse(!is.na(energy_per_capita) & energy_per_capita >0,log(energy_per_capita), NA) ) |># Keep rows with finite log outcomes/predictorsfilter(is.finite(log_co2pc), is.finite(log_gdppc), is.finite(log_gdppc2), is.finite(log_pop))# Convert to pdata.frame (panel format for plm)df_panel <-pdata.frame(df, index =c("iso_code", "year"))
Specify models
# Baseline (no optional controls)f_base <- log_co2pc ~ log_gdppc + log_gdppc2# Extended model (adds energy per capita; will drop rows with missing log_energy_pc)f_ext <- log_co2pc ~ log_gdppc + log_gdppc2 + log_energy_pc# --- Models (BASE) ---m1_pool <-plm(f_base, data = df_panel, model ="pooling") # pooled OLSm2_fe <-plm(f_base, data = df_panel, model ="within", effect ="individual") # country FEm3_twfe <-plm(f_base, data = df_panel, model ="within", effect ="twoways") # country FE + year FEm4_re <-plm(f_base, data = df_panel, model ="random", effect ="individual") # random effects# --- Models (EXTENDED) ---# (Optional) include energy per capita as a controlm1_pool_ext <-plm(f_ext, data = df_panel, model ="pooling")m2_fe_ext <-plm(f_ext, data = df_panel, model ="within", effect ="individual")m3_twfe_ext <-plm(f_ext, data = df_panel, model ="within", effect ="twoways")m4_re_ext <-plm(f_ext, data = df_panel, model ="random", effect ="individual")
Robust / clustered standard errors
# Function to compute clustered SE (country-clustered)se_cluster_country_safe <-function(model) { V <-vcovHC(model, type ="HC1", cluster ="group") se <-sqrt(diag(V)) se[is.na(se)] <-0# β replace NA SEs so stargazer won't crashreturn(se)}# Clustered SEs for BASE modelsse_m1 <-se_cluster_country_safe(m1_pool)se_m2 <-se_cluster_country_safe(m2_fe)se_m3 <-se_cluster_country_safe(m3_twfe)se_m4 <-se_cluster_country_safe(m4_re)# Clustered SEs for EXTENDED modelsse_m1e <-se_cluster_country_safe(m1_pool_ext)se_m2e <-se_cluster_country_safe(m2_fe_ext)se_m3e <-se_cluster_country_safe(m3_twfe_ext)se_m4e <-se_cluster_country_safe(m4_re_ext)
Panel diagnostics
# FE vs pooled OLS (is panel structure important?)pFtest(m2_fe, m1_pool)
F test for individual effects
data: f_base
F = 193.79, df1 = 163, df2 = 5243, p-value < 2.2e-16
alternative hypothesis: significant effects
# RE vs pooled OLS (Breusch-Pagan LM test)plmtest(m1_pool, type ="bp")
Lagrange Multiplier Test - (Breusch-Pagan)
data: f_base
chisq = 42925, df = 1, p-value < 2.2e-16
alternative hypothesis: significant effects
# FE vs RE (Hausman test)phtest(m2_fe, m4_re)
Hausman Test
data: f_base
chisq = 9819.7, df = 2, p-value < 2.2e-16
alternative hypothesis: one model is inconsistent
Results
# --- Table A: BASE models ---# Table (omit intercept so pooled/re match FE models cleanly)stargazer( m1_pool, m2_fe, m3_twfe, m4_re,type ="html",se =list(se_m1, se_m2, se_m3, se_m4),title ="Panel Models (OWID CO2): Pooled OLS vs FE vs TWFE vs RE",column.labels =c("Pooled OLS", "Country FE", "Two-way FE", "Random Effects"),dep.var.labels ="log(CO2 per capita)",omit ="Intercept", # β avoids intercept mismatch across modelsomit.stat =c("f", "ser"),digits =3,notes =c("Standard errors clustered by country (iso_code).","Country FE controls for time-invariant differences across countries.","Two-way FE adds year fixed effects to absorb global shocks."))
Panel Models (OWID CO2): Pooled OLS vs FE vs TWFE vs RE
Dependent variable:
log(CO2 per capita)
Pooled OLS
Country FE
Two-way FE
Random Effects
(1)
(2)
(3)
(4)
log_gdppc
4.009***
3.440***
3.453***
3.539***
(0.470)
(0.444)
(0.445)
(0.437)
log_gdppc2
-0.157***
-0.168***
-0.167***
-0.170***
(0.027)
(0.023)
(0.023)
(0.023)
Constant
-22.396***
-17.176***
(2.057)
(2.055)
Observations
5,409
5,409
5,409
5,409
R2
0.837
0.357
0.281
0.386
Adjusted R2
0.837
0.337
0.254
0.386
Note:
p<0.1; p<0.05; p<0.01
Standard errors clustered by country (iso
Country FE controls for time-invariant differences across countries.
Two-way FE adds year fixed effects to absorb global shocks.
# --- Table B: EXTENDED models (adds energy control) ---stargazer( m1_pool_ext, m2_fe_ext, m3_twfe_ext, m4_re_ext,type ="html",se =list(se_m1, se_m2, se_m3, se_m4),title ="Panel Models (OWID CO2): With Energy Control",column.labels =c("Pooled OLS", "Country FE", "Two-way FE", "Random Effects"),dep.var.labels ="log(CO2 per capita)",omit ="Intercept", # β avoids intercept mismatch across modelsomit.stat =c("f", "ser"),digits =3,notes =c("Standard errors clustered by country (iso_code).","Adding energy per capita often reduces the GDP effect because energy use is a major driver of CO2."))
Panel Models (OWID CO2): With Energy Control
Dependent variable:
log(CO2 per capita)
Pooled OLS
Country FE
Two-way FE
Random Effects
(1)
(2)
(3)
(4)
log_gdppc
1.378***
1.903***
1.919***
1.832***
(0.470)
(0.444)
(0.445)
(0.437)
log_gdppc2
-0.072***
-0.100***
-0.099***
-0.095***
(0.027)
(0.023)
(0.023)
(0.023)
log_energy_pc
0.869
0.621
0.621
0.672
Constant
-13.726***
-14.103***
(2.057)
(2.055)
Observations
5,358
5,358
5,358
5,358
R2
0.948
0.628
0.582
0.695
Adjusted R2
0.948
0.616
0.566
0.695
Note:
p<0.1; p<0.05; p<0.01
Standard errors clustered by country (iso
Adding energy per capita often reduces the GDP effect because energy use is a major driver of CO2.