Data Frame Creation (Climate Change)

Data Frame Creation (Climate Change)

Library

Packages needed to run this code. Also initializing a lag function.

library(janitor) # Cleaning Data
library(tidyverse) # Data Manipulation
library(plm) # Panel Linear Models
library(stargazer) # Table Creation
library(readxl) # Read Excel Files
library(vdemdata) # V-dem Dataset

# Lag function
diff <- function(x) {x - dplyr::lag(x)}

V-Dem Data

Here we bring in the V-Dem data set which allows us to collect the yearly corruption values from 1999 to 2020. Other control variables are included.

vdem_df <-vdem
vdem_df_filtered <- vdem_df %>% filter(year %in% 1999:2020) %>% 
  select(country_name, year, country_text_id, v2x_corr, v2xnp_regcorr, e_gdppc, 
         e_pop) %>% rename(country= country_name, iso= country_text_id, 
                           corr = v2x_corr, reg_corr = v2xnp_regcorr, 
                           pop = e_pop, gdppc = e_gdppc) %>% 
  mutate(corr_rate = diff(corr),reg_rate = diff(reg_corr))

df <- vdem_df_filtered %>% filter(year %in% 2000:2020) 
colSums(is.na(df)) # Identifying Missing Data
##   country      year       iso      corr  reg_corr     gdppc       pop corr_rate 
##         0         0         0         6         1       272       272         8 
##  reg_rate 
##         2

Disaster Data

The disaster data is was taken from EM-DAT database. Here I filter by biological disaster. I also create three subgroups and create dummy columns by disaster type. This uses the fastDummies package.

dis_df <- read_excel("dataset2.xlsx", skip = 6) %>% clean_names()
dis_df1 <- dis_df %>% select(start_year, country, 
                          region,iso, disaster_type, iso, disaster_subtype,
                          disaster_subgroup, contains("total")) %>% 
  rename(year = start_year) # Getting the Rows Needed

# Filtering by Biological Disaster
dis_df2 <- dis_df1 %>% filter(disaster_subgroup != "Biological") %>% 
  filter(disaster_type %in% c("Flood" , "Drought" , "Earthquake")) 
unique(dis_df2$disaster_subgroup) # Three Subgroups
## [1] "Hydrological"   "Climatological" "Geophysical"
dis_df3 <- fastDummies::dummy_cols(dis_df2, select_columns = "disaster_type") # Creating Dummy Columns

Amount of occurrences per disaster.

occurrences <- dis_df1 %>%
  count(disaster_subgroup)
print(occurrences)
## # A tibble: 4 × 2
##   disaster_subgroup     n
##   <chr>             <int>
## 1 Climatological      599
## 2 Geophysical         687
## 3 Hydrological       3847
## 4 Meteorological     2610

Here I am adding in zeros, when there is no disaster for that subgroup.

dis_df4 <- dis_df3 %>%
  mutate_if(is.numeric, ~replace_na(., 0))

Aggregating several variables, in particular the amount of damages and those affected.

names <- c("disaster_type_Drought", "disaster_type_Earthquake","disaster_type_Flood") 
dataframe <- dis_df4 %>% group_by(country, year, iso) %>% summarise(death = sum(total_deaths),
                                 affected = sum(total_affected),
                                 damages= sum(total_damages_000_us),
                                 occurences = n(), 
                                 n_floods = sum(disaster_type_Flood),
                                                n_drought= sum(disaster_type_Drought),
                                                               n_earth = sum(disaster_type_Earthquake)) 

Merging

Merging the V-Dem data with the EM-DAT data. The final line collects how many missing values.

data_frame <- merge(df, dataframe, by=c("year", "iso"), all = TRUE) %>% rename(country = country.x) %>%
  select(-country.y) # Combine V-Dem with Disaster Data
keep_row <-unique(dis_df$iso)
df2 <- data_frame %>% filter(iso %in% keep_row)
df3 <- df2 %>% filter(!is.na(country))
df4 <- mutate_if(df3[,10:16], is.numeric, ~replace_na(., 0))
final_df <- cbind(df3[,1:9],df4)
colSums(is.na(final_df))
##       year        iso    country       corr   reg_corr      gdppc        pop 
##          0          0          0          1          1        210        210 
##  corr_rate   reg_rate      death   affected    damages occurences   n_floods 
##          2          2          0          0          0          0          0 
##  n_drought    n_earth 
##          0          0

OECD

Now it is time to bring in the OECD data so we can understand what parts of the world were most affected.

filename <- "https://raw.githubusercontent.com/openclimatedata/countrygroups/main/data/oecd.csv"
mydata <-read_csv(url(filename))
names_oecd <- mydata$Name

final_df$oecd <- +(final_df$country %in% names_oecd)
final_df <- final_df %>% mutate(l_dam = log(1+damages), l_aff = log(1+affected),
                     l_death = log(1+death)) 

Adding Dummy Variables

Finally we create dummy variables for each variable type in the final data frame. The summary output only shows the first 10 columns to save space.

final_df$dis <- if_else(final_df$occurences > 0, 1, 0)
final_df$flood <- if_else(final_df$n_floods > 0, 1, 0)
final_df$drought <- if_else(final_df$n_drought > 0, 1, 0)
final_df$earth <- if_else(final_df$n_earth > 0, 1, 0)
selected_df <- final_df[, 1:10]
summary(selected_df)
##       year          iso              country               corr       
##  Min.   :2000   Length:3559        Length:3559        Min.   :0.0020  
##  1st Qu.:2005   Class :character   Class :character   1st Qu.:0.2060  
##  Median :2010   Mode  :character   Mode  :character   Median :0.5710  
##  Mean   :2010                                         Mean   :0.5118  
##  3rd Qu.:2015                                         3rd Qu.:0.7887  
##  Max.   :2020                                         Max.   :0.9660  
##                                                       NA's   :1       
##     reg_corr          gdppc              pop              corr_rate        
##  Min.   :0.0020   Min.   :  0.538   Min.   :     8.76   Min.   :-0.670000  
##  1st Qu.:0.1893   1st Qu.:  3.069   1st Qu.:   378.19   1st Qu.: 0.000000  
##  Median :0.5640   Median :  8.874   Median :  1057.76   Median : 0.000000  
##  Mean   :0.5055   Mean   : 15.016   Mean   :  4333.55   Mean   :-0.001615  
##  3rd Qu.:0.7750   3rd Qu.: 21.251   3rd Qu.:  2978.95   3rd Qu.: 0.000000  
##  Max.   :0.9680   Max.   :100.865   Max.   :148256.42   Max.   : 0.451000  
##  NA's   :1        NA's   :210       NA's   :210         NA's   :2          
##     reg_rate             death         
##  Min.   :-0.661000   Min.   :     0.0  
##  1st Qu.: 0.000000   1st Qu.:     0.0  
##  Median : 0.000000   Median :     0.0  
##  Mean   :-0.002098   Mean   :   239.7  
##  3rd Qu.: 0.000000   3rd Qu.:     9.0  
##  Max.   : 0.515000   Max.   :222614.0  
##  NA's   :2