Replication of Tables (Thesis)
Requirements
Packages to load.
library(dplyr) # For Data Manipulation
library(lme4) # For Linear Mixed-effects Modeling
library(lmerTest) # For Type III Anova
library(texreg) # Last three packages for Tables
library(stargazer) # Table Creation
library(xtable) # Table Creation
Encoding and including the data.
encoding <- "UTF-8"
csv_file <- "sen_youtube_data.csv"
df <- read.csv(csv_file, fileEncoding = encoding) # Load in the necessary file. Make sure its in the working directory.
Output is currently in Text Format. The thesis used type = “Latex”. ### Table 5.1: Descriptive Statistics Video Level
vars_video <- df[, c("log_viewCount", "durationMin", "pos_prob",
"neu_prob", "neg_prob", "compound_score", "log_likeCount")]
stargazer(
vars_video,
title = "Descriptive Statistics: Video Level Variables",
type = "text",
summary.stat = c("mean", "median", "sd", "min", "max"),
align = TRUE,
covariate.labels = c("View Count (Log)", "Duration (Minutes)",
"Positive Sentiment", "Neutral Sentiment",
"Negative Sentiment", "Compound Score", "Like Count (Log)*"),
digits = 2,
omit = "Constant",
notes = c("N = 44,100 (videos). Stargazer: February 16th, 2024.",
"*N = 39,100."),
label = "tab:video_level"
)
##
## Descriptive Statistics: Video Level Variables
## =========================================================
## Statistic Mean Median St. Dev. Min Max
## ---------------------------------------------------------
## View Count (Log) 5.63 5.35 1.80 0.00 14.85
## Duration (Minutes) 6.36 5.32 5.98 0.12 59.87
## Positive Sentiment 0.28 0.15 0.27 0.002 0.99
## Neutral Sentiment 0.47 0.51 0.18 0.005 0.94
## Negative Sentiment 0.25 0.21 0.22 0.001 0.98
## Compound Score 0.02 -0.06 0.46 -0.97 0.99
## Like Count (Log)* 2.28 1.95 1.79 0.00 11.28
## ---------------------------------------------------------
## N = 44,100 (videos). Stargazer: February 16th, 2024.
## *N = 39,100.
Table 5.2: Descriptive Statistics Senator Level
vars_sens <- df[, c("abs_nom", "age", "seniority", "log_subscriber")]
unique_vars_sens <- vars_sens[!duplicated(vars_sens), ]
stargazer(
unique_vars_sens,
title = "Descriptive Statistics: Senator Level Variables",
type = "text",
summary.stat = c("mean", "median", "sd", "min", "max"),
align = TRUE,
covariate.labels = c("*Ideology", "Age",
"Seniority", "Subscriber Count (Logged)"),
digits = 2,
omit = "Constant",
notes = c("N = 99 (senators). Stargazer: February 16th, 2024.",
"*Ideology is the absolute value of the DW-NOMINATE score."),
label = "tab:senator_level"
)
##
## Descriptive Statistics: Senator Level Variables
## ==============================================================
## Statistic Mean Median St. Dev. Min Max
## --------------------------------------------------------------
## *Ideology 0.44 0.41 0.19 0.06 0.94
## Age 64.47 66 10.79 36 90
## Seniority 12.28 11 8.51 1 43
## Subscriber Count (Logged) 7.60 7.31 1.74 2.89 12.48
## --------------------------------------------------------------
## N = 99 (senators). Stargazer: February 16th, 2024.
## *Ideology is the absolute value of the DW-NOMINATE score.
Table 6.1: Intercept Models (1-4)
null_model_int <- lmer(log_viewCount ~ 1 + (1 | channelId ), data = df)
intercept_model1 <- lmer(log_viewCount ~ neg_prob + abs_nom +
(1 | channelId ), data = df)
intercept_model2 <- lmer(log_viewCount ~ neg_prob + abs_nom + durationMin +
seniority + gender + (1 | channelId ), data = df)
intercept_model3 <- lmer(log_viewCount ~ neg_prob + abs_nom + durationMin +
seniority + gender + log_subscriber +(1 | channelId ), data = df)
model_list_6_1 <- list(null_model_int, intercept_model1, intercept_model2,intercept_model3)
latex_code_6_1 <- texreg(
model_list_6_1,
include.rsquared = TRUE,
custom.coef.names = c("Intercept", "Negative Sentiment",
"Ideology", "Duration (Minutes)", "Seniority",
"Gender (M)", "Subscribers (Log)"),
use.viewer = FALSE, dcolumn = FALSE, booktabs = TRUE,
center = TRUE,
use.packages = FALSE,
caption.above = TRUE,
caption = "Multilevel Linear Regression (Random-Intercept Models)",
label = "table:models1",
custom.model.names = c("Model 1", "Model 2", "Model 3", "Model 4"),
custom.gof.names = c("AIC", "BIC", "Log Likelihood", "Num. obs.",
"Num. groups: Channels", "Var: Channel (Intercept)",
"Var: Residual")
)
cat(latex_code_6_1)
##
## \begin{table}
## \caption{Multilevel Linear Regression (Random-Intercept Models)}
## \begin{center}
## \begin{tabular}{l c c c c}
## \toprule
## & Model 1 & Model 2 & Model 3 & Model 4 \\
## \midrule
## Intercept & $5.51^{***}$ & $4.70^{***}$ & $3.83^{***}$ & $1.59^{***}$ \\
## & $(0.11)$ & $(0.27)$ & $(0.38)$ & $(0.32)$ \\
## Negative Sentiment & & $0.91^{***}$ & $0.90^{***}$ & $0.90^{***}$ \\
## & & $(0.03)$ & $(0.03)$ & $(0.03)$ \\
## Ideology & & $1.31^{*}$ & $1.87^{**}$ & $-0.08$ \\
## & & $(0.56)$ & $(0.59)$ & $(0.42)$ \\
## Duration (Minutes) & & & $0.03^{***}$ & $0.03^{***}$ \\
## & & & $(0.00)$ & $(0.00)$ \\
## Seniority & & & $0.03^{*}$ & $-0.01$ \\
## & & & $(0.01)$ & $(0.01)$ \\
## Gender (M) & & & $0.03$ & $-0.09$ \\
## & & & $(0.23)$ & $(0.15)$ \\
## Subscribers (Log) & & & & $0.48^{***}$ \\
## & & & & $(0.04)$ \\
## \midrule
## AIC & $151508.25$ & $150620.80$ & $149832.88$ & $149762.89$ \\
## BIC & $151534.34$ & $150664.27$ & $149902.44$ & $149841.14$ \\
## Log Likelihood & $-75751.13$ & $-75305.40$ & $-74908.44$ & $-74872.45$ \\
## Num. obs. & $44100$ & $44100$ & $44100$ & $44100$ \\
## Num. groups: Channels & $99$ & $99$ & $99$ & $99$ \\
## Var: Channel (Intercept) & $1.16$ & $1.05$ & $0.98$ & $0.42$ \\
## Var: Residual & $1.80$ & $1.76$ & $1.73$ & $1.73$ \\
## \bottomrule
## \multicolumn{5}{l}{\scriptsize{$^{***}p<0.001$; $^{**}p<0.01$; $^{*}p<0.05$}}
## \end{tabular}
## \label{table:models1}
## \end{center}
## \end{table}
Table 6.2: Random Slope Models (5-8)
base_model_slope <- lmer(log_viewCount ~ 1 + (1 + neg_prob| channelId ), data = df)
negative_model1 <- lmer(log_viewCount ~ neg_prob + abs_nom +
(1 + neg_prob| channelId ), data = df)
negative_model2 <- lmer(log_viewCount ~ neg_prob + abs_nom + durationMin +
seniority + gender +
(1 + neg_prob| channelId ), data = df)
negative_model3 <- lmer(log_viewCount ~ neg_prob + abs_nom + durationMin +
seniority + gender + log_subscriber +
(1 + neg_prob| channelId ), data = df)
model_list_6_2 <- list(base_model_slope, negative_model1, negative_model2, negative_model3)
latex_code_6_2 <- texreg(
model_list_6_2,
include.rsquared = TRUE,
custom.coef.names = c("Intercept", "Negative Sentiment", "Ideology",
"Duration (Minutes)", "Seniority",
"Gender (M)", "Subscribers (Log)"),
use.viewer = FALSE, dcolumn = FALSE, booktabs = TRUE,
center = TRUE,
use.packages = FALSE,
caption.above = TRUE,
caption = "Multilevel Linear Regression (Random Slope)",
label = "table:models2",
custom.model.names = c("Model 5", "Model 6", "Model 7", "Model 8"),
custom.gof.names = c("AIC", "BIC", "Log Likelihood", "Num. obs.",
"Num. groups: Channels", "Var: Channel (Intercept)",
"Var: Channel Negativity",
"Cov: Channel (Int.) Neg.", "Var: Residual")
)
cat(latex_code_6_2)
##
## \begin{table}
## \caption{Multilevel Linear Regression (Random Slope)}
## \begin{center}
## \begin{tabular}{l c c c c}
## \toprule
## & Model 5 & Model 6 & Model 7 & Model 8 \\
## \midrule
## Intercept & $5.20^{***}$ & $4.81^{***}$ & $3.84^{***}$ & $1.64^{***}$ \\
## & $(0.10)$ & $(0.26)$ & $(0.36)$ & $(0.33)$ \\
## Negative Sentiment & & $0.80^{***}$ & $0.81^{***}$ & $0.79^{***}$ \\
## & & $(0.08)$ & $(0.08)$ & $(0.08)$ \\
## Ideology & & $1.13^{*}$ & $1.75^{**}$ & $-0.01$ \\
## & & $(0.53)$ & $(0.55)$ & $(0.43)$ \\
## Duration (Minutes) & & & $0.03^{***}$ & $0.03^{***}$ \\
## & & & $(0.00)$ & $(0.00)$ \\
## Seniority & & & $0.04^{**}$ & $-0.01$ \\
## & & & $(0.01)$ & $(0.01)$ \\
## Gender (M) & & & $0.03$ & $-0.07$ \\
## & & & $(0.22)$ & $(0.16)$ \\
## Subscribers (Log) & & & & $0.47^{***}$ \\
## & & & & $(0.04)$ \\
## \midrule
## AIC & $150217.27$ & $150156.85$ & $149376.96$ & $149323.36$ \\
## BIC & $150260.74$ & $150217.71$ & $149463.90$ & $149419.00$ \\
## Log Likelihood & $-75103.63$ & $-75071.43$ & $-74678.48$ & $-74650.68$ \\
## Num. obs. & $44100$ & $44100$ & $44100$ & $44100$ \\
## Num. groups: Channels & $99$ & $99$ & $99$ & $99$ \\
## Var: Channel (Intercept) & $1.00$ & $0.93$ & $0.86$ & $0.45$ \\
## Var: Channel Negativity & $1.14$ & $0.53$ & $0.49$ & $0.48$ \\
## Cov: Channel (Int.) Neg. & $0.24$ & $0.15$ & $0.17$ & $-0.12$ \\
## Var: Residual & $1.74$ & $1.74$ & $1.71$ & $1.71$ \\
## \bottomrule
## \multicolumn{5}{l}{\scriptsize{$^{***}p<0.001$; $^{**}p<0.01$; $^{*}p<0.05$}}
## \end{tabular}
## \label{table:models2}
## \end{center}
## \end{table}
Table 6.3: Composite Score Models (9-12)
compound_model1 <- lmer(log_viewCount ~ 1 +
(1 + compound_score| channelId ), data = df)
compound_model2 <- lmer(log_viewCount ~ compound_score + abs_nom +
(1 + compound_score| channelId ), data = df)
compound_model3 <- lmer(log_viewCount ~ compound_score + abs_nom + durationMin +
seniority + gender +
(1 + compound_score| channelId ), data = df)
compound_model4 <- lmer(log_viewCount ~ compound_score + abs_nom + durationMin +
seniority + gender + log_subscriber +
(1 + compound_score| channelId ), data = df)
model_list_6_3 <- list(compound_model1, compound_model2, compound_model3,compound_model4)
latex_code_6_3 <- texreg(
model_list_6_3,
include.rsquared = TRUE,
custom.coef.names = c("Intercept", "Composite Score",
"Ideology", "Duration (Minutes)", "Seniority", "Gender (M)", "Subscribers (Log)"),
use.viewer = FALSE, dcolumn = FALSE, booktabs = TRUE,
center = TRUE,
use.packages = FALSE,
caption.above = TRUE,
caption = "Multilevel Linear Regression (Composite Score)",
label = "table:models3",
custom.model.names = c("Model 9", "Model 10", "Model 11", "Model 12"),
custom.gof.names = c("AIC", "BIC", "Log Likelihood", "Num. obs.",
"Num. groups: Channels", "Var: Channel (Intercept)",
"Var: Channel Sentiment",
"Cov: Channel (Int.) Sent.", "Var: Residual")
)
cat(latex_code_6_3)
##
## \begin{table}
## \caption{Multilevel Linear Regression (Composite Score)}
## \begin{center}
## \begin{tabular}{l c c c c}
## \toprule
## & Model 9 & Model 10 & Model 11 & Model 12 \\
## \midrule
## Intercept & $5.11^{***}$ & $5.11^{***}$ & $4.24^{***}$ & $1.91^{***}$ \\
## & $(0.10)$ & $(0.25)$ & $(0.35)$ & $(0.33)$ \\
## Composite Score & & $-0.44^{***}$ & $-0.41^{***}$ & $-0.40^{***}$ \\
## & & $(0.04)$ & $(0.04)$ & $(0.04)$ \\
## Ideology & & $0.91$ & $1.52^{**}$ & $-0.00$ \\
## & & $(0.51)$ & $(0.53)$ & $(0.43)$ \\
## Duration (Minutes) & & & $0.03^{***}$ & $0.03^{***}$ \\
## & & & $(0.00)$ & $(0.00)$ \\
## Seniority & & & $0.03^{**}$ & $-0.00$ \\
## & & & $(0.01)$ & $(0.01)$ \\
## Gender (M) & & & $0.00$ & $-0.07$ \\
## & & & $(0.21)$ & $(0.16)$ \\
## Subscribers (Log) & & & & $0.46^{***}$ \\
## & & & & $(0.04)$ \\
## \midrule
## AIC & $149836.30$ & $149775.71$ & $149140.71$ & $149092.88$ \\
## BIC & $149879.78$ & $149836.57$ & $149227.65$ & $149188.52$ \\
## Log Likelihood & $-74913.15$ & $-74880.85$ & $-74560.35$ & $-74535.44$ \\
## Num. obs. & $44100$ & $44100$ & $44100$ & $44100$ \\
## Num. groups: Channels & $99$ & $99$ & $99$ & $99$ \\
## Var: Channel (Intercept) & $1.25$ & $1.04$ & $0.97$ & $0.43$ \\
## Var: Channel Sentiment & $0.34$ & $0.16$ & $0.15$ & $0.15$ \\
## Cov: Channel (Int.) Sent. & $-0.37$ & $-0.19$ & $-0.18$ & $-0.01$ \\
## Var: Residual & $1.72$ & $1.72$ & $1.70$ & $1.70$ \\
## \bottomrule
## \multicolumn{5}{l}{\scriptsize{$^{***}p<0.001$; $^{**}p<0.01$; $^{*}p<0.05$}}
## \end{tabular}
## \label{table:models3}
## \end{center}
## \end{table}
# Table 7.1: Random-effect Anova
negative_model3 <- lmer(log_viewCount ~ neg_prob + abs_nom + durationMin +
seniority + gender + log_subscriber +
(1 + neg_prob| channelId ), data = df)
intercept_model3 <- lmer(log_viewCount ~ neg_prob + abs_nom + durationMin +
seniority + gender + log_subscriber +
(1 | channelId ), data = df)
ranova_negative <- ranova(negative_model3)
ranova_intercept <- ranova(intercept_model3)
combined_results <- rbind(negative_model3 = ranova_negative, intercept_model3 = ranova_intercept)
rownames(combined_results) <- c("Negative Slope Model", "Removing Random Slope",
"Negative Intercept Model", "Removing Random Intercept")
combined_results <- combined_results[, -which(names(combined_results) == "AIC")]
last_col <- ncol(combined_results)
combined_results[, last_col] <- sprintf("%.2e", combined_results[, last_col])
print(xtable(combined_results, caption = "Random-effects ANOVA"),
caption.placement = "top", include.rownames = TRUE)
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Wed May 29 17:16:22 2024
## \begin{table}[ht]
## \centering
## \caption{Random-effects ANOVA}
## \begin{tabular}{lrrrrr}
## \hline
## & npar & logLik & LRT & Df & Pr($>$Chisq) \\
## \hline
## Negative Slope Model & 11.00 & -74650.68 & & & NA \\
## Removing Random Slope & 9.00 & -74872.45 & 443.53 & 2 & 4.89e-97 \\
## Negative Intercept Model & 9.00 & -74872.45 & & & NA \\
## Removing Random Intercept & 8.00 & -77447.87 & 5150.84 & 1 & 0.00e+00 \\
## \hline
## \end{tabular}
## \end{table}
Table 7.2: Likelihood Ratio Test
anova_results <- anova(negative_model3, intercept_model3)
## refitting model(s) with ML (instead of REML)
anova_df <- as.data.frame(anova_results)
anova_df$`Pr(>Chisq)` <- sprintf("%.2e", anova_df$`Pr(>Chisq)`)
print(xtable(anova_df, caption = "Likelihood Ratio Test Comparing Model 4 and Model 8"),
caption.placement = "top")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Wed May 29 17:16:23 2024
## \begin{table}[ht]
## \centering
## \caption{Likelihood Ratio Test Comparing Model 4 and Model 8}
## \begin{tabular}{rrrrrrrrl}
## \hline
## & npar & AIC & BIC & logLik & deviance & Chisq & Df & Pr($>$Chisq) \\
## \hline
## intercept\_model3 & 9.00 & 149727.94 & 149806.19 & -74854.97 & 149709.94 & & & NA \\
## negative\_model3 & 11.00 & 149290.54 & 149386.17 & -74634.27 & 149268.54 & 441.40 & 2.00 & 1.41e-96 \\
## \hline
## \end{tabular}
## \end{table}
Table A.1: Like Count Models (13-16)
base_like_model <- lmer(log_likeCount ~ 1 + (1 + neg_prob| channelId ), data = df)
like_model1 <- lmer(log_likeCount ~ neg_prob + abs_nom +
(1 + neg_prob| channelId ), data = df)
like_model2 <- lmer(log_likeCount ~ neg_prob + abs_nom + durationMin +
seniority + gender +
(1 + neg_prob| channelId ), data = df)
like_model3 <- lmer(log_likeCount ~ neg_prob + abs_nom + durationMin +
seniority + gender + log_subscriber +
(1 + neg_prob| channelId ), data = df)
model_list_A_1 <- list(base_like_model, like_model1, like_model2, like_model3)
latex_code_A_1 <- texreg(
model_list_A_1,
include.rsquared = TRUE,
custom.coef.names = c("Intercept", "Negative Sentiment", "Ideology",
"Duration (Minutes)", "Seniority",
"Gender (M)", "Subscribers (Log)"),
use.viewer = FALSE, dcolumn = FALSE, booktabs = TRUE,
center = TRUE,
use.packages = FALSE,
caption.above = TRUE,
caption = "Multilevel Linear Regression (Like Count)",
label = "table:models4",
custom.model.names = c("Model 13", "Model 14", "Model 15", "Model 16"),
custom.gof.names = c("AIC", "BIC", "Log Likelihood", "Num. obs.",
"Num. groups: Channels", "Var: Channel (Intercept)",
"Var: Channel Negativity",
"Cov: Channel (Int.) Neg.", "Var: Residual")
)
cat(latex_code_A_1)
##
## \begin{table}
## \caption{Multilevel Linear Regression (Like Count)}
## \begin{center}
## \begin{tabular}{l c c c c}
## \toprule
## & Model 13 & Model 14 & Model 15 & Model 16 \\
## \midrule
## Intercept & $1.51^{***}$ & $1.18^{***}$ & $0.39$ & $-2.16^{***}$ \\
## & $(0.10)$ & $(0.26)$ & $(0.37)$ & $(0.32)$ \\
## Negative Sentiment & & $0.88^{***}$ & $0.89^{***}$ & $0.89^{***}$ \\
## & & $(0.09)$ & $(0.09)$ & $(0.09)$ \\
## Ideology & & $1.35^{*}$ & $1.81^{**}$ & $0.49$ \\
## & & $(0.55)$ & $(0.57)$ & $(0.42)$ \\
## Duration (Minutes) & & & $0.02^{***}$ & $0.02^{***}$ \\
## & & & $(0.00)$ & $(0.00)$ \\
## Seniority & & & $0.03^{**}$ & $-0.01$ \\
## & & & $(0.01)$ & $(0.01)$ \\
## Gender (M) & & & $0.05$ & $-0.06$ \\
## & & & $(0.22)$ & $(0.15)$ \\
## Subscribers (Log) & & & & $0.49^{***}$ \\
## & & & & $(0.04)$ \\
## \midrule
## AIC & $122032.09$ & $121964.60$ & $121502.52$ & $121441.44$ \\
## BIC & $122074.96$ & $122024.62$ & $121588.26$ & $121535.75$ \\
## Log Likelihood & $-61011.05$ & $-60975.30$ & $-60741.26$ & $-60709.72$ \\
## Num. obs. & $39100$ & $39100$ & $39100$ & $39100$ \\
## Num. groups: Channels & $96$ & $96$ & $96$ & $96$ \\
## Var: Channel (Intercept) & $1.11$ & $0.96$ & $0.90$ & $0.40$ \\
## Var: Channel Negativity & $1.29$ & $0.55$ & $0.52$ & $0.52$ \\
## Cov: Channel (Int.) Neg. & $0.51$ & $0.22$ & $0.22$ & $-0.07$ \\
## Var: Residual & $1.30$ & $1.30$ & $1.29$ & $1.29$ \\
## \bottomrule
## \multicolumn{5}{l}{\scriptsize{$^{***}p<0.001$; $^{**}p<0.01$; $^{*}p<0.05$}}
## \end{tabular}
## \label{table:models4}
## \end{center}
## \end{table}
Table A.2: Channels with more than 50 videos Models (17-20)
channel_counts <- table(df$channelId)
filtered_channel_df <- df[df$channelId %in% names(channel_counts[channel_counts >= 50]), ]
base_channel_model <- lmer(log_viewCount ~ 1 + (1 + neg_prob| channelId ),
data = filtered_channel_df)
channel_model1 <- lmer(log_viewCount ~ neg_prob + abs_nom +
(1 + neg_prob| channelId ), data = filtered_channel_df)
channel_model2 <- lmer(log_viewCount ~ neg_prob + abs_nom + durationMin +
seniority + gender +
(1 + neg_prob| channelId ), data = filtered_channel_df)
channel_model3 <- lmer(log_viewCount ~ neg_prob + abs_nom + durationMin +
seniority + gender + log_subscriber +
(1 + neg_prob| channelId ), data = filtered_channel_df)
model_list_A_2 <- list(base_channel_model, channel_model1, channel_model2,channel_model3)
latex_code_A_2 <- texreg(
model_list_A_2,
include.rsquared = TRUE,
custom.coef.names = c("Intercept", "Negative Sentiment", "Ideology",
"Duration (Minutes)", "Seniority",
"Gender (M)", "Subscribers (Log)"),
use.viewer = FALSE, dcolumn = FALSE, booktabs = TRUE,
center = TRUE,
use.packages = FALSE,
caption.above = TRUE,
caption = "Multilevel Linear Regression (Without channels w/ under 50 videos)",
label = "table:models5",
custom.model.names = c("Model 17", "Model 18", "Model 19", "Model 20"),
custom.gof.names = c("AIC", "BIC", "Log Likelihood", "Num. obs.",
"Num. groups: Channels", "Var: Channel (Intercept)",
"Var: Channel Negativity",
"Cov: Channel (Int.) Neg.", "Var: Residual")
)
cat(latex_code_A_2)
##
## \begin{table}
## \caption{Multilevel Linear Regression (Without channels w/ under 50 videos)}
## \begin{center}
## \begin{tabular}{l c c c c}
## \toprule
## & Model 17 & Model 18 & Model 19 & Model 20 \\
## \midrule
## Intercept & $5.13^{***}$ & $4.56^{***}$ & $3.58^{***}$ & $0.99^{***}$ \\
## & $(0.10)$ & $(0.26)$ & $(0.36)$ & $(0.29)$ \\
## Negative Sentiment & & $0.80^{***}$ & $0.81^{***}$ & $0.80^{***}$ \\
## & & $(0.09)$ & $(0.08)$ & $(0.08)$ \\
## Ideology & & $1.57^{**}$ & $2.09^{***}$ & $-0.04$ \\
## & & $(0.54)$ & $(0.54)$ & $(0.37)$ \\
## Duration (Minutes) & & & $0.03^{***}$ & $0.03^{***}$ \\
## & & & $(0.00)$ & $(0.00)$ \\
## Seniority & & & $0.03^{**}$ & $-0.01$ \\
## & & & $(0.01)$ & $(0.01)$ \\
## Gender (M) & & & $0.16$ & $0.12$ \\
## & & & $(0.22)$ & $(0.13)$ \\
## Subscribers (Log) & & & & $0.53^{***}$ \\
## & & & & $(0.04)$ \\
## \midrule
## AIC & $149460.15$ & $149398.23$ & $148619.21$ & $148539.69$ \\
## BIC & $149503.59$ & $149459.05$ & $148706.10$ & $148635.26$ \\
## Log Likelihood & $-74725.08$ & $-74692.11$ & $-74299.60$ & $-74258.84$ \\
## Num. obs. & $43864$ & $43864$ & $43864$ & $43864$ \\
## Num. groups: Channels & $90$ & $90$ & $90$ & $90$ \\
## Var: Channel (Intercept) & $0.95$ & $0.84$ & $0.77$ & $0.32$ \\
## Var: Channel Negativity & $1.15$ & $0.53$ & $0.49$ & $0.49$ \\
## Cov: Channel (Int.) Neg. & $0.27$ & $0.12$ & $0.14$ & $-0.15$ \\
## Var: Residual & $1.74$ & $1.74$ & $1.71$ & $1.71$ \\
## \bottomrule
## \multicolumn{5}{l}{\scriptsize{$^{***}p<0.001$; $^{**}p<0.01$; $^{*}p<0.05$}}
## \end{tabular}
## \label{table:models5}
## \end{center}
## \end{table}
Table A.3: Removed videos with extremely large view counts Model (21-24)
z_scores_log <- (df$log_viewCount - mean(df$log_viewCount)) / sd(df$log_viewCount)
outliers_log <- df[abs(z_scores_log) > 3, ]
outlier_indices <- which(abs(z_scores_log) > 3)
df_outliers <- df[-outlier_indices, ]
base_outliers_model <- lmer(log_viewCount ~ 1 + (1 + neg_prob| channelId ), data = df_outliers)
outlier_model1 <- lmer(log_viewCount ~ neg_prob + abs_nom +
(1 + neg_prob| channelId ), data = df_outliers)
outlier_model2 <- lmer(log_viewCount ~ neg_prob + abs_nom + durationMin +
seniority + gender +
(1 + neg_prob| channelId ), data = df_outliers)
outlier_model3 <- lmer(log_viewCount ~ neg_prob + abs_nom + durationMin +
seniority + gender + log_subscriber +
(1 + neg_prob| channelId ), data = df_outliers)
model_list_A_3 <- list(base_outliers_model, outlier_model1, outlier_model2,outlier_model3)
latex_code_A_3 <- texreg(
model_list_A_3,
include.rsquared = TRUE,
custom.coef.names = c("Intercept", "Negative Sentiment", "Ideology",
"Duration (Minutes)", "Seniority",
"Gender (M)", "Subscribers (Log)"),
use.viewer = FALSE, dcolumn = FALSE, booktabs = TRUE,
center = TRUE,
use.packages = FALSE,
caption.above = TRUE,
caption = "Multilevel Linear Regression (Outliers Removed)",
label = "table:models6",
custom.model.names = c("Model 21", "Model 22", "Model 23", "Model 24"),
custom.gof.names = c("AIC", "BIC", "Log Likelihood", "Num. obs.",
"Num. groups: Channels", "Var: Channel (Intercept)",
"Var: Channel Negativity",
"Cov: Channel (Int.) Neg.", "Var: Residual")
)
cat(latex_code_A_3)
##
## \begin{table}
## \caption{Multilevel Linear Regression (Outliers Removed)}
## \begin{center}
## \begin{tabular}{l c c c c}
## \toprule
## & Model 21 & Model 22 & Model 23 & Model 24 \\
## \midrule
## Intercept & $5.29^{***}$ & $4.79^{***}$ & $3.85^{***}$ & $1.79^{***}$ \\
## & $(0.10)$ & $(0.25)$ & $(0.35)$ & $(0.32)$ \\
## Negative Sentiment & & $0.72^{***}$ & $0.73^{***}$ & $0.71^{***}$ \\
## & & $(0.08)$ & $(0.08)$ & $(0.08)$ \\
## Ideology & & $1.15^{*}$ & $1.76^{**}$ & $-0.04$ \\
## & & $(0.52)$ & $(0.54)$ & $(0.42)$ \\
## Duration (Minutes) & & & $0.03^{***}$ & $0.03^{***}$ \\
## & & & $(0.00)$ & $(0.00)$ \\
## Seniority & & & $0.04^{**}$ & $-0.01$ \\
## & & & $(0.01)$ & $(0.01)$ \\
## Gender (M) & & & $0.04$ & $-0.06$ \\
## & & & $(0.21)$ & $(0.15)$ \\
## Subscribers (Log) & & & & $0.45^{***}$ \\
## & & & & $(0.04)$ \\
## \midrule
## AIC & $144233.77$ & $144177.95$ & $143420.71$ & $143364.52$ \\
## BIC & $144277.20$ & $144238.75$ & $143507.56$ & $143460.06$ \\
## Log Likelihood & $-72111.88$ & $-72081.98$ & $-71700.35$ & $-71671.26$ \\
## Num. obs. & $43710$ & $43710$ & $43710$ & $43710$ \\
## Num. groups: Channels & $99$ & $99$ & $99$ & $99$ \\
## Var: Channel (Intercept) & $0.95$ & $0.89$ & $0.82$ & $0.44$ \\
## Var: Channel Negativity & $0.98$ & $0.47$ & $0.44$ & $0.44$ \\
## Cov: Channel (Int.) Neg. & $0.07$ & $0.07$ & $0.09$ & $-0.12$ \\
## Var: Residual & $1.56$ & $1.56$ & $1.53$ & $1.53$ \\
## \bottomrule
## \multicolumn{5}{l}{\scriptsize{$^{***}p<0.001$; $^{**}p<0.01$; $^{*}p<0.05$}}
## \end{tabular}
## \label{table:models6}
## \end{center}
## \end{table}
Table A.4: Positive Sentiment Models (25-28)
base_pos_model <- lmer(log_viewCount ~ 1 + (1 + pos_prob| channelId ), data = df)
positive_model1 <- lmer(log_viewCount ~ pos_prob + abs_nom +
(1 + pos_prob| channelId ), data = df)
positive_model2 <- lmer(log_viewCount ~ pos_prob + abs_nom + durationMin +
seniority + gender +
(1 + pos_prob| channelId ), data = df)
positive_model3 <- lmer(log_viewCount ~ pos_prob + abs_nom + durationMin +
seniority + gender + log_subscriber +
(1 + pos_prob| channelId ), data = df)
model_list_A_4 <- list(base_pos_model, positive_model1, positive_model2, positive_model3)
latex_code_A_4 <- texreg(
model_list_A_4,
include.rsquared = TRUE,
custom.coef.names = c("Intercept", "Positive Sentiment", "Ideology",
"Duration (Minutes)", "Seniority",
"Gender (M)", "Subscribers (Log)"),
use.viewer = FALSE, dcolumn = FALSE, booktabs = TRUE,
center = TRUE,
use.packages = FALSE,
caption.above = TRUE,
caption = "Multilevel Linear Regression (Positive Sentiment)",
label = "table:models7",
custom.model.names = c("Model 25", "Model 26", "Model 27", "Model 28"),
custom.gof.names = c("AIC", "BIC", "Log Likelihood", "Num. obs.",
"Num. groups: Channels", "Var: Channel (Intercept)",
"Var: Channel Positivity",
"Cov: Channel (Int.) Pos.", "Var: Residual")
)
cat(latex_code_A_4)
##
## \begin{table}
## \caption{Multilevel Linear Regression (Positive Sentiment)}
## \begin{center}
## \begin{tabular}{l c c c c}
## \toprule
## & Model 25 & Model 26 & Model 27 & Model 28 \\
## \midrule
## Intercept & $5.07^{***}$ & $5.37^{***}$ & $4.54^{***}$ & $2.15^{***}$ \\
## & $(0.09)$ & $(0.25)$ & $(0.35)$ & $(0.33)$ \\
## Positive Sentiment & & $-0.76^{***}$ & $-0.68^{***}$ & $-0.66^{***}$ \\
## & & $(0.08)$ & $(0.08)$ & $(0.08)$ \\
## Ideology & & $0.76$ & $1.38^{**}$ & $0.01$ \\
## & & $(0.50)$ & $(0.53)$ & $(0.43)$ \\
## Duration (Minutes) & & & $0.03^{***}$ & $0.03^{***}$ \\
## & & & $(0.00)$ & $(0.00)$ \\
## Seniority & & & $0.03^{**}$ & $-0.00$ \\
## & & & $(0.01)$ & $(0.01)$ \\
## Gender (M) & & & $-0.02$ & $-0.07$ \\
## & & & $(0.21)$ & $(0.16)$ \\
## Subscribers (Log) & & & & $0.45^{***}$ \\
## & & & & $(0.04)$ \\
## \midrule
## AIC & $149876.82$ & $149818.95$ & $149275.26$ & $149229.49$ \\
## BIC & $149920.29$ & $149879.80$ & $149362.20$ & $149325.12$ \\
## Log Likelihood & $-74933.41$ & $-74902.47$ & $-74627.63$ & $-74603.74$ \\
## Num. obs. & $44100$ & $44100$ & $44100$ & $44100$ \\
## Num. groups: Channels & $99$ & $99$ & $99$ & $99$ \\
## Var: Channel (Intercept) & $1.72$ & $1.28$ & $1.18$ & $0.48$ \\
## Var: Channel Positivity & $1.06$ & $0.51$ & $0.49$ & $0.49$ \\
## Cov: Channel (Int.) Pos. & $-1.00$ & $-0.52$ & $-0.46$ & $-0.16$ \\
## Var: Residual & $1.72$ & $1.72$ & $1.70$ & $1.70$ \\
## \bottomrule
## \multicolumn{5}{l}{\scriptsize{$^{***}p<0.001$; $^{**}p<0.01$; $^{*}p<0.05$}}
## \end{tabular}
## \label{table:models7}
## \end{center}
## \end{table}