This document provides R-based analyses that complement the main NLP pipeline (run in Python/Colab). These analyses require R’s regression infrastructure and are not duplicated in the Python notebook.
What this document covers (unique to R): incremental validity of text features over psychological predictors, standardized coefficients, text features predicting sentencing behavior, political-ideology moderation, and text features by vignette.
What is handled in the Python notebook (not duplicated here): facade detection, cross-method convergence, competing interpretations, prototype sensitivity, and individual-level directional tests.
# Packages required for the R-side regression analyses.
required_packages <- c(
  "tidyverse",
  "psych",
  "broom",
  "knitr",
  "car",
  "effectsize",
  "scales",
  "lmerTest",
  "performance"
)
# Install anything missing, then attach everything.
new_packages <- required_packages[!(required_packages %in% installed.packages()[, "Package"])]
if (length(new_packages) > 0) install.packages(new_packages)
# invisible() suppresses lapply's return value (a list of attached-package
# search paths), which otherwise prints ~70 lines of noise into the report.
invisible(lapply(required_packages, library, character.only = TRUE))
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "psych" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "broom" "psych" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "knitr" "broom" "psych" "lubridate" "forcats" "stringr"
## [7] "dplyr" "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [13] "tidyverse" "stats" "graphics" "grDevices" "utils" "datasets"
## [19] "methods" "base"
##
## [[5]]
## [1] "car" "carData" "knitr" "broom" "psych" "lubridate"
## [7] "forcats" "stringr" "dplyr" "purrr" "readr" "tidyr"
## [13] "tibble" "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [19] "utils" "datasets" "methods" "base"
##
## [[6]]
## [1] "effectsize" "car" "carData" "knitr" "broom"
## [6] "psych" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "scales" "effectsize" "car" "carData" "knitr"
## [6] "broom" "psych" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
##
## [[8]]
## [1] "lmerTest" "lme4" "Matrix" "scales" "effectsize"
## [6] "car" "carData" "knitr" "broom" "psych"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[9]]
## [1] "performance" "lmerTest" "lme4" "Matrix" "scales"
## [6] "effectsize" "car" "carData" "knitr" "broom"
## [11] "psych" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
# Display options: avoid scientific notation; show 4 significant digits.
options(scipen = 999)
options(digits = 4)
# Load the NLP features dataset (contains both NLP and psychological variables)
# UPDATE PATH AS NEEDED
# NOTE(review): hard-coded absolute personal path — consider a relative path
# or a project-root helper so this document knits on other machines.
df <- read.csv("/Users/dgkamper/Library/CloudStorage/[email protected]/My Drive/DGK Lab/Collaborations/Dan Simon/Punishment/Analysis/NLP Pipeline/Second Pass/punishment_212_nlp_features.csv", stringsAsFactors = FALSE)
cat("Loaded dataset: N =", nrow(df), "rows,", ncol(df), "columns\n\n")
## Loaded dataset: N = 496 rows, 258 columns
# Verify key variables
# Sanity check: confirm the expected NLP and psychological columns exist
# before any models are fit, so a wrong/stale CSV is caught loudly here.
nlp_vars <- c("sim_prosocial_mean", "sim_dark_mean", "sim_prosocial_minus_dark",
"vader_compound", "just_prosocial", "just_dark",
"zs_prosocial_mean", "zs_dark_mean", "facade_residual")
psych_vars <- c("punitiveness_agg", "hostile_agg", "crime_concerns_agg",
"hatred_comp", "revenge_comp", "sdo_comp")
cat("NLP variables present:", sum(nlp_vars %in% names(df)), "/", length(nlp_vars), "\n")
## NLP variables present: 9 / 9
cat("Psych variables present:", sum(psych_vars %in% names(df)), "/", length(psych_vars), "\n")
## Psych variables present: 6 / 6
cat("\n")
# Section banner.
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("INCREMENTAL VALIDITY: TEXT FEATURES PREDICTING PUNITIVENESS\n")
## INCREMENTAL VALIDITY: TEXT FEATURES PREDICTING PUNITIVENESS
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")
## ======================================================================
# All models include factor(vignette) to account for between-condition differences
# (Main RMD found vignette random effect variance ≈ 0, so fixed effect is conservative)
# Step 1: Psychological predictors only (+ vignette)
# Hierarchical regression: the baseline model uses psychological predictors;
# step 2 below adds the text features so ΔR² isolates the unique variance
# contributed by language.
model1 <- lm(punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg +
emotions_agg + personality_agg, data = df)
cat("Model 1: Psychological Predictors (+ vignette covariate)\n")
## Model 1: Psychological Predictors (+ vignette covariate)
cat(sprintf(" R² = %.3f, Adj R² = %.3f\n",
summary(model1)$r.squared, summary(model1)$adj.r.squared))
## R² = 0.387, Adj R² = 0.380
cat(" Predictors: hostile_agg, crime_concerns_agg, emotions_agg, personality_agg\n\n")
## Predictors: hostile_agg, crime_concerns_agg, emotions_agg, personality_agg
# Step 2: Add text features
model2 <- lm(punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg +
emotions_agg + personality_agg +
sim_prosocial_mean + sim_dark_mean + vader_compound, data = df)
cat("Model 2: Psychological + Text Features (+ vignette covariate)\n")
## Model 2: Psychological + Text Features (+ vignette covariate)
cat(sprintf(" R² = %.3f, Adj R² = %.3f\n\n",
summary(model2)$r.squared, summary(model2)$adj.r.squared))
## R² = 0.443, Adj R² = 0.432
# Compare models
# Nested-model F-test: does adding the three text features significantly
# reduce residual variance beyond the psychological predictors?
r2_change <- summary(model2)$r.squared - summary(model1)$r.squared
anova_comparison <- anova(model1, model2)
cat(sprintf("Incremental R² (ΔR²): %.4f\n\n", r2_change))
## Incremental R² (ΔR²): 0.0552
cat("Model comparison F-test:\n")
## Model comparison F-test:
print(anova_comparison)
## Analysis of Variance Table
##
## Model 1: punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg +
## emotions_agg + personality_agg
## Model 2: punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg +
## emotions_agg + personality_agg + sim_prosocial_mean + sim_dark_mean +
## vader_compound
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 489 202
## 2 486 184 3 18.2 16 0.00000000059 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\nModel 2 Coefficients (excluding vignette dummies):\n")
##
## Model 2 Coefficients (excluding vignette dummies):
# Tidy coefficient table; vignette dummies are nuisance covariates and omitted.
print(tidy(model2) %>%
filter(!grepl("vignette", term)) %>%
mutate(across(where(is.numeric), ~round(., 3))) %>%
select(term, estimate, std.error, statistic, p.value))
## # A tibble: 8 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -1.77 0.2 -8.85 0
## 2 hostile_agg 0.216 0.037 5.80 0
## 3 crime_concerns_agg 0.051 0.027 1.91 0.057
## 4 emotions_agg 0.132 0.029 4.49 0
## 5 personality_agg 0.082 0.036 2.30 0.022
## 6 sim_prosocial_mean -2.31 0.439 -5.26 0
## 7 sim_dark_mean 1.72 0.403 4.27 0
## 8 vader_compound -0.224 0.05 -4.46 0
# Robustness: Compare with and without vignette covariate
# Refit Model 2 without the vignette dummies; a tiny R² difference indicates
# the text-feature effects are not driven by between-vignette differences.
model2_novign <- lm(punitiveness_agg ~ hostile_agg + crime_concerns_agg +
emotions_agg + personality_agg +
sim_prosocial_mean + sim_dark_mean + vader_compound, data = df)
cat(sprintf("\nRobustness: R² without vignette covariate: %.3f (diff = %.4f)\n",
summary(model2_novign)$r.squared,
summary(model2)$r.squared - summary(model2_novign)$r.squared))
##
## Robustness: R² without vignette covariate: 0.442 (diff = 0.0010)
cat("\n=== STANDARDIZED COEFFICIENTS (β) ===\n\n")
##
## === STANDARDIZED COEFFICIENTS (β) ===
cat("(Vignette dummies included but not shown; continuous predictors standardized)\n\n")
## (Vignette dummies included but not shown; continuous predictors standardized)
# Refit Model 2 with all continuous variables z-scored so the coefficients
# are directly comparable standardized effect sizes (β).
model2_std <- lm(scale(punitiveness_agg) ~ factor(vignette) +
scale(hostile_agg) + scale(crime_concerns_agg) +
scale(emotions_agg) + scale(personality_agg) +
scale(sim_prosocial_mean) + scale(sim_dark_mean) +
scale(vader_compound), data = df)
# FIX: map model terms to display labels by NAME rather than by position.
# The previous positional assignment (term = c("Hostile Agg", ...)) would
# silently mislabel rows if the term order in tidy() ever changed.
term_labels <- c(
  "scale(hostile_agg)"        = "Hostile Agg",
  "scale(crime_concerns_agg)" = "Crime Concerns",
  "scale(emotions_agg)"       = "Emotions",
  "scale(personality_agg)"    = "Personality",
  "scale(sim_prosocial_mean)" = "Prosocial Sim",
  "scale(sim_dark_mean)"      = "Dark Sim",
  "scale(vader_compound)"     = "Sentiment"
)
std_coefs <- tidy(model2_std) %>%
  filter(term != "(Intercept)" & !grepl("vignette", term)) %>%
  mutate(
    term = unname(term_labels[term]),
    estimate = round(estimate, 3),
    p.value = round(p.value, 4)
  ) %>%
  select(Predictor = term, Beta = estimate, p = p.value) %>%
  arrange(desc(abs(Beta)))
# std_coefs is a tibble, so print() takes no row.names argument (it was
# silently ignored before).
print(std_coefs)
## # A tibble: 7 × 3
## Predictor Beta p
## <chr> <dbl> <dbl>
## 1 Hostile Agg 0.319 0
## 2 Prosocial Sim -0.228 0
## 3 Emotions 0.22 0
## 4 Dark Sim 0.185 0
## 5 Sentiment -0.153 0
## 6 Personality 0.106 0.022
## 7 Crime Concerns 0.073 0.0568
cat("\nInterpretation:\n")
##
## Interpretation:
# Compare the largest standardized text-feature effect against the largest
# psychological effect to gauge their relative importance.
text_betas <- std_coefs %>% filter(Predictor %in% c("Prosocial Sim", "Dark Sim", "Sentiment"))
max_text_beta <- max(abs(text_betas$Beta))
max_psych_beta <- max(abs(std_coefs$Beta[!std_coefs$Predictor %in% c("Prosocial Sim", "Dark Sim", "Sentiment")]))
cat(sprintf("Largest text feature β: %.3f\n", max_text_beta))
## Largest text feature β: 0.228
cat(sprintf("Largest psychological β: %.3f\n", max_psych_beta))
## Largest psychological β: 0.319
# 0.10 is an informal threshold for a "minimal" standardized effect here.
if(max_text_beta < 0.10) {
cat("Text features show minimal unique contribution beyond psychological measures.\n")
cat("Language reflects the psychological profile but does not add to it.\n")
} else {
cat("Text features show some unique contribution beyond psychological measures.\n")
}
## Text features show some unique contribution beyond psychological measures.
cat("\n")
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("TEXT FEATURES PREDICTING SENTENCING BEHAVIOR\n")
## TEXT FEATURES PREDICTING SENTENCING BEHAVIOR
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")
## ======================================================================
# Candidate text features for the sentencing analysis: embedding
# similarities (sim_*), zero-shot topic scores (zs_*), VADER sentiment,
# and dictionary counts (just_*).
text_for_sentence <- c("sim_prosocial_mean", "sim_dark_mean",
"sim_proto_rehabilitation", "sim_proto_deterrence",
"sim_proto_revenge", "sim_proto_suffering",
"zs_rehabilitation_and_reform", "zs_deterrence_and_prevention",
"zs_revenge_and_payback", "zs_punishment_and_suffering",
"vader_compound", "just_dark", "just_prosocial")
# Keep only the features actually present in this dataset.
text_for_sentence <- text_for_sentence[text_for_sentence %in% names(df)]
# Correlate each text feature with the (within-vignette z-scored) sentence.
# FIX 1: cor.test() has no `use` argument (that belongs to cor()); the old
#   `use = "pairwise.complete.obs"` was silently swallowed by `...`.
#   cor.test() drops incomplete pairs itself, so removing it is
#   behavior-preserving.
# FIX 2: build the result rows with lapply() + do.call(rbind) instead of
#   growing a data.frame with rbind() inside a for-loop (quadratic copying).
sentence_cors <- do.call(rbind, lapply(text_for_sentence, function(var) {
  test <- cor.test(df[[var]], df$Sentence_z)
  data.frame(
    Text_Feature = var,
    r = round(test$estimate, 3),
    p = test$p.value,
    sig = ifelse(test$p.value < .001, "***",
                 ifelse(test$p.value < .01, "**",
                        ifelse(test$p.value < .05, "*", "")))
  )
}))
# Order from most negative to most positive correlation.
sentence_cors <- sentence_cors %>% arrange(r)
cat("Text Features × Sentence (z-scored):\n")
## Text Features × Sentence (z-scored):
print(sentence_cors %>% mutate(p = format(p, scientific = FALSE, digits = 4)), row.names = FALSE)
## Text_Feature r p sig
## zs_rehabilitation_and_reform -0.451 0.00000000000000000000000002864 ***
## vader_compound -0.219 0.00000087362351588873359931558 ***
## sim_proto_rehabilitation -0.212 0.00000195824699748705184702792 ***
## sim_prosocial_mean -0.093 0.03749698177493523965075894466 *
## just_prosocial -0.084 0.06020641098299508997548912248
## sim_proto_deterrence -0.082 0.06640940330856597018094333862
## sim_proto_suffering 0.051 0.25998734759621255907546810704
## sim_proto_revenge 0.082 0.06817465281839056590040826222
## sim_dark_mean 0.109 0.01517783457620437009838454401 *
## just_dark 0.156 0.00047514372812243872669021871 ***
## zs_punishment_and_suffering 0.164 0.00023952531985010720987777644 ***
## zs_deterrence_and_prevention 0.215 0.00000142552426173001954844448 ***
## zs_revenge_and_payback 0.291 0.00000000003953194372538350949 ***
cat("\n=== REGRESSION: SENTENCE LENGTH ===\n\n")
##
## === REGRESSION: SENTENCE LENGTH ===
cat("Note: Sentence_z is already z-scored within vignette, but we include\n")
## Note: Sentence_z is already z-scored within vignette, but we include
cat("vignette as covariate for robustness.\n\n")
## vignette as covariate for robustness.
# Model 1: Psychological predictors (+ vignette)
sent_model1 <- lm(Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg +
punitiveness_8item, data = df)
cat("Model 1: Psychological Predictors (+ vignette)\n")
## Model 1: Psychological Predictors (+ vignette)
cat(sprintf(" R² = %.3f, Adj R² = %.3f\n\n",
summary(sent_model1)$r.squared, summary(sent_model1)$adj.r.squared))
## R² = 0.117, Adj R² = 0.108
# Model 2: Add text features
# Both candidate features are checked for presence so this chunk degrades
# gracefully if the columns are absent from an older dataset.
sent_text_vars <- c("zs_rehabilitation_and_reform", "vader_compound")
sent_text_vars <- sent_text_vars[sent_text_vars %in% names(df)]
if(length(sent_text_vars) > 0) {
# Formula assembled as a string because the predictor set is dynamic.
formula_str <- paste("Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg + punitiveness_8item +",
paste(sent_text_vars, collapse = " + "))
sent_model2 <- lm(as.formula(formula_str), data = df)
cat("Model 2: Psychological + Text Features (+ vignette)\n")
cat(sprintf(" R² = %.3f, Adj R² = %.3f\n",
summary(sent_model2)$r.squared, summary(sent_model2)$adj.r.squared))
sent_r2_change <- summary(sent_model2)$r.squared - summary(sent_model1)$r.squared
cat(sprintf(" ΔR² = %.4f\n\n", sent_r2_change))
cat("Model comparison:\n")
print(anova(sent_model1, sent_model2))
cat("\nModel 2 Coefficients (excluding vignette dummies):\n")
print(tidy(sent_model2) %>%
filter(!grepl("vignette", term)) %>%
mutate(across(where(is.numeric), ~round(., 3))))
}
## Model 2: Psychological + Text Features (+ vignette)
## R² = 0.258, Adj R² = 0.247
## ΔR² = 0.1414
##
## Model comparison:
## Analysis of Variance Table
##
## Model 1: Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg +
## punitiveness_8item
## Model 2: Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg +
## punitiveness_8item + zs_rehabilitation_and_reform + vader_compound
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 490 435
## 2 488 366 2 69.7 46.5 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model 2 Coefficients (excluding vignette dummies):
## # A tibble: 6 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.566 0.192 -2.94 0.003
## 2 hostile_agg -0.012 0.047 -0.25 0.803
## 3 crime_concerns_agg 0.034 0.037 0.927 0.355
## 4 punitiveness_8item 0.177 0.045 3.93 0
## 5 zs_rehabilitation_and_reform -1.07 0.127 -8.41 0
## 6 vader_compound -0.129 0.075 -1.72 0.086
If the cultural default interpretation is correct, liberals and conservatives should both use prosocial language to justify punishment — despite conservatives being more punitive on average. This section tests whether political orientation moderates the language patterns.
cat("\n")
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("POLITICAL IDEOLOGY MODERATION\n")
## POLITICAL IDEOLOGY MODERATION
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")
## ======================================================================
# Create political groups (matching main RMD: 1-3 = Liberal, 4 = Moderate, 5-7 = Conservative)
# Only derive the grouping if it is absent or entirely missing, so a
# pre-computed column from the pipeline takes precedence.
if(!"political_group" %in% names(df) || all(is.na(df$political_group))) {
df <- df %>%
mutate(political_group = case_when(
politid <= 3 ~ "Liberal",
politid == 4 ~ "Moderate",
politid >= 5 ~ "Conservative"
))
}
cat("Group sizes:\n")
## Group sizes:
print(table(df$political_group, useNA = "ifany"))
##
## Conservative Liberal Moderate
## 190 190 116
cat("\n")
# Mean punitiveness by group (context)
# Context table: conservatives ARE more punitive on attitude measures,
# which is what makes similar language across groups informative.
cat("Punitiveness by Political Group (context):\n")
## Punitiveness by Political Group (context):
pun_by_group <- df %>%
filter(!is.na(political_group)) %>%
group_by(political_group) %>%
summarise(
n = n(),
Mean_Punitiveness = round(mean(punitiveness_agg, na.rm = TRUE), 3),
SD_Punitiveness = round(sd(punitiveness_agg, na.rm = TRUE), 3),
Mean_Hostile = round(mean(hostile_agg, na.rm = TRUE), 3)
)
print(as.data.frame(pun_by_group), row.names = FALSE)
## political_group n Mean_Punitiveness SD_Punitiveness Mean_Hostile
## Conservative 190 0.259 0.742 4.125
## Liberal 190 -0.313 0.811 3.329
## Moderate 116 0.088 0.775 3.912
# Text features by group
# Group means of the key language measures; if the cultural-default account
# is right these should look similar across political groups.
cat("\n\nText Features by Political Group:\n")
##
##
## Text Features by Political Group:
cat("-" %>% rep(60) %>% paste(collapse = ""), "\n")
## ------------------------------------------------------------
text_by_politics <- df %>%
filter(!is.na(political_group)) %>%
group_by(political_group) %>%
summarise(
n = n(),
Prosocial_Sim = round(mean(sim_prosocial_mean, na.rm = TRUE), 3),
Dark_Sim = round(mean(sim_dark_mean, na.rm = TRUE), 3),
ProDark_Gap = round(mean(sim_prosocial_minus_dark, na.rm = TRUE), 3),
Sentiment = round(mean(vader_compound, na.rm = TRUE), 3),
Dict_Prosocial = round(mean(just_prosocial, na.rm = TRUE), 3),
Dict_Dark = round(mean(just_dark, na.rm = TRUE), 3)
)
print(as.data.frame(text_by_politics), row.names = FALSE)
## political_group n Prosocial_Sim Dark_Sim ProDark_Gap Sentiment
## Conservative 190 0.384 0.414 -0.030 -0.594
## Liberal 190 0.392 0.425 -0.033 -0.407
## Moderate 116 0.389 0.423 -0.034 -0.506
## Dict_Prosocial Dict_Dark
## 0.056 0.004
## 0.052 0.003
## 0.056 0.007
cat("\n=== INTERACTION: Political Orientation × Hostile Aggression → Language ===\n\n")
##
## === INTERACTION: Political Orientation × Hostile Aggression → Language ===
# Continuous interaction (politid × hostile_agg → prosocial language)
# Three parallel moderation models; in each, the hostile_agg:politid term
# tests whether the hostility–language link differs by political orientation.
int_model1 <- lm(sim_prosocial_mean ~ hostile_agg * politid + factor(vignette), data = df)
int_coefs1 <- tidy(int_model1) %>% filter(grepl("hostile_agg:politid", term))
cat("Prosocial Similarity ~ Hostile Agg × Political Orientation (+ vignette):\n")
## Prosocial Similarity ~ Hostile Agg × Political Orientation (+ vignette):
cat(sprintf(" Interaction: b = %.4f, p = %.4f\n",
int_coefs1$estimate, int_coefs1$p.value))
## Interaction: b = -0.0017, p = 0.2570
int_model2 <- lm(sim_prosocial_minus_dark ~ hostile_agg * politid + factor(vignette), data = df)
int_coefs2 <- tidy(int_model2) %>% filter(grepl("hostile_agg:politid", term))
cat(sprintf("\nPro-Dark Gap ~ Hostile Agg × Political Orientation (+ vignette):\n"))
##
## Pro-Dark Gap ~ Hostile Agg × Political Orientation (+ vignette):
cat(sprintf(" Interaction: b = %.4f, p = %.4f\n",
int_coefs2$estimate, int_coefs2$p.value))
## Interaction: b = -0.0016, p = 0.2416
int_model3 <- lm(vader_compound ~ hostile_agg * politid + factor(vignette), data = df)
int_coefs3 <- tidy(int_model3) %>% filter(grepl("hostile_agg:politid", term))
cat(sprintf("\nSentiment ~ Hostile Agg × Political Orientation (+ vignette):\n"))
##
## Sentiment ~ Hostile Agg × Political Orientation (+ vignette):
cat(sprintf(" Interaction: b = %.4f, p = %.4f\n\n",
int_coefs3$estimate, int_coefs3$p.value))
## Interaction: b = 0.0040, p = 0.7042
# Uncorrected alpha = .05 across the three interaction tests.
all_ns <- all(c(int_coefs1$p.value, int_coefs2$p.value, int_coefs3$p.value) >= 0.05)
if(all_ns) {
cat("RESULT: No significant interactions.\n")
cat("Political orientation does NOT moderate the language-hostility link.\n")
cat("The cultural default (prosocial framing regardless of attitudes) holds\n")
cat("equally for liberals and conservatives.\n")
} else {
cat("RESULT: At least one significant interaction detected.\n")
cat("Political orientation may moderate the language-hostility relationship.\n")
cat("Examine group-level correlations above for interpretation.\n")
}
## RESULT: No significant interactions.
## Political orientation does NOT moderate the language-hostility link.
## The cultural default (prosocial framing regardless of attitudes) holds
## equally for liberals and conservatives.
cat("\n")
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("TEXT FEATURES BY VIGNETTE\n")
## TEXT FEATURES BY VIGNETTE
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")
## ======================================================================
# Human-readable labels for the three crime vignettes, keyed by their code.
vignette_labels <- c("1" = "Stranger Felony-Murder",
"2" = "Domestic Violence",
"3" = "Organized Crime")
# Mean language features per vignette; `vignette` and `n` are excluded from
# rounding so the codes and counts stay unrounded.
vignette_text <- df %>%
group_by(vignette) %>%
summarise(
n = n(),
Prosocial_Sim = mean(sim_prosocial_mean, na.rm = TRUE),
Dark_Sim = mean(sim_dark_mean, na.rm = TRUE),
Sentiment = mean(vader_compound, na.rm = TRUE),
Word_Count = mean(text_combined_wordcount, na.rm = TRUE)
) %>%
mutate(across(where(is.numeric) & !c(vignette, n), ~round(., 3)))
vignette_text$Vignette_Label <- vignette_labels[as.character(vignette_text$vignette)]
print(vignette_text, row.names = FALSE)
## # A tibble: 3 × 7
## vignette n Prosocial_Sim Dark_Sim Sentiment Word_Count Vignette_Label
## <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1 168 0.396 0.432 -0.459 48 Stranger Felony-Mu…
## 2 2 176 0.381 0.418 -0.504 45.7 Domestic Violence
## 3 3 152 0.387 0.411 -0.546 51.6 Organized Crime
cat("\nANOVA: Prosocial Similarity by Vignette\n")
##
## ANOVA: Prosocial Similarity by Vignette
# One-way ANOVAs: do the language features differ across crime vignettes?
aov_prosocial <- aov(sim_prosocial_mean ~ factor(vignette), data = df)
print(summary(aov_prosocial))
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(vignette) 2 0.02 0.01041 1.61 0.2
## Residuals 493 3.18 0.00646
cat("\nANOVA: Dark Similarity by Vignette\n")
##
## ANOVA: Dark Similarity by Vignette
aov_dark <- aov(sim_dark_mean ~ factor(vignette), data = df)
print(summary(aov_dark))
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(vignette) 2 0.04 0.01834 2.39 0.093 .
## Residuals 493 3.79 0.00768
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\nANOVA: Sentiment by Vignette\n")
##
## ANOVA: Sentiment by Vignette
aov_sentiment <- aov(vader_compound ~ factor(vignette), data = df)
print(summary(aov_sentiment))
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(vignette) 2 0.6 0.308 1 0.37
## Residuals 493 152.1 0.309
cat("\n=== SAVING OUTPUTS ===\n\n")
##
## === SAVING OUTPUTS ===
write.csv(sentence_cors, "nlp_sentence_correlations.csv", row.names = FALSE)
cat("Saved: nlp_sentence_correlations.csv\n")
## Saved: nlp_sentence_correlations.csv
write.csv(as.data.frame(vignette_text), "nlp_vignette_text_features.csv", row.names = FALSE)
cat("Saved: nlp_vignette_text_features.csv\n")
## Saved: nlp_vignette_text_features.csv
# BUG FIX: `political_cors` is never created anywhere in this document (the
# political section produces `text_by_politics` instead), so the
# unconditional write.csv() would abort the render with "object not found"
# (it may have existed in the original authoring session's environment).
# Guard on existence and fall back to the political-group text-feature
# summary this document actually computes.
if (exists("political_cors")) {
  write.csv(as.data.frame(political_cors), "nlp_political_moderation.csv", row.names = FALSE)
  cat("Saved: nlp_political_moderation.csv\n")
} else if (exists("text_by_politics")) {
  write.csv(as.data.frame(text_by_politics), "nlp_political_moderation.csv", row.names = FALSE)
  cat("Saved: nlp_political_moderation.csv (text features by political group)\n")
} else {
  cat("Skipped: nlp_political_moderation.csv (no political summary object found)\n")
}
# Final summary: restates the headline numbers computed above so the
# knitted report ends with a self-contained recap.
cat("\n")
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("NLP INTEGRATION SUMMARY (R-SPECIFIC ANALYSES)\n")
## NLP INTEGRATION SUMMARY (R-SPECIFIC ANALYSES)
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")
## ======================================================================
cat("All regressions include factor(vignette) as covariate.\n\n")
## All regressions include factor(vignette) as covariate.
cat("1. INCREMENTAL VALIDITY:\n")
## 1. INCREMENTAL VALIDITY:
cat(sprintf(" ΔR² when adding text to psychological predictors: %.4f\n", r2_change))
## ΔR² when adding text to psychological predictors: 0.0552
# 0.01 is an informal threshold for "negligible" incremental variance.
if(r2_change < 0.01) {
cat(" Text features add negligible unique variance.\n")
cat(" Language reflects the psychological profile but does not improve prediction.\n\n")
} else {
cat(" Text features add modest unique variance.\n\n")
}
## Text features add modest unique variance.
cat("2. STANDARDIZED COEFFICIENTS:\n")
## 2. STANDARDIZED COEFFICIENTS:
cat(" See Section 1.2 for relative importance of text vs. psychological predictors.\n\n")
## See Section 1.2 for relative importance of text vs. psychological predictors.
cat("3. TEXT PREDICTING SENTENCING:\n")
## 3. TEXT PREDICTING SENTENCING:
# Guarded: sent_r2_change only exists if the sentencing Model-2 chunk ran
# (i.e., its text-feature columns were present in the data).
if(exists("sent_r2_change")) {
cat(sprintf(" ΔR² for text beyond psychological predictors: %.4f\n", sent_r2_change))
}
## ΔR² for text beyond psychological predictors: 0.1414
if("zs_rehabilitation_and_reform" %in% names(df)) {
rehab_r <- cor(df$zs_rehabilitation_and_reform, df$Sentence_z, use = "pairwise.complete.obs")
cat(sprintf(" Rehabilitation language × Sentence: r = %.3f\n", rehab_r))
}
## Rehabilitation language × Sentence: r = -0.451
sent_r <- cor(df$vader_compound, df$Sentence_z, use = "pairwise.complete.obs")
cat(sprintf(" Sentiment × Sentence: r = %.3f\n\n", sent_r))
## Sentiment × Sentence: r = -0.219
cat("4. POLITICAL IDEOLOGY MODERATION:\n")
## 4. POLITICAL IDEOLOGY MODERATION:
cat(" Liberals, moderates, and conservatives all use prosocial language.\n")
## Liberals, moderates, and conservatives all use prosocial language.
if(all_ns) {
cat(" No significant ideology × hostility interactions.\n")
cat(" Cultural default holds across the political spectrum.\n\n")
} else {
cat(" Some interactions detected — see Section 4.3.\n\n")
}
## No significant ideology × hostility interactions.
## Cultural default holds across the political spectrum.
cat("5. VIGNETTE MODERATION:\n")
## 5. VIGNETTE MODERATION:
cat(" See Section 5.1 for ANOVA results across crime types.\n\n")
## See Section 5.1 for ANOVA results across crime types.
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("For facade detection, cross-method convergence, competing interpretations,\n")
## For facade detection, cross-method convergence, competing interpretations,
cat("prototype sensitivity, and individual-level directional tests, see the\n")
## prototype sensitivity, and individual-level directional tests, see the
cat("Python NLP notebook (Punishment_212_NLP_Analysis_v2.ipynb).\n")
## Python NLP notebook (Punishment_212_NLP_Analysis_v2.ipynb).
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================