OVERVIEW

This document provides R-based analyses that complement the main NLP pipeline (run in Python/Colab). These analyses require R’s regression infrastructure and are not duplicated in the Python notebook.

What this document covers (unique to R):

Incremental Validity: Do text features predict punitiveness beyond psychological measures? (with vignette covariate)
Standardized Coefficients: Relative importance of text vs. psychological predictors
Text Predicting Sentencing: Regression models for sentence length (with vignette covariate)
Political Ideology Moderation: Does the cultural default hold across the political spectrum?
Vignette Moderation: ANOVA and vignette-specific patterns

What is handled in the Python notebook (not duplicated here):

Facade detection correlations (text × psychological measures)
Group comparisons (high vs. low hostile)
Cross-method convergence (BART ML, FC, Dictionary, BERT Similarity)
Competing interpretations (facade vs. sincerity vs. folk theory)
Deterrence regression / folk theory test
Individual-level directional tests (hostile_agg × all text features)
Prototype sensitivity analysis
Collective facade evidence

SECTION 0: SETUP

required_packages <- c(
  "tidyverse",
  "psych",
  "broom",
  "knitr",
  "car",
  "effectsize",
  "scales",
  "lmerTest",
  "performance"
)

new_packages <- required_packages[!(required_packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

lapply(required_packages, library, character.only = TRUE)

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "psych"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "broom"     "psych"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "knitr"     "broom"     "psych"     "lubridate" "forcats"   "stringr"  
##  [7] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [13] "tidyverse" "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"     
## 
## [[5]]
##  [1] "car"       "carData"   "knitr"     "broom"     "psych"     "lubridate"
##  [7] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"     "tidyr"    
## [13] "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics"  "grDevices"
## [19] "utils"     "datasets"  "methods"   "base"     
## 
## [[6]]
##  [1] "effectsize" "car"        "carData"    "knitr"      "broom"     
##  [6] "psych"      "lubridate"  "forcats"    "stringr"    "dplyr"     
## [11] "purrr"      "readr"      "tidyr"      "tibble"     "ggplot2"   
## [16] "tidyverse"  "stats"      "graphics"   "grDevices"  "utils"     
## [21] "datasets"   "methods"    "base"      
## 
## [[7]]
##  [1] "scales"     "effectsize" "car"        "carData"    "knitr"     
##  [6] "broom"      "psych"      "lubridate"  "forcats"    "stringr"   
## [11] "dplyr"      "purrr"      "readr"      "tidyr"      "tibble"    
## [16] "ggplot2"    "tidyverse"  "stats"      "graphics"   "grDevices" 
## [21] "utils"      "datasets"   "methods"    "base"      
## 
## [[8]]
##  [1] "lmerTest"   "lme4"       "Matrix"     "scales"     "effectsize"
##  [6] "car"        "carData"    "knitr"      "broom"      "psych"     
## [11] "lubridate"  "forcats"    "stringr"    "dplyr"      "purrr"     
## [16] "readr"      "tidyr"      "tibble"     "ggplot2"    "tidyverse" 
## [21] "stats"      "graphics"   "grDevices"  "utils"      "datasets"  
## [26] "methods"    "base"      
## 
## [[9]]
##  [1] "performance" "lmerTest"    "lme4"        "Matrix"      "scales"     
##  [6] "effectsize"  "car"         "carData"     "knitr"       "broom"      
## [11] "psych"       "lubridate"   "forcats"     "stringr"     "dplyr"      
## [16] "purrr"       "readr"       "tidyr"       "tibble"      "ggplot2"    
## [21] "tidyverse"   "stats"       "graphics"    "grDevices"   "utils"      
## [26] "datasets"    "methods"     "base"

options(scipen = 999)
options(digits = 4)

# Load the NLP features dataset (contains both NLP and psychological variables)
# UPDATE PATH AS NEEDED
df <- read.csv("/Users/dgkamper/Library/CloudStorage/[email protected]/My Drive/DGK Lab/Collaborations/Dan Simon/Punishment/Analysis/NLP Pipeline/Second Pass/punishment_212_nlp_features.csv", stringsAsFactors = FALSE)

cat("Loaded dataset: N =", nrow(df), "rows,", ncol(df), "columns\n\n")

## Loaded dataset: N = 496 rows, 258 columns

# Verify key variables
nlp_vars <- c("sim_prosocial_mean", "sim_dark_mean", "sim_prosocial_minus_dark",
              "vader_compound", "just_prosocial", "just_dark", 
              "zs_prosocial_mean", "zs_dark_mean", "facade_residual")
psych_vars <- c("punitiveness_agg", "hostile_agg", "crime_concerns_agg",
                "hatred_comp", "revenge_comp", "sdo_comp")

cat("NLP variables present:", sum(nlp_vars %in% names(df)), "/", length(nlp_vars), "\n")

## NLP variables present: 9 / 9

cat("Psych variables present:", sum(psych_vars %in% names(df)), "/", length(psych_vars), "\n")

## Psych variables present: 6 / 6

SECTION 1: INCREMENTAL VALIDITY

1.1 Do Text Features Predict Punitiveness Beyond Psychological Measures?

cat("\n")

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")

## ======================================================================

cat("INCREMENTAL VALIDITY: TEXT FEATURES PREDICTING PUNITIVENESS\n")

## INCREMENTAL VALIDITY: TEXT FEATURES PREDICTING PUNITIVENESS

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")

## ======================================================================

# All models include factor(vignette) to account for between-condition differences
# (Main RMD found vignette random effect variance ≈ 0, so fixed effect is conservative)

# Step 1: Psychological predictors only (+ vignette)
model1 <- lm(punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
               emotions_agg + personality_agg, data = df)

cat("Model 1: Psychological Predictors (+ vignette covariate)\n")

## Model 1: Psychological Predictors (+ vignette covariate)

cat(sprintf("  R² = %.3f, Adj R² = %.3f\n", 
            summary(model1)$r.squared, summary(model1)$adj.r.squared))

##   R² = 0.387, Adj R² = 0.380

cat("  Predictors: hostile_agg, crime_concerns_agg, emotions_agg, personality_agg\n\n")

##   Predictors: hostile_agg, crime_concerns_agg, emotions_agg, personality_agg

# Step 2: Add text features
model2 <- lm(punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
               emotions_agg + personality_agg +
               sim_prosocial_mean + sim_dark_mean + vader_compound, data = df)

cat("Model 2: Psychological + Text Features (+ vignette covariate)\n")

## Model 2: Psychological + Text Features (+ vignette covariate)

cat(sprintf("  R² = %.3f, Adj R² = %.3f\n\n", 
            summary(model2)$r.squared, summary(model2)$adj.r.squared))

##   R² = 0.443, Adj R² = 0.432

# Compare models
r2_change <- summary(model2)$r.squared - summary(model1)$r.squared
anova_comparison <- anova(model1, model2)

cat(sprintf("Incremental R² (ΔR²): %.4f\n\n", r2_change))

## Incremental R² (ΔR²): 0.0552

cat("Model comparison F-test:\n")

## Model comparison F-test:

print(anova_comparison)

## Analysis of Variance Table
## 
## Model 1: punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
##     emotions_agg + personality_agg
## Model 2: punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
##     emotions_agg + personality_agg + sim_prosocial_mean + sim_dark_mean + 
##     vader_compound
##   Res.Df RSS Df Sum of Sq  F        Pr(>F)    
## 1    489 202                                  
## 2    486 184  3      18.2 16 0.00000000059 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\nModel 2 Coefficients (excluding vignette dummies):\n")

## 
## Model 2 Coefficients (excluding vignette dummies):

print(tidy(model2) %>% 
        filter(!grepl("vignette", term)) %>%
        mutate(across(where(is.numeric), ~round(., 3))) %>%
        select(term, estimate, std.error, statistic, p.value))

## # A tibble: 8 × 5
##   term               estimate std.error statistic p.value
##   <chr>                 <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)          -1.77      0.2       -8.85   0    
## 2 hostile_agg           0.216     0.037      5.80   0    
## 3 crime_concerns_agg    0.051     0.027      1.91   0.057
## 4 emotions_agg          0.132     0.029      4.49   0    
## 5 personality_agg       0.082     0.036      2.30   0.022
## 6 sim_prosocial_mean   -2.31      0.439     -5.26   0    
## 7 sim_dark_mean         1.72      0.403      4.27   0    
## 8 vader_compound       -0.224     0.05      -4.46   0

# Robustness: Compare with and without vignette covariate
model2_novign <- lm(punitiveness_agg ~ hostile_agg + crime_concerns_agg + 
                      emotions_agg + personality_agg +
                      sim_prosocial_mean + sim_dark_mean + vader_compound, data = df)
cat(sprintf("\nRobustness: R² without vignette covariate: %.3f (diff = %.4f)\n",
            summary(model2_novign)$r.squared,
            summary(model2)$r.squared - summary(model2_novign)$r.squared))

## 
## Robustness: R² without vignette covariate: 0.442 (diff = 0.0010)

1.2 Standardized Coefficients

cat("\n=== STANDARDIZED COEFFICIENTS (β) ===\n\n")

## 
## === STANDARDIZED COEFFICIENTS (β) ===

cat("(Vignette dummies included but not shown; continuous predictors standardized)\n\n")

## (Vignette dummies included but not shown; continuous predictors standardized)

model2_std <- lm(scale(punitiveness_agg) ~ factor(vignette) + 
                   scale(hostile_agg) + scale(crime_concerns_agg) + 
                   scale(emotions_agg) + scale(personality_agg) +
                   scale(sim_prosocial_mean) + scale(sim_dark_mean) + 
                   scale(vader_compound), data = df)

std_coefs <- tidy(model2_std) %>%
  filter(term != "(Intercept)" & !grepl("vignette", term)) %>%
  mutate(
    term = c("Hostile Agg", "Crime Concerns", "Emotions", "Personality",
             "Prosocial Sim", "Dark Sim", "Sentiment"),
    estimate = round(estimate, 3),
    p.value = round(p.value, 4)
  ) %>%
  select(Predictor = term, Beta = estimate, p = p.value) %>%
  arrange(desc(abs(Beta)))

print(std_coefs, row.names = FALSE)

## # A tibble: 7 × 3
##   Predictor        Beta      p
##   <chr>           <dbl>  <dbl>
## 1 Hostile Agg     0.319 0     
## 2 Prosocial Sim  -0.228 0     
## 3 Emotions        0.22  0     
## 4 Dark Sim        0.185 0     
## 5 Sentiment      -0.153 0     
## 6 Personality     0.106 0.022 
## 7 Crime Concerns  0.073 0.0568

cat("\nInterpretation:\n")

## 
## Interpretation:

text_betas <- std_coefs %>% filter(Predictor %in% c("Prosocial Sim", "Dark Sim", "Sentiment"))
max_text_beta <- max(abs(text_betas$Beta))
max_psych_beta <- max(abs(std_coefs$Beta[!std_coefs$Predictor %in% c("Prosocial Sim", "Dark Sim", "Sentiment")]))

cat(sprintf("Largest text feature β: %.3f\n", max_text_beta))

## Largest text feature β: 0.228

cat(sprintf("Largest psychological β: %.3f\n", max_psych_beta))

## Largest psychological β: 0.319

if(max_text_beta < 0.10) {
  cat("Text features show minimal unique contribution beyond psychological measures.\n")
  cat("Language reflects the psychological profile but does not add to it.\n")
} else {
  cat("Text features show some unique contribution beyond psychological measures.\n")
}

## Text features show some unique contribution beyond psychological measures.

SECTION 2: TEXT PREDICTING SENTENCING BEHAVIOR

2.1 Correlations: Text Features × Sentence Length

cat("\n")

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")

## ======================================================================

cat("TEXT FEATURES PREDICTING SENTENCING BEHAVIOR\n")

## TEXT FEATURES PREDICTING SENTENCING BEHAVIOR

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")

## ======================================================================

text_for_sentence <- c("sim_prosocial_mean", "sim_dark_mean", 
                       "sim_proto_rehabilitation", "sim_proto_deterrence",
                       "sim_proto_revenge", "sim_proto_suffering",
                       "zs_rehabilitation_and_reform", "zs_deterrence_and_prevention",
                       "zs_revenge_and_payback", "zs_punishment_and_suffering",
                       "vader_compound", "just_dark", "just_prosocial")
text_for_sentence <- text_for_sentence[text_for_sentence %in% names(df)]

sentence_cors <- data.frame()
for(var in text_for_sentence) {
  test <- cor.test(df[[var]], df$Sentence_z, use = "pairwise.complete.obs")
  sentence_cors <- rbind(sentence_cors, data.frame(
    Text_Feature = var,
    r = round(test$estimate, 3),
    p = test$p.value,
    sig = ifelse(test$p.value < .001, "***",
                 ifelse(test$p.value < .01, "**",
                        ifelse(test$p.value < .05, "*", "")))
  ))
}

sentence_cors <- sentence_cors %>% arrange(r)
cat("Text Features × Sentence (z-scored):\n")

## Text Features × Sentence (z-scored):

print(sentence_cors %>% mutate(p = format(p, scientific = FALSE, digits = 4)), row.names = FALSE)

##                  Text_Feature      r                               p sig
##  zs_rehabilitation_and_reform -0.451 0.00000000000000000000000002864 ***
##                vader_compound -0.219 0.00000087362351588873359931558 ***
##      sim_proto_rehabilitation -0.212 0.00000195824699748705184702792 ***
##            sim_prosocial_mean -0.093 0.03749698177493523965075894466   *
##                just_prosocial -0.084 0.06020641098299508997548912248    
##          sim_proto_deterrence -0.082 0.06640940330856597018094333862    
##           sim_proto_suffering  0.051 0.25998734759621255907546810704    
##             sim_proto_revenge  0.082 0.06817465281839056590040826222    
##                 sim_dark_mean  0.109 0.01517783457620437009838454401   *
##                     just_dark  0.156 0.00047514372812243872669021871 ***
##   zs_punishment_and_suffering  0.164 0.00023952531985010720987777644 ***
##  zs_deterrence_and_prevention  0.215 0.00000142552426173001954844448 ***
##        zs_revenge_and_payback  0.291 0.00000000003953194372538350949 ***

2.2 Regression: Psychological + Text Predicting Sentence

cat("\n=== REGRESSION: SENTENCE LENGTH ===\n\n")

## 
## === REGRESSION: SENTENCE LENGTH ===

cat("Note: Sentence_z is already z-scored within vignette, but we include\n")

## Note: Sentence_z is already z-scored within vignette, but we include

cat("vignette as covariate for robustness.\n\n")

## vignette as covariate for robustness.

# Model 1: Psychological predictors (+ vignette)
sent_model1 <- lm(Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
                    punitiveness_8item, data = df)

cat("Model 1: Psychological Predictors (+ vignette)\n")

## Model 1: Psychological Predictors (+ vignette)

cat(sprintf("  R² = %.3f, Adj R² = %.3f\n\n", 
            summary(sent_model1)$r.squared, summary(sent_model1)$adj.r.squared))

##   R² = 0.117, Adj R² = 0.108

# Model 2: Add text features
sent_text_vars <- c("zs_rehabilitation_and_reform", "vader_compound")
sent_text_vars <- sent_text_vars[sent_text_vars %in% names(df)]

if(length(sent_text_vars) > 0) {
  formula_str <- paste("Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg + punitiveness_8item +",
                       paste(sent_text_vars, collapse = " + "))
  sent_model2 <- lm(as.formula(formula_str), data = df)
  
  cat("Model 2: Psychological + Text Features (+ vignette)\n")
  cat(sprintf("  R² = %.3f, Adj R² = %.3f\n", 
              summary(sent_model2)$r.squared, summary(sent_model2)$adj.r.squared))
  sent_r2_change <- summary(sent_model2)$r.squared - summary(sent_model1)$r.squared
  cat(sprintf("  ΔR² = %.4f\n\n", sent_r2_change))
  
  cat("Model comparison:\n")
  print(anova(sent_model1, sent_model2))
  
  cat("\nModel 2 Coefficients (excluding vignette dummies):\n")
  print(tidy(sent_model2) %>% 
          filter(!grepl("vignette", term)) %>%
          mutate(across(where(is.numeric), ~round(., 3))))
}

## Model 2: Psychological + Text Features (+ vignette)
##   R² = 0.258, Adj R² = 0.247
##   ΔR² = 0.1414
## 
## Model comparison:
## Analysis of Variance Table
## 
## Model 1: Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
##     punitiveness_8item
## Model 2: Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
##     punitiveness_8item + zs_rehabilitation_and_reform + vader_compound
##   Res.Df RSS Df Sum of Sq    F              Pr(>F)    
## 1    490 435                                          
## 2    488 366  2      69.7 46.5 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model 2 Coefficients (excluding vignette dummies):
## # A tibble: 6 × 5
##   term                         estimate std.error statistic p.value
##   <chr>                           <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)                    -0.566     0.192    -2.94    0.003
## 2 hostile_agg                    -0.012     0.047    -0.25    0.803
## 3 crime_concerns_agg              0.034     0.037     0.927   0.355
## 4 punitiveness_8item              0.177     0.045     3.93    0    
## 5 zs_rehabilitation_and_reform   -1.07      0.127    -8.41    0    
## 6 vader_compound                 -0.129     0.075    -1.72    0.086

SECTION 3: VISUALIZATION

3.1 Scatter: Hostile Aggression × Prosocial Language

test1 <- cor.test(df$sim_prosocial_mean, df$hostile_agg)

ggplot(df, aes(x = hostile_agg, y = sim_prosocial_mean, color = punitiveness_agg)) +
  geom_point(alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "black", linetype = "dashed") +
  scale_color_gradient2(low = "#3498DB", mid = "#F1C40F", high = "#E74C3C",
                        midpoint = mean(df$punitiveness_agg, na.rm = TRUE),
                        name = "Punitiveness") +
  labs(x = "Hostile Aggression (Quantitative)",
       y = "Prosocial Language Similarity",
       title = "Hostile Aggression vs. Prosocial Language",
       subtitle = sprintf("r = %.3f, p = %.4f", 
                          test1$estimate, test1$p.value)) +
  theme_minimal() +
  theme(legend.position = "right")

ggsave("facade_scatter_plot.png", width = 10, height = 6, dpi = 150)
cat("Saved: facade_scatter_plot.png\n")

## Saved: facade_scatter_plot.png

SECTION 4: POLITICAL IDEOLOGY MODERATION

If the cultural default interpretation is correct, liberals and conservatives should both use prosocial language to justify punishment — despite conservatives being more punitive on average. This section tests whether political orientation moderates the language patterns.

4.1 Language Features by Political Group

cat("\n")

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")

## ======================================================================

cat("POLITICAL IDEOLOGY MODERATION\n")

## POLITICAL IDEOLOGY MODERATION

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")

## ======================================================================

# Create political groups (matching main RMD: 1-3 = Liberal, 4 = Moderate, 5-7 = Conservative)
if(!"political_group" %in% names(df) || all(is.na(df$political_group))) {
  df <- df %>%
    mutate(political_group = case_when(
      politid <= 3 ~ "Liberal",
      politid == 4 ~ "Moderate",
      politid >= 5 ~ "Conservative"
    ))
}

cat("Group sizes:\n")

## Group sizes:

print(table(df$political_group, useNA = "ifany"))

## 
## Conservative      Liberal     Moderate 
##          190          190          116

cat("\n")

# Mean punitiveness by group (context)
cat("Punitiveness by Political Group (context):\n")

## Punitiveness by Political Group (context):

pun_by_group <- df %>%
  filter(!is.na(political_group)) %>%
  group_by(political_group) %>%
  summarise(
    n = n(),
    Mean_Punitiveness = round(mean(punitiveness_agg, na.rm = TRUE), 3),
    SD_Punitiveness = round(sd(punitiveness_agg, na.rm = TRUE), 3),
    Mean_Hostile = round(mean(hostile_agg, na.rm = TRUE), 3)
  )
print(as.data.frame(pun_by_group), row.names = FALSE)

##  political_group   n Mean_Punitiveness SD_Punitiveness Mean_Hostile
##     Conservative 190             0.259           0.742        4.125
##          Liberal 190            -0.313           0.811        3.329
##         Moderate 116             0.088           0.775        3.912

# Text features by group
cat("\n\nText Features by Political Group:\n")

## 
## 
## Text Features by Political Group:

cat("-" %>% rep(60) %>% paste(collapse = ""), "\n")

## ------------------------------------------------------------

text_by_politics <- df %>%
  filter(!is.na(political_group)) %>%
  group_by(political_group) %>%
  summarise(
    n = n(),
    Prosocial_Sim = round(mean(sim_prosocial_mean, na.rm = TRUE), 3),
    Dark_Sim = round(mean(sim_dark_mean, na.rm = TRUE), 3),
    ProDark_Gap = round(mean(sim_prosocial_minus_dark, na.rm = TRUE), 3),
    Sentiment = round(mean(vader_compound, na.rm = TRUE), 3),
    Dict_Prosocial = round(mean(just_prosocial, na.rm = TRUE), 3),
    Dict_Dark = round(mean(just_dark, na.rm = TRUE), 3)
  )
print(as.data.frame(text_by_politics), row.names = FALSE)

##  political_group   n Prosocial_Sim Dark_Sim ProDark_Gap Sentiment
##     Conservative 190         0.384    0.414      -0.030    -0.594
##          Liberal 190         0.392    0.425      -0.033    -0.407
##         Moderate 116         0.389    0.423      -0.034    -0.506
##  Dict_Prosocial Dict_Dark
##           0.056     0.004
##           0.052     0.003
##           0.056     0.007

4.2 Cultural Default Test: Prosocial Language × Hostile Aggression by Group

cat("\n\nKey test: Does hostile_agg × prosocial language differ by political group?\n")

## 
## 
## Key test: Does hostile_agg × prosocial language differ by political group?

cat("If cultural default holds, correlations should be near zero for ALL groups.\n\n")

## If cultural default holds, correlations should be near zero for ALL groups.

political_cors <- df %>%
  filter(!is.na(political_group)) %>%
  group_by(political_group) %>%
  summarise(
    n = n(),
    r_prosocial_hostile = round(cor(sim_prosocial_mean, hostile_agg, use = "pairwise.complete.obs"), 3),
    r_dark_hostile = round(cor(sim_dark_mean, hostile_agg, use = "pairwise.complete.obs"), 3),
    r_sentiment_hostile = round(cor(vader_compound, hostile_agg, use = "pairwise.complete.obs"), 3),
    r_sentiment_pun = round(cor(vader_compound, punitiveness_agg, use = "pairwise.complete.obs"), 3),
    pct_closer_dark = round(mean(sim_prosocial_minus_dark < 0, na.rm = TRUE) * 100, 1)
  )
print(as.data.frame(political_cors), row.names = FALSE)

##  political_group   n r_prosocial_hostile r_dark_hostile r_sentiment_hostile
##     Conservative 190              -0.075          0.042               0.015
##          Liberal 190               0.026          0.050               0.028
##         Moderate 116              -0.046         -0.114              -0.112
##  r_sentiment_pun pct_closer_dark
##           -0.054            64.7
##           -0.159            67.9
##           -0.303            71.6

cat("\nInterpretation:\n")

## 
## Interpretation:

cat("If r_prosocial_hostile is near zero across all groups → cultural default holds\n")

## If r_prosocial_hostile is near zero across all groups → cultural default holds

cat("universally: liberals, moderates, and conservatives all use prosocial framing\n")

## universally: liberals, moderates, and conservatives all use prosocial framing

cat("regardless of their hostile attitudes.\n\n")

## regardless of their hostile attitudes.

cat("If pct_closer_dark is similar across groups → the semantic mismatch\n")

## If pct_closer_dark is similar across groups → the semantic mismatch

cat("(prosocial words, dark meaning) is not a conservative or liberal phenomenon.\n")

## (prosocial words, dark meaning) is not a conservative or liberal phenomenon.

4.3 Interaction Test: Does Political Orientation Moderate the Language-Hostility Link?

cat("\n=== INTERACTION: Political Orientation × Hostile Aggression → Language ===\n\n")

## 
## === INTERACTION: Political Orientation × Hostile Aggression → Language ===

# Continuous interaction (politid × hostile_agg → prosocial language)
int_model1 <- lm(sim_prosocial_mean ~ hostile_agg * politid + factor(vignette), data = df)
int_coefs1 <- tidy(int_model1) %>% filter(grepl("hostile_agg:politid", term))

cat("Prosocial Similarity ~ Hostile Agg × Political Orientation (+ vignette):\n")

## Prosocial Similarity ~ Hostile Agg × Political Orientation (+ vignette):

cat(sprintf("  Interaction: b = %.4f, p = %.4f\n",
            int_coefs1$estimate, int_coefs1$p.value))

##   Interaction: b = -0.0017, p = 0.2570

int_model2 <- lm(sim_prosocial_minus_dark ~ hostile_agg * politid + factor(vignette), data = df)
int_coefs2 <- tidy(int_model2) %>% filter(grepl("hostile_agg:politid", term))

cat(sprintf("\nPro-Dark Gap ~ Hostile Agg × Political Orientation (+ vignette):\n"))

## 
## Pro-Dark Gap ~ Hostile Agg × Political Orientation (+ vignette):

cat(sprintf("  Interaction: b = %.4f, p = %.4f\n",
            int_coefs2$estimate, int_coefs2$p.value))

##   Interaction: b = -0.0016, p = 0.2416

int_model3 <- lm(vader_compound ~ hostile_agg * politid + factor(vignette), data = df)
int_coefs3 <- tidy(int_model3) %>% filter(grepl("hostile_agg:politid", term))

cat(sprintf("\nSentiment ~ Hostile Agg × Political Orientation (+ vignette):\n"))

## 
## Sentiment ~ Hostile Agg × Political Orientation (+ vignette):

cat(sprintf("  Interaction: b = %.4f, p = %.4f\n\n",
            int_coefs3$estimate, int_coefs3$p.value))

##   Interaction: b = 0.0040, p = 0.7042

all_ns <- all(c(int_coefs1$p.value, int_coefs2$p.value, int_coefs3$p.value) >= 0.05)
if(all_ns) {
  cat("RESULT: No significant interactions.\n")
  cat("Political orientation does NOT moderate the language-hostility link.\n")
  cat("The cultural default (prosocial framing regardless of attitudes) holds\n")
  cat("equally for liberals and conservatives.\n")
} else {
  cat("RESULT: At least one significant interaction detected.\n")
  cat("Political orientation may moderate the language-hostility relationship.\n")
  cat("Examine group-level correlations above for interpretation.\n")
}

## RESULT: No significant interactions.
## Political orientation does NOT moderate the language-hostility link.
## The cultural default (prosocial framing regardless of attitudes) holds
## equally for liberals and conservatives.

SECTION 5: VIGNETTE MODERATION

5.1 Text Features by Vignette

cat("\n")

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")

## ======================================================================

cat("TEXT FEATURES BY VIGNETTE\n")

## TEXT FEATURES BY VIGNETTE

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")

## ======================================================================

vignette_labels <- c("1" = "Stranger Felony-Murder", 
                     "2" = "Domestic Violence", 
                     "3" = "Organized Crime")

vignette_text <- df %>%
  group_by(vignette) %>%
  summarise(
    n = n(),
    Prosocial_Sim = mean(sim_prosocial_mean, na.rm = TRUE),
    Dark_Sim = mean(sim_dark_mean, na.rm = TRUE),
    Sentiment = mean(vader_compound, na.rm = TRUE),
    Word_Count = mean(text_combined_wordcount, na.rm = TRUE)
  ) %>%
  mutate(across(where(is.numeric) & !c(vignette, n), ~round(., 3)))

vignette_text$Vignette_Label <- vignette_labels[as.character(vignette_text$vignette)]

print(vignette_text, row.names = FALSE)

## # A tibble: 3 × 7
##   vignette     n Prosocial_Sim Dark_Sim Sentiment Word_Count Vignette_Label     
##      <int> <int>         <dbl>    <dbl>     <dbl>      <dbl> <chr>              
## 1        1   168         0.396    0.432    -0.459       48   Stranger Felony-Mu…
## 2        2   176         0.381    0.418    -0.504       45.7 Domestic Violence  
## 3        3   152         0.387    0.411    -0.546       51.6 Organized Crime

cat("\nANOVA: Prosocial Similarity by Vignette\n")

## 
## ANOVA: Prosocial Similarity by Vignette

aov_prosocial <- aov(sim_prosocial_mean ~ factor(vignette), data = df)
print(summary(aov_prosocial))

##                   Df Sum Sq Mean Sq F value Pr(>F)
## factor(vignette)   2   0.02 0.01041    1.61    0.2
## Residuals        493   3.18 0.00646

cat("\nANOVA: Dark Similarity by Vignette\n")

## 
## ANOVA: Dark Similarity by Vignette

aov_dark <- aov(sim_dark_mean ~ factor(vignette), data = df)
print(summary(aov_dark))

##                   Df Sum Sq Mean Sq F value Pr(>F)  
## factor(vignette)   2   0.04 0.01834    2.39  0.093 .
## Residuals        493   3.79 0.00768                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\nANOVA: Sentiment by Vignette\n")

## 
## ANOVA: Sentiment by Vignette

aov_sentiment <- aov(vader_compound ~ factor(vignette), data = df)
print(summary(aov_sentiment))

##                   Df Sum Sq Mean Sq F value Pr(>F)
## factor(vignette)   2    0.6   0.308       1   0.37
## Residuals        493  152.1   0.309

SECTION 6: EXPORT AND SUMMARY

cat("\n=== SAVING OUTPUTS ===\n\n")

## 
## === SAVING OUTPUTS ===

write.csv(sentence_cors, "nlp_sentence_correlations.csv", row.names = FALSE)
cat("Saved: nlp_sentence_correlations.csv\n")

## Saved: nlp_sentence_correlations.csv

write.csv(as.data.frame(vignette_text), "nlp_vignette_text_features.csv", row.names = FALSE)
cat("Saved: nlp_vignette_text_features.csv\n")

## Saved: nlp_vignette_text_features.csv

write.csv(as.data.frame(political_cors), "nlp_political_moderation.csv", row.names = FALSE)
cat("Saved: nlp_political_moderation.csv\n")

## Saved: nlp_political_moderation.csv

cat("\n")

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")

## ======================================================================

cat("NLP INTEGRATION SUMMARY (R-SPECIFIC ANALYSES)\n")

## NLP INTEGRATION SUMMARY (R-SPECIFIC ANALYSES)

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")

## ======================================================================

cat("All regressions include factor(vignette) as covariate.\n\n")

## All regressions include factor(vignette) as covariate.

cat("1. INCREMENTAL VALIDITY:\n")

## 1. INCREMENTAL VALIDITY:

cat(sprintf("   ΔR² when adding text to psychological predictors: %.4f\n", r2_change))

##    ΔR² when adding text to psychological predictors: 0.0552

if(r2_change < 0.01) {
  cat("   Text features add negligible unique variance.\n")
  cat("   Language reflects the psychological profile but does not improve prediction.\n\n")
} else {
  cat("   Text features add modest unique variance.\n\n")
}

##    Text features add modest unique variance.

cat("2. STANDARDIZED COEFFICIENTS:\n")

## 2. STANDARDIZED COEFFICIENTS:

cat("   See Section 1.2 for relative importance of text vs. psychological predictors.\n\n")

##    See Section 1.2 for relative importance of text vs. psychological predictors.

cat("3. TEXT PREDICTING SENTENCING:\n")

## 3. TEXT PREDICTING SENTENCING:

if(exists("sent_r2_change")) {
  cat(sprintf("   ΔR² for text beyond psychological predictors: %.4f\n", sent_r2_change))
}

##    ΔR² for text beyond psychological predictors: 0.1414

if("zs_rehabilitation_and_reform" %in% names(df)) {
  rehab_r <- cor(df$zs_rehabilitation_and_reform, df$Sentence_z, use = "pairwise.complete.obs")
  cat(sprintf("   Rehabilitation language × Sentence: r = %.3f\n", rehab_r))
}

##    Rehabilitation language × Sentence: r = -0.451

sent_r <- cor(df$vader_compound, df$Sentence_z, use = "pairwise.complete.obs")
cat(sprintf("   Sentiment × Sentence: r = %.3f\n\n", sent_r))

##    Sentiment × Sentence: r = -0.219

cat("4. POLITICAL IDEOLOGY MODERATION:\n")

## 4. POLITICAL IDEOLOGY MODERATION:

cat("   Liberals, moderates, and conservatives all use prosocial language.\n")

##    Liberals, moderates, and conservatives all use prosocial language.

if(all_ns) {
  cat("   No significant ideology × hostility interactions.\n")
  cat("   Cultural default holds across the political spectrum.\n\n")
} else {
  cat("   Some interactions detected — see Section 4.3.\n\n")
}

##    No significant ideology × hostility interactions.
##    Cultural default holds across the political spectrum.

cat("5. VIGNETTE MODERATION:\n")

## 5. VIGNETTE MODERATION:

cat("   See Section 5.1 for ANOVA results across crime types.\n\n")

##    See Section 5.1 for ANOVA results across crime types.

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")

## ======================================================================

cat("For facade detection, cross-method convergence, competing interpretations,\n")

## For facade detection, cross-method convergence, competing interpretations,

cat("prototype sensitivity, and individual-level directional tests, see the\n")

## prototype sensitivity, and individual-level directional tests, see the

cat("Python NLP notebook (Punishment_212_NLP_Analysis_v2.ipynb).\n")

## Python NLP notebook (Punishment_212_NLP_Analysis_v2.ipynb).

cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")

## ======================================================================

Punishment Study 2.1.2 - NLP Integration Analysis

DGK

February 2026