************************************
************************************
***SOCI 420: ADVANCED METHODS OF SOCIAL RESEARCH
***HYPOTHESIS TESTING I: THE ONE-SAMPLE CASE (chapter 8)
************************************
************************************

************************************
***CLEAR MEMORY
************************************
clear all

************************************
***CREATE SHORTCUTS AND LOG FILE
************************************
***Shortcut for folders
global codes  = "H:\course\codes"
global data   = "H:\course\data"
global output = "H:\course\output"

***Close any log left open by an earlier run that stopped before "log close".
***Without this, "log using" below stops with a "log file already open" error.
***"capture" suppresses the error raised when no log is currently open.
capture log close

***Start saving results window
log using "$codes\Stata08.log", replace text

************************************
***OPENING COMMANDS
************************************
***Tell Stata to not pause for "more" messages
set more off

***Open 2021 GSS
use "$data\GSS2021.dta", clear

***Complex survey design
***(weight wtssnrps, strata vstrat, PSU vpsu; strata with a single
***sampling unit are handled with the "scaled" variance adjustment)
svyset [weight=wtssnrps], strata(vstrat) psu(vpsu) singleunit(scaled)

************************************
***ONE-SAMPLE Z-TEST AND t-TEST
************************************
***Personal income of Veterans (GSS)
***compared to population 15+ (U.S. Census Bureau)
***https://fred.stlouisfed.org/series/MAPAINUSA646N#

***Mean personal income in 2021 (U.S. Census Bureau) = 57,143

***Generate dummy variable for Veterans
tab vetyears, m

gen veteran=.
  replace veteran=1 if vetyears>=1 & vetyears<=4 // Some years of active duty
  replace veteran=0 if vetyears==0 // No active duty
  
tab vetyears veteran, m

***Mean personal income of Veterans in 2021 (GSS)
***First line uses the complex survey design; second line is unweighted
svy, subpop(if veteran==1): mean conrinc
mean conrinc if veteran==1

***Is the mean income of Veterans ($49,562.49) significantly lower
***than mean income of population 15+ ($57,143)?

***Z-test (it does not allow the use of weight or complex survey design)
***ztest assumes a population standard deviation of 1 unless sd() is given,
***which would make the Z statistic meaningless for income in dollars.
***The population standard deviation is not known here, so the (unweighted)
***sample standard deviation of Veterans' income is used as its estimate.
quietly summarize conrinc if veteran==1
local sd_vet = r(sd)
ztest conrinc==57143 if veteran==1, sd(`sd_vet')

***t-test (it does not allow the use of weight or complex survey design)
ttest conrinc==57143 if veteran==1

************************************
***ONE-SAMPLE TEST OF PROPORTIONS
************************************
***Share of women among adult (18+) GSS respondents
***versus the share of women in the total population (U.S. Census Bureau)
***https://www.census.gov/quickfacts/fact/table/US/PST045221

***Benchmark: women were 50.5% of the total population in 2021 (U.S. Census Bureau)

***Inspect the coding of sex (with and without value labels)
tabulate sex
tabulate sex, nolabel

***Dummy for women: 1 if sex==2, 0 if sex==1, missing for any other value
generate female = (sex == 2) if inlist(sex, 1, 2)

***Check the new dummy against the original variable
tabulate sex female, missing

***Share of women 18+ in 2021 (GSS): weighted, then unweighted
tabulate female [aweight=wtssnrps]
tabulate female

***Question: is the share of women 18+ (55.94%) significantly higher
***than the share of women in the total population (50.5%)?

***One-sample proportion test (no weights or complex survey design allowed)
prtest female == 0.505

************************************
***CLOSING COMMANDS
************************************
***Save the data currently in memory (including the variables generated
***in this do-file) to the data folder, overwriting any existing file
save "$data\Stata08.dta", replace

***Stop logging and close the log file opened at the top of this do-file
log close