************************************
************************************
***SOCI 600: INTRODUCTION TO SOCIOLOGICAL DATA ANALYSIS
***MEASURES OF CENTRAL TENDENCY AND DISPERSION
************************************
************************************

************************************
***CLEAR MEMORY
************************************
***Start from an empty Stata session
clear all

************************************
***CREATE SHORTCUTS AND LOG FILE
************************************
***Shortcut for folders (edit these three paths to match your computer)
global codes  = "H:\course\codes"
global data   = "H:\course\data"
global output = "H:\course\output"

***Close any log left open by a previous run of this do-file
***(for example, a run that stopped on an error before "log close").
***Without this, "log using" below stops with "log file already open".
***"capture" suppresses the error when no log is open.
capture log close

***Start saving results window as a plain-text log, overwriting the old one
log using "$codes\Stata03.log", replace text

************************************
***OPENING COMMANDS
************************************
***Tell Stata to not pause for "more" messages
set more off

***Open 2019 ACS (only Texas)
use "$data\ACS2019.dta", clear

***Complex survey design
***cluster = sampling unit, perwt = person sampling weight, strata = stratum;
***singleunit(scaled) sets how strata with a single sampling unit are handled
svyset cluster [pweight=perwt], strata(strata) singleunit(scaled)

************************************
***GENERATE VARIABLES
************************************
***Sex
***Indicator: 0 = male (sex==1), 1 = female (sex==2);
***any other value of sex is left missing
generate female = (sex==2) if inlist(sex, 1, 2)

label define female 0 "Male" 1 "Female"
label values female female

***Race/ethnicity
***Hispanic origin takes precedence: every non-Hispanic category requires
***hispan==0, and anyone with hispan between 1 and 4 is coded Hispanic.
***Observations matching none of the rules stay missing.
gen raceth=.
  replace raceth=1 if race==1 & hispan==0 // White
  replace raceth=2 if race==2 & hispan==0 // Black
  replace raceth=3 if inrange(hispan,1,4) // Hispanic
  replace raceth=4 if inlist(race,4,5,6) & hispan==0 // Asian
  replace raceth=5 if race==3 & hispan==0 // Native American
  replace raceth=6 if inlist(race,7,8,9) & hispan==0 // Other

***Value labels (the last label previously read "Ohter races")
label define raceth 1 "White" 2 "African American" 3 "Hispanic" ///
                    4 "Asian" 5 "Native American" 6 "Other races"
label values raceth raceth

***Age
***cut() stores the lower bound of each interval (0, 16, 20, ..., 65);
***ages at or above the last break (100) are left missing
egen agegr = cut(age), at(0,16,20,25,35,45,55,65,100)

***The value label is named "agecode", so that is the name that must be
***attached to agegr. Attaching a label called "agegr" (which is never
***defined) leaves the age groups displayed as bare numbers.
label define agecode 0 "0-15" 16 "16-19" 20 "20-24" 25 "25-34" ///
                     35 "35-44" 45 "45-54" 55 "55-64" 65 "65-100"
label values agegr agecode

***Educational attainment
***Collapse the detailed educ codes into five ordered groups;
***codes not listed below (and missing educ) remain missing
generate educgr = .
  replace educgr = 1 if inrange(educ, 0, 5)  // Less than high school
  replace educgr = 2 if educ == 6            // High school
  replace educgr = 3 if inlist(educ, 7, 8)   // Some college
  replace educgr = 4 if educ == 10           // College
  replace educgr = 5 if educ == 11           // 5+ years of college, graduate school

label define educgr 1 "Less than high school" ///
                    2 "High school"           ///
                    3 "Some college"          ///
                    4 "College"               ///
                    5 "Graduate school"
label values educgr educgr

***Marital status
***Three groups built from marst; unlisted codes remain missing
generate marital = .
  replace marital = 1 if inlist(marst, 1, 2)   // Married
  replace marital = 2 if inrange(marst, 3, 5)  // Separated, divorced, widowed
  replace marital = 3 if marst == 6            // Never married, single

label define marital 1 "Married"                      ///
                     2 "Separated, divorced, widowed" ///
                     3 "Never married"
label values marital marital

***Migration status
***Three groups built from migrate1d; unlisted codes remain missing
generate migrant = .
  replace migrant = 1 if inlist(migrate1d, 10, 23)    // same house or within PUMA
  replace migrant = 2 if inrange(migrate1d, 24, 32)   // internal migrant
  replace migrant = 3 if migrate1d == 40              // international migrant

label define migrant 1 "Non-migrant"          ///
                     2 "Internal migrant"     ///
                     3 "International migrant"
label values migrant migrant

***Wage and salary income
***Copy incwage, leaving the special codes as missing:
***  999999 = N/A and 999998 = Missing in the IPUMS codebook for INCWAGE.
***Filtering only 999999 would let 999998 enter as an income of $999,998.
***A missing incwage also fails the condition, so income stays missing.
gen income = incwage if incwage < 999998

************************************
***INCOME BY CATEGORIES OF ONE VARIABLE
************************************
***Note: this section uses local macros, so run it as a whole
***(locals do not survive between separately executed selections)

***Statistics reported in every table of this section
local stats min p25 p50 p75 max iqr mean sd

***Income (overall), weighted by perwt and excluding zero incomes
tabstat income if income!=0 [fweight=perwt], stat(`stats')

***Income by sex, race/ethnicity, age group, educational attainment,
***marital status, and migrant status (one table per grouping variable)
foreach groupvar in female raceth agegr educgr marital migrant {
  tabstat income if income!=0 [fweight=perwt], by(`groupvar') stat(`stats')
}

************************************
***INCOME BY COMBINATIONS OF MORE THAN ONE VARIABLE
************************************
***Note: this section uses local macros, so run it as a whole

***Build the list of requested statistics:
***stat(min income) stat(p25 income) ... stat(sd income)
local tabstats
foreach s in min p25 p50 p75 max iqr mean sd {
  local tabstats `tabstats' stat(`s' income)
}

***Income by sex (columns) and, in turn, race/ethnicity, age group,
***and educational attainment (rows)
foreach rowvar in raceth agegr educgr {
  table `rowvar' female if income!=0 [fweight=perwt], `tabstats'
}

************************************
***INCOME WITH COMPLEX SAMPLE DESIGN
************************************
***Note: this section uses local macros, so run it as a whole

***Analytic sample used throughout: nonmissing and nonzero income
local sample income!=. & income!=0

***No weight
mean income if `sample'
estat sd

***Weight
***It corrects the mean
mean income if `sample' [fweight=perwt]
estat sd

***Complex survey design
***It corrects the mean, standard error, and standard deviation
***subpop() restricts estimation to the analytic sample
svy, subpop(if `sample'): mean income
estat sd

***Income by sex, race/ethnicity, age group, educational attainment,
***marital status, and migrant status
foreach groupvar in female raceth agegr educgr marital migrant {
  svy, subpop(if `sample'): mean income, over(`groupvar')
  estat sd
}

***Income by sex combined with race/ethnicity, age group, and
***educational attainment; each pair is estimated in both orders,
***which changes only how the results are arranged
foreach groupvar in raceth agegr educgr {
  svy, subpop(if `sample'): mean income, over(female `groupvar')
  estat sd

  svy, subpop(if `sample'): mean income, over(`groupvar' female)
  estat sd
}

************************************
***LINE GRAPH - MEAN INCOME BY AGE
************************************
***Generate variable with the WEIGHTED mean income by age
***egen's mean() does not accept weights, and putting [fweight=perwt] on
***"twoway line" does not reweight a mean that is already computed.
***The weighted mean is therefore built by hand, within each age, as
***  sum(income*perwt) / sum(perwt)
***over persons with nonmissing, nonzero income.
sort age
gen double wt_valid = perwt if income!=0 & income<.
by age: egen double inc_wsum = total(income*wt_valid) if income!=0
by age: egen double wt_sum   = total(wt_valid) if income!=0
***Ages with no valid income give 0/0, which Stata stores as missing
gen mincage = inc_wsum/wt_sum
drop wt_valid inc_wsum wt_sum
sum mincage, d

***Line graph of income by age
***Data are sorted by age (see above), so the line runs left to right
twoway line mincage age, ytitle("Mean income") ylabel(0(20000)80000)

***Save graph
graph export "$output\age-income_line.png", replace

************************************
***LINE GRAPH - MEAN INCOME BY AGE AND SEX
************************************
***Generate variable with the WEIGHTED mean income by age and sex
***egen's mean() does not accept weights, and putting [fweight=perwt] on
***"twoway line" does not reweight a mean that is already computed.
***The weighted mean is therefore built by hand, within each sex-age cell,
***as sum(income*perwt) / sum(perwt) over persons with nonmissing,
***nonzero income.
sort female age
gen double wt_valid = perwt if income!=0 & income<.
by female age: egen double inc_wsum = total(income*wt_valid) if income!=0
by female age: egen double wt_sum   = total(wt_valid) if income!=0
***Cells with no valid income give 0/0, which Stata stores as missing
gen mincagesex = inc_wsum/wt_sum
drop wt_valid inc_wsum wt_sum
sum mincagesex, d

***Line graph of income by age and sex
***Data are sorted by age within sex (see above), so each line runs
***left to right
twoway line mincagesex age if female==0 || ///
       line mincagesex age if female==1, ///
       legend(label(1 Males) label(2 Females)) ///
       ytitle("Mean income") ylabel(0(20000)80000) ///
       xtitle("Age")

***Save graph
graph export "$output\age-sex-income_line.png", replace

************************************
***BOXPLOT
************************************
***Note: this section uses local macros, so run it as a whole

***Axis title shared by every boxplot
local boxopts ytitle(Wage and salary income)

***Income (vertical, then horizontal), weighted and excluding zero incomes
graph box  income if income!=0 [fweight=perwt], `boxopts'
graph hbox income if income!=0 [fweight=perwt], `boxopts'

***Income by sex, race/ethnicity, age group, educational attainment,
***marital status, and migration status
foreach groupvar in female raceth agegr educgr marital migrant {
  graph hbox income if income!=0 [fweight=perwt], over(`groupvar') `boxopts'
}

***Income by sex combined with race/ethnicity, age group, and
***educational attainment; each pair is drawn with both nesting orders
foreach groupvar in raceth agegr educgr {
  graph hbox income if income!=0 [fweight=perwt], over(female) over(`groupvar') `boxopts'
  graph hbox income if income!=0 [fweight=perwt], over(`groupvar') over(female) `boxopts'
}

************************************
***CLOSING COMMANDS
************************************
***Save the data currently in memory (including the variables generated
***in this do-file) as Stata03.dta in the data folder, overwriting any
***existing file of that name
save "$data\Stata03.dta", replace

***Close the log file opened at the top of this do-file
log close