****************************************************************
* Unemployment Insurance In Survey and Administrative Data
* Jeff Larrimore, Jacob Mortenson, and David Splinter
*
* CPS Estimates
****************************************************************
* Purpose: build CPS person-level merge files in which unemployment
* insurance (UI) receipt and amounts are imputed so that recipient counts
* in each year-by-income-centile cell line up with the IRS figures read
* from the "Imputations" sheet of the Excel workbook.
*
* Inputs (both expected in $datadir):
*   LMS-UI-data-v3.xlsx  - sheet "Imputations", cells A5:F2304
*   cps_00061.dta        - IPUMS CPS extract
* Outputs (written to $datadir, overwriting any existing copy):
*   IRS_centile_ui.dta      - the imputation sheet with named columns
*   UI_mergedata.dta        - one row per person kept in the main sample
*   UI_mergedata_small.dta  - rows with incunemp_impute>0, plus the age-15 file

global datadir = "/cca/data/Jeff/CPS_UI/"
* save the CPS data and imputation data in the above directory

***
*Step 1: read, format, and save the imputation
* The sheet is imported without a header row, so columns arrive as A-F and
* are renamed to the names used by the merge in Step 5.
clear
import excel $datadir/LMS-UI-data-v3.xlsx, sheet(Imputations) cellrange(A5:F2304)
rename A inc_centile
rename B taxyr
rename C ui_sum
rename D ui_n
rename E ui_mean
rename F ui_sd
save $datadir/IRS_centile_ui.dta, replace

***
*Step 2: create a dummy file with income bins so that there are no gaps in output
*Step dropped for merge files since not needed for the merge file creation

***
*Step 3: Set up the CPS data
use "$datadir/cps_00061.dta", clear
*Variables in this IPUMS download that are used for this program (although income variables are more expansive than truly needed):
* year serial month cpsid pernum cpsidp asecwt asecwtcvd age sploc inctot incwage incbus incfarm incss incwelfr incretir incssi
* incint incunemp incwkcom incvet incsurv incdisab incdivid incrent inceduc incchild incalim incasist incother incrann incpens
* srcreti1 srcreti2 increti1 increti2 srcsurv1 srcsurv2 incsurv1 incsurv2 srcdisa1 srcdisa2 incdisa1 incdisa2 srcret1 srcret2 incret1 incret2
keep year serial month cpsid pernum cpsidp asecwt asecwtcvd age sploc inctot incwage incbus incfarm incss incwelfr incretir incssi incint incunemp incwkcom incvet incsurv incdisab incdivid incrent inceduc incchild incalim incasist incother incrann incpens srcreti1 srcreti2 increti1 increti2 srcsurv1 srcsurv2 incsurv1 incsurv2 srcdisa1 srcdisa2 incdisa1 incdisa2 srcret1 srcret2 incret1 incret2
* Use asecwtcvd as the weight wherever it is non-missing; otherwise asecwt is left as is.
replace asecwt = asecwtcvd if asecwtcvd!=.
keep if year>=2020
* taxyr is the key used to match the IRS file; it is the survey year minus one.
gen taxyr = year - 1

*save individuals who are age 15. We do not impute UI to them and keep their original values, but want to include in the final merge files for ease of use
* The age-15 rows are written to a tempfile (imputed value set equal to the
* reported value) and appended to the small output file at the very end.
preserve
keep if age==15
replace incunemp = 0 if incunemp==999999 | incunemp==.
rename incunemp incunemp_orig
gen incunemp_impute = incunemp_orig
keep year incunemp_impute incunemp_orig serial pernum
tempfile age15_unemp
save `age15_unemp'
restore

* NOTE(review): this keeps age 15 in the main sample as well (>=15, not >15),
* so in the visible code age-15 records also pass through the centile and
* imputation steps below, and those with positive UI would appear twice in
* UI_mergedata_small.dta after the append. Confirm whether age>15 was intended.
keep if age>=15

*Replace NIU values with zeros for individual income sources
* NOTE(review): the NIU sentinel has a different number of 9s for different
* variables below (6, 7 and 8 digits) - verify each against the IPUMS codebook.
replace incunemp = 0 if incunemp==999999 | incunemp==.
replace increti1 = 0 if increti1 == 999999 | increti1 == .
replace increti2 = 0 if increti2 == 999999 | increti2 == .
replace incret1 = 0 if incret1 == 9999999 | incret1==.
replace incret2 = 0 if incret2 == 9999999 | incret2==.
replace incsurv1 = 0 if incsurv1 == 99999999 | incsurv1==.
replace incsurv2 = 0 if incsurv2 == 99999999 | incsurv2==.
replace incpens = 0 if incpens==.
replace incrann = 0 if incrann==.
replace incalim = 0 if incalim==.

*Verify that everything looks correct and that total earnings matches total personal earnings reported by Census
*gen testtotal = incwage + incbus + incfarm + incint + incdivid + incrent + incchild + incother + incsurv1 + incsurv2 + inceduc + incasist + incssi + incdisa1 + incdisa2 + incunemp + incwkcom + incvet + incss + incwelfr + incpens + incrann + incalim + incretir
*count if abs(testtotal-inctot)>1

gen laborearn = incwage + incbus + incfarm
gen privinc = laborearn + incrent + incint + incdivid + incother + incretir + incalim + incsurv1 + incsurv2 + incpens + incrann

*** taxform_inc IS THE INCOME NEEDED FOR MERGING SINCE MATCHING TO IRS DATA
*** NOTE THAT IT EXCLUDES UNEMPLOYMENT INSURANCE (deviation from our other papers where we use taxform_inc)
* NOTE(review): incdisa1 and incdisa2 are added here without the NIU/missing
* recode applied to the other sources above - confirm they cannot hold NIU
* codes or missing values in this extract.
gen priv_plus_ss_di = privinc + incss + incdisa1 + incdisa2
replace priv_plus_ss_di = priv_plus_ss_di - incdisa1 if srcdisa1==1 // disability benefits from workers comp. are non-taxable
replace priv_plus_ss_di = priv_plus_ss_di - incdisa2 if srcdisa2==1 // disability benefits from workers comp. are non-taxable
gen taxform_inc = priv_plus_ss_di
**gen taxform_inc = priv_plus_ss_di + incunemp

*Determine spouse's income, and link together individual and spouse
* A copy of the person file is saved with pernum renamed to sploc, so joining
* on (year serial sploc) attaches to each person the record of the household
* member whose pernum equals that person's sploc.
preserve
keep taxform_inc incunemp year serial pernum
rename pernum sploc
rename taxform_inc sp_taxform_inc
rename incunemp sp_incunemp
tempfile spouse_inc
save `spouse_inc'
tab year
restore

* The counts before and after the join are displayed so the row total can be compared.
count
joinby year serial sploc using `spouse_inc', unmatched(master)
count
tab year _merge
* A person is treated as married when a spouse record was matched (_merge==3).
gen married = 0
replace married = 1 if _merge==3
replace sp_taxform_inc = 0 if sp_taxform_inc==.
replace sp_incunemp = 0 if sp_incunemp==.

*reminder: taxform_inc EXCLUDES unemployment income already
gen tu_unemp = incunemp + sp_incunemp
gen tu_totalinc = taxform_inc + sp_taxform_inc

*Equal-split income
gen split_tu_totalinc = tu_totalinc
replace split_tu_totalinc = tu_totalinc/2 if married==1

*****
*Step 4: set up the data for imputation (identifying single centiles)
* Within each year, people are ordered by equal-split income (ties broken by a
* seeded uniform draw) and assigned ceil(cumulative weight share * centiles).
local centiles = 100
set seed 19893871
gen rand1 = runiform()
sort year split_tu_totalinc rand1
by year: gen double runningwgt = sum(asecwt)
by year: egen double totalwgt = total(asecwt)
gen inc_centile = runningwgt/totalwgt
replace inc_centile = ceil(inc_centile*`centiles')
drop runningwgt totalwgt

***
* Step 5: MERGE IN IRS DATA
* anyui flags people with positive reported UI.
gen anyui = (incunemp>0)

*Merge in IRS UI data
* unmatched(master) keeps CPS rows that have no matching IRS cell.
sort taxyr inc_centile
joinby taxyr inc_centile using $datadir/IRS_centile_ui.dta, unmatched(master) _merge(IRS_cent_merge)

*Determine how many people and recipients in each centile
* ui_n_cps_raw = weighted count of reported recipients in the cell;
* cps_n = weighted count of everyone in the cell.
sort taxyr inc_centile
gen temp = anyui * asecwt
by taxyr inc_centile: egen double ui_n_cps_raw = total(temp)
drop temp
by taxyr inc_centile: egen double cps_n = total(asecwt)

*Count the number of missing recipients (ui_n is recipients from IRS) and determine how many I need to impute
* impute_share is the shortfall expressed as a share of the cell's weighted non-recipients.
* NOTE(review): when a cell has no non-recipients the denominator is zero and
* impute_share is missing rather than 0; the <0 guard below does not catch that.
gen missing_recipients = ui_n - ui_n_cps_raw
replace missing_recipients = 0 if missing_recipients<0
gen impute_share = missing_recipients / (cps_n - ui_n_cps_raw)
replace impute_share = 0 if impute_share<0

*Randomly impute recipients in each centile. anyui_impute are those who either have original UI or imputed UI
set seed 1983716767
gen rand = runiform()
gen anyui_impute = 0
replace anyui_impute = 1 if anyui==1
* NOTE(review): the next statement repeats the previous one verbatim and has no further effect.
replace anyui_impute = 1 if anyui==1

*OLD CODE: simple
*replace anyui_impute = 1 if anyui==0 & rand<=impute_share

*NEW CODE - allows for ordering within the centile_ui_dollars_cps that can prioritize those with employment earnings
* Within each year x centile x anyui group, people with incwage>0 (nowork==0)
* sort first, then by the random draw; impute_in_centile is the cumulative
* weight share within that group.
gen nowork = (incwage<=0)
gsort year inc_centile anyui nowork rand
by year inc_centile anyui: gen double runningwgt = sum(asecwt)
by year inc_centile anyui: egen double totalwgt = total(asecwt)
gen impute_in_centile = runningwgt/totalwgt

* NOTE(review): SOURCE TEXT APPEARS TO BE MISSING ON THE NEXT LINE. Everything
* between "impute_in_centile" and "0 {" is absent from this copy: the rest of
* the replace condition, the statement that creates incunemp_impute, the
* definitions of the loop macros `yr' and `i', and the summarize that feeds
* r(mean) for `mean'. Only one of the four opening braces matching the closing
* braces below is visible. The tokens are left exactly as found; restore this
* section from the authors' original before running the file.
replace anyui_impute = 1 if impute_in_centile0 {
                * `mean' is taken from r(mean) of a summarize that is not visible here.
                local mean = r(mean)
                * `sd' is the mean of ui_sd over the newly flagged recipients in this centile and tax year.
                quietly sum ui_sd if inc_centile==`i' & anyui_impute == 1 & anyui==0 & taxyr == `yr', meanonly
                local sd = r(mean)
                * Draw a normal variate with that mean and sd and assign it only to the newly flagged recipients of this cell.
                drawnorm temp, means(`mean') sds(`sd')
                replace incunemp_impute = temp if anyui_impute == 1 & anyui==0 & inc_centile==`i' & taxyr == `yr'
                drop temp
            }
        }
    }
}

*Place a $100 lower bound on imputed UI
replace incunemp_impute = 100 if incunemp_impute<100 & anyui_impute == 1 & anyui==0
*And replace CPS reported values with their actuals
replace incunemp_impute = incunemp if anyui==1

* Full merge file: identifiers, weights, the anyui flag, and reported and imputed UI.
keep year incunemp_impute anyui serial cpsid pernum asecwtcvd asecwt incunemp
rename incunemp incunemp_orig
save $datadir/UI_mergedata.dta, replace

* Small merge file: the age-15 rows saved earlier are appended, then only rows
* with positive incunemp_impute are kept.
keep year incunemp_impute incunemp_orig serial pernum
append using `age15_unemp'
keep if incunemp_impute>0
save $datadir/UI_mergedata_small.dta, replace