I recently found myself with need to produce over 1000 cross-tabulations with their respective chi-square and p-value. Say, for instance, 3 sample groups X 6 themes X 4 categorical questions X 15 covariates. So I needed to create 3 excel workbook with each workbook containing 6 sheets then within each sheet I did the crostab of the 4 questions by 15 covariates.
My go to function was Stata built-in function, putexcel, but columns needed easily went beyond Z.
Borrowing from Chuck Huber's tutorial https://blog.stata.com/2017/04/06/cr...ary-variables/ I demonstrate how to integrate putexcel with excelcol for specifying column names instead of char() function. Here is the code chunk with application using nlsw88 dataset.
clear
clear matrix
* ssc install excelcol // if excelcol is not installed
sysuse nlsw88.dta, clear
label define south 0 "North" 1 "South"
label value south south
recode hours (0/40=0) (41/max=1), gen(hours_40)
label variable hours_40 "Works more than 40 hours a week"
label define hours_40 0 "40 hours or less" 1 "More than 40 hours"
label value hours_40 hours_40
xtile wage_quart = wage, nq(4)
label variable wage_quart "Wage quantile"
label define wage_quart 1"First quantile" 2"Second quantile" 3"Third quantile" 4"Fourth quantile"
label value wage_quart wage_quart
local theme1 "industry occupation" // Occupation
local theme2 "wage_quart hours_40" // Compensation
local theme3 "union" // Workers' Union
gen themelab=. /*Variable for labeling purpose only. Not to be used in analysis*/
recode themelab .=1 if _n==1
recode themelab .=2 if _n==2
recode themelab .=3 if _n==3
label define themelab 1"Occupation" 2"Compensation" 3"Workers' Union", modify
label value themelab themelab
local covariates "south race married collgrad" // Variables for grouping
label define smsalbl 0 "Non SMSA" 1 "SMSA", replace
levelsof smsa, local(smsaLevels)
local smsaValueLabel : value label smsa /*Inherit value labels from variable */
foreach s of local smsaLevels {
local i=1
while `i'<=3 {
local y = 1
foreach vary of local theme`i' { /*Outcome variable */
local x = 1
foreach varx of local covariates { /*Categorizing variable */
tabulate `varx' if !missing(`vary') & smsa == `s', matcell(rowtotals) /* For calculating rowwise percent */
tabulate `varx' `vary' if smsa == `s', chi2 matcell(cellcounts) /* The actual crosstabulation */
local RowCount = r(r)
local ColCount = r(c)
local chi = round(r(chi2), .01)
local p_value = round(r(p), .001)
levelsof `varx', local(RowLevels)
local RowValueLabel : value label `varx' /*Inherit value labels from row variable */
local RowVarLabel : variable label `varx' /*Inherit variable labels from row variable */
levelsof `vary', local(ColLevels)
local ColValueLabel : value label `vary' /*Inherit value labels from column variable */
local ColVarLabel : variable label `vary' /*Inherit variable labels from row variable */
putexcel clear
putexcel set `"`:label (themelab) `i''"', sheet(`:label (smsa) `s'') modify open /* Each workbook will be specific to a theme.
Each workbook will consist of 2 sheets each for smsa and nonsmsa.
A sheet will consist of several questions (categorized by covariates) used in defining a specific theme*/
forvalues row = 1/`RowCount' {
//Label rows
local RowValueLabelNum = word("`RowLevels'", `row')
local CellContents : label `RowValueLabel' `RowValueLabelNum' /* Rows to be consecutively labeled using specific covariate value labels */
local r = `y'
excelcol `r' /* using excelcol instead of char() to enable column extend beyond excel column Z */
local Cell = "`r(column)'" + string(`=`row' + `x' + 2')
putexcel `Cell' = "`CellContents'", right /* Value labels */
local r = `y'
excelcol `r'
local CellRange = "`r(column)'" + string(`x' + 2)
putexcel `CellRange' = "`RowVarLabel'", hcenter bold /* Variable labels */
//Chi-square
local r = 1 + `ColCount' + `y'
excelcol `r'
local Cell = "`r(column)'" + string(`x' + 3)
putexcel `Cell' = "`chi' (`p_value')", hcenter /* Chi-square test statistic and p-value */
//Populate cells with frequency and percent
forvalues col = 1/`ColCount' {
local cellcount = cellcounts[`row',`col']
local cellpercent = string(100*`cellcount'/rowtotals[`row',1],"%9.1f") /* Calculate percent */
local CellContents = "`cellcount' (`cellpercent'%)" /* Concatenate frequency and percent */
local r = `col' + `y'
excelcol `r'
local Cell = "`r(column)'" + string(`=`row' + `x' + 2') /* Specify cells to be filled with frequecy & percent */
putexcel `Cell' = "`CellContents'", right
//Label columns
if `row'==1 {
local ColValueLabelNum = word("`ColLevels'", `col')
local CellContents : label `ColValueLabel' `ColValueLabelNum' /* Columns to be labeled using column value labels */
local r = `col' + `y'
excelcol `r'
local Cell = "`r(column)'" + string(2)
putexcel `Cell' = "`CellContents'", hcenter
local r = 1 + `ColCount' + `y'
excelcol `r'
local Cell = "`r(column)'" + string(2)
putexcel `Cell' = "Chi-square (p-value)", hcenter /* Naming the chisquare column */
local r = `y'
excelcol `r'
local Cell = "`r(column)'" + string(1)
putexcel `Cell' = "`ColVarLabel'", hcenter bold /* Variable labels */
}
}
}
putexcel close /* open and close option saves processing time by saving at workbook level instead of cell level */
loc x = `x' + `RowCount' + 2
}
loc y = `y' + `ColCount' + 4
}
local i=`i'+1
}
}
The result are excel workbooks like below Array
Related Posts with putexcel for multiple tables extending beyond Excel column Z
Generated regressor in interaction termHi everyone Short form of question Can you use standard corrections for generated regressors when t…
Dummy variable value dependent on any of the 36 values in a string variableI am a beginner in Stata (ver 16) and need help regarding dummy variable. There is a dummy variable…
Error using esrefline in meta forestplot in Stata 16SEHi I get an error "option esrefline not allowed" "r(198)" using meta forestplot. I have Stata v16SE.…
Duncan Segregation Index with Aggregate DataI'd like to calculate the duncan index of segregation, for occupational gender segregation. In addit…
Manipulating with string variablesHi all, I am using STATA 15.1 and I have a new problem and it is a challenge, at least for me if I …
Subscribe to:
Post Comments (Atom)
0 Response to putexcel for multiple tables extending beyond Excel column Z
Post a Comment