I recently found myself with need to produce over 1000 cross-tabulations with their respective chi-square and p-value. Say, for instance, 3 sample groups X 6 themes X 4 categorical questions X 15 covariates. So I needed to create 3 excel workbook with each workbook containing 6 sheets then within each sheet I did the crostab of the 4 questions by 15 covariates.
My go to function was Stata built-in function, putexcel, but columns needed easily went beyond Z.
Borrowing from Chuck Huber's tutorial https://blog.stata.com/2017/04/06/cr...ary-variables/ I demonstrate how to integrate putexcel with excelcol for specifying column names instead of char() function. Here is the code chunk with application using nlsw88 dataset.
clear
clear matrix
* ssc install excelcol // if excelcol is not installed
sysuse nlsw88.dta, clear
label define south 0 "North" 1 "South"
label value south south
recode hours (0/40=0) (41/max=1), gen(hours_40)
label variable hours_40 "Works more than 40 hours a week"
label define hours_40 0 "40 hours or less" 1 "More than 40 hours"
label value hours_40 hours_40
xtile wage_quart = wage, nq(4)
label variable wage_quart "Wage quantile"
label define wage_quart 1"First quantile" 2"Second quantile" 3"Third quantile" 4"Fourth quantile"
label value wage_quart wage_quart
local theme1 "industry occupation" // Occupation
local theme2 "wage_quart hours_40" // Compensation
local theme3 "union" // Workers' Union
gen themelab=. /*Variable for labeling purpose only. Not to be used in analysis*/
recode themelab .=1 if _n==1
recode themelab .=2 if _n==2
recode themelab .=3 if _n==3
label define themelab 1"Occupation" 2"Compensation" 3"Workers' Union", modify
label value themelab themelab
local covariates "south race married collgrad" // Variables for grouping
label define smsalbl 0 "Non SMSA" 1 "SMSA", replace
levelsof smsa, local(smsaLevels)
local smsaValueLabel : value label smsa /*Inherit value labels from variable */
foreach s of local smsaLevels {
local i=1
while `i'<=3 {
local y = 1
foreach vary of local theme`i' { /*Outcome variable */
local x = 1
foreach varx of local covariates { /*Categorizing variable */
tabulate `varx' if !missing(`vary') & smsa == `s', matcell(rowtotals) /* For calculating rowwise percent */
tabulate `varx' `vary' if smsa == `s', chi2 matcell(cellcounts) /* The actual crosstabulation */
local RowCount = r(r)
local ColCount = r(c)
local chi = round(r(chi2), .01)
local p_value = round(r(p), .001)
levelsof `varx', local(RowLevels)
local RowValueLabel : value label `varx' /*Inherit value labels from row variable */
local RowVarLabel : variable label `varx' /*Inherit variable labels from row variable */
levelsof `vary', local(ColLevels)
local ColValueLabel : value label `vary' /*Inherit value labels from column variable */
local ColVarLabel : variable label `vary' /*Inherit variable labels from row variable */
putexcel clear
putexcel set `"`:label (themelab) `i''"', sheet(`:label (smsa) `s'') modify open /* Each workbook will be specific to a theme.
Each workbook will consist of 2 sheets each for smsa and nonsmsa.
A sheet will consist of several questions (categorized by covariates) used in defining a specific theme*/
forvalues row = 1/`RowCount' {
//Label rows
local RowValueLabelNum = word("`RowLevels'", `row')
local CellContents : label `RowValueLabel' `RowValueLabelNum' /* Rows to be consecutively labeled using specific covariate value labels */
local r = `y'
excelcol `r' /* using excelcol instead of char() to enable column extend beyond excel column Z */
local Cell = "`r(column)'" + string(`=`row' + `x' + 2')
putexcel `Cell' = "`CellContents'", right /* Value labels */
local r = `y'
excelcol `r'
local CellRange = "`r(column)'" + string(`x' + 2)
putexcel `CellRange' = "`RowVarLabel'", hcenter bold /* Variable labels */
//Chi-square
local r = 1 + `ColCount' + `y'
excelcol `r'
local Cell = "`r(column)'" + string(`x' + 3)
putexcel `Cell' = "`chi' (`p_value')", hcenter /* Chi-square test statistic and p-value */
//Populate cells with frequency and percent
forvalues col = 1/`ColCount' {
local cellcount = cellcounts[`row',`col']
local cellpercent = string(100*`cellcount'/rowtotals[`row',1],"%9.1f") /* Calculate percent */
local CellContents = "`cellcount' (`cellpercent'%)" /* Concatenate frequency and percent */
local r = `col' + `y'
excelcol `r'
local Cell = "`r(column)'" + string(`=`row' + `x' + 2') /* Specify cells to be filled with frequecy & percent */
putexcel `Cell' = "`CellContents'", right
//Label columns
if `row'==1 {
local ColValueLabelNum = word("`ColLevels'", `col')
local CellContents : label `ColValueLabel' `ColValueLabelNum' /* Columns to be labeled using column value labels */
local r = `col' + `y'
excelcol `r'
local Cell = "`r(column)'" + string(2)
putexcel `Cell' = "`CellContents'", hcenter
local r = 1 + `ColCount' + `y'
excelcol `r'
local Cell = "`r(column)'" + string(2)
putexcel `Cell' = "Chi-square (p-value)", hcenter /* Naming the chisquare column */
local r = `y'
excelcol `r'
local Cell = "`r(column)'" + string(1)
putexcel `Cell' = "`ColVarLabel'", hcenter bold /* Variable labels */
}
}
}
putexcel close /* open and close option saves processing time by saving at workbook level instead of cell level */
loc x = `x' + `RowCount' + 2
}
loc y = `y' + `ColCount' + 4
}
local i=`i'+1
}
}
The result are excel workbooks like below Array
Related Posts with putexcel for multiple tables extending beyond Excel column Z
Stata speed when using i7 vs. i9 (vs. perhaps Xeon)Hello all, if I keep RAM constant at around 32GB, what sort of speed increase in Stata analysis (say…
Multinomial logistic regressionHi all, I am working with a data set (sample below) that contains, among other things, four binary v…
Predict an out-of-sample value in StataHi all, I have a data set over a period 2008-2017. I estimated a gmm model based on this time period…
Business CalendarHey everybody, I'm trying since a long while to change my daily data to business calendar data. Acco…
How to generate type III table for mixed linear models in Stata?Hello, I'm new to Stata and I'm trying to obtain a type III table for a mixed model in Stata? Any su…
Subscribe to:
Post Comments (Atom)
0 Response to putexcel for multiple tables extending beyond Excel column Z
Post a Comment