@article{2047,
  title = {Comparison Between Dichotomous and Polytomous Scoring of Innovative Items in a Large-Scale Computerized Adaptive Test},
  journal = {Educational and Psychological Measurement},
  volume = {72},
  year = {2012},
  pages = {493-509},
  abstract = {

This study explored the impact of partial credit scoring of one type of innovative item (multiple-response items) in the pretest and operational settings of a computerized adaptive version of a large-scale licensure examination. The impact of partial credit scoring on the estimation of ability parameters and on classification decisions in operational test settings was explored in one real-data analysis and two simulation studies in which two different polytomous scoring algorithms, automated polytomous scoring and rater-generated polytomous scoring, were applied. In the real-data analysis, the ability estimates from dichotomous and polytomous scoring were highly correlated, and the classification consistency between the different scoring algorithms was nearly perfect. The information distribution in the operational item bank changed only slightly. In the two simulation studies comparing each polytomous scoring algorithm with dichotomous scoring, the ability estimates resulting from polytomous scoring had slightly higher measurement precision than those resulting from dichotomous scoring. The practical impact on classification decisions was minor because of the extremely small number of items that could be scored polytomously in the current study.

},
  doi = {10.1177/0013164411422903},
  author = {Jiao, H. and Liu, J. and Haynie, K. and Woo, A. and Gorham, J.}
}

@inbook{1954,
  title = {Developing item variants: An empirical study},
  year = {2009},
  note = {{PDF file, 194 KB}},
  address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.},
  abstract = {Large-scale standardized tests have been widely used for educational and licensure testing. In computerized adaptive testing (CAT), one of the practical concerns in maintaining large-scale assessments is ensuring the adequate supply of high-quality items required for item pool functioning. Developing items at specific difficulty levels and for certain areas of test plans is a well-known challenge. The purpose of this study was to investigate strategies for varying items that can effectively generate items at targeted difficulty levels and in specific test plan areas. Each variant item generation model was developed by decomposing selected source items possessing ideal measurement properties and targeting the desired content domains. A total of 341 variant items were generated from 72 source items. Data were collected from six pretest periods, and items were calibrated using the Rasch model. Initial results indicate that the variant items showed desirable measurement properties. Additionally, compared to an average of approximately 60\% of items passing the pretest criteria overall, an average of 84\% of the variant items passed the pretest criteria.},
  author = {Wendt, A. and Kao, S. and Gorham, J. and Woo, A.}
}

@inbook{1822,
  title = {Limiting item exposure for target difficulty ranges in a high-stakes CAT},
  year = {2009},
  note = {{PDF file, 1 MB}},
  address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.},
  author = {Li, X. and Becker, K. and Gorham, J. and Woo, A.}
}