% Bibliography database: nine journal articles (2004-2018), listed newest first.
%
% Conventions used in this file:
%   * Citation keys are the original numeric keys and are left unchanged so
%     that existing \cite commands keep resolving.
%   * Page ranges use the double hyphen (en-dash) form.
%   * Only acronyms and proper nouns inside titles are brace-protected, so a
%     bibliography style remains free to apply its own title casing.
%   * Abstracts are stored as a single line each; export artifacts (HTML
%     entity residue, escaped Unicode punctuation) have been replaced by
%     plain LaTeX-safe text.

@article{2697,
  author   = {Li, Jie and van der Linden, Wim J.},
  title    = {A Comparison of Constraint Programming and Mixed-Integer Programming for Automated Test-Form Generation},
  journal  = {Journal of Educational Measurement},
  volume   = {55},
  number   = {4},
  year     = {2018},
  pages    = {435--456},
  doi      = {10.1111/jedm.12187},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12187},
  abstract = {The final step of the typical process of developing educational and psychological tests is to place the selected test items in a formatted form. The step involves the grouping and ordering of the items to meet a variety of formatting constraints. As this activity tends to be time-intensive, the use of mixed-integer programming (MIP) has been proposed to automate it. The goal of this article is to show how constraint programming (CP) can be used as an alternative to automate test-form generation problems with a large variety of formatting constraints, and how it compares with MIP-based form generation as for its models, solutions, and running times. Two empirical examples are presented: (i) automated generation of a computerized fixed-form; and (ii) automated generation of shadow tests for multistage testing. Both examples show that CP works well with feasible solutions and running times likely to be better than that for MIP-based applications.},
}

@article{2504,
  author   = {Choi, Seung W. and Moellering, Karin T. and Li, Jie and van der Linden, Wim J.},
  title    = {Optimal Reassembly of Shadow Tests in {CAT}},
  journal  = {Applied Psychological Measurement},
  volume   = {40},
  number   = {7},
  year     = {2016},
  pages    = {469--485},
  doi      = {10.1177/0146621616654597},
  url      = {http://apm.sagepub.com/content/40/7/469.abstract},
  abstract = {Even in the age of abundant and fast computing resources, concurrency requirements for large-scale online testing programs still put an uninterrupted delivery of computer-adaptive tests at risk. In this study, to increase the concurrency for operational programs that use the shadow-test approach to adaptive testing, we explored various strategies aiming for reducing the number of reassembled shadow tests without compromising the measurement quality. Strategies requiring fixed intervals between reassemblies, a certain minimal change in the interim ability estimate since the last assembly before triggering a reassembly, and a hybrid of the two strategies yielded substantial reductions in the number of reassemblies without degradation in the measurement accuracy. The strategies effectively prevented unnecessary reassemblies due to adapting to the noise in the early test stages. They also highlighted the practicality of the shadow-test approach by minimizing the computational load involved in its use of mixed-integer programming.},
}

@article{2296,
  author   = {Diao, Qi and van der Linden, Wim J.},
  title    = {Integrating Test-Form Formatting Into Automated Test Assembly},
  journal  = {Applied Psychological Measurement},
  volume   = {37},
  number   = {5},
  year     = {2013},
  pages    = {361--374},
  doi      = {10.1177/0146621613476157},
  url      = {http://apm.sagepub.com/content/37/5/361.abstract},
  abstract = {Automated test assembly uses the methodology of mixed integer programming to select an optimal set of items from an item bank. Automated test-form generation uses the same methodology to optimally order the items and format the test form. From an optimization point of view, production of fully formatted test forms directly from the item pool using a simultaneous optimization model is more attractive than any of the current, more time-consuming two-stage processes. The goal of this study was to provide such simultaneous models both for computer-delivered and paper forms, as well as explore their performances relative to two-stage optimization. Empirical examples are presented to show that it is possible to automatically produce fully formatted optimal test forms directly from item pools up to some 2,000 items on a regular PC in realistic times.},
}

@article{2313,
  author   = {van der Linden, Wim J. and Xiong, Xinhui},
  title    = {Speededness and Adaptive Testing},
  journal  = {Journal of Educational and Behavioral Statistics},
  volume   = {38},
  number   = {4},
  year     = {2013},
  pages    = {418--438},
  doi      = {10.3102/1076998612466143},
  url      = {http://jeb.sagepub.com/cgi/content/abstract/38/4/418},
  abstract = {Two simple constraints on the item parameters in a response--time model are proposed to control the speededness of an adaptive test. As the constraints are additive, they can easily be included in the constraint set for a shadow-test approach (STA) to adaptive testing. Alternatively, a simple heuristic is presented to control speededness in plain adaptive testing without any constraints. Both types of control are easy to implement and do not require any other real-time parameter estimation during the test than the regular update of the test taker's ability estimate. Evaluation of the two approaches using simulated adaptive testing showed that the STA was especially effective. It guaranteed testing times that differed less than 10 seconds from a reference test across a variety of conditions.},
}

@article{2153,
  author   = {van der Linden, Wim J.},
  title    = {Predictive Control of Speededness in Adaptive Testing},
  journal  = {Applied Psychological Measurement},
  volume   = {33},
  number   = {1},
  year     = {2009},
  pages    = {25--41},
  doi      = {10.1177/0146621607314042},
  url      = {http://apm.sagepub.com/content/33/1/25.abstract},
  abstract = {An adaptive testing method is presented that controls the speededness of a test using predictions of the test takers' response times on the candidate items in the pool. Two different types of predictions are investigated: posterior predictions given the actual response times on the items already administered and posterior predictions that use the responses on these items as an additional source of information. In a simulation study with an adaptive test modeled after a test from the Armed Services Vocational Aptitude Battery, the effectiveness of the methods in removing differential speededness from the test was evaluated.},
}

@article{2234,
  author   = {Veldkamp, Bernard P. and van der Linden, Wim J.},
  title    = {Implementing {Sympson-Hetter} Item-Exposure Control in a Shadow-Test Approach to Constrained Adaptive Testing},
  journal  = {International Journal of Testing},
  volume   = {8},
  number   = {3},
  year     = {2008},
  pages    = {272--289},
  doi      = {10.1080/15305050802262233},
  url      = {http://www.tandfonline.com/doi/abs/10.1080/15305050802262233},
}

@article{2241,
  author   = {van der Linden, Wim J. and Veldkamp, Bernard P.},
  title    = {Conditional Item-Exposure Control in Adaptive Testing Using Item-Ineligibility Probabilities},
  journal  = {Journal of Educational and Behavioral Statistics},
  volume   = {32},
  number   = {4},
  year     = {2007},
  pages    = {398--418},
  doi      = {10.3102/1076998606298044},
  url      = {http://jeb.sagepub.com/cgi/content/abstract/32/4/398},
  abstract = {Two conditional versions of the exposure-control method with item-ineligibility constraints for adaptive testing in van der Linden and Veldkamp (2004) are presented. The first version is for unconstrained item selection, the second for item selection with content constraints imposed by the shadow-test approach. In both versions, the exposure rates of the items are controlled using probabilities of item ineligibility given $\theta$ that adapt the exposure rates automatically to a goal value for the items in the pool. In an extensive empirical study with an adaptive version of the Law School Admission Test, the authors show how the method can be used to drive conditional exposure rates below goal values as low as 0.025. Obviously, the price to be paid for minimal exposure rates is a decrease in the accuracy of the ability estimates. This trend is illustrated with empirical data.},
}

@article{2199,
  author   = {van der Linden, Wim J. and Breithaupt, Krista and Chuah, Siang Chee and Zhang, Yanwei},
  title    = {Detecting Differential Speededness in Multistage Testing},
  journal  = {Journal of Educational Measurement},
  volume   = {44},
  number   = {2},
  year     = {2007},
  pages    = {117--130},
  issn     = {1745-3984},
  doi      = {10.1111/j.1745-3984.2007.00030.x},
  url      = {http://dx.doi.org/10.1111/j.1745-3984.2007.00030.x},
  abstract = {A potential undesirable effect of multistage testing is differential speededness, which happens if some of the test takers run out of time because they receive subtests with items that are more time intensive than others. This article shows how a probabilistic response-time model can be used for estimating differences in time intensities and speed between subtests and test takers and detecting differential speededness. An empirical data set for a multistage test in the computerized CPA Exam was used to demonstrate the procedures. Although the more difficult subtests appeared to have items that were more time intensive than the easier subtests, an analysis of the residual response times did not reveal any significant differential speededness because the time limit appeared to be appropriate. In a separate analysis, within each of the subtests, we found minor but consistent patterns of residual times that are believed to be due to a warm-up effect, that is, use of more time on the initial items than they actually need.},
}

@article{2237,
  author   = {van der Linden, Wim J. and Veldkamp, Bernard P.},
  title    = {Constraining Item Exposure in Computerized Adaptive Testing With Shadow Tests},
  journal  = {Journal of Educational and Behavioral Statistics},
  volume   = {29},
  number   = {3},
  year     = {2004},
  pages    = {273--291},
  doi      = {10.3102/10769986029003273},
  url      = {http://jeb.sagepub.com/cgi/content/abstract/29/3/273},
  abstract = {Item-exposure control in computerized adaptive testing is implemented by imposing item-ineligibility constraints on the assembly process of the shadow tests. The method resembles Sympson and Hetter's (1985) method of item-exposure control in that the decisions to impose the constraints are probabilistic. The method does not, however, require time-consuming simulation studies to set values for control parameters before the operational use of the test. Instead, it can set the probabilities of item ineligibility adaptively during the test using the actual item-exposure rates. An empirical study using an item pool from the Law School Admission Test showed that application of the method yielded perfect control of the item-exposure rates and had negligible impact on the bias and mean-squared error functions of the ability estimator.},
}