@article {2753, title = {Expanding the Meaning of Adaptive Testing to Enhance Validity}, journal = {Journal of Computerized Adaptive Testing}, volume = {10}, year = {2023}, pages = {22-31}, keywords = {Adaptive Testing, CAT, CBT, test-taking disengagement, validity}, doi = {10.7333/2305-1002022}, author = {Steven L. Wise} } @article {2752, title = {An Extended Taxonomy of Variants of Computerized Adaptive Testing}, journal = {Journal of Computerized Adaptive Testing}, volume = {10}, year = {2023}, keywords = {Adaptive Testing, evidence-centered design, Item Response Theory, knowledge-based model construction, missingness}, issn = {2165-6592}, doi = {10.7333/2302-100101}, author = {Roy Levy and John T. Behrens and Robert J. Mislevy} } @article {2737, title = {Efficiency of Targeted Multistage Calibration Designs Under Practical Constraints: A Simulation Study}, journal = {Journal of Educational Measurement}, volume = {56}, number = {1}, year = {2019}, pages = {121-146}, abstract = {Abstract Calibration of an item bank for computer adaptive testing requires substantial resources. In this study, we investigated whether the efficiency of calibration under the Rasch model could be enhanced by improving the match between item difficulty and student ability. We introduced targeted multistage calibration designs, a design type that considers ability-related background variables and performance for assigning students to suitable items. Furthermore, we investigated whether uncertainty about item difficulty could impair the assembling of efficient designs. The results indicated that targeted multistage calibration designs were more efficient than ordinary targeted designs under optimal conditions. Limited knowledge about item difficulty reduced the efficiency of one of the two investigated targeted multistage calibration designs, whereas targeted designs were more robust.}, doi = {10.1111/jedm.12203}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12203}, author = {Berger, St{\'e}phanie and Verschoor, Angela J. and Eggen, Theo J. H. M. and Moser, Urs} } @article {2698, title = {Evaluation of a New Method for Providing Full Review Opportunities in Computerized Adaptive Testing{\textemdash}Computerized Adaptive Testing With Salt}, journal = {Journal of Educational Measurement}, volume = {55}, number = {4}, year = {2018}, pages = {582-594}, abstract = {Abstract Allowing item review in computerized adaptive testing (CAT) is getting more attention in the educational measurement field as more and more testing programs adopt CAT. The research literature has shown that allowing item review in an educational test could result in more accurate estimates of examinees{\textquoteright} abilities. The practice of item review in CAT, however, is hindered by the potential danger of test-manipulation strategies. To provide review opportunities to examinees while minimizing the effect of test-manipulation strategies, researchers have proposed different algorithms to implement CAT with restricted revision options. In this article, we propose and evaluate a new method that implements CAT without any restriction on item review. In particular, we evaluate the new method in terms of the accuracy on ability estimates and the robustness against test-manipulation strategies. 
This study shows that the newly proposed method offers a promising win-win situation: examinees have full freedom to review and change answers, while the impact of test-manipulation strategies is undermined.}, doi = {10.1111/jedm.12193}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12193}, author = {Cui, Zhongmin and Liu, Chunyan and He, Yong and Chen, Hanwei} } @conference {2656, title = {Efficiency of Item Selection in CD-CAT Based on Conjunctive Bayesian Network Modeling Hierarchical Attributes}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Cognitive diagnosis models (CDM) aim to diagnose examinees{\textquoteright} mastery status on multiple fine-grained skills. As new developments in cognitive diagnosis methods emerge, much attention has also been given to cognitive diagnostic computerized adaptive testing (CD-CAT). Topics such as item selection methods, item exposure control strategies, and online calibration methods, which have been well-studied for traditional item response theory (IRT) based CAT, are also investigated in the context of CD-CAT (e.g., Xu, Chang, \& Douglas, 2003; Wang, Chang, \& Huebner, 2011; Chen et al., 2012).

In the CDM framework, some researchers have suggested modeling the structural relationships between cognitive skills, or attributes. In particular, attributes can be hierarchical, such that some attributes must be acquired before subsequent ones are mastered. For example, in mathematics, addition must be mastered before multiplication, which gives a hierarchy between the addition and multiplication skills. Recently, new CDMs that consider attribute hierarchies have been proposed, including the Attribute Hierarchy Method (AHM; Leighton, Gierl, \& Hunka, 2004) and the Hierarchical Diagnostic Classification Model (HDCM; Templin \& Bradshaw, 2014).

Bayesian networks (BN), probabilistic graphical models that represent the relationships among a set of random variables using a directed acyclic graph with conditional probability distributions, also provide an efficient framework for modeling the relationships between attributes (Culbertson, 2016). Among the various BNs, the conjunctive Bayesian network (CBN; Beerenwinkel, Eriksson, \& Sturmfels, 2007) is a special kind of BN that assumes a partial ordering between occurrences of events and conjunctive constraints between them.

In this study, we propose using the CBN to model attribute hierarchies and discuss the advantages of the CBN for CDM. We then explore the impact of CBN modeling on the efficiency of item selection methods for CD-CAT when the attributes are truly hierarchical. To this end, two simulation studies, one for fixed-length CAT and another for variable-length CAT, are conducted. For each study, two attribute hierarchy structures with 5 and 8 attributes are assumed. Among the various item selection methods developed for CD-CAT, six algorithms are considered: the posterior-weighted Kullback-Leibler index (PWKL; Cheng, 2009), the modified PWKL index (MPWKL; Kaplan, de la Torre, \& Barrada, 2015), Shannon entropy (SHE; Tatsuoka, 2002), mutual information (MI; Wang, 2013), the posterior-weighted CDM discrimination index (PWCDI; Zheng \& Chang, 2016), and the posterior-weighted attribute-level CDM discrimination index (PWACDI; Zheng \& Chang, 2016). The impact of Q-matrix structure, item quality, and test termination rules on the efficiency of the item selection algorithms is also investigated. Evaluation measures include attribute classification accuracy (fixed-length experiment) and the test length of CD-CAT until stopping (variable-length experiment).

The results of the study indicate that the efficiency of item selection is improved by directly modeling the attribute hierarchies using the CBN. The test length until the diagnosis probability threshold was reached was reduced to 50-70\% for CBN-based CAT compared with CD-CAT assuming independence of attributes. The magnitude of improvement is greater when the cognitive model of the test includes more attributes and when the test length is shorter. We conclude by discussing how Q-matrix structure, item quality, and test termination rules affect efficiency.

References

Beerenwinkel, N., Eriksson, N., \& Sturmfels, B. (2007). Conjunctive Bayesian networks. Bernoulli, 893-909.

Chen, P., Xin, T., Wang, C., \& Chang, H. H. (2012). Online calibration methods for the DINA model with independent attributes in CD-CAT. Psychometrika, 77(2), 201-222.

Cheng, Y. (2009). When cognitive diagnosis meets computerized adaptive testing: CD-CAT. Psychometrika, 74(4), 619-632.

Culbertson, M. J. (2016). Bayesian networks in educational assessment: the state of the field. Applied Psychological Measurement, 40(1), 3-21.

Kaplan, M., de la Torre, J., \& Barrada, J. R. (2015). New item selection methods for cognitive diagnosis computerized adaptive testing. Applied Psychological Measurement, 39(3), 167-188.

Leighton, J. P., Gierl, M. J., \& Hunka, S. M. (2004). The attribute hierarchy method for cognitive assessment: A variation on Tatsuoka{\textquoteright}s rule-space approach. Journal of Educational Measurement, 41(3), 205-237.

Tatsuoka, C. (2002). Data analytic methods for latent partially ordered classification models. Journal of the Royal Statistical Society: Series C (Applied Statistics), 51(3), 337-350.

Templin, J., \& Bradshaw, L. (2014). Hierarchical diagnostic classification models: A family of models for estimating and testing attribute hierarchies. Psychometrika, 79(2), 317-339.

Wang, C. (2013). Mutual information item selection method in cognitive diagnostic computerized adaptive testing with short test length. Educational and Psychological Measurement, 73(6), 1017-1035.

Wang, C., Chang, H. H., \& Huebner, A. (2011). Restrictive stochastic item selection methods in cognitive diagnostic computerized adaptive testing. Journal of Educational Measurement, 48(3), 255-273.

Xu, X., Chang, H., \& Douglas, J. (2003, April). A simulation study to compare CAT strategies for cognitive diagnosis. Paper presented at the annual meeting of National Council on Measurement in Education, Chicago.

Zheng, C., \& Chang, H. H. (2016). High-efficiency response distribution{\textendash}based item selection algorithms for short-length cognitive diagnostic computerized adaptive testing. Applied Psychological Measurement, 40(8), 608-624.


}, keywords = {CD-CAT, Conjunctive Bayesian Network Modeling, item selection}, url = {https://drive.google.com/open?id=1RbO2gd4aULqsSgRi_VZudNN_edX82NeD}, author = {Soo-Yun Han and Yun Joo Yoo} } @conference {2652, title = {Efficiency of Targeted Multistage Calibration Designs under Practical Constraints: A Simulation Study}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Calibration of an item bank for computer adaptive testing requires substantial resources. In this study, we focused on two related research questions. First, we investigated whether the efficiency of item calibration under the Rasch model could be enhanced by calibration designs that optimize the match between item difficulty and student ability (Berger, 1991). To this end, we introduced targeted multistage calibration designs, a design type that combines traditional targeted calibration designs with multistage designs. Targeted multistage calibration designs consider ability-related background variables (e.g., grade in school) as well as performance (i.e., the outcome of a preceding test stage) when assigning students to suitable items.

Second, we explored how limited a priori knowledge about item difficulty affects the efficiency of both targeted calibration designs and targeted multistage calibration designs. When arranging items within a given calibration design, test developers need to know the item difficulties to locate items optimally within the design. Usually, however, no empirical information about item difficulty is available before item calibration. Owing to this missing empirical data, test developers might fail to assign all items to the most suitable location within a calibration design.

Both research questions were addressed in a simulation study in which we varied the calibration design as well as the accuracy of the item distribution across the different booklets or modules within each design (i.e., the number of misplaced items). The results indicated that targeted multistage calibration designs were more efficient than ordinary targeted designs under optimal conditions. In particular, targeted multistage calibration designs provided more accurate estimates for very easy and very difficult items. Limited knowledge about item difficulty during test construction impaired the efficiency of all designs. The loss of efficiency was considerable for one of the two investigated targeted multistage calibration designs, whereas targeted designs were more robust.

References

Berger, M. P. F. (1991). On the efficiency of IRT models when applied to different sampling designs. Applied Psychological Measurement, 15(3), 293-306. doi:10.1177/014662169101500310


}, keywords = {CAT, Efficiency, Multistage Calibration}, url = {https://drive.google.com/file/d/1ko2LuiARKqsjL_6aupO4Pj9zgk6p_xhd/view?usp=sharing}, author = {Stephanie Berger and Angela J. Verschoor and Theo Eggen and Urs Moser} } @conference {2671, title = {An Empirical Simulation Study Using mstR for MST Designs}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Unlike other forms of adaptive testing, multistage testing (MST) provides many of the benefits of both adaptive and linear testing, and it has recently become the most sought-after format for computerized testing in educational assessment. It is well suited to testing educational achievement and can be adapted to practical educational survey testing. However, there are many practical considerations in designing an MST for operational implementation, including costs and benefits. Practitioners need to start with various simulations to evaluate candidate MST designs and their performance before implementation. The recently developed statistical tool mstR, an open-source R package, was released to support researchers and practitioners in the MST simulations needed for implementation.

A conventional MST design has a three-stage module structure (i.e., the 1-2-3 design). Alternatively, the composition of modules can diverge from one design to another (e.g., the 1-3 design). For advance planning of equivalence studies, this paper uses both the 1-2-3 design and the 1-3 design as the MST structures. To study the broad structure of these designs, this paper evaluates the different MST designs through simulations using the R package mstR. The empirical simulation study provides an introductory overview of mstR and describes what it offers using different MST structures built from a 2PL item bank. Further comparisons show the advantages of the different MST designs (e.g., the 1-2-3 design and the 1-3 design) for different practical implementations.

Built within the open-source statistical environment R, mstR provides a powerful simulation tool and allows psychologists, social scientists, and educational measurement scientists to apply it to innovative future assessments in the operational use of MST.

}, keywords = {mstR, multistage testing}, author = {Soo Lee} } @conference {2635, title = {Evaluation of Parameter Recovery, Drift, and DIF with CAT Data}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Parameter drift and differential item functioning (DIF) analyses are frequent components of a test maintenance plan. That is, after one or more test forms are published, organizations will often calibrate post-publication data at a later date to evaluate whether the performance of the items or the test has changed over time. For example, if item content is leaked, the items might gradually become easier over time, and item statistics or parameters can reflect this.

When tests are published under a computerized adaptive testing (CAT) paradigm, they are nearly always calibrated with item response theory (IRT). IRT calibrations assume that range restriction is not an issue {\textendash} that is, each item is administered to a wide range of examinee ability. CAT data violates this assumption. However, some organizations still wish to evaluate the continuing performance of the items from a DIF or drift paradigm.

This presentation will evaluate just how inaccurate DIF and drift analyses might be on CAT data, using a Monte Carlo parameter recovery methodology. Known item parameters will be used to generate both linear and CAT data sets, which are then calibrated for DIF and drift. In addition, we will implement randomesque item exposure constraints in some CAT conditions; this randomization directly alleviates the range restriction problem somewhat, but whether it improves the parameter recovery calibrations is an empirical question.


}, keywords = {CAT, DIF, Parameter Drift, Parameter Recovery}, url = {https://drive.google.com/open?id=1F7HCZWD28Q97sCKFIJB0Yps0H66NPeKq}, author = {Nathan Thompson and Jordan Stoeger} } @article {2618, title = {On the effect of adding clinical samples to validation studies of patient-reported outcome item banks: a simulation study}, journal = {Quality of Life Research}, volume = {25}, number = {7}, year = {2016}, pages = {1635{\textendash}1644}, abstract = {To increase the precision of estimated item parameters of item response theory models for patient-reported outcomes, general population samples are often enriched with samples of clinical respondents. Calibration studies provide little information on how this sampling scheme is incorporated into model estimation. In a small simulation study the impact of ignoring the oversampling of clinical respondents on item and person parameters is illustrated.}, issn = {1573-2649}, doi = {10.1007/s11136-015-1199-9}, url = {https://doi.org/10.1007/s11136-015-1199-9}, author = {Smits, Niels} } @article {2491, title = {Effect of Imprecise Parameter Estimation on Ability Estimation in a Multistage Test in an Automatic Item Generation Context}, journal = {Journal of Computerized Adaptive Testing}, volume = {4}, year = {2016}, pages = {1-18}, keywords = {Adaptive Testing, automatic item generation, errors in item parameters, item clones, multistage testing}, issn = {2165-6592}, doi = {10.7333/1608-040101}, url = {http://iacat.org/jcat/index.php/jcat/article/view/59/27}, author = {Colvin, Kimberly and Keller, Lisa A and Robin, Frederic} } @article {2507, title = {Exploration of Item Selection in Dual-Purpose Cognitive Diagnostic Computerized Adaptive Testing: Based on the RRUM}, journal = {Applied Psychological Measurement}, volume = {40}, number = {8}, year = {2016}, pages = {625-640}, abstract = {Cognitive diagnostic computerized adaptive testing (CD-CAT) can be divided into two broad categories: (a) single-purpose tests, which are based on the subject{\textquoteright}s knowledge state (KS) alone, and (b) dual-purpose tests, which are based on both the subject{\textquoteright}s KS and traditional ability level ($\theta$). This article seeks to identify the most efficient item selection method for the latter type of CD-CAT corresponding to various conditions and various evaluation criteria, respectively, based on the reduced reparameterized unified model (RRUM) and the two-parameter logistic model of item response theory (IRT-2PLM). The Shannon entropy (SHE) and Fisher information methods were combined to produce a new synthetic item selection index, that is, the {\textquotedblleft}dapperness with information (DWI){\textquotedblright} index, which concurrently considers both KS and $\theta$ within one step. The new method was compared with four other methods. The results showed that, in most conditions, the new method exhibited the best performance in terms of KS estimation and the second-best performance in terms of $\theta$ estimation. 
Item utilization uniformity and computing time are also considered for all the competing methods.}, doi = {10.1177/0146621616666008}, url = {http://apm.sagepub.com/content/40/8/625.abstract}, author = {Dai, Buyun and Zhang, Minqiang and Li, Guangming} } @article {2453, title = {The Effect of Upper and Lower Asymptotes of IRT Models on Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {39}, number = {7}, year = {2015}, pages = {551-565}, abstract = {In this article, the effect of the upper and lower asymptotes in item response theory models on computerized adaptive testing is shown analytically. This is done by deriving the step size between adjacent latent trait estimates under the four-parameter logistic model (4PLM) and two models it subsumes, the usual three-parameter logistic model (3PLM) and the 3PLM with upper asymptote (3PLMU). The authors show analytically that the large effect of the discrimination parameter on the step size holds true for the 4PLM and the two models it subsumes under both the maximum information method and the b-matching method for item selection. Furthermore, the lower asymptote helps reduce the positive bias of ability estimates associated with early guessing, and the upper asymptote helps reduce the negative bias induced by early slipping. Relative step size between modeling versus not modeling the upper or lower asymptote under the maximum Fisher information method (MI) and the b-matching method is also derived. It is also shown analytically why the gain from early guessing is smaller than the loss from early slipping when the lower asymptote is modeled, and vice versa when the upper asymptote is modeled. The benefit to loss ratio is quantified under both the MI and the b-matching method. Implications of the analytical results are discussed.}, doi = {10.1177/0146621615585850}, url = {http://apm.sagepub.com/content/39/7/551.abstract}, author = {Cheng, Ying and Liu, Cheng} } @article {2484, title = {Evaluating Content Alignment in Computerized Adaptive Testing}, journal = {Educational Measurement: Issues and Practice}, volume = {34}, number = {41-48}, year = {2015}, abstract = {The alignment between a test and the content domain it measures represents key evidence for the validation of test score inferences. Although procedures have been developed for evaluating the content alignment of linear tests, these procedures are not readily applicable to computerized adaptive tests (CATs), which require large item pools and do not use fixed test forms. This article describes the decisions made in the development of CATs that influence and might threaten content alignment. It outlines a process for evaluating alignment that is sensitive to these threats and gives an empirical example of the process.}, doi = {http://dx.doi.org/10.1111/emip.12094}, author = {Wise, S. L. and Kingsbury, G. G. and Webb, N. L.} } @article {2349, title = {An Enhanced Approach to Combine Item Response Theory With Cognitive Diagnosis in Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {51}, number = {4}, year = {2014}, pages = {358{\textendash}380}, abstract = {

Computerized adaptive testing offers the possibility of gaining information on both the overall ability and cognitive profile in a single assessment administration. Some algorithms aiming for these dual purposes have been proposed, including the shadow test approach, the dual information method (DIM), and the constraint weighted method. The current study proposed two new methods, aggregate ranked information index (ARI) and aggregate standardized information index (ASI), which appropriately addressed the noncompatibility issue inherent in the original DIM method. More flexible weighting schemes that put different emphasis on information about general ability (i.e., $\theta$ in item response theory) and information about cognitive profile (i.e., $\alpha$ in cognitive diagnostic modeling) were also explored. Two simulation studies were carried out to investigate the effectiveness of the new methods and weighting schemes. Results showed that the new methods with the flexible weighting schemes could produce more accurate estimation of both overall ability and cognitive profile than the original DIM. Among them, the ASI with both empirical and theoretical weights is recommended, and attribute-level weighting scheme is preferred if some attributes are considered more important from a substantive perspective.

}, issn = {1745-3984}, doi = {10.1111/jedm.12057}, url = {http://dx.doi.org/10.1111/jedm.12057}, author = {Wang, Chun and Zheng, Chanjin and Chang, Hua-Hua} } @article {2332, title = {Enhancing Pool Utilization in Constructing the Multistage Test Using Mixed-Format Tests}, journal = {Applied Psychological Measurement}, volume = {38}, number = {4}, year = {2014}, pages = {268-280}, abstract = {

This study investigated a new pool utilization method of constructing multistage tests (MST) using the mixed-format test based on the generalized partial credit model (GPCM). MST simulations of a classification test were performed to evaluate the MST design. A linear programming (LP) model was applied to perform MST reassemblies based on the initial MST construction. Three subsequent MST reassemblies were performed. For each reassembly, three test unit replacement ratios (TRRs; 0.22, 0.44, and 0.66) were investigated. The conditions of the three passing rates (30\%, 50\%, and 70\%) were also considered in the classification testing. The results demonstrated that various MST reassembly conditions increased the overall pool utilization rates, while maintaining the desired MST construction. All MST testing conditions performed equally well in terms of the precision of the classification decision.

}, doi = {10.1177/0146621613515545}, url = {http://apm.sagepub.com/content/38/4/268.abstract}, author = {Park, Ryoungsun and Kim, Jiseon and Chung, Hyewon and Dodd, Barbara G.} } @article {2316, title = {Estimating Measurement Precision in Reduced-Length Multi-Stage Adaptive Testing }, journal = {Journal of Computerized Adaptive Testing}, volume = {1}, year = {2013}, pages = {67-87}, doi = {10.7333/1309-0104067}, author = {Crotts, K.M. and Zenisky, A. L. and Sireci, S.G. and Li, X.} } @article {2249, title = {An Efficiency Balanced Information Criterion for Item Selection in Computerized Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {49}, number = {3}, year = {2012}, pages = {225{\textendash}246}, abstract = {

Successful administration of computerized adaptive testing (CAT) programs in educational settings requires that test security and item exposure control issues be taken seriously. Developing an item selection algorithm that strikes the right balance between test precision and level of item pool utilization is the key to successful implementation and long-term quality control of CAT. This study proposed a new item selection method using the {\textquotedblleft}efficiency balanced information{\textquotedblright} criterion to address issues with the maximum Fisher information method and stratification methods. According to the simulation results, the new efficiency balanced information method had desirable advantages over the other studied item selection methods in terms of improving the optimality of CAT assembly and utilizing items with low a-values while eliminating the need for item pool stratification.

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2012.00173.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2012.00173.x}, author = {Han, Kyung T.} } @article {2166, title = {An Empirical Evaluation of the Slip Correction in the Four Parameter Logistic Models With Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {36}, number = {2}, year = {2012}, pages = {75-87}, abstract = {

In a selected response test, aberrant responses such as careless errors and lucky guesses might cause error in ability estimation because these responses do not actually reflect the knowledge that examinees possess. In a computerized adaptive test (CAT), these aberrant responses could further cause serious estimation error due to dynamic item administration. To enhance the robust performance of CAT against aberrant responses, Barton and Lord proposed the four-parameter logistic (4PL) item response theory (IRT) model. However, most studies relevant to the 4PL IRT model were conducted based on simulation experiments. This study attempts to investigate the performance of the 4PL IRT model as a slip-correction mechanism with an empirical experiment. The results showed that the 4PL IRT model could not only reduce the problematic underestimation of the examinees{\textquoteright} ability introduced by careless mistakes in practical situations but also improve measurement efficiency.

}, doi = {10.1177/0146621611432862}, url = {http://apm.sagepub.com/content/36/2/75.abstract}, author = {Yen, Yung-Chin and Ho, Rong-Guey and Laio, Wen-Wei and Chen, Li-Ju and Kuo, Ching-Chin} } @mastersthesis {1997, title = {Effects of Different Computerized Adaptive Testing Strategies of Recovery of Ability}, volume = {Ph.D.}, year = {2011}, abstract = {

The purpose of the present study was to compare ability estimates obtained from a computerized adaptive testing (CAT) procedure with the paper-and-pencil administration results of the Student Selection Examination (SSE) science subtest, considering different ability estimation methods and test termination rules. There were two phases in the present study. In the first phase, a post-hoc simulation was conducted to examine the relationships between examinee ability levels estimated by the CAT and paper-and-pencil versions of the SSE. Maximum likelihood estimation and expected a posteriori were used as ability estimation methods. The test termination rules were a standard error threshold and a fixed number of items. The second phase was realized by implementing a live CAT administration with a group of examinees to investigate the performance of CAT in an environment other than a simulated administration. Findings of the post-hoc simulations indicated that CAT could be implemented for the SSE using the expected a posteriori estimation method with a standard error threshold of 0.30 or higher. The correlation between ability estimates obtained by CAT and the real SSE was 0.95. The mean number of items given to examinees by the CAT was 18.4. The correlation between live CAT and real SSE ability estimates was 0.74. The number of items used in the CAT administration was approximately 50\% of the items in the paper-and-pencil SSE science subtest. The results indicated that CAT for the SSE science subtest provided ability estimates with higher reliability using fewer items compared with the paper-and-pencil format.

}, author = {Kalender, I.} } @article {510, title = {Efficiency of static and computer adaptive short forms compared to full-length measures of depressive symptoms}, journal = {Quality of Life Research}, volume = {19(1)}, year = {2010}, pages = {125{\textendash}136}, author = {Choi, S. and Reise, S. P. and Pilkonis, P. A. and Hays, R. D. and Cella, D.} } @book {2048, title = {Elements of Adaptive Testing}, year = {2010}, pages = {437}, publisher = {Springer}, organization = {Springer}, address = {New York}, doi = {10.1007/978-0-387-85461-8}, author = {van der Linden, W. J. and Glas, C. A. W.} } @inbook {2063, title = {Estimation of the Parameters in an Item-Cloning Model for Adaptive Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {289-314}, chapter = {15}, doi = {10.1007/978-0-387-85461-8 }, author = {Glas, C. A. W. and van der Linden, W. J. and Geerlings, H.} } @inbook {1787, title = {Effect of early misfit in computerized adaptive testing on the recovery of theta}, year = {2009}, note = {{PDF File, 212 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Guyer, R. D. and Weiss, D. J.} } @article {2270, title = {Effekte des adaptiven Testens auf die Moti{\textlnot}vation zur Testbearbeitung [Effects of adaptive testing on test taking motivation].}, journal = {Diagnostica}, volume = {55}, year = {2009}, pages = {20-28}, author = {Frey, A. and Hartig, J. and Moosbrugger, H.} } @article {131, title = {Evaluation of a computer-adaptive test for the assessment of depression (D-CAT) in clinical application}, journal = {International Journal for Methods in Psychiatric Research}, volume = {18}, number = {1}, year = {2009}, note = {Journal articleInternational journal of methods in psychiatric researchInt J Methods Psychiatr Res. 2009 Feb 4.}, month = {Feb 4}, pages = {233-236}, edition = {2009/02/06}, abstract = {In the past, a German Computerized Adaptive Test, based on Item Response Theory (IRT), was developed for purposes of assessing the construct depression [Computer-adaptive test for depression (D-CAT)]. This study aims at testing the feasibility and validity of the real computer-adaptive application.The D-CAT, supplied by a bank of 64 items, was administered on personal digital assistants (PDAs) to 423 consecutive patients suffering from psychosomatic and other medical conditions (78 with depression). Items were adaptively administered until a predetermined reliability (r >/= 0.90) was attained. For validation purposes, the Hospital Anxiety and Depression Scale (HADS), the Centre for Epidemiological Studies Depression (CES-D) scale, and the Beck Depression Inventory (BDI) were administered. Another sample of 114 patients was evaluated using standardized diagnostic interviews [Composite International Diagnostic Interview (CIDI)].The D-CAT was quickly completed (mean 74 seconds), well accepted by the patients and reliable after an average administration of only six items. In 95\% of the cases, 10 items or less were needed for a reliable score estimate. Correlations between the D-CAT and the HADS, CES-D, and BDI ranged between r = 0.68 and r = 0.77. The D-CAT distinguished between diagnostic groups as well as established questionnaires do.The D-CAT proved an efficient, well accepted and reliable tool. Discriminative power was comparable to other depression measures, whereby the CAT is shorter and more precise. Item usage raises questions of balancing the item selection for content in the future. 
Copyright (c) 2009 John Wiley \& Sons, Ltd.}, isbn = {1049-8931 (Print)}, author = {Fliege, H. and Becker, J. and Walter, O. B. and Rose, M. and Bjorner, J. B. and Klapp, B. F.} } @inbook {1800, title = {An evaluation of a new procedure for computing information functions for Bayesian scores from computerized adaptive tests}, year = {2009}, note = {{PDF file, 571 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Ito, K. and Pommerich, M and Segall, D.} } @article {227, title = {An evaluation of patient-reported outcomes found computerized adaptive testing was efficient in assessing stress perception}, journal = {Journal of Clinical Epidemiology}, volume = {62}, number = {3}, year = {2009}, note = {Kocalevent, Ruya-DanielaRose, MatthiasBecker, JanineWalter, Otto BFliege, HerbertBjorner, Jakob BKleiber, DieterKlapp, Burghard FEvaluation StudiesUnited StatesJournal of clinical epidemiologyJ Clin Epidemiol. 2009 Mar;62(3):278-87, 287.e1-3. Epub 2008 Jul 18.}, pages = {278-287}, edition = {2008/07/22}, abstract = {OBJECTIVES: This study aimed to develop and evaluate a first computerized adaptive test (CAT) for the measurement of stress perception (Stress-CAT), in terms of the two dimensions: exposure to stress and stress reaction. STUDY DESIGN AND SETTING: Item response theory modeling was performed using a two-parameter model (Generalized Partial Credit Model). The evaluation of the Stress-CAT comprised a simulation study and real clinical application. A total of 1,092 psychosomatic patients (N1) were studied. Two hundred simulees (N2) were generated for a simulated response data set. Then the Stress-CAT was given to n=116 inpatients, (N3) together with established stress questionnaires as validity criteria. RESULTS: The final banks included n=38 stress exposure items and n=31 stress reaction items. In the first simulation study, CAT scores could be estimated with a high measurement precision (SE<0.32; rho>0.90) using 7.0+/-2.3 (M+/-SD) stress reaction items and 11.6+/-1.7 stress exposure items. The second simulation study reanalyzed real patients data (N1) and showed an average use of items of 5.6+/-2.1 for the dimension stress reaction and 10.0+/-4.9 for the dimension stress exposure. Convergent validity showed significantly high correlations. CONCLUSIONS: The Stress-CAT is short and precise, potentially lowering the response burden of patients in clinical decision making.}, keywords = {*Diagnosis, Computer-Assisted, Adolescent, Adult, Aged, Aged, 80 and over, Confidence Intervals, Female, Humans, Male, Middle Aged, Perception, Quality of Health Care/*standards, Questionnaires, Reproducibility of Results, Sickness Impact Profile, Stress, Psychological/*diagnosis/psychology, Treatment Outcome}, isbn = {1878-5921 (Electronic)0895-4356 (Linking)}, author = {Kocalevent, R. D. and Rose, M. and Becker, J. and Walter, O. B. and Fliege, H. and Bjorner, J. B. and Kleiber, D. and Klapp, B. F.} } @inbook {1879, title = {An examination of decision-theory adaptive testing procedures}, year = {2009}, note = {{PDF file, 203 KB}}, address = {D. J. 
Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {This research examined three ways to adaptively select items using decision theory: a traditional decision theory sequential testing approach (expected minimum cost), information gain (modeled after Kullback-Leibler), and a maximum discrimination approach, and then compared them all against an approach using maximum IRT Fisher information. It also examined the use of Wald{\textquoteright}s (1947) wellknown sequential probability ratio test, SPRT, as a test termination rule in this context. The minimum cost approach was notably better than the best-case possibility for IRT. Information gain, which is based on entropy and comes from information theory, was almost identical to minimum cost. The simple approach using the item that best discriminates between the two most likely classifications also fared better than IRT, but not as well as information gain or minimum cost. Through Wald{\textquoteright}s SPRT, large percentages of examinees can be accurately classified with very few items. With only 25 sequentially selected items, for example, approximately 90\% of the simulated NAEP examinees were classified with 86\% accuracy. The advantages of the decision theory model are many{\textemdash}the model yields accurate mastery state classifications, can use a small item pool, is simple to implement, requires little pretesting, is applicable to criterion-referenced tests, can be used in diagnostic testing, can be adapted to yield classifications on multiple skills, and should be easy to explain to non-statisticians.}, author = {Rudner, L. M.} } @book {1672, title = {Effect of early misfit in computerized adaptive testing on the recovery of theta}, year = {2008}, note = {{PDF file, 1,004 KB}}, address = {Unpublished Ph.D. dissertation, University of Minnesota, Minneapolis MN.}, author = {Guyer, R. D.} } @article {5, title = {Efficiency and sensitivity of multidimensional computerized adaptive testing of pediatric physical functioning}, journal = {Disability \& Rehabilitation}, volume = {30}, number = {6}, year = {2008}, note = {Allen, Diane DNi, PengshengHaley, Stephen MK02 HD45354-01/HD/NICHD NIH HHS/United StatesNIDDR H133P0001/DD/NCBDD CDC HHS/United StatesResearch Support, N.I.H., ExtramuralEnglandDisability and rehabilitationDisabil Rehabil. 2008;30(6):479-84.}, pages = {479-84}, edition = {2008/02/26}, abstract = {PURPOSE: Computerized adaptive tests (CATs) have efficiency advantages over fixed-length tests of physical functioning but may lose sensitivity when administering extremely low numbers of items. Multidimensional CATs may efficiently improve sensitivity by capitalizing on correlations between functional domains. Using a series of empirical simulations, we assessed the efficiency and sensitivity of multidimensional CATs compared to a longer fixed-length test. METHOD: Parent responses to the Pediatric Evaluation of Disability Inventory before and after intervention for 239 children at a pediatric rehabilitation hospital provided the data for this retrospective study. Reliability, effect size, and standardized response mean were compared between full-length self-care and mobility subscales and simulated multidimensional CATs with stopping rules at 40, 30, 20, and 10 items. RESULTS: Reliability was lowest in the 10-item CAT condition for the self-care (r = 0.85) and mobility (r = 0.79) subscales; all other conditions had high reliabilities (r > 0.94). 
All multidimensional CAT conditions had equivalent levels of sensitivity compared to the full set condition for both domains. CONCLUSIONS: Multidimensional CATs efficiently retain the sensitivity of longer fixed-length measures even with 5 items per dimension (10-item CAT condition). Measuring physical functioning with multidimensional CATs could enhance sensitivity following intervention while minimizing response burden.}, keywords = {*Disability Evaluation, Child, Computers, Disabled Children/*classification/rehabilitation, Efficiency, Humans, Outcome Assessment (Health Care), Psychometrics, Reproducibility of Results, Retrospective Studies, Self Care, Sensitivity and Specificity}, isbn = {0963-8288 (Print)0963-8288 (Linking)}, author = {Allen, D. D. and Ni, P. and Haley, S. M.} } @article {125, title = {The effect of including pretest items in an operational computerized adaptive test: Do different ability examinees spend different amounts of time on embedded pretest items?}, journal = {Educational Assessment}, volume = {12}, number = {2}, year = {2007}, pages = {161-173}, publisher = {Lawrence Erlbaum: US}, abstract = {The purpose of this study was to examine the effect of pretest items on response time in an operational, fixed-length, time-limited computerized adaptive test (CAT). These pretest items are embedded within the CAT, but unlike the operational items, are not tailored to the examinee{\textquoteright}s ability level. If examinees with higher ability levels need less time to complete these items than do their counterparts with lower ability levels, they will have more time to devote to the operational test questions. Data were from a graduate admissions test that was administered worldwide. Data from both quantitative and verbal sections of the test were considered. For the verbal section, examinees in the lower ability groups spent systematically more time on their pretest items than did those in the higher ability groups, though for the quantitative section the differences were less clear. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {ability, operational computerized adaptive test, pretest items, time}, isbn = {1062-7197 (Print); 1532-6977 (Electronic)}, author = {Ferdous, A. A. and Plake, B. S. and Chang, S-R.} } @booklet {316, title = {The effect of using item parameters calibrated from paper administrations in computer adaptive test administrations}, journal = {Journal of Technology, Learning, and Assessment}, volume = {5}, number = {7}, year = {2007}, pages = {1-29}, abstract = {Computer administered tests are becoming increasingly prevalent as computer technology becomes more readily available on a large scale. For testing programs that utilize both computer and paper administrations, mode effects are problematic in that they can result in examinee scores that are artificially inflated or deflated. As such, researchers have engaged in extensive studies of whether scores differ across paper and computer presentations of the same tests. The research generally seems to indicate that the more complicated it is to present or take a test on computer, the greater the possibility of mode effects. In a computer adaptive test, mode effects may be a particular concern if items are calibrated using item responses obtained from one administration mode (i.e., paper), and those parameters are then used operationally in a different administration mode (i.e., computer). 
This paper studies the suitability of using parameters calibrated from a paper administration for item selection and scoring in a computer adaptive administration, for two tests with lengthy passages that required navigation in the computer administration. The results showed that the use of paper calibrated parameters versus computer calibrated parameters in computer adaptive administrations had small to moderate effects on the reliability of examinee scores, at fairly short test lengths. This effect was generally diminished for longer test lengths. However, the results suggest that in some cases, some loss in reliability might be inevitable if paper-calibrated parameters are used in computer adaptive administrations.}, keywords = {Mode effects}, author = {Pommerich, M} } @article {2075, title = {The Effect of Using Item Parameters Calibrated from Paper Administrations in Computer Adaptive Test Administrations}, journal = {The Journal of Technology, Learning, and Assessment}, volume = {5}, number = {7}, year = {2007}, abstract = {

Computer administered tests are becoming increasingly prevalent as computer technology becomes more readily available on a large scale. For testing programs that utilize both computer and paper administrations, mode effects are problematic in that they can result in examinee scores that are artificially inflated or deflated. As such, researchers have engaged in extensive studies of whether scores differ across paper and computer presentations of the same tests. The research generally seems to indicate that the more complicated it is to present or take a test on computer, the greater the possibility of mode effects. In a computer adaptive test, mode effects may be a particular concern if items are calibrated using item responses obtained from one administration mode (i.e., paper), and those parameters are then used operationally in a different administration mode (i.e., computer). This paper studies the suitability of using parameters calibrated from a paper administration for item selection and scoring in a computer adaptive administration, for two tests with lengthy passages that required navigation in the computer administration. The results showed that the use of paper calibrated parameters versus computer calibrated parameters in computer adaptive administrations had small to moderate effects on the reliability of examinee scores, at fairly short test lengths. This effect was generally diminished for longer test lengths. However, the results suggest that in some cases, some loss in reliability might be inevitable if paper-calibrated parameters are used in computer adaptive administrations.

}, author = {Pommerich, M} } @article {2196, title = {Estimating the Standard Error of the Maximum Likelihood Ability Estimator in Adaptive Testing Using the Posterior-Weighted Test Information Function}, journal = {Educational and Psychological Measurement}, volume = {67}, number = {6}, year = {2007}, pages = {958-975}, abstract = {

The standard error of the maximum likelihood ability estimator is commonly estimated by evaluating the test information function at an examinee{\textquoteright}s current maximum likelihood estimate (a point estimate) of ability. Because the test information function evaluated at the point estimate may differ from the test information function evaluated at an examinee{\textquoteright}s true ability value, the estimated standard error may be biased under certain conditions. This is of particular concern in adaptive testing because the height of the test information function is expected to be higher at the current estimate of ability than at the actual value of ability. This article proposes using the posterior-weighted test information function in computing the standard error of the maximum likelihood ability estimator for adaptive test sessions. A simulation study showed that the proposed approach provides standard error estimates that are less biased and more efficient than those provided by the traditional point estimate approach.

}, doi = {10.1177/0013164407301544}, url = {http://epm.sagepub.com/content/67/6/958.abstract}, author = {Penfield, Randall D.} } @article {111, title = {Evaluation of computer adaptive testing systems}, journal = {International Journal of Web-Based Learning and Teaching Technologies}, volume = {2}, number = {1}, year = {2007}, pages = {70-87}, publisher = {IGI Global: US}, abstract = {Many educational organizations are trying to reduce the cost of the exams, the workload and delay of scoring, and the human errors. Also, they try to increase the accuracy and efficiency of the testing. Recently, most examination organizations use computer adaptive testing (CAT) as the method for large scale testing. This article investigates the current state of CAT systems and identifies their strengths and weaknesses. It evaluates 10 CAT systems using an evaluation framework of 15 domains categorized into three dimensions: educational, technical, and economical. The results show that the majority of the CAT systems give priority to security, reliability, and maintainability. However, they do not offer to the examinee any advanced support and functionalities. Also, the feedback to the examinee is limited and the presentation of the items is poor. Recommendations are made in order to enhance the overall quality of a CAT system. For example, alternative multimedia items should be available so that the examinee would choose a preferred media type. Feedback could be improved by providing more information to the examinee or providing information anytime the examinee wished. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computer adaptive testing systems, examination organizations, systems evaluation}, isbn = {1548-1093 (Print); 1548-1107 (Electronic)}, author = {Economides, A. A. and Roupas, C} } @article {163, title = {An exploration and realization of computerized adaptive testing with cognitive diagnosis}, journal = {Acta Psychologica Sinica}, volume = {39}, number = {4}, year = {2007}, pages = {747-753}, abstract = { An increased attention paid to {\textquotedblleft}cognitive bugs behavior,{\textquotedblright} appears to lead to an increased research interests in diagnostic testing based on Item Response Theory(IRT)that combines cognitive psychology and psychometrics. The study of cognitive diagnosis were applied mainly to Paper-and-Pencil (P\&P) testing. Rarely has it been applied to computerized adaptive testing CAT), To our knowledge, no research on CAT with cognitive diagnosis has been conducted in China. Since CAT is more efficient and accurate than P\&P testing, there is important to develop an application technique for cognitive diagnosis suitable for CAT. This study attempts to construct a preliminary CAT system for cognitive diagnosis.With the help of the methods for {\textquotedblleft} Diagnosis first, Ability estimation second {\textquotedblright}, the knowledge state conversion diagram was used to describe all the possible knowledge states in a domain of interest and the relation among the knowledge states at the diagnosis stage, where a new strategy of item selection based-on the algorithm of Depth First Search was proposed. On the other hand, those items that contain attributes which the examinee has not mastered were removed in ability estimation. 
At the stage of accurate ability estimation, all the items answered by each examinee not only matched his/her ability estimated value, but also were limited to those items whose attributes have been mastered by the examinee.We used Monte Carlo Simulation to simulate all the data of the three different structures of cognitive attributes in this study. These structures were tree-shaped, forest-shaped, and some isolated vertices (that are related to simple Q-matrix). Both tree-shaped and isolated vertices structure were derived from actual cases, while forest-shaped structure was a generalized simulation. 3000 examinees and 3000 items were simulated in the experiment of tree-shaped, 2550 examinees and 3100 items in forest-shaped, and 2000 examinees and 2500 items in isolated vertices. The maximum test length was all assumed as 30 items for all those experiments. The difficulty parameters and the logarithm of the discrimination were drawn from the standard normal distribution N(0,1). There were 100 examinees of each attribute pattern in the experiment of tree-shaped and 50 examinees of each attribute pattern in forest-shaped. In isolated vertices, 2000 examinees are students come from actual case.To assess the behaviors of the proposed diagnostic approach, three assessment indices were used. They are attribute pattern classification agreement rate (abr.APCAR), the Recovery (the average of the absolute deviation between the estimated value and the true value) and the average test length (abr. Length).Parts of results of Monte Carlo study were as follows.For the attribute structure of tree-shaped, APCAR is 84.27\%,Recovery is 0.17,Length is 24.80.For the attribute structure of forest-shaped, APCAR is 84.02\%,Recovery is 0.172,Length is 23.47.For the attribute structure of isolated vertices, APCAR is 99.16\%,Recorvery is 0.256,Length is 27.32.As show the above, we can conclude that the results are favorable. The rate of cognitive diagnosis accuracy has exceeded 80\% in each experiment, and the Recovery is also good. Therefore, it should be an acceptable idea to construct an initiatory CAT system for cognitive diagnosis, if we use the methods for {\textquotedblleft}Diagnosis first, Ability estimation second {\textquotedblright} with the help of both knowledge state conversion diagram and the new strategy of item selection based-on the algorithm of Depth First Search}, author = {Haijing, L. and Shuliang, D.} } @inbook {1762, title = {Exploring potential designs for multi-form structure computerized adaptive tests with uniform item exposure}, year = {2007}, note = {{PDF file, 295 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Edwards, M. C. and Thissen, D.} } @article {2173, title = {Effects of Estimation Bias on Multiple-Category Classification With an IRT-Based Adaptive Classification Procedure}, journal = {Educational and Psychological Measurement}, volume = {66}, number = {4}, year = {2006}, pages = {545-564}, abstract = {

The effects of five ability estimators, that is, maximum likelihood estimator, weighted likelihood estimator, maximum a posteriori, expected a posteriori, and Owen{\textquoteright}s sequential estimator, on the performances of the item response theory{\textendash}based adaptive classification procedure on multiple categories were studied via simulations. The following results were found. (a) The Bayesian estimators were more likely to misclassify examinees into an inward category because of their inward biases, when a fixed start value of zero was assigned to every examinee. (b) When moderately accurate start values were available, however, Bayesian estimators produced classifications that were slightly more accurate than was the maximum likelihood estimator or weighted likelihood estimator. Expected a posteriori was the procedure that produced the most accurate results among the three Bayesian methods. (c) All five estimators produced equivalent efficiencies in terms of number of items required, which was 50 or more items except for abilities that were less than -2.00 or greater than 2.00.

}, doi = {10.1177/0013164405284031}, url = {http://epm.sagepub.com/content/66/4/545.abstract}, author = {Yang, Xiangdong and Poggio, John C. and Glasnapp, Douglas R.} } @article {399, title = {Equating scores from adaptive to linear tests}, journal = {Applied Psychological Measurement}, volume = {30}, number = {6}, year = {2006}, pages = {493-508}, publisher = {Sage Publications: US}, abstract = {Two local methods for observed-score equating are applied to the problem of equating an adaptive test to a linear test. In an empirical study, the methods were evaluated against a method based on the test characteristic function (TCF) of the linear test and traditional equipercentile equating applied to the ability estimates on the adaptive test for a population of test takers. The two local methods were generally best. Surprisingly, the TCF method performed slightly worse than the equipercentile method. Both methods showed strong bias and uniformly large inaccuracy, but the TCF method suffered from extra error due to the lower asymptote of the test characteristic function. It is argued that the worse performances of the two methods are a consequence of the fact that they use a single equating transformation for an entire population of test takers and therefore have to compromise between the individual score distributions. }, keywords = {computerized adaptive testing, equipercentile equating, local equating, score reporting, test characteristic function}, isbn = {0146-6216 (Print)}, author = {van der Linden, W. J.} } @article {246, title = {Estimation of an examinee{\textquoteright}s ability in the web-based computerized adaptive testing program IRT-CAT}, journal = {J Educ Eval Health Prof}, volume = {3}, year = {2006}, note = {Lee, Yoon-HwanPark, Jung-HoPark, In-YongKorea (South)Journal of educational evaluation for health professionsJ Educ Eval Health Prof. 2006;3:4. Epub 2006 Nov 22.}, pages = {4}, edition = {2006/01/01}, abstract = {We developed a program to estimate an examinee s ability in order to provide freely available access to a web-based computerized adaptive testing (CAT) program. We used PHP and Java Script as the program languages, PostgresSQL as the database management system on an Apache web server and Linux as the operating system. A system which allows for user input and searching within inputted items and creates tests was constructed. We performed an ability estimation on each test based on a Rasch model and 2- or 3-parametric logistic models. Our system provides an algorithm for a web-based CAT, replacing previous personal computer-based ones, and makes it possible to estimate an examinee{\textquoteright}s ability immediately at the end of test.}, isbn = {1975-5937 (Electronic)}, author = {Lee, Y. H. and Park, J. H. and Park, I. Y.} } @article {233, title = {An evaluation of a patient-reported outcomes found computerized adaptive testing was efficient in assessing osteoarthritis impact}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {7}, year = {2006}, pages = {715-723}, abstract = {BACKGROUND AND OBJECTIVES: Evaluate a patient-reported outcomes questionnaire that uses computerized adaptive testing (CAT) to measure the impact of osteoarthritis (OA) on functioning and well-being. MATERIALS AND METHODS: OA patients completed 37 questions about the impact of OA on physical, social and role functioning, emotional well-being, and vitality. 
Questionnaire responses were calibrated and scored using item response theory, and two scores were estimated: a Total-OA score based on patients{\textquoteright} responses to all 37 questions, and a simulated CAT-OA score where the computer selected and scored the five most informative questions for each patient. Agreement between Total-OA and CAT-OA scores was assessed using correlations. Discriminant validity of Total-OA and CAT-OA scores was assessed with analysis of variance. Criterion measures included OA pain and severity, patient global assessment, and missed work days. RESULTS: Simulated CAT-OA and Total-OA scores correlated highly (r = 0.96). Both Total-OA and simulated CAT-OA scores discriminated significantly between patients differing on the criterion measures. F-statistics across criterion measures ranged from 39.0 (P < .001) to 225.1 (P < .001) for the Total-OA score, and from 40.5 (P < .001) to 221.5 (P < .001) for the simulated CAT-OA score. CONCLUSIONS: CAT methods produce valid and precise estimates of the impact of OA on functioning and well-being with significant reduction in response burden.}, isbn = {08954356}, author = {Kosinski, M. and Bjorner, J. and Warejr, J. and Sullivan, E. and Straus, W.} } @article {554, title = {Evaluation parameters for computer adaptive testing}, journal = {British Journal of Educational Technology}, volume = {Vol. 37}, year = {2006}, pages = {261-278}, author = {Georgiadou, E. and Triantafillou, E. and Economides, A. A.} } @article {35, title = {Expansion of a physical function item bank and development of an abbreviated form for clinical research}, journal = {Journal of Applied Measurement}, volume = {7}, number = {1}, year = {2006}, pages = {1-15}, publisher = {Richard M Smith: US}, abstract = {We expanded an existing 33-item physical function (PF) item bank with a sufficient number of items to enable computerized adaptive testing (CAT). Ten items were written to expand the bank and the new item pool was administered to 295 people with cancer. For this analysis of the new pool, seven poorly performing items were identified for further examination. This resulted in a bank with items that define an essentially unidimensional PF construct, cover a wide range of that construct, reliably measure the PF of persons with cancer, and distinguish differences in self-reported functional performance levels. We also developed a 5-item (static) assessment form ("BriefPF") that can be used in clinical research to express scores on the same metric as the overall bank. The BriefPF was compared to the PF-10 from the Medical Outcomes Study SF-36. Both short forms significantly differentiated persons across functional performance levels. While the entire bank was more precise across the PF continuum than either short form, there were differences in the area of the continuum in which each short form was more precise: the BriefPF was more precise than the PF-10 at the lower functional levels and the PF-10 was more precise than the BriefPF at the higher levels. Future research on this bank will include the development of a CAT version, the PF-CAT. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {clinical research, computerized adaptive testing, performance levels, physical function item bank, Psychometrics, test reliability, Test Validity}, isbn = {1529-7713 (Print)}, author = {Bode, R. K. and Lai, J-S. and Dineen, K. and Heinemann, A. W. and Shevrin, D. and Von Roenn, J. 
and Cella, D.} } @conference {2220, title = {The effectiveness of using multiple item pools in computerized adaptive testing}, booktitle = {Annual meeting of the National Council on Measurement in Education }, year = {2005}, month = {04/2005}, address = {Montreal, Canada}, author = {Zhang, J. and Chang, H.} } @article {69, title = {Effects of practical constraints on item selection rules at the early stages of computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {41}, number = {2}, year = {2004}, pages = {149-174}, publisher = {Blackwell Publishing: United Kingdom}, abstract = {The purpose of this study was to compare the effects of four item selection rules--(1) Fisher information (F), (2) Fisher information with a posterior distribution (FP), (3) Kullback-Leibler information with a posterior distribution (KP), and (4) completely randomized item selection (RN)--with respect to the precision of trait estimation and the extent of item usage at the early stages of computerized adaptive testing. The comparison of the four item selection rules was carried out under three conditions: (1) using only the item information function as the item selection criterion; (2) using both the item information function and content balancing; and (3) using the item information function, content balancing, and item exposure control. When test length was less than 10 items, FP and KP tended to outperform F at extreme trait levels in Condition 1. However, in more realistic settings, it could not be concluded that FP and KP outperformed F, especially when item exposure control was imposed. When test length was greater than 10 items, the three nonrandom item selection procedures performed similarly no matter what the condition was, while F had slightly higher item usage. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive testing, item selection rules, practical constraints}, isbn = {0022-0655 (Print)}, author = {Chen, S-Y. and Ankenmann, R. D.} } @article {332, title = {Estimating ability and item-selection strategy in self-adapted testing: A latent class approach}, journal = {Journal of Educational and Behavioral Statistics}, volume = {29}, number = {4}, year = {2004}, pages = {379-396}, publisher = {American Educational Research Assn: US}, abstract = {This article presents a psychometric model for estimating ability and item-selection strategies in self-adapted testing. In contrast to computer adaptive testing, in self-adapted testing the examinees are allowed to select the difficulty of the items. The item-selection strategy is defined as the distribution of difficulty conditional on the responses given to previous items. The article shows that missing responses in self-adapted testing are missing at random and can be ignored in the estimation of ability. However, the item-selection strategy cannot always be ignored in such an estimation. An EM algorithm is presented to estimate an examinee{\textquoteright}s ability and strategies, and a model fit is evaluated using Akaike{\textquoteright}s information criterion. The article includes an application with real data to illustrate how the model can be used in practice for evaluating hypotheses, estimating ability, and identifying strategies. In the example, four strategies were identified and related to examinees{\textquoteright} ability. It was shown that individual examinees tended not to follow a consistent strategy throughout the test. 
(PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {estimating ability, item-selection strategies, psychometric model, self-adapted testing}, isbn = {1076-9986 (Print)}, author = {Revuelta, J.} } @article {2086, title = {Evaluating scale stability of a computer adaptive testing system}, number = {05-12}, year = {2004}, institution = {GMAC}, address = {McLean, VA}, author = {Guo, F. and Wang, L.} } @book {1725, title = {Evaluating the effects of several multi-stage testing design variables on selected psychometric outcomes for certification and licensure assessment}, year = {2004}, address = {Unpublished doctoral dissertation, University of Massachusetts, Amherst}, author = {Zenisky, A. L.} } @article {245, title = {{\'E}valuation et multim{\'e}dia dans l{\textquoteright}apprentissage d{\textquoteright}une L2 [Assessment and multimedia in learning an L2]}, journal = {ReCALL}, volume = {16}, number = {2}, year = {2004}, pages = {475-487}, abstract = {In the first part of this paper different areas where technology may be used for second language assessment are described. First, item banking operations, which are generally based on item Response Theory but not necessarily restricted to dichotomously scored items, facilitate assessment task organization and require technological support. Second, technology may help to design more authentic assessment tasks or may be needed in some direct testing situations. Third, the assessment environment may be more adapted and more stimulating when technology is used to give the student more control. The second part of the paper presents different functions of assessment. The monitoring function (often called formative assessment) aims at adapting the classroom activities to students and to provide continuous feedback. Technology may be used to train the teachers in monitoring techniques, to organize data or to produce diagnostic information; electronic portfolios or quizzes that are built in some educational software may also be used for monitoring. The placement function is probably the one in which the application of computer adaptive testing procedures (e.g. French CAPT) is the most appropriate. Automatic scoring devices may also be used for placement purposes. Finally the certification function requires more valid and more reliable tools. Technology may be used to enhance the testing situation (to make it more authentic) or to facilitate data processing during the construction of a test. Almond et al. (2002) propose a four component model (Selection, Presentation, Scoring and Response) for designing assessment systems. Each component must be planned taking into account the assessment function. }, keywords = {Adaptive Testing, Computer Assisted Instruction, Educational, Foreign Language Learning, Program Evaluation, Technology computerized adaptive testing}, author = {Laurier, M.} } @article {291, title = {Evaluation of the CATSIB DIF procedure in a pretest setting}, journal = {Journal of Educational and Behavioral Statistics}, volume = {29}, number = {2}, year = {2004}, pages = {177-199}, publisher = {American Educational Research Assn: US}, abstract = {A new procedure, CATSIB, for assessing differential item functioning (DIF) on computerized adaptive tests (CATs) is proposed. CATSIB, a modified SIBTEST procedure, matches test takers on estimated ability and controls for impact-induced Type I error inflation by employing a CAT version of the SIBTEST "regression correction." 
The performance of CATSIB in terms of detection of DIF in pretest items was evaluated in a simulation study. Simulated test takers were adaptively administered 25 operational items from a pool of 1,000 and were linearly administered 16 pretest items that were evaluated for DIF. Sample size varied from 250 to 500 in each group. Simulated impact levels ranged from a 0- to 1-standard-deviation difference in mean ability levels. The results showed that CATSIB with the regression correction displayed good control over Type I error, whereas CATSIB without the regression correction displayed impact-induced Type I error inflation. With 500 test takers in each group, power rates were exceptionally high (84\% to 99\%) for values of DIF at the boundary between moderate and large DIF. For smaller samples of 250 test takers in each group, the corresponding power rates ranged from 47\% to 95\%. In addition, in all cases, CATSIB was very accurate in estimating the true values of DIF, displaying at most only minor estimation bias. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive tests, differential item functioning}, isbn = {1076-9986 (Print)}, author = {Nandakumar, R. and Roussos, L. A.} } @booklet {1340, title = {Effect of extra time on GRE{\textregistered} Quantitative and Verbal Scores (Research Report 03-13)}, year = {2003}, note = {{PDF file, 88 KB}}, address = {Princeton NJ: Educational Testing Service}, author = {Bridgeman, B. and Cline, F. and Hessinger, J.} } @conference {1012, title = {The effect of item selection method on the variability of CAT{\textquoteright}s ability estimates when item parameters are contaminated with measurement errors}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 275 KB}}, address = {Chicago IL}, author = {Li, Y. H. and Schafer, W. D.} } @booklet {1413, title = {The effects of model misfit in computerized classification test}, year = {2003}, note = {{PDF file, 432 KB}}, address = {Paper presented at the annual meeting of the National Council on Measurement in Education, Chicago IL}, author = {Jiao, H. and Lau, A. C.} } @article {213, title = {The effects of model specification error in item response theory-based computerized classification test using sequential probability ratio test}, journal = {Dissertation Abstracts International Section A: Humanities \& Social Sciences}, volume = {64}, number = {2-A}, year = {2003}, pages = {478}, abstract = {This study investigated the effects of model specification error on classification accuracy, error rates, and average test length in an Item Response Theory (IRT) based computerized classification test (CCT) using the sequential probability ratio test (SPRT) in making binary decisions from examinees{\textquoteright} dichotomous responses. This study consisted of three sub-studies. In each sub-study, one of the three unidimensional dichotomous IRT models, the 1-parameter logistic (1PL), the 2-parameter logistic (2PL), and the 3-parameter logistic (3PL) model, was set as the true model and the other two models were treated as the misfit models. Item pool composition, test length, and stratum depth were manipulated to simulate different test conditions. To ensure the validity of the study results, the true model based CCTs using the true and the recalibrated item parameters were compared first to study the effect of estimation error in item parameters in CCTs.
Then, the true model and the misfit model based CCTs were compared to accomplish the research goal. The results indicated that estimation error in item parameters did not affect classification results based on CCTs using SPRT. The effect of model specification error depended on the true model, the misfit model, and the item pool composition. When the 1PL or the 2PL IRT model was the true model, the use of another IRT model had little impact on the CCT results. When the 3PL IRT model was the true model, the use of the 1PL model raised the false positive error rates. The influence of using the 2PL instead of the 3PL model depended on the item pool composition. When the item discrimination parameters varied greatly from uniformity of one, the use of the 2PL IRT model raised the false negative error rates to above the nominal level. In the simulated test conditions with test length and item exposure constraints, using a misfit model in CCTs most often affected the average test length. Its effects on error rates and classification accuracy were negligible. It was concluded that in CCTs using SPRT, IRT model selection and evaluation are indispensable. (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, author = {Jiao, H.} } @conference {1280, title = {Effects of test administration mode on item parameter estimates}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 233 KB}}, address = {Chicago IL}, author = {Yi, Q. and Harris, D. J. and Wang, T. and Ban, J-C.} } @conference {1024, title = {Evaluating a new approach to detect aberrant responses in CAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, note = {Estimation of Ability Level by Using Only Observable Quantities in Adaptive Testing.}, address = {Chicago IL}, author = {Lu, Y. and Robin, F.} } @conference {1291, title = {Evaluating computer-based test security by generalized item overlap rates}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, address = {Chicago IL}, author = {Zhang, J. and Lu, T.} } @conference {1022, title = {Evaluating computerized adaptive testing design for the MCAT with realistic simulated data}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 985 KB}}, address = {Chicago IL}, author = {Lu, Y. and Pitoniak, M. and Rizavi, S. and Way, W. D. and Steffan, M.} } @conference {1201, title = {Evaluating stability of online item calibrations under varying conditions}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, address = {Chicago IL}, author = {Thomasson, G. L.} } @conference {1096, title = {Evaluating the comparability of English- and French-speaking examinees on a science achievement test administered using two-stage testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {PDF file, 568 K}, address = {Chicago IL}, author = {Puhan, G. and Gierl, M.} } @conference {1966, title = {The evaluation of exposure control procedures for an operational CAT}, booktitle = {Paper presented at the Annual Meeting of the American Educational Research Association}, year = {2003}, address = {Chicago IL}, author = {French, B. F. and Thompson, T.
T.} } @article {96, title = {An examination of exposure control and content balancing restrictions on item selection in CATs using the partial credit model}, journal = {Journal of Applied Measurement}, volume = {4}, number = {1}, year = {2003}, note = {1529-7713Journal Article}, pages = {24-42}, abstract = {The purpose of the present investigation was to systematically examine the effectiveness of the Sympson-Hetter technique and rotated content balancing relative to no exposure control and no content rotation conditions in a computerized adaptive testing system (CAT) based on the partial credit model. A series of simulated fixed and variable length CATs were run using two data sets generated to multiple content areas for three sizes of item pools. The 2 (exposure control) X 2 (content rotation) X 2 (test length) X 3 (item pool size) X 2 (data sets) yielded a total of 48 conditions. Results show that while both procedures can be used with no deleterious effect on measurement precision, the gains in exposure control, pool utilization, and item overlap appear quite modest. Difficulties involved with setting the exposure control parameters in small item pools make questionable the utility of the Sympson-Hetter technique with similar item pools.}, keywords = {*Computers, *Educational Measurement, *Models, Theoretical, Automation, Decision Making, Humans, Reproducibility of Results}, author = {Davis, L. L. and Pastor, D. A. and Dodd, B. G. and Chiang, C. and Fitzpatrick, S. J.} } @conference {1029, title = {Exposure control using adaptive multi-stage item bundles}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 116 KB}}, address = {Chicago IL}, author = {Luecht, RM} } @conference {260, title = {Exposure control using adaptive multi-stage item bundles}, booktitle = {annual meeting of the National Council on Measurement in Education}, year = {2003}, address = {Chicago, IL. USA}, author = {Luecht, RM} } @article {335, title = {The effect of test characteristics on aberrant response patterns in computer adaptive testing}, journal = {Dissertation Abstracts International Section A: Humanities \& Social Sciences}, volume = {62}, number = {10-A}, year = {2002}, pages = {3363}, abstract = {The advantages that computer adaptive testing offers over linear tests have been well documented. The Computer Adaptive Test (CAT) design is more efficient than the Linear test design as fewer items are needed to estimate an examinee{\textquoteright}s proficiency to a desired level of precision. In the ideal situation, a CAT will result in examinees answering different number of items according to the stopping rule employed. Unfortunately, the realities of testing conditions have necessitated the imposition of time and minimum test length limits on CATs. Such constraints might place a burden on the CAT test taker resulting in aberrant response behaviors by some examinees. Occurrence of such response patterns results in inaccurate estimation of examinee proficiency levels. This study examined the effects of test lengths, time limits and the interaction of these factors with the examinee proficiency levels on the occurrence of aberrant response patterns. The focus of the study was on the aberrant behaviors caused by rushed guessing due to restrictive time limits. 
Four different testing scenarios were examined: fixed length performance tests with and without content constraints, fixed length mastery tests, and variable length mastery tests without content constraints. For each of these testing scenarios, the effects of two test lengths, five different timing conditions, and the interaction between these factors with three ability levels on ability estimation were examined. For fixed and variable length mastery tests, decision accuracy was also examined in addition to estimation accuracy. Several indices were used to evaluate the estimation and decision accuracy for different testing conditions. The results showed that changing time limits had a significant impact on the occurrence of aberrant response patterns conditional on ability. Increasing test length had a negligible if not negative effect on ability estimation when rushed guessing occurred. In the case of performance testing, high-ability examinees suffered the most, whereas in classification testing, middle-ability examinees suffered the most. The decision accuracy was considerably affected in the case of variable length classification tests. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Rizavi, S. M.} } @article {370, title = {An EM approach to parameter estimation for the Zinnes and Griggs paired comparison IRT model}, journal = {Applied Psychological Measurement}, volume = {26}, number = {2}, year = {2002}, pages = {208-227}, abstract = {Borman et al. recently proposed a computer adaptive performance appraisal system called CARS II that utilizes paired comparison judgments of behavioral stimuli. To implement this approach, the paired comparison ideal point model developed by Zinnes and Griggs was selected. In this article, the authors describe item response and information functions for the Zinnes and Griggs model and present procedures for estimating stimulus and person parameters. Monte Carlo simulations were conducted to assess the accuracy of the parameter estimation procedures. The results indicated that at least 400 ratees (i.e., ratings) are required to obtain reasonably accurate estimates of the stimulus parameters and their standard errors. In addition, latent trait estimation improves as test length increases. The implications of these results for test construction are also discussed. }, keywords = {Adaptive Testing, Computer Assisted Testing, Item Response Theory, Maximum Likelihood, Personnel Evaluation, Statistical Correlation, Statistical Estimation}, author = {Stark, S. and F Drasgow} } @conference {223, title = {An empirical comparison of achievement level estimates from adaptive tests and paper-and-pencil tests}, booktitle = {annual meeting of the American Educational Research Association}, year = {2002}, address = {New Orleans, LA. USA}, keywords = {computerized adaptive testing}, author = {Kingsbury, G. G.} } @conference {972, title = {An empirical comparison of achievement level estimates from adaptive tests and paper-and-pencil tests}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 134 KB}}, address = {New Orleans LA}, author = {Kingsbury, G.
G.} } @booklet {1588, title = {An empirical investigation of selected multi-stage testing design variables on test assembly and decision accuracy outcomes for credentialing exams (Center for Educational Assessment Research Report No. 469)}, year = {2002}, address = {Amherst, MA: University of Massachusetts, School of Education}, author = {Zenisky, A. L.} } @conference {1203, title = {Employing new ideas in CAT to a simulated reading test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, note = {{PDF file, 216 KB}}, address = {New Orleans LA}, author = {Thompson, T.} } @article {672, title = {{\'E}tude de la distribution d{\textquoteright}{\'e}chantillonnage de l{\textquoteright}estimateur du niveau d{\textquoteright}habilet{\'e} en testing adaptatif en fonction de deux r{\`e}gles d{\textquoteright}arr{\^e}t dans le contexte de l{\textquoteright}application du mod{\`e}le de Rasch [Study of the sampling distribution of the proficiency estimator in adaptive testing as a function of two stopping rules in the context of the application of the Rasch model]}, journal = {Mesure et {\'e}valuation en {\'e}ducation}, volume = {24(2-3)}, year = {2002}, note = {(In French)}, pages = {23-40}, author = {Ra{\^\i}che, G. and Blais, J-G.} } @article {412, title = {Evaluation of selection procedures for computerized adaptive testing with polytomous items}, journal = {Applied Psychological Measurement}, volume = {26}, number = {4}, year = {2002}, note = {Sage Publications, US}, pages = {393-411}, abstract = {In the present study, a procedure that has been used to select dichotomous items in computerized adaptive testing was applied to polytomous items. This procedure was designed to select the item with maximum weighted information. In a simulation study, the item information function was integrated over a fixed interval of ability values and the item with the maximum area was selected. This maximum interval information item selection procedure was compared to a maximum point information item selection procedure. Substantial differences between the two item selection procedures were not found when computerized adaptive tests were evaluated on bias and the root mean square of the ability estimate. }, keywords = {computerized adaptive testing}, author = {van Rijn, P. W. and Theo Eggen and Hemker, B. T. and Sanders, P. F.} } @article {681, title = {Evaluation of selection procedures for computerized adaptive testing with polytomous items}, journal = {Applied Psychological Measurement}, volume = {26}, year = {2002}, pages = {393-411}, author = {van Rijn, P. W. and Theo Eggen and Hemker, B. T. and Sanders, P. F.} } @conference {1134, title = {An examination of decision-theory adaptive testing procedures}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 46 KB}}, address = {New Orleans, LA}, author = {Rudner, L. M.} } @booklet {1624, title = {An exploration of potentially problematic adaptive tests}, year = {2002}, note = {Research Report 02-05}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. and Steffen, M. and Golub-Smith, M. L. and Eignor, D. R.} } @conference {1121, title = {The effect of test and examinee characteristics on the occurrence of aberrant response patterns in a computerized adaptive test}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {$\#$RI01-01}, address = {Seattle WA}, author = {Rizavi, S.
and Swaminathan, H.} } @conference {1139, title = {Effective use of simulated data in an on-line item calibration in practical situations of computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, address = {Seattle WA}, author = {Samejima, F.} } @conference {880, title = {Effects of changes in the examinees{\textquoteright} ability distribution on the exposure control methods in CAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {$\#$CH01-02 {PDF file, 695 KB}}, address = {Seattle WA}, author = {Chang, S-W. and Twu, B.-Y.} } @conference {1138, title = {Efficient on-line item calibration using a nonparametric method adjusted to computerized adaptive testing}, booktitle = {Paper presented at the Annual Meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Samejima, F.} } @article {9, title = {Evaluation of an MMPI-A short form: Implications for adaptive testing}, journal = {Journal of Personality Assessment}, volume = {76}, number = {1}, year = {2001}, pages = {76-89}, abstract = {Reports some psychometric properties of an MMPI-Adolescent version (MMPI-A; J. N. Butcher et al, 1992) short form based on administration of the 1st 150 items of this test instrument. The authors report results for both the MMPI-A normative sample of 1,620 adolescents (aged 14-18 yrs) and a clinical sample of 565 adolescents (mean age 15.2 yrs) in a variety of treatment settings. The authors summarize results for the MMPI-A basic scales in terms of Pearson product-moment correlations generated between full administration and short-form administration formats and mean T score elevations for the basic scales generated by each approach. In this investigation, the authors also examine single-scale and 2-point congruences found for the MMPI-A basic clinical scales as derived from standard and short-form administrations. The authors present the relative strengths and weaknesses of the MMPI-A short form and discuss the findings in terms of implications for attempts to shorten the item pool through the use of computerized adaptive assessment approaches. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Mean, Minnesota Multiphasic Personality Inventory, Psychometrics, Statistical Correlation, Statistical Samples, Test Forms}, author = {Archer, R. P. and Tirrell, C. A. and Elkins, D. E.} } @article {2124, title = {An examination of conditioning variables used in computer adaptive testing for DIF analyses}, journal = {Applied Measurement in Education}, volume = {14}, year = {2001}, pages = {3-16}, author = {Walker, C. M. and Beretvas, S. N and Ackerman, T. 
A.} } @conference {854, title = {An examination of item review on a CAT using the specific information item selection algorithm}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {PDF file, 325 KB}}, address = {Seattle WA}, author = {Bowles, R and Pommerich, M} } @conference {865, title = {An examination of item review on a CAT using the specific information item selection algorithm}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {PDF file, 325 K}, address = {Seattle WA}, author = {Bowles, R and Pommerich, M} } @booklet {1338, title = {An examination of item review on computer adaptive tests}, year = {2001}, address = {Manuscript in preparation, University of Virginia}, author = {Bowles, R} } @conference {1000, title = {An examination of item selection rules by stratified CAT designs integrated with content balancing methods}, booktitle = {Paper presented at the Annual Meeting of the American Educational Research Association}, year = {2001}, note = {{PDF file, 296 KB}}, address = {Seattle WA}, author = {Leung, C-K.. and Chang, Hua-Hua and Hau, K-T.} } @conference {897, title = {An examination of testlet scoring and item exposure constraints in the Verbal Reasoning section of the MCAT}, year = {2001}, note = {{PDF file, 653 KB}}, author = {Davis, L. L. and Dodd, B. G.} } @booklet {1357, title = {An examination of testlet scoring and item exposure constraints in the verbal reasoning section of the MCAT}, year = {2001}, address = {MCAT Monograph Series: Association of American Medical Colleges}, author = {Davis, L. L. and Dodd, B. G.} } @article {36, title = {An examination of the comparative reliability, validity, and accuracy of performance ratings made using computerized adaptive rating scales}, journal = {Journal of Applied Psychology}, volume = {86}, number = {5}, year = {2001}, note = {214803450021-9010Journal ArticleValidation Studies}, pages = {965-973}, abstract = {This laboratory research compared the reliability, validity, and accuracy of a computerized adaptive rating scale (CARS) format and 2 relatively common and representative rating formats. The CARS is a paired-comparison rating task that uses adaptive testing principles to present pairs of scaled behavioral statements to the rater to iteratively estimate a ratee{\textquoteright}s effectiveness on 3 dimensions of contextual performance. Videotaped vignettes of 6 office workers were prepared, depicting prescripted levels of contextual performance, and 112 subjects rated these vignettes using the CARS format and one or the other competing format. Results showed 23\%-37\% lower standard errors of measurement for the CARS format. In addition, validity was significantly higher for the CARS format (d = .18), and Cronbach{\textquoteright}s accuracy coefficients showed significantly higher accuracy, with a median effect size of .08. The discussion focuses on possible reasons for the results.}, keywords = {*Computer Simulation, *Employee Performance Appraisal, *Personnel Selection, Adult, Automatic Data Processing, Female, Human, Male, Reproducibility of Results, Sensitivity and Specificity, Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Task Performance and Analysis, Video Recording}, author = {Borman, W. C. and Buck, D. E. and Hanson, M. A. and Motowidlo, S. J. and Stark, S. 
and F Drasgow} } @booklet {1440, title = {Effects of item-selection criteria on classification testing with the sequential probability ratio test (Research Report 2000-8)}, year = {2000}, note = {$\#$LI00-8}, address = {Iowa City, IA: American College Testing}, author = {Lin, C.-J. and Spray, J. A.} } @conference {837, title = {Effects of nonequivalence of item pools on ability estimates in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, note = {PDF file, 657 K}, address = {New Orleans LA}, author = {Ban, J. C. and Wang, T. and Yi, Q. and Harris, D. J.} } @article {179, title = {Emergence of item response modeling in instrument development and data analysis}, journal = {Medical Care}, volume = {38}, number = {Suppl. 9}, year = {2000}, pages = {II60-II65}, keywords = {Computer Assisted Testing, Health, Item Response Theory, Measurement, Statistical Validity, computerized adaptive testing, Test Construction, Treatment Outcomes}, author = {Hambleton, R. K.} } @article {205, title = {Estimating Item Parameters from Classical Indices for Item Pool Development with a Computerized Classification Test}, number = {Research Report 2000-4}, year = {2000}, institution = {ACT, Inc.}, address = {Iowa City, Iowa}, author = {Huang, C.-Y. and Kalohn, J.C. and Lin, C.-J. and Spray, J. A.} } @booklet {1344, title = {Estimating item parameters from classical indices for item pool development with a computerized classification test (ACT Research 2000-4)}, year = {2000}, address = {Iowa City IA: ACT, Inc}, author = {Chang, C.-Y. and Kalohn, J.C. and Lin, C.-J. and Spray, J.} } @booklet {1409, title = {Estimating item parameters from classical indices for item pool development with a computerized classification test (Research Report 2000-4)}, year = {2000}, address = {Iowa City IA: ACT Inc}, author = {Huang, C.-Y. and Kalohn, J.C. and Lin, C.-J. and Spray, J.} } @article {74, title = {Estimation of trait level in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {24}, number = {3}, year = {2000}, pages = {257-265}, abstract = {Notes that in computerized adaptive testing (CAT), an examinee{\textquoteright}s trait level (θ) must be estimated with reasonable accuracy based on a small number of item responses. A successful implementation of CAT depends on (1) the accuracy of statistical methods used for estimating θ and (2) the efficiency of the item-selection criterion. Methods of estimating θ suitable for CAT are reviewed, and the differences between Fisher and Kullback-Leibler information criteria for selecting items are discussed. The accuracy of different CAT algorithms was examined in an empirical study. The results show that correcting θ estimates for bias was necessary at earlier stages of CAT, but most CAT algorithms performed equally well for tests of 10 or more items. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Analysis (Statistical), Statistical Estimation, computerized adaptive testing}, author = {Cheng, P. E.
and Liou, M.} } @article {494, title = {ETS finds flaws in the way online GRE rates some students}, journal = {Chronicle of Higher Education}, volume = {47}, year = {2000}, pages = {a47}, author = {Carlson, S.} } @conference {898, title = {An examination of exposure control and content balancing restrictions on item selection in CATs using the partial credit model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2000}, address = {New Orleans, LA}, author = {Davis, L. L. and Pastor, D. A. and Dodd, B. G. and Chiang, C. and Fitzpatrick, S.} } @article {41, title = {An examination of the reliability and validity of performance ratings made using computerized adaptive rating scales}, journal = {Dissertation Abstracts International: Section B: The Sciences and Engineering}, volume = {61}, number = {1-B}, year = {2000}, pages = {570}, abstract = {This study compared the psychometric properties of performance ratings made using recently-developed computerized adaptive rating scales (CARS) to the psychometric properties of ratings made using more traditional paper-and-pencil rating formats, i.e., behaviorally-anchored and graphic rating scales. Specifically, the reliability, validity and accuracy of the performance ratings from each format were examined. One hundred twelve participants viewed six 5-minute videotapes of office situations and rated the performance of a target person in each videotape on three contextual performance dimensions-Personal Support, Organizational Support, and Conscientious Initiative-using CARS and either behaviorally-anchored or graphic rating scales. Performance rating properties were measured using Shrout and Fleiss{\textquoteright}s intraclass correlation (2, 1), Borman{\textquoteright}s differential accuracy measure, and Cronbach{\textquoteright}s accuracy components as indexes of rating reliability, validity, and accuracy, respectively. Results found that performance ratings made using the CARS were significantly more reliable and valid than performance ratings made using either of the other formats. Additionally, CARS yielded more accurate performance ratings than the paper-and-pencil formats. The nature of the CARS system (i.e., its adaptive nature and scaling methodology) and its paired comparison judgment task are offered as possible reasons for the differences found in the psychometric properties of the performance ratings made using the various rating formats. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Performance Tests, Rating Scales, Reliability, Test, Test Validity}, author = {Buck, D. E.} } @article {364, title = {An exploratory analysis of item parameters and characteristics that influence item level response time}, journal = {Dissertation Abstracts International Section A: Humanities and Social Sciences}, volume = {61}, number = {5-A}, year = {2000}, pages = {1812}, abstract = {This research examines the relationship between item level response time and (1) item discrimination, (2) item difficulty, (3) word count, (4) item type, and (5) whether a figure is included in an item. Data are from the Graduate Management Admission Test, which is currently offered only as a computerized adaptive test. Analyses revealed significant differences in response time between the five item types: problem solving, data sufficiency, sentence correction, critical reasoning, and reading comprehension.
For this reason, the planned pairwise and complex analyses were run within each item type. Pairwise curvilinear regression analyses explored the relationship between response time and item discrimination, item difficulty, and word count. Item difficulty significantly contributed to the prediction of response time for each item type; two of the relationships were significantly quadratic. Item discrimination significantly contributed to the prediction of response time for only two of the item types; one revealed a quadratic relationship and the other a cubic relationship. Word count had a significant linear relationship with response time for all the item types except reading comprehension, for which there was no significant relationship. Multiple regression analyses using word count, item difficulty, and item discrimination predicted between 35.4\% and 71.4\% of the variability in item response time across item types. The results suggest that response time research should consider the type of item that is being administered and continue to explore curvilinear relationships between response time and its predictor variables. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Item Analysis (Statistical), Item Response Theory, Problem Solving, Reaction Time, Reading Comprehension, Reasoning}, author = {Smith, Russell Winsor} } @article {220, title = {The effect of model misspecification on classification decisions made using a computerized test}, journal = {Journal of Educational Measurement}, volume = {36}, number = {1}, year = {1999}, note = {National Council on Measurement in Education, US}, pages = {47-59}, abstract = {Many computerized testing algorithms require the fitting of some item response theory (IRT) model to examinees{\textquoteright} responses to facilitate item selection, the determination of test stopping rules, and classification decisions. Some IRT models are thought to be particularly useful for small volume certification programs that wish to make the transition to computerized adaptive testing (CAT). The 1-parameter logistic model (1-PLM) is usually assumed to require a smaller sample size than the 3-parameter logistic model (3-PLM) for item parameter calibrations. This study examined the effects of model misspecification on the precision of the decisions made using the sequential probability ratio test. For this comparison, the 1-PLM was used to estimate item parameters, even though the items{\textquoteright} characteristics were represented by a 3-PLM. Results demonstrate that the 1-PLM produced considerably more decision errors under simulation conditions similar to a real testing environment, compared to the true model and to a fixed-form standard reference set of items. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Kalohn, J.C. and Spray, J. A.} } @article {665, title = {The effects of test difficulty manipulation in computerized adaptive testing and self-adapted testing}, journal = {Applied Measurement in Education}, volume = {12}, year = {1999}, pages = {167-184}, author = {Ponsoda, V. and Olea, J. and Rodriguez, M. S. and Revuelta, J.} } @article {740, title = {Empirical initialization of the trait estimator in adaptive testing}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, note = {[Error correction in 23, 248]}, pages = {21-29}, author = {van der Linden, W.
J.} } @conference {996, title = {An enhanced stratified computerized adaptive testing design}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1999}, note = {{PDF file, 478 KB}}, address = {Montreal, Canada}, author = {Leung, C-K.. and Chang, Hua-Hua and Hau, K-T.} } @article {234, title = {Evaluating the usefulness of computerized adaptive testing for medical in-course assessment}, journal = {Academic Medicine}, volume = {74}, number = {10}, year = {1999}, note = {Kreiter, C DFerguson, KGruppen, L DUnited statesAcademic medicine : journal of the Association of American Medical CollegesAcad Med. 1999 Oct;74(10):1125-8.}, month = {Oct}, pages = {1125-8}, edition = {1999/10/28}, abstract = {PURPOSE: This study investigated the feasibility of converting an existing computer-administered, in-course internal medicine test to an adaptive format. METHOD: A 200-item internal medicine extended matching test was used for this research. Parameters were estimated with commercially available software with responses from 621 examinees. A specially developed simulation program was used to retrospectively estimate the efficiency of the computer-adaptive exam format. RESULTS: It was found that the average test length could be shortened by almost half with measurement precision approximately equal to that of the full 200-item paper-and-pencil test. However, computer-adaptive testing with this item bank provided little advantage for examinees at the upper end of the ability continuum. An examination of classical item statistics and IRT item statistics suggested that adding more difficult items might extend the advantage to this group of examinees. CONCLUSIONS: Medical item banks presently used for incourse assessment might be advantageously employed in adaptive testing. However, it is important to evaluate the match between the items and the measurement objective of the test before implementing this format.}, keywords = {*Automation, *Education, Medical, Undergraduate, Educational Measurement/*methods, Humans, Internal Medicine/*education, Likelihood Functions, Psychometrics/*methods, Reproducibility of Results}, isbn = {1040-2446 (Print)}, author = {Kreiter, C. D. and Ferguson, K. and Gruppen, L. D.} } @conference {1233, title = {An examination of conditioning variables in DIF analysis in a computer adaptive testing environment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Walker, C. M. and Ackerman, T. A.} } @article {808, title = {Examinee judgments of changes in item difficulty: Implications for item review in computerized adaptive testing}, journal = {Applied Measurement in Education}, volume = {12}, year = {1999}, pages = {185-198}, author = {Wise, S. L. and Finney, S. J., and Enders, C. K. and Freeman, S.A. and Severance, D.D.} } @booklet {1348, title = {Exploring the relationship between item exposure rate and test overlap rate in computerized adaptive testing}, year = {1999}, note = {(Also ACT Research Report 99-5). (Also presented at American Educational Research Association, 1999)}, address = {Paper presented at the annual meeting of the National Council on Measurement in Education, Montreal, Canada}, author = {Chen, S. and Ankenmann, R. D. and Spray, J. 
A.} } @booklet {1349, title = {Exploring the relationship between item exposure rate and test overlap rate in computerized adaptive testing (ACT Research Report series 99-5)}, year = {1999}, note = {(also National Council on Measurement in Education paper, 1999).}, address = {Iowa City IA: ACT, Inc}, author = {Chen, S-Y. and Ankenmann, R. D. and Spray, J. A.} } @article {177, title = {The effect of item pool restriction on the precision of ability measurement for a Rasch-based CAT: comparisons to traditional fixed length examinations}, journal = {J Outcome Meas}, volume = {2}, number = {2}, year = {1998}, note = {983263801090-655xJournal Article}, pages = {97-122}, abstract = {This paper describes a method for examining the precision of a computerized adaptive test with a limited item pool. Standard errors of measurement ascertained in the testing of simulees with a CAT using a restricted pool were compared to the results obtained in a live paper-and-pencil achievement testing of 4494 nursing students on four versions of an examination of calculations of drug administration. CAT measures of precision were considered when the simulated examinee pools were uniform and normal. Precision indices were also considered in terms of the number of CAT items required to reach the precision of the traditional tests. Results suggest that regardless of the size of the item pool, CAT provides greater precision in measurement with a smaller number of items administered even when the choice of items is limited but fails to achieve equiprecision along the entire ability continuum.}, keywords = {*Decision Making, Computer-Assisted, Comparative Study, Computer Simulation, Education, Nursing, Educational Measurement/*methods, Human, Models, Statistical, Psychometrics/*methods}, author = {Halkitis, P. N.} } @conference {962, title = {Effect of item selection on item exposure rates within a computerized classification test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego CA}, author = {Kalohn, J.C. and Spray, J. A.} } @conference {1297, title = {An empirical Bayes approach to Mantel-Haenszel DIF analysis: Theoretical development and application to CAT data}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Zwick, R.} } @conference {1242, title = {Essentially unbiased Bayesian estimates in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1998}, address = {San Diego}, author = {Wang, T. and Lau, C. and Hanson, B. A.} } @conference {889, title = {Evaluating and insuring measurement precision in adaptive testing}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Davey, T. and Nering, M. L.} } @conference {1182, title = {Evaluation of methods for the use of underutilized items in a CAT environment}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Steffen, M.
and Liu, M.} } @conference {1187, title = {An examination of item-level response times from an operational CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {Urbana IL}, author = {Swygert, K.} } @conference {1168, title = {Expected losses for individuals in Computerized Mastery Testing}, booktitle = {Paper presented at the annual meeting of National Council on Measurement in Education}, year = {1998}, address = {San Diego}, author = {Smith, R. and Lewis, C.} } @article {822, title = {The effect of adaptive administration on the variability of the Mantel-Haenszel measure of differential item functioning}, journal = {Educational and Psychological Measurement}, volume = {57}, year = {1997}, pages = {412-421}, author = {Zwick, R.} } @article {67, title = {The effect of population distribution and method of theta estimation on computerized adaptive testing (CAT) using the rating scale model}, journal = {Educational \& Psychological Measurement}, volume = {57}, number = {3}, year = {1997}, note = {Sage Publications, US}, pages = {422-439}, abstract = {Investigated the effect of population distribution on maximum likelihood estimation (MLE) and expected a posteriori estimation (EAP) in a simulation study of computerized adaptive testing (CAT) based on D. Andrich{\textquoteright}s (1978) rating scale model. Comparisons were made among MLE and EAP with a normal prior distribution and EAP with a uniform prior distribution within 2 data sets: one generated using a normal trait distribution and the other using a negatively skewed trait distribution. Descriptive statistics, correlations, scattergrams, and accuracy indices were used to compare the different methods of trait estimation. The EAP estimation with a normal prior or uniform prior yielded results similar to those obtained with MLE, even though the prior did not match the underlying trait distribution. An additional simulation study based on real data suggested that more work is needed to determine the optimal number of quadrature points for EAP in CAT based on the rating scale model. The choice between MLE and EAP for particular measurement situations is discussed. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Chen, S-K. and Hou, L. Y. and Fitzpatrick, S. J. and Dodd, B. G.} } @article {501, title = {The effect of population distribution and methods of theta estimation on computerized adaptive testing (CAT) using the rating scale model}, journal = {Educational and Psychological Measurement}, volume = {57}, year = {1997}, pages = {422-439}, author = {Chen, S. and Hou, L. and Fitzpatrick, S. J. and Dodd, B.} } @conference {1147, title = {The effects of motivation on equating adaptive and conventional tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Segall, D. O.} } @inbook {1885, title = {Equating the CAT-ASVAB}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. McBride (Eds.), Computerized adaptive testing: From inquiry to operation (pp. 181-198). Washington DC: American Psychological Association.}, author = {Segall, D. 
O.} } @conference {1239, title = {Essentially unbiased EAP estimates in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1997}, note = {$\#$WA97-01 PDF file, 225 K}, address = {Chicago}, author = {Wang, T.} } @article {472, title = {Evaluating an automatically scorable, open-ended response type for measuring mathematical reasoning in computer-adaptive tests}, year = {1997}, author = {Bennett, R. E. and Steffen, M. and Singley, M.K. and Morley, M. and Jacquemin, D.} } @conference {1241, title = {Evaluating comparability in computerized adaptive testing: A theoretical framework with an example}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1997}, address = {Chicago}, author = {Wang, T. and Kolen, M. J.} } @inbook {1792, title = {Evaluating item calibration medium in computerized adaptive testing}, year = {1997}, address = {W.A. Sands, B.K. Waters and J.R. McBride, Computerized adaptive testing: From inquiry to operation (pp. 161-168). Washington, DC: American Psychological Association.}, author = {Hetter, R. D. and Segall, D. O. and Bloxom, B. M.} } @conference {1264, title = {Examinee issues in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, note = {[ERIC ED 408 329]}, address = {Chicago IL}, author = {Wise, S. L.} } @conference {826, title = {Effect of altering passing score in CAT when unidimensionality is violated}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1996}, month = {April}, address = {New York NY}, author = {Abdel-Fattah, A. A. and Lau, CA and Spray, J. A.} } @article {149, title = {The effect of individual differences variables on the assessment of ability for Computerized Adaptive Testing}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {57}, number = {6-B}, year = {1996}, pages = {4085}, abstract = {Computerized Adaptive Testing (CAT) continues to gain momentum as the accepted testing modality for a growing number of certification, licensure, education, government and human resource applications. However, the developers of these tests have for the most part failed to adequately explore the impact of individual differences such as test anxiety on the adaptive testing process. It is widely accepted that non-cognitive individual differences variables interact with the assessment of ability when using written examinations. Logic would dictate that individual differences variables would equally affect CAT. Two studies were used to explore this premise. In the first study, 507 examinees were given a test anxiety survey prior to taking a high stakes certification exam using CAT or using a written format. All examinees had already completed their course of study, and the examination would be their last hurdle prior to being awarded certification. High test anxious examinees performed worse than their low anxious counterparts on both testing formats. The second study replicated the finding that anxiety depresses performance in CAT. It also addressed the differential effect of anxiety on within test performance. Examinees were candidates taking their final certification examination following a four year college program. Ability measures were calculated for each successive part of the test for 923 subjects. 
Within subject performance varied depending upon test position. High anxious examinees performed poorly at all points in the test, while low and medium anxious examinee performance peaked in the middle of the test. If test anxiety and performance measures were actually the same trait, then low anxious individuals should have performed equally well throughout the test. The observed interaction of test anxiety and time on task serves as strong evidence that test anxiety has motivationally mediated as well as cognitively mediated effects. The results of the studies are discussed. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Gershon, R. C.} } @conference {1221, title = {Effects of answer feedback and test anxiety on the psychometric and motivational characteristics of computer-adaptive and self-adaptive vocabulary tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, note = {$\#$VI96-01}, author = {Vispoel, W. P. and Brunsman, B. and Forte, E. and Bleiler, T.} } @conference {1226, title = {Effects of answer review and test anxiety on the psychometric and motivational characteristics of computer-adaptive and self-adaptive vocabulary tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York}, author = {Vispoel, W. and Forte, E. and Boo, J.} } @conference {952, title = {The effects of methods of theta estimation, prior distribution, and number of quadrature points on CAT using the graded response model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1996}, address = {New York NY}, author = {Hou, L. and Chen, S. and Dodd, B. G. and Fitzpatrick, S. J.} } @book {1686, title = {The effects of person misfit in computerized adaptive testing}, year = {1996}, address = {Unpublished doctoral dissertation, University of Minnesota, Minneapolis}, author = {Nering, M. L.} } @conference {921, title = {Effects of randomesque item selection on CAT item exposure rates and proficiency estimation under 1- and 2-PL models}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1996}, address = {New York}, author = {Featherman, C. M. and Subhiyah, R. G. and Hadadi, A.} } @conference {1116, title = {An evaluation of a two-stage testlet design for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1996}, address = {Banff, Alberta, Canada}, author = {Reese, L. M. and Schnipke, D. L.} } @conference {916, title = {The effect of ability estimation for polytomous CAT in different item selection procedures}, booktitle = {Paper presented at the Annual meeting of the Psychometric Society}, year = {1995}, address = {Minneapolis MN}, author = {Fan, M. and Hsu, Y.} } @conference {825, title = {The effect of model misspecification on classification decisions made using a computerized test: UIRT versus MIRT}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1995}, note = {$\#$AB95-01}, address = {Minneapolis MN}, author = {Abdel-Fattah, A. A. and Lau, C.-M. A.} } @conference {1174, title = {The effect of model misspecification on classification decisions made using a computerized test: 3-PLM vs.
1PLM (and UIRT versus MIRT)}, booktitle = {Paper presented at the Annual Meeting of the Psychometric Society}, year = {1995}, note = {$\#$SP95-01}, address = {Minneapolis, MN}, author = {Spray, J. A. and Kalohn, J.C. and Schulz, M. and Fleer, P. Jr.} } @conference {882, title = {The effect of population distribution and methods of theta estimation on CAT using the rating scale model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1995}, address = {San Francisco}, author = {Chen, S. and Hou, L. and Fitzpatrick, S. J. and Dodd, B. G.} } @article {2126, title = {Effect of Rasch calibration on ability and DIF estimation in computer-adaptive tests}, journal = {Journal of Educational Measurement}, volume = {32}, year = {1995}, pages = {341-363}, author = {Zwick, R. and Thayer, D. T. and Wingersky, M.} } @article {685, title = {Effects and underlying mechanisms of self-adapted testing}, journal = {Journal of Educational Psychology}, volume = {87}, year = {1995}, pages = {103-116}, author = {Rocklin, T. R. and O{\textquoteright}Donnell, A. M. and Holst, P. M.} } @conference {1146, title = {The effects of item compromise on computerized adaptive test scores}, booktitle = {Paper presented at the meeting of the Society for Industrial and Organizational Psychology}, year = {1995}, address = {Orlando, FL}, author = {Segall, D. O.} } @book {1694, title = {El control de la exposici{\'o}n de los {\'\i}tems en tests adaptativos informatizados [Item exposure control in computerized adaptive tests]}, year = {1995}, address = {Unpublished master{\textquoteright}s dissertation, Universidad Aut{\'o}noma de Madrid, Spain}, author = {Revuelta, J.} } @conference {1036, title = {Equating computerized adaptive certification examinations: The Board of Registry series of studies}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco}, author = {Lunz, M. E. and Bergstrom, Betty A.} } @conference {1145, title = {Equating the CAT-ASVAB: Experiences and lessons learned}, booktitle = {Paper presented at the meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco}, author = {Segall, D. O.} } @conference {1153, title = {Equating the CAT-ASVAB: Issues and approach}, booktitle = {Paper presented at the meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco}, author = {Segall, D. O. and Carter, G.} } @conference {1051, title = {Equating the computerized adaptive edition of the Differential Aptitude Tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco CA}, author = {J. R. McBride} } @conference {1190, title = {Estimation of item difficulty from restricted CAT calibration samples}, booktitle = {Paper presented at the annual conference of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco}, author = {Sykes, R. and Ito, K.} } @booklet {1405, title = {An evaluation of alternative concepts for administering the Armed Services Vocational Aptitude Battery to applicants for enlistment}, year = {1995}, address = {DMDC Technical Report 95-013. Monterey, CA: Personnel Testing Division, Defense Manpower Data Center}, author = {Hogan, P.F. and J. R. McBride and Curran, L.
T.} } @conference {1049, title = {Early psychometric research in the CAT-ASVAB Project}, booktitle = {Paper presented at the 102nd Annual Convention of the American Psychological Association}, year = {1994}, address = {Los Angeles CA}, author = {J. R. McBride} } @conference {955, title = {The effect of restricting ability distributions in the estimation of item difficulties: Implications for a CAT implementation}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1994}, address = {New Orleans}, author = {Ito, K. and Sykes, R.C.} } @article {376, title = {The effect of review on the psychometric characteristics of computerized adaptive tests}, journal = {Applied Measurement in Education}, volume = {7}, number = {3}, year = {1994}, note = {Lawrence Erlbaum, US}, pages = {211-222}, abstract = {Explored the effect of reviewing items and altering responses on examinee ability estimates, test precision, test information, decision confidence, and pass/fail status for computerized adaptive tests. Two different populations of examinees took different computerized certification examinations. For purposes of analysis, each population was divided into 3 ability groups (high, medium, and low). Ability measures before and after review were highly correlated, but slightly lower decision confidence was found after review. Pass/fail status was most affected for examinees with estimates close to the pass point. Decisions remained the same for 94\% of the examinees. Test precision is only slightly affected by review, and the average information loss can be recovered by the addition of one item. (PsycINFO Database Record (c) 2002 APA, all rights reserved).}, author = {Stone, G. E. and Lunz, M. E.} } @book {1635, title = {Effects of computerized adaptive test anxiety on nursing licensure examinations}, year = {1994}, address = {Dissertation Abstracts International, A (Humanities and Social Sciences), 54 (9-A), 3410}, author = {Arrowwood, V. E.} } @conference {944, title = {The effects of item pool depth on the accuracy of pass/fail decisions for NCLEX using CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1994}, address = {New Orleans}, author = {Haynie, K.A. and Way, W. D.} } @article {262, title = {An empirical study of computerized adaptive test administration conditions}, journal = {Journal of Educational Measurement}, volume = {31}, number = {3}, year = {1994}, month = {Fall}, pages = {251-263}, author = {Lunz, M. E. and Bergstrom, Betty A.} } @inbook {25, title = {The equivalence of Rasch item calibrations and ability estimates across modes of administration}, booktitle = {Objective measurement: Theory into practice}, volume = {2}, year = {1994}, pages = {122-128}, publisher = {Ablex Publishing Co.}, organization = {Ablex Publishing Co.}, address = {Norwood, N.J. USA}, keywords = {computerized adaptive testing}, author = {Bergstrom, Betty A. and Lunz, M.
E.} } @conference {914, title = {Establishing the comparability of the NCLEX using CAT with traditional NCLEX examinations}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1994}, address = {New Orleans, LA}, author = {Eignor, D. R. and Way, W. D. and Amoss, K.E.} } @conference {887, title = {Evaluation and implementation of CAT-ASVAB}, booktitle = {Paper presented at the annual meeting of the American Psychological Association}, year = {1994}, address = {Los Angeles}, author = {Curran, L. T. and Wise, L. L.} } @book {1692, title = {The exploration of an alternative method for scoring computer adaptive tests}, year = {1994}, address = {Unpublished doctoral dissertation, Lincoln NE: University of Nebraska}, author = {Potenza, M.} } @conference {1229, title = {The efficiency, reliability, and concurrent validity of adaptive and fixed-item tests of music listening skills}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta GA}, author = {Vispoel, W. P. and Wang, T. and Bleiler, T.} } @conference {1115, title = {Establishing time limits for the GRE computer adaptive tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta GA}, author = {Reese, C. M.} } @article {265, title = {The effect of review on student ability and test efficiency for computerized adaptive tests}, journal = {Applied Psychological Measurement}, volume = {16}, number = {1}, year = {1992}, note = {Sage Publications, US}, pages = {33-40}, abstract = {220 students were randomly assigned to a review condition for a medical technology test; their test instructions indicated that each item must be answered when presented, but that the responses could be reviewed and altered at the end of the test. A sample of 492 students did not have the opportunity to review and alter responses. Within the review condition, examinee ability estimates before and after review were correlated .98. The average efficiency of the test was decreased by 1\% after review. Approximately 32\% of the examinees improved their ability estimates after review but did not change their pass/fail status. Disallowing review on adaptive tests administered under these rules is not supported by these data. (PsycINFO Database Record (c) 2002 APA, all rights reserved).}, author = {Lunz, M. E. and Bergstrom, Betty A. and Wright, Benjamin D.} } @conference {950, title = {Effects of feedback during self-adapted testing on estimates of ability}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {San Francisco}, author = {Holst, P. M. and O{\textquoteright}Donnell, A. M. and Rocklin, T. R.} } @conference {1129, title = {The effects of feedback in computerized adaptive and self-adapted tests}, booktitle = {Paper presented at the annual meeting of the NCME}, year = {1992}, address = {San Francisco}, author = {Roos, L. L. and Plake, B. S. and Wise, S.
L.} } @conference {981, title = {Estimation of ability level by using only observable quantities in adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {Chicago}, author = {Kirisci, L.} } @inbook {1850, title = {Evaluation of alternative operational concepts}, year = {1992}, address = {Proceedings of the 34th Annual Conference of the Military Testing Association. San Diego, CA: Navy Personnel Research and Development Center.}, author = {J. R. McBride and Hogan, P.F.} } @conference {1126, title = {An empirical comparison of self-adapted and maximum information item selection}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1991}, address = {Chicago}, author = {Rocklin, T. R. and O{\textquoteright}Donnell, A. M.} } @article {103, title = {The effect of item selection procedure and stepsize on computerized adaptive attitude measurement using the rating scale model}, journal = {Applied Psychological Measurement}, volume = {14}, number = {4}, year = {1990}, pages = {355-366}, abstract = {Real and simulated datasets were used to investigate the effects of the systematic variation of two major variables on the operating characteristics of computerized adaptive testing (CAT) applied to instruments consisting of polychotomously scored rating scale items. The two variables studied were the item selection procedure and the stepsize method used until maximum likelihood trait estimates could be calculated. The findings suggested that (1) item pools that consist of as few as 25 items may be adequate for CAT; (2) the variable stepsize method of preliminary trait estimation produced fewer cases of nonconvergence than the use of a fixed stepsize procedure; and (3) the scale value item selection procedure used in conjunction with a minimum standard error stopping rule outperformed the information item selection technique used in conjunction with a minimum information stopping rule in terms of the frequencies of nonconvergent cases, the number of items administered, and the correlations of CAT θ estimates with full scale estimates and known θ values. The implications of these findings for implementing CAT with rating scale items are discussed.}, author = {Dodd, B. G.} } @article {564, title = {The effects of variable entry on bias and information of the Bayesian adaptive testing procedure}, journal = {Educational and Psychological Measurement}, volume = {50}, year = {1990}, pages = {785-802}, author = {Hankins, J. A.} } @conference {843, title = {An empirical study of the computer adaptive MMPI-2}, booktitle = {Paper presented at the 25th Annual Symposium on recent developments in the MMPI/MMPI-2}, year = {1990}, address = {Minneapolis MN}, author = {Ben-Porath, Y. S. and Roper, B. L. and Butcher, J. N.} } @article {2018, title = {Estimating Reliabilities of Computerized Adaptive Tests}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {145-149}, author = {Divgi, D.
R.} } @book {1640, title = {{\'E}tude de praticabilit{\'e} du testing adaptatif de ma{\^\i}trise des apprentissages scolaires au Qu{\'e}bec : une exp{\'e}rimentation en {\'e}ducation {\'e}conomique secondaire 5 [Feasibility study of adaptive mastery testing of school learning in Quebec: An experiment in Secondary 5 economics education]}, year = {1989}, note = {[In French]}, address = {Unpublished doctoral dissertation. Montr{\'e}al: Universit{\'e} du Qu{\'e}bec {\`a} Montr{\'e}al}, author = {Auger, R.} } @conference {551, title = {EXSPRT: An expert systems approach to computer-based adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1989}, address = {San Francisco}, author = {Frick, T. W. and Plew, G.T. and Luk, H.-K.} } @booklet {1458, title = {The equivalence of scores from automated and conventional educational and psychological tests (College Board Report No. 88-8)}, year = {1988}, address = {New York: The College Entrance Examination Board.}, author = {Mazzeo, J. and Harvey, A. L.} } @article {366, title = {The effect of item parameter estimation error on decisions made using the sequential probability ratio test}, number = {Research Report 87-1}, year = {1987}, institution = {DTIC Document}, address = {Iowa City, IA. USA}, keywords = {computerized adaptive testing, Sequential probability ratio test}, author = {Spray, J. A. and Reckase, M. D.} } @booklet {1607, title = {The effect of item parameter estimation error on the decisions made using the sequential probability ratio test (ACT Research Report Series 87-17)}, year = {1987}, address = {Iowa City IA: American College Testing}, author = {Spray, J. A. and Reckase, M. D.} } @book {1673, title = {The effects of variable entry on bias and information of the Bayesian adaptive testing procedure}, year = {1987}, address = {Dissertation Abstracts International, 47 (8A), 3013}, author = {Hankins, J. A.} } @conference {1055, title = {Equating the computerized adaptive edition of the Differential Aptitude Tests}, booktitle = {Paper presented at the meeting of the American Psychological Association}, year = {1987}, address = {New York}, author = {J. R. McBride and Corpe, V. A. and Wing, H.} } @booklet {1629, title = {Equivalent-groups versus single-group equating designs for the Accelerated CAT-ASVAB Project (Research Memorandum 87-6)}, year = {1987}, address = {Alexandria VA: Center for Naval Analyses}, author = {Stoloff, P. H.} } @article {609, title = {The effects of computer experience on computerized adaptive test performance}, journal = {Educational and Psychological Measurement}, volume = {46}, year = {1986}, pages = {727-733}, author = {Lee, J. A.} } @article {559, title = {Equivalence of conventional and computer presentation of speed tests}, journal = {Applied Psychological Measurement}, volume = {10}, year = {1986}, pages = {23-34}, author = {Greaud, V. A. and Green, B. F.} } @article {374, title = {Equivalence of scores from computerized adaptive and paper-and-pencil ASVAB tests}, number = {CNR 113}, year = {1985}, pages = {100}, institution = {Center for Naval Analyses}, address = {Alexandria, VA. USA}, author = {Stoloff, P. H.} } @booklet {1451, title = {Efficiency and precision in two-stage adaptive testing}, year = {1984}, address = {West Palm Beach Florida: Eastern ERA}, author = {Loyd, B.H.} } @booklet {1400, title = {Evaluation of computerized adaptive testing of the ASVAB}, year = {1984}, address = {San Diego, CA: Navy Personnel Research and Development Center, unpublished manuscript}, author = {Hardwicke, S. and Vicino, F. and J. R.
McBride and Nemeth, C.} } @conference {1217, title = {An evaluation of the utility of large scale computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1984}, address = {New Orleans LA}, author = {Vicino, F. L. and Hardwicke, S. B.} } @booklet {1389, title = {Evaluation plan for the computerized adaptive vocational aptitude battery (Research Report 82-1)}, year = {1984}, note = {Baltimore MD: The Johns Hopkins University, Department of Psychology.}, author = {Green, B. F. and Bock, R. D. and Humphreys, L. G. and Linn, R. L. and Reckase, M. D.} } @book {1681, title = {Effects of item parameter error and other factors on trait estimation in latent trait based adaptive testing}, year = {1983}, note = {Dissertation Abstracts International, 44(3-B), 944.}, address = {Unpublished doctoral dissertation, University of Minnesota}, author = {Mattson, J. D.} } @booklet {1474, title = {An evaluation of one- and three-parameter logistic tailored testing procedures for use with small item pools (Research Report ONR83-1)}, year = {1983}, address = {Iowa City IA: American College Testing Program}, author = {McKinley, R. L. and Reckase, M. D.} } @book {1660, title = {Effect of error in item parameter estimates on adaptive testing (Doctoral dissertation, University of Minnesota)}, year = {1981}, note = {(University Microfilms No. AAD81-25946)}, address = {Dissertation Abstracts International, 42, 06-B}, author = {Crichton, L. I.} } @article {2010, title = {The Effects of Item Calibration Sample Size and Item Pool Size on Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {5}, year = {1981}, pages = {11-19}, author = {Ree, M. J.} } @booklet {1494, title = {Effects of computerized adaptive testing on Black and White students (Research Report 79-2)}, year = {1980}, note = {{PDF file, 2.323 MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Pine, S. M. and Church, A. T. and Gialluca, K. A. and Weiss, D. J.} } @conference {1089, title = {Effects of program parameters and item pool characteristics on the bias of a three-parameter tailored testing procedure}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1980}, address = {Boston MA, USA}, author = {Patience, W. M. and Reckase, M. D.} } @booklet {1427, title = {An empirical study of a broad range test of verbal ability}, year = {1980}, address = {Princeton NJ: Educational Testing Service}, author = {Kreitzberg, C. B. and Jones, D. J.} } @conference {1192, title = {Estimating the reliability of adaptive tests from a single test administration}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1980}, note = {(1981 draft copy available.) {PDF file, 7,603 KB}}, address = {Boston}, author = {Sympson, J.
B.} } @article {1967, title = {Efficiency of an adaptive inter-subtest branching strategy in the measurement of classroom achievement (Research Report 79-6)}, year = {1979}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Gialluca, K. A. and Weiss, D. J.} } @booklet {1462, title = {An evaluation of computerized adaptive testing}, year = {1979}, address = {In Proceedings of the 21st Military Testing Association Conference. San Diego, CA: Navy Personnel Research and Development Center.}, author = {J. R. McBride} } @article {2008, title = {Evaluation of Implied Orders as a Basis for Tailored Testing with Simulation Data}, journal = {Applied Psychological Measurement}, volume = {3}, year = {1979}, pages = {495-514}, author = {N. Cliff and Cudeck, R. and McCormick, D. J.} } @booklet {1353, title = {Evaluations of implied orders as a basis for tailored testing using simulations (Technical Report No. 4)}, year = {1978}, note = {$\#$CL77-04}, address = {Los Angeles CA: University of Southern California, Department of Psychology.}, author = {Cliff, N. A. and Cudeck, R. and McCormick, D.} } @article {2006, title = {Effects of Immediate Knowledge of Results and Adaptive Testing on Ability Test Performance}, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {259-266}, author = {Betz, N. E.} } @inbook {1974, title = {Effects of Knowledge of Results and Varying Proportion Correct on Ability Test Performance and Psychological Variables}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Prestwood, J. S.} } @inbook {1746, title = {An empirical evaluation of implied orders as a basis for tailored testing}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Cliff, N. A. and Cudeck, R. and McCormick, D.} } @article {2004, title = {An Empirical Investigation of the Stratified Adaptive Computerized Testing Model}, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {141-152}, author = {B. K. Waters} } @inbook {1902, title = {Estimation of latent trait status in adaptive testing}, year = {1977}, note = {{PDF file, 28 MB}}, address = {D. J. Weiss (Ed.), Applications of computerized testing (Research Report 77-1). Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Sympson, J. B.} } @conference {1106, title = {The effect of item pool characteristics on the operation of a tailored testing procedure}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1976}, address = {Murray Hill NJ}, author = {Reckase, M.
D.} } @inbook {1784, title = {Effectiveness of the ancillary estimation procedure}, year = {1976}, note = {{PDF file, 252 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 103-106). Washington DC: U.S. Government Printing Office.}, author = {Gugel, J. F. and Schmidt, F. L. and Urry, V. W.} } @booklet {1333, title = {Effects of immediate knowledge of results and adaptive testing on ability test performance (Research Report 76-3)}, year = {1976}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Betz, N. E. and Weiss, D. J.} } @booklet {1352, title = {Elements of a basic test theory generalizable to tailored testing}, year = {1976}, address = {Unpublished manuscript}, author = {Cliff, N. A.} } @inbook {1943, title = {An empirical investigation of Weiss{\textquoteright} stradaptive testing model}, year = {1976}, note = {$\#$WA75-01 {PDF file, 576 KB}}, address = {C. L. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 54-63). Washington DC: U. S. Civil Service Commission.}, author = {B. K. Waters} } @mastersthesis {2239, title = {An exploratory study of the efficiency of the flexilevel testing procedure}, volume = {Doctoral}, year = {1976}, school = {University of Toronto}, address = {Toronto, Canada}, author = {Seguin, S. P.} } @conference {1105, title = {The effect of item choice on ability estimation when using a simple logistic tailored testing model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1975}, address = {Washington, D.C.}, author = {Reckase, M. D.} } @booklet {1332, title = {Empirical and simulation studies of flexilevel ability testing (Research Report 75-3)}, year = {1975}, address = {Minneapolis: Department of Psychology, Psychometric Methods Program}, author = {Betz, N. E. and Weiss, D. J.} } @booklet {1430, title = {An empirical comparison of two-stage and pyramidal ability testing (Research Report 75-1)}, year = {1975}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Larkin, K. C. and Weiss, D. J.} } @inbook {1900, title = {Evaluating the results of computerized adaptive testing}, year = {1975}, note = {{PDF file, 446 KB}}, address = {D. J. Weiss (Ed.), Computerized adaptive trait measurement: Problems and Prospects (Research Report 75-5), pp. 26-31. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Sympson, J. B.} } @booklet {1429, title = {An empirical investigation of computer-administered pyramidal ability testing (Research Report 74-3)}, year = {1974}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Larkin, K. C. and Weiss, D. J.} } @conference {2225, title = {An empirical investigation of the stability and accuracy of flexilevel tests}, booktitle = {Annual meeting of the National Council on Measurement in Education}, year = {1974}, month = {03/1974}, address = {Chicago IL}, author = {Kocher, A.T.} } @book {1717, title = {An empirical investigation of the stratified adaptive computerized testing model for the measurement of human ability}, year = {1974}, note = {$\#$WA74-01}, address = {Unpublished Ph.D. dissertation, Florida State University}, author = {B. K.
Waters} } @book {1687, title = {An evaluation of the self-scoring flexilevel testing model}, year = {1974}, address = {Unpublished dissertation, Florida State University. Dissertation Abstracts International, 35 (7-A), 4257}, author = {Olivier, P.} } @booklet {1330, title = {An empirical study of computer-administered two-stage ability testing (Research Report 73-4)}, year = {1973}, address = {Minneapolis: Department of Psychology, Psychometric Methods Program}, author = {Betz, N. E. and Weiss, D. J.} } @article {815, title = {The efficacy of tailored testing}, journal = {Educational Research}, volume = {11}, year = {1969}, pages = {219-222}, author = {Wood, R. L.} } @article {513, title = {An exploratory study of programmed tests}, journal = {Educational and Psychological Measurement}, volume = {28}, year = {1968}, pages = {345-360}, author = {Cleary, T. A. and Linn, R. L. and Rock, D. A.} } @booklet {1319, title = {An exploratory study of branching tests (Technical Research Note 188)}, year = {1967}, address = {Washington DC: US Army Behavioral Science Research Laboratory. (NTIS No. AD 655263)}, author = {Bayroff, A. G. and Seeley, L. C.} } @book {1690, title = {An evaluation of the sequential method of testing}, year = {1962}, note = {$\#$PA62-1 University Microfilms Number 63-1748.}, address = {Unpublished doctoral dissertation, Michigan State University}, author = {Paterson, J. J.} } @booklet {1599, title = {Exploratory study of a sequential item test}, year = {1962}, address = {U.S. Army Personnel Research Office, Technical Research Note 129.}, author = {Seeley, L. C. and Morton, M. A. and Anderson, A. A.} } @article {437, title = {An empirical study of the applicability of sequential analysis to item selection}, volume = {13}, year = {1953}, pages = {3-13}, author = {Anastasi, A.} }