@article {2724, title = {Application of Dimension Reduction to CAT Item Selection Under the Bifactor Model}, journal = {Applied Psychological Measurement}, volume = {43}, number = {6}, year = {2019}, pages = {419-434}, abstract = {Multidimensional computerized adaptive testing (MCAT) based on the bifactor model is suitable for tests with multidimensional bifactor measurement structures. Several item selection methods that proved to be more advantageous than the maximum Fisher information method are not practical for bifactor MCAT due to time-consuming computations resulting from high dimensionality. To make them applicable in bifactor MCAT, dimension reduction is applied to four item selection methods: the posterior-weighted Fisher D-optimality (PDO) and three non-Fisher information-based methods{\textemdash}posterior expected Kullback{\textendash}Leibler information (PKL), continuous entropy (CE), and mutual information (MI). They were compared with the Bayesian D-optimality (BDO) method in terms of estimation precision. When both the general and group factors are the measurement objectives, BDO, PDO, CE, and MI perform equally well and better than PKL. When the group factors represent nuisance dimensions, MI and CE perform best in estimating the general factor, followed by BDO, PDO, and PKL. The effects of the bifactor pattern and test length on estimation accuracy are also discussed.}, doi = {10.1177/0146621618813086}, url = {https://doi.org/10.1177/0146621618813086}, author = {Xiuzhen Mao and Jiahui Zhang and Tao Xin} } @article {2602, title = {Application of Binary Searching for Item Exposure Control in Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {41}, number = {7}, year = {2017}, pages = {561-576}, abstract = {Cognitive diagnosis has emerged as a new generation of testing theory for educational assessment after item response theory (IRT). One distinct feature of cognitive diagnostic models (CDMs) is that they assume the latent trait to be discrete instead of continuous as in IRT. From this perspective, cognitive diagnosis bears a close resemblance to searching problems in computer science and, similarly, the item selection problem in cognitive diagnostic computerized adaptive testing (CD-CAT) can be considered a dynamic searching problem. Previously, item selection algorithms in CD-CAT were developed from information indices in information science and attempted to achieve a balance among several objectives by assigning different weights. As a result, they suffered from low efficiency due to a tug-of-war competition among multiple goals in item selection and, at the same time, placed on users the undue burden of assigning the weights for these goals by trial and error. Based on the searching-problem perspective on CD-CAT, this article adapts the binary searching algorithm, one of the best-known searching algorithms, to item selection in CD-CAT. The two new methods, the stratified dynamic binary searching (SDBS) algorithm for fixed-length CD-CAT and the dynamic binary searching (DBS) algorithm for variable-length CD-CAT, can achieve multiple goals without any of the aforementioned issues. The simulation studies indicate that their performance is comparable or superior to that of the previous methods.}, doi = {10.1177/0146621617707509}, url = {https://doi.org/10.1177/0146621617707509}, author = {Chanjin Zheng and Chun Wang} } @conference {2668, title = {The Development of a Web-Based CAT in China}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Cognitive ability assessment has been widely used as a recruitment tool for screening potential employees. Traditional cognitive ability tests face threats from item exposure and require long answering times. In China especially, campus recruitment places a premium on short testing times and anti-cheating measures. Beisen, the largest domestic online assessment software provider, developed a web-based CAT for cognitive ability that assesses verbal, quantitative, logical, and spatial abilities, with the aims of decreasing answering time, improving assessment accuracy, and reducing threats from cheating and faking in online ability testing. The web-based test is convenient for examinees, who can access it at any time and place simply by logging in to the test website from any Internet-enabled device (e.g., laptops, iPads, and smartphones).

We designed the CAT around strategies for establishing the item bank, setting the starting point, selecting items, scoring, and terminating the test. Additionally, we paid close attention to administering the test via the web. For the CAT procedures, we employed online calibration to establish a stable and expanding item bank, and we integrated maximum Fisher information, an α-stratified strategy, and randomization for item selection and control of item exposure. Fixed-length and variable-length strategies were combined to terminate the test. To deliver fluid web-based testing, we employed cloud-computing techniques and designed each computing process carefully. Distributed computation was used so that EAP scoring and item selection execute at high speed. Caching all items on the servers in advance helps shorten the process of loading items onto examinees{\textquoteright} terminal devices. Horizontally scalable cloud servers cope with high concurrency. The massive computation in item selection was converted into looking up items in a precomputed information matrix table.

We examined average accuracy, bank usage, and computing performance under both laboratory and operational testing conditions. In a test of almost 28,000 examinees, we found that average bank usage was 50\%, and that 80\% of tests terminated at a test information of 10, with an average of 9.6. Under high concurrency, testing proceeded unhindered, and scoring plus item selection took on average only 0.23 s per examinee.

}, keywords = {China, Web-Based CAT}, author = {Chongli Liang and Danjun Wang and Dan Zhou and Peida Zhan} } @article {2580, title = {Dual-Objective Item Selection Criteria in Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {54}, number = {2}, year = {2017}, pages = {165{\textendash}183}, abstract = {The development of cognitive diagnostic-computerized adaptive testing (CD-CAT) has provided a new perspective for gaining information about examinees{\textquoteright} mastery on a set of cognitive attributes. This study proposes a new item selection method within the framework of dual-objective CD-CAT that simultaneously addresses examinees{\textquoteright} attribute mastery status and overall test performance. The new procedure is based on the Jensen-Shannon (JS) divergence, a symmetrized version of the Kullback-Leibler divergence. We show that the JS divergence resolves the noncomparability problem of the dual information index and has close relationships with Shannon entropy, mutual information, and Fisher information. The performance of the JS divergence is evaluated in simulation studies in comparison with the methods available in the literature. Results suggest that the JS divergence achieves parallel or more precise recovery of latent trait variables compared to the existing methods and maintains practical advantages in computation and item pool usage.}, issn = {1745-3984}, doi = {10.1111/jedm.12139}, url = {http://dx.doi.org/10.1111/jedm.12139}, author = {Kang, Hyeon-Ah and Zhang, Susu and Chang, Hua-Hua} } @conference {2663, title = {From Blueprints to Systems: An Integrated Approach to Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

For years, test blueprints have told test developers how many items and what types of items will be included in a test. Adaptive testing adopted this approach from paper testing, and it is reasonably useful. Unfortunately, 'how many items and what types of items' are not all the elements one should consider when choosing items for an adaptive test. To fill the gaps, practitioners have developed tools to allow an adaptive test to behave appropriately (e.g., exposure control, content balancing, and item drift procedures). Each of these tools involves a separate process external to the primary item selection process.

The use of these subsidiary processes makes item selection less optimal and makes it difficult to prioritize aspects of selection. This discussion describes systems-based adaptive testing, an approach that uses metadata concerning items, test takers, and test elements to select items. These elements are weighted by the stakeholders to shape an expanded blueprint designed for adaptive testing.

}, keywords = {CAT, integrated approach, Keynote}, url = {https://drive.google.com/open?id=1CBaAfH4ES7XivmvrMjPeKyFCsFZOpQMJ}, author = {Gage Kingsbury and Tony Zara} } @article {2608, title = {The Information Product Methods: A Unified Approach to Dual-Purpose Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {42}, year = {2017}, month = {2018/06/01}, pages = {321-324}, abstract = {This article gives a brief summary of major approaches in dual-purpose computerized adaptive testing (CAT) in which the test is tailored interactively to both an examinee{\textquoteright}s overall ability level, θ, and attribute mastery level, α. It also proposes an information product approach whose connections to the current methods are revealed. An updated comprehensive empirical study demonstrated that the information product approach not only can offer a unified framework to connect all other approaches but also can mitigate the weighting issue in the dual-information approach.}, issn = {0146-6216}, url = {https://doi.org/10.1177/0146621617730392}, author = {Zheng, Chanjin and He, Guanrui and Gao, Chunlei} } @conference {2646, title = {Item Pool Design and Evaluation}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Early work on CAT tended to use existing sets of items that came from fixed-length test forms. These sets of items had been selected to meet requirements quite different from those of a CAT: decision making or covering a content domain. However, some early work suggested having items equally distributed over the range of proficiency that was of interest, or concentrated at a decision point. Other work showed that there was bias in proficiency estimates when an item pool was too easy or too hard. These early findings eventually led to work on item pool design and, more recently, on item pool evaluation. This presentation gives a brief overview of these topics to provide some context for the following presentations in this symposium.

}, keywords = {CAT, Item Pool Design}, url = {https://drive.google.com/open?id=1ZAsqm1yNZlliqxEHcyyqQ_vOSu20xxZs}, author = {Mark D Reckase and Wei He and Jing-Ru Xu and Xuechun Zhou} } @conference {2665, title = {Multi-stage Testing for a Multi-disciplined End-of-Primary-School Test}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

The Dutch secondary education system consists of five levels: basic, lower, and middle vocational education, general secondary education, and pre-academic education. The decision about an individual student{\textquoteright}s level of secondary education is based on a combination of the teacher{\textquoteright}s judgment and an end-of-primary-school placement test.

This placement test encompasses the measurement of reading, language, mathematics, and writing, with each skill consisting of one to four subdomains. The Dutch end-of-primary-school test is currently administered in two linear 200-item paper-based versions. The two versions differ in difficulty so as to motivate both less able and more able students, and to measure both groups of students precisely. The primary goal of the test is to provide placement advice for the five levels of secondary education. The secondary goal is the assessment of six different fundamental reference levels defined on reading, language, and mathematics. Because of the high-stakes advice the test provides, the Dutch parliament has mandated changing the format to a multistage test. A major advantage of multistage testing is that the tailoring of the tests is more strongly related to the ability of the students than to the teacher{\textquoteright}s judgment. A separate multistage test is under development for each of the three skills measured by the reference levels, to increase the classification accuracy for secondary education placement and to optimally measure performance on the reference-level-related skills.

This symposium consists of three presentations discussing the challenges of transitioning from a linear paper-based test to a computer-based multistage test within an existing curriculum, and the specification of the multistage test to meet its measurement purposes. The transition to a multistage test has to improve both classification accuracy and measurement precision.

First, we describe the Dutch educational system and the role of the end-of-primary-school placement test within this system. Special attention will be paid to the advantages of multistage testing over both linear testing and computerized adaptive testing, and to practical implications of the transition from a linear to a multistage test.

Second, we discuss routing and reporting on the new multi-stage test. Both topics have a major impact on the quality of the placement advice and the reference mastery decisions. Several methods for routing and reporting are compared.

Third, the linear test contains 200 items to cover a broad range of different skills and to obtain a precise measurement of those skills separately. Multistage testing creates opportunities to reduce the cognitive burden on the students while maintaining the same quality of placement advice and assessment of mastery of the reference levels. This presentation focuses on the optimal allocation of items to test modules, the optimal number of stages and modules per stage, and test length reduction.

}, keywords = {mst, Multidisciplined, proficiency}, url = {https://drive.google.com/open?id=1C5ys178p_Wl9eemQuIsI56IxDTck2z8P}, author = {Hendrik Straat and Maaike van Groen and Wobbe Zijlstra and Marie-Anne Keizer-Mittelha{\"e}user and Michel Lamor{\'e}} } @conference {2648, title = {New Challenges (With Solutions) and Innovative Applications of CAT}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Over the past several decades, computerized adaptive testing (CAT) has profoundly changed the administration of large-scale aptitude tests, state-wide achievement tests, professional licensure exams, and health outcome measures. While many challenges of CAT have been successfully addressed through the continual efforts of researchers in the field, many longstanding challenges have yet to be resolved. This symposium will begin with three presentations, each of which provides a sound solution to one of the unresolved challenges. They are (1) item calibration when responses are {\textquotedblleft}missing not at random{\textquotedblright} from CAT administration; (2) online calibration of new items when person traits have non-ignorable measurement error; and (3) establishing consistency and asymptotic normality of latent trait estimation when item response revision is allowed in CAT. In addition, this symposium also features innovative applications of CAT. In particular, there is emerging interest in using cognitive diagnostic CAT to monitor and detect learning progress (4th presentation). Last but not least, the 5th presentation illustrates the power of multidimensional polytomous CAT to permit rapid identification of hospitalized patients{\textquoteright} rehabilitative care needs in health outcomes measurement. We believe this symposium covers a wide range of interesting and important topics in CAT.

}, keywords = {CAT, challenges, innovative applications}, url = {https://drive.google.com/open?id=1Wvgxw7in_QCq_F7kzID6zCZuVXWcFDPa}, author = {Chun Wang and David J. Weiss and Xue Zhang and Jian Tao and Yinhong He and Ping Chen and Shiyu Wang and Susu Zhang and Haiyan Lin and Xiaohong Gao and Hua-Hua Chang and Zhuoran Shang} } @article {2507, title = {Exploration of Item Selection in Dual-Purpose Cognitive Diagnostic Computerized Adaptive Testing: Based on the RRUM}, journal = {Applied Psychological Measurement}, volume = {40}, number = {8}, year = {2016}, pages = {625-640}, abstract = {Cognitive diagnostic computerized adaptive testing (CD-CAT) can be divided into two broad categories: (a) single-purpose tests, which are based on the subject{\textquoteright}s knowledge state (KS) alone, and (b) dual-purpose tests, which are based on both the subject{\textquoteright}s KS and traditional ability level (θ). This article seeks to identify the most efficient item selection method for the latter type of CD-CAT under various conditions and various evaluation criteria, based on the reduced reparameterized unified model (RRUM) and the two-parameter logistic model of item response theory (IRT-2PLM). The Shannon entropy (SHE) and Fisher information methods were combined to produce a new synthetic item selection index, the {\textquotedblleft}dapperness with information (DWI){\textquotedblright} index, which concurrently considers both KS and θ within one step. The new method was compared with four other methods. The results showed that, in most conditions, the new method exhibited the best performance in terms of KS estimation and the second-best performance in terms of θ estimation. Item utilization uniformity and computing time are also considered for all the competing methods.}, doi = {10.1177/0146621616666008}, url = {http://apm.sagepub.com/content/40/8/625.abstract}, author = {Dai, Buyun and Zhang, Minqiang and Li, Guangming} } @article {2506, title = {High-Efficiency Response Distribution{\textendash}Based Item Selection Algorithms for Short-Length Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {40}, number = {8}, year = {2016}, pages = {608-624}, abstract = {Cognitive diagnostic computerized adaptive testing (CD-CAT) purports to obtain useful diagnostic information with the great efficiency brought by CAT technology. Most existing CD-CAT item selection algorithms are evaluated when test length is fixed and relatively long, but some applications of CD-CAT, such as interim assessment, require obtaining the cognitive pattern with a short test. The mutual information (MI) algorithm proposed by Wang is the first endeavor to accommodate this need. To reduce the computational burden, Wang provided a simplified scheme, but at the price of a scale/sign change in the original index. As a result, it is very difficult to combine it with some popular constraint management methods. The current study proposes two high-efficiency algorithms, the posterior-weighted cognitive diagnostic model (CDM) discrimination index (PWCDI) and the posterior-weighted attribute-level CDM discrimination index (PWACDI), by modifying the CDM discrimination index. They can be considered an extension of the Kullback{\textendash}Leibler (KL) and posterior-weighted KL (PWKL) methods. A pre-calculation strategy has also been developed to address the computational issue. Simulation studies indicate that the newly developed methods can produce results comparable with or better than those of MI and PWKL in both short and long tests. The other major advantage is that the computational issue is addressed more elegantly than in MI: PWCDI and PWACDI can run as fast as PWKL. More importantly, they do not suffer from the scale/sign change problem of MI and thus can be used together with constraint management methods in a straightforward manner.}, doi = {10.1177/0146621616665196}, url = {http://apm.sagepub.com/content/40/8/608.abstract}, author = {Zheng, Chanjin and Chang, Hua-Hua} } @article {2487, title = {Monitoring Items in Real Time to Enhance CAT Security}, journal = {Journal of Educational Measurement}, volume = {53}, number = {2}, year = {2016}, pages = {131{\textendash}151}, abstract = {An IRT-based sequential procedure is developed to monitor items for enhancing test security. The procedure uses a series of statistical hypothesis tests to examine whether the statistical characteristics of each item under inspection have changed significantly during CAT administration. This procedure is compared with a previously developed CTT-based procedure through simulation studies. The results show that when the total number of examinees is fixed, both procedures can control the rate of type I errors at any reasonable significance level by choosing an appropriate cutoff point and, meanwhile, maintain a low rate of type II errors. Further, the IRT-based method has a much lower type II error rate, or more power, than the CTT-based method when the number of compromised items is small (e.g., 5), which can be achieved if the IRT-based procedure is applied in an active mode, in the sense that flagged items can be replaced with new items.}, issn = {1745-3984}, doi = {10.1111/jedm.12104}, url = {http://dx.doi.org/10.1111/jedm.12104}, author = {Zhang, Jinming and Li, Jie} } @article {2493, title = {Online Calibration of Polytomous Items Under the Generalized Partial Credit Model}, journal = {Applied Psychological Measurement}, volume = {40}, number = {6}, year = {2016}, pages = {434-450}, abstract = {Online calibration is a technology-enhanced architecture for item calibration in computerized adaptive tests (CATs). Many CATs are administered continuously over a long term and rely on large item banks. To ensure test validity, these item banks need to be frequently replenished with new items, and these new items need to be pretested before being used operationally. Online calibration dynamically embeds pretest items in operational tests and calibrates their parameters as response data are gradually obtained through the continuous test administration. This study extends existing formulas, procedures, and algorithms for dichotomous item response theory models to the generalized partial credit model, a popular model for items scored in more than two categories. A simulation study was conducted to investigate the developed algorithms and procedures under a variety of conditions, including two estimation algorithms, three pretest item selection methods, three seeding locations, two numbers of score categories, and three calibration sample sizes. Results demonstrated acceptable estimation accuracy of the two estimation algorithms in some of the simulated conditions. A variety of findings were also revealed for the interaction effects of the included factors, and corresponding recommendations were made.}, doi = {10.1177/0146621616650406}, url = {http://apm.sagepub.com/content/40/6/434.abstract}, author = {Zheng, Yi} } @article {2354, title = {On-the-Fly Assembled Multistage Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {39}, number = {2}, year = {2015}, pages = {104-118}, abstract = {

Recently, multistage testing (MST) has been adopted by several important large-scale testing programs and become popular among practitioners and researchers. Stemming from the decades-long history of computerized adaptive testing (CAT), the rapidly growing MST alleviates several major problems of earlier CAT applications. Nevertheless, MST is only one among all possible solutions to these problems. This article presents a new adaptive testing design, {\textquotedblleft}on-the-fly assembled multistage adaptive testing{\textquotedblright} (OMST), which combines the benefits of CAT and MST and offsets their limitations. Moreover, OMST also provides some unique advantages over both CAT and MST. A simulation study was conducted to compare OMST with MST and CAT, and the results demonstrated the promising features of OMST. Finally, the {\textquotedblleft}Discussion{\textquotedblright} section provides suggestions on possible future adaptive testing designs based on the OMST framework, which could provide great flexibility for adaptive tests in the digital future and open an avenue for all types of hybrid designs based on the different needs of specific tests.

}, doi = {10.1177/0146621614544519}, url = {http://apm.sagepub.com/content/39/2/104.abstract}, author = {Zheng, Yi and Chang, Hua-Hua} } @article {2350, title = {Determining the Overall Impact of Interruptions During Online Testing}, journal = {Journal of Educational Measurement}, volume = {51}, number = {4}, year = {2014}, pages = {419{\textendash}440}, abstract = {

With an increase in the number of online tests, interruptions during testing due to unexpected technical issues seem unavoidable. For example, interruptions occurred during several recent state tests. When interruptions occur, it is important to determine the extent of their impact on the examinees{\textquoteright} scores. There is a lack of research on this topic due to the novelty of the problem. This article is an attempt to fill that void. Several methods, primarily based on propensity score matching, linear regression, and item response theory, were suggested to determine the overall impact of the interruptions on the examinees{\textquoteright} scores. A realistic simulation study shows that the suggested methods have satisfactory Type I error rate and power. Then the methods were applied to data from the Indiana Statewide Testing for Educational Progress-Plus (ISTEP+) test that experienced interruptions in 2013. The results indicate that the interruptions did not have a significant overall impact on the student scores for the ISTEP+ test.

}, issn = {1745-3984}, doi = {10.1111/jedm.12052}, url = {http://dx.doi.org/10.1111/jedm.12052}, author = {Sinharay, Sandip and Wan, Ping and Whitaker, Mike and Kim, Dong-In and Zhang, Litong and Choi, Seung W.} } @article {2349, title = {An Enhanced Approach to Combine Item Response Theory With Cognitive Diagnosis in Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {51}, number = {4}, year = {2014}, pages = {358{\textendash}380}, abstract = {

Computerized adaptive testing offers the possibility of gaining information on both the overall ability and cognitive profile in a single assessment administration. Some algorithms aiming for these dual purposes have been proposed, including the shadow test approach, the dual information method (DIM), and the constraint weighted method. The current study proposed two new methods, aggregate ranked information index (ARI) and aggregate standardized information index (ASI), which appropriately addressed the noncompatibility issue inherent in the original DIM method. More flexible weighting schemes that put different emphasis on information about general ability (i.e., θ in item response theory) and information about cognitive profile (i.e., α in cognitive diagnostic modeling) were also explored. Two simulation studies were carried out to investigate the effectiveness of the new methods and weighting schemes. Results showed that the new methods with the flexible weighting schemes could produce more accurate estimation of both overall ability and cognitive profile than the original DIM. Among them, the ASI with both empirical and theoretical weights is recommended, and attribute-level weighting scheme is preferred if some attributes are considered more important from a substantive perspective.

}, issn = {1745-3984}, doi = {10.1111/jedm.12057}, url = {http://dx.doi.org/10.1111/jedm.12057}, author = {Wang, Chun and Zheng, Chanjin and Chang, Hua-Hua} } @article {2321, title = {A Sequential Procedure for Detecting Compromised Items in the Item Pool of a CAT System}, journal = {Applied Psychological Measurement}, volume = {38}, number = {2}, year = {2014}, pages = {87-104}, abstract = {

To maintain the validity of a continuous testing system, such as computerized adaptive testing (CAT), items should be monitored to ensure that the performance of test items has not gone through any significant changes during their lifetime in an item pool. In this article, the author developed a sequential monitoring procedure based on a series of statistical hypothesis tests to examine whether the statistical characteristics of individual items have changed significantly during test administration. Simulation studies show that under the simulated setting, by choosing an appropriate cutoff point, the procedure can control the rate of Type I errors at any reasonable significance level and, meanwhile, has a very low rate of Type II errors.

}, doi = {10.1177/0146621613510062}, url = {http://apm.sagepub.com/content/38/2/87.abstract}, author = {Zhang, Jinming} } @article {2316, title = {Estimating Measurement Precision in Reduced-Length Multi-Stage Adaptive Testing}, journal = {Journal of Computerized Adaptive Testing}, volume = {1}, year = {2013}, pages = {67-87}, doi = {10.7333/1309-0104067}, author = {Crotts, K. M. and Zenisky, A. L. and Sireci, S. G. and Li, X.} } @conference {2098, title = {A Paradigm for Multinational Adaptive Testing}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

Impact of Issues in {\textquotedblleft}Exported{\textquotedblright} Adaptive Testing

Goal is construct equivalency in the new environment

Research Questions

}, keywords = {CAT, multinational adaptive testing}, author = {A Zara} } @inbook {2057, title = {Designing and Implementing a Multistage Adaptive Test: The Uniform CPA Exam}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {167-190}, chapter = {9}, doi = {10.1007/978-0-387-85461-8}, author = {Melican, G. J. and Breithaupt, K. and Zhang, Y.} } @inbook {2065, title = {The Investigation of Differential Item Functioning in Adaptive Tests}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {331-352}, chapter = {17}, doi = {10.1007/978-0-387-85461-8}, author = {Zwick, R.} } @inbook {2066, title = {Multistage Testing: Issues, Designs, and Research}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {355-372}, chapter = {18}, doi = {10.1007/978-0-387-85461-8}, author = {Zenisky, A. L. and Hambleton, R. K. and Luecht, R. M.} } @booklet {323, title = {Validation of a computer-adaptive test to evaluate generic health-related quality of life}, journal = {Health and Quality of Life Outcomes}, volume = {8}, year = {2010}, note = {Rebollo, Pablo; Castejon, Ignacio; Cuervo, Jesus; Villa, Guillermo; Garcia-Cueto, Eduardo; Diaz-Cuervo, Helena; Zardain, Pilar C; Muniz, Jose; Alonso, Jordi; Spanish CAT-Health Research Group. England. Health Qual Life Outcomes. 2010 Dec 3;8:147.}, pages = {147}, edition = {2010/12/07}, abstract = {BACKGROUND: Health Related Quality of Life (HRQoL) is a relevant variable in the evaluation of health outcomes. Questionnaires based on Classical Test Theory typically require a large number of items to evaluate HRQoL. Computer Adaptive Testing (CAT) can be used to reduce test length while maintaining and, in some cases, improving accuracy. This study aimed at validating a CAT based on Item Response Theory (IRT) for the evaluation of generic HRQoL: the CAT-Health instrument. METHODS: Cross-sectional study of subjects aged over 18 attending Primary Care Centres for any reason. CAT-Health was administered along with the SF-12 Health Survey. Age, gender and a checklist of chronic conditions were also collected. CAT-Health was evaluated considering: 1) feasibility: completion time and test length; 2) content range coverage, Item Exposure Rate (IER) and test precision; and 3) construct validity: differences in the CAT-Health scores according to clinical variables and correlations between both questionnaires. RESULTS: 396 subjects answered CAT-Health and SF-12, 67.2\% females, mean age (SD) 48.6 (17.7) years. 36.9\% did not report any chronic condition. Median completion time for CAT-Health was 81 seconds (IQ range = 59-118) and it increased with age (p < 0.001). The median number of items administered was 8 (IQ range = 6-10). Neither ceiling nor floor effects were found for the score. None of the items in the pool had an IER of 100\% and it was over 5\% for 27.1\% of the items. Test Information Function (TIF) peaked between levels -1 and 0 of HRQoL. Statistically significant differences were observed in the CAT-Health scores according to the number and type of conditions. CONCLUSIONS: Although domain-specific CATs exist for various areas of HRQoL, CAT-Health is one of the first IRT-based CATs designed to evaluate generic HRQoL, and it has proven feasible, valid and efficient when administered to a broad sample of individuals attending primary care settings.}, isbn = {1477-7525 (Electronic); 1477-7525 (Linking)}, author = {Rebollo, P. and Castejon, I. and Cuervo, J. and Villa, G. and Garcia-Cueto, E. and Diaz-Cuervo, H. and Zardain, P. C. and Muniz, J. and Alonso, J.} } @article {2102, title = {Computer Adaptive-Attribute Testing: A New Approach to Cognitive Diagnostic Assessment}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216}, year = {2008}, pages = {29-39}, abstract = {

The influence of interdisciplinary forces stemming from developments in cognitive science, mathematical statistics, educational psychology, and computing science is beginning to appear in educational and psychological assessment. Computer adaptive-attribute testing (CA-AT) is one example. The concepts and procedures in CA-AT can be found at the intersection between computer adaptive testing and cognitive diagnostic assessment. CA-AT allows us to fuse the administrative benefits of computer adaptive testing with the psychological benefits of cognitive diagnostic assessment to produce an innovative psychologically-based adaptive testing approach. We describe the concepts behind CA-AT as well as illustrate how it can be used to promote formative, computer-based, classroom assessment.

}, keywords = {cognition and assessment, cognitive diagnostic assessment, computer adaptive testing}, doi = {10.1027/0044-3409.216.1.29}, author = {Gierl, M. J. and Zhou, J.} } @inbook {1962, title = {Computerized attribute-adaptive testing: A new computerized adaptive testing approach incorporating cognitive psychology}, year = {2007}, note = {{PDF file, 296 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Zhou, J. and Gierl, M. J. and Cui, Y.} } @article {2199, title = {Detecting Differential Speededness in Multistage Testing}, journal = {Journal of Educational Measurement}, volume = {44}, number = {2}, year = {2007}, pages = {117{\textendash}130}, abstract = {

A potential undesirable effect of multistage testing is differential speededness, which happens if some of the test takers run out of time because they receive subtests with items that are more time intensive than others. This article shows how a probabilistic response-time model can be used for estimating differences in time intensities and speed between subtests and test takers and for detecting differential speededness. An empirical data set for a multistage test in the computerized CPA Exam was used to demonstrate the procedures. Although the more difficult subtests appeared to have items that were more time intensive than the easier subtests, an analysis of the residual response times did not reveal any significant differential speededness, because the time limit appeared to be appropriate. In a separate analysis within each of the subtests, we found minor but consistent patterns of residual times that are believed to be due to a warm-up effect, that is, test takers spending more time on the initial items than they actually need.

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2007.00030.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2007.00030.x}, author = {van der Linden, Wim J. and Breithaupt, Krista and Chuah, Siang Chee and Zhang, Yanwei} } @article {583, title = {Prospective evaluation of the AM-PAC-CAT in outpatient rehabilitation settings}, journal = {Physical Therapy}, volume = {87}, year = {2007}, pages = {385-398}, author = {Jette, A. and Haley, S. and Tao, W. and Ni, P. and Moed, R. and Meyers, D. and Zurek, M.} } @article {2216, title = {Comparison of the Psychometric Properties of Several Computer-Based Test Designs for Credentialing Exams With Multiple Purposes}, journal = {Applied Measurement in Education}, volume = {19}, number = {3}, year = {2006}, pages = {203-220}, doi = {10.1207/s15324818ame1903_3}, url = {http://www.tandfonline.com/doi/abs/10.1207/s15324818ame1903_3}, author = {Jodoin, Michael G. and Zenisky, April and Hambleton, Ronald K.} } @conference {2220, title = {The effectiveness of using multiple item pools in computerized adaptive testing}, booktitle = {Annual meeting of the National Council on Measurement in Education}, year = {2005}, month = {04/2005}, address = {Montreal, Canada}, author = {Zhang, J. and Chang, H.} } @conference {1285, title = {Identifying practical indices for enhancing item pool security}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education (NCME)}, year = {2005}, address = {Montreal, Canada}, author = {Yi, Q. and Zhang, J. and Chang, Hua-Hua} } @conference {2135, title = {Rescuing CAT by fixing the problems}, booktitle = {National Council on Measurement in Education}, year = {2005}, address = {Montreal, Canada}, author = {Chang, S-H. and Zhang, J.} } @book {1725, title = {Evaluating the effects of several multi-stage testing design variables on selected psychometric outcomes for certification and licensure assessment}, year = {2004}, address = {Unpublished doctoral dissertation, University of Massachusetts, Amherst}, author = {Zenisky, A. L.} } @conference {1290, title = {Investigating the effects of selected multi-stage test design alternatives on credentialing outcomes}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, note = {{PDF file, 129 KB}}, address = {San Diego CA}, author = {Zenisky, A. L. and Hambleton, R. K.} } @booklet {200, title = {Practical methods for dealing with {\textquoteright}not applicable{\textquoteright} item responses in the AMC Linear Disability Score project}, journal = {Health and Quality of Life Outcomes}, volume = {2}, year = {2004}, note = {Holman, Rebecca; Glas, Cees A W; Lindeboom, Robert; Zwinderman, Aeilko H; de Haan, Rob J. England. Health Qual Life Outcomes. 2004 Jun 16;2:29.}, month = {Jun 16}, pages = {29}, type = {Comparative Study; Research Support, Non-U.S. Gov{\textquoteright}t}, edition = {2004/06/18}, abstract = {BACKGROUND: Whenever questionnaires are used to collect data on constructs, such as functional status or health related quality of life, it is unlikely that all respondents will respond to all items. This paper examines ways of dealing with responses in a {\textquoteright}not applicable{\textquoteright} category to items included in the AMC Linear Disability Score (ALDS) project item bank. METHODS: The data examined in this paper come from the responses of 392 respondents to 32 items and form part of the calibration sample for the ALDS item bank.
The data are analysed using the one-parameter logistic item response theory model. The four practical strategies for dealing with this type of response are: cold deck imputation; hot deck imputation; treating the missing responses as if these items had never been offered to those individual patients; and using a model which takes account of the {\textquoteright}tendency to respond to items{\textquoteright}. RESULTS: The item and respondent population parameter estimates were very similar for the strategies involving hot deck imputation; treating the missing responses as if these items had never been offered to those individual patients; and using a model which takes account of the {\textquoteright}tendency to respond to items{\textquoteright}. The estimates obtained using the cold deck imputation method were substantially different. CONCLUSIONS: The cold deck imputation method was not considered suitable for use in the ALDS item bank. The other three methods described can be usefully implemented in the ALDS item bank, depending on the purpose of the data analysis to be carried out. These three methods may be useful for other data sets examining similar constructs, when item response theory based methods are used.}, keywords = {*Disability Evaluation, *Health Surveys, *Logistic Models, *Questionnaires, Activities of Daily Living/*classification, Data Interpretation, Statistical, Health Status, Humans, Pilot Projects, Probability, Quality of Life, Severity of Illness Index}, isbn = {1477-7525 (Electronic)1477-7525 (Linking)}, author = {Holman, R. and Glas, C. A. and Lindeboom, R. and Zwinderman, A. H. and de Haan, R. J.} } @conference {876, title = {Assessing CAT security breaches by the item pooling index}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, address = {Chicago IL}, author = {Chang, Hua-Hua and Zhang, J.} } @conference {1291, title = {Evaluating computer-based test security by generalized item overlap rates}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, address = {Chicago IL}, author = {Zhang, J. and Lu, T.} } @article {823, title = {Application of an empirical Bayes enhancement of Mantel-Haenszel differential item functioning analysis to a computerized adaptive test}, journal = {Applied Psychological Measurement}, volume = {26}, year = {2002}, pages = {57-76}, author = {Zwick, R. and Thayer, D. T.} } @conference {959, title = {Comparison of the psychometric properties of several computer-based test designs for credentialing exams}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, note = {{PDF file, 261 KB}}, address = {New Orleans LA}, author = {Jodoin, M. and Zenisky, A. L. and Hambleton, R. K.} } @booklet {1588, title = {An empirical investigation of selected multi-stage testing design variables on test assembly and decision accuracy outcomes for credentialing exams (Center for Educational Assessment Research Report No 469)}, year = {2002}, address = {Amherst, MA: University of Massachusetts, School of Education.}, author = {Zenisky, A. L.} } @conference {920, title = {A further study on adjusting CAT item selection starting point for individual examinees}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {$\#$FA02-01}, address = {New Orleans LA}, author = {Fan, M. 
and Zhu.} } @article {60, title = {Hypergeometric family and item overlap rates in computerized adaptive testing}, journal = {Psychometrika}, volume = {67}, number = {3}, year = {2002}, pages = {387-398}, abstract = {A computerized adaptive test (CAT) is usually administered to small groups of examinees at frequent time intervals. It is often the case that examinees who take the test earlier share information with examinees who will take the test later, thus increasing the risk that many items may become known. Item overlap rate for a group of examinees refers to the number of overlapping items encountered by these examinees divided by the test length. For a specific item pool, different item selection algorithms may yield different item overlap rates. An important issue in designing a good CAT item selection algorithm is to keep the item overlap rate below a preset level. In doing so, it is important to investigate what the lowest rate could be for all possible item selection algorithms. In this paper we rigorously prove that if every item had an equal probability of being selected from the pool in a fixed-length CAT, the number of overlapping items among any α randomly sampled examinees follows the hypergeometric distribution family for α >= 1. Thus, the expected values of the number of overlapping items among any α randomly sampled examinees can be calculated precisely. These values may serve as benchmarks in controlling item overlap rates for fixed-length adaptive tests.}, keywords = {Adaptive Testing, Algorithms, Computer Assisted Testing, Test Taking, Time On Task, computerized adaptive testing}, author = {Chang, Hua-Hua and Zhang, J.} } @conference {875, title = {Identify the lower bounds for item sharing and item pooling in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, address = {New Orleans LA}, author = {Chang, Hua-Hua and Zhang, J.} } @conference {940, title = {Impact of selected factors on the psychometric quality of credentialing examinations administered with a sequential testlet design}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, address = {New Orleans LA}, author = {Hambleton, R. K. and Jodoin, M. and Zenisky, A. L.} } @conference {1292, title = {The robustness of the unidimensional 3PL IRT model when applied to two-dimensional data in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 1.356 MB}}, address = {New Orleans LA}, author = {Zhao, J. C. and McMorris, R. F. and Pruzek, R. M. and Chen, R.} } @conference {1294, title = {Statistical indexes for monitoring item behavior under computer adaptive testing environment}, booktitle = {(Original title: Detecting item misfit in computerized adaptive testing.) Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 2.287 MB}}, address = {New Orleans LA}, author = {Zhu, R. and Yu, F. and Liu, S. M.} } @conference {1021, title = {Impact of item location effects on ability estimation in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {$\#$LI01-01}, address = {Seattle WA}, author = {Liu, M. and Zhu, R.
and Guo, F.} } @conference {1173, title = {Monitoring items for changes in performance in computerized adaptive tests}, booktitle = {Paper presented at the annual conference of the National Council on Measurement in Education}, year = {2001}, address = {Seattle, Washington}, author = {Smith, R. L. and Wang, M.M. and Wingersky, M. and Zhao, C.} } @conference {1240, title = {Assembling parallel item pools for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2000}, note = {$\#$WA00-02}, address = {New Orleans}, author = {Wang, T. and Fan, M. and Yi, Q. and Ban, J. C. and Zhu, D.} } @article {607, title = {Computerized adaptive administration of the self-evaluation examination}, journal = {AANA.J}, volume = {68}, year = {2000}, pages = {226-31}, author = {LaVelle, T. and Zaglaniczny, K. and Spitzer, L. E.} } @conference {1293, title = {Adjusting computer adaptive test starting points to conserve item pool}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1999}, address = {Montreal, Canada}, author = {Zhu, D. and Fan, M.} } @conference {978, title = {A comparison of conventional and adaptive testing procedures for making single-point decisions}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, note = {$\#$KI99-1}, address = {Montreal, Canada}, author = {Kingsbury, G. G. and A Zara} } @booklet {1587, title = {Current and future research in multi-stage testing (Research Report No 370)}, year = {1999}, note = {{PDF file, 131 KB}}, address = {Amherst MA: University of Massachusetts, Laboratory of Psychometric and Evaluative Research.}, author = {Zenisky, A. L.} } @inbook {1963, title = {The development of a computerized adaptive selection system for computer programmers in a financial services company}, year = {1999}, address = {F. Drasgow and J. B. Olsen (Eds.), Innovations in computerized assessment (p. 7-33). Mahwah, NJ: Erlbaum.}, author = {Zickar, M. J. and Overton, R. C. and Taylor, L. R. and Harms, H. J.} } @conference {1169, title = {More efficient use of item inventories}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Smith, R. and Zhu, R.} } @conference {979, title = {A procedure to compare conventional and adaptive testing procedures for making single-point decisions}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Kingsbury, G. G. and A Zara} } @book {1726, title = {The robustness of the unidimensional 3PL IRT model when applied to two-dimensional data in computerized adaptive testing}, year = {1999}, note = {$\#$ZH99-1}, address = {Unpublished Ph.D. dissertation, State University of New York at Albany}, author = {Zhao, J. C.} } @conference {1297, title = {An empirical Bayes approach to Mantel-Haenszel DIF analysis: Theoretical development and application to CAT data}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Zwick, R.} } @booklet {1335, title = {Feasibility studies of two-stage testing in large-scale educational assessment: Implications for NAEP}, year = {1998}, address = {American Institutes for Research, CA}, author = {Bock, R. D. and Zimowski, M.
F.} } @article {655, title = {Adapting to adaptive testing}, journal = {Personnel Psychology}, volume = {50}, year = {1997}, pages = {171-185}, author = {Overton, R. C. and Harms, H. J. and Taylor, L. R. and Zickar, M. J.} } @conference {1289, title = {Administering and scoring the computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {A Zara} } @conference {1296, title = {A Bayesian enhancement of Mantel Haenszel DIF analysis for computer adaptive tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Zwick, R.} } @article {822, title = {The effect of adaptive administration on the variability of the Mantel-Haenszel measure of differential item functioning}, journal = {Educational and Psychological Measurement}, volume = {57}, year = {1997}, pages = {412-421}, author = {Zwick, R.} } @conference {1252, title = {Modifying the NCLEX{\texttrademark} CAT item selection algorithm to improve item exposure}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1996}, address = {New York}, author = {Way, W. D. and A Zara and Leahy, J.} } @conference {1253, title = {Strategies for managing item pools to maximize item security}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {San Diego}, author = {Way, W. D. and A Zara and Leahy, J.} } @article {2126, title = {Effect of Rasch calibration on ability and DIF estimation in computer-adaptive tests}, journal = {Journal of Educational Measurement}, volume = {32}, year = {1995}, pages = {341-363}, author = {Zwick, R. and Thayer, D. T. and Wingersky, M.} } @booklet {1591, title = {DIF analysis for pretest items in computer-adaptive testing (Educational Testing Service Research Rep No RR 94-33)}, year = {1994}, note = {$\#$ZW94-33}, address = {Princeton NJ: Educational Testing Service.}, author = {Zwick, R. and Thayer, D. T. and Wingersky, M.} } @article {2028, title = {A Simulation Study of Methods for Assessing Differential Item Functioning in Computerized Adaptive Tests}, journal = {Applied Psychological Measurement}, volume = {18}, number = {2}, year = {1994}, pages = {121-140}, author = {Zwick, R. and Thayer, D. T. and Wingersky, M.} } @booklet {1590, title = {A simulation study of methods for assessing differential item functioning in computer-adaptive tests (Educational Testing Service Research Rep No RR 93-11)}, year = {1993}, address = {Princeton NJ: Educational Testing Service.}, author = {Zwick, R. and Thayer, D.
and Wingersky, M.} } @conference {1288, title = {A comparison of computerized adaptive and paper-and-pencil versions of the national registered nurse licensure examination}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {San Francisco CA}, author = {A Zara} } @conference {1295, title = {Differential item functioning analysis for computer-adaptive tests and other IRT-scored measures}, booktitle = {Paper presented at the annual meeting of the Military Testing Association}, year = {1992}, address = {San Diego CA}, author = {Zwick, R.} } @article {596, title = {A comparison of procedures for content-sensitive item selection in computerized adaptive tests}, journal = {Applied Measurement in Education}, volume = {4}, year = {1991}, pages = {241-261}, author = {Kingsbury, G. G. and A Zara} } @inbook {1788, title = {Computerized adaptive testing: Theory, applications, and standards}, year = {1991}, address = {R. K. Hambleton and J. N. Zaal (Eds.), Advances in educational and psychological testing: Theory and Applications (pp. 341-366). Boston: Kluwer.}, author = {Hambleton, R. K. and Zaal, J. N. and Pieters, J. P. M.} } @booklet {1407, title = {A simulation study of some simple approaches to the study of DIF for CATs}, year = {1991}, address = {Internal memorandum, Educational Testing Service}, author = {Holland, P. W. and Zwick, R.} } @inbook {1961, title = {A research proposal for field testing CAT for nursing licensure examinations}, year = {1990}, address = {Delegate Assembly Book of Reports 1989. Chicago: National Council of State Boards of Nursing.}, author = {A Zara} } @article {595, title = {Procedures for selecting items for computerized adaptive tests}, journal = {Applied Measurement in Education}, volume = {2}, year = {1989}, pages = {359-375}, author = {Kingsbury, G. G. and A Zara} } @inbook {1933, title = {A research proposal for field testing CAT for nursing licensure examinations}, year = {1989}, address = {Delegate Assembly Book of Reports 1989. Chicago: National Council of State Boards of Nursing, Inc.}, author = {A Zara} } @article {749, title = {Some procedures for computerized ability testing}, journal = {International Journal of Educational Research}, volume = {13(2)}, year = {1989}, pages = {175-187}, author = {van der Linden, W. J. and Zwarts, M. A.} } @article {821, title = {Introduction to item response theory and computerized adaptive testing as applied in licensure and certification testing}, journal = {National Clearinghouse of Examination Information Newsletter}, volume = {6}, year = {1988}, pages = {11-17}, author = {A Zara} } @booklet {1589, title = {Full-information item factor analysis from the ASVAB CAT item pool (Methodology Research Center Report 87-1)}, year = {1987}, address = {Chicago IL: University of Chicago}, author = {Zimowski, M. F. and Bock, R. D.} } @booklet {1586, title = {Functional and design specifications for the National Council of State Boards of Nursing adaptive testing system}, year = {1987}, address = {Unpublished manuscript}, author = {A Zara and Bosma, J. and Kaplan, R.} } @booklet {1418, title = {Alternate forms reliability and concurrent validity of adaptive and conventional tests with military recruits}, year = {1983}, address = {Minneapolis MN: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {Kiely, G. L. and A Zara and Weiss, D. J.} }