@article {2753, title = {Expanding the Meaning of Adaptive Testing to Enhance Validity}, journal = {Journal of Computerized Adaptive Testing}, volume = {10}, year = {2023}, pages = {22-31}, keywords = {Adaptive Testing, CAT, CBT, test-taking disengagement, validity}, doi = {10.7333/2305-1002022}, author = {Steven L. Wise} } @article {2752, title = {An Extended Taxonomy of Variants of Computerized Adaptive Testing}, journal = {Journal of Computerized Adaptive Testing}, volume = {10}, year = {2023}, keywords = {Adaptive Testing, evidence-centered design, Item Response Theory, knowledge-based model construction, missingness}, issn = {2165-6592}, doi = {10.7333/2302-100101}, author = {Roy Levy and John T. Behrens and Robert J. Mislevy} } @conference {2631, title = {Generating Rationales to Support Formative Feedback in Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Computer adaptive testing offers many important benefits to support and promote life-long learning. Computers permit testing on demand, thereby allowing students to take the test at any time during instruction; items on computerized tests are scored immediately, thereby providing students with instant feedback; and computerized tests permit continuous administration, thereby allowing students more choice about when they write their exams. Despite these important benefits, the advent of computer adaptive testing has also raised formidable challenges, particularly in the area of item development. Educators must have access to large numbers of diverse, high-quality test items to implement computerized adaptive testing because items are continuously administered to students. Hence, hundreds or even thousands of items are needed to develop the item banks necessary for computer adaptive testing. Unfortunately, educational test items, as they are currently created, are time-consuming and expensive to develop because each item is written initially by a content specialist and then reviewed, edited, and revised by groups of content specialists to ensure the items yield reliable and valid information. Item development is therefore one of the most important problems that must be solved before we can migrate to computer adaptive testing to support life-long learning, because large numbers of high-quality, content-specific test items are required.

One promising item development method that may be used to address this challenge is automatic item generation. Automatic item generation is a relatively new but rapidly evolving research area in which cognitive and psychometric modelling practices are used to produce hundreds of new test items with the aid of computer technology. The purpose of our presentation is to describe a new methodology for generating both the items and the rationales required to solve each generated item, in order to produce the feedback needed to support life-long learning. Our item generation methodology will first be described. To ensure our description is practical, the method will then be illustrated with generated items from the health sciences, demonstrating how item generation can promote life-long learning for medical educators and practitioners.


}, keywords = {Adaptive Testing, formative feedback, Item generation}, url = {https://drive.google.com/open?id=1O5KDFtQlDLvhNoDr7X4JO4arpJkIHKUP}, author = {Mark Gierl and Okan Bulut} } @conference {2627, title = {How Adaptive is an Adaptive Test: Are all Adaptive Tests Adaptive?}, booktitle = {2017 IACAT Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

There are many different kinds of adaptive tests, but they all share the characteristic that some feature of the test is customized to the purpose of the test. In the time allotted, it is impossible to consider all of these types of adaptation, so this address will focus on the {\textquotedblleft}classic{\textquotedblright} adaptive test that matches the difficulty of the test to the capabilities of the person being tested. The address will first present information on the maximum level of adaptation that can occur and then compare the amount of adaptation that typically occurs on an operational adaptive test to that maximum. An index is proposed to summarize the amount of adaptation, and it is argued that this type of index should be reported for operational adaptive tests to show how much adaptation typically occurs.


}, keywords = {Adaptive Testing, CAT}, url = {https://drive.google.com/open?id=1Nj-zDCKk3DvHA4Jlp1qkb2XovmHeQfxu}, author = {Mark D Reckase} } @article {2491, title = {Effect of Imprecise Parameter Estimation on Ability Estimation in a Multistage Test in an Automatic Item Generation Context }, journal = {Journal of Computerized Adaptive Testing}, volume = {4}, year = {2016}, pages = {1-18}, keywords = {Adaptive Testing, automatic item generation, errors in item parameters, item clones, multistage testing}, issn = {2165-6592 }, doi = {10.7333/1608-040101}, url = {http://iacat.org/jcat/index.php/jcat/article/view/59/27}, author = {Colvin, Kimberly and Keller, Lisa A and Robin, Frederic} } @article {2103, title = {Computerized Adaptive Testing of Personality Traits}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216}, year = {2008}, pages = {12-21}, abstract = {

A computerized adaptive testing (CAT) procedure was simulated with ordinal polytomous personality data collected using a conventional paper-and-pencil testing format. An adapted Dutch version of the dominance scale of Gough and Heilbrun{\textquoteright}s Adjective Check List (ACL) was used. This version contained Likert response scales with five categories. Item parameters were estimated using Samejima{\textquoteright}s graded response model from the responses of 1,925 subjects. The CAT procedure was simulated using the responses of 1,517 other subjects. The value of the required standard error in the stopping rule of the CAT was manipulated. The relationship between CAT latent trait estimates and estimates based on all dominance items was studied. Additionally, the pattern of relationships between the CAT latent trait estimates and the other ACL scales was compared to that between latent trait estimates based on the entire item pool and the other ACL scales. The CAT procedure resulted in latent trait estimates qualitatively equivalent to latent trait estimates based on all items, while a substantial reduction in the number of items administered could be realized (at a stopping rule of 0.4, about 33\% of the 36 items were used).

}, keywords = {Adaptive Testing, computer-assisted testing, Item Response Theory, Likert scales, Personality Measures}, doi = {10.1027/0044-3409.216.1.12}, author = {Hol, A. M. and Vorst, H. C. M. and Mellenbergh, G. J.} } @article {199, title = {Computerized adaptive testing for polytomous motivation items: Administration mode effects and a comparison with short forms}, journal = {Applied Psychological Measurement}, volume = {31}, number = {5}, year = {2007}, note = {10.1177/0146621606297314; Journal; Peer Reviewed Journal; Journal Article}, pages = {412-429}, abstract = {In a randomized experiment (n=515), a computerized and a computerized adaptive test (CAT) are compared. The item pool consists of 24 polytomous motivation items. Although items are carefully selected, calibration data show that Samejima{\textquoteright}s graded response model did not fit the data optimally. A simulation study is done to assess possible consequences of model misfit. CAT efficiency was studied by a systematic comparison of the CAT with two types of conventional fixed-length short forms, which are created to be good CAT competitors. Results showed no essential administration mode effects. Efficiency analyses show that CAT outperformed the short forms in almost all aspects when results are aggregated along the latent trait scale. The real and the simulated data results are very similar, which indicates that the real data results are not affected by model misfit. (PsycINFO Database Record (c) 2007 APA ) (journal abstract)}, keywords = {2220 Tests \& Testing, Adaptive Testing, Attitude Measurement, computer adaptive testing, Computer Assisted Testing, items, Motivation, polytomous motivation, Statistical Validity, Test Administration, Test Forms, Test Items}, isbn = {0146-6216}, author = {Hol, A. M. and Vorst, H. C. M. and Mellenbergh, G. J.} } @article {2095, title = {An Authoring Environment for Adaptive Testing}, journal = {Educational Technology \& Society}, volume = {8}, year = {2005}, pages = {66-76}, abstract = {

SIETTE is a web-based adaptive testing system that implements computerized adaptive tests. These tests are tailor-made, theory-based tests in which the questions shown to students, the finalization of the test, and the estimation of student knowledge are all accomplished adaptively. To construct these tests, SIETTE has an authoring environment comprising a suite of tools that helps teachers create questions and tests properly and analyze students{\textquoteright} performance after they take a test. In this paper, we present this authoring environment in the framework of adaptive testing. As will be shown, this set of visual tools, which contains some adaptable features, can be useful for teachers lacking skills in this kind of testing. Additionally, other systems that implement adaptive testing will be discussed.

}, keywords = {Adaptability, Adaptive Testing, Authoring environment, Item Response Theory}, author = {Guzm{\'a}n, E and Conejo, R and Garc{\'\i}a-Herv{\'a}s, E} } @article {398, title = {A comparison of item-selection methods for adaptive tests with content constraints}, journal = {Journal of Educational Measurement}, volume = {42}, number = {3}, year = {2005}, pages = {283-302}, publisher = {Blackwell Publishing: United Kingdom}, abstract = {In test assembly, a fundamental difference exists between algorithms that select a test sequentially or simultaneously. Sequential assembly allows us to optimize an objective function at the examinee{\textquoteright}s ability estimate, such as the test information function in computerized adaptive testing. But it leads to the non-trivial problem of how to realize a set of content constraints on the test{\textemdash}a problem more naturally solved by a simultaneous item-selection method. Three main item-selection methods in adaptive testing offer solutions to this dilemma. The spiraling method moves item selection across categories of items in the pool proportionally to the numbers needed from them. Item selection by the weighted-deviations method (WDM) and the shadow test approach (STA) is based on projections of the future consequences of selecting an item. These two methods differ in that the former calculates a projection of a weighted sum of the attributes of the eventual test and the latter a projection of the test itself. The pros and cons of these methods are analyzed. An empirical comparison between the WDM and STA was conducted for an adaptive version of the Law School Admission Test (LSAT), which showed equally good item-exposure rates but violations of some of the constraints and larger bias and inaccuracy of the ability estimator for the WDM.}, keywords = {Adaptive Testing, Algorithms, content constraints, item selection method, shadow test approach, spiraling method, weighted deviations method}, isbn = {0022-0655 (Print)}, author = {van der Linden, W. J.} } @article {72, title = {Controlling item exposure and test overlap in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {29}, number = {3}, year = {2005}, pages = {204-217}, abstract = {This article proposes an item exposure control method, which is the extension of the Sympson and Hetter procedure and can provide item exposure control at both the item and test levels. Item exposure rate and test overlap rate are two indices commonly used to track item exposure in computerized adaptive tests. By considering both indices, item exposure can be monitored at both the item and test levels. To control the item exposure rate and test overlap rate simultaneously, the modified procedure attempted to control not only the maximum value but also the variance of item exposure rates. Results indicated that the item exposure rate and test overlap rate could be controlled simultaneously by implementing the modified procedure. Item exposure control was improved and precision of trait estimation decreased when a prespecified maximum test overlap rate was stringent. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Content (Test) computerized adaptive testing}, author = {Chen, S-Y. 
and Lei, P-W.} } @article {282, title = {Somministrazione di test computerizzati di tipo adattivo: Un{\textquoteright} applicazione del modello di misurazione di Rasch [Administration of computerized and adaptive tests: An application of the Rasch Model]}, journal = {Testing Psicometria Metodologia}, volume = {12}, number = {3}, year = {2005}, pages = {131-149}, abstract = {The aim of the present study is to describe the characteristics of a procedure for administering computerized and adaptive tests (Computer Adaptive Testing or CAT). Items to be asked to the individuals are interactively chosen and are selected from a "bank" in which they were previously calibrated and recorded on the basis of their difficulty level. The selection of items is performed by increasingly more accurate estimates of the examinees{\textquoteright} ability. The building of an item-bank on Psychometrics and the implementation of this procedure allow a first validation through Monte Carlo simulations. (PsycINFO Database Record (c) 2006 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Response Theory computerized adaptive testing, Models, Psychometrics}, author = {Miceli, R. and Molinengo, G.} } @article {202, title = {Assisted self-adapted testing: A comparative study}, journal = {European Journal of Psychological Assessment}, volume = {20}, number = {1}, year = {2004}, pages = {2-9}, abstract = {A new type of self-adapted test (S-AT), called Assisted Self-Adapted Test (AS-AT), is presented. It differs from an ordinary S-AT in that prior to selecting the difficulty category, the computer advises examinees on their best difficulty category choice, based on their previous performance. Three tests (computerized adaptive test, AS-AT, and S-AT) were compared regarding both their psychometric (precision and efficiency) and psychological (anxiety) characteristics. Tests were applied in an actual assessment situation, in which test scores determined 20\% of term grades. A sample of 173 high school students participated. Neither differences in posttest anxiety nor ability were obtained. Concerning precision, AS-AT was as precise as CAT, and both revealed more precision than S-AT. It was concluded that AS-AT acted as a CAT concerning precision. Some hints, but not conclusive support, of the psychological similarity between AS-AT and S-AT was also found. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Anxiety, Computer Assisted Testing, Psychometrics, Test}, author = {Hontangas, P. and Olea, J. and Ponsoda, V. and Revuelta, J. and Wise, S. L.} } @article {245, title = {{\'E}valuation et multim{\'e}dia dans l{\textquoteright}apprentissage d{\textquoteright}une L2 [Assessment and multimedia in learning an L2]}, journal = {ReCALL}, volume = {16}, number = {2}, year = {2004}, pages = {475-487}, abstract = {In the first part of this paper different areas where technology may be used for second language assessment are described. First, item banking operations, which are generally based on item Response Theory but not necessarily restricted to dichotomously scored items, facilitate assessment task organization and require technological support. Second, technology may help to design more authentic assessment tasks or may be needed in some direct testing situations. Third, the assessment environment may be more adapted and more stimulating when technology is used to give the student more control. 
The second part of the paper presents different functions of assessment. The monitoring function (often called formative assessment) aims at adapting the classroom activities to students and to provide continuous feedback. Technology may be used to train the teachers in monitoring techniques, to organize data or to produce diagnostic information; electronic portfolios or quizzes that are built in some educational software may also be used for monitoring. The placement function is probably the one in which the application of computer adaptive testing procedures (e.g. French CAPT) is the most appropriate. Automatic scoring devices may also be used for placement purposes. Finally the certification function requires more valid and more reliable tools. Technology may be used to enhance the testing situation (to make it more authentic) or to facilitate data processing during the construction of a test. Almond et al. (2002) propose a four component model (Selection, Presentation, Scoring and Response) for designing assessment systems. Each component must be planned taking into account the assessment function. }, keywords = {Adaptive Testing, Computer Assisted Instruction, Educational, Foreign Language Learning, Program Evaluation, Technology computerized adaptive testing}, author = {Laurier, M.} } @article {139, title = {Kann die Konfundierung von Konzentrationsleistung und Aktivierung durch adaptives Testen mit dern FAKT vermieden werden? [Avoiding the confounding of concentration performance and activation by adaptive testing with the FACT]}, journal = {Zeitschrift f{\"u}r Differentielle und Diagnostische Psychologie}, volume = {25}, number = {1}, year = {2004}, pages = {1-17}, abstract = {The study investigates the effect of computerized adaptive testing strategies on the confounding of concentration performance with activation. A sample of 54 participants was administered 1 out of 3 versions (2 adaptive, 1 non-adaptive) of the computerized Frankfurt Adaptive Concentration Test FACT (Moosbrugger \& Heyden, 1997) at three subsequent points in time. During the test administration changes in activation (electrodermal activity) were recorded. The results pinpoint a confounding of concentration performance with activation for the non-adaptive test version, but not for the adaptive test versions (p = .01). Thus, adaptive FACT testing strategies can remove the confounding of concentration performance with activation, thereby increasing the discriminant validity. In conclusion, an attention-focusing-hypothesis is formulated to explain the observed effect. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Concentration, Performance, Testing computerized adaptive testing}, author = {Frey, A. and Moosbrugger, H.} } @article {275, title = {A Bayesian method for the detection of item preknowledge in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {27}, number = {2}, year = {2003}, pages = {121-137}, abstract = {With the increased use of continuous testing in computerized adaptive testing, new concerns about test security have evolved, such as how to ensure that items in an item pool are safeguarded from theft. In this article, procedures to detect test takers using item preknowledge are explored. When test takers use item preknowledge, their item responses deviate from the underlying item response theory (IRT) model, and estimated abilities may be inflated. 
This deviation may be detected through the use of person-fit indices. A Bayesian posterior log odds ratio index is proposed for detecting the use of item preknowledge. In this approach to person fit, the estimated probability that each test taker has preknowledge of items is updated after each item response. These probabilities are based on the IRT parameters, a model specifying the probability that each item has been memorized, and the test taker{\textquoteright}s item responses. Simulations based on an operational computerized adaptive test (CAT) pool are used to demonstrate the use of the odds ratio index. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Cheating, Computer Assisted Testing, Individual Differences computerized adaptive testing, Item, Item Analysis (Statistical), Mathematical Modeling, Response Theory}, author = {McLeod, L. and Lewis, C. and Thissen, D.} } @article {63, title = {A comparative study of item exposure control methods in computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {40}, number = {1}, year = {2003}, pages = {71-103}, abstract = {This study compared the properties of five methods of item exposure control within the purview of estimating examinees{\textquoteright} abilities in a computerized adaptive testing (CAT) context. Each exposure control algorithm was incorporated into the item selection procedure and the adaptive testing progressed based on the CAT design established for this study. The merits and shortcomings of these strategies were considered under different item pool sizes and different desired maximum exposure rates and were evaluated in light of the observed maximum exposure rates, the test overlap rates, and the conditional standard errors of measurement. Each method had its advantages and disadvantages, but no one possessed all of the desired characteristics. There was a clear and logical trade-off between item exposure control and measurement precision. The M. L. Stocking and C. Lewis conditional multinomial procedure and, to a slightly lesser extent, the T. Davey and C. G. Parshall method seemed to be the most promising considering all of the factors that this study addressed. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Educational, Item Analysis (Statistical), Measurement, Strategies computerized adaptive testing}, author = {Chang, S-W. and Ansley, T. N.} } @article {349, title = {Computerized adaptive rating scales for measuring managerial performance}, journal = {International Journal of Selection and Assessment}, volume = {11}, number = {2-3}, year = {2003}, pages = {237-246}, abstract = {Computerized adaptive rating scales (CARS) had been developed to measure contextual or citizenship performance. This rating format used a paired-comparison protocol, presenting pairs of behavioral statements scaled according to effectiveness levels, and an iterative item response theory algorithm to obtain estimates of ratees{\textquoteright} citizenship performance (W. C. Borman et al, 2001). In the present research, we developed CARS to measure the entire managerial performance domain, including task and citizenship performance, thus addressing a major limitation of the earlier CARS. The paper describes this development effort, including an adjustment to the algorithm that reduces substantially the number of item pairs required to obtain almost as much precision in the performance estimates. 
(PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Algorithms, Associations, Citizenship, Computer Assisted Testing, Construction, Contextual, Item Response Theory, Job Performance, Management, Management Personnel, Rating Scales, Test}, author = {Schneider, R. J. and Goff, M. and Anderson, S. and Borman, W. C.} } @article {75, title = {Computerized adaptive testing using the nearest-neighbors criterion}, journal = {Applied Psychological Measurement}, volume = {27}, number = {3}, year = {2003}, pages = {204-216}, abstract = {Item selection procedures designed for computerized adaptive testing need to accurately estimate every taker{\textquoteright}s trait level (θ) and, at the same time, effectively use all items in a bank. Empirical studies showed that classical item selection procedures based on maximizing Fisher or other related information yielded highly varied item exposure rates; with these procedures, some items were frequently used whereas others were rarely selected. In the literature, methods have been proposed for controlling exposure rates; they tend to affect the accuracy in θ estimates, however. A modified version of the maximum Fisher information (MFI) criterion, coined the nearest neighbors (NN) criterion, is proposed in this study. The NN procedure improves to a moderate extent the undesirable item exposure rates associated with the MFI criterion and keeps sufficient precision in estimates. The NN criterion will be compared with a few other existing methods in an empirical study using the mean squared errors in θ estimates and plots of item exposure rates associated with different distributions. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {(Statistical), Adaptive Testing, Computer Assisted Testing, Item Analysis, Item Response Theory, Statistical Analysis, Statistical Estimation computerized adaptive testing, Statistical Tests}, author = {Cheng, P. E. and Liou, M.} } @article {94, title = {Item exposure constraints for testlets in the verbal reasoning section of the MCAT}, journal = {Applied Psychological Measurement}, volume = {27}, number = {5}, year = {2003}, pages = {335-356}, abstract = {The current study examined item exposure control procedures for testlet scored reading passages in the Verbal Reasoning section of the Medical College Admission Test with four computerized adaptive testing (CAT) systems using the partial credit model. The first system used a traditional CAT using maximum information item selection. The second used random item selection to provide a baseline for optimal exposure rates. The third used a variation of Lunz and Stahl{\textquoteright}s randomization procedure. The fourth used Luecht and Nungester{\textquoteright}s computerized adaptive sequential testing (CAST) system. A series of simulated fixed-length CATs was run to determine the optimal item length selection procedure. Results indicated that both the randomization procedure and CAST performed well in terms of exposure control and measurement precision, with the CAST system providing the best overall solution when all variables were taken into consideration. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Entrance Examinations, Item Response Theory, Random Sampling, Reasoning, Verbal Ability computerized adaptive testing}, author = {Davis, L. L. and Dodd, B. 
G.} } @article {57, title = {Optimal stratification of item pools in α-stratified computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {27}, number = {4}, year = {2003}, pages = {262-274}, abstract = {A method based on 0-1 linear programming (LP) is presented to stratify an item pool optimally for use in α-stratified adaptive testing. Because the 0-1 LP model belongs to the subclass of models with a network flow structure, efficient solutions are possible. The method is applied to a previous item pool from the computerized adaptive testing (CAT) version of the Graduate Record Exams (GRE) Quantitative Test. The results indicate that the new method performs well in practical situations. It improves item exposure control, reduces the mean squared error in the θ estimates, and increases test reliability. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Content (Test), Item Response Theory, Mathematical Modeling, Test Construction computerized adaptive testing}, author = {Chang, Hua-Hua and van der Linden, W. J.} } @article {68, title = {The relationship between item exposure and test overlap in computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {40}, number = {2}, year = {2003}, pages = {129-145}, abstract = {The purpose of this article is to present an analytical derivation for the mathematical form of an average between-test overlap index as a function of the item exposure index, for fixed-length computerized adaptive tests (CATs). This algebraic relationship is used to investigate the simultaneous control of item exposure at both the item and test levels. The results indicate that, in fixed-length CATs, control of the average between-test overlap is achieved via the mean and variance of the item exposure rates of the items that constitute the CAT item pool. The mean of the item exposure rates is easily manipulated. Control over the variance of the item exposure rates can be achieved via the maximum item exposure rate (r-sub(max)). Therefore, item exposure control methods which implement a specification of r-sub(max) (e.g., J. B. Sympson and R. D. Hetter, 1985) provide the most direct control at both the item and test levels. (PsycINFO Database Record (c) 2005 APA )}, keywords = {(Statistical), Adaptive Testing, Computer Assisted Testing, Human Computer, Interaction computerized adaptive testing, Item Analysis, Item Analysis (Test), Test Items}, author = {Chen, S-Y. and Ankenmann, R. D. and Spray, J. A.} } @article {397, title = {Some alternatives to Sympson-Hetter item-exposure control in computerized adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {28}, number = {3}, year = {2003}, pages = {249-265}, abstract = {The Hetter and Sympson (1997; 1985) method is a method of probabilistic item-exposure control in computerized adaptive testing. Setting its control parameters to admissible values requires an iterative process of computer simulations that has been found to be time-consuming, particularly if the parameters have to be set conditional on a realistic set of values for the examinees{\textquoteright} ability parameter. Formal properties of the method are identified that help us explain why this iterative process can be slow and does not guarantee admissibility. In addition, some alternatives to the SH method are introduced. 
The behavior of these alternatives was estimated for an adaptive test from an item pool from the Law School Admission Test (LSAT). Two of the alternatives showed attractive behavior and converged smoothly to admissibility for all items in a relatively small number of iteration steps. }, keywords = {Adaptive Testing, Computer Assisted Testing, Test Items computerized adaptive testing}, author = {van der Linden, W. J.} } @article {321, title = {Timing behavior in computerized adaptive testing: Response times for correct and incorrect answers are not related to general fluid intelligence/Zum Zeitverhalten beim computergest{\"u}tzten adaptiveb Testen: Antwortlatenzen bei richtigen und falschen L{\"o}sun}, journal = {Zeitschrift f{\"u}r Differentielle und Diagnostische Psychologie}, volume = {24}, number = {1}, year = {2003}, pages = {57-63}, abstract = {Examined the effects of general fluid intelligence on item response times for correct and false responses in computerized adaptive testing. After performing the CFT3 intelligence test, 80 individuals (aged 17-44 yrs) completed perceptual and cognitive discrimination tasks. Results show that response times were related neither to the proficiency dimension reflected by the task nor to the individual level of fluid intelligence. Furthermore, the false > correct-phenomenon as well as substantial positive correlations between item response times for false and correct responses were shown to be independent of intelligence levels. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Cognitive Ability, Intelligence, Perception, Reaction Time computerized adaptive testing}, author = {Rammsayer, Thomas and Brandler, Susanne} } @article {407, title = {Using response times to detect aberrant responses in computerized adaptive testing}, journal = {Psychometrika}, volume = {68}, number = {2}, year = {2003}, pages = {251-265}, abstract = {A lognormal model for response times is used to check response times for aberrances in examinee behavior on computerized adaptive tests. Both classical procedures and Bayesian posterior predictive checks are presented. For a fixed examinee, responses and response times are independent; checks based on response times offer thus information independent of the results of checks on response patterns. Empirical examples of the use of classical and Bayesian checks for detecting two different types of aberrances in response times are presented. The detection rates for the Bayesian checks outperformed those for the classical checks, but at the cost of higher false-alarm rates. A guideline for the choice between the two types of checks is offered.}, keywords = {Adaptive Testing, Behavior, Computer Assisted Testing, computerized adaptive testing, Models, person Fit, Prediction, Reaction Time}, author = {van der Linden, W. J. and van Krimpen-Stoop, E. M. L. A.} } @article {308, title = {A comparison of item selection techniques and exposure control mechanisms in CATs using the generalized partial credit model}, journal = {Applied Psychological Measurement}, volume = {26}, number = {2}, year = {2002}, pages = {147-163}, abstract = {The use of more performance items in large-scale testing has led to an increase in the research investigating the use of polytomously scored items in computer adaptive testing (CAT). 
Because this research has to be complemented with information pertaining to exposure control, the present research investigated the impact of using five different exposure control algorithms in two sized item pools calibrated using the generalized partial credit model. The results of the simulation study indicated that the a-stratified design, in comparison to a no-exposure control condition, could be used to reduce item exposure and overlap, increase pool utilization, and only minorly degrade measurement precision. Use of the more restrictive exposure control algorithms, such as the Sympson-Hetter and conditional Sympson-Hetter, controlled exposure to a greater extent but at the cost of measurement precision. Because convergence of the exposure control parameters was problematic for some of the more restrictive exposure control algorithms, use of the more simplistic exposure control mechanisms, particularly when the test length to item pool size ratio is large, is recommended. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {(Statistical), Adaptive Testing, Algorithms computerized adaptive testing, Computer Assisted Testing, Item Analysis, Item Response Theory, Mathematical Modeling}, author = {Pastor, D. A. and Dodd, B. G. and Chang, Hua-Hua} } @article {370, title = {An EM approach to parameter estimation for the Zinnes and Griggs paired comparison IRT model}, journal = {Applied Psychological Measurement}, volume = {26}, number = {2}, year = {2002}, pages = {208-227}, abstract = {Borman et al. recently proposed a computer adaptive performance appraisal system called CARS II that utilizes paired comparison judgments of behavioral stimuli. To implement this approach,the paired comparison ideal point model developed by Zinnes and Griggs was selected. In this article,the authors describe item response and information functions for the Zinnes and Griggs model and present procedures for estimating stimulus and person parameters. Monte carlo simulations were conducted to assess the accuracy of the parameter estimation procedures. The results indicated that at least 400 ratees (i.e.,ratings) are required to obtain reasonably accurate estimates of the stimulus parameters and their standard errors. In addition,latent trait estimation improves as test length increases. The implications of these results for test construction are also discussed. }, keywords = {Adaptive Testing, Computer Assisted Testing, Item Response Theory, Maximum Likelihood, Personnel Evaluation, Statistical Correlation, Statistical Estimation}, author = {Stark, S. and F Drasgow} } @article {60, title = {Hypergeometric family and item overlap rates in computerized adaptive testing}, journal = {Psychometrika}, volume = {67}, number = {3}, year = {2002}, pages = {387-398}, abstract = {A computerized adaptive test (CAT) is usually administered to small groups of examinees at frequent time intervals. It is often the case that examinees who take the test earlier share information with examinees who will take the test later, thus increasing the risk that many items may become known. Item overlap rate for a group of examinees refers to the number of overlapping items encountered by these examinees divided by the test length. For a specific item pool, different item selection algorithms may yield different item overlap rates. An important issue in designing a good CAT item selection algorithm is to keep item overlap rate below a preset level. 
In doing so, it is important to investigate what the lowest rate could be for all possible item selection algorithms. In this paper we rigorously prove that if every item had an equal possibility to be selected from the pool in a fixed-length CAT, the number of overlapping items among any α randomly sampled examinees follows the hypergeometric distribution family for α >= 1. Thus, the expected values of the number of overlapping items among any α randomly sampled examinees can be calculated precisely. These values may serve as benchmarks in controlling item overlap rates for fixed-length adaptive tests. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Algorithms, Computer Assisted Testing, Taking, Test, Time On Task computerized adaptive testing}, author = {Chang, Hua-Hua and Zhang, J.} } @article {418, title = {Mathematical-programming approaches to test item pool design}, number = {RR 02-09}, year = {2002}, note = {Advances in psychology research, Vol. (Hauppauge, NY: Nova Science Publishers, Inc, [URL:http://www.Novapublishers.com]. vi, 228 pp}, pages = {93-108}, institution = {University of Twente, Faculty of Educational Science and Technology}, address = {Twente, The Netherlands}, abstract = {(From the chapter) This paper presents an approach to item pool design that has the potential to improve on the quality of current item pools in educational and psychological testing and hence to increase both measurement precision and validity. The approach consists of the application of mathematical programming techniques to calculate optimal blueprints for item pools. These blueprints can be used to guide the item-writing process. Three different types of design problems are discussed, namely for item pools for linear tests, item pools for computerized adaptive testing (CAT), and systems of rotating item pools for CAT. The paper concludes with an empirical example of the problem of designing a system of rotating item pools for CAT.}, keywords = {Adaptive Testing, Computer Assisted, Computer Programming, Educational Measurement, Item Response Theory, Mathematics, Psychometrics, Statistical Rotation computerized adaptive testing, Test Items, Testing}, isbn = {02-09}, author = {Veldkamp, B. P. and van der Linden, W. J. and Ariel, A.} } @article {277, title = {Outlier detection in high-stakes certification testing}, journal = {Journal of Educational Measurement}, volume = {39}, number = {3}, year = {2002}, pages = {219-233}, abstract = {Discusses recent developments of person-fit analysis in computerized adaptive testing (CAT). Methods from statistical process control are presented that have been proposed to classify an item score pattern as fitting or misfitting the underlying item response theory model in CAT. Most person-fit research in CAT is restricted to simulated data. In this study, empirical data from a certification test were used. Alternatives are discussed to generate norms so that bounds can be determined to classify an item score pattern as fitting or misfitting. Using bounds determined from a sample of a high-stakes certification test, the empirical analysis showed that different types of misfit can be distinguished. Further applications using statistical process control methods to detect misfitting item score patterns are discussed. 
(PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, computerized adaptive testing, Educational Measurement, Goodness of Fit, Item Analysis (Statistical), Item Response Theory, person Fit, Statistical Estimation, Statistical Power, Test Scores}, author = {Meijer, R. R.} } @article {346, title = {A structure-based approach to psychological measurement: Matching measurement models to latent structure}, journal = {Assessment}, volume = {9}, number = {1}, year = {2002}, pages = {4-16}, abstract = {The present article sets forth the argument that psychological assessment should be based on a construct{\textquoteright}s latent structure. The authors differentiate dimensional (continuous) and taxonic (categorical) structures at the latent and manifest levels and describe the advantages of matching the assessment approach to the latent structure of a construct. A proper match will decrease measurement error, increase statistical power, clarify statistical relationships, and facilitate the location of an efficient cutting score when applicable. Thus, individuals will be placed along a continuum or assigned to classes more accurately. The authors briefly review the methods by which latent structure can be determined and outline a structure-based approach to assessment that builds on dimensional scaling models, such as item response theory, while incorporating classification methods as appropriate. Finally, the authors empirically demonstrate the utility of their approach and discuss its compatibility with traditional assessment methods and with computerized adaptive testing. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Assessment, Classification (Cognitive Process), Computer Assisted, Item Response Theory, Psychological, Scaling (Testing), Statistical Analysis computerized adaptive testing, Taxonomies, Testing}, author = {Ruscio, John and Ruscio, Ayelet Meron} } @inbook {108, title = {The work ahead: A psychometric infrastructure for computerized adaptive tests}, booktitle = {Computer-based tests: Building the foundation for future assessment}, year = {2002}, note = {Using Smart Source ParsingComputer-based testing: Building the foundation for future assessments. (pp. 1-35). Mahwah, NJ : Lawrence Erlbaum Associates, Publishers. xi, 326 pp}, publisher = {Lawrence Erlbaum Associates, Inc.}, organization = {Lawrence Erlbaum Associates, Inc.}, address = {Mahwah, N.J. USA}, abstract = {(From the chapter) Considers the past and future of computerized adaptive tests and computer-based tests and looks at issues and challenges confronting a testing program as it implements and operates a computer-based test. Recommendations for testing programs from The National Council of Measurement in Education Ad Hoc Committee on Computerized Adaptive Test Disclosure are appended. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Educational, Measurement, Psychometrics}, author = {F Drasgow}, editor = {M. P. Potenza and J. J. Freemer and W. C. Ward} } @article {315, title = {Differences between self-adapted and computerized adaptive tests: A meta-analysis}, journal = {Journal of Educational Measurement}, volume = {38}, number = {3}, year = {2001}, pages = {235-247}, abstract = {Self-adapted testing has been described as a variation of computerized adaptive testing that reduces test anxiety and thereby enhances test performance. 
The purpose of this study was to gain a better understanding of these proposed effects of self-adapted tests (SATs); meta-analysis procedures were used to estimate differences between SATs and computerized adaptive tests (CATs) in proficiency estimates and post-test anxiety levels across studies in which these two types of tests have been compared. After controlling for measurement error the results showed that SATs yielded proficiency estimates that were 0.12 standard deviation units higher and post-test anxiety levels that were 0.19 standard deviation units lower than those yielded by CATs. The authors speculate about possible reasons for these differences and discuss advantages and disadvantages of using SATs in operational settings. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Scores computerized adaptive testing, Test, Test Anxiety}, author = {Pitkin, A. K. and Vispoel, W. P.} } @article {9, title = {Evaluation of an MMPI-A short form: Implications for adaptive testing}, journal = {Journal of Personality Assessment}, volume = {76}, number = {1}, year = {2001}, pages = {76-89}, abstract = {Reports some psychometric properties of an MMPI-Adolescent version (MMPI-A; J. N. Butcher et al, 1992) short form based on administration of the 1st 150 items of this test instrument. The authors report results for both the MMPI-A normative sample of 1,620 adolescents (aged 14-18 yrs) and a clinical sample of 565 adolescents (mean age 15.2 yrs) in a variety of treatment settings. The authors summarize results for the MMPI-A basic scales in terms of Pearson product-moment correlations generated between full administration and short-form administration formats and mean T score elevations for the basic scales generated by each approach. In this investigation, the authors also examine single-scale and 2-point congruences found for the MMPI-A basic clinical scales as derived from standard and short-form administrations. The authors present the relative strengths and weaknesses of the MMPI-A short form and discuss the findings in terms of implications for attempts to shorten the item pool through the use of computerized adaptive assessment approaches. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Mean, Minnesota Multiphasic Personality Inventory, Psychometrics, Statistical Correlation, Statistical Samples, Test Forms}, author = {Archer, R. P. and Tirrell, C. A. and Elkins, D. E.} } @inbook {385, title = {Item response theory applied to combinations of multiple-choice and constructed-response items--approximation methods for scale scores}, booktitle = {Test scoring}, year = {2001}, note = {Using Smart Source ParsingTest scoring. (pp. 293-341). Mahwah, NJ : Lawrence Erlbaum Associates, Publishers. xii, 422 pp}, pages = {289-315}, publisher = {Lawrence Erlbaum Associates}, organization = {Lawrence Erlbaum Associates}, chapter = {8}, address = {Mahwah, N.J. USA}, abstract = {(From the chapter) The authors develop approximate methods that replace the scoring tables with weighted linear combinations of the component scores. Topics discussed include: a linear approximation for the extension to combinations of scores; the generalization of two or more scores; potential applications of linear approximations to item response theory in computerized adaptive tests; and evaluation of the pattern-of-summed-scores, and Gaussian approximation, estimates of proficiency. 
(PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Item Response Theory, Method), Multiple Choice (Testing, Scoring (Testing), Statistical Estimation, Statistical Weighting, Test Items, Test Scores}, author = {Thissen, D. and Nelson, L. A. and Swygert, K. A.} } @article {188, title = {Item selection in computerized adaptive testing: Should more discriminating items be used first?}, journal = {Journal of Educational Measurement}, volume = {38}, number = {3}, year = {2001}, pages = {249-266}, abstract = {During computerized adaptive testing (CAT), items are selected continuously according to the test-taker{\textquoteright}s estimated ability. Test security has become a problem because high-discrimination items are more likely to be selected and become overexposed. So, there seems to be a tradeoff between high efficiency in ability estimations and balanced usage of items. This series of four studies addressed the dilemma by focusing on the notion of whether more or less discriminating items should be used first in CAT. The first study demonstrated that the common maximum information method with J. B. Sympson and R. D. Hetter (1985) control resulted in the use of more discriminating items first. The remaining studies showed that using items in the reverse order, as described in H. Chang and Z. Yings (1999) stratified method had potential advantages: (a) a more balanced item usage and (b) a relatively stable resultant item pool structure with easy and inexpensive management. This stratified method may have ability-estimation efficiency better than or close to that of other methods. It is argued that the judicious selection of items, as in the stratified method, is a more active control of item exposure. (PsycINFO Database Record (c) 2005 APA )}, keywords = {ability, Adaptive Testing, Computer Assisted Testing, Estimation, Statistical, Test Items computerized adaptive testing}, author = {Hau, Kit-Tai and Chang, Hua-Hua} } @article {279, title = {Nouveaux d{\'e}veloppements dans le domaine du testing informatis{\'e} [New developments in the area of computerized testing]}, journal = {Psychologie Fran{\c c}aise}, volume = {46}, number = {3}, year = {2001}, pages = {221-230}, abstract = {L{\textquoteright}usage de l{\textquoteright}{\'e}valuation assist{\'e}e par ordinateur s{\textquoteright}est fortement d{\'e}velopp{\'e} depuis la premi{\`e}re formulation de ses principes de base dans les ann{\'e}es soixante et soixante-dix. Cet article offre une introduction aux derniers d{\'e}veloppements dans le domaine de l{\textquoteright}{\'e}valuation assist{\'e}e par ordinateur, en particulier celui du testing adaptative informatis{\'e}e (TAI). L{\textquoteright}estimation de l{\textquoteright}aptitude, la s{\'e}lection des items et le d{\'e}veloppement d{\textquoteright}une base d{\textquoteright}items dans le cas du TAI sont discut{\'e}s. De plus, des exemples d{\textquoteright}utilisations innovantes de l{\textquoteright}ordinateur dans des syst{\`e}mes int{\'e}gr{\'e}s de testing et de testing via Internet sont pr{\'e}sent{\'e}s. L{\textquoteright}article se termine par quelques illustrations de nouvelles applications du testing informatis{\'e} et des suggestions pour des recherches futures.Discusses the latest developments in computerized psychological assessment, with emphasis on computerized adaptive testing (CAT). Ability estimation, item selection, and item pool development in CAT are described. Examples of some innovative approaches to CAT are presented. 
(PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Applications, Computer Assisted, Diagnosis, Psychological Assessment computerized adaptive testing}, author = {Meijer, R. R. and Gr{\'e}goire, J.} } @article {39, title = {Outlier measures and norming methods for computerized adaptive tests}, journal = {Journal of Educational and Behavioral Statistics}, volume = {26}, number = {1}, year = {2001}, pages = {85-104}, abstract = {Notes that the problem of identifying outliers has 2 important aspects: the choice of outlier measures and the method to assess the degree of outlyingness (norming) of those measures. Several classes of measures for identifying outliers in Computerized Adaptive Tests (CATs) are introduced. Some of these measures are constructed to take advantage of CATs{\textquoteright} sequential choice of items; other measures are taken directly from paper and pencil (P\&P) tests and are used for baseline comparisons. Assessing the degree of outlyingness of CAT responses, however, can not be applied directly from P\&P tests because stopping rules associated with CATs yield examinee responses of varying lengths. Standard outlier measures are highly correlated with the varying lengths which makes comparison across examinees impossible. Therefore, 4 methods are presented and compared which map outlier statistics to a familiar probability scale (a p value). The methods are explored in the context of CAT data from a 1995 Nationally Administered Computerized Examination (NACE). (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Statistical Analysis, Test Norms}, author = {Bradlow, E. T. and Weiss, R. E.} } @inbook {362, title = {Practical issues in setting standards on computerized adaptive tests}, booktitle = {Setting performance standards: Concepts, methods, and perspectives}, year = {2001}, note = {Using Smart Source ParsingSetting performance standards: Concepts, methods, and perspectives. (pp. 355-369). Mahwah, NJ : Lawrence Erlbaum Associates, Publishers. xiii, 510 pp}, pages = {355-369}, publisher = {Lawrence Erlbaum Associates, Inc.}, organization = {Lawrence Erlbaum Associates, Inc.}, address = {Mahwah, N.J. USA}, abstract = {(From the chapter) Examples of setting standards on computerized adaptive tests (CATs) are hard to find. Some examples of CATs involving performance standards include the registered nurse exam and the Novell systems engineer exam. Although CATs do not require separate standard setting-methods, there are special issues to be addressed by test specialist who set performance standards on CATs. Setting standards on a CAT will typical require modifications on the procedures used with more traditional, fixed-form, paper-and -pencil examinations. The purpose of this chapter is to illustrate why CATs pose special challenges to the standard setter. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Performance Tests, Testing Methods}, author = {Sireci, S. G. and Clauser, B. 
E.} } @article {197, title = {Toepassing van een computergestuurde adaptieve testprocedure op persoonlijkheidsdata [Application of a computerised adaptive test procedure on personality data]}, journal = {Nederlands Tijdschrift voor de Psychologie en haar Grensgebieden}, volume = {56}, number = {3}, year = {2001}, pages = {119-133}, abstract = {Studied the applicability of a computerized adaptive testing procedure to an existing personality questionnaire within the framework of item response theory. The procedure was applied to the scores of 1,143 male and female university students (mean age 21.8 yrs) in the Netherlands on the Neuroticism scale of the Amsterdam Biographical Questionnaire (G. J. Wilde, 1963). The graded response model (F. Samejima, 1969) was used. The quality of the adaptive test scores was measured based on their correlation with test scores for the entire item bank and on their correlation with scores on other scales from the personality test. The results indicate that computerized adaptive testing can be applied to personality scales. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Applications, Computer Assisted Testing, Personality Measures, Test Reliability computerized adaptive testing}, author = {Hol, A. M. and Vorst, H. C. M. and Mellenbergh, G. J.} } @article {70, title = {A comparison of item selection rules at the early stages of computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {24}, number = {3}, year = {2000}, pages = {241-255}, abstract = {The effects of 5 item selection rules--Fisher information (FI), Fisher interval information (FII), Fisher information with a posterior distribution (FIP), Kullback-Leibler information (KL), and Kullback-Leibler information with a posterior distribution (KLP)--were compared with respect to the efficiency and precision of trait (θ) estimation at the early stages of computerized adaptive testing (CAT). FII, FIP, KL, and KLP performed marginally better than FI at the early stages of CAT for θ=-3 and -2. For tests longer than 10 items, there appeared to be no precision advantage for any of the selection rules. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Analysis (Test), Statistical Estimation computerized adaptive testing}, author = {Chen, S-Y. and Ankenmann, R. D. and Chang, Hua-Hua} } @article {351, title = {Diagnostische programme in der Demenzfr{\"u}herkennung: Der Adaptive Figurenfolgen-Lerntest (ADAFI) [Diagnostic programs in the early detection of dementia: The Adaptive Figure Series Learning Test (ADAFI)]}, journal = {Zeitschrift f{\"u}r Gerontopsychologie \& -Psychiatrie}, volume = {13}, number = {1}, year = {2000}, pages = {16-29}, abstract = {Zusammenfassung: Untersucht wurde die Eignung des computergest{\"u}tzten Adaptiven Figurenfolgen-Lerntests (ADAFI), zwischen gesunden {\"a}lteren Menschen und {\"a}lteren Menschen mit erh{\"o}htem Demenzrisiko zu differenzieren. Der im ADAFI vorgelegte Aufgabentyp der fluiden Intelligenzdimension (logisches Auff{\"u}llen von Figurenfolgen) hat sich in mehreren Studien zur Erfassung des intellektuellen Leistungspotentials (kognitive Plastizit{\"a}t) {\"a}lterer Menschen als g{\"u}nstig f{\"u}r die genannte Differenzierung erwiesen. Aufgrund seiner Konzeption als Diagnostisches Programm f{\"a}ngt der ADAFI allerdings einige Kritikpunkte an Vorgehensweisen in diesen bisherigen Arbeiten auf. 
Es konnte gezeigt werden, a) da{\ss} mit dem ADAFI deutliche Lokationsunterschiede zwischen den beiden Gruppen darstellbar sind, b) da{\ss} mit diesem Verfahren eine gute Vorhersage des mentalen Gesundheitsstatus der Probanden auf Einzelfallebene gelingt (Sensitivit{\"a}t: 80 \%, Spezifit{\"a}t: 90 \%), und c) da{\ss} die Vorhersageleistung statusdiagnostischer Tests zur Informationsverarbeitungsgeschwindigkeit und zum Arbeitsged{\"a}chtnis geringer ist. Die Ergebnisse weisen darauf hin, da{\ss} die plastizit{\"a}tsorientierte Leistungserfassung mit dem ADAFI vielversprechend f{\"u}r die Fr{\"u}hdiagnostik dementieller Prozesse sein k{\"o}nnte. The aim of this study was to examine the ability of the computerized Adaptive Figure Series Learning Test (ADAFI) to differentiate between old subjects at risk for dementia and old healthy controls. Several studies on the subject of measuring the intellectual potential (cognitive plasticity) of old subjects have shown the usefulness of the fluid intelligence type of task used in the ADAFI (completion of figure series) for this differentiation. Because the ADAFI has been developed as a Diagnostic Program, it is able to address some critical issues in those studies. It was shown a) that distinct differences between the two groups are revealed by the ADAFI, b) that the prediction of the cognitive health status of individual subjects is quite good (sensitivity: 80 \%, specificity: 90 \%), and c) that the prediction of the cognitive health status with tests of processing speed and working memory is worse than with the ADAFI. The results indicate that the ADAFI might be a promising plasticity-oriented tool for the measurement of cognitive decline in the elderly, and thus might be useful for the early detection of dementia.}, keywords = {Adaptive Testing, At Risk Populations, Computer Assisted Diagnosis, Dementia}, author = {Schreiber, M. D. and Schneider, R. J. and Schweizer, A. and Beckmann, J. F. and Baltissen, R.} } @article {74, title = {Estimation of trait level in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {24}, number = {3}, year = {2000}, pages = {257-265}, abstract = {Notes that in computerized adaptive testing (CAT), an examinee{\textquoteright}s trait level (θ) must be estimated with reasonable accuracy based on a small number of item responses. A successful implementation of CAT depends on (1) the accuracy of statistical methods used for estimating θ and (2) the efficiency of the item-selection criterion. Methods of estimating θ suitable for CAT are reviewed, and the differences between Fisher and Kullback-Leibler information criteria for selecting items are discussed. The accuracy of different CAT algorithms was examined in an empirical study. The results show that correcting θ estimates for bias was necessary at earlier stages of CAT, but most CAT algorithms performed equally well for tests of 10 or more items. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Analysis (Statistical), Statistical Estimation, computerized adaptive testing}, author = {Cheng, P. E.
and Liou, M.} } @article {41, title = {An examination of the reliability and validity of performance ratings made using computerized adaptive rating scales}, journal = {Dissertation Abstracts International: Section B: The Sciences and Engineering}, volume = {61}, number = {1-B}, year = {2000}, pages = {570}, abstract = {This study compared the psychometric properties of performance ratings made using recently developed computerized adaptive rating scales (CARS) to the psychometric properties of ratings made using more traditional paper-and-pencil rating formats, i.e., behaviorally-anchored and graphic rating scales. Specifically, the reliability, validity, and accuracy of the performance ratings from each format were examined. One hundred twelve participants viewed six 5-minute videotapes of office situations and rated the performance of a target person in each videotape on three contextual performance dimensions (Personal Support, Organizational Support, and Conscientious Initiative) using CARS and either behaviorally-anchored or graphic rating scales. Performance rating properties were measured using Shrout and Fleiss{\textquoteright}s intraclass correlation (2, 1), Borman{\textquoteright}s differential accuracy measure, and Cronbach{\textquoteright}s accuracy components as indexes of rating reliability, validity, and accuracy, respectively. Results showed that performance ratings made using the CARS were significantly more reliable and valid than performance ratings made using either of the other formats. Additionally, CARS yielded more accurate performance ratings than the paper-and-pencil formats. The nature of the CARS system (i.e., its adaptive nature and scaling methodology) and its paired-comparison judgment task are offered as possible reasons for the differences found in the psychometric properties of the performance ratings made using the various rating formats. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Performance Tests, Rating Scales, Reliability, Test, Test Validity}, author = {Buck, D. E.} } @article {317, title = {Overview of the computerized adaptive testing special section}, journal = {Psicol{\'o}gica}, volume = {21}, number = {1-2}, year = {2000}, pages = {115-120}, abstract = {This paper provides an overview of the five papers included in the Psicologica special section on computerized adaptive testing. A short introduction to this topic is presented as well. The main results, the links between the five papers, and the general research topic to which each is most closely related are also described. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computers, computerized adaptive testing}, author = {Ponsoda, V.} } @article {292, title = {The distribution of indexes of person fit within the computerized adaptive testing environment}, journal = {Applied Psychological Measurement}, volume = {21}, number = {2}, year = {1997}, note = {Journal; Peer Reviewed Journal}, pages = {115-127}, abstract = {The extent to which a trait estimate represents the underlying latent trait of interest can be estimated by using indexes of person fit. Several statistical methods for indexing person fit have been proposed to identify nonmodel-fitting response vectors. These person-fit indexes have generally been found to follow a standard normal distribution for conventionally administered tests.
The present investigation found that, within the context of computerized adaptive testing (CAT), these indexes tended not to follow a standard normal distribution. As the item pool became less discriminating, as the CAT termination criterion became less stringent, and as the number of items in the pool decreased, the distributions of the indexes approached a standard normal distribution. It was determined that, under these conditions, the indexes{\textquoteright} distributions approached standard normal distributions because more items were being administered. However, even when more than 50 items were administered in a CAT, the indexes were distributed in a fashion that was different from what was expected. (PsycINFO Database Record (c) 2006 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Fit, Person Environment}, author = {Nering, M. L.} }
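A short illustration may help readers connect the person-fit entries above (e.g., Nering, 1997) to practice. The following minimal Python sketch computes one widely used index of this kind, the standardized log-likelihood person-fit statistic lz (Drasgow, Levine, \& Williams, 1985), under a two-parameter logistic (2PL) model with known item parameters. It is offered only as a reference sketch of the general idea, not as the method used in any of the articles listed here; the item parameters, response vector, and provisional theta in the example are hypothetical.

import math

def p2pl(theta, a, b):
    # 2PL probability of a correct response for an item with
    # discrimination a and difficulty b
    return 1.0 / (1.0 + math.exp(-a * (theta - b)))

def lz(responses, theta, a, b):
    # responses: 0/1 item scores; a, b: item-parameter lists of equal length.
    # Returns the standardized log-likelihood person-fit statistic lz.
    l0 = exp_l0 = var_l0 = 0.0
    for u, ai, bi in zip(responses, a, b):
        p = p2pl(theta, ai, bi)
        q = 1.0 - p
        l0 += u * math.log(p) + (1 - u) * math.log(q)    # observed log-likelihood
        exp_l0 += p * math.log(p) + q * math.log(q)      # its expectation
        var_l0 += p * q * (math.log(p / q)) ** 2         # its variance
    return (l0 - exp_l0) / math.sqrt(var_l0)

# Hypothetical 10-item CAT administration with a provisional theta of 0.4
a = [1.2, 0.9, 1.5, 1.1, 0.8, 1.3, 1.0, 1.4, 0.7, 1.6]
b = [-1.0, -0.5, 0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.5]
u = [1, 1, 1, 0, 1, 0, 1, 0, 0, 0]
print(round(lz(u, 0.4, a, b), 3))   # strongly negative values suggest misfit

Under a standard normal reference distribution, strongly negative lz values flag aberrant response vectors; the point of the Nering (1997) abstract is precisely that this normal reference cannot be taken for granted when items are selected adaptively and test lengths vary across examinees.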