@comment{
  Conventions for the records below.
  - Citation keys are the numeric record ids of the exporting system; they are kept unchanged.
  - annote holds the raw citation string produced by the export (glued author names, grant
    numbers, publication types). It used to sit in the note field, which standard styles print
    verbatim; annote is ignored by standard styles, so the text is preserved without being printed.
  - issn is the print or linking ISSN (previously stored in an isbn field together with its
    qualifier text); eissn is the electronic ISSN when the export listed a different one.
  - month uses the predefined three-letter macros; any day of month from the export survives in annote.
  - pages are full ranges written with a double hyphen.
  - edition carries a yyyy/mm/dd date from the export rather than an edition.
    NOTE(review): presumably a database indexing date; confirm before renaming or dropping it.
  - Record 287: the export gave the fourth author as Pengsheng, N.; records 88 and 5 list the same
    person as Ni, P., so the name was normalised to match. Confirm against the published article.
}

@article{46,
  author   = {Buysse, D. J. and Yu, L. and Moul, D. E. and Germain, A. and Stover, A. and Dodds, N. E. and Johnston, K. L. and Shablesky-Cade, M. A. and Pilkonis, P. A.},
  title    = {Development and validation of patient-reported outcome measures for sleep disturbance and sleep-related impairments},
  journal  = {Sleep},
  volume   = {33},
  number   = {6},
  year     = {2010},
  month    = jun,
  pages    = {781--792},
  issn     = {0161-8105},
  edition  = {2010/06/17},
  keywords = {*Outcome Assessment (Health Care), *Self Disclosure, Adult, Aged, Aged, 80 and over, Cross-Sectional Studies, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Psychometrics, Questionnaires, Reproducibility of Results, Sleep Disorders/*diagnosis, Young Adult},
  abstract = {STUDY OBJECTIVES: To develop an archive of self-report questions assessing sleep disturbance and sleep-related impairments (SRI), to develop item banks from this archive, and to validate and calibrate the item banks using classic validation techniques and item response theory analyses in a sample of clinical and community participants. DESIGN: Cross-sectional self-report study. SETTING: Academic medical center and participant homes. PARTICIPANTS: One thousand nine hundred ninety-three adults recruited from an Internet polling sample and 259 adults recruited from medical, psychiatric, and sleep clinics. INTERVENTIONS: None. MEASUREMENTS AND RESULTS: This study was part of PROMIS (Patient-Reported Outcomes Information System), a National Institutes of Health Roadmap initiative. Self-report item banks were developed through an iterative process of literature searches, collecting and sorting items, expert content review, qualitative patient research, and pilot testing. Internal consistency, convergent validity, and exploratory and confirmatory factor analysis were examined in the resulting item banks. Factor analyses identified 2 preliminary item banks, sleep disturbance and SRI. Item response theory analyses and expert content review narrowed the item banks to 27 and 16 items, respectively. Validity of the item banks was supported by moderate to high correlations with existing scales and by significant differences in sleep disturbance and SRI scores between participants with and without sleep disorders. CONCLUSIONS: The PROMIS sleep disturbance and SRI item banks have excellent measurement properties and may prove to be useful for assessing general aspects of sleep and SRI with various groups of patients and interventions.},
  annote   = {Buysse, Daniel JYu, LanMoul, Douglas EGermain, AnneStover, AngelaDodds, Nathan EJohnston, Kelly LShablesky-Cade, Melissa APilkonis, Paul AAR052155/AR/NIAMS NIH HHS/United StatesU01AR52155/AR/NIAMS NIH HHS/United StatesU01AR52158/AR/NIAMS NIH HHS/United StatesU01AR52170/AR/NIAMS NIH HHS/United StatesU01AR52171/AR/NIAMS NIH HHS/United StatesU01AR52177/AR/NIAMS NIH HHS/United StatesU01AR52181/AR/NIAMS NIH HHS/United StatesU01AR52186/AR/NIAMS NIH HHS/United StatesResearch Support, N.I.H., ExtramuralValidation StudiesUnited StatesSleepSleep. 2010 Jun 1;33(6):781-92.},
}

@article{138,
  author   = {Forkmann, T. and Boecker, M. and Norra, C. and Eberle, N. and Kircher, T. and Schauerte, P. and Mischke, K. and Westhofen, M. and Gauggel, S. and Wirtz, M.},
  title    = {Development of an item bank for the assessment of depression in persons with mental illnesses and physical diseases using {Rasch} analysis},
  journal  = {Rehabilitation Psychology},
  volume   = {54},
  number   = {2},
  year     = {2009},
  month    = may,
  pages    = {186--197},
  issn     = {0090-5550},
  edition  = {2009/05/28},
  keywords = {Adaptation, Psychological, Adult, Aged, Depressive Disorder/*diagnosis/psychology, Diagnosis, Computer-Assisted, Female, Heart Diseases/*psychology, Humans, Male, Mental Disorders/*psychology, Middle Aged, Models, Statistical, Otorhinolaryngologic Diseases/*psychology, Personality Assessment/statistics \& numerical data, Personality Inventory/*statistics \& numerical data, Psychometrics/statistics \& numerical data, Questionnaires, Reproducibility of Results, Sick Role},
  abstract = {OBJECTIVE: The calibration of item banks provides the basis for computerized adaptive testing that ensures high diagnostic precision and minimizes participants{\textquoteright} test burden. The present study aimed at developing a new item bank that allows for assessing depression in persons with mental and persons with somatic diseases. METHOD: The sample consisted of 161 participants treated for a depressive syndrome, and 206 participants with somatic illnesses (103 cardiologic, 103 otorhinolaryngologic; overall mean age = 44.1 years, SD =14.0; 44.7\% women) to allow for validation of the item bank in both groups. Persons answered a pool of 182 depression items on a 5-point Likert scale. RESULTS: Evaluation of Rasch model fit (infit < 1.3), differential item functioning, dimensionality, local independence, item spread, item and person separation (>2.0), and reliability (>.80) resulted in a bank of 79 items with good psychometric properties. CONCLUSIONS: The bank provides items with a wide range of content coverage and may serve as a sound basis for computerized adaptive testing applications. It might also be useful for researchers who wish to develop new fixed-length scales for the assessment of depression in specific rehabilitation settings.},
  annote   = {Forkmann, ThomasBoecker, MarenNorra, ChristineEberle, NicoleKircher, TiloSchauerte, PatrickMischke, KarlWesthofen, MartinGauggel, SiegfriedWirtz, MarkusResearch Support, Non-U.S. Gov{\textquoteright}tUnited StatesRehabilitation psychologyRehabil Psychol. 2009 May;54(2):186-97.},
}

@article{227,
  author   = {Kocalevent, R. D. and Rose, M. and Becker, J. and Walter, O. B. and Fliege, H. and Bjorner, J. B. and Kleiber, D. and Klapp, B. F.},
  title    = {An evaluation of patient-reported outcomes found computerized adaptive testing was efficient in assessing stress perception},
  journal  = {Journal of Clinical Epidemiology},
  volume   = {62},
  number   = {3},
  year     = {2009},
  month    = mar,
  pages    = {278--287},
  issn     = {0895-4356},
  eissn    = {1878-5921},
  edition  = {2008/07/22},
  keywords = {*Diagnosis, Computer-Assisted, Adolescent, Adult, Aged, Aged, 80 and over, Confidence Intervals, Female, Humans, Male, Middle Aged, Perception, Quality of Health Care/*standards, Questionnaires, Reproducibility of Results, Sickness Impact Profile, Stress, Psychological/*diagnosis/psychology, Treatment Outcome},
  abstract = {OBJECTIVES: This study aimed to develop and evaluate a first computerized adaptive test (CAT) for the measurement of stress perception (Stress-CAT), in terms of the two dimensions: exposure to stress and stress reaction. STUDY DESIGN AND SETTING: Item response theory modeling was performed using a two-parameter model (Generalized Partial Credit Model). The evaluation of the Stress-CAT comprised a simulation study and real clinical application. A total of 1,092 psychosomatic patients (N1) were studied. Two hundred simulees (N2) were generated for a simulated response data set. Then the Stress-CAT was given to n=116 inpatients, (N3) together with established stress questionnaires as validity criteria. RESULTS: The final banks included n=38 stress exposure items and n=31 stress reaction items. In the first simulation study, CAT scores could be estimated with a high measurement precision (SE<0.32; rho>0.90) using 7.0+/-2.3 (M+/-SD) stress reaction items and 11.6+/-1.7 stress exposure items. The second simulation study reanalyzed real patients data (N1) and showed an average use of items of 5.6+/-2.1 for the dimension stress reaction and 10.0+/-4.9 for the dimension stress exposure. Convergent validity showed significantly high correlations. CONCLUSIONS: The Stress-CAT is short and precise, potentially lowering the response burden of patients in clinical decision making.},
  annote   = {Kocalevent, Ruya-DanielaRose, MatthiasBecker, JanineWalter, Otto BFliege, HerbertBjorner, Jakob BKleiber, DieterKlapp, Burghard FEvaluation StudiesUnited StatesJournal of clinical epidemiologyJ Clin Epidemiol. 2009 Mar;62(3):278-87, 287.e1-3. Epub 2008 Jul 18.},
}

@article{77,
  author   = {Cheng, Ying and Chang, Hua-Hua},
  title    = {The maximum priority index method for severely constrained item selection in computerized adaptive testing},
  journal  = {British Journal of Mathematical and Statistical Psychology},
  volume   = {62},
  number   = {2},
  year     = {2009},
  month    = may,
  pages    = {369--383},
  issn     = {0007-1102},
  edition  = {2008/06/07},
  keywords = {Aptitude Tests/*statistics \& numerical data, Diagnosis, Computer-Assisted/*statistics \& numerical data, Educational Measurement/*statistics \& numerical data, Humans, Mathematical Computing, Models, Statistical, Personality Tests/*statistics \& numerical data, Psychometrics/*statistics \& numerical data, Reproducibility of Results, Software},
  abstract = {This paper introduces a new heuristic approach, the maximum priority index (MPI) method, for severely constrained item selection in computerized adaptive testing. Our simulation study shows that it is able to accommodate various non-statistical constraints simultaneously, such as content balancing, exposure control, answer key balancing, and so on. Compared with the weighted deviation modelling method, it leads to fewer constraint violations and better exposure control while maintaining the same level of measurement precision.},
  annote   = {Cheng, YingChang, Hua-HuaResearch Support, Non-U.S. Gov{\textquoteright}tEnglandThe British journal of mathematical and statistical psychologyBr J Math Stat Psychol. 2009 May;62(Pt 2):369-83. Epub 2008 Jun 2.},
}

@article{143,
  author   = {Fries, J. F. and Cella, D. and Rose, M. and Krishnan, E. and Bruce, B.},
  title    = {Progress in assessing physical function in arthritis: {PROMIS} short forms and computerized adaptive testing},
  journal  = {Journal of Rheumatology},
  volume   = {36},
  number   = {9},
  year     = {2009},
  month    = sep,
  pages    = {2061--2066},
  issn     = {0315-162X},
  edition  = {2009/09/10},
  keywords = {*Disability Evaluation, *Outcome Assessment (Health Care), Arthritis/diagnosis/*physiopathology, Health Surveys, Humans, Prognosis, Reproducibility of Results},
  abstract = {OBJECTIVE: Assessing self-reported physical function/disability with the Health Assessment Questionnaire Disability Index (HAQ) and other instruments has become central in arthritis research. Item response theory (IRT) and computerized adaptive testing (CAT) techniques can increase reliability and statistical power. IRT-based instruments can improve measurement precision substantially over a wider range of disease severity. These modern methods were applied and the magnitude of improvement was estimated. METHODS: A 199-item physical function/disability item bank was developed by distilling 1865 items to 124, including Legacy Health Assessment Questionnaire (HAQ) and Physical Function-10 items, and improving precision through qualitative and quantitative evaluation in over 21,000 subjects, which included about 1500 patients with rheumatoid arthritis and osteoarthritis. Four new instruments, (A) Patient-Reported Outcomes Measurement Information (PROMIS) HAQ, which evolved from the original (Legacy) HAQ; (B) "best" PROMIS 10; (C) 20-item static (short) forms; and (D) simulated PROMIS CAT, which sequentially selected the most informative item, were compared with the HAQ. RESULTS: Online and mailed administration modes yielded similar item and domain scores. The HAQ and PROMIS HAQ 20-item scales yielded greater information content versus other scales in patients with more severe disease. The "best" PROMIS 20-item scale outperformed the other 20-item static forms over a broad range of 4 standard deviations. The 10-item simulated PROMIS CAT outperformed all other forms. CONCLUSION: Improved items and instruments yielded better information. The PROMIS HAQ is currently available and considered validated. The new PROMIS short forms, after validation, are likely to represent further improvement. CAT-based physical function/disability assessment offers superior performance over static forms of equal length.},
  annote   = {Fries, James FCella, DavidRose, MatthiasKrishnan, EswarBruce, BonnieU01 AR052158/AR/NIAMS NIH HHS/United StatesU01 AR52177/AR/NIAMS NIH HHS/United StatesConsensus Development ConferenceResearch Support, N.I.H., ExtramuralCanadaThe Journal of rheumatologyJ Rheumatol. 2009 Sep;36(9):2061-6.},
}

@article{78,
  author   = {Chien, T. W. and Wu, H. M. and Wang, W.-C. and Castillo, R. V. and Chou, W.},
  title    = {Reduction in patient burdens with graphical computerized adaptive testing on the {ADL} scale: tool development and simulation},
  journal  = {Health and Quality of Life Outcomes},
  volume   = {7},
  year     = {2009},
  month    = may,
  pages    = {39},
  issn     = {1477-7525},
  edition  = {2009/05/07},
  keywords = {*Activities of Daily Living, *Computer Graphics, *Computer Simulation, *Diagnosis, Computer-Assisted, Female, Humans, Male, Point-of-Care Systems, Reproducibility of Results, Stroke/*rehabilitation, Taiwan, United States},
  abstract = {BACKGROUND: The aim of this study was to verify the effectiveness and efficacy of saving time and reducing burden for patients, nurses, and even occupational therapists through computer adaptive testing (CAT). METHODS: Based on an item bank of the Barthel Index (BI) and the Frenchay Activities Index (FAI) for assessing comprehensive activities of daily living (ADL) function in stroke patients, we developed a visual basic application (VBA)-Excel CAT module, and (1) investigated whether the averaged test length via CAT is shorter than that of the traditional all-item-answered non-adaptive testing (NAT) approach through simulation, (2) illustrated the CAT multimedia on a tablet PC showing data collection and response errors of ADL clinical functional measures in stroke patients, and (3) demonstrated the quality control of endorsing scale with fit statistics to detect responding errors, which will be further immediately reconfirmed by technicians once patient ends the CAT assessment. RESULTS: The results show that endorsed items could be shorter on CAT (M = 13.42) than on NAT (M = 23) at 41.64\% efficiency in test length. However, averaged ability estimations reveal insignificant differences between CAT and NAT. CONCLUSION: This study found that mobile nursing services, placed at the bedsides of patients could, through the programmed VBA-Excel CAT module, reduce the burden to patients and save time, more so than the traditional NAT paper-and-pencil testing appraisals.},
  annote   = {Chien, Tsair-WeiWu, Hing-ManWang, Weng-ChungCastillo, Roberto VasquezChou, WillyComparative StudyValidation StudiesEnglandHealth and quality of life outcomesHealth Qual Life Outcomes. 2009 May 5;7:39.},
}

@article{88,
  author   = {Coster, W. J. and Haley, S. M. and Ni, P. and Dumas, H. M. and Fragala-Pinkham, M. A.},
  title    = {Assessing self-care and social function using a computer adaptive testing version of the pediatric evaluation of disability inventory},
  journal  = {Archives of Physical Medicine and Rehabilitation},
  volume   = {89},
  number   = {4},
  year     = {2008},
  month    = apr,
  pages    = {622--629},
  issn     = {0003-9993},
  eissn    = {1532-821X},
  edition  = {2008/04/01},
  keywords = {*Disability Evaluation, *Social Adjustment, Activities of Daily Living, Adolescent, Age Factors, Child, Child, Preschool, Computer Simulation, Cross-Over Studies, Disabled Children/*rehabilitation, Female, Follow-Up Studies, Humans, Infant, Male, Outcome Assessment (Health Care), Reference Values, Reproducibility of Results, Retrospective Studies, Risk Factors, Self Care/*standards/trends, Sex Factors, Sickness Impact Profile},
  abstract = {OBJECTIVE: To examine score agreement, validity, precision, and response burden of a prototype computer adaptive testing (CAT) version of the self-care and social function scales of the Pediatric Evaluation of Disability Inventory compared with the full-length version of these scales. DESIGN: Computer simulation analysis of cross-sectional and longitudinal retrospective data; cross-sectional prospective study. SETTING: Pediatric rehabilitation hospital, including inpatient acute rehabilitation, day school program, outpatient clinics; community-based day care, preschool, and children{\textquoteright}s homes. PARTICIPANTS: Children with disabilities (n=469) and 412 children with no disabilities (analytic sample); 38 children with disabilities and 35 children without disabilities (cross-validation sample). INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Summary scores from prototype CAT applications of each scale using 15-, 10-, and 5-item stopping rules; scores from the full-length self-care and social function scales; time (in seconds) to complete assessments and respondent ratings of burden. RESULTS: Scores from both computer simulations and field administration of the prototype CATs were highly consistent with scores from full-length administration (r range, .94-.99). Using computer simulation of retrospective data, discriminant validity, and sensitivity to change of the CATs closely approximated that of the full-length scales, especially when the 15- and 10-item stopping rules were applied. In the cross-validation study the time to administer both CATs was 4 minutes, compared with over 16 minutes to complete the full-length scales. CONCLUSIONS: Self-care and social function score estimates from CAT administration are highly comparable with those obtained from full-length scale administration, with small losses in validity and precision and substantial decreases in administration time.},
  annote   = {Coster, Wendy JHaley, Stephen MNi, PengshengDumas, Helene MFragala-Pinkham, Maria AK02 HD45354-01A1/HD/NICHD NIH HHS/United StatesR41 HD052318-01A1/HD/NICHD NIH HHS/United StatesR43 HD42388-01/HD/NICHD NIH HHS/United StatesComparative StudyResearch Support, N.I.H., ExtramuralUnited StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2008 Apr;89(4):622-9.},
}

@article{231,
  author   = {Kopec, J. A. and Badii, M. and McKenna, M. and Lima, V. D. and Sayre, E. C. and Dvorak, M.},
  title    = {Computerized adaptive testing in back pain: Validation of the {CAT-5D-QOL}},
  journal  = {Spine},
  volume   = {33},
  number   = {12},
  year     = {2008},
  month    = may,
  pages    = {1384--1390},
  issn     = {0362-2436},
  eissn    = {1528-1159},
  edition  = {2008/05/23},
  keywords = {*Disability Evaluation, *Health Status Indicators, *Quality of Life, Adult, Aged, Algorithms, Back Pain/*diagnosis/psychology, British Columbia, Diagnosis, Computer-Assisted/*standards, Feasibility Studies, Female, Humans, Internet, Male, Middle Aged, Predictive Value of Tests, Questionnaires/*standards, Reproducibility of Results},
  abstract = {STUDY DESIGN: We have conducted an outcome instrument validation study. OBJECTIVE: Our objective was to develop a computerized adaptive test (CAT) to measure 5 domains of health-related quality of life (HRQL) and assess its feasibility, reliability, validity, and efficiency. SUMMARY OF BACKGROUND DATA: Kopec and colleagues have recently developed item response theory based item banks for 5 domains of HRQL relevant to back pain and suitable for CAT applications. The domains are Daily Activities (DAILY), Walking (WALK), Handling Objects (HAND), Pain or Discomfort (PAIN), and Feelings (FEEL). METHODS: An adaptive algorithm was implemented in a web-based questionnaire administration system. The questionnaire included CAT-5D-QOL (5 scales), Modified Oswestry Disability Index (MODI), Roland-Morris Disability Questionnaire (RMDQ), SF-36 Health Survey, and standard clinical and demographic information. Participants were outpatients treated for mechanical back pain at a referral center in Vancouver, Canada. RESULTS: A total of 215 patients completed the questionnaire and 84 completed a retest. On average, patients answered 5.2 items per CAT-5D-QOL scale. Reliability ranged from 0.83 (FEEL) to 0.92 (PAIN) and was 0.92 for the MODI, RMDQ, and Physical Component Summary (PCS-36). The ceiling effect was 0.5\% for PAIN compared with 2\% for MODI and 5\% for RMQ. The CAT-5D-QOL scales correlated as anticipated with other measures of HRQL and discriminated well according to the level of satisfaction with current symptoms, duration of the last episode, sciatica, and disability compensation. The average relative discrimination index was 0.87 for PAIN, 0.67 for DAILY and 0.62 for WALK, compared with 0.89 for MODI, 0.80 for RMDQ, and 0.59 for PCS-36. CONCLUSION: The CAT-5D-QOL is feasible, reliable, valid, and efficient in patients with back pain. This methodology can be recommended for use in back pain research and should improve outcome assessment, facilitate comparisons across studies, and reduce patient burden.},
  annote   = {Kopec, Jacek ABadii, MaziarMcKenna, MarioLima, Viviane DSayre, Eric CDvorak, MarcelResearch Support, Non-U.S. Gov{\textquoteright}tValidation StudiesUnited StatesSpineSpine (Phila Pa 1976). 2008 May 20;33(12):1384-90.},
}

@article{5,
  author   = {Allen, D. D. and Ni, P. and Haley, S. M.},
  title    = {Efficiency and sensitivity of multidimensional computerized adaptive testing of pediatric physical functioning},
  journal  = {Disability \& Rehabilitation},
  volume   = {30},
  number   = {6},
  year     = {2008},
  pages    = {479--484},
  issn     = {0963-8288},
  edition  = {2008/02/26},
  keywords = {*Disability Evaluation, Child, Computers, Disabled Children/*classification/rehabilitation, Efficiency, Humans, Outcome Assessment (Health Care), Psychometrics, Reproducibility of Results, Retrospective Studies, Self Care, Sensitivity and Specificity},
  abstract = {PURPOSE: Computerized adaptive tests (CATs) have efficiency advantages over fixed-length tests of physical functioning but may lose sensitivity when administering extremely low numbers of items. Multidimensional CATs may efficiently improve sensitivity by capitalizing on correlations between functional domains. Using a series of empirical simulations, we assessed the efficiency and sensitivity of multidimensional CATs compared to a longer fixed-length test. METHOD: Parent responses to the Pediatric Evaluation of Disability Inventory before and after intervention for 239 children at a pediatric rehabilitation hospital provided the data for this retrospective study. Reliability, effect size, and standardized response mean were compared between full-length self-care and mobility subscales and simulated multidimensional CATs with stopping rules at 40, 30, 20, and 10 items. RESULTS: Reliability was lowest in the 10-item CAT condition for the self-care (r = 0.85) and mobility (r = 0.79) subscales; all other conditions had high reliabilities (r > 0.94). All multidimensional CAT conditions had equivalent levels of sensitivity compared to the full set condition for both domains. CONCLUSIONS: Multidimensional CATs efficiently retain the sensitivity of longer fixed-length measures even with 5 items per dimension (10-item CAT condition). Measuring physical functioning with multidimensional CATs could enhance sensitivity following intervention while minimizing response burden.},
  annote   = {Allen, Diane DNi, PengshengHaley, Stephen MK02 HD45354-01/HD/NICHD NIH HHS/United StatesNIDDR H133P0001/DD/NCBDD CDC HHS/United StatesResearch Support, N.I.H., ExtramuralEnglandDisability and rehabilitationDisabil Rehabil. 2008;30(6):479-84.},
}

@article{84,
  author   = {Cook, K. F. and Choi, S. W. and Crane, P. K. and Deyo, R. A. and Johnson, K. L. and Amtmann, D.},
  title    = {Letting the {CAT} out of the bag: Comparing computer adaptive tests and an 11-item short form of the {Roland-Morris Disability Questionnaire}},
  journal  = {Spine},
  volume   = {33},
  number   = {12},
  year     = {2008},
  month    = may,
  pages    = {1378--1383},
  issn     = {0362-2436},
  eissn    = {1528-1159},
  edition  = {2008/05/23},
  keywords = {*Disability Evaluation, *Health Status Indicators, Adult, Aged, Aged, 80 and over, Back Pain/*diagnosis/psychology, Calibration, Computer Simulation, Diagnosis, Computer-Assisted/*standards, Humans, Middle Aged, Models, Psychological, Predictive Value of Tests, Questionnaires/*standards, Reproducibility of Results},
  abstract = {STUDY DESIGN: A post hoc simulation of a computer adaptive administration of the items of a modified version of the Roland-Morris Disability Questionnaire. OBJECTIVE: To evaluate the effectiveness of adaptive administration of back pain-related disability items compared with a fixed 11-item short form. SUMMARY OF BACKGROUND DATA: Short form versions of the Roland-Morris Disability Questionnaire have been developed. An alternative to paper-and-pencil short forms is to administer items adaptively so that items are presented based on a person{\textquoteright}s responses to previous items. Theoretically, this allows precise estimation of back pain disability with administration of only a few items. MATERIALS AND METHODS: Data were gathered from 2 previously conducted studies of persons with back pain. An item response theory model was used to calibrate scores based on all items, items of a paper-and-pencil short form, and several computer adaptive tests (CATs). RESULTS: Correlations between each CAT condition and scores based on a 23-item version of the Roland-Morris Disability Questionnaire ranged from 0.93 to 0.98. Compared with an 11-item short form, an 11-item CAT produced scores that were significantly more highly correlated with scores based on the 23-item scale. CATs with even fewer items also produced scores that were highly correlated with scores based on all items. For example, scores from a 5-item CAT had a correlation of 0.93 with full scale scores. Seven- and 9-item CATs correlated at 0.95 and 0.97, respectively. A CAT with a standard-error-based stopping rule produced scores that correlated at 0.95 with full scale scores. CONCLUSION: A CAT-based back pain-related disability measure may be a valuable tool for use in clinical and research contexts. Use of CAT for other common measures in back pain research, such as other functional scales or measures of psychological distress, may offer similar advantages.},
  annote   = {Cook, Karon FChoi, Seung WCrane, Paul KDeyo, Richard AJohnson, Kurt LAmtmann, Dagmar5 P60-AR48093/AR/United States NIAMS5U01AR052171-03/AR/United States NIAMSComparative StudyResearch Support, N.I.H., ExtramuralUnited StatesSpineSpine. 2008 May 20;33(12):1378-83.},
}

@article{287,
  author   = {Mulcahey, M. J. and Haley, S. M. and Duffy, T. and Ni, P. and Betz, R. R.},
  title    = {Measuring physical functioning in children with spinal impairments with computerized adaptive testing},
  journal  = {Journal of Pediatric Orthopedics},
  volume   = {28},
  number   = {3},
  year     = {2008},
  month    = apr # "--" # may,
  pages    = {330--335},
  issn     = {0271-6798},
  edition  = {2008/03/26},
  keywords = {*Disability Evaluation, Adolescent, Child, Child, Preschool, Computer Simulation, Cross-Sectional Studies, Disabled Children/*rehabilitation, Female, Humans, Infant, Kyphosis/*diagnosis/rehabilitation, Male, Prospective Studies, Reproducibility of Results, Scoliosis/*diagnosis/rehabilitation},
  abstract = {BACKGROUND: The purpose of this study was to assess the utility of measuring current physical functioning status of children with scoliosis and kyphosis by applying computerized adaptive testing (CAT) methods. Computerized adaptive testing uses a computer interface to administer the most optimal items based on previous responses, reducing the number of items needed to obtain a scoring estimate. METHODS: This was a prospective study of 77 subjects (0.6-19.8 years) who were seen by a spine surgeon during a routine clinic visit for progress spine deformity. Using a multidimensional version of the Pediatric Evaluation of Disability Inventory CAT program (PEDI-MCAT), we evaluated content range, accuracy and efficiency, known-group validity, concurrent validity with the Pediatric Outcomes Data Collection Instrument, and test-retest reliability in a subsample (n = 16) within a 2-week interval. RESULTS: We found the PEDI-MCAT to have sufficient item coverage in both self-care and mobility content for this sample, although most patients tended to score at the higher ends of both scales. Both the accuracy of PEDI-MCAT scores as compared with a fixed format of the PEDI (r = 0.98 for both mobility and self-care) and test-retest reliability were very high [self-care: intraclass correlation (3,1) = 0.98, mobility: intraclass correlation (3,1) = 0.99]. The PEDI-MCAT took an average of 2.9 minutes for the parents to complete. The PEDI-MCAT detected expected differences between patient groups, and scores on the PEDI-MCAT correlated in expected directions with scores from the Pediatric Outcomes Data Collection Instrument domains. CONCLUSIONS: Use of the PEDI-MCAT to assess the physical functioning status, as perceived by parents of children with complex spinal impairments, seems to be feasible and achieves accurate and efficient estimates of self-care and mobility function. Additional item development will be needed at the higher functioning end of the scale to avoid ceiling effects for older children. LEVEL OF EVIDENCE: This is a level II prospective study designed to establish the utility of computer adaptive testing as an evaluation method in a busy pediatric spine practice.},
  annote   = {Mulcahey, M JHaley, Stephen MDuffy, TheresaPengsheng, NiBetz, Randal RK02 HD045354-01A1/HD/NICHD NIH HHS/United StatesUnited StatesJournal of pediatric orthopedicsJ Pediatr Orthop. 2008 Apr-May;28(3):330-5.},
}

@article{71,
  author   = {Chen, S.-Y. and Doong, S. H.},
  title    = {Predicting item exposure parameters in computerized adaptive testing},
  journal  = {British Journal of Mathematical and Statistical Psychology},
  volume   = {61},
  number   = {1},
  year     = {2008},
  month    = may,
  pages    = {75--91},
  issn     = {0007-1102},
  edition  = {2008/05/17},
  keywords = {*Algorithms, *Artificial Intelligence, Aptitude Tests/*statistics \& numerical data, Diagnosis, Computer-Assisted/*statistics \& numerical data, Humans, Models, Statistical, Psychometrics/statistics \& numerical data, Reproducibility of Results, Software},
  abstract = {The purpose of this study is to find a formula that describes the relationship between item exposure parameters and item parameters in computerized adaptive tests by using genetic programming (GP) - a biologically inspired artificial intelligence technique. Based on the formula, item exposure parameters for new parallel item pools can be predicted without conducting additional iterative simulations. Results show that an interesting formula between item exposure parameters and item parameters in a pool can be found by using GP. The item exposure parameters predicted based on the found formula were close to those observed from the Sympson and Hetter (1985) procedure and performed well in controlling item exposure rates. Similar results were observed for the Stocking and Lewis (1998) multinomial model for item selection and the Sympson and Hetter procedure with content balancing. The proposed GP approach has provided a knowledge-based solution for finding item exposure parameters.},
  annote   = {Chen, Shu-YingDoong, Shing-HwangResearch Support, Non-U.S. Gov{\textquoteright}tEnglandThe British journal of mathematical and statistical psychologyBr J Math Stat Psychol. 2008 May;61(Pt 1):75-91.},
}

@article{152,
  author   = {Gibbons, R. D. and Weiss, D. J. and Kupfer, D. J. and Frank, E. and Fagiolini, A. and Grochocinski, V. J. and Bhaumik, D. K. and Stover, A. and Bock, R. D. and Immekus, J. C.},
  title    = {Using computerized adaptive testing to reduce the burden of mental health assessment},
  journal  = {Psychiatric Services},
  volume   = {59},
  number   = {4},
  year     = {2008},
  month    = apr,
  pages    = {361--368},
  issn     = {1075-2730},
  edition  = {2008/04/02},
  keywords = {*Diagnosis, Computer-Assisted, *Questionnaires, Adolescent, Adult, Aged, Agoraphobia/diagnosis, Anxiety Disorders/diagnosis, Bipolar Disorder/diagnosis, Female, Humans, Male, Mental Disorders/*diagnosis, Middle Aged, Mood Disorders/diagnosis, Obsessive-Compulsive Disorder/diagnosis, Panic Disorder/diagnosis, Phobic Disorders/diagnosis, Reproducibility of Results, Time Factors},
  abstract = {OBJECTIVE: This study investigated the combination of item response theory and computerized adaptive testing (CAT) for psychiatric measurement as a means of reducing the burden of research and clinical assessments. METHODS: Data were from 800 participants in outpatient treatment for a mood or anxiety disorder; they completed 616 items of the 626-item Mood and Anxiety Spectrum Scales (MASS) at two times. The first administration was used to design and evaluate a CAT version of the MASS by using post hoc simulation. The second confirmed the functioning of CAT in live testing. RESULTS: Tests of competing models based on item response theory supported the scale{\textquoteright}s bifactor structure, consisting of a primary dimension and four group factors (mood, panic-agoraphobia, obsessive-compulsive, and social phobia). Both simulated and live CAT showed a 95\% average reduction (585 items) in items administered (24 and 30 items, respectively) compared with administration of the full MASS. The correlation between scores on the full MASS and the CAT version was .93. For the mood disorder subscale, differences in scores between two groups of depressed patients--one with bipolar disorder and one without--on the full scale and on the CAT showed effect sizes of .63 (p<.003) and 1.19 (p<.001) standard deviation units, respectively, indicating better discriminant validity for CAT. CONCLUSIONS: Instead of using small fixed-length tests, clinicians can create item banks with a large item pool, and a small set of the items most relevant for a given individual can be administered with no loss of information, yielding a dramatic reduction in administration time and patient and clinician burden.},
  annote   = {Gibbons, Robert DWeiss, David JKupfer, David JFrank, EllenFagiolini, AndreaGrochocinski, Victoria JBhaumik, Dulal KStover, AngelaBock, R DarrellImmekus, Jason CR01-MH-30915/MH/United States NIMHR01-MH-66302/MH/United States NIMHResearch Support, N.I.H., ExtramuralUnited StatesPsychiatric services (Washington, D.C.)Psychiatr Serv. 2008 Apr;59(4):361-8.},
}

@article {135, title = {Computerized adaptive personality testing: A review and illustration with the MMPI-2 Computerized Adaptive Version}, journal = {Psychological Assessment}, volume = {19}, number = {1}, year = {2007}, note = {Forbey, Johnathan DBen-Porath, Yossef SResearch Support, Non-U.S. Gov{\textquoteright}tUnited StatesPsychological assessmentPsychol Assess. 2007 Mar;19(1):14-24.}, month = {Mar}, pages = {14-24}, edition = {2007/03/21}, abstract = {Computerized adaptive testing in personality assessment can improve efficiency by significantly reducing the number of items administered to answer an assessment question. Two approaches have been explored for adaptive testing in computerized personality assessment: item response theory and the countdown method. In this article, the authors review the literature on each and report the results of an investigation designed to explore the utility, in terms of item and time savings, and validity, in terms of correlations with external criterion measures, of an expanded countdown method-based research version of the Minnesota Multiphasic Personality Inventory-2 (MMPI-2), the MMPI-2 Computerized Adaptive Version (MMPI-2-CA). Participants were 433 undergraduate college students (170 men and 263 women). Results indicated considerable item savings and corresponding time savings for the adaptive testing modalities compared with a conventional computerized MMPI-2 administration. Furthermore, computerized adaptive administration yielded comparable results to computerized conventional administration of the MMPI-2 in terms of both test scores and their validity. 
Future directions for computerized adaptive personality testing are discussed.}, keywords = {Adolescent, Adult, Diagnosis, Computer-Assisted/*statistics \& numerical data, Female, Humans, Male, MMPI/*statistics \& numerical data, Personality Assessment/*statistics \& numerical data, Psychometrics/statistics \& numerical data, Reference Values, Reproducibility of Results}, isbn = {1040-3590 (Print)}, author = {Forbey, J. D. and Ben-Porath, Y. S.} } @article {172, title = {Computer adaptive testing improved accuracy and precision of scores over random item selection in a physical functioning item bank}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {11}, year = {2006}, note = {Haley, Stephen MNi, PengshengHambleton, Ronald KSlavin, Mary DJette, Alan MK02 hd45354-01/hd/nichdR01 hd043568/hd/nichdComparative StudyResearch Support, N.I.H., ExtramuralResearch Support, U.S. Gov{\textquoteright}t, Non-P.H.S.EnglandJournal of clinical epidemiologyJ Clin Epidemiol. 2006 Nov;59(11):1174-82. Epub 2006 Jul 11.}, month = {Nov}, pages = {1174-82}, edition = {2006/10/10}, abstract = {BACKGROUND AND OBJECTIVE: Measuring physical functioning (PF) within and across postacute settings is critical for monitoring outcomes of rehabilitation; however, most current instruments lack sufficient breadth and feasibility for widespread use. Computer adaptive testing (CAT), in which item selection is tailored to the individual patient, holds promise for reducing response burden, yet maintaining measurement precision. We calibrated a PF item bank via item response theory (IRT), administered items with a post hoc CAT design, and determined whether CAT would improve accuracy and precision of score estimates over random item selection. METHODS: 1,041 adults were interviewed during postacute care rehabilitation episodes in either hospital or community settings. Responses for 124 PF items were calibrated using IRT methods to create a PF item bank. 
We examined the accuracy and precision of CAT-based scores compared to a random selection of items. RESULTS: CAT-based scores had higher correlations with the IRT-criterion scores, especially with short tests, and resulted in narrower confidence intervals than scores based on a random selection of items; gains, as expected, were especially large for low and high performing adults. CONCLUSION: The CAT design may have important precision and efficiency advantages for point-of-care functional assessment in rehabilitation practice settings.}, keywords = {*Recovery of Function, Activities of Daily Living, Adolescent, Adult, Aged, Aged, 80 and over, Confidence Intervals, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Rehabilitation/*standards, Reproducibility of Results, Software}, isbn = {0895-4356 (Print)}, author = {Haley, S. M. and Ni, P. and Hambleton, R. K. and Slavin, M. D. and Jette, A. M.} } @article {184, title = {Simulated computerized adaptive test for patients with shoulder impairments was efficient and produced valid measures of function}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {3}, year = {2006}, note = {0895-4356 (Print)Journal ArticleValidation Studies}, pages = {290-8}, abstract = {BACKGROUND AND OBJECTIVE: To test unidimensionality and local independence of a set of shoulder functional status (SFS) items, develop a computerized adaptive test (CAT) of the items using a rating scale item response theory model (RSM), and compare discriminant validity of measures generated using all items (theta(IRT)) and measures generated using the simulated CAT (theta(CAT)). STUDY DESIGN AND SETTING: We performed a secondary analysis of data collected prospectively during rehabilitation of 400 patients with shoulder impairments who completed 60 SFS items. 
RESULTS: Factor analytic techniques supported that the 42 SFS items formed a unidimensional scale and were locally independent. Except for five items, which were deleted, the RSM fit the data well. The remaining 37 SFS items were used to generate the CAT. On average, 6 items were needed to estimate precise measures of function using the SFS CAT, compared with all 37 SFS items. The theta(IRT) and theta(CAT) measures were highly correlated (r = .96) and resulted in similar classifications of patients. CONCLUSION: The simulated SFS CAT was efficient and produced precise, clinically relevant measures of functional status with good discriminating ability.}, keywords = {*Computer Simulation, *Range of Motion, Articular, Activities of Daily Living, Adult, Aged, Aged, 80 and over, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Prospective Studies, Reproducibility of Results, Research Support, N.I.H., Extramural, Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Shoulder Dislocation/*physiopathology/psychology/rehabilitation, Shoulder Pain/*physiopathology/psychology/rehabilitation, Shoulder/*physiopathology, Sickness Impact Profile, Treatment Outcome}, author = {Hart, D. L. and Cook, K. F. and Mioduski, J. E. and Teal, C. R. and Crane, P. K.} } @article {171, title = {A computer adaptive testing approach for assessing physical functioning in children and adolescents}, journal = {Developmental Medicine and Child Neurology}, volume = {47}, number = {2}, year = {2005}, note = {Haley, Stephen MNi, PengshengFragala-Pinkham, Maria ASkrinar, Alison MCorzo, DeyaniraComparative StudyResearch Support, Non-U.S. Gov{\textquoteright}tEnglandDevelopmental medicine and child neurologyDev Med Child Neurol. 
2005 Feb;47(2):113-20.}, month = {Feb}, pages = {113-120}, edition = {2005/02/15}, abstract = {The purpose of this article is to demonstrate: (1) the accuracy and (2) the reduction in amount of time and effort in assessing physical functioning (self-care and mobility domains) of children and adolescents using computer-adaptive testing (CAT). A CAT algorithm selects questions directly tailored to the child{\textquoteright}s ability level, based on previous responses. Using a CAT algorithm, a simulation study was used to determine the number of items necessary to approximate the score of a full-length assessment. We built simulated CAT (5-, 10-, 15-, and 20-item versions) for self-care and mobility domains and tested their accuracy in a normative sample (n=373; 190 males, 183 females; mean age 6y 11mo [SD 4y 2m], range 4mo to 14y 11mo) and a sample of children and adolescents with Pompe disease (n=26; 21 males, 5 females; mean age 6y 1mo [SD 3y 10mo], range 5mo to 14y 10mo). Results indicated that comparable score estimates (based on computer simulations) to the full-length tests can be achieved in a 20-item CAT version for all age ranges and for normative and clinical samples. No more than 13 to 16\% of the items in the full-length tests were needed for any one administration. These results support further consideration of using CAT programs for accurate and efficient clinical assessments of physical functioning.}, keywords = {*Computer Systems, Activities of Daily Living, Adolescent, Age Factors, Child, Child Development/*physiology, Child, Preschool, Computer Simulation, Confidence Intervals, Demography, Female, Glycogen Storage Disease Type II/physiopathology, Health Status Indicators, Humans, Infant, Infant, Newborn, Male, Motor Activity/*physiology, Outcome Assessment (Health Care)/*methods, Reproducibility of Results, Self Care, Sensitivity and Specificity}, isbn = {0012-1622 (Print)}, author = {Haley, S. M. and Ni, P. and Fragala-Pinkham, M. A. 
and Skrinar, A. M. and Corzo, D.} } @article {168, title = {Activity outcome measurement for postacute care}, journal = {Medical Care}, volume = {42}, number = {1 Suppl}, year = {2004}, note = {0025-7079Journal ArticleMulticenter Study}, pages = {I49-I61}, abstract = {BACKGROUND: Efforts to evaluate the effectiveness of a broad range of postacute care services have been hindered by the lack of conceptually sound and comprehensive measures of outcomes. It is critical to determine a common underlying structure before employing current methods of item equating across outcome instruments for future item banking and computer-adaptive testing applications. OBJECTIVE: To investigate the factor structure, reliability, and scale properties of items underlying the Activity domains of the International Classification of Functioning, Disability and Health (ICF) for use in postacute care outcome measurement. METHODS: We developed a 41-item Activity Measure for Postacute Care (AM-PAC) that assessed an individual{\textquoteright}s execution of discrete daily tasks in his or her own environment across major content domains as defined by the ICF. We evaluated the reliability and discriminant validity of the prototype AM-PAC in 477 individuals in active rehabilitation programs across 4 rehabilitation settings using factor analyses, tests of item scaling, internal consistency reliability analyses, Rasch item response theory modeling, residual component analysis, and modified parallel analysis. RESULTS: Results from an initial exploratory factor analysis produced 3 distinct, interpretable factors that accounted for 72\% of the variance: Applied Cognition (44\%), Personal Care \& Instrumental Activities (19\%), and Physical \& Movement Activities (9\%); these 3 activity factors were verified by a confirmatory factor analysis. Scaling assumptions were met for each factor in the total sample and across diagnostic groups. 
Internal consistency reliability was high for the total sample (Cronbach alpha = 0.92 to 0.94), and for specific diagnostic groups (Cronbach alpha = 0.90 to 0.95). Rasch scaling, residual factor, differential item functioning, and modified parallel analyses supported the unidimensionality and goodness of fit of each unique activity domain. CONCLUSIONS: This 3-factor model of the AM-PAC can form the conceptual basis for common-item equating and computer-adaptive applications, leading to a comprehensive system of outcome instruments for postacute care settings.}, keywords = {*Self Efficacy, *Sickness Impact Profile, Activities of Daily Living/*classification/psychology, Adult, Aftercare/*standards/statistics \& numerical data, Aged, Boston, Cognition/physiology, Disability Evaluation, Factor Analysis, Statistical, Female, Human, Male, Middle Aged, Movement/physiology, Outcome Assessment (Health Care)/*methods/statistics \& numerical data, Psychometrics, Questionnaires/standards, Rehabilitation/*standards/statistics \& numerical data, Reproducibility of Results, Sensitivity and Specificity, Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Support, U.S. Gov{\textquoteright}t, P.H.S.}, author = {Haley, S. M. and Coster, W. J. and Andres, P. L. and Ludlow, L. H. and Ni, P. and Bond, T. L. and Sinclair, S. J. and Jette, A. M.} } @article {381, title = {Pre-equating: a simulation study based on a large scale assessment model}, journal = {Journal of Applied Measurement}, volume = {5}, number = {3}, year = {2004}, note = {1529-7713Journal Article}, pages = {301-18}, abstract = {Although post-equating (PE) has proven to be an acceptable method in the scaling and equating of items and forms, there are times when the turn-around period for equating and converting raw scores to scale scores is so small that PE cannot be undertaken within the prescribed time frame. In such cases, pre-equating (PrE) could be considered as an acceptable alternative. 
Assessing the feasibility of using item calibrations from the item bank (as in PrE) is conditioned on the equivalency of the calibrations and the errors associated with it vis a vis the results obtained via PE. This paper creates item banks over three periods of item introduction into the banks and uses the Rasch model in examining data with respect to the recovery of item parameters, the measurement error, and the effect cut-points have on examinee placement in both the PrE and PE situations. Results indicate that PrE is a viable solution to PE provided the stability of the item calibrations are enhanced by using large sample sizes (perhaps as large as full-population) in populating the item bank.}, keywords = {*Databases, *Models, Theoretical, Calibration, Human, Psychometrics, Reference Values, Reproducibility of Results}, author = {Taherbhai, H. M. and Young, M. J.} } @article {87, title = {Refining the conceptual basis for rehabilitation outcome measurement: personal care and instrumental activities domain}, journal = {Medical Care}, volume = {42}, number = {1 Suppl}, year = {2004}, note = {0025-7079Journal Article}, month = {Jan}, pages = {I62-I72}, abstract = {BACKGROUND: Rehabilitation outcome measures routinely include content on performance of daily activities; however, the conceptual basis for item selection is rarely specified. These instruments differ significantly in format, number, and specificity of daily activity items and in the measurement dimensions and type of scale used to specify levels of performance. We propose that a requirement for upper limb and hand skills underlies many activities of daily living (ADL) and instrumental activities of daily living (IADL) items in current instruments, and that items selected based on this definition can be placed along a single functional continuum. 
OBJECTIVE: To examine the dimensional structure and content coverage of a Personal Care and Instrumental Activities item set and to examine the comparability of items from existing instruments and a set of new items as measures of this domain. METHODS: Participants (N = 477) from 3 different disability groups and 4 settings representing the continuum of postacute rehabilitation care were administered the newly developed Activity Measure for Post-Acute Care (AM-PAC), the SF-8, and an additional setting-specific measure: FIM (in-patient rehabilitation); MDS (skilled nursing facility); MDS-PAC (postacute settings); OASIS (home care); or PF-10 (outpatient clinic). Rasch (partial-credit model) analyses were conducted on a set of 62 items covering the Personal Care and Instrumental domain to examine item fit, item functioning, and category difficulty estimates and unidimensionality. RESULTS: After removing 6 misfitting items, the remaining 56 items fit acceptably along the hypothesized continuum. Analyses yielded different difficulty estimates for the maximum score (eg, "Independent performance") for items with comparable content from different instruments. Items showed little differential item functioning across age, diagnosis, or severity groups, and 92\% of the participants fit the model. CONCLUSIONS: ADL and IADL items from existing rehabilitation outcomes instruments that depend on skilled upper limb and hand use can be located along a single continuum, along with the new personal care and instrumental items of the AM-PAC addressing gaps in content. 
Results support the validity of the proposed definition of the Personal Care and Instrumental Activities dimension of function as a guide for future development of rehabilitation outcome instruments, such as linked, setting-specific short forms and computerized adaptive testing approaches.}, keywords = {*Self Efficacy, *Sickness Impact Profile, Activities of Daily Living/*classification/psychology, Adult, Aged, Aged, 80 and over, Disability Evaluation, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods/statistics \& numerical data, Questionnaires/*standards, Recovery of Function/physiology, Rehabilitation/*standards/statistics \& numerical data, Reproducibility of Results, Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Research Support, U.S. Gov{\textquoteright}t, P.H.S., Sensitivity and Specificity}, author = {Coster, W. J. and Haley, S. M. and Andres, P. L. and Ludlow, L. H. and Bond, T. L. and Ni, P. S.} } @article {96, title = {An examination of exposure control and content balancing restrictions on item selection in CATs using the partial credit model}, journal = {Journal of Applied Measurement}, volume = {4}, number = {1}, year = {2003}, note = {1529-7713Journal Article}, pages = {24-42}, abstract = {The purpose of the present investigation was to systematically examine the effectiveness of the Sympson-Hetter technique and rotated content balancing relative to no exposure control and no content rotation conditions in a computerized adaptive testing system (CAT) based on the partial credit model. A series of simulated fixed and variable length CATs were run using two data sets generated to multiple content areas for three sizes of item pools. The 2 (exposure control) X 2 (content rotation) X 2 (test length) X 3 (item pool size) X 2 (data sets) yielded a total of 48 conditions. 
Results show that while both procedures can be used with no deleterious effect on measurement precision, the gains in exposure control, pool utilization, and item overlap appear quite modest. Difficulties involved with setting the exposure control parameters in small item pools make questionable the utility of the Sympson-Hetter technique with similar item pools.}, keywords = {*Computers, *Educational Measurement, *Models, Theoretical, Automation, Decision Making, Humans, Reproducibility of Results}, author = {Davis, L. L. and Pastor, D. A. and Dodd, B. G. and Chiang, C. and Fitzpatrick, S. J.} } @article {187, title = {Development of an index of physical functional health status in rehabilitation}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {83}, number = {5}, year = {2002}, note = {0003-9993 (Print)Journal Article}, month = {May}, pages = {655-65}, abstract = {OBJECTIVE: To describe (1) the development of an index of physical functional health status (FHS) and (2) its hierarchical structure, unidimensionality, reproducibility of item calibrations, and practical application. DESIGN: Rasch analysis of existing data sets. SETTING: A total of 715 acute, orthopedic outpatient centers and 62 long-term care facilities in 41 states participating with Focus On Therapeutic Outcomes, Inc. PATIENTS: A convenience sample of 92,343 patients (40\% male; mean age +/- standard deviation [SD], 48+/-17y; range, 14-99y) seeking rehabilitation between 1993 and 1999. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Patients completed self-report health status surveys at admission and discharge. The Medical Outcomes Study 36-Item Short-Form Health Survey{\textquoteright}s physical functioning scale (PF-10) is the foundation of the physical FHS. 
The Oswestry Low Back Pain Disability Questionnaire, Neck Disability Index, Lysholm Knee Questionnaire, items pertinent to patients with upper-extremity impairments, and items pertinent to patients with more involved neuromusculoskeletal impairments were cocalibrated into the PF-10. RESULTS: The final FHS item bank contained 36 items (patient separation, 2.3; root mean square measurement error, 5.9; mean square +/- SD infit, 0.9+/-0.5; outfit, 0.9+/-0.9). Analyses supported empirical item hierarchy, unidimensionality, reproducibility of item calibrations, and content and construct validity of the FHS-36. CONCLUSIONS: Results support the reliability and validity of FHS-36 measures in the present sample. Analyses show the potential for a dynamic, computer-controlled, adaptive survey for FHS assessment applicable for group analysis and clinical decision making for individual patients.}, keywords = {*Health Status Indicators, *Rehabilitation Centers, Adolescent, Adult, Aged, Aged, 80 and over, Female, Health Surveys, Humans, Male, Middle Aged, Musculoskeletal Diseases/*physiopathology/*rehabilitation, Nervous System Diseases/*physiopathology/*rehabilitation, Physical Fitness/*physiology, Recovery of Function/physiology, Reproducibility of Results, Retrospective Studies}, author = {Hart, D. L. and Wright, B. D.} } @article {36, title = {An examination of the comparative reliability, validity, and accuracy of performance ratings made using computerized adaptive rating scales}, journal = {Journal of Applied Psychology}, volume = {86}, number = {5}, year = {2001}, note = {214803450021-9010Journal ArticleValidation Studies}, pages = {965-973}, abstract = {This laboratory research compared the reliability, validity, and accuracy of a computerized adaptive rating scale (CARS) format and 2 relatively common and representative rating formats. 
The CARS is a paired-comparison rating task that uses adaptive testing principles to present pairs of scaled behavioral statements to the rater to iteratively estimate a ratee{\textquoteright}s effectiveness on 3 dimensions of contextual performance. Videotaped vignettes of 6 office workers were prepared, depicting prescripted levels of contextual performance, and 112 subjects rated these vignettes using the CARS format and one or the other competing format. Results showed 23\%-37\% lower standard errors of measurement for the CARS format. In addition, validity was significantly higher for the CARS format (d = .18), and Cronbach{\textquoteright}s accuracy coefficients showed significantly higher accuracy, with a median effect size of .08. The discussion focuses on possible reasons for the results.}, keywords = {*Computer Simulation, *Employee Performance Appraisal, *Personnel Selection, Adult, Automatic Data Processing, Female, Human, Male, Reproducibility of Results, Sensitivity and Specificity, Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Task Performance and Analysis, Video Recording}, author = {Borman, W. C. and Buck, D. E. and Hanson, M. A. and Motowidlo, S. J. and Stark, S. and Drasgow, F.} } @article {368, title = {CAT administration of language placement examinations}, journal = {Journal of Applied Measurement}, volume = {1}, number = {3}, year = {2000}, note = {1529-7713Journal Article}, pages = {292-302}, abstract = {This article describes the development of a computerized adaptive test for Cegep de Jonquiere, a community college located in Quebec, Canada. Computerized language proficiency testing allows the simultaneous presentation of sound stimuli as the question is being presented to the test-taker. With a properly calibrated bank of items, the language proficiency test can be offered in an adaptive framework. By adapting the test to the test-taker{\textquoteright}s level of ability, an assessment can be made with significantly fewer items. 
We also describe our initial attempt to detect instances in which "cheating low" is occurring. In the "cheating low" situation, test-takers deliberately answer questions incorrectly, questions that they are fully capable of answering correctly had they been taking the test honestly.}, keywords = {*Language, *Software, Aptitude Tests/*statistics \& numerical data, Educational Measurement/*statistics \& numerical data, Humans, Psychometrics, Reproducibility of Results, Research Support, Non-U.S. Gov{\textquoteright}t}, author = {Stahl, J. and Bergstrom, B. and Gershon, R. C.} } @article {329, title = {Computerization and adaptive administration of the NEO PI-R}, journal = {Assessment}, volume = {7}, number = {4}, year = {2000}, note = {1073-1911 (Print)Journal Article}, pages = {347-64}, abstract = {This study asks, how well does an item response theory (IRT) based computerized adaptive NEO PI-R work? To explore this question, real-data simulations (N = 1,059) were used to evaluate a maximum information item selection computerized adaptive test (CAT) algorithm. Findings indicated satisfactory recovery of full-scale facet scores with the administration of around four items per facet scale. Thus, the NEO PI-R could be reduced in half with little loss in precision by CAT administration. However, results also indicated that the CAT algorithm was not necessary. We found that for many scales, administering the "best" four items per facet scale would have produced similar results. In the conclusion, we discuss the future of computerized personality assessment and describe the role IRT methods might play in such assessments.}, keywords = {*Personality Inventory, Algorithms, California, Diagnosis, Computer-Assisted/*methods, Humans, Models, Psychological, Psychometrics/methods, Reproducibility of Results}, author = {Reise, S. P. and Henson, J. 
M.} } @article {234, title = {Evaluating the usefulness of computerized adaptive testing for medical in-course assessment}, journal = {Academic Medicine}, volume = {74}, number = {10}, year = {1999}, note = {Kreiter, C DFerguson, KGruppen, L DUnited statesAcademic medicine : journal of the Association of American Medical CollegesAcad Med. 1999 Oct;74(10):1125-8.}, month = {Oct}, pages = {1125-8}, edition = {1999/10/28}, abstract = {PURPOSE: This study investigated the feasibility of converting an existing computer-administered, in-course internal medicine test to an adaptive format. METHOD: A 200-item internal medicine extended matching test was used for this research. Parameters were estimated with commercially available software with responses from 621 examinees. A specially developed simulation program was used to retrospectively estimate the efficiency of the computer-adaptive exam format. RESULTS: It was found that the average test length could be shortened by almost half with measurement precision approximately equal to that of the full 200-item paper-and-pencil test. However, computer-adaptive testing with this item bank provided little advantage for examinees at the upper end of the ability continuum. An examination of classical item statistics and IRT item statistics suggested that adding more difficult items might extend the advantage to this group of examinees. CONCLUSIONS: Medical item banks presently used for incourse assessment might be advantageously employed in adaptive testing. However, it is important to evaluate the match between the items and the measurement objective of the test before implementing this format.}, keywords = {*Automation, *Education, Medical, Undergraduate, Educational Measurement/*methods, Humans, Internal Medicine/*education, Likelihood Functions, Psychometrics/*methods, Reproducibility of Results}, isbn = {1040-2446 (Print)}, author = {Kreiter, C. D. and Ferguson, K. and Gruppen, L. 
D.} } @article {33, title = {A computerized adaptive testing system for speech discrimination measurement: The Speech Sound Pattern Discrimination Test}, journal = {Journal of the Acoustical Society of America}, volume = {101}, number = {4}, year = {1997}, note = {972575560001-4966Journal Article}, pages = {2289-2298}, abstract = {A computerized, adaptive test-delivery system for the measurement of speech discrimination, the Speech Sound Pattern Discrimination Test, is described and evaluated. Using a modified discrimination task, the testing system draws on a pool of 130 items spanning a broad range of difficulty to estimate an examinee{\textquoteright}s location along an underlying continuum of speech processing ability, yet does not require the examinee to possess a high level of English language proficiency. The system is driven by a mathematical measurement model which selects only test items which are appropriate in difficulty level for a given examinee, thereby individualizing the testing experience. Test items were administered to a sample of young deaf adults, and the adaptive testing system evaluated in terms of respondents{\textquoteright} sensory and perceptual capabilities, acoustic and phonetic dimensions of speech, and theories of speech perception. Data obtained in this study support the validity, reliability, and efficiency of this test as a measure of speech processing ability.}, keywords = {*Diagnosis, Computer-Assisted, *Speech Discrimination Tests, *Speech Perception, Adolescent, Adult, Audiometry, Pure-Tone, Human, Middle Age, Psychometrics, Reproducibility of Results}, author = {Bochner, J. and Garrison, W. and Palmer, L. and MacKenzie, D. and Braveman, A.} }