% Journal-article records, one field per line. Citation keys are the original
% numeric ids and are unchanged, so existing \cite commands keep working.
%
% Conventions used in the records below:
%   author  - full given names, taken from the name list at the start of each
%             record's own note field.
%   month   - BibTeX month macros (unquoted). Where the original value also
%             carried a day of month, that day is still present in the
%             citation string that ends the note field.
%   pages   - full page ranges with a double hyphen.
%   issn    - the ISSN the export labelled "Linking" (or the only one given);
%             eissn holds a differing number labelled "Electronic".
%   note    - the export's run of names, grant ids, publication types, country,
%             journal and citation, with "; " inserted between items.
%             NOTE(review): most styles print note verbatim; consider moving
%             this text to an unprinted field such as annote.
%   edition - holds a yyyy/mm/dd date, not an edition; left exactly as exported.
%             NOTE(review): presumably an indexing date from the export source;
%             confirm before relying on it.

@article{46,
  author   = {Buysse, Daniel J. and Yu, Lan and Moul, Douglas E. and Germain, Anne and Stover, Angela and Dodds, Nathan E. and Johnston, Kelly L. and Shablesky-Cade, Melissa A. and Pilkonis, Paul A.},
  title    = {Development and validation of patient-reported outcome measures for sleep disturbance and sleep-related impairments},
  journal  = {Sleep},
  volume   = {33},
  number   = {6},
  year     = {2010},
  month    = jun,
  pages    = {781--792},
  issn     = {0161-8105},
  edition  = {2010/06/17},
  keywords = {*Outcome Assessment (Health Care), *Self Disclosure, Adult, Aged, Aged, 80 and over, Cross-Sectional Studies, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Psychometrics, Questionnaires, Reproducibility of Results, Sleep Disorders/*diagnosis, Young Adult},
  abstract = {STUDY OBJECTIVES: To develop an archive of self-report questions assessing sleep disturbance and sleep-related impairments (SRI), to develop item banks from this archive, and to validate and calibrate the item banks using classic validation techniques and item response theory analyses in a sample of clinical and community participants. DESIGN: Cross-sectional self-report study. SETTING: Academic medical center and participant homes. PARTICIPANTS: One thousand nine hundred ninety-three adults recruited from an Internet polling sample and 259 adults recruited from medical, psychiatric, and sleep clinics. INTERVENTIONS: None. MEASUREMENTS AND RESULTS: This study was part of PROMIS (Patient-Reported Outcomes Information System), a National Institutes of Health Roadmap initiative. Self-report item banks were developed through an iterative process of literature searches, collecting and sorting items, expert content review, qualitative patient research, and pilot testing. Internal consistency, convergent validity, and exploratory and confirmatory factor analysis were examined in the resulting item banks. Factor analyses identified 2 preliminary item banks, sleep disturbance and SRI. Item response theory analyses and expert content review narrowed the item banks to 27 and 16 items, respectively. Validity of the item banks was supported by moderate to high correlations with existing scales and by significant differences in sleep disturbance and SRI scores between participants with and without sleep disorders. CONCLUSIONS: The PROMIS sleep disturbance and SRI item banks have excellent measurement properties and may prove to be useful for assessing general aspects of sleep and SRI with various groups of patients and interventions.},
  note     = {Buysse, Daniel J; Yu, Lan; Moul, Douglas E; Germain, Anne; Stover, Angela; Dodds, Nathan E; Johnston, Kelly L; Shablesky-Cade, Melissa A; Pilkonis, Paul A; AR052155/AR/NIAMS NIH HHS/United States; U01AR52155/AR/NIAMS NIH HHS/United States; U01AR52158/AR/NIAMS NIH HHS/United States; U01AR52170/AR/NIAMS NIH HHS/United States; U01AR52171/AR/NIAMS NIH HHS/United States; U01AR52177/AR/NIAMS NIH HHS/United States; U01AR52181/AR/NIAMS NIH HHS/United States; U01AR52186/AR/NIAMS NIH HHS/United States; Research Support, N.I.H., Extramural; Validation Studies; United States; Sleep; Sleep. 2010 Jun 1;33(6):781-92.},
}

@article{7,
  author   = {Anatchkova, Milena D. and Saris-Baglama, Renee N. and Kosinski, Mark and Bjorner, Jakob B.},
  title    = {Development and preliminary testing of a computerized adaptive assessment of chronic pain},
  journal  = {Journal of Pain},
  volume   = {10},
  number   = {9},
  year     = {2009},
  month    = sep,
  pages    = {932--943},
  issn     = {1526-5900},
  eissn    = {1528-8447},
  edition  = {2009/07/15},
  keywords = {*Computers, *Questionnaires, Activities of Daily Living, Adaptation, Psychological, Chronic Disease, Cohort Studies, Disability Evaluation, Female, Humans, Male, Middle Aged, Models, Psychological, Outcome Assessment (Health Care), Pain Measurement/*methods, Pain, Intractable/*diagnosis/psychology, Psychometrics, Quality of Life, User-Computer Interface},
  abstract = {The aim of this article is to report the development and preliminary testing of a prototype computerized adaptive test of chronic pain (CHRONIC PAIN-CAT) conducted in 2 stages: (1) evaluation of various item selection and stopping rules through real data-simulated administrations of CHRONIC PAIN-CAT; (2) a feasibility study of the actual prototype CHRONIC PAIN-CAT assessment system conducted in a pilot sample. Item calibrations developed from a US general population sample (N = 782) were used to program a pain severity and impact item bank (kappa = 45), and real data simulations were conducted to determine a CAT stopping rule. The CHRONIC PAIN-CAT was programmed on a tablet PC using QualityMetric{\textquoteright}s Dynamic Health Assessment (DYHNA) software and administered to a clinical sample of pain sufferers (n = 100). The CAT was completed in significantly less time than the static (full item bank) assessment (P < .001). On average, 5.6 items were dynamically administered by CAT to achieve a precise score. Scores estimated from the 2 assessments were highly correlated (r = .89), and both assessments discriminated across pain severity levels (P < .001, RV = .95). Patients{\textquoteright} evaluations of the CHRONIC PAIN-CAT were favorable. PERSPECTIVE: This report demonstrates that the CHRONIC PAIN-CAT is feasible for administration in a clinic. The application has the potential to improve pain assessment and help clinicians manage chronic pain.},
  note     = {Anatchkova, Milena D; Saris-Baglama, Renee N; Kosinski, Mark; Bjorner, Jakob B; 1R43AR052251-01A1/AR/NIAMS NIH HHS/United States; Evaluation Studies; Research Support, N.I.H., Extramural; United States; The journal of pain : official journal of the American Pain Society; J Pain. 2009 Sep;10(9):932-43.},
}

@article{138,
  author   = {Forkmann, Thomas and Boecker, Maren and Norra, Christine and Eberle, Nicole and Kircher, Tilo and Schauerte, Patrick and Mischke, Karl and Westhofen, Martin and Gauggel, Siegfried and Wirtz, Markus},
  title    = {Development of an item bank for the assessment of depression in persons with mental illnesses and physical diseases using {Rasch} analysis},
  journal  = {Rehabilitation Psychology},
  volume   = {54},
  number   = {2},
  year     = {2009},
  month    = may,
  pages    = {186--197},
  issn     = {0090-5550},
  edition  = {2009/05/28},
  keywords = {Adaptation, Psychological, Adult, Aged, Depressive Disorder/*diagnosis/psychology, Diagnosis, Computer-Assisted, Female, Heart Diseases/*psychology, Humans, Male, Mental Disorders/*psychology, Middle Aged, Models, Statistical, Otorhinolaryngologic Diseases/*psychology, Personality Assessment/statistics \& numerical data, Personality Inventory/*statistics \& numerical data, Psychometrics/statistics \& numerical data, Questionnaires, Reproducibility of Results, Sick Role},
  abstract = {OBJECTIVE: The calibration of item banks provides the basis for computerized adaptive testing that ensures high diagnostic precision and minimizes participants{\textquoteright} test burden. The present study aimed at developing a new item bank that allows for assessing depression in persons with mental and persons with somatic diseases. METHOD: The sample consisted of 161 participants treated for a depressive syndrome, and 206 participants with somatic illnesses (103 cardiologic, 103 otorhinolaryngologic; overall mean age = 44.1 years, SD =14.0; 44.7\% women) to allow for validation of the item bank in both groups. Persons answered a pool of 182 depression items on a 5-point Likert scale. RESULTS: Evaluation of Rasch model fit (infit < 1.3), differential item functioning, dimensionality, local independence, item spread, item and person separation (>2.0), and reliability (>.80) resulted in a bank of 79 items with good psychometric properties. CONCLUSIONS: The bank provides items with a wide range of content coverage and may serve as a sound basis for computerized adaptive testing applications. It might also be useful for researchers who wish to develop new fixed-length scales for the assessment of depression in specific rehabilitation settings.},
  note     = {Forkmann, Thomas; Boecker, Maren; Norra, Christine; Eberle, Nicole; Kircher, Tilo; Schauerte, Patrick; Mischke, Karl; Westhofen, Martin; Gauggel, Siegfried; Wirtz, Markus; Research Support, Non-U.S. Gov{\textquoteright}t; United States; Rehabilitation psychology; Rehabil Psychol. 2009 May;54(2):186-97.},
}

@article{227,
  author   = {Kocalevent, Ruya-Daniela and Rose, Matthias and Becker, Janine and Walter, Otto B. and Fliege, Herbert and Bjorner, Jakob B. and Kleiber, Dieter and Klapp, Burghard F.},
  title    = {An evaluation of patient-reported outcomes found computerized adaptive testing was efficient in assessing stress perception},
  journal  = {Journal of Clinical Epidemiology},
  volume   = {62},
  number   = {3},
  year     = {2009},
  pages    = {278--287},
  issn     = {0895-4356},
  eissn    = {1878-5921},
  edition  = {2008/07/22},
  keywords = {*Diagnosis, Computer-Assisted, Adolescent, Adult, Aged, Aged, 80 and over, Confidence Intervals, Female, Humans, Male, Middle Aged, Perception, Quality of Health Care/*standards, Questionnaires, Reproducibility of Results, Sickness Impact Profile, Stress, Psychological/*diagnosis/psychology, Treatment Outcome},
  abstract = {OBJECTIVES: This study aimed to develop and evaluate a first computerized adaptive test (CAT) for the measurement of stress perception (Stress-CAT), in terms of the two dimensions: exposure to stress and stress reaction. STUDY DESIGN AND SETTING: Item response theory modeling was performed using a two-parameter model (Generalized Partial Credit Model). The evaluation of the Stress-CAT comprised a simulation study and real clinical application. A total of 1,092 psychosomatic patients (N1) were studied. Two hundred simulees (N2) were generated for a simulated response data set. Then the Stress-CAT was given to n=116 inpatients, (N3) together with established stress questionnaires as validity criteria. RESULTS: The final banks included n=38 stress exposure items and n=31 stress reaction items. In the first simulation study, CAT scores could be estimated with a high measurement precision (SE<0.32; rho>0.90) using 7.0+/-2.3 (M+/-SD) stress reaction items and 11.6+/-1.7 stress exposure items. The second simulation study reanalyzed real patients data (N1) and showed an average use of items of 5.6+/-2.1 for the dimension stress reaction and 10.0+/-4.9 for the dimension stress exposure. Convergent validity showed significantly high correlations. CONCLUSIONS: The Stress-CAT is short and precise, potentially lowering the response burden of patients in clinical decision making.},
  note     = {Kocalevent, Ruya-Daniela; Rose, Matthias; Becker, Janine; Walter, Otto B; Fliege, Herbert; Bjorner, Jakob B; Kleiber, Dieter; Klapp, Burghard F; Evaluation Studies; United States; Journal of clinical epidemiology; J Clin Epidemiol. 2009 Mar;62(3):278-87, 287.e1-3. Epub 2008 Jul 18.},
}

@article{170,
  author   = {Haley, Stephen M. and Ni, Pengsheng and Dumas, Helene M. and Fragala-Pinkham, Maria A. and Hambleton, Ronald K. and Montpetit, Kathleen and Bilodeau, Nathalie and Gorton, George E. and Watson, Kyle and Tucker, Carole A.},
  title    = {Measuring global physical health in children with cerebral palsy: Illustration of a multidimensional bi-factor model and computerized adaptive testing},
  journal  = {Quality of Life Research},
  volume   = {18},
  number   = {3},
  year     = {2009},
  month    = apr,
  pages    = {359--370},
  issn     = {0962-9343},
  edition  = {2009/02/18},
  keywords = {*Computer Simulation, *Health Status, *Models, Statistical, Adaptation, Psychological, Adolescent, Cerebral Palsy/*physiopathology, Child, Child, Preschool, Factor Analysis, Statistical, Female, Humans, Male, Massachusetts, Pennsylvania, Questionnaires, Young Adult},
  abstract = {PURPOSE: The purposes of this study were to apply a bi-factor model for the determination of test dimensionality and a multidimensional CAT using computer simulations of real data for the assessment of a new global physical health measure for children with cerebral palsy (CP). METHODS: Parent respondents of 306 children with cerebral palsy were recruited from four pediatric rehabilitation hospitals and outpatient clinics. We compared confirmatory factor analysis results across four models: (1) one-factor unidimensional; (2) two-factor multidimensional (MIRT); (3) bi-factor MIRT with fixed slopes; and (4) bi-factor MIRT with varied slopes. We tested whether the general and content (fatigue and pain) person score estimates could discriminate across severity and types of CP, and whether score estimates from a simulated CAT were similar to estimates based on the total item bank, and whether they correlated as expected with external measures. RESULTS: Confirmatory factor analysis suggested separate pain and fatigue sub-factors; all 37 items were retained in the analyses. From the bi-factor MIRT model with fixed slopes, the full item bank scores discriminated across levels of severity and types of CP, and compared favorably to external instruments. CAT scores based on 10- and 15-item versions accurately captured the global physical health scores. CONCLUSIONS: The bi-factor MIRT CAT application, especially the 10- and 15-item versions, yielded accurate global physical health scores that discriminated across known severity groups and types of CP, and correlated as expected with concurrent measures. The CATs have potential for collecting complex data on the physical health of children with CP in an efficient manner.},
  note     = {Haley, Stephen M; Ni, Pengsheng; Dumas, Helene M; Fragala-Pinkham, Maria A; Hambleton, Ronald K; Montpetit, Kathleen; Bilodeau, Nathalie; Gorton, George E; Watson, Kyle; Tucker, Carole A; K02 HD045354-01A1/HD/NICHD NIH HHS/United States; K02 HD45354-01A1/HD/NICHD NIH HHS/United States; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov{\textquoteright}t; Netherlands; Quality of life research : an international journal of quality of life aspects of treatment, care and rehabilitation; Qual Life Res. 2009 Apr;18(3):359-70. Epub 2009 Feb 17.},
}

@article{78,
  author   = {Chien, Tsair-Wei and Wu, Hing-Man and Wang, Weng-Chung and Castillo, Roberto Vasquez and Chou, Willy},
  title    = {Reduction in patient burdens with graphical computerized adaptive testing on the {ADL} scale: tool development and simulation},
  journal  = {Health and Quality of Life Outcomes},
  volume   = {7},
  year     = {2009},
  pages    = {39},
  issn     = {1477-7525},
  edition  = {2009/05/07},
  keywords = {*Activities of Daily Living, *Computer Graphics, *Computer Simulation, *Diagnosis, Computer-Assisted, Female, Humans, Male, Point-of-Care Systems, Reproducibility of Results, Stroke/*rehabilitation, Taiwan, United States},
  abstract = {BACKGROUND: The aim of this study was to verify the effectiveness and efficacy of saving time and reducing burden for patients, nurses, and even occupational therapists through computer adaptive testing (CAT). METHODS: Based on an item bank of the Barthel Index (BI) and the Frenchay Activities Index (FAI) for assessing comprehensive activities of daily living (ADL) function in stroke patients, we developed a visual basic application (VBA)-Excel CAT module, and (1) investigated whether the averaged test length via CAT is shorter than that of the traditional all-item-answered non-adaptive testing (NAT) approach through simulation, (2) illustrated the CAT multimedia on a tablet PC showing data collection and response errors of ADL clinical functional measures in stroke patients, and (3) demonstrated the quality control of endorsing scale with fit statistics to detect responding errors, which will be further immediately reconfirmed by technicians once patient ends the CAT assessment. RESULTS: The results show that endorsed items could be shorter on CAT (M = 13.42) than on NAT (M = 23) at 41.64\% efficiency in test length. However, averaged ability estimations reveal insignificant differences between CAT and NAT. CONCLUSION: This study found that mobile nursing services, placed at the bedsides of patients could, through the programmed VBA-Excel CAT module, reduce the burden to patients and save time, more so than the traditional NAT paper-and-pencil testing appraisals.},
  note     = {Chien, Tsair-Wei; Wu, Hing-Man; Wang, Weng-Chung; Castillo, Roberto Vasquez; Chou, Willy; Comparative Study; Validation Studies; England; Health and quality of life outcomes; Health Qual Life Outcomes. 2009 May 5;7:39.},
}

@article{173,
  author   = {Haley, Stephen M. and Ni, Pengsheng and Jette, Alan M. and Tao, Wei and Moed, Richard and Meyers, Doug and Ludlow, Larry H.},
  title    = {Replenishing a computerized adaptive test of patient-reported daily activity functioning},
  journal  = {Quality of Life Research},
  volume   = {18},
  number   = {4},
  year     = {2009},
  month    = may,
  pages    = {461--471},
  issn     = {0962-9343},
  edition  = {2009/03/17},
  keywords = {*Activities of Daily Living, *Disability Evaluation, *Questionnaires, *User-Computer Interface, Adult, Aged, Cohort Studies, Computer-Assisted Instruction, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods},
  abstract = {PURPOSE: Computerized adaptive testing (CAT) item banks may need to be updated, but before new items can be added, they must be linked to the previous CAT. The purpose of this study was to evaluate 41 pretest items prior to including them into an operational CAT. METHODS: We recruited 6,882 patients with spine, lower extremity, upper extremity, and nonorthopedic impairments who received outpatient rehabilitation in one of 147 clinics across 13 states of the USA. Forty-one new Daily Activity (DA) items were administered along with the Activity Measure for Post-Acute Care Daily Activity CAT (DA-CAT-1) in five separate waves. We compared the scoring consistency with the full item bank, test information function (TIF), person standard errors (SEs), and content range of the DA-CAT-1 to the new CAT (DA-CAT-2) with the pretest items by real data simulations. RESULTS: We retained 29 of the 41 pretest items. Scores from the DA-CAT-2 were more consistent (ICC = 0.90 versus 0.96) than DA-CAT-1 when compared with the full item bank. TIF and person SEs were improved for persons with higher levels of DA functioning, and ceiling effects were reduced from 16.1\% to 6.1\%. CONCLUSIONS: Item response theory and online calibration methods were valuable in improving the DA-CAT.},
  note     = {Haley, Stephen M; Ni, Pengsheng; Jette, Alan M; Tao, Wei; Moed, Richard; Meyers, Doug; Ludlow, Larry H; K02 HD45354-01/HD/NICHD NIH HHS/United States; Research Support, N.I.H., Extramural; Netherlands; Quality of life research : an international journal of quality of life aspects of treatment, care and rehabilitation; Qual Life Res. 2009 May;18(4):461-71. Epub 2009 Mar 14.},
}

@article{212,
  author   = {Jette, Alan M. and Haley, Stephen M. and Ni, Pengsheng and Moed, Richard},
  title    = {Adaptive short forms for outpatient rehabilitation outcome assessment},
  journal  = {American Journal of Physical Medicine and Rehabilitation},
  volume   = {87},
  number   = {10},
  year     = {2008},
  month    = oct,
  pages    = {842--852},
  issn     = {1537-7385},
  edition  = {2008/09/23},
  keywords = {*Activities of Daily Living, *Ambulatory Care Facilities, *Mobility Limitation, *Treatment Outcome, Disabled Persons/psychology/*rehabilitation, Female, Humans, Male, Middle Aged, Questionnaires, Rehabilitation Centers},
  abstract = {OBJECTIVE: To develop outpatient Adaptive Short Forms for the Activity Measure for Post-Acute Care item bank for use in outpatient therapy settings. DESIGN: A convenience sample of 11,809 adults with spine, lower limb, upper limb, and miscellaneous orthopedic impairments who received outpatient rehabilitation in 1 of 127 outpatient rehabilitation clinics in the United States. We identified optimal items for use in developing outpatient Adaptive Short Forms based on the Basic Mobility and Daily Activities domains of the Activity Measure for Post-Acute Care item bank. Patient scores were derived from the Activity Measure for Post-Acute Care computerized adaptive testing program. Items were selected for inclusion on the Adaptive Short Forms based on functional content, range of item coverage, measurement precision, item exposure rate, and data collection burden. RESULTS: Two outpatient Adaptive Short Forms were developed: (1) an 18-item Basic Mobility Adaptive Short Form and (2) a 15-item Daily Activities Adaptive Short Form, derived from the same item bank used to develop the Activity Measure for Post-Acute Care computerized adaptive testing program. Both Adaptive Short Forms achieved acceptable psychometric properties. CONCLUSIONS: In outpatient postacute care settings where computerized adaptive testing outcome applications are currently not feasible, item response theory-derived Adaptive Short Forms provide the efficient capability to monitor patients{\textquoteright} functional outcomes. The development of Adaptive Short Form functional outcome instruments linked by a common, calibrated item bank has the potential to create a bridge to outcome monitoring across postacute care settings and can facilitate the eventual transformation from Adaptive Short Forms to computerized adaptive testing applications easier and more acceptable to the rehabilitation community.},
  note     = {Jette, Alan M; Haley, Stephen M; Ni, Pengsheng; Moed, Richard; K02 HD45354-01/HD/NICHD NIH HHS/United States; R01 HD43568/HD/NICHD NIH HHS/United States; Research Support, N.I.H., Extramural; Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S.; Research Support, U.S. Gov{\textquoteright}t, P.H.S.; United States; American journal of physical medicine \& rehabilitation / Association of Academic Physiatrists; Am J Phys Med Rehabil. 2008 Oct;87(10):842-52.},
}

@article{88,
  author   = {Coster, Wendy J. and Haley, Stephen M. and Ni, Pengsheng and Dumas, Helene M. and Fragala-Pinkham, Maria A.},
  title    = {Assessing self-care and social function using a computer adaptive testing version of the pediatric evaluation of disability inventory},
  journal  = {Archives of Physical Medicine and Rehabilitation},
  volume   = {89},
  number   = {4},
  year     = {2008},
  month    = apr,
  pages    = {622--629},
  issn     = {0003-9993},
  eissn    = {1532-821X},
  edition  = {2008/04/01},
  keywords = {*Disability Evaluation, *Social Adjustment, Activities of Daily Living, Adolescent, Age Factors, Child, Child, Preschool, Computer Simulation, Cross-Over Studies, Disabled Children/*rehabilitation, Female, Follow-Up Studies, Humans, Infant, Male, Outcome Assessment (Health Care), Reference Values, Reproducibility of Results, Retrospective Studies, Risk Factors, Self Care/*standards/trends, Sex Factors, Sickness Impact Profile},
  abstract = {OBJECTIVE: To examine score agreement, validity, precision, and response burden of a prototype computer adaptive testing (CAT) version of the self-care and social function scales of the Pediatric Evaluation of Disability Inventory compared with the full-length version of these scales. DESIGN: Computer simulation analysis of cross-sectional and longitudinal retrospective data; cross-sectional prospective study. SETTING: Pediatric rehabilitation hospital, including inpatient acute rehabilitation, day school program, outpatient clinics; community-based day care, preschool, and children{\textquoteright}s homes. PARTICIPANTS: Children with disabilities (n=469) and 412 children with no disabilities (analytic sample); 38 children with disabilities and 35 children without disabilities (cross-validation sample). INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Summary scores from prototype CAT applications of each scale using 15-, 10-, and 5-item stopping rules; scores from the full-length self-care and social function scales; time (in seconds) to complete assessments and respondent ratings of burden. RESULTS: Scores from both computer simulations and field administration of the prototype CATs were highly consistent with scores from full-length administration (r range, .94-.99). Using computer simulation of retrospective data, discriminant validity, and sensitivity to change of the CATs closely approximated that of the full-length scales, especially when the 15- and 10-item stopping rules were applied. In the cross-validation study the time to administer both CATs was 4 minutes, compared with over 16 minutes to complete the full-length scales. CONCLUSIONS: Self-care and social function score estimates from CAT administration are highly comparable with those obtained from full-length scale administration, with small losses in validity and precision and substantial decreases in administration time.},
  note     = {Coster, Wendy J; Haley, Stephen M; Ni, Pengsheng; Dumas, Helene M; Fragala-Pinkham, Maria A; K02 HD45354-01A1/HD/NICHD NIH HHS/United States; R41 HD052318-01A1/HD/NICHD NIH HHS/United States; R43 HD42388-01/HD/NICHD NIH HHS/United States; Comparative Study; Research Support, N.I.H., Extramural; United States; Archives of physical medicine and rehabilitation; Arch Phys Med Rehabil. 2008 Apr;89(4):622-9.},
}

@article{169,
  author   = {Haley, Stephen M. and Gandek, Barbara and Siebens, Hilary and Black-Schaffer, Randie M. and Sinclair, Samuel J. and Tao, Wei and Coster, Wendy J. and Ni, Pengsheng and Jette, Alan M.},
  title    = {Computerized adaptive testing for follow-up after discharge from inpatient rehabilitation: {II}. Participation outcomes},
  journal  = {Archives of Physical Medicine and Rehabilitation},
  volume   = {89},
  number   = {2},
  year     = {2008},
  month    = feb,
  pages    = {275--283},
  issn     = {0003-9993},
  eissn    = {1532-821X},
  edition  = {2008/01/30},
  keywords = {*Activities of Daily Living, *Adaptation, Physiological, *Computer Systems, *Questionnaires, Adult, Aged, Aged, 80 and over, Chi-Square Distribution, Factor Analysis, Statistical, Female, Humans, Longitudinal Studies, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Patient Discharge, Prospective Studies, Rehabilitation/*standards, Subacute Care/*standards},
  abstract = {OBJECTIVES: To measure participation outcomes with a computerized adaptive test (CAT) and compare CAT and traditional fixed-length surveys in terms of score agreement, respondent burden, discriminant validity, and responsiveness. DESIGN: Longitudinal, prospective cohort study of patients interviewed approximately 2 weeks after discharge from inpatient rehabilitation and 3 months later. SETTING: Follow-up interviews conducted in patient{\textquoteright}s home setting. PARTICIPANTS: Adults (N=94) with diagnoses of neurologic, orthopedic, or medically complex conditions. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Participation domains of mobility, domestic life, and community, social, \& civic life, measured using a CAT version of the Participation Measure for Postacute Care (PM-PAC-CAT) and a 53-item fixed-length survey (PM-PAC-53). RESULTS: The PM-PAC-CAT showed substantial agreement with PM-PAC-53 scores (intraclass correlation coefficient, model 3,1, .71-.81). On average, the PM-PAC-CAT was completed in 42\% of the time and with only 48\% of the items as compared with the PM-PAC-53. Both formats discriminated across functional severity groups. The PM-PAC-CAT had modest reductions in sensitivity and responsiveness to patient-reported change over a 3-month interval as compared with the PM-PAC-53. CONCLUSIONS: Although continued evaluation is warranted, accurate estimates of participation status and responsiveness to change for group-level analyses can be obtained from CAT administrations, with a sizeable reduction in respondent burden.},
  note     = {Haley, Stephen M; Gandek, Barbara; Siebens, Hilary; Black-Schaffer, Randie M; Sinclair, Samuel J; Tao, Wei; Coster, Wendy J; Ni, Pengsheng; Jette, Alan M; K02 HD045354-01A1/HD/NICHD NIH HHS/United States; K02 HD45354-01/HD/NICHD NIH HHS/United States; R01 HD043568/HD/NICHD NIH HHS/United States; R01 HD043568-01/HD/NICHD NIH HHS/United States; Research Support, N.I.H., Extramural; United States; Archives of physical medicine and rehabilitation; Arch Phys Med Rehabil. 2008 Feb;89(2):275-83.},
}

@article{231,
  author   = {Kopec, Jacek A. and Badii, Maziar and McKenna, Mario and Lima, Viviane D. and Sayre, Eric C. and Dvorak, Marcel},
  title    = {Computerized adaptive testing in back pain: Validation of the {CAT-5D-QOL}},
  journal  = {Spine},
  volume   = {33},
  number   = {12},
  year     = {2008},
  month    = may,
  pages    = {1384--1390},
  issn     = {0362-2436},
  eissn    = {1528-1159},
  edition  = {2008/05/23},
  keywords = {*Disability Evaluation, *Health Status Indicators, *Quality of Life, Adult, Aged, Algorithms, Back Pain/*diagnosis/psychology, British Columbia, Diagnosis, Computer-Assisted/*standards, Feasibility Studies, Female, Humans, Internet, Male, Middle Aged, Predictive Value of Tests, Questionnaires/*standards, Reproducibility of Results},
  abstract = {STUDY DESIGN: We have conducted an outcome instrument validation study. OBJECTIVE: Our objective was to develop a computerized adaptive test (CAT) to measure 5 domains of health-related quality of life (HRQL) and assess its feasibility, reliability, validity, and efficiency. SUMMARY OF BACKGROUND DATA: Kopec and colleagues have recently developed item response theory based item banks for 5 domains of HRQL relevant to back pain and suitable for CAT applications. The domains are Daily Activities (DAILY), Walking (WALK), Handling Objects (HAND), Pain or Discomfort (PAIN), and Feelings (FEEL). METHODS: An adaptive algorithm was implemented in a web-based questionnaire administration system. The questionnaire included CAT-5D-QOL (5 scales), Modified Oswestry Disability Index (MODI), Roland-Morris Disability Questionnaire (RMDQ), SF-36 Health Survey, and standard clinical and demographic information. Participants were outpatients treated for mechanical back pain at a referral center in Vancouver, Canada. RESULTS: A total of 215 patients completed the questionnaire and 84 completed a retest. On average, patients answered 5.2 items per CAT-5D-QOL scale. Reliability ranged from 0.83 (FEEL) to 0.92 (PAIN) and was 0.92 for the MODI, RMDQ, and Physical Component Summary (PCS-36). The ceiling effect was 0.5\% for PAIN compared with 2\% for MODI and 5\% for RMQ. The CAT-5D-QOL scales correlated as anticipated with other measures of HRQL and discriminated well according to the level of satisfaction with current symptoms, duration of the last episode, sciatica, and disability compensation. The average relative discrimination index was 0.87 for PAIN, 0.67 for DAILY and 0.62 for WALK, compared with 0.89 for MODI, 0.80 for RMDQ, and 0.59 for PCS-36. CONCLUSION: The CAT-5D-QOL is feasible, reliable, valid, and efficient in patients with back pain. This methodology can be recommended for use in back pain research and should improve outcome assessment, facilitate comparisons across studies, and reduce patient burden.},
  note     = {Kopec, Jacek A; Badii, Maziar; McKenna, Mario; Lima, Viviane D; Sayre, Eric C; Dvorak, Marcel; Research Support, Non-U.S. Gov{\textquoteright}t; Validation Studies; United States; Spine; Spine (Phila Pa 1976). 2008 May 20;33(12):1384-90.},
}

@article{287,
  author   = {Mulcahey, M. J. and Haley, Stephen M. and Duffy, Theresa and Ni, Pengsheng and Betz, Randal R.},
  title    = {Measuring physical functioning in children with spinal impairments with computerized adaptive testing},
  journal  = {Journal of Pediatric Orthopedics},
  volume   = {28},
  number   = {3},
  year     = {2008},
  month    = apr # "--" # may,
  pages    = {330--335},
  issn     = {0271-6798},
  edition  = {2008/03/26},
  keywords = {*Disability Evaluation, Adolescent, Child, Child, Preschool, Computer Simulation, Cross-Sectional Studies, Disabled Children/*rehabilitation, Female, Humans, Infant, Kyphosis/*diagnosis/rehabilitation, Male, Prospective Studies, Reproducibility of Results, Scoliosis/*diagnosis/rehabilitation},
  abstract = {BACKGROUND: The purpose of this study was to assess the utility of measuring current physical functioning status of children with scoliosis and kyphosis by applying computerized adaptive testing (CAT) methods. Computerized adaptive testing uses a computer interface to administer the most optimal items based on previous responses, reducing the number of items needed to obtain a scoring estimate. METHODS: This was a prospective study of 77 subjects (0.6-19.8 years) who were seen by a spine surgeon during a routine clinic visit for progress spine deformity. Using a multidimensional version of the Pediatric Evaluation of Disability Inventory CAT program (PEDI-MCAT), we evaluated content range, accuracy and efficiency, known-group validity, concurrent validity with the Pediatric Outcomes Data Collection Instrument, and test-retest reliability in a subsample (n = 16) within a 2-week interval. RESULTS: We found the PEDI-MCAT to have sufficient item coverage in both self-care and mobility content for this sample, although most patients tended to score at the higher ends of both scales. Both the accuracy of PEDI-MCAT scores as compared with a fixed format of the PEDI (r = 0.98 for both mobility and self-care) and test-retest reliability were very high [self-care: intraclass correlation (3,1) = 0.98, mobility: intraclass correlation (3,1) = 0.99]. The PEDI-MCAT took an average of 2.9 minutes for the parents to complete. The PEDI-MCAT detected expected differences between patient groups, and scores on the PEDI-MCAT correlated in expected directions with scores from the Pediatric Outcomes Data Collection Instrument domains. CONCLUSIONS: Use of the PEDI-MCAT to assess the physical functioning status, as perceived by parents of children with complex spinal impairments, seems to be feasible and achieves accurate and efficient estimates of self-care and mobility function. Additional item development will be needed at the higher functioning end of the scale to avoid ceiling effects for older children. LEVEL OF EVIDENCE: This is a level II prospective study designed to establish the utility of computer adaptive testing as an evaluation method in a busy pediatric spine practice.},
  note     = {Mulcahey, M J; Haley, Stephen M; Duffy, Theresa; Pengsheng, Ni; Betz, Randal R; K02 HD045354-01A1/HD/NICHD NIH HHS/United States; United States; Journal of pediatric orthopedics; J Pediatr Orthop. 2008 Apr-May;28(3):330-5.},
}

% The next record continues beyond this section and is left exactly as exported.
@article {152, title = {Using computerized adaptive testing to reduce the burden of mental health assessment}, journal = {Psychiatric Services}, volume = {59}, number = {4}, year = {2008}, note = {Gibbons, Robert DWeiss, David JKupfer, David JFrank, EllenFagiolini, AndreaGrochocinski, Victoria JBhaumik, Dulal KStover, AngelaBock, R DarrellImmekus, Jason CR01-MH-30915/MH/United States NIMHR01-MH-66302/MH/United States NIMHResearch Support, N.I.H., ExtramuralUnited StatesPsychiatric services (Washington, D.C.)Psychiatr Serv. 
2008 Apr;59(4):361-8.}, month = {Apr}, pages = {361-8}, edition = {2008/04/02}, abstract = {OBJECTIVE: This study investigated the combination of item response theory and computerized adaptive testing (CAT) for psychiatric measurement as a means of reducing the burden of research and clinical assessments. METHODS: Data were from 800 participants in outpatient treatment for a mood or anxiety disorder; they completed 616 items of the 626-item Mood and Anxiety Spectrum Scales (MASS) at two times. The first administration was used to design and evaluate a CAT version of the MASS by using post hoc simulation. The second confirmed the functioning of CAT in live testing. RESULTS: Tests of competing models based on item response theory supported the scale{\textquoteright}s bifactor structure, consisting of a primary dimension and four group factors (mood, panic-agoraphobia, obsessive-compulsive, and social phobia). Both simulated and live CAT showed a 95\% average reduction (585 items) in items administered (24 and 30 items, respectively) compared with administration of the full MASS. The correlation between scores on the full MASS and the CAT version was .93. For the mood disorder subscale, differences in scores between two groups of depressed patients--one with bipolar disorder and one without--on the full scale and on the CAT showed effect sizes of .63 (p<.003) and 1.19 (p<.001) standard deviation units, respectively, indicating better discriminant validity for CAT. 
CONCLUSIONS: Instead of using small fixed-length tests, clinicians can create item banks with a large item pool, and a small set of the items most relevant for a given individual can be administered with no loss of information, yielding a dramatic reduction in administration time and patient and clinician burden.}, keywords = {*Diagnosis, Computer-Assisted, *Questionnaires, Adolescent, Adult, Aged, Agoraphobia/diagnosis, Anxiety Disorders/diagnosis, Bipolar Disorder/diagnosis, Female, Humans, Male, Mental Disorders/*diagnosis, Middle Aged, Mood Disorders/diagnosis, Obsessive-Compulsive Disorder/diagnosis, Panic Disorder/diagnosis, Phobic Disorders/diagnosis, Reproducibility of Results, Time Factors}, isbn = {1075-2730 (Print)}, author = {Gibbons, R. D. and Weiss, D. J. and Kupfer, D. J. and Frank, E. and Fagiolini, A. and Grochocinski, V. J. and Bhaumik, D. K. and Stover, A. and Bock, R. D. and Immekus, J. C.} } @article {135, title = {Computerized adaptive personality testing: A review and illustration with the MMPI-2 Computerized Adaptive Version}, journal = {Psychological Assessment}, volume = {19}, number = {1}, year = {2007}, note = {Forbey, Johnathan DBen-Porath, Yossef SResearch Support, Non-U.S. Gov{\textquoteright}tUnited StatesPsychological assessmentPsychol Assess. 2007 Mar;19(1):14-24.}, month = {Mar}, pages = {14-24}, edition = {2007/03/21}, abstract = {Computerized adaptive testing in personality assessment can improve efficiency by significantly reducing the number of items administered to answer an assessment question. Two approaches have been explored for adaptive testing in computerized personality assessment: item response theory and the countdown method. 
In this article, the authors review the literature on each and report the results of an investigation designed to explore the utility, in terms of item and time savings, and validity, in terms of correlations with external criterion measures, of an expanded countdown method-based research version of the Minnesota Multiphasic Personality Inventory-2 (MMPI-2), the MMPI-2 Computerized Adaptive Version (MMPI-2-CA). Participants were 433 undergraduate college students (170 men and 263 women). Results indicated considerable item savings and corresponding time savings for the adaptive testing modalities compared with a conventional computerized MMPI-2 administration. Furthermore, computerized adaptive administration yielded comparable results to computerized conventional administration of the MMPI-2 in terms of both test scores and their validity. Future directions for computerized adaptive personality testing are discussed.}, keywords = {Adolescent, Adult, Diagnosis, Computer-Assisted/*statistics \& numerical data, Female, Humans, Male, MMPI/*statistics \& numerical data, Personality Assessment/*statistics \& numerical data, Psychometrics/statistics \& numerical data, Reference Values, Reproducibility of Results}, isbn = {1040-3590 (Print)}, author = {Forbey, J. D. and Ben-Porath, Y. S.} } @article {86, title = {IRT health outcomes data analysis project: an overview and summary}, journal = {Quality of Life Research}, volume = {16}, number = {Suppl. 1}, year = {2007}, note = {Cook, Karon FTeal, Cayla RBjorner, Jakob BCella, DavidChang, Chih-HungCrane, Paul KGibbons, Laura EHays, Ron DMcHorney, Colleen AOcepek-Welikson, KatjaRaczek, Anastasia ETeresi, Jeanne AReeve, Bryce B1U01AR52171-01/AR/United States NIAMSR01 (CA60068)/CA/United States NCIY1-PC-3028-01/PC/United States NCIResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 
2007;16 Suppl 1:121-32. Epub 2007 Mar 10.}, pages = {121-132}, edition = {2007/03/14}, abstract = {BACKGROUND: In June 2004, the National Cancer Institute and the Drug Information Association co-sponsored the conference, "Improving the Measurement of Health Outcomes through the Applications of Item Response Theory (IRT) Modeling: Exploration of Item Banks and Computer-Adaptive Assessment." A component of the conference was presentation of a psychometric and content analysis of a secondary dataset. OBJECTIVES: A thorough psychometric and content analysis was conducted of two primary domains within a cancer health-related quality of life (HRQOL) dataset. RESEARCH DESIGN: HRQOL scales were evaluated using factor analysis for categorical data, IRT modeling, and differential item functioning analyses. In addition, computerized adaptive administration of HRQOL item banks was simulated, and various IRT models were applied and compared. SUBJECTS: The original data were collected as part of the NCI-funded Quality of Life Evaluation in Oncology (Q-Score) Project. A total of 1,714 patients with cancer or HIV/AIDS were recruited from 5 clinical sites. MEASURES: Items from 4 HRQOL instruments were evaluated: Cancer Rehabilitation Evaluation System-Short Form, European Organization for Research and Treatment of Cancer Quality of Life Questionnaire, Functional Assessment of Cancer Therapy and Medical Outcomes Study Short-Form Health Survey. RESULTS AND CONCLUSIONS: Four lessons learned from the project are discussed: the importance of good developmental item banks, the ambiguity of model fit results, the limits of our knowledge regarding the practical implications of model misfit, and the importance in the measurement of HRQOL of construct definition. With respect to these lessons, areas for future research are suggested. 
The feasibility of developing item banks for broad definitions of health is discussed.}, keywords = {*Data Interpretation, Statistical, *Health Status, *Quality of Life, *Questionnaires, *Software, Female, HIV Infections/psychology, Humans, Male, Neoplasms/psychology, Outcome Assessment (Health Care)/*methods, Psychometrics, Stress, Psychological}, isbn = {0962-9343 (Print)}, author = {Cook, K. F. and Teal, C. R. and Bjorner, J. B. and Cella, D. and Chang, C-H. and Crane, P. K. and Gibbons, L. E. and Hays, R. D. and McHorney, C. A. and Ocepek-Welikson, K. and Raczek, A. E. and Teresi, J. A. and Reeve, B. B.} } @article {328, title = {Psychometric evaluation and calibration of health-related quality of life item banks: plans for the Patient-Reported Outcomes Measurement Information System (PROMIS)}, journal = {Medical Care}, volume = {45}, number = {5 Suppl 1}, year = {2007}, note = {Reeve, Bryce BHays, Ron DBjorner, Jakob BCook, Karon FCrane, Paul KTeresi, Jeanne AThissen, DavidRevicki, Dennis AWeiss, David JHambleton, Ronald KLiu, HonghuGershon, RichardReise, Steven PLai, Jin-sheiCella, DavidPROMIS Cooperative GroupAG015815/AG/United States NIAResearch Support, N.I.H., ExtramuralUnited StatesMedical careMed Care. 2007 May;45(5 Suppl 1):S22-31.}, month = {May}, pages = {S22-31}, edition = {2007/04/20}, abstract = {BACKGROUND: The construction and evaluation of item banks to measure unidimensional constructs of health-related quality of life (HRQOL) is a fundamental objective of the Patient-Reported Outcomes Measurement Information System (PROMIS) project. OBJECTIVES: Item banks will be used as the foundation for developing short-form instruments and enabling computerized adaptive testing. The PROMIS Steering Committee selected 5 HRQOL domains for initial focus: physical functioning, fatigue, pain, emotional distress, and social role participation. 
This report provides an overview of the methods used in the PROMIS item analyses and proposed calibration of item banks. ANALYSES: Analyses include evaluation of data quality (eg, logic and range checking, spread of response distribution within an item), descriptive statistics (eg, frequencies, means), item response theory model assumptions (unidimensionality, local independence, monotonicity), model fit, differential item functioning, and item calibration for banking. RECOMMENDATIONS: Summarized are key analytic issues; recommendations are provided for future evaluations of item banks in HRQOL assessment.}, keywords = {*Health Status, *Information Systems, *Quality of Life, *Self Disclosure, Adolescent, Adult, Aged, Calibration, Databases as Topic, Evaluation Studies as Topic, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Psychometrics, Questionnaires/standards, United States}, isbn = {0025-7079 (Print)}, author = {Reeve, B. B. and Hays, R. D. and Bjorner, J. B. and Cook, K. F. and Crane, P. K. and Teresi, J. A. and Thissen, D. and Revicki, D. A. and Weiss, D. J. and Hambleton, R. K. and Liu, H. and Gershon, R. C. and Reise, S. P. and Lai, J. S. and Cella, D.} } @article {172, title = {Computer adaptive testing improved accuracy and precision of scores over random item selection in a physical functioning item bank}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {11}, year = {2006}, note = {Haley, Stephen MNi, PengshengHambleton, Ronald KSlavin, Mary DJette, Alan MK02 hd45354-01/hd/nichdR01 hd043568/hd/nichdComparative StudyResearch Support, N.I.H., ExtramuralResearch Support, U.S. Gov{\textquoteright}t, Non-P.H.S.EnglandJournal of clinical epidemiologyJ Clin Epidemiol. 2006 Nov;59(11):1174-82. 
Epub 2006 Jul 11.}, month = {Nov}, pages = {1174-82}, edition = {2006/10/10}, abstract = {BACKGROUND AND OBJECTIVE: Measuring physical functioning (PF) within and across postacute settings is critical for monitoring outcomes of rehabilitation; however, most current instruments lack sufficient breadth and feasibility for widespread use. Computer adaptive testing (CAT), in which item selection is tailored to the individual patient, holds promise for reducing response burden, yet maintaining measurement precision. We calibrated a PF item bank via item response theory (IRT), administered items with a post hoc CAT design, and determined whether CAT would improve accuracy and precision of score estimates over random item selection. METHODS: 1,041 adults were interviewed during postacute care rehabilitation episodes in either hospital or community settings. Responses for 124 PF items were calibrated using IRT methods to create a PF item bank. We examined the accuracy and precision of CAT-based scores compared to a random selection of items. RESULTS: CAT-based scores had higher correlations with the IRT-criterion scores, especially with short tests, and resulted in narrower confidence intervals than scores based on a random selection of items; gains, as expected, were especially large for low and high performing adults. CONCLUSION: The CAT design may have important precision and efficiency advantages for point-of-care functional assessment in rehabilitation practice settings.}, keywords = {*Recovery of Function, Activities of Daily Living, Adolescent, Adult, Aged, Aged, 80 and over, Confidence Intervals, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Rehabilitation/*standards, Reproducibility of Results, Software}, isbn = {0895-4356 (Print)}, author = {Haley, S. M. and Ni, P. and Hambleton, R. K. and Slavin, M. D. and Jette, A. 
M.} } @article {176, title = {Computerized adaptive testing for follow-up after discharge from inpatient rehabilitation: I. Activity outcomes}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {87}, number = {8}, year = {2006}, note = {Haley, Stephen MSiebens, HilaryCoster, Wendy JTao, WeiBlack-Schaffer, Randie MGandek, BarbaraSinclair, Samuel JNi, PengshengK0245354-01/phsR01 hd043568/hd/nichdResearch Support, N.I.H., ExtramuralUnited StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2006 Aug;87(8):1033-42.}, month = {Aug}, pages = {1033-42}, edition = {2006/08/01}, abstract = {OBJECTIVE: To examine score agreement, precision, validity, efficiency, and responsiveness of a computerized adaptive testing (CAT) version of the Activity Measure for Post-Acute Care (AM-PAC-CAT) in a prospective, 3-month follow-up sample of inpatient rehabilitation patients recently discharged home. DESIGN: Longitudinal, prospective 1-group cohort study of patients followed approximately 2 weeks after hospital discharge and then 3 months after the initial home visit. SETTING: Follow-up visits conducted in patients{\textquoteright} home setting. PARTICIPANTS: Ninety-four adults who were recently discharged from inpatient rehabilitation, with diagnoses of neurologic, orthopedic, and medically complex conditions. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Summary scores from AM-PAC-CAT, including 3 activity domains of movement and physical, personal care and instrumental, and applied cognition were compared with scores from a traditional fixed-length version of the AM-PAC with 66 items (AM-PAC-66). RESULTS: AM-PAC-CAT scores were in good agreement (intraclass correlation coefficient model 3,1 range, .77-.86) with scores from the AM-PAC-66. On average, the CAT programs required 43\% of the time and 33\% of the items compared with the AM-PAC-66. Both formats discriminated across functional severity groups. 
The standardized response mean (SRM) was greater for the movement and physical fixed form than the CAT; the effect size and SRM of the 2 other AM-PAC domains showed similar sensitivity between CAT and fixed formats. Using patients{\textquoteright} own report as an anchor-based measure of change, the CAT and fixed length formats were comparable in responsiveness to patient-reported change over a 3-month interval. CONCLUSIONS: Accurate estimates for functional activity group-level changes can be obtained from CAT administrations, with a considerable reduction in administration time.}, keywords = {*Activities of Daily Living, *Adaptation, Physiological, *Computer Systems, *Questionnaires, Adult, Aged, Aged, 80 and over, Chi-Square Distribution, Factor Analysis, Statistical, Female, Humans, Longitudinal Studies, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Patient Discharge, Prospective Studies, Rehabilitation/*standards, Subacute Care/*standards}, isbn = {0003-9993 (Print)}, author = {Haley, S. M. and Siebens, H. and Coster, W. J. and Tao, W. and Black-Schaffer, R. M. and Gandek, B. and Sinclair, S. J. and Ni, P.} } @article {352, title = {Computerized adaptive testing of diabetes impact: a feasibility study of Hispanics and non-Hispanics in an active clinic population}, journal = {Quality of Life Research}, volume = {15}, number = {9}, year = {2006}, note = {Schwartz, CarolynWelch, GarrySantiago-Kelley, PaulaBode, RitaSun, Xiaowu1 r43 dk066874-01/dk/niddkResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2006 Nov;15(9):1503-18. Epub 2006 Sep 26.}, month = {Nov}, pages = {1503-18}, edition = {2006/10/13}, abstract = {BACKGROUND: Diabetes is a leading cause of death and disability in the US and is twice as common among Hispanic Americans as non-Hispanics. 
The societal costs of diabetes provide an impetus for developing tools that can improve patient care and delay or prevent diabetes complications. METHODS: We implemented a feasibility study of a Computerized Adaptive Test (CAT) to measure diabetes impact using a sample of 103 English- and 97 Spanish-speaking patients (mean age = 56.5, 66.5\% female) in a community medical center with a high proportion of minority patients (28\% African-American). The 37 items of the Diabetes Impact Survey were translated using forward-backward translation and cognitive debriefing. Participants were randomized to receive either the full-length tool or the Diabetes-CAT first, in the patient{\textquoteright}s native language. RESULTS: The number of items and the amount of time to complete the survey for the CAT was reduced to one-sixth the amount for the full-length tool in both languages, across disease severity. Confirmatory Factor Analysis confirmed that the Diabetes Impact Survey is unidimensional. The Diabetes-CAT demonstrated acceptable internal consistency reliability, construct validity, and discriminant validity in the overall sample, although subgroup analyses suggested that the English sample data evidenced higher levels of reliability and validity than the Spanish sample and issues with discriminant validity in the Spanish sample. Differential Item Function analysis revealed differences in response tendencies by language group in 3 of the 37 items. Participant interviews suggested that the Spanish-speaking patients generally preferred the paper survey to the computer-assisted tool, and were twice as likely to experience difficulties understanding the items.
CONCLUSIONS: While the Diabetes-CAT demonstrated clear advantages in reducing respondent burden as compared to the full-length tool, simplifying the item bank will be necessary for enhancing the feasibility of the Diabetes-CAT for use with low literacy patients.}, keywords = {*Computers, *Hispanic Americans, *Quality of Life, Adult, Aged, Data Collection/*methods, Diabetes Mellitus/*psychology, Feasibility Studies, Female, Humans, Language, Male, Middle Aged}, isbn = {0962-9343 (Print)}, author = {Schwartz, C. and Welch, G. and Santiago-Kelley, P. and Bode, R. and Sun, X.} } @article {237, title = {Factor analysis techniques for assessing sufficient unidimensionality of cancer related fatigue}, journal = {Quality of Life Research}, volume = {15}, number = {7}, year = {2006}, note = {0962-9343 (Print)Journal ArticleResearch Support, N.I.H., Extramural}, month = {Sep}, pages = {1179-90}, abstract = {BACKGROUND: Fatigue is the most common unrelieved symptom experienced by people with cancer. The purpose of this study was to examine whether cancer-related fatigue (CRF) can be summarized using a single score, that is, whether CRF is sufficiently unidimensional for measurement approaches that require or assume unidimensionality. We evaluated this question using factor analysis techniques including the theory-driven bi-factor model. METHODS: Five hundred and fifty five cancer patients from the Chicago metropolitan area completed a 72-item fatigue item bank, covering a range of fatigue-related concerns including intensity, frequency and interference with physical, mental, and social activities. Dimensionality was assessed using exploratory and confirmatory factor analysis (CFA) techniques. RESULTS: Exploratory factor analysis (EFA) techniques identified from 1 to 17 factors. The bi-factor model suggested that CRF was sufficiently unidimensional. CONCLUSIONS: CRF can be considered sufficiently unidimensional for applications that require unidimensionality. 
One such application, item response theory (IRT), will facilitate the development of short-form and computer-adaptive testing. This may further enable practical and accurate clinical assessment of CRF.}, keywords = {*Factor Analysis, Statistical, *Quality of Life, Aged, Chicago, Fatigue/*etiology, Female, Humans, Male, Middle Aged, Neoplasms/*complications, Questionnaires}, author = {Lai, J-S. and Crane, P. K. and Cella, D.} } @article {311, title = {Multidimensional computerized adaptive testing of the EORTC QLQ-C30: basic developments and evaluations}, journal = {Quality of Life Research}, volume = {15}, number = {3}, year = {2006}, note = {Petersen, Morten AaGroenvold, MogensAaronson, NeilFayers, PeterSprangers, MirjamBjorner, Jakob BEuropean Organisation for Research and Treatment of Cancer Quality of Life GroupResearch Support, Non-U.S. Gov{\textquoteright}tNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2006 Apr;15(3):315-29.}, month = {Apr}, pages = {315-29}, edition = {2006/03/21}, abstract = {OBJECTIVE: Self-report questionnaires are widely used to measure health-related quality of life (HRQOL). Ideally, such questionnaires should be adapted to the individual patient and at the same time scores should be directly comparable across patients. This may be achieved using computerized adaptive testing (CAT). Usually, CAT is carried out for a single domain at a time. However, many HRQOL domains are highly correlated. Multidimensional CAT may utilize these correlations to improve measurement efficiency. We investigated the possible advantages and difficulties of multidimensional CAT. STUDY DESIGN AND SETTING: We evaluated multidimensional CAT of three scales from the EORTC QLQ-C30: the physical functioning, emotional functioning, and fatigue scales. Analyses utilised a database with 2958 European cancer patients. 
RESULTS: It was possible to obtain scores for the three domains with five to seven items administered using multidimensional CAT that were very close to the scores obtained using all 12 items and with no or little loss of measurement precision. CONCLUSION: The findings suggest that multidimensional CAT may significantly improve measurement precision and efficiency and encourage further research into multidimensional CAT. Particularly, the estimation of the model underlying the multidimensional CAT and the conceptual aspects need further investigations.}, keywords = {*Quality of Life, *Self Disclosure, Adult, Female, Health Status, Humans, Male, Middle Aged, Questionnaires/*standards, User-Computer Interface}, isbn = {0962-9343 (Print)}, author = {Petersen, M. A. and Groenvold, M. and Aaronson, N. K. and Fayers, P. and Sprangers, M. and Bjorner, J. B.} } @article {184, title = {Simulated computerized adaptive test for patients with shoulder impairments was efficient and produced valid measures of function}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {3}, year = {2006}, note = {0895-4356 (Print)Journal ArticleValidation Studies}, pages = {290-8}, abstract = {BACKGROUND AND OBJECTIVE: To test unidimensionality and local independence of a set of shoulder functional status (SFS) items, develop a computerized adaptive test (CAT) of the items using a rating scale item response theory model (RSM), and compare discriminant validity of measures generated using all items (theta(IRT)) and measures generated using the simulated CAT (theta(CAT)). STUDY DESIGN AND SETTING: We performed a secondary analysis of data collected prospectively during rehabilitation of 400 patients with shoulder impairments who completed 60 SFS items. RESULTS: Factor analytic techniques supported that the 42 SFS items formed a unidimensional scale and were locally independent. Except for five items, which were deleted, the RSM fit the data well. 
The remaining 37 SFS items were used to generate the CAT. On average, 6 items were needed to estimate precise measures of function using the SFS CAT, compared with all 37 SFS items. The theta(IRT) and theta(CAT) measures were highly correlated (r = .96) and resulted in similar classifications of patients. CONCLUSION: The simulated SFS CAT was efficient and produced precise, clinically relevant measures of functional status with good discriminating ability.}, keywords = {*Computer Simulation, *Range of Motion, Articular, Activities of Daily Living, Adult, Aged, Aged, 80 and over, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Prospective Studies, Reproducibility of Results, Research Support, N.I.H., Extramural, Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Shoulder Dislocation/*physiopathology/psychology/rehabilitation, Shoulder Pain/*physiopathology/psychology/rehabilitation, Shoulder/*physiopathology, Sickness Impact Profile, Treatment Outcome}, author = {Hart, D. L. and Cook, K. F. and Mioduski, J. E. and Teal, C. R. and Crane, P. K.} } @article {175, title = {Assessing mobility in children using a computer adaptive testing version of the pediatric evaluation of disability inventory}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {86}, number = {5}, year = {2005}, note = {Haley, Stephen MRaczek, Anastasia ECoster, Wendy JDumas, Helene MFragala-Pinkham, Maria AK02 hd45354-01a1/hd/nichdR43 hd42388-01/hd/nichdResearch Support, N.I.H., ExtramuralResearch Support, U.S. Gov{\textquoteright}t, P.H.S.United StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 
2005 May;86(5):932-9.}, month = {May}, pages = {932-9}, edition = {2005/05/17}, abstract = {OBJECTIVE: To assess score agreement, validity, precision, and response burden of a prototype computerized adaptive testing (CAT) version of the Mobility Functional Skills Scale (Mob-CAT) of the Pediatric Evaluation of Disability Inventory (PEDI) as compared with the full 59-item version (Mob-59). DESIGN: Computer simulation analysis of cross-sectional and longitudinal retrospective data; and cross-sectional prospective study. SETTING: Pediatric rehabilitation hospital, including inpatient acute rehabilitation, day school program, outpatient clinics, community-based day care, preschool, and children{\textquoteright}s homes. PARTICIPANTS: Four hundred sixty-nine children with disabilities and 412 children with no disabilities (analytic sample); 41 children without disabilities and 39 with disabilities (cross-validation sample). INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Summary scores from a prototype Mob-CAT application and versions using 15-, 10-, and 5-item stopping rules; scores from the Mob-59; and number of items and time (in seconds) to administer assessments. RESULTS: Mob-CAT scores from both computer simulations (intraclass correlation coefficient [ICC] range, .94-.99) and field administrations (ICC=.98) were in high agreement with scores from the Mob-59. Using computer simulations of retrospective data, discriminant validity, and sensitivity to change of the Mob-CAT closely approximated that of the Mob-59, especially when using the 15- and 10-item stopping rule versions of the Mob-CAT. The Mob-CAT used no more than 15\% of the items for any single administration, and required 20\% of the time needed to administer the Mob-59. 
CONCLUSIONS: Comparable score estimates for the PEDI mobility scale can be obtained from CAT administrations, with losses in validity and precision for shorter forms, but with a considerable reduction in administration time.}, keywords = {*Computer Simulation, *Disability Evaluation, Adolescent, Child, Child, Preschool, Cross-Sectional Studies, Disabled Children/*rehabilitation, Female, Humans, Infant, Male, Outcome Assessment (Health Care)/*methods, Rehabilitation Centers, Rehabilitation/*standards, Sensitivity and Specificity}, isbn = {0003-9993 (Print)}, author = {Haley, S. M. and Raczek, A. E. and Coster, W. J. and Dumas, H. M. and Fragala-Pinkham, M. A.} } @article {171, title = {A computer adaptive testing approach for assessing physical functioning in children and adolescents}, journal = {Developmental Medicine and Child Neurology}, volume = {47}, number = {2}, year = {2005}, note = {Haley, Stephen MNi, PengshengFragala-Pinkham, Maria ASkrinar, Alison MCorzo, DeyaniraComparative StudyResearch Support, Non-U.S. Gov{\textquoteright}tEnglandDevelopmental medicine and child neurologyDev Med Child Neurol. 2005 Feb;47(2):113-20.}, month = {Feb}, pages = {113-120}, edition = {2005/02/15}, abstract = {The purpose of this article is to demonstrate: (1) the accuracy and (2) the reduction in amount of time and effort in assessing physical functioning (self-care and mobility domains) of children and adolescents using computer-adaptive testing (CAT). A CAT algorithm selects questions directly tailored to the child{\textquoteright}s ability level, based on previous responses. Using a CAT algorithm, a simulation study was used to determine the number of items necessary to approximate the score of a full-length assessment.
We built simulated CAT (5-, 10-, 15-, and 20-item versions) for self-care and mobility domains and tested their accuracy in a normative sample (n=373; 190 males, 183 females; mean age 6y 11mo [SD 4y 2m], range 4mo to 14y 11mo) and a sample of children and adolescents with Pompe disease (n=26; 21 males, 5 females; mean age 6y 1mo [SD 3y 10mo], range 5mo to 14y 10mo). Results indicated that comparable score estimates (based on computer simulations) to the full-length tests can be achieved in a 20-item CAT version for all age ranges and for normative and clinical samples. No more than 13 to 16\% of the items in the full-length tests were needed for any one administration. These results support further consideration of using CAT programs for accurate and efficient clinical assessments of physical functioning.}, keywords = {*Computer Systems, Activities of Daily Living, Adolescent, Age Factors, Child, Child Development/*physiology, Child, Preschool, Computer Simulation, Confidence Intervals, Demography, Female, Glycogen Storage Disease Type II/physiopathology, Health Status Indicators, Humans, Infant, Infant, Newborn, Male, Motor Activity/*physiology, Outcome Assessment (Health Care)/*methods, Reproducibility of Results, Self Care, Sensitivity and Specificity}, isbn = {0012-1622 (Print)}, author = {Haley, S. M. and Ni, P. and Fragala-Pinkham, M. A. and Skrinar, A. M. and Corzo, D.} } @article {121, title = {Data pooling and analysis to build a preliminary item bank: an example using bowel function in prostate cancer}, journal = {Evaluation and the Health Professions}, volume = {28}, number = {2}, year = {2005}, note = {0163-2787 (Print)Journal Article}, pages = {142-59}, abstract = {Assessing bowel function (BF) in prostate cancer can help determine therapeutic trade-offs. We determined the components of BF commonly assessed in prostate cancer studies as an initial step in creating an item bank for clinical and research application. 
We analyzed six archived data sets representing 4,246 men with prostate cancer. Thirty-one items from validated instruments were available for analysis. Items were classified into domains (diarrhea, rectal urgency, pain, bleeding, bother/distress, and other) then subjected to conventional psychometric and item response theory (IRT) analyses. Items fit the IRT model if the ratio between observed and expected item variance was between 0.60 and 1.40. Four of 31 items had inadequate fit in at least one analysis. Poorly fitting items included bleeding (2), rectal urgency (1), and bother/distress (1). A fifth item assessing hemorrhoids was poorly correlated with other items. Our analyses supported four related components of BF: diarrhea, rectal urgency, pain, and bother/distress.},
  keywords = {*Quality of Life, *Questionnaires, Adult, Aged, Data Collection/methods, Humans, Intestine, Large/*physiopathology, Male, Middle Aged, Prostatic Neoplasms/*physiopathology, Psychometrics, Research Support, Non-U.S. Gov{\textquoteright}t, Statistics, Nonparametric},
  author = {Eton, D. T. and Lai, J. S. and Cella, D. and Reeve, B. B. and Talcott, J. A. and Clark, J. A. and McPherson, C. P. and Litwin, M. S. and Moinpour, C. M.}
}

@article {236,
  title = {An item bank was created to improve the measurement of cancer-related fatigue},
  journal = {Journal of Clinical Epidemiology},
  volume = {58},
  number = {2},
  year = {2005},
  note = {Lai, Jin-SheiCella, DavidDineen, KellyBode, RitaVon Roenn, JamieGershon, Richard CShevrin, DanielEnglandJ Clin Epidemiol. 2005 Feb;58(2):190-7.},
  month = {Feb},
  pages = {190-7},
  type = {Multicenter Study},
  edition = {2005/02/01},
  abstract = {OBJECTIVE: Cancer-related fatigue (CRF) is one of the most common unrelieved symptoms experienced by patients. CRF is underrecognized and undertreated due to a lack of clinically sensitive instruments that integrate easily into clinics. 
Modern computerized adaptive testing (CAT) can overcome these obstacles by enabling precise assessment of fatigue without requiring the administration of a large number of questions. A working item bank is essential for development of a CAT platform. The present report describes the building of an operational item bank for use in clinical settings with the ultimate goal of improving CRF identification and treatment. STUDY DESIGN AND SETTING: The sample included 301 cancer patients. Psychometric properties of items were examined by using Rasch analysis, an Item Response Theory (IRT) model. RESULTS AND CONCLUSION: The final bank includes 72 items. These 72 unidimensional items explained 57.5\% of the variance, based on factor analysis results. Excellent internal consistency (alpha=0.99) and acceptable item-total correlation were found (range: 0.51-0.85). The 72 items covered a reasonable range of the fatigue continuum. No significant ceiling effects, floor effects, or gaps were found. A sample short form was created for demonstration purposes. The resulting bank is amenable to the development of a CAT platform.},
  keywords = {Adult, Aged, Aged, 80 and over, Factor Analysis, Statistical, Fatigue/*etiology/psychology, Female, Humans, Male, Middle Aged, Neoplasms/*complications/psychology, Psychometrics, Questionnaires},
  isbn = {0895-4356 (Print)0895-4356 (Linking)},
  author = {Lai, J-S. and Cella, D. and Dineen, K. and Bode, R. and Von Roenn, J. and Gershon, R. C. and Shevrin, D.}
}

@article {357,
  title = {Measuring physical function in patients with complex medical and postsurgical conditions: a computer adaptive approach},
  journal = {American Journal of Physical Medicine and Rehabilitation},
  volume = {84},
  number = {10},
  year = {2005},
  note = {0894-9115 (Print)Comparative StudyJournal ArticleResearch Support, N.I.H., ExtramuralResearch Support, U.S. 
Gov{\textquoteright}t, P.H.S.},
  month = {Oct},
  pages = {741-8},
  abstract = {OBJECTIVE: To examine whether the range of disability in the medically complex and postsurgical populations receiving rehabilitation is adequately sampled by the new Activity Measure--Post-Acute Care (AM-PAC), and to assess whether computer adaptive testing (CAT) can derive valid patient scores using fewer questions. DESIGN: Observational study of 158 subjects (mean age 67.2 yrs) receiving skilled rehabilitation services in inpatient (acute rehabilitation hospitals, skilled nursing facility units) and community (home health services, outpatient departments) settings for recent-onset or worsening disability from medical (excluding neurological) and surgical (excluding orthopedic) conditions. Measures were interviewer-administered activity questions (all patients) and physical functioning portion of the SF-36 (outpatients) and standardized chart items (11 Functional Independence Measure (FIM), 19 Standardized Outcome and Assessment Information Set (OASIS) items, and 22 Minimum Data Set (MDS) items). Rasch modeling analyzed all data and the relationship between person ability estimates and average item difficulty. CAT assessed the ability to derive accurate patient scores using a sample of questions. RESULTS: The 163-item activity item pool covered the range of physical movement and personal and instrumental activities. CAT analysis showed comparable scores between estimates using 10 items or the total item pool. CONCLUSION: The AM-PAC can assess a broad range of function in patients with complex medical illness. 
CAT achieves valid patient scores using fewer questions.},
  keywords = {Activities of Daily Living/*classification, Adult, Aged, Cohort Studies, Continuity of Patient Care, Disability Evaluation, Female, Health Services Research, Humans, Male, Middle Aged, Postoperative Care/*rehabilitation, Prognosis, Recovery of Function, Rehabilitation Centers, Rehabilitation/*standards, Sensitivity and Specificity, Sickness Impact Profile, Treatment Outcome},
  author = {Siebens, H. and Andres, P. L. and Ni, P. and Coster, W. J. and Haley, S. M.}
}

% Entry 357 above: third author is given as surname Ni, initial P. (Pengsheng is the
% given name), the same form this author has in the other entries of this file.
@article {185,
  title = {Simulated computerized adaptive tests for measuring functional status were efficient with good discriminant validity in patients with hip, knee, or foot/ankle impairments},
  journal = {Journal of Clinical Epidemiology},
  volume = {58},
  number = {6},
  year = {2005},
  note = {0895-4356 (Print)Journal ArticleMulticenter StudyValidation Studies},
  pages = {629-38},
  abstract = {BACKGROUND AND OBJECTIVE: To develop computerized adaptive tests (CATs) designed to assess lower extremity functional status (FS) in people with lower extremity impairments using items from the Lower Extremity Functional Scale and compare discriminant validity of FS measures generated using all items analyzed with a rating scale Item Response Theory model (theta(IRT)) and measures generated using the simulated CATs (theta(CAT)). METHODS: Secondary analysis of retrospective intake rehabilitation data. RESULTS: Unidimensionality of items was strong, and local independence of items was adequate. Differential item functioning (DIF) affected item calibration related to body part, that is, hip, knee, or foot/ankle, but DIF did not affect item calibration for symptom acuity, gender, age, or surgical history. Therefore, patients were separated into three body part specific groups. The rating scale model fit all three data sets well. 
Three body part specific CATs were developed: each was 70\% more efficient than using all LEFS items to estimate FS measures. theta(IRT) and theta(CAT) measures discriminated patients by symptom acuity, age, and surgical history in similar ways. theta(CAT) measures were as precise as theta(IRT) measures. CONCLUSION: Body part-specific simulated CATs were efficient and produced precise measures of FS with good discriminant validity.},
  keywords = {*Health Status Indicators, Activities of Daily Living, Adolescent, Adult, Aged, Aged, 80 and over, Ankle Joint/physiopathology, Diagnosis, Computer-Assisted/*methods, Female, Hip Joint/physiopathology, Humans, Joint Diseases/physiopathology/*rehabilitation, Knee Joint/physiopathology, Lower Extremity/*physiopathology, Male, Middle Aged, Research Support, N.I.H., Extramural, Research Support, U.S. Gov{\textquoteright}t, P.H.S., Retrospective Studies},
  author = {Hart, D. L. and Mioduski, J. E. and Stratford, P. W.}
}

% Entry 168: supplement pages carry the letter I as a prefix on both ends of the range.
@article {168,
  title = {Activity outcome measurement for postacute care},
  journal = {Medical Care},
  volume = {42},
  number = {1 Suppl},
  year = {2004},
  note = {0025-7079Journal ArticleMulticenter Study},
  pages = {I49-I61},
  abstract = {BACKGROUND: Efforts to evaluate the effectiveness of a broad range of postacute care services have been hindered by the lack of conceptually sound and comprehensive measures of outcomes. It is critical to determine a common underlying structure before employing current methods of item equating across outcome instruments for future item banking and computer-adaptive testing applications. OBJECTIVE: To investigate the factor structure, reliability, and scale properties of items underlying the Activity domains of the International Classification of Functioning, Disability and Health (ICF) for use in postacute care outcome measurement. 
METHODS: We developed a 41-item Activity Measure for Postacute Care (AM-PAC) that assessed an individual{\textquoteright}s execution of discrete daily tasks in his or her own environment across major content domains as defined by the ICF. We evaluated the reliability and discriminant validity of the prototype AM-PAC in 477 individuals in active rehabilitation programs across 4 rehabilitation settings using factor analyses, tests of item scaling, internal consistency reliability analyses, Rasch item response theory modeling, residual component analysis, and modified parallel analysis. RESULTS: Results from an initial exploratory factor analysis produced 3 distinct, interpretable factors that accounted for 72\% of the variance: Applied Cognition (44\%), Personal Care \& Instrumental Activities (19\%), and Physical \& Movement Activities (9\%); these 3 activity factors were verified by a confirmatory factor analysis. Scaling assumptions were met for each factor in the total sample and across diagnostic groups. Internal consistency reliability was high for the total sample (Cronbach alpha = 0.92 to 0.94), and for specific diagnostic groups (Cronbach alpha = 0.90 to 0.95). Rasch scaling, residual factor, differential item functioning, and modified parallel analyses supported the unidimensionality and goodness of fit of each unique activity domain. 
CONCLUSIONS: This 3-factor model of the AM-PAC can form the conceptual basis for common-item equating and computer-adaptive applications, leading to a comprehensive system of outcome instruments for postacute care settings.},
  keywords = {*Self Efficacy, *Sickness Impact Profile, Activities of Daily Living/*classification/psychology, Adult, Aftercare/*standards/statistics \& numerical data, Aged, Boston, Cognition/physiology, Disability Evaluation, Factor Analysis, Statistical, Female, Human, Male, Middle Aged, Movement/physiology, Outcome Assessment (Health Care)/*methods/statistics \& numerical data, Psychometrics, Questionnaires/standards, Rehabilitation/*standards/statistics \& numerical data, Reproducibility of Results, Sensitivity and Specificity, Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Support, U.S. Gov{\textquoteright}t, P.H.S.},
  author = {Haley, S. M. and Coster, W. J. and Andres, P. L. and Ludlow, L. H. and Ni, P. and Bond, T. L. and Sinclair, S. J. and Jette, A. M.}
}

% Entry 201: a journal paper (journal, volume and pages are present), so it is typed as
% an article; the booklet type does not use the journal field.
@article {201,
  title = {The AMC Linear Disability Score project in a population requiring residential care: psychometric properties},
  journal = {Health and Quality of Life Outcomes},
  volume = {2},
  year = {2004},
  note = {Holman, RebeccaLindeboom, RobertVermeulen, Marinusde Haan, Rob JResearch Support, Non-U.S. Gov{\textquoteright}tValidation StudiesEnglandHealth and quality of life outcomesHealth Qual Life Outcomes. 2004 Aug 3;2:42.},
  month = {Aug 3},
  pages = {42},
  edition = {2004/08/05},
  abstract = {BACKGROUND: Currently there is a lot of interest in the flexible framework offered by item banks for measuring patient relevant outcomes, including functional status. However, there are few item banks, which have been developed to quantify functional status, as expressed by the ability to perform activities of daily life. 
METHOD: This paper examines the psychometric properties of the AMC Linear Disability Score (ALDS) project item bank using an item response theory model and full information factor analysis. Data were collected from 555 respondents on a total of 160 items. RESULTS: Following the analysis, 79 items remained in the item bank. The remaining 81 items were excluded because of: difficulties in presentation (1 item); low levels of variation in response pattern (28 items); significant differences in measurement characteristics for males and females or for respondents under or over 85 years old (26 items); or lack of model fit to the data at item level (26 items). CONCLUSIONS: It is conceivable that the item bank will have different measurement characteristics for other patient or demographic populations. However, these results indicate that the ALDS item bank has sound psychometric properties for respondents in residential care settings and could form a stable base for measuring functional status in a range of situations, including the implementation of computerised adaptive testing of functional status.},
  keywords = {*Disability Evaluation, *Health Status Indicators, Activities of Daily Living/*classification, Adult, Aged, Aged, 80 and over, Data Collection/methods, Female, Humans, Logistic Models, Male, Middle Aged, Netherlands, Pilot Projects, Probability, Psychometrics/*instrumentation, Questionnaires/standards, Residential Facilities/*utilization, Severity of Illness Index},
  isbn = {1477-7525 (Electronic)1477-7525 (Linking)},
  author = {Holman, R. and Lindeboom, R. and Vermeulen, M. and de Haan, R. J.}
}

% Entry 8: full journal title, matching the citation string kept in its note field
% (Topics in stroke rehabilitation / Top Stroke Rehabil).
@article {8,
  title = {Computer adaptive testing: a strategy for monitoring stroke rehabilitation across settings},
  journal = {Topics in Stroke Rehabilitation},
  volume = {11},
  number = {2},
  year = {2004},
  note = {Andres, Patricia LBlack-Schaffer, Randie MNi, PengshengHaley, Stephen MR01 hd43568/hd/nichdEvaluation StudiesResearch Support, U.S. 
Gov{\textquoteright}t, Non-P.H.S.Research Support, U.S. Gov{\textquoteright}t, P.H.S.United StatesTopics in stroke rehabilitationTop Stroke Rehabil. 2004 Spring;11(2):33-9.},
  month = {Spring},
  pages = {33-39},
  edition = {2004/05/01},
  abstract = {Current functional assessment instruments in stroke rehabilitation are often setting-specific and lack precision, breadth, and/or feasibility. Computer adaptive testing (CAT) offers a promising potential solution by providing a quick, yet precise, measure of function that can be used across a broad range of patient abilities and in multiple settings. CAT technology yields a precise score by selecting very few relevant items from a large and diverse item pool based on each individual{\textquoteright}s responses. We demonstrate the potential usefulness of a CAT assessment model with a cross-sectional sample of persons with stroke from multiple rehabilitation settings.},
  keywords = {*Computer Simulation, *User-Computer Interface, Adult, Aged, Aged, 80 and over, Cerebrovascular Accident/*rehabilitation, Disabled Persons/*classification, Female, Humans, Male, Middle Aged, Monitoring, Physiologic/methods, Severity of Illness Index, Task Performance and Analysis},
  isbn = {1074-9357 (Print)},
  author = {Andres, P. L. and Black-Schaffer, R. M. and Ni, P. and Haley, S. M.}
}

% Entry 147: word breaks lost at the original line ends of the abstract are restored.
@article {147,
  title = {Computerized adaptive measurement of depression: A simulation study},
  journal = {BMC Psychiatry},
  volume = {4},
  number = {1},
  year = {2004},
  pages = {13-23},
  abstract = {Background: Efficient, accurate instruments for measuring depression are increasingly important in clinical practice. We developed a computerized adaptive version of the Beck Depression Inventory (BDI). We examined its efficiency and its usefulness in identifying Major Depressive Episodes (MDE) and in measuring depression severity. Methods: Subjects were 744 participants in research studies in which each subject completed both the BDI and the SCID. 
In addition, 285 patients completed the Hamilton Depression Rating Scale. Results: The adaptive BDI had an AUC as an indicator of a SCID diagnosis of MDE of 88\%, equivalent to the full BDI. The adaptive BDI asked fewer questions than the full BDI (5.6 versus 21 items). The adaptive latent depression score correlated r = .92 with the BDI total score and the latent depression score correlated more highly with the Hamilton (r = .74) than the BDI total score did (r = .70). Conclusions: Adaptive testing for depression may provide greatly increased efficiency without loss of accuracy in identifying MDE or in measuring depression severity.},
  keywords = {*Computer Simulation, Adult, Algorithms, Area Under Curve, Comparative Study, Depressive Disorder/*diagnosis/epidemiology/psychology, Diagnosis, Computer-Assisted/*methods/statistics \& numerical data, Factor Analysis, Statistical, Female, Humans, Internet, Male, Mass Screening/methods, Patient Selection, Personality Inventory/*statistics \& numerical data, Pilot Projects, Prevalence, Psychiatric Status Rating Scales/*statistics \& numerical data, Psychometrics, Research Support, Non-U.S. Gov{\textquoteright}t, Research Support, U.S. Gov{\textquoteright}t, P.H.S., Severity of Illness Index, Software},
  author = {Gardner, W. and Shear, K. and Kelleher, K. J. and Pajer, K. A. and Mammen, O. and Buysse, D. and Frank, E.}
}

% Entry 87: supplement pages carry the letter I as a prefix on both ends of the range.
@article {87,
  title = {Refining the conceptual basis for rehabilitation outcome measurement: personal care and instrumental activities domain},
  journal = {Medical Care},
  volume = {42},
  number = {1 Suppl},
  year = {2004},
  note = {0025-7079Journal Article},
  month = {Jan},
  pages = {I62-I72},
  abstract = {BACKGROUND: Rehabilitation outcome measures routinely include content on performance of daily activities; however, the conceptual basis for item selection is rarely specified. 
These instruments differ significantly in format, number, and specificity of daily activity items and in the measurement dimensions and type of scale used to specify levels of performance. We propose that a requirement for upper limb and hand skills underlies many activities of daily living (ADL) and instrumental activities of daily living (IADL) items in current instruments, and that items selected based on this definition can be placed along a single functional continuum. OBJECTIVE: To examine the dimensional structure and content coverage of a Personal Care and Instrumental Activities item set and to examine the comparability of items from existing instruments and a set of new items as measures of this domain. METHODS: Participants (N = 477) from 3 different disability groups and 4 settings representing the continuum of postacute rehabilitation care were administered the newly developed Activity Measure for Post-Acute Care (AM-PAC), the SF-8, and an additional setting-specific measure: FIM (in-patient rehabilitation); MDS (skilled nursing facility); MDS-PAC (postacute settings); OASIS (home care); or PF-10 (outpatient clinic). Rasch (partial-credit model) analyses were conducted on a set of 62 items covering the Personal Care and Instrumental domain to examine item fit, item functioning, and category difficulty estimates and unidimensionality. RESULTS: After removing 6 misfitting items, the remaining 56 items fit acceptably along the hypothesized continuum. Analyses yielded different difficulty estimates for the maximum score (eg, "Independent performance") for items with comparable content from different instruments. Items showed little differential item functioning across age, diagnosis, or severity groups, and 92\% of the participants fit the model. 
CONCLUSIONS: ADL and IADL items from existing rehabilitation outcomes instruments that depend on skilled upper limb and hand use can be located along a single continuum, along with the new personal care and instrumental items of the AM-PAC addressing gaps in content. Results support the validity of the proposed definition of the Personal Care and Instrumental Activities dimension of function as a guide for future development of rehabilitation outcome instruments, such as linked, setting-specific short forms and computerized adaptive testing approaches.},
  keywords = {*Self Efficacy, *Sickness Impact Profile, Activities of Daily Living/*classification/psychology, Adult, Aged, Aged, 80 and over, Disability Evaluation, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods/statistics \& numerical data, Questionnaires/*standards, Recovery of Function/physiology, Rehabilitation/*standards/statistics \& numerical data, Reproducibility of Results, Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Research Support, U.S. Gov{\textquoteright}t, P.H.S., Sensitivity and Specificity},
  author = {Coster, W. J. and Haley, S. M. and Andres, P. L. and Ludlow, L. H. and Bond, T. L. and Ni, P. S.}
}

@article {31,
  title = {The feasibility of applying item response theory to measures of migraine impact: a re-analysis of three clinical studies},
  journal = {Quality of Life Research},
  volume = {12},
  number = {8},
  year = {2003},
  note = {0962-9343Journal Article},
  pages = {887-902},
  abstract = {BACKGROUND: Item response theory (IRT) is a powerful framework for analyzing multiitem scales and is central to the implementation of computerized adaptive testing. OBJECTIVES: To explain the use of IRT to examine measurement properties and to apply IRT to a questionnaire for measuring migraine impact--the Migraine Specific Questionnaire (MSQ). 
METHODS: Data from three clinical studies that employed the MSQ-version 1 were analyzed by confirmatory factor analysis for categorical data and by IRT modeling. RESULTS: Confirmatory factor analyses showed very high correlations between the factors hypothesized by the original test constructions. Further, high item loadings on one common factor suggest that migraine impact may be adequately assessed by only one score. IRT analyses of the MSQ were feasible and provided several suggestions as to how to improve the items and in particular the response choices. Out of 15 items, 13 showed adequate fit to the IRT model. In general, IRT scores were strongly associated with the scores proposed by the original test developers and with the total item sum score. Analysis of response consistency showed that more than 90\% of the patients answered consistently according to a unidimensional IRT model. For the remaining patients, scores on the dimension of emotional function were less strongly related to the overall IRT scores that mainly reflected role limitations. Such response patterns can be detected easily using response consistency indices. Analysis of test precision across score levels revealed that the MSQ was most precise at one standard deviation worse than the mean impact level for migraine patients that are not in treatment. Thus, gains in test precision can be achieved by developing items aimed at less severe levels of migraine impact. CONCLUSIONS: IRT proved useful for analyzing the MSQ. The approach warrants further testing in a more comprehensive item pool for headache impact that would enable computerized adaptive testing.},
  keywords = {*Sickness Impact Profile, Adolescent, Adult, Aged, Comparative Study, Cost of Illness, Factor Analysis, Statistical, Feasibility Studies, Female, Human, Male, Middle Aged, Migraine/*psychology, Models, Psychological, Psychometrics/instrumentation/*methods, Quality of Life/*psychology, Questionnaires, Support, Non-U.S. 
Gov{\textquoteright}t},
  author = {Bjorner, J. B. and Kosinski, M. and Ware, Jr., J. E.}
}

% Entry 31 above: a name suffix goes between surname and initials (Last, Jr., First);
% with the suffix last, the initials would be read as the suffix.
% Entry 238 below: the title verb is computerize, parallel to improve and shorten.
@article {238,
  title = {Item banking to improve, shorten and computerize self-reported fatigue: an illustration of steps to create a core item bank from the FACIT-Fatigue Scale},
  journal = {Quality of Life Research},
  volume = {12},
  number = {5},
  year = {2003},
  note = {0962-9343Journal Article},
  month = {Aug},
  pages = {485-501},
  abstract = {Fatigue is a common symptom among cancer patients and the general population. Due to its subjective nature, fatigue has been difficult to effectively and efficiently assess. Modern computerized adaptive testing (CAT) can enable precise assessment of fatigue using a small number of items from a fatigue item bank. CAT enables brief assessment by selecting questions from an item bank that provide the maximum amount of information given a person{\textquoteright}s previous responses. This article illustrates steps to prepare such an item bank, using 13 items from the Functional Assessment of Chronic Illness Therapy Fatigue Subscale (FACIT-F) as the basis. Samples included 1022 cancer patients and 1010 people from the general population. An Item Response Theory (IRT)-based rating scale model, a polytomous extension of the Rasch dichotomous model was utilized. Nine items demonstrating acceptable psychometric properties were selected and positioned on the fatigue continuum. The fatigue levels measured by these nine items along with their response categories covered 66.8\% of the general population and 82.6\% of the cancer patients. Although the operational CAT algorithms to handle polytomously scored items are still in progress, we illustrated how CAT may work by using nine core items to measure level of fatigue. Using this illustration, a fatigue measure comparable to its full-length 13-item scale administration was obtained using four items. 
The resulting item bank can serve as a core to which will be added a psychometrically sound and operational item bank covering the entire fatigue continuum.},
  keywords = {*Health Status Indicators, *Questionnaires, Adult, Fatigue/*diagnosis/etiology, Female, Humans, Male, Middle Aged, Neoplasms/complications, Psychometrics, Research Support, Non-U.S. Gov{\textquoteright}t, Research Support, U.S. Gov{\textquoteright}t, P.H.S., Sickness Impact Profile},
  author = {Lai, J-S. and Crane, P. K. and Cella, D. and Chang, C-H. and Bode, R. K. and Heinemann, A. W.}
}

@article {305,
  title = {Assessing tobacco beliefs among youth using item response theory models},
  journal = {Drug and Alcohol Dependence},
  volume = {68},
  number = {Suppl 1},
  year = {2002},
  note = {0376-8716Journal Article},
  month = {Nov},
  pages = {S21-S39},
  abstract = {Successful intervention research programs to prevent adolescent smoking require well-chosen, psychometrically sound instruments for assessing smoking prevalence and attitudes. Twelve thousand eight hundred and ten adolescents were surveyed about their smoking beliefs as part of the Teenage Attitudes and Practices Survey project, a prospective cohort study of predictors of smoking initiation among US adolescents. Item response theory (IRT) methods are used to frame a discussion of questions that a researcher might ask when selecting an optimal item set. IRT methods are especially useful for choosing items during instrument development, trait scoring, evaluating item functioning across groups, and creating optimal item subsets for use in specialized applications such as computerized adaptive testing. Data analytic steps for IRT modeling are reviewed for evaluating item quality and differential item functioning across subgroups of gender, age, and smoking status. 
Implications and challenges in the use of these methods for tobacco onset research and for assessing the developmental trajectories of smoking among youth are discussed.},
  keywords = {*Attitude to Health, *Culture, *Health Behavior, *Questionnaires, Adolescent, Adult, Child, Female, Humans, Male, Models, Statistical, Smoking/*epidemiology},
  author = {Panter, A. T. and Reeve, B. B.}
}

@article {187,
  title = {Development of an index of physical functional health status in rehabilitation},
  journal = {Archives of Physical Medicine and Rehabilitation},
  volume = {83},
  number = {5},
  year = {2002},
  note = {0003-9993 (Print)Journal Article},
  month = {May},
  pages = {655-65},
  abstract = {OBJECTIVE: To describe (1) the development of an index of physical functional health status (FHS) and (2) its hierarchical structure, unidimensionality, reproducibility of item calibrations, and practical application. DESIGN: Rasch analysis of existing data sets. SETTING: A total of 715 acute, orthopedic outpatient centers and 62 long-term care facilities in 41 states participating with Focus On Therapeutic Outcomes, Inc. PATIENTS: A convenience sample of 92,343 patients (40\% male; mean age +/- standard deviation [SD], 48+/-17y; range, 14-99y) seeking rehabilitation between 1993 and 1999. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Patients completed self-report health status surveys at admission and discharge. The Medical Outcomes Study 36-Item Short-Form Health Survey{\textquoteright}s physical functioning scale (PF-10) is the foundation of the physical FHS. The Oswestry Low Back Pain Disability Questionnaire, Neck Disability Index, Lysholm Knee Questionnaire, items pertinent to patients with upper-extremity impairments, and items pertinent to patients with more involved neuromusculoskeletal impairments were cocalibrated into the PF-10. 
RESULTS: The final FHS item bank contained 36 items (patient separation, 2.3; root mean square measurement error, 5.9; mean square +/- SD infit, 0.9+/-0.5; outfit, 0.9+/-0.9). Analyses supported empirical item hierarchy, unidimensionality, reproducibility of item calibrations, and content and construct validity of the FHS-36. CONCLUSIONS: Results support the reliability and validity of FHS-36 measures in the present sample. Analyses show the potential for a dynamic, computer-controlled, adaptive survey for FHS assessment applicable for group analysis and clinical decision making for individual patients.},
  keywords = {*Health Status Indicators, *Rehabilitation Centers, Adolescent, Adult, Aged, Aged, 80 and over, Female, Health Surveys, Humans, Male, Middle Aged, Musculoskeletal Diseases/*physiopathology/*rehabilitation, Nervous System Diseases/*physiopathology/*rehabilitation, Physical Fitness/*physiology, Recovery of Function/physiology, Reproducibility of Results, Retrospective Studies},
  author = {Hart, D. L. and Wright, B. D.}
}

@article {146,
  title = {Multidimensional adaptive testing for mental health problems in primary care},
  journal = {Medical Care},
  volume = {40},
  number = {9},
  year = {2002},
  note = {Gardner, WilliamKelleher, Kelly JPajer, Kathleen AMCJ-177022/PHS HHS/MH30915/MH/NIMH NIH HHS/MH50629/MH/NIMH NIH HHS/Med Care. 2002 Sep;40(9):812-23.},
  month = {Sep},
  pages = {812-23},
  edition = {2002/09/10},
  abstract = {OBJECTIVES: Efficient and accurate instruments for assessing child psychopathology are increasingly important in clinical practice and research. For example, screening in primary care settings can identify children and adolescents with disorders that may otherwise go undetected. However, primary care offices are notorious for the brevity of visits and screening must not burden patients or staff with long questionnaires. One solution is to shorten assessment instruments, but dropping questions typically makes an instrument less accurate. 
An alternative is adaptive testing, in which a computer selects the items to be asked of a patient based on the patient{\textquoteright}s previous responses. This research used a simulation to test a child mental health screen based on this technology. RESEARCH DESIGN: Using half of a large sample of data, a computerized version was developed of the Pediatric Symptom Checklist (PSC), a parental-report psychosocial problem screen. With the unused data, a simulation was conducted to determine whether the Adaptive PSC can reproduce the results of the full PSC with greater efficiency. SUBJECTS: PSCs were completed by parents on 21,150 children seen in a national sample of primary care practices. RESULTS: Four latent psychosocial problem dimensions were identified through factor analysis: internalizing problems, externalizing problems, attention problems, and school problems. A simulated adaptive test measuring these traits asked an average of 11.6 questions per patient, and asked five or fewer questions for 49\% of the sample. There was high agreement between the adaptive test and the full (35-item) PSC: only 1.3\% of screening decisions were discordant (kappa = 0.93). This agreement was higher than that obtained using a comparable length (12-item) short-form PSC (3.2\% of decisions discordant; kappa = 0.84). CONCLUSIONS: Multidimensional adaptive testing may be an accurate and efficient technology for screening for mental health problems in primary care settings.},
  keywords = {Adolescent, Child, Child Behavior Disorders/*diagnosis, Child Health Services/*organization \& administration, Factor Analysis, Statistical, Female, Humans, Linear Models, Male, Mass Screening/*methods, Parents, Primary Health Care/*organization \& administration},
  isbn = {0025-7079 (Print)0025-7079 (Linking)},
  author = {Gardner, W. and Kelleher, K. J. and Pajer, K. 
A.} } @article {36, title = {An examination of the comparative reliability, validity, and accuracy of performance ratings made using computerized adaptive rating scales}, journal = {Journal of Applied Psychology}, volume = {86}, number = {5}, year = {2001}, note = {214803450021-9010Journal ArticleValidation Studies}, pages = {965-973}, abstract = {This laboratory research compared the reliability, validity, and accuracy of a computerized adaptive rating scale (CARS) format and 2 relatively common and representative rating formats. The CARS is a paired-comparison rating task that uses adaptive testing principles to present pairs of scaled behavioral statements to the rater to iteratively estimate a ratee{\textquoteright}s effectiveness on 3 dimensions of contextual performance. Videotaped vignettes of 6 office workers were prepared, depicting prescripted levels of contextual performance, and 112 subjects rated these vignettes using the CARS format and one or the other competing format. Results showed 23\%-37\% lower standard errors of measurement for the CARS format. In addition, validity was significantly higher for the CARS format (d = .18), and Cronbach{\textquoteright}s accuracy coefficients showed significantly higher accuracy, with a median effect size of .08. The discussion focuses on possible reasons for the results.}, keywords = {*Computer Simulation, *Employee Performance Appraisal, *Personnel Selection, Adult, Automatic Data Processing, Female, Human, Male, Reproducibility of Results, Sensitivity and Specificity, Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Task Performance and Analysis, Video Recording}, author = {Borman, W. C. and Buck, D. E. and Hanson, M. A. and Motowidlo, S. J. and Stark, S. 
and F Drasgow} } @article {21, title = {NCLEX-RN performance: predicting success on the computerized examination}, journal = {Journal of Professional Nursing}, volume = {17}, number = {4}, year = {2001}, note = {8755-7223Journal Article}, month = {Jul-Aug}, pages = {158-165}, abstract = {Since the adoption of the Computerized Adaptive Testing (CAT) format of the National Certification Licensure Examination for Registered Nurses (NCLEX-RN), no studies have been reported in the literature on predictors of successful performance by baccalaureate nursing graduates on the licensure examination. In this study, a discriminant analysis was used to identify which of 21 variables can be significant predictors of success on the CAT NCLEX-RN. The convenience sample consisted of 289 individuals who graduated from a baccalaureate nursing program between 1995 and 1998. Seven significant predictor variables were identified. The total number of C+ or lower grades earned in nursing theory courses was the best predictor, followed by grades in several individual nursing courses. More than 93 per cent of graduates were correctly classified. Ninety-four per cent of NCLEX "passes" were correctly classified, as were 92 per cent of NCLEX failures. This degree of accuracy in classifying CAT NCLEX-RN failures represents a marked improvement over results reported in previous studies of licensure examinations, and suggests the discriminant function will be helpful in identifying future students in danger of failure. J Prof Nurs 17:158-165, 2001.}, keywords = {*Education, Nursing, Baccalaureate, *Educational Measurement, *Licensure, Adult, Female, Humans, Male, Predictive Value of Tests, Software}, author = {Beeman, P. B. and Waterhouse, J. K.} } @article {28, title = {Competency gradient for child-parent centers}, journal = {Journal of Outcomes Measurement}, volume = {3}, number = {1}, year = {1999}, note = {1090-655X (Print)Journal ArticleResearch Support, U.S. 
Gov{\textquoteright}t, P.H.S.}, pages = {35-52}, abstract = {This report describes an implementation of the Rasch model during the longitudinal evaluation of a federally-funded early childhood preschool intervention program. An item bank is described for operationally defining a psychosocial construct called community life-skills competency, an expected teenage outcome of the preschool intervention. This analysis examined the position of teenage students on this scale structure, and investigated a pattern of cognitive operations necessary for students to pass community life-skills test items. Then this scale structure was correlated with nationally standardized reading and math achievement scores, teacher ratings, and school records to assess its validity as a measure of the community-related outcome goal for this intervention. The results show a functional relationship between years of early intervention and magnitude of effect on the life-skills competency variable.}, keywords = {*Models, Statistical, Activities of Daily Living/classification/psychology, Adolescent, Chicago, Child, Child, Preschool, Early Intervention (Education)/*statistics \& numerical data, Female, Follow-Up Studies, Humans, Male, Outcome and Process Assessment (Health Care)/*statistics \& numerical data}, author = {Bezruczko, N.} } @article {144, title = {A study of psychologically optimal level of item difficulty}, journal = {Shinrigaku Kenkyu}, volume = {65}, number = {6}, year = {1995}, note = {Fujimori, SClinical TrialControlled Clinical TrialEnglish AbstractJapanShinrigaku kenkyu : The Japanese journal of psychologyShinrigaku Kenkyu. 1995 Feb;65(6):446-53.}, month = {Feb}, pages = {446-53}, edition = {1995/02/01}, abstract = {For the purpose of selecting items in a test, this study presented a viewpoint of psychologically optimal difficulty level, as well as measurement efficiency, of items. 
A paper-and-pencil test (P \& P) composed of hard, moderate and easy subtests was administered to 298 students at a university. A computerized adaptive test (CAT) was also administered to 79 students. The items of both tests were selected from Shiba{\textquoteright}s Word Meaning Comprehension Test, for which the estimates of parameters of two-parameter item response model were available. The results of P \& P research showed that the psychologically optimal success level would be such that the proportion of right answers is somewhere between .75 and .85. A similar result was obtained from CAT research, where the proportion of about .8 might be desirable. Traditionally a success rate of .5 has been recommended in adaptive testing. In this study, however, it was suggested that the items of such level would be too hard psychologically for many examinees.}, keywords = {*Adaptation, Psychological, *Psychological Tests, Adult, Female, Humans, Male}, isbn = {0021-5236 (Print)0021-5236 (Linking)}, author = {Fujimori, S.} }