% Journal-article records, listed newest first (2009 down to 1999).
% Layout conventions: one field per line; page ranges written in full with
% "--"; months given as the standard BibTeX macros (jan ... dec); serial
% numbers stored in `issn`. The `note` fields hold export metadata (author
% names, grant numbers, publication types, country, abbreviated citation)
% as "; "-separated items.

@article{170,
  author   = {Haley, S. M. and Ni, P. and Dumas, H. M. and Fragala-Pinkham, M. A. and Hambleton, R. K. and Montpetit, K. and Bilodeau, N. and Gorton, G. E. and Watson, K. and Tucker, C. A.},
  title    = {Measuring global physical health in children with cerebral palsy: Illustration of a multidimensional bi-factor model and computerized adaptive testing},
  journal  = {Quality of Life Research},
  volume   = {18},
  number   = {3},
  year     = {2009},
  month    = apr,
  pages    = {359--370},
  issn     = {0962-9343},
  edition  = {2009/02/18},
  keywords = {*Computer Simulation, *Health Status, *Models, Statistical, Adaptation, Psychological, Adolescent, Cerebral Palsy/*physiopathology, Child, Child, Preschool, Factor Analysis, Statistical, Female, Humans, Male, Massachusetts, Pennsylvania, Questionnaires, Young Adult},
  abstract = {PURPOSE: The purposes of this study were to apply a bi-factor model for the determination of test dimensionality and a multidimensional CAT using computer simulations of real data for the assessment of a new global physical health measure for children with cerebral palsy (CP). METHODS: Parent respondents of 306 children with cerebral palsy were recruited from four pediatric rehabilitation hospitals and outpatient clinics. We compared confirmatory factor analysis results across four models: (1) one-factor unidimensional; (2) two-factor multidimensional (MIRT); (3) bi-factor MIRT with fixed slopes; and (4) bi-factor MIRT with varied slopes. We tested whether the general and content (fatigue and pain) person score estimates could discriminate across severity and types of CP, and whether score estimates from a simulated CAT were similar to estimates based on the total item bank, and whether they correlated as expected with external measures. RESULTS: Confirmatory factor analysis suggested separate pain and fatigue sub-factors; all 37 items were retained in the analyses. From the bi-factor MIRT model with fixed slopes, the full item bank scores discriminated across levels of severity and types of CP, and compared favorably to external instruments. CAT scores based on 10- and 15-item versions accurately captured the global physical health scores. CONCLUSIONS: The bi-factor MIRT CAT application, especially the 10- and 15-item versions, yielded accurate global physical health scores that discriminated across known severity groups and types of CP, and correlated as expected with concurrent measures. The CATs have potential for collecting complex data on the physical health of children with CP in an efficient manner.},
  note     = {Haley, Stephen M; Ni, Pengsheng; Dumas, Helene M; Fragala-Pinkham, Maria A; Hambleton, Ronald K; Montpetit, Kathleen; Bilodeau, Nathalie; Gorton, George E; Watson, Kyle; Tucker, Carole A; K02 HD045354-01A1/HD/NICHD NIH HHS/United States; K02 HD45354-01A1/HD/NICHD NIH HHS/United States; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov{\textquoteright}t; Netherlands; Quality of life research : an international journal of quality of life aspects of treatment, care and rehabilitation; Qual Life Res. 2009 Apr;18(3):359-70. Epub 2009 Feb 17.},
}

@article{88,
  author   = {Coster, W. J. and Haley, S. M. and Ni, P. and Dumas, H. M. and Fragala-Pinkham, M. A.},
  title    = {Assessing self-care and social function using a computer adaptive testing version of the pediatric evaluation of disability inventory},
  journal  = {Archives of Physical Medicine and Rehabilitation},
  volume   = {89},
  number   = {4},
  year     = {2008},
  month    = apr,
  pages    = {622--629},
  issn     = {0003-9993},
  eissn    = {1532-821X},
  edition  = {2008/04/01},
  keywords = {*Disability Evaluation, *Social Adjustment, Activities of Daily Living, Adolescent, Age Factors, Child, Child, Preschool, Computer Simulation, Cross-Over Studies, Disabled Children/*rehabilitation, Female, Follow-Up Studies, Humans, Infant, Male, Outcome Assessment (Health Care), Reference Values, Reproducibility of Results, Retrospective Studies, Risk Factors, Self Care/*standards/trends, Sex Factors, Sickness Impact Profile},
  abstract = {OBJECTIVE: To examine score agreement, validity, precision, and response burden of a prototype computer adaptive testing (CAT) version of the self-care and social function scales of the Pediatric Evaluation of Disability Inventory compared with the full-length version of these scales. DESIGN: Computer simulation analysis of cross-sectional and longitudinal retrospective data; cross-sectional prospective study. SETTING: Pediatric rehabilitation hospital, including inpatient acute rehabilitation, day school program, outpatient clinics; community-based day care, preschool, and children{\textquoteright}s homes. PARTICIPANTS: Children with disabilities (n=469) and 412 children with no disabilities (analytic sample); 38 children with disabilities and 35 children without disabilities (cross-validation sample). INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Summary scores from prototype CAT applications of each scale using 15-, 10-, and 5-item stopping rules; scores from the full-length self-care and social function scales; time (in seconds) to complete assessments and respondent ratings of burden. RESULTS: Scores from both computer simulations and field administration of the prototype CATs were highly consistent with scores from full-length administration (r range, .94-.99). Using computer simulation of retrospective data, discriminant validity, and sensitivity to change of the CATs closely approximated that of the full-length scales, especially when the 15- and 10-item stopping rules were applied. In the cross-validation study the time to administer both CATs was 4 minutes, compared with over 16 minutes to complete the full-length scales. CONCLUSIONS: Self-care and social function score estimates from CAT administration are highly comparable with those obtained from full-length scale administration, with small losses in validity and precision and substantial decreases in administration time.},
  note     = {Coster, Wendy J; Haley, Stephen M; Ni, Pengsheng; Dumas, Helene M; Fragala-Pinkham, Maria A; K02 HD45354-01A1/HD/NICHD NIH HHS/United States; R41 HD052318-01A1/HD/NICHD NIH HHS/United States; R43 HD42388-01/HD/NICHD NIH HHS/United States; Comparative Study; Research Support, N.I.H., Extramural; United States; Archives of physical medicine and rehabilitation; Arch Phys Med Rehabil. 2008 Apr;89(4):622-9.},
}

@article{5,
  author   = {Allen, D. D. and Ni, P. and Haley, S. M.},
  title    = {Efficiency and sensitivity of multidimensional computerized adaptive testing of pediatric physical functioning},
  journal  = {Disability \& Rehabilitation},
  volume   = {30},
  number   = {6},
  year     = {2008},
  pages    = {479--484},
  issn     = {0963-8288},
  edition  = {2008/02/26},
  keywords = {*Disability Evaluation, Child, Computers, Disabled Children/*classification/rehabilitation, Efficiency, Humans, Outcome Assessment (Health Care), Psychometrics, Reproducibility of Results, Retrospective Studies, Self Care, Sensitivity and Specificity},
  abstract = {PURPOSE: Computerized adaptive tests (CATs) have efficiency advantages over fixed-length tests of physical functioning but may lose sensitivity when administering extremely low numbers of items. Multidimensional CATs may efficiently improve sensitivity by capitalizing on correlations between functional domains. Using a series of empirical simulations, we assessed the efficiency and sensitivity of multidimensional CATs compared to a longer fixed-length test. METHOD: Parent responses to the Pediatric Evaluation of Disability Inventory before and after intervention for 239 children at a pediatric rehabilitation hospital provided the data for this retrospective study. Reliability, effect size, and standardized response mean were compared between full-length self-care and mobility subscales and simulated multidimensional CATs with stopping rules at 40, 30, 20, and 10 items. RESULTS: Reliability was lowest in the 10-item CAT condition for the self-care (r = 0.85) and mobility (r = 0.79) subscales; all other conditions had high reliabilities (r > 0.94). All multidimensional CAT conditions had equivalent levels of sensitivity compared to the full set condition for both domains. CONCLUSIONS: Multidimensional CATs efficiently retain the sensitivity of longer fixed-length measures even with 5 items per dimension (10-item CAT condition). Measuring physical functioning with multidimensional CATs could enhance sensitivity following intervention while minimizing response burden.},
  note     = {Allen, Diane D; Ni, Pengsheng; Haley, Stephen M; K02 HD45354-01/HD/NICHD NIH HHS/United States; NIDDR H133P0001/DD/NCBDD CDC HHS/United States; Research Support, N.I.H., Extramural; England; Disability and rehabilitation; Disabil Rehabil. 2008;30(6):479-84.},
}

@article{287,
  author   = {Mulcahey, M. J. and Haley, S. M. and Duffy, T. and Ni, P. and Betz, R. R.},
  title    = {Measuring physical functioning in children with spinal impairments with computerized adaptive testing},
  journal  = {Journal of Pediatric Orthopedics},
  volume   = {28},
  number   = {3},
  year     = {2008},
  month    = apr # "--" # may,
  pages    = {330--335},
  issn     = {0271-6798},
  edition  = {2008/03/26},
  keywords = {*Disability Evaluation, Adolescent, Child, Child, Preschool, Computer Simulation, Cross-Sectional Studies, Disabled Children/*rehabilitation, Female, Humans, Infant, Kyphosis/*diagnosis/rehabilitation, Male, Prospective Studies, Reproducibility of Results, Scoliosis/*diagnosis/rehabilitation},
  abstract = {BACKGROUND: The purpose of this study was to assess the utility of measuring current physical functioning status of children with scoliosis and kyphosis by applying computerized adaptive testing (CAT) methods. Computerized adaptive testing uses a computer interface to administer the most optimal items based on previous responses, reducing the number of items needed to obtain a scoring estimate. METHODS: This was a prospective study of 77 subjects (0.6-19.8 years) who were seen by a spine surgeon during a routine clinic visit for progress spine deformity. Using a multidimensional version of the Pediatric Evaluation of Disability Inventory CAT program (PEDI-MCAT), we evaluated content range, accuracy and efficiency, known-group validity, concurrent validity with the Pediatric Outcomes Data Collection Instrument, and test-retest reliability in a subsample (n = 16) within a 2-week interval. RESULTS: We found the PEDI-MCAT to have sufficient item coverage in both self-care and mobility content for this sample, although most patients tended to score at the higher ends of both scales. Both the accuracy of PEDI-MCAT scores as compared with a fixed format of the PEDI (r = 0.98 for both mobility and self-care) and test-retest reliability were very high [self-care: intraclass correlation (3,1) = 0.98, mobility: intraclass correlation (3,1) = 0.99]. The PEDI-MCAT took an average of 2.9 minutes for the parents to complete. The PEDI-MCAT detected expected differences between patient groups, and scores on the PEDI-MCAT correlated in expected directions with scores from the Pediatric Outcomes Data Collection Instrument domains. CONCLUSIONS: Use of the PEDI-MCAT to assess the physical functioning status, as perceived by parents of children with complex spinal impairments, seems to be feasible and achieves accurate and efficient estimates of self-care and mobility function. Additional item development will be needed at the higher functioning end of the scale to avoid ceiling effects for older children. LEVEL OF EVIDENCE: This is a level II prospective study designed to establish the utility of computer adaptive testing as an evaluation method in a busy pediatric spine practice.},
  note     = {Mulcahey, M J; Haley, Stephen M; Duffy, Theresa; Pengsheng, Ni; Betz, Randal R; K02 HD045354-01A1/HD/NICHD NIH HHS/United States; United States; Journal of pediatric orthopedics; J Pediatr Orthop. 2008 Apr-May;28(3):330-5.},
}

@article{174,
  author   = {Haley, S. M. and Ni, P. and Ludlow, L. H. and Fragala-Pinkham, M. A.},
  title    = {Measurement precision and efficiency of multidimensional computer adaptive testing of physical functioning using the pediatric evaluation of disability inventory},
  journal  = {Archives of Physical Medicine and Rehabilitation},
  volume   = {87},
  number   = {9},
  year     = {2006},
  month    = sep,
  pages    = {1223--1229},
  issn     = {0003-9993},
  edition  = {2006/08/29},
  keywords = {*Disability Evaluation, *Pediatrics, Adolescent, Child, Child, Preschool, Computers, Disabled Persons/*classification/rehabilitation, Efficiency, Humans, Infant, Outcome Assessment (Health Care), Psychometrics, Self Care},
  abstract = {OBJECTIVE: To compare the measurement efficiency and precision of a multidimensional computer adaptive testing (M-CAT) application to a unidimensional CAT (U-CAT) comparison using item bank data from 2 of the functional skills scales of the Pediatric Evaluation of Disability Inventory (PEDI). DESIGN: Using existing PEDI mobility and self-care item banks, we compared the stability of item calibrations and model fit between unidimensional and multidimensional Rasch models and compared the efficiency and precision of the U-CAT- and M-CAT-simulated assessments to a random draw of items. SETTING: Pediatric rehabilitation hospital and clinics. PARTICIPANTS: Clinical and normative samples. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Not applicable. RESULTS: The M-CAT had greater levels of precision and efficiency than the separate mobility and self-care U-CAT versions when using a similar number of items for each PEDI subdomain. Equivalent estimation of mobility and self-care scores can be achieved with a 25\% to 40\% item reduction with the M-CAT compared with the U-CAT. CONCLUSIONS: M-CAT applications appear to have both precision and efficiency advantages compared with separate U-CAT assessments when content subdomains have a high correlation. Practitioners may also realize interpretive advantages of reporting test score information for each subdomain when separate clinical inferences are desired.},
  note     = {Haley, Stephen M; Ni, Pengsheng; Ludlow, Larry H; Fragala-Pinkham, Maria A; K02 hd45354-01/hd/nichd; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov{\textquoteright}t; United States; Archives of physical medicine and rehabilitation; Arch Phys Med Rehabil. 2006 Sep;87(9):1223-9.},
}

@article{175,
  author   = {Haley, S. M. and Raczek, A. E. and Coster, W. J. and Dumas, H. M. and Fragala-Pinkham, M. A.},
  title    = {Assessing mobility in children using a computer adaptive testing version of the pediatric evaluation of disability inventory},
  journal  = {Archives of Physical Medicine and Rehabilitation},
  volume   = {86},
  number   = {5},
  year     = {2005},
  month    = may,
  pages    = {932--939},
  issn     = {0003-9993},
  edition  = {2005/05/17},
  keywords = {*Computer Simulation, *Disability Evaluation, Adolescent, Child, Child, Preschool, Cross-Sectional Studies, Disabled Children/*rehabilitation, Female, Humans, Infant, Male, Outcome Assessment (Health Care)/*methods, Rehabilitation Centers, Rehabilitation/*standards, Sensitivity and Specificity},
  abstract = {OBJECTIVE: To assess score agreement, validity, precision, and response burden of a prototype computerized adaptive testing (CAT) version of the Mobility Functional Skills Scale (Mob-CAT) of the Pediatric Evaluation of Disability Inventory (PEDI) as compared with the full 59-item version (Mob-59). DESIGN: Computer simulation analysis of cross-sectional and longitudinal retrospective data; and cross-sectional prospective study. SETTING: Pediatric rehabilitation hospital, including inpatient acute rehabilitation, day school program, outpatient clinics, community-based day care, preschool, and children{\textquoteright}s homes. PARTICIPANTS: Four hundred sixty-nine children with disabilities and 412 children with no disabilities (analytic sample); 41 children without disabilities and 39 with disabilities (cross-validation sample). INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Summary scores from a prototype Mob-CAT application and versions using 15-, 10-, and 5-item stopping rules; scores from the Mob-59; and number of items and time (in seconds) to administer assessments. RESULTS: Mob-CAT scores from both computer simulations (intraclass correlation coefficient [ICC] range, .94-.99) and field administrations (ICC=.98) were in high agreement with scores from the Mob-59. Using computer simulations of retrospective data, discriminant validity, and sensitivity to change of the Mob-CAT closely approximated that of the Mob-59, especially when using the 15- and 10-item stopping rule versions of the Mob-CAT. The Mob-CAT used no more than 15\% of the items for any single administration, and required 20\% of the time needed to administer the Mob-59. CONCLUSIONS: Comparable score estimates for the PEDI mobility scale can be obtained from CAT administrations, with losses in validity and precision for shorter forms, but with a considerable reduction in administration time.},
  note     = {Haley, Stephen M; Raczek, Anastasia E; Coster, Wendy J; Dumas, Helene M; Fragala-Pinkham, Maria A; K02 hd45354-01a1/hd/nichd; R43 hd42388-01/hd/nichd; Research Support, N.I.H., Extramural; Research Support, U.S. Gov{\textquoteright}t, P.H.S.; United States; Archives of physical medicine and rehabilitation; Arch Phys Med Rehabil. 2005 May;86(5):932-9.},
}

@article{171,
  author   = {Haley, S. M. and Ni, P. and Fragala-Pinkham, M. A. and Skrinar, A. M. and Corzo, D.},
  title    = {A computer adaptive testing approach for assessing physical functioning in children and adolescents},
  journal  = {Developmental Medicine and Child Neurology},
  volume   = {47},
  number   = {2},
  year     = {2005},
  month    = feb,
  pages    = {113--120},
  issn     = {0012-1622},
  edition  = {2005/02/15},
  keywords = {*Computer Systems, Activities of Daily Living, Adolescent, Age Factors, Child, Child Development/*physiology, Child, Preschool, Computer Simulation, Confidence Intervals, Demography, Female, Glycogen Storage Disease Type II/physiopathology, Health Status Indicators, Humans, Infant, Infant, Newborn, Male, Motor Activity/*physiology, Outcome Assessment (Health Care)/*methods, Reproducibility of Results, Self Care, Sensitivity and Specificity},
  abstract = {The purpose of this article is to demonstrate: (1) the accuracy and (2) the reduction in amount of time and effort in assessing physical functioning (self-care and mobility domains) of children and adolescents using computer-adaptive testing (CAT). A CAT algorithm selects questions directly tailored to the child{\textquoteright}s ability level, based on previous responses. Using a CAT algorithm, a simulation study was used to determine the number of items necessary to approximate the score of a full-length assessment. We built simulated CAT (5-, 10-, 15-, and 20-item versions) for self-care and mobility domains and tested their accuracy in a normative sample (n=373; 190 males, 183 females; mean age 6y 11mo [SD 4y 2m], range 4mo to 14y 11mo) and a sample of children and adolescents with Pompe disease (n=26; 21 males, 5 females; mean age 6y 1mo [SD 3y 10mo], range 5mo to 14y 10mo). Results indicated that comparable score estimates (based on computer simulations) to the full-length tests can be achieved in a 20-item CAT version for all age ranges and for normative and clinical samples. No more than 13 to 16\% of the items in the full-length tests were needed for any one administration. These results support further consideration of using CAT programs for accurate and efficient clinical assessments of physical functioning.},
  note     = {Haley, Stephen M; Ni, Pengsheng; Fragala-Pinkham, Maria A; Skrinar, Alison M; Corzo, Deyanira; Comparative Study; Research Support, Non-U.S. Gov{\textquoteright}t; England; Developmental medicine and child neurology; Dev Med Child Neurol. 2005 Feb;47(2):113-20.},
}

@article{305,
  author   = {Panter, A. T. and Reeve, B. B.},
  title    = {Assessing tobacco beliefs among youth using item response theory models},
  journal  = {Drug and Alcohol Dependence},
  volume   = {68},
  number   = {Suppl 1},
  year     = {2002},
  month    = nov,
  pages    = {S21--S39},
  issn     = {0376-8716},
  keywords = {*Attitude to Health, *Culture, *Health Behavior, *Questionnaires, Adolescent, Adult, Child, Female, Humans, Male, Models, Statistical, Smoking/*epidemiology},
  abstract = {Successful intervention research programs to prevent adolescent smoking require well-chosen, psychometrically sound instruments for assessing smoking prevalence and attitudes. Twelve thousand eight hundred and ten adolescents were surveyed about their smoking beliefs as part of the Teenage Attitudes and Practices Survey project, a prospective cohort study of predictors of smoking initiation among US adolescents. Item response theory (IRT) methods are used to frame a discussion of questions that a researcher might ask when selecting an optimal item set. IRT methods are especially useful for choosing items during instrument development, trait scoring, evaluating item functioning across groups, and creating optimal item subsets for use in specialized applications such as computerized adaptive testing. Data analytic steps for IRT modeling are reviewed for evaluating item quality and differential item functioning across subgroups of gender, age, and smoking status. Implications and challenges in the use of these methods for tobacco onset research and for assessing the developmental trajectories of smoking among youth are discussed.},
  note     = {Journal Article},
}

@article{146,
  author   = {Gardner, W. and Kelleher, K. J. and Pajer, K. A.},
  title    = {Multidimensional adaptive testing for mental health problems in primary care},
  journal  = {Medical Care},
  volume   = {40},
  number   = {9},
  year     = {2002},
  month    = sep,
  pages    = {812--823},
  issn     = {0025-7079},
  edition  = {2002/09/10},
  keywords = {Adolescent, Child, Child Behavior Disorders/*diagnosis, Child Health Services/*organization \& administration, Factor Analysis, Statistical, Female, Humans, Linear Models, Male, Mass Screening/*methods, Parents, Primary Health Care/*organization \& administration},
  abstract = {OBJECTIVES: Efficient and accurate instruments for assessing child psychopathology are increasingly important in clinical practice and research. For example, screening in primary care settings can identify children and adolescents with disorders that may otherwise go undetected. However, primary care offices are notorious for the brevity of visits and screening must not burden patients or staff with long questionnaires. One solution is to shorten assessment instruments, but dropping questions typically makes an instrument less accurate. An alternative is adaptive testing, in which a computer selects the items to be asked of a patient based on the patient{\textquoteright}s previous responses. This research used a simulation to test a child mental health screen based on this technology. RESEARCH DESIGN: Using half of a large sample of data, a computerized version was developed of the Pediatric Symptom Checklist (PSC), a parental-report psychosocial problem screen. With the unused data, a simulation was conducted to determine whether the Adaptive PSC can reproduce the results of the full PSC with greater efficiency. SUBJECTS: PSCs were completed by parents on 21,150 children seen in a national sample of primary care practices. RESULTS: Four latent psychosocial problem dimensions were identified through factor analysis: internalizing problems, externalizing problems, attention problems, and school problems. A simulated adaptive test measuring these traits asked an average of 11.6 questions per patient, and asked five or fewer questions for 49\% of the sample. There was high agreement between the adaptive test and the full (35-item) PSC: only 1.3\% of screening decisions were discordant (kappa = 0.93). This agreement was higher than that obtained using a comparable length (12-item) short-form PSC (3.2\% of decisions discordant; kappa = 0.84). CONCLUSIONS: Multidimensional adaptive testing may be an accurate and efficient technology for screening for mental health problems in primary care settings.},
  note     = {Gardner, William; Kelleher, Kelly J; Pajer, Kathleen A; MCJ-177022/PHS HHS/; MH30915/MH/NIMH NIH HHS/; MH50629/MH/NIMH NIH HHS/; Med Care. 2002 Sep;40(9):812-23.},
}

@article{28,
  author   = {Bezruczko, N.},
  title    = {Competency gradient for child-parent centers},
  journal  = {Journal of Outcomes Measurement},
  volume   = {3},
  number   = {1},
  year     = {1999},
  pages    = {35--52},
  issn     = {1090-655X},
  keywords = {*Models, Statistical, Activities of Daily Living/classification/psychology, Adolescent, Chicago, Child, Child, Preschool, Early Intervention (Education)/*statistics \& numerical data, Female, Follow-Up Studies, Humans, Male, Outcome and Process Assessment (Health Care)/*statistics \& numerical data},
  abstract = {This report describes an implementation of the Rasch model during the longitudinal evaluation of a federally-funded early childhood preschool intervention program. An item bank is described for operationally defining a psychosocial construct called community life-skills competency, an expected teenage outcome of the preschool intervention. This analysis examined the position of teenage students on this scale structure, and investigated a pattern of cognitive operations necessary for students to pass community life-skills test items. Then this scale structure was correlated with nationally standardized reading and math achievement scores, teacher ratings, and school records to assess its validity as a measure of the community-related outcome goal for this intervention. The results show a functional relationship between years of early intervention and magnitude of effect on the life-skills competency variable.},
  note     = {Journal Article; Research Support, U.S. Gov{\textquoteright}t, P.H.S.},
}