MatchIt/ 0000755 0001762 0000144 00000000000 14512163103 011577 5 ustar ligges users MatchIt/NAMESPACE 0000644 0001762 0000144 00000001630 14336727641 013037 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method(plot,matchit)
S3method(plot,matchit.subclass)
S3method(plot,summary.matchit)
S3method(print,matchit)
S3method(print,summary.matchit)
S3method(print,summary.matchit.subclass)
S3method(rbind,getmatches)
S3method(rbind,matchdata)
S3method(summary,matchit)
S3method(summary,matchit.subclass)
export(add_s.weights)
export(euclidean_dist)
export(get_matches)
export(mahalanobis_dist)
export(match.data)
export(matchit)
export(robust_mahalanobis_dist)
export(scaled_euclidean_dist)
import(graphics)
import(stats)
importFrom(Rcpp,evalCpp)
importFrom(Rcpp,sourceCpp)
importFrom(grDevices,devAskNewPage)
importFrom(grDevices,nclass.FD)
importFrom(grDevices,nclass.Sturges)
importFrom(grDevices,nclass.scott)
importFrom(utils,capture.output)
importFrom(utils,combn)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
useDynLib(MatchIt, .registration = TRUE)
MatchIt/data/ 0000755 0001762 0000144 00000000000 14463003201 012505 5 ustar ligges users MatchIt/data/lalonde.tab 0000644 0001762 0000144 00000072426 13705231557 014645 0 ustar ligges users "treat" "age" "educ" "race" "married" "nodegree" "re74" "re75" "re78"
"NSW1" 1 37 11 "black" 1 1 0 0 9930.046
"NSW2" 1 22 9 "hispan" 0 1 0 0 3595.894
"NSW3" 1 30 12 "black" 0 0 0 0 24909.45
"NSW4" 1 27 11 "black" 0 1 0 0 7506.146
"NSW5" 1 33 8 "black" 0 1 0 0 289.7899
"NSW6" 1 22 9 "black" 0 1 0 0 4056.494
"NSW7" 1 23 12 "black" 0 0 0 0 0
"NSW8" 1 32 11 "black" 0 1 0 0 8472.158
"NSW9" 1 22 16 "black" 0 0 0 0 2164.022
"NSW10" 1 33 12 "white" 1 0 0 0 12418.07
"NSW11" 1 19 9 "black" 0 1 0 0 8173.908
"NSW12" 1 21 13 "black" 0 0 0 0 17094.64
"NSW13" 1 18 8 "black" 0 1 0 0 0
"NSW14" 1 27 10 "black" 1 1 0 0 18739.93
"NSW15" 1 17 7 "black" 0 1 0 0 3023.879
"NSW16" 1 19 10 "black" 0 1 0 0 3228.503
"NSW17" 1 27 13 "black" 0 0 0 0 14581.86
"NSW18" 1 23 10 "black" 0 1 0 0 7693.4
"NSW19" 1 40 12 "black" 0 0 0 0 10804.32
"NSW20" 1 26 12 "black" 0 0 0 0 10747.35
"NSW21" 1 23 11 "black" 0 1 0 0 0
"NSW22" 1 41 14 "white" 0 0 0 0 5149.501
"NSW23" 1 38 9 "white" 0 1 0 0 6408.95
"NSW24" 1 24 11 "black" 0 1 0 0 1991.4
"NSW25" 1 18 10 "black" 0 1 0 0 11163.17
"NSW26" 1 29 11 "black" 1 1 0 0 9642.999
"NSW27" 1 25 11 "black" 0 1 0 0 9897.049
"NSW28" 1 27 10 "hispan" 0 1 0 0 11142.87
"NSW29" 1 17 10 "black" 0 1 0 0 16218.04
"NSW30" 1 24 11 "black" 0 1 0 0 995.7002
"NSW31" 1 17 10 "black" 0 1 0 0 0
"NSW32" 1 48 4 "black" 0 1 0 0 6551.592
"NSW33" 1 25 11 "black" 1 1 0 0 1574.424
"NSW34" 1 20 12 "black" 0 0 0 0 0
"NSW35" 1 25 12 "black" 0 0 0 0 3191.753
"NSW36" 1 42 14 "black" 0 0 0 0 20505.93
"NSW37" 1 25 5 "black" 0 1 0 0 6181.88
"NSW38" 1 23 12 "black" 1 0 0 0 5911.551
"NSW39" 1 46 8 "black" 1 1 0 0 3094.156
"NSW40" 1 24 10 "black" 0 1 0 0 0
"NSW41" 1 21 12 "black" 0 0 0 0 1254.582
"NSW42" 1 19 9 "white" 0 1 0 0 13188.83
"NSW43" 1 17 8 "black" 0 1 0 0 8061.485
"NSW44" 1 18 8 "hispan" 1 1 0 0 2787.96
"NSW45" 1 20 11 "black" 0 1 0 0 3972.54
"NSW46" 1 25 11 "black" 1 1 0 0 0
"NSW47" 1 17 8 "black" 0 1 0 0 0
"NSW48" 1 17 9 "black" 0 1 0 0 0
"NSW49" 1 25 5 "black" 0 1 0 0 12187.41
"NSW50" 1 23 12 "black" 0 0 0 0 4843.176
"NSW51" 1 28 8 "black" 0 1 0 0 0
"NSW52" 1 31 11 "black" 1 1 0 0 8087.487
"NSW53" 1 18 11 "black" 0 1 0 0 0
"NSW54" 1 25 12 "black" 0 0 0 0 2348.973
"NSW55" 1 30 11 "black" 1 1 0 0 590.7818
"NSW56" 1 17 10 "black" 0 1 0 0 0
"NSW57" 1 37 9 "black" 0 1 0 0 1067.506
"NSW58" 1 41 4 "black" 1 1 0 0 7284.986
"NSW59" 1 42 14 "black" 1 0 0 0 13167.52
"NSW60" 1 22 11 "white" 0 1 0 0 1048.432
"NSW61" 1 17 8 "black" 0 1 0 0 0
"NSW62" 1 29 8 "black" 0 1 0 0 1923.938
"NSW63" 1 35 10 "black" 0 1 0 0 4666.236
"NSW64" 1 27 11 "black" 0 1 0 0 549.2984
"NSW65" 1 29 4 "black" 0 1 0 0 762.9146
"NSW66" 1 28 9 "black" 0 1 0 0 10694.29
"NSW67" 1 27 11 "black" 0 1 0 0 0
"NSW68" 1 23 7 "white" 0 1 0 0 0
"NSW69" 1 45 5 "black" 1 1 0 0 8546.715
"NSW70" 1 29 13 "black" 0 0 0 0 7479.656
"NSW71" 1 27 9 "black" 0 1 0 0 0
"NSW72" 1 46 13 "black" 0 0 0 0 647.2046
"NSW73" 1 18 6 "black" 0 1 0 0 0
"NSW74" 1 25 12 "black" 0 0 0 0 11965.81
"NSW75" 1 28 15 "black" 0 0 0 0 9598.541
"NSW76" 1 25 11 "white" 0 1 0 0 18783.35
"NSW77" 1 22 12 "black" 0 0 0 0 18678.08
"NSW78" 1 21 9 "black" 0 1 0 0 0
"NSW79" 1 40 11 "black" 0 1 0 0 23005.6
"NSW80" 1 22 11 "black" 0 1 0 0 6456.697
"NSW81" 1 25 12 "black" 0 0 0 0 0
"NSW82" 1 18 12 "black" 0 0 0 0 2321.107
"NSW83" 1 38 12 "white" 0 0 0 0 4941.849
"NSW84" 1 27 13 "black" 0 0 0 0 0
"NSW85" 1 27 8 "black" 0 1 0 0 0
"NSW86" 1 38 11 "black" 0 1 0 0 0
"NSW87" 1 23 8 "hispan" 0 1 0 0 3881.284
"NSW88" 1 26 11 "black" 0 1 0 0 17230.96
"NSW89" 1 21 12 "white" 0 0 0 0 8048.603
"NSW90" 1 25 8 "black" 0 1 0 0 0
"NSW91" 1 31 11 "black" 1 1 0 0 14509.93
"NSW92" 1 17 10 "black" 0 1 0 0 0
"NSW93" 1 25 11 "black" 0 1 0 0 0
"NSW94" 1 21 12 "black" 0 0 0 0 9983.784
"NSW95" 1 44 11 "black" 0 1 0 0 0
"NSW96" 1 25 12 "white" 0 0 0 0 5587.503
"NSW97" 1 18 9 "black" 0 1 0 0 4482.845
"NSW98" 1 42 12 "black" 0 0 0 0 2456.153
"NSW99" 1 25 10 "black" 0 1 0 0 0
"NSW100" 1 31 9 "hispan" 0 1 0 0 26817.6
"NSW101" 1 24 10 "black" 0 1 0 0 0
"NSW102" 1 26 10 "black" 0 1 0 0 9265.788
"NSW103" 1 25 11 "black" 0 1 0 0 485.2298
"NSW104" 1 18 11 "black" 0 1 0 0 4814.627
"NSW105" 1 19 11 "black" 0 1 0 0 7458.105
"NSW106" 1 43 9 "black" 0 1 0 0 0
"NSW107" 1 27 13 "black" 0 0 0 0 34099.28
"NSW108" 1 17 9 "black" 0 1 0 0 1953.268
"NSW109" 1 30 11 "black" 0 1 0 0 0
"NSW110" 1 26 10 "black" 1 1 2027.999 0 0
"NSW111" 1 20 9 "black" 0 1 6083.994 0 8881.665
"NSW112" 1 17 9 "hispan" 0 1 445.1704 74.34345 6210.67
"NSW113" 1 20 12 "black" 0 0 989.2678 165.2077 0
"NSW114" 1 18 11 "black" 0 1 858.2543 214.5636 929.8839
"NSW115" 1 27 12 "black" 1 0 3670.872 334.0493 0
"NSW116" 1 21 12 "white" 0 0 3670.872 334.0494 12558.02
"NSW117" 1 27 12 "black" 0 0 2143.413 357.9499 22163.25
"NSW118" 1 20 12 "black" 0 0 0 377.5686 1652.637
"NSW119" 1 19 10 "black" 0 1 0 385.2741 8124.715
"NSW120" 1 23 12 "black" 0 0 5506.308 501.0741 671.3318
"NSW121" 1 29 14 "black" 0 0 0 679.6734 17814.98
"NSW122" 1 18 10 "black" 0 1 0 798.9079 9737.154
"NSW123" 1 19 9 "black" 0 1 0 798.9079 17685.18
"NSW124" 1 27 13 "white" 1 0 9381.566 853.7225 0
"NSW125" 1 18 11 "white" 0 1 3678.231 919.5579 4321.705
"NSW126" 1 27 9 "black" 1 1 0 934.4454 1773.423
"NSW127" 1 22 12 "black" 0 0 5605.852 936.1773 0
"NSW128" 1 23 10 "black" 1 1 0 936.4386 11233.26
"NSW129" 1 23 12 "hispan" 0 0 9385.74 1117.439 559.4432
"NSW130" 1 20 11 "black" 0 1 3637.498 1220.836 1085.44
"NSW131" 1 17 9 "black" 0 1 1716.509 1253.439 5445.2
"NSW132" 1 28 11 "black" 0 1 0 1284.079 60307.93
"NSW133" 1 26 11 "black" 1 1 0 1392.853 1460.36
"NSW134" 1 20 11 "black" 0 1 16318.62 1484.994 6943.342
"NSW135" 1 24 11 "black" 1 1 824.3886 1666.113 4032.708
"NSW136" 1 31 9 "black" 0 1 0 1698.607 10363.27
"NSW137" 1 23 8 "white" 1 1 0 1713.15 4232.309
"NSW138" 1 18 10 "black" 0 1 2143.411 1784.274 11141.39
"NSW139" 1 29 12 "black" 0 0 10881.94 1817.284 0
"NSW140" 1 26 11 "white" 0 1 0 2226.266 13385.86
"NSW141" 1 24 9 "black" 0 1 9154.7 2288.675 4849.559
"NSW142" 1 25 12 "black" 0 0 14426.79 2409.274 0
"NSW143" 1 24 10 "black" 0 1 4250.402 2421.947 1660.508
"NSW144" 1 46 8 "black" 0 1 3165.658 2594.723 0
"NSW145" 1 31 12 "white" 0 0 0 2611.218 2484.549
"NSW146" 1 19 11 "black" 0 1 2305.026 2615.276 4146.603
"NSW147" 1 19 8 "black" 0 1 0 2657.057 9970.681
"NSW148" 1 27 11 "black" 0 1 2206.94 2666.274 0
"NSW149" 1 26 11 "black" 1 1 0 2754.646 26372.28
"NSW150" 1 20 10 "black" 0 1 5005.731 2777.355 5615.189
"NSW151" 1 28 10 "black" 0 1 0 2836.506 3196.571
"NSW152" 1 24 12 "black" 0 0 13765.75 2842.764 6167.681
"NSW153" 1 19 8 "black" 0 1 2636.353 2937.264 7535.942
"NSW154" 1 23 12 "black" 0 0 6269.341 3039.96 8484.239
"NSW155" 1 42 9 "black" 1 1 0 3058.531 1294.409
"NSW156" 1 25 13 "black" 0 0 12362.93 3090.732 0
"NSW157" 1 18 9 "black" 0 1 0 3287.375 5010.342
"NSW158" 1 21 12 "black" 0 0 6473.683 3332.409 9371.037
"NSW159" 1 27 10 "black" 0 1 1001.146 3550.075 0
"NSW160" 1 21 8 "black" 0 1 989.2678 3695.897 4279.613
"NSW161" 1 22 9 "black" 0 1 2192.877 3836.986 3462.564
"NSW162" 1 31 4 "black" 0 1 8517.589 4023.211 7382.549
"NSW163" 1 24 10 "black" 1 1 11703.2 4078.152 0
"NSW164" 1 29 10 "black" 0 1 0 4398.95 0
"NSW165" 1 29 12 "black" 0 0 9748.387 4878.937 10976.51
"NSW166" 1 19 10 "white" 0 1 0 5324.109 13829.62
"NSW167" 1 19 11 "hispan" 1 1 5424.485 5463.803 6788.463
"NSW168" 1 31 9 "black" 0 1 10717.03 5517.841 9558.501
"NSW169" 1 22 10 "black" 1 1 1468.348 5588.664 13228.28
"NSW170" 1 21 9 "black" 0 1 6416.47 5749.331 743.6666
"NSW171" 1 17 10 "black" 0 1 1291.468 5793.852 5522.788
"NSW172" 1 26 12 "black" 1 0 8408.762 5794.831 1424.944
"NSW173" 1 20 9 "hispan" 0 1 12260.78 5875.049 1358.643
"NSW174" 1 19 10 "black" 0 1 4121.949 6056.754 0
"NSW175" 1 26 10 "black" 0 1 25929.68 6788.958 672.8773
"NSW176" 1 28 11 "black" 0 1 1929.029 6871.856 0
"NSW177" 1 22 12 "hispan" 1 0 492.2305 7055.702 10092.83
"NSW178" 1 33 11 "black" 0 1 0 7867.916 6281.433
"NSW179" 1 22 12 "white" 0 0 6759.994 8455.504 12590.71
"NSW180" 1 29 10 "hispan" 0 1 0 8853.674 5112.014
"NSW181" 1 33 12 "black" 1 0 20279.95 10941.35 15952.6
"NSW182" 1 25 14 "black" 1 0 35040.07 11536.57 36646.95
"NSW183" 1 35 9 "black" 1 1 13602.43 13830.64 12803.97
"NSW184" 1 35 8 "black" 1 1 13732.07 17976.15 3786.628
"NSW185" 1 33 11 "black" 1 1 14660.71 25142.24 4181.942
"PSID1" 0 30 12 "white" 1 0 20166.73 18347.23 25564.67
"PSID2" 0 26 12 "white" 1 0 25862.32 17806.55 25564.67
"PSID3" 0 25 16 "white" 1 0 25862.32 15316.21 25564.67
"PSID4" 0 42 11 "white" 1 1 21787.05 14265.29 15491.01
"PSID5" 0 25 9 "black" 1 1 14829.69 13776.53 0
"PSID6" 0 37 9 "black" 1 1 13685.48 12756.05 17833.2
"PSID7" 0 32 12 "white" 1 0 19067.58 12625.35 14146.28
"PSID8" 0 20 12 "black" 0 0 7392.314 12396.19 17765.23
"PSID9" 0 38 9 "hispan" 1 1 16826.18 12029.18 0
"PSID10" 0 39 10 "white" 1 1 16767.41 12022.02 4433.18
"PSID11" 0 41 5 "white" 1 1 10785.76 11991.58 19451.31
"PSID12" 0 31 14 "white" 1 0 17831.29 11563.69 22094.97
"PSID13" 0 34 8 "white" 1 1 8038.872 11404.35 5486.799
"PSID14" 0 29 12 "white" 1 0 14768.95 11146.55 6420.722
"PSID15" 0 22 14 "black" 1 0 748.4399 11105.37 18208.55
"PSID16" 0 42 0 "hispan" 1 1 2797.833 10929.92 9922.934
"PSID17" 0 25 9 "hispan" 0 1 5460.477 10589.76 7539.361
"PSID18" 0 28 9 "white" 1 1 11091.41 10357.02 15406.78
"PSID19" 0 40 13 "white" 1 0 3577.621 10301.52 11911.95
"PSID20" 0 35 9 "white" 1 1 11475.43 9397.403 11087.38
"PSID21" 0 27 10 "hispan" 1 1 15711.36 9098.419 17023.41
"PSID22" 0 27 6 "hispan" 1 1 7831.189 9071.565 5661.171
"PSID23" 0 36 12 "white" 1 0 25535.12 8695.597 21905.82
"PSID24" 0 47 8 "black" 1 1 9275.169 8543.419 0
"PSID25" 0 40 11 "white" 1 1 20666.35 8502.242 25564.67
"PSID26" 0 27 7 "white" 1 1 3064.293 8461.065 11149.45
"PSID27" 0 36 9 "black" 1 1 13256.4 8457.484 0
"PSID28" 0 39 6 "hispan" 1 1 13279.91 8441.371 25048.94
"PSID29" 0 21 9 "white" 1 1 11156.07 8441.371 1213.214
"PSID30" 0 29 12 "white" 1 0 11199.17 8081.516 0
"PSID31" 0 22 13 "hispan" 0 0 6404.843 7882.79 9453.017
"PSID32" 0 25 10 "white" 1 1 13634.54 7793.274 11688.82
"PSID33" 0 27 12 "white" 1 0 12270.89 7709.129 7806.829
"PSID34" 0 45 8 "white" 1 1 22415.97 7635.726 15931.37
"PSID35" 0 26 12 "white" 1 0 2345.242 7565.903 2838.713
"PSID36" 0 27 12 "white" 1 0 9788.497 7496.081 14038.4
"PSID37" 0 33 8 "white" 1 1 12312.03 7474.597 25514.43
"PSID38" 0 25 12 "white" 1 0 11381.38 7467.435 4162.756
"PSID39" 0 49 8 "white" 1 1 6459.703 7431.629 7503.896
"PSID40" 0 40 3 "hispan" 1 1 7576.485 7426.258 12104.06
"PSID41" 0 22 12 "black" 1 0 9729.719 7372.548 2231.367
"PSID42" 0 25 5 "white" 1 1 7891.927 7293.774 14617.67
"PSID43" 0 25 12 "white" 1 0 11516.57 7263.339 19588.74
"PSID44" 0 21 12 "white" 1 0 13601.23 7202.468 10746.03
"PSID45" 0 33 9 "hispan" 1 1 11959.36 7087.887 25564.67
"PSID46" 0 20 12 "black" 1 0 9555.344 7055.661 0
"PSID47" 0 19 11 "white" 1 1 4306.468 6978.677 837.871
"PSID48" 0 25 12 "black" 1 0 295.8493 6942.871 461.0507
"PSID49" 0 29 12 "white" 1 0 15303.83 6932.129 24290.87
"PSID50" 0 20 12 "white" 1 0 3558.029 6797.855 6680.802
"PSID51" 0 29 6 "hispan" 1 1 8542.403 6701.177 7196.528
"PSID52" 0 25 13 "white" 1 0 19259.59 6652.839 13015.82
"PSID53" 0 41 15 "white" 1 0 25862.32 6563.323 24647
"PSID54" 0 39 10 "white" 1 1 22745.13 6493.5 25564.67
"PSID55" 0 33 12 "white" 1 0 10819.07 6369.968 2936.243
"PSID56" 0 29 8 "white" 1 1 9169.369 6352.065 20575.86
"PSID57" 0 21 11 "white" 1 1 10679.96 6276.871 10923.35
"PSID58" 0 31 12 "white" 1 0 23652.27 6228.532 22403.81
"PSID59" 0 36 12 "black" 1 0 11040.47 6221.371 7215.739
"PSID60" 0 25 7 "white" 1 1 5597.625 6099.629 122.6513
"PSID61" 0 35 7 "white" 1 1 10715.23 6087.097 15177.73
"PSID62" 0 22 9 "white" 1 1 5683.833 6038.758 4742.025
"PSID63" 0 31 2 "hispan" 1 1 3262.179 5965.355 9732.307
"PSID64" 0 40 15 "white" 1 0 10907.24 5922.387 6238.962
"PSID65" 0 47 3 "white" 1 1 9047.894 5911.645 6145.865
"PSID66" 0 26 8 "hispan" 0 1 3168.134 5872.258 11136.15
"PSID67" 0 42 7 "white" 1 1 10971.89 5806.016 9241.702
"PSID68" 0 53 12 "white" 0 0 17104.4 5775.581 19965.56
"PSID69" 0 30 17 "black" 0 0 17827.37 5546.419 14421.13
"PSID70" 0 28 10 "white" 1 1 10415.46 5544.629 10289.41
"PSID71" 0 46 11 "white" 1 1 14753.28 5299.355 0
"PSID72" 0 28 12 "white" 0 0 8256.35 5279.661 21602.88
"PSID73" 0 27 12 "hispan" 1 0 17604.01 5222.371 25564.67
"PSID74" 0 25 10 "white" 1 1 4335.857 5181.194 12418.81
"PSID75" 0 38 8 "white" 1 1 11242.27 5174.032 0
"PSID76" 0 26 12 "hispan" 0 0 7968.338 5109.581 4181.966
"PSID77" 0 54 12 "white" 0 0 7165.039 5012.903 0
"PSID78" 0 38 8 "hispan" 1 1 22606.02 4978.887 8720.065
"PSID79" 0 23 17 "white" 0 0 0 4876.839 16747.08
"PSID80" 0 23 8 "white" 1 1 3595.255 4866.097 2782.559
"PSID81" 0 23 12 "white" 1 0 11690.95 4764.048 14065
"PSID82" 0 25 12 "hispan" 1 0 8746.167 4762.258 379.7757
"PSID83" 0 25 15 "white" 1 0 7386.436 4738.984 12705.49
"PSID84" 0 37 11 "hispan" 0 1 615.2098 4713.919 0
"PSID85" 0 40 12 "white" 1 0 18389.68 4688.855 21857.05
"PSID86" 0 19 10 "white" 0 1 5777.878 4672.742 135.9508
"PSID87" 0 48 7 "white" 1 1 13326.93 4636.935 0
"PSID88" 0 19 12 "white" 0 0 8530.648 4620.823 0
"PSID89" 0 16 9 "white" 0 1 2539.21 4579.645 0
"PSID90" 0 29 10 "white" 1 1 713.1731 4542.048 7781.708
"PSID91" 0 30 16 "white" 0 0 3093.682 4468.645 15538.29
"PSID92" 0 22 11 "white" 1 1 8761.841 4463.274 10642.59
"PSID93" 0 22 10 "white" 0 1 17268.98 4400.613 2453.026
"PSID94" 0 47 10 "black" 1 1 13311.26 4397.032 19330.14
"PSID95" 0 25 12 "hispan" 1 0 2266.872 4361.226 3020.473
"PSID96" 0 47 10 "black" 0 1 21918.32 4323.629 19438.02
"PSID97" 0 24 12 "black" 1 0 8573.752 4293.194 0
"PSID98" 0 20 12 "black" 1 0 2648.929 4273.5 0
"PSID99" 0 28 12 "black" 0 0 16722.34 4253.806 7314.747
"PSID100" 0 47 11 "white" 0 1 8060.424 4232.323 3358.873
"PSID101" 0 50 0 "white" 1 1 10162.72 4218 220.1813
"PSID102" 0 18 12 "white" 0 0 2217.89 4191.145 8957.978
"PSID103" 0 21 12 "white" 0 0 9665.063 4110.581 1687.564
"PSID104" 0 47 11 "white" 1 1 23924.61 4096.258 17358.85
"PSID105" 0 21 12 "white" 0 0 2827.222 4056.871 5937.505
"PSID106" 0 34 11 "white" 1 1 0 4010.323 18133.18
"PSID107" 0 19 12 "white" 1 0 5817.063 3919.016 1066.919
"PSID108" 0 44 13 "white" 1 0 8032.994 3881.419 3104.704
"PSID109" 0 21 15 "white" 1 0 6951.479 3879.629 0
"PSID110" 0 20 12 "black" 0 0 5099.971 3842.032 12718.79
"PSID111" 0 51 11 "white" 0 1 48.98167 3813.387 1525.014
"PSID112" 0 28 13 "white" 0 0 5260.631 3790.113 9253.524
"PSID113" 0 24 15 "white" 0 0 12746.99 3743.565 0
"PSID114" 0 28 8 "hispan" 1 1 8305.332 3718.5 0
"PSID115" 0 20 11 "white" 1 1 5822.941 3532.306 11075.56
"PSID116" 0 29 12 "white" 1 0 14288.93 3503.661 8133.407
"PSID117" 0 23 12 "white" 1 0 14347.71 3482.177 3818.445
"PSID118" 0 20 11 "black" 0 1 0 3480.387 5495.665
"PSID119" 0 42 7 "white" 1 1 4324.102 3457.113 9856.436
"PSID120" 0 43 12 "white" 1 0 14328.12 3453.532 18781.9
"PSID121" 0 27 13 "white" 0 0 16406.9 3426.677 5344.937
"PSID122" 0 27 4 "hispan" 1 1 626.9654 3410.565 3367.739
"PSID123" 0 25 12 "white" 1 0 21469.65 3405.194 7981.201
"PSID124" 0 18 12 "white" 0 0 4729.67 3328.21 12602.05
"PSID125" 0 31 16 "white" 1 0 25862.32 3254.806 25564.67
"PSID126" 0 27 12 "white" 1 0 4043.927 3231.532 7240.86
"PSID127" 0 18 11 "white" 0 1 0 3226.161 15814.63
"PSID128" 0 24 7 "white" 1 1 7860.578 3213.629 0
"PSID129" 0 23 12 "white" 1 0 7856.66 3213.629 5535.564
"PSID130" 0 50 12 "white" 1 0 19929.66 3190.355 18597.19
"PSID131" 0 19 12 "white" 0 0 99.92261 3172.452 15436.33
"PSID132" 0 23 10 "white" 1 1 15811.28 3145.597 6398.556
"PSID133" 0 51 12 "white" 1 0 21001.38 3140.226 16015.6
"PSID134" 0 19 11 "black" 0 1 5607.422 3054.29 94.5745
"PSID135" 0 20 10 "white" 1 1 3099.56 2970.145 21141.83
"PSID136" 0 20 11 "hispan" 0 1 2868.367 2968.355 7403.41
"PSID137" 0 21 12 "white" 0 0 8128.998 2939.71 0
"PSID138" 0 39 10 "white" 1 1 0 2886 18761.22
"PSID139" 0 36 5 "white" 0 1 3814.692 2873.468 2751.527
"PSID140" 0 19 9 "black" 0 1 1079.556 2873.468 14344.29
"PSID141" 0 42 6 "hispan" 1 1 2425.572 2832.29 1907.745
"PSID142" 0 20 7 "white" 0 1 1902.448 2792.903 6098.578
"PSID143" 0 23 12 "white" 1 0 4954.986 2771.419 0
"PSID144" 0 35 12 "white" 1 0 1469.45 2719.5 0
"PSID145" 0 18 12 "white" 0 0 881.6701 2696.226 12120.31
"PSID146" 0 43 8 "white" 1 1 18338.74 2674.742 6395.601
"PSID147" 0 37 14 "white" 1 0 18501.36 2638.935 13429.58
"PSID148" 0 24 10 "white" 1 1 4719.874 2565.532 2173.736
"PSID149" 0 51 12 "white" 0 0 20742.76 2538.677 1019.631
"PSID150" 0 22 11 "hispan" 0 1 7341.373 2535.097 14187.65
"PSID151" 0 19 12 "white" 0 0 336.9939 2518.984 7118.209
"PSID152" 0 52 0 "hispan" 1 1 773.9104 2506.452 0
"PSID153" 0 21 12 "white" 0 0 2903.633 2456.323 4787.834
"PSID154" 0 24 12 "white" 0 0 9784.578 2413.355 0
"PSID155" 0 35 8 "white" 1 1 2241.401 2399.032 9460.406
"PSID156" 0 20 13 "white" 0 0 0 2352.484 0
"PSID157" 0 17 7 "black" 0 1 1054.086 2286.242 1613.677
"PSID158" 0 18 10 "black" 0 1 311.5234 2284.452 8154.095
"PSID159" 0 28 12 "black" 0 0 6285.328 2255.806 7310.313
"PSID160" 0 25 14 "hispan" 1 0 1622.273 2239.694 1892.968
"PSID161" 0 40 12 "hispan" 0 0 13616.9 2228.952 876.2919
"PSID162" 0 50 3 "white" 1 1 3136.786 2203.887 13976.34
"PSID163" 0 48 8 "white" 1 1 16050.31 2116.161 11600.15
"PSID164" 0 17 7 "hispan" 0 1 0 2082.145 6460.621
"PSID165" 0 30 12 "white" 1 0 7347.251 2080.355 14475.81
"PSID166" 0 30 7 "white" 1 1 574.0652 2010.532 366.4762
"PSID167" 0 22 11 "white" 1 1 3030.986 1976.516 0
"PSID168" 0 27 12 "white" 1 0 11493.06 1906.694 13419.24
"PSID169" 0 25 9 "white" 1 1 23377.97 1901.323 1898.879
"PSID170" 0 21 14 "white" 0 0 80.32994 1890.581 6389.69
"PSID171" 0 17 10 "white" 0 1 0 1888.79 19993.64
"PSID172" 0 39 7 "white" 0 1 7786.126 1844.032 9206.237
"PSID173" 0 18 9 "black" 0 1 1183.397 1822.548 803.8833
"PSID174" 0 25 12 "white" 1 0 2721.422 1754.516 1037.364
"PSID175" 0 20 8 "white" 1 1 2360.916 1741.984 0
"PSID176" 0 19 13 "white" 0 0 2366.794 1709.758 0
"PSID177" 0 19 11 "white" 0 1 0 1693.645 9853.481
"PSID178" 0 22 12 "white" 0 0 10137.25 1679.323 25564.67
"PSID179" 0 18 11 "black" 0 1 2068.986 1623.823 20243.38
"PSID180" 0 21 10 "white" 0 1 1767.259 1555.79 7675.312
"PSID181" 0 24 12 "white" 1 0 7643.1 1546.839 3262.82
"PSID182" 0 18 11 "white" 0 1 1273.523 1532.516 12489.75
"PSID183" 0 17 10 "white" 0 1 568.1874 1525.355 6231.573
"PSID184" 0 17 10 "white" 0 1 0 1503.871 7843.773
"PSID185" 0 18 10 "white" 0 1 0 1491.339 237.914
"PSID186" 0 53 10 "hispan" 0 1 7878.212 1489.548 13170.98
"PSID187" 0 18 11 "black" 0 1 1191.234 1478.806 3683.972
"PSID188" 0 17 10 "hispan" 0 1 0 1453.742 6918.716
"PSID189" 0 26 12 "black" 0 0 0 1448.371 0
"PSID190" 0 39 5 "white" 1 1 13082.02 1434.048 18323.81
"PSID191" 0 18 12 "black" 0 0 1579.169 1408.984 3057.416
"PSID192" 0 23 13 "white" 0 0 601.4949 1394.661 4975.505
"PSID193" 0 18 8 "white" 0 1 5023.56 1391.081 6756.166
"PSID194" 0 28 10 "white" 1 1 7578.444 1383.919 2404.261
"PSID195" 0 32 4 "white" 1 1 0 1378.548 0
"PSID196" 0 18 11 "black" 0 1 0 1367.806 33.98771
"PSID197" 0 40 10 "white" 1 1 1543.902 1342.742 0
"PSID198" 0 21 14 "white" 0 0 8456.196 1330.21 16967.26
"PSID199" 0 29 10 "hispan" 0 1 3732.403 1323.048 6694.101
"PSID200" 0 31 6 "white" 0 1 2666.562 1321.258 0
"PSID201" 0 46 7 "white" 1 1 19171.43 1317.677 0
"PSID202" 0 20 9 "hispan" 1 1 0 1283.661 0
"PSID203" 0 36 18 "white" 1 0 3273.935 1269.339 18227.76
"PSID204" 0 45 12 "white" 1 0 16559.72 1265.758 7987.112
"PSID205" 0 16 10 "white" 0 1 1026.656 1224.581 6847.785
"PSID206" 0 18 12 "white" 0 0 818.9735 1208.468 2232.845
"PSID207" 0 40 12 "hispan" 0 0 11867.28 1195.935 3873.121
"PSID208" 0 16 9 "white" 0 1 0 1188.774 2451.548
"PSID209" 0 16 10 "white" 0 1 574.0652 1181.613 5578.418
"PSID210" 0 28 5 "hispan" 1 1 10967.98 1178.032 239.3917
"PSID211" 0 20 12 "white" 0 0 0 1147.597 15554.55
"PSID212" 0 19 8 "white" 1 1 39.18534 1136.855 5327.204
"PSID213" 0 16 8 "white" 0 1 0 1113.581 542.3257
"PSID214" 0 20 11 "white" 1 1 2547.047 1099.258 0
"PSID215" 0 35 10 "white" 1 1 4964.782 1086.726 1745.195
"PSID216" 0 32 6 "hispan" 1 1 979.6334 1036.597 0
"PSID217" 0 32 16 "black" 0 0 17135.75 1031.226 0
"PSID218" 0 17 9 "black" 0 1 0 981.0968 8900.347
"PSID219" 0 16 7 "white" 0 1 0 975.7258 4728.725
"PSID220" 0 32 15 "white" 0 0 489.8167 968.5645 7684.178
"PSID221" 0 19 12 "white" 0 0 815.055 964.9839 12059.73
"PSID222" 0 40 12 "white" 1 0 16851.65 961.4032 17717.94
"PSID223" 0 50 7 "white" 1 1 11473.47 956.0323 0
"PSID224" 0 39 11 "white" 0 1 0 930.9677 0
"PSID225" 0 18 8 "hispan" 0 1 0 902.3226 1306.31
"PSID226" 0 39 10 "black" 0 1 844.444 889.7903 701.9201
"PSID227" 0 17 11 "hispan" 0 1 0 873.6774 7759.542
"PSID228" 0 17 5 "black" 0 1 96.00407 868.3065 0
"PSID229" 0 19 12 "white" 0 0 2425.572 861.1452 2587.499
"PSID230" 0 27 15 "white" 0 0 0 857.5645 3392.86
"PSID231" 0 18 11 "black" 0 1 587.78 841.4516 7933.914
"PSID232" 0 20 14 "white" 1 0 0 805.6452 1454.083
"PSID233" 0 20 12 "white" 1 0 12145.49 791.3226 13683.75
"PSID234" 0 19 13 "black" 0 0 1714.358 785.9516 9067.33
"PSID235" 0 24 8 "white" 1 1 213.5601 760.8871 2340.719
"PSID236" 0 27 12 "white" 1 0 4222.22 751.9355 0
"PSID237" 0 19 9 "white" 0 1 773.9104 676.7419 5647.871
"PSID238" 0 52 8 "black" 1 1 5454.599 666 0
"PSID239" 0 18 11 "hispan" 0 1 0 630.1935 0
"PSID240" 0 16 10 "hispan" 0 1 0 630.1935 3892.332
"PSID241" 0 18 12 "hispan" 0 0 0 630.1935 4843.988
"PSID242" 0 45 12 "white" 0 0 4473.006 608.7097 0
"PSID243" 0 21 14 "white" 0 0 9708.167 594.3871 2256.488
"PSID244" 0 36 8 "white" 1 1 2715.544 585.4355 0
"PSID245" 0 21 13 "white" 0 0 513.3279 578.2742 0
"PSID246" 0 41 7 "white" 1 1 19573.08 565.7419 0
"PSID247" 0 18 7 "white" 0 1 491.776 558.5806 642.8111
"PSID248" 0 39 9 "white" 0 1 11230.52 537.0968 5752.79
"PSID249" 0 19 3 "white" 1 1 0 537.0968 0
"PSID250" 0 32 13 "white" 1 0 12553.02 524.5645 15353.58
"PSID251" 0 16 9 "white" 0 1 0 485.1774 4112.513
"PSID252" 0 16 7 "white" 0 1 658.3136 479.8065 6210.885
"PSID253" 0 21 9 "black" 0 1 1030.574 470.8548 1223.558
"PSID254" 0 22 12 "white" 1 0 12096.51 469.0645 14289.62
"PSID255" 0 23 11 "hispan" 1 1 8946.012 469.0645 4776.012
"PSID256" 0 17 8 "black" 0 1 0 451.1613 0
"PSID257" 0 21 8 "white" 1 1 5699.507 388.5 8844.194
"PSID258" 0 18 10 "white" 0 1 0 386.7097 0
"PSID259" 0 24 12 "white" 1 0 9051.813 327.629 8547.171
"PSID260" 0 24 12 "black" 1 0 4232.016 320.4677 1273.8
"PSID261" 0 16 9 "white" 0 1 0 320.4677 3707.616
"PSID262" 0 20 8 "white" 1 1 621.0876 306.1452 5551.819
"PSID263" 0 42 8 "white" 0 1 17925.33 300.7742 14116.72
"PSID264" 0 17 8 "hispan" 0 1 391.8534 300.7742 18891.26
"PSID265" 0 19 8 "hispan" 0 1 368.3422 300.7742 18510
"PSID266" 0 17 9 "black" 0 1 0 297.1935 54.67588
"PSID267" 0 21 14 "white" 0 0 107.7597 293.6129 7698.955
"PSID268" 0 16 9 "black" 0 1 0 277.5 3983.951
"PSID269" 0 23 13 "black" 0 0 172.4155 272.129 582.2243
"PSID270" 0 16 9 "white" 0 1 411.446 254.2258 1725.985
"PSID271" 0 17 11 "hispan" 0 1 803.2994 248.8548 5173.521
"PSID272" 0 46 7 "white" 0 1 1081.515 245.2742 0
"PSID273" 0 32 10 "white" 1 1 4145.809 238.1129 8245.714
"PSID274" 0 18 11 "white" 0 1 131.2709 218.4194 7503.896
"PSID275" 0 23 12 "hispan" 1 0 0 216.629 0
"PSID276" 0 18 10 "white" 1 1 0 211.2581 14053.18
"PSID277" 0 19 10 "black" 0 1 1056.045 205.8871 0
"PSID278" 0 16 7 "black" 0 1 133.2301 205.8871 6145.865
"PSID279" 0 26 7 "white" 1 1 1538.024 189.7742 650.1997
"PSID280" 0 16 10 "white" 0 1 0 189.7742 2136.793
"PSID281" 0 17 10 "white" 0 1 0 182.6129 6423.677
"PSID282" 0 17 10 "white" 0 1 0 171.871 1483.637
"PSID283" 0 23 8 "white" 1 1 33.30754 166.5 0
"PSID284" 0 29 12 "white" 1 0 14641.6 162.9194 9473.705
"PSID285" 0 17 10 "white" 0 1 0 152.1774 10301.23
"PSID286" 0 49 8 "white" 1 1 14684.7 136.0645 14963.46
"PSID287" 0 20 10 "white" 1 1 6563.544 134.2742 15363.92
"PSID288" 0 40 16 "white" 1 0 0 114.5806 0
"PSID289" 0 19 10 "white" 0 1 1933.796 112.7903 675.321
"PSID290" 0 18 11 "white" 0 1 1481.206 57.29032 1421.573
"PSID291" 0 16 6 "black" 0 1 0 44.75806 0
"PSID292" 0 22 8 "white" 1 1 105.8004 42.96774 209.8372
"PSID293" 0 31 12 "black" 1 0 0 42.96774 11023.84
"PSID294" 0 20 11 "white" 1 1 4478.884 39.3871 6280.338
"PSID295" 0 17 11 "hispan" 0 1 601.4949 10.74194 1913.656
"PSID296" 0 50 12 "white" 1 0 25862.32 0 25564.67
"PSID297" 0 49 14 "white" 1 0 25862.32 0 25564.67
"PSID298" 0 47 9 "white" 1 1 25862.32 0 25564.67
"PSID299" 0 34 11 "hispan" 1 1 22198.49 0 0
"PSID300" 0 22 8 "black" 1 1 16961.37 0 959.0445
"PSID301" 0 27 12 "white" 1 0 15509.56 0 12593.19
"PSID302" 0 30 10 "white" 1 1 14913.94 0 11563.21
"PSID303" 0 52 12 "white" 1 0 14780.71 0 25564.67
"PSID304" 0 43 12 "white" 1 0 13321.05 0 16860.86
"PSID305" 0 27 9 "hispan" 1 1 12829.28 0 0
"PSID306" 0 35 13 "white" 0 0 9537.711 0 11269.14
"PSID307" 0 45 12 "white" 1 0 9277.128 0 12108.49
"PSID308" 0 22 11 "black" 1 1 9049.853 0 9088.018
"PSID309" 0 22 12 "white" 1 0 9022.424 0 3342.618
"PSID310" 0 23 11 "white" 1 1 8910.745 0 4183.444
"PSID311" 0 55 7 "white" 1 1 8832.375 0 0
"PSID312" 0 26 14 "white" 0 0 8411.132 0 0
"PSID313" 0 34 12 "white" 0 0 8125.079 0 6032.08
"PSID314" 0 22 11 "white" 0 1 8013.401 0 5748.356
"PSID315" 0 31 12 "white" 0 0 6156.016 0 4094.78
"PSID316" 0 19 12 "white" 0 0 5797.47 0 2160.436
"PSID317" 0 24 10 "white" 1 1 5523.173 0 5040.525
"PSID318" 0 36 12 "white" 1 0 5374.269 0 0
"PSID319" 0 20 9 "white" 1 1 5229.283 0 15892.95
"PSID320" 0 23 8 "white" 1 1 4610.155 0 0
"PSID321" 0 35 11 "white" 1 1 3975.352 0 21963.45
"PSID322" 0 23 12 "white" 0 0 3893.063 0 16324.45
"PSID323" 0 29 10 "white" 0 1 3751.996 0 251.2135
"PSID324" 0 24 9 "white" 1 1 3438.513 0 818.6605
"PSID325" 0 18 10 "white" 0 1 3360.143 0 0
"PSID326" 0 45 8 "black" 0 1 3299.405 0 31.03226
"PSID327" 0 21 13 "hispan" 0 0 3015.312 0 17627.8
"PSID328" 0 29 13 "white" 1 0 2780.2 0 14339.86
"PSID329" 0 21 15 "white" 0 0 2629.336 0 1717.118
"PSID330" 0 22 16 "black" 0 0 2564.68 0 116.7404
"PSID331" 0 24 12 "black" 1 0 2355.039 0 2448.593
"PSID332" 0 20 14 "white" 0 0 2210.053 0 2813.591
"PSID333" 0 19 6 "black" 0 1 1955.348 0 14998.92
"PSID334" 0 19 9 "hispan" 0 1 1822.118 0 3372.172
"PSID335" 0 19 12 "black" 0 0 1681.051 0 0
"PSID336" 0 20 13 "white" 0 0 1657.54 0 913.235
"PSID337" 0 19 12 "black" 0 0 1655.58 0 0
"PSID338" 0 26 5 "white" 1 1 1573.291 0 3700.227
"PSID339" 0 26 9 "hispan" 0 1 1563.495 0 2862.356
"PSID340" 0 23 12 "white" 0 0 1504.717 0 0
"PSID341" 0 20 9 "hispan" 0 1 1500.798 0 12618.31
"PSID342" 0 20 10 "white" 0 1 1412.631 0 6290.682
"PSID343" 0 36 11 "white" 1 1 1404.794 0 0
"PSID344" 0 39 12 "white" 1 0 1289.198 0 1202.869
"PSID345" 0 17 9 "black" 0 1 1222.582 0 422.6298
"PSID346" 0 55 3 "white" 0 1 1208.868 0 0
"PSID347" 0 28 8 "white" 1 1 1202.99 0 19516.33
"PSID348" 0 19 12 "hispan" 0 0 1058.004 0 8923.991
"PSID349" 0 37 7 "white" 1 1 963.9593 0 0
"PSID350" 0 16 9 "white" 1 1 920.8554 0 15997.87
"PSID351" 0 17 10 "white" 0 1 646.558 0 9438.24
"PSID352" 0 24 12 "black" 0 0 566.2281 0 2284.565
"PSID353" 0 19 11 "white" 0 1 540.7576 0 3406.16
"PSID354" 0 50 5 "black" 1 1 411.446 0 9166.338
"PSID355" 0 19 9 "black" 0 1 384.0163 0 0
"PSID356" 0 36 1 "black" 0 1 348.7495 0 0
"PSID357" 0 18 11 "white" 0 1 321.3198 0 7722.599
"PSID358" 0 16 7 "hispan" 0 1 289.9715 0 7515.717
"PSID359" 0 21 11 "white" 1 1 246.8676 0 6708.879
"PSID360" 0 55 6 "white" 1 1 111.6782 0 0
"PSID361" 0 37 12 "white" 0 0 48.98167 0 877.7696
"PSID362" 0 26 12 "hispan" 1 0 47.0224 0 0
"PSID363" 0 54 12 "white" 1 0 0 0 0
"PSID364" 0 50 12 "white" 1 0 0 0 0
"PSID365" 0 16 8 "white" 0 1 0 0 2559.422
"PSID366" 0 16 9 "hispan" 0 1 0 0 0
"PSID367" 0 18 10 "black" 0 1 0 0 2281.61
"PSID368" 0 40 11 "black" 1 1 0 0 0
"PSID369" 0 16 8 "white" 0 1 0 0 0
"PSID370" 0 16 9 "black" 0 1 0 0 2158.959
"PSID371" 0 26 14 "white" 0 0 0 0 6717.745
"PSID372" 0 20 9 "black" 0 1 0 0 6083.8
"PSID373" 0 20 12 "black" 0 0 0 0 0
"PSID374" 0 18 11 "black" 0 1 0 0 0
"PSID375" 0 46 11 "black" 1 1 0 0 2820.98
"PSID376" 0 17 8 "black" 0 1 0 0 12760.17
"PSID377" 0 16 9 "white" 0 1 0 0 4974.028
"PSID378" 0 30 10 "white" 1 1 0 0 3151.991
"PSID379" 0 33 12 "hispan" 1 0 0 0 5841.453
"PSID380" 0 34 12 "black" 1 0 0 0 18716.88
"PSID381" 0 21 13 "black" 0 0 0 0 17941.08
"PSID382" 0 29 11 "white" 1 1 0 0 0
"PSID383" 0 19 12 "white" 0 0 0 0 0
"PSID384" 0 31 4 "hispan" 0 1 0 0 1161.493
"PSID385" 0 19 12 "hispan" 0 0 0 0 18573.55
"PSID386" 0 20 12 "black" 0 0 0 0 11594.24
"PSID387" 0 55 4 "black" 0 1 0 0 0
"PSID388" 0 19 11 "black" 0 1 0 0 16485.52
"PSID389" 0 18 11 "black" 0 1 0 0 7146.286
"PSID390" 0 48 13 "white" 1 0 0 0 0
"PSID391" 0 16 9 "hispan" 1 1 0 0 6821.186
"PSID392" 0 17 10 "black" 0 1 0 0 0
"PSID393" 0 38 12 "white" 1 0 0 0 18756.78
"PSID394" 0 34 8 "white" 1 1 0 0 2664.341
"PSID395" 0 53 12 "white" 0 0 0 0 0
"PSID396" 0 48 14 "white" 1 0 0 0 7236.427
"PSID397" 0 16 9 "white" 0 1 0 0 6494.608
"PSID398" 0 17 8 "black" 0 1 0 0 4520.366
"PSID399" 0 27 14 "black" 0 0 0 0 10122.43
"PSID400" 0 37 8 "black" 0 1 0 0 648.722
"PSID401" 0 17 10 "black" 0 1 0 0 1053.619
"PSID402" 0 16 8 "white" 0 1 0 0 0
"PSID403" 0 48 12 "white" 1 0 0 0 1491.026
"PSID404" 0 55 7 "white" 0 1 0 0 0
"PSID405" 0 21 15 "white" 0 0 0 0 0
"PSID406" 0 16 10 "black" 0 1 0 0 1730.418
"PSID407" 0 23 12 "white" 0 0 0 0 3902.676
"PSID408" 0 46 11 "black" 1 1 0 0 0
"PSID409" 0 17 10 "white" 0 1 0 0 14942.77
"PSID410" 0 42 16 "white" 0 0 0 0 23764.8
"PSID411" 0 18 10 "black" 0 1 0 0 5306.516
"PSID412" 0 53 12 "black" 0 0 0 0 0
"PSID413" 0 17 10 "white" 1 1 0 0 3859.822
"PSID414" 0 17 6 "white" 0 1 0 0 0
"PSID415" 0 43 6 "white" 1 1 0 0 0
"PSID416" 0 34 12 "black" 0 0 0 0 0
"PSID417" 0 16 8 "hispan" 0 1 0 0 12242.96
"PSID418" 0 27 12 "white" 1 0 0 0 1533.88
"PSID419" 0 51 4 "black" 0 1 0 0 0
"PSID420" 0 39 2 "black" 1 1 0 0 964.9555
"PSID421" 0 55 8 "white" 1 1 0 0 0
"PSID422" 0 16 9 "white" 0 1 0 0 5551.819
"PSID423" 0 27 10 "black" 0 1 0 0 7543.794
"PSID424" 0 25 14 "white" 0 0 0 0 0
"PSID425" 0 18 11 "white" 0 1 0 0 10150.5
"PSID426" 0 24 1 "hispan" 1 1 0 0 19464.61
"PSID427" 0 21 18 "white" 0 0 0 0 0
"PSID428" 0 32 5 "black" 1 1 0 0 187.6713
"PSID429" 0 16 9 "white" 0 1 0 0 1495.459
MatchIt/man/ 0000755 0001762 0000144 00000000000 14512071217 012356 5 ustar ligges users MatchIt/man/matchit.Rd 0000644 0001762 0000144 00000054426 14435663335 014325 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/matchit.R
\name{matchit}
\alias{matchit}
\alias{print.matchit}
\title{Matching for Causal Inference}
\usage{
matchit(
formula,
data = NULL,
method = "nearest",
distance = "glm",
link = "logit",
distance.options = list(),
estimand = "ATT",
exact = NULL,
mahvars = NULL,
antiexact = NULL,
discard = "none",
reestimate = FALSE,
s.weights = NULL,
replace = FALSE,
m.order = NULL,
caliper = NULL,
std.caliper = TRUE,
ratio = 1,
verbose = FALSE,
include.obj = FALSE,
...
)
\method{print}{matchit}(x, ...)
}
\arguments{
\item{formula}{a two-sided \code{\link{formula}} object containing the treatment and
covariates to be used in creating the distance measure used in the matching.
This formula will be supplied to the functions that estimate the distance
measure. The formula should be specified as \code{A ~ X1 + X2 + ...} where
\code{A} represents the treatment variable and \code{X1} and \code{X2} are
covariates.}
\item{data}{a data frame containing the variables named in \code{formula}
and possibly in other arguments. If not found in \code{data}, the variables
will be sought in the environment.}
\item{method}{the matching method to be used. The allowed methods are
\code{\link[=method_nearest]{"nearest"}} for nearest neighbor matching (on
the propensity score by default), \code{\link[=method_optimal]{"optimal"}}
for optimal pair matching, \code{\link[=method_full]{"full"}} for optimal
full matching, \code{\link[=method_genetic]{"genetic"}} for genetic
matching, \code{\link[=method_cem]{"cem"}} for coarsened exact matching,
\code{\link[=method_exact]{"exact"}} for exact matching,
\code{\link[=method_cardinality]{"cardinality"}} for cardinality and
template matching, and \code{\link[=method_subclass]{"subclass"}} for
subclassification. When set to \code{NULL}, no matching will occur, but
propensity score estimation and common support restrictions will still occur
if requested. See the linked pages for each method for more details on what
these methods do, how the arguments below are used by each one, and what
additional arguments are allowed.}
\item{distance}{the distance measure to be used. Can be either the name of a
method of estimating propensity scores (e.g., \code{"glm"}), the name of a
method of computing a distance matrix from the covariates (e.g.,
\code{"mahalanobis"}), a vector of already-computed distance measures, or a
matrix of pairwise distances. See \code{\link{distance}} for allowable
options. The default is \code{"glm"} for propensity scores estimated with
logistic regression using \code{\link[=glm]{glm()}}. Ignored for some methods; see individual
methods pages for information on whether and how the distance measure is
used.}
\item{link}{when \code{distance} is specified as a string, an additional
argument controlling the link function used in estimating the distance
measure. Allowable options depend on the specific \code{distance} value
specified. See \code{\link{distance}} for allowable options with each
option. The default is \code{"logit"}, which, along with \code{distance = "glm"}, identifies the default measure as logistic regression propensity
scores.}
\item{distance.options}{a named list containing additional arguments
supplied to the function that estimates the distance measure as determined
by the argument to \code{distance}. See \link{distance} for an
example of its use.}
\item{estimand}{a string containing the name of the target estimand desired.
Can be one of \code{"ATT"} or \code{"ATC"}. Some methods accept \code{"ATE"}
as well. Default is \code{"ATT"}. See Details and the individual methods
pages for information on how this argument is used.}
\item{exact}{for methods that allow it, for which variables exact matching
should take place. Can be specified as a string containing the names of
variables in \code{data} to be used or a one-sided formula with the desired
variables on the right-hand side (e.g., \code{~ X3 + X4}). See the
individual methods pages for information on whether and how this argument is
used.}
\item{mahvars}{for methods that allow it, on which variables Mahalanobis
distance matching should take place when \code{distance} corresponds to
propensity scores. Usually used to perform Mahalanobis distance matching
within propensity score calipers, where the propensity scores are computed
using \code{formula} and \code{distance}. Can be specified as a string
containing the names of variables in \code{data} to be used or a one-sided
formula with the desired variables on the right-hand side (e.g., \code{~ X3 + X4}). See the individual methods pages for information on whether and how
this argument is used.}
\item{antiexact}{for methods that allow it, for which variables anti-exact
matching should take place. Anti-exact matching ensures paired individuals
do not have the same value of the anti-exact matching variable(s). Can be
specified as a string containing the names of variables in \code{data} to be
used or a one-sided formula with the desired variables on the right-hand
side (e.g., \code{~ X3 + X4}). See the individual methods pages for
information on whether and how this argument is used.}
\item{discard}{a string containing a method for discarding units outside a
region of common support. When a propensity score is estimated or supplied
to \code{distance} as a vector, the options are \code{"none"},
\code{"treated"}, \code{"control"}, or \code{"both"}. For \code{"none"}, no
units are discarded for common support. Otherwise, units whose propensity
scores fall outside the corresponding region are discarded. Can also be a
\code{logical} vector where \code{TRUE} indicates the unit is to be
discarded. Default is \code{"none"} for no common support restriction. See
Details.}
\item{reestimate}{if \code{discard} is not \code{"none"} and propensity
scores are estimated, whether to re-estimate the propensity scores in the
remaining sample. Default is \code{FALSE} to use the propensity scores
estimated in the original sample.}
\item{s.weights}{an optional numeric vector of sampling weights to be
incorporated into propensity score models and balance statistics. Can also
be specified as a string containing the name of a variable in \code{data} to
be used or a one-sided formula with the variable on the right-hand side
(e.g., \code{~ SW}). Not all propensity score models accept sampling
weights; see \link{distance} for information on which do and do not,
and see \code{vignette("sampling-weights")} for details on how to use
sampling weights in a matching analysis.}
\item{replace}{for methods that allow it, whether matching should be done
with replacement (\code{TRUE}), where control units are allowed to be
matched to several treated units, or without replacement (\code{FALSE}),
where control units can only be matched to one treated unit each. See the
individual methods pages for information on whether and how this argument is
used. Default is \code{FALSE} for matching without replacement.}
\item{m.order}{for methods that allow it, the order that the matching takes
place. Allowable options depend on the matching method. The default of
\code{NULL} corresponds to \code{"largest"} when a propensity score is
estimated or supplied as a vector and \code{"data"} otherwise.}
\item{caliper}{for methods that allow it, the width(s) of the caliper(s) to
use in matching. Should be a numeric vector with each value named according
to the variable to which the caliper applies. To apply to the distance
measure, the value should be unnamed. See the individual methods pages for
information on whether and how this argument is used. The default is
\code{NULL} for no caliper.}
\item{std.caliper}{\code{logical}; when a caliper is specified, whether the
caliper is in standard deviation units (\code{TRUE}) or raw units
(\code{FALSE}). Can either be of length 1, applying to all calipers, or of
length equal to the length of \code{caliper}. Default is \code{TRUE}.}
\item{ratio}{for methods that allow it, how many control units should be
matched to each treated unit in k:1 matching. Should be a single integer
value. See the individual methods pages for information on whether and how
this argument is used. The default is 1 for 1:1 matching.}
\item{verbose}{\code{logical}; whether information about the matching
process should be printed to the console. What is printed depends on the
matching method. Default is \code{FALSE} for no printing other than
warnings.}
\item{include.obj}{\code{logical}; whether to include any objects created in
the matching process in the output, i.e., by the functions from other
packages \code{matchit()} calls. What is included depends on the matching
method. Default is \code{FALSE}.}
\item{\dots}{additional arguments passed to the functions used in the
matching process. See the individual methods pages for information on what
additional arguments are allowed for each method. Ignored for \code{print()}.}
\item{x}{a \code{matchit} object.}
}
\value{
When \code{method} is something other than \code{"subclass"}, a
\code{matchit} object with the following components:
\item{match.matrix}{a matrix containing the matches. The rownames correspond
to the treated units and the values in each row are the names (or indices)
of the control units matched to each treated unit. When treated units are
matched to different numbers of control units (e.g., with exact matching or
matching with a caliper), empty spaces will be filled with \code{NA}. Not
included when \code{method} is \code{"full"}, \code{"cem"} (unless \code{k2k = TRUE}), \code{"exact"}, or \code{"cardinality"}.}
\item{subclass}{a factor
containing matching pair/stratum membership for each unit. Unmatched units
will have a value of \code{NA}. Not included when \code{replace = TRUE}.}
\item{weights}{a numeric vector of estimated matching weights. Unmatched and
discarded units will have a weight of zero.}
\item{model}{the fit object of
the model used to estimate propensity scores when \code{distance} is
specified and not \code{"mahalanobis"} or a numeric vector. When
\code{reestimate = TRUE}, this is the model estimated after discarding
units.}
\item{X}{a data frame of covariates mentioned in \code{formula},
\code{exact}, \code{mahvars}, and \code{antiexact}.}
\item{call}{the \code{matchit()} call.}
\item{info}{information on the matching method and
distance measures used.}
\item{estimand}{the argument supplied to
\code{estimand}.}
\item{formula}{the \code{formula} supplied.}
\item{treat}{a vector of treatment status converted to zeros (0) and ones
(1) if not already in that format.}
\item{distance}{a vector of distance
values (i.e., propensity scores) when \code{distance} is supplied as a
method of estimating propensity scores or a numeric vector.}
\item{discarded}{a logical vector denoting whether each observation was
discarded (\code{TRUE}) or not (\code{FALSE}) by the argument to
\code{discard}.}
\item{s.weights}{the vector of sampling weights supplied to
the \code{s.weights} argument, if any.}
\item{exact}{a one-sided formula
containing the variables, if any, supplied to \code{exact}.}
\item{mahvars}{a one-sided formula containing the variables, if any,
supplied to \code{mahvars}.}
\item{obj}{when \code{include.obj = TRUE}, an
object containing the intermediate results of the matching procedure. See
the individual methods pages for what this component will contain.}
When \code{method = "subclass"}, a \code{matchit.subclass} object with the same
components as above except that \code{match.matrix} is excluded and one
additional component, \code{q.cut}, is included, containing a vector of the
distance measure cutpoints used to define the subclasses. See
\code{\link{method_subclass}} for details.
}
\description{
\code{matchit()} is the main function of \emph{MatchIt} and performs
pairing, subset selection, and subclassification with the aim of creating
treatment and control groups balanced on included covariates. \emph{MatchIt}
implements the suggestions of Ho, Imai, King, and Stuart (2007) for
improving parametric statistical models by preprocessing data with
nonparametric matching methods.
This page documents the overall use of \code{matchit()}, but for specifics
of how \code{matchit()} works with individual matching methods, see the
individual pages linked in the Details section below.
}
\details{
Details for the various matching methods can be found at the following help
pages:
\itemize{
\item \code{\link{method_nearest}} for nearest neighbor matching
\item \code{\link{method_optimal}} for optimal pair matching
\item \code{\link{method_full}} for optimal full matching
\item \code{\link{method_genetic}} for genetic matching
\item \code{\link{method_cem}} for coarsened exact matching
\item \code{\link{method_exact}} for exact matching
\item \code{\link{method_cardinality}} for cardinality and template matching
\item \code{\link{method_subclass}} for subclassification
}
The pages contain information on what the method does, which of the arguments above are
allowed with them and how they are interpreted, and what additional
arguments can be supplied to further tune the method. Note that the default
method with no arguments supplied other than \code{formula} and \code{data}
is 1:1 nearest neighbor matching without replacement on a propensity score
estimated using a logistic regression of the treatment on the covariates.
This is not the same default offered by other matching programs, such as
those in \emph{Matching}, \code{teffects} in Stata, or \verb{PROC PSMATCH}
in SAS, so care should be taken if trying to replicate the results of those
programs.
When \code{method = NULL}, no matching will occur, but any propensity score
estimation and common support restriction will. This can be a simple way to
estimate the propensity score for use in future matching specifications
without having to re-estimate it each time. The \code{matchit()} output with
no matching can be supplied to \code{summary()} to examine balance prior to
matching on any of the included covariates and on the propensity score if
specified. All arguments other than \code{distance}, \code{discard}, and
\code{reestimate} will be ignored.
See \link{distance} for details on the several ways to
specify the \code{distance}, \code{link}, and \code{distance.options}
arguments to estimate propensity scores and create distance measures.
When the treatment variable is not a \code{0/1} variable, it will be coerced
to one and returned as such in the \code{matchit()} output (see section
Value, below). The following rules are used: 1) if \code{0} is one of the
values, it will be considered the control and the other value the treated;
2) otherwise, if the variable is a factor, \code{levels(treat)[1]} will be
considered control and the other value the treated; 3) otherwise,
\code{sort(unique(treat))[1]} will be considered control and the other value
the treated. It is safest to ensure the treatment variable is a \code{0/1}
variable.
The \code{discard} option implements a common support restriction. It can
only be used when a distance measure is an estimated propensity score or supplied as a vector and is ignored for some matching
methods. When specified as \code{"treated"}, treated units whose distance
measure is outside the range of distance measures of the control units will
be discarded. When specified as \code{"control"}, control units whose
distance measure is outside the range of distance measures of the treated
units will be discarded. When specified as \code{"both"}, treated and
control units whose distance measure is outside the intersection of the
range of distance measures of the treated units and the range of distance
measures of the control units will be discarded. When \code{reestimate = TRUE} and \code{distance} corresponds to a propensity score-estimating
function, the propensity scores are re-estimated in the remaining units
prior to being used for matching or calipers.
Caution should be used when interpreting effects estimated with various
values of \code{estimand}. Setting \code{estimand = "ATT"} doesn't
necessarily mean the average treatment effect in the treated is being
estimated; it just means that for matching methods, treated units will be
untouched and given weights of 1 and control units will be matched to them
(and the opposite for \code{estimand = "ATC"}). If a caliper is supplied or
treated units are removed for common support or some other reason (e.g.,
lacking matches when using exact matching), the actual estimand targeted is
not the ATT but the treatment effect in the matched sample. The argument to
\code{estimand} simply triggers which units are matched to which, and for
stratification-based methods (exact matching, CEM, full matching, and
subclassification), determines the formula used to compute the
stratification weights.
\subsection{How Matching Weights Are Computed}{
Matching weights are computed in one of two ways depending on whether matching was done with replacement
or not.
For matching \emph{without} replacement (except for cardinality matching), each
unit is assigned to a subclass, which represents the pair they are a part of
(in the case of k:1 matching) or the stratum they belong to (in the case of
exact matching, coarsened exact matching, full matching, or
subclassification). The formula for computing the weights depends on the
argument supplied to \code{estimand}. A new "stratum propensity score"
(\code{sp}) is computed as the proportion of units in each stratum that are
in the treated group, and all units in that stratum are assigned that
stratum propensity score. This is distinct from the propensity score used for matching, if any. Weights are then computed using the standard formulas for
inverse probability weights with the stratum propensity score inserted: for the ATT, weights are 1 for the treated
units and \code{sp/(1-sp)} for the control units; for the ATC, weights are
\code{(1-sp)/sp} for the treated units and 1 for the control units; for the
ATE, weights are \code{1/sp} for the treated units and \code{1/(1-sp)} for the
control units. For cardinality matching, all matched units receive a weight
of 1.
For matching \emph{with} replacement, units are not assigned to unique strata. For
the ATT, each treated unit gets a weight of 1. Each control unit is weighted
as the sum of the inverse of the number of control units matched to the same
treated unit across its matches. For example, if a control unit was matched
to a treated unit that had two other control units matched to it, and that
same control was matched to a treated unit that had one other control unit
matched to it, the control unit in question would get a weight of 1/3 + 1/2
= 5/6. For the ATC, the same is true with the treated and control labels
switched. The weights are computed using the \code{match.matrix} component
of the \code{matchit()} output object.
In each treatment group, weights are divided by the mean of the nonzero
weights in that treatment group to make the weights sum to the number of
units in that treatment group. If sampling weights are included through the
\code{s.weights} argument, they will be included in the \code{matchit()}
output object but not incorporated into the matching weights.
\code{\link[=match.data]{match.data()}}, which extracts the matched set from a \code{matchit} object,
combines the matching weights and sampling weights.
}
}
\examples{
data("lalonde")
# Default: 1:1 NN PS matching w/o replacement
m.out1 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde)
m.out1
summary(m.out1)
# 1:1 NN Mahalanobis distance matching w/ replacement and
# exact matching on married and race
m.out2 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
distance = "mahalanobis", replace = TRUE,
exact = ~ married + race)
m.out2
summary(m.out2, un = TRUE)
# 2:1 NN Mahalanobis distance matching within caliper defined
# by a probit regression PS
m.out3 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
distance = "glm", link = "probit",
mahvars = ~ age + educ + re74 + re75,
caliper = .1, ratio = 2)
m.out3
summary(m.out3, un = TRUE)
# Optimal full PS matching for the ATE within calipers on
# PS, age, and educ
\dontshow{if (requireNamespace("optmatch", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
m.out4 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
method = "full", estimand = "ATE",
caliper = c(.1, age = 2, educ = 1),
std.caliper = c(TRUE, FALSE, FALSE))
m.out4
summary(m.out4, un = TRUE)
\dontshow{\}) # examplesIf}
# Subclassification on a logistic PS with 10 subclasses after
# discarding controls outside common support of PS
s.out1 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
method = "subclass", distance = "glm",
discard = "control", subclass = 10)
s.out1
summary(s.out1, un = TRUE)
}
\references{
Ho, D. E., Imai, K., King, G., & Stuart, E. A. (2007). Matching
as Nonparametric Preprocessing for Reducing Model Dependence in Parametric
Causal Inference. \emph{Political Analysis}, 15(3), 199–236. \doi{10.1093/pan/mpl013}
Ho, D. E., Imai, K., King, G., & Stuart, E. A. (2011). MatchIt:
Nonparametric Preprocessing for Parametric Causal Inference. \emph{Journal of
Statistical Software}, 42(8). \doi{10.18637/jss.v042.i08}
}
\seealso{
\code{\link[=summary.matchit]{summary.matchit()}} for balance assessment after matching, \code{\link[=plot.matchit]{plot.matchit()}} for plots of covariate balance and propensity score overlap after matching.
\code{vignette("MatchIt")} for an introduction to matching with
\emph{MatchIt}; \code{vignette("matching-methods")} for descriptions of the
variety of matching methods and options available;
\code{vignette("assessing-balance")} for information on assessing the
quality of a matching specification; \code{vignette("estimating-effects")}
for instructions on how to estimate treatment effects after matching; and
\code{vignette("sampling-weights")} for a guide to using \emph{MatchIt} with
sampling weights.
}
\author{
Daniel Ho (\email{dho@law.stanford.edu}); Kosuke Imai
(\email{imai@harvard.edu}); Gary King (\email{king@harvard.edu});
Elizabeth Stuart (\email{estuart@jhsph.edu})
Version 4.0.0 update by Noah Greifer (\email{noah.greifer@gmail.com})
}
MatchIt/man/add_s.weights.Rd 0000644 0001762 0000144 00000005034 14334100747 015375 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/add_s.weights.R
\name{add_s.weights}
\alias{add_s.weights}
\title{Add sampling weights to a \code{matchit} object}
\usage{
add_s.weights(m, s.weights = NULL, data = NULL)
}
\arguments{
\item{m}{a \code{matchit} object; the output of a call to \code{\link[=matchit]{matchit()}},
typically with the \code{s.weights} argument unspecified.}
\item{s.weights}{a numeric vector of sampling weights to be added to the
\code{matchit} object. Can also be specified as a string containing the name
of a variable in \code{data} to be used or a one-sided formula with the
variable on the right-hand side (e.g., \code{~ SW}).}
\item{data}{a data frame containing the sampling weights if given as a
string or formula. If unspecified, \code{add_s.weights()} will attempt to find
the dataset using the environment of the \code{matchit} object.}
}
\value{
a \code{matchit} object with an \code{s.weights} component
containing the supplied sampling weights. If \code{s.weights = NULL}, the original
\code{matchit} object is returned.
}
\description{
Adds sampling weights to a \code{matchit} object so that they are
incorporated into balance assessment and creation of the weights. This would
typically only be used when an argument to \code{s.weights} was not supplied
to \code{\link[=matchit]{matchit()}} (i.e., because they were not to be included in the estimation
of the propensity score) but sampling weights are required for generalizing
an effect to the correct population. Without adding sampling weights to the
\code{matchit} object, balance assessment tools (i.e., \code{\link[=summary.matchit]{summary.matchit()}}
and \code{\link[=plot.matchit]{plot.matchit()}}) will not calculate balance statistics correctly, and
the weights produced by \code{\link[=match.data]{match.data()}} and \code{\link[=get_matches]{get_matches()}} will not
incorporate the sampling weights.
}
\examples{
data("lalonde")
# Generate random sampling weights, just
# for this example
sw <- rchisq(nrow(lalonde), 2)
# NN PS match using logistic regression PS that doesn't
# include sampling weights
m.out <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde)
m.out
# Add s.weights to the matchit object
m.out <- add_s.weights(m.out, sw)
m.out #note additional output
# Check balance; note that sample sizes incorporate
# s.weights
summary(m.out, improvement = FALSE)
}
\seealso{
\code{\link[=matchit]{matchit()}}; \code{\link[=match.data]{match.data()}}
}
\author{
Noah Greifer
}
MatchIt/man/method_full.Rd 0000644 0001762 0000144 00000025445 14335005654 015166 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/matchit2full.R
\name{method_full}
\alias{method_full}
\title{Optimal Full Matching}
\arguments{
\item{formula}{a two-sided \link{formula} object containing the treatment and
covariates to be used in creating the distance measure used in the matching.
This formula will be supplied to the functions that estimate the distance
measure.}
\item{data}{a data frame containing the variables named in \code{formula}.
If not found in \code{data}, the variables will be sought in the
environment.}
\item{method}{set here to \code{"full"}.}
\item{distance}{the distance measure to be used. See \code{\link{distance}}
for allowable options. Can be supplied as a distance matrix.}
\item{link}{when \code{distance} is specified as a method of estimating
propensity scores, an additional argument controlling the link function used
in estimating the distance measure. See \code{\link{distance}} for allowable
options with each option.}
\item{distance.options}{a named list containing additional arguments
supplied to the function that estimates the distance measure as determined
by the argument to \code{distance}.}
\item{estimand}{a string containing the desired estimand. Allowable options
include \code{"ATT"}, \code{"ATC"}, and \code{"ATE"}. The estimand controls
how the weights are computed; see the How Matching Weights Are Computed section at
\code{\link[=matchit]{matchit()}} for details.}
\item{exact}{for which variables exact matching should take place.}
\item{mahvars}{for which variables Mahalanobis distance matching should take
place when \code{distance} corresponds to a propensity score (e.g., for
caliper matching or to discard units for common support). If specified, the
distance measure will not be used in matching.}
\item{antiexact}{for which variables anti-exact matching should take place.
Anti-exact matching is processed using \pkgfun{optmatch}{antiExactMatch}.}
\item{discard}{a string containing a method for discarding units outside a
region of common support. Only allowed when \code{distance} corresponds to a
propensity score.}
\item{reestimate}{if \code{discard} is not \code{"none"}, whether to
re-estimate the propensity score in the remaining sample prior to matching.}
\item{s.weights}{the variable containing sampling weights to be incorporated
into propensity score models and balance statistics.}
\item{caliper}{the width(s) of the caliper(s) used for caliper matching.
Calipers are processed by \pkgfun{optmatch}{caliper}. See Notes and Examples.}
\item{std.caliper}{\code{logical}; when calipers are specified, whether they
are in standard deviation units (\code{TRUE}) or raw units (\code{FALSE}).}
\item{verbose}{\code{logical}; whether information about the matching
process should be printed to the console.}
\item{\dots}{additional arguments passed to \pkgfun{optmatch}{fullmatch}.
Allowed arguments include \code{min.controls}, \code{max.controls},
\code{omit.fraction}, \code{mean.controls}, \code{tol}, and \code{solver}.
See the \pkgfun{optmatch}{fullmatch} documentation for details. In general,
\code{tol} should be set to a low number (e.g., \code{1e-7}) to get a more
precise solution.
The arguments \code{replace}, \code{m.order}, and \code{ratio} are ignored with a warning.}
}
\description{
In \code{\link[=matchit]{matchit()}}, setting \code{method = "full"} performs optimal full
matching, which is a form of subclassification wherein all units, both
treatment and control (i.e., the "full" sample), are assigned to a subclass
and receive at least one match. The matching is optimal in the sense that
the sum of the absolute distances between the treated and control units in
each subclass is as small as possible. The method relies on and is a wrapper
for \pkgfun{optmatch}{fullmatch}.
Advantages of optimal full matching include that the matching order is not
required to be specified, units do not need to be discarded, and it is less
likely that extreme within-subclass distances will be large, unlike with
standard subclassification. The primary output of full matching is a set of
matching weights that can be applied to the matched sample; in this way,
full matching can be seen as a robust alternative to propensity score
weighting, robust in the sense that the propensity score model does not need
to be correct to estimate the treatment effect without bias. Note: with large samples, the optimization may fail or run very slowly; one can try using \code{\link[=method_quick]{method = "quick"}} instead, which also performs full matching but can be much faster.
This page details the allowable arguments with \code{method = "full"}.
See \code{\link[=matchit]{matchit()}} for an explanation of what each argument means in a general
context and how it can be specified.
Below is how \code{matchit()} is used for optimal full matching:
\preformatted{
matchit(formula,
data = NULL,
method = "full",
distance = "glm",
link = "logit",
distance.options = list(),
estimand = "ATT",
exact = NULL,
mahvars = NULL,
antiexact = NULL,
discard = "none",
reestimate = FALSE,
s.weights = NULL,
caliper = NULL,
std.caliper = TRUE,
verbose = FALSE,
...)
}
}
\details{
\subsection{Mahalanobis Distance Matching}{
Mahalanobis distance matching can be done one of two ways:
\enumerate{
\item{
If no propensity score needs to be estimated, \code{distance} should be
set to \code{"mahalanobis"}, and Mahalanobis distance matching will occur
using all the variables in \code{formula}. Arguments to \code{discard} and
\code{mahvars} will be ignored, and a caliper can only be placed on named
variables. For example, to perform simple Mahalanobis distance matching, the
following could be run:
\preformatted{
matchit(treat ~ X1 + X2, method = "full",
distance = "mahalanobis") }
With this code, the Mahalanobis distance is computed using \code{X1} and
\code{X2}, and matching occurs on this distance. The \code{distance}
component of the \code{matchit()} output will be empty.
}
\item{
If a propensity score needs to be estimated for any reason, e.g., for
common support with \code{discard} or for creating a caliper,
\code{distance} should be whatever method is used to estimate the propensity
score or a vector of distance measures, i.e., it should not be
\code{"mahalanobis"}. Use \code{mahvars} to specify the variables used to
create the Mahalanobis distance. For example, to perform Mahalanobis within
a propensity score caliper, the following could be run:
\preformatted{
matchit(treat ~ X1 + X2 + X3, method = "full",
distance = "glm", caliper = .25,
mahvars = ~ X1 + X2) }
With this code, \code{X1}, \code{X2}, and \code{X3} are used to estimate the
propensity score (using the \code{"glm"} method, which by default is
logistic regression), which is used to create a matching caliper. The actual
matching occurs on the Mahalanobis distance computed only using \code{X1}
and \code{X2}, which are supplied to \code{mahvars}. Units whose propensity
score difference is larger than the caliper will not be paired, and some
treated units may therefore not receive a match. The estimated propensity
scores will be included in the \code{distance} component of the
\code{matchit()} output. See Examples.
}
}
}
}
\note{
Calipers can only be used when \code{min.controls} is left at its
default.
The option \code{"optmatch_max_problem_size"} is automatically set to
\code{Inf} during the matching process, different from its default in
\emph{optmatch}. This enables matching problems of any size to be run, but
may also let huge, infeasible problems get through and potentially take a
long time or crash R. See \pkgfun{optmatch}{setMaxProblemSize} for more details.
}
\section{Outputs}{
All outputs described in \code{\link[=matchit]{matchit()}} are returned with
\code{method = "full"} except for \code{match.matrix}. This is because
matching strata are not indexed by treated units as they are in some other
forms of matching. When \code{include.obj = TRUE} in the call to
\code{matchit()}, the output of the call to \pkgfun{optmatch}{fullmatch} will be
included in the output. When \code{exact} is specified, this will be a list
of such objects, one for each stratum of the \code{exact} variables.
}
\examples{
\dontshow{if (requireNamespace("optmatch", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
data("lalonde")
# Optimal full PS matching
m.out1 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
method = "full")
m.out1
summary(m.out1)
# Optimal full Mahalanobis distance matching within a PS caliper
m.out2 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
method = "full", caliper = .01,
mahvars = ~ age + educ + re74 + re75)
m.out2
summary(m.out2, un = FALSE)
# Optimal full Mahalanobis distance matching within calipers
# of 500 on re74 and re75
m.out3 <- matchit(treat ~ age + educ + re74 + re75,
data = lalonde, distance = "mahalanobis",
method = "full",
caliper = c(re74 = 500, re75 = 500),
std.caliper = FALSE)
m.out3
summary(m.out3, addlvariables = ~race + nodegree + married,
data = lalonde, un = FALSE)
\dontshow{\}) # examplesIf}
}
\references{
In a manuscript, be sure to cite the following paper if using
\code{matchit()} with \code{method = "full"}:
Hansen, B. B., & Klopfer, S. O. (2006). Optimal Full Matching and Related
Designs via Network Flows. \emph{Journal of Computational and Graphical Statistics},
15(3), 609–627. \doi{10.1198/106186006X137047}
For example, a sentence might read:
\emph{Optimal full matching was performed using the MatchIt package (Ho,
Imai, King, & Stuart, 2011) in R, which calls functions from the optmatch
package (Hansen & Klopfer, 2006).}
Theory is also developed in the following article:
Hansen, B. B. (2004). Full Matching in an Observational Study of Coaching
for the SAT. Journal of the American Statistical Association, 99(467),
609–618. \doi{10.1198/016214504000000647}
}
\seealso{
\code{\link[=matchit]{matchit()}} for a detailed explanation of the inputs and outputs of
a call to \code{matchit()}.
\pkgfun{optmatch}{fullmatch}, which is the workhorse.
\code{\link{method_optimal}} for optimal pair matching, which is a special
case of optimal full matching, and which relies on similar machinery.
Results from \code{method = "optimal"} can be replicated with \code{method = "full"} by setting \code{min.controls}, \code{max.controls}, and
\code{mean.controls} to the desired \code{ratio}.
\code{\link{method_quick}} for fast generalized quick matching, which is very similar to optimal full matching but can be dramatically faster at the expense of optimality and is less customizable.
}
MatchIt/man/method_nearest.Rd 0000644 0001762 0000144 00000034545 14435663335 015675 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/matchit2nearest.R
\name{method_nearest}
\alias{method_nearest}
\title{Nearest Neighbor Matching}
\arguments{
\item{formula}{a two-sided \link{formula} object containing the treatment and
covariates to be used in creating the distance measure used in the matching.}
\item{data}{a data frame containing the variables named in \code{formula}.
If not found in \code{data}, the variables will be sought in the
environment.}
\item{method}{set here to \code{"nearest"}.}
\item{distance}{the distance measure to be used. See \code{\link{distance}}
for allowable options. Can be supplied as a distance matrix.}
\item{link}{when \code{distance} is specified as a method of estimating
propensity scores, an additional argument controlling the link function used
in estimating the distance measure. See \code{\link{distance}} for allowable
options with each option.}
\item{distance.options}{a named list containing additional arguments
supplied to the function that estimates the distance measure as determined
by the argument to \code{distance}.}
\item{estimand}{a string containing the desired estimand. Allowable options
include \code{"ATT"} and \code{"ATC"}. See Details.}
\item{exact}{for which variables exact matching should take place.}
\item{mahvars}{for which variables Mahalanobis distance matching should take
place when \code{distance} corresponds to a propensity score (e.g., for
caliper matching or to discard units for common support). If specified, the
distance measure will not be used in matching.}
\item{antiexact}{for which variables anti-exact matching should take place.}
\item{discard}{a string containing a method for discarding units outside a
region of common support. Only allowed when \code{distance} corresponds to a
propensity score.}
\item{reestimate}{if \code{discard} is not \code{"none"}, whether to
re-estimate the propensity score in the remaining sample prior to matching.}
\item{s.weights}{the variable containing sampling weights to be incorporated
into propensity score models and balance statistics.}
\item{replace}{whether matching should be done with replacement.}
\item{m.order}{the order that the matching takes place. Allowable options
include \code{"largest"}, where matching takes place in descending order of
distance measures; \code{"smallest"}, where matching takes place in ascending
order of distance measures; \code{"closest"}, where matching takes place in
order of the distance between units; \code{"random"}, where matching takes place
in a random order; and \code{"data"} where matching takes place based on the
order of units in the data. When \code{m.order = "random"}, results may differ
across different runs of the same code unless a seed is set and specified
with \code{\link[=set.seed]{set.seed()}}. The default of \code{NULL} corresponds to \code{"largest"} when a
propensity score is estimated or supplied as a vector and \code{"data"}
otherwise.}
\item{caliper}{the width(s) of the caliper(s) used for caliper matching. See
Details and Examples.}
\item{std.caliper}{\code{logical}; when calipers are specified, whether they
are in standard deviation units (\code{TRUE}) or raw units (\code{FALSE}).}
\item{ratio}{how many control units should be matched to each treated unit
for k:1 matching. For variable ratio matching, see section "Variable Ratio
Matching" in Details below.}
\item{min.controls, max.controls}{for variable ratio matching, the minimum
and maximum number of control units to be matched to each treated unit. See
section "Variable Ratio Matching" in Details below.}
\item{verbose}{\code{logical}; whether information about the matching
process should be printed to the console. When \code{TRUE}, a progress bar
implemented using \emph{RcppProgress} will be displayed.}
\item{\dots}{additional arguments that control the matching specification:
\describe{
\item{\code{reuse.max}}{ \code{numeric}; the maximum number of
times each control can be used as a match. Setting \code{reuse.max = 1}
corresponds to matching without replacement (i.e., \code{replace = FALSE}),
and setting \code{reuse.max = Inf} corresponds to traditional matching with
replacement (i.e., \code{replace = TRUE}) with no limit on the number of
times each control unit can be matched. Other values restrict the number of
times each control can be matched when matching with replacement.
\code{replace} is ignored when \code{reuse.max} is specified. }
\item{\code{unit.id}}{ one or more variables containing a unit ID for each
observation, i.e., in case multiple observations correspond to the same
unit. Once a control observation has been matched, no other observation with
the same unit ID can be used as matches. This ensures each control unit is
used only once even if it has multiple observations associated with it.
Omitting this argument is the same as giving each observation a unique ID.
Ignored when \code{replace = TRUE}. }
}}
}
\description{
In \code{\link[=matchit]{matchit()}}, setting \code{method = "nearest"} performs greedy nearest
neighbor matching. A distance is computed between each treated unit and each
control unit, and, one by one, each treated unit is assigned a control unit
as a match. The matching is "greedy" in the sense that there is no action
taken to optimize an overall criterion; each match is selected without
considering the other matches that may occur subsequently.
This page details the allowable arguments with \code{method = "nearest"}.
See \code{\link[=matchit]{matchit()}} for an explanation of what each argument means in a general
context and how it can be specified.
Below is how \code{matchit()} is used for nearest neighbor matching:
\preformatted{
matchit(formula,
data = NULL,
method = "nearest",
distance = "glm",
link = "logit",
distance.options = list(),
estimand = "ATT",
exact = NULL,
mahvars = NULL,
antiexact = NULL,
discard = "none",
reestimate = FALSE,
s.weights = NULL,
replace = FALSE,
m.order = NULL,
caliper = NULL,
ratio = 1,
min.controls = NULL,
max.controls = NULL,
verbose = FALSE,
...) }
}
\details{
\subsection{Mahalanobis Distance Matching}{
Mahalanobis distance matching can be done one of two ways:
\enumerate{
\item{If no propensity score needs to be estimated, \code{distance} should be
set to \code{"mahalanobis"}, and Mahalanobis distance matching will occur
using all the variables in \code{formula}. Arguments to \code{discard} and
\code{mahvars} will be ignored, and a caliper can only be placed on named
variables. For example, to perform simple Mahalanobis distance matching, the
following could be run:
\preformatted{
matchit(treat ~ X1 + X2, method = "nearest",
distance = "mahalanobis") }
With this code, the Mahalanobis distance is computed using \code{X1} and
\code{X2}, and matching occurs on this distance. The \code{distance}
component of the \code{matchit()} output will be empty.
}
\item{If a propensity score needs to be estimated for any reason, e.g., for
common support with \code{discard} or for creating a caliper,
\code{distance} should be whatever method is used to estimate the propensity
score or a vector of distance measures. Use \code{mahvars} to specify the
variables used to create the Mahalanobis distance. For example, to perform
Mahalanobis within a propensity score caliper, the following could be run:
\preformatted{
matchit(treat ~ X1 + X2 + X3, method = "nearest",
distance = "glm", caliper = .25,
mahvars = ~ X1 + X2) }
With this code, \code{X1}, \code{X2}, and \code{X3} are used to estimate the
propensity score (using the \code{"glm"} method, which by default is
logistic regression), which is used to create a matching caliper. The actual
matching occurs on the Mahalanobis distance computed only using \code{X1}
and \code{X2}, which are supplied to \code{mahvars}. Units whose propensity
score difference is larger than the caliper will not be paired, and some
treated units may therefore not receive a match. The estimated propensity
scores will be included in the \code{distance} component of the
\code{matchit()} output. See Examples.
}
}
}
\subsection{Estimand}{
The \code{estimand} argument controls whether control units are selected to be
matched with treated units (\code{estimand = "ATT"}) or treated units are
selected to be matched with control units (\code{estimand = "ATC"}). The
"focal" group (e.g., the treated units for the ATT) is typically made to be
the smaller treatment group, and a warning will be thrown if it is not set
that way unless \code{replace = TRUE}. Setting \code{estimand = "ATC"} is
equivalent to swapping all treated and control labels for the treatment
variable. When \code{estimand = "ATC"}, the default \code{m.order} is
\code{"smallest"}, and the \code{match.matrix} component of the output will
have the names of the control units as the rownames and be filled with the
names of the matched treated units (opposite to when \code{estimand = "ATT"}). Note that the argument supplied to \code{estimand} doesn't
necessarily correspond to the estimand actually targeted; it is merely a
switch to trigger which treatment group is considered "focal".
}
\subsection{Variable Ratio Matching}{
\code{matchit()} can perform variable
ratio "extremal" matching as described by Ming and Rosenbaum (2000). This
method tends to result in better balance than fixed ratio matching at the
expense of some precision. When \code{ratio > 1}, rather than requiring all
treated units to receive \code{ratio} matches, each treated unit is assigned
a value that corresponds to the number of control units they will be matched
to. These values are controlled by the arguments \code{min.controls} and
\code{max.controls}, which correspond to \eqn{\alpha} and \eqn{\beta},
respectively, in Ming and Rosenbaum (2000), and trigger variable ratio
matching to occur. Some treated units will receive \code{min.controls}
matches and others will receive \code{max.controls} matches (and one unit
may have an intermediate number of matches); how many units are assigned
each number of matches is determined by the algorithm described in Ming and
Rosenbaum (2000, p119). \code{ratio} controls how many total control units
will be matched: \code{n1 * ratio} control units will be matched, where
\code{n1} is the number of treated units, yielding the same total number of
matched controls as fixed ratio matching does.
Variable ratio matching cannot be used with Mahalanobis distance matching or
when \code{distance} is supplied as a matrix. The calculation of the
number of control units each treated unit will be matched to occurs without
consideration of \code{caliper} or \code{discard}. \code{ratio} does not
have to be an integer but must be greater than 1 and less than \code{n0/n1},
where \code{n0} and \code{n1} are the number of control and treated units,
respectively. Setting \code{ratio = n0/n1} performs a crude form of full
matching where all control units are matched. If \code{min.controls} is not
specified, it is set to 1 by default. \code{min.controls} must be less than
\code{ratio}, and \code{max.controls} must be greater than \code{ratio}. See
Examples below for an example of their use.
}
\subsection{Using \code{m.order = "closest"}}{
As of version 4.6.0, \code{m.order} can be set to \code{"closest"}, which works regardless of how the distance measure is specified. This matches in order of the distance between units. The closest pair of units across all potential pairs of units will be matched first; the second closest pair of all potential pairs will be matched second, etc. This ensures that the best possible matches are given priority, and in that sense performs similarly to \code{m.order = "smallest"}.
}
}
\note{
Sometimes an error will be produced by \emph{Rcpp} along the lines of
\code{"function 'Rcpp_precious_remove' not provided by package 'Rcpp'"}. It
is not immediately clear why this happens, though
\href{https://lists.r-forge.r-project.org/pipermail/rcpp-devel/2021-July/010648.html}{this}
thread appears to provide some insight. In a fresh session, run
\verb{remove.packages(c("MatchIt", "Rcpp")); install.packages("MatchIt")}.
This should sync \emph{MatchIt} and \emph{Rcpp} and ensure they work
correctly.
}
\section{Outputs}{
All outputs described in \code{\link[=matchit]{matchit()}} are returned with
\code{method = "nearest"}. When \code{replace = TRUE}, the \code{subclass}
component is omitted. \code{include.obj} is ignored.
}
\examples{
data("lalonde")
# 1:1 greedy NN matching on the PS
m.out1 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
method = "nearest")
m.out1
summary(m.out1)
# 3:1 NN Mahalanobis distance matching with
# replacement within a PS caliper
m.out2 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
method = "nearest", replace = TRUE,
mahvars = ~ age + educ + re74 + re75,
ratio = 3, caliper = .02)
m.out2
summary(m.out2, un = FALSE)
# 1:1 NN Mahalanobis distance matching within calipers
# on re74 and re75 and exact matching on married and race
m.out3 <- matchit(treat ~ age + educ + re74 + re75, data = lalonde,
method = "nearest", distance = "mahalanobis",
exact = ~ married + race,
caliper = c(re74 = .2, re75 = .15))
m.out3
summary(m.out3, un = FALSE)
# 2:1 variable ratio NN matching on the PS
m.out4 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
method = "nearest", ratio = 2,
min.controls = 1, max.controls = 12)
m.out4
summary(m.out4, un = FALSE)
# Some units received 1 match and some received 12
table(table(m.out4$subclass[m.out4$treat == 0]))
}
\references{
In a manuscript, you don't need to cite another package when
using \code{method = "nearest"} because the matching is performed completely
within \emph{MatchIt}. For example, a sentence might read:
\emph{Nearest neighbor matching was performed using the MatchIt package (Ho, Imai, King, & Stuart, 2011) in R.}
}
\seealso{
\code{\link[=matchit]{matchit()}} for a detailed explanation of the inputs and outputs of
a call to \code{matchit()}.
\code{\link[=method_optimal]{method_optimal()}} for optimal pair matching, which is similar to
nearest neighbor matching except that an overall distance criterion is
minimized.
}
MatchIt/man/method_exact.Rd 0000644 0001762 0000144 00000007564 14334100750 015322 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/matchit2exact.R
\name{method_exact}
\alias{method_exact}
\title{Exact Matching}
\arguments{
\item{formula}{a two-sided \link{formula} object containing the treatment and
covariates to be used in creating the subclasses defined by a full cross of
the covariate levels.}
\item{data}{a data frame containing the variables named in \code{formula}.
If not found in \code{data}, the variables will be sought in the
environment.}
\item{method}{set here to \code{"exact"}.}
\item{estimand}{a string containing the desired estimand. Allowable options
include \code{"ATT"}, \code{"ATC"}, and \code{"ATE"}. The estimand controls
how the weights are computed; see the Computing Weights section at
\code{\link[=matchit]{matchit()}} for details.}
\item{s.weights}{the variable containing sampling weights to be incorporated
into balance statistics. These weights do not affect the matching process.}
\item{verbose}{\code{logical}; whether information about the matching
process should be printed to the console.}
\item{\dots}{ignored.
The arguments \code{distance} (and related arguments), \code{exact}, \code{mahvars}, \code{discard} (and related arguments), \code{replace}, \code{m.order}, \code{caliper} (and related arguments), and \code{ratio} are ignored with a warning.}
}
\description{
In \code{\link[=matchit]{matchit()}}, setting \code{method = "exact"} performs exact matching.
With exact matching, a complete cross of the covariates is used to form
subclasses defined by each combination of the covariate levels. Any subclass
that doesn't contain both treated and control units is discarded, leaving
only subclasses containing treatment and control units that are exactly
equal on the included covariates. The benefits of exact matching are that
confounding due to the covariates included is completely eliminated,
regardless of the functional form of the treatment or outcome models. The
problem is that typically many units will be discarded, sometimes
dramatically reducing precision and changing the target population of
inference. To use exact matching in combination with another matching method
(i.e., to exact match on some covariates and some other form of matching on
others), use the \code{exact} argument with that method.
This page details the allowable arguments with \code{method = "exact"}. See
\code{\link[=matchit]{matchit()}} for an explanation of what each argument means in a general
context and how it can be specified.
Below is how \code{matchit()} is used for exact matching:
\preformatted{
matchit(formula,
data = NULL,
method = "exact",
estimand = "ATT",
s.weights = NULL,
verbose = FALSE,
...)
}
}
\section{Outputs}{
All outputs described in \code{\link[=matchit]{matchit()}} are returned with
\code{method = "exact"} except for \code{match.matrix}. This is because
matching strata are not indexed by treated units as they are in some other
forms of matching. \code{include.obj} is ignored.
}
\examples{
data("lalonde")
# Exact matching on age, race, married, and educ
m.out1 <- matchit(treat ~ age + race + married + educ, data = lalonde,
method = "exact")
m.out1
summary(m.out1)
}
\references{
In a manuscript, you don't need to cite another package when
using \code{method = "exact"} because the matching is performed completely
within \emph{MatchIt}. For example, a sentence might read:
\emph{Exact matching was performed using the MatchIt package (Ho, Imai,
King, & Stuart, 2011) in R.}
}
\seealso{
\code{\link[=matchit]{matchit()}} for a detailed explanation of the inputs and outputs of
a call to \code{matchit()}. The \code{exact} argument can be used with other
methods to perform exact matching in combination with other matching
methods.
\link{method_cem} for coarsened exact matching, which performs exact
matching on coarsened versions of the covariates.
}
MatchIt/man/method_optimal.Rd 0000644 0001762 0000144 00000027664 14375204746 015705 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/matchit2optimal.R
\name{method_optimal}
\alias{method_optimal}
\title{Optimal Pair Matching}
\arguments{
\item{formula}{a two-sided \link{formula} object containing the treatment and
covariates to be used in creating the distance measure used in the matching.
This formula will be supplied to the functions that estimate the distance
measure.}
\item{data}{a data frame containing the variables named in \code{formula}.
If not found in \code{data}, the variables will be sought in the
environment.}
\item{method}{set here to \code{"optimal"}.}
\item{distance}{the distance measure to be used. See \code{\link{distance}}
for allowable options. Can be supplied as a distance matrix.}
\item{link}{when \code{distance} is specified as a method of estimating
propensity scores, an additional argument controlling the link function used
in estimating the distance measure. See \code{\link{distance}} for allowable
options with each option.}
\item{distance.options}{a named list containing additional arguments
supplied to the function that estimates the distance measure as determined
by the argument to \code{distance}.}
\item{estimand}{a string containing the desired estimand. Allowable options
include \code{"ATT"} and \code{"ATC"}. See Details.}
\item{exact}{for which variables exact matching should take place.}
\item{mahvars}{for which variables Mahalanobis distance matching should take
place when \code{distance} corresponds to a propensity score (e.g., for
caliper matching or to discard units for common support). If specified, the
distance measure will not be used in matching.}
\item{antiexact}{for which variables anti-exact matching should take place.
Anti-exact matching is processed using \pkgfun{optmatch}{antiExactMatch}.}
\item{discard}{a string containing a method for discarding units outside a
region of common support. Only allowed when \code{distance} is not
\code{"mahalanobis"} and not a matrix.}
\item{reestimate}{if \code{discard} is not \code{"none"}, whether to
re-estimate the propensity score in the remaining sample prior to matching.}
\item{s.weights}{the variable containing sampling weights to be incorporated
into propensity score models and balance statistics.}
\item{ratio}{how many control units should be matched to each treated unit
for k:1 matching. For variable ratio matching, see section "Variable Ratio
Matching" in Details below.}
\item{min.controls, max.controls}{for variable ratio matching, the minimum
and maximum number of control units to be matched to each treated unit. See
section "Variable Ratio Matching" in Details below.}
\item{verbose}{\code{logical}; whether information about the matching
process should be printed to the console. What is printed depends on the
matching method. Default is \code{FALSE} for no printing other than
warnings.}
\item{\dots}{additional arguments passed to \pkgfun{optmatch}{fullmatch}.
Allowed arguments include \code{tol} and \code{solver}. See the
\pkgfun{optmatch}{fullmatch} documentation for details. In general, \code{tol}
should be set to a low number (e.g., \code{1e-7}) to get a more precise
solution.
The arguments \code{replace}, \code{caliper}, and \code{m.order} are ignored with a warning.}
}
\description{
In \code{\link[=matchit]{matchit()}}, setting \code{method = "optimal"} performs optimal pair
matching. The matching is optimal in the sense that the sum of the absolute
pairwise distances in the matched sample is as small as possible. The method
functionally relies on \pkgfun{optmatch}{fullmatch}.
Advantages of optimal pair matching include that the matching order is not
required to be specified and it is less likely that some within-pair
distances will be extremely large, unlike with nearest neighbor matching. Generally,
however, as a subset selection method, optimal pair matching tends to
perform similarly to nearest neighbor matching in that similar subsets of
units will be selected to be matched.
This page details the allowable arguments with \code{method = "optimal"}.
See \code{\link[=matchit]{matchit()}} for an explanation of what each argument means in a general
context and how it can be specified.
Below is how \code{matchit()} is used for optimal pair matching:
\preformatted{
matchit(formula,
data = NULL,
method = "optimal",
distance = "glm",
link = "logit",
distance.options = list(),
estimand = "ATT",
exact = NULL,
mahvars = NULL,
antiexact = NULL,
discard = "none",
reestimate = FALSE,
s.weights = NULL,
ratio = 1,
min.controls = NULL,
max.controls = NULL,
verbose = FALSE,
...) }
}
\details{
\subsection{Mahalanobis Distance Matching}{
Mahalanobis distance matching can be done one of two ways:
\enumerate{
\item{If no propensity score needs to be estimated, \code{distance} should be
set to \code{"mahalanobis"}, and Mahalanobis distance matching will occur
using all the variables in \code{formula}. Arguments to \code{discard} and
\code{mahvars} will be ignored. For example, to perform simple Mahalanobis
distance matching, the following could be run:
\preformatted{
matchit(treat ~ X1 + X2, method = "optimal",
distance = "mahalanobis") }
With this code, the Mahalanobis distance is computed using \code{X1} and
\code{X2}, and matching occurs on this distance. The \code{distance}
component of the \code{matchit()} output will be empty.
}
\item{If a propensity score needs to be estimated for common support with
\code{discard}, \code{distance} should be whatever method is used to
estimate the propensity score or a vector of distance measures, i.e., it
should not be \code{"mahalanobis"}. Use \code{mahvars} to specify the
variables used to create the Mahalanobis distance. For example, to perform
Mahalanobis after discarding units outside the common support of the
propensity score in both groups, the following could be run:
\preformatted{
matchit(treat ~ X1 + X2 + X3, method = "optimal",
distance = "glm", discard = "both",
mahvars = ~ X1 + X2) }
With this code, \code{X1}, \code{X2}, and \code{X3} are used to estimate the
propensity score (using the \code{"glm"} method, which by default is
logistic regression), which is used to identify the common support. The
actual matching occurs on the Mahalanobis distance computed only using
\code{X1} and \code{X2}, which are supplied to \code{mahvars}. The estimated
propensity scores will be included in the \code{distance} component of the
\code{matchit()} output.
}
}
}
\subsection{Estimand}{
The \code{estimand} argument controls whether control units are selected to be matched with treated units
(\code{estimand = "ATT"}) or treated units are selected to be matched with
control units (\code{estimand = "ATC"}). The "focal" group (e.g., the
treated units for the ATT) is typically made to be the smaller treatment
group, and a warning will be thrown if it is not set that way unless
\code{replace = TRUE}. Setting \code{estimand = "ATC"} is equivalent to
swapping all treated and control labels for the treatment variable. When
\code{estimand = "ATC"}, the \code{match.matrix} component of the output
will have the names of the control units as the rownames and be filled with
the names of the matched treated units (opposite to when \code{estimand = "ATT"}). Note that the argument supplied to \code{estimand} doesn't
necessarily correspond to the estimand actually targeted; it is merely a
switch to trigger which treatment group is considered "focal".
}
\subsection{Variable Ratio Matching}{
\code{matchit()} can perform variable
ratio matching, which involves matching a different number of control units
to each treated unit. When \code{ratio > 1}, rather than requiring all
treated units to receive \code{ratio} matches, the arguments to
\code{max.controls} and \code{min.controls} can be specified to control the
maximum and minimum number of matches each treated unit can have.
\code{ratio} controls how many total control units will be matched: \code{n1 * ratio} control units will be matched, where \code{n1} is the number of
treated units, yielding the same total number of matched controls as fixed
ratio matching does.
Variable ratio matching can be used with any \code{distance} specification.
\code{ratio} does not have to be an integer but must be greater than 1 and
less than \code{n0/n1}, where \code{n0} and \code{n1} are the number of
control and treated units, respectively. Setting \code{ratio = n0/n1}
performs a restricted form of full matching where all control units are
matched. If \code{min.controls} is not specified, it is set to 1 by default.
\code{min.controls} must be less than \code{ratio}, and \code{max.controls}
must be greater than \code{ratio}. See the Examples section of
\code{\link[=method_nearest]{method_nearest()}} for an example of their use, which is the same
as it is with optimal matching.
}
}
\note{
Optimal pair matching is a restricted form of optimal full matching
where the number of treated units in each subclass is equal to 1, whereas in
unrestricted full matching, multiple treated units can be assigned to the
same subclass. \pkgfun{optmatch}{pairmatch} is simply a wrapper for
\pkgfun{optmatch}{fullmatch}, which performs optimal full matching and is the
workhorse for \code{\link{method_full}}. In the same way, \code{matchit()}
uses \code{optmatch::fullmatch()} under the hood, imposing the restrictions that
make optimal full matching function like optimal pair matching (which is
simply to set \code{min.controls >= 1} and to pass \code{ratio} to the
\code{mean.controls} argument). This distinction is not important for
regular use but may be of interest to those examining the source code.
The option \code{"optmatch_max_problem_size"} is automatically set to
\code{Inf} during the matching process, different from its default in
\emph{optmatch}. This enables matching problems of any size to be run, but
may also let huge, infeasible problems get through and potentially take a
long time or crash R. See \pkgfun{optmatch}{setMaxProblemSize} for more details.
}
\section{Outputs}{
All outputs described in \code{\link[=matchit]{matchit()}} are returned with
\code{method = "optimal"}. When \code{include.obj = TRUE} in the call to
\code{matchit()}, the output of the call to \code{optmatch::fullmatch()} will be
included in the output. When \code{exact} is specified, this will be a list
of such objects, one for each stratum of the \code{exact} variables.
}
\examples{
\dontshow{if (requireNamespace("optmatch", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
data("lalonde")
#1:1 optimal PS matching with exact matching on race
m.out1 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
method = "optimal", exact = ~race)
m.out1
summary(m.out1)
#2:1 optimal matching on the scaled Euclidean distance
m.out2 <- matchit(treat ~ age + educ + race + nodegree +
married + re74 + re75, data = lalonde,
method = "optimal", ratio = 2,
distance = "scaled_euclidean")
m.out2
summary(m.out2, un = FALSE)
\dontshow{\}) # examplesIf}
}
\references{
In a manuscript, be sure to cite the following paper if using
\code{matchit()} with \code{method = "optimal"}:
Hansen, B. B., & Klopfer, S. O. (2006). Optimal Full Matching and Related
Designs via Network Flows. Journal of Computational and Graphical
Statistics, 15(3), 609–627. \doi{10.1198/106186006X137047}
For example, a sentence might read:
\emph{Optimal pair matching was performed using the MatchIt package (Ho,
Imai, King, & Stuart, 2011) in R, which calls functions from the optmatch
package (Hansen & Klopfer, 2006).}
}
\seealso{
\code{\link[=matchit]{matchit()}} for a detailed explanation of the inputs and outputs of
a call to \code{matchit()}.
\pkgfun{optmatch}{fullmatch}, which is the workhorse.
\code{\link{method_full}} for optimal full matching, of which optimal pair
matching is a special case, and which relies on similar machinery.
}
MatchIt/man/rbind.matchdata.Rd 0000644 0001762 0000144 00000006322 14334100751 015671 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rbind.matchdata.R
\name{rbind.matchdata}
\alias{rbind.matchdata}
\alias{rbind.getmatches}
\title{Append matched datasets together}
\usage{
\method{rbind}{matchdata}(..., deparse.level = 1)
\method{rbind}{getmatches}(..., deparse.level = 1)
}
\arguments{
\item{\dots}{Two or more \code{matchdata} or \code{getmatches} objects, i.e., the
output of calls to \code{\link[=match.data]{match.data()}} and \code{\link[=get_matches]{get_matches()}}, respectively.
Supplied objects must either be all \code{matchdata} objects or all
\code{getmatches} objects.}
\item{deparse.level}{Passed to \code{\link[=rbind]{rbind()}}.}
}
\value{
An object of the same class as those supplied to it (i.e., a
\code{matchdata} object if \code{matchdata} objects are supplied and a
\code{getmatches} object if \code{getmatches} objects are supplied).
\code{\link[=rbind]{rbind()}} is called on the objects after adjusting the variables so that the
appropriate method will be dispatched corresponding to the class of the
original data object.
}
\description{
These functions are \code{\link[=rbind]{rbind()}} methods for objects resulting from calls to
\code{\link[=match.data]{match.data()}} and \code{\link[=get_matches]{get_matches()}}. They function nearly identically to
\code{rbind.data.frame()}; see Details for how they differ.
}
\details{
\code{rbind()} appends two or more datasets row-wise. This can be useful
when matching was performed separately on subsets of the original data and
they are to be combined into a single dataset for effect estimation. Using
the regular \code{data.frame} method for \code{rbind()} would pose a
problem, however; the \code{subclass} variable would have repeated names
across different datasets, even though units only belong to the subclasses
in their respective datasets. \code{rbind.matchdata()} renames the
subclasses so that the correct subclass membership is maintained.
The supplied matched datasets must be generated from the same original
dataset, that is, having the same variables in it. The added components
(e.g., weights, subclass) can be named differently in different datasets but
will be changed to have the same name in the output.
\code{rbind.getmatches()} and \code{rbind.matchdata()} are identical.
}
\examples{
data("lalonde")
# Matching based on race subsets
m.out_b <- matchit(treat ~ age + educ + married +
nodegree + re74 + re75,
data = subset(lalonde, race == "black"))
md_b <- match.data(m.out_b)
m.out_h <- matchit(treat ~ age + educ + married +
nodegree + re74 + re75,
data = subset(lalonde, race == "hispan"))
md_h <- match.data(m.out_h)
m.out_w <- matchit(treat ~ age + educ + married +
nodegree + re74 + re75,
data = subset(lalonde, race == "white"))
md_w <- match.data(m.out_w)
#Bind the datasets together
md_all <- rbind(md_b, md_h, md_w)
#Subclass conflicts are avoided
levels(md_all$subclass)
}
\seealso{
\code{\link[=match.data]{match.data()}}, \code{\link[=rbind]{rbind()}}
See \code{vignette("estimating-effects")} for details on using
\code{rbind()} for effect estimation after subsetting the data.
}
\author{
Noah Greifer
}
MatchIt/man/match.data.Rd 0000644 0001762 0000144 00000023453 14415437120 014661 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/match.data.R
\name{match.data}
\alias{match.data}
\alias{get_matches}
\title{Construct a matched dataset from a \code{matchit} object}
\usage{
match.data(
object,
group = "all",
distance = "distance",
weights = "weights",
subclass = "subclass",
data = NULL,
include.s.weights = TRUE,
drop.unmatched = TRUE
)
get_matches(
object,
distance = "distance",
weights = "weights",
subclass = "subclass",
id = "id",
data = NULL,
include.s.weights = TRUE
)
}
\arguments{
\item{object}{a \code{matchit} object; the output of a call to \code{\link[=matchit]{matchit()}}.}
\item{group}{which group should comprise the matched dataset: \code{"all"}
for all units, \code{"treated"} for just treated units, or \code{"control"}
for just control units. Default is \code{"all"}.}
\item{distance}{a string containing the name that should be given to the
variable containing the distance measure in the data frame output. Default
is \code{"distance"}, but \code{"prop.score"} or similar might be a good
alternative if propensity scores were used in matching. Ignored if a
distance measure was not supplied or estimated in the call to
\code{matchit()}.}
\item{weights}{a string containing the name that should be given to the
variable containing the matching weights in the data frame output. Default
is \code{"weights"}.}
\item{subclass}{a string containing the name that should be given to the
variable containing the subclasses or matched pair membership in the data
frame output. Default is \code{"subclass"}.}
\item{data}{a data frame containing the original dataset to which the
computed output variables (\code{distance}, \code{weights}, and/or
\code{subclass}) should be appended. If empty, \code{match.data()} and
\code{get_matches()} will attempt to find the dataset using the environment
of the \code{matchit} object, which can be unreliable; see Notes.}
\item{include.s.weights}{\code{logical}; whether to multiply the estimated
weights by the sampling weights supplied to \code{matchit()}, if any.
Default is \code{TRUE}. If \code{FALSE}, the weights in the
\code{match.data()} or \code{get_matches()} output should be multiplied by
the sampling weights before being supplied to the function estimating the
treatment effect in the matched data.}
\item{drop.unmatched}{\code{logical}; whether the returned data frame should
contain all units (\code{FALSE}) or only units that were matched (i.e., have
a matching weight greater than zero) (\code{TRUE}). Default is \code{TRUE}
to drop unmatched units.}
\item{id}{a string containing the name that should be given to the variable
containing the unit IDs in the data frame output. Default is \code{"id"}.
Only used with \code{get_matches()}; for \code{match.data()}, the unit IDs
are stored in the row names of the returned data frame.}
}
\value{
A data frame containing the data supplied in the \code{data} argument or in the
original call to \code{matchit()} with the computed
output variables appended as additional columns, named according to the
arguments above. For \code{match.data()}, the \code{group} and
\code{drop.unmatched} arguments control whether only subsets of the data are
returned. See Details above for how \code{match.data()} and
\code{get_matches()} differ. Note that \code{get_matches} sorts the data by
subclass and treatment status, unlike \code{match.data()}, which uses the
order of the data.
The returned data frame will contain the variables in the original data set
or dataset supplied to \code{data} and the following columns:
\item{distance}{The propensity score, if estimated or supplied to the
\code{distance} argument in \code{matchit()} as a vector.}
\item{weights}{The computed matching weights. These must be used in effect
estimation to correctly incorporate the matching.}
\item{subclass}{Matching
strata membership. Units with the same value are in the same stratum.}
\item{id}{The ID of each unit, corresponding to the row names in the
original data or dataset supplied to \code{data}. Only included in
\code{get_matches} output. This column can be used to identify which rows
belong to the same unit since the same unit may appear multiple times if
reused in matching with replacement.}
These columns will take on the name supplied to the corresponding arguments
in the call to \code{match.data()} or \code{get_matches()}. See Examples for
an example of renaming the \code{distance} column to \code{"prop.score"}.
If \code{data} or the original dataset supplied to \code{matchit()} was a
\code{data.table} or \code{tbl}, the \code{match.data()} output will have
the same class, but the \code{get_matches()} output will always be a base R
\code{data.frame}.
In addition to their base class (e.g., \code{data.frame} or \code{tbl}),
returned objects have the class \code{matchdata} or \code{getmatches}. This
class is important when using \code{\link[=rbind.matchdata]{rbind()}} to
append matched datasets.
}
\description{
\code{match.data()} and \code{get_matches()} create a data frame with
additional variables for the distance measure, matching weights, and
subclasses after matching. This dataset can be used to estimate treatment
effects after matching or subclassification. \code{get_matches()} is most
useful after matching with replacement; otherwise, \code{match.data()} is
more flexible. See Details below for the difference between them.
}
\details{
\code{match.data()} creates a dataset with one row per unit. It will be
identical to the dataset supplied except that several new columns will be
added containing information related to the matching. When
\code{drop.unmatched = TRUE}, the default, units with weights of zero, which
are those units that were discarded by common support or the caliper or were
simply not matched, will be dropped from the dataset, leaving only the
subset of matched units. The idea is for the output of \code{match.data()}
to be used as the dataset input in calls to \code{glm()} or similar to
estimate treatment effects in the matched sample. It is important to include
the weights in the estimation of the effect and its standard error. The
subclass column, when created, contains pair or subclass membership and
should be used to estimate the effect and its standard error. Subclasses
will only be included if there is a \code{subclass} component in the
\code{matchit} object, which does not occur with matching with replacement,
in which case \code{get_matches()} should be used. See
\code{vignette("estimating-effects")} for information on how to use
\code{match.data()} output to estimate effects.
\code{get_matches()} is similar to \code{match.data()}; the primary
difference occurs when matching is performed with replacement, i.e., when
units do not belong to a single matched pair. In this case, the output of
\code{get_matches()} will be a dataset that contains one row per unit for
each pair they are a part of. For example, if matching was performed with
replacement and a control unit was matched to two treated units, that
control unit will have two rows in the output dataset, one for each pair it
is a part of. Weights are computed for each row, and, for control units, are equal to the
inverse of the number of control units in each control unit's subclass; treated units get a weight of 1.
Unmatched units are dropped. An additional column with unit IDs will be
created (named using the \code{id} argument) to identify when the same unit
is present in multiple rows. This dataset structure allows for the inclusion
of both subclass membership and repeated use of units, unlike the output of
\code{match.data()}, which lacks subclass membership when matching is done
with replacement. A \code{match.matrix} component of the \code{matchit}
object must be present to use \code{get_matches()}; in some forms of
matching, it is absent, in which case \code{match.data()} should be used
instead. See \code{vignette("estimating-effects")} for information on how to
use \code{get_matches()} output to estimate effects after matching with
replacement.
}
\note{
The most common way to use \code{match.data()} and
\code{get_matches()} is by supplying just the \code{matchit} object, e.g.,
as \code{match.data(m.out)}. A dataset will first be searched for in the
environment of the \code{matchit} formula, then in the calling environment
of \code{match.data()} or \code{get_matches()}, and finally in the
\code{model} component of the \code{matchit} object if a propensity score
was estimated.
When called from an environment different from the one in which
\code{matchit()} was originally called and a propensity score was not
estimated (or was but with \code{discard} not \code{"none"} and
\code{reestimate = TRUE}), this syntax may not work because the original
dataset used to construct the matched dataset will not be found. This can
occur when \code{matchit()} was run within an \code{\link[=lapply]{lapply()}} or
\code{purrr::map()} call. The solution, which is recommended in all cases,
is simply to supply the original dataset to the \code{data} argument of
\code{match.data()}, e.g., as \code{match.data(m.out, data = original_data)}, as demonstrated in the Examples.
}
\examples{
data("lalonde")
# 4:1 matching w/replacement
m.out1 <- matchit(treat ~ age + educ + married +
race + nodegree + re74 + re75,
data = lalonde, replace = TRUE,
caliper = .05, ratio = 4)
m.data1 <- match.data(m.out1, data = lalonde,
distance = "prop.score")
dim(m.data1) #one row per matched unit
head(m.data1, 10)
g.matches1 <- get_matches(m.out1, data = lalonde,
distance = "prop.score")
dim(g.matches1) #multiple rows per matched unit
head(g.matches1, 10)
}
\seealso{
\code{\link[=matchit]{matchit()}}; \code{\link[=rbind.matchdata]{rbind.matchdata()}}
\code{vignette("estimating-effects")} for uses of \code{match.data()} and
\code{get_matches()} in estimating treatment effects.
}
MatchIt/man/macros/ 0000755 0001762 0000144 00000000000 14463002323 013637 5 ustar ligges users MatchIt/man/macros/macros.Rd 0000644 0001762 0000144 00000002604 14207226672 015427 0 ustar ligges users % Rd macro for simplifying documentation writing
\newcommand{\fun}{\code{\link[=#1]{#1()}}}
% Because R packages need conditional use of packages in Suggests, any cross-reference to a doc in another package needs to be conditionally evaluated, too.
%\pkgfun{}{} tests whether the package is available, and, if so, produces a cross-reference to the function in the package; if not, the function name is displayed without a cross-reference. The first argument is the package, the second is the function name, e.g., \pkgfun{optmatch}{pairmatch}.
\newcommand{\pkgfun}{\ifelse{\Sexpr[results=rd,stage=render]{requireNamespace("#1", quietly = TRUE)}}{\code{\link[#1:#2]{#1::#2()}}}{\code{#1::#2()}}}
%\newcommand{\pkgfun}{\code{\link[#1:#2]{#1::#2()}}}
%E.g., \pkgfun{sandwich}{vcovCL} is the same as \code{\link[sandwich:vcovCL]{sandwich::vcovCL()}} if the sandwich package is installed and \code{sandwich::vcovCL()} if not.
%\pkgfun2{}{}{} does the same but allows the third argument to be printed, e.g., to use text that differs from the name of the function in the new package.
\newcommand{\pkgfun2}{\ifelse{\Sexpr[results=rd,stage=render]{requireNamespace("#1", quietly = TRUE)}}{\code{\link[#1:#2]{#3()}}}{\code{#3()}}}
%\newcommand{\pkgfun2}{\code{\link[#1:#2]{#3()}}}
%E.g., \pkgfun2{sandwich}{vcovCL}{meatCL} is the same as \code{\link[sandwich:vcovCL]{meatCL()}} if the sandwich package is installed and \code{meatCL()} if not.
MatchIt/man/mahalanobis_dist.Rd 0000644 0001762 0000144 00000020611 14415704563 016157 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dist_functions.R
\name{mahalanobis_dist}
\alias{mahalanobis_dist}
\alias{euclidean_dist}
\alias{scaled_euclidean_dist}
\alias{robust_mahalanobis_dist}
\title{Compute a Distance Matrix}
\usage{
mahalanobis_dist(
formula = NULL,
data = NULL,
s.weights = NULL,
var = NULL,
discarded = NULL,
...
)
scaled_euclidean_dist(
formula = NULL,
data = NULL,
s.weights = NULL,
var = NULL,
discarded = NULL,
...
)
robust_mahalanobis_dist(
formula = NULL,
data = NULL,
s.weights = NULL,
discarded = NULL,
...
)
euclidean_dist(formula = NULL, data = NULL, ...)
}
\arguments{
\item{formula}{a formula with the treatment (i.e., splitting variable) on
the left side and the covariates used to compute the distance matrix on the
right side. If there is no left-hand-side variable, the distances will be
computed between all pairs of units. If \code{NULL}, all the variables in
\code{data} will be used as covariates.}
\item{data}{a data frame containing the variables named in \code{formula}.
If \code{formula} is \code{NULL}, all variables in \code{data} will be used
as covariates.}
\item{s.weights}{when \code{var = NULL}, an optional vector of sampling
weights used to compute the variances used in the Mahalanobis, scaled
Euclidean, and robust Mahalanobis distances.}
\item{var}{for \code{mahalanobis_dist()}, a covariance matrix used to scale
the covariates. For \code{scaled_euclidean_dist()}, either a covariance
matrix (from which only the diagonal elements will be used) or a vector of
variances used to scale the covariates. If \code{NULL}, these values will be
calculated using formulas described in Details.}
\item{discarded}{a \code{logical} vector denoting which units are to be
discarded or not. This is used only when \code{var = NULL}. The scaling
factors will be computed only using the non-discarded units, but the
distance matrix will be computed for all units (discarded and
non-discarded).}
\item{\dots}{ignored. Included to make cycling through these functions
easier without having to change the arguments supplied.}
}
\value{
A numeric distance matrix. When \code{formula} has a left-hand-side
(treatment) variable, the matrix will have one row for each treated unit and
one column for each control unit. Otherwise, the matrix will have one row
and one column for each unit.
}
\description{
The functions compute a distance matrix, either for a single dataset (i.e.,
the distances between all pairs of units) or for two groups defined by a
splitting variable (i.e., the distances between all units in one group and
all units in the other). These distance matrices include the Mahalanobis
distance, Euclidean distance, scaled Euclidean distance, and robust
(rank-based) Mahalanobis distance. These functions can be used as inputs to
the \code{distance} argument to \code{\link[=matchit]{matchit()}} and are used to compute the
corresponding distance matrices within \code{matchit()} when named.
}
\details{
The \strong{Euclidean distance} (computed using \code{euclidean_dist()}) is
the raw distance between units, computed as \deqn{d_{ij} = \sqrt{(x_i -
x_j)(x_i - x_j)'}} where \eqn{x_i} and \eqn{x_j} are vectors of covariates
for units \eqn{i} and \eqn{j}, respectively. The Euclidean distance is
sensitive to the scales of the variables and their redundancy (i.e.,
correlation). It should probably not be used for matching unless all of the
variables have been previously scaled appropriately or are already on the
same scale. It forms the basis of the other distance measures.
The \strong{scaled Euclidean distance} (computed using
\code{scaled_euclidean_dist()}) is the Euclidean distance computed on the
scaled covariates. Typically the covariates are scaled by dividing by their
standard deviations, but any scaling factor can be supplied using the
\code{var} argument. This leads to a distance measure computed as
\deqn{d_{ij} = \sqrt{(x_i - x_j)S_d^{-1}(x_i - x_j)'}} where \eqn{S_d} is a
diagonal matrix with the squared scaling factors on the diagonal. Although
this measure is not sensitive to the scales of the variables (because they
are all placed on the same scale), it is still sensitive to redundancy among
the variables. For example, if 5 variables measure approximately the same
construct (i.e., are highly correlated) and 1 variable measures another
construct, the first construct will have 5 times as much influence on the
distance between units as the second construct. The Mahalanobis distance
attempts to address this issue.
The \strong{Mahalanobis distance} (computed using \code{mahalanobis_dist()})
is computed as \deqn{d_{ij} = \sqrt{(x_i - x_j)S^{-1}(x_i - x_j)'}} where
\eqn{S} is a scaling matrix, typically the covariance matrix of the
covariates. It is essentially equivalent to the Euclidean distance computed
on the scaled principal components of the covariates. This is the most
popular distance matrix for matching because it is not sensitive to the
scale of the covariates and accounts for redundancy between them. The
scaling matrix can also be supplied using the \code{var} argument.
The Mahalanobis distance can be sensitive to outliers and long-tailed or
otherwise non-normally distributed covariates and may not perform well with
categorical variables due to prioritizing rare categories over common ones.
One solution is the rank-based \strong{robust Mahalanobis distance}
(computed using \code{robust_mahalanobis_dist()}), which is computed by
first replacing the covariates with their ranks (using average ranks for
ties) and rescaling each ranked covariate by a constant scaling factor
before computing the usual Mahalanobis distance on the rescaled ranks.
The Mahalanobis distance and its robust variant are computed internally by
transforming the covariates in such a way that the Euclidean distance
computed on the scaled covariates is equal to the requested distance. For
the Mahalanobis distance, this involves replacing the covariates vector
\eqn{x_i} with \eqn{x_iS^{-.5}}, where \eqn{S^{-.5}} is the Cholesky
decomposition of the (generalized) inverse of the covariance matrix \eqn{S}.
When a left-hand-side splitting variable is present in \code{formula} and
\code{var = NULL} (i.e., so that the scaling matrix is computed internally),
the covariance matrix used is the "pooled" covariance matrix, which
essentially is a weighted average of the covariance matrices computed
separately within each level of the splitting variable to capture
within-group variation and reduce sensitivity to covariate imbalance. This
is also true of the scaling factors used in the scaled Euclidean distance.
}
\examples{
data("lalonde")
# Computing the scaled Euclidean distance between all units:
d <- scaled_euclidean_dist(~ age + educ + race + married,
data = lalonde)
# Another interface using the data argument:
dat <- subset(lalonde, select = c(age, educ, race, married))
d <- scaled_euclidean_dist(data = dat)
# Computing the Mahalanobis distance between treated and
# control units:
d <- mahalanobis_dist(treat ~ age + educ + race + married,
data = lalonde)
# Supplying a covariance matrix or vector of variances (note:
# a bit more complicated with factor variables)
dat <- subset(lalonde, select = c(age, educ, married, re74))
vars <- sapply(dat, var)
d <- scaled_euclidean_dist(data = dat, var = vars)
# Same result:
d <- scaled_euclidean_dist(data = dat, var = diag(vars))
# Discard units:
discard <- sample(c(TRUE, FALSE), nrow(lalonde),
replace = TRUE, prob = c(.2, .8))
d <- mahalanobis_dist(treat ~ age + educ + race + married,
data = lalonde, discarded = discard)
dim(d) #all units present in distance matrix
table(lalonde$treat)
}
\references{
Rosenbaum, P. R. (2010). \emph{Design of observational studies}.
Springer.
Rosenbaum, P. R., & Rubin, D. B. (1985). Constructing a Control Group Using
Multivariate Matched Sampling Methods That Incorporate the Propensity Score.
\emph{The American Statistician}, 39(1), 33–38. \doi{10.2307/2683903}
Rubin, D. B. (1980). Bias Reduction Using Mahalanobis-Metric Matching.
\emph{Biometrics}, 36(2), 293–298. \doi{10.2307/2529981}
}
\seealso{
\code{\link{distance}}, \code{\link[=matchit]{matchit()}}, \code{\link[=dist]{dist()}} (which is used
internally to compute Euclidean distances)
\pkgfun{optmatch}{match_on}, which provides similar functionality but with fewer
options and a focus on efficient storage of the output.
}
\author{
Noah Greifer
}
MatchIt/man/lalonde.Rd 0000644 0001762 0000144 00000003500 14334100747 014264 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lalonde.R
\docType{data}
\name{lalonde}
\alias{lalonde}
\title{Data from National Supported Work Demonstration and PSID, as analyzed by
Dehejia and Wahba (1999).}
\format{
A data frame with 614 observations (185 treated, 429 control).
There are 9 variables measured for each individual.
\itemize{
\item "treat"
is the treatment assignment (1=treated, 0=control).
\item "age" is age in years.
\item "educ" is education in number of years of schooling.
\item "race" is the individual's race/ethnicity (Black, Hispanic, or White). Note
previous versions of this dataset used indicator variables \code{black} and
\code{hispan} instead of a single race variable.
\item "married" is an
indicator for married (1=married, 0=not married).
\item "nodegree" is an
indicator for whether the individual lacks a high school degree (1=no degree,
0=degree).
\item "re74" is income in 1974, in U.S. dollars.
\item "re75" is
income in 1975, in U.S. dollars.
\item "re78" is income in 1978, in U.S.
dollars. }
"treat" is the treatment variable, "re78" is the outcome, and the
others are pre-treatment covariates.
}
\description{
This is a subsample of the data from the treated group in the National
Supported Work Demonstration (NSW) and the comparison sample from the
Panel Study of Income Dynamics (PSID). This data was previously
analyzed extensively by Lalonde (1986) and Dehejia and Wahba (1999).
}
\references{
Lalonde, R. (1986). Evaluating the econometric evaluations of
training programs with experimental data. \emph{American Economic Review} 76:
604-620.
Dehejia, R.H. and Wahba, S. (1999). Causal Effects in Nonexperimental
Studies: Re-Evaluating the Evaluation of Training Programs. \emph{Journal of the
American Statistical Association} 94: 1053-1062.
}
\keyword{datasets}
MatchIt/man/figures/ 0000755 0001762 0000144 00000000000 14463003300 014013 5 ustar ligges users MatchIt/man/figures/logo.png 0000644 0001762 0000144 00001144776 14235232564 015522 0 ustar ligges users PNG
IHDR qy pHYs .# .#x?v OiTXtXML:com.adobe.xmp UIDATxwwA9LI#$: &APEXnQw]+*VDA!$!!Oʤg&u2{~a|gg4L `wa `2jz߿ë##VUTTw]{z|Piauj{Yb :;g- ؗ}}ggg>{ > }7=)Չq1MOcG~X6G~Zjz
յ;k{wjG (w f|::]Ww?`>P~%}Z_]R]Z_]\=t쾽Z٥ 0暞DgWgV6=}窓_tDMG4oͭ 1p `oXή^X}Yw;W=zf=MU>P]Ym^
_$w &5MOiYj>>3z_tv/V5o =; {To_ڃcמּOMOw =; {q_uX[S-;ëoyTjS