diff --git a/data/sampledata_commentsexclusion.csv b/data/sampledata_commentsexclusion.csv new file mode 100644 index 0000000..f1e2a56 --- /dev/null +++ b/data/sampledata_commentsexclusion.csv @@ -0,0 +1,200 @@ +participantid,ip_address,variable1,variable2,variable3,variable4,variable5,variable6,demographicscateg,condition12,condition1234,comments,completion_timeseconds,exclude_completion,exclude_time,variance,exclude_variance +103,73.173.1,4,4,6,5,5,6,2,1,3,,527,0,0,0.8,0 +109,97.102.1,1,4,4,4,5,6,2,2,3,,287,0,0,2.8,0 +118,108.30.2,3,6,6,2,3,2,1,1,1,,391,0,0,3.4666666666666672,0 +121,75.128.9,2,5,5,2,1,1,2,1,2,,377,0,0,3.4666666666666672,0 +122,76.185.1,6,2,3,5,6,5,2,1,4,,359,0,0,2.7,0 +126,47.208.6,3,6,6,6,7,6,2,2,1,,595,0,0,1.8666666666666665,0 +128,73.214.8,2,2,3,5,6,7,1,1,1,,296,0,0,4.566666666666667,0 +129,24.167.1,1,4,4,1,1,3,2,2,3,,466,0,0,2.2666666666666666,0 +132,24.46.11,2,4,4,3,4,3,2,2,3,,296,0,0,0.6666666666666666,0 +133,72.187.1,2,3,3,3,2,4,1,1,3,,550,0,0,0.5666666666666667,0 +134,108.30.2,1,5,5,4,5,5,1,1,1,,514,0,0,2.5666666666666673,0 +136,71.206.8,7,2,2,5,7,6,1,2,1,,358,0,0,5.366666666666667,0 +137,76.210.1,2,6,6,6,6,5,1,2,1,,435,0,0,2.5666666666666673,0 +138,76.23.61,1,5,5,5,7,6,1,1,1,,418,0,0,4.166666666666667,0 +144,70.182.9,1,5,5,6,4,7,1,1,4,,571,0,0,4.266666666666667,0 +145,71.181.2,3,6,7,3,3,2,1,1,2,,382,0,0,4,0 +147,107.210.,3,6,6,4,1,2,2,2,2,,160,0,0,4.2666666666666675,0 +148,70.182.9,6,2,2,3,3,1,1,2,2,,226,0,0,2.9666666666666672,0 +149,100.15.1,2,2,3,2,1,3,1,2,1,none,533,0,0,0.5666666666666667,0 +160,49.205.1,1,5,5,4,6,4,1,2,3,,406,0,0,2.9666666666666672,0 +161,142.196.,7,2,2,4,4,5,2,1,1,,541,0,0,3.6,0 +171,108.215.,6,2,2,3,2,4,1,1,3,,226,0,0,2.566666666666667,0 +175,67.249.8,2,6,6,7,5,6,2,1,4,,170,0,0,3.0666666666666664,0 +183,75.12.93,3,6,6,6,4,7,2,1,4,,401,0,0,2.2666666666666666,0 +186,64.237.2,3,6,7,7,5,6,2,1,4,exclude me!,463,0,0,2.2666666666666666,0 +187,71.63.20,2,4,4,7,7,6,1,2,3,,213,0,0,4,0 
+194,47.156.2,7,2,2,1,3,2,2,2,2,,516,0,0,4.566666666666666,0 +195,24.1.163,7,2,2,3,2,1,2,2,4,,557,0,0,4.566666666666666,0 +203,24.247.1,3,6,6,5,6,5,1,1,4,I did your survey twice! Oops!,248,0,0,1.3666666666666665,0 +207,68.63.16,6,2,3,7,4,5,2,2,4,none,289,0,0,3.5,0 +209,24.31.24,4,3,4,3,2,1,2,2,3,,563,0,0,1.3666666666666667,0 +210,74.196.6,3,3,3,5,6,5,2,2,1,,214,0,0,1.7666666666666668,0 +214,69.247.7,4,3,3,3,2,4,1,1,3,,454,0,0,0.5666666666666667,0 +217,71.192.8,3,3,3,7,6,7,2,2,3,,192,0,0,4.166666666666666,0 +227,163.238.,2,6,6,3,4,3,1,1,4,,332,0,0,2.8,0 +232,68.184.4,3,6,6,1,3,2,2,2,1,,457,0,0,4.3,0 +233,99.136.2,7,2,2,1,3,4,1,2,4,,191,0,0,4.566666666666667,0 +235,162.248.,1,4,4,1,3,3,1,2,2,,212,0,0,1.8666666666666665,0 +240,174.60.7,4,5,6,4,3,3,1,2,1,,541,0,0,1.3666666666666667,0 +248,73.149.2,3,3,3,7,7,6,1,2,3,thanks,265,0,0,4.166666666666666,0 +252,47.17.18,3,6,5,7,5,6,1,1,3,,254,0,0,1.8666666666666665,0 +261,24.34.32,2,6,6,3,4,3,1,2,1,,158,0,0,2.8,0 +268,76.99.15,2,6,6,3,3,5,1,2,3,great survey,181,0,0,2.966666666666667,0 +273,173.69.6,1,5,5,3,3,2,2,2,2,,463,0,0,2.5666666666666673,0 +280,72.185.1,3,6,6,2,3,2,2,2,1,,421,0,0,3.4666666666666672,0 +287,47.202.1,5,6,6,2,1,3,2,2,2,,377,0,0,4.566666666666667,0 +294,162.231.,2,5,5,5,5,5,2,2,1,,528,0,0,1.5,0 +295,74.70.19,4,3,3,5,7,4,2,1,3,,30,0,1,2.2666666666666666,0 +296,114.142.,4,3,2,4,3,5,1,2,2,,201,0,0,1.1,0 +298,75.39.5.,7,2,2,5,6,7,1,2,2,,597,0,0,5.366666666666667,0 +306,76.92.35,2,5,5,2,2,2,1,2,1,,191,0,0,2.4,0 +315,68.44.12,4,5,6,6,7,6,2,1,4,I am a bot.,303,0,0,1.0666666666666664,0 +318,71.172.3,1,4,4,3,3,4,1,2,4,,576,0,0,1.3666666666666667,0 +323,66.66.17,2,6,6,3,3,4,1,1,1,,382,0,0,2.8,0 +324,73.61.25,2,5,5,1,2,3,2,2,3,,460,0,0,2.8,0 +326,108.208.,6,2,2,2,2,3,2,2,3,,359,0,0,2.566666666666667,0 +330,67.242.1,3,3,3,7,4,6,2,1,4,,477,0,0,3.0666666666666664,0 +331,96.27.11,1,4,3,5,4,6,1,1,1,,437,0,0,2.9666666666666672,0 +332,173.91.8,6,2,2,7,6,5,1,2,1,,407,0,0,4.666666666666666,0 
+339,184.91.9,2,5,5,6,5,6,2,1,1,,375,0,0,2.1666666666666674,0 +340,100.2.42,3,6,6,3,3,5,1,2,4,,163,0,0,2.2666666666666666,0 +341,172.91.8,3,3,3,3,2,1,2,1,2,,501,0,0,0.7,0 +342,75.88.10,3,3,3,5,3,6,2,1,1,,321,0,0,1.7666666666666664,0 +347,162.199.,2,6,6,2,2,1,1,1,4,,572,0,0,4.966666666666667,0 +349,162.195.,6,2,2,3,2,1,2,2,2,ta!,225,0,0,3.066666666666667,0 +360,32.208.8,4,3,3,2,4,3,1,2,2,,299,0,0,0.5666666666666667,0 +362,50.97.23,2,5,5,7,4,5,1,1,3,,305,0,0,2.6666666666666665,0 +366,107.77.1,1,4,4,2,1,4,1,1,2,,553,0,0,2.2666666666666666,0 +378,184.13.6,4,4,4,4,4,4,4,4,4,,470,0,0,0,1 +382,74.217.9,1,4,5,6,5,5,2,2,1,,466,0,0,3.0666666666666664,0 +386,73.88.71,2,2,3,1,1,3,2,2,2,,518,0,0,0.8,0 +391,98.127.1,1,4,3,6,3,6,1,1,2,,308,0,0,3.766666666666667,0 +394,108.88.5,4,5,6,5,6,7,1,2,2,,464,0,0,1.1,0 +398,166.181.,2,3,3,1,1,3,2,2,2,,537,0,0,0.9666666666666666,0 +399,67.80.97,2,5,5,4,3,2,1,2,4,,499,0,0,1.9,0 +400,73.39.22,6,6,6,6,6,6,6,6,6,,212,0,0,0,1 +401,98.244.1,6,2,2,1,3,1,2,2,3,,55,0,1,3.5,0 +402,67.48.16,3,6,5,2,4,2,2,2,1,,232,0,0,2.6666666666666665,0 +422,73.38.16,7,2,2,2,3,6,2,1,1,,468,0,0,5.066666666666667,0 +425,173.61.1,2,6,6,4,5,4,2,2,4,,477,0,0,2.3,0 +426,104.1.23,2,6,6,3,4,3,2,2,1,,481,0,0,2.8,0 +427,172.76.6,6,2,2,3,2,1,1,1,2,,62,0,1,3.066666666666667,0 +428,71.237.1,3,3,3,3,3,3,3,3,3,,315,0,0,0,1 +429,68.132.1,1,5,5,4,6,4,1,1,1,,593,0,0,2.9666666666666672,0 +433,73.91.43,3,6,6,3,4,3,2,2,3,,307,0,0,2.166666666666667,0 +436,107.242.,2,6,6,3,5,4,1,1,4,,519,0,0,2.6666666666666665,0 +447,162.202.,2,4,4,3,2,2,2,1,2,,429,0,0,0.9666666666666666,0 +448,76.186.1,2,4,4,2,1,4,2,1,3,,338,0,0,1.7666666666666664,0 +449,108.200.,2,4,4,3,2,4,1,2,3,,163,0,0,0.9666666666666666,0 +453,174.20.5,3,6,6,3,3,5,1,1,4,,247,0,0,2.2666666666666666,0 +454,96.246.1,7,2,2,3,2,1,1,2,2,,358,0,0,4.566666666666666,0 +464,67.167.4,2,6,6,2,4,2,1,2,2,,336,0,0,3.866666666666667,0 +470,67.161.1,2,4,4,1,1,3,1,2,1,,403,0,0,1.9,0 +474,74.130.2,6,2,2,3,2,1,2,1,2,,277,0,0,3.066666666666667,0 
+480,162.200.,4,5,6,7,6,5,2,2,4,,360,0,0,1.1,0 +483,99.52.99,2,6,6,4,4,4,2,2,1,,314,0,0,2.2666666666666666,0 +488,76.31.20,2,5,5,4,4,3,2,2,2,,356,0,0,1.3666666666666667,0 +498,69.180.2,4,3,4,7,6,7,1,2,4,great survey,545,0,0,2.9666666666666663,0 +504,99.61.18,4,2,2,7,6,7,2,2,4,thanks,399,0,0,5.466666666666667,0 +510,66.69.37,2,3,3,5,6,7,2,2,2,,406,0,0,3.8666666666666663,0 +513,73.19.18,3,3,3,5,4,4,2,1,4,,485,0,0,0.6666666666666666,0 +530,75.18.18,1,5,4,2,2,3,2,1,4,,476,0,0,2.166666666666667,0 +531,173.211.,4,5,6,2,1,2,1,2,3,,547,0,0,3.866666666666667,0 +534,75.70.28,2,5,5,1,2,3,1,2,2,,208,0,0,2.8,0 +535,201.254.,1,4,3,4,5,4,1,2,3,,318,0,0,1.9,0 +550,73.225.1,6,2,2,4,5,6,2,1,3,,590,0,0,3.3666666666666663,0 +554,50.4.32.,1,4,4,4,5,5,2,1,1,,322,0,0,2.166666666666667,0 +564,70.95.39,1,4,4,2,3,3,2,1,2,,556,0,0,1.3666666666666667,0 +566,170.83.2,3,6,6,4,5,3,1,1,2,,466,0,0,1.9,0 +568,98.214.1,4,2,2,4,5,4,1,2,2,good survey,227,0,0,1.5,0 +569,66.168.2,7,2,2,1,3,1,2,1,1,,565,0,0,5.066666666666667,0 +573,108.171.,3,6,6,2,2,2,2,1,3,,325,0,0,3.9,0 +582,174.63.2,2,3,3,3,2,1,1,1,4,,308,0,0,0.6666666666666666,0 +590,67.168.1,2,6,6,7,5,7,2,1,3,,543,0,0,3.5,0 +594,172.78.5,7,2,2,3,3,2,2,2,1,,527,0,0,3.766666666666667,0 +597,73.111.1,2,5,5,1,4,3,1,2,3,,210,0,0,2.666666666666667,0 +608,173.25.2,6,2,3,7,6,7,2,1,1,,236,0,0,4.566666666666667,0 +609,99.116.1,2,4,4,4,6,7,2,1,2,,275,0,0,3.1,0 +616,174.63.2,1,5,5,2,2,2,1,2,2,,233,0,0,2.966666666666667,0 +622,67.168.1,1,5,5,1,3,1,2,1,3,,536,0,0,3.866666666666667,0 +637,172.78.5,4,3,3,2,2,4,1,1,2,,261,0,0,0.8,0 +642,73.111.1,1,4,5,4,6,5,2,2,2,,540,0,0,2.9666666666666677,0 +648,173.25.2,1,5,4,1,2,1,2,2,1,,543,0,0,3.066666666666667,0 +653,99.116.1,2,3,3,2,3,4,2,2,4,,248,0,0,0.5666666666666667,0 +666,73.219.2,4,3,3,1,2,1,2,2,3,,582,0,0,1.4666666666666668,0 +667,73.120.3,1,4,4,3,5,2,2,2,4,,199,0,0,2.1666666666666665,0 +669,157.50.3,4,3,2,6,6,5,2,1,3,,396,0,0,2.6666666666666665,0 +677,24.0.80.,2,2,3,2,5,4,1,1,3,n/a,387,0,0,1.6,0 
+680,115.42.1,3,6,6,3,1,3,1,1,2,,500,0,0,3.866666666666667,0 +683,76.113.9,1,4,4,3,2,2,2,2,4,,364,0,0,1.466666666666667,0 +686,172.58.1,3,3,3,3,2,5,1,2,4,,199,0,0,0.9666666666666666,0 +699,65.78.86,2,2,3,5,6,5,2,1,1,,158,0,0,2.9666666666666663,0 +706,172.1.22,4,5,4,7,4,6,1,2,4,,247,0,0,1.6,0 +707,208.38.2,1,5,5,7,5,7,2,1,2,,589,0,0,4.8,0 +708,104.34.8,2,4,4,1,2,4,2,2,4,none,229,0,0,1.7666666666666664,0 +714,71.224.6,4,2,2,6,4,7,1,2,2,,276,0,0,4.166666666666667,0 +716,107.77.2,2,4,4,6,7,6,1,1,3,n/a,464,0,0,3.366666666666667,0 +723,71.237.1,6,2,2,6,4,5,2,1,2,,266,0,0,3.3666666666666663,0 +726,76.237.9,1,5,5,7,5,7,2,2,3,,176,0,0,4.8,0 +729,73.164.2,4,3,3,4,6,3,2,1,4,,178,0,0,1.3666666666666667,0 +730,72.183.1,3,6,7,1,2,3,1,2,3,,571,0,0,5.466666666666667,0 +731,189.223.,6,2,1,4,4,4,2,1,2,,375,0,0,3.1,0 +733,71.92.20,1,4,4,2,3,3,1,2,2,,382,0,0,1.3666666666666667,0 +735,142.91.2,1,5,5,3,3,2,1,2,3,,314,0,0,2.5666666666666673,0 +741,73.90.69,6,2,2,7,5,6,2,1,2,,203,0,0,4.666666666666666,0 +743,66.87.12,2,6,6,4,3,2,1,1,1,suss suss suss,537,0,0,3.366666666666667,0 +752,71.93.21,4,3,4,4,5,4,1,1,1,,521,0,0,0.4,0 +758,108.176.,3,3,3,1,2,1,2,2,2,,565,0,0,0.9666666666666666,0 +771,50.24.25,3,3,3,1,3,5,1,2,4,,511,0,0,1.6,0 +772,69.201.7,6,2,2,6,4,5,1,1,2,,467,0,0,3.3666666666666663,0 +778,108.187.,4,3,2,4,6,5,2,2,2,,539,0,0,2,0 +779,47.203.3,1,4,3,1,2,3,1,2,2,,218,0,0,1.466666666666667,0 +788,73.27.11,1,5,5,1,2,1,2,1,1,,200,0,0,3.9,0 +805,173.167.,2,5,5,2,2,2,2,1,1,,161,0,0,2.4,0 +806,173.20.6,2,3,3,3,4,4,2,2,2,,165,0,0,0.5666666666666667,0 +807,24.27.22,2,3,3,7,7,7,1,1,2,,449,0,0,5.7666666666666675,0 +808,47.219.1,2,5,5,2,2,2,1,2,4,,335,0,0,2.4,0 +809,73.210.1,3,3,3,5,7,4,2,1,4,,384,0,0,2.5666666666666673,0 +811,47.135.3,2,5,5,4,5,4,2,1,3,,483,0,0,1.3666666666666665,0 +812,24.74.28,1,4,5,7,4,5,1,2,1,,468,0,0,3.866666666666667,0 +814,108.6.17,6,2,2,2,1,6,2,2,2,,170,0,0,4.966666666666667,0 +819,66.66.3.,2,5,5,5,5,6,1,2,2,none,415,0,0,1.8666666666666665,0 
+826,107.77.2,2,6,6,4,5,3,1,2,3,,469,0,0,2.6666666666666665,0 +831,97.88.10,3,3,3,2,2,3,2,2,2,thanks,185,0,0,0.26666666666666666,0 +832,174.107.,3,6,7,6,7,6,2,2,2,,229,0,0,2.1666666666666674,0 +841,172.58.1,2,6,6,4,3,4,2,1,1,,228,0,0,2.566666666666667,0 +871,99.169.1,3,6,7,7,5,7,2,2,4,I am a bot.,567,0,0,2.5666666666666673,0 +874,174.71.4,2,5,5,7,5,7,1,2,4,,420,0,0,3.366666666666667,0 +881,96.32.17,6,2,2,1,3,1,1,2,1,,440,0,0,3.5,0 +888,75.166.1,1,4,4,5,6,7,2,2,1,,208,0,0,4.3,0 +891,24.243.1,3,6,6,3,5,4,2,2,2,,391,0,0,1.9,0 +892,98.201.1,2,6,6,7,5,6,2,2,4,,231,0,0,3.0666666666666664,0 +893,45.22.77,2,4,4,7,7,7,2,1,4,,464,0,0,4.566666666666667,0 +903,174.20.1,2,4,4,5,5,6,2,1,1,,545,0,0,1.8666666666666665,0 +904,69.10.11,2,5,5,7,4,6,1,1,1,,471,0,0,2.9666666666666672,0 +906,24.209.5,1,4,4,6,4,5,2,2,4,,177,0,0,2.8,0 +913,24.233.1,4,3,4,3,4,4,1,2,2,,578,0,0,0.26666666666666666,0 +922,68.180.9,2,6,6,7,4,5,1,2,4,,311,0,0,3.2,0 +923,131.191.,4,5,6,2,1,4,2,1,1,,497,0,0,3.4666666666666663,0 +925,23.240.1,3,6,5,2,4,1,1,2,1,,212,0,0,3.5,0 +934,98.236.2,6,2,2,6,5,6,2,2,1,,595,0,0,3.9,0 +954,70.187.8,4,4,4,4,4,4,4,4,4,,281,0,0,0,1 +955,73.67.42,2,3,3,4,5,3,1,2,2,,300,0,0,1.0666666666666667,0 +958,73.129.4,2,6,6,3,2,4,1,2,4,n/a,238,0,0,3.366666666666667,0 +962,73.147.8,4,5,6,3,1,2,2,2,4,none,282,0,0,3.5,0 +964,199.46.1,3,6,6,2,4,2,2,2,1,Bot bot bot.,544,0,0,3.3666666666666663,0 +965,162.226.,3,6,6,5,6,5,1,2,4,,412,0,0,1.3666666666666665,0 +970,14.138.2,2,3,3,1,1,3,2,2,4,,291,0,0,0.9666666666666666,0 +971,65.41.97,4,3,4,3,3,4,2,2,4,,305,0,0,0.3,0 +975,68.109.2,1,4,4,1,3,1,2,1,3,,182,0,0,2.2666666666666666,0 +978,68.5.68.,1,5,5,4,4,3,2,2,3,,484,0,0,2.2666666666666666,0 +979,173.21.1,1,4,4,5,3,4,2,2,3,,562,0,0,1.9,0 +982,108.84.1,1,4,4,4,6,6,2,2,2,this is a suspicious response,505,0,0,3.366666666666667,0 +986,174.16.6,3,6,6,3,5,4,1,2,3,,384,0,0,1.9,0 +987,68.235.1,2,4,4,3,2,3,1,2,4,,535,0,0,0.8,0 +992,73.119.1,3,3,3,5,5,5,2,2,2,,386,0,0,1.2,0 
+993,107.12.1,6,2,2,NA,NA,NA,NA,2,3,,513,1,0,NA,0 +995,73.191.5,2,6,6,NA,NA,NA,NA,1,3,,252,1,0,NA,0 +996,73.222.7,2,5,5,NA,NA,NA,NA,1,4,,257,1,0,NA,0 diff --git a/data/sampledata_commentsexclusionmarked.csv b/data/sampledata_commentsexclusionmarked.csv new file mode 100644 index 0000000..f43bec1 --- /dev/null +++ b/data/sampledata_commentsexclusionmarked.csv @@ -0,0 +1,200 @@ +participantid,ip_address,variable1,variable2,variable3,variable4,variable5,variable6,demographicscateg,condition12,condition1234,comments,completion_timeseconds,exclude_completion,exclude_time,variance,exclude_variance,exclude_comments,exclude_commentsnotes +964,199.46.1,3,6,6,2,4,2,2,2,1,Bot bot bot.,544,0,0,3.366666667,0,1,suspected bot +186,64.237.2,3,6,7,7,5,6,2,1,4,exclude me!,463,0,0,2.266666667,0,1, +568,98.214.1,4,2,2,4,5,4,1,2,2,good survey,227,0,0,1.5,0,0, +268,76.99.15,2,6,6,3,3,5,1,2,3,great survey,181,0,0,2.966666667,0,0, +498,69.180.2,4,3,4,7,6,7,1,2,4,great survey,545,0,0,2.966666667,0,0, +315,68.44.12,4,5,6,6,7,6,2,1,4,I am a bot.,303,0,0,1.066666667,0,1,suspected bot +871,99.169.1,3,6,7,7,5,7,2,2,4,I am a bot.,567,0,0,2.566666667,0,1,suspected bot +203,24.247.1,3,6,6,5,6,5,1,1,4,I did your survey twice! 
Oops!,248,0,0,1.366666667,0,1,repeat completion +677,24.0.80.,2,2,3,2,5,4,1,1,3,n/a,387,0,0,1.6,0,0, +716,107.77.2,2,4,4,6,7,6,1,1,3,n/a,464,0,0,3.366666667,0,0, +958,73.129.4,2,6,6,3,2,4,1,2,4,n/a,238,0,0,3.366666667,0,0, +149,100.15.1,2,2,3,2,1,3,1,2,1,none,533,0,0,0.566666667,0,0, +207,68.63.16,6,2,3,7,4,5,2,2,4,none,289,0,0,3.5,0,0, +708,104.34.8,2,4,4,1,2,4,2,2,4,none,229,0,0,1.766666667,0,0, +819,66.66.3.,2,5,5,5,5,6,1,2,2,none,415,0,0,1.866666667,0,0, +962,73.147.8,4,5,6,3,1,2,2,2,4,none,282,0,0,3.5,0,0, +743,66.87.12,2,6,6,4,3,2,1,1,1,suss suss suss,537,0,0,3.366666667,0,1,suspected bot +349,162.195.,6,2,2,3,2,1,2,2,2,ta!,225,0,0,3.066666667,0,0, +248,73.149.2,3,3,3,7,7,6,1,2,3,thanks,265,0,0,4.166666667,0,0, +504,99.61.18,4,2,2,7,6,7,2,2,4,thanks,399,0,0,5.466666667,0,0, +831,97.88.10,3,3,3,2,2,3,2,2,2,thanks,185,0,0,0.266666667,0,0, +982,108.84.1,1,4,4,4,6,6,2,2,2,this is a suspicious response,505,0,0,3.366666667,0,1,suspected bot +103,73.173.1,4,4,6,5,5,6,2,1,3,,527,0,0,0.8,0,0, +109,97.102.1,1,4,4,4,5,6,2,2,3,,287,0,0,2.8,0,0, +118,108.30.2,3,6,6,2,3,2,1,1,1,,391,0,0,3.466666667,0,0, +121,75.128.9,2,5,5,2,1,1,2,1,2,,377,0,0,3.466666667,0,0, +122,76.185.1,6,2,3,5,6,5,2,1,4,,359,0,0,2.7,0,0, +126,47.208.6,3,6,6,6,7,6,2,2,1,,595,0,0,1.866666667,0,0, +128,73.214.8,2,2,3,5,6,7,1,1,1,,296,0,0,4.566666667,0,0, +129,24.167.1,1,4,4,1,1,3,2,2,3,,466,0,0,2.266666667,0,0, +132,24.46.11,2,4,4,3,4,3,2,2,3,,296,0,0,0.666666667,0,0, +133,72.187.1,2,3,3,3,2,4,1,1,3,,550,0,0,0.566666667,0,0, +134,108.30.2,1,5,5,4,5,5,1,1,1,,514,0,0,2.566666667,0,0, +136,71.206.8,7,2,2,5,7,6,1,2,1,,358,0,0,5.366666667,0,0, +137,76.210.1,2,6,6,6,6,5,1,2,1,,435,0,0,2.566666667,0,0, +138,76.23.61,1,5,5,5,7,6,1,1,1,,418,0,0,4.166666667,0,0, +144,70.182.9,1,5,5,6,4,7,1,1,4,,571,0,0,4.266666667,0,0, +145,71.181.2,3,6,7,3,3,2,1,1,2,,382,0,0,4,0,0, +147,107.210.,3,6,6,4,1,2,2,2,2,,160,0,0,4.266666667,0,0, +148,70.182.9,6,2,2,3,3,1,1,2,2,,226,0,0,2.966666667,0,0, 
+160,49.205.1,1,5,5,4,6,4,1,2,3,,406,0,0,2.966666667,0,0, +161,142.196.,7,2,2,4,4,5,2,1,1,,541,0,0,3.6,0,0, +171,108.215.,6,2,2,3,2,4,1,1,3,,226,0,0,2.566666667,0,0, +175,67.249.8,2,6,6,7,5,6,2,1,4,,170,0,0,3.066666667,0,0, +183,75.12.93,3,6,6,6,4,7,2,1,4,,401,0,0,2.266666667,0,0, +187,71.63.20,2,4,4,7,7,6,1,2,3,,213,0,0,4,0,0, +194,47.156.2,7,2,2,1,3,2,2,2,2,,516,0,0,4.566666667,0,0, +195,24.1.163,7,2,2,3,2,1,2,2,4,,557,0,0,4.566666667,0,0, +209,24.31.24,4,3,4,3,2,1,2,2,3,,563,0,0,1.366666667,0,0, +210,74.196.6,3,3,3,5,6,5,2,2,1,,214,0,0,1.766666667,0,0, +214,69.247.7,4,3,3,3,2,4,1,1,3,,454,0,0,0.566666667,0,0, +217,71.192.8,3,3,3,7,6,7,2,2,3,,192,0,0,4.166666667,0,0, +227,163.238.,2,6,6,3,4,3,1,1,4,,332,0,0,2.8,0,0, +232,68.184.4,3,6,6,1,3,2,2,2,1,,457,0,0,4.3,0,0, +233,99.136.2,7,2,2,1,3,4,1,2,4,,191,0,0,4.566666667,0,0, +235,162.248.,1,4,4,1,3,3,1,2,2,,212,0,0,1.866666667,0,0, +240,174.60.7,4,5,6,4,3,3,1,2,1,,541,0,0,1.366666667,0,0, +252,47.17.18,3,6,5,7,5,6,1,1,3,,254,0,0,1.866666667,0,0, +261,24.34.32,2,6,6,3,4,3,1,2,1,,158,0,0,2.8,0,0, +273,173.69.6,1,5,5,3,3,2,2,2,2,,463,0,0,2.566666667,0,0, +280,72.185.1,3,6,6,2,3,2,2,2,1,,421,0,0,3.466666667,0,0, +287,47.202.1,5,6,6,2,1,3,2,2,2,,377,0,0,4.566666667,0,0, +294,162.231.,2,5,5,5,5,5,2,2,1,,528,0,0,1.5,0,0, +295,74.70.19,4,3,3,5,7,4,2,1,3,,30,0,1,2.266666667,0,0, +296,114.142.,4,3,2,4,3,5,1,2,2,,201,0,0,1.1,0,0, +298,75.39.5.,7,2,2,5,6,7,1,2,2,,597,0,0,5.366666667,0,0, +306,76.92.35,2,5,5,2,2,2,1,2,1,,191,0,0,2.4,0,0, +318,71.172.3,1,4,4,3,3,4,1,2,4,,576,0,0,1.366666667,0,0, +323,66.66.17,2,6,6,3,3,4,1,1,1,,382,0,0,2.8,0,0, +324,73.61.25,2,5,5,1,2,3,2,2,3,,460,0,0,2.8,0,0, +326,108.208.,6,2,2,2,2,3,2,2,3,,359,0,0,2.566666667,0,0, +330,67.242.1,3,3,3,7,4,6,2,1,4,,477,0,0,3.066666667,0,0, +331,96.27.11,1,4,3,5,4,6,1,1,1,,437,0,0,2.966666667,0,0, +332,173.91.8,6,2,2,7,6,5,1,2,1,,407,0,0,4.666666667,0,0, +339,184.91.9,2,5,5,6,5,6,2,1,1,,375,0,0,2.166666667,0,0, 
+340,100.2.42,3,6,6,3,3,5,1,2,4,,163,0,0,2.266666667,0,0, +341,172.91.8,3,3,3,3,2,1,2,1,2,,501,0,0,0.7,0,0, +342,75.88.10,3,3,3,5,3,6,2,1,1,,321,0,0,1.766666667,0,0, +347,162.199.,2,6,6,2,2,1,1,1,4,,572,0,0,4.966666667,0,0, +360,32.208.8,4,3,3,2,4,3,1,2,2,,299,0,0,0.566666667,0,0, +362,50.97.23,2,5,5,7,4,5,1,1,3,,305,0,0,2.666666667,0,0, +366,107.77.1,1,4,4,2,1,4,1,1,2,,553,0,0,2.266666667,0,0, +378,184.13.6,4,4,4,4,4,4,4,4,4,,470,0,0,0,1,0, +382,74.217.9,1,4,5,6,5,5,2,2,1,,466,0,0,3.066666667,0,0, +386,73.88.71,2,2,3,1,1,3,2,2,2,,518,0,0,0.8,0,0, +391,98.127.1,1,4,3,6,3,6,1,1,2,,308,0,0,3.766666667,0,0, +394,108.88.5,4,5,6,5,6,7,1,2,2,,464,0,0,1.1,0,0, +398,166.181.,2,3,3,1,1,3,2,2,2,,537,0,0,0.966666667,0,0, +399,67.80.97,2,5,5,4,3,2,1,2,4,,499,0,0,1.9,0,0, +400,73.39.22,6,6,6,6,6,6,6,6,6,,212,0,0,0,1,0, +401,98.244.1,6,2,2,1,3,1,2,2,3,,55,0,1,3.5,0,0, +402,67.48.16,3,6,5,2,4,2,2,2,1,,232,0,0,2.666666667,0,0, +422,73.38.16,7,2,2,2,3,6,2,1,1,,468,0,0,5.066666667,0,0, +425,173.61.1,2,6,6,4,5,4,2,2,4,,477,0,0,2.3,0,0, +426,104.1.23,2,6,6,3,4,3,2,2,1,,481,0,0,2.8,0,0, +427,172.76.6,6,2,2,3,2,1,1,1,2,,62,0,1,3.066666667,0,0, +428,71.237.1,3,3,3,3,3,3,3,3,3,,315,0,0,0,1,0, +429,68.132.1,1,5,5,4,6,4,1,1,1,,593,0,0,2.966666667,0,0, +433,73.91.43,3,6,6,3,4,3,2,2,3,,307,0,0,2.166666667,0,0, +436,107.242.,2,6,6,3,5,4,1,1,4,,519,0,0,2.666666667,0,0, +447,162.202.,2,4,4,3,2,2,2,1,2,,429,0,0,0.966666667,0,0, +448,76.186.1,2,4,4,2,1,4,2,1,3,,338,0,0,1.766666667,0,0, +449,108.200.,2,4,4,3,2,4,1,2,3,,163,0,0,0.966666667,0,0, +453,174.20.5,3,6,6,3,3,5,1,1,4,,247,0,0,2.266666667,0,0, +454,96.246.1,7,2,2,3,2,1,1,2,2,,358,0,0,4.566666667,0,0, +464,67.167.4,2,6,6,2,4,2,1,2,2,,336,0,0,3.866666667,0,0, +470,67.161.1,2,4,4,1,1,3,1,2,1,,403,0,0,1.9,0,0, +474,74.130.2,6,2,2,3,2,1,2,1,2,,277,0,0,3.066666667,0,0, +480,162.200.,4,5,6,7,6,5,2,2,4,,360,0,0,1.1,0,0, +483,99.52.99,2,6,6,4,4,4,2,2,1,,314,0,0,2.266666667,0,0, +488,76.31.20,2,5,5,4,4,3,2,2,2,,356,0,0,1.366666667,0,0, 
+510,66.69.37,2,3,3,5,6,7,2,2,2,,406,0,0,3.866666667,0,0, +513,73.19.18,3,3,3,5,4,4,2,1,4,,485,0,0,0.666666667,0,0, +530,75.18.18,1,5,4,2,2,3,2,1,4,,476,0,0,2.166666667,0,0, +531,173.211.,4,5,6,2,1,2,1,2,3,,547,0,0,3.866666667,0,0, +534,75.70.28,2,5,5,1,2,3,1,2,2,,208,0,0,2.8,0,0, +535,201.254.,1,4,3,4,5,4,1,2,3,,318,0,0,1.9,0,0, +550,73.225.1,6,2,2,4,5,6,2,1,3,,590,0,0,3.366666667,0,0, +554,50.4.32.,1,4,4,4,5,5,2,1,1,,322,0,0,2.166666667,0,0, +564,70.95.39,1,4,4,2,3,3,2,1,2,,556,0,0,1.366666667,0,0, +566,170.83.2,3,6,6,4,5,3,1,1,2,,466,0,0,1.9,0,0, +569,66.168.2,7,2,2,1,3,1,2,1,1,,565,0,0,5.066666667,0,0, +573,108.171.,3,6,6,2,2,2,2,1,3,,325,0,0,3.9,0,0, +582,174.63.2,2,3,3,3,2,1,1,1,4,,308,0,0,0.666666667,0,0, +590,67.168.1,2,6,6,7,5,7,2,1,3,,543,0,0,3.5,0,0, +594,172.78.5,7,2,2,3,3,2,2,2,1,,527,0,0,3.766666667,0,0, +597,73.111.1,2,5,5,1,4,3,1,2,3,,210,0,0,2.666666667,0,0, +608,173.25.2,6,2,3,7,6,7,2,1,1,,236,0,0,4.566666667,0,0, +609,99.116.1,2,4,4,4,6,7,2,1,2,,275,0,0,3.1,0,0, +616,174.63.2,1,5,5,2,2,2,1,2,2,,233,0,0,2.966666667,0,0, +622,67.168.1,1,5,5,1,3,1,2,1,3,,536,0,0,3.866666667,0,0, +637,172.78.5,4,3,3,2,2,4,1,1,2,,261,0,0,0.8,0,0, +642,73.111.1,1,4,5,4,6,5,2,2,2,,540,0,0,2.966666667,0,0, +648,173.25.2,1,5,4,1,2,1,2,2,1,,543,0,0,3.066666667,0,0, +653,99.116.1,2,3,3,2,3,4,2,2,4,,248,0,0,0.566666667,0,0, +666,73.219.2,4,3,3,1,2,1,2,2,3,,582,0,0,1.466666667,0,0, +667,73.120.3,1,4,4,3,5,2,2,2,4,,199,0,0,2.166666667,0,0, +669,157.50.3,4,3,2,6,6,5,2,1,3,,396,0,0,2.666666667,0,0, +680,115.42.1,3,6,6,3,1,3,1,1,2,,500,0,0,3.866666667,0,0, +683,76.113.9,1,4,4,3,2,2,2,2,4,,364,0,0,1.466666667,0,0, +686,172.58.1,3,3,3,3,2,5,1,2,4,,199,0,0,0.966666667,0,0, +699,65.78.86,2,2,3,5,6,5,2,1,1,,158,0,0,2.966666667,0,0, +706,172.1.22,4,5,4,7,4,6,1,2,4,,247,0,0,1.6,0,0, +707,208.38.2,1,5,5,7,5,7,2,1,2,,589,0,0,4.8,0,0, +714,71.224.6,4,2,2,6,4,7,1,2,2,,276,0,0,4.166666667,0,0, +723,71.237.1,6,2,2,6,4,5,2,1,2,,266,0,0,3.366666667,0,0, 
+726,76.237.9,1,5,5,7,5,7,2,2,3,,176,0,0,4.8,0,0, +729,73.164.2,4,3,3,4,6,3,2,1,4,,178,0,0,1.366666667,0,0, +730,72.183.1,3,6,7,1,2,3,1,2,3,,571,0,0,5.466666667,0,0, +731,189.223.,6,2,1,4,4,4,2,1,2,,375,0,0,3.1,0,0, +733,71.92.20,1,4,4,2,3,3,1,2,2,,382,0,0,1.366666667,0,0, +735,142.91.2,1,5,5,3,3,2,1,2,3,,314,0,0,2.566666667,0,0, +741,73.90.69,6,2,2,7,5,6,2,1,2,,203,0,0,4.666666667,0,0, +752,71.93.21,4,3,4,4,5,4,1,1,1,,521,0,0,0.4,0,0, +758,108.176.,3,3,3,1,2,1,2,2,2,,565,0,0,0.966666667,0,0, +771,50.24.25,3,3,3,1,3,5,1,2,4,,511,0,0,1.6,0,0, +772,69.201.7,6,2,2,6,4,5,1,1,2,,467,0,0,3.366666667,0,0, +778,108.187.,4,3,2,4,6,5,2,2,2,,539,0,0,2,0,0, +779,47.203.3,1,4,3,1,2,3,1,2,2,,218,0,0,1.466666667,0,0, +788,73.27.11,1,5,5,1,2,1,2,1,1,,200,0,0,3.9,0,0, +805,173.167.,2,5,5,2,2,2,2,1,1,,161,0,0,2.4,0,0, +806,173.20.6,2,3,3,3,4,4,2,2,2,,165,0,0,0.566666667,0,0, +807,24.27.22,2,3,3,7,7,7,1,1,2,,449,0,0,5.766666667,0,0, +808,47.219.1,2,5,5,2,2,2,1,2,4,,335,0,0,2.4,0,0, +809,73.210.1,3,3,3,5,7,4,2,1,4,,384,0,0,2.566666667,0,0, +811,47.135.3,2,5,5,4,5,4,2,1,3,,483,0,0,1.366666667,0,0, +812,24.74.28,1,4,5,7,4,5,1,2,1,,468,0,0,3.866666667,0,0, +814,108.6.17,6,2,2,2,1,6,2,2,2,,170,0,0,4.966666667,0,0, +826,107.77.2,2,6,6,4,5,3,1,2,3,,469,0,0,2.666666667,0,0, +832,174.107.,3,6,7,6,7,6,2,2,2,,229,0,0,2.166666667,0,0, +841,172.58.1,2,6,6,4,3,4,2,1,1,,228,0,0,2.566666667,0,0, +874,174.71.4,2,5,5,7,5,7,1,2,4,,420,0,0,3.366666667,0,0, +881,96.32.17,6,2,2,1,3,1,1,2,1,,440,0,0,3.5,0,0, +888,75.166.1,1,4,4,5,6,7,2,2,1,,208,0,0,4.3,0,0, +891,24.243.1,3,6,6,3,5,4,2,2,2,,391,0,0,1.9,0,0, +892,98.201.1,2,6,6,7,5,6,2,2,4,,231,0,0,3.066666667,0,0, +893,45.22.77,2,4,4,7,7,7,2,1,4,,464,0,0,4.566666667,0,0, +903,174.20.1,2,4,4,5,5,6,2,1,1,,545,0,0,1.866666667,0,0, +904,69.10.11,2,5,5,7,4,6,1,1,1,,471,0,0,2.966666667,0,0, +906,24.209.5,1,4,4,6,4,5,2,2,4,,177,0,0,2.8,0,0, +913,24.233.1,4,3,4,3,4,4,1,2,2,,578,0,0,0.266666667,0,0, +922,68.180.9,2,6,6,7,4,5,1,2,4,,311,0,0,3.2,0,0, 
+923,131.191.,4,5,6,2,1,4,2,1,1,,497,0,0,3.466666667,0,0, +925,23.240.1,3,6,5,2,4,1,1,2,1,,212,0,0,3.5,0,0, +934,98.236.2,6,2,2,6,5,6,2,2,1,,595,0,0,3.9,0,0, +954,70.187.8,4,4,4,4,4,4,4,4,4,,281,0,0,0,1,0, +955,73.67.42,2,3,3,4,5,3,1,2,2,,300,0,0,1.066666667,0,0, +965,162.226.,3,6,6,5,6,5,1,2,4,,412,0,0,1.366666667,0,0, +970,14.138.2,2,3,3,1,1,3,2,2,4,,291,0,0,0.966666667,0,0, +971,65.41.97,4,3,4,3,3,4,2,2,4,,305,0,0,0.3,0,0, +975,68.109.2,1,4,4,1,3,1,2,1,3,,182,0,0,2.266666667,0,0, +978,68.5.68.,1,5,5,4,4,3,2,2,3,,484,0,0,2.266666667,0,0, +979,173.21.1,1,4,4,5,3,4,2,2,3,,562,0,0,1.9,0,0, +986,174.16.6,3,6,6,3,5,4,1,2,3,,384,0,0,1.9,0,0, +987,68.235.1,2,4,4,3,2,3,1,2,4,,535,0,0,0.8,0,0, +992,73.119.1,3,3,3,5,5,5,2,2,2,,386,0,0,1.2,0,0, +993,107.12.1,6,2,2,NA,NA,NA,NA,2,3,,513,1,0,NA,0,0, +995,73.191.5,2,6,6,NA,NA,NA,NA,1,3,,252,1,0,NA,0,0, +996,73.222.7,2,5,5,NA,NA,NA,NA,1,4,,257,1,0,NA,0,0, diff --git a/wrangle.qmd b/wrangle.qmd index 6e62f89..eee17d9 100644 --- a/wrangle.qmd +++ b/wrangle.qmd @@ -1,8 +1,49 @@ # Wrangle -## Clean names +# Packages for this chapter + + +```{r} +#| warning: false +#| message: false +library(tidyverse) +library(here) +library(janitor) +library(haven) +library(ufs) + +``` + + +Someone said that 90% of the work in data analysis is getting your data from its raw state to its analysable state. Working in R makes this process quick and reproducible. First, a quick overview of some useful wrangling functions from `janitor` and dplyr`. + + +## Renaming variables + +```{r} + +``` + +## Select columns + +```{r} + +``` + + +## Filter rows + +```{r} + +``` + + +# Compute variables + +```{r} + +``` -## Dealing with labels ## Exclusions @@ -14,10 +55,255 @@ ``` -## Creating scales and indexes -`group_by` `summarise` +## group_by and summarise + + + +## rowwise and mutate + + + + + + +# Wrangling data + +## Exclusions + +Often, we need to screen our data for potential exclusions. 
Below, we'll apply exclusions based on: +- Didn't complete the survey +- Very short completion time +- No variance across sets of items +- Bad/bot written responses + +Note, exclusions are usually defined in an analysis plan or preregistration and should always be checked with Lisa. + +Overall, we will create new variables for each exclusion in which a participant should be excluded if they have a value of 1. + +Then, we will apply code that takes those folks out in a new dataframe to be carried forward into analyses. + +As you calculate each exclusion, it's important to check that the code is doing what you intend it to do. You can do this by viewing the dataframe (clicking on it in the Environment or typing `view(dataframename)` in the console). + + +## Completion + +Note that using the Progress variable from Qualtrics is tricky, as some people do the entire survey but don't click the final arrow, so don't show up as 100% complete. We typically use a definition of complete whereby participants who didn't complete the final demographics item are excluded. + +Here, we'll use the demographicscateg variable. A visual check of your data should reveal what the best approach is with your data. If in doubt, ask Lisa. + +This code takes the data_zapped dataframe and pipes it into a `mutate` command to make a new variable (called exclude_completion) that codes according to the following logic: if demographicscateg is missing (is.na), then code as 1, otherwise (TRUE) code as 0. It performs this command and then saves it back over the same dataframe (because we asked R to do this via `data_zapped <-`). + +Remember to check that this code is doing what you expect! + +```{r} + +data_zapped <- data_zapped %>% + mutate(exclude_completion = case_when(is.na(demographicscateg) ~ 1, TRUE ~ 0)) + +``` + +## Completion Time + +Next up, short completion time. This is usually defined based on pre-testing or a prior study.
In the sample data, we will apply an exclusion if participants completed the study in less than 90 seconds. + +This code follows a similar format as for completion: if completion time (completion_timeseconds) is less than 90 seconds, code the new variable (exclude_time) as 1. Otherwise, 0. + +Again, check this is working! + +```{r} + +data_zapped <- data_zapped %>% + mutate(exclude_time = case_when(completion_timeseconds < 90 ~ 1, TRUE ~ 0)) + +``` + +## Variance + +Next, variance. This is to capture folks (or 'bots') who give the same response all the way down the rows of variables (presumably without reading them). + +`rowwise` tells R to do the following command by row; the code then creates a new variable via `mutate` called variance made up of the variance of the variables listed in the `c()` list. Then, the code computes another new variable that codes whether that variance value is 0 (if so mark as 1 for exclusion), otherwise 0. + +*IMPORTANT NOTE*: Every time you use `rowwise()` or `group_by()`, you need to add an `ungroup()` to the end. Trust me, this habit will save you a lot of headaches down the line! + +Once again, check! + + +```{r} +data_zapped <- data_zapped %>% + rowwise() %>% + mutate(variance = var(c(variable1, variable2, variable3, variable4, variable5, variable6))) %>% + mutate(exclude_variance = case_when(variance == 0 ~ 1, TRUE ~ 0)) %>% + ungroup() +``` + +## Comments + +Now for comments screening. For ease, we will do this in Excel (or similar spreadsheet software). + +First, we need to write the current dataframe to a csv. + +```{r} + +write_csv(data_zapped, here("data","sampledata_commentsexclusion.csv")) + +``` + +Manually create new variables (columns) in Excel corresponding to the exclusion to be applied. Here it is one variable named `exclude_comments`. You may wish to make additional, clearly labelled columns with notes about why you made particular exclude decisions. Again, run your exclusion coding plan by Lisa before beginning.
+ +Save the file *with a new name* (e.g., adding "marked" to the end). + +The code below reads that new datafile in, now with the new comments exclusion variable. + +```{r reading in data} + +data_zapped_commentsmarked <- read_csv(here("data","sampledata_commentsexclusionmarked.csv")) + +``` + +## Apply the exclusions + +Now, we will apply all the exclusions. The aim is to only keep folks who *don't* have 1s on any of the exclude variables. + +First, let's get a sense of how many people we're excluding. We could do this one by one for each exclusion using the `tabyl` command, like this: + + +```{r} + +data_zapped_commentsmarked %>% + tabyl(exclude_completion) + +data_zapped_commentsmarked %>% + tabyl(exclude_time) + +data_zapped_commentsmarked %>% + tabyl(exclude_variance) + +data_zapped_commentsmarked %>% + tabyl(exclude_comments) + +``` + +But that won't give us a sense of whether folks have been marked for more than one exclusion. So let's make a new variable that codes this info: exclude_coded. To do that, we first make a variable that collapses together all the 0s and 1s from the individual exclusion variables. Then, we recode that collapsed variable to have the correct corresponding labels. Note that order here is really important, so if you change this up for your own purposes, ensure that you keep the variable ordering and recode naming in the right order. + +Once we have the exclude_coded variable, we can make a table of it (via `tabyl`) to get a nice summary of how many people we excluded, and why! Note that the Total should correspond to the number of observations in your data, and there should be no missing (NA) values. + +Note - it's highly recommended to consider completion as a different type of exclusion - especially for reporting. Usually in a thesis or manuscript, you report the number of *completers* and then exclusion counts from there.
If you include completion, it makes it seem like the substantive 'exclusion rate' is much higher than it really is. + +The following code creates a coded exclusion variable, creates a table of the counts of those coded exclusions (and adds a total row via `adorn_totals`), and then prints it for viewing. + + +```{r} +data_completed <- data_zapped_commentsmarked %>% + filter(exclude_completion == 0) + +data_exclusions <- data_completed %>% + rowwise() %>% + mutate(exclude_collapse = paste(c(exclude_time, exclude_variance, exclude_comments), collapse = "")) %>% + mutate(exclude_coded = case_when(exclude_collapse == "100" ~ "time only", + exclude_collapse == "010" ~ "variance only", + exclude_collapse == "001" ~ "comments only", + exclude_collapse == "000" ~ "kept", + exclude_collapse == "110" ~ "time variance", + exclude_collapse == "101" ~ "time comments", + exclude_collapse == "011" ~ "variance comments", + exclude_collapse == "111" ~ "all")) %>% + ungroup() # always ungroup after rowwise() + +exclusions_summary <- data_exclusions %>% + tabyl(exclude_coded) %>% + adorn_totals(c("row")) + +print(exclusions_summary) +``` + +Now we'll create a new dataframe by piping the existing dataframe into a `mutate` command that creates a new exclude_all variable that codes a 1 if (`case_when`) any of the individual exclude variables have a value of 1, otherwise 0. Then, the code applies a filter to keep only rows where exclude_all is 0. The number of observations in the 'exclusions applied' dataframe should correspond to the number of 'kept' in the tabyl above! + +```{r } + +data_exclusionsapplied <- data_zapped_commentsmarked %>% + mutate(exclude_all = case_when(exclude_completion == 1 | exclude_time == 1 | exclude_variance == 1 | exclude_comments == 1 ~ 1, TRUE ~ 0)) %>% + filter(exclude_all == 0) + +``` + + +## A special case: removing IP duplicates + +Sometimes, when we're dealing with data that might be suspicious, we want to take out anyone who has duplicate IP addresses.
Note that this is different from just removing duplicates (where the first instance or last instance might be kept) - in this case we want to remove both cases if a duplicate emerges.
+
+Here, we make a new variable that marks duplicate IP addresses with 'TRUE' and then a new variable based on that one that codes a 1 if the row is a duplicate and a 0 if not. You would then add this to the list of exclusion variables for your exclusion reasons tabyl and exclusions_applied dataframes.
+
+```{r eval=FALSE}
+
+# n() counts the rows that share an ip_address, so every row in a duplicated
+# group is flagged, not just the second and later occurrences.
+ipduplicatesmarked <- data_zapped_commentsmarked %>%
+  group_by(ip_address) %>%
+  mutate(duplicate.flag = n() > 1) %>%
+  ungroup() %>%
+  mutate(exclude_ip = case_when(duplicate.flag ~ 1, TRUE ~ 0))
+
+```
+
+
+# Creating scales/indexes
+
+Often, we don't analyse individual survey items. Instead, we create scales or indexes that reflect the means across several items. Before you create a scale or index, you will need to check that the internal reliability is sufficiently high to warrant doing that. We typically use the statistic Cronbach's alpha to do this.
+
+In the code below, we will check the reliability of two separate subscales in the sample data.
+
+`scaleStructure` takes the dataframe (`dat`) and the names of the items that make up the scale (`items`), and reports reliability estimates for that set of items, including Cronbach's alpha.
+
+##THIS IS NOT CURRENTLY WORKING!?!?!
+##AWAITING FIX W IN UFS IF NOT FIXED USE psych::alpha
+
+```{r warning=FALSE}
+# Reliability of the three raw items, before any reverse-scoring.
+reliability_scale1 <- scaleStructure(
+  dat = data_exclusionsapplied,
+  items = c("variable1", "variable2", "variable3"),
+  ci = FALSE
+)
+
+print(reliability_scale1)
+
+```
+
+Whoops! That can't be right! A negative alpha value makes no sense as a reliability estimate!
+
+This usually means you haven't reverse-scored an item that was meant to be! For instance, let's say variable1 was "I hate ice cream," variable2 was "I adore frozen dessert treats," and variable3 was "I love eating ice cream." We want to reverse the responses to variable1 so that higher numbers mean more love of ice cream (not less!).
There's a nice and simple trick for reversing items: take the number of response options, add 1, and then subtract the response from that value. Here we have a 7-point response scale, so we would do (7+1)-response. The code below makes a new variable *appropriately labelled!* that reverses variable 1.
+
+```{r}
+
+# 7-point scale, so reversed = (7 + 1) - response.
+data_exclusionsapplied <- data_exclusionsapplied %>%
+  mutate(variable1_reversed = 8 - variable1)
+
+```
+
+Now, if we re-run the reliability on the items including the *reversed* version of variable 1, things should work out a lot better:
+
+```{r warning=FALSE}
+
+# Same check as before, but with the reversed item in place of variable1.
+reliability_scale1_corrected <- scaleStructure(
+  dat = data_exclusionsapplied,
+  items = c("variable1_reversed", "variable2", "variable3"),
+  ci = FALSE
+)
+
+print(reliability_scale1_corrected)
+
+
+```
+
+
+Now that we've checked that the scale meets internal reliability standards, we can compute an index of the items by calculating the mean of the contributing items, including the *reversed* version of variable 1.
+
+We'll create a new dataframe to use going forward that has these computed variables in it.
+
+Three things:
+
+1. When we compute means, we need to set the decimals via `round()`.
+2. We also need to tell R to calculate the mean, even if some of the contributing data points are missing. This is what `na.rm = TRUE` does.
+3. As noted above, `rowwise` asks R to do something for each row (which is what we want here -- to compute the mean of the contributing items for each participant). Whenever we use `rowwise` (or `group_by`), we need to `ungroup()` at the end to avoid issues down the line.
+
-`row_wise` `mutate`
+```{r}
+
+# Per-participant mean of the three items, rounded to 3 decimals.
+data_scalescomputed <- data_exclusionsapplied %>%
+  rowwise() %>%
+  mutate(
+    scale1_index = round(
+      mean(c(variable1_reversed, variable2, variable3), na.rm = TRUE),
+      3
+    )
+  ) %>%
+  ungroup()
+
+# Save the dataframe with the computed index for later use.
+data_scalescomputed %>%
+  write_csv(here("data", "data_scalescomputed.csv"))
+
+```
-### Checking reliabilty
\ No newline at end of file