Skip to content

Commit

Permalink
Merge pull request #2499 from dathere/2131-stats-sortiness
Browse files Browse the repository at this point in the history
feat: `stats` add "sortiness" statistic
  • Loading branch information
jqnatividad authored Feb 4, 2025
2 parents f8c3902 + 2e5f6a8 commit c15b25f
Show file tree
Hide file tree
Showing 14 changed files with 458 additions and 419 deletions.
70 changes: 35 additions & 35 deletions resources/test/boston311-10-boolean-1or0-stats.csv
Original file line number Diff line number Diff line change
@@ -1,35 +1,35 @@
field,type,is_ascii,sum,min,max,range,sort_order,min_length,max_length,sum_length,avg_length,stddev_length,variance_length,cv_length,mean,sem,geometric_mean,harmonic_mean,stddev,variance,cv,nullcount,max_precision,sparsity,cardinality,qsv__value
case_enquiry_id,Integer,,1010041354742,101004113298,101004155594,42296,Unsorted,,,,,,,,101004135474.2,4663.4961,101004135474.1991,101004135474.1978,14747.2697,217481962.3498,0,0,,0,10,
open_dt,String,true,,2022-01-01 00:16:00,2022-01-31 11:46:00,,Unsorted,19,19,190,19,0,0,0,,,,,,,,0,,0,10,
target_dt,String,true,,2022-01-11 08:30:00,2022-05-20 13:03:21,,Unsorted,0,19,114,11.4,9.3081,86.64,0.8165,,,,,,,,4,,0.4,6,
closed_dt,String,true,,2022-01-09 06:43:06,2022-01-20 08:45:12,,Unsorted,0,19,95,9.5,9.4412,89.1358,0.9938,,,,,,,,5,,0.5,6,
ontime,String,true,,ONTIME,OVERDUE,,Unsorted,6,7,62,6.2,0.4,0.16,0.0645,,,,,,,,0,,0,2,
case_status,String,true,,Closed,Open,,Unsorted,4,6,50,5,1,1,0.2,,,,,,,,0,,0,2,
case_status_boolean,Boolean,,5,0,1,1,Unsorted,,,,,,,,0.5,0.1581,0,,0.5,0.25,100,0,,0,2,
closure_reason,String,true,, ,Case Closed. Closed date : Wed Jan 19 11:42:16 EST 2022 Resolved Removed df ,,Unsorted,1,82,350,35,34.5543,1194,0.9873,,,,,,,,0,,0,6,
case_title,String,true,,BTDT: Complaint,Sidewalk Cover / Manhole,,Unsorted,13,57,235,23.5,14.1156,199.25,0.6007,,,,,,,,0,,0,8,
subject,String,true,,Boston Police Department,Public Works Department,,Unsorted,21,31,235,23.5,2.6552,7.05,0.113,,,,,,,,0,,0,5,
reason,String,true,,Administrative & General Requests,Street Cleaning,,Unsorted,7,33,174,17.4,7.9019,62.44,0.4541,,,,,,,,0,,0,7,
type,String,true,,CE Collection,Unsatisfactory Utilities - Electrical Plumbing,,Unsorted,13,47,240,24,11.619,135,0.4841,,,,,,,,0,,0,8,
queue,String,true,,BTDT_Parking Enforcement,PWDx_Snow Cases,,Unsorted,15,46,272,27.2,10.1272,102.56,0.3723,,,,,,,,0,,0,7,
department,String,true,,BTDT,PWDx,,Unsorted,3,4,38,3.8,0.4,0.16,0.1053,,,,,,,,0,,0,5,
submittedphoto,NULL,,,,,,,,,,,,,,,,,,,,,10,,1,1,
closedphoto,NULL,,,,,,,,,,,,,,,,,,,,,10,,1,1,
location,String,true,, ,850 South St Roslindale MA 02131,,Unsorted,1,40,309,30.9,10.4062,108.29,0.3368,,,,,,,,0,,0,10,
fire_district,String,true,, ,9,,Unsorted,1,1,10,1,0,0,0,,,,,,,,0,,0,4,
pwd_district,String,true,, ,1C,,Unsorted,1,2,19,1.9,0.3,0.09,0.1579,,,,,,,,0,,0,6,
city_council_district,String,true,, ,8,,Unsorted,1,1,10,1,0,0,0,,,,,,,,0,,0,6,
police_district,String,true,, ,E5,,Unsorted,1,3,21,2.1,0.5385,0.29,0.2564,,,,,,,,0,,0,6,
neighborhood,String,true,, ,South End,,Unsorted,1,13,91,9.1,3.2696,10.69,0.3593,,,,,,,,0,,0,8,
neighborhood_services_district,String,true,, ,6,,Unsorted,1,2,14,1.4,0.4899,0.24,0.3499,,,,,,,,0,,0,7,
ward,String,true,, ,Ward 9,,Unsorted,1,7,53,5.3,1.9519,3.81,0.3683,,,,,,,,0,,0,8,
precinct,String,true,, ,2004,,Unsorted,1,4,37,3.7,0.9,0.81,0.2432,,,,,,,,0,,0,9,
location_street_name,String,true,,12 Derne St,850 South St,,Unsorted,0,20,120,12,2.7889,7.7778,0.2324,,,,,,,,1,,0.1,10,
location_zipcode,String,true,,02113,02131,,Unsorted,0,5,45,4.5,0,0,0,,,,,,,,1,,0.1,8,
latitude,Float,,423.4656,42.2884,42.3735,0.0851,Unsorted,,,,,,,,42.3466,0.008,42.3466,42.3465,0.0252,0.0006,0.0595,0,4,0,9,
longitude,Float,,-710.782,-71.133,-71.0566,0.0764,Unsorted,,,,,,,,-71.0782,0.0078,,,0.0246,0.0006,-0.0346,0,4,0,10,
source,String,true,,City Worker App,Constituent Call,,Unsorted,15,16,157,15.7,0.4583,0.21,0.0292,,,,,,,,0,,0,2,
qsv__rowcount,,,,,,,,,,,,,,,,,,,,,,,,,,10
qsv__columncount,,,,,,,,,,,,,,,,,,,,,,,,,,30
qsv__filesize_bytes,,,,,,,,,,,,,,,,,,,,,,,,,,3887
qsv__fingerprint_hash,,,,,,,,,,,,,,,,,,,,,,,,,,c68663e33baa4ab6ad8719ee9d15dc1b84423ea08bf9aafeba4b1c0b18eb3de2
field,type,is_ascii,sum,min,max,range,sort_order,sortiness,min_length,max_length,sum_length,avg_length,stddev_length,variance_length,cv_length,mean,sem,geometric_mean,harmonic_mean,stddev,variance,cv,nullcount,max_precision,sparsity,cardinality,qsv__value
case_enquiry_id,Integer,,1010041354742,101004113298,101004155594,42296,Unsorted,-0.1111,,,,,,,,101004135474.2,4663.4961,101004135474.1991,101004135474.1978,14747.2697,217481962.3498,0,0,,0,10,
open_dt,String,true,,2022-01-01 00:16:00,2022-01-31 11:46:00,,Unsorted,-0.1111,19,19,190,19,0,0,0,,,,,,,,0,,0,10,
target_dt,String,true,,2022-01-11 08:30:00,2022-05-20 13:03:21,,Unsorted,0.2,0,19,114,11.4,9.3081,86.64,0.8165,,,,,,,,4,,0.4,6,
closed_dt,String,true,,2022-01-09 06:43:06,2022-01-20 08:45:12,,Unsorted,0.5,0,19,95,9.5,9.4412,89.1358,0.9938,,,,,,,,5,,0.5,6,
ontime,String,true,,ONTIME,OVERDUE,,Unsorted,0.5556,6,7,62,6.2,0.4,0.16,0.0645,,,,,,,,0,,0,2,
case_status,String,true,,Closed,Open,,Unsorted,0.1111,4,6,50,5,1,1,0.2,,,,,,,,0,,0,2,
case_status_boolean,Boolean,,5,0,1,1,Unsorted,0.1111,,,,,,,,0.5,0.1581,0,,0.5,0.25,100,0,,0,2,
closure_reason,String,true,, ,Case Closed. Closed date : Wed Jan 19 11:42:16 EST 2022 Resolved Removed df ,,Unsorted,0.1111,1,82,350,35,34.5543,1194,0.9873,,,,,,,,0,,0,6,
case_title,String,true,,BTDT: Complaint,Sidewalk Cover / Manhole,,Unsorted,-0.1111,13,57,235,23.5,14.1156,199.25,0.6007,,,,,,,,0,,0,8,
subject,String,true,,Boston Police Department,Public Works Department,,Unsorted,0.5556,21,31,235,23.5,2.6552,7.05,0.113,,,,,,,,0,,0,5,
reason,String,true,,Administrative & General Requests,Street Cleaning,,Unsorted,0.1111,7,33,174,17.4,7.9019,62.44,0.4541,,,,,,,,0,,0,7,
type,String,true,,CE Collection,Unsatisfactory Utilities - Electrical Plumbing,,Unsorted,0.1111,13,47,240,24,11.619,135,0.4841,,,,,,,,0,,0,8,
queue,String,true,,BTDT_Parking Enforcement,PWDx_Snow Cases,,Unsorted,0.3333,15,46,272,27.2,10.1272,102.56,0.3723,,,,,,,,0,,0,7,
department,String,true,,BTDT,PWDx,,Unsorted,0.5556,3,4,38,3.8,0.4,0.16,0.1053,,,,,,,,0,,0,5,
submittedphoto,NULL,,,,,,,,,,,,,,,,,,,,,,10,,1,1,
closedphoto,NULL,,,,,,,,,,,,,,,,,,,,,,10,,1,1,
location,String,true,, ,850 South St Roslindale MA 02131,,Unsorted,-0.1111,1,40,309,30.9,10.4062,108.29,0.3368,,,,,,,,0,,0,10,
fire_district,String,true,, ,9,,Unsorted,0.3333,1,1,10,1,0,0,0,,,,,,,,0,,0,4,
pwd_district,String,true,, ,1C,,Unsorted,0.3333,1,2,19,1.9,0.3,0.09,0.1579,,,,,,,,0,,0,6,
city_council_district,String,true,, ,8,,Unsorted,0.3333,1,1,10,1,0,0,0,,,,,,,,0,,0,6,
police_district,String,true,, ,E5,,Unsorted,0.1111,1,3,21,2.1,0.5385,0.29,0.2564,,,,,,,,0,,0,6,
neighborhood,String,true,, ,South End,,Unsorted,0.1111,1,13,91,9.1,3.2696,10.69,0.3593,,,,,,,,0,,0,8,
neighborhood_services_district,String,true,, ,6,,Unsorted,-0.1111,1,2,14,1.4,0.4899,0.24,0.3499,,,,,,,,0,,0,7,
ward,String,true,, ,Ward 9,,Unsorted,0.5556,1,7,53,5.3,1.9519,3.81,0.3683,,,,,,,,0,,0,8,
precinct,String,true,, ,2004,,Unsorted,0.3333,1,4,37,3.7,0.9,0.81,0.2432,,,,,,,,0,,0,9,
location_street_name,String,true,,12 Derne St,850 South St,,Unsorted,-0.25,0,20,120,12,2.7889,7.7778,0.2324,,,,,,,,1,,0.1,10,
location_zipcode,String,true,,02113,02131,,Unsorted,0.25,0,5,45,4.5,0,0,0,,,,,,,,1,,0.1,8,
latitude,Float,,423.4656,42.2884,42.3735,0.0851,Unsorted,-0.1111,,,,,,,,42.3466,0.008,42.3466,42.3465,0.0252,0.0006,0.0595,0,4,0,9,
longitude,Float,,-710.782,-71.133,-71.0566,0.0764,Unsorted,0.1111,,,,,,,,-71.0782,0.0078,,,0.0246,0.0006,-0.0346,0,4,0,10,
source,String,true,,City Worker App,Constituent Call,,Unsorted,0.5556,15,16,157,15.7,0.4583,0.21,0.0292,,,,,,,,0,,0,2,
qsv__rowcount,,,,,,,,,,,,,,,,,,,,,,,,,,,10
qsv__columncount,,,,,,,,,,,,,,,,,,,,,,,,,,,30
qsv__filesize_bytes,,,,,,,,,,,,,,,,,,,,,,,,,,,3887
qsv__fingerprint_hash,,,,,,,,,,,,,,,,,,,,,,,,,,,2de69db140b01aae7ac3ed110469cd5d454e990baaf3dc73e2e6e26909ac3c4e
70 changes: 35 additions & 35 deletions resources/test/boston311-10-boolean-tf-stats.csv
Original file line number Diff line number Diff line change
@@ -1,35 +1,35 @@
field,type,is_ascii,sum,min,max,range,sort_order,min_length,max_length,sum_length,avg_length,stddev_length,variance_length,cv_length,mean,sem,geometric_mean,harmonic_mean,stddev,variance,cv,nullcount,max_precision,sparsity,cardinality,qsv__value
case_enquiry_id,Integer,,1010041354742,101004113298,101004155594,42296,Unsorted,,,,,,,,101004135474.2,4663.4961,101004135474.1991,101004135474.1978,14747.2697,217481962.3498,0,0,,0,10,
open_dt,String,true,,2022-01-01 00:16:00,2022-01-31 11:46:00,,Unsorted,19,19,190,19,0,0,0,,,,,,,,0,,0,10,
target_dt,String,true,,2022-01-11 08:30:00,2022-05-20 13:03:21,,Unsorted,0,19,114,11.4,9.3081,86.64,0.8165,,,,,,,,4,,0.4,6,
closed_dt,String,true,,2022-01-09 06:43:06,2022-01-20 08:45:12,,Unsorted,0,19,95,9.5,9.4412,89.1358,0.9938,,,,,,,,5,,0.5,6,
ontime,String,true,,ONTIME,OVERDUE,,Unsorted,6,7,62,6.2,0.4,0.16,0.0645,,,,,,,,0,,0,2,
case_status,String,true,,Closed,Open,,Unsorted,4,6,50,5,1,1,0.2,,,,,,,,0,,0,2,
case_status_boolean,Boolean,true,,False,True,,Unsorted,4,5,45,4.5,0.5,0.25,0.1111,,,,,,,,0,,0,2,
closure_reason,String,true,, ,Case Closed. Closed date : Wed Jan 19 11:42:16 EST 2022 Resolved Removed df ,,Unsorted,1,82,350,35,34.5543,1194,0.9873,,,,,,,,0,,0,6,
case_title,String,true,,BTDT: Complaint,Sidewalk Cover / Manhole,,Unsorted,13,57,235,23.5,14.1156,199.25,0.6007,,,,,,,,0,,0,8,
subject,String,true,,Boston Police Department,Public Works Department,,Unsorted,21,31,235,23.5,2.6552,7.05,0.113,,,,,,,,0,,0,5,
reason,String,true,,Administrative & General Requests,Street Cleaning,,Unsorted,7,33,174,17.4,7.9019,62.44,0.4541,,,,,,,,0,,0,7,
type,String,true,,CE Collection,Unsatisfactory Utilities - Electrical Plumbing,,Unsorted,13,47,240,24,11.619,135,0.4841,,,,,,,,0,,0,8,
queue,String,true,,BTDT_Parking Enforcement,PWDx_Snow Cases,,Unsorted,15,46,272,27.2,10.1272,102.56,0.3723,,,,,,,,0,,0,7,
department,String,true,,BTDT,PWDx,,Unsorted,3,4,38,3.8,0.4,0.16,0.1053,,,,,,,,0,,0,5,
submittedphoto,NULL,,,,,,,,,,,,,,,,,,,,,10,,1,1,
closedphoto,NULL,,,,,,,,,,,,,,,,,,,,,10,,1,1,
location,String,true,, ,850 South St Roslindale MA 02131,,Unsorted,1,40,309,30.9,10.4062,108.29,0.3368,,,,,,,,0,,0,10,
fire_district,String,true,, ,9,,Unsorted,1,1,10,1,0,0,0,,,,,,,,0,,0,4,
pwd_district,String,true,, ,1C,,Unsorted,1,2,19,1.9,0.3,0.09,0.1579,,,,,,,,0,,0,6,
city_council_district,String,true,, ,8,,Unsorted,1,1,10,1,0,0,0,,,,,,,,0,,0,6,
police_district,String,true,, ,E5,,Unsorted,1,3,21,2.1,0.5385,0.29,0.2564,,,,,,,,0,,0,6,
neighborhood,String,true,, ,South End,,Unsorted,1,13,91,9.1,3.2696,10.69,0.3593,,,,,,,,0,,0,8,
neighborhood_services_district,String,true,, ,6,,Unsorted,1,2,14,1.4,0.4899,0.24,0.3499,,,,,,,,0,,0,7,
ward,String,true,, ,Ward 9,,Unsorted,1,7,53,5.3,1.9519,3.81,0.3683,,,,,,,,0,,0,8,
precinct,String,true,, ,2004,,Unsorted,1,4,37,3.7,0.9,0.81,0.2432,,,,,,,,0,,0,9,
location_street_name,String,true,,12 Derne St,850 South St,,Unsorted,0,20,120,12,2.7889,7.7778,0.2324,,,,,,,,1,,0.1,10,
location_zipcode,String,true,,02113,02131,,Unsorted,0,5,45,4.5,0,0,0,,,,,,,,1,,0.1,8,
latitude,Float,,423.4656,42.2884,42.3735,0.0851,Unsorted,,,,,,,,42.3466,0.008,42.3466,42.3465,0.0252,0.0006,0.0595,0,4,0,9,
longitude,Float,,-710.782,-71.133,-71.0566,0.0764,Unsorted,,,,,,,,-71.0782,0.0078,,,0.0246,0.0006,-0.0346,0,4,0,10,
source,String,true,,City Worker App,Constituent Call,,Unsorted,15,16,157,15.7,0.4583,0.21,0.0292,,,,,,,,0,,0,2,
qsv__rowcount,,,,,,,,,,,,,,,,,,,,,,,,,,10
qsv__columncount,,,,,,,,,,,,,,,,,,,,,,,,,,30
qsv__filesize_bytes,,,,,,,,,,,,,,,,,,,,,,,,,,3922
qsv__fingerprint_hash,,,,,,,,,,,,,,,,,,,,,,,,,,2c18181b0593e8f8e5729741fe2da2d06b28a6d59714dbd600009190d1920400
field,type,is_ascii,sum,min,max,range,sort_order,sortiness,min_length,max_length,sum_length,avg_length,stddev_length,variance_length,cv_length,mean,sem,geometric_mean,harmonic_mean,stddev,variance,cv,nullcount,max_precision,sparsity,cardinality,qsv__value
case_enquiry_id,Integer,,1010041354742,101004113298,101004155594,42296,Unsorted,-0.1111,,,,,,,,101004135474.2,4663.4961,101004135474.1991,101004135474.1978,14747.2697,217481962.3498,0,0,,0,10,
open_dt,String,true,,2022-01-01 00:16:00,2022-01-31 11:46:00,,Unsorted,-0.1111,19,19,190,19,0,0,0,,,,,,,,0,,0,10,
target_dt,String,true,,2022-01-11 08:30:00,2022-05-20 13:03:21,,Unsorted,0.2,0,19,114,11.4,9.3081,86.64,0.8165,,,,,,,,4,,0.4,6,
closed_dt,String,true,,2022-01-09 06:43:06,2022-01-20 08:45:12,,Unsorted,0.5,0,19,95,9.5,9.4412,89.1358,0.9938,,,,,,,,5,,0.5,6,
ontime,String,true,,ONTIME,OVERDUE,,Unsorted,0.5556,6,7,62,6.2,0.4,0.16,0.0645,,,,,,,,0,,0,2,
case_status,String,true,,Closed,Open,,Unsorted,0.1111,4,6,50,5,1,1,0.2,,,,,,,,0,,0,2,
case_status_boolean,Boolean,true,,False,True,,Unsorted,0.1111,4,5,45,4.5,0.5,0.25,0.1111,,,,,,,,0,,0,2,
closure_reason,String,true,, ,Case Closed. Closed date : Wed Jan 19 11:42:16 EST 2022 Resolved Removed df ,,Unsorted,0.1111,1,82,350,35,34.5543,1194,0.9873,,,,,,,,0,,0,6,
case_title,String,true,,BTDT: Complaint,Sidewalk Cover / Manhole,,Unsorted,-0.1111,13,57,235,23.5,14.1156,199.25,0.6007,,,,,,,,0,,0,8,
subject,String,true,,Boston Police Department,Public Works Department,,Unsorted,0.5556,21,31,235,23.5,2.6552,7.05,0.113,,,,,,,,0,,0,5,
reason,String,true,,Administrative & General Requests,Street Cleaning,,Unsorted,0.1111,7,33,174,17.4,7.9019,62.44,0.4541,,,,,,,,0,,0,7,
type,String,true,,CE Collection,Unsatisfactory Utilities - Electrical Plumbing,,Unsorted,0.1111,13,47,240,24,11.619,135,0.4841,,,,,,,,0,,0,8,
queue,String,true,,BTDT_Parking Enforcement,PWDx_Snow Cases,,Unsorted,0.3333,15,46,272,27.2,10.1272,102.56,0.3723,,,,,,,,0,,0,7,
department,String,true,,BTDT,PWDx,,Unsorted,0.5556,3,4,38,3.8,0.4,0.16,0.1053,,,,,,,,0,,0,5,
submittedphoto,NULL,,,,,,,,,,,,,,,,,,,,,,10,,1,1,
closedphoto,NULL,,,,,,,,,,,,,,,,,,,,,,10,,1,1,
location,String,true,, ,850 South St Roslindale MA 02131,,Unsorted,-0.1111,1,40,309,30.9,10.4062,108.29,0.3368,,,,,,,,0,,0,10,
fire_district,String,true,, ,9,,Unsorted,0.3333,1,1,10,1,0,0,0,,,,,,,,0,,0,4,
pwd_district,String,true,, ,1C,,Unsorted,0.3333,1,2,19,1.9,0.3,0.09,0.1579,,,,,,,,0,,0,6,
city_council_district,String,true,, ,8,,Unsorted,0.3333,1,1,10,1,0,0,0,,,,,,,,0,,0,6,
police_district,String,true,, ,E5,,Unsorted,0.1111,1,3,21,2.1,0.5385,0.29,0.2564,,,,,,,,0,,0,6,
neighborhood,String,true,, ,South End,,Unsorted,0.1111,1,13,91,9.1,3.2696,10.69,0.3593,,,,,,,,0,,0,8,
neighborhood_services_district,String,true,, ,6,,Unsorted,-0.1111,1,2,14,1.4,0.4899,0.24,0.3499,,,,,,,,0,,0,7,
ward,String,true,, ,Ward 9,,Unsorted,0.5556,1,7,53,5.3,1.9519,3.81,0.3683,,,,,,,,0,,0,8,
precinct,String,true,, ,2004,,Unsorted,0.3333,1,4,37,3.7,0.9,0.81,0.2432,,,,,,,,0,,0,9,
location_street_name,String,true,,12 Derne St,850 South St,,Unsorted,-0.25,0,20,120,12,2.7889,7.7778,0.2324,,,,,,,,1,,0.1,10,
location_zipcode,String,true,,02113,02131,,Unsorted,0.25,0,5,45,4.5,0,0,0,,,,,,,,1,,0.1,8,
latitude,Float,,423.4656,42.2884,42.3735,0.0851,Unsorted,-0.1111,,,,,,,,42.3466,0.008,42.3466,42.3465,0.0252,0.0006,0.0595,0,4,0,9,
longitude,Float,,-710.782,-71.133,-71.0566,0.0764,Unsorted,0.1111,,,,,,,,-71.0782,0.0078,,,0.0246,0.0006,-0.0346,0,4,0,10,
source,String,true,,City Worker App,Constituent Call,,Unsorted,0.5556,15,16,157,15.7,0.4583,0.21,0.0292,,,,,,,,0,,0,2,
qsv__rowcount,,,,,,,,,,,,,,,,,,,,,,,,,,,10
qsv__columncount,,,,,,,,,,,,,,,,,,,,,,,,,,,30
qsv__filesize_bytes,,,,,,,,,,,,,,,,,,,,,,,,,,,3922
qsv__fingerprint_hash,,,,,,,,,,,,,,,,,,,,,,,,,,,55eabf294ec7529ad75755db9cd482e094ccef0df7bde083426f4e8582ba1c52
Loading

0 comments on commit c15b25f

Please sign in to comment.