-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathclean_better.ps1
115 lines (111 loc) · 5.53 KB
/
clean_better.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Cleans the address list in .\addresses_raw.txt and writes the result to
# addresses_cleaned.txt in the current directory.
#
# Each input line is upper-cased, stripped of lot/unit/level style prefixes and
# punctuation, has its street-type abbreviations expanded, and is then reduced to the
# first of these shapes that matches the end of the line:
#   1. <number> <street name> <street type> <locality> <state> <postcode>
#      (written out without the street number, see the NOTE in the loop)
#   2. <street name> <street type> <locality> <state> <postcode>
#   3. <locality> <state> <postcode>
# A line matching none of them is written out as cleaned, without being reduced.
# The captured fields are echoed to the console with Write-Host as "<group>- <value>".

# Shape 1. ESPLANADE is accepted because the abbreviation rules below expand "ESP" to
# that spelling; the original list only had the misspelt ESPLENADE, which is kept so
# input that already matched keeps matching.
$full_addr_regex='(\d+?\w?)\s(\w+?\s?\w+?)\s(ROAD|STREET|BOULEVARD|CIRCLE|CIRCUS|CLOSE|GARDENS|LANE|CIRCUIT|PARADE|HIGHWAY|MALL|ESPLANADE|ESPLENADE|BROADWAY|TERRACE|DRIVE|COURT|PLACE|AVENUE|CRESCENT)\s(\w+?\s?\w+?)\s(NSW|QLD|SA|TAS|ACT|NT|VIC|WA)\s(\d{4})$'
# Shape 2: no street number, single-word street name.
$no_res_num_regex='(\w+?)\s(ROAD|STREET|BOULEVARD|CIRCLE|CIRCUS|CLOSE|GARDENS|LANE|CIRCUIT|PARADE|HIGHWAY|MALL|BROADWAY|TERRACE|ESPLANADE|ESPLENADE|QUAYS|DRIVE|COURT|PLACE|AVENUE|CRESCENT)\s(\w+?\s?\w+?)\s(NSW|QLD|SA|TAS|ACT|NT|VIC|WA)\s(\d{4})$'
# Shape 3: single-word locality, state and postcode at the end of the line.
$locality_regex='(\w+?)\s(NSW|QLD|SA|TAS|ACT|NT|VIC|WA)\s(\d{4})$'

# Ordered (pattern, replacement) pairs applied to every line with -replace.
$cleanup_rules = @(
    @('LOT\s\d+\s', ''),
    @('"', ''),
    # Number ranges such as 12-14 keep only the first number.
    @('^(\d+)-(\d+)', '$1'),
    @('(\d+)-(\d+)', '$1'),
    @('HOUSE\s\d+', ''),
    # Must run before the OF rule below, which would otherwise remove the " OF "
    # this phrase depends on and leave "IN FRONT THE" behind.
    @('IN FRONT OF THE ', ''),
    @('\sAND\s', ' '),
    @('\sOF\s', ' '),
    @('^LEVEL\s\d+\s', ''),
    @('^UNIT\s\d+\s', ''),
    @('^U\s\d+\s', ''),
    @('^ROOM\s\d+\s', ''),
    @('\sBUILDING\s', ' '),
    @('[/,.)(]', ''),
    # Street-type abbreviations, matched only as whole space-delimited tokens.
    @('\sARC\s', ' ARCADE '),
    @('\sESP\s', ' ESPLANADE '),
    @('\sCV\s', ' COVE '),
    @('\sCRES\s', ' CRESCENT '),
    @('\sCR\s', ' CRESCENT '),
    @('\sRD\s', ' ROAD '),
    @('\sAVE\s', ' AVENUE '),
    @('\sPL\s', ' PLACE '),
    @('\sCRT\s', ' COURT '),
    @('\sCT\s', ' COURT '),
    @('\sDR\s', ' DRIVE '),
    @('\sTCE\s', ' TERRACE '),
    @('\sPDE\s', ' PARADE '),
    @('\sCCT\s', ' CIRCUIT '),
    @('\sCRST\s', ' CREST '),
    @('\sLA\s', ' LANE '),
    @('\sGDN\s', ' GARDENS '),
    @('\sGDNS\s', ' GARDENS '),
    @('\sCL\s', ' CLOSE '),
    # These require a word character before the abbreviation. A lookbehind is used so
    # that character is only checked, not replaced: the original "\w\sST\s" consumed
    # it and turned "GEORGE ST " into "GEORG STREET ".
    @('(?<=\w)\sCCS\s', ' CIRCUS '),
    @('(?<=\w)\sCCL\s', ' CIRCLE '),
    @('(?<=\w)\sBVD\s', ' BOULEVARD '),
    @('(?<=\w)\sBLVD\s', ' BOULEVARD '),
    @('(?<=\w)\sSTREETS\s', ' STREET '),
    @('(?<=\w)\sST\s', ' STREET '),
    @('\sMT\s', ' MOUNT '),
    @('\sHWY\s', ' HIGHWAY ')
)

# Echoes capture groups $first..$last of $match to the console as "<n>- <value>" and
# returns their trimmed values joined by single spaces.
function Join-AddressGroups($match, [int]$first, [int]$last) {
    $parts = foreach ($i in $first..$last) {
        $value = $match.Groups[$i].Value
        Write-Host ("$i- " + $value)
        $value.Trim()
    }
    return $parts -join " "
}

# Start from an empty output file; the loop below appends to it line by line.
New-Item -Path . -Name "addresses_cleaned.txt" -ItemType "file" -Force

foreach ($line in Get-Content .\addresses_raw.txt) {
    # Collapse every whitespace run (tabs included) to one space, then upper-case so
    # the case-sensitive [regex]::Match calls below see consistent text.
    $line = ($line -replace "\s+", " ").ToUpper()
    foreach ($rule in $cleanup_rules) {
        $line = $line -replace $rule[0], $rule[1]
    }
    # All three patterns are anchored with '$', so trim before matching; otherwise a
    # trailing space would stop a complete address from matching shape 1.
    $line = $line.Trim()

    $full_match = [regex]::Match($line, $full_addr_regex)
    if ($full_match.Success) {
        Write-Host "ok"
        # NOTE(review): the street number (group 1) is left out to preserve the
        # script's existing output. The original kept it (with letters stripped) only
        # when capture group 7 was one character long, but the pattern has six groups,
        # so that branch never ran. Confirm whether the number should be written.
        $line = Join-AddressGroups $full_match 2 6
    }
    else {
        $street_match = [regex]::Match($line, $no_res_num_regex)
        if ($street_match.Success) {
            Write-Host 'no street no'
            Write-Host $line
            $line = Join-AddressGroups $street_match 1 5
        }
        else {
            # The original ran this match inside an 'if' that could never be true, so
            # the locality-only fallback was dead code. The pattern captures a single
            # word before the state, so a two-word locality keeps only its last word.
            $locality_match = [regex]::Match($line, $locality_regex)
            if ($locality_match.Success) {
                Write-Host 'no street no and name'
                Write-Host $line
                $line = Join-AddressGroups $locality_match 1 3
            }
        }
    }
    $line | Out-File -FilePath addresses_cleaned.txt -Append
}

# Drop the first line of the cleaned file.
# NOTE(review): presumably this removes a header row carried over from
# addresses_raw.txt - confirm against the input format.
$file = "addresses_cleaned.txt"
(Get-Content $file | Select-Object -Skip 1) | Set-Content $file