-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathUserAgentUtils.php
341 lines (334 loc) · 10.3 KB
/
UserAgentUtils.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
<?php
/**
* Tera_WURFL - PHP MySQL driven WURFL
*
* Tera-WURFL was written by Steve Kamerman, and is based on the
* Java WURFL Evolution package by Luca Passani and WURFL PHP Tools by Andrea Trasatti.
* This version uses a database to store the entire WURFL file, multiple patch
* files, and a persistent caching mechanism to provide extreme performance increases.
*
* @package TeraWurfl
* @author Steve Kamerman <stevekamerman AT gmail.com>
* @license http://www.mozilla.org/MPL/ MPL Version 1.1
*/
/**
 * Provides static functions for inspecting, normalizing and matching User Agent strings.
 *
 * The matching helpers read the candidate list from the `deviceList` property of the
 * matcher they are given (an array of WURFL ID => User Agent) and fall back to
 * WurflConstants::$GENERIC when nothing matches.
 *
 * @package TeraWurfl
 */
class UserAgentUtils{
    /**
     * Levenshtein Distance tolerance used by ldMatch() when the caller passes null
     * @var int
     */
    public static $WORST_MATCH = 7;

    public function __construct(){
    }

    /**
     * Find the matching Device ID for a given User Agent using RIS (Reduction in String).
     *
     * The User Agent is shortened one character at a time from the right; the first device
     * whose User Agent starts with the current prefix wins.
     *
     * @param string User Agent
     * @param int How short the strings are allowed to get before a match is abandoned
     * @param UserAgentMatcher The UserAgentMatcher instance that is matching the User Agent
     * @return string WURFL ID
     */
    public static function risMatch($ua,$tolerance,UserAgentMatcher $matcher){
        $ua = (string)$ua;
        $devices = $matcher->deviceList;
        // Exact match; strict so that numeric-looking strings are never loosely equated
        $exact = array_search($ua,$devices,true);
        if($exact !== false){
            return $exact;
        }
        $prefix = $ua;
        $length = strlen($prefix);
        // Never test an empty prefix: it is a prefix of every string (or an invalid
        // needle, depending on the PHP version) and could previously loop forever
        while($length > 0 && $length >= $tolerance){
            foreach($devices as $testID => $testUA){
                // Prefix comparison: true only if $testUA starts with $prefix
                if(strncmp($testUA,$prefix,$length) === 0){
                    return $testID;
                }
            }
            $length--;
            $prefix = substr($prefix,0,$length);
        }
        return WurflConstants::$GENERIC;
    }

    /**
     * Find the matching Device ID for a given User Agent using LD (Levenshtein Distance).
     *
     * When several devices are equally close, the last one in the device list wins.
     * Distances are computed with self::LD(), so strings longer than 255 characters are
     * compared on their first 255 characters.
     *
     * @param string User Agent
     * @param int Tolerance that is still considered a match; null means self::$WORST_MATCH
     * @param UserAgentMatcher The UserAgentMatcher instance that is matching the User Agent
     * @return string WURFL ID
     */
    public static function ldMatch($ua,$tolerance,$matcher){
        // $tolerance sits before a required parameter, so it could never actually be
        // omitted; null is still accepted and selects the default tolerance
        if(is_null($tolerance)){
            $tolerance = self::$WORST_MATCH;
        }
        $ua = (string)$ua;
        $devices = $matcher->deviceList;
        $exact = array_search($ua,$devices,true);
        if($exact !== false){
            return $exact;
        }
        $best = $tolerance;
        $match = WurflConstants::$GENERIC;
        foreach($devices as $testID => $testUA){
            // self::LD() avoids the -1 that levenshtein() returns for long strings on
            // PHP < 8.0, which would otherwise always beat $best
            $distance = self::LD($ua,$testUA);
            if($distance <= $best){
                $best = $distance;
                $match = $testID;
            }
        }
        return $match;
    }

    /**
     * Number of slashes ('/') found in the given user agent
     * @param String User Agent
     * @return int Count
     */
    public static function numSlashes($userAgent){
        return substr_count($userAgent,'/');
    }

    /**
     * The character position of the first slash. If there are no slashes, returns string length
     * @param String User Agent
     * @return int Character position
     */
    public static function firstSlash($userAgent){
        $position = strpos($userAgent,'/');
        return ($position !== false)? $position: strlen($userAgent);
    }

    /**
     * The character position of the second slash. If there is no second slash, returns string length
     * @param String User Agent
     * @return int Character position
     */
    public static function secondSlash($userAgent){
        $length = strlen($userAgent);
        $first = strpos($userAgent,'/');
        if($first === false){
            // No slash at all, so there cannot be a second one
            return $length;
        }
        $second = strpos($userAgent,'/',$first + 1);
        return ($second !== false)? $second: $length;
    }

    /**
     * The character position of the first space. If there are no spaces, returns string length
     * @param String User Agent
     * @return int Character position
     */
    public static function firstSpace($userAgent){
        $position = strpos($userAgent,' ');
        return ($position !== false)? $position: strlen($userAgent);
    }

    /**
     * The character position of the first open parenthesis. If there is no open parenthesis, returns string length
     * @param String User Agent
     * @return int Character position
     */
    public static function firstOpenParen($userAgent){
        $position = strpos($userAgent,'(');
        return ($position !== false)? $position: strlen($userAgent);
    }

    /**
     * Removes garbage from user agent string.
     *
     * Steps, in order: cut everything from "UP.Link" on, mask 15-digit serial numbers,
     * drop [ST|TF|NT<digits>] tags, mask locale identifiers, normalize BlackBerry and
     * Android signatures, then trim trailing whitespace.
     *
     * @param String User agent
     * @return String User agent
     */
    public static function cleanUserAgent($ua){
        $ua = self::removeUPLinkFromUA($ua);
        // Mask serial numbers
        $ua = preg_replace('/\/SN\d{15}/','/SNXXXXXXXXXXXXXXX',$ua);
        $ua = preg_replace('/\[(ST|TF|NT)\d+\]/','',$ua);
        // Mask locale identifier (e.g. " en-us;" becomes " xx-xx;")
        $ua = preg_replace('/([ ;])[a-zA-Z]{2}-[a-zA-Z]{2}([ ;\)])/','$1xx-xx$2',$ua);
        $ua = self::normalizeBlackberry($ua);
        $ua = self::normalizeAndroid($ua);
        return rtrim($ua);
    }

    /**
     * Normalizes BlackBerry user agent strings by dropping anything before "BlackBerry"
     * @param String User agent
     * @return String User agent
     */
    public static function normalizeBlackberry($ua){
        $pos = strpos($ua,'BlackBerry');
        // Nothing to strip when the token is absent or already at the start
        if($pos === false || $pos === 0){
            return $ua;
        }
        return substr($ua,$pos);
    }

    /**
     * Normalizes Android version numbers to "Android <digit>.<digit>"
     * @param String User agent
     * @return String User agent
     */
    public static function normalizeAndroid($ua){
        return preg_replace('/(Android \d\.\d)([^; \/\)]+)/','$1',$ua);
    }

    /**
     * Removes UP.Link traces from user agent strings
     * @param String User agent
     * @return String User agent up to (not including) the first "UP.Link"
     */
    public static function removeUPLinkFromUA($ua){
        // The gateway signature (UP.Link/x.x.x) and everything after it is discarded
        $index = strpos($ua,'UP.Link');
        return ($index === false)? $ua: substr($ua,0,$index);
    }

    /**
     * Removes Vodafone garbage from user agent string
     * @param String User agent
     * @return String User agent without a leading "Vodafone/" or "Vodafone/x.y/"
     */
    public static function removeVodafonePrefix($ua){
        return preg_replace('/^Vodafone\/(\d\.\d\/)?/','',$ua,1);
    }

    /**
     * Check if user agent contains string or array of strings
     * @param String User agent
     * @param Mixed String or Array of strings
     * @return Bool True if the string, or any string of the array, is found
     */
    public static function checkIfContains($ua,$find){
        if(!is_array($find)){
            return (strpos($ua,$find) !== false);
        }
        foreach($find as $part){
            if(strpos($ua,$part) !== false){
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the character position (index) of the target string in the given user agent, starting from a given index. If target is not in user agent, returns length of user agent.
     *
     * When an array of targets is given, they are tried in array order and the position
     * of the first target that is found is returned (not necessarily the lowest position).
     *
     * @param String User agent
     * @param String Target string to search for, or, Array of Strings to search for
     * @param int Character position in the user agent at which to start looking for the target
     * @return int Character position (index) or user agent length
     */
    public static function indexOfOrLength($ua, $target, $startingIndex) {
        $length = strlen($ua);
        // false comes from a failed strpos() in the caller; an offset outside the
        // string cannot match and would make strpos() fail on PHP 8
        if($startingIndex === false || $startingIndex > $length || $startingIndex < -$length){
            return $length;
        }
        $targets = is_array($target)? $target: array($target);
        foreach($targets as $candidate){
            $pos = strpos($ua,$candidate,$startingIndex);
            if($pos !== false){
                return $pos;
            }
        }
        return $length;
    }

    /**
     * The character position of the Nth occurrence of a target string in a user agent.
     *
     * An ordinal lower than 1 behaves like 1. A non-integer ordinal, an empty user agent
     * or an empty target yields -1.
     *
     * @param String User agent
     * @param String Target string to search for in user agent
     * @param int The Nth occurrence to find
     * @return int Character position or -1 if $needle is not found $ordinal times
     */
    public static function ordinalIndexOf($ua, $needle, $ordinal) {
        if(empty($ua) || !is_int($ordinal)){
            return -1;
        }
        // An empty needle "matches" at every offset on PHP 8 and is invalid before that
        if($needle === '' || $needle === null){
            return -1;
        }
        $found = 0;
        $index = -1;
        do{
            $index = strpos($ua,$needle,$index + 1);
            if($index === false){
                return -1;
            }
            $found++;
        }while($found < $ordinal);
        return $index;
    }

    /**
     * Checks for traces of mobile device signatures and returns an appropriate generic WURFL Device ID
     * @param String User agent
     * @return String WURFL ID
     */
    public static function lastAttempts($ua){
        // Before giving up and returning generic, one last attempt to catch
        // well-behaved Nokia and Openwave browsers. Checked in this order.
        $fallbacks = array(
            'UP.Browser/7' => 'opwv_v7_generic',
            'UP.Browser/6' => 'opwv_v6_generic',
            'UP.Browser/5' => 'upgui_generic',
            'UP.Browser/4' => 'uptext_generic',
            'UP.Browser/3' => 'uptext_generic',
            'Series60'     => 'nokia_generic_series60',
            'Mozilla/4.0'  => 'generic_web_browser',
            'Mozilla/5.0'  => 'generic_web_browser',
            'Mozilla/6.0'  => 'generic_web_browser',
        );
        foreach($fallbacks as $signature => $wurflID){
            if(self::checkIfContains($ua,$signature)){
                return $wurflID;
            }
        }
        return WurflConstants::$GENERIC;
    }

    /**
     * The given user agent is definitely from a mobile device
     * @param String User agent
     * @return Bool
     */
    public static function isMobileBrowser($ua){
        // A desktop signature overrides any mobile hint
        if(self::isDesktopBrowser($ua)){
            return false;
        }
        $lowerua = strtolower($ua);
        if(UserAgentMatcher::contains($lowerua,WurflConstants::$MOBILE_BROWSERS)){
            return true;
        }
        $patterns = array(
            // Screen resolution in UA
            '/[^\d]\d{3}x\d{3}/',
        );
        if(UserAgentMatcher::regexContains($ua,$patterns)){
            return true;
        }
        return false;
    }

    /**
     * The given user agent is definitely from a desktop browser
     * @param String User agent
     * @return Bool
     */
    public static function isDesktopBrowser($ua){
        $lowerua = strtolower($ua);
        foreach(WurflConstants::$DESKTOP_BROWSERS as $browser_signature){
            if(strpos($lowerua, $browser_signature) !== false){
                return true;
            }
        }
        // Explicit false: the method previously fell through and returned null
        return false;
    }

    /**
     * The given user agent is definitely from a bot/crawler
     * @param String User agent
     * @return Bool
     */
    public static function isRobot($ua){
        $lowerua = strtolower($ua);
        foreach(WurflConstants::$ROBOTS as $browser_signature){
            // strpos() !== false rather than strstr() truthiness, which is wrong
            // when the matched tail is the string "0"
            if(strpos($lowerua, $browser_signature) !== false){
                return true;
            }
        }
        return false;
    }

    /**
     * Levenshtein Distance between two strings, safe for long input.
     *
     * If either string is longer than 255 characters, both are cut to their first
     * 255 characters before comparing.
     *
     * @param String First string
     * @param String Second string
     * @return int Levenshtein Distance
     */
    public static function LD($s,$t){
        // PHP's levenshtein() function requires arguments to be <= 255 chars before PHP 8.0
        if(strlen($s) > 255 || strlen($t) > 255){
            $s = substr($s,0,255);
            $t = substr($t,0,255);
        }
        return levenshtein($s,$t);
    }
}