-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathspizer-config.php
195 lines (161 loc) · 5.42 KB
/
spizer-config.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
<?php
/**
* Spizer main executable file - this runner demonstrates the possible use of
* configuration files (YAML, INI, XML or plain PHP files) to run Spizer.
*
* @todo Add sanity checks to make sure configuration file is valid
* (has engine and logger sections at least)
*
* @todo Consider other runners (GTK, web based, etc.)
*
* @package Spizer
* @subpackage Runner
* @author Shahar Evron, [email protected]
* @license Licensed under the Apache License 2.0, see COPYING for details
*/
define('SPIZER_VERSION', '0.1');

// Put the ./lib directory next to this file first on the include path
set_include_path(dirname(__FILE__) . '/lib' . PATH_SEPARATOR . get_include_path());

require_once 'Zend/Loader/Autoloader.php';
$loader = Zend_Loader_Autoloader::getInstance();
$loader->registerNamespace('Spizer_');

// Note: the 'php|p' rule must be declared here, otherwise passing -p is
// rejected by parse() and the PHP configuration branch below is unreachable.
$opts = new Zend_Console_Getopt(array(
    'yml|y=s'     => 'Use a YAML configuration file',
    'ini|i=s'     => 'Use an INI configuration file',
    'xml|x=s'     => 'Use an XML configuration file',
    'php|p=s'     => 'Use a plain PHP configuration file',
    'section|s=s' => 'Specify configuration section to use (default is \'default\')',
    'help|h'      => 'Show this help text',
    'version|v'   => 'Show version information and exit'
));

// Parse command line options
try {
    $opts->parse();
} catch (Zend_Console_Getopt_Exception $e) {
    fwrite(STDERR, "Error parsing command line options: {$e->getMessage()}\n");
    exit(1);
}

// If help, show usage and exit
if ($opts->h) {
    spizer_usage();
    exit(0);
}

// If version, show version information and exit
if ($opts->v) {
    fwrite(STDERR, "Spizer ver. " . SPIZER_VERSION . ", rev. \$Revision$ \n");
    exit(0);
}

// Make sure we have a URL. The first non-option argument is the start URL;
// guard the lookup so that a missing argument shows the usage text instead
// of raising an "undefined offset" notice first.
$args = $opts->getRemainingArgs();
$url  = isset($args[0]) ? $args[0] : null;
if (! $url) {
    spizer_usage();
    exit(1);
}

// Load configuration - exactly one of -y / -i / -x / -p must be given
$section = 'default';
if ($opts->s) $section = $opts->s;

if ($opts->y) {
    if ($opts->x || $opts->i || $opts->p) die_single_configfile();
    $config = Spizer_Config::load($opts->y, Spizer_Config::YAML, $section);

} elseif ($opts->i) {
    if ($opts->x || $opts->y || $opts->p) die_single_configfile();
    $config = Spizer_Config::load($opts->i, Spizer_Config::INI, $section);

} elseif ($opts->x) {
    if ($opts->i || $opts->y || $opts->p) die_single_configfile();
    $config = Spizer_Config::load($opts->x, Spizer_Config::XML, $section);

} elseif ($opts->p) {
    if ($opts->x || $opts->y || $opts->i) die_single_configfile();
    $config = Spizer_Config::load($opts->p, Spizer_Config::PHP, $section);

} else {
    die_single_configfile();
}

// Set up engine
$engine = new Spizer_Engine((array) $config->engine);

/**
 * Set up the logger object
 *
 * The logger type is defined in the configuration file - if it contains an
 * underscore in its name, it is considered to be a user-defined logger - if
 * no underscore is found, the default 'Spizer_Logger_' prefix is added to
 * the class name.
 */
if (! $config->logger || ! $config->logger->type) {
    // Fail with a readable message rather than trying to load a class
    // named 'Spizer_Logger_' further down.
    fwrite(STDERR, "Error: configuration section '$section' does not define a logger type.\n");
    exit(1);
}

$type = $config->logger->type;
if (strpos($type, '_') === false) $type = 'Spizer_Logger_' . $type;
Zend_Loader::loadClass($type);

// A logger without an 'options' node gets an empty option set instead of
// dying on a method call on a non-object.
$loggerOptions = $config->logger->options ? $config->logger->options->toArray() : array();
$logger = new $type($loggerOptions);
$engine->setLogger($logger);

// Set up the handler objects - same underscore rules apply here as well.
if ($config->handlers) {
    foreach ($config->handlers as $name => $hconf) {
        $type = $hconf->type;
        if (! $type) continue; // Silently ignore badly-defined handlers (@todo: Throw exception?)

        if (strpos($type, '_') === false) $type = 'Spizer_Handler_' . $type;
        Zend_Loader::loadClass($type);

        // Same fallback as for the logger: a missing 'options' node means
        // "no options".
        $handlerOptions = $hconf->options ? $hconf->options->toArray() : array();
        $handler = new $type($handlerOptions);
        $handler->setHandlerName($name);
        $engine->addHandler($handler);
    }
}

// If we have pcntl - route the common termination signals to do_exit() so
// that the request counter is reported before the process ends
if (function_exists('pcntl_signal')) {
    declare(ticks = 1);
    pcntl_signal(SIGABRT, 'do_exit');
    pcntl_signal(SIGHUP, 'do_exit');
    pcntl_signal(SIGQUIT, 'do_exit');
    pcntl_signal(SIGINT, 'do_exit');
    pcntl_signal(SIGTERM, 'do_exit');
}

// Go!
$engine->run($url);
/**
* -------
* Execution ends here - Some functions are defined next
* -------
*/
/**
 * Show usage message
 *
 * Writes the help text - including the Spizer version and the name the
 * script was invoked with - to STDERR. Does not terminate the script; the
 * caller decides on the exit status.
 *
 * @return void
 */
function spizer_usage()
{
    // $argv is only defined in the global scope of a CLI script
    global $argv;

    $ver = SPIZER_VERSION;

    // The closing heredoc marker stays in column 0 so the file keeps parsing
    // on PHP versions older than 7.3.
    $usage = <<<USAGE
Spizer - the flexible web spider, ver. $ver
Usage: {$argv[0]} [-h] -i|-p|-x|-y <file> [-s <section>] <Start URL>
You must specify one of the following configuration formats:
--yml | -y <file> Load a YAML configuration file
--xml | -x <file> Load an XML configuration file (not yet implemented)
--ini | -i <file> Load an INI configuration file (not yet implemented)
Additionally, you can specifiy one of the following options:
--section | -s Configuration file section to load
(default is 'default')
--version | -v Show version information and exit
--help | -h Show this help text
USAGE;

    fwrite(STDERR, $usage);
}
/**
 * Abort because the number of configuration files given is not exactly one
 *
 * Writes a two-line error message (including a hint on how to get help) to
 * STDERR and terminates the script with exit status 1. Shared by all the
 * configuration-format branches of the runner.
 *
 * @return void
 */
function die_single_configfile()
{
    // $argv only exists in the global scope; without importing it the hint
    // below would print an empty script name and raise an "undefined
    // variable" notice.
    global $argv;

    fwrite(STDERR, "Error: You must specifiy one (and only one) configration file.\n");
    fwrite(STDERR, "Use '{$argv[0]} --help' to get some assistance.\n");
    exit(1);
}
/**
 * Report the number of HTTP requests performed and terminate
 *
 * Registered by the runner as the callback for the termination signals.
 * Reads the request counter from the global engine object, prints a
 * one-line summary to standard output and exits with status 0.
 *
 * @return void
 */
function do_exit()
{
    // Read the counter straight from the global engine. No local alias is
    // created, so there is nothing to unset afterwards.
    $total = $GLOBALS['engine']->getRequestCounter();

    $summary = sprintf("Spizer preformed a total of %s HTTP requests.\n", $total);
    file_put_contents('php://stdout', $summary);

    exit(0);
}