forked from tyaqing/baidu_tieba_crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsocket.js
157 lines (139 loc) · 5.24 KB
/
socket.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/**
* Created by ArH on 2016/11/12.
*/
let cp = require('child_process');
let express = require('express');
let app = express();
let bodyParser = require('body-parser');
let server = require('http').Server(app);
let io = require('socket.io')(server);
// 加载http接口
let api = require('./server/api');
let port = 8081;
server.listen(port);
app.use(bodyParser.json());
api(app);
// 统计子进程 用于限制子进程个数 避免宕机 worker_max 最大进程数
// TODO 需要在进程生成的时候完成累加,进程结束时累减
let worker_sum = 0,
worker_max = 50,
connectCounter=0;
let worker_status = function(){
io.sockets.emit("worker_sum",{worker_sum,connectCounter});
};
io.on('connection', function (socket) {
connectCounter++;
console.log(connectCounter);
// TODO 发送当前系统正在执行子进程的个数
worker_status();
// 离线事件 离线就kill掉进程
socket.on('disconnect', function () {
connectCounter--;
// 浏览器失联 关掉正在运行的子进程
console.log(`${socket.id} disconnect`);
if (socket.worker && socket.worker.killed == false) {
socket.worker.kill();
worker_sum--;
}
worker_status();
});
// 统一管理监听发信
let worker_on = function () {
// worker 事件 error exit close
socket.worker.on("message", function (data) {
// {type: 'msg', data: 'close'}
console.log('cp message : ', data);
if(data.type=='msg'){
socket.emit('info', data.data);
}else if (data.type == "now_num") {
socket.emit('now_num', data.data);
}else if (data.type == "total") {
socket.emit('total', data.data);
}else if (data.type == 'close') {
socket.worker.kill();
worker_sum--;
worker_status();
socket.emit('success', '爬取结束');
}else if(data.type=='get_content'){
socket.emit('get_content','success');
}else if(data.type=='user_process'){
socket.emit('user_process', data.data);
}
});
socket.worker.on("close", function (code, signal) {
console.log('close');
console.log(code, signal);
});
};
// 获取贴吧列表
socket.on('get_tieba_list', function (res) {
if (socket.worker && socket.worker.killed == false) {
socket.emit('warning', '该页面有一个子进程正在运行,请开启另一个网页进行爬取');
return;
}
// 将子进程放入socket对象 到达一网页一子进程的效果 实现多网页多子进程爬取
socket.worker = cp.fork('./server/cp.js');
// 开启监听
worker_on();
worker_sum++;
worker_status();
socket.worker.send({order: 'get_tieba_list', data: res});
});
//获取帖子内容
socket.on('get_tieba_content', function (res) {
if (socket.worker && socket.worker.killed == false) {
socket.emit('warning', '该页面有一个子进程正在运行,请开启另一个网页进行爬取');
return;
}
socket.worker = cp.fork('./server/cp.js');
worker_on();
worker_sum++;
worker_status();
socket.worker.send({order: 'get_tieba_content', data: res});
});
// 爬取贴吧会员列表
socket.on('get_member_list', function (res) {
if (socket.worker && socket.worker.killed == false) {
socket.emit('warning', '该页面有一个子进程正在运行,请开启另一个网页进行爬取');
return;
}
// 将子进程放入socket对象 到达一网页一子进程的效果 实现多网页多子进程爬取
socket.worker = cp.fork('./server/cp.js');
// 开启监听
worker_on();
worker_sum++;
worker_status();
socket.worker.send({order: 'get_member_list', data: res});
});
// 爬取贴吧热点话题
socket.on('get_tieba_topic', function (res) {
if (socket.worker && socket.worker.killed == false) {
socket.emit('warning', '该页面有一个子进程正在运行,请开启另一个网页进行爬取');
return;
}
// 将子进程放入socket对象 到达一网页一子进程的效果 实现多网页多子进程爬取
socket.worker = cp.fork('./server/cp.js');
// 开启监听
worker_on();
worker_sum++;
worker_status();
socket.worker.send({order: 'get_tieba_topic', data: res});
});
//请求停止
socket.on('close', function (res) {
console.log('请求停止');
if (socket.worker && socket.worker.killed == false) {
socket.worker.kill();
worker_sum--;
worker_status();
if(res!='back_close'){
socket.emit('info', '已停止当前页面的爬取任务');
}else{
socket.emit('info', '离开页面后会中断爬取');
}
}
else {
if(res!='back_close') socket.emit('warning', '没有可停止的对象');
}
});
});