diff --git a/docs/university.md b/docs/university.md index 6cef8be5669182..a0d97ca3ab3145 100644 --- a/docs/university.md +++ b/docs/university.md @@ -1467,23 +1467,23 @@ jsjxy.hbut.edu.cn 证书链不全,自建 RSSHub 可设置环境变量 NODE_TLS ## 南京航空航天大学 -### 教务通知 +### 教务处 - + -| 教学服务 | 教学建设 | 学生培养 | 教学资源 | -| ------------- | ---- | ---- | ---- | -| jxfw(default) | jxjs | xspy | jxzy | +| 通知公告 | 教学服务 | 教学建设 | 学生培养 | 教学资源 | +| ---- | ---- | ---- | ---- | ---- | +| tzgg | jxfw | jxjs | xspy | jxzy | ### 计算机科学与技术学院 - + -| 通知公告 | 新闻动态 | 科研动态 | 教学动态 | 学生工作 | 招生信息 | 就业信息 | -| ---- | ---- | ---- | ---- | ---- | ---- | ---- | -| tzgg | xwdt | kydt | jxdt | xsgz | zsxx | jyxx | +| 通知公告 | 热点新闻 | 学科科研 | 教学动态 | 本科生培养 | 研究生培养 | 学生工作 | +| ---- | ---- | ---- | ---- | ----- | ----- | ---- | +| tzgg | rdxw | xkky | jxdt | be | me | xsgz | diff --git a/lib/router.js b/lib/router.js index 38e080af779eb8..14036b2bff2e7e 100644 --- a/lib/router.js +++ b/lib/router.js @@ -519,10 +519,10 @@ router.get('/seu/cse/:type?', lazyloadRouteHandler('./routes/universities/seu/cs // 南京工业大学 router.get('/njtech/jwc', lazyloadRouteHandler('./routes/universities/njtech/jwc')); -// 南京航空航天大学 -router.get('/nuaa/jwc/:type?', lazyloadRouteHandler('./routes/universities/nuaa/jwc/jwc')); -router.get('/nuaa/cs/:type?', lazyloadRouteHandler('./routes/universities/nuaa/cs/index')); -router.get('/nuaa/yjsy/:type?', lazyloadRouteHandler('./routes/universities/nuaa/yjsy/yjsy')); +// 南京航空航天大学 migrated to v2 +// router.get('/nuaa/jwc/:type/:getDescription?', lazyloadRouteHandler('./routes/universities/nuaa/jwc/jwc')); +// router.get('/nuaa/cs/:type/:getDescription?', lazyloadRouteHandler('./routes/universities/nuaa/cs/index')); +// router.get('/nuaa/yjsy/:type?', lazyloadRouteHandler('./routes/universities/nuaa/yjsy/yjsy')); // 河海大学 router.get('/hhu/libNews', lazyloadRouteHandler('./routes/universities/hhu/libNews')); diff --git a/lib/routes/universities/nuaa/cs/index.js b/lib/routes/universities/nuaa/cs/index.js deleted file mode 100644 index a56203a8b29517..00000000000000 --- a/lib/routes/universities/nuaa/cs/index.js +++ /dev/null @@ -1,84 +0,0 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const url = require('url'); -const getCookie = require('../utils/pypasswaf'); -const host = 'http://cs.nuaa.edu.cn/'; - -const map = new Map([ - ['tzgg', { title: '南京航空航天大学计算机科学与技术学院 -- 通知公告', suffix: '1995/list.htm' }], - ['xwdt', { title: '南京航空航天大学计算机科学与技术学院 -- 新闻动态', suffix: '1997/list.htm' }], - ['kydt', { title: '南京航空航天大学计算机科学与技术学院 -- 科研动态', suffix: '1975/list.htm' }], - ['jxdt', { title: '南京航空航天大学计算机科学与技术学院 -- 教学动态', suffix: '1977/list.htm' }], - ['xsgz', { title: '南京航空航天大学计算机科学与技术学院 -- 学生工作', suffix: '1959/list.htm' }], - ['zsxx', { title: '南京航空航天大学计算机科学与技术学院 -- 招生信息', suffix: '1993/list.htm' }], - ['jyxx', { title: '南京航空航天大学计算机科学与技术学院 -- 就业信息', suffix: '1994/list.htm' }], -]); - -module.exports = async (ctx) => { - const type = ctx.params.type || 'tzgg'; - const suffix = map.get(type).suffix; - - const link = url.resolve(host, suffix); - const cookie = await getCookie(); - const gotConfig = { - headers: { - cookie, - }, - }; - const response = await got.get(link, gotConfig); - const $ = cheerio.load(response.data); - - const list = $('#news_list > ul > li') - .slice(0, 10) - .map(function () { - const info = { - title: $(this).find('a').attr('title'), - link: $(this).find('a').attr('href'), - date: $(this).find('span').text(), - }; - return info; - }) - .get(); - - const out = await Promise.all( - list.map(async (info) => { - const title = info.title || 'tzgg'; - const date = info.date; - const itemUrl = url.resolve(host, info.link); - - const cache = await ctx.cache.get(itemUrl); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } - - const arr = itemUrl.split('.'); - const pageType = arr[arr.length - 1]; - - let description = itemUrl; - if (pageType === 'htm' || pageType === 'html') { - const response = await got.get(itemUrl, gotConfig); - const $ = cheerio.load(response.data); - description = $('.wp_articlecontent') - .html() - .replace(/src="\//g, `src="${url.resolve(host, '.')}`) - .trim(); - } - - const single = { - title, - link: itemUrl, - description, - pubDate: new Date(date).toUTCString(), - }; - ctx.cache.set(itemUrl, JSON.stringify(single)); - return Promise.resolve(single); - }) - ); - - ctx.state.data = { - title: map.get(type).title, - link, - description: '南京航空航天大学计算机科学与技术学院RSS', - item: out, - }; -}; diff --git a/lib/routes/universities/nuaa/jwc/jwc.js b/lib/routes/universities/nuaa/jwc/jwc.js deleted file mode 100644 index 766f63bde09ffe..00000000000000 --- a/lib/routes/universities/nuaa/jwc/jwc.js +++ /dev/null @@ -1,105 +0,0 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const url = require('url'); -const host = 'http://aao.nuaa.edu.cn/'; -// https is not allowed in nuaa;interesting ha - -const map = { - default: 8230, - jxfw: 8230, // 教学服务 - xspy: 8231, // 学生培养 - jxjs: 8232, // 教学建设 - jxzy: 8233, // 教学资源 -}; - -async function load(link, cookie, ctx) { - const cache = await ctx.cache.get(link); - if (cache) { - return JSON.parse(cache); - } - const response = await got.get(link, { - headers: { - cookie, - }, - }); - const $ = cheerio.load(response.data); - const pubDate = new Date( - $('.release-time') - .text() - .slice(-10) - .match(/\d{4}-\d{2}-\d{2}/) - ).toUTCString(); - const images = $('img'); - for (let k = 0; k < images.length; k++) { - $(images[k]).replaceWith(``); - } - const description = $('.wp_articlecontent').html(); - const result = { pubDate, description }; - ctx.cache.set(link, JSON.stringify(result)); - - return result; -} - -module.exports = async (ctx) => { - const browser = await require('@/utils/puppeteer')(); - const type = ctx.params.type || 'default'; - const listUrl = `${host}${map[type]}/list.htm`; - - const page = await browser.newPage(); - await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'); - await page.evaluateOnNewDocument(() => { - // eslint-disable-next-line - Object.defineProperty(navigator, 'webdriver', { get: () => false }); - }); - // 南航的新waf现在采用cookie验证的方法防止爬虫,并且会对chrome headless进行检测 - - await page.goto(listUrl, { - waitUntil: 'networkidle0', - }); - - let msg; - try { - // data = await page.$('ul.right-ul') - msg = await page.$$eval('ul.right-ul > li', (es) => - es.map((e) => { - const html = e.innerHTML; - - let [, title] = html.match(/(.+)<\/a>/); - if (title.match(/font/)) { - [, title] = title.match(/>(.+)(.+?) `${name}=${value}`).join('; '); - browser.close(); - msg = await Promise.all( - msg.map(async (e) => { - const link = url.resolve(host, e.link); - return { - ...e, - link, - guid: link, - ...(await load(link, cookie, ctx)), - }; - }) - ); - - ctx.state.data = { - title: '南航教务', - link: host, - description: '南航教务RSS', - item: msg, - }; -}; diff --git a/lib/v2/nuaa/cs/index.js b/lib/v2/nuaa/cs/index.js new file mode 100644 index 00000000000000..bc3cb3571dc28e --- /dev/null +++ b/lib/v2/nuaa/cs/index.js @@ -0,0 +1,87 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const { parseDate } = require('@/utils/parse-date'); +const getCookie = require('../utils/pypasswaf'); +const host = 'http://cs.nuaa.edu.cn/'; + +const map = new Map([ + ['tzgg', { title: '南京航空航天大学计算机科学与技术学院 -- 通知公告', suffix: 'tzgg/list.htm' }], + ['rdxw', { title: '南京航空航天大学计算机科学与技术学院 -- 热点新闻', suffix: '10846/list.htm' }], + ['xkky', { title: '南京航空航天大学计算机科学与技术学院 -- 学科科研', suffix: '10849/list.htm' }], + ['be', { title: '南京航空航天大学计算机科学与技术学院 -- 本科生培养', suffix: '10850/list.htm' }], + ['me', { title: '南京航空航天大学计算机科学与技术学院 -- 研究生培养', suffix: '10851/list.htm' }], + ['jxdt', { title: '南京航空航天大学计算机科学与技术学院 -- 教学动态', suffix: '1977/list.htm' }], + ['xsgz', { title: '南京航空航天大学计算机科学与技术学院 -- 学生工作', suffix: '1959/list.htm' }], +]); + +module.exports = async (ctx) => { + const type = ctx.params.type; + const getDescription = Boolean(ctx.params.getDescription) || false; + const suffix = map.get(type).suffix; + + const link = new URL(suffix, host).href; + const cookie = await getCookie(); + const gotConfig = { + headers: { + cookie, + }, + }; + const response = await got(link, gotConfig); + const $ = cheerio.load(response.data); + + const list = $('#news_list ul li') + .slice(0, Math.min(parseInt($('.per_count', '#wp_paging_w6').text()), parseInt($('.all_count', '#wp_paging_w6').slice(1).text()))) + .map(function () { + const info = { + title: $(this).find('a').attr('title'), + link: $(this).find('a').attr('href'), + date: $(this).find('span').text(), + }; + return info; + }) + .get(); + + const out = await Promise.all( + list.map((info) => { + const title = info.title || 'tzgg'; + const date = info.date; + const itemUrl = new URL(info.link, host).href; + + return ctx.cache.tryGet(itemUrl, async () => { + const arr = itemUrl.split('.'); + const pageType = arr[arr.length - 1]; + + // 南航新 WAF 过于敏感 + // 目前 description 需要遍历页面,会被 WAF 拦截导致无法输出 + // 考虑换一种获取 description 的方式或者将标题当作 title。 + let description = title; + if (getDescription) { + description = itemUrl; + if (pageType === 'htm' || pageType === 'html') { + const response = await got.get(itemUrl, gotConfig); + const $ = cheerio.load(response.data); + description = $('.wp_articlecontent') + .html() + .replace(/src="\//g, `src="${new URL('.', host).href}`) + .trim(); + } + } + + const single = { + title, + link: itemUrl, + description, + pubDate: parseDate(date), + }; + return single; + }); + }) + ); + + ctx.state.data = { + title: map.get(type).title, + link, + description: '南京航空航天大学计算机科学与技术学院RSS', + item: out, + }; +}; diff --git a/lib/v2/nuaa/jwc/jwc.js b/lib/v2/nuaa/jwc/jwc.js new file mode 100644 index 00000000000000..8f199edb09bd72 --- /dev/null +++ b/lib/v2/nuaa/jwc/jwc.js @@ -0,0 +1,82 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const { parseDate } = require('@/utils/parse-date'); +const getCookie = require('../utils/pypasswaf'); +const host = 'http://aao.nuaa.edu.cn/'; + +const map = new Map([ + ['tzgg', { title: '南京航空航天大学教务处 -- 通知公告', suffix: '8222/list.htm' }], + ['jxfw', { title: '南京航空航天大学教务处 -- 教学服务', suffix: '8230/list.htm' }], + ['xspy', { title: '南京航空航天大学教务处 -- 学生培养', suffix: '8231/list.htm' }], + ['jxjs', { title: '南京航空航天大学教务处 -- 教学建设', suffix: '8232/list.htm' }], + ['jxzy', { title: '南京航空航天大学教务处 -- 教学资源', suffix: '8233/list.htm' }], +]); + +module.exports = async (ctx) => { + const type = ctx.params.type; + const suffix = map.get(type).suffix; + const getDescription = Boolean(ctx.params.getDescription) || false; + + const link = new URL(suffix, host).href; + const cookie = await getCookie(); + const gotConfig = { + headers: { + cookie, + }, + }; + const response = await got(link, gotConfig); + const $ = cheerio.load(response.data); + + const list = $('#wp_news_w8 ul li') + .slice(0, 10) + .map(function () { + const info = { + title: $(this).find('a').text(), + link: $(this).find('a').attr('href'), + date: $(this).find('span').text(), + }; + return info; + }) + .get(); + + const out = await Promise.all( + list.map((info) => { + const title = info.title || 'tzgg'; + const date = info.date; + const itemUrl = new URL(info.link, host).href; + + return ctx.cache.tryGet(itemUrl, async () => { + const arr = itemUrl.split('.'); + const pageType = arr[arr.length - 1]; + + // 南航新 WAF 过于敏感 + // 目前 description 需要遍历页面,会被 WAF 拦截导致无法输出 + // 考虑换一种获取 description 的方式或者将标题当作 title。 + let description = title; + if (getDescription) { + description = itemUrl; + if (pageType === 'htm' || pageType === 'html') { + const response = await got(itemUrl, gotConfig); + const $ = cheerio.load(response.data); + description = $('.wp_articlecontent').html(); + } + } + + const single = { + title, + link: itemUrl, + description, + pubDate: parseDate(date), + }; + return single; + }); + }) + ); + + ctx.state.data = { + title: map.get(type).title, + link, + description: '南京航空航天大学教务处RSS', + item: out, + }; +}; diff --git a/lib/v2/nuaa/maintainer.js b/lib/v2/nuaa/maintainer.js new file mode 100644 index 00000000000000..1861cbdab90dc1 --- /dev/null +++ b/lib/v2/nuaa/maintainer.js @@ -0,0 +1,5 @@ +module.exports = { + '/cs/:type/:getDescription?': ['LogicJake', 'Seiry', 'qrzbing'], + '/jwc/:type/:getDescription?': ['arcosx', 'Seiry', 'qrzbing'], + '/yjsy/:type?': ['junfengP', 'Seiry'], +}; diff --git a/lib/v2/nuaa/radar.js b/lib/v2/nuaa/radar.js new file mode 100644 index 00000000000000..df521f4734a37a --- /dev/null +++ b/lib/v2/nuaa/radar.js @@ -0,0 +1,23 @@ +module.exports = { + 'nuaa.edu.cn': { + _name: '南京航空航天大学', + aao: [ + { + title: '教务处', + docs: 'https://docs.rsshub.app/university.html#nan-jing-hang-kong-hang-tian-da-xue', + }, + ], + cs: [ + { + title: '计算机科学与技术学院', + docs: 'https://docs.rsshub.app/university.html#nan-jing-hang-kong-hang-tian-da-xue', + }, + ], + 'www.graduate': [ + { + title: '研究生院', + docs: 'https://docs.rsshub.app/university.html#nan-jing-hang-kong-hang-tian-da-xue', + }, + ], + }, +}; diff --git a/lib/v2/nuaa/router.js b/lib/v2/nuaa/router.js new file mode 100644 index 00000000000000..c906a0548a4541 --- /dev/null +++ b/lib/v2/nuaa/router.js @@ -0,0 +1,5 @@ +module.exports = (router) => { + router.get('/cs/:type/:getDescription?', require('./cs/index')); + router.get('/jwc/:type/:getDescription?', require('./jwc/jwc')); + router.get('/yjsy/:type?', require('./yjsy/yjsy')); +}; diff --git a/lib/routes/universities/nuaa/utils/pypasswaf.js b/lib/v2/nuaa/utils/pypasswaf.js similarity index 94% rename from lib/routes/universities/nuaa/utils/pypasswaf.js rename to lib/v2/nuaa/utils/pypasswaf.js index 1759260c6980a6..fe2ce4ca46d702 100644 --- a/lib/routes/universities/nuaa/utils/pypasswaf.js +++ b/lib/v2/nuaa/utils/pypasswaf.js @@ -2,7 +2,7 @@ const host = 'http://aao.nuaa.edu.cn/'; /** * async function 获取cookie - * @desc 返回一个可用的cookie,使用 `got` 发起请求的时候,传入到`options.header.cookie`即可 + * @desc 返回一个可用的cookie,使用 `got` 发起请求的时候,传入到`options.headers.cookie`即可 */ module.exports = async function getCookie() { const browser = await require('@/utils/puppeteer')(); diff --git a/lib/routes/universities/nuaa/yjsy/yjsy.js b/lib/v2/nuaa/yjsy/yjsy.js similarity index 54% rename from lib/routes/universities/nuaa/yjsy/yjsy.js rename to lib/v2/nuaa/yjsy/yjsy.js index 97e8c2bc14ab27..6f21060fcf0ed1 100644 --- a/lib/routes/universities/nuaa/yjsy/yjsy.js +++ b/lib/v2/nuaa/yjsy/yjsy.js @@ -1,8 +1,7 @@ const got = require('@/utils/got'); const cheerio = require('cheerio'); -const url = require('url'); const host = 'http://www.graduate.nuaa.edu.cn/'; -const formatPubDate = require('@/utils/date.js'); +const { parseDate } = require('@/utils/parse-date'); const map = { latest: '2146/list.htm', @@ -12,24 +11,20 @@ const map = { xxfw: '2147/list.htm', }; -async function load(link, ctx) { - const cache = await ctx.cache.get(link); - if (cache) { - return cache; - } - const response = await got.get(host + link); - const $ = cheerio.load(response.data); - const images = $('img'); - for (let k = 0; k < images.length; k++) { - $(images[k]).replaceWith(``); - } - const description = $('.wp_articlecontent').html(); - ctx.cache.set(link, description); - return { description }; -} +const load = (link, ctx) => + ctx.cache.tryGet(link, async () => { + const response = await got(host + link); + const $ = cheerio.load(response.data); + const images = $('img'); + for (let k = 0; k < images.length; k++) { + $(images[k]).replaceWith(``); + } + const description = $('.wp_articlecontent').html(); + return { description }; + }); module.exports = async (ctx) => { - const type = ctx.params.type || 'latest'; + const type = ctx.params.type ?? 'latest'; const response = await got({ method: 'get', url: host + map[type], @@ -43,12 +38,11 @@ module.exports = async (ctx) => { const itemUrl = a.attr('href'); const single = { title: a.text(), - link: url.resolve(host, itemUrl), - guid: url.resolve(host, itemUrl), - pubDate: formatPubDate(t.text()), + link: new URL(itemUrl, host).href, + pubDate: parseDate(t.text(), 'YYYY-MM-DD'), }; const other = await load(itemUrl, ctx); - return Promise.resolve(Object.assign({}, single, other)); + return { ...single, ...other }; }) );