diff --git a/lib/routes/chinaratings/credit-research.ts b/lib/routes/chinaratings/credit-research.ts index 899da63170465e..7aa14528b74a4f 100644 --- a/lib/routes/chinaratings/credit-research.ts +++ b/lib/routes/chinaratings/credit-research.ts @@ -1,11 +1,12 @@ -import { type CheerioAPI, type Cheerio, type Element, load } from 'cheerio'; -import { type Context } from 'hono'; - -import { type DataItem, type Route, type Data, ViewType } from '@/types'; +import { type Data, type DataItem, type Route, ViewType } from '@/types'; +import cache from '@/utils/cache'; import ofetch from '@/utils/ofetch'; import { parseDate } from '@/utils/parse-date'; +import { type CheerioAPI, type Cheerio, type Element, load } from 'cheerio'; +import { type Context } from 'hono'; + export const handler = async (ctx: Context): Promise => { const { category = 'Industry/Comment' } = ctx.req.param(); const limit: number = Number.parseInt(ctx.req.query('limit') ?? '15', 10); @@ -15,9 +16,11 @@ export const handler = async (ctx: Context): Promise => { const response = await ofetch(targetUrl); const $: CheerioAPI = load(response); - const language: string = $('html').attr('lang') ?? 'zh-CN'; + const language = 'zh-CN'; - const items: DataItem[] = $('div.contRight ul.list li') + let items: DataItem[] = []; + + items = $('div.contRight ul.list li') .slice(0, limit) .toArray() .map((el): Element => { @@ -38,8 +41,60 @@ export const handler = async (ctx: Context): Promise => { }; return processedItem; - }) - .filter((_): _ is DataItem => true); + }); + + items = ( + await Promise.all( + items.map((item) => { + if (!item.link) { + return item; + } + + return cache.tryGet(item.link, async (): Promise => { + const detailResponse = await ofetch(item.link); + const $$: CheerioAPI = load(detailResponse); + + const title: string = $$('div.newshead h2, div.title h3').text(); + const description: string = $$('div.news div.content').html() ?? ''; + + const metaStr: string = $$('div.newshead p span, div.title p span').text(); + const pubDateStr: string | undefined = metaStr?.match(/(\d{4}-\d{2}-\d{2})/)?.[1]; + const authors: DataItem['author'] = metaStr?.match(/来源:(.*?)/)?.[1]; + const upDatedStr: string | undefined = pubDateStr; + + let processedItem: DataItem = { + title, + description, + pubDate: pubDateStr ? parseDate(pubDateStr) : item.pubDate, + author: authors, + content: { + html: description, + text: description, + }, + updated: upDatedStr ? parseDate(upDatedStr) : item.updated, + language, + }; + + const docUrl: string | undefined = detailResponse.match(/(\/upload\/docs\/\d{4}-\d{2}-\d{2}\/doc_\d+)"/)?.[1]; + const enclosureUrl: string | undefined = docUrl ? `${new URL(docUrl, baseUrl).href}.pdf` : undefined; + + if (enclosureUrl) { + processedItem = { + ...processedItem, + enclosure_url: enclosureUrl, + enclosure_type: 'application/pdf', + enclosure_title: title, + }; + } + + return { + ...item, + ...processedItem, + }; + }); + }) + ) + ).filter((_): _ is DataItem => true); const title: string = $('title').text();