// Review notes for this patch (JavaScript ES modules in unified git diff form).
// NOTE(review): the line breaks of the diff appear to have been collapsed, so
// the hunk line counts in the headers no longer match the physical lines of
// this file; presumably it cannot be applied as-is - confirm against the
// original commit.
//
// src/parser.mjs
// - Adds the TITLE_COMPLIANCE prompt text (six formatting rules for titles).
// - Adds fixTitle(title): builds a prompt from TITLE_COMPLIANCE and the given
//   title, calls anthropic.messages.create with model
//   claude-3-5-haiku-20241022, max_tokens 100, temperature 0 and a forced
//   generate_title tool call, then returns toolUse.input.title. If no tool_use
//   title is present it returns a trimmed response.completion when that is
//   non-empty. It returns null when the request throws, when no title is
//   found, or when extracting the title throws; each of those failures is
//   reported through console.error.
//   NOTE(review): the fallback branch reads response.completion. The request
//   goes through the Messages endpoint, whose documented response carries
//   content blocks; presumably completion is never set and the branch is
//   dead - confirm against the SDK.
//   NOTE(review): title is interpolated into the prompt unescaped. The callers
//   in feed.mjs and story.mjs pass the post title, so if that is
//   user-submitted text it can presumably alter the instructions - consider
//   delimiting or escaping it.
// - metadata(url, generateTitle, submittedTitle) gains the optional third
//   parameter. When it is truthy, a key made of the prefix compliantTitle-
//   followed by the normalized URL is looked up in lifetimeCache: a hit is
//   copied to output.compliantTitle; a miss starts fixTitle without awaiting
//   it and stores a truthy result in lifetimeCache, so the call that triggers
//   the fix returns output without compliantTitle.
//   NOTE(review): the cache key is built from the URL only, so a title fixed
//   for one submission would be served for any later submission of the same
//   URL with a different title - confirm this is intended.
//   NOTE(review): console.log(compliant) looks like leftover debug output; the
//   rejection handler right below it uses log instead.
//
// src/views/components/row.mjs
// - Both title render paths now sanitize story.metadata.compliantTitle when it
//   is set and fall back to story.title otherwise.
//   NOTE(review): the hunk header announces 13 lines but far fewer are
//   visible; markup inside the html templates was presumably lost - confirm.
//
// src/views/feed.mjs
// - addMetadata passes generateTitle = false and post.title to metadata.
//
// src/views/story.mjs
// - The story view passes generateTitle = false and value.title to metadata.
diff --git a/src/parser.mjs b/src/parser.mjs index 3934a49..d40833d 100644 --- a/src/parser.mjs +++ b/src/parser.mjs @@ -1,5 +1,6 @@ import { env } from "process"; import path from "path"; +import normalizeUrl from "normalize-url"; import DOMPurify from "isomorphic-dompurify"; import ogs from "open-graph-scraper-lite"; @@ -12,7 +13,7 @@ import Anthropic from "@anthropic-ai/sdk"; import { fetchCache as fetchCacheFactory } from "./utils.mjs"; -import cache from "./cache.mjs"; +import cache, { lifetimeCache } from "./cache.mjs"; import log from "./logger.mjs"; const fetchCache = new FileSystemCache({ @@ -70,6 +71,16 @@ const twitterFrontends = [ ]; const CLAUDE_DOMAINS = ["warpcast.com", "fxtwitter.com", ...twitterFrontends]; +const TITLE_COMPLIANCE = ` +Format this title according to these rules: + 1. Use sentence case (capitalize first word only) + 2. Remove any emojis + 3. Maximum 80 characters + 4. No trailing period + 5. Keep any existing dash (-) or colon (:) formatting + 6. Format dates as YYYY-MM-DD +`; + const GUIDELINES = `We have an opportunity to build our own corner of the onchain internet. With awesome people, links, resources, and learning. Our content focuses on: @@ -152,6 +163,63 @@ async function generateClaudeTitle(content) { } } +async function fixTitle(title) { + const prompt = `Here are our submission guidelines:\n\n${TITLE_COMPLIANCE}\n\nModify the following title minimally so that it fully complies with these guidelines. Keep all information in the title. Only modify syntactically. 
Return only a JSON object with a "title" property containing the modified title.\nTitle: "${title}"`; + let response; + try { + response = await anthropic.messages.create({ + model: "claude-3-5-haiku-20241022", + max_tokens: 100, + temperature: 0, + tools: [ + { + name: "generate_title", + description: + "Generate a title following the provided guidelines for our Web3/crypto hacker news platform.", + input_schema: { + type: "object", + properties: { + title: { + type: "string", + description: + "The generated title that follows all provided guidelines", + }, + }, + required: ["title"], + }, + }, + ], + tool_choice: { type: "tool", name: "generate_title" }, + messages: [ + { + role: "user", + content: prompt, + }, + ], + }); + } catch (error) { + console.error("fixTitle API request failed:", error); + return null; + } + try { + let toolUse = response.content.find((c) => c.type === "tool_use"); + if (toolUse && toolUse.input && toolUse.input.title) { + return toolUse.input.title; + } else if (response.completion && response.completion.trim().length > 0) { + console.warn( + "No tool_use block found, falling back to response.completion", + ); + return response.completion.trim(); + } else { + console.error("No title found in fixTitle response"); + return null; + } + } catch (error) { + console.error("Error extracting title in fixTitle:", error); + return null; + } +} + async function extractWarpcastContent(url) { try { const apiUrl = `https://api.neynar.com/v2/farcaster/cast?identifier=${url}&type=url`; @@ -233,7 +301,11 @@ const getYTId = (url) => { } }; -export const metadata = async (url, generateTitle = false) => { +export const metadata = async ( + url, + generateTitle = false, + submittedTitle = undefined, +) => { let urlObj; try { urlObj = new URL(url); @@ -374,6 +446,23 @@ export const metadata = async (url, generateTitle = false) => { const pagespeed = await getPageSpeedScore(url); output.pagespeed = pagespeed; + if (submittedTitle) { + const normalized = 
normalizeUrl(url, { stripWWW: false }); + const cacheKey = `compliantTitle-${normalized}`; + if (lifetimeCache.has(cacheKey)) { + output.compliantTitle = lifetimeCache.get(cacheKey); + } else { + fixTitle(submittedTitle) + .then((compliant) => { + if (compliant) { + console.log(compliant); + lifetimeCache.set(cacheKey, compliant); + } + }) + .catch((err) => log(`fixTitle background error: ${err}`)); + } + } + return output; }; diff --git a/src/views/components/row.mjs b/src/views/components/row.mjs index 5b2ed7c..afdc0c7 100644 --- a/src/views/components/row.mjs +++ b/src/views/components/row.mjs @@ -460,13 +460,13 @@ const row = ( ? html`${truncateLongWords( - DOMPurify.sanitize(story.title), + DOMPurify.sanitize((story.metadata && story.metadata.compliantTitle) ? story.metadata.compliantTitle : story.title), )}` : html`${pinned ? html`${pin} ` : ""}${truncateLongWords( - DOMPurify.sanitize(story.title), + DOMPurify.sanitize((story.metadata && story.metadata.compliantTitle) ? story.metadata.compliantTitle : story.title), )}`} diff --git a/src/views/feed.mjs b/src/views/feed.mjs index 5ec5a40..f3b65dd 100644 --- a/src/views/feed.mjs +++ b/src/views/feed.mjs @@ -583,7 +583,9 @@ async function addMetadata(post) { } const metadataTTLSeconds = 60 * 60; // 1 hour - metadata(post.href) + const generateTitle = false; + const submittedTitle = post.title; + metadata(post.href, generateTitle, submittedTitle) .then((result) => { if (result && result.image) { cache.set(metadataCacheKey, result, [metadataTTLSeconds]); diff --git a/src/views/story.mjs b/src/views/story.mjs index 4313a7c..294cd5b 100644 --- a/src/views/story.mjs +++ b/src/views/story.mjs @@ -197,9 +197,10 @@ export default async function (trie, theme, index, value, referral) { } const path = "/stories"; + const generateTitle = false; let data; try { - data = await metadata(value.href); + data = await metadata(value.href, generateTitle, value.title); } catch (err) {} const policy = await moderation.getLists();