Skip to content

Commit

Permalink
feat: Add title compliance and fixing functionality in metadata proce…
Browse files Browse the repository at this point in the history
…ssing
  • Loading branch information
TimDaub committed Feb 13, 2025
1 parent a194b83 commit 440eb10
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 6 deletions.
93 changes: 91 additions & 2 deletions src/parser.mjs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { env } from "process";
import path from "path";
import normalizeUrl from "normalize-url";

import DOMPurify from "isomorphic-dompurify";
import ogs from "open-graph-scraper-lite";
Expand All @@ -12,7 +13,7 @@ import Anthropic from "@anthropic-ai/sdk";

import { fetchCache as fetchCacheFactory } from "./utils.mjs";

import cache from "./cache.mjs";
import cache, { lifetimeCache } from "./cache.mjs";
import log from "./logger.mjs";

const fetchCache = new FileSystemCache({
Expand Down Expand Up @@ -70,6 +71,16 @@ const twitterFrontends = [
];
const CLAUDE_DOMAINS = ["warpcast.com", "fxtwitter.com", ...twitterFrontends];

// Formatting rules for story titles. fixTitle() interpolates this text
// verbatim into the prompt it sends to Claude, so every character here is
// part of the model input: editing a rule changes how titles get rewritten.
const TITLE_COMPLIANCE = `
Format this title according to these rules:
1. Use sentence case (capitalize first word only)
2. Remove any emojis
3. Maximum 80 characters
4. No trailing period
5. Keep any existing dash (-) or colon (:) formatting
6. Format dates as YYYY-MM-DD
`;

const GUIDELINES = `We have an opportunity to build our own corner of the onchain internet. With awesome people, links, resources, and learning.
Our content focuses on:
Expand Down Expand Up @@ -152,6 +163,63 @@ async function generateClaudeTitle(content) {
}
}

/**
 * Ask Claude to minimally rewrite a submitted title so that it complies with
 * the TITLE_COMPLIANCE rules.
 *
 * The request forces the `generate_title` tool, so the answer is read from
 * the `tool_use` content block of the Messages API response. This function
 * never throws for API or parsing problems; it logs and returns null instead
 * so callers can keep showing the original title.
 *
 * @param {string} title - The title as submitted by the user.
 * @returns {Promise<string|null>} The trimmed compliant title, or null when
 *   the input is missing/blank, the API request fails, or the response does
 *   not contain a usable title.
 */
async function fixTitle(title) {
  // Nothing to fix: avoid a paid API round-trip for missing or blank titles.
  if (typeof title !== "string" || title.trim().length === 0) {
    return null;
  }

  const prompt = `Here are our submission guidelines:\n\n${TITLE_COMPLIANCE}\n\nModify the following title minimally so that it fully complies with these guidelines. Keep all information in the title. Only modify syntactically. Return only a JSON object with a "title" property containing the modified title.\nTitle: "${title}"`;

  let response;
  try {
    response = await anthropic.messages.create({
      model: "claude-3-5-haiku-20241022",
      max_tokens: 100,
      // temperature 0 keeps the rewrite as repeatable as the API allows.
      temperature: 0,
      tools: [
        {
          name: "generate_title",
          description:
            "Generate a title following the provided guidelines for our Web3/crypto hacker news platform.",
          input_schema: {
            type: "object",
            properties: {
              title: {
                type: "string",
                description:
                  "The generated title that follows all provided guidelines",
              },
            },
            required: ["title"],
          },
        },
      ],
      // Force the tool so the title arrives as structured input, not prose.
      tool_choice: { type: "tool", name: "generate_title" },
      messages: [
        {
          role: "user",
          content: prompt,
        },
      ],
    });
  } catch (error) {
    console.error("fixTitle API request failed:", error);
    return null;
  }

  // Messages API responses carry their output in `content` blocks. The legacy
  // `completion` field does not exist on this endpoint, so it is not consulted.
  const blocks =
    response && Array.isArray(response.content) ? response.content : [];
  const toolUse = blocks.find((block) => block && block.type === "tool_use");
  const fixed = toolUse && toolUse.input ? toolUse.input.title : undefined;

  // Only accept a real, non-blank string; anything else would end up cached
  // and rendered in place of the user's title.
  if (typeof fixed === "string" && fixed.trim().length > 0) {
    return fixed.trim();
  }

  console.error("No title found in fixTitle response");
  return null;
}

async function extractWarpcastContent(url) {
try {
const apiUrl = `https://api.neynar.com/v2/farcaster/cast?identifier=${url}&type=url`;
Expand Down Expand Up @@ -233,7 +301,11 @@ const getYTId = (url) => {
}
};

export const metadata = async (url, generateTitle = false) => {
export const metadata = async (
url,
generateTitle = false,
submittedTitle = undefined,
) => {
let urlObj;
try {
urlObj = new URL(url);
Expand Down Expand Up @@ -374,6 +446,23 @@ export const metadata = async (url, generateTitle = false) => {
const pagespeed = await getPageSpeedScore(url);
output.pagespeed = pagespeed;

if (submittedTitle) {
const normalized = normalizeUrl(url, { stripWWW: false });
const cacheKey = `compliantTitle-${normalized}`;
if (lifetimeCache.has(cacheKey)) {
output.compliantTitle = lifetimeCache.get(cacheKey);
} else {
fixTitle(submittedTitle)
.then((compliant) => {
if (compliant) {
console.log(compliant);
lifetimeCache.set(cacheKey, compliant);
}
})
.catch((err) => log(`fixTitle background error: ${err}`));
}
}

return output;
};

Expand Down
4 changes: 2 additions & 2 deletions src/views/components/row.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -460,13 +460,13 @@ const row = (
? html`<mark
style="background-color: rgba(255,255,0, 0.05); padding: 0px 2px;"
>${truncateLongWords(
DOMPurify.sanitize(story.title),
DOMPurify.sanitize((story.metadata && story.metadata.compliantTitle) ? story.metadata.compliantTitle : story.title),
)}</mark
>`
: html`${pinned
? html`${pin} `
: ""}${truncateLongWords(
DOMPurify.sanitize(story.title),
DOMPurify.sanitize((story.metadata && story.metadata.compliantTitle) ? story.metadata.compliantTitle : story.title),
)}`}
</a>
<span> </span>
Expand Down
4 changes: 3 additions & 1 deletion src/views/feed.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,9 @@ async function addMetadata(post) {
}

const metadataTTLSeconds = 60 * 60; // 1 hour
metadata(post.href)
const generateTitle = false;
const submittedTitle = post.title;
metadata(post.href, generateTitle, submittedTitle)
.then((result) => {
if (result && result.image) {
cache.set(metadataCacheKey, result, [metadataTTLSeconds]);
Expand Down
3 changes: 2 additions & 1 deletion src/views/story.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,10 @@ export default async function (trie, theme, index, value, referral) {
}
const path = "/stories";

const generateTitle = false;
let data;
try {
data = await metadata(value.href);
data = await metadata(value.href, generateTitle, value.title);
} catch (err) {}

const policy = await moderation.getLists();
Expand Down

0 comments on commit 440eb10

Please sign in to comment.