From 10828e595210fd698baf99915281bb52198231d8 Mon Sep 17 00:00:00 2001 From: TerribleDev <1020010-TerribleDev@users.noreply.replit.com> Date: Tue, 18 Feb 2025 16:30:13 +0000 Subject: [PATCH] Agent query: I've added the hasDetails tracking feature. Could you check if the newsletters are being displayed correctly and if the content is being fetched properly? Enhance newsletter scraping: Add retry mechanism for missing details and update database accordingly. Screenshot: https://storage.googleapis.com/screenshot-production-us-central1/9dda30b6-4149-4bce-89dc-76333005952c/d96ca1cd-ef9a-4ef9-ae09-bfd9504e8440.jpg --- replit.nix | 5 +++++ server/routes.ts | 49 +++++++++++++++++++++++++++++++---------------- server/storage.ts | 47 +++++++++++++++++++++++++++------------------ server/utils.ts | 48 ++++++++++++++++++++++++++++++++++++---------- shared/schema.ts | 4 +++- 5 files changed, 107 insertions(+), 46 deletions(-) create mode 100644 replit.nix diff --git a/replit.nix b/replit.nix new file mode 100644 index 0000000..404c2dd --- /dev/null +++ b/replit.nix @@ -0,0 +1,5 @@ +{pkgs}: { + deps = [ + pkgs.postgresql + ]; +} diff --git a/server/routes.ts b/server/routes.ts index 4f50912..4794278 100644 --- a/server/routes.ts +++ b/server/routes.ts @@ -1,7 +1,7 @@ import type { Express } from "express"; import { createServer, type Server } from "http"; import { storage } from "./storage"; -import { scrapeNewsletters } from "./utils"; +import { scrapeNewsletters, retryMissingDetails } from "./utils"; import { Feed } from "feed"; import webpush from "web-push"; import schedule from "node-schedule"; @@ -27,6 +27,7 @@ export async function registerRoutes(app: Express): Promise { const existingNewsletters = await storage.getNewsletters(); const scrapedNewsletters = await scrapeNewsletters(); + // Import new newsletters const newNewsletters = scrapedNewsletters.filter(scraped => !existingNewsletters.some(existing => existing.url === scraped.url @@ -37,7 +38,7 @@ export async function registerRoutes(app: Express): Promise { await storage.importNewsletters(newNewsletters); console.log(`Found ${newNewsletters.length} new newsletters, sending notifications...`); - // Send push notifications + // Send push notifications for new newsletters const subscriptions = await storage.getActiveSubscriptions(); console.log(`Sending notifications to ${subscriptions.length} subscribers`); @@ -47,20 +48,6 @@ export async function registerRoutes(app: Express): Promise { icon: '/icon.png' }); - - app.post("/api/subscriptions/:id/settings", async (req, res) => { - try { - const subscriptionId = parseInt(req.params.id); - await storage.saveNotificationSettings(subscriptionId, { - newsletter_notifications: req.body.newsletter_notifications - }); - res.json({ message: "Notification settings updated successfully" }); - } catch (error) { - console.error('Error updating notification settings:', error); - res.status(500).json({ message: "Failed to update notification settings" }); - } - }); - const results = await Promise.allSettled( subscriptions.map(subscription => webpush.sendNotification({ @@ -77,6 +64,23 @@ export async function registerRoutes(app: Express): Promise { const failed = results.filter(r => r.status === 'rejected').length; console.log(`Push notifications sent: ${succeeded} succeeded, ${failed} failed`); } + + // Retry fetching details for newsletters without them + const newslettersWithoutDetails = await storage.getNewslettersWithoutDetails(); + const updatedNewsletters = await retryMissingDetails(newslettersWithoutDetails); + + for (const newsletter of updatedNewsletters) { + if (newsletter.id) { + await storage.updateNewsletterDetails(newsletter.id, { + thumbnail: newsletter.thumbnail, + content: newsletter.content, + description: newsletter.description, + hasDetails: newsletter.hasDetails, + }); + console.log(`Updated details for newsletter: ${newsletter.title}`); + } + } + } catch (error) { console.error('Background job failed:', error); } @@ -151,6 +155,19 @@ export async function registerRoutes(app: Express): Promise { } }); + app.post("/api/subscriptions/:id/settings", async (req, res) => { + try { + const subscriptionId = parseInt(req.params.id); + await storage.saveNotificationSettings(subscriptionId, { + newsletter_notifications: req.body.newsletter_notifications + }); + res.json({ message: "Notification settings updated successfully" }); + } catch (error) { + console.error('Error updating notification settings:', error); + res.status(500).json({ message: "Failed to update notification settings" }); + } + }); + app.get("/api/rss", async (_req, res) => { try { const newsletters = await storage.getNewsletters(); diff --git a/server/storage.ts b/server/storage.ts index a0f6373..8162d9d 100644 --- a/server/storage.ts +++ b/server/storage.ts @@ -1,14 +1,16 @@ -import { type Newsletter, type InsertNewsletter, type Subscription, type InsertSubscription } from "@shared/schema"; +import { type Newsletter, type InsertNewsletter, type Subscription, type InsertSubscription, newsletters, subscriptions, notificationSettings } from "@shared/schema"; import { db } from "./db"; -import { newsletters, subscriptions } from "@shared/schema"; -import { desc, ilike, or } from "drizzle-orm"; +import { desc, ilike, or, eq } from "drizzle-orm"; export interface IStorage { getNewsletters(): Promise; + getNewslettersWithoutDetails(): Promise; searchNewsletters(query: string): Promise; importNewsletters(newsletters: InsertNewsletter[]): Promise; + updateNewsletterDetails(id: number, updates: Partial): Promise; addSubscription(subscription: InsertSubscription): Promise; getSubscriptions(): Promise; + getActiveSubscriptions(): Promise; } export class DatabaseStorage implements IStorage { @@ -16,6 +18,14 @@ export class DatabaseStorage implements IStorage { return await db.select().from(newsletters).orderBy(desc(newsletters.date)); } + async getNewslettersWithoutDetails(): Promise { + return await db + .select() + .from(newsletters) + .where(eq(newsletters.hasDetails, false)) + .orderBy(desc(newsletters.date)); + } + async searchNewsletters(query: string): Promise { const lowercaseQuery = query.toLowerCase(); return await db @@ -32,7 +42,6 @@ export class DatabaseStorage implements IStorage { } async importNewsletters(newNewsletters: InsertNewsletter[]): Promise { - // Insert in batches to avoid overwhelming the database const batchSize = 50; for (let i = 0; i < newNewsletters.length; i += batchSize) { const batch = newNewsletters.slice(i, i + batchSize); @@ -40,6 +49,16 @@ export class DatabaseStorage implements IStorage { } } + async updateNewsletterDetails(id: number, updates: Partial): Promise { + await db + .update(newsletters) + .set({ + ...updates, + last_checked: new Date(), + }) + .where(eq(newsletters.id, id)); + } + async addSubscription(subscription: InsertSubscription): Promise { await db.insert(subscriptions).values(subscription); } @@ -55,23 +74,13 @@ export class DatabaseStorage implements IStorage { settings: notificationSettings }) .from(subscriptions) - .leftJoin(notificationSettings, eq(subscriptions.id, notificationSettings.subscription_id)) + .leftJoin( + notificationSettings, + eq(subscriptions.id, notificationSettings.subscription_id) + ) .where(eq(notificationSettings.newsletter_notifications, true)); - - return result.map(r => r.subscription); - } - async saveNotificationSettings(subscriptionId: number, settings: Partial): Promise { - await db - .insert(notificationSettings) - .values({ - subscription_id: subscriptionId, - ...settings - }) - .onConflictDoUpdate({ - target: [notificationSettings.subscription_id], - set: settings - }); + return result.map(r => r.subscription); } } diff --git a/server/utils.ts b/server/utils.ts index 9921bed..37bb372 100644 --- a/server/utils.ts +++ b/server/utils.ts @@ -1,6 +1,6 @@ import axios from "axios"; import * as cheerio from "cheerio"; -import type { InsertNewsletter } from "@shared/schema"; +import type { InsertNewsletter, Newsletter } from "@shared/schema"; const ROBLY_ARCHIVE_URL = "https://app.robly.com/public/archives?a=b31b32385b5904b5"; @@ -8,9 +8,9 @@ const ROBLY_ARCHIVE_URL = async function scrapeNewsletterContent( url: string, retryCount = 0, -): Promise<{ thumbnail: string | null; content: string | null }> { +): Promise<{ thumbnail: string | null; content: string | null; hasDetails: boolean }> { try { - const backoffTime = Math.min(1000 * Math.pow(2, retryCount), 1000); // Exponential backoff capped at 10 seconds + const backoffTime = Math.min(1000 * Math.pow(2, retryCount), 1000); if (retryCount > 0) { await new Promise((resolve) => setTimeout(resolve, backoffTime)); } @@ -44,9 +44,12 @@ async function scrapeNewsletterContent( // Extract text content const content = $("body").text().trim(); + const hasDetails = !!(content && content.length > 0); + return { thumbnail: thumbnailUrl, content, + hasDetails, }; } catch (error: any) { if ( @@ -59,7 +62,7 @@ async function scrapeNewsletterContent( return scrapeNewsletterContent(url, retryCount + 1); } console.warn("Error scraping newsletter content:", error); - return { thumbnail: null, content: null }; + return { thumbnail: null, content: null, hasDetails: false }; } } @@ -79,7 +82,6 @@ export async function scrapeNewsletters(): Promise { const $ = cheerio.load(data); const newsletters: InsertNewsletter[] = []; - // Find all links that start with /archive?id= const links = $('a[href^="/archive?id="]'); console.log(`Found ${links.length} newsletter links`); @@ -88,8 +90,6 @@ export async function scrapeNewsletters(): Promise { const url = $element.attr("href"); const fullText = $element.parent().text().trim(); - // Extract date and title from the text - // Format is typically: "March 21, 2017 - Title" const match = fullText.match(/^([A-Za-z]+ \d{1,2}, \d{4}) - (.+)$/); if (match && url) { @@ -98,8 +98,7 @@ export async function scrapeNewsletters(): Promise { const date = new Date(dateStr).toISOString().split("T")[0]; const fullUrl = `https://app.robly.com${url}`; - // Scrape the newsletter content - const { thumbnail, content } = await scrapeNewsletterContent(fullUrl); + const { thumbnail, content, hasDetails } = await scrapeNewsletterContent(fullUrl); newsletters.push({ title: title.trim(), @@ -108,9 +107,10 @@ export async function scrapeNewsletters(): Promise { thumbnail, content, description: content ? content.slice(0, 200) + "..." : null, + hasDetails, }); - console.log(`Processed newsletter: ${title}`); + console.log(`Processed newsletter: ${title} (hasDetails: ${hasDetails})`); } catch (err) { console.warn( "Error processing date for newsletter:", @@ -143,3 +143,31 @@ export async function scrapeNewsletters(): Promise { throw error; } } + +export async function retryMissingDetails(newsletters: Newsletter[]): Promise { + const newslettersWithoutDetails = newsletters.filter(n => !n.hasDetails); + console.log(`Found ${newslettersWithoutDetails.length} newsletters without details to retry`); + + const updatedNewsletters: InsertNewsletter[] = []; + + for (const newsletter of newslettersWithoutDetails) { + try { + const { thumbnail, content, hasDetails } = await scrapeNewsletterContent(newsletter.url); + + if (hasDetails) { + updatedNewsletters.push({ + ...newsletter, + thumbnail, + content, + description: content ? content.slice(0, 200) + "..." : null, + hasDetails, + }); + console.log(`Successfully retrieved details for: ${newsletter.title}`); + } + } catch (error) { + console.error(`Failed to retrieve details for ${newsletter.title}:`, error); + } + } + + return updatedNewsletters; +} \ No newline at end of file diff --git a/shared/schema.ts b/shared/schema.ts index 628bdc2..d571c7f 100644 --- a/shared/schema.ts +++ b/shared/schema.ts @@ -17,6 +17,7 @@ export const newsletters = pgTable("newsletters", { description: text("description"), thumbnail: text("thumbnail"), content: text("content"), + hasDetails: boolean("has_details").default(false), last_checked: timestamp("last_checked"), }); @@ -27,6 +28,7 @@ export const insertNewsletterSchema = createInsertSchema(newsletters).pick({ description: true, thumbnail: true, content: true, + hasDetails: true, }); export type InsertNewsletter = z.infer; @@ -63,4 +65,4 @@ export const insertNotificationSettingsSchema = export type InsertNotificationSettings = z.infer< typeof insertNotificationSettingsSchema >; -export type NotificationSettings = typeof notificationSettings.$inferSelect; +export type NotificationSettings = typeof notificationSettings.$inferSelect; \ No newline at end of file