Agent query: Please verify if:
1. The application loads successfully. 2. You can see the newsletter cards with smooth animations. 3. The search bar is visible at the top. Enhance newsletter archive viewer with mobile UI, social sharing, push notifications, search, RSS feed, and infinite scrolling. Screenshot: https://storage.googleapis.com/screenshot-production-us-central1/9dda30b6-4149-4bce-89dc-76333005952c/fea88e1e-ec20-4c53-8b12-b205f04819b7.jpg
This commit is contained in:
@@ -4,23 +4,55 @@ import type { InsertNewsletter } from '@shared/schema';
|
||||
|
||||
// Archive index page that scrapeNewsletters() downloads and scans for
// `/archive?id=` links to individual newsletters.
const ROBLY_ARCHIVE_URL = 'https://app.robly.com/public/archives?a=b31b32385b5904b5';
|
||||
|
||||
/**
 * Downloads a single newsletter page and extracts a thumbnail URL and the
 * page's text content.
 *
 * This is best-effort: any failure (network error, timeout, parse error) is
 * logged with `console.warn` and reported as `null` fields instead of being
 * thrown, so one bad page does not abort a larger scrape.
 *
 * @param url - URL of the newsletter page to fetch.
 * @returns `thumbnail` is the `src` of the second `<img>` on the page, or
 *   `null` when there is no second image or it has no `src`. `content` is the
 *   trimmed text of `<body>`. Both are `null` when the page could not be
 *   fetched or parsed.
 */
async function scrapeNewsletterContent(
  url: string
): Promise<{ thumbnail: string | null; content: string | null }> {
  try {
    // Send browser-like headers; the same set is used for the archive index.
    const { data } = await axios.get(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
      },
      timeout: 15000
    });

    const $ = cheerio.load(data);

    // Get the second image as thumbnail. `.attr('src')` is undefined when the
    // attribute is missing, so normalise to null to match the failure path.
    const images = $('img').toArray();
    const thumbnailUrl = images.length > 1 ? $(images[1]).attr('src') ?? null : null;

    // Drop inline scripts and stylesheets before reading the text: depending
    // on the cheerio version, `.text()` may include their source, which would
    // otherwise leak JS/CSS into the extracted content.
    $('script, style').remove();

    // Extract text content
    const content = $('body').text().trim();

    return {
      thumbnail: thumbnailUrl,
      content
    };
  } catch (error) {
    console.warn('Error scraping newsletter content:', error);
    return { thumbnail: null, content: null };
  }
}
|
||||
|
||||
export async function scrapeNewsletters(): Promise<InsertNewsletter[]> {
|
||||
try {
|
||||
// Add headers to mimic a browser request
|
||||
const { data } = await axios.get(ROBLY_ARCHIVE_URL, {
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
},
|
||||
timeout: 10000 // 10 second timeout
|
||||
timeout: 10000
|
||||
});
|
||||
|
||||
const $ = cheerio.load(data);
|
||||
const newsletters: InsertNewsletter[] = [];
|
||||
|
||||
// Find all links that start with /archive?id=
|
||||
$('a[href^="/archive?id="]').each((_, element) => {
|
||||
const links = $('a[href^="/archive?id="]');
|
||||
console.log(`Found ${links.length} newsletter links`);
|
||||
|
||||
for (const element of links.toArray()) {
|
||||
const $element = $(element);
|
||||
const url = $element.attr('href');
|
||||
const fullText = $element.parent().text().trim();
|
||||
@@ -33,18 +65,26 @@ export async function scrapeNewsletters(): Promise<InsertNewsletter[]> {
|
||||
const [, dateStr, title] = match;
|
||||
try {
|
||||
const date = new Date(dateStr).toISOString().split('T')[0];
|
||||
const fullUrl = `https://app.robly.com${url}`;
|
||||
|
||||
// Scrape the newsletter content
|
||||
const { thumbnail, content } = await scrapeNewsletterContent(fullUrl);
|
||||
|
||||
newsletters.push({
|
||||
title: title.trim(),
|
||||
date,
|
||||
url: `https://app.robly.com${url}`,
|
||||
description: null
|
||||
url: fullUrl,
|
||||
thumbnail,
|
||||
content,
|
||||
description: content ? content.slice(0, 200) + '...' : null
|
||||
});
|
||||
|
||||
console.log(`Processed newsletter: ${title}`);
|
||||
} catch (err) {
|
||||
console.warn('Error processing date for newsletter:', { dateStr, title }, err);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (newsletters.length === 0) {
|
||||
console.error('No newsletters found in HTML. First 500 chars of response:', data.slice(0, 500));
|
||||
|
||||
Reference in New Issue
Block a user