Add newsletter import functionality. Scrapes data from Robly and adds it to the archive. Screenshot: https://storage.googleapis.com/screenshot-production-us-central1/9dda30b6-4149-4bce-89dc-76333005952c/99711300-b33c-4ce4-b932-17b8c1aadbde.jpg
42 lines
1.3 KiB
TypeScript
42 lines
1.3 KiB
TypeScript
import axios from 'axios';
|
|
import * as cheerio from 'cheerio';
|
|
import type { InsertNewsletter } from '@shared/schema';
|
|
|
|
/**
 * Public Robly archive listing page that `scrapeNewsletters` downloads and parses.
 * NOTE(review): the `a` query parameter presumably identifies the Robly account — confirm.
 */
const ROBLY_ARCHIVE_URL = 'https://app.robly.com/public/archives?a=b31b32385b5904b5';
|
|
|
|
/** Matches the archive's `MM/DD/YYYY` date text (single-digit month/day allowed). */
const ROBLY_DATE_PATTERN = /^(\d{1,2})\/(\d{1,2})\/(\d+)$/;

/**
 * Converts an archive date such as `3/7/2024` into `2024-03-07`.
 * Returns `null` when the text is not in the expected `MM/DD/YYYY` shape.
 */
function parseRoblyDate(dateText: string): string | null {
  const match = ROBLY_DATE_PATTERN.exec(dateText);
  if (!match) {
    return null;
  }

  const [, month, day, year] = match;
  if (month === undefined || day === undefined || year === undefined) {
    return null;
  }

  return `${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`;
}

/**
 * Resolves a link found on the archive page against the archive URL, so that
 * root-relative, path-relative and already-absolute hrefs all produce a valid
 * absolute URL. Returns `null` when the href cannot be parsed.
 */
function resolveNewsletterUrl(href: string): string | null {
  try {
    return new URL(href, ROBLY_ARCHIVE_URL).href;
  } catch {
    return null;
  }
}

/**
 * Downloads the Robly archive page and extracts one newsletter record per
 * table row that carries a title, a date and a link.
 *
 * Rows missing any of those fields (e.g. header rows) are ignored. Rows whose
 * date or link cannot be parsed are skipped with a warning instead of
 * aborting the whole scrape.
 *
 * @returns The newsletters found on the archive page, with `description`
 *   set to `null` and `date` formatted as `YYYY-MM-DD`.
 * @throws Re-throws any error raised while fetching or parsing the page,
 *   after logging it.
 */
export async function scrapeNewsletters(): Promise<InsertNewsletter[]> {
  try {
    const { data } = await axios.get(ROBLY_ARCHIVE_URL);
    const $ = cheerio.load(data);
    const newsletters: InsertNewsletter[] = [];

    // Each row of the main archive table describes one newsletter.
    $('.archiveTable tr').each((_, element) => {
      const $element = $(element);

      const title = $element.find('.archiveTitle').text().trim();
      const dateText = $element.find('.archiveDate').text().trim();
      const href = $element.find('a').attr('href');

      if (!title || !dateText || !href) {
        return;
      }

      const date = parseRoblyDate(dateText);
      if (date === null) {
        // A single malformed row must not make the whole import fail.
        console.warn(`Skipping newsletter "${title}": unrecognised date "${dateText}"`);
        return;
      }

      const url = resolveNewsletterUrl(href);
      if (url === null) {
        console.warn(`Skipping newsletter "${title}": invalid link "${href}"`);
        return;
      }

      newsletters.push({
        title,
        date,
        url,
        description: null
      });
    });

    return newsletters;
  } catch (error) {
    console.error('Error scraping newsletters:', error);
    throw error;
  }
}
|