diff --git a/infra/services/README.md b/infra/services/README.md index 1c3c6a3680..55b4c2e06b 100644 --- a/infra/services/README.md +++ b/infra/services/README.md @@ -2,7 +2,7 @@ "Services" are Docker images we run on our instances and manage using systemd. -All our services (including museum itself) follow the same pattern: +Generally our services (including museum itself) follow the same pattern: - They're run on vanilla Ubuntu instances. The only expectation they have is for Docker to be installed. @@ -23,6 +23,8 @@ All our services (including museum itself) follow the same pattern: appropriate file from `/root/service-name` into the running Docker container. +- There are exceptions to this general pattern (See [sentry](sentry)). + ## Systemd cheatsheet ```sh diff --git a/infra/services/nginx/README.md b/infra/services/nginx/README.md index c6d0d56efd..f72547896d 100644 --- a/infra/services/nginx/README.md +++ b/infra/services/nginx/README.md @@ -48,10 +48,12 @@ When adding new services that sit behind Nginx, All the files we put into `/root/nginx/conf.d` get included in an `http` block. We can see this in the default configuration of nginx: - http { - ... - include /etc/nginx/conf.d/*.conf; - } +``` +http { + ... + include /etc/nginx/conf.d/*.conf; +} +``` > To view the default configuration, run the following command against the > [official Docker image for Nginx](https://hub.docker.com/_/nginx), which is diff --git a/infra/services/sentry/README.md b/infra/services/sentry/README.md new file mode 100644 index 0000000000..37654bc736 --- /dev/null +++ b/infra/services/sentry/README.md @@ -0,0 +1,137 @@ +# Sentry + +- [Data flow](#data-flow) +- [Setting up a new instance](#setting-up-a-new-instance) + +## Data flow + +### Overview + +Clients tunnel events to sentry-reporter.ente.io, and include the DSN in the +request. 
At the other end of the tunnel is a Cloudflare Worker which unwraps the +event, remaps the DSN if needed, and sends it to our actual self-hosted Sentry +instance, sentry.ente.io. + +Among other things, this indirection allows us to treat the Sentry instance as +disposable, and recreate it from scratch anytime. The existing DSNs change, but +that is not a problem because we remap DSNs in the worker that handles the +tunneled requests. + +### DSN + +Sentry identifies each project with a unique ID it calls **DSN** (Data Source +Name). The DSN is a URL that includes the project ID. For example, here is the +DSN for the debug builds of the photos mobile app: + + https://ca5e686dd7f149d9bf94e620564cceba@sentry.ente.io/3 + +The DSN is considered public information and is included as part of the client's +code. The DSN has 3 parts: + + https://<public_key>@<host>/<project_id> + +The `<host>` for our case is sentry.ente.io. + +Each client has a separate project, and some clients have multiple projects +(e.g. production / debug). Each of these gets a separate DSN. + +### Reporting crashes + +Sentry supports +[tunnels](https://docs.sentry.io/platforms/javascript/configuration/options/#tunnel). +The idea is to encapsulate the entire "original" HTTP event which would've been +reported to some Sentry instance, and instead send this encapsulated event to a +URL that is hosted alongside the app itself (say, example.org/sentry). At the +other end of the tunnel is a service that unwraps the original payload and +forwards it to the actual Sentry instance. + +Usage on the client is simple - the mobile SDKs for Sentry support a `tunnel` +parameter which can be set to "https://sentry-reporter.ente.io". + +The other end of the tunnel is handled by a Cloudflare Worker that listens for +incoming requests to 'https://sentry-reporter.ente.io', and forwards the +requests to `sentry.ente.io`. Before forwarding, it also remaps the DSNs sent by +the client to the latest ones. 
This allows us to hardcode the DSN in the +client - if the DSN on the Sentry backend changes, we can just update or add a +new mapping in the worker. + +The source code for this worker is in +[workers/sentry-reporter](../../workers/sentry-reporter). + +## Setting up a new instance + +### Overview + +The upstream documentation is at https://develop.sentry.dev/self-hosted/. + +We follow their steps (clone their setup, modify the configuration, and run the +`./install.sh` that they provide). This results in a Sentry installation being +available at localhost:9000. + +Then, we install an nginx service that terminates the Cloudflare TLS and reverse +proxies to localhost:9000. + +To update Sentry, just fetch the latest upstream and re-run `./install.sh`. + +### Steps + +> The following assumes that you have already provisioned new instances using +> our standard process. + +- `cd /home/ente && git clone https://github.com/getsentry/self-hosted sentry` +- Check out the latest tag, e.g. `git checkout 24.2.0` (Sentry uses CalVer, so + this'll be the latest `year.month.0`) +- Run `sudo ./install.sh` + +The rest of this section describes the remaining three steps: + +- Modify configuration +- Configure and start external nginx +- Start the cluster + +### Configuration + +Modify `sentry/config.yml`, adding relevant bits from the contents of +`config.yml` (from this repository) and the mail credentials. + +Next, modify `.env`, setting + + SENTRY_EVENT_RETENTION_DAYS=30 + SENTRY_MAIL_HOST=ente.io + +### Configure external nginx + +Add the nginx service (See [services/nginx](../nginx/README.md)) to the +instance. + +Add the Sentry nginx conf and certificates (since this instance will be running +only Sentry, we can use Sentry-specific certificates instead of our general +wildcard ones). 
+ + sudo mv sentry.nginx.conf /root/nginx/conf.d + sudo tee /root/nginx/cert.pem + sudo tee /root/nginx/key.pem + +### Start Sentry + +Sentry should automatically start when the instance boots. If needed (and for +the first time), it can be started manually by + + cd /home/ente/sentry + sudo docker compose up -d + +The (external) nginx service will also start automatically on boot, but +if needed it can be manually started by + + sudo systemctl start nginx + +In their docs Sentry sometimes refers to commands like `sentry createuser`. To +run them, prefix the command with `docker compose exec web`. e.g. + + cd /home/ente/sentry + sudo docker compose exec web sentry createuser + +If needed, Sentry can be stopped by using + + cd /home/ente/sentry + sudo docker compose stop diff --git a/infra/services/sentry/config.yml b/infra/services/sentry/config.yml new file mode 100644 index 0000000000..6bb67454d7 --- /dev/null +++ b/infra/services/sentry/config.yml @@ -0,0 +1,13 @@ +############### +# Mail Server # +############### + +mail.host: "smtp.example.org" +mail.port: 587 +mail.username: "" +mail.password: "" +mail.use-tls: true + +# ... 
+ +system.url-prefix: "https://sentry.ente.io" diff --git a/infra/services/sentry/sentry.nginx.conf b/infra/services/sentry/sentry.nginx.conf new file mode 100644 index 0000000000..014d4e78e0 --- /dev/null +++ b/infra/services/sentry/sentry.nginx.conf @@ -0,0 +1,19 @@ +server { + listen 443 ssl http2; + listen [::]:443 ssl http2; + ssl_certificate /etc/ssl/certs/cert.pem; + ssl_certificate_key /etc/ssl/private/key.pem; + + server_name sentry.ente.io; + + client_max_body_size 500m; + + location / { + proxy_pass http://host.docker.internal:9000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +} + diff --git a/infra/workers/sentry-reporter/package.json b/infra/workers/sentry-reporter/package.json new file mode 100644 index 0000000000..4ddcb5f853 --- /dev/null +++ b/infra/workers/sentry-reporter/package.json @@ -0,0 +1,10 @@ +{ + "name": "files", + "private": true, + "devDependencies": { + "@cloudflare/workers-types": "^4.20240614.0", + "typescript": "^5", + "wrangler": "^3" + }, + "packageManager": "yarn@1.22.22" +} diff --git a/infra/workers/sentry-reporter/src/index.ts b/infra/workers/sentry-reporter/src/index.ts new file mode 100644 index 0000000000..f09cd6e9aa --- /dev/null +++ b/infra/workers/sentry-reporter/src/index.ts @@ -0,0 +1,98 @@ +/** + * Forward a tunneled request from our clients to our Sentry instance. + * + * The clients use a Sentry "tunnel" that connects to where this worker listens. + * Requests to this tunnel endpoint contain the original crash report wrapped in + * an envelope. This worker extracts the original Sentry request from the + * envelope, forwards it to our Sentry instance, and proxies back the response. + * + * It also replaces the DSN in the POST body with the latest one. 
/*
 * This allows us to hardcode the DSN in the clients, without needing to update
 * them if the DSN changes on our self-hosted Sentry's side (e.g. if we recreate
 * these projects from scratch in the Sentry instance).
 */

/**
 * Worker entry point.
 *
 * Only POST requests are handled (see {@link handlePOST}); every other HTTP
 * method gets an empty 405 response.
 */
export default {
    async fetch(request: Request) {
        switch (request.method) {
            case "POST":
                return handlePOST(request);
            default:
                return new Response(null, { status: 405 });
        }
    },
} satisfies ExportedHandler;

/**
 * The only host this worker forwards envelopes to: our self-hosted Sentry.
 *
 * Every DSN in {@link dsnMappings} points at this host.
 */
const sentryHost = "sentry.ente.io";

/**
 * Unwrap a tunneled Sentry envelope, remap its DSN if needed, forward it to the
 * Sentry ingest endpoint, and proxy back Sentry's response.
 *
 * The request body is client controlled. If the envelope header cannot be
 * parsed, or the (mapped) DSN is not one we are willing to forward to, an
 * empty 400 response is returned without contacting Sentry.
 */
const handlePOST = async (request: Request) => {
    const originalBody = await request.text();

    let body: string;
    let ingestURL: string;
    try {
        const mapped = mapDSN(originalBody, extractDSN(originalBody));
        body = mapped.body;
        ingestURL = ingestURLForDSN(mapped.dsn);
    } catch {
        // Malformed JSON, a missing / invalid DSN, or a disallowed upstream.
        // Treat all of these as a bad request instead of letting the
        // exception escape the worker.
        return new Response(null, { status: 400 });
    }

    // Proxy request to Sentry ingest
    return fetch(ingestURL, {
        method: "POST",
        headers: {
            "Content-Type": "application/octet-stream",
        },
        body,
    });
};

/**
 * Return the Sentry envelope ingest URL for the given {@link dsn}.
 *
 * The DSN comes from the request body, so it is validated before we use it to
 * build the upstream URL: the host must be {@link sentryHost} (otherwise this
 * worker could be used to POST arbitrary payloads to arbitrary hosts), and the
 * first path component must be a numeric project ID.
 *
 * @throws Error if the DSN's host or project ID is not acceptable.
 */
const ingestURLForDSN = (dsn: URL) => {
    if (dsn.host !== sentryHost) {
        throw new Error(`Unexpected DSN host ${dsn.host}`);
    }

    const projectIdString = dsn.pathname.slice(1).split("/")[0] ?? "";
    if (!/^\d+$/.test(projectIdString)) {
        throw new Error(`Unexpected project ID ${projectIdString}`);
    }
    const projectId = Number.parseInt(projectIdString, 10);

    return `https://${dsn.host}/api/${projectId}/envelope/`;
};

/**
 * Parse the POST body sent by Sentry client to extract the DSN therein.
 *
 * @throws Error if the envelope header is missing or does not contain a string
 * `dsn` field. Also propagates the SyntaxError thrown by `JSON.parse` if the
 * envelope header is not valid JSON.
 */
const extractDSN = (body: string) => {
    // The body is a sequence of newline separated JSON strings. The first line
    // is the envelope header, which is the only one we need here.
    const [envelopeHeaderString] = body.split("\n", 1);
    if (!envelopeHeaderString) throw new Error(`Missing DSN`);

    // The header is untrusted input, so narrow it before indexing into it
    // (e.g. a header of `null` is valid JSON but not an object).
    const envelopeHeader: unknown = JSON.parse(envelopeHeaderString);
    const dsn =
        typeof envelopeHeader === "object" && envelopeHeader !== null
            ? (envelopeHeader as Record<string, unknown>)["dsn"]
            : undefined;
    if (typeof dsn !== "string")
        throw new Error(`Unexpected DSN ${String(dsn)}`);
    return dsn;
};

/**
 * If {@link originalDSNString} matches one of the known DSNs that we want to
 * map, perform a textual search and replace of the DSN and public_key fields in
 * the body of the request.
 *
 * @param originalBody The request body, as sent by the client.
 * @param originalDSNString The DSN extracted from {@link originalBody}.
 *
 * @returns the (possibly) modified body and DSN.
 *
 * @throws TypeError (from the `URL` constructor) if
 * {@link originalDSNString} is not a valid URL.
 */
const mapDSN = (originalBody: string, originalDSNString: string) => {
    const originalDSN = new URL(originalDSNString);

    const dsnString = dsnMappings[originalDSNString];
    if (dsnString === undefined) {
        // We don't have a mapping for this DSN, return the originals unchanged.
        return { body: originalBody, dsn: originalDSN };
    }

    const dsn = new URL(dsnString);

    // Extract the public_key part from the URLs. We need to do two
    // substitutions, first for the entire DSN, and then for the public key.
    const originalPublicKey = originalDSN.username;
    const publicKey = dsn.username;

    let body = originalBody.replaceAll(originalDSNString, dsnString);
    if (originalPublicKey) {
        body = body.replaceAll(originalPublicKey, publicKey);
    }

    return { body, dsn };
};

/**
 * Map of the DSNs hardcoded in the clients (keys) to the DSN of the
 * corresponding project in our current Sentry instance (values).
 */
const dsnMappings: Record<string, string> = {
    // photos-mobile
    "https://2235e5c99219488ea93da34b9ac1cb68@sentry.ente.io/4":
        "https://1b13ae41ee7c898ce3c49d04781eb908@sentry.ente.io/2",

    // photos-mobile-debug
    // Nb: Maps to the same project in Sentry.
    "https://ca5e686dd7f149d9bf94e620564cceba@sentry.ente.io/3":
        "https://1b13ae41ee7c898ce3c49d04781eb908@sentry.ente.io/2",

    // auth-mobile
    "https://ed4ddd6309b847ba8849935e26e9b648@sentry.ente.io/9":
        "https://47c2aa45d5e359ada9f5fe3c44c98f12@sentry.ente.io/3",
};

/*
 * Remaining (non-TypeScript) sections of the original diff, preserved verbatim:
 *
 * diff --git a/infra/workers/sentry-reporter/tsconfig.json b/infra/workers/sentry-reporter/tsconfig.json
 * new file mode 100644
 * index 0000000000..a65b752070
 * --- /dev/null
 * +++ b/infra/workers/sentry-reporter/tsconfig.json
 * @@ -0,0 +1 @@
 * +{ "extends": "../tsconfig.base.json", "include": ["src"] }
 * diff --git a/infra/workers/sentry-reporter/wrangler.toml b/infra/workers/sentry-reporter/wrangler.toml
 * new file mode 100644
 * index 0000000000..1f343978e5
 * --- /dev/null
 * +++ b/infra/workers/sentry-reporter/wrangler.toml
 * @@ -0,0 +1,5 @@
 * +name = "sentry-reporter"
 * +main = "src/index.ts"
 * +compatibility_date = "2024-06-14"
 * +
 * +routes = [{ pattern = "sentry-reporter.ente.io", custom_domain = true }]
 */