From b0e6c8eca861f59dfadabae375833f5dfe7a72e8 Mon Sep 17 00:00:00 2001 From: matst80 Date: Fri, 10 Oct 2025 20:45:42 +0000 Subject: [PATCH] add tests and grafana dashboard --- grafana_dashboard_cart.json | 327 ++++++++++++++++++++++++++++++++++++ k6/README.md | 174 +++++++++++++++++++ k6/cart_load_test.js | 248 +++++++++++++++++++++++++++ 3 files changed, 749 insertions(+) create mode 100644 grafana_dashboard_cart.json create mode 100644 k6/README.md create mode 100644 k6/cart_load_test.js diff --git a/grafana_dashboard_cart.json b/grafana_dashboard_cart.json new file mode 100644 index 0000000..8379c13 --- /dev/null +++ b/grafana_dashboard_cart.json @@ -0,0 +1,327 @@ +{ + "uid": "cart-actors", + "title": "Cart Actor Cluster", + "timezone": "browser", + "refresh": "30s", + "schemaVersion": 38, + "version": 1, + "editable": true, + "graphTooltip": 0, + "panels": [ + { + "type": "row", + "title": "Overview", + "gridPos": { "x": 0, "y": 0, "w": 24, "h": 1 }, + "id": 1, + "collapsed": false + }, + { + "type": "stat", + "title": "Active Grains", + "id": 2, + "gridPos": { "x": 0, "y": 1, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { "refId": "A", "expr": "cart_active_grains" } + ], + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "type": "stat", + "title": "Grains In Pool", + "id": 3, + "gridPos": { "x": 6, "y": 1, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { "refId": "A", "expr": "cart_grains_in_pool" } + ], + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "type": "stat", + "title": "Pool Usage %", + "id": 4, + "gridPos": { "x": 12, "y": 1, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { "refId": "A", "expr": 
"cart_grain_pool_usage * 100" } + ], + "fieldConfig": { "defaults": { "unit": "percent" } }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"] } + } + }, + { + "type": "stat", + "title": "Connected Remotes", + "id": 5, + "gridPos": { "x": 18, "y": 1, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { "refId": "A", "expr": "connected_remotes" } + ], + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"] } + } + }, + + { + "type": "row", + "title": "Mutations", + "gridPos": { "x": 0, "y": 5, "w": 24, "h": 1 }, + "id": 6, + "collapsed": false + }, + { + "type": "timeseries", + "title": "Mutation Rate (1m)", + "id": 7, + "gridPos": { "x": 0, "y": 6, "w": 12, "h": 8 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { "refId": "A", "expr": "rate(cart_mutations_total[1m])", "legendFormat": "mutations/s" }, + { "refId": "B", "expr": "rate(cart_mutation_failures_total[1m])", "legendFormat": "failures/s" } + ], + "fieldConfig": { "defaults": { "unit": "ops" } } + }, + { + "type": "stat", + "title": "Failure % (5m)", + "id": 8, + "gridPos": { "x": 12, "y": 6, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { + "refId": "A", + "expr": "100 * (increase(cart_mutation_failures_total[5m]) / clamp_min(increase(cart_mutations_total[5m]), 1))" + } + ], + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"] } + } + }, + { + "type": "timeseries", + "title": "Mutation Latency Quantiles", + "id": 9, + "gridPos": { "x": 18, "y": 6, "w": 6, "h": 8 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.50, sum(rate(cart_mutation_latency_seconds_bucket[5m])) by (le))", + "legendFormat": "p50" + }, + { + "refId": "B", + "expr": "histogram_quantile(0.90, 
sum(rate(cart_mutation_latency_seconds_bucket[5m])) by (le))", + "legendFormat": "p90" + }, + { + "refId": "C", + "expr": "histogram_quantile(0.99, sum(rate(cart_mutation_latency_seconds_bucket[5m])) by (le))", + "legendFormat": "p99" + } + ], + "fieldConfig": { "defaults": { "unit": "s" } } + }, + + { + "type": "row", + "title": "Event Log", + "gridPos": { "x": 0, "y": 14, "w": 24, "h": 1 }, + "id": 10, + "collapsed": false + }, + { + "type": "timeseries", + "title": "Event Append Rate (5m)", + "id": 11, + "gridPos": { "x": 0, "y": 15, "w": 8, "h": 6 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { "refId": "A", "expr": "rate(cart_event_log_appends_total[5m])", "legendFormat": "appends/s" } + ] + }, + { + "type": "timeseries", + "title": "Event Bytes Written Rate (5m)", + "id": 12, + "gridPos": { "x": 8, "y": 15, "w": 8, "h": 6 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { "refId": "A", "expr": "rate(cart_event_log_bytes_written_total[5m])", "legendFormat": "bytes/s" } + ], + "fieldConfig": { "defaults": { "unit": "Bps" } } + }, + { + "type": "stat", + "title": "Existing Log Files", + "id": 13, + "gridPos": { "x": 16, "y": 15, "w": 4, "h": 3 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [{ "refId": "A", "expr": "cart_event_log_files_existing" }], + "options": { "reduceOptions": { "calcs": ["lastNotNull"] } } + }, + { + "type": "stat", + "title": "Last Append Age (s)", + "id": 14, + "gridPos": { "x": 20, "y": 15, "w": 4, "h": 3 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { "refId": "A", "expr": "(time() - cart_event_log_last_append_unix)" } + ], + "options": { "reduceOptions": { "calcs": ["lastNotNull"] } } + }, + { + "type": "stat", + "title": "Replay Failures Total", + "id": 15, + "gridPos": { "x": 16, "y": 18, "w": 4, "h": 3 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [{ "refId": "A", "expr": "cart_event_log_replay_failures_total" }], + "options": { "reduceOptions": { "calcs": ["lastNotNull"] } } + }, + { + "type": 
"stat", + "title": "Replay Duration p95 (5m)", + "id": 16, + "gridPos": { "x": 20, "y": 18, "w": 4, "h": 3 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.95, sum(rate(cart_event_log_replay_duration_seconds_bucket[5m])) by (le))" + } + ], + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "fieldConfig": { "defaults": { "unit": "s" } } } + }, + + { + "type": "row", + "title": "Grain Lifecycle", + "gridPos": { "x": 0, "y": 21, "w": 24, "h": 1 }, + "id": 17, + "collapsed": false + }, + { + "type": "timeseries", + "title": "Spawn & Lookup Rates (1m)", + "id": 18, + "gridPos": { "x": 0, "y": 22, "w": 12, "h": 8 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { "refId": "A", "expr": "rate(cart_grain_spawned_total[1m])", "legendFormat": "spawns/s" }, + { "refId": "B", "expr": "rate(cart_grain_lookups_total[1m])", "legendFormat": "lookups/s" } + ] + }, + { + "type": "stat", + "title": "Negotiations Rate (5m)", + "id": 19, + "gridPos": { "x": 12, "y": 22, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { "refId": "A", "expr": "rate(cart_remote_negotiation_total[5m])" } + ], + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "orientation": "horizontal" } + }, + { + "type": "stat", + "title": "Mutations Total", + "id": 20, + "gridPos": { "x": 18, "y": 22, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [{ "refId": "A", "expr": "cart_mutations_total" }], + "options": { "reduceOptions": { "calcs": ["lastNotNull"] } } + }, + + { + "type": "row", + "title": "Event Log Errors", + "gridPos": { "x": 0, "y": 30, "w": 24, "h": 1 }, + "id": 21, + "collapsed": false + }, + { + "type": "stat", + "title": "Unknown Event Types", + "id": 22, + "gridPos": { "x": 0, "y": 31, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [{ "refId": "A", "expr": "cart_event_log_unknown_types_total" }], + "options": { "reduceOptions": { "calcs": ["lastNotNull"] } 
} + }, + { + "type": "stat", + "title": "Event Mutation Errors", + "id": 23, + "gridPos": { "x": 6, "y": 31, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [{ "refId": "A", "expr": "cart_event_log_mutation_errors_total" }], + "options": { "reduceOptions": { "calcs": ["lastNotNull"] } } + }, + { + "type": "stat", + "title": "Replay Success Total", + "id": 24, + "gridPos": { "x": 12, "y": 31, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [{ "refId": "A", "expr": "cart_event_log_replay_total" }], + "options": { "reduceOptions": { "calcs": ["lastNotNull"] } } + }, + { + "type": "stat", + "title": "Replay Duration p50 (5m)", + "id": 25, + "gridPos": { "x": 18, "y": 31, "w": 6, "h": 4 }, + "datasource": "${DS_PROMETHEUS}", + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.50, sum(rate(cart_event_log_replay_duration_seconds_bucket[5m])) by (le))" + } + ], + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "fieldConfig": { "defaults": { "unit": "s" } } } + } + ], + "templating": { + "list": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "type": "datasource", + "query": "prometheus", + "current": { "text": "Prometheus", "value": "Prometheus" } + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": ["5s","10s","30s","1m","5m","15m","30m","1h"], + "time_options": ["5m","15m","30m","1h","6h","12h","24h","2d","7d"] + } +} diff --git a/k6/README.md b/k6/README.md new file mode 100644 index 0000000..0818791 --- /dev/null +++ b/k6/README.md @@ -0,0 +1,174 @@ +# k6 Load Tests for Cart API + +This directory contains a k6 script (`cart_load_test.js`) to stress and observe the cart actor HTTP API. 
+ +## Contents + +- `cart_load_test.js` – primary k6 scenario script +- `README.md` – this file + +## Prerequisites + +- Node.js is not required (k6 runs standalone) +- k6 installed (>= v0.43 recommended) +- Prometheus + Grafana (optional) if you want to correlate results with the `grafana_dashboard_cart.json` dashboard +- A running cart service exposing HTTP endpoints at (default) `http://localhost:8080/cart` + +## Endpoints Exercised + +The script exercises (per iteration): + +1. `GET /cart/` – ensure cart exists; only sent while the VU has no cart yet (creates cart if missing; sets `cartid` & `cartowner` cookies) +2. `POST /cart/` – add item mutation, 1–5 times per iteration (random SKU & quantity) +3. `GET /cart/` – fetch after mutations +4. `GET /cart/checkout` – occasionally (~2% of iterations) to simulate checkout start + +You can extend it easily to hit deliveries, quantity changes, or removal endpoints. + +## Environment Variables + +| Variable | Purpose | Default | +|-----------------|----------------------------------------------|-------------------------| +| `BASE_URL` | Base URL root (either host or host/cart) | `http://localhost:8080/cart` | +| `VUS` | VUs for steady_mutations scenario | `20` | +| `DURATION` | Duration for steady_mutations scenario | `5m` | +| `RAMP_TARGET` | Peak VUs for ramp_up scenario | `50` | + +You can also disable one scenario by editing `options.scenarios` inside the script. 
+ +Example run: + +```bash +k6 run \ + -e BASE_URL=https://cart.prod.example.com/cart \ + -e VUS=40 \ + -e DURATION=10m \ + -e RAMP_TARGET=120 \ + k6/cart_load_test.js +``` + +## Metrics (Custom) + +The script defines additional k6 metrics: + +- `cart_add_item_duration` (Trend) – latency of POST add item +- `cart_fetch_duration` (Trend) – latency of GET cart state +- `cart_checkout_duration` (Trend) – latency of checkout +- `cart_items_added` (Counter) – successful add item operations +- `cart_checkout_calls` (Counter) – successful checkout calls + +Thresholds (in `options.thresholds`) enforce basic SLOs: +- HTTP request failure rate < 2% +- p90 latency < 800 ms for add item and < 600 ms for cart fetch +- Overall HTTP latency: p90 < 800 ms, p99 < 1500 ms + +Adjust thresholds to your environment if they trigger prematurely. + +## Cookies & Stickiness + +The script preserves: +- `cartid` – cart identity (server sets expiry separately) +- `cartowner` – owning host for sticky routing + +If your load balancer or ingress enforces affinity based on these cookies, traffic will naturally concentrate on the originally claimed host for each cart under test. + +## SKU Set + +SKUs used (randomly selected each mutation): + +``` +778290 778345 778317 778277 778267 778376 778244 778384 +778365 778377 778255 778286 778246 778270 778266 778285 +778329 778425 778407 778418 778430 778469 778358 778351 +778319 778307 778278 778251 778253 778261 778263 778273 +778281 778294 778297 778302 +``` + +To add/remove SKUs, edit the `SKUS` array. Keeping it non-empty and moderately sized helps randomization. 
+ +## Extending the Script + +### Add Quantity Change + +```js +function changeQuantity(itemId, newQty) { + const payload = JSON.stringify({ Id: itemId, Qty: newQty }); + http.put(baseUrl() + '/', payload, { headers: headers() }); +} +``` + +### Remove Item + +```js +function removeItem(itemId) { + http.del(baseUrl() + '/' + itemId, null, { headers: headers() }); +} +``` + +### Add Delivery + +```js +function addDelivery(itemIds) { + const payload = JSON.stringify({ provider: "POSTNORD", items: itemIds }); + http.post(baseUrl() + '/delivery', payload, { headers: headers() }); +} +``` + +You can integrate these into the iteration loop with probabilities. + +## Output Summary + +`handleSummary` outputs a JSON summary to stdout: +- Average & p95 mutation latencies (if present) +- Fetch p95 +- Checkout count +- Check statuses + +Redirect or parse that output for CI pipelines. + +## Running in CI + +Use shorter durations (e.g. `DURATION=2m VUS=10`) to keep builds fast. Fail build on threshold breaches: + +```bash +k6 run -e BASE_URL=$TARGET -e VUS=10 -e DURATION=2m k6/cart_load_test.js || exit 1 +``` + +## Correlating with Prometheus / Grafana + +During load, observe: +- `cart_mutations_total` growth and latency histograms +- Event log write rate (`cart_event_log_appends_total`) +- Pool usage (`cart_grain_pool_usage`) and spawn rate (`cart_grain_spawned_total`) +- Failure counters (`cart_mutation_failures_total`) ensure they remain low + +If mutation latency spikes without high error rate, inspect external dependencies (e.g., product fetcher or Klarna endpoints). 
+
+## Common Tuning Tips
+
+| Symptom | Potential Adjustment |
+|------------------------------------|---------------------------------------------------|
+| High latency p99 | Increase CPU/memory, optimize mutation handlers |
+| Pool at capacity | Raise pool size argument or TTL |
+| Frequent cart eviction mid-test | Confirm TTL is sliding (now 2h on mutation) |
+| High replay duration | Consider snapshot + truncate event logs |
+| Uneven host load | Verify `cartowner` cookie is respected upstream |
+
+## Safety / Load Guardrails
+
+- Start with low VUs (5–10) and short duration.
+- Scale incrementally to find saturation points.
+- If using production endpoints, coordinate off-peak runs.
+
+## License / Attribution
+
+This test script is tailored for your internal cart actor system; adapt freely. k6 is open-source (AGPL v3). Ensure compliance if redistributing.
+
+---
+
+Feel free to request:
+- A variant script for spike tests
+- WebSocket / long poll integration (if added later)
+- Synthetic error injection harness
+
+Happy load testing!
\ No newline at end of file
diff --git a/k6/cart_load_test.js b/k6/cart_load_test.js
new file mode 100644
index 0000000..7bc4596
--- /dev/null
+++ b/k6/cart_load_test.js
@@ -0,0 +1,332 @@
+import http from "k6/http";
+import { check, sleep, group } from "k6";
+import { Counter, Trend } from "k6/metrics";
+
+// ---------------- Configuration ----------------
+
+/**
+ * Read an integer setting from the k6 environment (__ENV).
+ *
+ * @param {string} name - Environment variable name.
+ * @param {number} fallback - Value used when the variable is unset or empty.
+ * @returns {number} The parsed base-10 integer, or fallback.
+ * @throws {Error} If the variable is set but is not an integer.
+ */
+function envInt(name, fallback) {
+    const raw = __ENV[name];
+    if (!raw) return fallback;
+    const parsed = Number.parseInt(raw, 10);
+    if (Number.isNaN(parsed)) {
+        // Fail at init with a clear message instead of handing NaN to k6.
+        throw new Error(`${name} must be an integer, got "${raw}"`);
+    }
+    return parsed;
+}
+
+// Length of the steady phase. It doubles as the ramp_up start offset so the
+// ramp begins when the steady phase ends, whatever DURATION is set to.
+const STEADY_DURATION = __ENV.DURATION || "5m";
+// Peak VU count for the ramp_up scenario.
+const RAMP_TARGET = envInt("RAMP_TARGET", 50);
+
+export const options = {
+    // Adjust vus/duration for your environment
+    scenarios: {
+        steady_mutations: {
+            executor: "constant-vus",
+            vus: envInt("VUS", 20),
+            duration: STEADY_DURATION,
+            gracefulStop: "30s",
+        },
+        ramp_up: {
+            executor: "ramping-vus",
+            startVUs: 0,
+            stages: [
+                { duration: "1m", target: RAMP_TARGET }, // ramp to peak
+                { duration: "1m", target: RAMP_TARGET }, // hold at peak
+                { duration: "1m", target: 0 }, // ramp back down
+            ],
+            gracefulStop: "30s",
+            startTime: STEADY_DURATION,
+        },
+    },
+    thresholds: {
+        http_req_failed: ["rate<0.02"], // < 2% failures
+        http_req_duration: ["p(90)<800", "p(99)<1500"], // latency SLO
+        "cart_add_item_duration{op:add}": ["p(90)<800"],
+        "cart_fetch_duration{op:get}": ["p(90)<600"],
+    },
+    // Keep p(95) in this list: handleSummary reads it from the trend values.
+    summaryTrendStats: ["avg", "min", "med", "max", "p(90)", "p(95)", "p(99)"],
+};
+
+// ---------------- Metrics ----------------
+const addItemTrend = new Trend("cart_add_item_duration", true);
+const fetchTrend = new Trend("cart_fetch_duration", true);
+const checkoutTrend = new Trend("cart_checkout_duration", true);
+const addedItemsCounter = new Counter("cart_items_added");
+const checkoutCounter = new Counter("cart_checkout_calls");
+
+// ---------------- SKUs ----------------
+const SKUS = [
+    "778290",
+    "778345",
+    "778317",
+    "778277",
+    "778267",
+    "778376",
+    "778244",
+    "778384",
+    "778365",
+    "778377",
+    "778255",
+    "778286",
+    "778246",
+    "778270",
+    "778266",
+    "778285",
+    "778329",
+    "778425",
+    "778407",
+    "778418",
+    "778430",
+    "778469",
+    "778358",
+    "778351",
+    "778319",
+    "778307",
+    "778278",
+    "778251",
+    "778253",
+    "778261",
+    "778263",
+    "778273",
+    "778281",
+    "778294",
+    "778297",
+    "778302",
+];
+
+// ---------------- Helpers ----------------
+
+/** @returns {string} A SKU picked at random from SKUS. */
+function randomSku() {
+    return SKUS[Math.floor(Math.random() * SKUS.length)];
+}
+
+/** @returns {number} A random integer quantity from 1 to 3. */
+function randomQty() {
+    return 1 + Math.floor(Math.random() * 3); // 1..3
+}
+
+/**
+ * Resolve the cart API base URL from BASE_URL.
+ *
+ * Accepts either the host root or the full /cart path, with or without
+ * trailing slashes: "http://h", "http://h/", "http://h/cart" and
+ * "http://h/cart/" all resolve to "http://h/cart".
+ *
+ * @returns {string} Base URL ending in "/cart", without a trailing slash.
+ */
+function baseUrl() {
+    const raw = __ENV.BASE_URL || "http://localhost:8080/cart";
+    // Trim trailing slashes before the suffix test; otherwise ".../cart/"
+    // would be turned into ".../cart/cart".
+    const trimmed = raw.replace(/\/+$/, "");
+    return trimmed.endsWith("/cart") ? trimmed : `${trimmed}/cart`;
+}
+
+/**
+ * Read the value of a cookie set by a response.
+ *
+ * @param {object} res - k6 HTTP response.
+ * @param {string} name - Cookie name.
+ * @returns {string|null} Value of the first cookie with that name, or null
+ *     when the response did not set it.
+ */
+function extractCookie(res, name) {
+    const cookies = res.cookies[name];
+    if (!cookies || cookies.length === 0) return null;
+    return cookies[0].value;
+}
+
+/**
+ * Add a Cookie header built from cookieJar to a set of request headers.
+ *
+ * @param {object} headers - Base headers; not mutated.
+ * @param {object} cookieJar - Map of cookie name to cookie value.
+ * @returns {object} headers itself when the jar is missing or empty,
+ *     otherwise a copy with Cookie set to "name1=value1; name2=value2".
+ */
+function withCookies(headers, cookieJar) {
+    if (!cookieJar || Object.keys(cookieJar).length === 0) return headers;
+    const cookieStr = Object.entries(cookieJar)
+        .map(([k, v]) => `${k}=${v}`)
+        .join("; ");
+    return { ...headers, Cookie: cookieStr };
+}
+
+// Maintain cart + owner cookies per VU
+let cartState = {
+    cartid: null,
+    cartowner: null,
+};
+
+// Refresh cookies from response; values the response did not set are kept.
+function updateCookies(res) {
+    const cid = extractCookie(res, "cartid");
+    if (cid) cartState.cartid = cid;
+    const owner = extractCookie(res, "cartowner");
+    if (owner) cartState.cartowner = owner;
+}
+
+// Build JSON request headers, plus whichever cart cookies are known so far.
+function headers() {
+    const h = { "Content-Type": "application/json" };
+    const jar = {};
+    if (cartState.cartid) jar["cartid"] = cartState.cartid;
+    if (cartState.cartowner) jar["cartowner"] = cartState.cartowner;
+    return withCookies(h, jar);
+}
+
+// Ensure cart exists (GET /); a no-op once a cartid cookie has been captured.
+function ensureCart() {
+    if (cartState.cartid) return;
+    const res = http.get(baseUrl() + "/", { headers: headers() });
+    updateCookies(res);
+    check(res, {
+        "ensure cart status 200": (r) => r.status === 200,
+        "ensure cart has id": () => !!cartState.cartid,
+    });
+}
+
+// Add random item (POST /) and record its latency under the op:add tag.
+function addRandomItem() {
+    const payload = JSON.stringify({
+        sku: randomSku(),
+        quantity: randomQty(),
+        country: "no",
+    });
+    const start = Date.now();
+    const res = http.post(baseUrl() + "/", payload, { headers: headers() });
+    const dur = Date.now() - start;
+    addItemTrend.add(dur, { op: "add" });
+    if (res.status === 200) {
+        addedItemsCounter.add(1);
+    }
+    updateCookies(res);
+    check(res, {
+        "add item status ok": (r) => r.status === 200,
+    });
+}
+
+// Fetch cart state (GET /) and record its latency under the op:get tag.
+function fetchCart() {
+    const start = Date.now();
+    const res = http.get(baseUrl() + "/", { headers: headers() });
+    const dur = Date.now() - start;
+    fetchTrend.add(dur, { op: "get" });
+    updateCookies(res);
+    check(res, { "fetch status ok": (r) => r.status === 200 });
+}
+
+// Occasional checkout trigger
+function maybeCheckout() {
+    if (!cartState.cartid) return;
+    // Small probability
+    if (Math.random() < 0.02) {
+        const start = Date.now();
+        const res = http.get(baseUrl() + "/checkout", { headers: headers() });
+        const dur = Date.now() - start;
+        checkoutTrend.add(dur, { op: "checkout" });
+        updateCookies(res);
+        if (res.status === 200) checkoutCounter.add(1);
+        check(res, { "checkout status ok": (r) => r.status === 200 });
+    }
+}
+
+// ---------------- k6 lifecycle ----------------
+export function setup() {
+    // Provide SKU list length for summary
+    return { skuCount: SKUS.length };
+}
+
+export default function (data) {
+    group("cart flow", () => {
+        // Create or reuse cart
+        ensureCart();
+
+        // Random number of item mutations per iteration (1..5)
+        const ops = 1 + Math.floor(Math.random() * 5);
+        for (let i = 0; i < ops; i++) {
+            addRandomItem();
+        }
+
+        // Fetch state
+        fetchCart();
+
+        // Optional checkout attempt
+        maybeCheckout();
+    });
+
+    // Small think time
+    sleep(Math.random() * 0.5);
+}
+
+export function teardown(data) {
+    // Optionally we could GET confirmation or clear cart cookie
+    // Not implemented for load purpose.
+    console.log(`Test complete. SKU count: ${data.skuCount}`);
+}
+
+// ---------------- Summary ----------------
+
+/**
+ * Flatten the check results of a summary group and all of its sub-groups.
+ *
+ * @param {object} grp - Group node from the end-of-test summary data.
+ * @returns {Array<{path: string, passes: number, fails: number}>} One entry
+ *     per check, in depth-first order.
+ */
+function collectChecks(grp) {
+    const found = [];
+    if (!grp) return found;
+    for (const { path, passes, fails } of grp.checks || []) {
+        found.push({ path, passes, fails });
+    }
+    for (const child of grp.groups || []) {
+        found.push(...collectChecks(child));
+    }
+    return found;
+}
+
+/**
+ * Replace the default end-of-test report with a compact JSON document.
+ *
+ * @param {object} data - Summary data passed in by k6.
+ * @returns {{stdout: string}} Pretty-printed JSON for stdout. Metrics that
+ *     never recorded a sample are omitted from the output.
+ */
+export function handleSummary(data) {
+    // Aggregates live under metrics[name].values; percentile keys are the
+    // literal strings from summaryTrendStats, e.g. "p(95)".
+    const valuesOf = (name) => data.metrics[name]?.values;
+    const addItem = valuesOf("cart_add_item_duration");
+    return {
+        stdout: JSON.stringify(
+            {
+                metrics: {
+                    mutations_avg: addItem?.avg,
+                    mutations_p95: addItem?.["p(95)"],
+                    fetch_p95: valuesOf("cart_fetch_duration")?.["p(95)"],
+                    checkout_count: valuesOf("cart_checkout_calls")?.count,
+                },
+                // Checks run inside the "cart flow" group, so walk the whole
+                // group tree rather than only the root.
+                checks: collectChecks(data.root_group),
+            },
+            null,
+            2,
+        ),
+    };
+}