missing updates #5

Merged
mats merged 77 commits from refactor/http-proxy into main 2025-10-14 23:12:06 +02:00
3 changed files with 749 additions and 0 deletions
Showing only changes of commit b0e6c8eca8 - Show all commits

327
grafana_dashboard_cart.json Normal file
View File

@@ -0,0 +1,327 @@
{
"uid": "cart-actors",
"title": "Cart Actor Cluster",
"timezone": "browser",
"refresh": "30s",
"schemaVersion": 38,
"version": 1,
"editable": true,
"graphTooltip": 0,
"panels": [
{
"type": "row",
"title": "Overview",
"gridPos": { "x": 0, "y": 0, "w": 24, "h": 1 },
"id": 1,
"collapsed": false
},
{
"type": "stat",
"title": "Active Grains",
"id": 2,
"gridPos": { "x": 0, "y": 1, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "cart_active_grains" }
],
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
}
},
{
"type": "stat",
"title": "Grains In Pool",
"id": 3,
"gridPos": { "x": 6, "y": 1, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "cart_grains_in_pool" }
],
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
}
},
{
"type": "stat",
"title": "Pool Usage %",
"id": 4,
"gridPos": { "x": 12, "y": 1, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "cart_grain_pool_usage * 100" }
],
"units": "percent",
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"] }
}
},
{
"type": "stat",
"title": "Connected Remotes",
"id": 5,
"gridPos": { "x": 18, "y": 1, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "connected_remotes" }
],
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"] }
}
},
{
"type": "row",
"title": "Mutations",
"gridPos": { "x": 0, "y": 5, "w": 24, "h": 1 },
"id": 6,
"collapsed": false
},
{
"type": "timeseries",
"title": "Mutation Rate (1m)",
"id": 7,
"gridPos": { "x": 0, "y": 6, "w": 12, "h": 8 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "rate(cart_mutations_total[1m])", "legendFormat": "mutations/s" },
{ "refId": "B", "expr": "rate(cart_mutation_failures_total[1m])", "legendFormat": "failures/s" }
],
"fieldConfig": { "defaults": { "unit": "ops" } }
},
{
"type": "stat",
"title": "Failure % (5m)",
"id": 8,
"gridPos": { "x": 12, "y": 6, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{
"refId": "A",
"expr": "100 * (increase(cart_mutation_failures_total[5m]) / clamp_max(increase(cart_mutations_total[5m]), 1))"
}
],
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"] }
}
},
{
"type": "timeseries",
"title": "Mutation Latency Quantiles",
"id": 9,
"gridPos": { "x": 18, "y": 6, "w": 6, "h": 8 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{
"refId": "A",
"expr": "histogram_quantile(0.50, sum(rate(cart_mutation_latency_seconds_bucket[5m])) by (le))",
"legendFormat": "p50"
},
{
"refId": "B",
"expr": "histogram_quantile(0.90, sum(rate(cart_mutation_latency_seconds_bucket[5m])) by (le))",
"legendFormat": "p90"
},
{
"refId": "C",
"expr": "histogram_quantile(0.99, sum(rate(cart_mutation_latency_seconds_bucket[5m])) by (le))",
"legendFormat": "p99"
}
],
"fieldConfig": { "defaults": { "unit": "s" } }
},
{
"type": "row",
"title": "Event Log",
"gridPos": { "x": 0, "y": 14, "w": 24, "h": 1 },
"id": 10,
"collapsed": false
},
{
"type": "timeseries",
"title": "Event Append Rate (5m)",
"id": 11,
"gridPos": { "x": 0, "y": 15, "w": 8, "h": 6 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "rate(cart_event_log_appends_total[5m])", "legendFormat": "appends/s" }
]
},
{
"type": "timeseries",
"title": "Event Bytes Written Rate (5m)",
"id": 12,
"gridPos": { "x": 8, "y": 15, "w": 8, "h": 6 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "rate(cart_event_log_bytes_written_total[5m])", "legendFormat": "bytes/s" }
],
"fieldConfig": { "defaults": { "unit": "Bps" } }
},
{
"type": "stat",
"title": "Existing Log Files",
"id": 13,
"gridPos": { "x": 16, "y": 15, "w": 4, "h": 3 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_event_log_files_existing" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Last Append Age (s)",
"id": 14,
"gridPos": { "x": 20, "y": 15, "w": 4, "h": 3 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "(time() - cart_event_log_last_append_unix)" }
],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Replay Failures Total",
"id": 15,
"gridPos": { "x": 16, "y": 18, "w": 4, "h": 3 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_event_log_replay_failures_total" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Replay Duration p95 (5m)",
"id": 16,
"gridPos": { "x": 20, "y": 18, "w": 4, "h": 3 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{
"refId": "A",
"expr": "histogram_quantile(0.95, sum(rate(cart_event_log_replay_duration_seconds_bucket[5m])) by (le))"
}
],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "fieldConfig": { "defaults": { "unit": "s" } } }
},
{
"type": "row",
"title": "Grain Lifecycle",
"gridPos": { "x": 0, "y": 21, "w": 24, "h": 1 },
"id": 17,
"collapsed": false
},
{
"type": "timeseries",
"title": "Spawn & Lookup Rates (1m)",
"id": 18,
"gridPos": { "x": 0, "y": 22, "w": 12, "h": 8 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "rate(cart_grain_spawned_total[1m])", "legendFormat": "spawns/s" },
{ "refId": "B", "expr": "rate(cart_grain_lookups_total[1m])", "legendFormat": "lookups/s" }
]
},
{
"type": "stat",
"title": "Negotiations Rate (5m)",
"id": 19,
"gridPos": { "x": 12, "y": 22, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "rate(cart_remote_negotiation_total[5m])" }
],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "orientation": "horizontal" }
},
{
"type": "stat",
"title": "Mutations Total",
"id": 20,
"gridPos": { "x": 18, "y": 22, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_mutations_total" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "row",
"title": "Event Log Errors",
"gridPos": { "x": 0, "y": 30, "w": 24, "h": 1 },
"id": 21,
"collapsed": false
},
{
"type": "stat",
"title": "Unknown Event Types",
"id": 22,
"gridPos": { "x": 0, "y": 31, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_event_log_unknown_types_total" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Event Mutation Errors",
"id": 23,
"gridPos": { "x": 6, "y": 31, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_event_log_mutation_errors_total" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Replay Success Total",
"id": 24,
"gridPos": { "x": 12, "y": 31, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_event_log_replay_total" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Replay Duration p50 (5m)",
"id": 25,
"gridPos": { "x": 18, "y": 31, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{
"refId": "A",
"expr": "histogram_quantile(0.50, sum(rate(cart_event_log_replay_duration_seconds_bucket[5m])) by (le))"
}
],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "fieldConfig": { "defaults": { "unit": "s" } } }
}
],
"templating": {
"list": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"type": "datasource",
"query": "prometheus",
"current": { "text": "Prometheus", "value": "Prometheus" }
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": ["5s","10s","30s","1m","5m","15m","30m","1h"],
"time_options": ["5m","15m","30m","1h","6h","12h","24h","2d","7d"]
}
}

174
k6/README.md Normal file
View File

@@ -0,0 +1,174 @@
# k6 Load Tests for Cart API
This directory contains a k6 script (`cart_load_test.js`) to stress and observe the cart actor HTTP API.
## Contents
- `cart_load_test.js` primary k6 scenario script
- `README.md` this file
## Prerequisites
- Node not required (k6 runs standalone)
- k6 installed (>= v0.43 recommended)
- Prometheus + Grafana (optional) if you want to correlate with the dashboard you generated
- A running cart service exposing HTTP endpoints at (default) `http://localhost:8080/cart`
## Endpoints Exercised
The script exercises (per iteration):
1. `GET /cart/` ensure / fetch cart state (creates cart if missing; sets `cartid` & `cartowner` cookies)
2. `POST /cart/` add item mutation (random SKU & quantity)
3. `GET /cart/` fetch after mutations
4. `GET /cart/checkout` occasionally (~2% of iterations) to simulate checkout start
You can extend it easily to hit deliveries, quantity changes, or removal endpoints.
## Environment Variables
| Variable | Purpose | Default |
|-----------------|----------------------------------------------|-------------------------|
| `BASE_URL` | Base URL root (either host or host/cart) | `http://localhost:8080/cart` |
| `VUS` | VUs for steady_mutations scenario | `20` |
| `DURATION` | Duration for steady_mutations scenario | `5m` |
| `RAMP_TARGET` | Peak VUs for ramp_up scenario | `50` |
You can also disable one scenario by editing `options.scenarios` inside the script.
Example run:
```bash
k6 run \
-e BASE_URL=https://cart.prod.example.com/cart \
-e VUS=40 \
-e DURATION=10m \
-e RAMP_TARGET=120 \
k6/cart_load_test.js
```
## Metrics (Custom)
The script defines additional k6 metrics:
- `cart_add_item_duration` (Trend) latency of POST add item
- `cart_fetch_duration` (Trend) latency of GET cart state
- `cart_checkout_duration` (Trend) latency of checkout
- `cart_items_added` (Counter) successful add item operations
- `cart_checkout_calls` (Counter) successful checkout calls
Thresholds (in `options.thresholds`) enforce basic SLO:
- Mutation failure rate < 2%
- p90 mutation latency < 800 ms
- p99 overall HTTP latency < 1500 ms
Adjust thresholds to your environment if they trigger prematurely.
## Cookies & Stickiness
The script preserves:
- `cartid` cart identity (server sets expiry separately)
- `cartowner` owning host for sticky routing
If your load balancer or ingress enforces affinity based on these cookies, traffic will naturally concentrate on the originally claimed host for each cart under test.
## SKU Set
SKUs used (randomly selected each mutation):
```
778290 778345 778317 778277 778267 778376 778244 778384
778365 778377 778255 778286 778246 778270 778266 778285
778329 778425 778407 778418 778430 778469 778358 778351
778319 778307 778278 778251 778253 778261 778263 778273
778281 778294 778297 778302
```
To add/remove SKUs, edit the `SKUS` array. Keeping it non-empty and moderately sized helps randomization.
## Extending the Script
### Add Quantity Change
```js
function changeQuantity(itemId, newQty) {
const payload = JSON.stringify({ Id: itemId, Qty: newQty });
http.put(baseUrl() + '/', payload, { headers: headers() });
}
```
### Remove Item
```js
function removeItem(itemId) {
http.del(baseUrl() + '/' + itemId, null, { headers: headers() });
}
```
### Add Delivery
```js
function addDelivery(itemIds) {
const payload = JSON.stringify({ provider: "POSTNORD", items: itemIds });
http.post(baseUrl() + '/delivery', payload, { headers: headers() });
}
```
You can integrate these into the iteration loop with probabilities.
## Output Summary
`handleSummary` outputs a JSON summary to stdout:
- Average & p95 mutation latencies (if present)
- Fetch p95
- Checkout count
- Check statuses
Redirect or parse that output for CI pipelines.
## Running in CI
Use shorter durations (e.g. `DURATION=2m VUS=10`) to keep builds fast. Fail build on threshold breaches:
```bash
k6 run -e BASE_URL=$TARGET -e VUS=10 -e DURATION=2m k6/cart_load_test.js || exit 1
```
## Correlating with Prometheus / Grafana
During load, observe:
- `cart_mutations_total` growth and latency histograms
- Event log write rate (`cart_event_log_appends_total`)
- Pool usage (`cart_grain_pool_usage`) and spawn rate (`cart_grain_spawned_total`)
- Failure counters (`cart_mutation_failures_total`) ensure they remain low
If mutation latency spikes without high error rate, inspect external dependencies (e.g., product fetcher or Klarna endpoints).
## Common Tuning Tips
| Symptom | Potential Adjustment |
|------------------------------------|---------------------------------------------------|
| High latency p99 | Increase CPU/memory, optimize mutation handlers |
| Pool at capacity | Raise pool size argument or TTL |
| Frequent cart eviction mid-test | Confirm TTL is sliding (now 2h on mutation) |
| High replay duration | Consider snapshot + truncate event logs |
| Uneven host load | Verify `cartowner` cookie is respected upstream |
## Safety / Load Guardrails
- Start with low VUs (510) and short duration.
- Scale incrementally to find saturation points.
- If using production endpoints, coordinate off-peak runs.
## License / Attribution
This test script is tailored for your internal cart actor system; adapt freely. k6 is open-source (AGPL v3). Ensure compliance if redistributing.
---
Feel free to request:
- A variant script for spike tests
- WebSocket / long poll integration (if added later)
- Synthetic error injection harness
Happy load testing!

248
k6/cart_load_test.js Normal file
View File

@@ -0,0 +1,248 @@
import http from "k6/http";
import { check, sleep, group } from "k6";
import { Counter, Trend } from "k6/metrics";
// ---------------- Configuration ----------------
export const options = {
// Adjust vus/duration for your environment
scenarios: {
steady_mutations: {
executor: "constant-vus",
vus: __ENV.VUS ? parseInt(__ENV.VUS, 10) : 20,
duration: __ENV.DURATION || "5m",
gracefulStop: "30s",
},
ramp_up: {
executor: "ramping-vus",
startVUs: 0,
stages: [
{
duration: "1m",
target: __ENV.RAMP_TARGET
? parseInt(__ENV.RAMP_TARGET, 10)
: 50,
},
{
duration: "1m",
target: __ENV.RAMP_TARGET
? parseInt(__ENV.RAMP_TARGET, 10)
: 50,
},
{ duration: "1m", target: 0 },
],
gracefulStop: "30s",
startTime: "5m",
},
},
thresholds: {
http_req_failed: ["rate<0.02"], // < 2% failures
http_req_duration: ["p(90)<800", "p(99)<1500"], // latency SLO
"cart_add_item_duration{op:add}": ["p(90)<800"],
"cart_fetch_duration{op:get}": ["p(90)<600"],
},
summaryTrendStats: ["avg", "min", "med", "max", "p(90)", "p(95)", "p(99)"],
};
// ---------------- Metrics ----------------
const addItemTrend = new Trend("cart_add_item_duration", true);
const fetchTrend = new Trend("cart_fetch_duration", true);
const checkoutTrend = new Trend("cart_checkout_duration", true);
const addedItemsCounter = new Counter("cart_items_added");
const checkoutCounter = new Counter("cart_checkout_calls");
// ---------------- SKUs ----------------
const SKUS = [
"778290",
"778345",
"778317",
"778277",
"778267",
"778376",
"778244",
"778384",
"778365",
"778377",
"778255",
"778286",
"778246",
"778270",
"778266",
"778285",
"778329",
"778425",
"778407",
"778418",
"778430",
"778469",
"778358",
"778351",
"778319",
"778307",
"778278",
"778251",
"778253",
"778261",
"778263",
"778273",
"778281",
"778294",
"778297",
"778302",
];
// ---------------- Helpers ----------------
function randomSku() {
return SKUS[Math.floor(Math.random() * SKUS.length)];
}
function randomQty() {
return 1 + Math.floor(Math.random() * 3); // 1..3
}
function baseUrl() {
const u = __ENV.BASE_URL || "http://localhost:8080/cart";
// Allow user to pass either root host or full /cart path
return u.endsWith("/cart") ? u : u.replace(/\/+$/, "") + "/cart";
}
function extractCookie(res, name) {
const cookies = res.cookies[name];
if (!cookies || cookies.length === 0) return null;
return cookies[0].value;
}
function withCookies(headers, cookieJar) {
if (!cookieJar || Object.keys(cookieJar).length === 0) return headers;
const cookieStr = Object.entries(cookieJar)
.map(([k, v]) => `${k}=${v}`)
.join("; ");
return { ...headers, Cookie: cookieStr };
}
// Maintain cart + owner cookies per VU
let cartState = {
cartid: null,
cartowner: null,
};
// Refresh cookies from response
function updateCookies(res) {
const cid = extractCookie(res, "cartid");
if (cid) cartState.cartid = cid;
const owner = extractCookie(res, "cartowner");
if (owner) cartState.cartowner = owner;
}
// Build headers
function headers() {
const h = { "Content-Type": "application/json" };
const jar = {};
if (cartState.cartid) jar["cartid"] = cartState.cartid;
if (cartState.cartowner) jar["cartowner"] = cartState.cartowner;
return withCookies(h, jar);
}
// Ensure cart exists (GET /)
function ensureCart() {
if (cartState.cartid) return;
const res = http.get(baseUrl() + "/", { headers: headers() });
updateCookies(res);
check(res, {
"ensure cart status 200": (r) => r.status === 200,
"ensure cart has id": () => !!cartState.cartid,
});
}
// Add random item
function addRandomItem() {
const payload = JSON.stringify({
sku: randomSku(),
quantity: randomQty(),
country: "no",
});
const start = Date.now();
const res = http.post(baseUrl() + "/", payload, { headers: headers() });
const dur = Date.now() - start;
addItemTrend.add(dur, { op: "add" });
if (res.status === 200) {
addedItemsCounter.add(1);
}
updateCookies(res);
check(res, {
"add item status ok": (r) => r.status === 200,
});
}
// Fetch cart state
function fetchCart() {
const start = Date.now();
const res = http.get(baseUrl() + "/", { headers: headers() });
const dur = Date.now() - start;
fetchTrend.add(dur, { op: "get" });
updateCookies(res);
check(res, { "fetch status ok": (r) => r.status === 200 });
}
// Occasional checkout trigger
function maybeCheckout() {
if (!cartState.cartid) return;
// Small probability
if (Math.random() < 0.02) {
const start = Date.now();
const res = http.get(baseUrl() + "/checkout", { headers: headers() });
const dur = Date.now() - start;
checkoutTrend.add(dur, { op: "checkout" });
updateCookies(res);
if (res.status === 200) checkoutCounter.add(1);
check(res, { "checkout status ok": (r) => r.status === 200 });
}
}
// ---------------- k6 lifecycle ----------------
export function setup() {
// Provide SKU list length for summary
return { skuCount: SKUS.length };
}
export default function (data) {
group("cart flow", () => {
// Create or reuse cart
ensureCart();
// Random number of item mutations per iteration (1..5)
const ops = 1 + Math.floor(Math.random() * 5);
for (let i = 0; i < ops; i++) {
addRandomItem();
}
// Fetch state
fetchCart();
// Optional checkout attempt
maybeCheckout();
});
// Small think time
sleep(Math.random() * 0.5);
}
export function teardown(data) {
// Optionally we could GET confirmation or clear cart cookie
// Not implemented for load purpose.
console.log(`Test complete. SKU count: ${data.skuCount}`);
}
// ---------------- Summary ----------------
export function handleSummary(data) {
return {
stdout: JSON.stringify(
{
metrics: {
mutations_avg: data.metrics.cart_add_item_duration?.avg,
mutations_p95: data.metrics.cart_add_item_duration?.p(95),
fetch_p95: data.metrics.cart_fetch_duration?.p(95),
checkout_count: data.metrics.cart_checkout_calls?.count,
},
checks: data.root_checks,
},
null,
2,
),
};
}