Complete refactor to new grpc control plane and only http proxy for carts (#4)
All checks were successful
Build and Publish / Metadata (push) Successful in 11s
Build and Publish / BuildAndDeployAmd64 (push) Successful in 1m14s
Build and Publish / BuildAndDeployArm64 (push) Successful in 3m54s

Co-authored-by: matst80 <mats.tornberg@gmail.com>
Reviewed-on: https://git.tornberg.me/mats/go-cart-actor/pulls/4
Co-authored-by: Mats Törnberg <mats@tornberg.me>
Co-committed-by: Mats Törnberg <mats@tornberg.me>
This commit was merged in pull request #4.
This commit is contained in:
2025-10-14 22:31:12 +02:00
committed by mats
parent f735540c3d
commit f5014fe906
88 changed files with 9836 additions and 5646 deletions

327
grafana_dashboard_cart.json Normal file
View File

@@ -0,0 +1,327 @@
{
"uid": "cart-actors",
"title": "Cart Actor Cluster",
"timezone": "browser",
"refresh": "30s",
"schemaVersion": 38,
"version": 1,
"editable": true,
"graphTooltip": 0,
"panels": [
{
"type": "row",
"title": "Overview",
"gridPos": { "x": 0, "y": 0, "w": 24, "h": 1 },
"id": 1,
"collapsed": false
},
{
"type": "stat",
"title": "Active Grains",
"id": 2,
"gridPos": { "x": 0, "y": 1, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "cart_active_grains" }
],
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
}
},
{
"type": "stat",
"title": "Grains In Pool",
"id": 3,
"gridPos": { "x": 6, "y": 1, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "cart_grains_in_pool" }
],
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
}
},
{
"type": "stat",
"title": "Pool Usage %",
"id": 4,
"gridPos": { "x": 12, "y": 1, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "cart_grain_pool_usage * 100" }
],
"units": "percent",
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"] }
}
},
{
"type": "stat",
"title": "Connected Remotes",
"id": 5,
"gridPos": { "x": 18, "y": 1, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "connected_remotes" }
],
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"] }
}
},
{
"type": "row",
"title": "Mutations",
"gridPos": { "x": 0, "y": 5, "w": 24, "h": 1 },
"id": 6,
"collapsed": false
},
{
"type": "timeseries",
"title": "Mutation Rate (1m)",
"id": 7,
"gridPos": { "x": 0, "y": 6, "w": 12, "h": 8 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "rate(cart_mutations_total[1m])", "legendFormat": "mutations/s" },
{ "refId": "B", "expr": "rate(cart_mutation_failures_total[1m])", "legendFormat": "failures/s" }
],
"fieldConfig": { "defaults": { "unit": "ops" } }
},
{
"type": "stat",
"title": "Failure % (5m)",
"id": 8,
"gridPos": { "x": 12, "y": 6, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{
"refId": "A",
"expr": "100 * (increase(cart_mutation_failures_total[5m]) / clamp_max(increase(cart_mutations_total[5m]), 1))"
}
],
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"] }
}
},
{
"type": "timeseries",
"title": "Mutation Latency Quantiles",
"id": 9,
"gridPos": { "x": 18, "y": 6, "w": 6, "h": 8 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{
"refId": "A",
"expr": "histogram_quantile(0.50, sum(rate(cart_mutation_latency_seconds_bucket[5m])) by (le))",
"legendFormat": "p50"
},
{
"refId": "B",
"expr": "histogram_quantile(0.90, sum(rate(cart_mutation_latency_seconds_bucket[5m])) by (le))",
"legendFormat": "p90"
},
{
"refId": "C",
"expr": "histogram_quantile(0.99, sum(rate(cart_mutation_latency_seconds_bucket[5m])) by (le))",
"legendFormat": "p99"
}
],
"fieldConfig": { "defaults": { "unit": "s" } }
},
{
"type": "row",
"title": "Event Log",
"gridPos": { "x": 0, "y": 14, "w": 24, "h": 1 },
"id": 10,
"collapsed": false
},
{
"type": "timeseries",
"title": "Event Append Rate (5m)",
"id": 11,
"gridPos": { "x": 0, "y": 15, "w": 8, "h": 6 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "rate(cart_event_log_appends_total[5m])", "legendFormat": "appends/s" }
]
},
{
"type": "timeseries",
"title": "Event Bytes Written Rate (5m)",
"id": 12,
"gridPos": { "x": 8, "y": 15, "w": 8, "h": 6 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "rate(cart_event_log_bytes_written_total[5m])", "legendFormat": "bytes/s" }
],
"fieldConfig": { "defaults": { "unit": "Bps" } }
},
{
"type": "stat",
"title": "Existing Log Files",
"id": 13,
"gridPos": { "x": 16, "y": 15, "w": 4, "h": 3 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_event_log_files_existing" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Last Append Age (s)",
"id": 14,
"gridPos": { "x": 20, "y": 15, "w": 4, "h": 3 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "(time() - cart_event_log_last_append_unix)" }
],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Replay Failures Total",
"id": 15,
"gridPos": { "x": 16, "y": 18, "w": 4, "h": 3 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_event_log_replay_failures_total" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Replay Duration p95 (5m)",
"id": 16,
"gridPos": { "x": 20, "y": 18, "w": 4, "h": 3 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{
"refId": "A",
"expr": "histogram_quantile(0.95, sum(rate(cart_event_log_replay_duration_seconds_bucket[5m])) by (le))"
}
],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "fieldConfig": { "defaults": { "unit": "s" } } }
},
{
"type": "row",
"title": "Grain Lifecycle",
"gridPos": { "x": 0, "y": 21, "w": 24, "h": 1 },
"id": 17,
"collapsed": false
},
{
"type": "timeseries",
"title": "Spawn & Lookup Rates (1m)",
"id": 18,
"gridPos": { "x": 0, "y": 22, "w": 12, "h": 8 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "rate(cart_grain_spawned_total[1m])", "legendFormat": "spawns/s" },
{ "refId": "B", "expr": "rate(cart_grain_lookups_total[1m])", "legendFormat": "lookups/s" }
]
},
{
"type": "stat",
"title": "Negotiations Rate (5m)",
"id": 19,
"gridPos": { "x": 12, "y": 22, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{ "refId": "A", "expr": "rate(cart_remote_negotiation_total[5m])" }
],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "orientation": "horizontal" }
},
{
"type": "stat",
"title": "Mutations Total",
"id": 20,
"gridPos": { "x": 18, "y": 22, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_mutations_total" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "row",
"title": "Event Log Errors",
"gridPos": { "x": 0, "y": 30, "w": 24, "h": 1 },
"id": 21,
"collapsed": false
},
{
"type": "stat",
"title": "Unknown Event Types",
"id": 22,
"gridPos": { "x": 0, "y": 31, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_event_log_unknown_types_total" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Event Mutation Errors",
"id": 23,
"gridPos": { "x": 6, "y": 31, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_event_log_mutation_errors_total" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Replay Success Total",
"id": 24,
"gridPos": { "x": 12, "y": 31, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [{ "refId": "A", "expr": "cart_event_log_replay_total" }],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } }
},
{
"type": "stat",
"title": "Replay Duration p50 (5m)",
"id": 25,
"gridPos": { "x": 18, "y": 31, "w": 6, "h": 4 },
"datasource": "${DS_PROMETHEUS}",
"targets": [
{
"refId": "A",
"expr": "histogram_quantile(0.50, sum(rate(cart_event_log_replay_duration_seconds_bucket[5m])) by (le))"
}
],
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "fieldConfig": { "defaults": { "unit": "s" } } }
}
],
"templating": {
"list": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"type": "datasource",
"query": "prometheus",
"current": { "text": "Prometheus", "value": "Prometheus" }
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": ["5s","10s","30s","1m","5m","15m","30m","1h"],
"time_options": ["5m","15m","30m","1h","6h","12h","24h","2d","7d"]
}
}