more refactoring
Some checks failed
Build and Publish / BuildAndDeploy (push) Successful in 3m6s
Build and Publish / BuildAndDeployAmd64 (push) Has been cancelled

matst80
2025-10-10 13:22:36 +00:00
parent c30be581cd
commit 159253b8b0
13 changed files with 1475 additions and 295 deletions


@@ -10,21 +10,21 @@ import (
"google.golang.org/grpc"
)
-// TestThreeNodeMajorityOwnership exercises the revised majority quorum semantics
-// with a 3-node cluster (A,B,C). After the quorum refactor, a 3-node cluster
-// (all=2 remotes) now requires only floor((all+1)/2) = 1 remote acceptance
-// instead of unanimity. Since our current ConfirmOwner implementation always
-// accepts, we mainly validate:
+// TestThreeNodeMajorityOwnership validates ring-determined ownership and routing
+// in a 3-node cluster (A,B,C) using the consistent hashing ring (no quorum RPC).
+// The previous ConfirmOwner / quorum semantics have been removed; ownership is
+// deterministic and derived from the ring.
//
-// 1. Ownership is established on the first node that mutates (nodeA).
+// It validates:
+// 1. The ring selects exactly one primary owner for a new cart.
// 2. Other nodes (B,C) do NOT create local grains for the cart.
-// 3. Remote proxies are installed on B and C (so they can route mutations).
-// 4. A remote mutation from nodeB updates state visible from nodeC.
+// 3. Remote proxies are installed lazily so remote mutations can route.
+// 4. A remote mutation from one non-owner updates state visible on another.
// 5. Authoritative state on the owner matches remote observations.
// 6. (Future) This scaffolds replication tests when RF>1 is enabled.
//
-// NOTE: ConfirmOwner currently always accepts, so we cannot directly observe
-// a reduced acceptance threshold here without introducing a test hook that
-// can force a rejection. This test still validates that multi-node routing
-// works under the new quorum rule for N=3 (where previously unanimity was required).
+// (Legacy comments about ConfirmOwner acceptance thresholds have been removed.)
+// (Function name retained for historical continuity.)
func TestThreeNodeMajorityOwnership(t *testing.T) {
const (
addrA = "127.0.0.1:18181"
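The rewritten doc comment above says ownership is now derived deterministically from the consistent hashing ring rather than negotiated via ConfirmOwner. As a rough illustration of that idea (not the repository's actual ring code; the `hashRing` type, the virtual-point count, and FNV hashing below are all assumptions), a minimal consistent-hash ring that resolves exactly one owner per key could look like this:

```go
package main

import (
	"fmt"
	"hash/fnv"
	"sort"
)

// hashRing is a minimal consistent-hash ring: each host contributes a few
// virtual points; a key is owned by the first point clockwise from its hash.
type hashRing struct {
	points []uint32
	owner  map[uint32]string
}

func newHashRing(hosts []string, replicas int) *hashRing {
	r := &hashRing{owner: map[uint32]string{}}
	for _, h := range hosts {
		for i := 0; i < replicas; i++ {
			p := hashKey(fmt.Sprintf("%s#%d", h, i))
			r.points = append(r.points, p)
			r.owner[p] = h
		}
	}
	sort.Slice(r.points, func(i, j int) bool { return r.points[i] < r.points[j] })
	return r
}

func hashKey(s string) uint32 {
	h := fnv.New32a()
	h.Write([]byte(s))
	return h.Sum32()
}

// ownerOf returns the single deterministic owner for a key.
func (r *hashRing) ownerOf(key string) string {
	k := hashKey(key)
	i := sort.Search(len(r.points), func(i int) bool { return r.points[i] >= k })
	if i == len(r.points) {
		i = 0 // wrap around the ring
	}
	return r.owner[r.points[i]]
}

func main() {
	ring := newHashRing([]string{"hostA", "hostB", "hostC"}, 64)
	// Every node that builds the same ring resolves the same owner, with no RPC.
	fmt.Println(ring.ownerOf("cart-123"))
}
```

Because every node builds the ring from the same membership, an `ownerOf`-style lookup returns the same host everywhere, which is what lets the test ask any pool for `DebugOwnerHost(cartID)` and still assert exactly one local grain.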
@@ -102,6 +102,11 @@ func TestThreeNodeMajorityOwnership(t *testing.T) {
link(syncedC, hostA, addrA)
link(syncedC, hostB, addrB)
+// Rebuild rings after manual linking so ownership resolution is immediate.
+syncedA.ForceRingRefresh()
+syncedB.ForceRingRefresh()
+syncedC.ForceRingRefresh()
// Allow brief stabilization
time.Sleep(200 * time.Millisecond)
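The added `ForceRingRefresh()` calls exist because the test links peers by hand, so each node's cached ring would otherwise lag behind the new membership. A toy sketch of that rebuild-after-link pattern (the `pool`, `link`, and `forceRingRefresh` names below are hypothetical stand-ins, not the real `SyncedPool` internals):

```go
package main

import (
	"fmt"
	"sync"
)

// pool is a toy stand-in: membership plus a cached, membership-derived ring.
type pool struct {
	mu      sync.RWMutex
	self    string
	members map[string]struct{}
	ring    []string // stand-in for the consistent-hash ring
}

func (p *pool) link(host string) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.members[host] = struct{}{}
	// The cached ring is now stale until a refresh rebuilds it.
}

// forceRingRefresh rebuilds the cached ring from current membership so the
// very next ownership lookup already sees all manually linked hosts.
func (p *pool) forceRingRefresh() {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.ring = p.ring[:0]
	p.ring = append(p.ring, p.self)
	for h := range p.members {
		p.ring = append(p.ring, h)
	}
}

func main() {
	p := &pool{self: "hostA", members: map[string]struct{}{}}
	p.link("hostB")
	p.link("hostC")
	p.forceRingRefresh() // without this, lookups could race the background rebuild
	fmt.Println(len(p.ring), "hosts on the ring")
}
```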
@@ -121,62 +126,71 @@ func TestThreeNodeMajorityOwnership(t *testing.T) {
Country: "se",
}
-// Apply on nodeA (ownership should establish here)
-if _, err := syncedA.Apply(cartID, addItem); err != nil {
-t.Fatalf("nodeA Apply addItem error: %v", err)
+// Determine ring-designated owner (may be any of the three hosts)
+ownerPre := syncedA.DebugOwnerHost(cartID)
+if ownerPre != hostA && ownerPre != hostB && ownerPre != hostC {
+t.Fatalf("ring returned unexpected owner %s (not in set {%s,%s,%s})", ownerPre, hostA, hostB, hostC)
}
+var ownerSynced *SyncedPool
+var ownerPool *GrainLocalPool
+switch ownerPre {
+case hostA:
+ownerSynced, ownerPool = syncedA, poolA
+case hostB:
+ownerSynced, ownerPool = syncedB, poolB
+case hostC:
+ownerSynced, ownerPool = syncedC, poolC
+}
+// Pick two distinct non-owner nodes for remote mutation assertions
+var remote1Synced, remote2Synced *SyncedPool
+switch ownerPre {
+case hostA:
+remote1Synced, remote2Synced = syncedB, syncedC
+case hostB:
+remote1Synced, remote2Synced = syncedA, syncedC
+case hostC:
+remote1Synced, remote2Synced = syncedA, syncedB
+}
-// Small wait for ConfirmOwner RPC propagation & remote proxy spawn
+// Apply on the ring-designated owner
+if _, err := ownerSynced.Apply(cartID, addItem); err != nil {
+t.Fatalf("owner %s Apply addItem error: %v", ownerPre, err)
+}
+// Small wait for remote proxy spawn (ring ownership already deterministic)
time.Sleep(150 * time.Millisecond)
// Assert only nodeA has local grain
localCount := 0
-if _, ok := poolA.grains[cartID]; ok {
+if _, ok := poolA.GetGrains()[cartID]; ok {
localCount++
}
-if _, ok := poolB.grains[cartID]; ok {
+if _, ok := poolB.GetGrains()[cartID]; ok {
localCount++
}
-if _, ok := poolC.grains[cartID]; ok {
+if _, ok := poolC.GetGrains()[cartID]; ok {
localCount++
}
if localCount != 1 {
t.Fatalf("expected exactly 1 local grain, got %d", localCount)
}
-if _, ok := poolA.grains[cartID]; !ok {
-t.Fatalf("expected nodeA to own cart locally")
+if _, ok := ownerPool.GetGrains()[cartID]; !ok {
+t.Fatalf("expected owner %s to hold local grain", ownerPre)
}
-// Verify nodeB and nodeC have remote proxies (best-effort; if not present yet, wait briefly)
-waitForRemote := func(sp *SyncedPool, label string) {
-deadline := time.Now().Add(500 * time.Millisecond)
-for {
-sp.mu.RLock()
-_, remoteOk := sp.remoteIndex[cartID]
-sp.mu.RUnlock()
-if remoteOk {
-return
-}
-if time.Now().After(deadline) {
-t.Fatalf("%s expected remote proxy for cart not found (timeout)", label)
-}
-time.Sleep(25 * time.Millisecond)
-}
-}
-waitForRemote(syncedB, "nodeB")
-waitForRemote(syncedC, "nodeC")
+// Remote proxies may not pre-exist; first remote mutation will trigger SpawnRemoteGrain lazily.
-// Issue remote mutation from nodeB -> ChangeQuantity (increase)
+// Issue remote mutation from one non-owner -> ChangeQuantity (increase)
change := &messages.ChangeQuantity{
Id: 1,
Quantity: 3,
}
-if _, err := syncedB.Apply(cartID, change); err != nil {
-t.Fatalf("nodeB remote Apply changeQuantity error: %v", err)
+if _, err := remote1Synced.Apply(cartID, change); err != nil {
+t.Fatalf("remote mutation (remote1) changeQuantity error: %v", err)
}
// Validate updated state visible via nodeC
-stateC, err := syncedC.Get(cartID)
+stateC, err := remote2Synced.Get(cartID)
if err != nil {
t.Fatalf("nodeC Get error: %v", err)
}
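The new test path relies on lazy remote proxies: a non-owner's first mutation for a cart triggers `SpawnRemoteGrain` and then forwards through the installed proxy, so the old `waitForRemote` polling is no longer needed. A self-contained sketch of that lazy-spawn-and-forward flow under assumed names (`remoteClient`, `dial`, and `ownerOf` are illustrative; the real pool forwards over gRPC):

```go
package main

import (
	"fmt"
	"sync"
)

// remoteClient is whatever transport forwards a mutation to the owning node
// (gRPC in the real pool); an interface keeps this sketch runnable.
type remoteClient interface {
	Apply(grainID string, msg any) (any, error)
}

type loopback struct{ host string }

func (l loopback) Apply(grainID string, msg any) (any, error) {
	return fmt.Sprintf("%s applied %v to %s", l.host, msg, grainID), nil
}

// pool sketches the lazy-proxy idea: a non-owner does not pre-create proxies;
// the first mutation for a grain it does not own installs one.
type pool struct {
	mu      sync.Mutex
	self    string
	ownerOf func(grainID string) string // deterministic ring lookup
	proxies map[string]remoteClient     // grainID -> proxy to its owner
	dial    func(host string) remoteClient
}

func (p *pool) Apply(grainID string, msg any) (any, error) {
	owner := p.ownerOf(grainID)
	if owner == p.self {
		// Local grain path (elided in this sketch).
		return fmt.Sprintf("%s applied %v locally", p.self, msg), nil
	}
	p.mu.Lock()
	proxy, ok := p.proxies[grainID]
	if !ok {
		proxy = p.dial(owner) // lazily spawn the remote proxy on first use
		p.proxies[grainID] = proxy
	}
	p.mu.Unlock()
	return proxy.Apply(grainID, msg)
}

func main() {
	p := &pool{
		self:    "hostB",
		ownerOf: func(string) string { return "hostA" },
		proxies: map[string]remoteClient{},
		dial:    func(host string) remoteClient { return loopback{host: host} },
	}
	out, _ := p.Apply("cart-123", "ChangeQuantity{Id:1, Quantity:3}")
	fmt.Println(out) // forwarded through the lazily created proxy
}
```

The mutation issued from `remote1Synced` in the test follows this shape of path: the ring says the owner is elsewhere, a proxy is installed on first use, and the forwarded ChangeQuantity becomes visible when `remote2Synced.Get(cartID)` routes its read the same way.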