← console

job 7d549365-e5c2-4626-84d1-d68a2f0d391b

State: FAILED
Quota: servitor-demo
Manifest: 685e1efe78294603048471e6ae5d3464d7fb7e385d3fbf1b46e18988f6350003
Entry override: tirefireind.us/flowmetal-servitor/reinstall-host/reinstall
Worker: worker-5879336e-a44f-45a2-ad04-4943fa33f428
Inbox HWM: 0
Error:
Traceback (most recent call last):
  File <builtin>, in <module>
  * main.star:119, in _reinstall_impl
      fail("escalated host %s to RDS for repair" % hostname)
error: escalated host host-48afa8a4 to RDS for repair
   --> main.star:119:5
    |
119 |     fail("escalated host %s to RDS for repair" % hostname)
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    |

Log

#0 print >>> [noc] pulling next host record off laskey…
#2 req 1 connector:http
{
  "method": "GET",
  "url": "http://laskey.flowmetal.svc.cluster.local:8080/hosts/next"
}
#3 req 1 ok
{
  "body": {
    "config": {
      "disk_layout": "single-nvme",
      "os": "rhel-9",
      "profile": "batch-worker",
      "rack": "rack-b2"
    },
    "hostname": "host-48afa8a4"
  },
  "headers": {
    "content-length": "123",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:30:00 GMT",
    "server": "uvicorn"
  },
  "status": 200.0
}
#4 print >>> [noc] target host host-48afa8a4 (rhel-9 / batch-worker / single-nvme) — handing off to wilson.
#6 print >>> [noc] attempt 1/4: kicking off reinstall of host-48afa8a4.
#8 req 4 connector:http
{
  "body_json": {
    "config": {
      "disk_layout": "single-nvme",
      "os": "rhel-9",
      "profile": "batch-worker",
      "rack": "rack-b2"
    },
    "hostname": "host-48afa8a4"
  },
  "method": "POST",
  "url": "http://wilson.flowmetal.svc.cluster.local:8080/reinstall"
}
#9 req 4 ok
{
  "body": {
    "hostname": "host-48afa8a4",
    "reinstall_id": "ri_41daf7b749f2"
  },
  "headers": {
    "content-length": "61",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:30:10 GMT",
    "server": "uvicorn"
  },
  "status": 202.0
}
#10 req 5 sleep deadline=1776778218.149093472 duration=2s
#11 req 5 ok
null
#12 req 6 connector:http
{
  "method": "GET",
  "url": "http://wilson.flowmetal.svc.cluster.local:8080/reinstall/ri_41daf7b749f2"
}
#13 req 6 ok
{
  "body": {
    "error_kind": "reinstall_broken",
    "hostname": "host-48afa8a4",
    "message": "reinstall broke partway through",
    "reinstall_id": "ri_41daf7b749f2",
    "retryable": true,
    "status": "failed"
  },
  "headers": {
    "content-length": "172",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:30:18 GMT",
    "server": "uvicorn"
  },
  "status": 200.0
}
#14 print >>> [noc] attempt 1 failed (reinstall_broken); looping back to wilson.
#16 print >>> [noc] attempt 2/4: kicking off reinstall of host-48afa8a4.
#18 req 9 connector:http
{
  "body_json": {
    "config": {
      "disk_layout": "single-nvme",
      "os": "rhel-9",
      "profile": "batch-worker",
      "rack": "rack-b2"
    },
    "hostname": "host-48afa8a4"
  },
  "method": "POST",
  "url": "http://wilson.flowmetal.svc.cluster.local:8080/reinstall"
}
#19 req 9 ok
{
  "body": {
    "hostname": "host-48afa8a4",
    "reinstall_id": "ri_6fd254564165"
  },
  "headers": {
    "content-length": "61",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:30:26 GMT",
    "server": "uvicorn"
  },
  "status": 202.0
}
#20 req 10 sleep deadline=1776778235.970760863 duration=2s
#21 req 10 ok
null
#22 req 11 connector:http
{
  "method": "GET",
  "url": "http://wilson.flowmetal.svc.cluster.local:8080/reinstall/ri_6fd254564165"
}
#23 req 11 ok
{
  "body": {
    "eta_seconds": 2.67,
    "hostname": "host-48afa8a4",
    "reinstall_id": "ri_6fd254564165",
    "status": "pending"
  },
  "headers": {
    "content-length": "99",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:30:36 GMT",
    "server": "uvicorn"
  },
  "status": 200.0
}
#24 req 12 sleep deadline=1776778247.111197550 duration=2s
#25 req 12 ok
null
#26 req 13 connector:http
{
  "method": "GET",
  "url": "http://wilson.flowmetal.svc.cluster.local:8080/reinstall/ri_6fd254564165"
}
#27 req 13 ok
{
  "body": {
    "error_kind": "reinstall_broken",
    "hostname": "host-48afa8a4",
    "message": "reinstall broke partway through",
    "reinstall_id": "ri_6fd254564165",
    "retryable": true,
    "status": "failed"
  },
  "headers": {
    "content-length": "172",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:30:47 GMT",
    "server": "uvicorn"
  },
  "status": 200.0
}
#28 print >>> [noc] attempt 2 failed (reinstall_broken); looping back to wilson.
#30 print >>> [noc] attempt 3/4: kicking off reinstall of host-48afa8a4.
#32 req 16 connector:http
{
  "body_json": {
    "config": {
      "disk_layout": "single-nvme",
      "os": "rhel-9",
      "profile": "batch-worker",
      "rack": "rack-b2"
    },
    "hostname": "host-48afa8a4"
  },
  "method": "POST",
  "url": "http://wilson.flowmetal.svc.cluster.local:8080/reinstall"
}
#33 req 16 ok
{
  "body": {
    "hostname": "host-48afa8a4",
    "reinstall_id": "ri_cdacea815c6e"
  },
  "headers": {
    "content-length": "61",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:30:54 GMT",
    "server": "uvicorn"
  },
  "status": 202.0
}
#34 req 17 sleep deadline=1776778262.909417312 duration=2s
#35 req 17 ok
null
#36 req 18 connector:http
{
  "method": "GET",
  "url": "http://wilson.flowmetal.svc.cluster.local:8080/reinstall/ri_cdacea815c6e"
}
#37 req 18 ok
{
  "body": {
    "error_kind": "reinstall_broken",
    "hostname": "host-48afa8a4",
    "message": "reinstall broke partway through",
    "reinstall_id": "ri_cdacea815c6e",
    "retryable": true,
    "status": "failed"
  },
  "headers": {
    "content-length": "172",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:31:03 GMT",
    "server": "uvicorn"
  },
  "status": 200.0
}
#38 print >>> [noc] attempt 3 failed (reinstall_broken); looping back to wilson.
#40 print >>> [noc] attempt 4/4: kicking off reinstall of host-48afa8a4.
#42 req 21 connector:http
{
  "body_json": {
    "config": {
      "disk_layout": "single-nvme",
      "os": "rhel-9",
      "profile": "batch-worker",
      "rack": "rack-b2"
    },
    "hostname": "host-48afa8a4"
  },
  "method": "POST",
  "url": "http://wilson.flowmetal.svc.cluster.local:8080/reinstall"
}
#43 req 21 ok
{
  "body": {
    "hostname": "host-48afa8a4",
    "reinstall_id": "ri_4fef660dfae2"
  },
  "headers": {
    "content-length": "61",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:31:06 GMT",
    "server": "uvicorn"
  },
  "status": 202.0
}
#44 req 22 sleep deadline=1776778277.111812237 duration=2s
#45 req 22 ok
null
#46 req 23 connector:http
{
  "method": "GET",
  "url": "http://wilson.flowmetal.svc.cluster.local:8080/reinstall/ri_4fef660dfae2"
}
#47 req 23 ok
{
  "body": {
    "error_kind": "wilson_forgot",
    "hostname": "host-48afa8a4",
    "message": "wilson forgot this reinstall ever happened",
    "reinstall_id": "ri_4fef660dfae2",
    "retryable": true,
    "status": "forgotten"
  },
  "headers": {
    "content-length": "183",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:31:17 GMT",
    "server": "uvicorn"
  },
  "status": 200.0
}
#48 print >>> [noc] attempt 4: wilson claims it never heard of ri_4fef660dfae2. classic.
#50 print >>> [noc] escalating host-48afa8a4 to RDS: wilson exhausted 4 attempts.
#52 req 26 connector:http
{
  "body_json": {
    "hostname": "host-48afa8a4",
    "reason": "wilson exhausted 4 attempts"
  },
  "method": "POST",
  "url": "http://rds.flowmetal.svc.cluster.local:8080/tickets"
}
#53 req 26 ok
{
  "body": {
    "created_at": 1776778286.0,
    "hostname": "host-48afa8a4",
    "reason": "wilson exhausted 4 attempts",
    "ticket_id": "rds_427d036bf23b"
  },
  "headers": {
    "content-length": "122",
    "content-type": "application/json",
    "date": "Tue, 21 Apr 2026 13:31:25 GMT",
    "server": "uvicorn"
  },
  "status": 201.0
}
#54 print >>> [noc] RDS ticket rds_427d036bf23b filed for host-48afa8a4 — paging humans.
#56 FAILED worker=worker-5879336e-a44f-45a2-ad04-4943fa33f428