Add ESP-NOW heartbeat, client timeout, and slave reconnect.

Slaves send HEARTBEAT every 1s; the master marks clients inactive after
3s without traffic and reactivates on reconnect. CLIENT_INFO reports
last_ping as milliseconds since the last packet, not uptime.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
simon 2026-05-18 22:39:10 +02:00
parent 16bfbd1091
commit 6cdca4f3ad
6 changed files with 282 additions and 85 deletions

View File

@ -31,7 +31,7 @@ Example output:
```
clients (2):
[0] id=42 mac=aabbccddeeff ver=1 available=true used=false last_ping=12345 last_success_ping=12345
[0] id=42 mac=aabbccddeeff ver=1 available=true used=false last_ping=250 last_success_ping=250
```
## Regenerate protobuf

View File

@ -77,6 +77,7 @@ WiFi is brought up in STA mode (no AP association). Channel = `app_config.networ
|------|-------|-----------|---------|
| `DISCOVER` | 1 | Master → broadcast `FF:FF:FF:FF:FF:FF` | Master is searching for slaves |
| `SLAVE_INFO` | 2 | Slave → master | Slave registration |
| `HEARTBEAT` | 3 | Slave → master | Keep-alive (header only, no extra payload) |
`SLAVE_INFO` payload (after header bytes 0–2):
@ -88,9 +89,9 @@ WiFi is brought up in STA mode (no AP association). Channel = `app_config.networ
| `available` | uint8 | 1 = available |
| `used` | uint8 | 0 = unused |
**Master:** task `espnow_disc` sends `DISCOVER` every **500 ms** on the configured network. Logs `slave joined id=… mac=… ver=…` when a new slave is seen (up to 16 entries).
**Master:** task `espnow_disc` sends `DISCOVER` every **500 ms** on the configured network. Logs `slave joined id=… mac=… ver=…` when a new slave is seen (up to 16 entries). Task `espnow_mon` runs every **1 s** and marks a client **inactive** (`available = false`) if no `SLAVE_INFO` or `HEARTBEAT` was received for **3 s** (three missed 1 s heartbeats). A later heartbeat sets `available` true again and logs reactivation.
**Slave:** on first matching `DISCOVER`, logs `joined network N, master …`, sends `SLAVE_INFO` once, then ignores further discovers from that master (no repeat log or reply).
**Slave:** on first matching `DISCOVER`, logs `joined network N, master …`, sends `SLAVE_INFO` once, then sends `HEARTBEAT` to the master every **1 s**. While joined, periodic discovers from the same master refresh a “master alive” timer; if no discover arrives for **5 s**, the slave treats the master as lost, clears join state, and will register again on the next discover (reconnect). Discover from a different master is ignored while already joined.
Monitor via USB-JTAG (`/dev/ttyACM0`) while using a USB-serial adapter on **GPIO2/3** (`/dev/ttyUSB0`) for UART — they are different interfaces.
@ -188,7 +189,7 @@ Encoding: `uart_send_uart_message()` in `uart_proto.c`.
**Response:** payload `04` + nanopb `UartMessage` with `client_info_response.clients` — one `ClientInfo` per registered slave (from ESP-NOW `SLAVE_INFO`).
Fields per client: `id`, `mac`, `version`, `available`, `used`, `last_ping`, `last_success_ping` (milliseconds since boot, updated on each `SLAVE_INFO`).
Fields per client: `id`, `mac`, `version`, `available`, `used`, `last_ping`, `last_success_ping`. The last two are **milliseconds since** the last packet / last successful heartbeat (computed when `CLIENT_INFO` is answered; typically 0–1000 while the slave is heartbeating every 1 s).
## Client registry
@ -196,9 +197,11 @@ Fields per client: `id`, `mac`, `version`, `available`, `used`, `last_ping`, `la
|-----|-------------|
| `client_registry_init()` | Clear all slots (called from `esp_now_comm_init`) |
| `client_registry_upsert(mac, id, version, …)` | Add or refresh client; updates ping timestamps |
| `client_registry_heartbeat(mac, id, version, …)` | Same as upsert for heartbeats; reactivates inactive clients |
| `client_registry_check_timeouts(timeout_ms)` | Mark stale clients inactive (master monitor task) |
| `client_registry_count()` / `client_registry_at(i)` | Iterate for UART encoding |
Slaves register when the master receives `SLAVE_INFO` on the matching network.
Slaves register when the master receives `SLAVE_INFO` on the matching network; `HEARTBEAT` keeps them marked available.
## Host tool (`goTool/`)

View File

@ -1,8 +1,12 @@
#include "client_registry.h"
#include "esp_log.h"
#include "freertos/FreeRTOS.h"
#include "freertos/idf_additions.h"
#include <stdio.h>
#include <string.h>
static const char *TAG = "[CLIENTS]";
typedef struct {
client_info_t info;
bool active;
@ -10,27 +14,67 @@ typedef struct {
static client_slot_t s_clients[CLIENT_REGISTRY_MAX];
static uint32_t now_ms(void) {
/**
 * Current time in milliseconds, obtained by scaling the FreeRTOS tick
 * counter by the tick period and casting the product to uint32_t.
 */
uint32_t client_registry_now_ms(void) {
  const uint32_t elapsed_ms =
      (uint32_t)(xTaskGetTickCount() * portTICK_PERIOD_MS);
  return elapsed_ms;
}
/**
 * Milliseconds elapsed between `timestamp` and now.
 *
 * A timestamp of 0 is reported as 0 elapsed instead of "now - 0".
 * The subtraction is unsigned 32-bit, so it stays correct across a single
 * wrap of the millisecond counter.
 */
uint32_t client_registry_ms_since(uint32_t timestamp) {
  return (timestamp != 0) ? (client_registry_now_ms() - timestamp) : 0;
}
/* File-local shorthand for client_registry_now_ms(). */
static uint32_t now_ms(void) {
  return client_registry_now_ms();
}
/* True when the first CLIENT_MAC_LEN bytes of `a` and `b` are identical. */
static bool mac_equal(const uint8_t *a, const uint8_t *b) {
  const int diff = memcmp(a, b, CLIENT_MAC_LEN);
  return diff == 0;
}
/*
 * Format the six bytes at `mac` as lower-case, colon-separated hex
 * ("aa:bb:cc:dd:ee:ff") into `out`.
 *
 * `out_len` is the capacity of `out`; 18 bytes hold the full string plus the
 * terminator. Shorter buffers receive a truncated, NUL-terminated prefix
 * (snprintf semantics).
 */
static void mac_to_str(const uint8_t *mac, char *out, size_t out_len) {
  unsigned int octet[6];
  for (size_t i = 0; i < 6; i++) {
    octet[i] = mac[i];
  }

  (void)snprintf(out, out_len, "%02x:%02x:%02x:%02x:%02x:%02x", octet[0],
                 octet[1], octet[2], octet[3], octet[4], octet[5]);
}
static client_slot_t *find_slot(const uint8_t mac[CLIENT_MAC_LEN]) {
for (size_t i = 0; i < CLIENT_REGISTRY_MAX; i++) {
if (s_clients[i].active && mac_equal(s_clients[i].info.mac, mac)) {
return &s_clients[i];
}
}
return NULL;
}
/*
 * Return the slot for `mac`, claiming the first free slot when the MAC is not
 * registered yet.
 *
 * A newly claimed slot is marked active and gets `mac` copied in; its other
 * fields are left as they were. `*out_is_new` (optional) reports whether a
 * slot was claimed by this call. When the table is full the function returns
 * NULL and does not write `*out_is_new`.
 */
static client_slot_t *alloc_slot(const uint8_t mac[CLIENT_MAC_LEN],
                                 bool *out_is_new) {
  client_slot_t *entry = find_slot(mac);
  bool created = false;

  if (entry == NULL) {
    for (size_t idx = 0; idx < CLIENT_REGISTRY_MAX && entry == NULL; idx++) {
      if (!s_clients[idx].active) {
        entry = &s_clients[idx];
      }
    }
    if (entry == NULL) {
      return NULL; /* every slot is in use */
    }

    entry->active = true;
    memcpy(entry->info.mac, mac, CLIENT_MAC_LEN);
    created = true;
  }

  if (out_is_new != NULL) {
    *out_is_new = created;
  }
  return entry;
}
/** Reset the registry by zero-filling the whole slot table. */
void client_registry_init(void) {
  memset(s_clients, 0, sizeof s_clients);
}
const client_info_t *
client_registry_find_by_mac(const uint8_t mac[CLIENT_MAC_LEN]) {
if (mac == NULL) {
return NULL;
}
for (size_t i = 0; i < CLIENT_REGISTRY_MAX; i++) {
if (s_clients[i].active && mac_equal(s_clients[i].info.mac, mac)) {
return &s_clients[i].info;
}
}
return NULL;
client_slot_t *slot = find_slot(mac);
return slot != NULL ? &slot->info : NULL;
}
esp_err_t client_registry_upsert(const uint8_t mac[CLIENT_MAC_LEN], uint32_t id,
@ -41,37 +85,18 @@ esp_err_t client_registry_upsert(const uint8_t mac[CLIENT_MAC_LEN], uint32_t id,
}
uint32_t ts = now_ms();
client_slot_t *slot = NULL;
bool is_new = false;
for (size_t i = 0; i < CLIENT_REGISTRY_MAX; i++) {
if (s_clients[i].active && mac_equal(s_clients[i].info.mac, mac)) {
slot = &s_clients[i];
break;
}
}
if (slot == NULL) {
for (size_t i = 0; i < CLIENT_REGISTRY_MAX; i++) {
if (!s_clients[i].active) {
slot = &s_clients[i];
slot->active = true;
memcpy(slot->info.mac, mac, CLIENT_MAC_LEN);
is_new = true;
break;
}
}
client_slot_t *slot = alloc_slot(mac, &is_new);
if (slot == NULL) {
return ESP_ERR_NO_MEM;
}
}
slot->info.id = id;
slot->info.version = version;
slot->info.available = available;
slot->info.used = used;
slot->info.last_ping = ts;
slot->info.last_success_ping = ts;
slot->info.last_ping_at = ts;
slot->info.last_success_ping_at = ts;
if (out_is_new != NULL) {
*out_is_new = is_new;
@ -79,6 +104,61 @@ esp_err_t client_registry_upsert(const uint8_t mac[CLIENT_MAC_LEN], uint32_t id,
return ESP_OK;
}
/*
 * Record a packet from the client identified by `mac`.
 *
 * Finds the client's slot (claiming a free one if the MAC is unknown),
 * refreshes id/version/used, forces available=true and stamps both ping
 * timestamps with the current time.
 *
 * out_is_new      (optional) true when a slot was claimed by this call.
 * out_reactivated (optional) true when the client was previously marked
 *                 unavailable OR was newly added by this call.
 *
 * Returns ESP_ERR_INVALID_ARG for a NULL mac, ESP_ERR_NO_MEM when no slot is
 * free, ESP_OK otherwise.
 */
esp_err_t client_registry_heartbeat(const uint8_t mac[CLIENT_MAC_LEN],
                                    uint32_t id, uint32_t version, bool used,
                                    bool *out_is_new, bool *out_reactivated) {
  if (mac == NULL) {
    return ESP_ERR_INVALID_ARG;
  }

  const uint32_t stamp = now_ms();

  bool created = false;
  client_slot_t *entry = alloc_slot(mac, &created);
  if (entry == NULL) {
    return ESP_ERR_NO_MEM;
  }

  /* Sample the old availability before it is overwritten below. */
  const bool was_inactive = !created && !entry->info.available;

  entry->info.id = id;
  entry->info.version = version;
  entry->info.used = used;
  entry->info.available = true;
  entry->info.last_ping_at = stamp;
  entry->info.last_success_ping_at = stamp;

  if (out_is_new != NULL) {
    *out_is_new = created;
  }
  if (out_reactivated != NULL) {
    *out_reactivated = was_inactive || created;
  }
  return ESP_OK;
}
/*
 * Mark every active, available client as unavailable when more than
 * `timeout_ms` milliseconds have passed since its last_success_ping_at, and
 * log a warning for each client that is demoted.
 *
 * `now` is sampled once before the scan. The receive path can stamp a client
 * with a later time while the scan is still running (it is invoked from the
 * ESP-NOW receive callback). For such a client the unsigned subtraction wraps
 * to a value close to 2^32, which would look like a huge silence and demote a
 * client that has just been heard. Differences in the upper half of the
 * 32-bit range are therefore treated as "stamp is newer than our sample" and
 * skipped.
 */
void client_registry_check_timeouts(uint32_t timeout_ms) {
  const uint32_t now = now_ms();

  for (size_t i = 0; i < CLIENT_REGISTRY_MAX; i++) {
    if (!s_clients[i].active || !s_clients[i].info.available) {
      continue;
    }

    const uint32_t elapsed = now - s_clients[i].info.last_success_ping_at;
    if (elapsed >= 0x80000000u) {
      /* Stamp was refreshed after `now` was taken: client is alive. */
      continue;
    }
    if (elapsed <= timeout_ms) {
      continue;
    }

    s_clients[i].info.available = false;

    char mac_str[18];
    mac_to_str(s_clients[i].info.mac, mac_str, sizeof(mac_str));
    ESP_LOGW(TAG, "client inactive id=%lu mac=%s (no heartbeat for %lu ms)",
             (unsigned long)s_clients[i].info.id, mac_str,
             (unsigned long)elapsed);
  }
}
size_t client_registry_count(void) {
size_t n = 0;
for (size_t i = 0; i < CLIENT_REGISTRY_MAX; i++) {

View File

@ -14,18 +14,38 @@ typedef struct {
bool available;
bool used;
uint8_t mac[CLIENT_MAC_LEN];
uint32_t last_ping;
uint32_t last_success_ping;
/** Milliseconds since boot when last packet was received from this client. */
uint32_t last_ping_at;
/** Milliseconds since boot when last heartbeat / SLAVE_INFO was accepted. */
uint32_t last_success_ping_at;
uint32_t version;
} client_info_t;
void client_registry_init(void);
/** Register or refresh a client; sets last_ping and last_success_ping to now. */
/**
 * Milliseconds since boot (same clock as stored ping timestamps).
 * Derived from the FreeRTOS tick counter and returned as a 32-bit value.
 */
uint32_t client_registry_now_ms(void);
/**
 * Ms elapsed since timestamp; 0 if timestamp is 0.
 * Otherwise computed as client_registry_now_ms() - timestamp in unsigned
 * 32-bit arithmetic.
 */
uint32_t client_registry_ms_since(uint32_t timestamp);
/**
 * Register or refresh a client; updates both ping timestamps.
 *
 * Stores id, version, available and used as given. *out_is_new is optional
 * (may be NULL) and reports whether a new slot was claimed.
 * Returns ESP_ERR_NO_MEM when no slot is available, ESP_OK on success.
 * NOTE(review): a NULL mac is presumably rejected like in
 * client_registry_heartbeat() — confirm against the implementation.
 */
esp_err_t client_registry_upsert(const uint8_t mac[CLIENT_MAC_LEN], uint32_t id,
uint32_t version, bool available, bool used,
bool *out_is_new);
/**
 * Record a successful heartbeat (or initial slave info).
 * Sets available=true and updates last_success_ping_at (and last_ping_at).
 * Unknown MACs are added to the registry.
 *
 * *out_is_new (optional) is set to true when a new slot was claimed.
 * *out_reactivated (optional) is set to true when the client was inactive
 * OR was newly added by this call; callers that need to tell the two apart
 * should check *out_is_new first.
 *
 * Returns ESP_ERR_INVALID_ARG if mac is NULL, ESP_ERR_NO_MEM if the registry
 * is full, ESP_OK otherwise.
 */
esp_err_t client_registry_heartbeat(const uint8_t mac[CLIENT_MAC_LEN],
uint32_t id, uint32_t version, bool used,
bool *out_is_new, bool *out_reactivated);
/**
 * Mark clients inactive when last_success_ping_at is older than timeout_ms.
 * Only clients that are currently available are examined; each one that is
 * demoted gets available=false and a warning log line.
 */
void client_registry_check_timeouts(uint32_t timeout_ms);
/** Number of registered clients. NOTE(review): presumably counts in-use slots — confirm. */
size_t client_registry_count(void);
/** Client at position `index` for iteration. NOTE(review): out-of-range behaviour not visible here — confirm. */
const client_info_t *client_registry_at(size_t index);
/**
 * Info record of the registered client with the given MAC, or NULL when mac
 * is NULL or no registered client matches. The pointer refers to the
 * registry's own storage, not a copy.
 */
const client_info_t *client_registry_find_by_mac(const uint8_t mac[CLIENT_MAC_LEN]);

View File

@ -35,8 +35,9 @@ static bool encode_clients_list(pb_ostream_t *stream, const pb_field_t *field,
proto.id = client->id;
proto.available = client->available;
proto.used = client->used;
proto.last_ping = client->last_ping;
proto.last_success_ping = client->last_success_ping;
proto.last_ping = client_registry_ms_since(client->last_ping_at);
proto.last_success_ping =
client_registry_ms_since(client->last_success_ping_at);
proto.version = client->version;
proto.mac.funcs.encode = encode_client_mac;
proto.mac.arg = (void *)client->mac;

View File

@ -20,7 +20,14 @@
/* Value written to the `magic` field of outgoing packets. */
#define ESPNOW_MAGIC 0xA1
/* Packet type codes carried in the `type` field. */
/* Master -> broadcast: master is searching for slaves. */
#define ESPNOW_MSG_DISCOVER 1
/* Slave -> master: registration. */
#define ESPNOW_MSG_SLAVE_INFO 2
/* Slave -> master: periodic keep-alive. */
#define ESPNOW_MSG_HEARTBEAT 3
/* Period of the master's DISCOVER broadcast. */
#define ESPNOW_DISCOVER_INTERVAL_MS 500
/* Delay between slave heartbeats; also the period of the master monitor task. */
#define ESPNOW_HEARTBEAT_INTERVAL_MS 1000
/* Number of heartbeat intervals that make up the client timeout. */
#define ESPNOW_HEARTBEAT_MISS_COUNT 3
/* Timeout handed to client_registry_check_timeouts() (3000 ms with the values above). */
#define ESPNOW_CLIENT_TIMEOUT_MS \
(ESPNOW_HEARTBEAT_INTERVAL_MS * ESPNOW_HEARTBEAT_MISS_COUNT)
/* Slave side: longest gap between DISCOVERs from the master before it is treated as lost (5000 ms). */
#define SLAVE_MASTER_LOST_MS (ESPNOW_HEARTBEAT_INTERVAL_MS * 5)
static const uint8_t ESPNOW_BCAST[ESP_NOW_ETH_ALEN] = {0xff, 0xff, 0xff,
0xff, 0xff, 0xff};
@ -42,13 +49,18 @@ typedef struct __attribute__((packed)) {
uint32_t slave_id;
uint8_t available;
uint8_t used;
} espnow_slave_info_packet_t;
} espnow_slave_packet_t;
static app_config_t s_config;
static uint8_t s_wifi_channel;
static uint8_t s_own_mac[ESP_NOW_ETH_ALEN];
static bool s_slave_joined;
static uint8_t s_master_mac[ESP_NOW_ETH_ALEN];
static uint32_t s_last_discover_ms;
/* Current time in milliseconds: FreeRTOS tick count scaled by the tick period. */
static uint32_t now_ms(void) {
  const uint32_t elapsed_ms =
      (uint32_t)(xTaskGetTickCount() * portTICK_PERIOD_MS);
  return elapsed_ms;
}
static uint8_t network_to_channel(uint8_t network) {
if (network < 1 || network > 13) {
@ -86,58 +98,48 @@ static esp_err_t ensure_peer(const uint8_t *mac) {
/* Run ensure_peer() for the broadcast address and pass its result through. */
static esp_err_t ensure_broadcast_peer(void) {
  const esp_err_t status = ensure_peer(ESPNOW_BCAST);
  return status;
}
static void send_slave_info(const uint8_t *dest_mac) {
espnow_slave_info_packet_t pkt = {
.magic = ESPNOW_MAGIC,
.type = ESPNOW_MSG_SLAVE_INFO,
.network = s_config.network,
.version = POWERPOD_FW_VERSION,
.slave_id = s_own_mac[5],
.available = 1,
.used = 0,
};
memcpy(pkt.mac, s_own_mac, ESP_NOW_ETH_ALEN);
/*
 * Fill `pkt` with this slave's identity for a packet of the given `type`
 * (e.g. ESPNOW_MSG_SLAVE_INFO or ESPNOW_MSG_HEARTBEAT).
 *
 * The packet is zeroed first. The caller hands in an uninitialised stack
 * object and the struct is transmitted byte-for-byte, so any member not
 * assigned below would otherwise go out with indeterminate contents.
 *
 * slave_id is the last byte of the device's own MAC; available is always 1
 * and used is always 0.
 */
static void build_slave_packet(espnow_slave_packet_t *pkt, uint8_t type) {
  memset(pkt, 0, sizeof(*pkt));

  pkt->magic = ESPNOW_MAGIC;
  pkt->type = type;
  pkt->network = s_config.network;
  memcpy(pkt->mac, s_own_mac, ESP_NOW_ETH_ALEN);
  pkt->version = POWERPOD_FW_VERSION;
  pkt->slave_id = s_own_mac[5];
  pkt->available = 1;
  pkt->used = 0;
}
static void send_slave_packet(const uint8_t *dest_mac, uint8_t type) {
espnow_slave_packet_t pkt;
build_slave_packet(&pkt, type);
if (ensure_peer(dest_mac) != ESP_OK) {
return;
}
esp_err_t err =
esp_now_send(dest_mac, (const uint8_t *)&pkt, sizeof(pkt));
esp_err_t err = esp_now_send(dest_mac, (const uint8_t *)&pkt, sizeof(pkt));
if (err != ESP_OK) {
ESP_LOGW(TAG, "slave info send failed: %s", esp_err_to_name(err));
ESP_LOGW(TAG, "send type=%u failed: %s", (unsigned)type,
esp_err_to_name(err));
}
}
static void handle_discover(const uint8_t *sender_mac,
const espnow_discover_packet_t *pkt) {
if (pkt->network != s_config.network) {
return;
static void slave_reset_join(void) {
s_slave_joined = false;
memset(s_master_mac, 0, sizeof(s_master_mac));
s_last_discover_ms = 0;
}
if (s_slave_joined && mac_equal(sender_mac, s_master_mac)) {
return;
}
memcpy(s_master_mac, sender_mac, ESP_NOW_ETH_ALEN);
s_slave_joined = true;
char mac_str[18];
mac_to_str(sender_mac, mac_str, sizeof(mac_str));
ESP_LOGI(TAG, "joined network %u, master %s", (unsigned)pkt->network, mac_str);
send_slave_info(sender_mac);
}
static void handle_slave_info(const espnow_slave_info_packet_t *pkt) {
static void handle_client_packet(const espnow_slave_packet_t *pkt) {
if (pkt->network != s_config.network) {
return;
}
bool is_new = false;
esp_err_t err = client_registry_upsert(
pkt->mac, pkt->slave_id, pkt->version, pkt->available != 0,
pkt->used != 0, &is_new);
bool reactivated = false;
esp_err_t err = client_registry_heartbeat(
pkt->mac, pkt->slave_id, pkt->version, pkt->used != 0, &is_new,
&reactivated);
if (err != ESP_OK) {
ESP_LOGW(TAG, "client registry full");
return;
@ -149,6 +151,88 @@ static void handle_slave_info(const espnow_slave_info_packet_t *pkt) {
ESP_LOGI(TAG, "client registered id=%lu mac=%s ver=%lu",
(unsigned long)pkt->slave_id, mac_str,
(unsigned long)pkt->version);
} else if (reactivated) {
ESP_LOGI(TAG, "client reconnected id=%lu mac=%s",
(unsigned long)pkt->slave_id, mac_str);
}
}
/*
 * Slave-side handling of a DISCOVER packet.
 *
 * - Packets for another network are ignored.
 * - While joined, discovers from any MAC other than the current master are
 *   ignored. Discovers from the master refresh s_last_discover_ms as long as
 *   the previous one is at most SLAVE_MASTER_LOST_MS old.
 * - If the previous discover is older than that, the join state is reset and
 *   the slave registers again below.
 * - When not joined (or just reset), the sender becomes the master, the join
 *   is logged and a SLAVE_INFO packet is sent to it.
 */
static void handle_discover(const uint8_t *sender_mac,
                            const espnow_discover_packet_t *pkt) {
  if (pkt->network != s_config.network) {
    return;
  }

  const uint32_t stamp = now_ms();

  if (s_slave_joined) {
    const bool from_our_master = mac_equal(sender_mac, s_master_mac);
    if (!from_our_master) {
      return;
    }

    const uint32_t silence = stamp - s_last_discover_ms;
    if (silence <= SLAVE_MASTER_LOST_MS) {
      s_last_discover_ms = stamp; /* master is still alive */
      return;
    }

    ESP_LOGW(TAG, "master lost, rejoining");
    slave_reset_join();
  }

  /* (Re)join: adopt the sender as master. */
  memcpy(s_master_mac, sender_mac, ESP_NOW_ETH_ALEN);
  s_slave_joined = true;
  s_last_discover_ms = stamp;

  char master_text[18];
  mac_to_str(sender_mac, master_text, sizeof(master_text));
  ESP_LOGI(TAG, "joined network %u, master %s", (unsigned)pkt->network, master_text);

  send_slave_packet(sender_mac, ESPNOW_MSG_SLAVE_INFO);
}
/*
 * Drop the join state when no DISCOVER from the master has been seen for
 * more than SLAVE_MASTER_LOST_MS.
 *
 * s_last_discover_ms is written by the discover handler while this function
 * runs in the heartbeat task. It is therefore copied exactly once, and
 * copied BEFORE the clock is sampled, so the age is computed from one
 * consistent stamp. If the subtraction still lands in the upper half of the
 * 32-bit range, the stamp is newer than the clock sample and the master is
 * considered alive. Without this, a discover arriving mid-check wraps the
 * unsigned age to ~2^32 ms and triggers a false "master lost".
 *
 * A stamp of 0 means "no discover recorded" and never times out.
 */
static void slave_check_master_timeout(void) {
  if (!s_slave_joined) {
    return;
  }

  const uint32_t last_seen = s_last_discover_ms;
  if (last_seen == 0) {
    return;
  }

  const uint32_t silence = now_ms() - last_seen;
  if (silence >= 0x80000000u) {
    /* Stamp is ahead of our clock sample: a discover just arrived. */
    return;
  }

  if (silence > SLAVE_MASTER_LOST_MS) {
    ESP_LOGW(TAG, "no master discover for %u ms, reconnecting",
             (unsigned)silence);
    slave_reset_join();
  }
}
/*
 * Slave task body: once per ESPNOW_HEARTBEAT_INTERVAL_MS, run the
 * master-lost check and, if still joined afterwards, send a HEARTBEAT to the
 * master. Never returns. `param` is unused.
 */
static void slave_heartbeat_task(void *param) {
  (void)param;

  ESP_LOGI(TAG, "slave heartbeat task (interval %u ms)",
           (unsigned)ESPNOW_HEARTBEAT_INTERVAL_MS);

  for (;;) {
    vTaskDelay(pdMS_TO_TICKS(ESPNOW_HEARTBEAT_INTERVAL_MS));

    slave_check_master_timeout();

    if (s_slave_joined) {
      send_slave_packet(s_master_mac, ESPNOW_MSG_HEARTBEAT);
    }
  }
}
/*
 * Master task body: once per ESPNOW_HEARTBEAT_INTERVAL_MS, ask the client
 * registry to demote clients that exceeded ESPNOW_CLIENT_TIMEOUT_MS.
 * Never returns. `param` is unused.
 */
static void master_monitor_task(void *param) {
  (void)param;

  ESP_LOGI(TAG, "master monitor task (timeout %u ms)",
           (unsigned)ESPNOW_CLIENT_TIMEOUT_MS);

  for (;;) {
    vTaskDelay(pdMS_TO_TICKS(ESPNOW_HEARTBEAT_INTERVAL_MS));
    client_registry_check_timeouts(ESPNOW_CLIENT_TIMEOUT_MS);
  }
}
@ -170,8 +254,9 @@ static void espnow_recv_cb(const esp_now_recv_info_t *info, const uint8_t *data,
}
break;
case ESPNOW_MSG_SLAVE_INFO:
if (s_config.master && len >= (int)sizeof(espnow_slave_info_packet_t)) {
handle_slave_info((const espnow_slave_info_packet_t *)data);
case ESPNOW_MSG_HEARTBEAT:
if (s_config.master && len >= (int)sizeof(espnow_slave_packet_t)) {
handle_client_packet((const espnow_slave_packet_t *)data);
}
break;
default:
@ -241,8 +326,7 @@ esp_err_t esp_now_comm_init(const app_config_t *config) {
memset(&s_config, 0, sizeof(s_config));
memcpy(&s_config, config, sizeof(s_config));
client_registry_init();
s_slave_joined = false;
memset(s_master_mac, 0, sizeof(s_master_mac));
slave_reset_join();
s_wifi_channel = network_to_channel(config->network);
ESP_ERROR_CHECK(esp_read_mac(s_own_mac, ESP_MAC_WIFI_STA));
@ -269,8 +353,17 @@ esp_err_t esp_now_comm_init(const app_config_t *config) {
ESP_LOGE(TAG, "failed to create discover task");
return ESP_FAIL;
}
if (xTaskCreate(master_monitor_task, "espnow_mon", 4096, NULL, 4, NULL) !=
pdPASS) {
ESP_LOGE(TAG, "failed to create monitor task");
return ESP_FAIL;
}
} else {
ESP_LOGI(TAG, "slave listening for master discover");
if (xTaskCreate(slave_heartbeat_task, "espnow_hb", 4096, NULL, 4, NULL) !=
pdPASS) {
ESP_LOGE(TAG, "failed to create heartbeat task");
return ESP_FAIL;
}
}
return ESP_OK;