Stabilized OTA Update with Retries

This commit is contained in:
simon 2026-06-06 17:15:55 +02:00
parent 99956e3362
commit f89ea3cbe3
3 changed files with 163 additions and 30 deletions

View File

@ -18,9 +18,9 @@ static const char *TAG = "[OTA_ESPNOW]";
#define OTA_ESPNOW_PREPARE_PRIO 5 #define OTA_ESPNOW_PREPARE_PRIO 5
#define OTA_PREPARE_TIMEOUT_MS 120000u #define OTA_PREPARE_TIMEOUT_MS 120000u
#define OTA_BLOCK_TIMEOUT_MS 30000u #define OTA_BLOCK_TIMEOUT_PER_SLAVE_MS 2000u
#define OTA_BLOCK_MAX_RETRIES 2u
#define OTA_END_TIMEOUT_MS 60000u #define OTA_END_TIMEOUT_MS 60000u
#define OTA_PAYLOAD_DELAY_MS 3
#define OTA_ST_PREPARING 1u #define OTA_ST_PREPARING 1u
#define OTA_ST_READY 2u #define OTA_ST_READY 2u
@ -35,7 +35,8 @@ static const char *TAG = "[OTA_ESPNOW]";
#define OTA_MAX_TARGETS CLIENT_REGISTRY_MAX #define OTA_MAX_TARGETS CLIENT_REGISTRY_MAX
#define OTA_SLAVE_WORK_QUEUE_LEN 12 /** ~21 payloads per 4 KiB block; headroom for bursts + status/end. */
#define OTA_SLAVE_WORK_QUEUE_LEN 32
#define OTA_SLAVE_WORK_STACK 8192 #define OTA_SLAVE_WORK_STACK 8192
#define OTA_SLAVE_WORK_PRIO 5 #define OTA_SLAVE_WORK_PRIO 5
@ -173,6 +174,54 @@ static bool wait_target_bits(uint32_t want_bits, uint32_t timeout_ms) {
return (got & want_bits) == want_bits; return (got & want_bits) == want_bits;
} }
/**
 * Block-ACK wait budget in milliseconds.
 *
 * The budget is OTA_BLOCK_TIMEOUT_PER_SLAVE_MS multiplied by the number of
 * distribution targets in s_dist.count. An empty target list is treated as a
 * single slave, so the result is never zero.
 */
static uint32_t block_ack_timeout_ms(void) {
  /* Treat "no targets" as one target so the timeout never collapses to 0. */
  const uint32_t slaves = (s_dist.count == 0) ? 1u : (uint32_t)s_dist.count;
  return slaves * OTA_BLOCK_TIMEOUT_PER_SLAVE_MS;
}
static void log_missing_block_acks(uint32_t expected_bytes) {
if (s_eg == NULL || s_dist.count == 0) {
return;
}
EventBits_t bits = xEventGroupGetBits(s_eg);
for (uint8_t i = 0; i < s_dist.count; i++) {
uint32_t bit = (1u << (unsigned)i);
if (bits & bit) {
continue;
}
const ota_prog_entry_t *e = &s_prog.entries[i];
ESP_LOGE(TAG,
"slave %lu missing block ack @%lu (last status=%lu bytes=%lu err=%lu)",
(unsigned long)s_dist.id[i], (unsigned long)expected_bytes,
(unsigned long)e->status, (unsigned long)e->bytes_written,
(unsigned long)e->error);
}
}
/**
 * Send one block to every distribution target as a series of payload chunks.
 *
 * The block is cut into pieces of at most OTA_UART_HOST_CHUNK_SIZE bytes.
 * Each piece is sent to every MAC in s_dist.mac[0..s_dist.count) with the
 * same sequence number before moving on to the next piece.
 *
 * @param block_buf Start of the block data.
 * @param block_len Number of bytes in the block.
 * @param seq_io    In/out sequence counter: read for each chunk and
 *                  incremented once after a chunk has been sent to all
 *                  targets.
 * @return ESP_OK when every chunk was handed to every target; otherwise the
 *         first error returned by esp_now_comm_send_ota_payload(). On error
 *         the function returns immediately, and *seq_io keeps whatever value
 *         it had reached.
 */
static esp_err_t send_block_payloads(const uint8_t *block_buf, uint32_t block_len,
                                     uint32_t *seq_io) {
  for (uint32_t offset = 0; offset < block_len;) {
    const uint32_t remaining = block_len - offset;
    const uint32_t piece = (remaining > OTA_UART_HOST_CHUNK_SIZE)
                               ? OTA_UART_HOST_CHUNK_SIZE
                               : remaining;

    /* Same chunk and same seq go to every target before advancing. */
    for (uint8_t target = 0; target < s_dist.count; target++) {
      const esp_err_t rc = esp_now_comm_send_ota_payload(
          s_dist.mac[target], *seq_io, block_buf + offset, piece);
      if (rc != ESP_OK) {
        return rc;
      }
    }

    *seq_io += 1;
    offset += piece;
  }
  return ESP_OK;
}
bool ota_espnow_distribution_active(void) { return s_distribution_active; } bool ota_espnow_distribution_active(void) { return s_distribution_active; }
static void send_slave_status(const uint8_t master_mac[6], uint32_t status, static void send_slave_status(const uint8_t master_mac[6], uint32_t status,
@ -221,8 +270,20 @@ static void process_slave_payload(const uint8_t master_mac[6],
ESP_LOGI(TAG, "ESP-NOW OTA payloads started"); ESP_LOGI(TAG, "ESP-NOW OTA payloads started");
} }
ota_feed_result_t r = ota_feed_result_t r = ota_uart_feed_chunk(payload->seq, payload->data.bytes,
ota_uart_feed(payload->data.bytes, payload->data.size); payload->data.size);
if (r == OTA_FEED_SEQ_GAP) {
led_ring_ota_failed();
send_slave_status(master_mac, OTA_ST_FAILED, ota_uart_bytes_written(), 16);
return;
}
if (r == OTA_FEED_SEQ_DUP) {
if (ota_uart_block_ready_for_reack()) {
send_slave_status(master_mac, OTA_ST_BLOCK_ACK, ota_uart_bytes_written(),
0);
}
return;
}
if (r == OTA_FEED_ERROR) { if (r == OTA_FEED_ERROR) {
led_ring_ota_failed(); led_ring_ota_failed();
send_slave_status(master_mac, OTA_ST_FAILED, ota_uart_bytes_written(), 13); send_slave_status(master_mac, OTA_ST_FAILED, ota_uart_bytes_written(), 13);
@ -509,35 +570,71 @@ static esp_err_t distribute_image(const esp_partition_t *partition,
return err; return err;
} }
uint32_t sent = 0; const bool full_block = (block_len >= OTA_UART_FLASH_BLOCK_SIZE);
while (sent < block_len) { s_dist.expected_bytes = offset + block_len;
uint32_t chunk = block_len - sent; const uint32_t block_start_seq = seq;
if (chunk > OTA_UART_HOST_CHUNK_SIZE) {
chunk = OTA_UART_HOST_CHUNK_SIZE; if (full_block) {
xEventGroupClearBits(s_eg, target_mask);
} }
for (uint8_t i = 0; i < s_dist.count; i++) { bool block_sent = false;
err = esp_now_comm_send_ota_payload(s_dist.mac[i], seq, for (uint32_t send_attempt = 0; send_attempt <= OTA_BLOCK_MAX_RETRIES;
block_buf + sent, chunk); send_attempt++) {
if (send_attempt > 0) {
seq = block_start_seq;
if (full_block) {
xEventGroupClearBits(s_eg, target_mask);
}
ESP_LOGW(TAG, "block send failed @%lu — resend %lu/%lu",
(unsigned long)s_dist.expected_bytes,
(unsigned long)send_attempt,
(unsigned long)OTA_BLOCK_MAX_RETRIES);
}
err = send_block_payloads(block_buf, block_len, &seq);
if (err == ESP_OK) {
block_sent = true;
break;
}
}
if (!block_sent) {
ESP_LOGE(TAG, "block send failed @%lu after %lu retries",
(unsigned long)s_dist.expected_bytes,
(unsigned long)OTA_BLOCK_MAX_RETRIES);
prog_end();
s_distribution_active = false;
return err;
}
if (full_block) {
const uint32_t ack_timeout = block_ack_timeout_ms();
bool acked = false;
for (uint32_t attempt = 0; attempt <= OTA_BLOCK_MAX_RETRIES; attempt++) {
if (wait_target_bits(target_mask, ack_timeout)) {
acked = true;
break;
}
log_missing_block_acks(s_dist.expected_bytes);
if (attempt >= OTA_BLOCK_MAX_RETRIES) {
break;
}
ESP_LOGW(TAG, "block ack timeout @%lu — resend %lu/%lu",
(unsigned long)s_dist.expected_bytes,
(unsigned long)(attempt + 1),
(unsigned long)OTA_BLOCK_MAX_RETRIES);
xEventGroupClearBits(s_eg, target_mask);
seq = block_start_seq;
err = send_block_payloads(block_buf, block_len, &seq);
if (err != ESP_OK) { if (err != ESP_OK) {
prog_end(); prog_end();
s_distribution_active = false; s_distribution_active = false;
return err; return err;
} }
} }
seq++; if (!acked) {
sent += chunk; ESP_LOGE(TAG, "timeout block ack @%lu bytes after %lu retries",
vTaskDelay(pdMS_TO_TICKS(OTA_PAYLOAD_DELAY_MS)); (unsigned long)s_dist.expected_bytes,
} (unsigned long)OTA_BLOCK_MAX_RETRIES);
const bool full_block = (block_len >= OTA_UART_FLASH_BLOCK_SIZE);
s_dist.expected_bytes = offset + block_len;
if (full_block) {
xEventGroupClearBits(s_eg, target_mask);
if (!wait_target_bits(target_mask, OTA_BLOCK_TIMEOUT_MS)) {
ESP_LOGE(TAG, "timeout block ack @%lu bytes",
(unsigned long)s_dist.expected_bytes);
prog_end(); prog_end();
s_distribution_active = false; s_distribution_active = false;
return ESP_ERR_TIMEOUT; return ESP_ERR_TIMEOUT;

View File

@ -12,6 +12,7 @@ typedef struct {
uint32_t total_size; uint32_t total_size;
uint32_t received; uint32_t received;
uint32_t written; uint32_t written;
uint32_t expected_seq;
int target_slot; int target_slot;
uint8_t block_buf[OTA_UART_FLASH_BLOCK_SIZE]; uint8_t block_buf[OTA_UART_FLASH_BLOCK_SIZE];
size_t block_len; size_t block_len;
@ -112,10 +113,30 @@ int ota_uart_prepare(uint32_t total_size) {
return s_ota.target_slot; return s_ota.target_slot;
} }
ota_feed_result_t ota_uart_feed(const uint8_t *data, size_t len) { bool ota_uart_block_ready_for_reack(void) {
if (!s_ota.active) {
return false;
}
return s_ota.written > 0 &&
(s_ota.written % OTA_UART_FLASH_BLOCK_SIZE) == 0 &&
s_ota.block_len == 0;
}
ota_feed_result_t ota_uart_feed_chunk(uint32_t seq, const uint8_t *data,
size_t len) {
if (!s_ota.active || data == NULL || len == 0) { if (!s_ota.active || data == NULL || len == 0) {
return OTA_FEED_ERROR; return OTA_FEED_ERROR;
} }
if (seq < s_ota.expected_seq) {
return OTA_FEED_SEQ_DUP;
}
if (seq > s_ota.expected_seq) {
ESP_LOGW(TAG, "seq gap: got %lu expected %lu", (unsigned long)seq,
(unsigned long)s_ota.expected_seq);
return OTA_FEED_SEQ_GAP;
}
s_ota.expected_seq++;
if (len > OTA_UART_HOST_CHUNK_SIZE) { if (len > OTA_UART_HOST_CHUNK_SIZE) {
ESP_LOGW(TAG, "chunk %u > %u, truncating", (unsigned)len, ESP_LOGW(TAG, "chunk %u > %u, truncating", (unsigned)len,
OTA_UART_HOST_CHUNK_SIZE); OTA_UART_HOST_CHUNK_SIZE);
@ -200,6 +221,13 @@ esp_err_t ota_uart_finish(bool set_boot, bool *success_out) {
return err; return err;
} }
if (s_ota.total_size > 0 && s_ota.received != s_ota.total_size) {
ESP_LOGE(TAG, "size mismatch: received=%lu expected=%lu",
(unsigned long)s_ota.received, (unsigned long)s_ota.total_size);
ota_uart_abort();
return ESP_ERR_INVALID_SIZE;
}
err = esp_ota_end(s_ota.handle); err = esp_ota_end(s_ota.handle);
if (err != ESP_OK) { if (err != ESP_OK) {
ESP_LOGE(TAG, "esp_ota_end failed: %s", esp_err_to_name(err)); ESP_LOGE(TAG, "esp_ota_end failed: %s", esp_err_to_name(err));

View File

@ -28,6 +28,8 @@ typedef enum {
typedef enum { typedef enum {
OTA_FEED_OK = 0, OTA_FEED_OK = 0,
OTA_FEED_BLOCK_WRITTEN, OTA_FEED_BLOCK_WRITTEN,
OTA_FEED_SEQ_DUP,
OTA_FEED_SEQ_GAP,
OTA_FEED_ERROR, OTA_FEED_ERROR,
} ota_feed_result_t; } ota_feed_result_t;
@ -41,8 +43,14 @@ int ota_uart_prepare(uint32_t total_size);
void ota_uart_abort(void); void ota_uart_abort(void);
/** Append up to 200 bytes; flushes 4 KiB blocks to flash when full. */ /**
ota_feed_result_t ota_uart_feed(const uint8_t *data, size_t len); * Append up to 200 bytes with strict seq checking (0, 1, 2, ...).
* Duplicates (seq < expected) return OTA_FEED_SEQ_DUP; gaps return OTA_FEED_SEQ_GAP.
*/
ota_feed_result_t ota_uart_feed_chunk(uint32_t seq, const uint8_t *data, size_t len);
/** True when a full 4 KiB block is in flash (used to re-ACK host block retries). */
bool ota_uart_block_ready_for_reack(void);
uint32_t ota_uart_bytes_written(void); uint32_t ota_uart_bytes_written(void);