Stabilized OTA Update with Retries
This commit is contained in:
parent
99956e3362
commit
f89ea3cbe3
@ -18,9 +18,9 @@ static const char *TAG = "[OTA_ESPNOW]";
|
||||
#define OTA_ESPNOW_PREPARE_PRIO 5
|
||||
|
||||
#define OTA_PREPARE_TIMEOUT_MS 120000u
|
||||
#define OTA_BLOCK_TIMEOUT_MS 30000u
|
||||
#define OTA_BLOCK_TIMEOUT_PER_SLAVE_MS 2000u
|
||||
#define OTA_BLOCK_MAX_RETRIES 2u
|
||||
#define OTA_END_TIMEOUT_MS 60000u
|
||||
#define OTA_PAYLOAD_DELAY_MS 3
|
||||
|
||||
#define OTA_ST_PREPARING 1u
|
||||
#define OTA_ST_READY 2u
|
||||
@ -35,7 +35,8 @@ static const char *TAG = "[OTA_ESPNOW]";
|
||||
|
||||
#define OTA_MAX_TARGETS CLIENT_REGISTRY_MAX
|
||||
|
||||
#define OTA_SLAVE_WORK_QUEUE_LEN 12
|
||||
/** ~21 payloads per 4 KiB block; headroom for bursts + status/end. */
|
||||
#define OTA_SLAVE_WORK_QUEUE_LEN 32
|
||||
#define OTA_SLAVE_WORK_STACK 8192
|
||||
#define OTA_SLAVE_WORK_PRIO 5
|
||||
|
||||
@ -173,6 +174,54 @@ static bool wait_target_bits(uint32_t want_bits, uint32_t timeout_ms) {
|
||||
return (got & want_bits) == want_bits;
|
||||
}
|
||||
|
||||
static uint32_t block_ack_timeout_ms(void) {
|
||||
if (s_dist.count == 0) {
|
||||
return OTA_BLOCK_TIMEOUT_PER_SLAVE_MS;
|
||||
}
|
||||
return (uint32_t)s_dist.count * OTA_BLOCK_TIMEOUT_PER_SLAVE_MS;
|
||||
}
|
||||
|
||||
static void log_missing_block_acks(uint32_t expected_bytes) {
|
||||
if (s_eg == NULL || s_dist.count == 0) {
|
||||
return;
|
||||
}
|
||||
EventBits_t bits = xEventGroupGetBits(s_eg);
|
||||
for (uint8_t i = 0; i < s_dist.count; i++) {
|
||||
uint32_t bit = (1u << (unsigned)i);
|
||||
if (bits & bit) {
|
||||
continue;
|
||||
}
|
||||
const ota_prog_entry_t *e = &s_prog.entries[i];
|
||||
ESP_LOGE(TAG,
|
||||
"slave %lu missing block ack @%lu (last status=%lu bytes=%lu err=%lu)",
|
||||
(unsigned long)s_dist.id[i], (unsigned long)expected_bytes,
|
||||
(unsigned long)e->status, (unsigned long)e->bytes_written,
|
||||
(unsigned long)e->error);
|
||||
}
|
||||
}
|
||||
|
||||
static esp_err_t send_block_payloads(const uint8_t *block_buf, uint32_t block_len,
|
||||
uint32_t *seq_io) {
|
||||
uint32_t sent = 0;
|
||||
while (sent < block_len) {
|
||||
uint32_t chunk = block_len - sent;
|
||||
if (chunk > OTA_UART_HOST_CHUNK_SIZE) {
|
||||
chunk = OTA_UART_HOST_CHUNK_SIZE;
|
||||
}
|
||||
|
||||
for (uint8_t i = 0; i < s_dist.count; i++) {
|
||||
esp_err_t err = esp_now_comm_send_ota_payload(s_dist.mac[i], *seq_io,
|
||||
block_buf + sent, chunk);
|
||||
if (err != ESP_OK) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
(*seq_io)++;
|
||||
sent += chunk;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
/**
 * Report the current value of the distribution-active flag.
 *
 * @return The value of s_distribution_active.
 */
bool ota_espnow_distribution_active(void) {
  return s_distribution_active;
}
|
||||
|
||||
static void send_slave_status(const uint8_t master_mac[6], uint32_t status,
|
||||
@ -221,8 +270,20 @@ static void process_slave_payload(const uint8_t master_mac[6],
|
||||
ESP_LOGI(TAG, "ESP-NOW OTA payloads started");
|
||||
}
|
||||
|
||||
ota_feed_result_t r =
|
||||
ota_uart_feed(payload->data.bytes, payload->data.size);
|
||||
ota_feed_result_t r = ota_uart_feed_chunk(payload->seq, payload->data.bytes,
|
||||
payload->data.size);
|
||||
if (r == OTA_FEED_SEQ_GAP) {
|
||||
led_ring_ota_failed();
|
||||
send_slave_status(master_mac, OTA_ST_FAILED, ota_uart_bytes_written(), 16);
|
||||
return;
|
||||
}
|
||||
if (r == OTA_FEED_SEQ_DUP) {
|
||||
if (ota_uart_block_ready_for_reack()) {
|
||||
send_slave_status(master_mac, OTA_ST_BLOCK_ACK, ota_uart_bytes_written(),
|
||||
0);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (r == OTA_FEED_ERROR) {
|
||||
led_ring_ota_failed();
|
||||
send_slave_status(master_mac, OTA_ST_FAILED, ota_uart_bytes_written(), 13);
|
||||
@ -509,35 +570,71 @@ static esp_err_t distribute_image(const esp_partition_t *partition,
|
||||
return err;
|
||||
}
|
||||
|
||||
uint32_t sent = 0;
|
||||
while (sent < block_len) {
|
||||
uint32_t chunk = block_len - sent;
|
||||
if (chunk > OTA_UART_HOST_CHUNK_SIZE) {
|
||||
chunk = OTA_UART_HOST_CHUNK_SIZE;
|
||||
}
|
||||
const bool full_block = (block_len >= OTA_UART_FLASH_BLOCK_SIZE);
|
||||
s_dist.expected_bytes = offset + block_len;
|
||||
const uint32_t block_start_seq = seq;
|
||||
|
||||
for (uint8_t i = 0; i < s_dist.count; i++) {
|
||||
err = esp_now_comm_send_ota_payload(s_dist.mac[i], seq,
|
||||
block_buf + sent, chunk);
|
||||
if (full_block) {
|
||||
xEventGroupClearBits(s_eg, target_mask);
|
||||
}
|
||||
|
||||
bool block_sent = false;
|
||||
for (uint32_t send_attempt = 0; send_attempt <= OTA_BLOCK_MAX_RETRIES;
|
||||
send_attempt++) {
|
||||
if (send_attempt > 0) {
|
||||
seq = block_start_seq;
|
||||
if (full_block) {
|
||||
xEventGroupClearBits(s_eg, target_mask);
|
||||
}
|
||||
ESP_LOGW(TAG, "block send failed @%lu — resend %lu/%lu",
|
||||
(unsigned long)s_dist.expected_bytes,
|
||||
(unsigned long)send_attempt,
|
||||
(unsigned long)OTA_BLOCK_MAX_RETRIES);
|
||||
}
|
||||
err = send_block_payloads(block_buf, block_len, &seq);
|
||||
if (err == ESP_OK) {
|
||||
block_sent = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!block_sent) {
|
||||
ESP_LOGE(TAG, "block send failed @%lu after %lu retries",
|
||||
(unsigned long)s_dist.expected_bytes,
|
||||
(unsigned long)OTA_BLOCK_MAX_RETRIES);
|
||||
prog_end();
|
||||
s_distribution_active = false;
|
||||
return err;
|
||||
}
|
||||
|
||||
if (full_block) {
|
||||
const uint32_t ack_timeout = block_ack_timeout_ms();
|
||||
bool acked = false;
|
||||
for (uint32_t attempt = 0; attempt <= OTA_BLOCK_MAX_RETRIES; attempt++) {
|
||||
if (wait_target_bits(target_mask, ack_timeout)) {
|
||||
acked = true;
|
||||
break;
|
||||
}
|
||||
log_missing_block_acks(s_dist.expected_bytes);
|
||||
if (attempt >= OTA_BLOCK_MAX_RETRIES) {
|
||||
break;
|
||||
}
|
||||
ESP_LOGW(TAG, "block ack timeout @%lu — resend %lu/%lu",
|
||||
(unsigned long)s_dist.expected_bytes,
|
||||
(unsigned long)(attempt + 1),
|
||||
(unsigned long)OTA_BLOCK_MAX_RETRIES);
|
||||
xEventGroupClearBits(s_eg, target_mask);
|
||||
seq = block_start_seq;
|
||||
err = send_block_payloads(block_buf, block_len, &seq);
|
||||
if (err != ESP_OK) {
|
||||
prog_end();
|
||||
s_distribution_active = false;
|
||||
return err;
|
||||
}
|
||||
}
|
||||
seq++;
|
||||
sent += chunk;
|
||||
vTaskDelay(pdMS_TO_TICKS(OTA_PAYLOAD_DELAY_MS));
|
||||
}
|
||||
|
||||
const bool full_block = (block_len >= OTA_UART_FLASH_BLOCK_SIZE);
|
||||
s_dist.expected_bytes = offset + block_len;
|
||||
|
||||
if (full_block) {
|
||||
xEventGroupClearBits(s_eg, target_mask);
|
||||
if (!wait_target_bits(target_mask, OTA_BLOCK_TIMEOUT_MS)) {
|
||||
ESP_LOGE(TAG, "timeout block ack @%lu bytes",
|
||||
(unsigned long)s_dist.expected_bytes);
|
||||
if (!acked) {
|
||||
ESP_LOGE(TAG, "timeout block ack @%lu bytes after %lu retries",
|
||||
(unsigned long)s_dist.expected_bytes,
|
||||
(unsigned long)OTA_BLOCK_MAX_RETRIES);
|
||||
prog_end();
|
||||
s_distribution_active = false;
|
||||
return ESP_ERR_TIMEOUT;
|
||||
|
||||
@ -12,6 +12,7 @@ typedef struct {
|
||||
uint32_t total_size;
|
||||
uint32_t received;
|
||||
uint32_t written;
|
||||
uint32_t expected_seq;
|
||||
int target_slot;
|
||||
uint8_t block_buf[OTA_UART_FLASH_BLOCK_SIZE];
|
||||
size_t block_len;
|
||||
@ -112,10 +113,30 @@ int ota_uart_prepare(uint32_t total_size) {
|
||||
return s_ota.target_slot;
|
||||
}
|
||||
|
||||
ota_feed_result_t ota_uart_feed(const uint8_t *data, size_t len) {
|
||||
bool ota_uart_block_ready_for_reack(void) {
|
||||
if (!s_ota.active) {
|
||||
return false;
|
||||
}
|
||||
return s_ota.written > 0 &&
|
||||
(s_ota.written % OTA_UART_FLASH_BLOCK_SIZE) == 0 &&
|
||||
s_ota.block_len == 0;
|
||||
}
|
||||
|
||||
ota_feed_result_t ota_uart_feed_chunk(uint32_t seq, const uint8_t *data,
|
||||
size_t len) {
|
||||
if (!s_ota.active || data == NULL || len == 0) {
|
||||
return OTA_FEED_ERROR;
|
||||
}
|
||||
if (seq < s_ota.expected_seq) {
|
||||
return OTA_FEED_SEQ_DUP;
|
||||
}
|
||||
if (seq > s_ota.expected_seq) {
|
||||
ESP_LOGW(TAG, "seq gap: got %lu expected %lu", (unsigned long)seq,
|
||||
(unsigned long)s_ota.expected_seq);
|
||||
return OTA_FEED_SEQ_GAP;
|
||||
}
|
||||
s_ota.expected_seq++;
|
||||
|
||||
if (len > OTA_UART_HOST_CHUNK_SIZE) {
|
||||
ESP_LOGW(TAG, "chunk %u > %u, truncating", (unsigned)len,
|
||||
OTA_UART_HOST_CHUNK_SIZE);
|
||||
@ -200,6 +221,13 @@ esp_err_t ota_uart_finish(bool set_boot, bool *success_out) {
|
||||
return err;
|
||||
}
|
||||
|
||||
if (s_ota.total_size > 0 && s_ota.received != s_ota.total_size) {
|
||||
ESP_LOGE(TAG, "size mismatch: received=%lu expected=%lu",
|
||||
(unsigned long)s_ota.received, (unsigned long)s_ota.total_size);
|
||||
ota_uart_abort();
|
||||
return ESP_ERR_INVALID_SIZE;
|
||||
}
|
||||
|
||||
err = esp_ota_end(s_ota.handle);
|
||||
if (err != ESP_OK) {
|
||||
ESP_LOGE(TAG, "esp_ota_end failed: %s", esp_err_to_name(err));
|
||||
|
||||
@ -28,6 +28,8 @@ typedef enum {
|
||||
/** Result codes returned by the OTA chunk feeder. */
typedef enum {
  /** Chunk handled without error (presumably buffered only — confirm in feeder). */
  OTA_FEED_OK = 0,
  /** Presumably: this chunk completed a block that was written out — confirm in feeder. */
  OTA_FEED_BLOCK_WRITTEN = 1,
  /** Sequence number lower than the expected one (duplicate chunk). */
  OTA_FEED_SEQ_DUP = 2,
  /** Sequence number higher than the expected one (a chunk was skipped). */
  OTA_FEED_SEQ_GAP = 3,
  /** Failure, e.g. no active session, NULL data or zero length. */
  OTA_FEED_ERROR = 4,
} ota_feed_result_t;
|
||||
|
||||
@ -41,8 +43,14 @@ int ota_uart_prepare(uint32_t total_size);
|
||||
|
||||
void ota_uart_abort(void);
|
||||
|
||||
/** Append up to 200 bytes; flushes 4 KiB blocks to flash when full. */
|
||||
ota_feed_result_t ota_uart_feed(const uint8_t *data, size_t len);
|
||||
/**
|
||||
* Append up to 200 bytes with strict seq checking (0, 1, 2, …).
|
||||
* Duplicates (seq < expected) return OTA_FEED_SEQ_DUP; gaps return OTA_FEED_SEQ_GAP.
|
||||
*/
|
||||
ota_feed_result_t ota_uart_feed_chunk(uint32_t seq, const uint8_t *data, size_t len);
|
||||
|
||||
/** True when a full 4 KiB block is in flash (used to re-ACK host block retries). */
|
||||
bool ota_uart_block_ready_for_reack(void);
|
||||
|
||||
uint32_t ota_uart_bytes_written(void);
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user