1
0

lnworker: fix todo, collect failed htlcs in payment

After receiving an htlc failure during a payment attempt,
`LNWallet.pay_to_node()` now waits up to
`PaySession.TIMEOUT_WAIT_FOR_NEXT_RESOLVED_HTLC` (0.5 sec) for another
htlc to get resolved, as long as other htlcs of the payment are still
in flight.
This seems to make payments more reliable in scenarios where we receive
multiple htlc failures in quick succession, because
`create_route_for_payment` then has access to the failed routes and the
failure information of all these htlcs when trying to re-split the
outstanding amount.
This commit is contained in:
f321x
2025-10-21 13:19:12 +02:00
parent b3ab732998
commit f5aa82d4f0

View File

@@ -710,6 +710,10 @@ class LNGossip(LNWorker):
class PaySession(Logger): class PaySession(Logger):
# how long we wait for another htlc to resolve after receiving a failure for one sent htlc.
TIMEOUT_WAIT_FOR_NEXT_RESOLVED_HTLC = 0.5
def __init__( def __init__(
self, self,
*, *,
@@ -754,6 +758,10 @@ class PaySession(Logger):
pkey = sha256(self.payment_key) pkey = sha256(self.payment_key)
return f"{self.payment_hash[:4].hex()}-{pkey[:2].hex()}" return f"{self.payment_hash[:4].hex()}-{pkey[:2].hex()}"
@property
def number_htlcs_inflight(self) -> int:
return self._nhtlcs_inflight
def maybe_raise_trampoline_fee(self, htlc_log: HtlcLog): def maybe_raise_trampoline_fee(self, htlc_log: HtlcLog):
if htlc_log.trampoline_fee_level == self.trampoline_fee_level: if htlc_log.trampoline_fee_level == self.trampoline_fee_level:
self.trampoline_fee_level += 1 self.trampoline_fee_level += 1
@@ -1709,46 +1717,57 @@ class LNWallet(LNWorker):
# It is also triggered here to update progress for a lightning payment in the GUI # It is also triggered here to update progress for a lightning payment in the GUI
# (e.g. attempt counter) # (e.g. attempt counter)
util.trigger_callback('invoice_status', self.wallet, payment_hash.hex(), PR_INFLIGHT) util.trigger_callback('invoice_status', self.wallet, payment_hash.hex(), PR_INFLIGHT)
# 3. await a queue # 3. await a queue, collect resolved htlcs
htlc_log = await paysession.wait_for_one_htlc_to_resolve() # TODO maybe wait a bit, more failures might come htlc_log = await paysession.wait_for_one_htlc_to_resolve()
log.append(htlc_log) while True:
if htlc_log.success: log.append(htlc_log)
if self.network.path_finder: if htlc_log.success:
# TODO: report every route to liquidity hints for mpp if self.network.path_finder:
# in the case of success, we report channels of the # TODO: report every route to liquidity hints for mpp
# route as being able to send the same amount in the future, # in the case of success, we report channels of the
# as we assume to not know the capacity # route as being able to send the same amount in the future,
self.network.path_finder.update_liquidity_hints(htlc_log.route, htlc_log.amount_msat) # as we assume to not know the capacity
# remove inflight htlcs from liquidity hints self.network.path_finder.update_liquidity_hints(htlc_log.route, htlc_log.amount_msat)
self.network.path_finder.update_inflight_htlcs(htlc_log.route, add_htlcs=False) # remove inflight htlcs from liquidity hints
return self.network.path_finder.update_inflight_htlcs(htlc_log.route, add_htlcs=False)
# htlc failed return
# if we get a tmp channel failure, it might work to split the amount and try more routes # htlc failed
# if we get a channel update, we might retry the same route and amount # if we get a tmp channel failure, it might work to split the amount and try more routes
route = htlc_log.route # if we get a channel update, we might retry the same route and amount
sender_idx = htlc_log.sender_idx route = htlc_log.route
failure_msg = htlc_log.failure_msg sender_idx = htlc_log.sender_idx
if sender_idx is None: failure_msg = htlc_log.failure_msg
raise PaymentFailure(failure_msg.code_name()) if sender_idx is None:
erring_node_id = route[sender_idx].node_id raise PaymentFailure(failure_msg.code_name())
code, data = failure_msg.code, failure_msg.data erring_node_id = route[sender_idx].node_id
self.logger.info(f"UPDATE_FAIL_HTLC. code={repr(code)}. " code, data = failure_msg.code, failure_msg.data
f"decoded_data={failure_msg.decode_data()}. data={data.hex()!r}") self.logger.info(f"UPDATE_FAIL_HTLC. code={repr(code)}. "
self.logger.info(f"error reported by {erring_node_id.hex()}") f"decoded_data={failure_msg.decode_data()}. data={data.hex()!r}")
if code == OnionFailureCode.MPP_TIMEOUT: self.logger.info(f"error reported by {erring_node_id.hex()}")
raise PaymentFailure(failure_msg.code_name()) if code == OnionFailureCode.MPP_TIMEOUT:
# errors returned by the next trampoline. raise PaymentFailure(failure_msg.code_name())
if fwd_trampoline_onion and code in [ # errors returned by the next trampoline.
OnionFailureCode.TRAMPOLINE_FEE_INSUFFICIENT, if fwd_trampoline_onion and code in [
OnionFailureCode.TRAMPOLINE_EXPIRY_TOO_SOON]: OnionFailureCode.TRAMPOLINE_FEE_INSUFFICIENT,
raise failure_msg OnionFailureCode.TRAMPOLINE_EXPIRY_TOO_SOON]:
# trampoline raise failure_msg
if self.uses_trampoline(): # trampoline
paysession.handle_failed_trampoline_htlc( if self.uses_trampoline():
htlc_log=htlc_log, failure_msg=failure_msg) paysession.handle_failed_trampoline_htlc(
else: htlc_log=htlc_log, failure_msg=failure_msg)
self.handle_error_code_from_failed_htlc( else:
route=route, sender_idx=sender_idx, failure_msg=failure_msg, amount=htlc_log.amount_msat) self.handle_error_code_from_failed_htlc(
route=route, sender_idx=sender_idx, failure_msg=failure_msg, amount=htlc_log.amount_msat)
if paysession.number_htlcs_inflight < 1:
break
# wait a bit, more failures might come
try:
htlc_log = await util.wait_for2(
paysession.wait_for_one_htlc_to_resolve(),
timeout=paysession.TIMEOUT_WAIT_FOR_NEXT_RESOLVED_HTLC)
except asyncio.TimeoutError:
break
# max attempts or timeout # max attempts or timeout
if (attempts is not None and len(log) >= attempts) or (attempts is None and time.time() - paysession.start_time > self.PAYMENT_TIMEOUT): if (attempts is not None and len(log) >= attempts) or (attempts is None and time.time() - paysession.start_time > self.PAYMENT_TIMEOUT):
raise PaymentFailure('Giving up after %d attempts'%len(log)) raise PaymentFailure('Giving up after %d attempts'%len(log))