diff --git a/doc/lightningd-config.5.md b/doc/lightningd-config.5.md index 631dd35f48a1..73d827c0eb53 100644 --- a/doc/lightningd-config.5.md +++ b/doc/lightningd-config.5.md @@ -553,6 +553,10 @@ command, so they invoices can also be paid onchain. Setting this makes `xpay` wait until all parts have failed/succeeded before returning. Usually this is unnecessary, as xpay will return on the first success (we have the preimage, if they don't take all the parts that's their problem) or failure (the destination could succeed another part, but it would mean it was only partially paid). The default is `false`. +* **askrene-timeout**=*SECONDS* [plugin `askrene`, *dynamic*] + + This option makes the `getroutes` call fail if it takes more than this many seconds. Setting it to zero is a fun way to ensure your node never makes payments. + ### Networking options Note that for simple setups, the implicit *autolisten* option does the diff --git a/plugins/askrene/askrene.c b/plugins/askrene/askrene.c index 6e9b35a4c953..8aebd167430b 100644 --- a/plugins/askrene/askrene.c +++ b/plugins/askrene/askrene.c @@ -614,13 +614,15 @@ static struct command_result *do_getroutes(struct command *cmd, /* Compute the routes. At this point we might select between multiple * algorithms. Right now there is only one algorithm available. */ struct timemono time_start = time_mono(); + struct timemono deadline = timemono_add(time_start, + time_from_sec(askrene->route_seconds)); if (info->dev_algo == ALGO_SINGLE_PATH) { - err = single_path_routes(rq, rq, srcnode, dstnode, info->amount, + err = single_path_routes(rq, rq, deadline, srcnode, dstnode, info->amount, info->maxfee, info->finalcltv, info->maxdelay, &flows, &probability); } else { assert(info->dev_algo == ALGO_DEFAULT); - err = default_routes(rq, rq, srcnode, dstnode, info->amount, + err = default_routes(rq, rq, deadline, srcnode, dstnode, info->amount, info->maxfee, info->finalcltv, info->maxdelay, &flows, &probability); } @@ -1295,7 +1297,8 @@ static const char *init(struct command *init_cmd, const char *buf UNUSED, const jsmntok_t *config UNUSED) { struct plugin *plugin = init_cmd->plugin; - struct askrene *askrene = tal(plugin, struct askrene); + struct askrene *askrene = get_askrene(plugin); + askrene->plugin = plugin; list_head_init(&askrene->layers); askrene->reserved = new_reserve_htable(askrene); @@ -1320,7 +1323,18 @@ static const char *init(struct command *init_cmd, int main(int argc, char *argv[]) { + struct askrene *askrene; setup_locale(); - plugin_main(argv, init, NULL, PLUGIN_RESTARTABLE, true, NULL, commands, ARRAY_SIZE(commands), - NULL, 0, NULL, 0, NULL, 0, NULL); + + askrene = tal(NULL, struct askrene); + askrene->route_seconds = 10; + plugin_main(argv, init, take(askrene), PLUGIN_RESTARTABLE, true, NULL, commands, ARRAY_SIZE(commands), + NULL, 0, NULL, 0, NULL, 0, + plugin_option_dynamic("askrene-timeout", + "int", + "How many seconds to try before giving up on calculating a route." + " Defaults to 10 seconds", + u32_option, u32_jsonfmt, + &askrene->route_seconds), + NULL); } diff --git a/plugins/askrene/askrene.h b/plugins/askrene/askrene.h index 21805e64ff5b..a9a809737329 100644 --- a/plugins/askrene/askrene.h +++ b/plugins/askrene/askrene.h @@ -34,6 +34,8 @@ struct askrene { struct node_id my_id; /* Aux command for layer */ struct command *layer_cmd; + /* How long before we abort trying to find a route? */ + u32 route_seconds; }; /* Information for a single route query. */ diff --git a/plugins/askrene/mcf.c b/plugins/askrene/mcf.c index 8d69511eb6b6..e41bfb05730e 100644 --- a/plugins/askrene/mcf.c +++ b/plugins/askrene/mcf.c @@ -1346,6 +1346,7 @@ static bool check_htlc_max_limits(struct route_query *rq, struct flow **flows) */ static const char * linear_routes(const tal_t *ctx, struct route_query *rq, + struct timemono deadline, const struct gossmap_node *srcnode, const struct gossmap_node *dstnode, struct amount_msat amount, struct amount_msat maxfee, u32 finalcltv, u32 maxdelay, @@ -1377,6 +1378,14 @@ linear_routes(const tal_t *ctx, struct route_query *rq, while (!amount_msat_is_zero(amount_to_deliver)) { size_t num_parts, parts_slots, excess_parts; + u32 bottleneck_idx; + + if (timemono_after(time_mono(), deadline)) { + error_message = rq_log(ctx, rq, LOG_BROKEN, + "%s: timed out after deadline", + __func__); + goto fail; + } /* FIXME: This algorithm to limit the number of parts is dumb * for two reasons: @@ -1424,7 +1433,7 @@ linear_routes(const tal_t *ctx, struct route_query *rq, } error_message = - refine_flows(ctx, rq, amount_to_deliver, &new_flows); + refine_flows(ctx, rq, amount_to_deliver, &new_flows, &bottleneck_idx); if (error_message) goto fail; @@ -1459,14 +1468,19 @@ linear_routes(const tal_t *ctx, struct route_query *rq, excess_parts = 1; } else excess_parts = 0; - if (excess_parts > 0 && - !remove_flows(&new_flows, excess_parts)) { - error_message = rq_log(ctx, rq, LOG_BROKEN, - "%s: failed to remove %zu" - " flows from a set of %zu", - __func__, excess_parts, - tal_count(new_flows)); - goto fail; + if (excess_parts > 0) { + /* If we removed all the flows we found, avoid selecting them again, + * by disabling one. */ + if (excess_parts == tal_count(new_flows)) + bitmap_set_bit(rq->disabled_chans, bottleneck_idx); + if (!remove_flows(&new_flows, excess_parts)) { + error_message = rq_log(ctx, rq, LOG_BROKEN, + "%s: failed to remove %zu" + " flows from a set of %zu", + __func__, excess_parts, + tal_count(new_flows)); + goto fail; + } } /* Is this set of flows too expensive? @@ -1635,17 +1649,19 @@ linear_routes(const tal_t *ctx, struct route_query *rq, } const char *default_routes(const tal_t *ctx, struct route_query *rq, + struct timemono deadline, const struct gossmap_node *srcnode, const struct gossmap_node *dstnode, struct amount_msat amount, struct amount_msat maxfee, u32 finalcltv, u32 maxdelay, struct flow ***flows, double *probability) { - return linear_routes(ctx, rq, srcnode, dstnode, amount, maxfee, + return linear_routes(ctx, rq, deadline, srcnode, dstnode, amount, maxfee, finalcltv, maxdelay, flows, probability, minflow); } const char *single_path_routes(const tal_t *ctx, struct route_query *rq, + struct timemono deadline, const struct gossmap_node *srcnode, const struct gossmap_node *dstnode, struct amount_msat amount, @@ -1653,7 +1669,7 @@ const char *single_path_routes(const tal_t *ctx, struct route_query *rq, u32 maxdelay, struct flow ***flows, double *probability) { - return linear_routes(ctx, rq, srcnode, dstnode, amount, maxfee, + return linear_routes(ctx, rq, deadline, srcnode, dstnode, amount, maxfee, finalcltv, maxdelay, flows, probability, single_path_flow); } diff --git a/plugins/askrene/mcf.h b/plugins/askrene/mcf.h index 448aee27a40c..7d60159063d6 100644 --- a/plugins/askrene/mcf.h +++ b/plugins/askrene/mcf.h @@ -64,6 +64,7 @@ struct amount_msat linear_flow_cost(const struct flow *flow, /* A wrapper to the min. cost flow solver that actually takes into consideration * the extra msats per channel needed to pay for fees. */ const char *default_routes(const tal_t *ctx, struct route_query *rq, + struct timemono deadline, const struct gossmap_node *srcnode, const struct gossmap_node *dstnode, struct amount_msat amount, @@ -73,6 +74,7 @@ const char *default_routes(const tal_t *ctx, struct route_query *rq, /* A wrapper to the single-path constrained solver. */ const char *single_path_routes(const tal_t *ctx, struct route_query *rq, + struct timemono deadline, const struct gossmap_node *srcnode, const struct gossmap_node *dstnode, struct amount_msat amount, diff --git a/plugins/askrene/refine.c b/plugins/askrene/refine.c index 5335a9dfaac9..00ee4f75ced9 100644 --- a/plugins/askrene/refine.c +++ b/plugins/askrene/refine.c @@ -235,16 +235,25 @@ static int revcmp_flows(const size_t *a, const size_t *b, struct flow **flows) // -> check that htlc_max are all satisfied // -> check that (x+1) at least one htlc_max is violated /* Given the channel constraints, return the maximum amount that can be - * delivered. */ -static struct amount_msat path_max_deliverable(struct channel_data *path) + * delivered. Sets *bottleneck_idx to one of the contraining channels' idx, if non-NULL */ +static struct amount_msat path_max_deliverable(struct channel_data *path, + u32 *bottleneck_idx) { struct amount_msat deliver = AMOUNT_MSAT(-1); for (size_t i = 0; i < tal_count(path); i++) { deliver = amount_msat_sub_fee(deliver, path[i].fee_base_msat, path[i].fee_proportional_millionths); - deliver = amount_msat_min(deliver, path[i].htlc_max); - deliver = amount_msat_min(deliver, path[i].liquidity_max); + if (amount_msat_greater(deliver, path[i].htlc_max)) { + if (bottleneck_idx) + *bottleneck_idx = path[i].idx; + deliver = path[i].htlc_max; + } + if (amount_msat_greater(deliver, path[i].liquidity_max)) { + if (bottleneck_idx) + *bottleneck_idx = path[i].idx; + deliver = path[i].liquidity_max; + } } return deliver; } @@ -477,9 +486,9 @@ static void write_selected_flows(const tal_t *ctx, size_t *flows_index, tal_free(tmp_flows); } -/* FIXME: on failure return error message */ const char *refine_flows(const tal_t *ctx, struct route_query *rq, - struct amount_msat deliver, struct flow ***flows) + struct amount_msat deliver, struct flow ***flows, + u32 *bottleneck_idx) { const tal_t *working_ctx = tal(ctx, tal_t); const char *error_message = NULL; @@ -499,7 +508,7 @@ const char *refine_flows(const tal_t *ctx, struct route_query *rq, for (size_t i = 0; i < tal_count(channel_mpp_cache); i++) { // FIXME: does path_max_deliverable work for a single // channel with 0 fees? - max_deliverable[i] = path_max_deliverable(channel_mpp_cache[i]); + max_deliverable[i] = path_max_deliverable(channel_mpp_cache[i], bottleneck_idx); min_deliverable[i] = path_min_deliverable(channel_mpp_cache[i]); /* We use an array of indexes to keep track of the order * of the flows. Likewise flows can be removed by simply @@ -578,7 +587,7 @@ void squash_flows(const tal_t *ctx, struct route_query *rq, struct short_channel_id_dir scidd; flows_index[i] = i; paths_str[i] = tal_strdup(working_ctx, ""); - max_deliverable[i] = path_max_deliverable(channel_mpp_cache[i]); + max_deliverable[i] = path_max_deliverable(channel_mpp_cache[i], NULL); for (size_t j = 0; j < tal_count(flow->path); j++) { scidd.scid = diff --git a/plugins/askrene/refine.h b/plugins/askrene/refine.h index c0d60109ae8d..fcd64be21be6 100644 --- a/plugins/askrene/refine.h +++ b/plugins/askrene/refine.h @@ -22,9 +22,13 @@ bool create_flow_reservations_verify(const struct route_query *rq, const struct flow *flow); /* Modify flows to meet HTLC min/max requirements. - * It takes into account the exact value of the fees expected at each hop. */ + * It takes into account the exact value of the fees expected at each hop. + * If we reduce flows because it's too large for one channel, *bottleneck_idx + * is set to the idx of a channel which caused a reduction (if non-NULL). + */ const char *refine_flows(const tal_t *ctx, struct route_query *rq, - struct amount_msat deliver, struct flow ***flows); + struct amount_msat deliver, struct flow ***flows, + u32 *bottleneck_idx); /* Duplicated flows are merged into one. This saves in base fee and HTLC fees. */ diff --git a/tests/test_askrene.py b/tests/test_askrene.py index b54b3776a63e..727083e69f20 100644 --- a/tests/test_askrene.py +++ b/tests/test_askrene.py @@ -4,7 +4,7 @@ from pyln.testing.utils import SLOW_MACHINE from utils import ( only_one, first_scid, GenChannel, generate_gossip_store, - sync_blockheight, wait_for, TEST_NETWORK, TIMEOUT + sync_blockheight, wait_for, TEST_NETWORK, TIMEOUT, mine_funding_to_announce ) import os import pytest @@ -1185,7 +1185,9 @@ def test_real_data(node_factory, bitcoind): l1, l2 = node_factory.line_graph(2, fundamount=AMOUNT, opts=[{'gossip_store_file': outfile.name, 'allow_warning': True, - 'dev-throttle-gossip': None}, + 'dev-throttle-gossip': None, + # This can be slow! + 'askrene-timeout': TIMEOUT}, {'allow_warning': True}]) # These were obviously having a bad day at the time of the snapshot: @@ -1536,3 +1538,73 @@ def test_simple_dummy_channel(node_factory): final_cltv=5, layers=["mylayer"], ) + + +def test_maxparts_infloop(node_factory, bitcoind): + # Three paths from l1 -> l5. + # FIXME: enhance explain_failure! + l1, l2, l3, l4, l5 = node_factory.get_nodes(5, opts=[{'broken_log': 'plugin-cln-askrene.*the obvious route'}] + [{}] * 4) + + for intermediate in (l2, l3, l4): + node_factory.join_nodes([l1, intermediate, l5]) + + # We create exorbitant fees into l3. + for n in (l2, l3, l4): + n.rpc.setchannel(l5.info['id'], feeppm=100000) + + mine_funding_to_announce(bitcoind, (l1, l2, l3, l4, l5)) + wait_for(lambda: len(l1.rpc.listchannels()['channels']) == 12) + + amount = 1_400_000_000 + # You can do this one + route = l1.rpc.getroutes(source=l1.info['id'], + destination=l5.info['id'], + amount_msat=amount, + layers=[], + maxfee_msat=amount, + final_cltv=5) + assert len(route['routes']) == 3 + + # Now with maxparts == 2. Usually askrene can't figure out why it failed, + # but sometimes it gets a theory. + with pytest.raises(RpcError): + l1.rpc.getroutes(source=l1.info['id'], + destination=l5.info['id'], + amount_msat=amount, + layers=[], + maxfee_msat=amount, + final_cltv=5, + maxparts=2) + + +def test_askrene_timeout(node_factory, bitcoind): + """Test askrene's route timeout""" + l1, l2 = node_factory.line_graph(2, opts=[{'broken_log': 'linear_routes: timed out after deadline'}, {}]) + + assert l1.rpc.listconfigs('askrene-timeout')['configs']['askrene-timeout']['value_int'] == 10 + l1.rpc.getroutes(source=l1.info['id'], + destination=l2.info['id'], + amount_msat=1, + layers=['auto.localchans'], + maxfee_msat=1, + final_cltv=5) + + # It will exit instantly. + l1.rpc.setconfig('askrene-timeout', 0) + + with pytest.raises(RpcError, match='linear_routes: timed out after deadline'): + l1.rpc.getroutes(source=l1.info['id'], + destination=l2.info['id'], + amount_msat=1, + layers=['auto.localchans'], + maxfee_msat=1, + final_cltv=5) + + # We can put it back though. + l1.rpc.setconfig('askrene-timeout', 10) + l1.rpc.getroutes(source=l1.info['id'], + destination=l2.info['id'], + amount_msat=1, + layers=['auto.localchans'], + maxfee_msat=1, + final_cltv=5)