pacemaker 2.1.8-2.1.8
Scalable High-Availability cluster resource manager
Loading...
Searching...
No Matches
unpack.c
Go to the documentation of this file.
1/*
2 * Copyright 2004-2024 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10#include <crm_internal.h>
11
12#include <stdio.h>
13#include <string.h>
14#include <glib.h>
15#include <time.h>
16
17#include <crm/crm.h>
18#include <crm/services.h>
19#include <crm/common/xml.h>
21
22#include <crm/common/util.h>
23#include <crm/pengine/rules.h>
25#include <pe_status_private.h>
26
28
29// A (parsed) resource action history entry
// One of these is filled in per <lrm_rsc_op> status entry when unpacking
// resource history. NOTE(review): the const char * fields presumably point
// into the entry XML rather than owning copies — confirm lifetime is tied
// to the CIB XML before caching these beyond unpacking.
30struct action_history {
31 pcmk_resource_t *rsc; // Resource that history is for
32 pcmk_node_t *node; // Node that history is for
33 xmlNode *xml; // History entry XML
34
35 // Parsed from entry XML
36 const char *id; // XML ID of history entry
37 const char *key; // Operation key of action
38 const char *task; // Action name
39 const char *exit_reason; // Exit reason given for result
40 guint interval_ms; // Action interval
41 int call_id; // Call ID of action
42 int expected_exit_status; // Expected exit status of action
43 int exit_status; // Actual exit status of action
44 int execution_status; // Execution status of action
45};
46
/* set_config_flag(scheduler, option, flag):
 * Look up cluster option `option` in the scheduler's config hash and, if the
 * option has a value, set or clear `flag` in scheduler->flags according to
 * whether the value is true. When the option is unset, the flags are left
 * unchanged. (Comments cannot be added inside the macro body because of the
 * line continuations.)
 */
 47/* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
 48 * use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
 49 * flag is stringified more readably in log messages.
 50 */
 51#define set_config_flag(scheduler, option, flag) do { \
 52 GHashTable *config_hash = (scheduler)->config_hash; \
 53 const char *scf_value = pcmk__cluster_option(config_hash, (option)); \
 54 \
 55 if (scf_value != NULL) { \
 56 if (crm_is_true(scf_value)) { \
 57 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
 58 LOG_TRACE, "Scheduler", \
 59 crm_system_name, (scheduler)->flags, \
 60 (flag), #flag); \
 61 } else { \
 62 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
 63 LOG_TRACE, "Scheduler", \
 64 crm_system_name, (scheduler)->flags, \
 65 (flag), #flag); \
 66 } \
 67 } \
 68 } while(0)
69
70static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
71 xmlNode *xml_op, xmlNode **last_failure,
72 enum action_fail_response *failed);
73static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
74 pcmk_node_t *this_node);
75static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
76 bool overwrite, pcmk_scheduler_t *scheduler);
77static void determine_online_status(const xmlNode *node_state,
78 pcmk_node_t *this_node,
80
81static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
83
84
/* Check whether a node is a "dangling" guest node: a Pacemaker Remote node
 * that has a connection resource but whose container resource has vanished
 * from both configuration and status.
 * NOTE(review): part of the condition (original lines 94-95) is missing from
 * this extraction; the visible clauses are incomplete.
 */
 85static gboolean
 86is_dangling_guest_node(pcmk_node_t *node)
 87{
 88 /* we are looking for a remote-node that was supposed to be mapped to a
 89 * container resource, but all traces of that container have disappeared
 90 * from both the config and the status section. */
 91 if (pcmk__is_pacemaker_remote_node(node)
 92 && (node->details->remote_rsc != NULL)
 93 && (node->details->remote_rsc->container == NULL)
 96 return TRUE;
 97 }
 98
 99 return FALSE;
100}
101
/*!
 * \internal
 * \brief Schedule fencing (or its equivalent) for a node
 *
 * Guest nodes are "fenced" by recovering their container resource (the node
 * is not marked unclean, so it may run resources again if recovery succeeds);
 * dangling guest nodes just get their stale connection cleaned up; remote
 * nodes are marked for connection reset and unclean; cluster nodes are marked
 * unclean and get a fence operation scheduled via pe_fence_op().
 *
 * NOTE(review): the signature line and parts of the guest-node branch
 * (original lines 112, 119, 137-138, 147-148) are missing from this
 * extraction — consult the original source before editing.
 */
111void
113 const char *reason, bool priority_delay)
114{
115 CRM_CHECK(node, return);
116
117 /* A guest node is fenced by marking its container as failed */
118 if (pcmk__is_guest_or_bundle_node(node)) {
120
121 if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
122 if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
123 crm_notice("Not fencing guest node %s "
124 "(otherwise would because %s): "
125 "its guest resource %s is unmanaged",
126 pcmk__node_name(node), reason, rsc->id);
127 } else {
128 pcmk__sched_warn("Guest node %s will be fenced "
129 "(by recovering its guest resource %s): %s",
130 pcmk__node_name(node), rsc->id, reason);
131
132 /* We don't mark the node as unclean because that would prevent the
133 * node from running resources. We want to allow it to run resources
134 * in this transition if the recovery succeeds.
135 */
136 node->details->remote_requires_reset = TRUE;
139 }
140 }
141
142 } else if (is_dangling_guest_node(node)) {
143 crm_info("Cleaning up dangling connection for guest node %s: "
144 "fencing was already done because %s, "
145 "and guest resource no longer exists",
146 pcmk__node_name(node), reason);
149
150 } else if (pcmk__is_remote_node(node)) {
151 pcmk_resource_t *rsc = node->details->remote_rsc;
152
153 if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
154 crm_notice("Not fencing remote node %s "
155 "(otherwise would because %s): connection is unmanaged",
156 pcmk__node_name(node), reason);
157 } else if(node->details->remote_requires_reset == FALSE) {
158 node->details->remote_requires_reset = TRUE;
159 pcmk__sched_warn("Remote node %s %s: %s",
160 pcmk__node_name(node),
161 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
162 reason);
163 }
164 node->details->unclean = TRUE;
165 // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
166 pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
167
168 } else if (node->details->unclean) {
169 crm_trace("Cluster node %s %s because %s",
170 pcmk__node_name(node),
171 pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
172 reason);
173
174 } else {
175 pcmk__sched_warn("Cluster node %s %s: %s",
176 pcmk__node_name(node),
177 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
178 reason);
179 node->details->unclean = TRUE;
180 pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
181 }
182}
183
184// @TODO xpaths can't handle templates, rules, or id-refs
185
186// nvpair with provides or requires set to unfencing
187#define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \
188 "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \
189 "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \
190 "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
191
192// unfencing in rsc_defaults or any resource
193#define XPATH_ENABLE_UNFENCING \
194 "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \
195 "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \
196 "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
197 "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
198
/* Set scheduler flag `flag` if the given XPath expression matches anything in
 * the input CIB. Used to enable unfencing-related flags before the relevant
 * configuration sections are unpacked.
 * NOTE(review): the xpath query call, the flag-setting statement, and the
 * freeing of the xpath result (original lines 205, 207, 209) are missing
 * from this extraction.
 */
199static void
200set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
201{
202 xmlXPathObjectPtr result = NULL;
203
204 if (!pcmk_is_set(scheduler->flags, flag)) {
206 if (result && (numXpathResults(result) > 0)) {
208 }
210 }
211}
212
/*!
 * \internal
 * \brief Unpack cluster-wide configuration into scheduler flags and fields
 *
 * Builds the scheduler's config hash, then derives stonith timeout/action,
 * priority fencing delay, no-quorum policy, placement strategy, shutdown
 * locks, pending-node fencing, and related flags, logging each effective
 * setting. Always returns TRUE.
 *
 * NOTE(review): this extraction is missing many lines (the signature line,
 * most pcmk__cluster_option() lookups, and the set_config_flag() calls);
 * treat the text below as a skeleton and consult the original source.
 */
213gboolean
215{
216 const char *value = NULL;
217 guint interval_ms = 0U;
218 GHashTable *config_hash = pcmk__strkey_table(free, free);
219
220 pe_rule_eval_data_t rule_data = {
221 .node_hash = NULL,
222 .now = scheduler->now,
223 .match_data = NULL,
224 .rsc_data = NULL,
225 .op_data = NULL
226 };
227
228 scheduler->config_hash = config_hash;
229
232 FALSE, scheduler);
233
235
239 crm_info("Startup probes: disabled (dangerous)");
240 }
241
242 value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
243 if (value && crm_is_true(value)) {
244 crm_info("Watchdog-based self-fencing will be performed via SBD if "
245 "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
246 " is nonzero");
248 }
249
250 /* Set certain flags via xpath here, so they can be used before the relevant
251 * configuration sections are unpacked.
252 */
254 scheduler);
255
257 pcmk_parse_interval_spec(value, &interval_ms);
258
// Clamp the parsed interval so it fits the int-typed stonith_timeout field
259 if (interval_ms >= INT_MAX) {
260 scheduler->stonith_timeout = INT_MAX;
261 } else {
262 scheduler->stonith_timeout = (int) interval_ms;
263 }
264 crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);
265
269 crm_debug("STONITH of failed nodes is enabled");
270 } else {
271 crm_debug("STONITH of failed nodes is disabled");
272 }
273
278 "Support for " PCMK_OPT_STONITH_ACTION " of "
279 "'" PCMK__ACTION_POWEROFF "' is deprecated and will be "
280 "removed in a future release "
281 "(use '" PCMK_ACTION_OFF "' instead)");
283 }
284 crm_trace("STONITH will %s nodes", scheduler->stonith_action);
285
289 crm_debug("Concurrent fencing is enabled");
290 } else {
291 crm_debug("Concurrent fencing is disabled");
292 }
293
295 if (value) {
296 pcmk_parse_interval_spec(value, &interval_ms);
// Delay is stored in whole seconds (milliseconds / 1000)
297 scheduler->priority_fencing_delay = (int) (interval_ms / 1000);
298 crm_trace("Priority fencing delay is %ds",
300 }
301
304 crm_debug("Stop all active resources: %s",
305 pcmk__flag_text(scheduler->flags, pcmk_sched_stop_all));
306
310 crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
311 }
312
314
// Map the no-quorum-policy option value to the scheduler's policy enum;
// the fence policy downgrades to 'stop' when quorum was never held or
// fencing is disabled.
315 if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
317
318 } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
320
321 } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
323
324 } else if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)) {
326 int do_panic = 0;
327
329 &do_panic);
330 if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
332 } else {
334 " to 'stop': cluster has never had quorum");
336 }
337 } else {
339 " to 'stop' because fencing is disabled");
341 }
342
343 } else {
345 }
346
347 switch (scheduler->no_quorum_policy) {
349 crm_debug("On loss of quorum: Freeze resources");
350 break;
352 crm_debug("On loss of quorum: Stop ALL resources");
353 break;
355 crm_debug("On loss of quorum: "
356 "Demote promotable resources and stop other resources");
357 break;
359 crm_notice("On loss of quorum: Fence all remaining nodes");
360 break;
362 crm_notice("On loss of quorum: Ignore");
363 break;
364 }
365
369 crm_trace("Orphan resources are stopped");
370 } else {
371 crm_trace("Orphan resources are ignored");
372 }
373
377 crm_trace("Orphan resource actions are stopped");
378 } else {
379 crm_trace("Orphan resource actions are ignored");
380 }
381
383 if (value != NULL) {
384 if (crm_is_true(value)) {
387 "Support for the " PCMK__OPT_REMOVE_AFTER_STOP
388 " cluster property is deprecated and will be "
389 "removed in a future release");
390 } else {
393 }
394 }
395
398 crm_trace("Maintenance mode: %s",
399 pcmk__flag_text(scheduler->flags, pcmk_sched_in_maintenance));
400
404 crm_trace("Start failures are always fatal");
405 } else {
406 crm_trace("Start failures are handled by failcount");
407 }
408
412 }
414 crm_trace("Unseen nodes will be fenced");
415 } else {
417 "Blind faith: not fencing unseen nodes");
418 }
419
421
424 crm_trace("Placement strategy: %s", scheduler->placement_strategy);
425
431 scheduler->shutdown_lock /= 1000;
432 crm_trace("Resources will be locked to nodes that were cleanly "
433 "shut down (locks expire after %s)",
435 } else {
436 crm_trace("Resources will not be locked to nodes that were cleanly "
437 "shut down");
438 }
439
444 crm_trace("Do not fence pending nodes");
445 } else {
446 crm_trace("Fence pending nodes after %s",
448 * 1000));
449 }
450
451 return TRUE;
452}
453
/*!
 * \internal
 * \brief Create a node object and add it to the scheduler's node list
 *
 * Allocates a node and its shared details, derives the node variant from
 * \p type ('member', 'remote', or the deprecated default 'ping'), seeds the
 * attribute table with CRM_ATTR_KIND, and inserts the node into
 * scheduler->nodes in sorted order.
 *
 * Stores the caller's \p id and \p uname pointers directly (no copy) — the
 * caller must keep them valid for the node's lifetime. Warns if a node with
 * the same name already exists. Returns NULL (after logging) on allocation
 * failure.
 *
 * NOTE(review): the return-type line and the member/remote type-assignment
 * lines (original lines 454, 489-490, 493-494, 505, 523, 526) are missing
 * from this extraction.
 */
455pe_create_node(const char *id, const char *uname, const char *type,
456 const char *score, pcmk_scheduler_t *scheduler)
457{
458 pcmk_node_t *new_node = NULL;
459
460 if (pcmk_find_node(scheduler, uname) != NULL) {
461 pcmk__config_warn("More than one node entry has name '%s'", uname);
462 }
463
464 new_node = calloc(1, sizeof(pcmk_node_t));
465 if (new_node == NULL) {
466 pcmk__sched_err("Could not allocate memory for node %s", uname);
467 return NULL;
468 }
469
470 new_node->weight = char2score(score);
471 new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
472
473 if (new_node->details == NULL) {
474 free(new_node);
475 pcmk__sched_err("Could not allocate memory for node %s", uname);
476 return NULL;
477 }
478
479 crm_trace("Creating node for entry %s/%s", uname, id);
480 new_node->details->id = id;
481 new_node->details->uname = uname;
482 new_node->details->online = FALSE;
483 new_node->details->shutdown = FALSE;
484 new_node->details->rsc_discovery_enabled = TRUE;
485 new_node->details->running_rsc = NULL;
486 new_node->details->data_set = scheduler;
487
488 if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
491
492 } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
495
496 } else {
497 /* @COMPAT 'ping' is the default for backward compatibility, but it
498 * should be changed to 'member' at a compatibility break
499 */
500 if (!pcmk__str_eq(type, PCMK__VALUE_PING, pcmk__str_casei)) {
501 pcmk__config_warn("Node %s has unrecognized type '%s', "
502 "assuming '" PCMK__VALUE_PING "'",
503 pcmk__s(uname, "without name"), type);
504 }
506 "Support for nodes of type '" PCMK__VALUE_PING "' "
507 "(such as %s) is deprecated and will be removed in a "
508 "future release",
509 pcmk__s(uname, "unnamed node"));
510 new_node->details->type = node_ping;
511 }
512
513 new_node->details->attrs = pcmk__strkey_table(free, free);
514
515 if (pcmk__is_pacemaker_remote_node(new_node)) {
516 pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "remote");
517 } else {
518 pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "cluster");
519 }
520
521 new_node->details->utilization = pcmk__strkey_table(free, free);
522 new_node->details->digest_cache = pcmk__strkey_table(free,
524
525 scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
527 return new_node;
528}
529
530static const char *
531expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
532{
533 xmlNode *attr_set = NULL;
534 xmlNode *attr = NULL;
535
536 const char *container_id = pcmk__xe_id(xml_obj);
537 const char *remote_name = NULL;
538 const char *remote_server = NULL;
539 const char *remote_port = NULL;
540 const char *connect_timeout = "60s";
541 const char *remote_allow_migrate=NULL;
542 const char *is_managed = NULL;
543
544 for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
545 attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) {
546
547 if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) {
548 continue;
549 }
550
551 for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
552 attr != NULL; attr = pcmk__xe_next(attr)) {
553
554 const char *value = crm_element_value(attr, PCMK_XA_VALUE);
555 const char *name = crm_element_value(attr, PCMK_XA_NAME);
556
557 if (name == NULL) { // Sanity
558 continue;
559 }
560
561 if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
562 remote_name = value;
563
564 } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
565 remote_server = value;
566
567 } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
568 remote_port = value;
569
570 } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
571 connect_timeout = value;
572
573 } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
574 remote_allow_migrate = value;
575
576 } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
577 is_managed = value;
578 }
579 }
580 }
581
582 if (remote_name == NULL) {
583 return NULL;
584 }
585
586 if (pe_find_resource(data->resources, remote_name) != NULL) {
587 return NULL;
588 }
589
590 pe_create_remote_xml(parent, remote_name, container_id,
591 remote_allow_migrate, is_managed,
592 connect_timeout, remote_server, remote_port);
593 return remote_name;
594}
595
/* Initialize a newly created node's unclean/unseen flags according to the
 * startup-fencing policy: either treat it as unclean until its status entry
 * is seen, or trust it blindly. Remote node entries without a connection
 * resource are skipped entirely.
 * NOTE(review): the condition choosing between the two branches (original
 * line 608, presumably the startup-fencing scheduler flag) is missing from
 * this extraction — confirm against the original source.
 */
596static void
597handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
598{
599 if ((new_node->details->type == pcmk_node_variant_remote)
600 && (new_node->details->remote_rsc == NULL)) {
601 /* Ignore fencing for remote nodes that don't have a connection resource
602 * associated with them. This happens when remote node entries get left
603 * in the nodes section after the connection resource is removed.
604 */
605 return;
606 }
607
609 // All nodes are unclean until we've seen their status entry
610 new_node->details->unclean = TRUE;
611
612 } else {
613 // Blind faith ...
614 new_node->details->unclean = FALSE;
615 }
616
617 /* We need to be able to determine if a node's status section
618 * exists or not separate from whether the node is unclean. */
619 new_node->details->unseen = TRUE;
620}
621
/*!
 * \internal
 * \brief Unpack the configuration's <nodes> section into node objects
 *
 * Creates a node object for each <node> element (skipping entries without an
 * ID), applies startup-fencing policy and node attributes to each, and —
 * per the trailing branch — creates a fake local node when needed. Returns
 * FALSE only if node creation fails.
 *
 * NOTE(review): the signature line and several others (original lines 623,
 * 639-640 reading uname/type, 660, 664-665, 667) are missing from this
 * extraction.
 */
622gboolean
624{
625 xmlNode *xml_obj = NULL;
626 pcmk_node_t *new_node = NULL;
627 const char *id = NULL;
628 const char *uname = NULL;
629 const char *type = NULL;
630 const char *score = NULL;
631
632 for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL);
633 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
634
635 if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) {
636 new_node = NULL;
637
638 id = crm_element_value(xml_obj, PCMK_XA_ID);
641 score = crm_element_value(xml_obj, PCMK_XA_SCORE);
642 crm_trace("Processing node %s/%s", uname, id);
643
644 if (id == NULL) {
645 pcmk__config_err("Ignoring <" PCMK_XE_NODE
646 "> entry in configuration without id");
647 continue;
648 }
649 new_node = pe_create_node(id, uname, type, score, scheduler);
650
651 if (new_node == NULL) {
652 return FALSE;
653 }
654
655 handle_startup_fencing(scheduler, new_node);
656
657 add_node_attrs(xml_obj, new_node, FALSE, scheduler);
658
659 crm_trace("Done with node %s",
661 }
662 }
663
666 crm_info("Creating a fake local node");
668 scheduler);
669 }
670
671 return TRUE;
672}
673
/* Link a resource to its container resource (per the PCMK__META_CONTAINER
 * meta-attribute), recursing into children for collective resources. Also
 * appends the resource to the container's filler list. Logs a configuration
 * error if the named container does not exist.
 * NOTE(review): the line declaring and looking up `container` (original
 * lines 686 and 691) is missing from this extraction.
 */
674static void
675setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
676{
677 const char *container_id = NULL;
678
679 if (rsc->children) {
680 g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
681 return;
682 }
683
684 container_id = g_hash_table_lookup(rsc->meta, PCMK__META_CONTAINER);
685 if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
687 container_id);
688
689 if (container) {
690 rsc->container = container;
692 container->fillers = g_list_append(container->fillers, rsc);
693 pcmk__rsc_trace(rsc, "Resource %s's container is %s",
694 rsc->id, container_id);
695 } else {
696 pcmk__config_err("Resource %s: Unknown resource container (%s)",
697 rsc->id, container_id);
698 }
699 }
700}
701
/*!
 * \internal
 * \brief Create node objects for Pacemaker Remote and guest nodes
 *
 * Walks the resource configuration before resources are unpacked, creating a
 * remote-variant node for each ocf:pacemaker:remote primitive and for each
 * primitive (standalone or inside a group) whose meta-attributes define a
 * guest node. Always returns TRUE.
 *
 * NOTE(review): the signature line (original line 703) is missing from this
 * extraction; the body reads parameters `xml_resources` and `scheduler`.
 */
702gboolean
704{
705 xmlNode *xml_obj = NULL;
706
707 /* Create remote nodes and guest nodes from the resource configuration
708 * before unpacking resources.
709 */
710 for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
711 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
712
713 const char *new_node_id = NULL;
714
715 /* Check for remote nodes, which are defined by ocf:pacemaker:remote
716 * primitives.
717 */
718 if (xml_contains_remote_node(xml_obj)) {
719 new_node_id = pcmk__xe_id(xml_obj);
720 /* The pcmk_find_node() check ensures we don't iterate over an
721 * expanded node that has already been added to the node list
722 */
723 if (new_node_id
724 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
725 crm_trace("Found remote node %s defined by resource %s",
726 new_node_id, pcmk__xe_id(xml_obj));
727 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
728 NULL, scheduler);
729 }
730 continue;
731 }
732
733 /* Check for guest nodes, which are defined by special meta-attributes
734 * of a primitive of any type (for example, VirtualDomain or Xen).
735 */
736 if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
737 /* This will add an ocf:pacemaker:remote primitive to the
738 * configuration for the guest node's connection, to be unpacked
739 * later.
740 */
741 new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
742 scheduler);
743 if (new_node_id
744 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
745 crm_trace("Found guest node %s in resource %s",
746 new_node_id, pcmk__xe_id(xml_obj));
747 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
748 NULL, scheduler);
749 }
750 continue;
751 }
752
753 /* Check for guest nodes inside a group. Clones are currently not
754 * supported as guest nodes.
755 */
756 if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
757 xmlNode *xml_obj2 = NULL;
758 for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
759 xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) {

760
761 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
762 scheduler);
763
764 if (new_node_id
765 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
766 crm_trace("Found guest node %s in resource %s inside group %s",
767 new_node_id, pcmk__xe_id(xml_obj2),
768 pcmk__xe_id(xml_obj));
769 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
770 NULL, scheduler);
771 }
772 }
773 }
774 }
775 return TRUE;
776}
777
778/* Call this after all the nodes and resources have been
779 * unpacked, but before the status section is read.
780 *
781 * A remote node's online status is reflected by the state
782 * of the remote node's connection resource. We need to link
783 * the remote node to this connection resource so we can have
784 * easy access to the connection resource during the scheduler calculations.
785 */
/* NOTE(review): the guard condition before the "quick location calculations"
 * early return (original line 795) is missing from this extraction.
 */
786static void
787link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
788{
789 pcmk_node_t *remote_node = NULL;
790
791 if (new_rsc->is_remote_node == FALSE) {
792 return;
793 }
794
796 /* remote_nodes and remote_resources are not linked in quick location calculations */
797 return;
798 }
799
800 remote_node = pcmk_find_node(scheduler, new_rsc->id);
801 CRM_CHECK(remote_node != NULL, return);
802
803 pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
804 new_rsc->id, pcmk__node_name(remote_node));
805 remote_node->details->remote_rsc = new_rsc;
806
807 if (new_rsc->container == NULL) {
808 /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
809 * the same as is done for cluster nodes.
810 */
811 handle_startup_fencing(scheduler, remote_node);
812
813 } else {
814 /* pe_create_node() marks the new node as "remote" or "cluster"; now
815 * that we know the node is a guest node, update it correctly.
816 */
817 pcmk__insert_dup(remote_node->details->attrs,
818 CRM_ATTR_KIND, "container");
819 }
820}
821
822static void
823destroy_tag(gpointer data)
824{
825 pcmk_tag_t *tag = data;
826
827 if (tag) {
828 free(tag->id);
829 g_list_free_full(tag->refs, free);
830 free(tag);
831 }
832}
833
/*!
 * \internal
 * \brief Unpack the configuration's <resources> section
 *
 * Records template IDs, unpacks each resource entry into a resource object
 * (skipping entries without an ID or with invalid configuration), then links
 * resources to their containers and remote nodes, sorts the resource list,
 * and emits configuration errors when fencing resources are required but
 * none are defined. Always returns TRUE.
 *
 * NOTE(review): several lines are missing from this extraction (original
 * lines 870 inserting the template ID, 896-897 the sort comparator, and
 * 900-901 the condition guarding the STONITH error messages).
 */
846gboolean
847unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
848{
849 xmlNode *xml_obj = NULL;
850 GList *gIter = NULL;
851
852 scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
853
854 for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
855 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
856
857 pcmk_resource_t *new_rsc = NULL;
858 const char *id = pcmk__xe_id(xml_obj);
859
860 if (pcmk__str_empty(id)) {
861 pcmk__config_err("Ignoring <%s> resource without ID",
862 xml_obj->name);
863 continue;
864 }
865
866 if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
867 if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
868 NULL, NULL) == FALSE) {
869 /* Record the template's ID for the knowledge of its existence anyway. */
871 }
872 continue;
873 }
874
875 crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
876 if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
877 scheduler) == pcmk_rc_ok) {
878 scheduler->resources = g_list_append(scheduler->resources, new_rsc);
879 pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
880
881 } else {
882 pcmk__config_err("Ignoring <%s> resource '%s' "
883 "because configuration is invalid",
884 xml_obj->name, id);
885 }
886 }
887
888 for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) {
889 pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
890
891 setup_container(rsc, scheduler);
892 link_rsc2remotenode(scheduler, rsc);
893 }
894
895 scheduler->resources = g_list_sort(scheduler->resources,
898 /* Ignore */
899
902
903 pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
904 pcmk__config_err("Either configure some or disable STONITH with the "
905 PCMK_OPT_STONITH_ENABLED " option");
906 pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
907 }
908
909 return TRUE;
910}
911
/*!
 * \internal
 * \brief Validate fencing-level entries in the fencing topology section
 *
 * Iterates over each PCMK_XE_FENCING_LEVEL element, skipping (with a
 * warning) entries that lack an ID or whose level index falls outside
 * [ST__LEVEL_MIN, ST__LEVEL_MAX].
 *
 * NOTE(review): the line reading the level index into `id` (original line
 * 930) and any per-entry registration code at the loop's end (original
 * line 944) are missing from this extraction.
 */
921void
922pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
923{
924 xmlNode *xml_obj = NULL;
925 int id = 0;
926
927 for (xml_obj = pcmk__xe_first_child(xml_fencing_topology, PCMK_XE_FENCING_LEVEL, NULL, NULL);
928 xml_obj != NULL; xml_obj = pcmk__xe_next_same(xml_obj)) {

931
932 // Ensure an ID was given
933 if (pcmk__str_empty(pcmk__xe_id(xml_obj))) {
934 pcmk__config_warn("Ignoring registration for topology level without ID");
935 continue;
936 }
937
938 // Ensure level ID is in allowed range
939 if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
940 pcmk__config_warn("Ignoring topology registration with invalid level %d",
941 id);
942 continue;
943 }
944
945 }
946}
947
/*!
 * \internal
 * \brief Unpack the configuration's <tags> section
 *
 * Builds scheduler->tags as a table mapping tag ID to its object references,
 * skipping <tag> elements (and <obj_ref> children) without an ID. Returns
 * FALSE only if adding a tag reference fails.
 *
 * NOTE(review): the signature line (original line 949) is missing from this
 * extraction; the body reads parameters `xml_tags` and `scheduler`.
 */
948gboolean
950{
951 xmlNode *xml_tag = NULL;
952
953 scheduler->tags = pcmk__strkey_table(free, destroy_tag);
954
955 for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL);
956 xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) {

957
958 xmlNode *xml_obj_ref = NULL;
959 const char *tag_id = pcmk__xe_id(xml_tag);
960
961 if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) {
962 continue;
963 }
964
965 if (tag_id == NULL) {
966 pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
967 (const char *) xml_tag->name);
968 continue;
969 }
970
971 for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL);
972 xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {

973
974 const char *obj_ref = pcmk__xe_id(xml_obj_ref);
975
976 if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) {
977 continue;
978 }
979
980 if (obj_ref == NULL) {
981 pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
982 xml_obj_ref->name, tag_id);
983 continue;
984 }
985
986 if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) {
987 return FALSE;
988 }
989 }
990 }
991
992 return TRUE;
993}
994
995/* The ticket state section:
996 * "/cib/status/tickets/ticket_state" */
997static gboolean
998unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
999{
1000 const char *ticket_id = NULL;
1001 const char *granted = NULL;
1002 const char *last_granted = NULL;
1003 const char *standby = NULL;
1004 xmlAttrPtr xIter = NULL;
1005
1006 pcmk_ticket_t *ticket = NULL;
1007
1008 ticket_id = pcmk__xe_id(xml_ticket);
1009 if (pcmk__str_empty(ticket_id)) {
1010 return FALSE;
1011 }
1012
1013 crm_trace("Processing ticket state for %s", ticket_id);
1014
1015 ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
1016 if (ticket == NULL) {
1017 ticket = ticket_new(ticket_id, scheduler);
1018 if (ticket == NULL) {
1019 return FALSE;
1020 }
1021 }
1022
1023 for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1024 const char *prop_name = (const char *)xIter->name;
1025 const char *prop_value = pcmk__xml_attr_value(xIter);
1026
1027 if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1028 continue;
1029 }
1030 pcmk__insert_dup(ticket->state, prop_name, prop_value);
1031 }
1032
1033 granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1034 if (granted && crm_is_true(granted)) {
1035 ticket->granted = TRUE;
1036 crm_info("We have ticket '%s'", ticket->id);
1037 } else {
1038 ticket->granted = FALSE;
1039 crm_info("We do not have ticket '%s'", ticket->id);
1040 }
1041
1042 last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1043 if (last_granted) {
1044 long long last_granted_ll;
1045
1046 pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1047 ticket->last_granted = (time_t) last_granted_ll;
1048 }
1049
1050 standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1051 if (standby && crm_is_true(standby)) {
1052 ticket->standby = TRUE;
1053 if (ticket->granted) {
1054 crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
1055 }
1056 } else {
1057 ticket->standby = FALSE;
1058 }
1059
1060 crm_trace("Done with ticket state for %s", ticket_id);
1061
1062 return TRUE;
1063}
1064
1065static gboolean
1066unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
1067{
1068 xmlNode *xml_obj = NULL;
1069
1070 for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL);
1071 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
1072
1073 if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) {
1074 continue;
1075 }
1076 unpack_ticket_state(xml_obj, scheduler);
1077 }
1078
1079 return TRUE;
1080}
1081
/*!
 * \internal
 * \brief Unpack a node_state entry's attributes for a Pacemaker Remote node
 *
 * For a remote/guest node's node_state entry: reads remote maintenance
 * state, clears the provisional unclean/unseen flags (unless a reset is
 * already required), applies transient attributes, and derives the
 * shutdown, standby, maintenance, and resource-discovery settings.
 *
 * NOTE(review): this extraction is missing several lines (original lines
 * 1084 the scheduler parameter, 1100, 1108, 1117-1118, 1123-1124,
 * 1131-1132, 1134, 1136, 1141, 1143), including the attribute names being
 * read — consult the original source before editing.
 */
1082static void
1083unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1085{
1086 const char *discovery = NULL;
1087 const xmlNode *attrs = NULL;
1088 pcmk_resource_t *rsc = NULL;
1089
1090 if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1091 return;
1092 }
1093
1094 if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1095 return;
1096 }
1097 crm_trace("Processing Pacemaker Remote node %s",
1098 pcmk__node_name(this_node));
1099
1101 &(this_node->details->remote_maintenance), 0);
1102
1103 rsc = this_node->details->remote_rsc;
1104 if (this_node->details->remote_requires_reset == FALSE) {
1105 this_node->details->unclean = FALSE;
1106 this_node->details->unseen = FALSE;
1107 }
1109 NULL);
1110 add_node_attrs(attrs, this_node, TRUE, scheduler);
1111
1112 if (pe__shutdown_requested(this_node)) {
1113 crm_info("%s is shutting down", pcmk__node_name(this_node));
1114 this_node->details->shutdown = TRUE;
1115 }
1116
1119 crm_info("%s is in standby mode", pcmk__node_name(this_node));
1120 this_node->details->standby = TRUE;
1121 }
1122
1125 || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
1126 crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
1127 this_node->details->maintenance = TRUE;
1128 }
1129
1130 discovery = pcmk__node_attr(this_node,
1133 if ((discovery != NULL) && !crm_is_true(discovery)) {
1135 "Support for the "
1137 " node attribute is deprecated and will be removed"
1138 " (and behave as 'true') in a future release.");
1139
1140 if (pcmk__is_remote_node(this_node)
1142 pcmk__config_warn("Ignoring "
1144 " attribute on Pacemaker Remote node %s"
1145 " because fencing is disabled",
1146 pcmk__node_name(this_node));
1147 } else {
1148 /* This is either a remote node with fencing enabled, or a guest
1149 * node. We don't care whether fencing is enabled when fencing guest
1150 * nodes, because they are "fenced" by recovering their containing
1151 * resource.
1152 */
1153 crm_info("%s has resource discovery disabled",
1154 pcmk__node_name(this_node));
1155 this_node->details->rsc_discovery_enabled = FALSE;
1156 }
1157 }
1158}
1159
/*!
 * \internal
 * \brief Unpack a cluster node's transient attributes from its node_state
 *
 * Applies the entry's transient attributes to the node, then derives the
 * standby and maintenance flags, and warns if resource discovery is
 * disabled (which is not allowed for cluster nodes).
 *
 * NOTE(review): several lines are missing from this extraction (original
 * lines 1170 the scheduler parameter, 1174, 1179-1180, 1185-1186, 1192-1193,
 * 1196), including the attribute names being read.
 */
1168static void
1169unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1171{
1172 const char *discovery = NULL;
1173 const xmlNode *attrs = pcmk__xe_first_child(state,
1175 NULL, NULL);
1176
1177 add_node_attrs(attrs, node, TRUE, scheduler);
1178
1181 crm_info("%s is in standby mode", pcmk__node_name(node));
1182 node->details->standby = TRUE;
1183 }
1184
1187 crm_info("%s is in maintenance mode", pcmk__node_name(node));
1188 node->details->maintenance = TRUE;
1189 }
1190
1191 discovery = pcmk__node_attr(node,
1194 if ((discovery != NULL) && !crm_is_true(discovery)) {
1195 pcmk__config_warn("Ignoring "
1197 " attribute for %s because disabling resource"
1198 " discovery is not allowed for cluster nodes",
1199 pcmk__node_name(node));
1200 }
1201}
1202
/*!
 * \internal
 * \brief Unpack a node_state entry (first pass)
 *
 * Validates the entry's ID and name, finds the corresponding node object,
 * and: for Pacemaker Remote nodes only records whether the node was fenced
 * (online status is determined later); for cluster nodes unpacks transient
 * attributes, provisionally marks the node clean/seen, determines its online
 * status, and fences it if it is online without quorum under the fence
 * no-quorum policy.
 *
 * NOTE(review): a few lines are missing from this extraction (original
 * lines 1230 reading PCMK_XA_UNAME, 1256, and 1273/1275 — parts of the
 * quorum-policy condition).
 */
1215static void
1216unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1217{
1218 const char *id = NULL;
1219 const char *uname = NULL;
1220 pcmk_node_t *this_node = NULL;
1221
1222 id = crm_element_value(state, PCMK_XA_ID);
1223 if (id == NULL) {
1224 pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1225 PCMK_XA_ID);
1226 crm_log_xml_info(state, "missing-id");
1227 return;
1228 }
1229
1231 if (uname == NULL) {
1232 /* If a joining peer makes the cluster acquire the quorum from corosync
1233 * meanwhile it has not joined CPG membership of pacemaker-controld yet,
1234 * it's possible that the created PCMK__XE_NODE_STATE entry doesn't have
1235 * a PCMK_XA_UNAME yet. We should recognize the node as `pending` and
1236 * wait for it to join CPG.
1237 */
1238 crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1239 "without " PCMK_XA_UNAME,
1240 id);
1241 }
1242
1243 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1244 if (this_node == NULL) {
1245 crm_notice("Ignoring recorded state for removed node with name %s and "
1246 PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
1247 return;
1248 }
1249
1250 if (pcmk__is_pacemaker_remote_node(this_node)) {
1251 /* We can't determine the online status of Pacemaker Remote nodes until
1252 * after all resource history has been unpacked. In this first pass, we
1253 * do need to mark whether the node has been fenced, as this plays a
1254 * role during unpacking cluster node resource state.
1255 */
1257 &(this_node->details->remote_was_fenced), 0);
1258 return;
1259 }
1260
1261 unpack_transient_attributes(state, this_node, scheduler);
1262
1263 /* Provisionally mark this cluster node as clean. We have at least seen it
1264 * in the current cluster's lifetime.
1265 */
1266 this_node->details->unclean = FALSE;
1267 this_node->details->unseen = FALSE;
1268
1269 crm_trace("Determining online status of cluster node %s (id %s)",
1270 pcmk__node_name(this_node), id);
1271 determine_online_status(state, this_node, scheduler);
1272
1274 && this_node->details->online
1276 /* Everything else should flow from this automatically
1277 * (at least until the scheduler becomes able to migrate off
1278 * healthy resources)
1279 */
1280 pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1281 FALSE);
1282 }
1283}
1284
/* Unpack resource history for all node_state entries whose prerequisites are
 * satisfied. Guest/remote node history is deferred until the corresponding
 * connection (and container) is known to be up; offline cluster nodes may be
 * deferred so active clone instances get numbered first. Returns EAGAIN if
 * any node's history was unpacked this pass (callers loop until pcmk_rc_ok),
 * since newly unpacked history may unblock other nodes.
 */
1302static int
1303unpack_node_history(const xmlNode *status, bool fence,
1305{
1306 int rc = pcmk_rc_ok;
1307
1308 // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1309 for (const xmlNode *state = pcmk__xe_first_child(status,
1310 PCMK__XE_NODE_STATE, NULL,
1311 NULL);
1312 state != NULL; state = pcmk__xe_next_same(state)) {
1313
1314 const char *id = pcmk__xe_id(state);
1315 const char *uname = crm_element_value(state, PCMK_XA_UNAME);
1316 pcmk_node_t *this_node = NULL;
1317
1318 if ((id == NULL) || (uname == NULL)) {
1319 // Warning already logged in first pass through status section
1320 crm_trace("Not unpacking resource history from malformed "
1321 PCMK__XE_NODE_STATE " without id and/or uname");
1322 continue;
1323 }
1324
1325 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1326 if (this_node == NULL) {
1327 // Warning already logged in first pass through status section
1328 crm_trace("Not unpacking resource history for node %s because "
1329 "no longer in configuration", id);
1330 continue;
1331 }
1332
1333 if (this_node->details->unpacked) {
 // Each node is unpacked at most once across all passes
1334 crm_trace("Not unpacking resource history for node %s because "
1335 "already unpacked", id);
1336 continue;
1337 }
1338
1339 if (fence) {
1340 // We're processing all remaining nodes
1341
1342 } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1343 /* We can unpack a guest node's history only after we've unpacked
1344 * other resource history to the point that we know that the node's
1345 * connection and containing resource are both up.
1346 */
1347 pcmk_resource_t *rsc = this_node->details->remote_rsc;
1348
1349 if ((rsc == NULL) || (rsc->role != pcmk_role_started)
1350 || (rsc->container->role != pcmk_role_started)) {
1351 crm_trace("Not unpacking resource history for guest node %s "
1352 "because container and connection are not known to "
1353 "be up", id);
1354 continue;
1355 }
1356
1357 } else if (pcmk__is_remote_node(this_node)) {
1358 /* We can unpack a remote node's history only after we've unpacked
1359 * other resource history to the point that we know that the node's
1360 * connection is up, with the exception of when shutdown locks are
1361 * in use.
1362 */
1363 pcmk_resource_t *rsc = this_node->details->remote_rsc;
1364
1365 if ((rsc == NULL)
 /* NOTE(review): the middle of this condition (original line 1366,
  * presumably the shutdown-lock flag check) was lost in extraction */
1367 && (rsc->role != pcmk_role_started))) {
1368 crm_trace("Not unpacking resource history for remote node %s "
1369 "because connection is not known to be up", id);
1370 continue;
1371 }
1372
1373 /* If fencing and shutdown locks are disabled and we're not processing
1374 * unseen nodes, then we don't want to unpack offline nodes until online
1375 * nodes have been unpacked. This allows us to number active clone
1376 * instances first.
1377 */
1378 } else if (!pcmk_any_flags_set(scheduler->flags,
 /* NOTE(review): the flag arguments (original lines 1379-1380) were
  * lost in extraction */
1381 && !this_node->details->online) {
1382 crm_trace("Not unpacking resource history for offline "
1383 "cluster node %s", id);
1384 continue;
1385 }
1386
1387 if (pcmk__is_pacemaker_remote_node(this_node)) {
 // Remote online status can be determined now that history is known
1388 determine_remote_online_status(scheduler, this_node);
1389 unpack_handle_remote_attrs(this_node, state, scheduler);
1390 }
1391
1392 crm_trace("Unpacking resource history for %snode %s",
1393 (fence? "unseen " : ""), id);
1394
1395 this_node->details->unpacked = TRUE;
1396 unpack_node_lrm(this_node, state, scheduler);
1397
1398 rc = EAGAIN; // Other node histories might depend on this one
1399 }
1400 return rc;
1401}
1402
1403/* Remove nodes that are down or stopping */
1404/* Create positive rsc_to_node constraints between resources and the nodes they are running on */
1405/* Anything else? */
/* Unpack the CIB status section: ticket state, node states, then resource
 * history (in repeated passes until no further progress), followed by cleanup
 * of failed bundle connections and Pacemaker Remote shutdown/online handling.
 *
 * NOTE(review): the function signature line (original line 1407) was lost in
 * extraction; presumably it takes the status XML and the scheduler object —
 * confirm against upstream.
 */
1406gboolean
1408{
1409 xmlNode *state = NULL;
1410
1411 crm_trace("Beginning unpack");
1412
1413 if (scheduler->tickets == NULL) {
 /* NOTE(review): the ticket-table initialization (original line 1414)
  * was lost in extraction */
1415 }
1416
 // First pass: unpack tickets and per-node state entries
1417 for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1418 state = pcmk__xe_next(state)) {
1419
1420 if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1421 unpack_tickets_state((xmlNode *) state, scheduler);
1422
1423 } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1424 unpack_node_state(state, scheduler);
1425 }
1426 }
1427
 // Repeat until a pass makes no further progress
1428 while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1429 crm_trace("Another pass through node resource histories is needed");
1430 }
1431
1432 // Now catch any nodes we didn't see
1433 unpack_node_history(status,
 /* NOTE(review): the fence-flag argument expression (original lines
  * 1434-1435) was lost in extraction */
1436 scheduler);
1437
1438 /* Now that we know where resources are, we can schedule stops of containers
1439 * with failed bundle connections
1440 */
1441 if (scheduler->stop_needed != NULL) {
1442 for (GList *item = scheduler->stop_needed; item; item = item->next) {
1443 pcmk_resource_t *container = item->data;
1444 pcmk_node_t *node = pcmk__current_node(container);
1445
1446 if (node) {
1447 stop_action(container, node, FALSE);
1448 }
1449 }
1450 g_list_free(scheduler->stop_needed);
1451 scheduler->stop_needed = NULL;
1452 }
1453
1454 /* Now that we know status of all Pacemaker Remote connections and nodes,
1455 * we can stop connections for node shutdowns, and check the online status
1456 * of remote/guest nodes that didn't have any node history to unpack.
1457 */
1458 for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1459 pcmk_node_t *this_node = gIter->data;
1460
1461 if (!pcmk__is_pacemaker_remote_node(this_node)) {
1462 continue;
1463 }
1464 if (this_node->details->shutdown
1465 && (this_node->details->remote_rsc != NULL)) {
 /* NOTE(review): the call that stops the remote connection (original
  * line 1466) was lost in extraction */
1467 "remote shutdown");
1468 }
1469 if (!this_node->details->unpacked) {
1470 determine_remote_online_status(scheduler, this_node);
1471 }
1472 }
1473
1474 return TRUE;
1475}
1476
1488static long long
1489unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1490{
1491 const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1492 int member = 0;
1493
1494 if (member_time == NULL) {
1495 return -1LL;
1496
1497 } else if (crm_str_to_boolean(member_time, &member) == 1) {
1498 /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1499 * recorded as a boolean for a DC < 2.1.7, or the node is pending
1500 * shutdown and has left the CPG, in which case it was set to 1 to avoid
1501 * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
1502 *
1503 * We return the effective time for in_ccm=1 because what's important to
1504 * avoid fencing is that effective time minus this value is less than
1505 * the pending node timeout.
1506 */
1507 return member? (long long) get_effective_time(scheduler) : 0LL;
1508
1509 } else {
1510 long long when_member = 0LL;
1511
1512 if ((pcmk__scan_ll(member_time, &when_member,
1513 0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1514 crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1515 " in " PCMK__XE_NODE_STATE " entry", member_time);
1516 return -1LL;
1517 }
1518 return when_member;
1519 }
1520}
1521
/* Parse a node_state entry's controller (CRMD) online time.
 * Returns 0 if the peer is offline, otherwise the time since which it has
 * been online in CPG (1 for legacy "online" entries). Unparsable values are
 * treated as offline.
 */
1531static long long
1532unpack_node_online(const xmlNode *node_state)
1533{
1534 const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
1535
1536 // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1537 if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
 /* NOTE(review): the closing comparison arguments (original line 1538)
  * were lost in extraction */
1539 return 0LL;
1540
1541 } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1542 return 1LL;
1543
1544 } else {
1545 long long when_online = 0LL;
1546
1547 if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1548 || (when_online < 0)) {
1549 crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1550 PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
1551 return 0LL;
1552 }
1553 return when_online;
1554 }
1555}
1556
/* Check whether a node's "terminate" attribute requests fencing.
 * The attribute value may be a boolean or an epoch time; any positive value
 * (or boolean true) means termination was requested.
 */
1566static bool
1567unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1568{
1569 long long value = 0LL;
1570 int value_i = 0;
1571 const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1573
1574 // Value may be boolean or an epoch time
1575 if (crm_str_to_boolean(value_s, &value_i) == 1) {
1576 return (value_i != 0);
1577 }
1578 if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) {
1579 return (value > 0);
1580 }
 /* NOTE(review): the format string concatenates the attribute-name macro
  * directly with "node attribute" — the warning likely renders without a
  * separating space (e.g. "...for terminatenode attribute..."); confirm
  * and fix upstream */
1581 crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1582 "node attribute for %s", value_s, pcmk__node_name(node));
1583 return false;
1584}
1585
1586static gboolean
1587determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1588 const xmlNode *node_state,
1589 pcmk_node_t *this_node)
1590{
1591 gboolean online = FALSE;
1592 const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1593 const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1594 long long when_member = unpack_node_member(node_state, scheduler);
1595 long long when_online = unpack_node_online(node_state);
1596
1597 if (when_member <= 0) {
1598 crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
1599 ((when_member < 0)? "presumed " : ""));
1600
1601 } else if (when_online > 0) {
1602 if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1603 online = TRUE;
1604 } else {
1605 crm_debug("Node %s is not ready to run resources: %s",
1606 pcmk__node_name(this_node), join);
1607 }
1608
1609 } else if (this_node->details->expected_up == FALSE) {
1610 crm_trace("Node %s controller is down: "
1611 "member@%lld online@%lld join=%s expected=%s",
1612 pcmk__node_name(this_node), when_member, when_online,
1613 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1614
1615 } else {
1616 /* mark it unclean */
1617 pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1618 crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1619 pcmk__node_name(this_node), when_member, when_online,
1620 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1621 }
1622 return online;
1623}
1624
/* Check whether a node that is a member but not yet online in CPG has been
 * pending longer than the configured node-pending timeout. If the node is
 * pending but still within the timeout, schedule a cluster recheck at the
 * timeout instead.
 */
1638static inline bool
1639pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1640 long long when_member, long long when_online)
1641{
 /* NOTE(review): the opening if-condition (original line 1642, presumably
  * checking that a node-pending timeout is configured) was lost in
  * extraction; confirm against upstream */
1643 && (when_member > 0) && (when_online <= 0)) {
1644 // There is a timeout on pending nodes, and node is pending
1645
1646 time_t timeout = when_member + scheduler->node_pending_timeout;
1647
1648 if (get_effective_time(node->details->data_set) >= timeout) {
1649 return true; // Node has timed out
1650 }
1651
1652 // Node is pending, but still has time
1653 pe__update_recheck_time(timeout, scheduler, "pending node timeout");
1654 }
1655 return false;
1656}
1657
/* Determine a cluster node's online status when fencing is enabled, possibly
 * scheduling fencing for nodes in unexpected or unknown states. Returns true
 * if the node should be considered online (shutting-down nodes are online as
 * long as the controller is still up).
 */
1658static bool
1659determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1660 const xmlNode *node_state,
1661 pcmk_node_t *this_node)
1662{
1663 bool termination_requested = unpack_node_terminate(this_node, node_state);
1664 const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1665 const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1666 long long when_member = unpack_node_member(node_state, scheduler);
1667 long long when_online = unpack_node_online(node_state);
1668
1669/*
1670 - PCMK__XA_JOIN ::= member|down|pending|banned
1671 - PCMK_XA_EXPECTED ::= member|down
1672
1673 @COMPAT with entries recorded for DCs < 2.1.7
1674 - PCMK__XA_IN_CCM ::= true|false
1675 - PCMK_XA_CRMD ::= online|offline
1676
1677 Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1678 - PCMK__XA_IN_CCM ::= <timestamp>|0
1679 Since when node has been a cluster member. A value 0 of means the node is not
1680 a cluster member.
1681
1682 - PCMK_XA_CRMD ::= <timestamp>|0
1683 Since when peer has been online in CPG. A value 0 means the peer is offline
1684 in CPG.
1685*/
1686
1687 crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1688 pcmk__node_name(this_node), when_member, when_online,
1689 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1690 (termination_requested? " (termination requested)" : ""));
1691
1692 if (this_node->details->shutdown) {
1693 crm_debug("%s is shutting down", pcmk__node_name(this_node));
1694
1695 /* Slightly different criteria since we can't shut down a dead peer */
1696 return (when_online > 0);
1697 }
1698
1699 if (when_member < 0) {
 // Membership unknown: node must be fenced before resources can run
1700 pe_fence_node(scheduler, this_node,
1701 "peer has not been seen by the cluster", FALSE);
1702 return false;
1703 }
1704
1705 if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1706 pe_fence_node(scheduler, this_node,
1707 "peer failed Pacemaker membership criteria", FALSE);
1708
1709 } else if (termination_requested) {
1710 if ((when_member <= 0) && (when_online <= 0)
1711 && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
 // Already completely down, so the requested fencing is done
1712 crm_info("%s was fenced as requested", pcmk__node_name(this_node));
1713 return false;
1714 }
1715 pe_fence_node(scheduler, this_node, "fencing was requested", false);
1716
1717 } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
 /* NOTE(review): the closing comparison arguments (original line 1718)
  * were lost in extraction */
1719
1720 if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1721 pe_fence_node(scheduler, this_node,
1722 "peer pending timed out on joining the process group",
1723 FALSE);
1724
1725 } else if ((when_member > 0) || (when_online > 0)) {
 // Partially up: treat as pending/standby rather than online
1726 crm_info("- %s is not ready to run resources",
1727 pcmk__node_name(this_node));
1728 this_node->details->standby = TRUE;
1729 this_node->details->pending = TRUE;
1730
1731 } else {
1732 crm_trace("%s is down or still coming up",
1733 pcmk__node_name(this_node));
1734 }
1735
1736 } else if (when_member <= 0) {
1737 // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1738 pe_fence_node(scheduler, this_node,
1739 "peer is no longer part of the cluster", TRUE);
1740
1741 } else if (when_online <= 0) {
1742 pe_fence_node(scheduler, this_node,
1743 "peer process is no longer available", FALSE);
1744
1745 /* Everything is running at this point, now check join state */
1746
1747 } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1748 crm_info("%s is active", pcmk__node_name(this_node));
1749
 /* NOTE(review): the start of this branch's condition (original line
  * 1750, presumably matching pending join states) was lost in
  * extraction */
1751 CRMD_JOINSTATE_DOWN, NULL)) {
1752 crm_info("%s is not ready to run resources",
1753 pcmk__node_name(this_node));
1754 this_node->details->standby = TRUE;
1755 this_node->details->pending = TRUE;
1756
1757 } else {
1758 pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1759 FALSE);
1760 }
1761
1762 return (when_member > 0);
1763}
1764
1765static void
1766determine_remote_online_status(pcmk_scheduler_t *scheduler,
1767 pcmk_node_t *this_node)
1768{
1769 pcmk_resource_t *rsc = this_node->details->remote_rsc;
1770 pcmk_resource_t *container = NULL;
1771 pcmk_node_t *host = NULL;
1772
1773 /* If there is a node state entry for a (former) Pacemaker Remote node
1774 * but no resource creating that node, the node's connection resource will
1775 * be NULL. Consider it an offline remote node in that case.
1776 */
1777 if (rsc == NULL) {
1778 this_node->details->online = FALSE;
1779 goto remote_online_done;
1780 }
1781
1782 container = rsc->container;
1783
1784 if (container && pcmk__list_of_1(rsc->running_on)) {
1785 host = rsc->running_on->data;
1786 }
1787
1788 /* If the resource is currently started, mark it online. */
1789 if (rsc->role == pcmk_role_started) {
1790 crm_trace("%s node %s presumed ONLINE because connection resource is started",
1791 (container? "Guest" : "Remote"), this_node->details->id);
1792 this_node->details->online = TRUE;
1793 }
1794
1795 /* consider this node shutting down if transitioning start->stop */
1796 if ((rsc->role == pcmk_role_started)
1797 && (rsc->next_role == pcmk_role_stopped)) {
1798
1799 crm_trace("%s node %s shutting down because connection resource is stopping",
1800 (container? "Guest" : "Remote"), this_node->details->id);
1801 this_node->details->shutdown = TRUE;
1802 }
1803
1804 /* Now check all the failure conditions. */
1805 if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
1806 crm_trace("Guest node %s UNCLEAN because guest resource failed",
1807 this_node->details->id);
1808 this_node->details->online = FALSE;
1809 this_node->details->remote_requires_reset = TRUE;
1810
1811 } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1812 crm_trace("%s node %s OFFLINE because connection resource failed",
1813 (container? "Guest" : "Remote"), this_node->details->id);
1814 this_node->details->online = FALSE;
1815
1816 } else if ((rsc->role == pcmk_role_stopped)
1817 || ((container != NULL)
1818 && (container->role == pcmk_role_stopped))) {
1819
1820 crm_trace("%s node %s OFFLINE because its resource is stopped",
1821 (container? "Guest" : "Remote"), this_node->details->id);
1822 this_node->details->online = FALSE;
1823 this_node->details->remote_requires_reset = FALSE;
1824
1825 } else if (host && (host->details->online == FALSE)
1826 && host->details->unclean) {
1827 crm_trace("Guest node %s UNCLEAN because host is unclean",
1828 this_node->details->id);
1829 this_node->details->online = FALSE;
1830 this_node->details->remote_requires_reset = TRUE;
1831 }
1832
1833remote_online_done:
1834 crm_trace("Remote node %s online=%s",
1835 this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1836}
1837
/* Determine a cluster node's online status from its node_state entry and
 * record it (along with shutdown/expected-up), delegating to the fencing or
 * no-fencing variant as appropriate. Offline nodes get a -INFINITY weight so
 * resources are not placed on them.
 *
 * NOTE(review): the final parameter of the signature (original line 1840,
 * presumably the scheduler) was lost in extraction.
 */
1838static void
1839determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1841{
1842 gboolean online = FALSE;
1843 const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1844
1845 CRM_CHECK(this_node != NULL, return);
1846
1847 this_node->details->shutdown = FALSE;
1848 this_node->details->expected_up = FALSE;
1849
1850 if (pe__shutdown_requested(this_node)) {
1851 this_node->details->shutdown = TRUE;
1852
1853 } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1854 this_node->details->expected_up = TRUE;
1855 }
1856
1857 if (this_node->details->type == node_ping) {
1858 this_node->details->unclean = FALSE;
1859 online = FALSE; /* As far as resource management is concerned,
1860 * the node is safely offline.
1861 * Anyone caught abusing this logic will be shot
1862 */
1863
 /* NOTE(review): the else-if condition selecting the no-fencing path
  * (original line 1864, presumably a fencing-enabled flag check) was lost
  * in extraction */
1865 online = determine_online_status_no_fencing(scheduler, node_state,
1866 this_node);
1867
1868 } else {
1869 online = determine_online_status_fencing(scheduler, node_state,
1870 this_node);
1871 }
1872
1873 if (online) {
1874 this_node->details->online = TRUE;
1875
1876 } else {
1877 /* remove node from contention */
1878 this_node->fixed = TRUE; // @COMPAT deprecated and unused
1879 this_node->weight = -PCMK_SCORE_INFINITY;
1880 }
1881
1882 if (online && this_node->details->shutdown) {
1883 /* don't run resources here */
1884 this_node->fixed = TRUE; // @COMPAT deprecated and unused
1885 this_node->weight = -PCMK_SCORE_INFINITY;
1886 }
1887
 // Log the final determination at an appropriate severity
1888 if (this_node->details->type == node_ping) {
1889 crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node));
1890
1891 } else if (this_node->details->unclean) {
1892 pcmk__sched_warn("%s is unclean", pcmk__node_name(this_node));
1893
1894 } else if (this_node->details->online) {
1895 crm_info("%s is %s", pcmk__node_name(this_node),
1896 this_node->details->shutdown ? "shutting down" :
1897 this_node->details->pending ? "pending" :
1898 this_node->details->standby ? "standby" :
1899 this_node->details->maintenance ? "maintenance" : "online");
1900
1901 } else {
1902 crm_trace("%s is offline", pcmk__node_name(this_node));
1903 }
1904}
1905
/*!
 * \brief Find the last character of a resource's base name
 *
 * A clone instance ID has the form "<base>:<number>". This returns a pointer
 * to the last character of the base-name portion of \p id (the character
 * just before the colon when a ":<digits>" suffix is present, or the last
 * character of \p id otherwise).
 *
 * \param[in] id  Resource instance ID to examine
 *
 * \return Pointer into \p id at the base name's last character, or NULL if
 *         \p id is NULL or empty
 */
const char *
pe_base_name_end(const char *id)
{
    const char *last = NULL;

    if (pcmk__str_empty(id)) {
        return NULL;
    }
    last = id + strlen(id) - 1;

    // Walk backward over a possible ":<digits>" suffix (never past id[0])
    for (const char *cur = last; cur > id; --cur) {
        if ((*cur >= '0') && (*cur <= '9')) {
            continue;   // Digit: may be part of an instance-number suffix
        }
        if (*cur == ':') {
            /* ":<digits>" suffix found: base name ends just before the colon
             * (unless the colon is itself the final character)
             */
            return (cur == last)? cur : (cur - 1);
        }
        return last;    // Non-digit, non-colon: no clone suffix
    }
    return last;        // Entirely digits (or one char): no suffix
}
1943
/*!
 * \brief Get a resource name as a copy with any ":<number>" suffix removed
 *
 * \param[in] last_rsc_id  Resource ID to strip
 *
 * \return Newly allocated base name (caller is responsible for freeing);
 *         asserts on NULL/empty input or allocation failure
 */
char *
clone_strip(const char *last_rsc_id)
{
    const char *base_end = pe_base_name_end(last_rsc_id);
    size_t base_len = 0;
    char *stripped = NULL;

    CRM_ASSERT(base_end != NULL);
    base_len = (size_t) (base_end - last_rsc_id) + 1;
    stripped = strndup(last_rsc_id, base_len);
    CRM_ASSERT(stripped != NULL);
    return stripped;
}
1965
/*!
 * \brief Get the name of the first instance of a cloned resource
 *
 * Strips any existing ":<number>" suffix from \p last_rsc_id and appends
 * ":0", e.g. "rsc:2" -> "rsc:0".
 *
 * \param[in] last_rsc_id  Resource instance ID to convert
 *
 * \return Newly allocated "<base>:0" name (caller is responsible for
 *         freeing); asserts on NULL/empty input
 */
char *
clone_zero(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    /* Assert before using end: the original computed (end - last_rsc_id)
     * before checking end for NULL, which is undefined behavior when
     * pe_base_name_end() returns NULL for NULL/empty input.
     */
    CRM_ASSERT(end);
    base_name_len = end - last_rsc_id + 1;

    // +3 for ':', '0', and the terminator (allocation is zero-initialized)
    zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
    memcpy(zero, last_rsc_id, base_name_len);
    zero[base_name_len] = ':';
    zero[base_name_len + 1] = '0';
    return zero;
}
1990
/* Create a placeholder resource object for an orphaned history entry (one
 * whose resource is no longer configured), so its state can still be
 * processed. Returns the new resource (appended to scheduler->resources), or
 * NULL if it could not be unpacked.
 */
1991static pcmk_resource_t *
1992create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
1994{
1995 pcmk_resource_t *rsc = NULL;
1996 xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
1997
 // Build a minimal primitive definition from the history entry's attributes
1998 pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
1999 crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
2000 crm_log_xml_debug(xml_rsc, "Orphan resource");
2001
2002 if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
2003 return NULL;
2004 }
2005
2006 if (xml_contains_remote_node(xml_rsc)) {
2007 pcmk_node_t *node;
2008
2009 crm_debug("Detected orphaned remote node %s", rsc_id);
2010 node = pcmk_find_node(scheduler, rsc_id);
2011 if (node == NULL) {
2012 node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, NULL,
2013 scheduler);
2014 }
2015 link_rsc2remotenode(scheduler, rsc);
2016
2017 if (node) {
2018 crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
2019 node->details->shutdown = TRUE;
2020 }
2021 }
2022
2023 if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
2024 /* This orphaned rsc needs to be mapped to a container. */
2025 crm_trace("Detected orphaned container filler %s", rsc_id);
 /* NOTE(review): the flag-setting statement for container fillers
  * (original line 2026) was lost in extraction */
2027 }
 /* NOTE(review): the statement marking the resource as removed/orphaned
  * (original line 2028) was lost in extraction */
2029 scheduler->resources = g_list_append(scheduler->resources, rsc);
2030 return rsc;
2031}
2032
/* Create an orphan instance of an anonymous clone for a history entry with
 * no matching configured instance, and return the member matching rsc_id.
 *
 * NOTE(review): several lines were lost in extraction — the signature's
 * trailing parameters (original line 2046), the declaration of `top`
 * (original line 2048), and the find_rsc() flag argument (original line
 * 2052); confirm against upstream.
 */
2044static pcmk_resource_t *
2045create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
2047{
2049
2050 // find_rsc() because we might be a cloned group
2051 pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
2053
2054 pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
2055 top->id, parent->id, rsc_id, pcmk__node_name(node));
2056 return orphan;
2057}
2058
/* Map an anonymous clone history entry to a particular clone instance:
 * prefer an instance already known active/pending on the node, fall back to
 * a remembered inactive instance, and otherwise create an orphan instance.
 * Never returns NULL.
 */
2074static pcmk_resource_t *
2075find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2076 pcmk_resource_t *parent, const char *rsc_id)
2077{
2078 GList *rIter = NULL;
2079 pcmk_resource_t *rsc = NULL;
2080 pcmk_resource_t *inactive_instance = NULL;
2081 gboolean skip_inactive = FALSE;
2082
2083 CRM_ASSERT(pcmk__is_anonymous_clone(parent));
2084
2085 // Check for active (or partially active, for cloned groups) instance
2086 pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2087 rsc_id, pcmk__node_name(node), parent->id);
2088 for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2089 GList *locations = NULL;
2090 pcmk_resource_t *child = rIter->data;
2091
2092 /* Check whether this instance is already known to be active or pending
2093 * anywhere, at this stage of unpacking. Because this function is called
2094 * for a resource before the resource's individual operation history
2095 * entries are unpacked, locations will generally not contain the
2096 * desired node.
2097 *
2098 * However, there are three exceptions:
2099 * (1) when child is a cloned group and we have already unpacked the
2100 * history of another member of the group on the same node;
2101 * (2) when we've already unpacked the history of another numbered
2102 * instance on the same node (which can happen if
2103 * PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2104 * (3) when we re-run calculations on the same scheduler data as part of
2105 * a simulation.
2106 */
2107 child->fns->location(child, &locations, 2);
2108 if (locations) {
2109 /* We should never associate the same numbered anonymous clone
2110 * instance with multiple nodes, and clone instances can't migrate,
2111 * so there must be only one location, regardless of history.
2112 */
2113 CRM_LOG_ASSERT(locations->next == NULL);
2114
2115 if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2116 /* This child instance is active on the requested node, so check
2117 * for a corresponding configured resource. We use find_rsc()
2118 * instead of child because child may be a cloned group, and we
2119 * need the particular member corresponding to rsc_id.
2120 *
2121 * If the history entry is orphaned, rsc will be NULL.
2122 */
2123 rsc = parent->fns->find_rsc(child, rsc_id, NULL,
 /* NOTE(review): the find_rsc() flag argument (original line 2124)
  * was lost in extraction */
2125 if (rsc) {
2126 /* If there are multiple instance history entries for an
2127 * anonymous clone in a single node's history (which can
2128 * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2129 * to false), we want to consider the instances beyond the
2130 * first as orphans, even if there are inactive instance
2131 * numbers available.
2132 */
2133 if (rsc->running_on) {
2134 crm_notice("Active (now-)anonymous clone %s has "
2135 "multiple (orphan) instance histories on %s",
2136 parent->id, pcmk__node_name(node));
2137 skip_inactive = TRUE;
2138 rsc = NULL;
2139 } else {
2140 pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2141 }
2142 }
2143 }
2144 g_list_free(locations);
2145
2146 } else {
2147 pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2148 if (!skip_inactive && !inactive_instance
2149 && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
2150 // Remember one inactive instance in case we don't find active
2151 inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
 /* NOTE(review): the find_rsc() flag argument (original line 2152)
  * was lost in extraction */
2153
2154 /* ... but don't use it if it was already associated with a
2155 * pending action on another node
2156 */
2157 if ((inactive_instance != NULL) &&
2158 (inactive_instance->pending_node != NULL) &&
2159 !pcmk__same_node(inactive_instance->pending_node, node)) {
2160 inactive_instance = NULL;
2161 }
2162 }
2163 }
2164 }
2165
2166 if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2167 pcmk__rsc_trace(parent, "Resource %s, empty slot",
2168 inactive_instance->id);
2169 rsc = inactive_instance;
2170 }
2171
2172 /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2173 * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2174 * don't want to consume a valid instance number for unclean nodes. Such
2175 * instances may appear to be active according to the history, but should be
2176 * considered inactive, so we can start an instance elsewhere. Treat such
2177 * instances as orphans.
2178 *
2179 * An exception is instances running on guest nodes -- since guest node
2180 * "fencing" is actually just a resource stop, requires shouldn't apply.
2181 *
2182 * @TODO Ideally, we'd use an inactive instance number if it is not needed
2183 * for any clean instances. However, we don't know that at this point.
2184 */
2185 if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
2186 && (!node->details->online || node->details->unclean)
2187 && !pcmk__is_guest_or_bundle_node(node)
 /* NOTE(review): the final condition of this check (original line 2188)
  * was lost in extraction */
2189
2190 rsc = NULL;
2191 }
2192
2193 if (rsc == NULL) {
 // No usable instance: fabricate an orphan so the history can be tracked
2194 rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2195 pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
2196 }
2197 return rsc;
2198}
2199
/* Find the resource object corresponding to an operation-history resource ID,
 * trying the literal ID first, then a ":0" clone-instance form, then (for
 * anonymous clones) the appropriate instance for the node. Records the
 * history ID as the resource's clone_name when they differ. May return NULL
 * for orphans.
 */
2200static pcmk_resource_t *
2201unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2202 const char *rsc_id)
2203{
2204 pcmk_resource_t *rsc = NULL;
2205 pcmk_resource_t *parent = NULL;
2206
2207 crm_trace("looking for %s", rsc_id);
2208 rsc = pe_find_resource(scheduler->resources, rsc_id);
2209
2210 if (rsc == NULL) {
2211 /* If we didn't find the resource by its name in the operation history,
2212 * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2213 * we create a single :0 orphan to match against here.
2214 */
2215 char *clone0_id = clone_zero(rsc_id);
 /* NOTE(review): the declaration of clone0 and the start of its lookup
  * (original line 2216) were lost in extraction */
2217 clone0_id);
2218
2219 if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
2220 rsc = clone0;
2221 parent = uber_parent(clone0);
2222 crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2223 } else {
2224 crm_trace("%s is not known as %s either (orphan)",
2225 rsc_id, clone0_id);
2226 }
2227 free(clone0_id);
2228
2229 } else if (rsc->variant > pcmk_rsc_variant_primitive) {
 // Operation history applies only to primitives; treat as orphaned
2230 crm_trace("Resource history for %s is orphaned because it is no longer primitive",
2231 rsc_id);
2232 return NULL;
2233
2234 } else {
2235 parent = uber_parent(rsc);
2236 }
2237
2238 if (pcmk__is_anonymous_clone(parent)) {
2239
2240 if (pcmk__is_bundled(parent)) {
2241 rsc = pe__find_bundle_replica(parent->parent, node);
2242 } else {
2243 char *base = clone_strip(rsc_id);
2244
2245 rsc = find_anonymous_clone(scheduler, node, parent, base);
2246 free(base);
2247 CRM_ASSERT(rsc != NULL);
2248 }
2249 }
2250
 // Remember the history's name for this instance if it differs
2251 if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2252 && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_none)) {
2253
2254 pcmk__str_update(&rsc->clone_name, rsc_id);
2255 pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2256 rsc_id, pcmk__node_name(node), rsc->id,
2257 pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : "");
2258 }
2259 return rsc;
2260}
2261
/* Handle a history entry for a resource that is no longer configured: create
 * a placeholder resource for it and, unless stopping removed resources is
 * disabled, ban it from running anywhere. Returns the placeholder (or NULL).
 *
 * NOTE(review): the signature's trailing parameter (original line 2264) and
 * the if-condition before the else branch (original lines 2276-2277) were
 * lost in extraction; confirm against upstream.
 */
2262static pcmk_resource_t *
2263process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2265{
2266 pcmk_resource_t *rsc = NULL;
2267 const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2268
2269 crm_debug("Detected orphan resource %s on %s",
2270 rsc_id, pcmk__node_name(node));
2271 rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2272 if (rsc == NULL) {
2273 return NULL;
2274 }
2275
2278
2279 } else {
2280 CRM_CHECK(rsc != NULL, return NULL);
2281 pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
 /* NOTE(review): the start of the location-constraint call (original
  * line 2282) was lost in extraction */
2283 "__orphan_do_not_run__", scheduler);
2284 }
2285 return rsc;
2286}
2287
/* Update a resource's role, known/active node lists, and failure handling
 * based on its observed state on one node, scheduling fencing, stops, bans,
 * or standby as dictated by the operation's on-fail policy.
 *
 * NOTE(review): several lines of this function (most of the switch's case
 * labels and a few flag-setting statements) were lost when this copy was
 * extracted; the gaps are marked below and must be restored from upstream
 * before this file can build.
 */
static void
process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
                  enum action_fail_response on_fail)
{
    pcmk_node_t *tmpnode = NULL;
    char *reason = NULL;
    enum action_fail_response save_on_fail = pcmk_on_fail_ignore;

    CRM_ASSERT(rsc);
    pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
                    rsc->id, pcmk_role_text(rsc->role), pcmk__node_name(node),
                    pcmk_on_fail_text(on_fail));

    /* process current state */
    if (rsc->role != pcmk_role_unknown) {
        pcmk_resource_t *iter = rsc;

        /* Record this node as "known" for the resource and each ancestor up
         * to (and including) the first globally unique one
         */
        while (iter) {
            if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
                pcmk_node_t *n = pe__copy_node(node);

                pcmk__rsc_trace(rsc, "%s%s%s known on %s",
                                rsc->id,
                                ((rsc->clone_name == NULL)? "" : " also known as "),
                                ((rsc->clone_name == NULL)? "" : rsc->clone_name),
                                pcmk__node_name(n));
                g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
            }
            if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
                break;
            }
            iter = iter->parent;
        }
    }

    /* If a managed resource is believed to be running, but node is down ... */
    if ((rsc->role > pcmk_role_stopped)
        && node->details->online == FALSE
        && node->details->maintenance == FALSE
        /* NOTE(review): the final condition line of this if is missing here */

        gboolean should_fence = FALSE;

        /* If this is a guest node, fence it (regardless of whether fencing is
         * enabled, because guest node fencing is done by recovery of the
         * container resource rather than by the fencer). Mark the resource
         * we're processing as failed. When the guest comes back up, its
         * operation history in the CIB will be cleared, freeing the affected
         * resource to run again once we are sure we know its state.
         */
        if (pcmk__is_guest_or_bundle_node(node)) {
            /* NOTE(review): a flag-setting statement is missing here */
            should_fence = TRUE;

        } else if (pcmk_is_set(rsc->cluster->flags,
            /* NOTE(review): the rest of this condition is missing here */
            if (pcmk__is_remote_node(node)
                && (node->details->remote_rsc != NULL)
                && !pcmk_is_set(node->details->remote_rsc->flags,
                                pcmk_rsc_failed)) {

                /* Setting unseen means that fencing of the remote node will
                 * occur only if the connection resource is not going to start
                 * somewhere. This allows connection resources on a failed
                 * cluster node to move to another node without requiring the
                 * remote nodes to be fenced as well.
                 */
                node->details->unseen = TRUE;
                reason = crm_strdup_printf("%s is active there (fencing will be"
                                           " revoked if remote connection can "
                                           "be re-established elsewhere)",
                                           rsc->id);
            }
            should_fence = TRUE;
        }

        if (should_fence) {
            if (reason == NULL) {
                reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
            }
            pe_fence_node(rsc->cluster, node, reason, FALSE);
        }
        free(reason);
    }

    /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
    save_on_fail = on_fail;

    if (node->details->unclean) {
        /* No extra processing needed
         * Also allows resources to be started again after a node is shot
         */
        on_fail = pcmk_on_fail_ignore;
    }

    /* NOTE(review): the case labels of this switch were lost in this copy */
    switch (on_fail) {
            /* nothing to do */
            break;

            demote_action(rsc, node, FALSE);
            break;

            /* treat it as if it is still running
             * but also mark the node as unclean
             */
            reason = crm_strdup_printf("%s failed there", rsc->id);
            pe_fence_node(rsc->cluster, node, reason, FALSE);
            free(reason);
            break;

            node->details->standby = TRUE;
            node->details->standby_onfail = TRUE;
            break;

        case pcmk_on_fail_block:
            /* is_managed == FALSE will prevent any
             * actions being sent for the resource
             */
            break;

        case pcmk_on_fail_ban:
            /* make sure it comes up somewhere else
             * or not at all
             */
            /* NOTE(review): the start of this ban call is missing here */
                              "__action_migration_auto__", rsc->cluster);
            break;

        case pcmk_on_fail_stop:
            break;

            if ((rsc->role != pcmk_role_stopped)
                && (rsc->role != pcmk_role_unknown)) {
                stop_action(rsc, node, FALSE);
            }
            break;

            if ((rsc->container != NULL) && pcmk__is_bundled(rsc)) {
                /* A bundle's remote connection can run on a different node than
                 * the bundle's container. We don't necessarily know where the
                 * container is running yet, so remember it and add a stop
                 * action for it later.
                 */
                rsc->cluster->stop_needed =
                    g_list_prepend(rsc->cluster->stop_needed, rsc->container);
            } else if (rsc->container) {
                stop_action(rsc->container, node, FALSE);
            } else if ((rsc->role != pcmk_role_stopped)
                       && (rsc->role != pcmk_role_unknown)) {
                stop_action(rsc, node, FALSE);
            }
            break;

            tmpnode = NULL;
            if (rsc->is_remote_node) {
                tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
            }
            if (pcmk__is_remote_node(tmpnode)
                && !(tmpnode->details->remote_was_fenced)) {
                /* The remote connection resource failed in a way that
                 * should result in fencing the remote node.
                 */
                pe_fence_node(rsc->cluster, tmpnode,
                              "remote connection is unrecoverable", FALSE);
            }
        }

        /* require the stop action regardless if fencing is occurring or not. */
        if (rsc->role > pcmk_role_stopped) {
            stop_action(rsc, node, FALSE);
        }

        /* if reconnect delay is in use, prevent the connection from exiting the
         * "STOPPED" role until the failure is cleared by the delay timeout. */
        if (rsc->remote_reconnect_ms) {
            pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
        }
        break;
    }

    /* ensure a remote-node connection failure forces an unclean remote-node
     * to be fenced. By setting unseen = FALSE, the remote-node failure will
     * result in a fencing operation regardless if we're going to attempt to
     * reconnect to the remote-node in this transition or not. */
    if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
        tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
        if (tmpnode && tmpnode->details->unclean) {
            tmpnode->details->unseen = FALSE;
        }
    }

    if ((rsc->role != pcmk_role_stopped)
        && (rsc->role != pcmk_role_unknown)) {
        if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
            if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
                crm_notice("Removed resource %s is active on %s and will be "
                           "stopped when possible",
                           rsc->id, pcmk__node_name(node));
            } else {
                crm_notice("Removed resource %s must be stopped manually on %s "
                           /* NOTE(review): part of this message is missing */
                           " is set to false", rsc->id, pcmk__node_name(node));
            }
        }

        native_add_running(rsc, node, rsc->cluster,
                           (save_on_fail != pcmk_on_fail_ignore));
        /* NOTE(review): this switch's case labels were also lost here */
        switch (on_fail) {
                break;
            case pcmk_on_fail_block:
                break;
            default:
                break;
        }

    } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
        /* Only do this for older status sections that included instance numbers
         * Otherwise stopped instances will appear as orphans
         */
        pcmk__rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)",
                        rsc->clone_name, rsc->id);
        free(rsc->clone_name);
        rsc->clone_name = NULL;

    } else {
        GList *possible_matches = pe__resource_actions(rsc, node,
                                                       PCMK_ACTION_STOP, FALSE);
        GList *gIter = possible_matches;

        for (; gIter != NULL; gIter = gIter->next) {
            pcmk_action_t *stop = (pcmk_action_t *) gIter->data;

            /* NOTE(review): the statement using 'stop' is missing here */
        }

        g_list_free(possible_matches);
    }

    /* A successful stop after migrate_to on the migration source doesn't make
     * the partially migrated resource stopped on the migration target.
     */
    if ((rsc->role == pcmk_role_stopped)
        /* NOTE(review): a NULL-check condition line is missing here */
        && rsc->partial_migration_source->details == node->details
        /* NOTE(review): another condition line is missing here */
        && rsc->running_on) {

        rsc->role = pcmk_role_started;
    }
}
2560
2561/* create active recurring operations as optional */
2562static void
2563process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2564 int start_index, int stop_index,
2565 GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2566{
2567 int counter = -1;
2568 const char *task = NULL;
2569 const char *status = NULL;
2570 GList *gIter = sorted_op_list;
2571
2572 CRM_ASSERT(rsc);
2573 pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
2574 rsc->id, start_index, stop_index);
2575
2576 for (; gIter != NULL; gIter = gIter->next) {
2577 xmlNode *rsc_op = (xmlNode *) gIter->data;
2578
2579 guint interval_ms = 0;
2580 char *key = NULL;
2581 const char *id = pcmk__xe_id(rsc_op);
2582
2583 counter++;
2584
2585 if (node->details->online == FALSE) {
2586 pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2587 rsc->id, pcmk__node_name(node));
2588 break;
2589
2590 /* Need to check if there's a monitor for role="Stopped" */
2591 } else if (start_index < stop_index && counter <= stop_index) {
2592 pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2593 id, pcmk__node_name(node));
2594 continue;
2595
2596 } else if (counter < start_index) {
2597 pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2598 id, pcmk__node_name(node), counter);
2599 continue;
2600 }
2601
2602 crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2603 if (interval_ms == 0) {
2604 pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2605 id, pcmk__node_name(node));
2606 continue;
2607 }
2608
2609 status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2610 if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2611 pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2612 id, pcmk__node_name(node));
2613 continue;
2614 }
2615 task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2616 /* create the action */
2617 key = pcmk__op_key(rsc->id, task, interval_ms);
2618 pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2619 custom_action(rsc, key, task, node, TRUE, scheduler);
2620 }
2621}
2622
2623void
2624calculate_active_ops(const GList *sorted_op_list, int *start_index,
2625 int *stop_index)
2626{
2627 int counter = -1;
2628 int implied_monitor_start = -1;
2629 int implied_clone_start = -1;
2630 const char *task = NULL;
2631 const char *status = NULL;
2632
2633 *stop_index = -1;
2634 *start_index = -1;
2635
2636 for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2637 const xmlNode *rsc_op = (const xmlNode *) iter->data;
2638
2639 counter++;
2640
2641 task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2642 status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2643
2644 if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2645 && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2646 *stop_index = counter;
2647
2648 } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2649 PCMK_ACTION_MIGRATE_FROM, NULL)) {
2650 *start_index = counter;
2651
2652 } else if ((implied_monitor_start <= *stop_index)
2653 && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2654 pcmk__str_casei)) {
2655 const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
2656
2657 if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2658 implied_monitor_start = counter;
2659 }
2661 PCMK_ACTION_DEMOTE, NULL)) {
2662 implied_clone_start = counter;
2663 }
2664 }
2665
2666 if (*start_index == -1) {
2667 if (implied_clone_start != -1) {
2668 *start_index = implied_clone_start;
2669 } else if (implied_monitor_start != -1) {
2670 *start_index = implied_monitor_start;
2671 }
2672 }
2673}
2674
// If resource history entry has shutdown lock, remember lock node and time
/* NOTE(review): this copy is missing the rest of the function signature and
 * the opening lines of the two conditions below (lost during extraction);
 * restore them from upstream before building.
 */
static void
unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
{
    time_t lock_time = 0;   // When lock started (i.e. node shutdown time)

        &lock_time) == pcmk_ok) && (lock_time != 0)) {

        if ((scheduler->shutdown_lock > 0)
            > (lock_time + scheduler->shutdown_lock))) {
            // Lock has expired: clear the stale history so the lock is dropped
            pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
                           rsc->id, pcmk__node_name(node));
            pe__clear_resource_history(rsc, node);
        } else {
            /* @COMPAT I don't like breaking const signatures, but
             * rsc->lock_node should really be const -- we just can't change it
             * until the next API compatibility break.
             */
            rsc->lock_node = (pcmk_node_t *) node;
            rsc->lock_time = lock_time;
        }
    }
}
2701
2712static pcmk_resource_t *
2713unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2715{
2716 GList *gIter = NULL;
2717 int stop_index = -1;
2718 int start_index = -1;
2719 enum rsc_role_e req_role = pcmk_role_unknown;
2720
2721 const char *rsc_id = pcmk__xe_id(lrm_resource);
2722
2723 pcmk_resource_t *rsc = NULL;
2724 GList *op_list = NULL;
2725 GList *sorted_op_list = NULL;
2726
2727 xmlNode *rsc_op = NULL;
2728 xmlNode *last_failure = NULL;
2729
2731 enum rsc_role_e saved_role = pcmk_role_unknown;
2732
2733 if (rsc_id == NULL) {
2734 pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2735 " entry: No " PCMK_XA_ID);
2736 crm_log_xml_info(lrm_resource, "missing-id");
2737 return NULL;
2738 }
2739 crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
2740 rsc_id, pcmk__node_name(node));
2741
2742 /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2743 * them
2744 */
2745 for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2746 NULL);
2747 rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) {
2748
2749 op_list = g_list_prepend(op_list, rsc_op);
2750 }
2751
2753 if (op_list == NULL) {
2754 // If there are no operations, there is nothing to do
2755 return NULL;
2756 }
2757 }
2758
2759 /* find the resource */
2760 rsc = unpack_find_resource(scheduler, node, rsc_id);
2761 if (rsc == NULL) {
2762 if (op_list == NULL) {
2763 // If there are no operations, there is nothing to do
2764 return NULL;
2765 } else {
2766 rsc = process_orphan_resource(lrm_resource, node, scheduler);
2767 }
2768 }
2769 CRM_ASSERT(rsc != NULL);
2770
2771 // Check whether the resource is "shutdown-locked" to this node
2773 unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2774 }
2775
2776 /* process operations */
2777 saved_role = rsc->role;
2778 rsc->role = pcmk_role_unknown;
2779 sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2780
2781 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2782 xmlNode *rsc_op = (xmlNode *) gIter->data;
2783
2784 unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2785 }
2786
2787 /* create active recurring operations as optional */
2788 calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2789 process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2790 scheduler);
2791
2792 /* no need to free the contents */
2793 g_list_free(sorted_op_list);
2794
2795 process_rsc_state(rsc, node, on_fail);
2796
2797 if (get_target_role(rsc, &req_role)) {
2798 if ((rsc->next_role == pcmk_role_unknown)
2799 || (req_role < rsc->next_role)) {
2800
2802
2803 } else if (req_role > rsc->next_role) {
2804 pcmk__rsc_info(rsc,
2805 "%s: Not overwriting calculated next role %s"
2806 " with requested next role %s",
2807 rsc->id, pcmk_role_text(rsc->next_role),
2808 pcmk_role_text(req_role));
2809 }
2810 }
2811
2812 if (saved_role > rsc->role) {
2813 rsc->role = saved_role;
2814 }
2815
2816 return rsc;
2817}
2818
2819static void
2820handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
2822{
2823 for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL,
2824 NULL, NULL);
2825 rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2826
2827 pcmk_resource_t *rsc;
2828 pcmk_resource_t *container;
2829 const char *rsc_id;
2830 const char *container_id;
2831
2832 if (!pcmk__xe_is(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2833 continue;
2834 }
2835
2836 container_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
2837 rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2838 if (container_id == NULL || rsc_id == NULL) {
2839 continue;
2840 }
2841
2842 container = pe_find_resource(scheduler->resources, container_id);
2843 if (container == NULL) {
2844 continue;
2845 }
2846
2847 rsc = pe_find_resource(scheduler->resources, rsc_id);
2848 if ((rsc == NULL) || (rsc->container != NULL)
2850 continue;
2851 }
2852
2853 pcmk__rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2854 rsc->id, container_id);
2855 rsc->container = container;
2856 container->fillers = g_list_append(container->fillers, rsc);
2857 }
2858}
2859
2868static void
2869unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2871{
2872 bool found_orphaned_container_filler = false;
2873
2874 // Drill down to PCMK__XE_LRM_RESOURCES section
2875 xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2876 if (xml == NULL) {
2877 return;
2878 }
2879 xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2880 if (xml == NULL) {
2881 return;
2882 }
2883
2884 // Unpack each PCMK__XE_LRM_RESOURCE entry
2885 for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2887 NULL, NULL);
2888 rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) {
2889
2890 pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2891
2892 if ((rsc != NULL)
2894 found_orphaned_container_filler = true;
2895 }
2896 }
2897
2898 /* Now that all resource state has been unpacked for this node, map any
2899 * orphaned container fillers to their container resource.
2900 */
2901 if (found_orphaned_container_filler) {
2902 handle_orphaned_container_fillers(xml, scheduler);
2903 }
2904}
2905
2906static void
2907set_active(pcmk_resource_t *rsc)
2908{
2909 const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2910
2911 if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
2913 } else {
2914 rsc->role = pcmk_role_started;
2915 }
2916}
2917
2918static void
2919set_node_score(gpointer key, gpointer value, gpointer user_data)
2920{
2921 pcmk_node_t *node = value;
2922 int *score = user_data;
2923
2924 node->weight = *score;
2925}
2926
// XPath locating a node's state entry within the CIB status section
#define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
                         "/" PCMK__XE_NODE_STATE
// Relative XPath from a node state entry to one resource's history element
#define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \
                               "/" PCMK__XE_LRM_RESOURCES \
                               "/" PCMK__XE_LRM_RESOURCE
// Relative XPath from a resource history element to one operation entry
#define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
2933
2934static xmlNode *
2935find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2936 int target_rc, pcmk_scheduler_t *scheduler)
2937{
2938 GString *xpath = NULL;
2939 xmlNode *xml = NULL;
2940
2941 CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2942 return NULL);
2943
2944 xpath = g_string_sized_new(256);
2945 pcmk__g_strcat(xpath,
2946 XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
2947 SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
2948 SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
2949 NULL);
2950
2951 /* Need to check against transition_magic too? */
2952 if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2953 pcmk__g_strcat(xpath,
2954 " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
2955 NULL);
2956
2957 } else if ((source != NULL)
2958 && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2959 pcmk__g_strcat(xpath,
2960 " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
2961 NULL);
2962 } else {
2963 g_string_append_c(xpath, ']');
2964 }
2965
2966 xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2967 LOG_DEBUG);
2968 g_string_free(xpath, TRUE);
2969
2970 if (xml && target_rc >= 0) {
2971 int rc = PCMK_OCF_UNKNOWN_ERROR;
2972 int status = PCMK_EXEC_ERROR;
2973
2976 if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
2977 return NULL;
2978 }
2979 }
2980 return xml;
2981}
2982
2983static xmlNode *
2984find_lrm_resource(const char *rsc_id, const char *node_name,
2986{
2987 GString *xpath = NULL;
2988 xmlNode *xml = NULL;
2989
2990 CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
2991
2992 xpath = g_string_sized_new(256);
2993 pcmk__g_strcat(xpath,
2994 XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
2995 SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
2996 NULL);
2997
2998 xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2999 LOG_DEBUG);
3000
3001 g_string_free(xpath, TRUE);
3002 return xml;
3003}
3004
3014static bool
3015unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3016{
3017 bool result = false;
3018 xmlXPathObjectPtr search;
3019 char *xpath = NULL;
3020
3024 "[@" PCMK__XA_RC_CODE "!='%d']",
3025 node_name, rsc->id, PCMK_OCF_UNKNOWN);
3026
3027 search = xpath_search(rsc->cluster->input, xpath);
3028 result = (numXpathResults(search) == 0);
3029 freeXpathObject(search);
3030 free(xpath);
3031 return result;
3032}
3033
3046static bool
3047monitor_not_running_after(const char *rsc_id, const char *node_name,
3048 const xmlNode *xml_op, bool same_node,
3050{
3051 /* Any probe/monitor operation on the node indicating it was not running
3052 * there
3053 */
3054 xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
3056
3057 return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
3058}
3059
3072static bool
3073non_monitor_after(const char *rsc_id, const char *node_name,
3074 const xmlNode *xml_op, bool same_node,
3076{
3077 xmlNode *lrm_resource = NULL;
3078
3079 lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3080 if (lrm_resource == NULL) {
3081 return false;
3082 }
3083
3084 for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3085 NULL, NULL);
3086 op != NULL; op = pcmk__xe_next_same(op)) {
3087
3088 const char * task = NULL;
3089
3090 if (op == xml_op) {
3091 continue;
3092 }
3093
3095
3098 NULL)
3099 && pe__is_newer_op(op, xml_op, same_node) > 0) {
3100 return true;
3101 }
3102 }
3103
3104 return false;
3105}
3106
3119static bool
3120newer_state_after_migrate(const char *rsc_id, const char *node_name,
3121 const xmlNode *migrate_to,
3122 const xmlNode *migrate_from,
3124{
3125 const xmlNode *xml_op = migrate_to;
3126 const char *source = NULL;
3127 const char *target = NULL;
3128 bool same_node = false;
3129
3130 if (migrate_from) {
3131 xml_op = migrate_from;
3132 }
3133
3136
3137 /* It's preferred to compare to the migrate event on the same node if
3138 * existing, since call ids are more reliable.
3139 */
3140 if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
3141 if (migrate_from) {
3142 xml_op = migrate_from;
3143 same_node = true;
3144
3145 } else {
3146 xml_op = migrate_to;
3147 }
3148
3149 } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3150 if (migrate_to) {
3151 xml_op = migrate_to;
3152 same_node = true;
3153
3154 } else {
3155 xml_op = migrate_from;
3156 }
3157 }
3158
3159 /* If there's any newer non-monitor operation on the node, or any newer
3160 * probe/monitor operation on the node indicating it was not running there,
3161 * the migration events potentially no longer matter for the node.
3162 */
3163 return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
3164 || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
3165 scheduler);
3166}
3167
3180static int
3181get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3182 const pcmk_node_t *target_node,
3183 const char **source_name, const char **target_name)
3184{
3185 *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
3186 *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
3187 if ((*source_name == NULL) || (*target_name == NULL)) {
3188 pcmk__config_err("Ignoring resource history entry %s without "
3190 PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3191 return pcmk_rc_unpack_error;
3192 }
3193
3194 if ((source_node != NULL)
3195 && !pcmk__str_eq(*source_name, source_node->details->uname,
3197 pcmk__config_err("Ignoring resource history entry %s because "
3198 PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3199 pcmk__xe_id(entry), *source_name,
3200 pcmk__node_name(source_node));
3201 return pcmk_rc_unpack_error;
3202 }
3203
3204 if ((target_node != NULL)
3205 && !pcmk__str_eq(*target_name, target_node->details->uname,
3207 pcmk__config_err("Ignoring resource history entry %s because "
3208 PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3209 pcmk__xe_id(entry), *target_name,
3210 pcmk__node_name(target_node));
3211 return pcmk_rc_unpack_error;
3212 }
3213
3214 return pcmk_rc_ok;
3215}
3216
3217/*
3218 * \internal
3219 * \brief Add a migration source to a resource's list of dangling migrations
3220 *
3221 * If the migrate_to and migrate_from actions in a live migration both
3222 * succeeded, but there is no stop on the source, the migration is considered
3223 * "dangling." Add the source to the resource's dangling migration list, which
3224 * will be used to schedule a stop on the source without affecting the target.
3225 *
3226 * \param[in,out] rsc Resource involved in migration
3227 * \param[in] node Migration source
3228 */
3229static void
3230add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3231{
3232 pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3233 rsc->id, pcmk__node_name(node));
3234 rsc->role = pcmk_role_stopped;
3235 rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
3236 (gpointer) node);
3237}
3238
3245static void
3246unpack_migrate_to_success(struct action_history *history)
3247{
3248 /* A complete migration sequence is:
3249 * 1. migrate_to on source node (which succeeded if we get to this function)
3250 * 2. migrate_from on target node
3251 * 3. stop on source node
3252 *
3253 * If no migrate_from has happened, the migration is considered to be
3254 * "partial". If the migrate_from succeeded but no stop has happened, the
3255 * migration is considered to be "dangling".
3256 *
3257 * If a successful migrate_to and stop have happened on the source node, we
3258 * still need to check for a partial migration, due to scenarios (easier to
3259 * produce with batch-limit=1) like:
3260 *
3261 * - A resource is migrating from node1 to node2, and a migrate_to is
3262 * initiated for it on node1.
3263 *
3264 * - node2 goes into standby mode while the migrate_to is pending, which
3265 * aborts the transition.
3266 *
3267 * - Upon completion of the migrate_to, a new transition schedules a stop
3268 * on both nodes and a start on node1.
3269 *
3270 * - If the new transition is aborted for any reason while the resource is
3271 * stopping on node1, the transition after that stop completes will see
3272 * the migrate_to and stop on the source, but it's still a partial
3273 * migration, and the resource must be stopped on node2 because it is
3274 * potentially active there due to the migrate_to.
3275 *
3276 * We also need to take into account that either node's history may be
3277 * cleared at any point in the migration process.
3278 */
3279 int from_rc = PCMK_OCF_OK;
3280 int from_status = PCMK_EXEC_PENDING;
3281 pcmk_node_t *target_node = NULL;
3282 xmlNode *migrate_from = NULL;
3283 const char *source = NULL;
3284 const char *target = NULL;
3285 bool source_newer_op = false;
3286 bool target_newer_state = false;
3287 bool active_on_target = false;
3288
3289 // Get source and target node names from XML
3290 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3291 &target) != pcmk_rc_ok) {
3292 return;
3293 }
3294
3295 // Check for newer state on the source
3296 source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3297 true, history->rsc->cluster);
3298
3299 // Check for a migrate_from action from this source on the target
3300 migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3301 target, source, -1, history->rsc->cluster);
3302 if (migrate_from != NULL) {
3303 if (source_newer_op) {
3304 /* There's a newer non-monitor operation on the source and a
3305 * migrate_from on the target, so this migrate_to is irrelevant to
3306 * the resource's state.
3307 */
3308 return;
3309 }
3310 crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3311 crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3312 }
3313
3314 /* If the resource has newer state on both the source and target after the
3315 * migration events, this migrate_to is irrelevant to the resource's state.
3316 */
3317 target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3318 history->xml, migrate_from,
3319 history->rsc->cluster);
3320 if (source_newer_op && target_newer_state) {
3321 return;
3322 }
3323
3324 /* Check for dangling migration (migrate_from succeeded but stop not done).
3325 * We know there's no stop because we already returned if the target has a
3326 * migrate_from and the source has any newer non-monitor operation.
3327 */
3328 if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3329 add_dangling_migration(history->rsc, history->node);
3330 return;
3331 }
3332
3333 /* Without newer state, this migrate_to implies the resource is active.
3334 * (Clones are not allowed to migrate, so role can't be promoted.)
3335 */
3336 history->rsc->role = pcmk_role_started;
3337
3338 target_node = pcmk_find_node(history->rsc->cluster, target);
3339 active_on_target = !target_newer_state && (target_node != NULL)
3340 && target_node->details->online;
3341
3342 if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3343 if (active_on_target) {
3344 native_add_running(history->rsc, target_node, history->rsc->cluster,
3345 TRUE);
3346 } else {
3347 // Mark resource as failed, require recovery, and prevent migration
3348 pcmk__set_rsc_flags(history->rsc,
3351 }
3352 return;
3353 }
3354
3355 // The migrate_from is pending, complete but erased, or to be scheduled
3356
3357 /* If there is no history at all for the resource on an online target, then
3358 * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3359 * have the probe result, it will be reflected in target_newer_state.
3360 */
3361 if ((target_node != NULL) && target_node->details->online
3362 && unknown_on_node(history->rsc, target)) {
3363 return;
3364 }
3365
3366 if (active_on_target) {
3367 pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3368 source);
3369
3370 native_add_running(history->rsc, target_node, history->rsc->cluster,
3371 FALSE);
3372 if ((source_node != NULL) && source_node->details->online) {
3373 /* This is a partial migration: the migrate_to completed
3374 * successfully on the source, but the migrate_from has not
3375 * completed. Remember the source and target; if the newly
3376 * chosen target remains the same when we schedule actions
3377 * later, we may continue with the migration.
3378 */
3379 history->rsc->partial_migration_target = target_node;
3380 history->rsc->partial_migration_source = source_node;
3381 }
3382
3383 } else if (!source_newer_op) {
3384 // Mark resource as failed, require recovery, and prevent migration
3385 pcmk__set_rsc_flags(history->rsc,
3388 }
3389}
3390
3397static void
3398unpack_migrate_to_failure(struct action_history *history)
3399{
3400 xmlNode *target_migrate_from = NULL;
3401 const char *source = NULL;
3402 const char *target = NULL;
3403
3404 // Get source and target node names from XML
3405 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3406 &target) != pcmk_rc_ok) {
3407 return;
3408 }
3409
3410 /* If a migration failed, we have to assume the resource is active. Clones
3411 * are not allowed to migrate, so role can't be promoted.
3412 */
3413 history->rsc->role = pcmk_role_started;
3414
3415 // Check for migrate_from on the target
3416 target_migrate_from = find_lrm_op(history->rsc->id,
3418 PCMK_OCF_OK, history->rsc->cluster);
3419
3420 if (/* If the resource state is unknown on the target, it will likely be
3421 * probed there.
3422 * Don't just consider it running there. We will get back here anyway in
3423 * case the probe detects it's running there.
3424 */
3425 !unknown_on_node(history->rsc, target)
3426 /* If the resource has newer state on the target after the migration
3427 * events, this migrate_to no longer matters for the target.
3428 */
3429 && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3430 target_migrate_from,
3431 history->rsc->cluster)) {
3432 /* The resource has no newer state on the target, so assume it's still
3433 * active there.
3434 * (if it is up).
3435 */
3436 pcmk_node_t *target_node = pcmk_find_node(history->rsc->cluster,
3437 target);
3438
3439 if (target_node && target_node->details->online) {
3440 native_add_running(history->rsc, target_node, history->rsc->cluster,
3441 FALSE);
3442 }
3443
3444 } else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
3445 history->rsc->cluster)) {
3446 /* We know the resource has newer state on the target, but this
3447 * migrate_to still matters for the source as long as there's no newer
3448 * non-monitor operation there.
3449 */
3450
3451 // Mark node as having dangling migration so we can force a stop later
3452 history->rsc->dangling_migrations =
3453 g_list_prepend(history->rsc->dangling_migrations,
3454 (gpointer) history->node);
3455 }
3456}
3457
3464static void
3465unpack_migrate_from_failure(struct action_history *history)
3466{
3467 xmlNode *source_migrate_to = NULL;
3468 const char *source = NULL;
3469 const char *target = NULL;
3470
3471 // Get source and target node names from XML
3472 if (get_migration_node_names(history->xml, NULL, history->node, &source,
3473 &target) != pcmk_rc_ok) {
3474 return;
3475 }
3476
3477 /* If a migration failed, we have to assume the resource is active. Clones
3478 * are not allowed to migrate, so role can't be promoted.
3479 */
3480 history->rsc->role = pcmk_role_started;
3481
3482 // Check for a migrate_to on the source
3483 source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3484 source, target, PCMK_OCF_OK,
3485 history->rsc->cluster);
3486
3487 if (/* If the resource state is unknown on the source, it will likely be
3488 * probed there.
3489 * Don't just consider it running there. We will get back here anyway in
3490 * case the probe detects it's running there.
3491 */
3492 !unknown_on_node(history->rsc, source)
3493 /* If the resource has newer state on the source after the migration
3494 * events, this migrate_from no longer matters for the source.
3495 */
3496 && !newer_state_after_migrate(history->rsc->id, source,
3497 source_migrate_to, history->xml,
3498 history->rsc->cluster)) {
3499 /* The resource has no newer state on the source, so assume it's still
3500 * active there (if it is up).
3501 */
3502 pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3503 source);
3504
3505 if (source_node && source_node->details->online) {
3506 native_add_running(history->rsc, source_node, history->rsc->cluster,
3507 TRUE);
3508 }
3509 }
3510}
3511
3518static void
3519record_failed_op(struct action_history *history)
3520{
3521 if (!(history->node->details->online)) {
3522 return;
3523 }
3524
3525 for (const xmlNode *xIter = history->rsc->cluster->failed->children;
3526 xIter != NULL; xIter = xIter->next) {
3527
3528 const char *key = pcmk__xe_history_key(xIter);
3529 const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
3530
3531 if (pcmk__str_eq(history->key, key, pcmk__str_none)
3532 && pcmk__str_eq(uname, history->node->details->uname,
3533 pcmk__str_casei)) {
3534 crm_trace("Skipping duplicate entry %s on %s",
3535 history->key, pcmk__node_name(history->node));
3536 return;
3537 }
3538 }
3539
3540 crm_trace("Adding entry for %s on %s to failed action list",
3541 history->key, pcmk__node_name(history->node));
3542 crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
3543 crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3544 pcmk__xml_copy(history->rsc->cluster->failed, history->xml);
3545}
3546
3547static char *
3548last_change_str(const xmlNode *xml_op)
3549{
3550 time_t when;
3551 char *result = NULL;
3552
3554 &when) == pcmk_ok) {
3555 char *when_s = pcmk__epoch2str(&when, 0);
3556 const char *p = strchr(when_s, ' ');
3557
3558 // Skip day of week to make message shorter
3559 if ((p != NULL) && (*(++p) != '\0')) {
3561 }
3562 free(when_s);
3563 }
3564
3565 if (result == NULL) {
3566 result = pcmk__str_copy("unknown_time");
3567 }
3568
3569 return result;
3570}
3571
3584static int
3585cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
3586{
3587 switch (first) {
3589 switch (second) {
3591 return 1;
3593 return 0;
3594 default:
3595 return -1;
3596 }
3597 break;
3598
3600 switch (second) {
3604 return 1;
3606 return 0;
3607 default:
3608 return -1;
3609 }
3610 break;
3611
3613 switch (second) {
3618 return 1;
3620 return 0;
3621 default:
3622 return -1;
3623 }
3624 break;
3625
3626 default:
3627 break;
3628 }
3629 switch (second) {
3631 return (first == pcmk_on_fail_ignore)? -1 : 1;
3632
3634 switch (first) {
3638 return -1;
3639 default:
3640 return 1;
3641 }
3642 break;
3643
3645 switch (first) {
3650 return -1;
3651 default:
3652 return 1;
3653 }
3654 break;
3655
3656 default:
3657 break;
3658 }
3659 return first - second;
3660}
3661
3668static void
3669ban_from_all_nodes(pcmk_resource_t *rsc)
3670{
3671 int score = -PCMK_SCORE_INFINITY;
3672 pcmk_resource_t *fail_rsc = rsc;
3673
3674 if (fail_rsc->parent != NULL) {
3675 pcmk_resource_t *parent = uber_parent(fail_rsc);
3676
3677 if (pcmk__is_anonymous_clone(parent)) {
3678 /* For anonymous clones, if an operation with
3679 * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3680 * entire clone must stop.
3681 */
3682 fail_rsc = parent;
3683 }
3684 }
3685
3686 // Ban the resource from all nodes
3687 crm_notice("%s will not be started under current conditions", fail_rsc->id);
3688 if (fail_rsc->allowed_nodes != NULL) {
3689 g_hash_table_destroy(fail_rsc->allowed_nodes);
3690 }
3692 g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
3693}
3694
3703static void
3704unpack_failure_handling(struct action_history *history,
3705 enum action_fail_response *on_fail,
3706 enum rsc_role_e *fail_role)
3707{
3708 xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3709 history->interval_ms, true);
3710
3711 GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3712 history->task,
3713 history->interval_ms, config);
3714
3715 const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3716
3717 *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3718 history->interval_ms, on_fail_str);
3719 *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3720 meta);
3721 g_hash_table_destroy(meta);
3722}
3723
3734static void
3735unpack_rsc_op_failure(struct action_history *history,
3736 enum action_fail_response config_on_fail,
3737 enum rsc_role_e fail_role, xmlNode **last_failure,
3738 enum action_fail_response *on_fail)
3739{
3740 bool is_probe = false;
3741 char *last_change_s = NULL;
3742
3743 *last_failure = history->xml;
3744
3745 is_probe = pcmk_xe_is_probe(history->xml);
3746 last_change_s = last_change_str(history->xml);
3747
3748 if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
3749 && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3750 crm_trace("Unexpected result (%s%s%s) was recorded for "
3751 "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3752 services_ocf_exitcode_str(history->exit_status),
3753 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3754 pcmk__s(history->exit_reason, ""),
3755 (is_probe? "probe" : history->task), history->rsc->id,
3756 pcmk__node_name(history->node), last_change_s,
3757 history->exit_status, history->id);
3758 } else {
3759 pcmk__sched_warn("Unexpected result (%s%s%s) was recorded for %s of "
3760 "%s on %s at %s " CRM_XS " exit-status=%d id=%s",
3761 services_ocf_exitcode_str(history->exit_status),
3762 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3763 pcmk__s(history->exit_reason, ""),
3764 (is_probe? "probe" : history->task), history->rsc->id,
3765 pcmk__node_name(history->node), last_change_s,
3766 history->exit_status, history->id);
3767
3768 if (is_probe && (history->exit_status != PCMK_OCF_OK)
3769 && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3770 && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3771
3772 /* A failed (not just unexpected) probe result could mean the user
3773 * didn't know resources will be probed even where they can't run.
3774 */
3775 crm_notice("If it is not possible for %s to run on %s, see "
3776 "the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
3777 "constraints",
3778 history->rsc->id, pcmk__node_name(history->node));
3779 }
3780
3781 record_failed_op(history);
3782 }
3783
3784 free(last_change_s);
3785
3786 if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
3787 pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3788 pcmk_on_fail_text(*on_fail),
3789 pcmk_on_fail_text(config_on_fail), history->key);
3790 *on_fail = config_on_fail;
3791 }
3792
3793 if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3794 resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3795 "__stop_fail__", history->rsc->cluster);
3796
3797 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3798 unpack_migrate_to_failure(history);
3799
3800 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3801 unpack_migrate_from_failure(history);
3802
3803 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3804 history->rsc->role = pcmk_role_promoted;
3805
3806 } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3807 if (config_on_fail == pcmk_on_fail_block) {
3808 history->rsc->role = pcmk_role_promoted;
3810 "demote with " PCMK_META_ON_FAIL "=block");
3811
3812 } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3813 history->rsc->role = pcmk_role_stopped;
3814
3815 } else {
3816 /* Staying in the promoted role would put the scheduler and
3817 * controller into a loop. Setting the role to unpromoted is not
3818 * dangerous because the resource will be stopped as part of
3819 * recovery, and any promotion will be ordered after that stop.
3820 */
3821 history->rsc->role = pcmk_role_unpromoted;
3822 }
3823 }
3824
3825 if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3826 /* leave stopped */
3827 pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3828 history->rsc->role = pcmk_role_stopped;
3829
3830 } else if (history->rsc->role < pcmk_role_started) {
3831 pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3832 set_active(history->rsc);
3833 }
3834
3835 pcmk__rsc_trace(history->rsc,
3836 "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3837 history->rsc->id, pcmk_role_text(history->rsc->role),
3838 pcmk__btoa(history->node->details->unclean),
3839 pcmk_on_fail_text(config_on_fail),
3840 pcmk_role_text(fail_role));
3841
3842 if ((fail_role != pcmk_role_started)
3843 && (history->rsc->next_role < fail_role)) {
3844 pe__set_next_role(history->rsc, fail_role, "failure");
3845 }
3846
3847 if (fail_role == pcmk_role_stopped) {
3848 ban_from_all_nodes(history->rsc);
3849 }
3850}
3851
3861static void
3862block_if_unrecoverable(struct action_history *history)
3863{
3864 char *last_change_s = NULL;
3865
3866 if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3867 return; // All actions besides stop are always recoverable
3868 }
3869 if (pe_can_fence(history->node->details->data_set, history->node)) {
3870 return; // Failed stops are recoverable via fencing
3871 }
3872
3873 last_change_s = last_change_str(history->xml);
3874 pcmk__sched_err("No further recovery can be attempted for %s "
3875 "because %s on %s failed (%s%s%s) at %s "
3876 CRM_XS " rc=%d id=%s",
3877 history->rsc->id, history->task,
3878 pcmk__node_name(history->node),
3879 services_ocf_exitcode_str(history->exit_status),
3880 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3881 pcmk__s(history->exit_reason, ""),
3882 last_change_s, history->exit_status, history->id);
3883
3884 free(last_change_s);
3885
3888}
3889
3899static inline void
3900remap_because(struct action_history *history, const char **why, int value,
3901 const char *reason)
3902{
3903 if (history->execution_status != value) {
3904 history->execution_status = value;
3905 *why = reason;
3906 }
3907}
3908
3931static void
3932remap_operation(struct action_history *history,
3933 enum action_fail_response *on_fail, bool expired)
3934{
3935 bool is_probe = false;
3936 int orig_exit_status = history->exit_status;
3937 int orig_exec_status = history->execution_status;
3938 const char *why = NULL;
3939 const char *task = history->task;
3940
3941 // Remap degraded results to their successful counterparts
3942 history->exit_status = pcmk__effective_rc(history->exit_status);
3943 if (history->exit_status != orig_exit_status) {
3944 why = "degraded result";
3945 if (!expired && (!history->node->details->shutdown
3946 || history->node->details->online)) {
3947 record_failed_op(history);
3948 }
3949 }
3950
3951 if (!pcmk__is_bundled(history->rsc)
3952 && pcmk_xe_mask_probe_failure(history->xml)
3953 && ((history->execution_status != PCMK_EXEC_DONE)
3954 || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3955 history->execution_status = PCMK_EXEC_DONE;
3956 history->exit_status = PCMK_OCF_NOT_RUNNING;
3957 why = "equivalent probe result";
3958 }
3959
3960 /* If the executor reported an execution status of anything but done or
3961 * error, consider that final. But for done or error, we know better whether
3962 * it should be treated as a failure or not, because we know the expected
3963 * result.
3964 */
3965 switch (history->execution_status) {
3966 case PCMK_EXEC_DONE:
3967 case PCMK_EXEC_ERROR:
3968 break;
3969
3970 // These should be treated as node-fatal
3973 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3974 "node-fatal error");
3975 goto remap_done;
3976
3977 default:
3978 goto remap_done;
3979 }
3980
3981 is_probe = pcmk_xe_is_probe(history->xml);
3982 if (is_probe) {
3983 task = "probe";
3984 }
3985
3986 if (history->expected_exit_status < 0) {
3987 /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3988 * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3989 * expected exit status in the transition key, which (along with the
3990 * similar case of a corrupted transition key in the CIB) will be
3991 * reported to this function as -1. Pacemaker 2.0+ does not support
3992 * rolling upgrades from those versions or processing of saved CIB files
3993 * from those versions, so we do not need to care much about this case.
3994 */
3995 remap_because(history, &why, PCMK_EXEC_ERROR,
3996 "obsolete history format");
3997 pcmk__config_warn("Expected result not found for %s on %s "
3998 "(corrupt or obsolete CIB?)",
3999 history->key, pcmk__node_name(history->node));
4000
4001 } else if (history->exit_status == history->expected_exit_status) {
4002 remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
4003
4004 } else {
4005 remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
4006 pcmk__rsc_debug(history->rsc,
4007 "%s on %s: expected %d (%s), got %d (%s%s%s)",
4008 history->key, pcmk__node_name(history->node),
4009 history->expected_exit_status,
4010 services_ocf_exitcode_str(history->expected_exit_status),
4011 history->exit_status,
4012 services_ocf_exitcode_str(history->exit_status),
4013 (pcmk__str_empty(history->exit_reason)? "" : ": "),
4014 pcmk__s(history->exit_reason, ""));
4015 }
4016
4017 switch (history->exit_status) {
4018 case PCMK_OCF_OK:
4019 if (is_probe
4020 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
4021 char *last_change_s = last_change_str(history->xml);
4022
4023 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4024 pcmk__rsc_info(history->rsc,
4025 "Probe found %s active on %s at %s",
4026 history->rsc->id, pcmk__node_name(history->node),
4027 last_change_s);
4028 free(last_change_s);
4029 }
4030 break;
4031
4033 if (is_probe
4034 || (history->expected_exit_status == history->exit_status)
4035 || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {
4036
4037 /* For probes, recurring monitors for the Stopped role, and
4038 * unmanaged resources, "not running" is not considered a
4039 * failure.
4040 */
4041 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
4042 history->rsc->role = pcmk_role_stopped;
4043 *on_fail = pcmk_on_fail_ignore;
4045 "not running");
4046 }
4047 break;
4048
4050 if (is_probe
4051 && (history->exit_status != history->expected_exit_status)) {
4052 char *last_change_s = last_change_str(history->xml);
4053
4054 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4055 pcmk__rsc_info(history->rsc,
4056 "Probe found %s active and promoted on %s at %s",
4057 history->rsc->id,
4058 pcmk__node_name(history->node), last_change_s);
4059 free(last_change_s);
4060 }
4061 if (!expired
4062 || (history->exit_status == history->expected_exit_status)) {
4063 history->rsc->role = pcmk_role_promoted;
4064 }
4065 break;
4066
4068 if (!expired) {
4069 history->rsc->role = pcmk_role_promoted;
4070 }
4071 remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
4072 break;
4073
4075 remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
4076 break;
4077
4079 {
4080 guint interval_ms = 0;
4082 &interval_ms);
4083
4084 if (interval_ms == 0) {
4085 if (!expired) {
4086 block_if_unrecoverable(history);
4087 }
4088 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4089 "exit status");
4090 } else {
4091 remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
4092 "exit status");
4093 }
4094 }
4095 break;
4096
4100 if (!expired) {
4101 block_if_unrecoverable(history);
4102 }
4103 remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4104 break;
4105
4106 default:
4107 if (history->execution_status == PCMK_EXEC_DONE) {
4108 char *last_change_s = last_change_str(history->xml);
4109
4110 crm_info("Treating unknown exit status %d from %s of %s "
4111 "on %s at %s as failure",
4112 history->exit_status, task, history->rsc->id,
4113 pcmk__node_name(history->node), last_change_s);
4114 remap_because(history, &why, PCMK_EXEC_ERROR,
4115 "unknown exit status");
4116 free(last_change_s);
4117 }
4118 break;
4119 }
4120
4121remap_done:
4122 if (why != NULL) {
4123 pcmk__rsc_trace(history->rsc,
4124 "Remapped %s result from [%s: %s] to [%s: %s] "
4125 "because of %s",
4126 history->key, pcmk_exec_status_str(orig_exec_status),
4127 crm_exit_str(orig_exit_status),
4128 pcmk_exec_status_str(history->execution_status),
4129 crm_exit_str(history->exit_status), why);
4130 }
4131}
4132
4133// return TRUE if start or monitor last failure but parameters changed
4134static bool
4135should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4136 pcmk_resource_t *rsc, pcmk_node_t *node)
4137{
4140 /* We haven't allocated resources yet, so we can't reliably
4141 * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4142 * When that's needed, defer the check until later.
4143 */
4145 rsc->cluster);
4146
4147 } else {
4148 pcmk__op_digest_t *digest_data = NULL;
4149
4150 digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4151 rsc->cluster);
4152 switch (digest_data->rc) {
4154 crm_trace("Resource %s history entry %s on %s"
4155 " has no digest to compare",
4156 rsc->id, pcmk__xe_history_key(xml_op),
4157 node->details->id);
4158 break;
4159 case pcmk__digest_match:
4160 break;
4161 default:
4162 return TRUE;
4163 }
4164 }
4165 }
4166 return FALSE;
4167}
4168
4169// Order action after fencing of remote node, given connection rsc
4170static void
4171order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4173{
4174 pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4175
4176 if (remote_node) {
4177 pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4178 FALSE, scheduler);
4179
4181 }
4182}
4183
4184static bool
4185should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4186 guint interval_ms, bool is_last_failure)
4187{
4188 /* Clearing failures of recurring monitors has special concerns. The
4189 * executor reports only changes in the monitor result, so if the
4190 * monitor is still active and still getting the same failure result,
4191 * that will go undetected after the failure is cleared.
4192 *
4193 * Also, the operation history will have the time when the recurring
4194 * monitor result changed to the given code, not the time when the
4195 * result last happened.
4196 *
4197 * @TODO We probably should clear such failures only when the failure
4198 * timeout has passed since the last occurrence of the failed result.
4199 * However we don't record that information. We could maybe approximate
4200 * that by clearing only if there is a more recent successful monitor or
4201 * stop result, but we don't even have that information at this point
4202 * since we are still unpacking the resource's operation history.
4203 *
4204 * This is especially important for remote connection resources with a
4205 * reconnect interval, so in that case, we skip clearing failures
4206 * if the remote node hasn't been fenced.
4207 */
4208 if (rsc->remote_reconnect_ms
4210 && (interval_ms != 0)
4211 && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4212
4213 pcmk_node_t *remote_node = pcmk_find_node(rsc->cluster, rsc->id);
4214
4215 if (remote_node && !remote_node->details->remote_was_fenced) {
4216 if (is_last_failure) {
4217 crm_info("Waiting to clear monitor failure for remote node %s"
4218 " until fencing has occurred", rsc->id);
4219 }
4220 return TRUE;
4221 }
4222 }
4223 return FALSE;
4224}
4225
4244static bool
4245check_operation_expiry(struct action_history *history)
4246{
4247 bool expired = false;
4248 bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4249 time_t last_run = 0;
4250 int unexpired_fail_count = 0;
4251 const char *clear_reason = NULL;
4252
4253 if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4254 pcmk__rsc_trace(history->rsc,
4255 "Resource history entry %s on %s is not expired: "
4256 "Not Installed does not expire",
4257 history->id, pcmk__node_name(history->node));
4258 return false; // "Not installed" must always be cleared manually
4259 }
4260
4261 if ((history->rsc->failure_timeout > 0)
4263 &last_run) == 0)) {
4264
4265 /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4266 * timestamp
4267 */
4268
4269 time_t now = get_effective_time(history->rsc->cluster);
4270 time_t last_failure = 0;
4271
4272 // Is this particular operation history older than the failure timeout?
4273 if ((now >= (last_run + history->rsc->failure_timeout))
4274 && !should_ignore_failure_timeout(history->rsc, history->task,
4275 history->interval_ms,
4276 is_last_failure)) {
4277 expired = true;
4278 }
4279
4280 // Does the resource as a whole have an unexpired fail count?
4281 unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4282 &last_failure,
4284 history->xml);
4285
4286 // Update scheduler recheck time according to *last* failure
4287 crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
4288 " last-failure@%lld",
4289 history->id, (long long) last_run, (expired? "" : "not "),
4290 (long long) now, unexpired_fail_count,
4291 history->rsc->failure_timeout, (long long) last_failure);
4292 last_failure += history->rsc->failure_timeout + 1;
4293 if (unexpired_fail_count && (now < last_failure)) {
4294 pe__update_recheck_time(last_failure, history->rsc->cluster,
4295 "fail count expiration");
4296 }
4297 }
4298
4299 if (expired) {
4300 if (pe_get_failcount(history->node, history->rsc, NULL,
4301 pcmk__fc_default, history->xml)) {
4302 // There is a fail count ignoring timeout
4303
4304 if (unexpired_fail_count == 0) {
4305 // There is no fail count considering timeout
4306 clear_reason = "it expired";
4307
4308 } else {
4309 /* This operation is old, but there is an unexpired fail count.
4310 * In a properly functioning cluster, this should only be
4311 * possible if this operation is not a failure (otherwise the
4312 * fail count should be expired too), so this is really just a
4313 * failsafe.
4314 */
4315 pcmk__rsc_trace(history->rsc,
4316 "Resource history entry %s on %s is not "
4317 "expired: Unexpired fail count",
4318 history->id, pcmk__node_name(history->node));
4319 expired = false;
4320 }
4321
4322 } else if (is_last_failure
4323 && (history->rsc->remote_reconnect_ms != 0)) {
4324 /* Clear any expired last failure when reconnect interval is set,
4325 * even if there is no fail count.
4326 */
4327 clear_reason = "reconnect interval is set";
4328 }
4329 }
4330
4331 if (!expired && is_last_failure
4332 && should_clear_for_param_change(history->xml, history->task,
4333 history->rsc, history->node)) {
4334 clear_reason = "resource parameters have changed";
4335 }
4336
4337 if (clear_reason != NULL) {
4338 pcmk_action_t *clear_op = NULL;
4339
4340 // Schedule clearing of the fail count
4341 clear_op = pe__clear_failcount(history->rsc, history->node,
4342 clear_reason, history->rsc->cluster);
4343
4344 if (pcmk_is_set(history->rsc->cluster->flags,
4346 && (history->rsc->remote_reconnect_ms != 0)) {
4347 /* If we're clearing a remote connection due to a reconnect
4348 * interval, we want to wait until any scheduled fencing
4349 * completes.
4350 *
4351 * We could limit this to remote_node->details->unclean, but at
4352 * this point, that's always true (it won't be reliable until
4353 * after unpack_node_history() is done).
4354 */
4355 crm_info("Clearing %s failure will wait until any scheduled "
4356 "fencing of %s completes",
4357 history->task, history->rsc->id);
4358 order_after_remote_fencing(clear_op, history->rsc,
4359 history->rsc->cluster);
4360 }
4361 }
4362
4363 if (expired && (history->interval_ms == 0)
4364 && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4365 switch (history->exit_status) {
4366 case PCMK_OCF_OK:
4369 case PCMK_OCF_DEGRADED:
4371 // Don't expire probes that return these values
4372 pcmk__rsc_trace(history->rsc,
4373 "Resource history entry %s on %s is not "
4374 "expired: Probe result",
4375 history->id, pcmk__node_name(history->node));
4376 expired = false;
4377 break;
4378 }
4379 }
4380
4381 return expired;
4382}
4383
4384int
4385pe__target_rc_from_xml(const xmlNode *xml_op)
4386{
4387 int target_rc = 0;
4388 const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
4389
4390 if (key == NULL) {
4391 return -1;
4392 }
4393 decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4394 return target_rc;
4395}
4396
4406static void
4407update_resource_state(struct action_history *history, int exit_status,
4408 const xmlNode *last_failure,
4409 enum action_fail_response *on_fail)
4410{
4411 bool clear_past_failure = false;
4412
4413 if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4414 || (!pcmk__is_bundled(history->rsc)
4415 && pcmk_xe_mask_probe_failure(history->xml))) {
4416 history->rsc->role = pcmk_role_stopped;
4417
4418 } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4419 clear_past_failure = true;
4420
4421 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4422 pcmk__str_none)) {
4423 if ((last_failure != NULL)
4424 && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4425 pcmk__str_none)) {
4426 clear_past_failure = true;
4427 }
4428 if (history->rsc->role < pcmk_role_started) {
4429 set_active(history->rsc);
4430 }
4431
4432 } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4433 history->rsc->role = pcmk_role_started;
4434 clear_past_failure = true;
4435
4436 } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4437 history->rsc->role = pcmk_role_stopped;
4438 clear_past_failure = true;
4439
4440 } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4441 pcmk__str_none)) {
4442 history->rsc->role = pcmk_role_promoted;
4443 clear_past_failure = true;
4444
4445 } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4446 pcmk__str_none)) {
4447 if (*on_fail == pcmk_on_fail_demote) {
4448 /* Demote clears an error only if
4449 * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4450 */
4451 clear_past_failure = true;
4452 }
4453 history->rsc->role = pcmk_role_unpromoted;
4454
4455 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4456 pcmk__str_none)) {
4457 history->rsc->role = pcmk_role_started;
4458 clear_past_failure = true;
4459
4460 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4461 pcmk__str_none)) {
4462 unpack_migrate_to_success(history);
4463
4464 } else if (history->rsc->role < pcmk_role_started) {
4465 pcmk__rsc_trace(history->rsc, "%s active on %s",
4466 history->rsc->id, pcmk__node_name(history->node));
4467 set_active(history->rsc);
4468 }
4469
4470 if (!clear_past_failure) {
4471 return;
4472 }
4473
4474 switch (*on_fail) {
4475 case pcmk_on_fail_stop:
4476 case pcmk_on_fail_ban:
4479 pcmk__rsc_trace(history->rsc,
4480 "%s (%s) is not cleared by a completed %s",
4481 history->rsc->id, pcmk_on_fail_text(*on_fail),
4482 history->task);
4483 break;
4484
4485 case pcmk_on_fail_block:
4490 *on_fail = pcmk_on_fail_ignore;
4492 "clear past failures");
4493 break;
4494
4496 if (history->rsc->remote_reconnect_ms == 0) {
4497 /* With no reconnect interval, the connection is allowed to
4498 * start again after the remote node is fenced and
4499 * completely stopped. (With a reconnect interval, we wait
4500 * for the failure to be cleared entirely before attempting
4501 * to reconnect.)
4502 */
4503 *on_fail = pcmk_on_fail_ignore;
4505 "clear past failures and reset remote");
4506 }
4507 break;
4508 }
4509}
4510
4519static inline bool
4520can_affect_state(struct action_history *history)
4521{
4522#if 0
4523 /* @COMPAT It might be better to parse only actions we know we're interested
4524 * in, rather than exclude a couple we don't. However that would be a
4525 * behavioral change that should be done at a major or minor series release.
4526 * Currently, unknown operations can affect whether a resource is considered
4527 * active and/or failed.
4528 */
4529 return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4533 "asyncmon", NULL);
4534#else
4535 return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
4536 PCMK_ACTION_META_DATA, NULL);
4537#endif
4538}
4539
4548static int
4549unpack_action_result(struct action_history *history)
4550{
4551 if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
4552 &(history->execution_status)) < 0)
4553 || (history->execution_status < PCMK_EXEC_PENDING)
4554 || (history->execution_status > PCMK_EXEC_MAX)
4555 || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4556 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4557 "with invalid " PCMK__XA_OP_STATUS " '%s'",
4558 history->id, history->rsc->id,
4559 pcmk__node_name(history->node),
4560 pcmk__s(crm_element_value(history->xml,
4562 ""));
4563 return pcmk_rc_unpack_error;
4564 }
4565 if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
4566 &(history->exit_status)) < 0)
4567 || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4568#if 0
4569 /* @COMPAT We should ignore malformed entries, but since that would
4570 * change behavior, it should be done at a major or minor series
4571 * release.
4572 */
4573 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4574 "with invalid " PCMK__XA_RC_CODE " '%s'",
4575 history->id, history->rsc->id,
4576 pcmk__node_name(history->node),
4577 pcmk__s(crm_element_value(history->xml,
4579 ""));
4580 return pcmk_rc_unpack_error;
4581#else
4582 history->exit_status = CRM_EX_ERROR;
4583#endif
4584 }
4585 history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
4586 return pcmk_rc_ok;
4587}
4588
4599static int
4600process_expired_result(struct action_history *history, int orig_exit_status)
4601{
4602 if (!pcmk__is_bundled(history->rsc)
4603 && pcmk_xe_mask_probe_failure(history->xml)
4604 && (orig_exit_status != history->expected_exit_status)) {
4605
4606 if (history->rsc->role <= pcmk_role_stopped) {
4607 history->rsc->role = pcmk_role_unknown;
4608 }
4609 crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4610 "Masked failure expired",
4611 history->id, history->rsc->id,
4612 pcmk__node_name(history->node));
4613 return pcmk_rc_ok;
4614 }
4615
4616 if (history->exit_status == history->expected_exit_status) {
4617 return pcmk_rc_undetermined; // Only failures expire
4618 }
4619
4620 if (history->interval_ms == 0) {
4621 crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4622 "Expired failure",
4623 history->id, history->task, history->rsc->id,
4624 pcmk__node_name(history->node));
4625 return pcmk_rc_ok;
4626 }
4627
4628 if (history->node->details->online && !history->node->details->unclean) {
4629 /* Reschedule the recurring action. schedule_cancel() won't work at
4630 * this stage, so as a hacky workaround, forcibly change the restart
4631 * digest so pcmk__check_action_config() does what we want later.
4632 *
4633 * @TODO We should skip this if there is a newer successful monitor.
4634 * Also, this causes rescheduling only if the history entry
4635 * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4636 * scheduler regression test doesn't, but that may not be a
4637 * realistic scenario in production).
4638 */
4639 crm_notice("Rescheduling %s-interval %s of %s on %s "
4640 "after failure expired",
4641 pcmk__readable_interval(history->interval_ms), history->task,
4642 history->rsc->id, pcmk__node_name(history->node));
4644 "calculated-failure-timeout");
4645 return pcmk_rc_ok;
4646 }
4647
4648 return pcmk_rc_undetermined;
4649}
4650
4660static void
4661mask_probe_failure(struct action_history *history, int orig_exit_status,
4662 const xmlNode *last_failure,
4663 enum action_fail_response *on_fail)
4664{
4665 pcmk_resource_t *ban_rsc = history->rsc;
4666
4667 if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
4668 ban_rsc = uber_parent(history->rsc);
4669 }
4670
4671 crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4672 services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4673 pcmk__node_name(history->node));
4674 update_resource_state(history, history->expected_exit_status, last_failure,
4675 on_fail);
4676 crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
4677
4678 record_failed_op(history);
4679 resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4680 "masked-probe-failure", history->rsc->cluster);
4681}
4682
4695static bool
4696failure_is_newer(const struct action_history *history,
4697 const xmlNode *last_failure)
4698{
4699 guint failure_interval_ms = 0U;
4700 long long failure_change = 0LL;
4701 long long this_change = 0LL;
4702
4703 if (last_failure == NULL) {
4704 return false; // Resource has no last_failure entry
4705 }
4706
4707 if (!pcmk__str_eq(history->task,
4708 crm_element_value(last_failure, PCMK_XA_OPERATION),
4709 pcmk__str_none)) {
4710 return false; // last_failure is for different action
4711 }
4712
4713 if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
4714 &failure_interval_ms) != pcmk_ok)
4715 || (history->interval_ms != failure_interval_ms)) {
4716 return false; // last_failure is for action with different interval
4717 }
4718
4720 &this_change, 0LL) != pcmk_rc_ok)
4721 || (pcmk__scan_ll(crm_element_value(last_failure,
4723 &failure_change, 0LL) != pcmk_rc_ok)
4724 || (failure_change < this_change)) {
4725 return false; // Failure is not known to be newer
4726 }
4727
4728 return true;
4729}
4730
4738static void
4739process_pending_action(struct action_history *history,
4740 const xmlNode *last_failure)
4741{
4742 /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4743 * and there might be a RSC_monitor_INTERVAL entry with the last successful
4744 * or pending result.
4745 *
4746 * If last_failure contains the failure of the pending recurring monitor
4747 * we're processing here, and is newer, the action is no longer pending.
4748 * (Pending results have call ID -1, which sorts last, so the last failure
4749 * if any should be known.)
4750 */
4751 if (failure_is_newer(history, last_failure)) {
4752 return;
4753 }
4754
4755 if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4757 set_active(history->rsc);
4758
4759 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4760 history->rsc->role = pcmk_role_promoted;
4761
4762 } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4763 && history->node->details->unclean) {
4764 /* A migrate_to action is pending on a unclean source, so force a stop
4765 * on the target.
4766 */
4767 const char *migrate_target = NULL;
4768 pcmk_node_t *target = NULL;
4769
4770 migrate_target = crm_element_value(history->xml,
4772 target = pcmk_find_node(history->rsc->cluster, migrate_target);
4773 if (target != NULL) {
4774 stop_action(history->rsc, target, FALSE);
4775 }
4776 }
4777
4778 if (history->rsc->pending_task != NULL) {
4779 /* There should never be multiple pending actions, but as a failsafe,
4780 * just remember the first one processed for display purposes.
4781 */
4782 return;
4783 }
4784
4785 if (pcmk_is_probe(history->task, history->interval_ms)) {
4786 /* Pending probes are currently never displayed, even if pending
4787 * operations are requested. If we ever want to change that,
4788 * enable the below and the corresponding part of
4789 * native.c:native_pending_task().
4790 */
4791#if 0
4792 history->rsc->pending_task = strdup("probe");
4793 history->rsc->pending_node = history->node;
4794#endif
4795 } else {
4796 history->rsc->pending_task = strdup(history->task);
4797 history->rsc->pending_node = history->node;
4798 }
4799}
4800
4801static void
4802unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4803 xmlNode **last_failure, enum action_fail_response *on_fail)
4804{
4805 int old_rc = 0;
4806 bool expired = false;
4807 pcmk_resource_t *parent = rsc;
4808 enum rsc_role_e fail_role = pcmk_role_unknown;
4809 enum action_fail_response failure_strategy = pcmk_on_fail_restart;
4810
4811 struct action_history history = {
4812 .rsc = rsc,
4813 .node = node,
4814 .xml = xml_op,
4815 .execution_status = PCMK_EXEC_UNKNOWN,
4816 };
4817
4818 CRM_CHECK(rsc && node && xml_op, return);
4819
4820 history.id = pcmk__xe_id(xml_op);
4821 if (history.id == NULL) {
4822 pcmk__config_err("Ignoring resource history entry for %s on %s "
4823 "without ID", rsc->id, pcmk__node_name(node));
4824 return;
4825 }
4826
4827 // Task and interval
4828 history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
4829 if (history.task == NULL) {
4830 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4831 "without " PCMK_XA_OPERATION,
4832 history.id, rsc->id, pcmk__node_name(node));
4833 return;
4834 }
4835 crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4836 if (!can_affect_state(&history)) {
4837 pcmk__rsc_trace(rsc,
4838 "Ignoring resource history entry %s for %s on %s "
4839 "with irrelevant action '%s'",
4840 history.id, rsc->id, pcmk__node_name(node),
4841 history.task);
4842 return;
4843 }
4844
4845 if (unpack_action_result(&history) != pcmk_rc_ok) {
4846 return; // Error already logged
4847 }
4848
4849 history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4850 history.key = pcmk__xe_history_key(xml_op);
4851 crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4852
4853 pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4854 history.id, history.task, history.call_id,
4855 pcmk__node_name(node),
4856 pcmk_exec_status_str(history.execution_status),
4857 crm_exit_str(history.exit_status));
4858
4859 if (node->details->unclean) {
4860 pcmk__rsc_trace(rsc,
4861 "%s is running on %s, which is unclean (further action "
4862 "depends on value of stop's on-fail attribute)",
4863 rsc->id, pcmk__node_name(node));
4864 }
4865
4866 expired = check_operation_expiry(&history);
4867 old_rc = history.exit_status;
4868
4869 remap_operation(&history, on_fail, expired);
4870
4871 if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4872 goto done;
4873 }
4874
4875 if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4876 mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4877 goto done;
4878 }
4879
4880 if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
4881 parent = uber_parent(rsc);
4882 }
4883
4884 switch (history.execution_status) {
4885 case PCMK_EXEC_PENDING:
4886 process_pending_action(&history, *last_failure);
4887 goto done;
4888
4889 case PCMK_EXEC_DONE:
4890 update_resource_state(&history, history.exit_status, *last_failure,
4891 on_fail);
4892 goto done;
4893
4895 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4896 if (failure_strategy == pcmk_on_fail_ignore) {
4897 crm_warn("Cannot ignore failed %s of %s on %s: "
4898 "Resource agent doesn't exist "
4899 CRM_XS " status=%d rc=%d id=%s",
4900 history.task, rsc->id, pcmk__node_name(node),
4901 history.execution_status, history.exit_status,
4902 history.id);
4903 /* Also for printing it as "FAILED" by marking it as
4904 * pcmk_rsc_failed later
4905 */
4906 *on_fail = pcmk_on_fail_ban;
4907 }
4909 "hard-error", rsc->cluster);
4910 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4911 last_failure, on_fail);
4912 goto done;
4913
4915 if (pcmk__is_pacemaker_remote_node(node)
4918 /* We should never get into a situation where a managed remote
4919 * connection resource is considered OK but a resource action
4920 * behind the connection gets a "not connected" status. But as a
4921 * fail-safe in case a bug or unusual circumstances do lead to
4922 * that, ensure the remote connection is considered failed.
4923 */
4926 }
4927 break; // Not done, do error handling
4928
4929 case PCMK_EXEC_ERROR:
4932 case PCMK_EXEC_TIMEOUT:
4934 case PCMK_EXEC_INVALID:
4935 break; // Not done, do error handling
4936
4937 default: // No other value should be possible at this point
4938 break;
4939 }
4940
4941 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4942 if ((failure_strategy == pcmk_on_fail_ignore)
4943 || ((failure_strategy == pcmk_on_fail_restart_container)
4944 && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4945
4946 char *last_change_s = last_change_str(xml_op);
4947
4948 crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4949 CRM_XS " %s",
4950 history.task, services_ocf_exitcode_str(history.exit_status),
4951 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4952 pcmk__s(history.exit_reason, ""), rsc->id,
4953 pcmk__node_name(node), last_change_s, history.id);
4954 free(last_change_s);
4955
4956 update_resource_state(&history, history.expected_exit_status,
4957 *last_failure, on_fail);
4958 crm_xml_add(xml_op, PCMK_XA_UNAME, node->details->uname);
4960
4961 record_failed_op(&history);
4962
4963 if ((failure_strategy == pcmk_on_fail_restart_container)
4964 && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
4965 *on_fail = failure_strategy;
4966 }
4967
4968 } else {
4969 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4970 last_failure, on_fail);
4971
4972 if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4973 uint8_t log_level = LOG_ERR;
4974
4975 if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4976 log_level = LOG_NOTICE;
4977 }
4978 do_crm_log(log_level,
4979 "Preventing %s from restarting on %s because "
4980 "of hard failure (%s%s%s) " CRM_XS " %s",
4981 parent->id, pcmk__node_name(node),
4982 services_ocf_exitcode_str(history.exit_status),
4983 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4984 pcmk__s(history.exit_reason, ""), history.id);
4986 "hard-error", rsc->cluster);
4987
4988 } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4989 pcmk__sched_err("Preventing %s from restarting anywhere because "
4990 "of fatal failure (%s%s%s) " CRM_XS " %s",
4991 parent->id,
4992 services_ocf_exitcode_str(history.exit_status),
4993 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4994 pcmk__s(history.exit_reason, ""), history.id);
4996 "fatal-error", rsc->cluster);
4997 }
4998 }
4999
5000done:
5001 pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
5002 rsc->id, pcmk__node_name(node), history.id,
5003 pcmk_role_text(rsc->role),
5005}
5006
5007static void
5008add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
5010{
5011 const char *cluster_name = NULL;
5012
5013 pe_rule_eval_data_t rule_data = {
5014 .node_hash = NULL,
5015 .now = scheduler->now,
5016 .match_data = NULL,
5017 .rsc_data = NULL,
5018 .op_data = NULL
5019 };
5020
5022 CRM_ATTR_UNAME, node->details->uname);
5023
5025 if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
5026 scheduler->dc_node = node;
5027 node->details->is_dc = TRUE;
5030 } else {
5033 }
5034
5035 cluster_name = g_hash_table_lookup(scheduler->config_hash,
5037 if (cluster_name) {
5039 cluster_name);
5040 }
5041
5043 node->details->attrs, NULL, overwrite,
5044 scheduler);
5045
5047 node->details->utilization, NULL,
5048 FALSE, scheduler);
5049
5050 if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
5051 pcmk__rsc_node_current) == NULL) {
5052 const char *site_name = pcmk__node_attr(node, "site-name", NULL,
5054
5055 if (site_name) {
5057 CRM_ATTR_SITE_NAME, site_name);
5058
5059 } else if (cluster_name) {
5060 /* Default to cluster-name if unset */
5062 CRM_ATTR_SITE_NAME, cluster_name);
5063 }
5064 }
5065}
5066
5067static GList *
5068extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
5069{
5070 int counter = -1;
5071 int stop_index = -1;
5072 int start_index = -1;
5073
5074 xmlNode *rsc_op = NULL;
5075
5076 GList *gIter = NULL;
5077 GList *op_list = NULL;
5078 GList *sorted_op_list = NULL;
5079
5080 /* extract operations */
5081 op_list = NULL;
5082 sorted_op_list = NULL;
5083
5084 for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL);
5085 rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
5086
5087 if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) {
5088 crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
5089 crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
5090 op_list = g_list_prepend(op_list, rsc_op);
5091 }
5092 }
5093
5094 if (op_list == NULL) {
5095 /* if there are no operations, there is nothing to do */
5096 return NULL;
5097 }
5098
5099 sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
5100
5101 /* create active recurring operations as optional */
5102 if (active_filter == FALSE) {
5103 return sorted_op_list;
5104 }
5105
5106 op_list = NULL;
5107
5108 calculate_active_ops(sorted_op_list, &start_index, &stop_index);
5109
5110 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5111 xmlNode *rsc_op = (xmlNode *) gIter->data;
5112
5113 counter++;
5114
5115 if (start_index < stop_index) {
5116 crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
5117 break;
5118
5119 } else if (counter < start_index) {
5120 crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
5121 continue;
5122 }
5123 op_list = g_list_append(op_list, rsc_op);
5124 }
5125
5126 g_list_free(sorted_op_list);
5127 return op_list;
5128}
5129
5130GList *
5131find_operations(const char *rsc, const char *node, gboolean active_filter,
5133{
5134 GList *output = NULL;
5135 GList *intermediate = NULL;
5136
5137 xmlNode *tmp = NULL;
5139 NULL, NULL);
5140
5141 pcmk_node_t *this_node = NULL;
5142
5143 xmlNode *node_state = NULL;
5144
5145 CRM_CHECK(status != NULL, return NULL);
5146
5147 for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL);
5148 node_state != NULL; node_state = pcmk__xe_next(node_state)) {
5149
5150 if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) {
5151 const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
5152
5153 if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5154 continue;
5155 }
5156
5157 this_node = pcmk_find_node(scheduler, uname);
5158 if(this_node == NULL) {
5159 CRM_LOG_ASSERT(this_node != NULL);
5160 continue;
5161
5162 } else if (pcmk__is_pacemaker_remote_node(this_node)) {
5163 determine_remote_online_status(scheduler, this_node);
5164
5165 } else {
5166 determine_online_status(node_state, this_node, scheduler);
5167 }
5168
5169 if (this_node->details->online
5171 /* offline nodes run no resources...
5172 * unless stonith is enabled in which case we need to
5173 * make sure rsc start events happen after the stonith
5174 */
5175 xmlNode *lrm_rsc = NULL;
5176
5177 tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
5178 NULL);
5180 NULL);
5181
5182 for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL);
5183 lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5184
5185 if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
5186 const char *rsc_id = crm_element_value(lrm_rsc,
5187 PCMK_XA_ID);
5188
5189 if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
5190 continue;
5191 }
5192
5193 intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5194 output = g_list_concat(output, intermediate);
5195 }
5196 }
5197 }
5198 }
5199 }
5200
5201 return output;
5202}
@ pcmk__ar_first_implies_then
bool pcmk_xe_mask_probe_failure(const xmlNode *xml_op)
Check whether an action history entry represents a maskable probe.
Definition probes.c:69
#define PCMK_ACTION_STOP
Definition actions.h:75
const char * pcmk_on_fail_text(enum action_fail_response on_fail)
Get string equivalent of a failure handling type.
Definition actions.c:147
bool pcmk_is_probe(const char *task, guint interval)
Check whether an action name and interval represent a probe.
Definition probes.c:30
gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc)
Parse a transition key into its constituent parts.
Definition actions.c:426
#define PCMK_ACTION_META_DATA
Definition actions.h:56
#define PCMK_ACTION_PROMOTE
Definition actions.h:66
#define PCMK_ACTION_START
Definition actions.h:72
action_fail_response
Definition actions.h:130
@ pcmk_on_fail_ban
Definition actions.h:150
@ pcmk_on_fail_fence_node
Definition actions.h:162
@ pcmk_on_fail_ignore
Definition actions.h:144
@ pcmk_on_fail_restart_container
Definition actions.h:167
@ pcmk_on_fail_demote
Definition actions.h:178
@ pcmk_on_fail_standby_node
Definition actions.h:159
@ pcmk_on_fail_block
Definition actions.h:153
@ pcmk_on_fail_reset_remote
Definition actions.h:175
@ pcmk_on_fail_stop
Definition actions.h:156
@ pcmk_on_fail_restart
Definition actions.h:147
#define PCMK_ACTION_MIGRATE_FROM
Definition actions.h:58
@ pcmk_action_optional
Definition actions.h:210
#define PCMK_ACTION_MIGRATE_TO
Definition actions.h:59
#define PCMK_ACTION_MONITOR
Definition actions.h:60
#define PCMK_ACTION_OFF
Definition actions.h:63
#define PCMK_ACTION_DEMOTE
Definition actions.h:49
#define PCMK_ACTION_NOTIFY
Definition actions.h:62
bool pcmk_xe_is_probe(const xmlNode *xml_op)
Check whether an action history entry represents a probe.
Definition probes.c:45
#define pcmk__set_action_flags(action, flags_to_set)
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition actions.c:196
#define PCMK__ACTION_POWEROFF
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition attrs.c:118
const char * parent
Definition cib.c:27
const char * name
Definition cib.c:26
pcmk_resource_t * pe__create_clone_child(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
Definition clone.c:247
int pcmk__effective_rc(int rc)
Definition agents.c:72
#define pcmk__assert_alloc(nmemb, size)
Definition internal.h:297
Utility functions.
char int pcmk_parse_interval_spec(const char *input, guint *result_ms)
Parse milliseconds from a Pacemaker interval specification.
Definition strings.c:451
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
gboolean crm_is_true(const char *s)
Definition strings.c:488
int crm_str_to_boolean(const char *s, int *ret)
Definition strings.c:496
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition util.h:98
int pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc, pcmk_resource_t *parent, pcmk_scheduler_t *scheduler)
Definition complex.c:639
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition complex.c:1007
pcmk__cpg_host_t host
Definition cpg.c:4
enum crm_ais_msg_types type
Definition cpg.c:3
char uname[MAX_NAME]
Definition cpg.c:5
char data[0]
Definition cpg.c:10
uint32_t id
Definition cpg.c:0
A dumping ground.
#define CRMD_JOINSTATE_NACK
Definition crm.h:146
#define CRM_ATTR_IS_DC
Definition crm.h:103
#define CRM_ATTR_SITE_NAME
Definition crm.h:105
#define CRMD_JOINSTATE_DOWN
Definition crm.h:143
#define CRMD_JOINSTATE_PENDING
Definition crm.h:144
#define CRM_ATTR_KIND
Definition crm.h:101
#define CRM_ATTR_CLUSTER_NAME
Definition crm.h:104
#define CRM_ATTR_UNAME
Definition crm.h:99
#define CRM_ATTR_ID
Definition crm.h:100
#define CRMD_JOINSTATE_MEMBER
Definition crm.h:145
#define ST__LEVEL_MAX
#define ST__LEVEL_MIN
@ pcmk__digest_match
@ pcmk__digest_unknown
@ pcmk__fc_effective
@ pcmk__fc_default
@ pcmk__rsc_node_current
Where resource is running.
char * pcmk__epoch2str(const time_t *source, uint32_t flags)
Definition iso8601.c:2075
const char * pcmk__readable_interval(guint interval_ms)
Definition iso8601.c:2134
#define CRM_TRACE_INIT_DATA(name)
Definition logging.h:143
#define crm_log_xml_info(xml, text)
Definition logging.h:408
#define crm_info(fmt, args...)
Definition logging.h:397
#define do_crm_log(level, fmt, args...)
Log a message.
Definition logging.h:181
#define crm_warn(fmt, args...)
Definition logging.h:392
#define CRM_XS
Definition logging.h:56
#define crm_log_xml_debug(xml, text)
Definition logging.h:409
#define CRM_LOG_ASSERT(expr)
Definition logging.h:228
#define crm_notice(fmt, args...)
Definition logging.h:395
#define CRM_CHECK(expr, failure_action)
Definition logging.h:245
#define crm_debug(fmt, args...)
Definition logging.h:400
#define crm_trace(fmt, args...)
Definition logging.h:402
#define pcmk__config_warn(fmt...)
#define pcmk__config_err(fmt...)
@ pcmk__wo_ping_node
@ pcmk__wo_poweroff
@ pcmk__wo_blind
@ pcmk__wo_rdisc_enabled
@ pcmk__wo_remove_after
#define pcmk__warn_once(wo_flag, fmt...)
pcmk_scheduler_t * scheduler
#define PCMK_NODE_ATTR_STANDBY
Definition nodes.h:31
@ node_ping
Definition nodes.h:42
@ pcmk_node_variant_remote
Definition nodes.h:40
@ pcmk_node_variant_cluster
Definition nodes.h:39
#define PCMK_NODE_ATTR_TERMINATE
Definition nodes.h:32
#define PCMK_NODE_ATTR_MAINTENANCE
Definition nodes.h:30
#define PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition nvpair.c:446
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
Definition nvpair.c:482
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition nvpair.c:539
int crm_element_value_epoch(const xmlNode *xml, const char *name, time_t *dest)
Retrieve the seconds-since-epoch value of an XML attribute.
Definition nvpair.c:567
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition nvpair.c:301
#define PCMK_OPT_STOP_ALL_RESOURCES
Definition options.h:69
#define PCMK_META_INTERVAL
Definition options.h:91
#define PCMK_VALUE_FENCE_LEGACY
Definition options.h:224
#define PCMK_OPT_SYMMETRIC_CLUSTER
Definition options.h:72
#define PCMK_META_ON_FAIL
Definition options.h:98
#define PCMK_OPT_NODE_PENDING_TIMEOUT
Definition options.h:53
#define PCMK_OPT_STARTUP_FENCING
Definition options.h:63
#define PCMK_OPT_STOP_ORPHAN_RESOURCES
Definition options.h:71
#define PCMK_OPT_SHUTDOWN_LOCK_LIMIT
Definition options.h:61
#define PCMK_OPT_MAINTENANCE_MODE
Definition options.h:44
#define PCMK_OPT_STOP_ORPHAN_ACTIONS
Definition options.h:70
#define PCMK_OPT_NO_QUORUM_POLICY
Definition options.h:46
#define PCMK_OPT_PRIORITY_FENCING_DELAY
Definition options.h:58
#define PCMK_VALUE_OFFLINE
Definition options.h:183
#define PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS
Definition options.h:137
#define PCMK_VALUE_TRUE
Definition options.h:215
#define PCMK_META_REMOTE_CONNECT_TIMEOUT
Definition options.h:107
#define PCMK_VALUE_DEMOTE
Definition options.h:145
#define PCMK_META_REMOTE_NODE
Definition options.h:108
#define PCMK_OPT_HAVE_WATCHDOG
Definition options.h:40
#define PCMK_VALUE_FREEZE
Definition options.h:155
#define PCMK_OPT_CONCURRENT_FENCING
Definition options.h:33
#define PCMK_OPT_START_FAILURE_IS_FATAL
Definition options.h:62
#define PCMK_OPT_PLACEMENT_STRATEGY
Definition options.h:57
#define PCMK_META_IS_MANAGED
Definition options.h:92
#define PCMK_OPT_STONITH_ENABLED
Definition options.h:65
#define PCMK_OPT_ENABLE_STARTUP_PROBES
Definition options.h:38
#define PCMK_VALUE_REMOTE
Definition options.h:198
#define PCMK_META_TARGET_ROLE
Definition options.h:113
#define PCMK_OPT_CLUSTER_NAME
Definition options.h:31
#define PCMK_VALUE_IGNORE
Definition options.h:161
#define PCMK_VALUE_MEMBER
Definition options.h:169
#define PCMK_META_REMOTE_PORT
Definition options.h:109
#define PCMK_VALUE_FALSE
Definition options.h:152
#define PCMK_OPT_STONITH_TIMEOUT
Definition options.h:67
#define PCMK_META_REMOTE_ALLOW_MIGRATE
Definition options.h:106
#define PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
Definition options.h:68
#define PCMK_META_REMOTE_ADDR
Definition options.h:105
#define PCMK_OPT_STONITH_ACTION
Definition options.h:64
#define PCMK_OPT_SHUTDOWN_LOCK
Definition options.h:60
#define PCMK_VALUE_STOP
Definition options.h:209
#define PCMK_VALUE_ONLINE
Definition options.h:184
const char * pcmk__cluster_option(GHashTable *options, const char *name)
Definition options.c:1412
#define PCMK__META_CONTAINER
#define PCMK__META_MIGRATE_SOURCE
#define PCMK__OPT_REMOVE_AFTER_STOP
void pcmk__validate_cluster_options(GHashTable *options)
Definition options.c:1558
#define PCMK__META_MIGRATE_TARGET
#define PCMK__VALUE_PING
unsigned int timeout
Definition pcmk_fence.c:32
const char * action
Definition pcmk_fence.c:30
pcmk__action_result_t result
Definition pcmk_fence.c:35
const char * target
Definition pcmk_fence.c:29
void pe__unpack_node_health_scores(pcmk_scheduler_t *scheduler)
Definition pe_health.c:24
G_GNUC_INTERNAL gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b)
Definition utils.c:295
bool pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
Definition utils.c:36
pcmk_action_t * pe__clear_failcount(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *reason, pcmk_scheduler_t *scheduler)
Schedule a controller operation to clear a fail count.
Definition failcounts.c:458
pcmk_node_t * pe__copy_node(const pcmk_node_t *this_node)
Definition utils.c:89
GHashTable * pcmk__unpack_action_meta(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *action_name, guint interval_ms, const xmlNode *action_config)
Definition pe_actions.c:702
GHashTable * pe__node_list2table(const GList *list)
Definition utils.c:115
gboolean get_target_role(const pcmk_resource_t *rsc, enum rsc_role_e *role)
Definition utils.c:410
time_t get_effective_time(pcmk_scheduler_t *scheduler)
Definition utils.c:395
void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pcmk_scheduler_t *scheduler)
Definition utils.c:719
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
#define demote_action(rsc, node, optional)
Definition internal.h:230
void pe__add_param_check(const xmlNode *rsc_op, pcmk_resource_t *rsc, pcmk_node_t *node, enum pcmk__check_parameters, pcmk_scheduler_t *scheduler)
Definition remote.c:189
void destroy_ticket(gpointer data)
Definition utils.c:505
int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default)
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition complex.c:1032
pcmk__op_digest_t * rsc_action_digest_cmp(pcmk_resource_t *rsc, const xmlNode *xml_op, pcmk_node_t *node, pcmk_scheduler_t *scheduler)
Definition pe_digest.c:394
gboolean order_actions(pcmk_action_t *lh_action, pcmk_action_t *rh_action, uint32_t flags)
Definition utils.c:457
bool pe__is_universal_clone(const pcmk_resource_t *rsc, const pcmk_scheduler_t *scheduler)
Definition clone.c:1283
void resource_location(pcmk_resource_t *rsc, const pcmk_node_t *node, int score, const char *tag, pcmk_scheduler_t *scheduler)
Definition utils.c:359
pcmk_resource_t * pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node)
Definition bundle.c:1402
int pe_get_failcount(const pcmk_node_t *node, pcmk_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op)
Definition failcounts.c:361
void pe__update_recheck_time(time_t recheck, pcmk_scheduler_t *scheduler, const char *reason)
Definition utils.c:694
void pe__free_digests(gpointer ptr)
Definition pe_digest.c:33
gint pe__cmp_node_name(gconstpointer a, gconstpointer b)
Definition utils.c:145
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
bool pe__bundle_needs_remote_name(pcmk_resource_t *rsc)
Definition bundle.c:920
enum rsc_role_e pcmk__role_after_failure(const pcmk_resource_t *rsc, const char *action_name, enum action_fail_response on_fail, GHashTable *meta)
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
Definition complex.c:1253
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
#define stop_action(rsc, node, optional)
Definition internal.h:214
void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler, gboolean failed)
Definition native.c:91
pcmk_ticket_t * ticket_new(const char *ticket_id, pcmk_scheduler_t *scheduler)
Definition utils.c:517
void pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node)
bool pe__shutdown_requested(const pcmk_node_t *node)
Definition utils.c:677
xmlNode * pcmk__find_action_config(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, bool include_disabled)
Definition pe_actions.c:132
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
Definition utils.c:627
enum action_fail_response pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, const char *value)
Definition pe_actions.c:890
bool xml_contains_remote_node(xmlNode *xml)
Definition remote.c:47
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
Definition remote.c:125
@ pcmk_rsc_match_clone_only
Match only clones and their instances, by either clone or instance ID.
Definition resources.h:191
@ pcmk_rsc_variant_primitive
Definition resources.h:37
@ pcmk_rsc_promotable
Definition resources.h:106
@ pcmk_rsc_stop_if_failed
Definition resources.h:121
@ pcmk_rsc_migratable
Definition resources.h:157
@ pcmk_rsc_unique
Definition resources.h:100
@ pcmk_rsc_needs_fencing
Definition resources.h:175
@ pcmk_rsc_removed
Definition resources.h:85
@ pcmk_rsc_start_pending
Definition resources.h:142
@ pcmk_rsc_blocked
Definition resources.h:91
@ pcmk_rsc_removed_filler
Definition resources.h:94
@ pcmk_rsc_has_filler
Definition resources.h:169
@ pcmk_rsc_managed
Definition resources.h:88
@ pcmk_rsc_ignore_failure
Definition resources.h:160
@ pcmk_rsc_failed
Definition resources.h:133
#define CRM_ASSERT(expr)
Definition results.h:42
@ CRM_EX_ERROR
Unspecified error.
Definition results.h:256
@ CRM_EX_MAX
Ensure crm_exit_t can hold this.
Definition results.h:320
@ PCMK_OCF_INSUFFICIENT_PRIV
Insufficient privileges.
Definition results.h:185
@ PCMK_OCF_FAILED_PROMOTED
Service failed and possibly in promoted role.
Definition results.h:193
@ PCMK_OCF_RUNNING_PROMOTED
Service active and promoted.
Definition results.h:192
@ PCMK_OCF_DEGRADED_PROMOTED
Service promoted but more likely to fail soon.
Definition results.h:195
@ PCMK_OCF_UNIMPLEMENT_FEATURE
Requested action not implemented.
Definition results.h:184
@ PCMK_OCF_NOT_CONFIGURED
Parameter invalid (inherently)
Definition results.h:187
@ PCMK_OCF_DEGRADED
Service active but more likely to fail soon.
Definition results.h:194
@ PCMK_OCF_NOT_INSTALLED
Dependencies not available locally.
Definition results.h:186
@ PCMK_OCF_UNKNOWN_ERROR
Unspecified error.
Definition results.h:181
@ PCMK_OCF_INVALID_PARAM
Parameter invalid (in local context)
Definition results.h:183
@ PCMK_OCF_NOT_RUNNING
Service safely stopped.
Definition results.h:190
@ PCMK_OCF_OK
Success.
Definition results.h:178
@ PCMK_OCF_UNKNOWN
Action is pending.
Definition results.h:203
@ pcmk_rc_ok
Definition results.h:162
@ pcmk_rc_undetermined
Definition results.h:135
@ pcmk_rc_unpack_error
Definition results.h:125
#define pcmk_ok
Definition results.h:69
const char * crm_exit_str(crm_exit_t exit_code)
Definition results.c:640
@ PCMK_EXEC_CANCELLED
Action was cancelled.
Definition results.h:334
@ PCMK_EXEC_NO_SECRETS
Necessary CIB secrets are unavailable.
Definition results.h:344
@ PCMK_EXEC_ERROR_FATAL
Execution failed, do not retry anywhere.
Definition results.h:339
@ PCMK_EXEC_NOT_INSTALLED
Agent or dependency not available locally.
Definition results.h:340
@ PCMK_EXEC_INVALID
Action cannot be attempted (e.g. shutdown)
Definition results.h:342
@ PCMK_EXEC_DONE
Action completed, result is known.
Definition results.h:333
@ PCMK_EXEC_ERROR
Execution failed, may be retried.
Definition results.h:337
@ PCMK_EXEC_NOT_SUPPORTED
Agent does not implement requested action.
Definition results.h:336
@ PCMK_EXEC_TIMEOUT
Action did not complete in time.
Definition results.h:335
@ PCMK_EXEC_PENDING
Action is in progress.
Definition results.h:332
@ PCMK_EXEC_UNKNOWN
Used only to initialize variables.
Definition results.h:331
@ PCMK_EXEC_ERROR_HARD
Execution failed, do not retry on node.
Definition results.h:338
@ PCMK_EXEC_MAX
Maximum value for this enum.
Definition results.h:347
@ PCMK_EXEC_NO_FENCE_DEVICE
No fence device is configured for target.
Definition results.h:343
@ PCMK_EXEC_NOT_CONNECTED
No connection to executor.
Definition results.h:341
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
Definition roles.c:23
rsc_role_e
Definition roles.h:34
@ pcmk_role_started
Started.
Definition roles.h:37
@ pcmk_role_unknown
Resource role is unknown.
Definition roles.h:35
@ pcmk_role_unpromoted
Unpromoted.
Definition roles.h:38
@ pcmk_role_promoted
Promoted.
Definition roles.h:39
@ pcmk_role_stopped
Stopped.
Definition roles.h:36
#define pcmk__set_rsc_flags(resource, flags_to_set)
#define pcmk__clear_rsc_flags(resource, flags_to_clear)
@ pcmk_no_quorum_freeze
Definition scheduler.h:41
@ pcmk_no_quorum_stop
Definition scheduler.h:42
@ pcmk_no_quorum_ignore
Definition scheduler.h:43
@ pcmk_no_quorum_demote
Definition scheduler.h:45
@ pcmk_no_quorum_fence
Definition scheduler.h:44
pcmk_node_t * pcmk_find_node(const pcmk_scheduler_t *scheduler, const char *node_name)
Find a node by name in scheduler data.
Definition scheduler.c:103
@ pcmk_sched_stop_removed_resources
Definition scheduler.h:108
@ pcmk_sched_in_maintenance
Definition scheduler.h:86
@ pcmk_sched_symmetric_cluster
Definition scheduler.h:83
@ pcmk_sched_fencing_enabled
Definition scheduler.h:89
@ pcmk_sched_probe_resources
Definition scheduler.h:142
@ pcmk_sched_have_remote_nodes
Definition scheduler.h:148
@ pcmk_sched_have_fencing
Definition scheduler.h:96
@ pcmk_sched_shutdown_lock
Definition scheduler.h:136
@ pcmk_sched_location_only
Definition scheduler.h:158
@ pcmk_sched_quorate
Definition scheduler.h:80
@ pcmk_sched_concurrent_fencing
Definition scheduler.h:102
@ pcmk_sched_start_failure_fatal
Definition scheduler.h:124
@ pcmk_sched_enable_unfencing
Definition scheduler.h:99
@ pcmk_sched_remove_after_stop
Definition scheduler.h:127
@ pcmk_sched_cancel_removed_actions
Definition scheduler.h:114
@ pcmk_sched_stop_all
Definition scheduler.h:117
@ pcmk_sched_startup_fencing
Definition scheduler.h:130
#define pcmk__rsc_info(rsc, fmt, args...)
#define pcmk__rsc_trace(rsc, fmt, args...)
@ pcmk__check_last_failure
#define pcmk__clear_scheduler_flags(scheduler, flags_to_clear)
#define pcmk__sched_warn(fmt...)
#define pcmk__rsc_debug(rsc, fmt, args...)
#define pcmk__sched_err(fmt...)
#define pcmk__set_scheduler_flags(scheduler, flags_to_set)
int char2score(const char *score)
Get the integer value of a score string.
Definition scores.c:36
#define PCMK_SCORE_INFINITY
Integer score to use to represent "infinity".
Definition scores.h:24
Services API.
pcmk_resource_t * pe_find_resource(GList *rsc_list, const char *id_rh)
Definition status.c:430
pcmk_node_t * pe_find_node_any(const GList *node_list, const char *id, const char *node_name)
Find a node by name or ID in a list of nodes.
Definition status.c:465
void pcmk__insert_dup(GHashTable *table, const char *name, const char *value)
Definition strings.c:701
int pcmk__scan_min_int(const char *text, int *result, int minimum)
Definition strings.c:127
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition strings.c:683
int pcmk__scan_ll(const char *text, long long *result, long long default_value)
Definition strings.c:97
void pcmk__str_update(char **str, const char *value)
Definition strings.c:1277
bool pcmk__strcase_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition strings.c:1026
@ pcmk__str_none
@ pcmk__str_null_matches
@ pcmk__str_casei
bool pcmk__ends_with(const char *s, const char *match)
Definition strings.c:608
bool pcmk__str_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition strings.c:1050
void pcmk__g_strcat(GString *buffer,...) G_GNUC_NULL_TERMINATED
Definition strings.c:1296
#define pcmk__str_copy(str)
enum pcmk__digest_result rc
int weight
Definition nodes.h:162
gboolean fixed
Definition nodes.h:163
struct pe_node_shared_s * details
Definition nodes.h:167
GHashTable * attrs
Definition nodes.h:142
gboolean shutdown
Definition nodes.h:97
GHashTable * digest_cache
Definition nodes.h:144
gboolean expected_up
Definition nodes.h:99
const char * id
Definition nodes.h:72
gboolean online
Definition nodes.h:80
gboolean standby_onfail
Definition nodes.h:83
const char * uname
Definition nodes.h:73
gboolean standby
Definition nodes.h:82
GHashTable * utilization
Definition nodes.h:143
gboolean unpacked
Definition nodes.h:126
pcmk_scheduler_t * data_set
Definition nodes.h:153
gboolean remote_maintenance
Definition nodes.h:124
gboolean is_dc
Definition nodes.h:100
gboolean unclean
Definition nodes.h:91
gboolean remote_requires_reset
Definition nodes.h:112
pcmk_resource_t * remote_rsc
Definition nodes.h:135
gboolean maintenance
Definition nodes.h:104
gboolean rsc_discovery_enabled
Definition nodes.h:106
enum node_type type
Definition nodes.h:74
gboolean pending
Definition nodes.h:87
gboolean remote_was_fenced
Definition nodes.h:118
GList * running_rsc
Definition nodes.h:139
gboolean unseen
Definition nodes.h:93
GList * running_on
Definition resources.h:456
enum pe_obj_types variant
Definition resources.h:410
pcmk_node_t * partial_migration_target
Definition resources.h:450
GHashTable * meta
Definition resources.h:467
GList * children
Definition resources.h:471
pcmk_scheduler_t * cluster
Definition resources.h:408
pcmk_node_t * partial_migration_source
Definition resources.h:453
pcmk_resource_t * container
Definition resources.h:476
pcmk_rsc_methods_t * fns
Definition resources.h:412
GHashTable * known_on
Definition resources.h:459
char * clone_name
Definition resources.h:397
gboolean is_remote_node
Definition resources.h:431
GHashTable * allowed_nodes
Definition resources.h:462
GList * dangling_migrations
Definition resources.h:474
pcmk_node_t * lock_node
Definition resources.h:481
unsigned long long flags
Definition resources.h:428
pcmk_node_t * pending_node
Definition resources.h:480
guint remote_reconnect_ms
Definition resources.h:423
GList * fillers
Definition resources.h:477
enum rsc_role_e next_role
Definition resources.h:465
enum rsc_role_e role
Definition resources.h:464
pcmk_resource_t * parent
Definition resources.h:409
time_t lock_time
Definition resources.h:483
GHashTable * node_hash
Definition common.h:46
char * id
Definition tags.h:30
GList * refs
Definition tags.h:31
GHashTable * state
Definition tickets.h:35
char * id
Definition tickets.h:31
gboolean standby
Definition tickets.h:34
gboolean granted
Definition tickets.h:32
time_t last_granted
Definition tickets.h:33
guint node_pending_timeout
Definition scheduler.h:266
const char * stonith_action
Definition scheduler.h:205
GHashTable * tags
Definition scheduler.h:253
const char * placement_strategy
Definition scheduler.h:206
GHashTable * config_hash
Definition scheduler.h:219
GHashTable * template_rsc_sets
Definition scheduler.h:248
xmlNode * input
Definition scheduler.h:196
GList * resources
Definition scheduler.h:231
unsigned long long flags
Definition scheduler.h:211
pcmk_node_t * dc_node
Definition scheduler.h:203
enum pe_quorum_policy no_quorum_policy
Definition scheduler.h:217
GList * stop_needed
Definition scheduler.h:257
GHashTable * tickets
Definition scheduler.h:222
int priority_fencing_delay
Definition scheduler.h:261
crm_time_t * now
Definition scheduler.h:198
const char * localhost
Definition scheduler.h:251
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, int flags)
Definition resources.h:276
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
Definition resources.h:328
pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler)
Definition unpack.c:455
#define set_config_flag(scheduler, option, flag)
Definition unpack.c:51
#define XPATH_ENABLE_UNFENCING
Definition unpack.c:193
void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index)
Definition unpack.c:2624
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition unpack.c:112
const char * pe_base_name_end(const char *id)
Definition unpack.c:1915
gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
Definition unpack.c:1407
#define SUB_XPATH_LRM_RESOURCE
Definition unpack.c:2929
GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler)
Definition unpack.c:5131
gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition unpack.c:703
char * clone_zero(const char *last_rsc_id)
Definition unpack.c:1977
#define XPATH_NODE_STATE
Definition unpack.c:2927
#define SUB_XPATH_LRM_RSC_OP
Definition unpack.c:2932
gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition unpack.c:847
gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
Definition unpack.c:214
char * clone_strip(const char *last_rsc_id)
Definition unpack.c:1955
gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
Definition unpack.c:623
int pe__target_rc_from_xml(const xmlNode *xml_op)
Definition unpack.c:4385
void pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
Definition unpack.c:922
gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
Definition unpack.c:949
Wrappers for and extensions to libxml2.
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
Definition xpath.c:189
void freeXpathObject(xmlXPathObjectPtr xpathObj)
Definition xpath.c:39
xmlXPathObjectPtr xpath_search(const xmlNode *xml_top, const char *path)
Definition xpath.c:139
xmlNode * pcmk__xml_copy(xmlNode *parent, xmlNode *src)
Definition xml.c:883
xmlNode * pcmk__xe_first_child(const xmlNode *parent, const char *node_name, const char *attr_n, const char *attr_v)
Definition xml.c:440
xmlNode * pcmk__xe_next_same(const xmlNode *node)
Definition xml.c:2108
@ pcmk__xaf_none
Flag has no effect.
int pcmk__xe_copy_attrs(xmlNode *target, const xmlNode *src, uint32_t flags)
Definition xml.c:584
xmlNode * pcmk__xe_create(xmlNode *parent, const char *name)
Definition xml.c:720
#define PCMK_XE_UTILIZATION
Definition xml_names.h:212
#define PCMK_XE_STATUS
Definition xml_names.h:199
#define PCMK_XA_SCORE
Definition xml_names.h:391
#define PCMK_XE_NODE
Definition xml_names.h:133
#define PCMK_XE_FENCING_LEVEL
Definition xml_names.h:114
#define PCMK_XE_GROUP
Definition xml_names.h:116
#define PCMK_XA_OPERATION
Definition xml_names.h:344
#define PCMK_XA_ID
Definition xml_names.h:296
#define PCMK_XA_CRMD
Definition xml_names.h:251
#define PCMK_XE_TICKETS
Definition xml_names.h:208
#define PCMK_XE_INSTANCE_ATTRIBUTES
Definition xml_names.h:119
#define PCMK_XA_LAST_RC_CHANGE
Definition xml_names.h:311
#define PCMK_XE_META_ATTRIBUTES
Definition xml_names.h:127
#define PCMK_XA_VALUE
Definition xml_names.h:437
#define PCMK_XA_LAST_GRANTED
Definition xml_names.h:310
#define PCMK_XA_EXIT_REASON
Definition xml_names.h:269
#define PCMK_XE_PRIMITIVE
Definition xml_names.h:160
#define PCMK_XE_CLUSTER_PROPERTY_SET
Definition xml_names.h:84
#define PCMK_XA_TYPE
Definition xml_names.h:425
#define PCMK_XA_RESOURCE_DISCOVERY
Definition xml_names.h:379
#define PCMK_XE_TAG
Definition xml_names.h:203
#define PCMK_XA_STANDBY
Definition xml_names.h:401
#define PCMK_XA_EXPECTED
Definition xml_names.h:273
#define PCMK_XE_OBJ_REF
Definition xml_names.h:142
#define PCMK_XE_TEMPLATE
Definition xml_names.h:206
#define PCMK_XA_NO_QUORUM_PANIC
Definition xml_names.h:328
#define PCMK_XA_UNAME
Definition xml_names.h:426
#define PCMK_XA_NAME
Definition xml_names.h:325
#define PCMK_XA_RESOURCE
Definition xml_names.h:377
#define PCMK_XA_INDEX
Definition xml_names.h:300
#define PCMK__XE_LRM_RSC_OP
#define PCMK__XE_LRM_RESOURCE
#define PCMK__XA_NODE_FENCED
#define PCMK__XE_TRANSIENT_ATTRIBUTES
#define PCMK__XE_LRM_RESOURCES
#define PCMK__XA_CALL_ID
#define PCMK__XA_RSC_ID
#define PCMK__XA_OP_RESTART_DIGEST
#define PCMK__XA_JOIN
#define PCMK__XA_IN_CCM
#define PCMK__XA_OP_STATUS
#define PCMK__XA_TRANSITION_KEY
#define PCMK__XA_GRANTED
#define PCMK__XE_NODE_STATE
#define PCMK__XE_LRM
#define PCMK__XA_NODE_IN_MAINTENANCE
#define PCMK__XA_RC_CODE
#define PCMK__XE_TICKET_STATE