pacemaker 2.1.8-2.1.8
Scalable High-Availability cluster resource manager
Loading...
Searching...
No Matches
membership.c
Go to the documentation of this file.
1/*
2 * Copyright 2004-2024 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10#include <crm_internal.h>
11
12#ifndef _GNU_SOURCE
13# define _GNU_SOURCE
14#endif
15
16#include <inttypes.h> // PRIu32
17#include <sys/param.h>
18#include <sys/types.h>
19#include <stdio.h>
20#include <unistd.h>
21#include <string.h>
22#include <glib.h>
23#include <crm/common/ipc.h>
26#include <crm/common/xml.h>
27#include <crm/stonith-ng.h>
28#include "crmcluster_private.h"
29
30/* The peer cache remembers cluster nodes that have been seen.
31 * This is managed mostly automatically by libcluster, based on
32 * cluster membership events.
33 *
34 * Because cluster nodes can have conflicting names or UUIDs,
35 * the hash table key is a uniquely generated ID.
36 *
37 * @COMPAT When this is internal, rename to cluster_node_member_cache and make
38 * static.
39 */
40GHashTable *crm_peer_cache = NULL;
41
42/*
43 * The remote peer cache tracks pacemaker_remote nodes. While the
44 * value has the same type as the peer cache's, it is tracked separately for
45 * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
46 * so the name (which is also the UUID) is used as the hash table key; there
47 * is no equivalent of membership events, so management is not automatic; and
48 * most users of the peer cache need to exclude pacemaker_remote nodes.
49 *
50 * That said, using a single cache would be more logical and less error-prone,
51 * so it would be a good idea to merge them one day.
52 *
53 * libcluster provides two avenues for populating the cache:
54 * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
55 * directly manage it, while refresh_remote_nodes() populates it via the CIB.
56 */
57GHashTable *crm_remote_peer_cache = NULL;
58
59/*
60 * The CIB cluster node cache tracks cluster nodes that have been seen in
61 * the CIB. It is useful mainly when a caller needs to know about a node that
62 * may no longer be in the membership, but doesn't want to add the node to the
63 * main peer cache tables.
64 */
65static GHashTable *cluster_node_cib_cache = NULL;
66
// Most recent cluster membership sequence number seen
// NOTE(review): updated outside this extraction -- confirm against callers
unsigned long long crm_peer_seq = 0;

// Whether the local cluster currently has quorum
// NOTE(review): maintained outside this extraction -- confirm against callers
gboolean crm_have_quorum = FALSE;

// Whether nodes that leave the membership are automatically purged from the
// peer cache (see update_peer_state_iter() and crm_update_peer_proc())
static bool autoreap = true;
70
71// Flag setting and clearing for crm_node_t:flags
72
// Set flags_to_set in a crm_node_t's flags field, logging at trace level
#define set_peer_flags(peer, flags_to_set) do {                               \
        (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,     \
                                           "Peer", (peer)->uname,             \
                                           (peer)->flags, (flags_to_set),     \
                                           #flags_to_set);                    \
    } while (0)

// Clear flags_to_clear in a crm_node_t's flags field, logging at trace level
#define clear_peer_flags(peer, flags_to_clear) do {                           \
        (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__,              \
                                             LOG_TRACE,                       \
                                             "Peer", (peer)->uname,           \
                                             (peer)->flags, (flags_to_clear), \
                                             #flags_to_clear);                \
    } while (0)
87
// Forward declarations for static helpers defined later in this file
static void update_peer_uname(crm_node_t *node, const char *uname);
static crm_node_t *find_cib_cluster_node(const char *id, const char *uname);
90
/* Return the number of entries in the remote peer cache, or 0 if the cache
 * has not been initialized.
 *
 * NOTE(review): the function-name line was lost in extraction; from the
 * doxygen context this appears to be pcmk__cluster_num_remote_nodes(void).
 */
unsigned int
{
    if (crm_remote_peer_cache == NULL) {
        return 0U;
    }
    return g_hash_table_size(crm_remote_peer_cache);
}
105
123{
124 crm_node_t *node;
125 char *node_name_copy = NULL;
126
127 if (node_name == NULL) {
128 errno = EINVAL;
129 return NULL;
130 }
131
132 /* It's theoretically possible that the node was added to the cluster peer
133 * cache before it was known to be a Pacemaker Remote node. Remove that
134 * entry unless it has a node ID, which means the name actually is
135 * associated with a cluster node. (@TODO return an error in that case?)
136 */
137 node = pcmk__search_node_caches(0, node_name,
139 if ((node != NULL) && (node->uuid == NULL)) {
140 /* node_name could be a pointer into the cache entry being removed, so
141 * reassign it to a copy before the original gets freed
142 */
143 node_name_copy = strdup(node_name);
144 if (node_name_copy == NULL) {
145 errno = ENOMEM;
146 return NULL;
147 }
148 node_name = node_name_copy;
150 }
151
152 /* Return existing cache entry if one exists */
153 node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
154 if (node) {
155 free(node_name_copy);
156 return node;
157 }
158
159 /* Allocate a new entry */
160 node = calloc(1, sizeof(crm_node_t));
161 if (node == NULL) {
162 free(node_name_copy);
163 return NULL;
164 }
165
166 /* Populate the essential information */
168 node->uuid = strdup(node_name);
169 if (node->uuid == NULL) {
170 free(node);
171 errno = ENOMEM;
172 free(node_name_copy);
173 return NULL;
174 }
175
176 /* Add the new entry to the cache */
177 g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
178 crm_trace("added %s to remote cache", node_name);
179
180 /* Update the entry's uname, ensuring peer status callbacks are called */
181 update_peer_uname(node, node_name);
182 free(node_name_copy);
183 return node;
184}
185
/* Remove a node from the remote peer cache, if present.
 *
 * NOTE(review): the signature line was lost in extraction; per the doxygen
 * context this is pcmk__cluster_forget_remote_node(const char *node_name).
 */
void
{
    /* Do a lookup first, because node_name could be a pointer within the entry
     * being removed -- we can't log it *after* removing it.
     */
    if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) {
        crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
        g_hash_table_remove(crm_remote_peer_cache, node_name);
    }
}
206
/* Map a CIB node_state entry to a peer state string: CRM_NODE_LOST when the
 * checked boolean attribute is present and false, otherwise CRM_NODE_MEMBER.
 *
 * NOTE(review): the first line of the condition (presumably a
 * pcmk__xe_get_bool_attr() call on the in-CCM attribute) was lost in
 * extraction.
 */
static const char *
remote_state_from_cib(const xmlNode *node_state)
{
    bool status = false;

        &status) == pcmk_rc_ok) && !status) {
        return CRM_NODE_LOST;
    } else {
        return CRM_NODE_MEMBER;
    }
}
230
/* User data for looping through remote node xpath searches */
struct refresh_data {
    const char *field;      /* XML attribute to check for node name */
    gboolean has_state;     /* whether to update node state based on XML */
};
236
/* Update the remote peer cache from one XML search result: add a new cache
 * entry for an unknown node, or (for a known, still-dirty entry) refresh it.
 *
 * NOTE(review): two lines were lost in extraction: the entry-creation call
 * before CRM_ASSERT(node) (presumably pcmk__cluster_lookup_remote_node()),
 * and a clear_peer_flags(node, crm_node_dirty) in the else-if branch.
 */
static void
remote_cache_refresh_helper(xmlNode *result, void *user_data)
{
    const struct refresh_data *data = user_data;
    const char *remote = crm_element_value(result, data->field);
    const char *state = NULL;
    crm_node_t *node;

    CRM_CHECK(remote != NULL, return);

    /* Determine node's state, if the result has it */
    if (data->has_state) {
        state = remote_state_from_cib(result);
    }

    /* Check whether cache already has entry for node */
    node = g_hash_table_lookup(crm_remote_peer_cache, remote);

    if (node == NULL) {
        /* Node is not in cache, so add a new entry for it */
        CRM_ASSERT(node);
        if (state) {
            pcmk__update_peer_state(__func__, node, state, 0);
        }

    } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
        /* Node is in cache and hasn't been updated already, so mark it clean */
        if (state) {
            pcmk__update_peer_state(__func__, node, state, 0);
        }
    }
}
278
/* GHFunc: mark a node cache entry as dirty.
 *
 * NOTE(review): the body line was lost in extraction; presumably
 * set_peer_flags(value, crm_node_dirty).
 */
static void
mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
}
284
285static gboolean
286is_dirty(gpointer key, gpointer value, gpointer user_data)
287{
288 return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
289}
290
/* Repopulate the remote peer cache from a CIB: mark every entry dirty,
 * re-add or refresh everything found in the CIB status and configuration
 * sections, then drop entries still dirty (no longer present in the CIB).
 *
 * NOTE(review): the xpath-search invocations (and, presumably, a cache
 * initialization call) were lost in extraction; each remaining
 * "remote_cache_refresh_helper, &data" line is the tail of one such call.
 */
static void
refresh_remote_nodes(xmlNode *cib)
{
    struct refresh_data data;


    /* First, we mark all existing cache entries as dirty,
     * so that later we can remove any that weren't in the CIB.
     * We don't empty the cache, because we need to detect changes in state.
     */
    g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);

    /* Look for guest nodes and remote nodes in the status section */
    data.field = PCMK_XA_ID;
    data.has_state = TRUE;
        remote_cache_refresh_helper, &data);

    /* Look for guest nodes and remote nodes in the configuration section,
     * because they may have just been added and not have a status entry yet.
     * In that case, the cached node state will be left NULL, so that the
     * peer status callback isn't called until we're sure the node started
     * successfully.
     */
    data.field = PCMK_XA_VALUE;
    data.has_state = FALSE;
        remote_cache_refresh_helper, &data);
    data.field = PCMK_XA_ID;
    data.has_state = FALSE;
        remote_cache_refresh_helper, &data);

    /* Remove all old cache entries that weren't seen in the CIB */
    g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
}
334
/* Report whether a cluster (non-remote) node is active; always false for NULL
 * or remote nodes, and for unhandled cluster layers.
 *
 * NOTE(review): the signature line (per doxygen context,
 * pcmk__cluster_is_node_active(const crm_node_t *node)) and the corosync
 * switch-case body were lost in extraction.
 */
bool
{
    const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();

    if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) {
        return false;
    }

    switch (cluster_layer) {
#if SUPPORT_COROSYNC
#else
            break;
#endif // SUPPORT_COROSYNC
        default:
            break;
    }

    crm_err("Unhandled cluster layer: %s",
            pcmk_cluster_layer_text(cluster_layer));
    return false;
}
370
396static gboolean
397should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
398{
399 crm_node_t *node = value;
400 crm_node_t *search = user_data;
401
402 if (search == NULL) {
403 return FALSE;
404 }
405 if ((search->id != 0) && (node->id != search->id)) {
406 return FALSE;
407 }
408 if ((search->id == 0)
409 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
410 // @TODO Consider name even if ID is set?
411 return FALSE;
412 }
413 if (pcmk__cluster_is_node_active(value)) {
414 return FALSE;
415 }
416
417 crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership "
418 "cache",
419 pcmk__s(node->uname, "(unknown)"), node->id);
420 return TRUE;
421}
422
441void
442pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
443{
444 crm_node_t search = { 0, };
445 char *criterion = NULL; // For logging
446 guint matches = 0;
447
448 if (crm_peer_cache == NULL) {
449 crm_trace("Membership cache not initialized, ignoring removal request");
450 return;
451 }
452
453 search.id = id;
454 search.uname = pcmk__str_copy(node_name); // May log after original freed
455
456 if (id > 0) {
457 criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id);
458
459 } else if (node_name != NULL) {
460 criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name);
461 }
462
463 matches = g_hash_table_foreach_remove(crm_peer_cache,
464 should_forget_cluster_node, &search);
465 if (matches > 0) {
466 if (criterion != NULL) {
467 crm_notice("Removed %u inactive node%s with %s from the membership "
468 "cache",
469 matches, pcmk__plural_s(matches), criterion);
470 } else {
471 crm_notice("Removed all (%u) inactive cluster nodes from the "
472 "membership cache",
473 matches);
474 }
475
476 } else {
477 crm_info("No inactive cluster nodes%s%s to remove from the membership "
478 "cache",
479 ((criterion != NULL)? " with " : ""), pcmk__s(criterion, ""));
480 }
481
482 free(search.uname);
483 free(criterion);
484}
485
/* GHFunc: increment the counter in user_data for each active node.
 *
 * NOTE(review): the condition line was lost in extraction; presumably
 * if (pcmk__cluster_is_node_active(node)) {
 */
static void
count_peer(gpointer key, gpointer value, gpointer user_data)
{
    unsigned int *count = user_data;
    crm_node_t *node = value;

        *count = *count + 1;
    }
}
496
/* Return the number of active nodes in the cluster peer cache (0 if the
 * cache is uninitialized).
 *
 * NOTE(review): the function-name line was lost in extraction; per doxygen
 * context this is pcmk__cluster_num_active_nodes(void).
 */
unsigned int
{
    unsigned int count = 0;

    if (crm_peer_cache != NULL) {
        g_hash_table_foreach(crm_peer_cache, count_peer, &count);
    }
    return count;
}
516
517static void
518destroy_crm_node(gpointer data)
519{
520 crm_node_t *node = data;
521
522 crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
523
524 free(node->uname);
525 free(node->state);
526 free(node->uuid);
527 free(node->expected);
528 free(node->conn_host);
529 free(node);
530}
531
/* Create any of the three node caches that do not already exist.
 *
 * NOTE(review): the function-name line was lost in extraction; per doxygen
 * context this is pcmk__cluster_init_node_caches(void).
 */
void
{
    if (crm_peer_cache == NULL) {
        crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
    }

    if (crm_remote_peer_cache == NULL) {
        /* Keys are the entries' own uuid fields (see g_hash_table_replace()
         * in the remote lookup function), so no separate key destructor
         */
        crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
    }

    if (cluster_node_cib_cache == NULL) {
        cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
    }
}
551
/* Destroy all three node caches and reset their pointers.
 *
 * NOTE(review): lines lost in extraction: the function-name line (per
 * doxygen context, pcmk__cluster_destroy_node_caches(void)), the size
 * argument of the remote-cache trace call, and the
 * crm_remote_peer_cache = NULL reset.
 */
void
{
    if (crm_peer_cache != NULL) {
        crm_trace("Destroying peer cache with %d members",
                  g_hash_table_size(crm_peer_cache));
        g_hash_table_destroy(crm_peer_cache);
        crm_peer_cache = NULL;
    }

    if (crm_remote_peer_cache != NULL) {
        crm_trace("Destroying remote peer cache with %d members",
        g_hash_table_destroy(crm_remote_peer_cache);
    }

    if (cluster_node_cib_cache != NULL) {
        crm_trace("Destroying configured cluster node cache with %d members",
                  g_hash_table_size(cluster_node_cib_cache));
        g_hash_table_destroy(cluster_node_cib_cache);
        cluster_node_cib_cache = NULL;
    }
}
580
// Client-registered callback invoked on peer status changes (NULL when none
// is registered); set via the status-callback setter below
static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
                                    const void *) = NULL;
583
/* Register a callback to be invoked on peer status changes.
 *
 * NOTE(review): the first line of the signature was lost in extraction; per
 * doxygen context this is pcmk__cluster_set_status_callback() taking the
 * dispatch function pointer.
 */
void
                         crm_node_t *, const void *))
{
    // @TODO Improve documentation of peer_status_callback
    peer_status_callback = dispatch;
}
600
/* Enable or disable automatic purging of lost nodes from the peer cache.
 *
 * NOTE(review): the function-name line was lost in extraction; per doxygen
 * context this is pcmk__cluster_set_autoreap(bool enable).
 */
void
{
    autoreap = enable;
}
620
621static void
622dump_peer_hash(int level, const char *caller)
623{
624 GHashTableIter iter;
625 const char *id = NULL;
626 crm_node_t *node = NULL;
627
628 g_hash_table_iter_init(&iter, crm_peer_cache);
629 while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
630 do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
631 }
632}
633
634static gboolean
635hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
636{
637 return value == user_data;
638}
639
/* Search the cluster member cache by ID (preferred), UUID, and/or name,
 * reconciling entries that conflict (same name under different IDs, or same
 * ID under different names) and merging duplicates where safe. Returns the
 * chosen entry, or NULL if a new entry should be created.
 *
 * NOTE(review): one line was lost in extraction before the name search --
 * presumably a cache-initialization call.
 */
static crm_node_t *
search_cluster_member_cache(unsigned int id, const char *uname,
                            const char *uuid)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;
    crm_node_t *by_id = NULL;
    crm_node_t *by_name = NULL;

    CRM_ASSERT(id > 0 || uname != NULL);


    if (uname != NULL) {
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(node->uname && strcasecmp(node->uname, uname) == 0) {
                crm_trace("Name match: %s = %p", node->uname, node);
                by_name = node;
                break;
            }
        }
    }

    if (id > 0) {
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(node->id == id) {
                crm_trace("ID match: %u = %p", node->id, node);
                by_id = node;
                break;
            }
        }

    } else if (uuid != NULL) {
        // No ID to search by, so fall back to the UUID
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
                crm_trace("UUID match: %s = %p", node->uuid, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if(by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);

    } else if(by_id == NULL && by_name) {
        crm_trace("Only one: %p for %u/%s", by_name, id, uname);

        if(id && by_name->id) {
            // Two different IDs claim the same name: create a fresh entry
            dump_peer_hash(LOG_WARNING, __func__);
            crm_crit("Node %u and %u share the same name '%s'",
                     id, by_name->id, uname);
            node = NULL; /* Create a new one */

        } else {
            node = by_name;
        }

    } else if(by_name == NULL && by_id) {
        crm_trace("Only one: %p for %u/%s", by_id, id, uname);

        if(uname && by_id->uname) {
            // ID matched an entry with a different name: trust the new name
            dump_peer_hash(LOG_WARNING, __func__);
            crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
                     uname, by_id->uname, id, uname);
        }

    } else if(uname && by_id->uname) {
        if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
            // Same name in both entries: the node's ID changed; drop the old
            crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
            g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);

        } else {
            crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
            dump_peer_hash(LOG_INFO, __func__);
            crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
                      TRUE);
        }

    } else if(id && by_name->id) {
        crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);

    } else {
        /* Simple merge */

        /* Only corosync-based clusters use node IDs. The functions that call
         * pcmk__update_peer_state() and crm_update_peer_proc() only know
         * nodeid, so 'by_id' is authoritative when merging.
         */
        dump_peer_hash(LOG_DEBUG, __func__);

        crm_info("Merging %p into %p", by_name, by_id);
        g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
    }

    return node;
}
753
/* Search the remote, cluster-member, and/or CIB node caches for a node, as
 * selected by flags; returns the first match found, or NULL.
 *
 * NOTE(review): lines lost in extraction: the return-type line (crm_node_t *
 * per doxygen context), a cache-initialization call, and the second half of
 * the cluster-member flags check (presumably
 * pcmk_is_set(flags, pcmk__node_search_cluster_member)).
 */
pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
{
    crm_node_t *node = NULL;

    CRM_ASSERT(id > 0 || uname != NULL);


    if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) {
        node = g_hash_table_lookup(crm_remote_peer_cache, uname);
    }

    if ((node == NULL)

        node = search_cluster_member_cache(id, uname, NULL);
    }

    if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
        char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);

        node = find_cib_cluster_node(id_str, uname);
        free(id_str);
    }

    return node;
}
792
807void
808pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
809{
810 char *node_name_copy = NULL;
811
812 if ((node_name == NULL) && (node_id == 0U)) {
813 return;
814 }
815
816 // Purge from Pacemaker Remote node cache
817 if ((node_name != NULL)
818 && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) {
819 /* node_name could be a pointer into the cache entry being purged,
820 * so reassign it to a copy before the original gets freed
821 */
822 node_name_copy = pcmk__str_copy(node_name);
823 node_name = node_name_copy;
824
825 crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
826 g_hash_table_remove(crm_remote_peer_cache, node_name);
827 }
828
829 pcmk__cluster_forget_cluster_node(node_id, node_name);
830 free(node_name_copy);
831}
832
#if SUPPORT_COROSYNC
/* Remove inactive cluster cache entries that have the same name as the given
 * node but a different nonzero ID; returns the number of entries removed.
 *
 * NOTE(review): a guard line was lost in extraction before the second
 * "return 0" -- presumably if (crm_peer_cache == NULL) {
 */
static guint
remove_conflicting_peer(crm_node_t *node)
{
    int matches = 0;
    GHashTableIter iter;
    crm_node_t *existing_node = NULL;

    if (node->id == 0 || node->uname == NULL) {
        return 0;
    }

        return 0;
    }

    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
        if (existing_node->id > 0
            && existing_node->id != node->id
            && existing_node->uname != NULL
            && strcasecmp(existing_node->uname, node->uname) == 0) {

            if (pcmk__cluster_is_node_active(existing_node)) {
                // Never remove an active node
                continue;
            }

            crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
                     existing_node->id, existing_node->uname, node->id);

            g_hash_table_iter_remove(&iter);
            matches++;
        }
    }

    return matches;
}
#endif
871
/* Get a node cache entry by ID/name/UUID, creating a new cluster node entry
 * (keyed by a freshly generated unique ID) when no match is found, and
 * filling in any of the entry's missing ID, name, or UUID.
 *
 * NOTE(review): lines lost in extraction: the crm_node_t * return-type line,
 * a cache-initialization call, and the flag checks guarding the remote-cache
 * lookup and the early NULL return.
 */
/* coverity[-alloc] Memory is referenced in one or both hashtables */
pcmk__get_node(unsigned int id, const char *uname, const char *uuid,
               uint32_t flags)
{
    crm_node_t *node = NULL;
    char *uname_lookup = NULL;

    CRM_ASSERT(id > 0 || uname != NULL);


    // Check the Pacemaker Remote node cache first
        node = g_hash_table_lookup(crm_remote_peer_cache, uname);
        if (node != NULL) {
            return node;
        }
    }

        return NULL;
    }

    node = search_cluster_member_cache(id, uname, uuid);

    /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
     * we need to do a lookup of the node name using the id in the cluster membership. */
    if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
        uname_lookup = pcmk__cluster_node_name(id);
    }

    if (uname_lookup) {
        uname = uname_lookup;
        crm_trace("Inferred a name of '%s' for node %u", uname, id);

        /* try to turn up the node one more time now that we know the uname. */
        if (node == NULL) {
            node = search_cluster_member_cache(id, uname, uuid);
        }
    }

    if (node == NULL) {
        char *uniqueid = crm_generate_uuid();

        node = pcmk__assert_alloc(1, sizeof(crm_node_t));

        crm_info("Created entry %s/%p for node %s/%u (%d total)",
                 uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
        g_hash_table_replace(crm_peer_cache, uniqueid, node);
    }

    if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
        crm_info("Node %u is now known as %s", id, uname);
    }

    if(id > 0 && node->id == 0) {
        node->id = id;
    }

    if (uname && (node->uname == NULL)) {
        // Also triggers the peer status callback for the name change
        update_peer_uname(node, uname);
    }

    if(node->uuid == NULL) {
        if (uuid == NULL) {
            uuid = pcmk__cluster_node_uuid(node);
        }

        if (uuid) {
            crm_info("Node %u has uuid %s", id, uuid);

        } else {
            crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
        }
    }

    free(uname_lookup);

    return node;
}
969
/* Update a node cache entry's name, invoking the registered peer status
 * callback on change; on corosync clusters, also remove conflicting entries.
 *
 * NOTE(review): the corosync condition near the end was lost in extraction
 * (presumably pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync).
 */
static void
update_peer_uname(crm_node_t *node, const char *uname)
{
    CRM_CHECK(uname != NULL,
              crm_err("Bug: can't update node name without name"); return);
    CRM_CHECK(node != NULL,
              crm_err("Bug: can't update node name to %s without node", uname);
              return);

    if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
        crm_debug("Node uname '%s' did not change", uname);
        return;
    }

    // Warn (once per call) if the new name contains any uppercase letters
    for (const char *c = uname; *c; ++c) {
        if ((*c >= 'A') && (*c <= 'Z')) {
            crm_warn("Node names with capitals are discouraged, consider changing '%s'",
                     uname);
            break;
        }
    }

    pcmk__str_update(&node->uname, uname);

    if (peer_status_callback != NULL) {
        peer_status_callback(crm_status_uname, node, NULL);
    }

#if SUPPORT_COROSYNC
        && !pcmk_is_set(node->flags, crm_remote_node)) {

        remove_conflicting_peer(node);
    }
#endif
}
1017
1026static inline const char *
1027proc2text(enum crm_proc_flag proc)
1028{
1029 const char *text = "unknown";
1030
1031 switch (proc) {
1032 case crm_proc_none:
1033 text = "none";
1034 break;
1035 case crm_proc_cpg:
1036 text = "corosync-cpg";
1037 break;
1038 }
1039 return text;
1040}
1041
/*!
 * \brief Update the set of processes known to be running on a node
 *
 * \param[in]     source  Caller's function name (for log messages)
 * \param[in,out] node    Node object to update
 * \param[in]     flag    Process flag(s) being reported
 * \param[in]     status  NULL to set the process set to exactly \p flag;
 *                        "online" to add \p flag; any other value to clear it
 *
 * \return NULL if the peer cache was destroyed by the status callback or the
 *         node was autoreaped after losing its cluster process, otherwise
 *         the (possibly updated) \p node
 */
crm_node_t *
crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
{
    uint32_t last = 0;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
                                    source, proc2text(flag), status);
              return NULL);

    /* Pacemaker doesn't spawn processes on remote nodes */
    if (pcmk_is_set(node->flags, crm_remote_node)) {
        return node;
    }

    last = node->processes;
    if (status == NULL) {
        // No status given: replace the whole process set with flag
        node->processes = flag;
        if (node->processes != last) {
            changed = TRUE;
        }

    } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
        if ((node->processes & flag) != flag) {
            node->processes = pcmk__set_flags_as(__func__, __LINE__,
                                                 LOG_TRACE, "Peer process",
                                                 node->uname, node->processes,
                                                 flag, "processes");
            changed = TRUE;
        }

    } else if (node->processes & flag) {
        // Any non-"online" status clears the flag
        node->processes = pcmk__clear_flags_as(__func__, __LINE__,
                                               LOG_TRACE, "Peer process",
                                               node->uname, node->processes,
                                               flag, "processes");
        changed = TRUE;
    }

    if (changed) {
        if (status == NULL && flag <= crm_proc_none) {
            crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
                     node->id);
        } else {
            crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
                     proc2text(flag), status);
        }

        if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
            node->when_online = time(NULL);

        } else {
            node->when_online = 0;
        }

        /* Call the client callback first, then update the peer state,
         * in case the node will be reaped
         */
        if (peer_status_callback != NULL) {
            peer_status_callback(crm_status_processes, node, &last);
        }

        /* The client callback shouldn't touch the peer caches,
         * but as a safety net, bail if the peer cache was destroyed.
         */
        if (crm_peer_cache == NULL) {
            return NULL;
        }

        if (autoreap) {
            const char *peer_state = NULL;

            if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
                peer_state = CRM_NODE_MEMBER;
            } else {
                peer_state = CRM_NODE_LOST;
            }
            node = pcmk__update_peer_state(__func__, node, peer_state, 0);
        }
    } else {
        crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
                  proc2text(flag), status);
    }
    return node;
}
1143
1152void
1153pcmk__update_peer_expected(const char *source, crm_node_t *node,
1154 const char *expected)
1155{
1156 char *last = NULL;
1157 gboolean changed = FALSE;
1158
1159 CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
1160 return);
1161
1162 /* Remote nodes don't participate in joins */
1163 if (pcmk_is_set(node->flags, crm_remote_node)) {
1164 return;
1165 }
1166
1167 last = node->expected;
1168 if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
1169 node->expected = strdup(expected);
1170 changed = TRUE;
1171 }
1172
1173 if (changed) {
1174 crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
1175 expected, last);
1176 free(last);
1177 } else {
1178 crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
1179 node->id, expected);
1180 }
1181}
1182
/*!
 * \internal
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  Node's new membership ID (0 to leave unchanged)
 * \param[in,out] iter        If not NULL, iterator positioned at the node's
 *                            cache entry, used for safe removal on autoreap
 *
 * \return NULL if the node was reaped from the cache, otherwise \p node
 *
 * NOTE(review): the non-iterator removal line in the autoreap else-branch
 * was lost in extraction.
 */
static crm_node_t *
update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
                       uint64_t membership, GHashTableIter *iter)
{
    gboolean is_member;

    CRM_CHECK(node != NULL,
              crm_err("Could not set state for unknown host to %s"
                      CRM_XS " source=%s", state, source);
              return NULL);

    is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
    if (is_member) {
        node->when_lost = 0;
        if (membership) {
            node->last_seen = membership;
        }
    }

    if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
        char *last = node->state;

        if (is_member) {
            node->when_member = time(NULL);

        } else {
            node->when_member = 0;
        }

        node->state = strdup(state);
        crm_notice("Node %s state is now %s " CRM_XS
                   " nodeid=%u previous=%s source=%s", node->uname, state,
                   node->id, (last? last : "unknown"), source);
        if (peer_status_callback != NULL) {
            // Callback receives the previous state as its data argument
            peer_status_callback(crm_status_nstate, node, last);
        }
        free(last);

        if (autoreap && !is_member
            && !pcmk_is_set(node->flags, crm_remote_node)) {
            /* We only autoreap from the peer cache, not the remote peer cache,
             * because the latter should be managed only by
             * refresh_remote_nodes().
             */
            if(iter) {
                crm_notice("Purged 1 peer with " PCMK_XA_ID
                           "=%u and/or uname=%s from the membership cache",
                           node->id, node->uname);
                g_hash_table_iter_remove(iter);

            } else {
                /* NOTE(review): removal line lost in extraction here */
            }
            node = NULL;
        }

    } else {
        crm_trace("Node %s state is unchanged (%s) " CRM_XS
                  " nodeid=%u source=%s", node->uname, state, node->id, source);
    }
    return node;
}
1261
1277crm_node_t *
1278pcmk__update_peer_state(const char *source, crm_node_t *node,
1279 const char *state, uint64_t membership)
1280{
1281 return update_peer_state_iter(source, node, state, membership, NULL);
1282}
1283
1290void
1291pcmk__reap_unseen_nodes(uint64_t membership)
1292{
1293 GHashTableIter iter;
1294 crm_node_t *node = NULL;
1295
1296 crm_trace("Reaping unseen nodes...");
1297 g_hash_table_iter_init(&iter, crm_peer_cache);
1298 while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1299 if (node->last_seen != membership) {
1300 if (node->state) {
1301 /*
1302 * Calling update_peer_state_iter() allows us to
1303 * remove the node from crm_peer_cache without
1304 * invalidating our iterator
1305 */
1306 update_peer_state_iter(__func__, node, CRM_NODE_LOST,
1307 membership, &iter);
1308
1309 } else {
1310 crm_info("State of node %s[%u] is still unknown",
1311 node->uname, node->id);
1312 }
1313 }
1314 }
1315}
1316
1317static crm_node_t *
1318find_cib_cluster_node(const char *id, const char *uname)
1319{
1320 GHashTableIter iter;
1321 crm_node_t *node = NULL;
1322 crm_node_t *by_id = NULL;
1323 crm_node_t *by_name = NULL;
1324
1325 if (uname) {
1326 g_hash_table_iter_init(&iter, cluster_node_cib_cache);
1327 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1328 if (node->uname && strcasecmp(node->uname, uname) == 0) {
1329 crm_trace("Name match: %s = %p", node->uname, node);
1330 by_name = node;
1331 break;
1332 }
1333 }
1334 }
1335
1336 if (id) {
1337 g_hash_table_iter_init(&iter, cluster_node_cib_cache);
1338 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1339 if(strcasecmp(node->uuid, id) == 0) {
1340 crm_trace("ID match: %s= %p", id, node);
1341 by_id = node;
1342 break;
1343 }
1344 }
1345 }
1346
1347 node = by_id; /* Good default */
1348 if (by_id == by_name) {
1349 /* Nothing to do if they match (both NULL counts) */
1350 crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1351
1352 } else if (by_id == NULL && by_name) {
1353 crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1354
1355 if (id) {
1356 node = NULL;
1357
1358 } else {
1359 node = by_name;
1360 }
1361
1362 } else if (by_name == NULL && by_id) {
1363 crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1364
1365 if (uname) {
1366 node = NULL;
1367 }
1368
1369 } else if (uname && by_id->uname
1370 && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
1371 /* Multiple nodes have the same uname in the CIB.
1372 * Return by_id. */
1373
1374 } else if (id && by_name->uuid
1375 && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
1376 /* Multiple nodes have the same id in the CIB.
1377 * Return by_name. */
1378 node = by_name;
1379
1380 } else {
1381 node = NULL;
1382 }
1383
1384 if (node == NULL) {
1385 crm_debug("Couldn't find node%s%s%s%s",
1386 id? " " : "",
1387 id? id : "",
1388 uname? " with name " : "",
1389 uname? uname : "");
1390 }
1391
1392 return node;
1393}
1394
/* Update the CIB cluster node cache from one CIB node element: add a new
 * entry for an unknown node, or refresh the name of a still-dirty entry.
 *
 * NOTE(review): a line was lost in extraction in the else-if branch --
 * presumably clear_peer_flags(node, crm_node_dirty).
 */
static void
cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
{
    const char *id = crm_element_value(xml_node, PCMK_XA_ID);
    const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
    crm_node_t * node = NULL;

    CRM_CHECK(id != NULL && uname !=NULL, return);
    node = find_cib_cluster_node(id, uname);

    if (node == NULL) {
        // Not found: create an entry keyed by a freshly generated unique ID
        char *uniqueid = crm_generate_uuid();

        node = pcmk__assert_alloc(1, sizeof(crm_node_t));

        node->uname = pcmk__str_copy(uname);
        node->uuid = pcmk__str_copy(id);

        g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);

    } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
        pcmk__str_update(&node->uname, uname);

        /* Node is in cache and hasn't been updated already, so mark it clean */
    }

}
1423
/* Repopulate the CIB cluster node cache from a CIB, using the same
 * dirty-mark-and-sweep approach as refresh_remote_nodes().
 *
 * NOTE(review): lines lost in extraction: a cache-initialization call and
 * the xpath-search invocation whose tail is the
 * "cluster_node_cib_cache_refresh_helper, NULL" line.
 */
static void
refresh_cluster_node_cib_cache(xmlNode *cib)
{

    g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);

        cluster_node_cib_cache_refresh_helper, NULL);

    // Remove all old cache entries that weren't seen in the CIB
    g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
}
1437
/* Refresh both the remote node and CIB cluster node caches from a CIB.
 *
 * NOTE(review): the function-name line was lost in extraction; per doxygen
 * context this is pcmk__refresh_node_caches_from_cib(xmlNode *cib).
 */
void
{
    refresh_remote_nodes(cib);
    refresh_cluster_node_cib_cache(cib);
}
1444
1445// Deprecated functions kept only for backward API compatibility
1446// LCOV_EXCL_START
1447
1448#include <crm/cluster/compat.h>
1449
1450int
1451crm_terminate_member(int nodeid, const char *uname, void *unused)
1452{
1453 return stonith_api_kick(nodeid, uname, 120, TRUE);
1454}
1455
1456int
1457crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1458{
1459 return stonith_api_kick(nodeid, uname, 120, TRUE);
1460}
1461
/* Deprecated wrapper.
 * NOTE(review): the body line was lost in extraction; presumably a
 * pcmk__get_node() call with a cluster-member search flag.
 */
crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
{
}
1467
1468crm_node_t *
1469crm_get_peer_full(unsigned int id, const char *uname, int flags)
1470{
1471 return pcmk__get_node(id, uname, NULL, flags);
1472}
1473
/* Deprecated: return the remote cache size, clamped to INT_MAX.
 * NOTE(review): the function-name line was lost in extraction; per doxygen
 * context this is crm_remote_peer_cache_size(void).
 */
int
{
    unsigned int count = pcmk__cluster_num_remote_nodes();

    return QB_MIN(count, INT_MAX);
}
1481
/* Deprecated wrapper for refresh_remote_nodes().
 * NOTE(review): the signature line was lost in extraction; per doxygen
 * context this is crm_remote_peer_cache_refresh(xmlNode *cib).
 */
void
{
    refresh_remote_nodes(cib);
}
1487
1488crm_node_t *
1489crm_remote_peer_get(const char *node_name)
1490{
1491 return pcmk__cluster_lookup_remote_node(node_name);
1492}
1493
void
crm_remote_peer_cache_remove(const char *node_name)
{
    /* Deprecated wrapper.
     * NOTE(review): the body line was lost in extraction; presumably
     * pcmk__cluster_forget_remote_node(node_name);
     */
}
1499
/* Deprecated wrapper for pcmk__cluster_is_node_active().
 * NOTE(review): the signature line was lost in extraction; per doxygen
 * context this is crm_is_peer_active(const crm_node_t *node).
 */
gboolean
{
    return pcmk__cluster_is_node_active(node);
}
1505
/* Deprecated.
 * NOTE(review): the name and body lines were lost in extraction; per doxygen
 * context this is crm_active_peers(void), presumably returning
 * pcmk__cluster_num_active_nodes().
 */
guint
{
}
1511
1512guint
1513reap_crm_member(uint32_t id, const char *name)
1514{
1515 int matches = 0;
1516 crm_node_t search = { 0, };
1517
1518 if (crm_peer_cache == NULL) {
1519 crm_trace("Membership cache not initialized, ignoring purge request");
1520 return 0;
1521 }
1522
1523 search.id = id;
1524 search.uname = pcmk__str_copy(name);
1525 matches = g_hash_table_foreach_remove(crm_peer_cache,
1526 should_forget_cluster_node, &search);
1527 if(matches) {
1528 crm_notice("Purged %d peer%s with " PCMK_XA_ID
1529 "=%u%s%s from the membership cache",
1530 matches, pcmk__plural_s(matches), search.id,
1531 (search.uname? " and/or uname=" : ""),
1532 (search.uname? search.uname : ""));
1533
1534 } else {
1535 crm_info("No peers with " PCMK_XA_ID
1536 "=%u%s%s to purge from the membership cache",
1537 search.id, (search.uname? " and/or uname=" : ""),
1538 (search.uname? search.uname : ""));
1539 }
1540
1541 free(search.uname);
1542 return matches;
1543}
1544
1545void
1550
1551void
1556
void
crm_set_autoreap(gboolean enable)
{
    /* Deprecated wrapper.
     * NOTE(review): the body line was lost in extraction; presumably
     * pcmk__cluster_set_autoreap(enable);
     */
}
1562
void
crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
{
    /* Deprecated wrapper.
     * NOTE(review): the body line was lost in extraction; presumably
     * pcmk__cluster_set_status_callback(dispatch);
     */
}
1568
1569// LCOV_EXCL_STOP
1570// End deprecated API
const char * name
Definition cib.c:26
const char * pcmk__cluster_node_uuid(crm_node_t *node)
Definition cluster.c:89
@ pcmk__node_search_cluster_member
Search for cluster nodes from membership cache.
Definition internal.h:37
@ pcmk__node_search_cluster_cib
Search for cluster nodes from CIB (as of last cache refresh)
Definition internal.h:51
@ pcmk__node_search_remote
Search for remote nodes.
Definition internal.h:40
crm_proc_flag
Definition internal.h:20
@ crm_proc_cpg
Definition internal.h:28
@ crm_proc_none
Definition internal.h:25
char * pcmk__cluster_node_name(uint32_t nodeid)
Definition cluster.c:269
#define CRM_NODE_LOST
Definition cluster.h:45
#define CRM_NODE_MEMBER
Definition cluster.h:49
const char * pcmk_cluster_layer_text(enum pcmk_cluster_layer layer)
Get a log-friendly string equivalent of a cluster layer.
Definition cluster.c:388
enum pcmk_cluster_layer pcmk_get_cluster_layer(void)
Get and validate the local cluster layer.
Definition cluster.c:415
pcmk_cluster_layer
Types of cluster layer.
Definition cluster.h:225
@ pcmk_cluster_layer_corosync
Corosync Cluster Engine.
Definition cluster.h:228
@ crm_remote_node
Definition cluster.h:77
@ crm_node_dirty
Definition cluster.h:80
crm_status_type
Definition cluster.h:214
@ crm_status_processes
Definition cluster.h:217
@ crm_status_nstate
Definition cluster.h:216
@ crm_status_uname
Definition cluster.h:215
int pcmk__xe_get_bool_attr(const xmlNode *node, const char *name, bool *value)
Definition nvpair.c:909
#define pcmk__assert_alloc(nmemb, size)
Definition internal.h:297
uint64_t flags
Definition remote.c:3
char * crm_generate_uuid(void)
Definition utils.c:431
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition utils.c:356
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition util.h:98
Deprecated Pacemaker cluster API.
bool pcmk__corosync_is_peer_active(const crm_node_t *node)
Definition corosync.c:528
bool pcmk__corosync_has_nodelist(void)
Definition corosync.c:727
char uname[MAX_NAME]
Definition cpg.c:5
char data[0]
Definition cpg.c:10
uint32_t id
Definition cpg.c:0
IPC interface to Pacemaker daemons.
#define crm_info(fmt, args...)
Definition logging.h:397
#define do_crm_log(level, fmt, args...)
Log a message.
Definition logging.h:181
#define crm_warn(fmt, args...)
Definition logging.h:392
#define CRM_XS
Definition logging.h:56
#define crm_crit(fmt, args...)
Definition logging.h:386
#define crm_notice(fmt, args...)
Definition logging.h:395
#define CRM_CHECK(expr, failure_action)
Definition logging.h:245
#define crm_debug(fmt, args...)
Definition logging.h:400
#define crm_err(fmt, args...)
Definition logging.h:389
#define crm_trace(fmt, args...)
Definition logging.h:402
#define LOG_TRACE
Definition logging.h:38
gboolean crm_have_quorum
Definition membership.c:68
unsigned int pcmk__cluster_num_remote_nodes(void)
Definition membership.c:98
crm_node_t * pcmk__cluster_lookup_remote_node(const char *node_name)
Definition membership.c:122
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
void pcmk__update_peer_expected(const char *source, crm_node_t *node, const char *expected)
int crm_remote_peer_cache_size(void)
GHashTable * crm_peer_cache
Definition membership.c:40
void crm_set_autoreap(gboolean enable)
void crm_remote_peer_cache_refresh(xmlNode *cib)
void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
Definition membership.c:442
crm_node_t * pcmk__get_node(unsigned int id, const char *uname, const char *uuid, uint32_t flags)
Definition membership.c:890
#define clear_peer_flags(peer, flags_to_clear)
Definition membership.c:80
void pcmk__reap_unseen_nodes(uint64_t membership)
int crm_terminate_member(int nodeid, const char *uname, void *unused)
guint reap_crm_member(uint32_t id, const char *name)
bool pcmk__cluster_is_node_active(const crm_node_t *node)
Definition membership.c:347
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
crm_node_t * pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
Definition membership.c:765
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
void pcmk__refresh_node_caches_from_cib(xmlNode *cib)
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
Definition membership.c:808
GHashTable * crm_remote_peer_cache
Definition membership.c:57
void crm_remote_peer_cache_remove(const char *node_name)
#define set_peer_flags(peer, flags_to_set)
Definition membership.c:73
void pcmk__cluster_destroy_node_caches(void)
Definition membership.c:557
crm_node_t * pcmk__update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership)
Update a node's state and membership information.
unsigned long long crm_peer_seq
Definition membership.c:67
unsigned int pcmk__cluster_num_active_nodes(void)
Definition membership.c:507
void pcmk__cluster_forget_remote_node(const char *node_name)
Definition membership.c:196
void pcmk__cluster_init_node_caches(void)
Definition membership.c:537
gboolean crm_is_peer_active(const crm_node_t *node)
void pcmk__cluster_set_autoreap(bool enable)
Definition membership.c:616
void crm_peer_init(void)
void pcmk__cluster_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Definition membership.c:594
void crm_peer_destroy(void)
guint crm_active_peers(void)
crm_node_t * crm_remote_peer_get(const char *node_name)
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition nvpair.c:446
#define PCMK_VALUE_ONLINE
Definition options.h:184
pcmk__action_result_t result
Definition pcmk_fence.c:35
#define CRM_ASSERT(expr)
Definition results.h:42
@ pcmk_rc_ok
Definition results.h:162
Fencing aka. STONITH.
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition st_client.c:1976
#define pcmk__plural_s(i)
void pcmk__str_update(char **str, const char *value)
Definition strings.c:1277
@ pcmk__str_casei
#define pcmk__str_copy(str)
GHashTable * pcmk__strikey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition strings.c:739
uint32_t processes
Definition cluster.h:112
char * uname
Definition cluster.h:88
char * expected
Definition cluster.h:125
time_t when_member
Definition cluster.h:130
time_t when_online
Definition cluster.h:131
char * conn_host
Definition cluster.h:128
uint64_t last_seen
Definition cluster.h:111
uint32_t id
Definition cluster.h:120
time_t when_lost
Definition cluster.h:121
uint64_t flags
Definition cluster.h:110
Wrappers for and extensions to libxml2.
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition xpath.c:170
#define PCMK__XP_REMOTE_NODE_CONFIG
#define PCMK__XP_REMOTE_NODE_STATUS
#define PCMK__XP_MEMBER_NODE_CONFIG
#define PCMK__XP_GUEST_NODE_CONFIG
#define PCMK_XA_ID
Definition xml_names.h:296
#define PCMK_XA_VALUE
Definition xml_names.h:437
#define PCMK_XA_UNAME
Definition xml_names.h:426
#define PCMK__XA_IN_CCM