Browse Source

Add prometheus metrics for Central controllers (#1969)

* add header-only prometheus lib to ext

* rename folder

* Undo rename directory

* prometheus simpleapi included on mac & linux

* wip

* wire up some controller stats

* Get windows building with prometheus

* bsd build flags for prometheus

* Fix multiple network join from environment entrypoint.sh.release (#1961)

* _bond_m guards _bond, not _paths_m (#1965)

* Fix: warning: mutex '_aqm_m' is not held on every path through here [-Wthread-safety-analysis] (#1964)

* Serve prom metrics from /metrics endpoint

* Add prom metrics for Central controller specific things

* reorganize metric initialization

* testing out a labeled gauge on Networks

* increment error counter on throw

* Consolidate metrics definitions

Put all metric definitions into node/Metrics.hpp; they are accessed as
needed from there.

* Revert "testing out a labeled gauge on Networks"

This reverts commit 499ed6d95e11452019cdf48e32ed4cd878c2705b.

* still blows up but adding to the record for completeness right now

* Fix runtime issues with metrics

* Add metrics files to visual studio project

* Missed an "extern"

* add copyright headers to new files

* Add metrics for sent/received bytes (total)

* put /metrics endpoint behind auth

* sendto returns int on Win32

---------

Co-authored-by: Leonardo Amaral <leleobhz@users.noreply.github.com>
Co-authored-by: Brenton Bostick <bostick@gmail.com>
Grant Limberg 1 year ago
parent
commit
8e6e4ede6d
62 changed files with 4023 additions and 25 deletions
  1. 13 1
      controller/ConnectionPool.hpp
  2. 40 5
      controller/DB.cpp
  3. 2 0
      controller/DB.hpp
  4. 8 2
      controller/FileDB.cpp
  5. 8 1
      controller/PostgreSQL.cpp
  6. 3 0
      controller/PostgreSQL.hpp
  7. 3 0
      ext/prometheus-cpp-lite-1.0/.gitignore
  8. 36 0
      ext/prometheus-cpp-lite-1.0/3rdpatry/http-client-lite/CMakeLists.txt
  9. 22 0
      ext/prometheus-cpp-lite-1.0/3rdpatry/http-client-lite/LICENSE
  10. 30 0
      ext/prometheus-cpp-lite-1.0/3rdpatry/http-client-lite/README.md
  11. 5 0
      ext/prometheus-cpp-lite-1.0/3rdpatry/http-client-lite/examples/CMakeLists.txt
  12. 43 0
      ext/prometheus-cpp-lite-1.0/3rdpatry/http-client-lite/examples/simple_request.cpp
  13. 327 0
      ext/prometheus-cpp-lite-1.0/3rdpatry/http-client-lite/include/jdl/httpclientlite.h
  14. 34 0
      ext/prometheus-cpp-lite-1.0/CMakeLists.txt
  15. 21 0
      ext/prometheus-cpp-lite-1.0/LICENSE
  16. 201 0
      ext/prometheus-cpp-lite-1.0/README.md
  17. 20 0
      ext/prometheus-cpp-lite-1.0/core/CMakeLists.txt
  18. 40 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/atomic_floating.h
  19. 72 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/benchmark.h
  20. 35 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/builder.h
  21. 194 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/ckms_quantiles.h
  22. 94 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/client_metric.h
  23. 27 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/collectable.h
  24. 112 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/counter.h
  25. 355 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/family.h
  26. 205 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/gateway.h
  27. 128 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/gauge.h
  28. 50 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/hash.h
  29. 154 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/histogram.h
  30. 29 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/metric.h
  31. 18 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/metric_family.h
  32. 86 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/push_to_server.h
  33. 123 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/registry.h
  34. 83 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/save_to_file.h
  35. 154 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/summary.h
  36. 211 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/text_serializer.h
  37. 62 0
      ext/prometheus-cpp-lite-1.0/core/include/prometheus/time_window_quantiles.h
  38. 31 0
      ext/prometheus-cpp-lite-1.0/examples/CMakeLists.txt
  39. 64 0
      ext/prometheus-cpp-lite-1.0/examples/gateway_example.cpp
  40. 65 0
      ext/prometheus-cpp-lite-1.0/examples/modern_example.cpp
  41. 67 0
      ext/prometheus-cpp-lite-1.0/examples/original_example.cpp
  42. 44 0
      ext/prometheus-cpp-lite-1.0/examples/push_to_server_example.cpp
  43. 43 0
      ext/prometheus-cpp-lite-1.0/examples/save_to_file_example.cpp
  44. 26 0
      ext/prometheus-cpp-lite-1.0/examples/simpleapi_example.cpp
  45. 57 0
      ext/prometheus-cpp-lite-1.0/examples/simpleapi_use_in_class_example.cpp
  46. 59 0
      ext/prometheus-cpp-lite-1.0/examples/use_benchmark_in_class_example.cpp
  47. 79 0
      ext/prometheus-cpp-lite-1.0/examples/use_counters_in_class_example.cpp
  48. 79 0
      ext/prometheus-cpp-lite-1.0/examples/use_gauge_in_class_example.cpp
  49. 7 0
      ext/prometheus-cpp-lite-1.0/simpleapi/CMakeLists.txt
  50. 156 0
      ext/prometheus-cpp-lite-1.0/simpleapi/include/prometheus/simpleapi.h
  51. 13 0
      ext/prometheus-cpp-lite-1.0/simpleapi/src/simpleapi.cpp
  52. 1 1
      make-bsd.mk
  53. 1 1
      make-linux.mk
  54. 1 1
      make-mac.mk
  55. 75 0
      node/Metrics.cpp
  56. 57 0
      node/Metrics.hpp
  57. 1 0
      objects.mk
  58. 8 2
      osdep/Phy.hpp
  59. 26 4
      service/OneService.cpp
  60. 8 6
      windows/ZeroTierOne/ZeroTierOne.vcxproj
  61. 6 0
      windows/ZeroTierOne/ZeroTierOne.vcxproj.filters
  62. 1 1
      zeroidc/Cargo.toml

+ 13 - 1
controller/ConnectionPool.hpp

@@ -19,6 +19,8 @@
 	#define _DEBUG(x)
 #endif
 
+#include "../node/Metrics.hpp"
+
 #include <deque>
 #include <set>
 #include <memory>
@@ -61,6 +63,7 @@ public:
     {
         while(m_pool.size() < m_minPoolSize){
             m_pool.push_back(m_factory->create());
+            Metrics::pool_avail++;
         }
     };
 
@@ -91,6 +94,7 @@ public:
         while((m_pool.size() + m_borrowed.size()) < m_minPoolSize) {
             std::shared_ptr<Connection> conn = m_factory->create();
             m_pool.push_back(conn);
+            Metrics::pool_avail++;
         }
 
         if(m_pool.size()==0){
@@ -99,8 +103,10 @@ public:
                 try {
                     std::shared_ptr<Connection> conn = m_factory->create();
                     m_borrowed.insert(conn);
+                    Metrics::pool_in_use++;
                     return std::static_pointer_cast<T>(conn);
                 } catch (std::exception &e) {
+                    Metrics::pool_errors++;
                     throw ConnectionUnavailable();
                 }
             } else {
@@ -116,11 +122,13 @@ public:
                             return std::static_pointer_cast<T>(conn);
                         } catch(std::exception& e) {
                             // Error creating a replacement connection
+                            Metrics::pool_errors++;
                             throw ConnectionUnavailable();
                         }
                     }
                 }
                 // Nothing available
+                Metrics::pool_errors++;
                 throw ConnectionUnavailable();
             }
         }
@@ -128,8 +136,10 @@ public:
         // Take one off the front
         std::shared_ptr<Connection> conn = m_pool.front();
         m_pool.pop_front();
+        Metrics::pool_avail--;
         // Add it to the borrowed list
         m_borrowed.insert(conn);
+        Metrics::pool_in_use++;
         return std::static_pointer_cast<T>(conn);
     };
 
@@ -143,7 +153,9 @@ public:
         // Lock
         std::unique_lock<std::mutex> lock(m_poolMutex);
         m_borrowed.erase(conn);
+        Metrics::pool_in_use--;
         if ((m_pool.size() + m_borrowed.size()) < m_maxPoolSize) {
+            Metrics::pool_avail++;
             m_pool.push_back(conn);
         }
     };
@@ -158,4 +170,4 @@ protected:
 
 }
 
-#endif
+#endif

+ 40 - 5
controller/DB.cpp

@@ -13,6 +13,7 @@
 
 #include "DB.hpp"
 #include "EmbeddedNetworkController.hpp"
+#include "../node/Metrics.hpp"
 
 #include <chrono>
 #include <algorithm>
@@ -211,16 +212,19 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
 			{
 				std::lock_guard<std::mutex> l(_networks_l);
 				auto nw2 = _networks.find(networkId);
-				if (nw2 != _networks.end())
+				if (nw2 != _networks.end()) {
 					nw = nw2->second;
+				}
 			}
 			if (nw) {
 				std::lock_guard<std::mutex> l(nw->lock);
-				if (OSUtils::jsonBool(old["activeBridge"],false))
+				if (OSUtils::jsonBool(old["activeBridge"],false)) {
 					nw->activeBridgeMembers.erase(memberId);
+				}
 				wasAuth = OSUtils::jsonBool(old["authorized"],false);
-				if (wasAuth)
+				if (wasAuth) {
 					nw->authorizedMembers.erase(memberId);
+				}
 				json &ips = old["ipAssignments"];
 				if (ips.is_array()) {
 					for(unsigned long i=0;i<ips.size();++i) {
@@ -255,11 +259,14 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
 
 			nw->members[memberId] = memberConfig;
 
-			if (OSUtils::jsonBool(memberConfig["activeBridge"],false))
+			if (OSUtils::jsonBool(memberConfig["activeBridge"],false)) {
 				nw->activeBridgeMembers.insert(memberId);
+			}
 			isAuth = OSUtils::jsonBool(memberConfig["authorized"],false);
-			if (isAuth)
+			if (isAuth) {
+				Metrics::member_auths++;
 				nw->authorizedMembers.insert(memberId);
+			}
 			json &ips = memberConfig["ipAssignments"];
 			if (ips.is_array()) {
 				for(unsigned long i=0;i<ips.size();++i) {
@@ -303,6 +310,24 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
 		}
 	}
 
+	if (notifyListeners) {
+		if(networkId != 0 && memberId != 0 && old.is_object() && !memberConfig.is_object()) {
+			// member delete
+			Metrics::member_count--;
+		} else if (networkId != 0 && memberId != 0 && !old.is_object() && memberConfig.is_object()) {
+			// new member
+			Metrics::member_count++;
+		}
+
+		if (!wasAuth && isAuth) {
+			Metrics::member_auths++;
+		} else if (wasAuth && !isAuth) {
+			Metrics::member_deauths++;
+		} else {
+			Metrics::member_changes++;
+		}
+	}
+
 	if ((notifyListeners)&&((wasAuth)&&(!isAuth)&&(networkId)&&(memberId))) {
 		std::lock_guard<std::mutex> ll(_changeListeners_l);
 		for(auto i=_changeListeners.begin();i!=_changeListeners.end();++i) {
@@ -313,6 +338,16 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
 
 void DB::_networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool notifyListeners)
 {
+	if (notifyListeners) {
+		if (old.is_object() && old.contains("id") && networkConfig.is_object() && networkConfig.contains("id")) {
+			Metrics::network_changes++;
+		} else if (!old.is_object() && networkConfig.is_object() && networkConfig.contains("id")) {
+			Metrics::network_count++;
+		} else if (old.is_object() && old.contains("id") && !networkConfig.is_object()) {
+			Metrics::network_count--;
+		}
+	}
+
 	if (networkConfig.is_object()) {
 		const std::string ids = networkConfig["id"];
 		const uint64_t networkId = Utils::hexStrToU64(ids.c_str());

+ 2 - 0
controller/DB.hpp

@@ -35,6 +35,8 @@
 
 #include <nlohmann/json.hpp>
 
+#include <prometheus/simpleapi.h>
+
 #define ZT_MEMBER_AUTH_TIMEOUT_NOTIFY_BEFORE 25000
 
 namespace ZeroTier

+ 8 - 2
controller/FileDB.cpp

@@ -13,6 +13,8 @@
 
 #include "FileDB.hpp"
 
+#include "../node/Metrics.hpp"
+
 namespace ZeroTier
 {
 
@@ -39,6 +41,7 @@ FileDB::FileDB(const char *path) :
 				if (nwids.length() == 16) {
 					nlohmann::json nullJson;
 					_networkChanged(nullJson,network,false);
+					Metrics::network_count++;
 					std::string membersPath(_networksPath + ZT_PATH_SEPARATOR_S + nwids + ZT_PATH_SEPARATOR_S "member");
 					std::vector<std::string> members(OSUtils::listDirectory(membersPath.c_str(),false));
 					for(auto m=members.begin();m!=members.end();++m) {
@@ -50,6 +53,7 @@ FileDB::FileDB(const char *path) :
 								if (addrs.length() == 10) {
 									nlohmann::json nullJson2;
 									_memberChanged(nullJson2,member,false);
+									Metrics::member_count++;
 								}
 							} catch ( ... ) {}
 						}
@@ -88,8 +92,9 @@ bool FileDB::save(nlohmann::json &record,bool notifyListeners)
 				if ((!old.is_object())||(!_compareRecords(old,record))) {
 					record["revision"] = OSUtils::jsonInt(record["revision"],0ULL) + 1ULL;
 					OSUtils::ztsnprintf(p1,sizeof(p1),"%s" ZT_PATH_SEPARATOR_S "%.16llx.json",_networksPath.c_str(),nwid);
-					if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1)))
+					if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1))) {
 						fprintf(stderr,"WARNING: controller unable to write to path: %s" ZT_EOL_S,p1);
+					}
 					_networkChanged(old,record,notifyListeners);
 					modified = true;
 				}
@@ -110,8 +115,9 @@ bool FileDB::save(nlohmann::json &record,bool notifyListeners)
 						OSUtils::ztsnprintf(p2,sizeof(p2),"%s" ZT_PATH_SEPARATOR_S "%.16llx",_networksPath.c_str(),(unsigned long long)nwid);
 						OSUtils::mkdir(p2);
 						OSUtils::mkdir(pb);
-						if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1)))
+						if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1))) {
 							fprintf(stderr,"WARNING: controller unable to write to path: %s" ZT_EOL_S,p1);
+						}
 					}
 					_memberChanged(old,record,notifyListeners);
 					modified = true;

+ 8 - 1
controller/PostgreSQL.cpp

@@ -119,6 +119,7 @@ MemberNotificationReceiver::MemberNotificationReceiver(PostgreSQL *p, pqxx::conn
 
 void MemberNotificationReceiver::operator() (const std::string &payload, int packend_pid) {
 	fprintf(stderr, "Member Notification received: %s\n", payload.c_str());
+	Metrics::pgsql_mem_notification++;
 	json tmp(json::parse(payload));
 	json &ov = tmp["old_val"];
 	json &nv = tmp["new_val"];
@@ -141,6 +142,7 @@ NetworkNotificationReceiver::NetworkNotificationReceiver(PostgreSQL *p, pqxx::co
 
 void NetworkNotificationReceiver::operator() (const std::string &payload, int packend_pid) {
 	fprintf(stderr, "Network Notification received: %s\n", payload.c_str());
+	Metrics::pgsql_net_notification++;
 	json tmp(json::parse(payload));
 	json &ov = tmp["old_val"];
 	json &nv = tmp["new_val"];
@@ -705,6 +707,8 @@ void PostgreSQL::initializeNetworks()
 				}
 			}
 
+			Metrics::network_count++;
+
 		 	_networkChanged(empty, config, false);
 
 			auto end = std::chrono::high_resolution_clock::now();
@@ -925,6 +929,8 @@ void PostgreSQL::initializeMembers()
 				}
 			}
 
+			Metrics::member_count++;
+
 			_memberChanged(empty, config, false);
 
 			memberId = "";
@@ -1034,7 +1040,6 @@ void PostgreSQL::heartbeat()
 				w.commit();
 			} catch (std::exception &e) {
 				fprintf(stderr, "%s: Heartbeat update failed: %s\n", controllerId, e.what());
-				w.abort();
 				_pool->unborrow(c);
 				std::this_thread::sleep_for(std::chrono::milliseconds(1000));
 				continue;
@@ -1230,6 +1235,7 @@ void PostgreSQL::_networksWatcher_Redis() {
 						}
 						lastID = id;
 					}
+					Metrics::redis_net_notification++;
 				}
 			}
 		} catch (sw::redis::Error &e) {
@@ -1788,6 +1794,7 @@ uint64_t PostgreSQL::_doRedisUpdate(sw::redis::Transaction &tx, std::string &con
 			.sadd("network-nodes-all:{"+controllerId+"}:"+networkId, memberId)
 			.hmset("member:{"+controllerId+"}:"+networkId+":"+memberId, record.begin(), record.end());