diff options
author | DDuarte <dnpd.dd@gmail.com> | 2016-06-04 11:26:57 +0100 |
---|---|---|
committer | DDuarte <dnpd.dd@gmail.com> | 2016-06-04 11:26:57 +0100 |
commit | 8b26aea95a301af5a6af888df4fd3e7683e1ef62 (patch) | |
tree | f888cf7f3a668565f10f37376a259c7d1c69f1d6 /src/common/Metric/Metric.cpp | |
parent | 2c2bb4a2377b40af2fccd484b13993aa16ad6380 (diff) |
Implement real time statistic visualization (#16956)
Docs at https://trinitycore.atlassian.net/wiki/display/tc/Monitoring+a+TrinityCore+server
* Common/Graphs: Initial proof of concept
* Move influx db code to its own class
* Reuse the same socket
* Allow to log values of different categories
* Allow to log events
* Pass the timestamp to influxdb
* Send events in batches
* Send data async
* Log server shutdown.
Fix memory leak.
* Allow to enable/disable Stats in the settings and at runtime
* Read interval between each batch send from config
* Add InfluxDB connection info to configs
* Move each event category to its own table
* Log pathfinding queries
* Move categories table initialization to constructor using enum as key to avoid assigning the table name to the wrong enum value
* Log player login/logout events.
Pass the hostname correctly in the HTTP request.
* Fix linux build
* Handle "Connection: close" HTTP header, reconnecting on next scheduled send.
Disable StatsLogger if connection fails, logging the error.
* Add an enum for categories of logged values, it's still possible to pass a string instead of the enum.
* Don't log the whole batchedData when InfluxDB returns an error, it's too long and unreadable on console.
* Allow to call a function at a specified interval in thread-safe World::Update() context to log data like player count.
* Log map tile load/unload
* Core/StatsLogger: Allow logging more value types other than ints
https://docs.influxdata.com/influxdb/v0.10/write_protocols/write_syntax/
* Fix a typo in string escape of StatsLogger
* Yet more fixes to the escaping in FormatInfluxDBValue
* DB/Gameobject: Fix respawn time of few Quest GameObjects
By Tauriella, closes #16701
* DB/Misc: Fix some engrish
By tkrokli closes #16648
* Tools/MMaps: Add format library linking to mmaps_generator
(Very) partial cherry pick of ed75b0649add23e082976fa4e5d504bc0c312602
* Core/StatsLogger: Simplify code
Convert values and categories arrays to maps initialized in-place
Remove constructor and destructor
* Core/StatsLogger: Add realm name to the event and value tags
* Log amount of processed packet of each session
* Apply recent singleton changes to sStatsLogger too
* Fix influxdb data format if no realm name is present
* Remove unneeded newlines from request body, fixes response 400 from InfluxDB 0.10
* Rename Reporting folder to Metric
* Rename StatsLogger to Metric
* Rename InfluxDB configs to Metric
* Add Grafana dashboards
* Add a random annoying macro
* Move string formatting to Metric::SendBatch(), reducing performance footprint of Metric::LogEvent() and Metric::LogValue()
* Update grafana graphs refresing tags on load and showing now-15m data, refreshing every minute. These settings can be modified in grafana.
* Rename MetricData fields
* Contrib/Grafana: Rename dashboard files
* Contrib/Grafana: Replace hardcoded Windows/Ubuntu realm names by the default, Trinity
* Config/Worldserver: Add missing section to the index
* Contrib/Grafana: Add singlestat panels with current online players, update diff averages (1 min, 5 mins and 15 mins)
http://i.imgur.com/Zi8lfvS.png
* Core/Metric: Replace the enums MetricEventCategory and MetricValueCategory by strings
For the sake of simplicity and less recompile time when adding new metrics, similar to how TC_LOG_* works
* Contrib/Grafana: Display the current number of online players and not its average
Closes #15075
(cherry picked from commit 3ae10160820782d039c3449107960108fb3a63b9)
# Conflicts:
# src/server/game/Server/WorldSession.cpp
# src/server/game/World/World.cpp
# src/server/worldserver/Main.cpp
Diffstat (limited to 'src/common/Metric/Metric.cpp')
-rw-r--r-- | src/common/Metric/Metric.cpp | 235 |
1 files changed, 235 insertions, 0 deletions
diff --git a/src/common/Metric/Metric.cpp b/src/common/Metric/Metric.cpp new file mode 100644 index 00000000000..9484cebcc72 --- /dev/null +++ b/src/common/Metric/Metric.cpp @@ -0,0 +1,235 @@ +/* +* Copyright (C) 2008-2016 TrinityCore <http://www.trinitycore.org/> +* +* This program is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License as published by the +* Free Software Foundation; either version 2 of the License, or (at your +* option) any later version. +* +* This program is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +* more details. +* +* You should have received a copy of the GNU General Public License along +* with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "Metric.h" +#include "Log.h" +#include "Config.h" +#include "Util.h" + +void Metric::Initialize(std::string const& realmName, boost::asio::io_service& ioService, std::function<void()> overallStatusLogger) +{ + _realmName = realmName; + _batchTimer = Trinity::make_unique<boost::asio::deadline_timer>(ioService); + _overallStatusTimer = Trinity::make_unique<boost::asio::deadline_timer>(ioService); + _overallStatusLogger = overallStatusLogger; + LoadFromConfigs(); +} + +bool Metric::Connect() +{ + _dataStream.connect(_hostname, _port); + auto error = _dataStream.error(); + if (error) + { + TC_LOG_ERROR("metric", "Error connecting to '%s:%s', disabling Metric. Error message : %s", + _hostname.c_str(), _port.c_str(), error.message().c_str()); + _enabled = false; + return false; + } + _dataStream.clear(); + return true; +} + +void Metric::LoadFromConfigs() +{ + bool previousValue = _enabled; + _enabled = sConfigMgr->GetBoolDefault("Metric.Enable", false); + _updateInterval = sConfigMgr->GetIntDefault("Metric.Interval", 10); + if (_updateInterval < 1) + { + TC_LOG_ERROR("metric", "'Metric.Interval' config set to %d, overriding to 1.", _updateInterval); + _updateInterval = 1; + } + + _overallStatusTimerInterval = sConfigMgr->GetIntDefault("Metric.OverallStatusInterval", 1); + if (_overallStatusTimerInterval < 1) + { + TC_LOG_ERROR("metric", "'Metric.OverallStatusInterval' config set to %d, overriding to 1.", _overallStatusTimerInterval); + _overallStatusTimerInterval = 1; + } + + // Schedule a send at this point only if the config changed from Disabled to Enabled. + // Cancel any scheduled operation if the config changed from Enabled to Disabled. + if (_enabled && !previousValue) + { + std::string connectionInfo = sConfigMgr->GetStringDefault("Metric.ConnectionInfo", ""); + if (connectionInfo.empty()) + { + TC_LOG_ERROR("metric", "'Metric.ConnectionInfo' not specified in configuration file."); + return; + } + + Tokenizer tokens(connectionInfo, ';'); + if (tokens.size() != 3) + { + TC_LOG_ERROR("metric", "'Metric.ConnectionInfo' specified with wrong format in configuration file."); + return; + } + + _hostname.assign(tokens[0]); + _port.assign(tokens[1]); + _databaseName.assign(tokens[2]); + Connect(); + + ScheduleSend(); + ScheduleOverallStatusLog(); + } +} + +void Metric::Update() +{ + if (_overallStatusTimerTriggered) + { + _overallStatusTimerTriggered = false; + _overallStatusLogger(); + } +} + +void Metric::LogEvent(std::string const& category, std::string const& title, std::string const& description) +{ + using namespace std::chrono; + + MetricData* data = new MetricData; + data->Category = category; + data->Timestamp = system_clock::now(); + data->Type = METRIC_DATA_EVENT; + data->Title = title; + data->Text = description; + + _queuedData.Enqueue(data); +} + +void Metric::SendBatch() +{ + using namespace std::chrono; + + std::stringstream batchedData; + MetricData* data; + bool firstLoop = true; + while (_queuedData.Dequeue(data)) + { + if (!firstLoop) + batchedData << "\n"; + + batchedData << data->Category; + if (!_realmName.empty()) + batchedData << ",realm=" << _realmName; + + batchedData << " "; + + switch (data->Type) + { + case METRIC_DATA_VALUE: + batchedData << "value=" << data->Value; + break; + case METRIC_DATA_EVENT: + batchedData << "title=\"" << data->Title << "\",text=\"" << data->Text << "\""; + break; + } + + batchedData << " "; + + batchedData << std::to_string(duration_cast<nanoseconds>(data->Timestamp.time_since_epoch()).count()); + + firstLoop = false; + delete data; + } + + // Check if there's any data to send + if (batchedData.tellp() == std::streampos(0)) + { + ScheduleSend(); + return; + } + + if (!_dataStream.good() && !Connect()) + return; + + _dataStream << "POST " << "/write?db=" << _databaseName << " HTTP/1.1\r\n"; + _dataStream << "Host: " << _hostname << ":" << _port << "\r\n"; + _dataStream << "Accept: */*\r\n"; + _dataStream << "Content-Type: application/octet-stream\r\n"; + _dataStream << "Content-Transfer-Encoding: binary\r\n"; + + _dataStream << "Content-Length: " << std::to_string(batchedData.tellp()) << "\r\n\r\n"; + _dataStream << batchedData.rdbuf(); + + std::string http_version; + _dataStream >> http_version; + unsigned int status_code = 0; + _dataStream >> status_code; + if (status_code != 204) + { + TC_LOG_ERROR("metric", "Error sending data, returned HTTP code: %u", status_code); + } + + // Read and ignore the status description + std::string status_description; + std::getline(_dataStream, status_description); + // Read headers + std::string header; + while (std::getline(_dataStream, header) && header != "\r") + { + if (header == "Connection: close\r") + _dataStream.close(); + } + + ScheduleSend(); +} + +void Metric::ScheduleSend() +{ + if (_enabled) + { + _batchTimer->expires_from_now(boost::posix_time::seconds(_updateInterval)); + _batchTimer->async_wait(std::bind(&Metric::SendBatch, this)); + } + else + { + _dataStream.close(); + MetricData* data; + // Clear the queue + while (_queuedData.Dequeue(data)) + ; + } +} + +void Metric::ForceSend() +{ + // Send what's queued only if io_service is stopped (so only on shutdown) + if (_enabled && _batchTimer->get_io_service().stopped()) + SendBatch(); +} + +void Metric::ScheduleOverallStatusLog() +{ + if (_enabled) + { + _overallStatusTimer->expires_from_now(boost::posix_time::seconds(_overallStatusTimerInterval)); + _overallStatusTimer->async_wait([this](const boost::system::error_code&) + { + _overallStatusTimerTriggered = true; + ScheduleOverallStatusLog(); + }); + } +} + +Metric* Metric::instance() +{ + static Metric instance; + return &instance; +} |