feat(database): add completely new analytics backend

This commit is contained in:
David Bailey 2025-03-27 23:33:27 +01:00
parent 31080cae2b
commit 5e2f0a7185
7 changed files with 446 additions and 130 deletions

View file

@ -55,132 +55,25 @@ CREATE TABLE dev_post_markdown (
FULLTEXT(post_markdown)
);
CREATE TABLE path_access_counts (
access_time DATETIME NOT NULL,
host VARCHAR(64) NOT NULL,
CREATE TABLE analytics_summations (
time_bucket DATETIME NOT NULL,
metric VARCHAR(16) NOT NULL,
tags JSON NOT NULL,
post_path VARCHAR(255),
agent VARCHAR(255),
referrer VARCHAR(255),
path_access_count INTEGER DEFAULT 0,
path_processing_time DOUBLE PRECISION DEFAULT 0,
metric_value DOUBLE PRECISION DEFAULT 0,
PRIMARY KEY(access_time, host, post_path, agent, referrer)
tags_md5 CHAR(32) AS (MD5(tags)),
INDEX(time_bucket, metric),
CONSTRAINT unique_analytic UNIQUE(time_bucket, metric, tags_md5)
);
CREATE TABLE path_errcodes (
access_timestamp DATETIME NOT NULL,
host VARCHAR(64) NOT NULL,
CREATE TABLE analytics_events (
event_time DATETIME NOT NULL,
metric VARCHAR(64) NOT NULL DEFAULT 'error_msg',
tags JSON NOT NULL,
post_path VARCHAR(255),
agent VARCHAR(255),
referrer VARCHAR(255),
error VARCHAR(1024),
);
event_text TEXT,
CREATE TABLE feed_cache (
host VARCHAR(64) NOT NULL,
search_path VARCHAR(255),
export_type VARCHAR(255),
feed_created_on DATETIME DEFAULT CURRENT_TIMESTAMP,
feed_content MEDIUMTEXT,
PRIMARY KEY(host, search_path, export_type)
);
INSERT INTO posts (post_path, post_path_depth, post_metadata, post_content)
VALUES (
'/about',
0,
'
{
"tags": ["test", "test2", "hellorld"],
"brief": "This is a simple test indeed",
"type": "text/markdown",
"title": "About the dergen"
}
',
'
# About the dergs indeed
This is just a simple test. Might be nice, though!
'
), (
'/about/neira',
1,
'
{
"tags": ["test", "test2", "hellorld", "neira"],
"brief": "This is a soft grab of Neira",
"type": "text/markdown",
"title": "About her"
}
',
'
# Nothing here yet!
Sorry for this. She is working hard :>
'
), (
'/about/xasin',
1,
'
{
"tags": ["test", "test2", "hellorld", "xasin"],
"brief": "This is a soft grab of Xasin",
"type": "text/markdown",
"title": "About her"
}
',
'
# Nothing here yet!
Sorry for this. He is working hard :>
'
), (
'/about/mesh',
1,
'
{
"tags": ["test", "test2", "hellorld", "mesh"],
"brief": "This is a soft grab of Mesh",
"type": "text/markdown",
"title": "About her"
}
',
'
# Nothing here yet!
Sorry for this. Shi is working hard :>
'
), (
'/about/alviere',
1,
'
{
"tags": ["test", "test2", "hellorld", "mesh"],
"brief": "SHE GRABS",
"type": "text/markdown",
"title": "SHE GRABS"
}
',
'
# Nothing here yet!
Sorry for this. She GRABS A LOT
----
## And now, for the lorem:
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Eleifend mi in nulla posuere sollicitudin aliquam ultrices sagittis orci. Risus commodo viverra maecenas accumsan lacus vel facilisis. Sed viverra tellus in hac habitasse. Nulla malesuada pellentesque elit eget gravida cum. Posuere sollicitudin aliquam ultrices sagittis orci a. Libero nunc consequat interdum varius sit amet. Bibendum arcu vitae elementum curabitur vitae nunc sed velit. Amet mauris commodo quis imperdiet massa tincidunt nunc pulvinar. Sed adipiscing diam donec adipiscing. Laoreet id donec ultrices tincidunt arcu non sodales. Id semper risus in hendrerit gravida rutrum quisque non. Ut venenatis tellus in metus vulputate eu.
Risus sed vulputate odio ut enim blandit volutpat. Placerat in egestas erat imperdiet. Non curabitur gravida arcu ac tortor dignissim convallis aenean. Neque aliquam vestibulum morbi blandit cursus risus at. Elementum integer enim neque volutpat ac tincidunt vitae semper. Eu ultrices vitae auctor eu augue ut. In mollis nunc sed id semper risus in hendrerit gravida. Lectus arcu bibendum at varius vel pharetra vel turpis nunc. In pellentesque massa placerat duis. Non quam lacus suspendisse faucibus. Vitae aliquet nec ullamcorper sit amet risus nullam. Accumsan lacus vel facilisis volutpat est velit egestas dui.
Risus feugiat in ante metus dictum at tempor commodo. Duis ut diam quam nulla. Nunc aliquet bibendum enim facilisis gravida neque convallis. Tincidunt augue interdum velit euismod in pellentesque. Praesent semper feugiat nibh sed pulvinar proin gravida hendrerit lectus. Non odio euismod lacinia at quis risus sed vulputate odio. Nunc sed blandit libero volutpat sed cras ornare arcu. Adipiscing enim eu turpis egestas pretium aenean pharetra magna. Ut tristique et egestas quis ipsum suspendisse. Blandit cursus risus at ultrices mi tempus imperdiet nulla malesuada.
'
INDEX(event_time)
);

View file

@ -1,16 +1,20 @@
<?php
interface AnalyticsInterface {
public function log_path_access($path,
public function get_current_timestamp();
public function increment_counter($tags, $counter, $value = 1, $timestamp = null);
public function log_path_access(
$path,
$agent,
$time,
$referrer);
$referrer, $runtime);
public function log_path_errcode(
$path,
$agent,
$referrer,
$code);
$code, $message);
public function pop_analytics($delete = true);
}
?>

View file

@ -0,0 +1,334 @@
<?php
require_once 'analytics_interface.php';
class MySQLAnalyticsHandler
implements AnalyticsInterface {
private $sql_connection;
private $hostname;
/**
 * Store the collaborators used by every other method of this handler.
 *
 * @param mixed $sql_connection Database connection the queries run on. It must
 *                              offer prepare(), begin_transaction(), commit()
 *                              and rollback() (presumably a mysqli instance;
 *                              confirm against the setup code).
 * @param mixed $hostname       Value written into the 'host' tag of every
 *                              counter and event recorded by this handler.
 */
function __construct($sql_connection, $hostname) {
$this->sql_connection = $sql_connection;
$this->hostname = $hostname;
}
/**
 * Prepare and execute one SQL statement on the stored connection.
 *
 * @param string $qery     SQL text, using `?` placeholders for parameters.
 * @param string $argtypes Type string handed to bind_param() (e.g. "dssd");
 *                         when it compares equal to "" no binding happens.
 * @param mixed  ...$args  Values for the placeholders, in order.
 * @return mixed Whatever get_result() of the executed statement returns.
 */
private function _exec($qery, $argtypes = '', ...$args) {
    $statement = $this->sql_connection->prepare($qery);

    // Parameterless queries skip the bind step entirely.
    $needs_binding = ($argtypes != "");
    if ($needs_binding) {
        $statement->bind_param($argtypes, ...$args);
    }

    $statement->execute();
    $result_set = $statement->get_result();

    return $result_set;
}
/**
 * Ask the database server for its current time.
 *
 * Using the server clock (instead of PHP's) keeps the bucket arithmetic in
 * increment_counter() on the same clock as the NOW()-based barrier queries.
 *
 * @return int Unix timestamp in seconds as reported by the database.
 */
public function get_current_timestamp() {
    $result = $this->_exec(
"SELECT unix_timestamp(NOW()) AS ctime"
    );
    $row = $result->fetch_assoc();

    return (int)($row['ctime']);
}
/**
 * Add a value to a tagged counter inside its 5-minute time bucket.
 *
 * The timestamp is rounded down to a multiple of 300 seconds. The row for
 * that bucket/metric/tags combination is inserted, or, when the insert hits
 * a duplicate key, its metric_value is increased by the new value.
 *
 * @param mixed          $tags      Tag set, stored JSON-encoded.
 * @param string         $counter   Metric name.
 * @param int|float      $value     Amount to add (defaults to 1).
 * @param int|float|null $timestamp Unix seconds; null means "ask the database
 *                                  for the current time".
 * @return void
 */
public function increment_counter($tags, $counter, $value = 1, $timestamp = null) {
    if ($timestamp === null) {
        $timestamp = $this->get_current_timestamp();
    }

    // Insert-or-accumulate in a single statement.
    $upsert_sql =
"INSERT INTO analytics_summations
(
time_bucket,
metric,
tags,
metric_value
)
VALUES
(
from_unixtime(floor(? / 300) * 300),
?,
?,
?
) AS new
ON DUPLICATE KEY
UPDATE metric_value=analytics_summations.metric_value + new.metric_value;
";

    $encoded_tags = json_encode($tags);

    $this->_exec(
        $upsert_sql,
        "dssd",
        $timestamp,
        $counter,
        $encoded_tags,
        $value
    );
}
/**
 * Record a single free-text event, timestamped by the database (NOW()).
 *
 * Only event_time, tags and event_text are written; any other column of
 * analytics_events keeps its table default.
 *
 * @param mixed  $event_tags Tag set, stored JSON-encoded.
 * @param string $event_text Text payload of the event.
 * @return void
 */
public function insert_event($event_tags, $event_text) {
    $insert_sql =
"INSERT INTO analytics_events (
event_time, tags, event_text
)
VALUES (NOW(), ?, ?)";

    $encoded_tags = json_encode($event_tags);

    $this->_exec($insert_sql, "ss", $encoded_tags, $event_text);
}
/**
 * Count one page access and add its processing time to the runtime total.
 *
 * Two counters are touched:
 *  - 'access_sum', tagged with host, path, agent and referrer (incremented by 1)
 *  - 'runtime', tagged with host and path only (incremented by $time)
 *
 * @param string    $path     Requested path; an empty path is recorded as '/'.
 * @param mixed     $agent    Agent label stored in the 'agent' tag.
 * @param mixed     $referrer Referrer label stored in the 'referrer' tag.
 * @param int|float $time     Processing time to add to the 'runtime' counter.
 * @return void
 */
public function log_path_access(
    $path,
    $agent,
    $referrer,
    $time) {
    $effective_path = (strlen($path) == 0) ? '/' : $path;

    $access_tags = [
        'host' => $this->hostname,
        'path' => $effective_path,
        'agent' => $agent,
        'referrer' => $referrer,
    ];
    $runtime_tags = [
        'host' => $this->hostname,
        'path' => $effective_path
    ];

    $this->increment_counter($access_tags, 'access_sum');
    $this->increment_counter($runtime_tags, 'runtime', $time);
}
/**
 * Record an error for a path as an analytics event.
 *
 * @param mixed  $path    Path the error occurred on ('path' tag).
 * @param mixed  $code    Error/status code ('code' tag).
 * @param string $message Text stored as the event body.
 * @return void
 */
public function log_path_errcode(
    $path, $code, $message) {
    $event_tags = [
        'host' => $this->hostname,
        'path' => $path,
        'code' => $code
    ];

    $this->insert_event($event_tags, $message);
}
/**
 * Render one data point as a line-protocol line:
 *
 *     <table>[,<tag>=<value>...] <field>=<value>[,<field>=<value>...] <timestamp>
 *
 * (The layout and the `i` integer suffix match InfluxDB line protocol.)
 *
 * @param string          $table     Measurement name, emitted verbatim.
 * @param iterable        $tags      Tag key => value pairs. Keys must match
 *                                   /^[\w_]+$/. Null or empty values are
 *                                   skipped, because `key=` is not a valid tag.
 * @param iterable        $values    Field key => value pairs. Strings are
 *                                   quoted and escaped, integers get an `i`
 *                                   suffix, booleans become true/false,
 *                                   anything else is string-converted as is.
 * @param string|int      $timestamp Timestamp token, emitted verbatim.
 * @return string The assembled line, without a trailing newline.
 * @throws Exception When a tag or field key contains characters outside \w.
 */
public function generate_lp_line($table, $tags, $values, $timestamp) {
    $line_tags = [];
    foreach ($tags as $tag_key => $tag_value) {
        if (!preg_match('/^[\w_]+$/', $tag_key)) {
            throw new Exception('Invalid line tag key (' . $tag_key . ')!');
        }

        // A null/empty tag would render as "key=", which makes the whole line
        // unparseable, so such tags are left out instead.
        $tag_value = (string)($tag_value ?? '');
        if ($tag_value === '') {
            continue;
        }

        // Commas, equals signs and whitespace must be backslash-escaped in tags.
        $tag_value = preg_replace('/([,=\s])/', '\\\\$0', $tag_value);
        $line_tags[] = $tag_key . '=' . $tag_value;
    }

    $line_values = [];
    foreach ($values as $field_key => $field_value) {
        if (!preg_match('/^[\w_]+$/', $field_key)) {
            throw new Exception('Invalid line value key (' . $field_key . ')!');
        }

        if (is_string($field_value)) {
            // Escape double quotes and backslashes. The character class needs
            // a doubled backslash at the regex level: a single one escapes the
            // closing bracket and the pattern fails to compile.
            $field_value = preg_replace('/(["\\\\])/', '\\\\$0', $field_value);
            // Keep the record on one physical line.
            $field_value = preg_replace('/\n/', '\\\\n', $field_value);
            $field_value = '"' . $field_value . '"';
        } elseif (is_int($field_value)) {
            $field_value = $field_value . 'i';
        } elseif (is_bool($field_value)) {
            // Plain string conversion would turn false into ''.
            $field_value = $field_value ? 'true' : 'false';
        }

        $line_values[] = $field_key . '=' . $field_value;
    }

    // Only add the tag separator when there is at least one tag; a bare
    // trailing comma after the measurement name is invalid.
    $head = $table;
    if (count($line_tags) > 0) {
        $head .= ',' . implode(',', $line_tags);
    }

    return $head
        . ' ' . implode(',', $line_values)
        . ' ' . $timestamp;
}
/**
 * Export all analytics older than six minutes as line-protocol text and,
 * optionally, remove the exported rows.
 *
 * Both analytics_summations (one line per bucket/metric/tag set) and
 * analytics_events (one line per event) are exported under the measurement
 * name "access_metrics". The field key is the row's metric name. Timestamps
 * are unix seconds with nine zeros appended (nanoseconds).
 *
 * Everything runs in one transaction: on any failure it is rolled back and
 * the error is re-thrown, so nothing is deleted without having been returned.
 *
 * @param bool $delete When true, rows that were exported are deleted.
 * @return string Newline-terminated lines; empty string when nothing is due.
 * @throws \Throwable Any error raised while querying or formatting.
 */
public function pop_analytics($delete = true) {
    $this->sql_connection->begin_transaction();
    try {
        // Only rows strictly older than this barrier are touched, so the
        // 5-minute bucket that is still being written to is never exported.
        $barrier_time = $this->_exec("SELECT NOW() - INTERVAL 6 MINUTE AS ctime")->fetch_assoc()['ctime'];

        $data_category = "access_metrics";
        $out_str = '';

        $result = $this->_exec("
            SELECT *
            FROM analytics_summations
            WHERE time_bucket < ?
            ORDER BY metric, time_bucket DESC", "s", $barrier_time);
        while ($row = $result->fetch_assoc()) {
            $row_tags = json_decode($row['tags'], true) ?? [];
            // NOTE(review): strtotime() interprets the DATETIME in PHP's
            // timezone; confirm it matches the database session timezone.
            $out_str .= $this->generate_lp_line($data_category, $row_tags, [
                $row['metric'] => $row['metric_value']
            ], strtotime($row['time_bucket']) . "000000000") . "\n";
        }

        // The result set must be fetched afresh here: the previous loop ends
        // with $row unset, so reusing its loop condition would skip all events.
        $result = $this->_exec("
            SELECT *
            FROM analytics_events
            WHERE event_time < ?
            ORDER BY event_time DESC", "s", $barrier_time);
        while ($row = $result->fetch_assoc()) {
            $row_tags = json_decode($row['tags'], true) ?? [];
            // Events carry their time in event_time (there is no time_bucket
            // column); a NULL text is exported as an empty string field.
            $out_str .= $this->generate_lp_line($data_category, $row_tags, [
                $row['metric'] => (string)($row['event_text'] ?? '')
            ], strtotime($row['event_time']) . "000000000") . "\n";
        }

        if ($delete) {
            // Same strict comparison as the SELECTs: a row sitting exactly on
            // the barrier was not exported and therefore must not be deleted.
            $this->_exec("DELETE FROM analytics_summations WHERE time_bucket < ?", "s", $barrier_time);
            $this->_exec("DELETE FROM analytics_events WHERE event_time < ?", "s", $barrier_time);
        }

        $this->sql_connection->commit();
        return $out_str;
    } catch (\Throwable $th) {
        $this->sql_connection->rollback();
        throw $th;
    }
}
/**
 * Export summed analytics older than six minutes as JSON and, optionally,
 * remove the exported rows.
 *
 * Output shape (rows are grouped by metric, then by time bucket):
 *
 *     [ { "metric": <name>,
 *         "data": [ { "time": <time_bucket>,
 *                     "data": [ { "tags": {...}, "value": <float> }, ... ] },
 *                   ... ] },
 *       ... ]
 *
 * Only analytics_summations is read; events are not part of this export.
 * The work runs in one transaction that is rolled back on any error.
 *
 * @param bool $delete When true, rows that were exported are deleted.
 * @return string JSON document; "[]" when nothing is due.
 * @throws \Throwable Any error raised while querying or encoding.
 */
public function pop_analytics_json($delete = true) {
    $this->sql_connection->begin_transaction();
    try {
        $barrier_time = $this->_exec("SELECT NOW() - INTERVAL 6 MINUTE AS ctime")->fetch_assoc()['ctime'];
        $out_data = [];

        // The ORDER BY makes rows of one metric, and within it one bucket,
        // adjacent. The grouping below relies on that.
        $result = $this->_exec("
            SELECT *
            FROM analytics_summations
            WHERE time_bucket < ?
            ORDER BY metric, time_bucket DESC", "s", $barrier_time);

        $row = $result->fetch_assoc();
        $current_metric_collection = [];
        $current_time_bucket_collection = [];
        $current_metric = $row['metric'] ?? null;
        $current_time_bucket = $row['time_bucket'] ?? null;

        while (isset($row)) {
            $current_time_bucket_collection[] = [
                'tags' => json_decode($row['tags']),
                'value' => floatval($row['metric_value'])
            ];

            // Look ahead one row to detect group boundaries.
            $row = $result->fetch_assoc();

            $bucket_ended = !isset($row)
                || ($row['time_bucket'] != $current_time_bucket)
                || ($row['metric'] != $current_metric);
            if ($bucket_ended) {
                $current_metric_collection[] = [
                    'time' => $current_time_bucket,
                    'data' => $current_time_bucket_collection
                ];
                $current_time_bucket_collection = [];
                $current_time_bucket = $row['time_bucket'] ?? null;
            }

            $metric_ended = !isset($row) || ($row['metric'] != $current_metric);
            if ($metric_ended) {
                $out_data[] = [
                    'metric' => $current_metric,
                    'data' => $current_metric_collection
                ];
                $current_metric_collection = [];
                $current_metric = $row['metric'] ?? null;
            }
        }

        if ($delete) {
            // Strict "<" to mirror the SELECT: a bucket lying exactly on the
            // barrier was not exported, so deleting it would lose its data.
            $this->_exec("DELETE FROM analytics_summations WHERE time_bucket < ?", "s", $barrier_time);
        }

        $this->sql_connection->commit();
        return json_encode($out_data);
    } catch (\Throwable $th) {
        $this->sql_connection->rollback();
        throw $th;
    }
}
/**
 * Legacy export: dump analytics_access_sums and
 * analytics_processing_time_sums rows older than six minutes as
 * line-protocol text and, optionally, delete them.
 *
 * NOTE(review): this reads tables named analytics_access_sums and
 * analytics_processing_time_sums rather than analytics_summations; confirm
 * they still exist before calling this. Tag values are written unescaped.
 *
 * @param bool $delete When true, rows that were exported are deleted.
 * @return string Newline-terminated lines; empty string when nothing is due.
 * @throws \Throwable Any error raised while querying; the transaction is
 *                    rolled back first.
 */
public function pop_analytics_old($delete = true) {
    $this->sql_connection->begin_transaction();
    $out_data = "";
    try {
        $barrier_time = $this->_exec("SELECT NOW() - INTERVAL 6 MINUTE AS ctime")->fetch_assoc()['ctime'];

        $data = $this->_exec("
            SELECT *
            FROM analytics_access_sums
            WHERE time_bucket < ?
            ", "s", $barrier_time)->fetch_all(MYSQLI_ASSOC);
        $data_prefix = "analytics_access_sums";
        foreach ($data as $post_data) {
            $path = $post_data['request_path'];
            if ($path == '') {
                $path = '/';
            }
            $out_data .= $data_prefix . ",host=" . $post_data['host'] . ",agent=" . $post_data['agent'];
            $out_data .= ",path=" . $path . ",referrer=" . $post_data['referrer'];
            $out_data .= " access_sum=" . $post_data['access_sum'];
            $out_data .= " " . strtotime($post_data['time_bucket']) . "000000000\n";
        }

        $data = $this->_exec("
            SELECT *
            FROM analytics_processing_time_sums
            WHERE time_bucket < ?
            ", "s", $barrier_time)->fetch_all(MYSQLI_ASSOC);
        $data_prefix = "analytics_processing_time_sums";
        foreach ($data as $post_data) {
            $path = $post_data['request_path'];
            if ($path == '') {
                $path = '/';
            }
            $out_data .= $data_prefix . ",host=" . $post_data['host'];
            $out_data .= ",path=" . $path;
            $out_data .= " time_sum=" . $post_data['time_sum'];
            $out_data .= " " . strtotime($post_data['time_bucket']) . "000000000\n";
        }

        if ($delete) {
            // Strict "<" to mirror the SELECTs above: rows lying exactly on
            // the barrier were not exported and must survive until next time.
            $this->_exec("DELETE FROM analytics_access_sums WHERE time_bucket < ?", "s", $barrier_time);
            $this->_exec("DELETE FROM analytics_processing_time_sums WHERE time_bucket < ?", "s", $barrier_time);
        }

        $this->sql_connection->commit();
        return $out_data;
    } catch (\Throwable $th) {
        $this->sql_connection->rollback();
        throw $th;
    }
}
}
?>

View file

@ -19,6 +19,8 @@ parse_str($REQUEST_URI['query'] ?? '', $REQUEST_QUERY);
require_once 'setup/permissions.php';
require_once 'setup/analytics.php';
if(preg_match('/^\/api/', $REQUEST_PATH)) {
require_once 'serve/api.php';
}

View file

@ -23,6 +23,20 @@ switch($API_FUNCTION) {
echo json_encode($sql_adapter->get_postdata($match[2]));
break;
case 'metrics':
// TODO Change this to a "can access metrics", but whatever :>
if(!access_can_upload()) {
http_response_code(401);
echo json_encode([
'status' => '401 Unauthorized'
]);
die();
}
echo $analytics_adapter->pop_analytics($delete = true);
break;
case 'upload':
if(!access_can_upload()) {
http_response_code(401);
@ -81,8 +95,6 @@ switch($API_FUNCTION) {
}
$post_data['tags'] ??= [];
$post_data['tags'] []= "type:" . ($post_data['type'] ?? Post::deduce_type($file_path));
$post_data['tags'] []= "path:" . $file_path;
$sql_adapter->set_postdata($post_data);
}

View file

@ -0,0 +1,66 @@
<?php
// Request start time; the shutdown handler subtracts it from the end time to
// get the total compute time of the request.
$data_time_start = microtime(true);
// When set to true, the shutdown handler appends an HTML comment with the
// total page time to the output.
$analytics_enable_tail = false;
// Optional per-post object; the shutdown handler checks it with isset() and,
// if present, calls increment_counter() on it.
$analytics_post = null;
/**
 * Reduce the request's User-Agent header to a coarse category label.
 *
 * The Google check runs first on purpose: Googlebot's agent string also
 * contains "Mozilla", so testing for browsers first would misfile it.
 *
 * @return string "bot/google/<token>" for agents containing "Google",
 *                "user/<Mozilla|Chrome|Chromium>" for browser-like agents,
 *                "unidentified" otherwise (including a missing header).
 */
function deduce_user_agent() {
    // The header is optional (CLI runs, some bots and scripts omit it). Fall
    // back to an empty string rather than reading an undefined index and
    // passing null to preg_match().
    $real_agent = $_SERVER['HTTP_USER_AGENT'] ?? '';
    if (!is_string($real_agent)) {
        $real_agent = '';
    }

    if (preg_match('/(Googlebot|\w*Google\w*)/', $real_agent, $match)) {
        return "bot/google/" . $match[1];
    }
    if (preg_match('/(Mozilla|Chrome|Chromium)/', $real_agent, $match)) {
        return "user/" . $match[1];
    }

    return "unidentified";
}
/**
 * Tell whether the current request was classified as coming from a browser.
 *
 * @return int|false Result of preg_match(): 1 when the label produced by
 *                   deduce_user_agent() starts with "user", 0 otherwise.
 */
function analytics_is_user() {
    $agent_label = deduce_user_agent();

    return preg_match('/^user/', $agent_label);
}
// At the very end of every request: record the access (path, agent category,
// referrer, compute time) and update the per-post counters when a post object
// was registered in $analytics_post.
register_shutdown_function(function() {
    global $data_time_start;
    global $analytics_adapter;
    global $analytics_post;
    global $REQUEST_PATH;
    global $REQUEST_QUERY;
    global $analytics_enable_tail;

    $data_time_end = microtime(true);
    $compute_time = $data_time_end - $data_time_start;

    // 'magic' marks "no usable referrer". parse_url() yields null/false for
    // headers without a host part, so keep the marker in that case instead of
    // storing a null tag.
    $http_referer = 'magic';
    if(isset($_SERVER['HTTP_REFERER'])) {
        $referer_host = parse_url($_SERVER['HTTP_REFERER'], PHP_URL_HOST);
        if(is_string($referer_host) && $referer_host !== '') {
            $http_referer = $referer_host;
        }
    }

    // An explicit ?referer= / ?ref= query parameter wins over the header.
    $referrer = $REQUEST_QUERY['referer'] ?? $REQUEST_QUERY['ref'] ?? $http_referer;

    // The adapter does not exist when setup failed before creating it; a
    // shutdown handler must not raise a second fatal error in that case.
    if(isset($analytics_adapter)) {
        $analytics_adapter->log_path_access($REQUEST_PATH,
            deduce_user_agent(),
            $referrer,
            $compute_time);
    }

    if($analytics_enable_tail) {
        echo "<!-- Total page time was: " . $compute_time . " -->";
    }

    // $analytics_post lives in the global scope and has to be imported with
    // `global` above; without that, isset() only ever sees an undefined local
    // and this branch never runs.
    if(isset($analytics_post)) {
        $analytics_post->increment_counter("compute_time", $compute_time);

        if(analytics_is_user()) {
            $analytics_post->increment_counter("views");
        }
    }
});
?>

View file

@ -1,6 +1,7 @@
<?php
require_once 'db_handler/mysql_handler.php';
require_once 'db_handler/mysql_analytics_handler.php';
require_once 'db_handler/post_handler.php';
$db_params = $SITE_CONFIG['db'];
@ -28,6 +29,10 @@ try {
$sql_adapter = new MySQLHandler($db_connection,
$SITE_CONFIG['site_defaults']['uri_prefix'],
$db_params['prefix']);
$analytics_adapter = new MySQLAnalyticsHandler($db_connection,
$SITE_CONFIG['site_defaults']['uri_prefix']);
$adapter = new PostHandler($sql_adapter);
require_once 'dergdown.php';