feat: ✨ begin work on everything :P
parent 03b77c7395
commit dca5944549

6 changed files with 445 additions and 6 deletions
Gemfile.lock (new file, +46)

@@ -0,0 +1,46 @@
PATH
  remote: .
  specs:
    timeseries-hoarder (0.1.0)
      influxparser (~> 0.0.5)
      pg (~> 1.5)

GEM
  remote: https://rubygems.org/
  specs:
    ast (2.4.2)
    influxparser (0.0.5)
    json (2.6.3)
    parallel (1.22.1)
    parser (3.2.2.0)
      ast (~> 2.4.1)
    pg (1.5.3)
    rainbow (3.1.1)
    rake (13.0.6)
    regexp_parser (2.7.0)
    rexml (3.2.5)
    rubocop (1.50.1)
      json (~> 2.3)
      parallel (~> 1.10)
      parser (>= 3.2.0.0)
      rainbow (>= 2.2.2, < 4.0)
      regexp_parser (>= 1.8, < 3.0)
      rexml (>= 3.2.5, < 4.0)
      rubocop-ast (>= 1.28.0, < 2.0)
      ruby-progressbar (~> 1.7)
      unicode-display_width (>= 2.4.0, < 3.0)
    rubocop-ast (1.28.0)
      parser (>= 3.2.1.0)
    ruby-progressbar (1.13.0)
    unicode-display_width (2.4.2)

PLATFORMS
  x86_64-linux

DEPENDENCIES
  rake (~> 13.0)
  rubocop (~> 1.21)
  timeseries-hoarder!

BUNDLED WITH
   2.3.26
lib/timeseries/hoarder/CachingTable.rb (new file, +83)

@@ -0,0 +1,83 @@
require_relative 'Table.rb'

require 'time'
require 'json'

module Timeseries
  module Hoarder
    # Table subclass that maps a JSONB blob (e.g. a tag set) to a SERIAL ID,
    # memoizing known blobs in-process to avoid a round-trip per lookup.
    class CachingTable < Table
      def initialize(db, name, content_name = 'tags', tag_access_update_delay: 60)
        # Column names must be set before super, since Table#initialize
        # immediately ensures the table exists via table_creation.
        @content_name = content_name
        @id_column = content_name + '_id'

        super(db, name, 'ts_hoarder')

        @known_tags = {}
        @tag_access_times = {}
        @tag_access_updates = {}

        @tag_access_update_delay = tag_access_update_delay
      end

      def table_creation
        @pg.exec("CREATE TABLE ts_hoarder.#{@table_name} ( #{@id_column} SERIAL PRIMARY KEY, #{@content_name} JSONB, created_at TIMESTAMPTZ, last_used TIMESTAMPTZ )")

        # GIN index so lookups and containment queries on the JSONB column stay fast.
        @pg.exec("CREATE INDEX ON ts_hoarder.#{@table_name} USING GIN ( #{@content_name} )")
      end

      def load_cache_content
        @pg.exec("SELECT * FROM ts_hoarder.#{@table_name}") do |results|
          results.each do |tuple|
            tags = JSON.parse(tuple[@content_name])

            @known_tags[tags] = tuple[@id_column]
            @tag_access_times[tags] = Time.parse(tuple['last_used'])
          end
        end

        true
      end

      def create_entry(tags)
        return @known_tags[tags] if @known_tags.include? tags

        returned_id = nil

        @pg.transaction do
          # Serialize concurrent writers so the SELECT-then-INSERT below
          # cannot race and create duplicate rows for the same tag set.
          @pg.exec("LOCK TABLE ts_hoarder.#{@table_name}")

          res = @pg.exec_params("SELECT * FROM ts_hoarder.#{@table_name} WHERE #{@content_name} = $1::jsonb", [tags.to_json])

          if res.num_tuples >= 1
            returned_id = res[0][@id_column]
            @known_tags[tags] = returned_id
            @tag_access_times[tags] = Time.parse(res[0]['last_used'])
          else
            res = @pg.exec_params("INSERT INTO ts_hoarder.#{@table_name} (#{@content_name}, created_at, last_used) VALUES ($1::jsonb, NOW(), NOW()) RETURNING #{@id_column}", [tags.to_json])

            returned_id = res[0][@id_column]
            @known_tags[tags] = returned_id
            @tag_access_times[tags] = Time.now
          end
        end

        returned_id
      end

      def [](tags)
        access_time = Time.now
        # Once a tag set has not been touched for longer than the configured
        # delay, record the new access time and queue it for a deferred
        # last_used update.
        if (access_time - (@tag_access_times[tags] || Time.at(0))) > @tag_access_update_delay
          @tag_access_times[tags] = access_time
          @tag_access_updates[tags] = true
        end

        known_id = @known_tags[tags]
        return known_id unless known_id.nil?

        create_entry(tags)
      end
    end
  end
end
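A minimal usage sketch (not part of the commit; assumes only an object exposing the PG connection via #pg, which is all Table#initialize reads — the Struct stand-in and connection string below are illustrative):

require 'pg'
require_relative 'lib/timeseries/hoarder/CachingTable.rb'

# Hypothetical stand-in for Timeseries::Hoarder::Database.
Db = Struct.new(:pg)
db = Db.new(PG.connect("user=postgres dbname=ingestor_test"))
db.pg.exec("CREATE SCHEMA IF NOT EXISTS ts_hoarder")

tag_table = Timeseries::Hoarder::CachingTable.new(db, 'tags')
id = tag_table[{ 'host' => 'example' }]  # first call INSERTs; later calls hit the in-process cache
puts id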
lib/timeseries/hoarder/Table.rb (new file, +42)

@@ -0,0 +1,42 @@
module Timeseries
  module Hoarder
    # Minimal base class for schema-managed tables. Subclasses override
    # table_creation to issue their CREATE TABLE statements.
    class Table
      def initialize(db, table_name, table_schema = "public")
        @table_name = table_name
        @table_schema = table_schema

        @db = db
        @pg = @db.pg

        @created = false

        ensure_table_exists
      end

      def ensure_table_exists
        return if @created

        @pg.transaction do
          # Transaction-scoped advisory lock: serializes concurrent creators
          # and is released automatically at COMMIT/ROLLBACK.
          @pg.exec("SELECT pg_advisory_xact_lock(0)")

          r = @pg.exec_params("SELECT 1 FROM information_schema.tables WHERE table_name = $1 AND table_schema = $2", [@table_name, @table_schema])

          # `next` (not `return`) so the transaction block exits normally
          # and commits, releasing the lock.
          next if r.num_tuples >= 1

          table_creation
        end

        @created = true
      end

      def table_creation
        raise "No table creation string method provided!"
      end
    end
  end
end
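Table is an abstract base: ensure_table_exists takes the advisory lock and checks information_schema, and subclasses supply the DDL by overriding table_creation. A sketch of a concrete subclass (hypothetical, not part of the commit):

# Hypothetical subclass: Table calls table_creation at most once per process,
# and only when the table is missing.
class EventsTable < Timeseries::Hoarder::Table
  def table_creation
    @pg.exec("CREATE TABLE #{@table_schema}.#{@table_name} ( id SERIAL PRIMARY KEY, payload JSONB )")
  end
end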
lib/timeseries/hoarder/TimeseriesDatabase.rb (new file, +21)

@@ -0,0 +1,21 @@
require 'pg'

require_relative 'CachingTable.rb'

module Timeseries
  module Hoarder
    # Library entry point: owns the PG connection, ensures the ts_hoarder
    # schema exists and wires up the caching lookup tables.
    class Database
      attr_reader :pg
      attr_reader :data_sources

      def initialize(pg)
        @pg = pg

        @pg.exec("CREATE SCHEMA IF NOT EXISTS ts_hoarder")

        @data_sources = CachingTable.new(self, 'sources', 'source')
      end
    end
  end
end
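Wiring sketch for the entry point (not part of the commit; the connection string reuses the ingestor script's default):

require 'pg'
require_relative 'lib/timeseries/hoarder/TimeseriesDatabase.rb'

pg = PG.connect("user=postgres dbname=ingestor_test")
db = Timeseries::Hoarder::Database.new(pg)

# data_sources is a CachingTable over a JSONB 'source' column:
# identical source descriptors map to one stable source_id.
source_id = db.data_sources[{ 'host' => 'example' }]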
telegraf_psql_ingestor.rb (new file, +247)

@@ -0,0 +1,247 @@
require 'pg'
require 'json'
require 'influxparser'

Process.setproctitle('telegraf-tsdb-ingest')

# In Ruby, ARGV[0] is the first command-line argument.
CONNECT_URL = ARGV[0] || "user=postgres dbname=ingestor_test"
SCHEMA = 'telegraf_ingest'

$pg = PG.connect(CONNECT_URL)

$known_tables = {}

# Tags whose cardinality is too high to dedupe into their own lookup rows;
# they are stored inline as JSONB on each sample instead.
$high_cardinality_tags = {
  'process_name' => true,
  'pid' => true
}
# Tags that identify where a sample came from rather than what it measures.
$source_tags = {
  'host' => true,
  'location' => true
}

def ensure_schema_exists(schema)
  $pg.exec("CREATE SCHEMA IF NOT EXISTS #{schema}")
end
ensure_schema_exists(SCHEMA)

# Cache the set of existing tables so per-line ingestion can skip DDL.
def grab_table_list()
  $pg.exec("SELECT * FROM pg_catalog.pg_tables WHERE schemaname IN ('#{SCHEMA}', 'public');") do |result|
    result.each do |tuple|
      table = tuple['schemaname'] + '.' + tuple['tablename']
      $known_tables[table] = true
    end
  end
end

grab_table_list
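# --- Sketch, not part of the commit: a cheap sanity check that the ingest
# --- schema exists on this connection before any tables are created.
r = $pg.exec_params(
  "SELECT 1 FROM information_schema.schemata WHERE schema_name = $1",
  [SCHEMA]
)
warn "schema #{SCHEMA} is missing!" if r.ntuples.zero?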
# Interns values (source descriptors, metric names, tag sets) into lookup
# tables, handing back the integer ID for any value and caching the mapping.
class DedupContainer
  attr_reader :cache

  def initialize(pg, dataname, datatype)
    @pg = pg

    @dataname = dataname
    @datatype = datatype

    @tablename = "#{SCHEMA}.#{@dataname}s"
    @id_column = "#{@dataname}_id"

    @cache = {}

    setup_table unless $known_tables[@tablename]
  end

  def setup_table
    @pg.exec("CREATE TABLE #{@tablename} ( #{@id_column} SERIAL PRIMARY KEY, #{@dataname} #{@datatype} UNIQUE)")
    # JSONB columns get a GIN index; scalar types get a plain btree.
    @pg.exec("CREATE INDEX ON #{@tablename} #{@datatype == 'JSONB' ? 'USING GIN' : ''} (#{@dataname})")
  end

  def load_table
    @pg.exec("SELECT * FROM #{@tablename}") do |result|
      result.each do |tuple|
        # Cache keys in their Ruby form so they match later #[] lookups.
        key = @datatype == 'JSONB' ? JSON.parse(tuple[@dataname]) : tuple[@dataname]
        @cache[key] = tuple[@id_column].to_i
      end
    end
  end

  def add_key(key)
    key_str = key
    key_str = key.to_json if @datatype == 'JSONB'

    # DO UPDATE (rather than DO NOTHING) so RETURNING still yields the
    # existing row's ID when another writer inserted the value first.
    upsert_statement = "INSERT INTO #{@tablename}(#{@dataname}) VALUES ($1::#{@datatype}) ON CONFLICT (#{@dataname}) DO UPDATE SET #{@dataname} = EXCLUDED.#{@dataname} RETURNING #{@id_column}"

    id_res = @pg.exec_params("SELECT #{@id_column} FROM #{@tablename} WHERE #{@dataname} = $1::#{@datatype}", [key_str])
    if id_res.ntuples == 0
      id_res = @pg.exec_params(upsert_statement, [key_str])
    end

    key_id = id_res[0][@id_column].to_i

    @cache[key] = key_id

    key_id
  end

  def [](key)
    r = @cache[key]
    return r unless r.nil?

    add_key key
  end
end
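# Illustration (not part of the commit) of what a cold-cache lookup such as
# $known_metrics['usage_user'] sends over the wire:
#
#   SELECT metric_id FROM telegraf_ingest.metrics WHERE metric = $1::VARCHAR
#   -- no rows yet, so:
#   INSERT INTO telegraf_ingest.metrics(metric) VALUES ($1::VARCHAR)
#     ON CONFLICT (metric) DO UPDATE SET metric = EXCLUDED.metric
#     RETURNING metric_id
#
# The returned ID is memoized in @cache, so subsequent lookups never leave
# the process.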
# One TimescaleDB hypertable per Telegraf series, plus a human-facing view
# that resolves the source/tag/metric IDs back to their values.
class TimeseriesTable
  attr_reader :internal_tablename

  def initialize(pg, tablename)
    @pg = pg

    @tablename = tablename
    @internal_tablename = "#{SCHEMA}._timeseries_#{tablename}"

    @chunk_time_interval = '1d'
    @compression_interval = '2d'

    @retention_time = '6 months'

    setup_tables unless $known_tables[@internal_tablename]
  end

  def setup_tables
    @pg.exec <<-SQL
      CREATE TABLE #{@internal_tablename} (
        time TIMESTAMPTZ NOT NULL,
        source_id INT NOT NULL,
        tag_id INT NOT NULL,
        metric_id INT NOT NULL,
        high_cardinality_tags JSONB,
        ts_value NUMERIC,

        FOREIGN KEY (source_id) REFERENCES #{SCHEMA}.sources (source_id),
        FOREIGN KEY (tag_id) REFERENCES #{SCHEMA}.tags (tag_id),
        FOREIGN KEY (metric_id) REFERENCES #{SCHEMA}.metrics (metric_id)
      )
    SQL
    @pg.exec "CREATE INDEX ON #{@internal_tablename} (metric_id, source_id, tag_id)"
    @pg.exec "SELECT * FROM create_hypertable('#{@internal_tablename}', 'time', chunk_time_interval => INTERVAL '#{@chunk_time_interval}')"

    # Compress chunks past the compression interval, segmented by the ID
    # columns so rows belonging to one series compress together.
    @pg.exec "ALTER TABLE #{@internal_tablename} SET (timescaledb.compress, timescaledb.compress_segmentby = 'source_id, tag_id, metric_id, high_cardinality_tags')"

    @pg.exec "SELECT * FROM add_compression_policy('#{@internal_tablename}', INTERVAL '#{@compression_interval}')"
    @pg.exec "SELECT * FROM add_retention_policy('#{@internal_tablename}', INTERVAL '#{@retention_time}')"

    @pg.exec <<-SQL
      CREATE VIEW #{@tablename} AS (
        SELECT time, source, tag, metric, high_cardinality_tags, ts_value
        FROM #{@internal_tablename}
        INNER JOIN #{SCHEMA}.sources USING (source_id)
        INNER JOIN #{SCHEMA}.tags USING (tag_id)
        INNER JOIN #{SCHEMA}.metrics USING (metric_id)
      )
    SQL
  end
end
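# Read-path illustration (not part of the commit): the view named after the
# series hides the ID indirection, so a hypothetical 'cpu' series can be
# queried by plain tag/metric values:
#
#   SELECT time, ts_value
#   FROM cpu
#   WHERE metric = 'usage_user' AND source @> '{"host": "example"}'
#   ORDER BY time DESC
#   LIMIT 10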
$known_sources = DedupContainer.new($pg, 'source', 'JSONB')
$known_metrics = DedupContainer.new($pg, 'metric', 'VARCHAR')
$known_tags = DedupContainer.new($pg, 'tag', 'JSONB')

$timeseries = {}

# Bring-up debug output.
puts $known_tables
puts $known_sources

puts $known_sources[{"host" => "xnm-core.lucidragons.de"}]

def ingest_line(line)
  begin
    line = InfluxParser.parse_point(line)
  rescue => e
    STDERR.puts "Error in line protocol parsing: #{e}"
    return
  end

  series = $timeseries[line['series']]
  if series.nil?
    series = TimeseriesTable.new($pg, line['series'])
    $timeseries[line['series']] = series
  end

  line_source_tags = {}
  line_series_tags = {}
  line_high_cardinality_tags = {}

  tags = line['tags']
  values = line['values']

  # Fold a "metric" tag plus generic "value" field into a named field.
  if tags.include?('metric') && values.include?('value')
    values[tags['metric']] = values['value']

    tags.delete 'metric'
    values.delete 'value'
  end

  # Route each tag into one of the three buckets declared at the top.
  tags.each do |tag, tag_value|
    if $source_tags[tag]
      line_source_tags[tag] = tag_value
    elsif $high_cardinality_tags[tag]
      line_high_cardinality_tags[tag] = tag_value
    else
      line_series_tags[tag] = tag_value
    end
  end

  line_high_cardinality_tags = nil if line_high_cardinality_tags.empty?

  # Influx line-protocol timestamps are in nanoseconds.
  timestamp = Time.at(line['timestamp'].to_f * 1e-9)

  line_source_id = $known_sources[line_source_tags]
  line_series_id = $known_tags[line_series_tags]

  metric_ids_array = []
  values_array = []

  values.each do |metric, value|
    next unless value.is_a? Numeric

    metric_ids_array << $known_metrics[metric]
    values_array << value
  end

  puts "Inserting into #{series.internal_tablename}"

  # Postgres array literals; unnest() below fans them out into one row per metric.
  metric_ids_array = '{' + metric_ids_array.join(',') + '}'
  values_array = '{' + values_array.join(',') + '}'

  insert_statement = <<-SQL
    INSERT INTO #{series.internal_tablename}
      (time, source_id, tag_id, metric_id, ts_value, high_cardinality_tags)
    VALUES ($1::timestamptz, $2::int, $3::int, unnest($4::int[]), unnest($5::numeric[]), $6::jsonb)
  SQL
  # &.to_json keeps an absent tag set as SQL NULL rather than JSON "null".
  $pg.exec_params(insert_statement, [timestamp, line_source_id, line_series_id, metric_ids_array, values_array, line_high_cardinality_tags&.to_json])
end
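# Worked example (not part of the commit) of how one line is routed, with
# illustrative values:
#
#   ingest_line('cpu,host=example,location=lab,pid=4242 usage_user=1.25 1700000000000000000')
#
#   host, location -> line_source_tags           -> one row in telegraf_ingest.sources
#   pid            -> line_high_cardinality_tags -> stored inline as JSONB per sample
#   (none left)    -> line_series_tags           -> one row in telegraf_ingest.tags
#   usage_user     -> telegraf_ingest.metrics    -> metric_id, value 1.25 in ts_value
#   1700000000000000000 ns                       -> 2023-11-14 22:13:20 UTC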
$stdin.sync = true

lines_queue = Queue.new

# Reader thread: stop at EOF instead of enqueueing nil forever.
Thread.new do
  while (line = $stdin.gets)
    lines_queue << line
  end
end

# Batch everything that arrived in the last 10 seconds into one transaction.
loop do
  sleep 10
  next if lines_queue.empty?

  $pg.transaction do
    ingest_line(lines_queue.pop) until lines_queue.empty?
  end
end
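A smoke-test sketch (not part of the commit; the script is meant to sit behind a line-protocol producer such as Telegraf, so any source of Influx line protocol on stdin will do):

# Hypothetical one-liner feeding a single sample through the ingestor:
#   ruby -e 'puts "cpu,host=example usage_user=1.25 #{Time.now.to_i}000000000"' \
#     | ruby telegraf_psql_ingestor.rb "user=postgres dbname=ingestor_test"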
@@ -8,16 +8,15 @@ Gem::Specification.new do |spec|
   spec.authors = ["xaseiresh"]
   spec.email = ["davidbailey.2889@gmail.com"]
 
-  spec.summary = "TODO: Write a short summary, because RubyGems requires one."
-  spec.description = "TODO: Write a longer description or delete this line."
-  spec.homepage = "TODO: Put your gem's website or public repo URL here."
+  spec.summary = "Quick&Dirty time series ingestor gem"
+  spec.description = "Quickly, comfortably, reliably and flexibly ingest your Influx-Style time series into TimescaleDB"
+  spec.homepage = "https://forgejo.lucidragons.de/lucidergs/timeseries-hoarder"
   spec.required_ruby_version = ">= 2.6.0"
 
   spec.metadata["allowed_push_host"] = "TODO: Set to your gem server 'https://example.com'"
 
   spec.metadata["homepage_uri"] = spec.homepage
-  spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
-  spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
+  spec.metadata["source_code_uri"] = "https://forgejo.lucidragons.de/lucidergs/timeseries-hoarder"
 
   # Specify which files should be added to the gem when it is released.
   # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
@@ -31,7 +30,8 @@ Gem::Specification.new do |spec|
   spec.require_paths = ["lib"]
 
   # Uncomment to register a new dependency of your gem
-  # spec.add_dependency "example-gem", "~> 1.0"
+  spec.add_dependency "pg", "~> 1.5"
+  spec.add_dependency "influxparser", "~> 0.0.5"
 
   # For more information and examples about making a new gem, check out our
   # guide at: https://bundler.io/guides/creating_gem.html