migrants/map/step-01.rb
Daniel Hernandez da22d312a9 Add Step 1: Direct mapping from MariaDB to RDF.
Dockerfile and docker-compose.yml for MariaDB container,
map/step-01.rb implementing the W3C Direct Mapping for all 9 tables.
2026-02-26 16:42:30 +01:00

136 lines
3.6 KiB
Ruby

#!/usr/bin/env ruby
# frozen_string_literal: true
# Step 1: Direct Mapping from relational database to RDF
# Implements the W3C Direct Mapping (https://www.w3.org/TR/rdb-direct-mapping/)
require 'sequel'
require 'rdf'
require 'rdf/turtle'
# Base IRI under which every class, property and row IRI of the output is minted.
BASE_IRI = 'http://example.org/migrants/'

# Database connection used by the mapping.
#
# Every setting can be overridden through an environment variable so that
# credentials do not have to live in the source; when a variable is unset the
# default below is used, which reproduces the previously hard-coded values.
DB = Sequel.mysql2(
  host: ENV.fetch('MIGRANTS_DB_HOST', '127.0.0.1'),
  # Integer() rejects a malformed port instead of silently turning it into 0.
  port: Integer(ENV.fetch('MIGRANTS_DB_PORT', '3306')),
  user: ENV.fetch('MIGRANTS_DB_USER', 'migrants'),
  database: ENV.fetch('MIGRANTS_DB_NAME', 'migrants'),
  password: ENV.fetch('MIGRANTS_DB_PASSWORD', '1234')
)
# Foreign key definitions: table -> { column -> [referenced_table, referenced_column] }
#
# Tables without an entry here have no foreign keys. The structure is frozen at
# every level (outer hash, per-table hashes and the [table, column] pairs), so
# the lookup table cannot be altered by accident while rows are being mapped.
FOREIGN_KEYS = {
  migration_table: {
    IDPerson: %i[person IDPerson],
    IDStartPlace: %i[location IDLocation],
    IDDestPlace: %i[location IDLocation]
  },
  organisation: {
    IDLocation: %i[location IDLocation]
  },
  person: {
    IDBirthPlace: %i[location IDLocation],
    IDDeathPlace: %i[location IDLocation]
  },
  personnames: {
    IDPerson: %i[person IDPerson]
  },
  person_profession: {
    IDPerson: %i[person IDPerson]
  },
  relationship: {
    IDPerson_active: %i[person IDPerson],
    IDPerson_passive: %i[person IDPerson],
    IDLocation: %i[location IDLocation],
    IDOrganisation: %i[organisation IDOrganisation]
  },
  religions: {
    IDPerson: %i[person IDPerson]
  },
  work: {
    IDPerson: %i[person IDPerson],
    IDLocation: %i[location IDLocation],
    IDOrganisation: %i[organisation IDOrganisation],
    IDOrganisation2: %i[organisation IDOrganisation]
  }
}.transform_values { |columns| columns.transform_values(&:freeze).freeze }.freeze
# Primary-key column of every mapped table (table -> column).
# The key order is also the order in which the tables are exported.
PRIMARY_KEYS = [
  %i[location IDLocation],
  %i[migration_table IDMig],
  %i[organisation IDOrganisation],
  %i[person IDPerson],
  %i[person_profession IDProfPerson],
  %i[personnames IDPersonname],
  %i[relationship IDRel],
  %i[religions IDReligion],
  %i[work IDWork]
].to_h.freeze
# Builds the IRI that identifies a single row of +table+.
#
# The IRI has the shape "<BASE_IRI><table>/<encoded key>".
#
# @param table [Symbol, String] table name, inserted verbatim as a path segment
# @param pk_value [Object] key value of the row; converted with #to_s
# @return [RDF::URI]
def row_iri(table, pk_value)
  # encode_www_form_component implements HTML form encoding, which writes a
  # space as "+". Inside an IRI path "+" is a literal plus sign, so spaces are
  # rewritten as "%20". A literal "+" in the key has already become "%2B", hence
  # every "+" still present in the encoded string stands for a space.
  encoded_key = URI.encode_www_form_component(pk_value.to_s).gsub('+', '%20')
  RDF::URI.new("#{BASE_IRI}#{table}/#{encoded_key}")
end
# Builds the property IRI used for a plain (non foreign-key) column.
#
# The IRI has the shape "<BASE_IRI><table>#<column>".
#
# @param table [Symbol, String] table name
# @param column [Symbol, String] column name
# @return [RDF::URI]
def column_iri(table, column)
  iri = format('%s%s#%s', BASE_IRI, table, column)
  RDF::URI.new(iri)
end
# Builds the class IRI that rows of +table+ are typed with.
#
# The IRI is BASE_IRI followed directly by the table name.
#
# @param table [Symbol, String] table name
# @return [RDF::URI]
def class_iri(table)
  RDF::URI.new(BASE_IRI + table.to_s)
end
# Builds the property IRI that links a row to the row referenced by a
# foreign-key column.
#
# The IRI has the shape "<BASE_IRI><table>#ref-<fk_col>".
#
# @param table [Symbol, String] table that owns the foreign key
# @param fk_col [Symbol, String] foreign-key column name
# @return [RDF::URI]
def ref_iri(table, fk_col)
  iri = [BASE_IRI, table, '#ref-', fk_col].join
  RDF::URI.new(iri)
end
# Lexical layout used for xsd:dateTime values: ISO 8601 with a numeric offset.
XSD_DATE_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S%:z'

# Converts a Ruby value read from the database into an RDF literal.
#
# Integers, floats, dates, timestamps and booleans receive the matching XSD
# datatype; any other value becomes a plain literal of its #to_s form.
#
# @param value [Object] non-nil column value
# @return [RDF::Literal]
def to_rdf_literal(value)
  case value
  when Integer
    RDF::Literal.new(value, datatype: RDF::XSD.integer)
  when Float
    RDF::Literal.new(value, datatype: RDF::XSD.double)
  when Time, DateTime
    # Must be tested before Date: DateTime is a subclass of Date and would
    # otherwise be typed as xsd:date. The value is formatted explicitly because
    # Time#to_s ("2020-01-02 03:04:05 +0100") is not a valid xsd:dateTime
    # lexical form. Sub-second precision is not emitted.
    RDF::Literal.new(value.strftime(XSD_DATE_TIME_FORMAT), datatype: RDF::XSD.dateTime)
  when Date
    # Date#to_s already yields the ISO 8601 form YYYY-MM-DD.
    RDF::Literal.new(value.to_s, datatype: RDF::XSD.date)
  when TrueClass, FalseClass
    RDF::Literal.new(value, datatype: RDF::XSD.boolean)
  else
    RDF::Literal.new(value.to_s)
  end
end
# Build the RDF graph: every row of every table listed in PRIMARY_KEYS becomes
# one subject, typed with the table's class IRI.
graph = RDF::Graph.new

PRIMARY_KEYS.each_pair do |table, pk_col|
  table_fks = FOREIGN_KEYS.fetch(table, {})

  DB[table].each do |row|
    node = row_iri(table, row[pk_col])
    graph << [node, RDF.type, class_iri(table)]

    # NULL columns produce no triple at all.
    row.compact.each_pair do |col, value|
      target = table_fks[col.to_sym]

      if target
        # Foreign-key column: emit only the link to the referenced row;
        # the referenced column name is not needed to build its IRI.
        graph << [node, ref_iri(table, col), row_iri(target.first, value)]
      else
        # Any other column (the primary key included): emit a typed literal.
        graph << [node, column_iri(table, col), to_rdf_literal(value)]
      end
    end
  end
end
# Serialise the graph as Turtle into graph-01.ttl, located in the parent of the
# directory that contains this script.
output_path = File.expand_path('../graph-01.ttl', __dir__)

turtle_prefixes = {
  rdf: RDF.to_uri,
  xsd: RDF::XSD.to_uri,
  base: RDF::URI.new(BASE_IRI)
}

RDF::Turtle::Writer.open(output_path, prefixes: turtle_prefixes) do |writer|
  graph.each_statement do |statement|
    writer << statement
  end
end

puts "Written #{graph.count} triples to #{output_path}"