19 queries in updates/ convert categorical columns (continent, country, city, gender, profession, etc.) from literals to typed RDF objects with rdfs:label. map/step-02.rb applies them to produce data/graph-02.ttl. Also fix step-01.rb to sanitize column names with spaces and avoid prefix serialization issues with fragment IRIs.
139 lines
3.7 KiB
Ruby
139 lines
3.7 KiB
Ruby
#!/usr/bin/env ruby
|
|
# frozen_string_literal: true
|
|
|
|
# Step 1: Direct Mapping from relational database to RDF
|
|
# Implements the W3C Direct Mapping (https://www.w3.org/TR/rdb-direct-mapping/)
|
|
|
|
require 'sequel'
|
|
require 'rdf'
|
|
require 'rdf/turtle'
|
|
|
|
# Base IRI under which every class, property and row IRI is minted.
BASE_IRI = 'http://example.org/migrants/'

# Connection to the source MySQL database.
#
# Each setting can be overridden through an environment variable so that
# credentials do not have to live in the script. The fallbacks are the values
# that used to be hard-coded, so running without any variable set behaves as
# before.
DB = Sequel.mysql2(
  host: ENV.fetch('MIGRANTS_DB_HOST', '127.0.0.1'),
  # Environment values are strings; Integer() rejects a malformed port loudly.
  port: Integer(ENV.fetch('MIGRANTS_DB_PORT', '3306')),
  user: ENV.fetch('MIGRANTS_DB_USER', 'migrants'),
  database: ENV.fetch('MIGRANTS_DB_NAME', 'migrants'),
  password: ENV.fetch('MIGRANTS_DB_PASSWORD', '1234')
)
|
|
|
|
# Foreign key definitions: table -> { column -> [referenced_table, referenced_column] }
#
# The structure is frozen at every level (outer hash, per-table hashes and the
# target pairs), so no part of the mapping can be mutated by accident.
FOREIGN_KEYS = {
  migration_table: {
    IDPerson: %i[person IDPerson],
    IDStartPlace: %i[location IDLocation],
    IDDestPlace: %i[location IDLocation]
  },
  organisation: {
    IDLocation: %i[location IDLocation]
  },
  person: {
    IDBirthPlace: %i[location IDLocation],
    IDDeathPlace: %i[location IDLocation]
  },
  personnames: {
    IDPerson: %i[person IDPerson]
  },
  person_profession: {
    IDPerson: %i[person IDPerson]
  },
  relationship: {
    IDPerson_active: %i[person IDPerson],
    IDPerson_passive: %i[person IDPerson],
    IDLocation: %i[location IDLocation],
    IDOrganisation: %i[organisation IDOrganisation]
  },
  religions: {
    IDPerson: %i[person IDPerson]
  },
  work: {
    IDPerson: %i[person IDPerson],
    IDLocation: %i[location IDLocation],
    IDOrganisation: %i[organisation IDOrganisation],
    IDOrganisation2: %i[organisation IDOrganisation]
  }
  # each_value returns its receiver, which lets the freezes chain outwards.
}.each_value { |columns| columns.each_value(&:freeze).freeze }.freeze
|
|
|
|
# Primary key column of each table (table -> column).
# Entry order is the order in which Hash iteration visits the tables.
PRIMARY_KEYS = {
  location:          :IDLocation,
  migration_table:   :IDMig,
  organisation:      :IDOrganisation,
  person:            :IDPerson,
  person_profession: :IDProfPerson,
  personnames:       :IDPersonname,
  relationship:      :IDRel,
  religions:         :IDReligion,
  work:              :IDWork
}.freeze
|
|
|
|
# Builds the IRI that identifies a single row of +table+.
#
# The key is percent-encoded so it is safe inside an IRI path segment.
# URI.encode_www_form_component targets form data and writes a space as "+",
# which inside a path is a literal plus sign rather than a space. Those are
# rewritten to "%20"; a literal "+" in the key has already become "%2B" by
# then, so the replacement cannot touch real plus signs.
#
# @param table [Symbol, String] table name, interpolated as a path segment
# @param pk_value [Object] primary key value; converted with #to_s
# @return [RDF::URI] BASE_IRI followed by "<table>/<encoded key>"
def row_iri(table, pk_value)
  encoded_key = URI.encode_www_form_component(pk_value.to_s).gsub('+', '%20')
  RDF::URI.new("#{BASE_IRI}#{table}/#{encoded_key}")
end
|
|
|
|
# Reduces a column name to a token made of ASCII letters, digits, "_" and "-".
#
# Every run of other characters (underscores included) collapses to a single
# "_", after which one leading and one trailing "_" are removed.
#
# @param name [#to_s] raw column name
# @return [String] sanitized name; may be empty
def sanitize_name(name)
  collapsed = name.to_s.gsub(/[^a-zA-Z0-9-]+/, '_')
  collapsed.delete_prefix('_').delete_suffix('_')
end
|
|
|
|
# Property IRI for a regular column: BASE_IRI + "<table>#<sanitized column>".
#
# @param table [Symbol, String] table name
# @param column [#to_s] column name; passed through sanitize_name
# @return [RDF::URI]
def column_iri(table, column)
  fragment = sanitize_name(column)
  RDF::URI.new("#{BASE_IRI}#{table}##{fragment}")
end
|
|
|
|
# Class IRI for a table: BASE_IRI followed by the table name.
#
# @param table [Symbol, String] table name
# @return [RDF::URI]
def class_iri(table)
  RDF::URI.new(BASE_IRI + table.to_s)
end
|
|
|
|
# Property IRI for a foreign key column:
# BASE_IRI + "<table>#ref-<sanitized column>".
#
# @param table [Symbol, String] table that owns the foreign key
# @param fk_col [#to_s] foreign key column name; passed through sanitize_name
# @return [RDF::URI]
def ref_iri(table, fk_col)
  property_name = "ref-#{sanitize_name(fk_col)}"
  RDF::URI.new("#{BASE_IRI}#{table}##{property_name}")
end
|
|
|
|
# Converts a Ruby value into an RDF literal, typed by the value's class.
#
#   Integer         -> xsd:integer
#   Float           -> xsd:double
#   Time, DateTime  -> xsd:dateTime, written as YYYY-MM-DDThh:mm:ss+hh:mm
#   Date            -> xsd:date
#   true / false    -> xsd:boolean
#   anything else   -> plain literal built from value.to_s
#
# Fractional seconds are not written for date-time values.
#
# @param value [Object] non-nil value
# @return [RDF::Literal]
def to_rdf_literal(value)
  case value
  when Integer
    RDF::Literal.new(value, datatype: RDF::XSD.integer)
  when Float
    RDF::Literal.new(value, datatype: RDF::XSD.double)
  when Time, DateTime
    # This branch has to come before Date: DateTime is a subclass of Date and
    # would otherwise be typed xsd:date. strftime is used because #to_s on
    # Time ("2020-01-02 03:04:05 UTC") is not a valid xsd:dateTime lexical form.
    RDF::Literal.new(value.strftime('%Y-%m-%dT%H:%M:%S%:z'), datatype: RDF::XSD.dateTime)
  when Date
    RDF::Literal.new(value.to_s, datatype: RDF::XSD.date)
  when TrueClass, FalseClass
    RDF::Literal.new(value, datatype: RDF::XSD.boolean)
  else
    RDF::Literal.new(value.to_s)
  end
end
|
|
|
|
# Build the whole graph in memory: for every row one rdf:type triple, plus one
# triple per non-NULL column value.
graph = RDF::Graph.new

PRIMARY_KEYS.each_pair do |table, pk_col|
  references = FOREIGN_KEYS.fetch(table, {})

  DB[table].each do |record|
    subject = row_iri(table, record[pk_col])

    # rdf:type
    graph << [subject, RDF.type, class_iri(table)]

    record.each do |col, value|
      # NULL columns produce no triple.
      next if value.nil?

      triple =
        if (target = references[col.to_sym])
          # Foreign key -> object property pointing at the referenced row
          [subject, ref_iri(table, col), row_iri(target.first, value)]
        else
          # Regular column -> datatype property
          [subject, column_iri(table, col), to_rdf_literal(value)]
        end
      graph << triple
    end
  end
end

# Serialize as Turtle next to the script's parent directory, under data/.
# Only the rdf: and xsd: prefixes are declared.
output_path = File.expand_path('../data/graph-01.ttl', __dir__)
turtle_prefixes = { rdf: RDF.to_uri, xsd: RDF::XSD.to_uri }

RDF::Turtle::Writer.open(output_path, prefixes: turtle_prefixes) do |writer|
  graph.each_statement do |statement|
    writer << statement
  end
end

puts "Written #{graph.count} triples to #{output_path}"
|