Add Step 1: Direct mapping from MariaDB to RDF.
Dockerfile and docker-compose.yml for MariaDB container, map/step-01.rb implementing the W3C Direct Mapping for all 9 tables.
This commit is contained in:
parent
87dcd4d65c
commit
da22d312a9
5 changed files with 182232 additions and 2 deletions
8
Dockerfile
Normal file
8
Dockerfile
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
# MariaDB image preloaded with the theatre-migrants dump.
FROM mariadb:10.11

# Credentials and database name read by the image entrypoint on first start.
# NOTE(review): these are plain-text development credentials; map/step-01.rb
# connects with the same user/password/database, so keep them in sync.
ENV MARIADB_ROOT_PASSWORD=root
ENV MARIADB_DATABASE=migrants
ENV MARIADB_USER=migrants
ENV MARIADB_PASSWORD=1234

# Files in /docker-entrypoint-initdb.d/ are executed when the data directory
# is initialised, which loads the dump into the database.
COPY teatre-migrants.sql /docker-entrypoint-initdb.d/
|
||||||
35
README.md
35
README.md
|
|
@ -1,3 +1,34 @@
|
||||||
# migrants
|
# Theatre Migrants
|
||||||
|
|
||||||
To generate a knowledge graph about migrants in the theater in Europe.
|
To generate a knowledge graph about migrants in the theatre in Europe.
|
||||||
|
|
||||||
|
## Generating the ontology
|
||||||
|
|
||||||
|
The following steps describe how to generate the migrants RDF graph.
|
||||||
|
|
||||||
|
### Step 1 - Loading the input data into a relational database
|
||||||
|
|
||||||
|
#### Task
|
||||||
|
|
||||||
|
The file `teatre-migrants.sql` contains the dump of a MariaDB database. The tables involved in this schema are described in the file `db_schema.md`. We will load this data in MariaDB to access the data with SQL. To this end:
|
||||||
|
|
||||||
|
1. Create a Dockerfile to create a docker container for MariaDB.
|
||||||
|
|
||||||
|
2. Upload the dump into a database in the container.
|
||||||
|
|
||||||
|
3. Create a Ruby script `map/step-01.rb` that uses the gem `sequel` to connect to the database. The script should write a file called `graph-01.ttl` containing all the data from the tables loaded in the database, produced with the direct mapping from relational databases to RDF.
|
||||||
|
|
||||||
|
#### Summary
|
||||||
|
|
||||||
|
The `Dockerfile` creates a MariaDB 10.11 container that automatically loads `teatre-migrants.sql` on first start. The `docker-compose.yml` exposes the database on port 3306 with a healthcheck.
|
||||||
|
|
||||||
|
The script `map/step-01.rb` connects to the database via `sequel` and implements the [W3C Direct Mapping](https://www.w3.org/TR/rdb-direct-mapping/) for all 9 tables (`location`, `migration_table`, `organisation`, `person`, `person_profession`, `personnames`, `relationship`, `religions`, `work`). Each table row becomes an RDF resource identified by its primary key, each column becomes a datatype property, and each foreign key becomes an object property linking to the referenced row. The output file `graph-01.ttl` contains 162,029 triples.
|
||||||
|
|
||||||
|
To run:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker compose up -d
|
||||||
|
bundle exec ruby map/step-01.rb
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2 -
|
||||||
10
docker-compose.yml
Normal file
10
docker-compose.yml
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
# Single-service stack: the MariaDB container built from the local Dockerfile.
services:
  db:
    build: .
    ports:
      # Publish the database on the host so map/step-01.rb can reach it.
      - "3306:3306"
    healthcheck:
      # Healthy once a connection succeeds and InnoDB has finished initialising.
      test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
      interval: 5s
      timeout: 5s
      retries: 10
|
||||||
182045
graph-01.ttl
Normal file
182045
graph-01.ttl
Normal file
File diff suppressed because it is too large
Load diff
136
map/step-01.rb
Normal file
136
map/step-01.rb
Normal file
|
|
@ -0,0 +1,136 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Step 1: Direct Mapping from relational database to RDF
|
||||||
|
# Implements the W3C Direct Mapping (https://www.w3.org/TR/rdb-direct-mapping/)
|
||||||
|
|
||||||
|
require 'sequel'
|
||||||
|
require 'rdf'
|
||||||
|
require 'rdf/turtle'
|
||||||
|
|
||||||
|
# Prefix shared by every IRI generated by this script.
BASE_IRI = 'http://example.org/migrants/'

# Database connection. Each setting can be overridden through an environment
# variable; the defaults match the values baked into the Dockerfile and the
# port published by docker-compose.yml.
DB = Sequel.mysql2(
  host: ENV.fetch('MIGRANTS_DB_HOST', '127.0.0.1'),
  port: Integer(ENV.fetch('MIGRANTS_DB_PORT', 3306)),
  user: ENV.fetch('MIGRANTS_DB_USER', 'migrants'),
  database: ENV.fetch('MIGRANTS_DB_NAME', 'migrants'),
  password: ENV.fetch('MIGRANTS_DB_PASSWORD', '1234')
)
|
||||||
|
|
||||||
|
# Foreign key definitions: table -> { column -> [referenced_table, referenced_column] }
#
# Tables without an entry (e.g. location) have no foreign keys. The structure
# is frozen at every level so that neither the per-table hashes nor the
# [table, column] pairs can be mutated by accident.
FOREIGN_KEYS = {
  migration_table: {
    IDPerson: %i[person IDPerson],
    IDStartPlace: %i[location IDLocation],
    IDDestPlace: %i[location IDLocation]
  },
  organisation: {
    IDLocation: %i[location IDLocation]
  },
  person: {
    IDBirthPlace: %i[location IDLocation],
    IDDeathPlace: %i[location IDLocation]
  },
  personnames: {
    IDPerson: %i[person IDPerson]
  },
  person_profession: {
    IDPerson: %i[person IDPerson]
  },
  relationship: {
    IDPerson_active: %i[person IDPerson],
    IDPerson_passive: %i[person IDPerson],
    IDLocation: %i[location IDLocation],
    IDOrganisation: %i[organisation IDOrganisation]
  },
  religions: {
    IDPerson: %i[person IDPerson]
  },
  work: {
    IDPerson: %i[person IDPerson],
    IDLocation: %i[location IDLocation],
    IDOrganisation: %i[organisation IDOrganisation],
    IDOrganisation2: %i[organisation IDOrganisation]
  }
}.each_value { |columns| columns.each_value(&:freeze).freeze }.freeze
|
||||||
|
|
||||||
|
# Primary keys for each table: table -> primary-key column.
# The key set of this hash also determines which tables are exported.
PRIMARY_KEYS = {
  location: :IDLocation,
  migration_table: :IDMig,
  organisation: :IDOrganisation,
  person: :IDPerson,
  person_profession: :IDProfPerson,
  personnames: :IDPersonname,
  relationship: :IDRel,
  religions: :IDReligion,
  work: :IDWork
}.freeze
|
||||||
|
|
||||||
|
# Builds the IRI that identifies a single table row.
#
# @param table [Symbol, String] name of the table the row belongs to
# @param pk_value [Object] primary-key value of the row (stringified with to_s)
# @return [RDF::URI] BASE_IRI, the table name, "/" and the key, with table
#   name and key percent-encoded
def row_iri(table, pk_value)
  # encode_www_form_component targets HTML form data and writes a space as
  # "+", which is a literal plus sign inside an IRI path segment. A real "+"
  # in the input is already emitted as "%2B", so replacing "+" with "%20"
  # only ever touches encoded spaces.
  table_part, key_part = [table, pk_value].map do |part|
    URI.encode_www_form_component(part.to_s).gsub('+', '%20')
  end
  RDF::URI.new("#{BASE_IRI}#{table_part}/#{key_part}")
end
|
||||||
|
|
||||||
|
# Builds the IRI of the datatype property for one column of a table.
#
# @param table [Symbol, String] table name
# @param column [Symbol, String] column name
# @return [RDF::URI] BASE_IRI, the table name, "#" and the column name, with
#   both names percent-encoded
def column_iri(table, column)
  # Percent-encode both names so unusual identifiers still give a valid IRI;
  # "+" is how encode_www_form_component writes a space, hence the "%20" swap.
  table_part, column_part = [table, column].map do |name|
    URI.encode_www_form_component(name.to_s).gsub('+', '%20')
  end
  RDF::URI.new("#{BASE_IRI}#{table_part}##{column_part}")
end
|
||||||
|
|
||||||
|
# Builds the IRI used as the rdf:type of every row of a table.
#
# @param table [Symbol, String] table name
# @return [RDF::URI] BASE_IRI followed by the percent-encoded table name
def class_iri(table)
  # "+" is how encode_www_form_component writes a space; an IRI needs "%20".
  table_part = URI.encode_www_form_component(table.to_s).gsub('+', '%20')
  RDF::URI.new("#{BASE_IRI}#{table_part}")
end
|
||||||
|
|
||||||
|
# Builds the IRI of the object property generated for a foreign-key column.
#
# @param table [Symbol, String] table that owns the foreign key
# @param fk_col [Symbol, String] foreign-key column name
# @return [RDF::URI] BASE_IRI, the table name, "#ref-" and the column name,
#   with both names percent-encoded
def ref_iri(table, fk_col)
  # Percent-encode both names so unusual identifiers still give a valid IRI;
  # "+" is how encode_www_form_component writes a space, hence the "%20" swap.
  table_part, column_part = [table, fk_col].map do |name|
    URI.encode_www_form_component(name.to_s).gsub('+', '%20')
  end
  RDF::URI.new("#{BASE_IRI}#{table_part}#ref-#{column_part}")
end
|
||||||
|
|
||||||
|
# Converts a Ruby value read from the database into a typed RDF literal.
#
# Integer, Float, booleans, Time/DateTime and Date map to xsd:integer,
# xsd:double, xsd:boolean, xsd:dateTime and xsd:date respectively. Any other
# value is stringified and emitted without an explicit datatype.
#
# @param value [Object] non-nil column value
# @return [RDF::Literal]
def to_rdf_literal(value)
  case value
  when Integer
    RDF::Literal.new(value, datatype: RDF::XSD.integer)
  when Float
    RDF::Literal.new(value, datatype: RDF::XSD.double)
  when TrueClass, FalseClass
    RDF::Literal.new(value, datatype: RDF::XSD.boolean)
  when Time, DateTime
    # Must be tested before Date: DateTime is a subclass of Date and would
    # otherwise be typed as xsd:date. to_s ("2001-02-03 04:05:06 +0000" for
    # Time) is not a valid xsd:dateTime lexical form, so format explicitly.
    RDF::Literal.new(value.strftime('%Y-%m-%dT%H:%M:%S%:z'), datatype: RDF::XSD.dateTime)
  when Date
    RDF::Literal.new(value.to_s, datatype: RDF::XSD.date)
  else
    RDF::Literal.new(value.to_s)
  end
end
|
||||||
|
|
||||||
|
# Build the whole graph in memory: for every row of every table listed in
# PRIMARY_KEYS, emit one rdf:type triple plus one triple per non-NULL column.
graph = RDF::Graph.new

PRIMARY_KEYS.each do |table, pk_col|
  fk_defs = FOREIGN_KEYS.fetch(table, {})
  row_class = class_iri(table) # same for every row of the table

  DB[table].each do |row|
    subject = row_iri(table, row[pk_col])
    graph << [subject, RDF.type, row_class]

    row.each do |col, value|
      # NULL columns produce no triple.
      next if value.nil?

      ref_table, _ref_col = fk_defs[col.to_sym]
      if ref_table
        # Foreign key -> object property linking to the referenced row.
        # NOTE(review): the W3C Direct Mapping also emits the literal triple
        # for foreign-key columns; here they yield only the reference triple.
        # Confirm this is intended.
        graph << [subject, ref_iri(table, col), row_iri(ref_table, value)]
      else
        # Regular column -> datatype property.
        graph << [subject, column_iri(table, col), to_rdf_literal(value)]
      end
    end
  end
end
|
||||||
|
|
||||||
|
# Serialise the graph as Turtle next to the map/ directory (one level above
# this script) and report how many triples were written.
output_path = File.expand_path('../graph-01.ttl', __dir__)

# Prefix declarations handed to the Turtle writer.
turtle_prefixes = {
  rdf: RDF.to_uri,
  xsd: RDF::XSD.to_uri,
  base: RDF::URI.new(BASE_IRI)
}

RDF::Turtle::Writer.open(output_path, prefixes: turtle_prefixes) do |writer|
  graph.each_statement { |stmt| writer << stmt }
end

puts "Written #{graph.count} triples to #{output_path}"
|
||||||
Loading…
Reference in a new issue