Add Step 1: Direct mapping from MariaDB to RDF.
Dockerfile and docker-compose.yml for MariaDB container, map/step-01.rb implementing the W3C Direct Mapping for all 9 tables.
This commit is contained in:
parent
87dcd4d65c
commit
da22d312a9
5 changed files with 182232 additions and 2 deletions
8
Dockerfile
Normal file
8
Dockerfile
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
FROM mariadb:10.11
|
||||
|
||||
ENV MARIADB_ROOT_PASSWORD=root
|
||||
ENV MARIADB_DATABASE=migrants
|
||||
ENV MARIADB_USER=migrants
|
||||
ENV MARIADB_PASSWORD=1234
|
||||
|
||||
COPY teatre-migrants.sql /docker-entrypoint-initdb.d/
|
||||
35
README.md
35
README.md
|
|
@ -1,3 +1,34 @@
|
|||
# migrants
|
||||
# Theatre Migrants
|
||||
|
||||
To generate a knowledge graph about migrants in the theater in Europe.
|
||||
To generate a knowledge graph about migrants in the theatre in Europe.
|
||||
|
||||
## Generating the ontology
|
||||
|
||||
The following steps describe how to generate the migrants RDF graph.
|
||||
|
||||
### Step 1 - Loading the input data into a relational database
|
||||
|
||||
#### Task
|
||||
|
||||
The file `teatre-migrants.sql` contains the dump of a MariaDB database. The tables involved in this schema are described in the file `db_schema.md`. We will load this data in MariaDB to access the data with SQL. To this end:
|
||||
|
||||
1. Create a Dockerfile to create a docker container for MariaDB.
|
||||
|
||||
2. Upload the dump into a database in the container.
|
||||
|
||||
3. Create a Ruby script `map/step-01.rb` that uses the gem `sequel` to connect to the database. This Ruby script should return a file called `graph-01.ttl` containing all the data from the tables loaded in the database using the direct mapping from relational databases to RDF.
|
||||
|
||||
#### Summary
|
||||
|
||||
The `Dockerfile` creates a MariaDB 10.11 container that automatically loads `teatre-migrants.sql` on first start. The `docker-compose.yml` exposes the database on port 3306 with a healthcheck.
|
||||
|
||||
The script `map/step-01.rb` connects to the database via `sequel` and implements the [W3C Direct Mapping](https://www.w3.org/TR/rdb-direct-mapping/) for all 9 tables (`location`, `migration_table`, `organisation`, `person`, `person_profession`, `personnames`, `relationship`, `religions`, `work`). Each table row becomes an RDF resource identified by its primary key, each column becomes a datatype property, and each foreign key becomes an object property linking to the referenced row. The output file `graph-01.ttl` contains 162,029 triples.
|
||||
|
||||
To run:
|
||||
|
||||
```sh
|
||||
docker compose up -d
|
||||
bundle exec ruby map/step-01.rb
|
||||
```
|
||||
|
||||
### Step 2 -
|
||||
10
docker-compose.yml
Normal file
10
docker-compose.yml
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
services:
|
||||
db:
|
||||
build: .
|
||||
ports:
|
||||
- "3306:3306"
|
||||
healthcheck:
|
||||
test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
182045
graph-01.ttl
Normal file
182045
graph-01.ttl
Normal file
File diff suppressed because it is too large
Load diff
136
map/step-01.rb
Normal file
136
map/step-01.rb
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
#!/usr/bin/env ruby
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Step 1: Direct Mapping from relational database to RDF
|
||||
# Implements the W3C Direct Mapping (https://www.w3.org/TR/rdb-direct-mapping/)
|
||||
|
||||
require 'sequel'
|
||||
require 'rdf'
|
||||
require 'rdf/turtle'
|
||||
|
||||
BASE_IRI = 'http://example.org/migrants/'

# Database connection used by the mapping below.
# Every setting can be overridden through an environment variable, so the
# script can also run against a database that is not on 127.0.0.1:3306.
# The defaults are the values that were previously hard-coded here.
DB = Sequel.mysql2(
  host: ENV.fetch('DB_HOST', '127.0.0.1'),
  port: Integer(ENV.fetch('DB_PORT', '3306')),
  user: ENV.fetch('DB_USER', 'migrants'),
  database: ENV.fetch('DB_NAME', 'migrants'),
  password: ENV.fetch('DB_PASSWORD', '1234')
)
|
||||
|
||||
# Foreign key definitions: table -> { column -> [referenced_table, referenced_column] }
#
# Tables without an entry here have no foreign keys as far as the mapping is
# concerned. Every nesting level is frozen (the outer hash, each per-table
# hash and each [table, column] pair), so the constant cannot be modified by
# accident while the mapping runs.
FOREIGN_KEYS = {
  migration_table: {
    IDPerson: %i[person IDPerson],
    IDStartPlace: %i[location IDLocation],
    IDDestPlace: %i[location IDLocation]
  },
  organisation: {
    IDLocation: %i[location IDLocation]
  },
  person: {
    IDBirthPlace: %i[location IDLocation],
    IDDeathPlace: %i[location IDLocation]
  },
  personnames: {
    IDPerson: %i[person IDPerson]
  },
  person_profession: {
    IDPerson: %i[person IDPerson]
  },
  relationship: {
    IDPerson_active: %i[person IDPerson],
    IDPerson_passive: %i[person IDPerson],
    IDLocation: %i[location IDLocation],
    IDOrganisation: %i[organisation IDOrganisation]
  },
  religions: {
    IDPerson: %i[person IDPerson]
  },
  work: {
    IDPerson: %i[person IDPerson],
    IDLocation: %i[location IDLocation],
    IDOrganisation: %i[organisation IDOrganisation],
    IDOrganisation2: %i[organisation IDOrganisation]
  }
}.each_value { |columns| columns.each_value(&:freeze).freeze }.freeze
|
||||
|
||||
# Primary-key column of every mapped table, as table -> column.
# The order of the pairs is the order in which the tables are processed.
PRIMARY_KEYS = [
  %i[location IDLocation],
  %i[migration_table IDMig],
  %i[organisation IDOrganisation],
  %i[person IDPerson],
  %i[person_profession IDProfPerson],
  %i[personnames IDPersonname],
  %i[relationship IDRel],
  %i[religions IDReligion],
  %i[work IDWork]
].to_h.freeze
|
||||
|
||||
# Builds the IRI that identifies a single table row.
#
# The primary-key value is escaped so that it is safe inside an IRI path
# segment. URI.encode_www_form_component follows the HTML form rules and
# writes a space as "+", which inside an IRI path is a literal plus sign, not
# a space. That "+" is rewritten to "%20"; a real "+" in the input has
# already been escaped to "%2B" at that point, so the substitution is
# unambiguous.
#
# @param table [Symbol, String] table name, inserted verbatim
# @param pk_value [Object] primary-key value; converted with #to_s
# @return [RDF::URI] BASE_IRI followed by "<table>/<escaped key>"
def row_iri(table, pk_value)
  escaped_key = URI.encode_www_form_component(pk_value.to_s).gsub('+', '%20')
  RDF::URI.new("#{BASE_IRI}#{table}/#{escaped_key}")
end
|
||||
|
||||
# Property IRI for a plain (non foreign-key) column.
#
# @param table [Symbol, String] table name
# @param column [Symbol, String] column name
# @return [RDF::URI] BASE_IRI followed by "<table>#<column>"
def column_iri(table, column)
  iri_text = BASE_IRI + table.to_s + '#' + column.to_s
  RDF::URI.new(iri_text)
end
|
||||
|
||||
# Class IRI used as the rdf:type of every row of a table.
#
# @param table [Symbol, String] table name
# @return [RDF::URI] BASE_IRI followed by the table name
def class_iri(table)
  iri_text = BASE_IRI + table.to_s
  RDF::URI.new(iri_text)
end
|
||||
|
||||
# Property IRI for a foreign-key column, i.e. a link to another row.
#
# @param table [Symbol, String] table that owns the foreign key
# @param fk_col [Symbol, String] foreign-key column name
# @return [RDF::URI] BASE_IRI followed by "<table>#ref-<fk_col>"
def ref_iri(table, fk_col)
  iri_text = BASE_IRI + table.to_s + '#ref-' + fk_col.to_s
  RDF::URI.new(iri_text)
end
|
||||
|
||||
# Converts a Ruby value read from the database into an RDF literal.
#
# Integer, Float, timestamp, Date and boolean values get the matching XSD
# datatype; anything else becomes a plain literal built from #to_s.
#
# Branch order matters: DateTime is a subclass of Date, so the timestamp
# branch has to come before the Date branch. Otherwise a DateTime would be
# tagged xsd:date while carrying a full timestamp as its text.
#
# Timestamps are written as "YYYY-MM-DDThh:mm:ss+hh:mm". The default #to_s of
# Time ("2020-01-02 03:04:05 +0000") is not a valid xsd:dateTime lexical form.
#
# @param value [Object] a non-nil column value
# @return [RDF::Literal]
def to_rdf_literal(value)
  case value
  when Integer
    RDF::Literal.new(value, datatype: RDF::XSD.integer)
  when Float
    RDF::Literal.new(value, datatype: RDF::XSD.double)
  when Time, DateTime
    RDF::Literal.new(value.strftime('%Y-%m-%dT%H:%M:%S%:z'), datatype: RDF::XSD.dateTime)
  when Date
    RDF::Literal.new(value.iso8601, datatype: RDF::XSD.date)
  when TrueClass, FalseClass
    RDF::Literal.new(value, datatype: RDF::XSD.boolean)
  else
    RDF::Literal.new(value.to_s)
  end
end
|
||||
|
||||
graph = RDF::Graph.new

# For every table listed in PRIMARY_KEYS, turn each row into triples:
# one rdf:type triple, then one triple per non-NULL column.
PRIMARY_KEYS.each_pair do |table, key_column|
  references = FOREIGN_KEYS.fetch(table, {})

  DB[table].each do |record|
    node = row_iri(table, record[key_column])
    graph << [node, RDF.type, class_iri(table)]

    # NULL cells produce no triple at all.
    record.compact.each do |column, cell|
      column_name = column.to_sym

      if references.key?(column_name)
        # Foreign key: link to the row it references instead of a literal.
        target_table, = references[column_name]
        predicate = ref_iri(table, column)
        object = row_iri(target_table, cell)
      else
        # Ordinary column: literal value.
        predicate = column_iri(table, column)
        object = to_rdf_literal(cell)
      end

      graph << [node, predicate, object]
    end
  end
end
|
||||
|
||||
# Serialise the graph as Turtle into graph-01.ttl, one directory above this
# script, and report how many triples the graph holds.
output_path = File.expand_path('../graph-01.ttl', __dir__)

turtle_prefixes = {
  rdf: RDF.to_uri,
  xsd: RDF::XSD.to_uri,
  base: RDF::URI.new(BASE_IRI)
}

RDF::Turtle::Writer.open(output_path, prefixes: turtle_prefixes) do |writer|
  graph.each_statement do |statement|
    writer << statement
  end
end

puts "Written #{graph.count} triples to #{output_path}"
|
||||
Loading…
Reference in a new issue