From 32ef100fdc293947d2870cb9f0d6029a0f89c1bb Mon Sep 17 00:00:00 2001 From: Daniel Hernandez Date: Sun, 22 Feb 2026 08:20:48 +0100 Subject: [PATCH] Add scripts for tables person and religions. --- db_schema/person.txt | 35 +++++++++++++++++++ db_schema/religions.txt | 16 +++++++++ src/001_religions.rb | 45 ++++++++++++++++++++++++ src/002_generate_person.rb | 71 ++++++++++++++++++++++++++++++++++++++ src/migrants.rb | 5 +++ src/vocabularies.rb | 11 +++++- 6 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 db_schema/person.txt create mode 100644 db_schema/religions.txt create mode 100644 src/001_religions.rb create mode 100644 src/002_generate_person.rb create mode 100644 src/migrants.rb diff --git a/db_schema/person.txt b/db_schema/person.txt new file mode 100644 index 0000000..3aa63c9 --- /dev/null +++ b/db_schema/person.txt @@ -0,0 +1,35 @@ +show columns from person; ++----------------+----------------------------------------------------------------------------------+------+-----+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++----------------+----------------------------------------------------------------------------------+------+-----+---------+-------+ +| IDPerson | varchar(20) | NO | PRI | NULL | | +| first_name | varchar(50) | YES | | NULL | | +| family_name | varchar(50) | YES | | NULL | | +| religion | text | YES | | NULL | | +| Nametype | enum('Birth name','Marriage','Pseudonym/Stage name','Other','Migration-Related') | YES | | NULL | | +| gender | enum('male','female') | YES | | NULL | | +| imageURL | text | YES | | NULL | | +| image_source | varchar(2500) | YES | | NULL | | +| profession | text | YES | | NULL | | +| fuzzybirthdate | varchar(100) | YES | | NULL | | +| birthdate | date | YES | | NULL | | +| birthdate_max | date | YES | | NULL | | +| Birth Info | varchar(200) | YES | | NULL | | +| IDBirthPlace | varchar(20) | YES | MUL | NULL | | +| fuzzydeathdate | varchar(100) | YES | | NULL | | +| deathdate | date | YES | | NULL | | +| deathdate_max | date | YES | | NULL | | +| Death Info | varchar(500) | YES | | NULL | | +| IDDeathPlace | varchar(20) | YES | MUL | NULL | | +| Wikipedia | varchar(2500) | YES | | NULL | | +| Wikidata | varchar(70) | YES | | NULL | | +| GND | varchar(70) | YES | | NULL | | +| LCCN | varchar(70) | YES | | NULL | | +| VIAF | varchar(70) | YES | | NULL | | +| CERL | varchar(70) | YES | | NULL | | +| ISNI | varchar(100) | YES | | NULL | | +| SNAC | varchar(70) | YES | | NULL | | +| comment | text | YES | | NULL | | +| Source | varchar(3000) | YES | | NULL | | +| Importsource | enum('Own','Tau Cooperation') | YES | | NULL | | ++----------------+----------------------------------------------------------------------------------+------+-----+---------+-------+ diff --git a/db_schema/religions.txt b/db_schema/religions.txt new file mode 100644 index 0000000..204be4b --- /dev/null +++ b/db_schema/religions.txt @@ -0,0 +1,16 @@ +show columns from religions; ++-----------------+-------------------------------------------------------------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++-----------------+-------------------------------------------------------------------+------+-----+---------+----------------+ +| IDReligion | int(11) | NO | PRI | NULL | auto_increment | +| IDPerson | varchar(20) | YES | MUL | NULL | | +| religion | enum('Buddhism','Christian','Hinduism','Jewish','Muslim','Other') | YES | | NULL | | +| denomination | varchar(300) | YES | | NULL | | +| DateStart_Fuzzy | varchar(100) | YES | | NULL | | +| date_start | date | YES | | NULL | | +| DateStart_Min | date | YES | | NULL | | +| DateStart_Max | varchar(100) | YES | | NULL | | +| DateEnd_Min | date | YES | | NULL | | +| DateEnd_Max | date | YES | | NULL | | +| comment | varchar(1500) | YES | | NULL | | ++-----------------+-------------------------------------------------------------------+------+-----+---------+----------------+ diff --git a/src/001_religions.rb b/src/001_religions.rb new file mode 100644 index 0000000..c91d0ec --- /dev/null +++ b/src/001_religions.rb @@ -0,0 +1,45 @@ +#!/usr/bin/env ruby + +require_relative 'database' +require_relative 'vocabularies' +require_relative 'migrants' + +output = File.open(File.join('data', '001-religions.ttl'), 'w') + +output.puts prefixes(:rdfs, :mig, :schema, :person, :religion, :xsd) +output.puts + +DB[:religions].each do |row| + id = row[:IDReligion] + props = ["a mig:ReligionAffiliation"] + + props << "mig:person person:#{row[:IDPerson]}" unless row[:IDPerson].to_s.empty? + props << "mig:religion #{ttl_literal(row[:religion])}" unless row[:religion].to_s.empty? + props << "mig:denomination #{ttl_literal(row[:denomination])}" unless row[:denomination].to_s.empty? + props << "mig:dateStartFuzzy #{ttl_literal(row[:DateStart_Fuzzy])}" unless row[:DateStart_Fuzzy].to_s.empty? + + if row[:date_start] + props << "mig:dateStart #{RDF::Literal.new(row[:date_start].to_s, datatype: RDF::XSD.date).to_ntriples}" + end + if row[:DateStart_Min] + props << "mig:dateStartMin #{RDF::Literal.new(row[:DateStart_Min].to_s, datatype: RDF::XSD.date).to_ntriples}" + end + + props << "mig:dateStartMax #{ttl_literal(row[:DateStart_Max])}" unless row[:DateStart_Max].to_s.empty? + + if row[:DateEnd_Min] + props << "mig:dateEndMin #{RDF::Literal.new(row[:DateEnd_Min].to_s, datatype: RDF::XSD.date).to_ntriples}" + end + if row[:DateEnd_Max] + props << "mig:dateEndMax #{RDF::Literal.new(row[:DateEnd_Max].to_s, datatype: RDF::XSD.date).to_ntriples}" + end + + props << "rdfs:comment #{ttl_literal(row[:comment])}" unless row[:comment].to_s.empty? + + output.puts "religion:#{id} #{props.first} ;" + props[1..-2].each { |p| output.puts " #{p} ;" } + output.puts " #{props.last} ." + output.puts +end + +output.close diff --git a/src/002_generate_person.rb b/src/002_generate_person.rb new file mode 100644 index 0000000..4afa3e9 --- /dev/null +++ b/src/002_generate_person.rb @@ -0,0 +1,71 @@ +#!/usr/bin/env ruby + +require_relative 'database' +require_relative 'vocabularies' +require_relative 'migrants' + +output = File.open(File.join('data', '002-persons.ttl'), 'w') + +output.puts prefixes(:wd, :rdfs, :mig, :schema, :person, :location, :xsd) +output.puts + +DB[:person].each do |row| + id = row[:IDPerson] + props = ["a schema:Person"] + + props << "schema:givenName #{ttl_literal(row[:first_name])}" unless row[:first_name].to_s.empty? + props << "schema:familyName #{ttl_literal(row[:family_name])}" unless row[:family_name].to_s.empty? + props << "mig:religion #{ttl_literal(row[:religion])}" unless row[:religion].to_s.empty? + props << "mig:nameType #{ttl_literal(row[:Nametype])}" unless row[:Nametype].to_s.empty? + + case row[:gender] + when 'male' then props << "schema:gender schema:Male" + when 'female' then props << "schema:gender schema:Female" + end + + props << "schema:image <#{row[:imageURL]}>" unless row[:imageURL].to_s.empty? + props << "mig:imageSource #{ttl_literal(row[:image_source])}" unless row[:image_source].to_s.empty? + props << "schema:hasOccupation #{ttl_literal(row[:profession])}" unless row[:profession].to_s.empty? + props << "mig:fuzzyBirthDate #{ttl_literal(row[:fuzzybirthdate])}" unless row[:fuzzybirthdate].to_s.empty? + + if row[:birthdate] + props << "schema:birthDate #{RDF::Literal.new(row[:birthdate].to_s, datatype: RDF::XSD.date).to_ntriples}" + end + if row[:birthdate_max] + props << "mig:birthDateMax #{RDF::Literal.new(row[:birthdate_max].to_s, datatype: RDF::XSD.date).to_ntriples}" + end + + props << "mig:birthInfo #{ttl_literal(row[:'Birth Info'])}" unless row[:'Birth Info'].to_s.empty? + props << "schema:birthPlace location:#{row[:IDBirthPlace]}" unless row[:IDBirthPlace].to_s.empty? + + props << "mig:fuzzyDeathDate #{ttl_literal(row[:fuzzydeathdate])}" unless row[:fuzzydeathdate].to_s.empty? + + if row[:deathdate] + props << "schema:deathDate #{RDF::Literal.new(row[:deathdate].to_s, datatype: RDF::XSD.date).to_ntriples}" + end + if row[:deathdate_max] + props << "mig:deathDateMax #{RDF::Literal.new(row[:deathdate_max].to_s, datatype: RDF::XSD.date).to_ntriples}" + end + + props << "mig:deathInfo #{ttl_literal(row[:'Death Info'])}" unless row[:'Death Info'].to_s.empty? + props << "schema:deathPlace location:#{row[:IDDeathPlace]}" unless row[:IDDeathPlace].to_s.empty? + + props << "schema:sameAs <#{row[:Wikipedia]}>" unless row[:Wikipedia].to_s.empty? + props << "schema:sameAs wd:#{row[:Wikidata]}" unless row[:Wikidata].to_s.empty? + props << "mig:gnd #{ttl_literal(row[:GND])}" unless row[:GND].to_s.empty? + props << "mig:lccn #{ttl_literal(row[:LCCN])}" unless row[:LCCN].to_s.empty? + props << "mig:viaf #{ttl_literal(row[:VIAF])}" unless row[:VIAF].to_s.empty? + props << "mig:cerl #{ttl_literal(row[:CERL])}" unless row[:CERL].to_s.empty? + props << "mig:isni #{ttl_literal(row[:ISNI])}" unless row[:ISNI].to_s.empty? + props << "mig:snac #{ttl_literal(row[:SNAC])}" unless row[:SNAC].to_s.empty? + props << "rdfs:comment #{ttl_literal(row[:comment])}" unless row[:comment].to_s.empty? + props << "mig:source #{ttl_literal(row[:Source])}" unless row[:Source].to_s.empty? + props << "mig:importSource #{ttl_literal(row[:Importsource])}" unless row[:Importsource].to_s.empty? + + output.puts "person:#{id} #{props.first} ;" + props[1..-2].each { |p| output.puts " #{p} ;" } + output.puts " #{props.last} ." + output.puts +end + +output.close diff --git a/src/migrants.rb b/src/migrants.rb new file mode 100644 index 0000000..fe056d5 --- /dev/null +++ b/src/migrants.rb @@ -0,0 +1,5 @@ + +# Serializes a Ruby value as an RDF/Turtle literal. +def ttl_literal(value) + RDF::Literal.new(value).to_ntriples +end diff --git a/src/vocabularies.rb b/src/vocabularies.rb index c594664..bcacd76 100644 --- a/src/vocabularies.rb +++ b/src/vocabularies.rb @@ -7,13 +7,22 @@ SCHEMA = RDF::Vocabulary.new('https://schema.org/') COUNTRY = RDF::Vocabulary.new('https://daniel.degu.cl/data/countries.ttl#Country-') MIG = RDF::Vocabulary.new('https://daniel.degu.cl/data/mig.ttl#') +PERSON = RDF::Vocabulary.new('https://daniel.degu.cl/data/persons.ttl#') +LOCATION = RDF::Vocabulary.new('https://daniel.degu.cl/data/locations.ttl#') +RELIGION = RDF::Vocabulary.new('https://daniel.degu.cl/data/religions.ttl#') +XSD = RDF::Vocabulary.new('http://www.w3.org/2001/XMLSchema#') + PREFIXES = { wd: WD, wdt: WDT, rdfs: RDFS, schema: SCHEMA, country: COUNTRY, - mig: MIG + mig: MIG, + person: PERSON, + location: LOCATION, + religion: RELIGION, + xsd: XSD } def prefixes(*namespaces)