Add scripts for tables person and religions.

2026-02-22 08:20:48 +01:00 · 2026-02-22 08:20:48 +01:00 · 32ef100fdc
commit 32ef100fdc
parent c97a0778c1
6 changed files with 182 additions and 1 deletions
--- a/db_schema/person.txt
+++ b/db_schema/person.txt
@ -0,0 +1,35 @@
 show columns from person;
 +----------------+----------------------------------------------------------------------------------+------+-----+---------+-------+
 | Field          | Type                                                                             | Null | Key | Default | Extra |
 +----------------+----------------------------------------------------------------------------------+------+-----+---------+-------+
 | IDPerson       | varchar(20)                                                                      | NO   | PRI | NULL    |       |
 | first_name     | varchar(50)                                                                      | YES  |     | NULL    |       |
 | family_name    | varchar(50)                                                                      | YES  |     | NULL    |       |
 | religion       | text                                                                             | YES  |     | NULL    |       |
 | Nametype       | enum('Birth name','Marriage','Pseudonym/Stage name','Other','Migration-Related') | YES  |     | NULL    |       |
 | gender         | enum('male','female')                                                            | YES  |     | NULL    |       |
 | imageURL       | text                                                                             | YES  |     | NULL    |       |
 | image_source   | varchar(2500)                                                                    | YES  |     | NULL    |       |
 | profession     | text                                                                             | YES  |     | NULL    |       |
 | fuzzybirthdate | varchar(100)                                                                     | YES  |     | NULL    |       |
 | birthdate      | date                                                                             | YES  |     | NULL    |       |
 | birthdate_max  | date                                                                             | YES  |     | NULL    |       |
 | Birth Info     | varchar(200)                                                                     | YES  |     | NULL    |       |
 | IDBirthPlace   | varchar(20)                                                                      | YES  | MUL | NULL    |       |
 | fuzzydeathdate | varchar(100)                                                                     | YES  |     | NULL    |       |
 | deathdate      | date                                                                             | YES  |     | NULL    |       |
 | deathdate_max  | date                                                                             | YES  |     | NULL    |       |
 | Death Info     | varchar(500)                                                                     | YES  |     | NULL    |       |
 | IDDeathPlace   | varchar(20)                                                                      | YES  | MUL | NULL    |       |
 | Wikipedia      | varchar(2500)                                                                    | YES  |     | NULL    |       |
 | Wikidata       | varchar(70)                                                                      | YES  |     | NULL    |       |
 | GND            | varchar(70)                                                                      | YES  |     | NULL    |       |
 | LCCN           | varchar(70)                                                                      | YES  |     | NULL    |       |
 | VIAF           | varchar(70)                                                                      | YES  |     | NULL    |       |
 | CERL           | varchar(70)                                                                      | YES  |     | NULL    |       |
 | ISNI           | varchar(100)                                                                     | YES  |     | NULL    |       |
 | SNAC           | varchar(70)                                                                      | YES  |     | NULL    |       |
 | comment        | text                                                                             | YES  |     | NULL    |       |
 | Source         | varchar(3000)                                                                    | YES  |     | NULL    |       |
 | Importsource   | enum('Own','Tau Cooperation')                                                    | YES  |     | NULL    |       |
 +----------------+----------------------------------------------------------------------------------+------+-----+---------+-------+
--- a/db_schema/religions.txt
+++ b/db_schema/religions.txt
@ -0,0 +1,16 @@
 show columns from religions;
 +-----------------+-------------------------------------------------------------------+------+-----+---------+----------------+
 | Field           | Type                                                              | Null | Key | Default | Extra          |
 +-----------------+-------------------------------------------------------------------+------+-----+---------+----------------+
 | IDReligion      | int(11)                                                           | NO   | PRI | NULL    | auto_increment |
 | IDPerson        | varchar(20)                                                       | YES  | MUL | NULL    |                |
 | religion        | enum('Buddhism','Christian','Hinduism','Jewish','Muslim','Other') | YES  |     | NULL    |                |
 | denomination    | varchar(300)                                                      | YES  |     | NULL    |                |
 | DateStart_Fuzzy | varchar(100)                                                      | YES  |     | NULL    |                |
 | date_start      | date                                                              | YES  |     | NULL    |                |
 | DateStart_Min   | date                                                              | YES  |     | NULL    |                |
 | DateStart_Max   | varchar(100)                                                      | YES  |     | NULL    |                |
 | DateEnd_Min     | date                                                              | YES  |     | NULL    |                |
 | DateEnd_Max     | date                                                              | YES  |     | NULL    |                |
 | comment         | varchar(1500)                                                     | YES  |     | NULL    |                |
 +-----------------+-------------------------------------------------------------------+------+-----+---------+----------------+
--- a/src/001_religions.rb
+++ b/src/001_religions.rb
@ -0,0 +1,45 @@
 #!/usr/bin/env ruby
 require_relative 'database'
 require_relative 'vocabularies'
 require_relative 'migrants'
 # Dumps every row of the `religions` table to data/001-religions.ttl,
 # one Turtle resource (religion:<IDReligion>) per row.
 #
 # The block form of File.open closes the file even when a row raises
 # part-way through the export.
 File.open(File.join('data', '001-religions.ttl'), 'w') do |output|
   output.puts prefixes(:rdfs, :mig, :schema, :person, :religion, :xsd)
   output.puts

   # Serializes a date column value as an xsd:date typed literal.
   xsd_date = ->(value) { RDF::Literal.new(value.to_s, datatype: RDF::XSD.date).to_ntriples }

   DB[:religions].each do |row|
     id = row[:IDReligion]

     # Every column except the primary key is nullable, so each property is
     # only emitted when the column holds a non-empty value.
     props = ["a mig:ReligionAffiliation"]
     props << "mig:person person:#{row[:IDPerson]}" unless row[:IDPerson].to_s.empty?
     props << "mig:religion #{ttl_literal(row[:religion])}" unless row[:religion].to_s.empty?
     props << "mig:denomination #{ttl_literal(row[:denomination])}" unless row[:denomination].to_s.empty?
     props << "mig:dateStartFuzzy #{ttl_literal(row[:DateStart_Fuzzy])}" unless row[:DateStart_Fuzzy].to_s.empty?
     props << "mig:dateStart #{xsd_date.call(row[:date_start])}" if row[:date_start]
     props << "mig:dateStartMin #{xsd_date.call(row[:DateStart_Min])}" if row[:DateStart_Min]
     # DateStart_Max is a varchar column in the schema (unlike the other
     # Min/Max columns), so it is written as a plain literal, not xsd:date.
     props << "mig:dateStartMax #{ttl_literal(row[:DateStart_Max])}" unless row[:DateStart_Max].to_s.empty?
     props << "mig:dateEndMin #{xsd_date.call(row[:DateEnd_Min])}" if row[:DateEnd_Min]
     props << "mig:dateEndMax #{xsd_date.call(row[:DateEnd_Max])}" if row[:DateEnd_Max]
     props << "rdfs:comment #{ttl_literal(row[:comment])}" unless row[:comment].to_s.empty?

     # Join the predicate-object list with " ;" separators and terminate it
     # with " ." so that a row with only the type statement is written once
     # instead of being repeated as both first and last property.
     output.puts "religion:#{id} #{props.join(" ;\n  ")} ."
     output.puts
   end
 end
--- a/src/002_generate_person.rb
+++ b/src/002_generate_person.rb
@ -0,0 +1,71 @@
 #!/usr/bin/env ruby
 require_relative 'database'
 require_relative 'vocabularies'
 require_relative 'migrants'
 # Dumps every row of the `person` table to data/002-persons.ttl,
 # one Turtle resource (person:<IDPerson>) per row.
 #
 # The block form of File.open closes the file even when a row raises
 # part-way through the export.
 File.open(File.join('data', '002-persons.ttl'), 'w') do |output|
   output.puts prefixes(:wd, :rdfs, :mig, :schema, :person, :location, :xsd)
   output.puts

   # Serializes a date column value as an xsd:date typed literal.
   xsd_date = ->(value) { RDF::Literal.new(value.to_s, datatype: RDF::XSD.date).to_ntriples }

   DB[:person].each do |row|
     id = row[:IDPerson]

     # Every column except the primary key is nullable, so each property is
     # only emitted when the column holds a non-empty value.
     props = ["a schema:Person"]
     props << "schema:givenName #{ttl_literal(row[:first_name])}" unless row[:first_name].to_s.empty?
     props << "schema:familyName #{ttl_literal(row[:family_name])}" unless row[:family_name].to_s.empty?
     props << "mig:religion #{ttl_literal(row[:religion])}" unless row[:religion].to_s.empty?
     props << "mig:nameType #{ttl_literal(row[:Nametype])}" unless row[:Nametype].to_s.empty?

     # Only the two enum values map to a schema.org term; anything else
     # (including NULL) produces no gender statement.
     case row[:gender]
     when 'male'   then props << "schema:gender schema:Male"
     when 'female' then props << "schema:gender schema:Female"
     end

     # NOTE(review): URL columns are interpolated verbatim between < and >.
     # A value containing characters that Turtle forbids inside an IRI
     # (spaces, '<', '>', '"', ...) would yield invalid output -- confirm the
     # data is clean or escape it before relying on this file.
     props << "schema:image <#{row[:imageURL]}>" unless row[:imageURL].to_s.empty?
     props << "mig:imageSource #{ttl_literal(row[:image_source])}" unless row[:image_source].to_s.empty?
     props << "schema:hasOccupation #{ttl_literal(row[:profession])}" unless row[:profession].to_s.empty?

     props << "mig:fuzzyBirthDate #{ttl_literal(row[:fuzzybirthdate])}" unless row[:fuzzybirthdate].to_s.empty?
     props << "schema:birthDate #{xsd_date.call(row[:birthdate])}" if row[:birthdate]
     props << "mig:birthDateMax #{xsd_date.call(row[:birthdate_max])}" if row[:birthdate_max]
     props << "mig:birthInfo #{ttl_literal(row[:'Birth Info'])}" unless row[:'Birth Info'].to_s.empty?
     props << "schema:birthPlace location:#{row[:IDBirthPlace]}" unless row[:IDBirthPlace].to_s.empty?

     props << "mig:fuzzyDeathDate #{ttl_literal(row[:fuzzydeathdate])}" unless row[:fuzzydeathdate].to_s.empty?
     props << "schema:deathDate #{xsd_date.call(row[:deathdate])}" if row[:deathdate]
     props << "mig:deathDateMax #{xsd_date.call(row[:deathdate_max])}" if row[:deathdate_max]
     props << "mig:deathInfo #{ttl_literal(row[:'Death Info'])}" unless row[:'Death Info'].to_s.empty?
     props << "schema:deathPlace location:#{row[:IDDeathPlace]}" unless row[:IDDeathPlace].to_s.empty?

     # External identifiers and authority-file references.
     props << "schema:sameAs <#{row[:Wikipedia]}>" unless row[:Wikipedia].to_s.empty?
     props << "schema:sameAs wd:#{row[:Wikidata]}" unless row[:Wikidata].to_s.empty?
     props << "mig:gnd #{ttl_literal(row[:GND])}" unless row[:GND].to_s.empty?
     props << "mig:lccn #{ttl_literal(row[:LCCN])}" unless row[:LCCN].to_s.empty?
     props << "mig:viaf #{ttl_literal(row[:VIAF])}" unless row[:VIAF].to_s.empty?
     props << "mig:cerl #{ttl_literal(row[:CERL])}" unless row[:CERL].to_s.empty?
     props << "mig:isni #{ttl_literal(row[:ISNI])}" unless row[:ISNI].to_s.empty?
     props << "mig:snac #{ttl_literal(row[:SNAC])}" unless row[:SNAC].to_s.empty?

     props << "rdfs:comment #{ttl_literal(row[:comment])}" unless row[:comment].to_s.empty?
     props << "mig:source #{ttl_literal(row[:Source])}" unless row[:Source].to_s.empty?
     props << "mig:importSource #{ttl_literal(row[:Importsource])}" unless row[:Importsource].to_s.empty?

     # Join the predicate-object list with " ;" separators and terminate it
     # with " ." so that a row with only the type statement is written once
     # instead of being repeated as both first and last property.
     output.puts "person:#{id} #{props.join(" ;\n  ")} ."
     output.puts
   end
 end
--- a/src/migrants.rb
+++ b/src/migrants.rb
@ -0,0 +1,5 @@
 # Wraps +value+ in an RDF::Literal and returns that literal's N-Triples
 # text, which the export scripts interpolate into their Turtle statements.
 def ttl_literal(value)
   literal = RDF::Literal.new(value)
   literal.to_ntriples
 end
--- a/src/vocabularies.rb
+++ b/src/vocabularies.rb
@ -7,13 +7,22 @@ SCHEMA = RDF::Vocabulary.new('https://schema.org/')
 # Project namespaces (all rooted at daniel.degu.cl/data) plus XML Schema.
 COUNTRY = RDF::Vocabulary.new('https://daniel.degu.cl/data/countries.ttl#Country-')
 MIG = RDF::Vocabulary.new('https://daniel.degu.cl/data/mig.ttl#')
 PERSON = RDF::Vocabulary.new('https://daniel.degu.cl/data/persons.ttl#')
 LOCATION = RDF::Vocabulary.new('https://daniel.degu.cl/data/locations.ttl#')
 RELIGION = RDF::Vocabulary.new('https://daniel.degu.cl/data/religions.ttl#')
 XSD = RDF::Vocabulary.new('http://www.w3.org/2001/XMLSchema#')
 # Maps each prefix label to its vocabulary object. The keys match the
 # symbols the export scripts pass to `prefixes` -- presumably that method
 # looks them up here; confirm against its body.
 PREFIXES = {
  wd: WD,
  wdt: WDT,
  rdfs: RDFS,
  schema: SCHEMA,
  country: COUNTRY,
-  mig: MIG
+  mig: MIG,
  person: PERSON,
  location: LOCATION,
  religion: RELIGION,
  xsd: XSD
 }
 def prefixes(*namespaces)