Mapping countries.

This commit is contained in:
Daniel Hernandez 2026-02-22 18:04:55 +01:00
parent c9c768aa17
commit 38ce175b5b
7 changed files with 595 additions and 2 deletions

250
data/countries.ttl Normal file
View file

@ -0,0 +1,250 @@
@prefix country: <https://daniel.degu.cl/data/theater-migrants/countries.ttl#Country-> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
country:Algeria a schema:Country ;
rdfs:label "Algeria"@en .
country:Argentina a schema:Country ;
rdfs:label "Argentina"@en .
country:Australia a schema:Country ;
rdfs:label "Australia"@en .
country:Austria a schema:Country ;
rdfs:label "Austria"@en .
country:Azerbaijan a schema:Country ;
rdfs:label "Azerbaijan"@en .
country:Belarus a schema:Country ;
rdfs:label "Belarus"@en .
country:Belgium a schema:Country ;
rdfs:label "Belgium"@en .
country:Brazil a schema:Country ;
rdfs:label "Brazil"@en .
country:Bulgaria a schema:Country ;
rdfs:label "Bulgaria"@en .
country:Canada a schema:Country ;
rdfs:label "Canada"@en .
country:Channel_Islands a schema:Country ;
rdfs:label "Channel Islands"@en .
country:Chile a schema:Country ;
rdfs:label "Chile"@en .
country:China a schema:Country ;
rdfs:label "China"@en .
country:Columbia a schema:Country ;
rdfs:label "Columbia"@en .
country:Costa_Rica a schema:Country ;
rdfs:label "Costa Rica"@en .
country:Croatia a schema:Country ;
rdfs:label "Croatia"@en .
country:Cuba a schema:Country ;
rdfs:label "Cuba"@en .
country:Czech_Republic a schema:Country ;
rdfs:label "Czech Republic"@en .
country:Denmark a schema:Country ;
rdfs:label "Denmark"@en .
country:Eastern_Europe a schema:Country ;
rdfs:label "Eastern Europe"@en .
country:Egypt a schema:Country ;
rdfs:label "Egypt"@en .
country:El_Salvador a schema:Country ;
rdfs:label "El Salvador"@en .
country:England a schema:Country ;
rdfs:label "England"@en .
country:England_UK a schema:Country ;
rdfs:label "England (UK)"@en .
country:Estland a schema:Country ;
rdfs:label "Estland"@en .
country:Europe a schema:Country ;
rdfs:label "Europe"@en .
country:Fiji a schema:Country ;
rdfs:label "Fiji"@en .
country:France a schema:Country ;
rdfs:label "France"@en .
country:Germany a schema:Country ;
rdfs:label "Germany"@en .
country:Greece a schema:Country ;
rdfs:label "Greece"@en .
country:Hungary a schema:Country ;
rdfs:label "Hungary"@en .
country:India a schema:Country ;
rdfs:label "India"@en .
country:Indonesia a schema:Country ;
rdfs:label "Indonesia"@en .
country:Ireland a schema:Country ;
rdfs:label "Ireland"@en .
country:Israel a schema:Country ;
rdfs:label "Israel"@en .
country:Italy a schema:Country ;
rdfs:label "Italy"@en .
country:Japan a schema:Country ;
rdfs:label "Japan"@en .
country:Korea a schema:Country ;
rdfs:label "Korea"@en .
country:Latvia a schema:Country ;
rdfs:label "Latvia"@en .
country:Lithuania a schema:Country ;
rdfs:label "Lithuania"@en .
country:Mexico a schema:Country ;
rdfs:label "Mexico"@en .
country:Moldova a schema:Country ;
rdfs:label "Moldova"@en .
country:Monaco a schema:Country ;
rdfs:label "Monaco"@en .
country:Myanmar a schema:Country ;
rdfs:label "Myanmar"@en .
country:Netherlands a schema:Country ;
rdfs:label "Netherlands"@en .
country:New_Zealand a schema:Country ;
rdfs:label "New Zealand"@en .
country:North_America a schema:Country ;
rdfs:label "North America"@en .
country:Northern_Ireland_UK a schema:Country ;
rdfs:label "Northern Ireland (UK)"@en .
country:Norway a schema:Country ;
rdfs:label "Norway"@en .
country:Palestine a schema:Country ;
rdfs:label "Palestine"@en .
country:Panama a schema:Country ;
rdfs:label "Panama"@en .
country:Paraguay a schema:Country ;
rdfs:label "Paraguay"@en .
country:Peru a schema:Country ;
rdfs:label "Peru"@en .
country:Poland a schema:Country ;
rdfs:label "Poland"@en .
country:Poland_Ukraine a schema:Country ;
rdfs:label "Poland / Ukraine"@en .
country:Portugal a schema:Country ;
rdfs:label "Portugal"@en .
country:Prussia a schema:Country ;
rdfs:label "Prussia"@en .
country:Romania a schema:Country ;
rdfs:label "Romania"@en .
country:Russia a schema:Country ;
rdfs:label "Russia"@en .
country:Scandinavia a schema:Country ;
rdfs:label "Scandinavia"@en .
country:Scotland a schema:Country ;
rdfs:label "Scotland"@en .
country:Serbia a schema:Country ;
rdfs:label "Serbia"@en .
country:Singapore a schema:Country ;
rdfs:label "Singapore"@en .
country:Slovakia a schema:Country ;
rdfs:label "Slovakia"@en .
country:Slovenia a schema:Country ;
rdfs:label "Slovenia"@en .
country:South_Africa a schema:Country ;
rdfs:label "South Africa"@en .
country:Soviet_Union a schema:Country ;
rdfs:label "Soviet Union"@en .
country:Spain a schema:Country ;
rdfs:label "Spain"@en .
country:Sri_Lanka a schema:Country ;
rdfs:label "Sri Lanka"@en .
country:Sweden a schema:Country ;
rdfs:label "Sweden"@en .
country:Switzerland a schema:Country ;
rdfs:label "Switzerland"@en .
country:Turkey a schema:Country ;
rdfs:label "Turkey"@en .
country:UAS a schema:Country ;
rdfs:label "UAS"@en .
country:Ukraine a schema:Country ;
rdfs:label "Ukraine"@en .
country:United_Kingdom a schema:Country ;
rdfs:label "United Kingdom"@en .
country:United_States a schema:Country ;
rdfs:label "United States"@en .
country:Uruguay a schema:Country ;
rdfs:label "Uruguay"@en .
country:USA a schema:Country ;
rdfs:label "USA"@en .
country:Vietnam a schema:Country ;
rdfs:label "Vietnam"@en .
country:Wales_UK a schema:Country ;
rdfs:label "Wales (UK)"@en .
country:West_Europe a schema:Country ;
rdfs:label "West Europe"@en .
country:Yugoslavia a schema:Country ;
rdfs:label "Yugoslavia"@en .

191
data/countries_wikidata.ttl Normal file
View file

@ -0,0 +1,191 @@
@prefix country: <https://daniel.degu.cl/data/theater-migrants/countries.ttl#Country-> .
@prefix geo: <http://sws.geonames.org/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
@prefix wd: <http://www.wikidata.org/entity/> .
@prefix wdt: <http://www.wikidata.org/prop/direct/> .
country:Algeria schema:sameAs wd:Q262 ;
schema:sameAs geo:2589581 .
country:Argentina schema:sameAs wd:Q414 ;
schema:sameAs geo:3865483 .
country:Australia schema:sameAs wd:Q408 ;
schema:sameAs geo:2077456 .
country:Austria schema:sameAs wd:Q40 ;
schema:sameAs geo:2782113 .
country:Azerbaijan schema:sameAs wd:Q227 ;
schema:sameAs geo:587116 .
country:Belarus schema:sameAs wd:Q184 ;
schema:sameAs geo:630336 .
country:Belgium schema:sameAs wd:Q31 ;
schema:sameAs geo:2802361 .
country:Brazil schema:sameAs wd:Q155 ;
schema:sameAs geo:3469034 .
country:Bulgaria schema:sameAs wd:Q219 ;
schema:sameAs geo:732800 .
country:Canada schema:sameAs wd:Q16 ;
schema:sameAs geo:6251999 .
country:Chile schema:sameAs wd:Q298 ;
schema:sameAs geo:3895114 .
country:Costa_Rica schema:sameAs wd:Q800 ;
schema:sameAs geo:3624060 .
country:Croatia schema:sameAs wd:Q224 ;
schema:sameAs geo:3202326 .
country:Cuba schema:sameAs wd:Q241 ;
schema:sameAs geo:3562981 .
country:Czech_Republic schema:sameAs wd:Q213 ;
schema:sameAs geo:3077311 .
country:Denmark schema:sameAs wd:Q35 ;
schema:sameAs geo:2623032 .
country:Egypt schema:sameAs wd:Q79 ;
schema:sameAs geo:357994 .
country:El_Salvador schema:sameAs wd:Q792 ;
schema:sameAs geo:3585968 .
country:Fiji schema:sameAs wd:Q712 ;
schema:sameAs geo:2205218 .
country:France schema:sameAs wd:Q142 ;
schema:sameAs geo:3017382 .
country:Germany schema:sameAs wd:Q183 ;
schema:sameAs geo:2921044 .
country:Greece schema:sameAs wd:Q41 ;
schema:sameAs geo:390903 .
country:Hungary schema:sameAs wd:Q28 ;
schema:sameAs geo:719819 .
country:India schema:sameAs wd:Q668 ;
schema:sameAs geo:1269750 .
country:Indonesia schema:sameAs wd:Q252 ;
schema:sameAs geo:1643084 .
country:Ireland schema:sameAs wd:Q27 ;
schema:sameAs geo:2963597 .
country:Israel schema:sameAs wd:Q801 ;
schema:sameAs geo:294640 .
country:Italy schema:sameAs wd:Q38 ;
schema:sameAs geo:3175395 .
country:Japan schema:sameAs wd:Q17 ;
schema:sameAs geo:1861060 .
country:Latvia schema:sameAs wd:Q211 ;
schema:sameAs geo:458258 .
country:Lithuania schema:sameAs wd:Q37 ;
schema:sameAs geo:597427 .
country:Mexico schema:sameAs wd:Q96 ;
schema:sameAs geo:3996063 .
country:Moldova schema:sameAs wd:Q217 ;
schema:sameAs geo:617790 .
country:Monaco schema:sameAs wd:Q235 ;
schema:sameAs geo:2993457 .
country:Myanmar schema:sameAs wd:Q836 ;
schema:sameAs geo:1327865 .
country:Netherlands schema:sameAs wd:Q55 ;
schema:sameAs geo:2750405 .
country:New_Zealand schema:sameAs wd:Q664 ;
schema:sameAs geo:2186224 .
country:Norway schema:sameAs wd:Q20 ;
schema:sameAs geo:3144096 .
country:Palestine schema:sameAs wd:Q219060 ;
schema:sameAs geo:6254930 .
country:Panama schema:sameAs wd:Q804 ;
schema:sameAs geo:3703430 .
country:Paraguay schema:sameAs wd:Q733 ;
schema:sameAs geo:3437598 .
country:Peru schema:sameAs wd:Q419 ;
schema:sameAs geo:3932488 .
country:Poland schema:sameAs wd:Q36 ;
schema:sameAs geo:798544 .
country:Portugal schema:sameAs wd:Q45 ;
schema:sameAs geo:2264397 .
country:Romania schema:sameAs wd:Q218 ;
schema:sameAs geo:798549 .
country:Russia schema:sameAs wd:Q159 ;
schema:sameAs geo:2017370 .
country:Serbia schema:sameAs wd:Q403 ;
schema:sameAs geo:6290252 .
country:Singapore schema:sameAs wd:Q334 ;
schema:sameAs geo:1880251 .
country:Slovakia schema:sameAs wd:Q214 ;
schema:sameAs geo:3057568 .
country:Slovenia schema:sameAs wd:Q215 ;
schema:sameAs geo:3190538 .
country:South_Africa schema:sameAs wd:Q258 ;
schema:sameAs geo:953987 .
country:Spain schema:sameAs wd:Q29 ;
schema:sameAs geo:2510769 .
country:Sri_Lanka schema:sameAs wd:Q854 ;
schema:sameAs geo:1227603 .
country:Sweden schema:sameAs wd:Q34 ;
schema:sameAs geo:2661886 .
country:Switzerland schema:sameAs wd:Q39 ;
schema:sameAs geo:2658434 .
country:Turkey schema:sameAs wd:Q43 ;
schema:sameAs geo:298795 .
country:Ukraine schema:sameAs wd:Q212 ;
schema:sameAs geo:690791 .
country:United_Kingdom schema:sameAs wd:Q145 ;
schema:sameAs geo:2635167 .
country:United_States schema:sameAs wd:Q30 ;
schema:sameAs geo:6252001 .
country:Uruguay schema:sameAs wd:Q77 ;
schema:sameAs geo:3439705 .
country:Vietnam schema:sameAs wd:Q881 ;
schema:sameAs geo:1562822 .

View file

@ -0,0 +1,71 @@
@prefix country: <https://daniel.degu.cl/data/theater-migrants/countries.ttl#Country-> .
@prefix geo: <http://sws.geonames.org/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
@prefix wd: <http://www.wikidata.org/entity/> .
@prefix wdt: <http://www.wikidata.org/prop/direct/> .
# No Wikidata entity found for Channel Islands
country:Channel_Islands rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for China
country:China rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Columbia
country:Columbia rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Eastern Europe
country:Eastern_Europe rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for England
country:England rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for England (UK)
country:England_UK rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Estland
country:Estland rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Europe
country:Europe rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Korea
country:Korea rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for North America
country:North_America rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Northern Ireland (UK)
country:Northern_Ireland_UK rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Poland / Ukraine
country:Poland_Ukraine rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Prussia
country:Prussia rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Scandinavia
country:Scandinavia rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Scotland
country:Scotland rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Soviet Union
country:Soviet_Union rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for UAS
country:UAS rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for USA
country:USA rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Wales (UK)
country:Wales_UK rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for West Europe
country:West_Europe rdfs:comment "No Wikidata entity found" .
# No Wikidata entity found for Yugoslavia
country:Yugoslavia rdfs:comment "No Wikidata entity found" .

View file

@ -17,7 +17,7 @@ rdfs = RDF::Vocabulary.new('http://www.w3.org/2000/01/rdf-schema#')
# Common functions # Common functions
def toName(name) def toName(name)
name.gsub(' ', '_') name.gsub(/[^a-zA-Z0-9_-]/, '_').gsub(/_+/, '_').gsub(/\A_+|_+\z/, '')
end end
def get_wd_name(uri) def get_wd_name(uri)

View file

@ -0,0 +1,58 @@
#!/usr/bin/env ruby
require 'rdf'
require 'rdf/turtle'
require_relative 'database'
require_relative 'vocabularies'
require_relative 'migrants'
# Links every schema:Country found in data/countries.ttl to Wikidata and
# GeoNames by exact label match.
#
# Results are split over two Turtle files:
# * data/countries_wikidata.ttl        - countries with exactly one match.
# * data/countries_wikidata_review.ttl - countries with no match or with
#   several candidate matches, to be resolved by hand.
#
# Both files are opened in block form so they are flushed and closed even when
# the SPARQL endpoint or a helper raises half-way through the run.
graph = RDF::Graph.load(File.join('data', 'countries.ttl'))
ttl_prefixes = "#{prefixes(:wd, :wdt, :geo, :rdfs, :schema, :country)}\n\n"

# Writes the pair of schema:sameAs statements (Wikidata entity + GeoNames ID)
# for one query solution to the given IO.
write_same_as = lambda do |io, local_name, sol|
  wd_id = get_wd_name(sol[:country].to_s)
  io.puts "country:#{local_name} schema:sameAs #{wd_id} ;"
  io.puts " schema:sameAs geo:#{sol[:geoNamesID]} ."
end

File.open(File.join('data', 'countries_wikidata.ttl'), 'w') do |output|
  File.open(File.join('data', 'countries_wikidata_review.ttl'), 'w') do |review|
    output << ttl_prefixes
    review << ttl_prefixes

    graph.query([nil, RDF.type, SCHEMA.Country]) do |stmt|
      subject = stmt.subject
      label_stmt = graph.query([subject, RDFS.label, nil]).first
      # Countries without a label cannot be looked up; skip them silently.
      next unless label_stmt

      label = label_stmt.object
      # Strip the COUNTRY namespace so the name can be re-emitted as country:<name>.
      local_name = subject.to_s.sub(COUNTRY.to_s, '')

      # The label is matched exactly (including its language tag); candidates
      # must reach wd:Q6256 via wdt:P31/wdt:P279* and carry a wdt:P1566 value.
      solutions = WIKIDATA.query(<<~SPARQL)
        SELECT DISTINCT ?country ?geoNamesID WHERE {
        ?country rdfs:label #{label.to_ntriples} .
        ?country wdt:P31/wdt:P279* wd:Q6256 .
        ?country wdt:P1566 ?geoNamesID .
        }
      SPARQL

      case solutions.size
      when 0
        review.puts "# No Wikidata entity found for #{label}"
        review.puts "country:#{local_name} rdfs:comment \"No Wikidata entity found\" ."
        review.puts
      when 1
        write_same_as.call(output, local_name, solutions.first)
        output.puts
      else
        # Ambiguous: list every candidate so a human can keep the right one.
        review.puts "# Multiple Wikidata entities found for #{label} — remove all but one:"
        solutions.each { |sol| write_same_as.call(review, local_name, sol) }
        review.puts
      end
    end
  end
end

View file

@ -0,0 +1,21 @@
#!/usr/bin/env ruby
require_relative 'database'
require_relative 'vocabularies'
require_relative 'migrants'
# Generates data/countries.ttl: one schema:Country resource with an English
# rdfs:label for every distinct, non-null, non-empty Country value in the
# location table, ordered by country name.
#
# The file is opened in block form so it is flushed and closed even if the
# database query or name conversion raises part-way through.
File.open(File.join('data', 'countries.ttl'), 'w') do |output|
  output.puts prefixes(:rdfs, :schema, :country)
  output.puts

  countries = DB[:location]
              .distinct
              .select(:Country)
              .where(Sequel.~(Country: nil))
              .where(Sequel.~(Country: ''))
              .order(:Country)

  countries.each do |row|
    name = row[:Country]
    # toName turns the raw database value into the local part of the IRI;
    # the untouched value is kept as the human-readable label.
    output.puts "country:#{toName(name)} a schema:Country ;"
    output.puts " rdfs:label #{RDF::Literal.new(name, language: :en).to_ntriples} ."
    output.puts
  end
end

View file

@ -3,6 +3,7 @@
# External vocabularies # External vocabularies
WD = RDF::Vocabulary.new('http://www.wikidata.org/entity/') WD = RDF::Vocabulary.new('http://www.wikidata.org/entity/')
WDT = RDF::Vocabulary.new('http://www.wikidata.org/prop/direct/') WDT = RDF::Vocabulary.new('http://www.wikidata.org/prop/direct/')
GEO = RDF::Vocabulary.new('http://sws.geonames.org/')
RDFS = RDF::Vocabulary.new('http://www.w3.org/2000/01/rdf-schema#') RDFS = RDF::Vocabulary.new('http://www.w3.org/2000/01/rdf-schema#')
SCHEMA = RDF::Vocabulary.new('https://schema.org/') SCHEMA = RDF::Vocabulary.new('https://schema.org/')
SKOS = RDF::Vocabulary.new('http://www.w3.org/2004/02/skos/core#') SKOS = RDF::Vocabulary.new('http://www.w3.org/2004/02/skos/core#')
@ -27,6 +28,7 @@ WORK = RDF::Vocabulary.new('https://daniel.degu.cl/data/theater-migrants/works.t
PREFIXES = { PREFIXES = {
wd: WD, wd: WD,
wdt: WDT, wdt: WDT,
geo: GEO,
rdfs: RDFS, rdfs: RDFS,
schema: SCHEMA, schema: SCHEMA,
country: COUNTRY, country: COUNTRY,
@ -37,7 +39,7 @@ PREFIXES = {
occupation: OCCUPATION, occupation: OCCUPATION,
organisation: ORGANISATION, organisation: ORGANISATION,
personname: PERSONNAME, personname: PERSONNAME,
personOccupation: PROFESSION, personOccupation: PERSON_OCCUPATION,
region: REGION, region: REGION,
relationship: RELATIONSHIP, relationship: RELATIONSHIP,
religion: RELIGION, religion: RELIGION,