Script to generate countries.
This commit is contained in:
parent
cd6ea52f07
commit
351529c8ee
10 changed files with 456 additions and 8 deletions
3
Gemfile
3
Gemfile
|
|
@ -2,3 +2,6 @@ source 'https://rubygems.org'
|
||||||
|
|
||||||
gem 'sequel'
|
gem 'sequel'
|
||||||
gem 'mysql2'
|
gem 'mysql2'
|
||||||
|
gem 'sparql'
|
||||||
|
gem 'nokogiri'
|
||||||
|
gem 'pry'
|
||||||
|
|
|
||||||
89
Gemfile.lock
89
Gemfile.lock
|
|
@ -1,24 +1,111 @@
|
||||||
GEM
|
GEM
|
||||||
remote: https://rubygems.org/
|
remote: https://rubygems.org/
|
||||||
specs:
|
specs:
|
||||||
|
base64 (0.3.0)
|
||||||
|
bcp47_spec (0.2.1)
|
||||||
bigdecimal (4.0.1)
|
bigdecimal (4.0.1)
|
||||||
|
builder (3.3.0)
|
||||||
|
coderay (1.1.3)
|
||||||
|
connection_pool (3.0.2)
|
||||||
|
ebnf (2.6.0)
|
||||||
|
base64 (~> 0.2)
|
||||||
|
htmlentities (~> 4.3)
|
||||||
|
rdf (~> 3.3)
|
||||||
|
scanf (~> 1.0)
|
||||||
|
sxp (~> 2.0)
|
||||||
|
unicode-types (~> 1.8)
|
||||||
|
htmlentities (4.4.2)
|
||||||
|
io-console (0.8.2)
|
||||||
|
link_header (0.0.8)
|
||||||
|
logger (1.7.0)
|
||||||
|
matrix (0.4.3)
|
||||||
|
method_source (1.1.0)
|
||||||
mysql2 (0.5.7)
|
mysql2 (0.5.7)
|
||||||
bigdecimal
|
bigdecimal
|
||||||
|
net-http-persistent (4.0.8)
|
||||||
|
connection_pool (>= 2.2.4, < 4)
|
||||||
|
nokogiri (1.19.0-x86_64-linux-gnu)
|
||||||
|
racc (~> 1.4)
|
||||||
|
pry (0.16.0)
|
||||||
|
coderay (~> 1.1)
|
||||||
|
method_source (~> 1.0)
|
||||||
|
reline (>= 0.6.0)
|
||||||
|
racc (1.8.1)
|
||||||
|
rdf (3.3.1)
|
||||||
|
bcp47_spec (~> 0.2)
|
||||||
|
link_header (~> 0.0, >= 0.0.8)
|
||||||
|
rdf-aggregate-repo (3.3.0)
|
||||||
|
rdf (~> 3.3)
|
||||||
|
rdf-xsd (3.3.0)
|
||||||
|
rdf (~> 3.3)
|
||||||
|
rexml (~> 3.2)
|
||||||
|
readline (0.0.4)
|
||||||
|
reline
|
||||||
|
reline (0.6.3)
|
||||||
|
io-console (~> 0.5)
|
||||||
|
rexml (3.4.4)
|
||||||
|
scanf (1.0.0)
|
||||||
sequel (5.101.0)
|
sequel (5.101.0)
|
||||||
bigdecimal
|
bigdecimal
|
||||||
|
sparql (3.3.2)
|
||||||
|
builder (~> 3.2, >= 3.2.4)
|
||||||
|
ebnf (~> 2.5)
|
||||||
|
logger (~> 1.5)
|
||||||
|
rdf (~> 3.3)
|
||||||
|
rdf-aggregate-repo (~> 3.3)
|
||||||
|
rdf-xsd (~> 3.3)
|
||||||
|
readline (~> 0.0)
|
||||||
|
sparql-client (~> 3.3)
|
||||||
|
sxp (~> 2.0)
|
||||||
|
sparql-client (3.3.0)
|
||||||
|
net-http-persistent (~> 4.0, >= 4.0.2)
|
||||||
|
rdf (~> 3.3)
|
||||||
|
sxp (2.0.0)
|
||||||
|
matrix (~> 0.4)
|
||||||
|
rdf (~> 3.3)
|
||||||
|
unicode-types (1.11.0)
|
||||||
|
|
||||||
PLATFORMS
|
PLATFORMS
|
||||||
ruby
|
|
||||||
x86_64-linux
|
x86_64-linux
|
||||||
|
|
||||||
DEPENDENCIES
|
DEPENDENCIES
|
||||||
mysql2
|
mysql2
|
||||||
|
nokogiri
|
||||||
|
pry
|
||||||
sequel
|
sequel
|
||||||
|
sparql
|
||||||
|
|
||||||
CHECKSUMS
|
CHECKSUMS
|
||||||
|
base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
|
||||||
|
bcp47_spec (0.2.1) sha256=3fd62edf96c126bd9624e4319ac74082a966081859d1ee0ef3c3041640a37810
|
||||||
bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
|
bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
|
||||||
|
builder (3.3.0) sha256=497918d2f9dca528fdca4b88d84e4ef4387256d984b8154e9d5d3fe5a9c8835f
|
||||||
|
coderay (1.1.3) sha256=dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b
|
||||||
|
connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
|
||||||
|
ebnf (2.6.0) sha256=e746a316caa885cc45e243dc33efc194943956760bc9bc13948de1732fbcf63e
|
||||||
|
htmlentities (4.4.2) sha256=bbafbdf69f2eca9262be4efef7e43e6a1de54c95eb600f26984f71d2fe96c5c3
|
||||||
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
||||||
|
link_header (0.0.8) sha256=15c65ce43b29f739b30d05e5f25c22c23797e89cf6f905dbb595fb4c70cb55f9
|
||||||
|
logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
|
||||||
|
matrix (0.4.3) sha256=a0d5ab7ddcc1973ff690ab361b67f359acbb16958d1dc072b8b956a286564c5b
|
||||||
|
method_source (1.1.0) sha256=181301c9c45b731b4769bc81e8860e72f9161ad7d66dd99103c9ab84f560f5c5
|
||||||
mysql2 (0.5.7) sha256=ba09ede515a0ae8a7192040a1b778c0fb0f025fa5877e9be895cd325fa5e9d7b
|
mysql2 (0.5.7) sha256=ba09ede515a0ae8a7192040a1b778c0fb0f025fa5877e9be895cd325fa5e9d7b
|
||||||
|
net-http-persistent (4.0.8) sha256=ef3de8319d691537b329053fae3a33195f8b070bbbfae8bf1a58c796081960e6
|
||||||
|
nokogiri (1.19.0-x86_64-linux-gnu) sha256=f482b95c713d60031d48c44ce14562f8d2ce31e3a9e8dd0ccb131e9e5a68b58c
|
||||||
|
pry (0.16.0) sha256=d76c69065698ed1f85e717bd33d7942c38a50868f6b0673c636192b3d1b6054e
|
||||||
|
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
|
||||||
|
rdf (3.3.1) sha256=dda6a2c95198915fc63d66ee270e35d4a76d431720747a2cf97ecd92062fa150
|
||||||
|
rdf-aggregate-repo (3.3.0) sha256=5693ccabf4dbbec7113c95e9aab028311f19d6022764fdebc6327f9d55a9efdc
|
||||||
|
rdf-xsd (3.3.0) sha256=fab51d27b20344237d9b622ef32e83e4c44940840bfc76a245ce6b6abba44772
|
||||||
|
readline (0.0.4) sha256=6138eef17be2b98298b672c3ea63bf9cb5158d401324f26e1e84f235879c1d6a
|
||||||
|
reline (0.6.3) sha256=1198b04973565b36ec0f11542ab3f5cfeeec34823f4e54cebde90968092b1835
|
||||||
|
rexml (3.4.4) sha256=19e0a2c3425dfbf2d4fc1189747bdb2f849b6c5e74180401b15734bc97b5d142
|
||||||
|
scanf (1.0.0) sha256=533db7f7e5acafea1a145d6c5329cef667a58fbcb7d64379a808ff1199ee1b00
|
||||||
sequel (5.101.0) sha256=d2ae3fd997a7c4572e8357918e777869faf90dc19310fcd6332747122aed2b29
|
sequel (5.101.0) sha256=d2ae3fd997a7c4572e8357918e777869faf90dc19310fcd6332747122aed2b29
|
||||||
|
sparql (3.3.2) sha256=20d73a62801fd6d03c834ca5012c11aaf6594ef536f554e92fd94bf9b3ed64dc
|
||||||
|
sparql-client (3.3.0) sha256=71225eefad48dc2baab6b7008df8a9bcfffa833e5f25387dbe87ff52a5cad64e
|
||||||
|
sxp (2.0.0) sha256=79971bbab54a82fe4a861332475eb8c1f33142d70f2b7e830dacbd9082824721
|
||||||
|
unicode-types (1.11.0) sha256=81d1201273260fa89b85471e7eebb93a51bb4e5f078a525508dcae7835d176f9
|
||||||
|
|
||||||
BUNDLED WITH
|
BUNDLED WITH
|
||||||
4.0.3
|
4.0.3
|
||||||
|
|
|
||||||
0
data/000-found_countries_many.ttl
Normal file
0
data/000-found_countries_many.ttl
Normal file
183
data/000-found_countries_one.ttl
Normal file
183
data/000-found_countries_one.ttl
Normal file
|
|
@ -0,0 +1,183 @@
|
||||||
|
country:Argentina a mig:Country ;
|
||||||
|
rdfs:label "Argentina"@en ;
|
||||||
|
mig:wikidataID wd:Q414 .
|
||||||
|
country:Austria a mig:Country ;
|
||||||
|
rdfs:label "Austria"@en ;
|
||||||
|
mig:wikidataID wd:Q40 .
|
||||||
|
country:Australia a mig:Country ;
|
||||||
|
rdfs:label "Australia"@en ;
|
||||||
|
mig:wikidataID wd:Q408 .
|
||||||
|
country:Azerbaijan a mig:Country ;
|
||||||
|
rdfs:label "Azerbaijan"@en ;
|
||||||
|
mig:wikidataID wd:Q227 .
|
||||||
|
country:Belgium a mig:Country ;
|
||||||
|
rdfs:label "Belgium"@en ;
|
||||||
|
mig:wikidataID wd:Q31 .
|
||||||
|
country:Belarus a mig:Country ;
|
||||||
|
rdfs:label "Belarus"@en ;
|
||||||
|
mig:wikidataID wd:Q184 .
|
||||||
|
country:Bulgaria a mig:Country ;
|
||||||
|
rdfs:label "Bulgaria"@en ;
|
||||||
|
mig:wikidataID wd:Q219 .
|
||||||
|
country:Brazil a mig:Country ;
|
||||||
|
rdfs:label "Brazil"@en ;
|
||||||
|
mig:wikidataID wd:Q155 .
|
||||||
|
country:Canada a mig:Country ;
|
||||||
|
rdfs:label "Canada"@en ;
|
||||||
|
mig:wikidataID wd:Q16 .
|
||||||
|
country:Switzerland a mig:Country ;
|
||||||
|
rdfs:label "Switzerland"@en ;
|
||||||
|
mig:wikidataID wd:Q39 .
|
||||||
|
country:Chile a mig:Country ;
|
||||||
|
rdfs:label "Chile"@en ;
|
||||||
|
mig:wikidataID wd:Q298 .
|
||||||
|
country:Ukraine a mig:Country ;
|
||||||
|
rdfs:label "Ukraine"@en ;
|
||||||
|
mig:wikidataID wd:Q212 .
|
||||||
|
country:Costa_Rica a mig:Country ;
|
||||||
|
rdfs:label "Costa Rica"@en ;
|
||||||
|
mig:wikidataID wd:Q800 .
|
||||||
|
country:Cuba a mig:Country ;
|
||||||
|
rdfs:label "Cuba"@en ;
|
||||||
|
mig:wikidataID wd:Q241 .
|
||||||
|
country:Czech_Republic a mig:Country ;
|
||||||
|
rdfs:label "Czech Republic"@en ;
|
||||||
|
mig:wikidataID wd:Q213 .
|
||||||
|
country:Denmark a mig:Country ;
|
||||||
|
rdfs:label "Denmark"@en ;
|
||||||
|
mig:wikidataID wd:Q35 .
|
||||||
|
country:Algeria a mig:Country ;
|
||||||
|
rdfs:label "Algeria"@en ;
|
||||||
|
mig:wikidataID wd:Q262 .
|
||||||
|
country:Egypt a mig:Country ;
|
||||||
|
rdfs:label "Egypt"@en ;
|
||||||
|
mig:wikidataID wd:Q79 .
|
||||||
|
country:El_Salvador a mig:Country ;
|
||||||
|
rdfs:label "El Salvador"@en ;
|
||||||
|
mig:wikidataID wd:Q792 .
|
||||||
|
country:Spain a mig:Country ;
|
||||||
|
rdfs:label "Spain"@en ;
|
||||||
|
mig:wikidataID wd:Q29 .
|
||||||
|
country:Fiji a mig:Country ;
|
||||||
|
rdfs:label "Fiji"@en ;
|
||||||
|
mig:wikidataID wd:Q712 .
|
||||||
|
country:France a mig:Country ;
|
||||||
|
rdfs:label "France"@en ;
|
||||||
|
mig:wikidataID wd:Q142 .
|
||||||
|
country:United_Kingdom a mig:Country ;
|
||||||
|
rdfs:label "United Kingdom"@en ;
|
||||||
|
mig:wikidataID wd:Q145 .
|
||||||
|
country:Germany a mig:Country ;
|
||||||
|
rdfs:label "Germany"@en ;
|
||||||
|
mig:wikidataID wd:Q183 .
|
||||||
|
country:Greece a mig:Country ;
|
||||||
|
rdfs:label "Greece"@en ;
|
||||||
|
mig:wikidataID wd:Q41 .
|
||||||
|
country:Croatia a mig:Country ;
|
||||||
|
rdfs:label "Croatia"@en ;
|
||||||
|
mig:wikidataID wd:Q224 .
|
||||||
|
country:Hungary a mig:Country ;
|
||||||
|
rdfs:label "Hungary"@en ;
|
||||||
|
mig:wikidataID wd:Q28 .
|
||||||
|
country:Slovakia a mig:Country ;
|
||||||
|
rdfs:label "Slovakia"@en ;
|
||||||
|
mig:wikidataID wd:Q214 .
|
||||||
|
country:Indonesia a mig:Country ;
|
||||||
|
rdfs:label "Indonesia"@en ;
|
||||||
|
mig:wikidataID wd:Q252 .
|
||||||
|
country:Ireland a mig:Country ;
|
||||||
|
rdfs:label "Ireland"@en ;
|
||||||
|
mig:wikidataID wd:Q27 .
|
||||||
|
country:India a mig:Country ;
|
||||||
|
rdfs:label "India"@en ;
|
||||||
|
mig:wikidataID wd:Q668 .
|
||||||
|
country:Israel a mig:Country ;
|
||||||
|
rdfs:label "Israel"@en ;
|
||||||
|
mig:wikidataID wd:Q801 .
|
||||||
|
country:Italy a mig:Country ;
|
||||||
|
rdfs:label "Italy"@en ;
|
||||||
|
mig:wikidataID wd:Q38 .
|
||||||
|
country:Japan a mig:Country ;
|
||||||
|
rdfs:label "Japan"@en ;
|
||||||
|
mig:wikidataID wd:Q17 .
|
||||||
|
country:Latvia a mig:Country ;
|
||||||
|
rdfs:label "Latvia"@en ;
|
||||||
|
mig:wikidataID wd:Q211 .
|
||||||
|
country:Lithuania a mig:Country ;
|
||||||
|
rdfs:label "Lithuania"@en ;
|
||||||
|
mig:wikidataID wd:Q37 .
|
||||||
|
country:Sri_Lanka a mig:Country ;
|
||||||
|
rdfs:label "Sri Lanka"@en ;
|
||||||
|
mig:wikidataID wd:Q854 .
|
||||||
|
country:Monaco a mig:Country ;
|
||||||
|
rdfs:label "Monaco"@en ;
|
||||||
|
mig:wikidataID wd:Q235 .
|
||||||
|
country:Myanmar a mig:Country ;
|
||||||
|
rdfs:label "Myanmar"@en ;
|
||||||
|
mig:wikidataID wd:Q836 .
|
||||||
|
country:Moldova a mig:Country ;
|
||||||
|
rdfs:label "Moldova"@en ;
|
||||||
|
mig:wikidataID wd:Q217 .
|
||||||
|
country:Mexico a mig:Country ;
|
||||||
|
rdfs:label "Mexico"@en ;
|
||||||
|
mig:wikidataID wd:Q96 .
|
||||||
|
country:Netherlands a mig:Country ;
|
||||||
|
rdfs:label "Netherlands"@en ;
|
||||||
|
mig:wikidataID wd:Q55 .
|
||||||
|
country:Norway a mig:Country ;
|
||||||
|
rdfs:label "Norway"@en ;
|
||||||
|
mig:wikidataID wd:Q20 .
|
||||||
|
country:New_Zealand a mig:Country ;
|
||||||
|
rdfs:label "New Zealand"@en ;
|
||||||
|
mig:wikidataID wd:Q664 .
|
||||||
|
country:Panama a mig:Country ;
|
||||||
|
rdfs:label "Panama"@en ;
|
||||||
|
mig:wikidataID wd:Q804 .
|
||||||
|
country:Peru a mig:Country ;
|
||||||
|
rdfs:label "Peru"@en ;
|
||||||
|
mig:wikidataID wd:Q419 .
|
||||||
|
country:Poland a mig:Country ;
|
||||||
|
rdfs:label "Poland"@en ;
|
||||||
|
mig:wikidataID wd:Q36 .
|
||||||
|
country:Paraguay a mig:Country ;
|
||||||
|
rdfs:label "Paraguay"@en ;
|
||||||
|
mig:wikidataID wd:Q733 .
|
||||||
|
country:Palestine a mig:Country ;
|
||||||
|
rdfs:label "Palestine"@en ;
|
||||||
|
mig:wikidataID wd:Q219060 .
|
||||||
|
country:Portugal a mig:Country ;
|
||||||
|
rdfs:label "Portugal"@en ;
|
||||||
|
mig:wikidataID wd:Q45 .
|
||||||
|
country:Romania a mig:Country ;
|
||||||
|
rdfs:label "Romania"@en ;
|
||||||
|
mig:wikidataID wd:Q218 .
|
||||||
|
country:Russia a mig:Country ;
|
||||||
|
rdfs:label "Russia"@en ;
|
||||||
|
mig:wikidataID wd:Q159 .
|
||||||
|
country:Sweden a mig:Country ;
|
||||||
|
rdfs:label "Sweden"@en ;
|
||||||
|
mig:wikidataID wd:Q34 .
|
||||||
|
country:Singapore a mig:Country ;
|
||||||
|
rdfs:label "Singapore"@en ;
|
||||||
|
mig:wikidataID wd:Q334 .
|
||||||
|
country:Slovenia a mig:Country ;
|
||||||
|
rdfs:label "Slovenia"@en ;
|
||||||
|
mig:wikidataID wd:Q215 .
|
||||||
|
country:Serbia a mig:Country ;
|
||||||
|
rdfs:label "Serbia"@en ;
|
||||||
|
mig:wikidataID wd:Q403 .
|
||||||
|
country:Turkey a mig:Country ;
|
||||||
|
rdfs:label "Turkey"@en ;
|
||||||
|
mig:wikidataID wd:Q43 .
|
||||||
|
country:United_States a mig:Country ;
|
||||||
|
rdfs:label "United States"@en ;
|
||||||
|
mig:wikidataID wd:Q30 .
|
||||||
|
country:Uruguay a mig:Country ;
|
||||||
|
rdfs:label "Uruguay"@en ;
|
||||||
|
mig:wikidataID wd:Q77 .
|
||||||
|
country:Vietnam a mig:Country ;
|
||||||
|
rdfs:label "Vietnam"@en ;
|
||||||
|
mig:wikidataID wd:Q881 .
|
||||||
|
country:South_Africa a mig:Country ;
|
||||||
|
rdfs:label "South Africa"@en ;
|
||||||
|
mig:wikidataID wd:Q258 .
|
||||||
24
data/000-found_countries_zero.ttl
Normal file
24
data/000-found_countries_zero.ttl
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
Channel Islands
|
||||||
|
China
|
||||||
|
Columbia
|
||||||
|
Eastern Europe
|
||||||
|
England
|
||||||
|
Estland
|
||||||
|
Europe
|
||||||
|
Northern Ireland (UK)
|
||||||
|
ITaly
|
||||||
|
Korea
|
||||||
|
|
||||||
|
USA
|
||||||
|
North America
|
||||||
|
Poland / Ukraine
|
||||||
|
Prussia
|
||||||
|
Scandinavia
|
||||||
|
Scotland
|
||||||
|
Soviet Union
|
||||||
|
England (UK)
|
||||||
|
Wales (UK)
|
||||||
|
USa
|
||||||
|
UAS
|
||||||
|
West Europe
|
||||||
|
Yugoslavia
|
||||||
53
src/000-found-countries.rb
Normal file
53
src/000-found-countries.rb
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
|
require_relative 'database'
|
||||||
|
require_relative 'vocabularies'
|
||||||
|
|
||||||
|
# Output files of this step: one file per number of Wikidata matches found
# for a country name (exactly one, none, more than one).
found_one = File.open(File.join('data', '000-found_countries_one.ttl'), 'w')
found_zero = File.open(File.join('data', '000-found_countries_zero.ttl'), 'w')
found_many = File.open(File.join('data', '000-found_countries_many.ttl'), 'w')
files = [found_one, found_zero, found_many]

# Every output file starts with the same prefix declarations.
# NOTE(review): the country definitions written by this script use the
# `country:` and `mig:` prefixes, which are not declared here -- confirm
# where those declarations are supposed to come from.
files.each { |file| file.puts prefixes(:wd, :wdt, :rdfs) }

# Distinct, non-empty country names read from the location table, keyed by
# the name exactly as stored in the database.
countries = {}

DB[:location].each do |location|
  name = location[:Country]
  next if name.nil? || name.empty? || countries.key?(name)

  # The id used to interpolate location[:country] (lower case), while the
  # column is read as :Country everywhere else; that lookup presumably always
  # returned nil, leaving the id as a bare "region:".
  countries[name] = { id: "region:#{name}" }
end
|
||||||
|
|
||||||
|
# Builds the Turtle statement describing one country.
#
# country_name_literal - object responding to #to_s (plain name) and
#                        #to_ntriples (serialized literal); the caller passes
#                        a language-tagged RDF literal.
# wd_solutions         - enumerable of query solutions; each is indexed with
#                        :country to obtain the Wikidata entity URI.
#
# Returns a String with one Turtle statement terminated by " .\n". When
# wd_solutions is empty the mig:wikidataID predicate is left out, so the
# statement stays syntactically valid instead of ending in an empty object list.
def country_definition(country_name_literal, wd_solutions)
  wd_ids = wd_solutions.map { |solution| get_wd_name(solution[:country].to_s) }

  parts = [
    "country:#{toName(country_name_literal.to_s)} a mig:Country",
    " rdfs:label #{country_name_literal.to_ntriples}"
  ]
  parts << " mig:wikidataID #{wd_ids.join(' , ')}" unless wd_ids.empty?

  "#{parts.join(" ;\n")} .\n"
end
|
||||||
|
|
||||||
|
# Look every collected country name up on Wikidata and sort it into one of the
# three output files according to how many matching entities were found.
begin
  countries.each_key do |country_name|
    p country_name # progress output
    country_name_literal = RDF::Literal.new(country_name, language: :en)

    # Entities whose English label equals the name and that are an
    # instance of (P31) Q6256.
    query = wikidata.select
                    .where([:country, RDFS.label, country_name_literal])
                    .where([:country, WDT.P31, WD.Q6256])
    solutions = query.solutions

    case solutions.size
    when 0
      # No match: only the raw name is recorded.
      found_zero << "#{country_name}\n"
    when 1
      found_one << country_definition(country_name_literal, solutions)
    else
      found_many << country_definition(country_name_literal, solutions)
    end

    # Pause between requests -- presumably to avoid hammering the endpoint.
    sleep(1)
  end
ensure
  # Close the output files even when a query fails, so that everything
  # written so far is flushed to disk.
  files.each(&:close)
end
|
||||||
|
|
||||||
26
src/database.rb
Normal file
26
src/database.rb
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
require 'sequel'
|
||||||
|
require 'sparql/client'
|
||||||
|
require 'rdf'
|
||||||
|
|
||||||
|
# Database connectors

# Connection to the MySQL database holding the source tables. Each setting can
# be overridden through an environment variable; the fallbacks are the values
# that used to be hard-coded here.
DB = Sequel.mysql2(
  host: ENV.fetch('MIGRANTS_DB_HOST', 'localhost'),
  user: ENV.fetch('MIGRANTS_DB_USER', 'migrants'),
  database: ENV.fetch('MIGRANTS_DB_NAME', 'migrants'),
  password: ENV.fetch('MIGRANTS_DB_PASSWORD', '1234')
)

# SPARQL client for the Wikidata query endpoint.
WIKIDATA = SPARQL::Client.new('https://query.wikidata.org/sparql')

# Accessor for the shared Wikidata client.
#
# This is a method rather than a top-level local variable because locals are
# scoped to the file that defines them: scripts loading this file through
# require_relative could not see a `wikidata` local and would raise NameError.
def wikidata
  WIKIDATA
end

# Vocabularies
#
# The vocabulary objects are defined as constants (WD, WDT, RDFS) in
# vocabularies.rb. The local variables that used to be assigned here were not
# visible outside this file and were never read inside it, so they are gone.
|
||||||
|
|
||||||
|
|
||||||
|
# Common functions
|
||||||
|
|
||||||
|
# Turns a human-readable name into an identifier fragment by replacing every
# space with an underscore.
#
# name - a String, or any object whose #to_s gives the name (converted the
#        same way get_wd_name converts its argument).
#
# Returns a new String; no other character is altered.
def toName(name)
  name.to_s.tr(' ', '_')
end
|
||||||
|
|
||||||
|
# Abbreviates a Wikidata entity URI to its prefixed form, e.g.
# "http://www.wikidata.org/entity/Q414" becomes "wd:Q414".
#
# uri - a String or any object whose #to_s gives the URI.
#
# Returns the prefixed name when the URI starts with the entity namespace;
# any other value is returned as its unchanged string form. Only a leading
# namespace is abbreviated, so an entity URL embedded later in the string
# (for instance inside a query parameter) is left alone.
def get_wd_name(uri)
  entity_namespace = 'http://www.wikidata.org/entity/'
  text = uri.to_s
  return text unless text.start_with?(entity_namespace)

  "wd:#{text.delete_prefix(entity_namespace)}"
end
|
||||||
|
|
||||||
65
src/map_countries.rb
Executable file
65
src/map_countries.rb
Executable file
|
|
@ -0,0 +1,65 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
|
require 'sparql/client'
|
||||||
|
require 'rdf'
|
||||||
|
require 'pry'
|
||||||
|
|
||||||
|
# Output files: one per number of Wikidata matches found for a country name
# (exactly one, none, more than one). Paths are relative to the working
# directory.
found_one = File.open('found_countries_one.ttl', 'w')
found_zero = File.open('found_countries_zero.ttl', 'w')
found_many = File.open('found_countries_many.ttl', 'w')

# SPARQL client for the Wikidata query endpoint.
wikidata = SPARQL::Client.new('https://query.wikidata.org/sparql')

# Vocabularies
wd = RDF::Vocabulary.new('http://www.wikidata.org/entity/')
wdt = RDF::Vocabulary.new('http://www.wikidata.org/prop/direct/')
rdfs = RDF::Vocabulary.new('http://www.w3.org/2000/01/rdf-schema#')

# binding.pry

# query = wikidata.select.where([:s, :p, :o]).limit(10)

# query.each_solution do |solution|
#   p solution
# end

# Provides DB as well as the toName and get_wd_name helpers.
require_relative 'database'

# Distinct, non-empty country names read from the location table, keyed by
# the name exactly as stored in the database.
COUNTRIES = {}

DB[:location].each do |location|
  name = location[:Country]
  # Empty names are skipped as well, matching 000-found-countries.rb.
  next if name.nil? || name.empty? || COUNTRIES.key?(name)

  # The id used to interpolate location[:country] (lower case), while the
  # column is read as :Country everywhere else; that lookup presumably always
  # returned nil, leaving the id as a bare "region:".
  COUNTRIES[name] = { id: "region:#{name}" }
end
|
||||||
|
|
||||||
|
# Builds the Turtle statement describing one country.
#
# country_name_literal - object responding to #to_s (plain name) and
#                        #to_ntriples (serialized literal); the caller passes
#                        a language-tagged RDF literal.
# wd_solutions         - enumerable of query solutions; each is indexed with
#                        :country to obtain the Wikidata entity URI.
#
# Returns a String with one Turtle statement terminated by " .\n". When
# wd_solutions is empty the mig:wikidataID predicate is left out, so the
# statement stays syntactically valid instead of ending in an empty object list.
def country_definition(country_name_literal, wd_solutions)
  wd_ids = wd_solutions.map { |solution| get_wd_name(solution[:country].to_s) }

  parts = [
    "country:#{toName(country_name_literal.to_s)} a mig:Country",
    " rdfs:label #{country_name_literal.to_ntriples}"
  ]
  parts << " mig:wikidataID #{wd_ids.join(' , ')}" unless wd_ids.empty?

  "#{parts.join(" ;\n")} .\n"
end
|
||||||
|
|
||||||
|
# Look every collected country name up on Wikidata and sort it into one of the
# three output files according to how many matching entities were found.
begin
  COUNTRIES.each_key do |country_name|
    p country_name # progress output
    country_name_literal = RDF::Literal.new(country_name, language: :en)

    # Entities whose English label equals the name and that are an
    # instance of (P31) Q6256.
    query = wikidata.select
                    .where([:country, rdfs.label, country_name_literal])
                    .where([:country, wdt.P31, wd.Q6256])
    solutions = query.solutions

    case solutions.size
    when 0
      # No match: only the raw name is recorded.
      found_zero << "#{country_name}\n"
    when 1
      found_one << country_definition(country_name_literal, solutions)
    else
      found_many << country_definition(country_name_literal, solutions)
    end

    # Pause between requests -- presumably to avoid hammering the endpoint.
    sleep(1)
  end
ensure
  # Close the output files even when a query fails, so that everything
  # written so far is flushed to disk.
  [found_one, found_zero, found_many].each(&:close)
end
|
||||||
|
|
||||||
|
|
@ -1,12 +1,6 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
require 'sequel'
|
require_relative 'database'
|
||||||
|
|
||||||
DB = Sequel.mysql2(host: 'localhost', user: 'migrants', database: 'migrants', password: '1234')
|
|
||||||
|
|
||||||
def toName(name)
|
|
||||||
name.gsub(' ', '_')
|
|
||||||
end
|
|
||||||
|
|
||||||
# Define the regions
|
# Define the regions
|
||||||
|
|
||||||
|
|
|
||||||
13
src/vocabularies.rb
Normal file
13
src/vocabularies.rb
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
# Vocabularies

# Loaded here so this file does not depend on another file having required
# the rdf gem first.
require 'rdf'

# Wikidata entities (items such as Q6256).
WD = RDF::Vocabulary.new('http://www.wikidata.org/entity/')
# Wikidata direct ("truthy") properties such as P31.
WDT = RDF::Vocabulary.new('http://www.wikidata.org/prop/direct/')
# RDF Schema terms such as rdfs:label.
RDFS = RDF::Vocabulary.new('http://www.w3.org/2000/01/rdf-schema#')

# Maps the prefix label used in Turtle output to its vocabulary. Frozen so
# the table cannot be altered by accident at runtime.
PREFIXES = { wd: WD, wdt: WDT, rdfs: RDFS }.freeze
|
||||||
|
|
||||||
|
# Renders Turtle @prefix declarations for the requested namespaces.
#
# namespaces - zero or more keys of PREFIXES (e.g. :wd, :rdfs), in the order
#              the declarations should appear.
#
# Returns a String with one "@prefix label: <iri> .\n" line per namespace, or
# an empty String when no namespace is given.
# Raises KeyError for a label missing from PREFIXES; this used to emit a
# declaration with an empty IRI ("@prefix x: <> .") without any warning.
def prefixes(*namespaces)
  namespaces.map do |ns|
    "@prefix #{ns}: <#{PREFIXES.fetch(ns)}> .\n"
  end.join
end
|
||||||
Loading…
Reference in a new issue