migrants/Rakefile
Daniel Hernandez b4ed3cbde7 Add step-07: clean up secondary organisations and simplify person-profession
- Remove redundant tm:secondaryOrganisation when it equals org:organization (230)
- Promote tm:secondaryOrganisation to org:organization when no primary exists (256)
- Replace tm:PersonProfession intermediate class with direct schema:hasOccupation
  links from persons to schema:Occupation instances (742 instances removed)
- Remove tm:PersonProfession class and its properties from the ontology
- Add step-07 binary, Rakefile task, figures, and documentation
2026-03-01 15:48:51 +01:00

211 lines
7.3 KiB
Ruby

# Path of the Ruby interpreter that is running Rake; scripts are executed
# with this same interpreter.
RUBY = RbConfig.ruby

# Runs a Ruby script with the current interpreter.
#
# The interpreter and the script are passed to `sh` as separate arguments, so
# no shell parses the command line: an interpreter or script path containing
# spaces or shell metacharacters is handed over intact.
#
# @param script [String] path of the script to run (one path, one argument)
def run(script)
  sh RUBY, script
end
# ── Individual file tasks ────────────────────────────────────────────────────
# Output file => prerequisites. The last prerequisite of every entry is the
# script that writes the output; the task action executes it through `run`.
script_outputs = {
  'data/countries.ttl' => %w[src/map_countries_from_location.rb],
  # Generates data/countries_wikidata_review.ttl as a side-effect
  'data/countries_wikidata.ttl' => %w[data/countries.ttl src/link_countries_to_wikidata.rb],
  'data/locations.ttl' => %w[src/map_locations.rb],
  'data/migrations.ttl' => %w[src/map_migration_table.rb],
  'data/occupations.ttl' => %w[src/map_occupations_from_person_profession.rb],
  'data/organisations.ttl' => %w[src/map_organisation.rb],
  'data/personnames.ttl' => %w[src/map_personnames.rb],
  # Generates data/persons_occupations_review.ttl as a side-effect
  'data/persons_occupations.ttl' => %w[data/occupations.ttl src/map_person_profession.rb],
  'data/persons.ttl' => %w[src/map_person.rb],
  'data/relationships.ttl' => %w[src/map_relationship.rb],
  'data/persons_religions.ttl' => %w[src/map_religions.rb],
  # Generates data/religions_wikidata_review.ttl as a side-effect
  # NOTE(review): no task in this section produces data/religions.ttl —
  # confirm it is checked in or written by another script.
  'data/religions_wikidata.ttl' => %w[data/religions.ttl src/link_religions_to_wikidata.rb],
  'data/works.ttl' => %w[src/map_work.rb]
}

script_outputs.each do |target, prerequisites|
  script = prerequisites.last
  file target => prerequisites do
    run script
  end
end
# ── Pipeline steps ───────────────────────────────────────────────────────────
# Query files of each step; they are prerequisites of that step's graph.
UPDATE_QUERIES = FileList['updates/*.rq']
UPDATE_QUERIES_STEP03 = FileList['updates_step03/*.rq']
UPDATE_QUERIES_STEP04 = FileList['updates_step04/*.rq']
UPDATE_QUERIES_STEP05 = FileList['updates_step05/*.rq']
CONSTRUCT_QUERIES_STEP06 = FileList['constructs_step06/*.rq']
UPDATE_QUERIES_STEP07 = FileList['updates_step07/*.rq']

# Steps 01 and 02 are Ruby scripts executed through `run`.
file 'data/graph-01.ttl' => %w[map/step-01.rb] do
  run 'map/step-01.rb'
end

file 'data/graph-02.ttl' => ['data/graph-01.ttl', 'map/step-02.rb', *UPDATE_QUERIES] do
  run 'map/step-02.rb'
end

# Steps 03..07 invoke a `step-NN` command. Each graph depends on the previous
# step's graph and on the query files of its own step.
# NOTE(review): the step-NN commands themselves are not prerequisites and are
# invoked by bare name — presumably they must be on PATH; confirm.
{
  3 => UPDATE_QUERIES_STEP03,
  4 => UPDATE_QUERIES_STEP04,
  5 => UPDATE_QUERIES_STEP05,
  6 => CONSTRUCT_QUERIES_STEP06,
  7 => UPDATE_QUERIES_STEP07
}.each do |step, queries|
  target = format('data/graph-%02d.ttl', step)
  previous = format('data/graph-%02d.ttl', step - 1)
  file target => [previous, *queries] do
    sh format('step-%02d', step)
  end
end
# ── Examples ─────────────────────────────────────────────────────────────────
# Location of the `sparql` command-line tool. Defaults to the cargo install
# directory; set the SPARQL_BIN environment variable to use another binary.
SPARQL = ENV.fetch('SPARQL_BIN') { File.expand_path('~/.cargo/bin/sparql') }

# For every pipeline step 01..06 and every variant (1hop, 2hop), run
# queries/step_NN_<variant>_example.rq against data/graph-NN.ttl and write the
# prettified result to data_examples/step_NN_<variant>.ttl.
#
# Every example of step NN reads the graph of that same step. (The step_01
# 2hop example used to read data/graph-02.ttl, unlike all other examples.)
(1..6).each do |step|
  graph = format('data/graph-%02d.ttl', step)
  %w[1hop 2hop].each do |variant|
    query = format('queries/step_%02d_%s_example.rq', step, variant)
    target = format('data_examples/step_%02d_%s.ttl', step, variant)
    file target => [graph, query] do
      begin
        sh "#{SPARQL} #{query} --graph #{graph} --prettify > #{target}"
      rescue StandardError
        # The shell creates/truncates the target before the query runs. Remove
        # the partial file so a failed run is not treated as up to date by the
        # next invocation, then let the failure propagate.
        rm_f target
        raise
      end
    end
  end
end
# ── Aggregate tasks ──────────────────────────────────────────────────────────
# Outputs of the individual file tasks.
GENERATED = [
  'data/countries.ttl',
  'data/countries_wikidata.ttl',
  'data/locations.ttl',
  'data/migrations.ttl',
  'data/occupations.ttl',
  'data/organisations.ttl',
  'data/personnames.ttl',
  'data/persons_occupations.ttl',
  'data/persons.ttl',
  'data/relationships.ttl',
  'data/persons_religions.ttl',
  'data/religions_wikidata.ttl',
  'data/works.ttl'
].freeze

# Pipeline graphs data/graph-01.ttl .. data/graph-07.ttl, in step order.
GRAPHS = (1..7).map { |step| format('data/graph-%02d.ttl', step) }.freeze

# Example files: a 1hop and a 2hop file for each of the steps 01..06.
EXAMPLES = (1..6).flat_map do |step|
  %w[1hop 2hop].map { |variant| format('data_examples/step_%02d_%s.ttl', step, variant) }
end.freeze

# `rake examples` builds only the example files; plain `rake` builds everything.
task :examples => EXAMPLES
task :default => GENERATED + GRAPHS + EXAMPLES
# Removes every file listed in GENERATED, GRAPHS and EXAMPLES, plus the
# *_review.ttl files that this Rakefile documents as side-effect outputs of
# the scripts (they are not task targets, so they are listed here by hand).
task :clean do
  review_files = %w[
    data/countries_wikidata_review.ttl
    data/persons_occupations_review.ttl
    data/religions_wikidata_review.ttl
  ]
  rm_f GENERATED + GRAPHS + EXAMPLES + review_files
end