Add step-08: merge migration reasons and refactor temporal properties

Replace tm:reason and tm:secondaryReason with a single tm:hasReason
property (937 triples). Refactor 6 flat date properties into structured
tm:uncertainBeginning/tm:uncertainEnd intervals using W3C OWL-Time,
introducing tm:FuzzyInterval as a superclass of tm:Migration,
org:Membership, tm:Relationship, tm:PersonName, and
tm:ReligionAffiliation. Output: data/graph-08.ttl (218,251 triples).
This commit is contained in:
Daniel Hernandez 2026-03-01 17:41:50 +01:00
parent b4ed3cbde7
commit bb78b8a758
14 changed files with 216926 additions and 46 deletions

View file

@ -31,6 +31,10 @@ path = "src/map/step_06.rs"
name = "step-07"
path = "src/map/step_07.rs"
[[bin]]
name = "step-08"
path = "src/map/step_08.rs"
[dependencies]
sophia = "0.9"
# NOTE(review): "*" places no constraint on the oxigraph version, so a fresh
# dependency resolution may select a release with a different API than the one
# src/map/step_08.rs was written against — consider pinning a version range.
oxigraph = "*"

View file

@ -270,3 +270,36 @@ To run:
```sh
cargo run --release --bin step-07
```
### Step 8 - Merge migration reasons and refactor temporal properties
#### Task
Two structural changes are applied to the graph produced by Step 7:
**Merge migration reasons.** The functional properties `tm:reason` (774 uses) and `tm:secondaryReason` (163 uses) are replaced by a single non-functional property `tm:hasReason`, resulting in 937 reason triples.
**Refactor temporal properties into tm:FuzzyInterval.** Six flat date properties (`tm:dateStartMin`, `tm:dateStartMax`, `tm:dateEndMin`, `tm:dateEndMax`, `tm:dateStartFuzzy`, `tm:dateEndFuzzy`) are replaced by a structured model based on W3C OWL-Time. A new class `tm:FuzzyInterval` (subclass of `time:TemporalEntity`) is introduced, with two object properties `tm:uncertainBeginning` and `tm:uncertainEnd` pointing to `time:DateTimeInterval` resources. Each interval has `time:hasBeginning` and `time:hasEnd` linking to `time:Instant` nodes with `time:inXSDDate` values, plus an optional `rdfs:label` for fuzzy date strings. Five classes are declared as subclasses of `tm:FuzzyInterval`: `tm:Migration`, `org:Membership`, `tm:Relationship`, `tm:PersonName`, `tm:ReligionAffiliation`.
#### Summary
8 SPARQL UPDATE queries in `updates_step08/`:
| Query | Description |
|-------|-------------|
| 001 | Merge `tm:reason` and `tm:secondaryReason` into `tm:hasReason` |
| 002a | Create `tm:uncertainBeginning` interval from `tm:dateStartMin` |
| 002b | Add upper bound to `tm:uncertainBeginning` from `tm:dateStartMax` |
| 003a | Create `tm:uncertainEnd` interval from `tm:dateEndMin` |
| 003b | Add upper bound to `tm:uncertainEnd` from `tm:dateEndMax` |
| 004a | Add `rdfs:label` on `uncertainBeginning` from `tm:dateStartFuzzy` |
| 004b | Add `rdfs:label` on `uncertainEnd` from `tm:dateEndFuzzy` |
| 005 | Remove all 6 old date properties |
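The program `src/map/step_08.rs` loads `data/graph-07.ttl`, applies all queries in alphabetical order of their file names (so `005`, which removes the old date properties, runs only after `002a`–`004b` have read them), and writes `data/graph-08.ttl`.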
To run:
```sh
cargo run --release --bin step-08
```

View file

@ -103,6 +103,12 @@ file 'data/graph-07.ttl' => ['data/graph-06.ttl'] + UPDATE_QUERIES_STEP07 do
sh 'step-07'
end
# Step 8: data/graph-08.ttl depends on the step-7 graph and on every
# SPARQL UPDATE file in updates_step08/.
UPDATE_QUERIES_STEP08 = FileList['updates_step08/*.rq']

file 'data/graph-08.ttl' => ['data/graph-07.ttl', *UPDATE_QUERIES_STEP08] do
  sh 'step-08'
end
# ── Examples ─────────────────────────────────────────────────────────────────
SPARQL = File.expand_path('~/.cargo/bin/sparql')
@ -181,6 +187,7 @@ GRAPHS = %w[
data/graph-05.ttl
data/graph-06.ttl
data/graph-07.ttl
data/graph-08.ttl
].freeze
EXAMPLES = %w[

216588
data/graph-08.ttl Normal file

File diff suppressed because it is too large Load diff

84
src/map/step_08.rs Normal file
View file

@ -0,0 +1,84 @@
//! Step 8: Merge migration reasons and refactor temporal properties.
//!
//! Loads `data/graph-07.ttl`, applies all SPARQL UPDATE queries from the
//! `updates_step08/` directory (sorted alphabetically), and writes the
//! result to `data/graph-08.ttl`.
//!
//! Usage: run from the mapping project directory:
//!
//! ```sh
//! cargo run --release --bin step-08
//! ```
use std::fs;
use oxigraph::io::{RdfFormat, RdfParser};
use oxigraph::model::GraphNameRef;
use oxigraph::store::Store;
/// Runs step 8 of the mapping pipeline.
///
/// Loads `data/graph-07.ttl` into an in-memory store, applies every `*.rq`
/// SPARQL UPDATE file found in `updates_step08/` in sorted path order, logs
/// the triple-count change caused by each file to stderr, and serializes the
/// default graph to `data/graph-08.ttl` as Turtle.
///
/// # Errors
///
/// Returns an error if the input graph cannot be opened or parsed, if the
/// updates directory or any of its entries cannot be read, if an update fails
/// (the message names the offending file), or if the output file cannot be
/// created, written, or flushed.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let input_path = "data/graph-07.ttl";
    let output_path = "data/graph-08.ttl";
    let updates_dir = "updates_step08";

    // Create the in-memory store and load the input graph into the default graph.
    let store = Store::new()?;
    eprintln!("Loading graph from {}...", input_path);
    let reader = std::io::BufReader::new(fs::File::open(input_path)?);
    let parser = RdfParser::from_format(RdfFormat::Turtle)
        .without_named_graphs()
        .with_default_graph(GraphNameRef::DefaultGraph);
    store.load_from_reader(parser, reader)?;
    eprintln!("Loaded {} triples.", count_triples(&store));

    // Collect the SPARQL UPDATE files. Directory-entry errors are propagated
    // instead of skipped: silently dropping an unreadable entry would apply an
    // incomplete set of updates and still report success.
    let mut update_files = Vec::new();
    for entry in fs::read_dir(updates_dir)? {
        let path = entry?.path();
        if path.extension().and_then(|ext| ext.to_str()) == Some("rq") {
            update_files.push(path);
        }
    }
    // `read_dir` yields entries in a platform-dependent order; sort so the
    // updates always run in file-name order.
    update_files.sort();

    // Apply each SPARQL UPDATE file and report how the triple count changed.
    for query_file in &update_files {
        let name = query_file
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("unknown");
        let query = fs::read_to_string(query_file)?;
        let before = count_triples(&store);
        // Name the file in the error so a broken query can be located.
        store
            .update(&query)
            .map_err(|e| format!("failed to apply {}: {}", name, e))?;
        let after = count_triples(&store);
        let diff = after as i64 - before as i64;
        // `{:+}` always prints the sign, so zero is rendered as "+0".
        eprintln!(
            "Applied {}: {} -> {} triples ({:+})",
            name, before, after, diff
        );
    }

    let final_count = count_triples(&store);
    eprintln!("Writing {} triples to {}...", final_count, output_path);

    // Dump the default graph to Turtle.
    fs::create_dir_all("data")?;
    let mut writer = std::io::BufWriter::new(fs::File::create(output_path)?);
    store.dump_graph_to_writer(GraphNameRef::DefaultGraph, RdfFormat::Turtle, &mut writer)?;
    // `BufWriter` ignores flush errors when dropped; flush explicitly so a
    // failed final write is reported instead of leaving a truncated file.
    std::io::Write::flush(&mut writer)?;

    eprintln!("Done.");
    Ok(())
}
/// Returns how many quads `store` yields for a fully unconstrained pattern
/// (subject, predicate, object and graph name all left open).
///
/// The items are only counted, never inspected.
fn count_triples(store: &Store) -> usize {
    let every_quad = store.quads_for_pattern(None, None, None, None);
    every_quad.count()
}

View file

@ -7,6 +7,7 @@
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix schema: <https://schema.org/> .
@prefix org: <http://www.w3.org/ns/org#> .
@prefix time: <http://www.w3.org/2006/time#> .
@prefix tm: <https://daniel.degu.cl/ontologies/theatre-migrants/> .
# =============================================================================
@ -19,6 +20,7 @@
dct:title "Theatre Migrants Ontology"@en ;
dct:description "A domain ontology extending Schema.org and the W3C Organization Ontology with custom classes and properties for modeling migrations, organizational memberships, interpersonal relationships, religion affiliations, and temporal uncertainty in the context of European theatre history."@en ;
owl:imports <http://www.w3.org/ns/org#> ;
owl:imports <http://www.w3.org/2006/time#> ;
dct:created "2025-01-01"^^xsd:date ;
dct:license <https://creativecommons.org/licenses/by/4.0/> ;
dct:creator <https://orcid.org/0000-0002-7896-0875> ;
@ -49,22 +51,32 @@
# Classes — domain-specific
# =============================================================================
tm:FuzzyInterval a owl:Class ;
rdfs:subClassOf time:TemporalEntity ;
rdfs:label "FuzzyInterval"@en ;
rdfs:comment "An entity with uncertain temporal bounds, modeled via tm:uncertainBeginning and tm:uncertainEnd pointing to time:DateTimeInterval resources."@en ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:Migration a owl:Class ;
rdfs:subClassOf tm:FuzzyInterval ;
rdfs:label "Migration"@en ;
rdfs:comment "A migration event recording the movement of a person from one place to another."@en ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:Relationship a owl:Class ;
rdfs:subClassOf tm:FuzzyInterval ;
rdfs:label "Relationship"@en ;
rdfs:comment "An interpersonal relationship between two persons."@en ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:PersonName a owl:Class ;
rdfs:subClassOf tm:FuzzyInterval ;
rdfs:label "PersonName"@en ;
rdfs:comment "An alternative or historical name for a person, optionally typed and time-bounded."@en ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:ReligionAffiliation a owl:Class ;
rdfs:subClassOf tm:FuzzyInterval ;
rdfs:label "ReligionAffiliation"@en ;
rdfs:comment "A person's affiliation with a religion during a given period."@en ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
@ -199,16 +211,9 @@ tm:destinationPlace a owl:ObjectProperty ;
rdfs:range schema:Place ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:reason a owl:ObjectProperty ;
rdfs:label "reason"@en ;
rdfs:comment "The primary reason for this migration."@en ;
rdfs:domain tm:Migration ;
rdfs:range tm:MigrationReason ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:secondaryReason a owl:ObjectProperty ;
rdfs:label "secondaryReason"@en ;
rdfs:comment "A secondary reason for this migration."@en ;
tm:hasReason a owl:ObjectProperty ;
rdfs:label "hasReason"@en ;
rdfs:comment "A reason for this migration."@en ;
rdfs:domain tm:Migration ;
rdfs:range tm:MigrationReason ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
@ -222,6 +227,8 @@ tm:secondaryReason a owl:ObjectProperty ;
# domain-specific information.
# =============================================================================
org:Membership rdfs:subClassOf tm:FuzzyInterval .
tm:workLocation a owl:ObjectProperty ;
rdfs:label "workLocation"@en ;
rdfs:comment "The place where this work engagement took place."@en ;
@ -409,47 +416,25 @@ tm:geoNamesID a owl:DatatypeProperty ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
# =============================================================================
# Datatype properties — shared temporal properties
# Object properties — shared temporal properties (FuzzyInterval)
#
# The following properties model temporal uncertainty. Events (Migration,
# Work, Relationship, PersonName, ReligionAffiliation) may have imprecise
# dates expressed as min/max bounds or fuzzy strings.
# Temporal uncertainty is modeled using tm:uncertainBeginning and
# tm:uncertainEnd, each pointing to a time:DateTimeInterval with
# time:hasBeginning/time:hasEnd pointing to time:Instant nodes.
# =============================================================================
tm:dateStartMin a owl:DatatypeProperty ;
rdfs:label "dateStartMin"@en ;
rdfs:comment "Lower bound of the start date."@en ;
rdfs:range xsd:date ;
tm:uncertainBeginning a owl:ObjectProperty ;
rdfs:label "uncertainBeginning"@en ;
rdfs:comment "The uncertain beginning of this entity, expressed as a time:DateTimeInterval."@en ;
rdfs:domain tm:FuzzyInterval ;
rdfs:range time:DateTimeInterval ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:dateStartMax a owl:DatatypeProperty ;
rdfs:label "dateStartMax"@en ;
rdfs:comment "Upper bound of the start date."@en ;
rdfs:range xsd:date ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:dateEndMin a owl:DatatypeProperty ;
rdfs:label "dateEndMin"@en ;
rdfs:comment "Lower bound of the end date."@en ;
rdfs:range xsd:date ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:dateEndMax a owl:DatatypeProperty ;
rdfs:label "dateEndMax"@en ;
rdfs:comment "Upper bound of the end date."@en ;
rdfs:range xsd:date ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:dateStartFuzzy a owl:DatatypeProperty ;
rdfs:label "dateStartFuzzy"@en ;
rdfs:comment "Free-text representation of an uncertain start date."@en ;
rdfs:range xsd:string ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
tm:dateEndFuzzy a owl:DatatypeProperty ;
rdfs:label "dateEndFuzzy"@en ;
rdfs:comment "Free-text representation of an uncertain end date."@en ;
rdfs:range xsd:string ;
tm:uncertainEnd a owl:ObjectProperty ;
rdfs:label "uncertainEnd"@en ;
rdfs:comment "The uncertain end of this entity, expressed as a time:DateTimeInterval."@en ;
rdfs:domain tm:FuzzyInterval ;
rdfs:range time:DateTimeInterval ;
rdfs:isDefinedBy <https://daniel.degu.cl/ontologies/theatre-migrants/> .
# =============================================================================

View file

@ -0,0 +1,25 @@
# Fold the two legacy reason predicates into the single tm:hasReason property.
# Every `?migration tm:reason ?why` and `?migration tm:secondaryReason ?why`
# triple is removed and re-asserted as `?migration tm:hasReason ?why`.
# Recorded counts: tm:reason affects 774 migrations, tm:secondaryReason
# affects 163 migrations; total tm:hasReason after the merge: 937.

PREFIX tm: <https://daniel.degu.cl/ontologies/theatre-migrants/>

DELETE {
  ?migration ?legacyReason ?why .
}
INSERT {
  ?migration tm:hasReason ?why .
}
WHERE {
  VALUES ?legacyReason { tm:reason tm:secondaryReason }
  ?migration ?legacyReason ?why .
}

View file

@ -0,0 +1,19 @@
# Lower bound of the uncertain beginning, taken from tm:dateStartMin.
# Result shape:
#   subject --tm:uncertainBeginning--> time:DateTimeInterval
#           --time:hasBeginning------> time:Instant --time:inXSDDate--> date
# Node IRIs are minted from the subject IRI:
#   <subject>/uncertainBeginning          (interval)
#   <subject>/uncertainBeginning/begin    (instant)

PREFIX tm: <https://daniel.degu.cl/ontologies/theatre-migrants/>
PREFIX time: <http://www.w3.org/2006/time#>

INSERT {
  ?subject tm:uncertainBeginning ?span .
  ?span a time:DateTimeInterval ;
        time:hasBeginning ?point .
  ?point a time:Instant ;
         time:inXSDDate ?lower .
}
WHERE {
  ?subject tm:dateStartMin ?lower .
  BIND(CONCAT(STR(?subject), "/uncertainBeginning") AS ?spanIri)
  BIND(IRI(?spanIri) AS ?span)
  BIND(IRI(CONCAT(?spanIri, "/begin")) AS ?point)
}

View file

@ -0,0 +1,19 @@
# Upper bound of the uncertain beginning, taken from tm:dateStartMax.
# Result shape:
#   subject --tm:uncertainBeginning--> time:DateTimeInterval
#           --time:hasEnd------------> time:Instant --time:inXSDDate--> date
# Node IRIs are minted from the subject IRI:
#   <subject>/uncertainBeginning        (interval)
#   <subject>/uncertainBeginning/end    (instant)

PREFIX tm: <https://daniel.degu.cl/ontologies/theatre-migrants/>
PREFIX time: <http://www.w3.org/2006/time#>

INSERT {
  ?subject tm:uncertainBeginning ?span .
  ?span a time:DateTimeInterval ;
        time:hasEnd ?point .
  ?point a time:Instant ;
         time:inXSDDate ?upper .
}
WHERE {
  ?subject tm:dateStartMax ?upper .
  BIND(CONCAT(STR(?subject), "/uncertainBeginning") AS ?spanIri)
  BIND(IRI(?spanIri) AS ?span)
  BIND(IRI(CONCAT(?spanIri, "/end")) AS ?point)
}

View file

@ -0,0 +1,19 @@
# Lower bound of the uncertain end, taken from tm:dateEndMin.
# Result shape:
#   subject --tm:uncertainEnd--> time:DateTimeInterval
#           --time:hasBeginning-> time:Instant --time:inXSDDate--> date
# Node IRIs are minted from the subject IRI:
#   <subject>/uncertainEnd          (interval)
#   <subject>/uncertainEnd/begin    (instant)

PREFIX tm: <https://daniel.degu.cl/ontologies/theatre-migrants/>
PREFIX time: <http://www.w3.org/2006/time#>

INSERT {
  ?subject tm:uncertainEnd ?span .
  ?span a time:DateTimeInterval ;
        time:hasBeginning ?point .
  ?point a time:Instant ;
         time:inXSDDate ?lower .
}
WHERE {
  ?subject tm:dateEndMin ?lower .
  BIND(CONCAT(STR(?subject), "/uncertainEnd") AS ?spanIri)
  BIND(IRI(?spanIri) AS ?span)
  BIND(IRI(CONCAT(?spanIri, "/begin")) AS ?point)
}

View file

@ -0,0 +1,19 @@
# Upper bound of the uncertain end, taken from tm:dateEndMax.
# Result shape:
#   subject --tm:uncertainEnd--> time:DateTimeInterval
#           --time:hasEnd------> time:Instant --time:inXSDDate--> date
# Node IRIs are minted from the subject IRI:
#   <subject>/uncertainEnd        (interval)
#   <subject>/uncertainEnd/end    (instant)

PREFIX tm: <https://daniel.degu.cl/ontologies/theatre-migrants/>
PREFIX time: <http://www.w3.org/2006/time#>

INSERT {
  ?subject tm:uncertainEnd ?span .
  ?span a time:DateTimeInterval ;
        time:hasEnd ?point .
  ?point a time:Instant ;
         time:inXSDDate ?upper .
}
WHERE {
  ?subject tm:dateEndMax ?upper .
  BIND(CONCAT(STR(?subject), "/uncertainEnd") AS ?spanIri)
  BIND(IRI(?spanIri) AS ?span)
  BIND(IRI(CONCAT(?spanIri, "/end")) AS ?point)
}

View file

@ -0,0 +1,16 @@
# Carry the free-text start date (tm:dateStartFuzzy) over as the rdfs:label
# of the subject's uncertainBeginning interval. The interval node
# <subject>/uncertainBeginning is linked and typed here as well, so the label
# is attached even when the subject has no min/max start date.

PREFIX tm: <https://daniel.degu.cl/ontologies/theatre-migrants/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX time: <http://www.w3.org/2006/time#>

INSERT {
  ?subject tm:uncertainBeginning ?span .
  ?span a time:DateTimeInterval ;
        rdfs:label ?fuzzyText .
}
WHERE {
  ?subject tm:dateStartFuzzy ?fuzzyText .
  BIND(IRI(CONCAT(STR(?subject), "/uncertainBeginning")) AS ?span)
}

View file

@ -0,0 +1,16 @@
# Carry the free-text end date (tm:dateEndFuzzy) over as the rdfs:label of
# the subject's uncertainEnd interval. The interval node
# <subject>/uncertainEnd is linked and typed here as well, so the label is
# attached even when the subject has no min/max end date.

PREFIX tm: <https://daniel.degu.cl/ontologies/theatre-migrants/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX time: <http://www.w3.org/2006/time#>

INSERT {
  ?subject tm:uncertainEnd ?span .
  ?span a time:DateTimeInterval ;
        rdfs:label ?fuzzyText .
}
WHERE {
  ?subject tm:dateEndFuzzy ?fuzzyText .
  BIND(IRI(CONCAT(STR(?subject), "/uncertainEnd")) AS ?span)
}

View file

@ -0,0 +1,46 @@
# Drop every triple that still uses one of the six flat date properties.
# Their values are expected to have been copied into the
# tm:uncertainBeginning / tm:uncertainEnd intervals before this file runs.

PREFIX tm: <https://daniel.degu.cl/ontologies/theatre-migrants/>

DELETE {
  ?holder ?legacyDate ?value .
}
WHERE {
  VALUES ?legacyDate {
    tm:dateStartMin
    tm:dateStartMax
    tm:dateEndMin
    tm:dateEndMax
    tm:dateStartFuzzy
    tm:dateEndFuzzy
  }
  ?holder ?legacyDate ?value .
}