-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathupdate-subjects-authority-file.xquery
115 lines (101 loc) · 4.83 KB
/
update-subjects-authority-file.xquery
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
(: Prefix for TEI elements addressed in path expressions throughout this query :)
declare namespace tei="http://www.tei-c.org/ns/1.0";

(: Ask the serializer to indent the result.
   NOTE(review): the saxon prefix is not declared in this file, so this relies
   on the processor predeclaring it - confirm before running on another engine. :)
declare option saxon:output "indent=yes";

(: Read authority files :)

(: Items carrying an xml:id in the curated base authority file :)
declare variable $base := doc("../authority/subjects_base.xml")/tei:TEI/tei:text/tei:body/tei:list/tei:item[@xml:id];

(: Items carrying an xml:id in the additions file (the file this query regenerates) :)
declare variable $additions := doc("../authority/subjects_additions.xml")/tei:TEI/tei:text/tei:body/tei:list/tei:item[@xml:id];

(: Every subject item currently known, base entries first :)
declare variable $currentsubjects := ($base, $additions);

(: All *.xml documents below ../collections; the select/recurse query parameters
   are processor-specific collection URI syntax - presumably Saxon, confirm :)
declare variable $collection := collection('../collections/?select=*.xml;recurse=yes');

(: Atomized values of every xml:id found on, or anywhere inside, the known subject items :)
declare variable $currentkeys := $currentsubjects//@xml:id/data();

(: Separator placed between generated items. Bound to a real newline character
   so that the variable does what its name says (it previously held a space). :)
declare variable $linebreak := '&#10;';
(:~
 : Emits a log line through fn:trace() and contributes nothing to the result.
 :
 : @param $level   severity label; it is upper-cased in the emitted line
 : @param $msg     message text
 : @param $values  further values, appended to the message separated by single spaces
 : @return a zero-length string, so a call can sit inside constructed content
 :         without adding any text to it
 :)
declare function local:logging($level, $msg, $values)
{
    let $details := string-join($values, ' ')
    let $line := concat(upper-case($level), ' ', $msg, ' ', $details, ' ')
    (: trace() hands back its first argument (the empty string) after reporting $line as the label :)
    let $echo := trace('', $line)
    (: Taking a zero-length substring keeps the trace() call referenced while still yielding '' :)
    return substring($echo, 0, 0)
};
(:~
 : Percent-encodes a string while leaving every '%' character already present
 : untouched, so text that is already percent-encoded is not encoded twice.
 :
 : The string is cut at each '%', each piece is passed through encode-for-uri()
 : (which also escapes characters such as '/'), and the pieces are glued back
 : together with '%'.
 :
 : @param $str  text to encode
 : @return the encoded text; an empty string yields an empty string
 :)
declare function local:percentEncode($str as xs:string) as xs:string
{
    let $pieces := tokenize($str, '%')
    let $escaped :=
        for $piece in $pieces
        return encode-for-uri($piece)
    return string-join($escaped, '%')
};
(:
   Main query. Emits three xml-model processing instructions followed by a TEI
   document whose list contains:
     - a dummy item without xml:id,
     - one item for every Library of Congress key that is used in the collection
       but absent from $currentkeys,
     - the LC-keyed items already present in $additions.
   Items are sorted by their display term and separated by $linebreak.
:)
processing-instruction xml-model {'href="http://www.tei-c.org/release/xml/tei/custom/schema/relaxng/tei_all.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"'},
processing-instruction xml-model {'href="http://www.tei-c.org/release/xml/tei/custom/schema/relaxng/tei_all.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"'},
processing-instruction xml-model {'href="authority-schematron.sch" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"'},
<TEI xmlns="http://www.tei-c.org/ns/1.0">
    <teiHeader>
        <fileDesc>
            <titleStmt>
                <title>Title</title>
            </titleStmt>
            <publicationStmt>
                <p>Publication Information</p>
            </publicationStmt>
            <sourceDesc>
                <p>Information about the source</p>
            </sourceDesc>
        </fileDesc>
    </teiHeader>
    <text>
        <body>
            <list>
            {
                (: Keys of the form subject_sh…, subject_n… or subject_no… followed by digits :)
                let $lcpattern := 'subject_(sh|n|no)\d+'

                (: One raw item per (element, LC key) pair. An element qualifies when its
                   @key contains an LC key, none of its key tokens is already known, and
                   it has more than one character of text. A multi-valued @key is split
                   so that each xml:id holds exactly one key instead of a
                   whitespace-separated list, which would not be a valid ID. :)
                let $newlcsh := (
                    for $s in $collection/tei:TEI[@xml:id]//(tei:term|tei:placeName|tei:settlement|tei:region|tei:country)[matches(@key, $lcpattern) and not(tokenize(@key, '\s+') = $currentkeys) and string-length(normalize-space(string())) gt 1]
                    for $key in tokenize($s/@key, '\s+')[matches(., $lcpattern)]
                    return
                        <item xml:id="{ $key }">
                            <term type="display">{ normalize-space(string-join($s//text(), ' ')) }</term>
                            {
                                comment{concat(' ../collections/', local:percentEncode(substring-after(base-uri($s), 'collections/')), ' ')}
                            }
                        </item>
                )

                (: Collapse the raw items to one item per key: the first distinct label
                   becomes the display term, the others become variants, and the distinct
                   source-file comments are kept in sorted order. :)
                let $dedupednewlcsh := (
                    for $k in distinct-values($newlcsh/@xml:id/data())
                    let $samekey := $newlcsh[@xml:id = $k]
                    (: Keys starting with subject_sh link to the subjects authority, all others to names :)
                    let $authority := if (starts-with($k, 'subject_sh')) then 'subjects' else 'names'
                    return
                        <item xml:id="{ $k }">
                            {
                                for $n at $pos in distinct-values($samekey/term/text())
                                return
                                    if ($pos eq 1) then
                                        <term type="display">{ $n }</term>
                                    else
                                        <term type="variant">{ $n }</term>
                            }
                            <note type="links">
                                <list type="links">
                                    <item>
                                        <ref>
                                            {
                                                attribute target { concat('https://id.loc.gov/authorities/', $authority, '/', substring-after($k, 'subject_'), '.html') }
                                            }
                                            <title>LC</title>
                                        </ref>
                                    </item>
                                </list>
                            </note>
                            {
                                for $c in distinct-values($samekey/comment())
                                order by $c
                                return comment{ $c }
                            }
                        </item>
                )

                (: LC-keyed entries already sitting in the additions file are carried over unchanged :)
                let $lcshfrompreviousrun := $additions[matches(@xml:id, $lcpattern)]

                (: Output the new _additions authority file :)
                return (
                    $linebreak,
                    <item>{ comment{' Dummy subject, just so this file validates, do not delete '} }<term type="display"/></item>,
                    $linebreak,
                    $linebreak,
                    comment{' TODO: Review the following entries, update their key attributes in the TEI files, add corresp attributes pointing to related terms, then cut and paste them into subjects_base.xml '},
                    $linebreak,
                    (: Sort on the string value of the first display term only: an order key must be
                       a single value, and a carried-over item may hold several display terms or a
                       display term made of several text nodes. :)
                    for $e in ($dedupednewlcsh, $lcshfrompreviousrun)
                    order by string(($e/term[@type = 'display'])[1])
                    return ($e, $linebreak),
                    $linebreak,
                    $linebreak
                )
            }
            </list>
        </body>
    </text>
</TEI>