added task to pull down all THDL transcripts from orion
This commit is contained in:
parent
f804001124
commit
4f553caf54
1 changed file with 24 additions and 26 deletions
|
@ -44,16 +44,6 @@
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
<target name="archive-get-transcripts" depends="archive-compile">
|
|
||||||
<java classname="org.thdl.archive.GetTranscripts" fork="yes">
|
|
||||||
<arg value="${archive.metadata.file}"/>
|
|
||||||
<arg value="${wylie.transcript.dir}"/>
|
|
||||||
<classpath>
|
|
||||||
<pathelement location="${bin.dir}"/>
|
|
||||||
<path refid="classpath"/>
|
|
||||||
</classpath>
|
|
||||||
</java>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="archive-transcripts-to-unicode">
|
<target name="archive-transcripts-to-unicode">
|
||||||
<java classname="net.sf.saxon.Transform" fork="yes">
|
<java classname="net.sf.saxon.Transform" fork="yes">
|
||||||
|
@ -70,34 +60,42 @@
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<!-- archive tasks -->
|
<!-- archive tasks -->
|
||||||
|
|
||||||
|
<!-- title_metadata.xml is not being retrieved as UTF-8. need to fix this.
|
||||||
|
perhaps we don't need to decodeUTF the transcript file name???
|
||||||
|
-->
|
||||||
<target name="archive-get-metadata">
|
<target name="archive-get-metadata">
|
||||||
<mkdir dir="${archive}"/>
|
<mkdir dir="${archive}"/>
|
||||||
<get src="${get.title.metadata}" dest="${archive}/title_metadata.xml" verbose="on"/>
|
<get src="${get.title.metadata}" dest="${archive}/title_metadata.xml" verbose="on"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="archive-get-transcripts">
|
<target name="archive-get-list-of-transcripts">
|
||||||
<mkdir dir="${wylie}"/>
|
<mkdir dir="${wylie}"/>
|
||||||
<xslt in="${archive}/title_metadata.xml" out="${archive}/titles_as_list.txt" style="${styles}/get-list-of-transcripts.xsl"/>
|
<xslt in="${archive}/title_metadata.xml" out="${archive}/titles_as_list.txt" style="${styles}/get-list-of-transcripts.xsl"/>
|
||||||
<loadfile property="transcript-list" srcFile="${archive}/titles_as_list.txt"/>
|
</target>
|
||||||
<foreach list="${transcript-list}" delimiter=" " param="filename" target="archive-get-one-transcript"/>
|
|
||||||
|
<!-- must delete following transcripts from titles_as_list.txt
|
||||||
|
02069_clip-21-husked-barle_00.xml
|
||||||
|
02116_nasal-congestion_00.xml
|
||||||
|
A_New_Script02.xml
|
||||||
|
|
||||||
|
00007_06-dawa-and-purdrön_08.xml
|
||||||
|
or, should put in protection against URLs that cannot be "got"
|
||||||
|
-->
|
||||||
|
<target name="archive-get-transcripts">
|
||||||
|
<loadfile property="transcript-list" srcFile="${archive}/titles_as_list.txt"/>
|
||||||
|
<!--<loadfile property="transcript-list" srcFile="${archive}/titles_as_list.txt" encoding="UTF-8"/>-->
|
||||||
|
<foreach list="${transcript-list}" delimiter=" " param="filename" target="archive-get-one-transcript"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<!-- problem: destination filename should not be URLEncoded -->
|
||||||
<target name="archive-get-one-transcript">
|
<target name="archive-get-one-transcript">
|
||||||
|
<!--<urlencode property="file.location" value="00007_06-dawa-and-purdrön_08.xml" />
|
||||||
|
<get src="${url.to.transcripts}/${file.location}" dest="${wylie}/TEST.xml"/>-->
|
||||||
<get src="${url.to.transcripts}/${filename}" dest="${wylie}/${filename}"/>
|
<get src="${url.to.transcripts}/${filename}" dest="${wylie}/${filename}"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<!--
|
|
||||||
<filelist
|
|
||||||
id="docfiles"
|
|
||||||
dir="${doc.src}">
|
|
||||||
<file name="foo.xml"/>
|
|
||||||
<file name="bar.xml"/>
|
|
||||||
</filelist>
|
|
||||||
|
|
||||||
<filterreader classname="org.apache.tools.ant.filters.PrefixLines">
|
|
||||||
<param name="prefix" value="Foo"/>
|
|
||||||
</filterreader>
|
|
||||||
-->
|
|
||||||
<!-- concordance program -->
|
<!-- concordance program -->
|
||||||
<target name="lucene-thdl-compile" depends="init">
|
<target name="lucene-thdl-compile" depends="init">
|
||||||
<mkdir dir="${lucene-thdl.bin}"/>
|
<mkdir dir="${lucene-thdl.bin}"/>
|
||||||
|
|
Loading…
Reference in a new issue