112 lines
4.4 KiB
XML
112 lines
4.4 KiB
XML
<?xml version="1.0" encoding="utf-8"?>
|
|
|
|
<project name="lucene-thdl" default="lucene-thdl-jar" basedir=".">
|
|
<import file="build.xml"/>
|
|
|
|
<!-- Register the tasks listed in ant-contrib's antcontrib.properties resource,
     loading them from the ant-contrib jar under ${ext}.
     NOTE(review): ${ext} is not defined in this file; presumably it comes from
     the imported build.xml (confirm). -->
<taskdef resource="net/sf/antcontrib/antcontrib.properties">
  <classpath location="${ext}/to-be-installed-with-ant/ant-contrib.jar"/>
</taskdef>
|
|
|
|
<!-- Directory that receives the compiled lucene-thdl classes
     (destdir of lucene-thdl-compile, basedir of lucene-thdl-jar). -->
<property name="lucene-thdl.bin" location="${bin}/lucene-thdl"/>

<!-- Local working tree for the archive tasks. "archive" must be declared first
     because the properties below are resolved relative to it. -->
<property name="archive" location="archive"/>
<property name="wylie"   location="${archive}/wylie"/>
<property name="unicode" location="${archive}/unicode"/>
<property name="solr"    location="${archive}/solr"/>
<property name="styles"  location="${archive}/styles"/>

<!-- Remote THDL locations: title metadata service and transcript base URL. -->
<property name="get.title.metadata"
          value="http://thdl.org/avarch/mediaflowcat/title_metadata.php"/>
<property name="url.to.transcripts"
          value="http://www.thdl.org/avarch/transcripts"/>

<!-- Base URLs for the media files (not referenced by the targets visible here). -->
<property name="url.to.media.high"
          value="http://www.thdl.org/media/high"/>
<property name="url.to.media.low"
          value="http://www.thdl.org/media/low"/>
<property name="url.to.media.audio"
          value="http://www.thdl.org/media/audio"/>
|
|
|
|
<!-- Compile classpath for the lucene-thdl sources: every jar that sits
     directly in ${ext}/apache (no sub-directories). -->
<path id="lucene.classpath">
  <fileset id="lucene.extensions" dir="${ext}/apache" includes="*.jar"/>
</path>
|
|
|
|
<!--
|
|
<target name="segment-videos">
|
|
<fileset dir="${segmentation.instructions.dir}" id="segmentation.instructions">
|
|
<include name="**/*.xml"/>
|
|
</fileset>
|
|
<pathconvert pathsep=" " property="about.to.segment" refid="segmentation.instructions"/>
|
|
<java classname="fieldling.quicktime.MovieSegmenter" dir="${segmented.video.dir}" fork="yes">
|
|
<arg line="${about.to.segment}"/>
|
|
<classpath>
|
|
<pathelement location="${bin.dir}"/>
|
|
<path refid="classpath"/>
|
|
</classpath>
|
|
</java>
|
|
</target>
|
|
-->
|
|
|
|
<!--
|
|
|
|
<target name="archive-transcripts-to-unicode">
|
|
<java classname="net.sf.saxon.Transform" fork="yes">
|
|
<arg value="-o"/>
|
|
<arg value="${unicode.transcript.dir}"/>
|
|
<arg value="${wylie.transcript.dir}"/>
|
|
<arg value="${stylesheet.dir}/qdToUnicode.xsl"/>
|
|
<classpath>
|
|
<pathelement location="${bin.dir}"/>
|
|
<path refid="classpath"/>
|
|
</classpath>
|
|
</java>
|
|
</target>
|
|
-->
|
|
|
|
<!-- archive tasks -->
|
|
|
|
<!-- Downloads the title metadata into ${archive}/title_metadata.xml.
     Known issue: title_metadata.xml is not being retrieved as UTF-8; this
     still needs to be fixed.
     Open question: perhaps the transcript file name does not need to be
     UTF-decoded (decodeUTF) at all? -->
<target name="archive-get-metadata">
  <!-- Make sure the download directory exists before fetching. -->
  <mkdir dir="${archive}"/>
  <get verbose="on"
       src="${get.title.metadata}"
       dest="${archive}/title_metadata.xml"/>
</target>
|
|
|
|
<!-- Transforms the downloaded title metadata into titles_as_list.txt using
     get-list-of-transcripts.xsl. Expects ${archive}/title_metadata.xml to
     exist already; no dependency on archive-get-metadata is declared, so that
     target has to be run beforehand. -->
<target name="archive-get-list-of-transcripts">
  <!-- Creates the directory that archive-get-one-transcript downloads into. -->
  <mkdir dir="${wylie}"/>
  <xslt style="${styles}/get-list-of-transcripts.xsl"
        in="${archive}/title_metadata.xml"
        out="${archive}/titles_as_list.txt"/>
</target>
|
|
|
|
<!-- Downloads every transcript named in ${archive}/titles_as_list.txt by
     invoking archive-get-one-transcript once per entry.

     Before running, the following transcripts must be deleted from
     titles_as_list.txt:
       02069_clip-21-husked-barle_00.xml
       02116_nasal-congestion_00.xml
       A_New_Script02.xml

       00007_06-dawa-and-purdrön_08.xml
     Alternatively, protection should be added against URLs that cannot be
     "got". -->
<target name="archive-get-transcripts">
  <!-- The list is read with the platform default encoding. An explicit UTF-8
       variant was tried and left disabled:
       <loadfile property="transcript-list" srcFile="${archive}/titles_as_list.txt" encoding="UTF-8"/> -->
  <loadfile property="transcript-list" srcFile="${archive}/titles_as_list.txt"/>

  <!-- Entries are separated by single spaces; each one is handed to the
       called target as the "filename" parameter. -->
  <foreach target="archive-get-one-transcript"
           param="filename"
           list="${transcript-list}"
           delimiter=" "/>
</target>
|
|
|
|
<!-- Downloads a single transcript, ${filename}, from ${url.to.transcripts}
     into ${wylie}. The "filename" parameter is supplied by the foreach in
     archive-get-transcripts.

     Known problem: the destination file name should not be URL-encoded.

     ignoreerrors="true" makes a failed download (for example a transcript
     that does not exist on the server) a logged error instead of a build
     failure, so one bad entry no longer aborts the whole foreach run. -->
<target name="archive-get-one-transcript">
  <!-- Disabled experiment with URL-encoding the source name:
  <urlencode property="file.location" value="00007_06-dawa-and-purdrön_08.xml" />
  <get src="${url.to.transcripts}/${file.location}" dest="${wylie}/TEST.xml"/>
  -->
  <get src="${url.to.transcripts}/${filename}"
       dest="${wylie}/${filename}"
       ignoreerrors="true"/>
</target>
|
|
|
|
|
|
<!-- concordance program -->
|
|
<!-- Compiles the org.thdl.lucene sources from ${source} into
     ${lucene-thdl.bin}, with debug information, against lucene.classpath.
     NOTE(review): the "init" target and ${source} are not defined in this
     file; presumably they come from the imported build.xml (confirm).

     The include pattern is org/thdl/lucene/**/*.java. The previous pattern,
     org/thdl/lucene/**.java, only matched files directly inside
     org/thdl/lucene, because "**" spans directories only when it forms a
     complete path segment. Sources in sub-packages were therefore not
     selected. -->
<target name="lucene-thdl-compile" depends="init">
  <mkdir dir="${lucene-thdl.bin}"/>
  <javac srcdir="${source}"
         destdir="${lucene-thdl.bin}"
         includes="org/thdl/lucene/**/*.java"
         debug="on">
    <classpath refid="lucene.classpath"/>
  </javac>
</target>
|
|
|
|
<!-- Default target of this project: packages everything under
     ${lucene-thdl.bin} into ${vanillalib}/lucene-thdl.jar after compiling.
     NOTE(review): ${vanillalib} is not defined in this file; presumably it
     comes from the imported build.xml (confirm). -->
<target name="lucene-thdl-jar" depends="lucene-thdl-compile">
  <jar basedir="${lucene-thdl.bin}/"
       destfile="${vanillalib}/lucene-thdl.jar"/>
</target>
|
|
|
|
</project>
|