replaced solr version 1.1 with 1.2
This commit is contained in:
parent
0addc3c957
commit
6327403a3f
12 changed files with 116 additions and 163 deletions
|
@ -2,9 +2,10 @@ The files in this directory must be copied to your solr webapp
|
||||||
before you can use lucene-thdl-build.xml to post, commit, or
|
before you can use lucene-thdl-build.xml to post, commit, or
|
||||||
delete documents from your solr server.
|
delete documents from your solr server.
|
||||||
|
|
||||||
schema.xml :
|
schema.xml & solrconfig.xml:
|
||||||
Copy this file to your solr/conf directory,
|
Copy these files to your solr/conf directory,
|
||||||
replacing the existing schema.xml file.
|
replacing the existing files with these names.
|
||||||
|
|
||||||
|
|
||||||
lucene-thdl.jar :
|
lucene-thdl.jar :
|
||||||
Create an up to date copy of this file by
|
Create an up to date copy of this file by
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
http://wiki.apache.org/solr/SchemaXml
|
http://wiki.apache.org/solr/SchemaXml
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<schema name="THDL" version="1.1">
|
<schema name="THDL Archive" version="1.1">
|
||||||
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
||||||
Applications should change this to reflect the nature of the search collection.
|
Applications should change this to reflect the nature of the search collection.
|
||||||
version="1.1" is Solr's version number for the schema syntax and semantics. It should
|
version="1.1" is Solr's version number for the schema syntax and semantics. It should
|
||||||
|
@ -158,10 +158,15 @@
|
||||||
</fieldtype>
|
</fieldtype>
|
||||||
|
|
||||||
<fieldtype name="text_wylie" class="solr.TextField">
|
<fieldtype name="text_wylie" class="solr.TextField">
|
||||||
<analyzer class="solr.WhitespaceTokenizerFactory"/>
|
<!-- A tokenizer only takes effect when nested in an <analyzer> element; a bare tokenizer under the fieldtype is not read. -->
<analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
|
||||||
<!-- <analyzer class="org.thdl.lucene.WylieTibetanAnalyzer"/> -->
|
<!-- <analyzer class="org.thdl.lucene.WylieTibetanAnalyzer"/> -->
|
||||||
</fieldtype>
|
</fieldtype>
|
||||||
|
|
||||||
|
<!-- Whitespace-separated list of ids; the tokenizer (and any future filter) must sit inside an <analyzer> element to be applied. -->
|
||||||
|
<fieldtype name="text_idlist" class="solr.TextField">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<!-- <filter class="org.thdl.lucene.NumberPadderFactory"/> -->
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
|
||||||
</types>
|
</types>
|
||||||
|
|
||||||
|
|
||||||
|
@ -183,6 +188,7 @@
|
||||||
|
|
||||||
<!-- transcript documents -->
|
<!-- transcript documents -->
|
||||||
<field name="id" type="string" indexed="true" stored="true"/>
|
<field name="id" type="string" indexed="true" stored="true"/>
|
||||||
|
<field name="form_bo" type="text_tibetan" indexed="true" stored="true"/>
|
||||||
|
|
||||||
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
||||||
will be used if the name matches any of the patterns.
|
will be used if the name matches any of the patterns.
|
||||||
|
@ -207,7 +213,7 @@
|
||||||
<dynamicField name="*_bo" type="text_tibetan" indexed="true" stored="true"/>
|
<dynamicField name="*_bo" type="text_tibetan" indexed="true" stored="true"/>
|
||||||
|
|
||||||
<!-- suffix for any field containing Tibetan transliterated into Wylie -->
|
<!-- suffix for any field containing Tibetan transliterated into Wylie -->
|
||||||
<dynamicField name="*_bo-Latn" type="text_wylie" indexed="true" stored="true"/>
|
<dynamicField name="*_bo-Latn" type="text_ws" indexed="true" stored="true"/>
|
||||||
|
|
||||||
<!-- suffix for any field containing English language content -->
|
<!-- suffix for any field containing English language content -->
|
||||||
<dynamicField name="*_en" type="text_lu" indexed="true" stored="true"/>
|
<dynamicField name="*_en" type="text_lu" indexed="true" stored="true"/>
|
||||||
|
@ -216,11 +222,8 @@
|
||||||
<dynamicField name="*_zh" type="string" indexed="true" stored="true"/>
|
<dynamicField name="*_zh" type="string" indexed="true" stored="true"/>
|
||||||
|
|
||||||
<dynamicField name="*_idref" type="string" indexed="true" stored="false"/>
|
<dynamicField name="*_idref" type="string" indexed="true" stored="false"/>
|
||||||
<dynamicField name="*_opt" type="string" indexed="true" stored="true"/>
|
<dynamicField name="*_idlist" type="text_ws" indexed="true" stored="true"/>
|
||||||
<dynamicField name="*_lang" type="string" indexed="true" stored="true"/>
|
<dynamicField name="*_lang" type="string" indexed="true" stored="true"/>
|
||||||
<dynamicField name="*_size" type="sint" indexed="true" stored="true"/>
|
|
||||||
<dynamicField name="*_name" type="string" indexed="true" stored="true"/>
|
|
||||||
<dynamicField name="*_duration" type="date" indexed="true" stored="true"/>
|
|
||||||
<dynamicField name="*_filename" type="string" indexed="true" stored="true"/>
|
<dynamicField name="*_filename" type="string" indexed="true" stored="true"/>
|
||||||
|
|
||||||
</fields>
|
</fields>
|
||||||
|
|
|
@ -17,8 +17,7 @@
|
||||||
<xsl:template match="/">
|
<xsl:template match="/">
|
||||||
<xsl:for-each select="//transcript">
|
<xsl:for-each select="//transcript">
|
||||||
<xsl:variable name="filename" select="."/>
|
<xsl:variable name="filename" select="."/>
|
||||||
<!-- <xsl:variable name="filename" select="encoder:encode(.,'UTF-8')"/> -->
|
<xsl:result-document href="Transcript_{../@id}.xml" format="unicode.transcript.with.metadata">
|
||||||
<xsl:result-document href="{$filename}" format="unicode.transcript.with.metadata">
|
|
||||||
<xsl:element name="TITLE">
|
<xsl:element name="TITLE">
|
||||||
<xsl:attribute name="id">
|
<xsl:attribute name="id">
|
||||||
<xsl:value-of select="../@id"/>
|
<xsl:value-of select="../@id"/>
|
||||||
|
|
|
@ -5,11 +5,7 @@
|
||||||
|
|
||||||
<xsl:output method="xml" encoding="UTF-8" indent="yes"/>
|
<xsl:output method="xml" encoding="UTF-8" indent="yes"/>
|
||||||
|
|
||||||
<xsl:param name="TITLE_TYPE" select="'AVDB_TITLE'"/>
|
<xsl:import href="solarizeConstantsForImport.xsl"/>
|
||||||
<xsl:param name="VIDEO_TYPE" select="'VIDEO'"/>
|
|
||||||
<xsl:param name="TRANSCRIPT_FRAGMENT_TYPE" select="'TRANSCRIPT_FRAGMENT'"/>
|
|
||||||
<xsl:param name="DURATION_PREFIX" select="'1970-01-01T'"/>
|
|
||||||
<xsl:param name="DURATION_SUFFIX" select="'Z'"/>
|
|
||||||
|
|
||||||
<xsl:template match="/">
|
<xsl:template match="/">
|
||||||
<xsl:apply-templates select="TITLE"/>
|
<xsl:apply-templates select="TITLE"/>
|
||||||
|
@ -23,49 +19,55 @@
|
||||||
</xsl:apply-templates>
|
</xsl:apply-templates>
|
||||||
<xsl:apply-templates select="TEXT/S">
|
<xsl:apply-templates select="TEXT/S">
|
||||||
<xsl:with-param name="title.id" select="$title.id"/>
|
<xsl:with-param name="title.id" select="$title.id"/>
|
||||||
|
<xsl:with-param name="belongs.to" select="METADATA/belongsTo"/>
|
||||||
</xsl:apply-templates>
|
</xsl:apply-templates>
|
||||||
</add>
|
</add>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
|
<!-- should we also include transcript and video ids? -->
|
||||||
<xsl:template match="METADATA">
|
<xsl:template match="METADATA">
|
||||||
<xsl:param name="title.id" select="''"/>
|
<xsl:param name="title.id" select="''"/>
|
||||||
<doc>
|
<doc>
|
||||||
<field name="id"><xsl:value-of select="$title.id"/></field>
|
<field name="id"><xsl:value-of select="$title.id"/></field>
|
||||||
<field name="thdlType_opt"><xsl:value-of select="$TITLE_TYPE"/></field>
|
<field name="thdlType_s"><xsl:value-of select="$TITLE_TYPE"/></field>
|
||||||
<field name="speechType_opt"><xsl:value-of select="speechType"/></field>
|
<field name="belongsTo_idlist"><xsl:value-of select="belongsTo"/></field>
|
||||||
|
<field name="speechType_s"><xsl:value-of select="speechType"/></field>
|
||||||
<field name="language_lang"><xsl:value-of select="language"/></field>
|
<field name="language_lang"><xsl:value-of select="language"/></field>
|
||||||
<field name="administrativeLocation_opt"><xsl:value-of select="administrativeLocation"/></field>
|
<field name="administrativeLocation_s"><xsl:value-of select="administrativeLocation"/></field>
|
||||||
<field name="culturalRegion_opt"><xsl:value-of select="culturalRegion"/></field>
|
<field name="culturalRegion_s"><xsl:value-of select="culturalRegion"/></field>
|
||||||
<field name="title_en"><xsl:value-of select="name"/></field>
|
<field name="title_en"><xsl:value-of select="name"/></field>
|
||||||
<field name="caption_en"><xsl:value-of select="caption"/></field>
|
<field name="caption_en"><xsl:value-of select="caption"/></field>
|
||||||
<!-- should we also include transcript and video ids? -->
|
|
||||||
<field name="transcript_filename"><xsl:value-of select="transcript"/></field>
|
<field name="transcript_filename"><xsl:value-of select="transcript"/></field>
|
||||||
<xsl:for-each select="video">
|
<xsl:variable name="video.ids">
|
||||||
<xsl:choose>
|
<xsl:call-template name="getVideoList">
|
||||||
<xsl:when test="mediaDescription='Audio'">
|
<xsl:with-param name="metadata" select="."/>
|
||||||
<field name="audio_size"><xsl:value-of select="size"/></field>
|
</xsl:call-template>
|
||||||
<field name="audio_duration"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
</xsl:variable>
|
||||||
<field name="audio_filename"><xsl:value-of select="name"/></field>
|
<xsl:if test="normalize-space($video.ids)">
|
||||||
</xsl:when>
|
<field name="videos_idlist"><xsl:value-of select="normalize-space($video.ids)"/></field>
|
||||||
<xsl:otherwise> <!-- must be video -->
|
</xsl:if>
|
||||||
<xsl:choose>
|
|
||||||
<xsl:when test="connectionSpeed='fast'">
|
|
||||||
<field name="high_size"><xsl:value-of select="size"/></field>
|
|
||||||
<field name="high_duration"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
|
||||||
<field name="high_filename"><xsl:value-of select="name"/></field>
|
|
||||||
</xsl:when>
|
|
||||||
<xsl:otherwise>
|
|
||||||
<field name="low_size"><xsl:value-of select="size"/></field>
|
|
||||||
<field name="low_duration"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
|
||||||
<field name="low_filename"><xsl:value-of select="name"/></field>
|
|
||||||
</xsl:otherwise>
|
|
||||||
</xsl:choose>
|
|
||||||
</xsl:otherwise>
|
|
||||||
</xsl:choose>
|
|
||||||
</xsl:for-each>
|
|
||||||
</doc>
|
</doc>
|
||||||
|
<xsl:for-each select="video">
|
||||||
|
<doc>
|
||||||
|
<!-- The document id is the video element's own @id; concat() needs at least two arguments, so select the attribute directly. -->
<field name="id"><xsl:value-of select="@id"/></field>
|
||||||
|
<field name="title_idref"><xsl:value-of select="$title.id"/></field>
|
||||||
|
<field name="thdlType_s"><xsl:value-of select="$VIDEO_TYPE"/></field>
|
||||||
|
<field name="mediaType_s"><xsl:value-of select="mediaDescription"/></field>
|
||||||
|
<field name="connSpeed_s"><xsl:value-of select="connectionSpeed"/></field>
|
||||||
|
<field name="quality_s"><xsl:value-of select="quality"/></field>
|
||||||
|
<field name="size_i"><xsl:value-of select="size"/></field>
|
||||||
|
<field name="duration_dt"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
||||||
|
<field name="media_filename"><xsl:value-of select="name"/></field>
|
||||||
|
</doc>
|
||||||
|
</xsl:for-each>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template name="getVideoList">
|
||||||
|
<!-- Defaults to an empty node-set (not a string) because the template evaluates $metadata/video. -->
<xsl:param name="metadata" select="/.."/>
|
||||||
|
<xsl:for-each select="$metadata/video">
|
||||||
|
<xsl:value-of select="@id"/><xsl:text> </xsl:text>
|
||||||
|
</xsl:for-each>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
<!-- Here's what a chunk of metadata looks like:
|
<!-- Here's what a chunk of metadata looks like:
|
||||||
|
|
||||||
|
@ -100,19 +102,26 @@
|
||||||
|
|
||||||
<xsl:template match="S">
|
<xsl:template match="S">
|
||||||
<xsl:param name="title.id" select="''"/>
|
<xsl:param name="title.id" select="''"/>
|
||||||
|
<xsl:param name="belongs.to" select="''"/>
|
||||||
<doc>
|
<doc>
|
||||||
<field name="id"><xsl:value-of select="concat($title.id, '_', @id)"/></field>
|
<field name="id"><xsl:value-of select="concat($title.id, '_', @id)"/></field>
|
||||||
<field name="transcript_idref"><xsl:value-of select="$title.id"/></field>
|
<field name="title_idref"><xsl:value-of select="$title.id"/></field>
|
||||||
<field name="thdl_type"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field>
|
<field name="thdlType_s"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field>
|
||||||
|
<field name="belongsTo_idlist"><xsl:value-of select="$belongs.to"/></field>
|
||||||
<field name="form_bo"><xsl:value-of select="FORM[@xml:lang='bo']"/></field>
|
<field name="form_bo"><xsl:value-of select="FORM[@xml:lang='bo']"/></field>
|
||||||
<field name="form_bo-Latn"><xsl:value-of select="FORM[@xml:lang='bo-Latn']"/></field>
|
<field name="form_bo-Latn"><xsl:value-of select="FORM[@xml:lang='bo-Latn']"/></field>
|
||||||
|
<xsl:if test="TRANSL[@xml:lang='en']">
|
||||||
<field name="transl_en"><xsl:value-of select="TRANSL[@xml:lang='en']"/></field>
|
<field name="transl_en"><xsl:value-of select="TRANSL[@xml:lang='en']"/></field>
|
||||||
<field name="transl_zh"><xsl:value-of select="TRANSL[@xml:lang='zh']"/></field>
|
|
||||||
<xsl:if test="AUDIO/@start">
|
|
||||||
<field name="start_f"><xsl:value-of select="AUDIO/@start"/></field>
|
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
<xsl:if test="AUDIO/@end">
|
<xsl:if test="TRANSL[@xml:lang='zh']">
|
||||||
|
<field name="transl_zh"><xsl:value-of select="TRANSL[@xml:lang='zh']"/></field>
|
||||||
|
</xsl:if>
|
||||||
|
<xsl:if test="string(AUDIO/@start)">
|
||||||
|
<field name="start_f"><xsl:value-of select="AUDIO/@start"/></field>
|
||||||
|
<xsl:if test="string(AUDIO/@end)">
|
||||||
<field name="end_f"><xsl:value-of select="AUDIO/@end"/></field>
|
<field name="end_f"><xsl:value-of select="AUDIO/@end"/></field>
|
||||||
|
<field name="duration_f"><xsl:value-of select="AUDIO/@end - AUDIO/@start"/></field>
|
||||||
|
</xsl:if>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
</doc>
|
</doc>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
Binary file not shown.
Binary file not shown.
BIN
extensions/apache/lucene-analyzers.jar
Normal file
BIN
extensions/apache/lucene-analyzers.jar
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
extensions/apache/lucene-spellchecker.jar
Normal file
BIN
extensions/apache/lucene-spellchecker.jar
Normal file
Binary file not shown.
|
@ -19,16 +19,26 @@
|
||||||
</classpath>
|
</classpath>
|
||||||
</taskdef>
|
</taskdef>
|
||||||
|
|
||||||
<property name="get.title.metadata" value="http://thdl.org/avarch/mediaflowcat/title_metadata.php"/>
|
<property name="get.avdb.metadata" value="http://thdl.org/avarch/mediaflowcat/title_metadata.php"/>
|
||||||
|
<property name="only.titles" value="excludeNonTitles=true"/>
|
||||||
|
<property name="only.nontitles" value="excludeTitles=true"/>
|
||||||
<property name="url.to.transcripts" value="http://www.thdl.org/avarch/transcripts"/>
|
<property name="url.to.transcripts" value="http://www.thdl.org/avarch/transcripts"/>
|
||||||
<property name="url.to.media.high" value="http://www.thdl.org/media/high"/>
|
<property name="url.to.media.high" value="http://www.thdl.org/media/high"/>
|
||||||
<property name="url.to.media.low" value="http://www.thdl.org/media/low"/>
|
<property name="url.to.media.low" value="http://www.thdl.org/media/low"/>
|
||||||
<property name="url.to.media.audio" value="http://www.thdl.org/media/audio"/>
|
<property name="url.to.media.audio" value="http://www.thdl.org/media/audio"/>
|
||||||
<property name="lucene-thdl.bin" location="${bin}/lucene-thdl"/>
|
<property name="lucene-thdl.bin" location="${bin}/lucene-thdl"/>
|
||||||
<property name="archive" location="archive"/>
|
<property name="archive" location="archive"/>
|
||||||
|
<property name="title.metadata" value="${archive}/title_metadata.xml"/>
|
||||||
|
<property name="nontitle.metadata" value="${archive}/nontitle_metadata.xml"/>
|
||||||
<property name="styles" location="${archive}/styles"/>
|
<property name="styles" location="${archive}/styles"/>
|
||||||
<property name="transcripts" location="${archive}/transcripts-with-metadata"/>
|
<property name="transcripts" location="${archive}/transcripts-with-metadata"/>
|
||||||
<property name="solr" location="${archive}/solr"/>
|
<property name="solr" location="${archive}/solr"/>
|
||||||
|
<property name="solr.titles" location="${solr}/titles"/>
|
||||||
|
<property name="solr.nontitle.dir" location="${solr}/nontitles"/>
|
||||||
|
<property name="solr.nontitle.file" location="${solr.nontitle.dir}/add_nontitles.xml"/>
|
||||||
|
<property name="copy.to.solr.webapp" location="${archive}/copy-to-solr-webapp"/>
|
||||||
|
<property name="url.to.solr" value="http://localhost:8983/solr"/>
|
||||||
|
<property name="url.to.solr.update" value="${url.to.solr}/update"/>
|
||||||
|
|
||||||
<path id="lucene.classpath">
|
<path id="lucene.classpath">
|
||||||
<fileset id="lucene.extensions" dir="${ext}/apache">
|
<fileset id="lucene.extensions" dir="${ext}/apache">
|
||||||
|
@ -51,7 +61,21 @@
|
||||||
<!-- archive tasks -->
|
<!-- archive tasks -->
|
||||||
<target name="archive-get-metadata">
|
<target name="archive-get-metadata">
|
||||||
<mkdir dir="${archive}"/>
|
<mkdir dir="${archive}"/>
|
||||||
<get src="${get.title.metadata}" dest="${archive}/title_metadata.xml" verbose="on"/>
|
<get src="${get.avdb.metadata}?${parameters}" dest="${output.filename}" verbose="on"/>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="archive-get-all-title-metadata">
|
||||||
|
<antcall target="archive-get-metadata">
|
||||||
|
<param name="parameters" value="${only.titles}"/>
|
||||||
|
<param name="output.filename" value="${title.metadata}"/>
|
||||||
|
</antcall>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="archive-get-all-nontitle-metadata">
|
||||||
|
<antcall target="archive-get-metadata">
|
||||||
|
<param name="parameters" value="${only.nontitles}"/>
|
||||||
|
<param name="output.filename" value="${nontitle.metadata}"/>
|
||||||
|
</antcall>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<!-- due to encoding issues, none of the transcripts with filenames that need to be url-encoded
|
<!-- due to encoding issues, none of the transcripts with filenames that need to be url-encoded
|
||||||
|
@ -61,7 +85,7 @@
|
||||||
<mkdir dir="${transcripts}"/>
|
<mkdir dir="${transcripts}"/>
|
||||||
<java classname="net.sf.saxon.Transform" fork="yes">
|
<java classname="net.sf.saxon.Transform" fork="yes">
|
||||||
<arg value="-s"/>
|
<arg value="-s"/>
|
||||||
<arg value="${archive}/title_metadata.xml"/>
|
<arg value="${title.metadata}"/>
|
||||||
<arg value="-o"/>
|
<arg value="-o"/>
|
||||||
<arg value="${transcripts}/DUMMY_FOR_BASE_URI"/>
|
<arg value="${transcripts}/DUMMY_FOR_BASE_URI"/>
|
||||||
<arg value="${styles}/mergeMetadataAndData.xsl"/>
|
<arg value="${styles}/mergeMetadataAndData.xsl"/>
|
||||||
|
@ -74,9 +98,14 @@
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<!-- solr tasks -->
|
<!-- solr tasks -->
|
||||||
<target name="solr-prepare-transcripts">
|
<target name="solr-prepare-titles">
|
||||||
<mkdir dir="${solr}"/>
|
<mkdir dir="${solr.titles}"/>
|
||||||
<xslt basedir="${transcripts}" destdir="${solr}" extension=".xml" style="${styles}/solarizeTranscript.xsl"/>
|
<xslt basedir="${transcripts}" includes="*.xml" destdir="${solr.titles}" extension=".xml" style="${styles}/solarizeTranscript.xsl"/>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="solr-prepare-nontitles">
|
||||||
|
<mkdir dir="${solr.nontitle.dir}"/>
|
||||||
|
<!-- 'out' names a single output file (${solr.nontitle.file}), so it must not end with a directory separator. -->
<xslt in="${nontitle.metadata}" out="${solr.nontitle.file}" style="${styles}/solarizeNonTitles.xsl"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
|
@ -95,91 +124,23 @@
|
||||||
-Durl=http://localhost:8983/solr/update
|
-Durl=http://localhost:8983/solr/update
|
||||||
-Dcommit=yes
|
-Dcommit=yes
|
||||||
-->
|
-->
|
||||||
<target name="solr-post-and-commit-transcripts">
|
<target name="solr-post-and-commit-titles">
|
||||||
<java classname="net.sf.saxon.Transform" fork="yes">
|
<fileset dir="${solr.titles}" includes="*.xml" id="solr.add.fileset"/>
|
||||||
<classpath>
|
<pathconvert pathsep=" " property="list.of.files.to.post" refid="solr.add.fileset"/>
|
||||||
<path refid="lucene.classpath"/>
|
<java jar="${ext}/apache/post.jar" fork="true">
|
||||||
</classpath>
|
<arg line="${list.of.files.to.post}"/>
|
||||||
|
<jvmarg value="-Durl=${url.to.solr.update}"/>
|
||||||
|
<jvmarg value="-Dcommit=yes"/>
|
||||||
</java>
|
</java>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="archive-get-list-of-transcripts">
|
<target name="solr-post-and-commit-nontitles">
|
||||||
<!-- <java classname="net.sf.saxon.Transform" fork="yes">
|
<java jar="${ext}/apache/post.jar" fork="true">
|
||||||
<arg value="-s"/>
|
<arg line="${solr.nontitle.file}"/>
|
||||||
<arg value="${archive}/title_metadata.xml"/>
|
<jvmarg value="-Durl=${url.to.solr.update}"/>
|
||||||
<arg value="-o"/>
|
<jvmarg value="-Dcommit=yes"/>
|
||||||
<arg value="${archive}/titles_as_list.txt"/>
|
|
||||||
<arg value="${styles}/get-list-of-transcripts.xsl"/>
|
|
||||||
<classpath>
|
|
||||||
<path refid="saxon.classpath"/>
|
|
||||||
</classpath>
|
|
||||||
</java>-->
|
|
||||||
<xslt in="${archive}/title_metadata.xml" out="${archive}/titles_as_list.txt" style="${styles}/get-list-of-transcripts.xsl"/>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="archive-get-transcripts">
|
|
||||||
<mkdir dir="${wylie}"/>
|
|
||||||
<loadfile property="transcript-list" srcFile="${archive}/titles_as_list2.txt"/> <!-- encoding="UTF-8" -->
|
|
||||||
<foreach list="${transcript-list}" delimiter=" " param="filename" target="archive-get-one-transcript"/>
|
|
||||||
<!--<foreach list="${transcript-list}" delimiter=" " param="id.plus.filename" target="archive-get-one-transcript"/>-->
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="archive-get-one-transcript">
|
|
||||||
<!--<propertyregex property="filename" input="${id.plus.filename}" regexp=".+/(.+)" select="\1"/>-->
|
|
||||||
<get src="${url.to.transcripts}/${filename}" dest="${wylie}/${filename}" ignoreerrors="true"/>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<!-- solr stuff -->
|
|
||||||
<!-- <target name="solr-1:clean-local" depends="clean">
|
|
||||||
<delete dir="${solarized.transcript.dir.prefinal}"/>
|
|
||||||
<delete dir="${solarized.transcript.dir.final}"/>
|
|
||||||
<mkdir dir="${solarized.transcript.dir.prefinal}"/>
|
|
||||||
<mkdir dir="${solarized.transcript.dir.final}"/>
|
|
||||||
</target>-->
|
|
||||||
|
|
||||||
<!-- <target name="solr-2:prepare-documents" depends="init, dbxml-4:ngram-magic">-->
|
|
||||||
<!-- create xml data file used to assign tags to mono tsheg bars -->
|
|
||||||
<!--
|
|
||||||
<java classname="net.sf.saxon.Transform" fork="yes">
|
|
||||||
<arg value="-o"/>
|
|
||||||
<arg value="${build.dir}/tshegbartags.xml"/>
|
|
||||||
<arg value="${lexicon.dir}/lhasa-verbs.xml"/>
|
|
||||||
<arg value="${stylesheet.dir}/prepareTshegBarTagger.xsl"/>
|
|
||||||
<classpath>
|
|
||||||
<pathelement location="${bin.dir}"/>
|
|
||||||
<path refid="classpath"/>
|
|
||||||
</classpath>
|
|
||||||
</java>
|
</java>
|
||||||
-->
|
</target>
|
||||||
|
|
||||||
<!-- create xml file used to assign synonyms to certain mono tsheg bars -->
|
|
||||||
<!--
|
|
||||||
<java classname="net.sf.saxon.Transform" fork="yes">
|
|
||||||
<arg value="-o"/>
|
|
||||||
<arg value="${build.dir}/synonyms.xml"/>
|
|
||||||
<arg value="${lexicon.dir}/lhasa-verbs.xml"/>
|
|
||||||
<arg value="${stylesheet.dir}/makeSynonymFile.xsl"/>
|
|
||||||
<classpath>
|
|
||||||
<pathelement location="${bin.dir}"/>
|
|
||||||
<path refid="classpath"/>
|
|
||||||
</classpath>
|
|
||||||
</java>
|
|
||||||
-->
|
|
||||||
|
|
||||||
<!-- <loadfile property="xquery" srcfile="${stylesheet.dir}/solarizeTranscriptDatabase.xql" encoding="UTF-8"/>
|
|
||||||
<java classname="org.thdl.dbxml.QueryTools" fork="yes">
|
|
||||||
<arg value="${dbxml.environment.dir}"/>
|
|
||||||
<arg value="${dbxml.container}"/>
|
|
||||||
<arg value="${xquery}"/>
|
|
||||||
<arg value="${solarized.transcript.dir.prefinal}"/>
|
|
||||||
<classpath>
|
|
||||||
<pathelement location="${bin.dir}"/>
|
|
||||||
<path refid="classpath"/>
|
|
||||||
</classpath>
|
|
||||||
<jvmarg value="-Djava.library.path=${dbxml.lib}"/>
|
|
||||||
</java>-->
|
|
||||||
|
|
||||||
<!-- insert whether or not media exists for segment -->
|
<!-- insert whether or not media exists for segment -->
|
||||||
<!-- <java classname="net.sf.saxon.Transform" fork="yes">
|
<!-- <java classname="net.sf.saxon.Transform" fork="yes">
|
||||||
|
@ -209,27 +170,6 @@
|
||||||
So, to replace, we'll find XML document by name (document id) in dbxml database,
|
So, to replace, we'll find XML document by name (document id) in dbxml database,
|
||||||
then get all sentence ids for that document, then combine docId_sentenceId and
|
then get all sentence ids for that document, then combine docId_sentenceId and
|
||||||
remove/replace from lucene.
|
remove/replace from lucene.
|
||||||
-->
|
|
||||||
<!--
|
|
||||||
<target name="solr-3:commit-documents">
|
|
||||||
<exec executable="sh" dir="${solarized.transcript.dir.final}">
|
|
||||||
<arg value="post.sh"/>
|
|
||||||
<arg value="*.xml"/>
|
|
||||||
</exec>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="solr-4:delete-documents" depends="solr-1:clean-local">
|
|
||||||
<exec executable="curl">
|
|
||||||
<arg value="${solr.update}"/>
|
|
||||||
<arg value="-data-binary"/> should have double dash at beginning
|
|
||||||
<arg value="<delete><query>id:[* TO *]</query></delete>"/>
|
|
||||||
</exec>
|
|
||||||
<exec executable="curl">
|
|
||||||
<arg value="${solr.update}"/>
|
|
||||||
<arg value="-data-binary"/> double dash again!
|
|
||||||
<arg value="<commit/>"/>
|
|
||||||
</exec>
|
|
||||||
</target>
|
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<!-- concordance program -->
|
<!-- concordance program -->
|
||||||
|
@ -242,6 +182,7 @@
|
||||||
|
|
||||||
<target name="lucene-thdl-jar" depends="lucene-thdl-compile">
|
<target name="lucene-thdl-jar" depends="lucene-thdl-compile">
|
||||||
<jar destfile="${vanillalib}/lucene-thdl.jar" basedir="${lucene-thdl.bin}/"/>
|
<jar destfile="${vanillalib}/lucene-thdl.jar" basedir="${lucene-thdl.bin}/"/>
|
||||||
|
<copy file="${vanillalib}/lucene-thdl.jar" todir="${copy.to.solr.webapp}"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
Loading…
Reference in a new issue