Added more dynamic fields to the Solr schema, and added post.jar for batch posting of XML files from Java
This commit is contained in:
parent
d9d12d9f64
commit
305cbc46fe
4 changed files with 65 additions and 9 deletions
|
@ -182,8 +182,6 @@
|
||||||
|
|
||||||
<!-- transcript documents -->
|
<!-- transcript documents -->
|
||||||
<field name="id" type="string" indexed="true" stored="true"/>
|
<field name="id" type="string" indexed="true" stored="true"/>
|
||||||
<field name="thdl_type" type="string" indexed="true" stored="true"/>
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
||||||
will be used if the name matches any of the patterns.
|
will be used if the name matches any of the patterns.
|
||||||
|
@ -216,6 +214,13 @@
|
||||||
<!-- Suffix for any field containing Chinese-language content. FIXME: the type should not be "string". -->
|
<!-- Suffix for any field containing Chinese-language content. FIXME: the type should not be "string". -->
|
||||||
<dynamicField name="*_zh" type="string" indexed="true" stored="true"/>
|
<dynamicField name="*_zh" type="string" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<!-- Additional suffix-based dynamic fields. Every pattern below is indexed; every one is stored except *_idref (stored="false"). *_size is typed sint and *_duration is typed date; the remaining patterns are plain strings. NOTE(review): typing *_duration as date presumably requires a full date-time value rather than a bare duration; confirm this is intended. -->
<dynamicField name="*_idref" type="string" indexed="true" stored="false"/>
|
||||||
|
<dynamicField name="*_opt" type="string" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_lang" type="string" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_size" type="sint" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_name" type="string" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_duration" type="date" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_filename" type="string" indexed="true" stored="true"/>
|
||||||
|
|
||||||
</fields>
|
</fields>
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,10 @@
|
||||||
<xsl:output method="xml" encoding="UTF-8" indent="yes"/>
|
<xsl:output method="xml" encoding="UTF-8" indent="yes"/>
|
||||||
|
|
||||||
<xsl:param name="TITLE_TYPE" select="'AVDB_TITLE'"/>
|
<xsl:param name="TITLE_TYPE" select="'AVDB_TITLE'"/>
|
||||||
|
<xsl:param name="VIDEO_TYPE" select="'VIDEO'"/>
|
||||||
<xsl:param name="TRANSCRIPT_FRAGMENT_TYPE" select="'TRANSCRIPT_FRAGMENT'"/>
|
<xsl:param name="TRANSCRIPT_FRAGMENT_TYPE" select="'TRANSCRIPT_FRAGMENT'"/>
|
||||||
|
<xsl:param name="DURATION_PREFIX" select="'1970-01-01T'"/>
|
||||||
|
<xsl:param name="DURATION_SUFFIX" select="'Z'"/>
|
||||||
|
|
||||||
<xsl:template match="/">
|
<xsl:template match="/">
|
||||||
<xsl:apply-templates select="TITLE"/>
|
<xsl:apply-templates select="TITLE"/>
|
||||||
|
@ -28,18 +31,42 @@
|
||||||
<xsl:param name="title.id" select="''"/>
|
<xsl:param name="title.id" select="''"/>
|
||||||
<doc>
|
<doc>
|
||||||
<field name="id"><xsl:value-of select="$title.id"/></field>
|
<field name="id"><xsl:value-of select="$title.id"/></field>
|
||||||
<field name="thdl_type"><xsl:value-of select="$TITLE_TYPE"/></field>
|
<field name="thdlType_opt"><xsl:value-of select="$TITLE_TYPE"/></field>
|
||||||
<field name="speechType"><xsl:value-of select="speechType"/></field>
|
<field name="speechType_opt"><xsl:value-of select="speechType"/></field>
|
||||||
<field name="language"><xsl:value-of select="language"/></field>
|
<field name="language_lang"><xsl:value-of select="language"/></field>
|
||||||
<field name="culturalRegion"><xsl:value-of select="culturalRegion"/></field>
|
<field name="administrativeLocation_opt"><xsl:value-of select="administrativeLocation"/></field>
|
||||||
|
<field name="culturalRegion_opt"><xsl:value-of select="culturalRegion"/></field>
|
||||||
<field name="title_en"><xsl:value-of select="name"/></field>
|
<field name="title_en"><xsl:value-of select="name"/></field>
|
||||||
<field name="caption_en"><xsl:value-of select="caption"/></field>
|
<field name="caption_en"><xsl:value-of select="caption"/></field>
|
||||||
<field name="speechType"><xsl:value-of select="speechType"/></field>
|
<!-- should we also include transcript and video ids? -->
|
||||||
<field name="transcriptFilename"><xsl:value-of select="transcript"/></field>
|
<field name="transcript_filename"><xsl:value-of select="transcript"/></field>
|
||||||
<!-- make plan for videos; incl. caressing #s so that you can search for ranges -->
|
<xsl:for-each select="video">
|
||||||
|
<xsl:choose>
|
||||||
|
<xsl:when test="mediaDescription='Audio'">
|
||||||
|
<field name="audio_size"><xsl:value-of select="size"/></field>
|
||||||
|
<field name="audio_duration"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
||||||
|
<field name="audio_filename"><xsl:value-of select="name"/></field>
|
||||||
|
</xsl:when>
|
||||||
|
<xsl:otherwise> <!-- must be video -->
|
||||||
|
<xsl:choose>
|
||||||
|
<xsl:when test="connectionSpeed='fast'">
|
||||||
|
<field name="high_size"><xsl:value-of select="size"/></field>
|
||||||
|
<field name="high_duration"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
||||||
|
<field name="high_filename"><xsl:value-of select="name"/></field>
|
||||||
|
</xsl:when>
|
||||||
|
<xsl:otherwise>
|
||||||
|
<field name="low_size"><xsl:value-of select="size"/></field>
|
||||||
|
<field name="low_duration"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
||||||
|
<field name="low_filename"><xsl:value-of select="name"/></field>
|
||||||
|
</xsl:otherwise>
|
||||||
|
</xsl:choose>
|
||||||
|
</xsl:otherwise>
|
||||||
|
</xsl:choose>
|
||||||
|
</xsl:for-each>
|
||||||
</doc>
|
</doc>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
|
|
||||||
<!-- Here's what a chunk of metadata looks like:
|
<!-- Here's what a chunk of metadata looks like:
|
||||||
|
|
||||||
<speechType>Conversation</speechType>
|
<speechType>Conversation</speechType>
|
||||||
|
@ -75,6 +102,7 @@
|
||||||
<xsl:param name="title.id" select="''"/>
|
<xsl:param name="title.id" select="''"/>
|
||||||
<doc>
|
<doc>
|
||||||
<field name="id"><xsl:value-of select="concat($title.id, '_', @id)"/></field>
|
<field name="id"><xsl:value-of select="concat($title.id, '_', @id)"/></field>
|
||||||
|
<field name="transcript_idref"><xsl:value-of select="$title.id"/></field>
|
||||||
<field name="thdl_type"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field>
|
<field name="thdl_type"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field>
|
||||||
<field name="form_bo"><xsl:value-of select="FORM[@xml:lang='bo']"/></field>
|
<field name="form_bo"><xsl:value-of select="FORM[@xml:lang='bo']"/></field>
|
||||||
<field name="form_bo-Latn"><xsl:value-of select="FORM[@xml:lang='bo-Latn']"/></field>
|
<field name="form_bo-Latn"><xsl:value-of select="FORM[@xml:lang='bo-Latn']"/></field>
|
||||||
|
|
BIN
extensions/apache/post.jar
Normal file
BIN
extensions/apache/post.jar
Normal file
Binary file not shown.
|
@ -79,6 +79,29 @@
|
||||||
<xslt basedir="${transcripts}" destdir="${solr}" extension=".xml" style="${styles}/solarizeTranscript.xsl"/>
|
<xslt basedir="${transcripts}" destdir="${solr}" extension=".xml" style="${styles}/solarizeTranscript.xsl"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
SimplePostTool: version 1.2
|
||||||
|
This is a simple command line tool for POSTing raw XML to a Solr
|
||||||
|
port. XML data can be read from files specified as commandline
|
||||||
|
args; as raw commandline arg strings; or via STDIN.
|
||||||
|
Examples:
|
||||||
|
java -Ddata=files -jar post.jar *.xml
|
||||||
|
java -Ddata=args -jar post.jar '<delete><id>42</id></delete>'
|
||||||
|
java -Ddata=stdin -jar post.jar < hd.xml
|
||||||
|
Other options controlled by System Properties include the Solr
|
||||||
|
URL to POST to, and whether a commit should be executed. These
|
||||||
|
are the defaults for all System Properties...
|
||||||
|
-Ddata=files
|
||||||
|
-Durl=http://localhost:8983/solr/update
|
||||||
|
-Dcommit=yes
|
||||||
|
-->
|
||||||
|
<!-- Ant target named for posting the generated Solr XML and committing it. NOTE(review): as written it only forks net.sf.saxon.Transform on the lucene.classpath and passes no arguments; nothing here references post.jar, a Solr update URL, or any input files. Presumably an unfinished placeholder; confirm, then wire it to the SimplePostTool described in the comment above. -->
<target name="solr-post-and-commit-transcripts">
|
||||||
|
<java classname="net.sf.saxon.Transform" fork="yes">
|
||||||
|
<classpath>
|
||||||
|
<path refid="lucene.classpath"/>
|
||||||
|
</classpath>
|
||||||
|
</java>
|
||||||
|
</target>
|
||||||
|
|
||||||
<target name="archive-get-list-of-transcripts">
|
<target name="archive-get-list-of-transcripts">
|
||||||
<!-- <java classname="net.sf.saxon.Transform" fork="yes">
|
<!-- <java classname="net.sf.saxon.Transform" fork="yes">
|
||||||
|
|
Loading…
Reference in a new issue