replaced solr version 1.1 with 1.2
This commit is contained in:
parent
0addc3c957
commit
6327403a3f
12 changed files with 116 additions and 163 deletions
|
@ -2,10 +2,11 @@ The files in this directory must be copied to your solr webapp
|
|||
before you can use lucene-thdl-build.xml to post, commit, or
|
||||
delete documents from your solr server.
|
||||
|
||||
schema.xml :
|
||||
Copy this file to your solr/conf directory,
|
||||
replacing the existing schema.xml file.
|
||||
schema.xml & solrconfig.xml:
|
||||
Copy these files to your solr/conf directory,
|
||||
replacing the existing files of the same names.
|
||||
|
||||
|
||||
lucene-thdl.jar :
|
||||
Create an up to date copy of this file by
|
||||
running the task lucene-thdl-jar, then copy
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
http://wiki.apache.org/solr/SchemaXml
|
||||
-->
|
||||
|
||||
<schema name="THDL" version="1.1">
|
||||
<schema name="THDL Archive" version="1.1">
|
||||
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
||||
Applications should change this to reflect the nature of the search collection.
|
||||
version="1.1" is Solr's version number for the schema syntax and semantics. It should
|
||||
|
@ -158,10 +158,15 @@
|
|||
</fieldtype>
|
||||
|
||||
<fieldtype name="text_wylie" class="solr.TextField">
|
||||
<analyzer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!-- <analyzer class="org.thdl.lucene.WylieTibetanAnalyzer"/> -->
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="text_idlist" class="solr.TextField">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!-- <filter class="org.thdl.lucene.NumberPadderFactory"/> -->
|
||||
</fieldtype>
|
||||
|
||||
</types>
|
||||
|
||||
|
||||
|
@ -183,6 +188,7 @@
|
|||
|
||||
<!-- transcript documents -->
|
||||
<field name="id" type="string" indexed="true" stored="true"/>
|
||||
<field name="form_bo" type="text_tibetan" indexed="true" stored="true"/>
|
||||
|
||||
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
||||
will be used if the name matches any of the patterns.
|
||||
|
@ -207,7 +213,7 @@
|
|||
<dynamicField name="*_bo" type="text_tibetan" indexed="true" stored="true"/>
|
||||
|
||||
<!-- suffix for any field containing Tibetan transliterated into Wylie -->
|
||||
<dynamicField name="*_bo-Latn" type="text_wylie" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_bo-Latn" type="text_ws" indexed="true" stored="true"/>
|
||||
|
||||
<!-- suffix for any field containing English language content -->
|
||||
<dynamicField name="*_en" type="text_lu" indexed="true" stored="true"/>
|
||||
|
@ -216,11 +222,8 @@
|
|||
<dynamicField name="*_zh" type="string" indexed="true" stored="true"/>
|
||||
|
||||
<dynamicField name="*_idref" type="string" indexed="true" stored="false"/>
|
||||
<dynamicField name="*_opt" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_idlist" type="text_ws" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_lang" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_size" type="sint" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_name" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_duration" type="date" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_filename" type="string" indexed="true" stored="true"/>
|
||||
|
||||
</fields>
|
||||
|
|
|
@ -17,8 +17,7 @@
|
|||
<xsl:template match="/">
|
||||
<xsl:for-each select="//transcript">
|
||||
<xsl:variable name="filename" select="."/>
|
||||
<!-- <xsl:variable name="filename" select="encoder:encode(.,'UTF-8')"/> -->
|
||||
<xsl:result-document href="{$filename}" format="unicode.transcript.with.metadata">
|
||||
<xsl:result-document href="Transcript_{../@id}.xml" format="unicode.transcript.with.metadata">
|
||||
<xsl:element name="TITLE">
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="../@id"/>
|
||||
|
|
|
@ -5,11 +5,7 @@
|
|||
|
||||
<!-- Top-level declarations. xsl:import has to be the first element child of
     the stylesheet: XSLT requires every import to precede all other top-level
     elements, so it is placed ahead of xsl:output and the parameters.
     The parameters declared locally below have higher import precedence than
     any same-named declarations in the imported file (presumably
     solarizeConstantsForImport.xsl supplies these same constants; confirm). -->
<xsl:import href="solarizeConstantsForImport.xsl"/>
|
||||
|
||||
<xsl:output method="xml" encoding="UTF-8" indent="yes"/>
|
||||
|
||||
<xsl:param name="TITLE_TYPE" select="'AVDB_TITLE'"/>
|
||||
<xsl:param name="VIDEO_TYPE" select="'VIDEO'"/>
|
||||
<xsl:param name="TRANSCRIPT_FRAGMENT_TYPE" select="'TRANSCRIPT_FRAGMENT'"/>
|
||||
<!-- DURATION_PREFIX and DURATION_SUFFIX are wrapped around a duration value
     with concat() wherever a duration field is emitted. -->
<xsl:param name="DURATION_PREFIX" select="'1970-01-01T'"/>
|
||||
<xsl:param name="DURATION_SUFFIX" select="'Z'"/>
|
||||
|
||||
<xsl:template match="/">
|
||||
<xsl:apply-templates select="TITLE"/>
|
||||
|
@ -23,49 +19,55 @@
|
|||
</xsl:apply-templates>
|
||||
<xsl:apply-templates select="TEXT/S">
|
||||
<xsl:with-param name="title.id" select="$title.id"/>
|
||||
<xsl:with-param name="belongs.to" select="METADATA/belongsTo"/>
|
||||
</xsl:apply-templates>
|
||||
</add>
|
||||
</xsl:template>
|
||||
|
||||
<!-- should we also include transcript and video ids? -->
|
||||
<xsl:template match="METADATA">
|
||||
<xsl:param name="title.id" select="''"/>
|
||||
<doc>
|
||||
<field name="id"><xsl:value-of select="$title.id"/></field>
|
||||
<field name="thdlType_opt"><xsl:value-of select="$TITLE_TYPE"/></field>
|
||||
<field name="speechType_opt"><xsl:value-of select="speechType"/></field>
|
||||
<field name="thdlType_s"><xsl:value-of select="$TITLE_TYPE"/></field>
|
||||
<field name="belongsTo_idlist"><xsl:value-of select="belongsTo"/></field>
|
||||
<field name="speechType_s"><xsl:value-of select="speechType"/></field>
|
||||
<field name="language_lang"><xsl:value-of select="language"/></field>
|
||||
<field name="administrativeLocation_opt"><xsl:value-of select="administrativeLocation"/></field>
|
||||
<field name="culturalRegion_opt"><xsl:value-of select="culturalRegion"/></field>
|
||||
<field name="administrativeLocation_s"><xsl:value-of select="administrativeLocation"/></field>
|
||||
<field name="culturalRegion_s"><xsl:value-of select="culturalRegion"/></field>
|
||||
<field name="title_en"><xsl:value-of select="name"/></field>
|
||||
<field name="caption_en"><xsl:value-of select="caption"/></field>
|
||||
<!-- should we also include transcript and video ids? -->
|
||||
<field name="transcript_filename"><xsl:value-of select="transcript"/></field>
|
||||
<xsl:for-each select="video">
|
||||
<xsl:choose>
|
||||
<xsl:when test="mediaDescription='Audio'">
|
||||
<field name="audio_size"><xsl:value-of select="size"/></field>
|
||||
<field name="audio_duration"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
||||
<field name="audio_filename"><xsl:value-of select="name"/></field>
|
||||
</xsl:when>
|
||||
<xsl:otherwise> <!-- must be video -->
|
||||
<xsl:choose>
|
||||
<xsl:when test="connectionSpeed='fast'">
|
||||
<field name="high_size"><xsl:value-of select="size"/></field>
|
||||
<field name="high_duration"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
||||
<field name="high_filename"><xsl:value-of select="name"/></field>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<field name="low_size"><xsl:value-of select="size"/></field>
|
||||
<field name="low_duration"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
||||
<field name="low_filename"><xsl:value-of select="name"/></field>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:for-each>
|
||||
<xsl:variable name="video.ids">
|
||||
<xsl:call-template name="getVideoList">
|
||||
<xsl:with-param name="metadata" select="."/>
|
||||
</xsl:call-template>
|
||||
</xsl:variable>
|
||||
<xsl:if test="normalize-space($video.ids)">
|
||||
<field name="videos_idlist"><xsl:value-of select="normalize-space($video.ids)"/></field>
|
||||
</xsl:if>
|
||||
</doc>
|
||||
<!-- Emit one Solr <doc> per video child of the current METADATA element.
     The document id is the video's own id attribute; title_idref links the
     video back to the title via the $title.id template parameter.
     The id is selected directly with @id: concat() needs at least two
     arguments, so a one-argument call is a static XPath error. -->
<xsl:for-each select="video">
|
||||
<doc>
|
||||
<field name="id"><xsl:value-of select="@id"/></field>
|
||||
<field name="title_idref"><xsl:value-of select="$title.id"/></field>
|
||||
<field name="thdlType_s"><xsl:value-of select="$VIDEO_TYPE"/></field>
|
||||
<field name="mediaType_s"><xsl:value-of select="mediaDescription"/></field>
|
||||
<field name="connSpeed_s"><xsl:value-of select="connectionSpeed"/></field>
|
||||
<field name="quality_s"><xsl:value-of select="quality"/></field>
|
||||
<field name="size_i"><xsl:value-of select="size"/></field>
|
||||
<!-- the duration value is wrapped in $DURATION_PREFIX / $DURATION_SUFFIX -->
<field name="duration_dt"><xsl:value-of select="concat($DURATION_PREFIX,duration,$DURATION_SUFFIX)"/></field>
|
||||
<field name="media_filename"><xsl:value-of select="name"/></field>
|
||||
</doc>
|
||||
</xsl:for-each>
|
||||
</xsl:template>
|
||||
|
||||
<!-- getVideoList: writes the id attribute of every video child of $metadata,
     each one followed by a single space. The caller is expected to pass a
     node as $metadata; the empty-string default cannot be used as the start
     of the path expression below. -->
<xsl:template name="getVideoList">
|
||||
<xsl:param name="metadata" select="''"/>
|
||||
<xsl:for-each select="$metadata/video">
|
||||
<!-- id plus its trailing separator space, produced in one step -->
<xsl:value-of select="concat(@id, ' ')"/>
|
||||
</xsl:for-each>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Here's what a chunk of metadata looks like:
|
||||
|
||||
|
@ -100,19 +102,26 @@
|
|||
|
||||
<xsl:template match="S">
|
||||
<xsl:param name="title.id" select="''"/>
|
||||
<xsl:param name="belongs.to" select="''"/>
|
||||
<doc>
|
||||
<field name="id"><xsl:value-of select="concat($title.id, '_', @id)"/></field>
|
||||
<field name="transcript_idref"><xsl:value-of select="$title.id"/></field>
|
||||
<field name="thdl_type"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field>
|
||||
<field name="title_idref"><xsl:value-of select="$title.id"/></field>
|
||||
<field name="thdlType_s"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field>
|
||||
<field name="belongsTo_idlist"><xsl:value-of select="$belongs.to"/></field>
|
||||
<field name="form_bo"><xsl:value-of select="FORM[@xml:lang='bo']"/></field>
|
||||
<field name="form_bo-Latn"><xsl:value-of select="FORM[@xml:lang='bo-Latn']"/></field>
|
||||
<field name="transl_en"><xsl:value-of select="TRANSL[@xml:lang='en']"/></field>
|
||||
<field name="transl_zh"><xsl:value-of select="TRANSL[@xml:lang='zh']"/></field>
|
||||
<xsl:if test="AUDIO/@start">
|
||||
<field name="start_f"><xsl:value-of select="AUDIO/@start"/></field>
|
||||
<xsl:if test="TRANSL[@xml:lang='en']">
|
||||
<field name="transl_en"><xsl:value-of select="TRANSL[@xml:lang='en']"/></field>
|
||||
</xsl:if>
|
||||
<xsl:if test="AUDIO/@end">
|
||||
<field name="end_f"><xsl:value-of select="AUDIO/@end"/></field>
|
||||
<xsl:if test="TRANSL[@xml:lang='zh']">
|
||||
<field name="transl_zh"><xsl:value-of select="TRANSL[@xml:lang='zh']"/></field>
|
||||
</xsl:if>
|
||||
<xsl:if test="string(AUDIO/@start)">
|
||||
<field name="start_f"><xsl:value-of select="AUDIO/@start"/></field>
|
||||
<xsl:if test="string(AUDIO/@end)">
|
||||
<field name="end_f"><xsl:value-of select="AUDIO/@end"/></field>
|
||||
<field name="duration_f"><xsl:value-of select="AUDIO/@end - AUDIO/@start"/></field>
|
||||
</xsl:if>
|
||||
</xsl:if>
|
||||
</doc>
|
||||
</xsl:template>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue