improved indexing
This commit is contained in:
parent
6327403a3f
commit
10951a717f
5 changed files with 28 additions and 14 deletions
|
@ -2,14 +2,19 @@ The files in this directory must be copied to your solr webapp
|
||||||
before you can use lucene-thdl-build.xml to post, commit, or
|
before you can use lucene-thdl-build.xml to post, commit, or
|
||||||
delete documents from your solr server.
|
delete documents from your solr server.
|
||||||
|
|
||||||
schema.xml & solrconfig.xml:
|
First, run the task solr-prepare-for-copy-to-solr-webapp.
|
||||||
Copy these files to your solr/conf directory,
|
|
||||||
replacing the existing files with these names
|
|
||||||
|
|
||||||
|
Then, copy files to SOLR as follows:
|
||||||
lucene-thdl.jar :
|
|
||||||
Create an up to date copy of this file by
|
*.xml:
|
||||||
running the task lucene-thdl-jar, then copy
|
Copy to your solr/conf directory, replacing any
|
||||||
to your solr/lib directory. If solr/lib does
|
existing files.
|
||||||
|
|
||||||
|
*.xsl
|
||||||
|
Copy to the solr/conf/xslt directory.
|
||||||
|
|
||||||
|
*.jar
|
||||||
|
Copy to your solr/lib directory. If solr/lib does
|
||||||
not exist, then create it.
|
not exist, then create it.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -188,6 +188,7 @@
|
||||||
|
|
||||||
<!-- transcript documents -->
|
<!-- transcript documents -->
|
||||||
<field name="id" type="string" indexed="true" stored="true"/>
|
<field name="id" type="string" indexed="true" stored="true"/>
|
||||||
|
<field name="numericRef" type="string" indexed="true" stored="false"/>
|
||||||
<field name="form_bo" type="text_tibetan" indexed="true" stored="true"/>
|
<field name="form_bo" type="text_tibetan" indexed="true" stored="true"/>
|
||||||
|
|
||||||
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
||||||
|
@ -244,6 +245,6 @@
|
||||||
A custom similarity may be specified here, but the default is fine
|
A custom similarity may be specified here, but the default is fine
|
||||||
for most applications. -->
|
for most applications. -->
|
||||||
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
|
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
|
||||||
|
|
||||||
</schema>
|
</schema>
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
<xsl:template match="/">
|
<xsl:template match="/">
|
||||||
<xsl:for-each select="//transcript">
|
<xsl:for-each select="//transcript">
|
||||||
<xsl:variable name="filename" select="."/>
|
<xsl:variable name="filename" select="."/>
|
||||||
<xsl:result-document href="Transcript_{../@id}.xml" format="unicode.transcript.with.metadata">
|
<xsl:result-document href="{../@id}.xml" format="unicode.transcript.with.metadata">
|
||||||
<xsl:element name="TITLE">
|
<xsl:element name="TITLE">
|
||||||
<xsl:attribute name="id">
|
<xsl:attribute name="id">
|
||||||
<xsl:value-of select="../@id"/>
|
<xsl:value-of select="../@id"/>
|
||||||
|
|
|
@ -28,7 +28,8 @@
|
||||||
<xsl:template match="METADATA">
|
<xsl:template match="METADATA">
|
||||||
<xsl:param name="title.id" select="''"/>
|
<xsl:param name="title.id" select="''"/>
|
||||||
<doc>
|
<doc>
|
||||||
<field name="id"><xsl:value-of select="$title.id"/></field>
|
<field name="id"><xsl:value-of select="$title.id"/></field> <!-- has 't' prefix -->
|
||||||
|
<field name="numericRef"><xsl:value-of select="substring($title.id,2)"/></field> <!-- strips prefix for searching -->
|
||||||
<field name="thdlType_s"><xsl:value-of select="$TITLE_TYPE"/></field>
|
<field name="thdlType_s"><xsl:value-of select="$TITLE_TYPE"/></field>
|
||||||
<field name="belongsTo_idlist"><xsl:value-of select="belongsTo"/></field>
|
<field name="belongsTo_idlist"><xsl:value-of select="belongsTo"/></field>
|
||||||
<field name="speechType_s"><xsl:value-of select="speechType"/></field>
|
<field name="speechType_s"><xsl:value-of select="speechType"/></field>
|
||||||
|
@ -49,7 +50,8 @@
|
||||||
</doc>
|
</doc>
|
||||||
<xsl:for-each select="video">
|
<xsl:for-each select="video">
|
||||||
<doc>
|
<doc>
|
||||||
<field name="id"><xsl:value-of select="concat(@id)"/></field>
|
<field name="id"><xsl:value-of select="@id"/></field> <!-- has 'v' prefix -->
|
||||||
|
<field name="numericRef"><xsl:value-of select="substring(@id,2)"/></field> <!-- strips prefix for searching -->
|
||||||
<field name="title_idref"><xsl:value-of select="$title.id"/></field>
|
<field name="title_idref"><xsl:value-of select="$title.id"/></field>
|
||||||
<field name="thdlType_s"><xsl:value-of select="$VIDEO_TYPE"/></field>
|
<field name="thdlType_s"><xsl:value-of select="$VIDEO_TYPE"/></field>
|
||||||
<field name="mediaType_s"><xsl:value-of select="mediaDescription"/></field>
|
<field name="mediaType_s"><xsl:value-of select="mediaDescription"/></field>
|
||||||
|
@ -103,8 +105,10 @@
|
||||||
<xsl:template match="S">
|
<xsl:template match="S">
|
||||||
<xsl:param name="title.id" select="''"/>
|
<xsl:param name="title.id" select="''"/>
|
||||||
<xsl:param name="belongs.to" select="''"/>
|
<xsl:param name="belongs.to" select="''"/>
|
||||||
|
<xsl:variable name="s.id" select="concat($title.id, '_', @id)"/>
|
||||||
<doc>
|
<doc>
|
||||||
<field name="id"><xsl:value-of select="concat($title.id, '_', @id)"/></field>
|
<field name="id"><xsl:value-of select="$s.id"/></field> <!-- has 't' prefix -->
|
||||||
|
<field name="numericRef"><xsl:value-of select="substring($s.id,2)"/></field> <!-- strips prefix for searching -->
|
||||||
<field name="title_idref"><xsl:value-of select="$title.id"/></field>
|
<field name="title_idref"><xsl:value-of select="$title.id"/></field>
|
||||||
<field name="thdlType_s"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field>
|
<field name="thdlType_s"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field>
|
||||||
<field name="belongsTo_idlist"><xsl:value-of select="$belongs.to"/></field>
|
<field name="belongsTo_idlist"><xsl:value-of select="$belongs.to"/></field>
|
|
@ -100,7 +100,7 @@
|
||||||
<!-- solr tasks -->
|
<!-- solr tasks -->
|
||||||
<target name="solr-prepare-titles">
|
<target name="solr-prepare-titles">
|
||||||
<mkdir dir="${solr.titles}"/>
|
<mkdir dir="${solr.titles}"/>
|
||||||
<xslt basedir="${transcripts}" includes="*.xml" destdir="${solr.titles}" extension=".xml" style="${styles}/solarizeTranscript.xsl"/>
|
<xslt basedir="${transcripts}" includes="*.xml" destdir="${solr.titles}" extension=".xml" style="${styles}/solarizeTitles.xsl"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="solr-prepare-nontitles">
|
<target name="solr-prepare-nontitles">
|
||||||
|
@ -141,6 +141,10 @@
|
||||||
<jvmarg value="-Dcommit=yes"/>
|
<jvmarg value="-Dcommit=yes"/>
|
||||||
</java>
|
</java>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<target name="solr-prepare-for-copy-to-solr-webapp" depends="lucene-thdl-jar">
|
||||||
|
<!--<copy file="${styles}/solarizeConstantsForImport.xsl" todir="${copy.to.solr.webapp}"/>-->
|
||||||
|
</target>
|
||||||
|
|
||||||
<!-- insert whether or not media exists for segment -->
|
<!-- insert whether or not media exists for segment -->
|
||||||
<!-- <java classname="net.sf.saxon.Transform" fork="yes">
|
<!-- <java classname="net.sf.saxon.Transform" fork="yes">
|
||||||
|
|
Loading…
Reference in a new issue