improved indexing

This commit is contained in:
eg3p 2007-06-24 18:28:47 +00:00
parent 6327403a3f
commit 10951a717f
5 changed files with 28 additions and 14 deletions

View File

@ -2,14 +2,19 @@ The files in this directory must be copied to your solr webapp
before you can use lucene-thdl-build.xml to post, commit, or
delete documents from your solr server.
schema.xml & solrconfig.xml:
Copy these files to your solr/conf directory,
replacing the existing files with these names
First, run the task solr-prepare-for-copy-to-solr-webapp.
lucene-thdl.jar :
Create an up to date copy of this file by
running the task lucene-thdl-jar, then copy
to your solr/lib directory. If solr/lib does
Then, copy files to SOLR as follows:
*.xml:
Copy to your solr/conf directory, replacing any
existing files.
*.xsl:
Copy to the solr/conf/xslt directory.
*.jar:
Copy to your solr/lib directory. If solr/lib does
not exist, then create it.

View File

@ -188,6 +188,7 @@
<!-- transcript documents -->
<field name="id" type="string" indexed="true" stored="true"/>
<field name="numericRef" type="string" indexed="true" stored="false"/>
<field name="form_bo" type="text_tibetan" indexed="true" stored="true"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
@ -244,6 +245,6 @@
A custom similarity may be specified here, but the default is fine
for most applications. -->
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
</schema>

View File

@ -17,7 +17,7 @@
<xsl:template match="/">
<xsl:for-each select="//transcript">
<xsl:variable name="filename" select="."/>
<xsl:result-document href="Transcript_{../@id}.xml" format="unicode.transcript.with.metadata">
<xsl:result-document href="{../@id}.xml" format="unicode.transcript.with.metadata">
<xsl:element name="TITLE">
<xsl:attribute name="id">
<xsl:value-of select="../@id"/>

View File

@ -28,7 +28,8 @@
<xsl:template match="METADATA">
<xsl:param name="title.id" select="''"/>
<doc>
<field name="id"><xsl:value-of select="$title.id"/></field>
<field name="id"><xsl:value-of select="$title.id"/></field> <!-- has 't' prefix -->
<field name="numericRef"><xsl:value-of select="substring($title.id,2)"/></field> <!-- strips prefix for searching -->
<field name="thdlType_s"><xsl:value-of select="$TITLE_TYPE"/></field>
<field name="belongsTo_idlist"><xsl:value-of select="belongsTo"/></field>
<field name="speechType_s"><xsl:value-of select="speechType"/></field>
@ -49,7 +50,8 @@
</doc>
<xsl:for-each select="video">
<doc>
<field name="id"><xsl:value-of select="concat(@id)"/></field>
<field name="id"><xsl:value-of select="@id"/></field> <!-- has 'v' prefix -->
<field name="numericRef"><xsl:value-of select="substring(@id,2)"/></field> <!-- strips prefix for searching -->
<field name="title_idref"><xsl:value-of select="$title.id"/></field>
<field name="thdlType_s"><xsl:value-of select="$VIDEO_TYPE"/></field>
<field name="mediaType_s"><xsl:value-of select="mediaDescription"/></field>
@ -103,8 +105,10 @@
<xsl:template match="S">
<xsl:param name="title.id" select="''"/>
<xsl:param name="belongs.to" select="''"/>
<xsl:variable name="s.id" select="concat($title.id, '_', @id)"/>
<doc>
<field name="id"><xsl:value-of select="concat($title.id, '_', @id)"/></field>
<field name="id"><xsl:value-of select="$s.id"/></field> <!-- has 't' prefix -->
<field name="numericRef"><xsl:value-of select="substring($s.id,2)"/></field> <!-- strips prefix for searching -->
<field name="title_idref"><xsl:value-of select="$title.id"/></field>
<field name="thdlType_s"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field>
<field name="belongsTo_idlist"><xsl:value-of select="$belongs.to"/></field>

View File

@ -100,7 +100,7 @@
<!-- solr tasks -->
<target name="solr-prepare-titles">
<!-- Creates the ${solr.titles} directory if it is missing, then applies an
     XSLT stylesheet to every *.xml file under ${transcripts}, writing the
     results (with a .xml extension) into ${solr.titles}. -->
<!-- NOTE(review): two xslt lines appear here, one using solarizeTranscript.xsl
     and one using solarizeTitles.xsl; presumably only the solarizeTitles.xsl
     line is current and the other is the pre-change version. Confirm. -->
<mkdir dir="${solr.titles}"/>
<xslt basedir="${transcripts}" includes="*.xml" destdir="${solr.titles}" extension=".xml" style="${styles}/solarizeTranscript.xsl"/>
<xslt basedir="${transcripts}" includes="*.xml" destdir="${solr.titles}" extension=".xml" style="${styles}/solarizeTitles.xsl"/>
</target>
<target name="solr-prepare-nontitles">
@ -141,6 +141,10 @@
<jvmarg value="-Dcommit=yes"/>
</java>
</target>
<target name="solr-prepare-for-copy-to-solr-webapp" depends="lucene-thdl-jar">
<!-- Runs the lucene-thdl-jar target through the depends attribute; this
     target currently has no active tasks of its own. -->
<!-- Disabled step: copy solarizeConstantsForImport.xsl from ${styles} into
     ${copy.to.solr.webapp}. -->
<!--<copy file="${styles}/solarizeConstantsForImport.xsl" todir="${copy.to.solr.webapp}"/>-->
</target>
<!-- insert whether or not media exists for segment -->
<!-- <java classname="net.sf.saxon.Transform" fork="yes">