improved indexing

This commit is contained in:
eg3p 2007-06-24 18:28:47 +00:00
parent 6327403a3f
commit 10951a717f
5 changed files with 28 additions and 14 deletions

View file

@ -2,14 +2,19 @@ The files in this directory must be copied to your solr webapp
before you can use lucene-thdl-build.xml to post, commit, or before you can use lucene-thdl-build.xml to post, commit, or
delete documents from your solr server. delete documents from your solr server.
schema.xml & solrconfig.xml: First, run the task solr-prepare-for-copy-to-solr-webapp.
Copy these files to your solr/conf directory,
replacing the existing files with these names
Then, copy files to SOLR as follows:
lucene-thdl.jar : *.xml:
Create an up to date copy of this file by Copy to your solr/conf directory, replacing any
running the task lucene-thdl-jar, then copy existing files.
to your solr/lib directory. If solr/lib does
*.xsl:
Copy to the solr/conf/xslt directory.
*.jar:
Copy to your solr/lib directory. If solr/lib does
not exist, then create it. not exist, then create it.

View file

@ -188,6 +188,7 @@
<!-- transcript documents --> <!-- transcript documents -->
<field name="id" type="string" indexed="true" stored="true"/> <field name="id" type="string" indexed="true" stored="true"/>
<field name="numericRef" type="string" indexed="true" stored="false"/>
<field name="form_bo" type="text_tibetan" indexed="true" stored="true"/> <field name="form_bo" type="text_tibetan" indexed="true" stored="true"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields <!-- Dynamic field definitions. If a field name is not found, dynamicFields

View file

@ -17,7 +17,7 @@
<xsl:template match="/"> <xsl:template match="/">
<xsl:for-each select="//transcript"> <xsl:for-each select="//transcript">
<xsl:variable name="filename" select="."/> <xsl:variable name="filename" select="."/>
<xsl:result-document href="Transcript_{../@id}.xml" format="unicode.transcript.with.metadata"> <xsl:result-document href="{../@id}.xml" format="unicode.transcript.with.metadata">
<xsl:element name="TITLE"> <xsl:element name="TITLE">
<xsl:attribute name="id"> <xsl:attribute name="id">
<xsl:value-of select="../@id"/> <xsl:value-of select="../@id"/>

View file

@ -28,7 +28,8 @@
<xsl:template match="METADATA"> <xsl:template match="METADATA">
<xsl:param name="title.id" select="''"/> <xsl:param name="title.id" select="''"/>
<doc> <doc>
<field name="id"><xsl:value-of select="$title.id"/></field> <field name="id"><xsl:value-of select="$title.id"/></field> <!-- has 't' prefix -->
<field name="numericRef"><xsl:value-of select="substring($title.id,2)"/></field> <!-- strips prefix for searching -->
<field name="thdlType_s"><xsl:value-of select="$TITLE_TYPE"/></field> <field name="thdlType_s"><xsl:value-of select="$TITLE_TYPE"/></field>
<field name="belongsTo_idlist"><xsl:value-of select="belongsTo"/></field> <field name="belongsTo_idlist"><xsl:value-of select="belongsTo"/></field>
<field name="speechType_s"><xsl:value-of select="speechType"/></field> <field name="speechType_s"><xsl:value-of select="speechType"/></field>
@ -49,7 +50,8 @@
</doc> </doc>
<xsl:for-each select="video"> <xsl:for-each select="video">
<doc> <doc>
<field name="id"><xsl:value-of select="concat(@id)"/></field> <field name="id"><xsl:value-of select="@id"/></field> <!-- has 'v' prefix -->
<field name="numericRef"><xsl:value-of select="substring(@id,2)"/></field> <!-- strips prefix for searching -->
<field name="title_idref"><xsl:value-of select="$title.id"/></field> <field name="title_idref"><xsl:value-of select="$title.id"/></field>
<field name="thdlType_s"><xsl:value-of select="$VIDEO_TYPE"/></field> <field name="thdlType_s"><xsl:value-of select="$VIDEO_TYPE"/></field>
<field name="mediaType_s"><xsl:value-of select="mediaDescription"/></field> <field name="mediaType_s"><xsl:value-of select="mediaDescription"/></field>
@ -103,8 +105,10 @@
<xsl:template match="S"> <xsl:template match="S">
<xsl:param name="title.id" select="''"/> <xsl:param name="title.id" select="''"/>
<xsl:param name="belongs.to" select="''"/> <xsl:param name="belongs.to" select="''"/>
<xsl:variable name="s.id" select="concat($title.id, '_', @id)"/>
<doc> <doc>
<field name="id"><xsl:value-of select="concat($title.id, '_', @id)"/></field> <field name="id"><xsl:value-of select="$s.id"/></field> <!-- has 't' prefix -->
<field name="numericRef"><xsl:value-of select="substring($s.id,2)"/></field> <!-- strips prefix for searching -->
<field name="title_idref"><xsl:value-of select="$title.id"/></field> <field name="title_idref"><xsl:value-of select="$title.id"/></field>
<field name="thdlType_s"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field> <field name="thdlType_s"><xsl:value-of select="$TRANSCRIPT_FRAGMENT_TYPE"/></field>
<field name="belongsTo_idlist"><xsl:value-of select="$belongs.to"/></field> <field name="belongsTo_idlist"><xsl:value-of select="$belongs.to"/></field>

View file

@ -100,7 +100,7 @@
<!-- solr tasks --> <!-- solr tasks -->
<target name="solr-prepare-titles"> <target name="solr-prepare-titles">
<mkdir dir="${solr.titles}"/> <mkdir dir="${solr.titles}"/>
<xslt basedir="${transcripts}" includes="*.xml" destdir="${solr.titles}" extension=".xml" style="${styles}/solarizeTranscript.xsl"/> <xslt basedir="${transcripts}" includes="*.xml" destdir="${solr.titles}" extension=".xml" style="${styles}/solarizeTitles.xsl"/>
</target> </target>
<target name="solr-prepare-nontitles"> <target name="solr-prepare-nontitles">
@ -142,6 +142,10 @@
</java> </java>
</target> </target>
<target name="solr-prepare-for-copy-to-solr-webapp" depends="lucene-thdl-jar">
<!--<copy file="${styles}/solarizeConstantsForImport.xsl" todir="${copy.to.solr.webapp}"/>-->
</target>
<!-- insert whether or not media exists for segment --> <!-- insert whether or not media exists for segment -->
<!-- <java classname="net.sf.saxon.Transform" fork="yes"> <!-- <java classname="net.sf.saxon.Transform" fork="yes">
<arg value="-o"/> <arg value="-o"/>