added a few stylesheets for lucene-based indexing of THDL's avdb
This commit is contained in:
		
							parent
							
								
									3dd452a298
								
							
						
					
					
						commit
						f8a97cce4e
					
				
					 3 changed files with 138 additions and 3 deletions
				
			
		
							
								
								
									
										16
									
								
								archive/styles/get-list-of-transcripts.xsl
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								archive/styles/get-list-of-transcripts.xsl
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,16 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| 
 | ||||
| <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> | ||||
| 	<xsl:output method="text" encoding="utf-8"/> | ||||
| 	 | ||||
| 	<xsl:param name="prefix" select="''"/> | ||||
| 	 | ||||
| 	<!-- Entry point: hand every transcript element found anywhere in the input to its template. --> | ||||
| 	<xsl:template match="/"> | ||||
| 		<xsl:apply-templates select="/descendant::transcript"/> | ||||
| 	</xsl:template> | ||||
| 	 | ||||
| 	<!-- Emit one output line per transcript: the prefix parameter, the element's string value, | ||||
| 	     then a single line feed. --> | ||||
| 	<xsl:template match="transcript"> | ||||
| 		<xsl:value-of select="concat($prefix, ., '&#10;')"/> | ||||
| 	</xsl:template> | ||||
| </xsl:stylesheet> | ||||
							
								
								
									
										109
									
								
								archive/styles/qdToUnicode.xsl
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								archive/styles/qdToUnicode.xsl
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,109 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| 
 | ||||
| <xsl:stylesheet  | ||||
| 	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | ||||
|        xmlns:thdl="java:org.thdl.tib.text.ttt.EwtsToUnicodeForXslt" | ||||
|        exclude-result-prefixes="thdl" | ||||
| 	version="2.0"> | ||||
|          | ||||
|       <!--  <xsl:param name="mediaref"/> --> | ||||
|          | ||||
|         <!-- Entry point: process every child node of the document root. --> | ||||
|         <xsl:template match="/"> | ||||
|                 <xsl:apply-templates select="node()"/> | ||||
|         </xsl:template> | ||||
|    | ||||
|      <!--   <xsl:template match="SOUNDFILE"> | ||||
|                 <SOUNDFILE href="{$mediaref}"/> | ||||
|         </xsl:template> --> | ||||
|          | ||||
|         <!-- Re-create TEXT as a bare element (its own attributes are not carried over) | ||||
|              and process its children. --> | ||||
|         <xsl:template match="TEXT"> | ||||
|             <xsl:element name="TEXT"> | ||||
|                 <xsl:apply-templates select="node()"/> | ||||
|             </xsl:element> | ||||
|         </xsl:template> | ||||
|          | ||||
|         <!-- Rebuild each S element: carry over every attribute except an existing id, | ||||
|              assign a fresh processor-generated id, then process child elements only | ||||
|              (text nodes directly under S are not processed). --> | ||||
|         <xsl:template match="S"> | ||||
|           <xsl:element name="S"> | ||||
|             <!-- Filter on the attribute NAME. Comparing the attribute value to 'id' would let the | ||||
|                  old id attribute through and drop any unrelated attribute whose value is "id". --> | ||||
|             <xsl:apply-templates select="@*[name() != 'id']"/> | ||||
|             <xsl:attribute name="id"> | ||||
|                 <xsl:value-of select="generate-id(.)"/> | ||||
|             </xsl:attribute> | ||||
|             <xsl:apply-templates select="*"/> | ||||
|           </xsl:element>   | ||||
|         </xsl:template> | ||||
| 
 | ||||
|         <!-- Replace a FORM with two FORM elements: one holding the output of the thdl | ||||
|              extension function (xml:lang="bo") and one holding the untouched source | ||||
|              string (xml:lang="bo-Latn"). --> | ||||
|         <xsl:template match="FORM"> | ||||
|                 <xsl:variable name="ewts" select="string(.)"/> | ||||
|                 <xsl:variable name="rendered" select="thdl:convertEwtsTo($ewts)"/> | ||||
|                 <FORM xml:lang="bo"> | ||||
|                         <!-- Split the converted string on '[' and ']'. A piece that starts with | ||||
|                              '#ERROR' is replaced by a single marker character; any other piece | ||||
|                              is written out unchanged. A richer ERROR element (number, offense, | ||||
|                              MSG, SRC) was sketched here earlier but is disabled. --> | ||||
|                         <xsl:for-each select="tokenize($rendered, '[\[\]]')"> | ||||
|                                 <xsl:value-of select="if (starts-with(., '#ERROR')) then '࿐' else ."/> | ||||
|                         </xsl:for-each> | ||||
|                 </FORM> | ||||
|                 <FORM xml:lang="bo-Latn"> | ||||
|                     <xsl:value-of select="$ewts"/> | ||||
|                 </FORM> | ||||
|         </xsl:template> | ||||
|          | ||||
|         <!-- Re-create TRANSL tagged with xml:lang="en" and process its children. --> | ||||
|         <xsl:template match="TRANSL"> | ||||
|             <xsl:element name="TRANSL"> | ||||
|                 <xsl:attribute name="xml:lang">en</xsl:attribute> | ||||
|                 <xsl:apply-templates select="node()"/> | ||||
|             </xsl:element> | ||||
|         </xsl:template> | ||||
|          | ||||
|         <!-- Rename TRANSL_ZH to TRANSL tagged with xml:lang="zh" and process its children. --> | ||||
|         <xsl:template match="TRANSL_ZH"> | ||||
|             <xsl:element name="TRANSL"> | ||||
|                 <xsl:attribute name="xml:lang">zh</xsl:attribute> | ||||
|                 <xsl:apply-templates select="node()"/> | ||||
|             </xsl:element> | ||||
|         </xsl:template> | ||||
|          | ||||
|         <!-- Re-create SPEAKER with xml:lang="bo", the original personId, and the source | ||||
|              string preserved in a wylie attribute; the element content becomes the output | ||||
|              of the thdl extension function. --> | ||||
|         <xsl:template match="SPEAKER"> | ||||
|                 <xsl:variable name="ewts" select="string(.)"/> | ||||
|                 <xsl:variable name="rendered" select="thdl:convertEwtsTo($ewts)"/> | ||||
|                 <SPEAKER xml:lang="bo" personId="{@personId}" wylie="{$ewts}"> | ||||
|                         <!-- Split the converted string on '[' and ']'. A piece that starts with | ||||
|                              '#ERROR' is replaced by a single marker character; any other piece | ||||
|                              is written out unchanged. A richer ERROR element (number, offense, | ||||
|                              MSG, SRC) was sketched here earlier but is disabled. --> | ||||
|                         <xsl:for-each select="tokenize($rendered, '[\[\]]')"> | ||||
|                                 <xsl:value-of select="if (starts-with(., '#ERROR')) then '࿐' else ."/> | ||||
|                         </xsl:for-each> | ||||
|                 </SPEAKER> | ||||
|         </xsl:template> | ||||
| 
 | ||||
|         <!-- Fallback for anything without a more specific template: shallow-copy the node, | ||||
|              then recurse into its attributes and children. --> | ||||
|         <xsl:template match="@*|node()"> | ||||
|           <xsl:copy> | ||||
|             <xsl:apply-templates select="attribute::* | child::node()"/> | ||||
|           </xsl:copy> | ||||
|         </xsl:template> | ||||
| 
 | ||||
|       <!--  <xsl:template match="*"> | ||||
|                 <xsl:element name="{name(.)}"> | ||||
|                     <xsl:for-each select="@*"> | ||||
|                             <xsl:attribute name="{name(.)}"> | ||||
|                                     <xsl:value-of select="."/> | ||||
|                             </xsl:attribute> | ||||
|                     </xsl:for-each> | ||||
|                     <xsl:apply-templates/> | ||||
|                 </xsl:element> | ||||
|         </xsl:template> --> | ||||
| 
 | ||||
| </xsl:stylesheet> | ||||
|  | @ -1,6 +1,6 @@ | |||
| <?xml version="1.0" encoding="utf-8"?> | ||||
| 
 | ||||
| <project name="thdl-concordancer" default="index-for-solr" basedir="."> | ||||
| <project name="lucene-thdl" default="lucene-thdl-jar" basedir="."> | ||||
| 
 | ||||
|     <import file="build.xml"/> | ||||
|      | ||||
|  | @ -9,6 +9,7 @@ | |||
|     <property name="wylie" location="${archive}/wylie"/> | ||||
|     <property name="unicode" location="${archive}/unicode"/> | ||||
|     <property name="solr" location="${archive}/solr"/> | ||||
|     <property name="styles" location="${archive}/styles"/> | ||||
|     <property name="get.title.metadata" value="http://thdl.org/avarch/mediaflowcat/title_metadata.php"/> | ||||
|     <property name="url.to.transcripts" value="http://www.thdl.org/avarch/transcripts"/> | ||||
|     <property name="url.to.media.high" value="http://www.thdl.org/media/high"/> | ||||
|  | @ -69,6 +70,12 @@ | |||
|         <get src="${get.title.metadata}" dest="${archive}/title_metadata.xml" verbose="on"/> | ||||
|     </target> | ||||
|      | ||||
|     <!-- Transform title_metadata.xml into titles_as_list.txt with get-list-of-transcripts.xsl, | ||||
|          passing the transcripts URL plus a trailing slash as the stylesheet's prefix parameter. --> | ||||
|     <target name="archive-get-transcripts"> | ||||
|         <xslt style="${styles}/get-list-of-transcripts.xsl" | ||||
|               in="${archive}/title_metadata.xml" | ||||
|               out="${archive}/titles_as_list.txt"> | ||||
|             <param name="prefix" expression="${url.to.transcripts}/"/> | ||||
|         </xslt> | ||||
|     </target> | ||||
|   | ||||
|     <!-- | ||||
|     <filelist  | ||||
|     id="docfiles"  | ||||
|  | @ -76,8 +83,11 @@ | |||
|     <file name="foo.xml"/> | ||||
|     <file name="bar.xml"/> | ||||
|     </filelist> | ||||
| --> | ||||
| 
 | ||||
| <filterreader classname="org.apache.tools.ant.filters.PrefixLines"> | ||||
|   <param name="prefix" value="Foo"/> | ||||
| </filterreader> | ||||
| --> | ||||
|     <!-- concordance program --> | ||||
| 	<target name="lucene-thdl-compile" depends="init"> | ||||
|         <mkdir dir="${lucene-thdl.bin}"/> | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue