<!DOCTYPE gs540_hw [
  <!-- Root element: exactly one results, one analysis, and one program
       child, in that order. All three attributes are required. -->
  <!ELEMENT gs540_hw (results, analysis, program)>
  <!ATTLIST gs540_hw
    assignment CDATA #REQUIRED
    name       CDATA #REQUIRED
    email      CDATA #REQUIRED>

  <!-- results: zero or more result and model elements, in any order. -->
  <!ELEMENT results (result|model)*>

  <!-- result: ANY content (text mixed with any declared elements), so a
       result may itself contain nested result and model elements.
       The type attribute must be one of the three enumerated values;
       file and iteration are optional. -->
  <!ELEMENT result ANY>
  <!ATTLIST result
    type      (first_line|viterbi_iteration|gene_histogram) #REQUIRED
    file      CDATA #IMPLIED
    iteration CDATA #IMPLIED>

  <!-- model: ANY content; the only permitted type value is "hmm". -->
  <!ELEMENT model ANY>
  <!ATTLIST model
    type (hmm) #REQUIRED>

  <!-- HMM components. Each has ANY content. The two per-state probability
       elements require a state attribute. -->
  <!ELEMENT states ANY>
  <!ELEMENT initial_state_probabilities ANY>
  <!ELEMENT transition_probabilities ANY>
  <!ATTLIST transition_probabilities
    state CDATA #REQUIRED>
  <!ELEMENT emission_probabilities ANY>
  <!ATTLIST emission_probabilities
    state CDATA #REQUIRED>

  <!-- analysis: one or more annotation elements, each with required
       gene_start and gene_end attributes. -->
  <!ELEMENT analysis (annotation)+>
  <!ELEMENT annotation ANY>
  <!ATTLIST annotation
    gene_start CDATA #REQUIRED
    gene_end   CDATA #REQUIRED>

  <!-- program: one comments element followed by one or more file
       elements; each file requires a file_name attribute. -->
  <!ELEMENT program (comments, file+)>
  <!ELEMENT comments ANY>
  <!ELEMENT file ANY>
  <!ATTLIST file
    file_name CDATA #REQUIRED>
]>

<gs540_hw assignment='7' name='student name' email='student email'>  
  <results> 
   <result type='first_line' file='filename'>  
      First line of the .fna file that you use for
      Viterbi training. NOTE: please do not change the filename or
      the first line in any way.
   </result>
   <result type='viterbi_iteration' iteration='some number'>
     <result type='gene_histogram'>
      The gene histogram should give the number of top strand
      and bottom strand genes found at this iteration, and
      should be reported as below.
      top strand genes=some number,bottom strand genes=some number
     </result>
     <model type='hmm'>

        The model object should specify an HMM by giving
        state labels, initial state probabilities (= transition probabilities from the begin state), state
        transition probabilities, and symbol emission
        probabilities.
       <states>
          Give your state labels, separated by commas:
          1,2
       </states>
   
       <initial_state_probabilities>
          initial state probabilities should give,
          for each state, the state label
          and probability of starting in that state (i.e. the 
          probability of transitioning into 
          that state from the begin state) separated by an equals
          sign.  Entries should be separated by commas.

          1=0.90000,2=0.10000

       </initial_state_probabilities>
       <transition_probabilities state='1'>
           Transition probabilities should give, for each
           state, the state label and probability
           of transitioning to that state from the
           state indicated in the attribute list.
           The present field (with state='1') gives the probabilities
           of transitioning from state 1 to states 1 and 2:
           1=0.99000,2=0.01000
       </transition_probabilities>
       <transition_probabilities state='2'>

           1=0.20000,2=0.80000
       </transition_probabilities>
       <emission_probabilities state='1'>
          For each symbol emitted by the state 
          indicated in the attributes for this
          field, give the probability
          of emitting that symbol.
          TTT=.15000,TCT=.15000,TAT=.15000,TGT=.15000,
          ...
       </emission_probabilities>
       <emission_probabilities state='2'>
          TTT=.20000,TCT=.15000,TAT=.15000,TGT=.10000,
          ...
       </emission_probabilities>
     </model>
   </result>

  </results>

  <analysis>
    <annotation gene_start='1' gene_end='99'>  
       Gene name and short description (one sentence) as derived from GenBank or the UCSC Genome Browser.
       Example:
       GyrB: Encodes DNA topoisomerase IV subunit B
    </annotation>
  </analysis>
  <program>
    <comments> 
       put comments about your code here 
    </comments>
    <file file_name='myprog.c'>
      <![CDATA[
      file contents here 
      ]]>
    </file>
  </program>
</gs540_hw>




