<?xml version="1.0" encoding="UTF-8"?>
<!--
  Homework submission template for the gs540_hw document type.
  The internal DTD subset below declares the allowed structure; the
  document element that follows it is an example to be filled in.
-->
<!DOCTYPE gs540_hw [
  <!-- Root element: one results section, one analysis section and one
       program section, in that order. All three attributes are mandatory. -->
  <!ELEMENT gs540_hw (results, analysis, program)>
  <!ATTLIST gs540_hw assignment CDATA #REQUIRED>
  <!ATTLIST gs540_hw name CDATA #REQUIRED>
  <!ATTLIST gs540_hw email CDATA #REQUIRED>

  <!-- results: zero or more result and/or model children, in any order. -->
  <!ELEMENT results (result|model)*>

  <!-- result: free-form content (ANY), so it may hold text as well as other
       declared elements; the example document nests further result elements
       and a model inside a viterbi_iteration result.
       type is required and must be one of the listed values;
       file and iteration are optional. -->
  <!ELEMENT result ANY>
  <!ATTLIST result type (viterbi_iteration|state_histogram|segment_histogram|longest_segment_list) #REQUIRED>
  <!ATTLIST result file CDATA #IMPLIED>
  <!ATTLIST result iteration CDATA #IMPLIED>

  <!-- model: free-form content; the only permitted type value is hmm. -->
  <!ELEMENT model ANY>
  <!ATTLIST model type (hmm) #REQUIRED>

  <!-- Model parts (used inside model in the example document). All have
       free-form content. The two per-state tables must name their state
       through the required state attribute. -->
  <!ELEMENT states ANY>
  <!ELEMENT initial_state_probabilities ANY>
  <!ELEMENT transition_probabilities ANY>
  <!ATTLIST transition_probabilities state CDATA #REQUIRED>
  <!ELEMENT emission_probabilities ANY>
  <!ATTLIST emission_probabilities state CDATA #REQUIRED>

  <!-- analysis: one or more annotation elements, each with required
       start and end attributes and free-form content. -->
  <!ELEMENT analysis (annotation)+>
  <!ELEMENT annotation ANY>
  <!ATTLIST annotation start CDATA #REQUIRED>
  <!ATTLIST annotation end CDATA #REQUIRED>

  <!-- program: a comments element followed by one or more file elements;
       every file must carry a file_name attribute. -->
  <!ELEMENT program (comments, file+)>
  <!ELEMENT comments ANY>
  <!ELEMENT file ANY>
  <!ATTLIST file file_name CDATA #REQUIRED>
]>

<gs540_hw assignment='8' name='student name' email='student email'>  
  <results> 
   <result type='viterbi_iteration' iteration='1'>
    <result type='state_histogram'>

        State histograms should give, for each state, the state label
        (1 or 2), followed by an equals sign, followed by the number
        of positions in the sequence having that state in the Viterbi
        parse. Put a comma between the entries. For instance, if the
        sequence has length 8 and the Viterbi parse is 11222111 then
        your histogram should be:

        1=5,2=3
    </result>
    <result type='segment_histogram'>

        Segment histograms should give, for each state, the state
        label, followed by an equals sign, followed by the number of
        segments consisting of that state.  For instance, for the
        Viterbi parse above your histogram would be:

        1=2,2=1
    </result>
     <model type='hmm'>

        The model object should specify an HMM by giving state labels,
        initial state probabilities (= transition probabilities from
        the begin state), state transition probabilities, and symbol
        emission probabilities.
       <states>
          1,2
       </states>
   
       <initial_state_probabilities>
         1=0.95000,2=0.05000
       </initial_state_probabilities>
       <transition_probabilities state='1'>
          1=0.95000,2=0.05000
       </transition_probabilities>
       <transition_probabilities state='2'>
           1=0.05000,2=0.95000
       </transition_probabilities>
       <emission_probabilities state='1'>
          For each symbol emitted by the state 
          indicated in the attributes for this
          field, give the probability
          of emitting that symbol.
          AAA=0.1000,AAC=0.05000,AAT=0.05000,AAG=0.05000,AA-=0.0050,
          ...
       </emission_probabilities>
       <emission_probabilities state='2'>
          AAA=0.1500,AAC=0.01000,AAT=0.01000,AAG=0.01000,AA-=0.0020,
          ...
       </emission_probabilities>
     </model>
   </result>
<result type='longest_segment_list'>
  Report your 10 longest conserved (state 2) segments here. Remember
  to give the coordinates relative to the start of the chromosome
  rather than the start of the alignment (which starts at position
  115,597,756 on chromosome 7).

  (116741000,116752000),
  (116745000,116756000),
  ...
</result>


  </results>

  <analysis>
    <annotation start='116741000' end='116752000'>  
       Give a short description for each of your 5 longest conserved segments.
       Example: Overlaps with exon3 of the protein coding gene cMyc
    </annotation>
  </analysis>
  <program>
    <comments> 
       put comments about your code here 
    </comments>
    <file file_name='myprog.c'>
      <![CDATA[
      File contents here. Don't forget to wrap the code in a CDATA section.
      ]]>
    </file>
  </program>
</gs540_hw>




