????JFIF??x?x????'
Server IP : 79.136.114.73 / Your IP : 18.117.8.233 Web Server : Apache/2.4.7 (Ubuntu) PHP/5.5.9-1ubuntu4.29 OpenSSL/1.0.1f System : Linux b8009 3.13.0-170-generic #220-Ubuntu SMP Thu May 9 12:40:49 UTC 2019 x86_64 User : www-data ( 33) PHP Version : 5.5.9-1ubuntu4.29 Disable Function : pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority, MySQL : ON | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON Directory : /usr/bin/ |
Upload File : |
#!/usr/bin/perl $| =1; use strict; use warnings; use sigtrap; use Getopt::Std; use vars qw( $opt_r $opt_o $opt_x $opt_i $opt_e $opt_A $opt_D $opt_M $opt_S $opt_C $opt_I $opt_N $opt_a $opt_3 $opt_p $opt_5 $opt_t $opt_Z $opt_1 $opt_c ); getopts('roxi:e:A:D:M:S:C:I:N:a:3:p5:tZ:1c:'); my $ACCEPTOR = "0:30"; my $DONOR = "0:9"; my $START = "0:15"; my $STOP = "0:9"; my $CODING = "4"; my $INTRON = "4"; my $INTER = "4"; my $PROM; my $UTR5 = ""; my $UTR3 = ""; my $POLYA = ""; my $UTR5Length; # defined below; my $UTR3Length; # defined below my $InterLength = 500; my $EsnglLength = 1000; die " usage: hmm-assembler.pl <name> <directory of files from forge> options: -i <length> [$InterLength] -e <length> [$EsnglLength] -A <order:length> [$ACCEPTOR] -D <order:length> [$DONOR] -M <order:length> [$START] -S <order:length> [$STOP] -C <order> [$CODING] -I <order> [$INTRON] -N <order> [$INTER] -3 <order:length> [$UTR3] include 3'UTR model, requires -a -a <order:length> [$POLYA] include PolyA model, requires -3 -5 <order:length> [$UTR5] include 5'UTR moel, requires -p -p include generic promoter model, requires -5 -r include generic repeat model -o include reverse ORF model -x use explicit duration intron model -t include C.elegans trans-splicing, requires -p, -5 -Z <clade> sets clade-specific values (worm, fly, plant) -1 single gene model -c <score> include GC-AG splice donor model " unless @ARGV == 2; my ($NAME, $DIR) = @ARGV; my $REPEATS = $opt_r; my $REVERSE_ORF = $opt_o; my $EXPLICIT = $opt_x; my $TRANS_SPLICE = $opt_t; my $SPECIES = $opt_Z? $opt_Z : ""; my $SINGLE_GENE = $opt_1; my $GC_AG = $opt_c; $ACCEPTOR = $opt_A if $opt_A; $DONOR = $opt_D if $opt_D; $START = $opt_M if $opt_M; $STOP = $opt_S if $opt_S; $CODING = $opt_C if $opt_C; $INTRON = $opt_I if $opt_I; $INTER = $opt_N if $opt_N; $POLYA = $opt_a if $opt_a; $UTR3 = $opt_3 if $opt_3; $PROM = $opt_p; $UTR5 = $opt_5 if $opt_5; $InterLength = $opt_i if $opt_i; $EsnglLength = $opt_e if $opt_e; if (($POLYA and !$UTR3) or (!$POLYA and $UTR3)) { die "both -a and -3 must be specified"; } if ($POLYA and $UTR3) { my ($order, $length) = split(/:/, $UTR3); $UTR3 = $order; # reassign $UTR3Length = $length; # geometric distribution } if (($PROM and !$UTR5) or (!$PROM and $UTR5)) { die "both -p and -5 must be specified"; } if ($PROM and $UTR5) { my ($order, $length) = split(/:/, $UTR5); $UTR5 = $order; $UTR5Length = $length; } if ($TRANS_SPLICE) { die "both -p and -5 must be specified" unless $PROM and $UTR5; } #################### # Species override # #################### if ($SPECIES eq 'worm') { $ACCEPTOR = '0:15'; $REVERSE_ORF = 1; $GC_AG = -5; } elsif ($SPECIES eq 'plant') { $ACCEPTOR = '0:20'; } elsif ($SPECIES eq 'fly') { $ACCEPTOR = '0:30'; $REVERSE_ORF = 1; } elsif ($SPECIES =~ /\S/) { die "unrecognized clade ($SPECIES)"; } ##################### # Single gene model # ##################### if ($SINGLE_GENE) {die "single gene not supported yet"} ########## # States # ########## my $States = 6; my %State = ( Einit => {init => 0, term => 0, min => 3, max => -1, dur => 'explicit'}, Exon => {init => 0, term => 0, min => 6, max => -1, dur => 'explicit'}, Eterm => {init => 0, term => 0, min => 3, max => -1, dur => 'explicit'}, Esngl => {init => 0, term => 0, min => 150, max => -1, dur => 'explicit'}, Inter => {init => 0.9, term => 0.9, min => 0, max => 0, dur => 'geometric'}, Intron => {init => 0.1, term => 0.1, min => 0, max => 0, dur => 'geometric'}, ); if ($REPEATS) { $State{Repeat} = {init => 0, term => 0, min => 100, max => -1, dur => 'explicit'}; $States++; } if ($REVERSE_ORF) { $State{ORF} = {init => 0, term => 0, min => 100, max => -1, dur => 'explicit'}; $States++; } if ($POLYA) { $State{PolyA} = {init => 0, term => 0, min => 1, max => 1, dur => 'explicit'}; $State{UTR3} = {init => 0.1, term => 0.1, min => 0, max => 0, dur => 'geometric'}; $States += 2; } if ($PROM) { $State{Prom} = {init => 0, term => 0, min => 1, max => 1, dur => 'explicit'}; $State{UTR5} = {init => 0.1, term => 0.1, min => 0, max => 0, dur => 'geometric'}; $States += 2; } if ($TRANS_SPLICE) { $State{TSS} = {init => 0, term => 0, min => 1, max => 1, dur => 'explicit'}; $States += 1; } if ($EXPLICIT) { $State{Intron}{dur} = 'explicit'; $State{Intron}{min} = 1; $State{Intron}{max} = -1; } ############### # Transitions # ############### my $Transitions = 4; my %Transition = ( Einit => {Intron => 1}, Esngl => {Inter => 1}, Eterm => {Inter => 1}, Exon => {Intron => 1}, ); open(FILE, "$DIR/transitions"); while (<FILE>) { my ($s1, $s2, $prob) = split; $Transition{$s1}{$s2} = $prob; $Transitions++; } close FILE; # optional section if ($REPEATS) { $Transition{Intron}{Repeat} = 1; $Transition{Repeat}{Intron} = 1; $Transition{Inter}{Repeat} = 1; $Transition{Repeat}{Inter} = 1; $Transitions += 4; } if ($REVERSE_ORF) { $Transition{Intron}{ORF} = 1; $Transition{ORF}{Intron} = 1; $Transition{Inter}{ORF} = 1; $Transition{ORF}{Inter} = 1; $Transitions += 4; } if ($POLYA) { $Transition{Esngl} = {UTR3 => 1}; $Transition{Eterm} = {UTR3 => 1}; $Transition{UTR3} = {PolyA => 1}; $Transition{PolyA} = {Inter => 1}; $Transitions += 2; } if ($PROM) { $Transition{Inter}{Prom} = 1; $Transition{Prom}{UTR5} = 1; $Transition{UTR5}{Esngl} = $Transition{Inter}{Esngl}; $Transition{UTR5}{Einit} = $Transition{Inter}{Einit}; delete $Transition{Inter}{Esngl}; delete $Transition{Inter}{Einit}; $Transitions += 2; } if ($TRANS_SPLICE) { $Transition{Inter}{Prom} = 0.5; $Transition{Inter}{TSS} = 0.5; # what are the real figures? $Transitions++; } ############### # Phase prefs # ############### my $Phaseprefs = `cat $DIR/phaseprefs`; ############# # Durations # ############# my $Durations = 6; my %Duration = ( Einit => 1, Eterm => 1, Esngl => 1, Exon => 1, Intron => 1, Inter => 1, ); if ($REPEATS) { $Duration{Repeat} = 1; $Durations++; } if ($REVERSE_ORF) { $Duration{ORF} = 1; $Durations++; } if ($POLYA) { $Duration{PolyA} = 1; $Duration{UTR3} = 1; $Durations += 2; } if ($PROM) { $Duration{Prom} = 1; $Duration{UTR5} = 1; $Durations += 2; } if ($TRANS_SPLICE) { $Duration{TSS} = 1; $Durations++; } ########## # Models # ########## my $Models = 7; my %Model = ( Acceptor => $ACCEPTOR, Donor => $DONOR, Start => $START, Stop => $STOP, Coding => $CODING, Intron => $INTRON, Inter => $INTER, ); if ($REPEATS) { $Model{Repeat} = 'Repeat'; $Models++; } if ($POLYA) { $Model{PolyA} = $POLYA; $Model{UTR3} = $UTR3; $Models += 2; } if ($PROM) { $Model{Prom} = 'Prom'; $Model{UTR5} = $UTR5; $Models += 2; } if ($TRANS_SPLICE) { $Model{TSS} = 'TSS'; $Models++; } # no need for ORF models, they are the same as coding ############ # Assemble # ############ # header print "zoeHMM $NAME $States $Transitions $Durations $Models\n"; # states print "\n<STATES>\n\n"; foreach my $name (sort keys %State) { print join("\t", $name, $State{$name}{init}, $State{$name}{term}, $State{$name}{min}, $State{$name}{max}, $State{$name}{dur}), "\n"; } # transitions print "\n<STATE_TRANSITIONS>\n\n"; foreach my $s1 (sort keys %Transition) { foreach my $s2 (sort keys %{$Transition{$s1}}) { print join("\t", $s1, $s2, $Transition{$s1}{$s2}), "\n"; } } # phaseprefs print "\n<PHASE_PREFERENCES>\n\n"; print $Phaseprefs; # durations print "\n<STATE_DURATIONS>\n\n"; foreach my $name (sort keys %Duration) { if ($name eq 'Repeat') { print "Repeat 1\n\tCONSTANT 0 -1\n\t\t0\n"; } elsif ($name eq 'ORF') { my $hack = `cat $DIR/Exon-explicit.duration`; $hack =~ s/Exon/ORF/; print $hack; } elsif ($name eq 'PolyA') { print "PolyA 1\n\tCONSTANT 0 -1\n\t\t0\n"; # may want to change that } elsif ($name eq 'Prom') { print "Prom 1\n\tCONSTANT 0 -1\n\t\t0\n"; # may want to change this too } elsif ($name eq 'TSS') { print "TSS 1\n\tCONSTANT 0 -1\n\t\t0\n"; # here as well } elsif ($name eq 'Inter') { print "Inter 1\n\tGEOMETRIC 0 -1\n\t\t$InterLength\n"; } elsif ($name eq 'Esngl') { print "Esngl 1\n\tGEOMETRIC 0 -1\n\t\t$EsnglLength\n"; } elsif ($name eq 'UTR3') { print "UTR3 1\n\tGEOMETRIC 0 -1\n\t\t$UTR3Length\n"; } elsif ($name eq 'UTR5') { print "UTR5 1\n\tGEOMETRIC 0 -1\n\t\t$UTR5Length\n"; } else { my $file = "$name-$State{$name}{dur}.duration"; system("cat $DIR/$file"); } print "\n"; } # models my $TSS_MODEL = 'TSS SDT 2 1 4 2 0.000 AG WMM 15 11 4 0 0.000 0.440 -0.800 -1.561 0.790 0.452 -0.664 -1.565 0.734 0.546 -0.660 -1.787 0.694 0.681 -1.267 -1.862 0.771 0.743 -1.716 -1.906 0.811 0.175 -1.667 -2.024 1.208 -2.278 -2.834 -4.043 1.845 -5.146 -4.228 -6.441 1.966 -1.458 -0.723 -1.433 1.411 -2.982 1.737 -7.579 -0.906 1.999 -9.900 -9.900 -9.900 -9.900 -9.900 1.999 -9.900 0.684 -0.753 0.351 -0.929 0.220 -0.506 -0.606 0.560 0.235 -0.166 -0.481 0.281 NN TRM 0 0 0 0 0.000 '; # C. elegans weight matrix for acceptor site print "<SEQUENCE_MODELS>\n\n"; foreach my $name (sort keys %Model) { my ($order, $length); if ($Model{$name} eq 'Repeat') { print "Repeat LUT 1 0 5 0 0\n\t-1 -1 -1 -1 1\n\n"; next; } elsif ($Model{$name} eq 'Prom') { print "Prom WMM 2 0 4 0 0\n\t0 0 0 0\n\t0 0 0 0\n\n"; next; } elsif ($Model{$name} eq 'TSS') { print $TSS_MODEL, "\n"; next; } elsif ($Model{$name} =~ /:/) { ($order, $length) = split(/:/, $Model{$name}); } else { if ($Model{$name} =~ /^(\d+)\+/) { $order = $1; $length = ($order +1) . "+"; } else { $order = $Model{$name}; $length = $order +1; } } my $file = "$DIR/$name-$order-$length.model"; if ($name eq 'Donor' and $GC_AG) { print "Donor SDT 2 0 4 3 0.0\n"; my @gc; open(DN, "Donor-0-9.model") or die; while (<DN>) { my @f = split; push @gc, \@f; } close DN; shift @gc; pop @gc; # unnecessary bits $gc[0][0] = 'GC'; $gc[0][6] = $GC_AG; # assign score here ($gc[5][1], $gc[5][3]) = ($gc[5][3], $gc[5][1]); # swap T and C values print "\t@{$gc[0]}\n"; for (my $i = 1; $i < @gc; $i++) { print "\t\t", join("\t", @{$gc[$i]}), "\n"; } open(IN, $file) or die; my $head = <IN>; # throw-away while (<IN>) {print} } else { system("cat $file"); } print "\n"; } # C. elegans trans-splice site # using splice acceptor site weight matrix __END__ Copyright (C) 2003-2004 Ian Korf This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.