Assemblytics_web-master/000077500000000000000000000000001304413665500156555ustar00rootroot00000000000000Assemblytics_web-master/.gitignore000066400000000000000000000001561304413665500176470ustar00rootroot00000000000000user_data/* !user_data/.htaccess user_uploads/* !user_uploads/.htaccess *.delta python_env reproducibility Assemblytics_web-master/LICENSE000066400000000000000000000020731304413665500166640ustar00rootroot00000000000000 The MIT License (MIT) Copyright (c) 2016 Maria Nattestad Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Assemblytics_web-master/README.md000066400000000000000000000013251304413665500171350ustar00rootroot00000000000000# Assemblytics (full web application) This repo contains the full Assemblytics web application. The web app is running on assemblytics.com, and the command-line version is available at https://github.com/marianattestad/assemblytics This repository exists so you can install it on your own servers or locally on your computer if need be. 
The code here includes the interactive dot plot (which you can't use from the command-line version). Dependencies: - R - ggplot - plyr - Python - argparse - numpy Notes for installation: - Make sure to open up permissions in user_uploads and user_data so the webserver can read and write there. - It does not contain the examples as some of these are huge files. Assemblytics_web-master/analysis.php000077500000000000000000000125361304413665500202230ustar00rootroot00000000000000


All plots are available as both .png and .pdf files through the Download button below and can be used in publications. Cite Assemblytics.

Assembly statistics

Variant summary statistics

Variant file preview

View analysis later

"; ?>

Progress

Checking progress...
Assemblytics_web-master/bin/000077500000000000000000000000001304413665500164255ustar00rootroot00000000000000Assemblytics_web-master/bin/Assemblytics_Nchart.R000066400000000000000000000072061304413665500225160ustar00rootroot00000000000000# Author: Maria Nattestad # Email: mnattest@cshl.edu # This script is part of Assemblytics, a program to detect and analyze structural variants from an assembly aligned to a reference genome using MUMmer. library(ggplot2) library(scales) args<-commandArgs(TRUE) prefix <- args[1] filename_ref <- paste(prefix, ".coords.ref.genome", sep="") filename_query <- paste(prefix, ".coords.query.genome", sep="") ref.data <- read.csv(filename_ref, sep="\t", quote='',header=FALSE) query.data <- read.csv(filename_query, sep="\t", quote='',header=FALSE) names(ref.data) <- c("name","length") names(query.data) <- c("name","length") ref.data$length <- as.numeric(ref.data$length) query.data$length <- as.numeric(query.data$length) genome.length <- max(sum(ref.data$length),sum(query.data$length)) ref.cumsum <- data.frame(NG=cumsum(ref.data$length/genome.length*100),contig.length=ref.data$length,contig.source="Reference") query.cumsum <- data.frame(NG=cumsum(query.data$length/genome.length*100),contig.length=query.data$length,contig.source="Query") both.plot <- rbind(ref.cumsum,query.cumsum) ref.cumsum.0 <- rbind(data.frame(NG=c(0),contig.length=max(ref.cumsum$contig.length),contig.source="Reference"),ref.cumsum) query.cumsum.0 <- rbind(data.frame(NG=c(0),contig.length=max(query.cumsum$contig.length),contig.source="Query"),query.cumsum) with.zeros <- rbind(ref.cumsum.0,query.cumsum.0) bp_format<-function(num) { if (num > 1000000000) { paste(formatC(num/1000000000,format="f",digits=3,big.mark=",",drop0trailing = TRUE)," Gbp",sep="") } else if (num > 1000000) { paste(formatC(num/1000000,format="f",digits=3,big.mark=",",drop0trailing = TRUE)," Mbp",sep="") } else { paste(formatC(num,format="f",big.mark=",",drop0trailing = TRUE), " bp", sep="") } } 
theme_set(theme_bw(base_size = 12) + theme(panel.grid.minor = element_line(colour = NA))) colors <- c("blue","limegreen") for (to_png in c(TRUE,FALSE)) { if (to_png) { png(file=paste(prefix,".Assemblytics.Nchart.png",sep=""),width=1000,height=1000,res=200) } else { pdf(paste(prefix,".Assemblytics.Nchart.pdf",sep="")) } if (nrow(with.zeros) > 2) { print( ggplot(with.zeros, aes(x = NG, y = contig.length, color=contig.source)) + xlim(0,100) + scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)), limits=c(1,genome.length)) + geom_path(size=1.5,alpha=0.5) + geom_point(data=both.plot,size=2,alpha=0.5) + labs(x = paste("NG(x)% where 100% = ",bp_format(genome.length), sep=""),y="Sequence length",colour="Assembly",title="Cumulative sequence length") + scale_color_manual(values=colors) + annotation_logticks(sides="lr") ) } else { # To make bacterial genomes at least show a dot instead of an error because # they only have 1 contig print( ggplot(both.plot, aes(x = NG, y = contig.length, color=contig.source)) + xlim(0,100) + scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)), limits=c(1,genome.length)) + # geom_path(size=1.5,alpha=0.5) + geom_point(size=4,alpha=0.5) + labs(x = paste("NG(x)% where 100% = ",bp_format(genome.length), sep=""),y="Sequence length",colour="Assembly",title="Cumulative sequence length") + scale_color_manual(values=colors) + annotation_logticks(sides="lr") ) } dev.off() } Assemblytics_web-master/bin/Assemblytics_between_alignments.pl000077500000000000000000000353071304413665500253710ustar00rootroot00000000000000#!/usr/bin/perl -w # Authors: Maria Nattestad and Mike Schatz # Email: mnattest@cshl.edu use strict; my @chromosome_filter_choices = ("all-chromosomes","primary-chromosomes"); my @longrange_filter_choices = ("include-longrange","exclude-longrange","longrange-only"); my @output_file_choices = ("bed","bedpe"); my $USAGE = 
"Usage:\nAssemblytics_between_alignments.pl coords.tab minimum_event_size maximum_event_size [@chromosome_filter_choices] [@longrange_filter_choices] [@output_file_choices] > fusions.svs.bedpe "; my $coordsfile = shift @ARGV or die $USAGE; my $minimum_event_size = int(shift @ARGV); my $maximum_event_size = int(shift @ARGV); my $chromosome_filter = shift @ARGV or die $USAGE; my $longrange_filter = shift @ARGV or die $USAGE; my $output_file = shift @ARGV or die $USAGE; # How close do alignments have to be in order to call deletions and insertions? (as opposed to contractions and expansions) my $narrow_threshold = 50; # Number of basepairs of distance in either the reference or the query before we call an SV long-range my $longrange = $maximum_event_size; # What is the longest two alignments can map apart in the query before we throw the variant between them away? my $max_query_dist = 100000; my %chromosome_filter_choices_hash = map { $_, 1 } @chromosome_filter_choices; my %longrange_filter_choices_hash = map { $_, 1 } @longrange_filter_choices; my %output_file_choices_hash = map { $_, 1 } @output_file_choices; if ( $chromosome_filter_choices_hash{ $chromosome_filter } && $longrange_filter_choices_hash{ $longrange_filter } && $output_file_choices_hash { $output_file }) { # All is well with the world } else { die $USAGE; } if ($longrange_filter ne "exclude-longrange" && $output_file eq "bed"){ die "Cannot output bed while allowing long-range variants\n$USAGE"; } # open COORDS, "./bin/show-coords -rclHT $deltafile |" # or die "Can't process $deltafile ($!)\n"; open COORDS, "$coordsfile" or die "Can't process $coordsfile ($!)\n"; ##open COORDS, "show-coords -rclHT $deltafile |" ## or die "Can't process $deltafile ($!)\n"; ## Require the flanking alignments are at least this long to call an SV ## Note there is no minimum length for fusions, this is determined by how ## the delta file was filtered my $MIN_SV_ALIGN = 100; #my $minimum_event_size = 50; my $approximately_zero 
= $narrow_threshold; my %alignments; my $numalignments = 0; while () { chomp; my @vals = split /\s+/, $_; my $rid = $vals[6]; my $qid = $vals[7]; my $a; $a->{"rstart"} = $vals[0]; $a->{"rend"} = $vals[1]; $a->{"qstart"} = $vals[2]; $a->{"qend"} = $vals[3]; $a->{"rlen"} = $vals[4]; $a->{"qlen"} = $vals[5]; $a->{"rid"} = $vals[6]; $a->{"qid"} = $vals[7]; $a->{"str"} = $_; $a->{"qidx"} = 0; $a->{"qrc"} = ($a->{"qend"} > $a->{"qstart"}) ? 0 : 1; push @{$alignments{$qid}->{$rid}}, $a; # a is a hash with all the info for one alignment $numalignments++; } print STDERR "Loaded $numalignments alignments\n"; my $candidatefusions = 0; my $candidatesvs = 0; my $sv_id_counter = 0; my %svstats; foreach my $qid (sort keys %alignments) # query name is the key for the alignments hash { my @refs = sort keys %{$alignments{$qid}}; # grab all alignments of that query my $numref = scalar @refs; ## scan for fusions # if ($numref > 1) # if query aligns to multiple chromosomes # { # my $allrefs = join " ", @refs; # join the names together for output # print "== $qid [$numref] $allrefs\n"; # output the names of the chromosomes # $candidatefusions++; # my $rcnt = 0; # foreach my $rid (@refs) # { # print "--\n" if ($rcnt > 0); # $rcnt++; # foreach my $a (@{$alignments{$qid}->{$rid}}) # { # my $str = $a->{"str"}; # print "$str\n"; # } # } # print "\n"; # } ## Resort the alignments by query sort position my @qaligns; foreach my $rid (@refs) { foreach my $a (@{$alignments{$qid}->{$rid}}) { push @qaligns, $a; } } ## Now record the index of the sorted query indices @qaligns = sort { $a->{"qstart"} <=> $b->{"qstart"}} @qaligns; for (my $i=0; $i < scalar @qaligns; $i++) { $qaligns[$i]->{"qidx"} = $i; } ## scan for SVs my $numalign = scalar @qaligns; if ($numalign > 1) # if the query has more than 1 alignment { ## note skip first one for (my $j = 1; $j < $numalign; $j++) { my $ai = $qaligns[$j-1]; my $aj = $qaligns[$j]; my $istr = $ai->{"str"}; my $jstr = $aj->{"str"}; # if ($ai->{"rid"} ne 
$aj->{"rid"}) # { # ## skip the fusions for now ############################################################################################# # next; # } my $rid = $ai->{"rid"}; if (($ai->{"rlen"} >= $MIN_SV_ALIGN) && ($aj->{"rlen"} >= $MIN_SV_ALIGN)) { ## r alignments are always forward, q alignments may be flipped my $rpos; my $qpos; my $rdist = 0; my $qdist = 0; my $svtype = 0; my $chromi = $ai->{"rid"}; my $chromj = $aj->{"rid"}; my $posi; my $posj; my $strandi; my $strandj; $sv_id_counter++; if (($ai->{"qrc"} == 0) && ($aj->{"qrc"} == 0)) { ## ri: [1 - 1000] | j: [2000 - 3000] => 1000 ## qi: [1 - 1000] | j: [2000 - 3000] => 1000 $svtype = "FF"; $qdist = $aj->{"qstart"} - $ai->{"qend"}; $rdist = $aj->{"rstart"} - $ai->{"rend"}; if ($rdist >= 0) { $rpos = sprintf("%s:%d-%d:+", $rid, $ai->{"rend"}, $aj->{"rstart"}); } else { $rpos = sprintf("%s:%d-%d:-", $rid, $aj->{"rstart"}, $ai->{"rend"}); } if ($qdist >= 0) { $qpos = sprintf("%s:%d-%d:+", $qid, $ai->{"qend"}, $aj->{"qstart"}); } else { $qpos = sprintf("%s:%d-%d:-", $qid, $aj->{"qstart"}, $ai->{"qend"}); } # When the alignments are forward-forward, the connection point is at the end of the first (i: rend) and at the beginning of the second (j: rstart) # i + - j # ------> --------> $posi = $ai->{"rend"}; $posj = $aj->{"rstart"}; $strandi = "+"; $strandj = "-"; } elsif (($ai->{"qrc"} == 1) && ($aj->{"qrc"} == 1)) { ## ri: [2000 - 3000] | j: [1 - 1000] => 1000 ## qi: [1000 - 1] | j: [3000 - 2000] => 1000 $svtype = "RR"; $rdist = $ai->{"rstart"} - $aj->{"rend"}; $qdist = $aj->{"qend"} - $ai->{"qstart"}; if ($rdist >= 0) { $rpos = sprintf("%s:%d-%d:+", $rid, $aj->{"rend"}, $ai->{"rstart"}); } else { $rpos = sprintf("%s:%d-%d:-", $rid, $ai->{"rstart"}, $aj->{"rend"}); } if ($qdist >= 0) { $qpos = sprintf("%s:%d-%d:+", $qid, $ai->{"qstart"}, $aj->{"qend"}); } else { $qpos = sprintf("%s:%d-%d:-", $qid, $aj->{"qend"}, $ai->{"qstart"}); } # When the alignments are reverse-reverse, the connection point is at the 
beginning of the first (i: rstart) and at the end of the second (j: rend) # j + - i # <------- <-------- $posi = $ai->{"rstart"}; # rstart means first reference coordinate, not with respect to the contig $posj = $aj->{"rend"}; # rend means last reference coordinate, not with respect to the contig $strandi = "-"; $strandj = "+"; } elsif (($ai->{"qrc"} == 0) && ($aj->{"qrc"} == 1)) { ## ri: [1 - 1000] | j: [2000 - 3000] => 1000 ## qi: [1 - 1000] | j: [3000 - 2000] => 1000 $svtype = "FR"; $qdist = $aj->{"qend"} - $ai->{"qend"}; $rdist = $aj->{"rstart"} - $ai->{"rend"}; if ($rdist >= 0) { $rpos = sprintf("%s:%d-%d:+", $rid, $ai->{"rend"}, $aj->{"rstart"}); } else { $rpos = sprintf("%s:%d-%d:-", $rid, $aj->{"rstart"}, $ai->{"rend"}); } if ($qdist >= 0) { $qpos = sprintf("%s:%d-%d:+", $qid, $ai->{"qend"}, $aj->{"qend"}); } else { $qpos = sprintf("%s:%d-%d:-", $qid, $aj->{"qend"}, $ai->{"qend"}); } # When the alignments are forward-reverse, the connection point is at the beginning of the first (i: rstart) and at the end of the second (j: rend) # i + j + # -------> <-------- $posi = $ai->{"rend"}; $posj = $aj->{"rend"}; $strandi = "+"; $strandj = "+"; } elsif (($ai->{"qrc"} == 1) && ($aj->{"qrc"} == 0)) { ## ri: [1 - 1000] | j: [2000 - 3000] => 1000 ## qi: [1000 - 1] | j: [2000 - 3000] => 1000 $svtype = "RF"; $qdist = $ai->{"qend"} - $aj->{"qend"}; $rdist = $aj->{"rstart"} - $ai->{"rend"}; if ($rdist >= 0) { $rpos = sprintf("%s:%d-%d:+", $rid, $ai->{"rend"}, $aj->{"rstart"}); } else { $rpos = sprintf("%s:%d-%d:-", $rid, $aj->{"rstart"}, $ai->{"rend"}); } if ($qdist >= 0) { $qpos = sprintf("%s:%d-%d:+", $qid, $aj->{"qend"}, $ai->{"qend"}); } else { $qpos = sprintf("%s:%d-%d:-", $qid, $ai->{"qend"}, $aj->{"qend"}); } # When the alignments are reverse-forward: # - i - j # <------- --------> $posi = $ai->{"rstart"}; $posj = $aj->{"rstart"}; $strandi = "-"; $strandj = "-"; } else { my $irc = $ai->{"qrc"}; my $jrc = $aj->{"qrc"}; print "ERROR: Unknown SV: $irc $jrc\n"; print 
"$istr\n"; print "$jstr\n"; die "ERROR: Unknown SV: $irc $jrc\n"; } my $totaldist = $rdist + $qdist; my $typeguess = ""; my $abs_event_size = abs($rdist-$qdist); if ($chromi ne $chromj) { # interchromosomal $typeguess = "Interchromosomal"; $rdist = 0; } else { # same chromosome if ($strandi eq $strandj) { $typeguess = "Inversion"; $abs_event_size = $rdist; } elsif ($qdist > $rdist) { # both are significantly negative: (means the size of an overlapping region got larger, so tandem element expansion) if ($rdist > -1*$approximately_zero && $rdist < $approximately_zero && $qdist > -1*$approximately_zero) { $typeguess = "Insertion"; # split into out of nowhere (rdist ~ 0) vs. rdist is > 0: insertion_in_unmapped_region } else { if ($rdist < 0 || $qdist < 0) { $typeguess = "Tandem_expansion"; } else { $typeguess = "Repeat_expansion"; } } } elsif ($qdist < $rdist) { # both are significantly negative: (means the size of an overlapping region got smaller, so tandem element contraction) if ($rdist > -1*$approximately_zero && $qdist > -1*$approximately_zero && $qdist < $approximately_zero) { $typeguess = "Deletion"; # split into out of nowhere (rdist ~ 0) vs. 
rdist is > 0: deletion_in_unmapped_region } else { if ($rdist < 0 || $qdist < 0) { $typeguess = "Tandem_contraction"; } else { $typeguess = "Repeat_contraction"; } } } else { $typeguess = "None"; } if ($abs_event_size > $longrange) { # || abs($rdist) > $longrange || abs($qdist) > $longrange $typeguess = "Longrange"; if (abs($qdist) > $max_query_dist) { $typeguess = "None"; } } # my $ratio; # if ($qdist != 0){ # # $ratio = abs(($rdist/$qdist)-1); # # if ($ratio < 0.1) { # # $typeguess = "Equilibrium"; # # } # if ($rdist==$qdist || abs($qdist) > $longrange) { # $typeguess = "None"; # } # } } # my @chromosome_filter_choices = ("all-chromosomes","primary-chromosomes"); # my @longrange_filter_choices = ("include-longrange","exclude-longrange"); my $chromi_length = length $chromi; # length of the chromosome names: a way to filter to primary chromosomes and cut out alts and patches from the assembly my $chromj_length = length $chromj; if ($typeguess ne "Inversion" && $typeguess ne "None" && $abs_event_size >= $minimum_event_size) { # always required if ($chromosome_filter eq "all-chromosomes" || ($chromi_length < 6 && $chromj_length < 6)) { # test for primary chromosomes unless "all-chromosomes" is chosen if ($longrange_filter ne "exclude-longrange" || ($typeguess ne "Interchromosomal" && $typeguess ne "Longrange")) { if ($longrange_filter ne "longrange-only" || ($typeguess eq "Interchromosomal" || $typeguess eq "Longrange")) { if ($output_file eq "bedpe") { print "$chromi\t$posi\t@{[$posi + 1]}\t$chromj\t$posj\t@{[$posj + 1]}\tAssemblytics_b_$sv_id_counter\t$abs_event_size\t$strandi\t$strandj\t$typeguess\t$rdist\t$qdist\t$qpos\t$abs_event_size\t$svtype\tbetween_alignments\n"; } else { use List::Util qw(min max); my $ref_start = min(($posi, $posj)); my $ref_stop = max(($posi, $posj)); if ($ref_stop eq $ref_start) { $ref_stop = $ref_start + 1; } # "chrom","start","stop","name","event.size","strand","event.type","ref.dist","query.dist","contig.name" print 
"$chromi\t$ref_start\t$ref_stop\tAssemblytics_b_$sv_id_counter\t$abs_event_size\t+\t$typeguess\t$rdist\t$qdist\t$qpos\tbetween_alignments\n"; } } } } #if ($filter_type ~~ ("primary-allsizes","primary-shortrange") { # && $typeguess ne "Interchromosomal" && $typeguess ne "Inversion" && $chromi_length < 6 && $chromj_length < 6 && $abs_event_size >= $minimum_event_size) { } $candidatesvs++; #push @{$svstats{$svtype}}, $totaldist; } } } } # print "Processed $numalignments alignments found $candidatefusions fusions and $candidatesvs SVs\n"; # print STDERR "Processed $numalignments alignments found $candidatefusions fusions and $candidatesvs SVs\n"; # foreach my $svtype (keys %svstats) # { # my @events = @{$svstats{$svtype}}; # my $cnt = scalar @events; # my $sum = 0.0; # foreach my $e (@events) # { # $sum += $e; # } # my $mean = sprintf ("%0.02f", $sum/$cnt); # print "svtype[$svtype]: $cnt $mean\n"; # print STDERR "svtype[$svtype]: $cnt $mean\n"; # } Assemblytics_web-master/bin/Assemblytics_dotplot.R000066400000000000000000000122431304413665500227610ustar00rootroot00000000000000library(ggplot2) args<-commandArgs(TRUE) prefix <- args[1] filename <- paste(prefix,".oriented_coords.csv",sep="") plot.output.filename <- paste(prefix,".Assemblytics.Dotplot_filtered",sep="") plot.title <- "Dot plot of Assemblytics filtered alignments" ref.pos <- function(chrom,pos,chr.lengths) { chrom.index <- which(names(chr.lengths)==chrom)-1 offset.based.on.previous.chromosomes <- 0 if (chrom.index != 0) { offset.based.on.previous.chromosomes <- sum(as.numeric(chr.lengths[c(1:chrom.index)])) } loc <- offset.based.on.previous.chromosomes + pos loc } coords <- read.csv(filename,sep=",",header=TRUE) if (nrow(coords)>100000) { coords <- coords[1:100000,] } # names(coords) <- c("ref_start", "ref_end","query_start","query_end","ref_length","query_length","ref","query") coords$ref <- as.character(coords$ref) coords$query <- as.character(coords$query) ordered_common_chromosome_names <- 
c(seq(1,100),paste("chr",seq(1,100),sep=""),paste("Chr",seq(1,100),sep=""),c("X","Y","M","MT","Chr0","chr0","0")) all_chromosomes_some_ordered <- c(intersect(ordered_common_chromosome_names,unique(coords$ref)),setdiff(unique(coords$ref),ordered_common_chromosome_names)) coords$ref <- factor(coords$ref,levels=all_chromosomes_some_ordered) chromosomes <- levels(coords$ref) chr.lengths <- sapply(chromosomes,function(chr){max(coords[coords$ref==chr,"ref_length"])}) names(chr.lengths) <- chromosomes coords <- cbind(coords, alignment.length=abs(coords$query_start-coords$query_end)) coords <- cbind(coords, ref.loc.start=mapply(FUN=ref.pos,coords$ref,coords$ref_start,MoreArgs=list(chr.lengths)), ref.loc.stop=mapply(FUN=ref.pos,coords$ref,coords$ref_end,MoreArgs=list(chr.lengths))) # pick longest alignment. then pick the ref.loc.start of that query.group <- split(coords,factor(coords$query)) ref.loc.of.longest.alignment.by.query <- unlist(sapply(query.group, function(coords.for.each.query) {coords.for.each.query$ref.loc.start[coords.for.each.query$alignment.length==max(coords.for.each.query$alignment.length)][1]}),recursive=FALSE) # decide optimal-ish ordering of the queries ordered.query.names <- names(ref.loc.of.longest.alignment.by.query)[order(ref.loc.of.longest.alignment.by.query)] # construct a query.lengths list query.lengths <- sapply(ordered.query.names,function(each.query){ max(coords[coords$query==each.query,"query_length"]) }) # use the query.lengths to give offset positions to each query, adding a query.loc.start column and a query.loc.stop column to each entry in filtered.coords coords$query.loc.start <- mapply(FUN=ref.pos,coords$query,coords$query_start,MoreArgs=list(query.lengths)) coords$query.loc.stop <- mapply(FUN=ref.pos,coords$query,coords$query_end,MoreArgs=list(query.lengths)) # Hide labels for chromosomes accounting for less than 2% of the reference chr.labels <- names(chr.lengths) chr.labels[chr.lengths < 0.02*sum(as.numeric(chr.lengths))] <- "" 
query.labels <- names(query.lengths) query.labels[query.lengths < 0.02*sum(as.numeric(query.lengths))] <- "" theme_set(theme_bw(base_size = 24)) colors <- c("black","red") coords$tag <- factor(coords$tag,levels=c("unique","repetitive")) # CREATE PNG png(file=paste(plot.output.filename, ".png",sep=""),width=1000,height=1000) print(ggplot(coords, aes(x=ref.loc.start,xend=ref.loc.stop,y=query.loc.start,yend=query.loc.stop,color=tag)) + geom_segment(lineend="butt",size=1.5) + labs(x="Reference",y="Query",title=plot.title) + scale_y_continuous(breaks = cumsum(as.numeric(query.lengths)),labels=query.labels,expand=c(0,0), limits = c(0,sum(as.numeric(query.lengths)))) + scale_x_continuous(breaks = cumsum(as.numeric(chr.lengths)),labels=chr.labels,expand=c(0,0),limits=c(0,sum(as.numeric(chr.lengths)))) + scale_color_manual(values=colors,name="Filter") + theme( axis.ticks.y=element_line(size=0), axis.text.x = element_text(angle = 90, hjust = 1,vjust=-0.5), axis.text.y = element_text(size=12,vjust=1.1), plot.title = element_text(vjust=3), panel.grid.major.x = element_line(colour = "black",size=0.1), panel.grid.major.y = element_line(colour = "black",size=0.1), panel.grid.minor = element_line(NA) )) dev.off() # CREATE PDF # pdf(file=paste(plot.output.filename, ".pdf",sep=""),width=100,height=100,res=100) # print(ggplot(coords, aes(x=ref.loc.start,xend=ref.loc.stop,y=query.loc.start,yend=query.loc.stop)) + geom_segment(lineend="butt",size=1.5) + labs(x="Reference",y="Query",title=plot.title) + scale_y_continuous(breaks = cumsum(as.numeric(query.lengths)),labels=query.labels,expand=c(0,0), limits = c(0,sum(as.numeric(query.lengths)))) + scale_color_manual(values=colors) + scale_x_continuous(breaks = cumsum(as.numeric(chr.lengths)),labels=chr.labels,expand=c(0,0),limits=c(0,sum(as.numeric(chr.lengths)))) + theme( # axis.ticks.y=element_line(size=0), # axis.text.x = element_text(angle = 90, hjust = 1,vjust=-0.5), # axis.text.y = element_text(size=12,vjust=1.1), # plot.title = 
element_text(vjust=3), # panel.grid.major.x = element_line(colour = "black",size=0.1), # panel.grid.major.y = element_line(colour = "black",size=0.1), # panel.grid.minor = element_line(NA) # )) # # dev.off() Assemblytics_web-master/bin/Assemblytics_index.py000077500000000000000000000154731304413665500226450ustar00rootroot00000000000000#! /usr/bin/env python # Author: Maria Nattestad # Email: mnattest@cshl.edu # This script is part of Assemblytics, a program to detect and analyze structural variants from an assembly aligned to a reference genome using MUMmer. import argparse import numpy as np import re import operator def run(args): coords = args.coords output_prefix = args.out f = open(coords) f.readline() # ignore header fields_by_query = {} existing_query_names = set() existing_reference_names = set() reference_lengths = [] query_lengths = {} for line in f: fields = line.strip().split(",") query_name = fields[7] query_lengths[query_name] = int(fields[5]) if not query_name in existing_query_names: fields_by_query[query_name] = [] existing_query_names.add(query_name) fields_by_query[query_name].append(fields) ref_name = fields[6] ref_length = int(fields[4]) if not ref_name in existing_reference_names: existing_reference_names.add(ref_name) reference_lengths.append((ref_name,ref_length)) f.close() # Find the order of the reference chromosomes reference_lengths.sort(key=lambda x: natural_key(x[0])) # Find the cumulative sums cumulative_sum = 0 ref_chrom_offsets = {} queries_by_reference = {} for ref,ref_length in reference_lengths: ref_chrom_offsets[ref] = cumulative_sum cumulative_sum += ref_length queries_by_reference[ref] = set() # Calculate relative positions of each alignment in this cumulative length, and take the median of these for each query, then sort the queries by those scores flip_by_query = {} references_by_query = {} # for index relative_ref_position_by_query = [] # for ordering for query_name in fields_by_query: lines = fields_by_query[query_name] 
sum_forward = 0 sum_reverse = 0 amount_of_reference = {} ref_position_scores = [] references_by_query[query_name] = set() for ref,ref_length in reference_lengths: amount_of_reference[ref] = 0 for fields in lines: tag = fields[8] if tag == "unique": query_stop = int(fields[3]) query_start = int(fields[2]) ref_start = int(fields[0]) ref_stop = int(fields[1]) alignment_length = abs(int(fields[3])-int(fields[2])) ref = fields[6] # for index: references_by_query[query_name].add(ref) queries_by_reference[ref].add(query_name) # amount_of_reference[ref] += alignment_length # for ordering: ref_position_scores.append(ref_chrom_offsets[ref] + (ref_start+ref_stop)/2) # for orientation: if query_stop < query_start: sum_reverse += alignment_length else: sum_forward += alignment_length # orientation: flip_by_query[query_name] = sum_reverse > sum_forward # for ref in amount_of_reference: # if amount_of_reference[ref] > 0: # references_by_query[query_name].add(ref) # queries_by_reference[ref].add(query_name) # ordering if len(ref_position_scores) > 0: relative_ref_position_by_query.append((query_name,np.median(ref_position_scores))) else: relative_ref_position_by_query.append((query_name,0)) relative_ref_position_by_query.sort(key=lambda x: x[1]) fout_ref_index = open(output_prefix + ".ref.index",'w') fout_ref_index.write("ref,ref_length,matching_queries\n") # reference_lengths is sorted by the reference chromosome name for ref,ref_length in reference_lengths: fout_ref_index.write("%s,%d,%s\n" % (ref,ref_length,"~".join(queries_by_reference[ref]))) fout_ref_index.close() fout_query_index = open(output_prefix + ".query.index",'w') fout_query_index.write("query,query_length,matching_refs\n") # relative_ref_position_by_query is sorted by rel_pos for query,rel_pos in relative_ref_position_by_query: fout_query_index.write("%s,%d,%s\n" % (query,query_lengths[query],"~".join(references_by_query[query]))) fout_query_index.close() f = open(coords) fout = open(output_prefix + 
".oriented_coords.csv",'w') header = f.readline().strip() fout.write(header+",alignment_length\n") # copy the header alignment_length_column = len(header.split(",")) # sorted_by_alignment_length = [] uniques = [] repetitives = [] for line in f: fields = line.strip().split(",") query_name = fields[7] if flip_by_query[query_name] == True: fields[2] = int(fields[5]) - int(fields[2]) fields[3] = int(fields[5]) - int(fields[3]) alignment_length = abs(int(fields[2])-int(fields[1])) fields.append(alignment_length) if fields[8] == "unique": uniques.append(fields) else: repetitives.append(fields) f.close() uniques.sort(key=lambda x: x[alignment_length_column],reverse=True) repetitives.sort(key=lambda x: x[alignment_length_column],reverse=True) fout_info = open(output_prefix + ".info.csv",'w') fout_info.write("key,value\n") fout_info.write("unique alignments,%d\n" % len(uniques)) fout_info.write("repetitive alignments,%d\n" % len(repetitives)) for fields in uniques: fout.write(",".join(map(str,fields)) + "\n") if len(repetitives) < 100000: for fields in repetitives: fout.write(",".join(map(str,fields)) + "\n") fout_info.write("showing repetitive alignments,True\n") else: fout_repeats = open(output_prefix + ".oriented_coords.repetitive.csv",'w') fout_repeats.write(header+",alignment_length\n") # copy the header for fields in repetitives: fout_repeats.write(",".join(map(str,fields)) + "\n") fout_repeats.close() fout_info.write("showing repetitive alignments,False: Too many\n") fout.close() fout_info.close() def natural_key(string_): """See http://www.codinghorror.com/blog/archives/001018.html""" return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)] def main(): parser=argparse.ArgumentParser(description="Index and orient a coordinate file for dotplots.") parser.add_argument("-coords",help="coords.csv file from Assemblytics_uniq_anchor.py" ,dest="coords", type=str, required=True) parser.add_argument("-out",help="output prefix for indices and oriented 
coordinates file" ,dest="out", type=str, required=True) parser.set_defaults(func=run) args=parser.parse_args() args.func(args) if __name__=="__main__": main()Assemblytics_web-master/bin/Assemblytics_summary.py000077500000000000000000000125531304413665500232270ustar00rootroot00000000000000#!/usr/bin/env python # Author: Maria Nattestad # Email: mnattest@cshl.edu # This script is part of Assemblytics, a program to detect and analyze structural variants from an assembly aligned to a reference genome using MUMmer. import argparse import numpy as np def SVtable(args): filename = args.file minimum_variant_size = args.minimum_variant_size maximum_variant_size = args.maximum_variant_size simplify_types = False f=open(filename) typeList = [] sizeList = [] rawTypes = [] linecounter = 0 for line in f: fields = line.strip().split() if not fields[4].isdigit(): continue svType = fields[6] rawTypes.append(svType) if simplify_types == True: if svType == "Insertion" or svType == "Expansion": typeList.append("Insertion/Expansion") elif svType == "Deletion" or svType == "Contraction": typeList.append("Deletion/Contraction") else: typeList.append(svType) else: typeList.append(svType) sizeList.append(int(fields[4])) linecounter += 1 f.close() size_thresholds = [10,50,500,10000,50000,100000,500000,1000000] sizeArray = np.array(sizeList) typeArray = np.array(typeList) svTypes = ["Insertion","Deletion","Tandem_expansion","Tandem_contraction","Repeat_expansion","Repeat_contraction"] if simplify_types == True: svTypes = ["Insertion/Expansion","Deletion/Contraction"] overall_total = 0 overall_total_bases = 0 overall_total_SVs = 0 overall_total_SV_bases = 0 SV_size = 50 all_SV_types = svTypes + list(set(rawTypes)-set(svTypes)) f_output_csv = open(filename[0:-4]+".summary.csv",'w') if linecounter > 0: for svType in all_SV_types: sizes = sizeArray[typeArray==svType] overall_total += len(sizes) overall_total_bases += sum(sizes) overall_total_SVs += len(sizes[sizes>=SV_size]) 
overall_total_SV_bases += sum(sizes[sizes>=SV_size]) print svType f_output_csv.write(svType + "\n") format = "%20s%10s%15s" print format % ("", "Count","Total bp") f_output_csv.write("Size range,Count,Total bp\n") previous_size = minimum_variant_size for threshold in size_thresholds: if threshold <= minimum_variant_size or previous_size >= maximum_variant_size: continue subset = sizes[np.logical_and(sizes>=previous_size,sizes=previous_size]; print format % ("> %s bp: " % (intWithCommas(previous_size)), str(len(subset)), str(sum(subset))) f_output_csv.write("%s,%s,%s\n" % ("> %s bp" % (previous_size), str(len(subset)), str(sum(subset)))) print format % ("Total: ",str(len(sizes)),str(sum(sizes))) + "\n" f_output_csv.write("%s,%s,%s\n\n" % ("Total",str(len(sizes)),str(sum(sizes)))) else: print "No variants found. Plots depicting variant size distributions will also be missing.\n" print "Total number of all variants: %s" % (intWithCommas(overall_total)) f_output_csv.write("Total for all variants,%s,%s bp\n" % (overall_total,int(overall_total_bases))) print "Total bases affected by all variants: %s" % (gig_meg(int(overall_total_bases))) print "Total number of structural variants: %s" % (intWithCommas(overall_total_SVs)) f_output_csv.write("Total for all structural variants,%s,%s bp\n" % (overall_total_SVs,int(overall_total_SV_bases)) ) print "Total bases affected by structural variants: %s" % (gig_meg(int(overall_total_SV_bases))) f_output_csv.close() def gig_meg(number,digits = 2): gig = 1000000000. meg = 1000000. kil = 1000. 
if number > gig: return str(round(number/gig,digits)) + " Gbp" elif number > meg: return str(round(number/meg,digits)) + " Mbp" elif number > kil: return str(round(number/kil,digits)) + " Kbp" else: return str(number) + " bp" def intWithCommas(x): if type(x) not in [type(0), type(0L)]: raise TypeError("Parameter must be an integer.") if x < 0: return '-' + intWithCommas(-x) result = '' while x >= 1000: x, r = divmod(x, 1000) result = ",%03d%s" % (r, result) return "%d%s" % (x, result) def main(): parser=argparse.ArgumentParser(description='Output a summary table of variants from Assemblytics',formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-i',help='bed file of variants from Assemblytics',dest='file',type=str,required=True) parser.add_argument('-min',help='minimum variant size',dest='minimum_variant_size',type=int,required=True) parser.add_argument('-max',help='maximum variant size',dest='maximum_variant_size',type=int,required=True) args=parser.parse_args() SVtable(args) if __name__=="__main__": main() Assemblytics_web-master/bin/Assemblytics_uniq_anchor.py000077500000000000000000000407771304413665500240510ustar00rootroot00000000000000#! /usr/bin/env python # Author: Maria Nattestad # Email: mnattest@cshl.edu # This script is part of Assemblytics, a program to detect and analyze structural variants from an assembly aligned to a reference genome using MUMmer. 
import argparse import gzip # from intervaltree import * import time import numpy as np import operator def run(args): filename = args.delta unique_length = args.unique_length output_filename = args.out keep_small_uniques = args.keep_small_uniques if keep_small_uniques: print "Keeping fully unique alignments even if they are below the unique anchor length of", unique_length, "bp" else: print "Discarding all alignments below the unique anchor length of", unique_length, "bp" print "Use --keep-small-uniques to keep all the fully unique alignments even below this length" if unique_length == 10000: print "Use --unique-length X to set the unique anchor length requirement. Default is 10000, such that each alignment must have at least 10000 bp from the query that are not included in any other alignments." print "header:" f = open(filename) header1 = f.readline() if header1[0:2]=="\x1f\x8b": f.close() f = gzip.open(filename) print f.readline().strip() else: print header1.strip() # Ignore the first two lines for now print f.readline().strip() linecounter = 0 current_query_name = "" current_header = "" lines_by_query = {} header_lines_by_query = {} before = time.time() last = before existing_query_names = set() for line in f: if line[0]==">": fields = line.strip().split() current_query_name = fields[1] current_header = line.strip() if current_query_name not in existing_query_names: lines_by_query[current_query_name] = [] header_lines_by_query[current_query_name] = [] existing_query_names.add(current_query_name) else: fields = line.strip().split() if len(fields) > 4: # sometimes start and end are the other way around, but for this they need to be in order query_min = min([int(fields[2]),int(fields[3])]) query_max = max([int(fields[2]),int(fields[3])]) ########## TESTING ONLY ########### # lines_by_query[current_query_name] = (query_min,query_max) # test_list = test_list + [(query_min,query_max)] ##################################### 
lines_by_query[current_query_name].append((query_min,query_max)) header_lines_by_query[current_query_name].append(current_header) # linecounter += 1 # if linecounter % 10000000 == 0: # print "%d,%f" % (linecounter, time.time()-last) # last = time.time() f.close() print "First read through the file: %d seconds for %d query-reference combinations" % (time.time()-before,linecounter) before = time.time() alignments_to_keep = {} num_queries = len(lines_by_query) print "Filtering alignments of %d queries" % (num_queries) num_query_step_to_report = num_queries/100 if num_queries < 100: num_query_step_to_report = num_queries/10 if num_queries < 10: num_query_step_to_report = 1 query_counter = 0 for query in lines_by_query: ################ TESTING #################### # results_intervaltree = summarize_intervaltree(lines_by_query[query], unique_length_required = unique_length) # intervaltree_filtered_out = set(range(0,len(lines_by_query[query]))) - set(results_intervaltree) # results_planesweep = summarize_planesweep(lines_by_query[query], unique_length_required = unique_length) # planesweep_filtered_out = set(range(0,len(lines_by_query[query]))) - set(results_planesweep) # if intervaltree_filtered_out == planesweep_filtered_out : # num_matches += 1 # else: # num_mismatches += 1 # print "MISMATCH:" # print "number of alignments:", len(lines_by_query[query]) # print "results_intervaltree:" # print results_intervaltree # for i in results_intervaltree: # print lines_by_query[query][i] # print "results_planesweep:" # print results_planesweep # for i in results_planesweep: # print lines_by_query[query][i] ################ TESTING #################### alignments_to_keep[query] = summarize_planesweep(lines_by_query[query], unique_length_required = unique_length,keep_small_uniques=keep_small_uniques) query_counter += 1 if (query_counter % num_query_step_to_report) == 0: print "Progress: %d%%" % (query_counter*100/num_queries) print "Progress: 100%" print "Deciding which alignments 
to keep: %d seconds for %d queries" % (time.time()-before,num_queries) before = time.time() fout = gzip.open(output_filename + ".Assemblytics.unique_length_filtered_l%d.delta.gz" % (unique_length),'w') f = open(filename) header1 = f.readline() if header1[0:2]=="\x1f\x8b": f.close() f = gzip.open(filename) header1 = f.readline() fout.write(header1) # write the first line that we read already fout.write(f.readline()) linecounter = 0 # For filtered delta file: list_of_alignments_to_keep = [] alignment_counter = {} keep_printing = False # For coords: current_query_name = "" current_query_position = 0 fcoords_out_tab = open(output_filename + ".coords.tab",'w') fcoords_out_csv = open(output_filename + ".coords.csv",'w') fcoords_out_csv.write("ref_start,ref_end,query_start,query_end,ref_length,query_length,ref,query,tag\n") # For basic assembly stats: ref_sequences = set() query_sequences = set() ref_lengths = [] query_lengths = [] f_stats_out = open(output_filename + ".Assemblytics_assembly_stats.txt","w") for line in f: linecounter += 1 if line[0]==">": fields = line.strip().split() # For delta file output: query = fields[1] list_of_alignments_to_keep = alignments_to_keep[query] header_needed = False for index in list_of_alignments_to_keep: if line.strip() == header_lines_by_query[query][index]: header_needed = True if header_needed == True: fout.write(line) # if we have any alignments under this header, print the header alignment_counter[query] = alignment_counter.get(query,0) # For coords: current_reference_name = fields[0][1:] current_query_name = fields[1] current_reference_size = int(fields[2]) current_query_size = int(fields[3]) # For basic assembly stats: if not current_reference_name in ref_sequences: ref_lengths.append(current_reference_size) ref_sequences.add(current_reference_name) if not current_query_name in query_sequences: query_lengths.append(current_query_size) query_sequences.add(current_query_name) else: fields = line.strip().split() if len(fields) > 
4: # For coords: ref_start = int(fields[0]) ref_end = int(fields[1]) query_start = int(fields[2]) query_end = int(fields[3]) csv_tag = "repetitive" if alignment_counter[query] in list_of_alignments_to_keep: fout.write(line) fcoords_out_tab.write("\t".join(map(str,[ref_start,ref_end,query_start, query_end,current_reference_size,current_query_size,current_reference_name,current_query_name])) + "\n") csv_tag = "unique" keep_printing = True else: keep_printing = False fcoords_out_csv.write(",".join(map(str,[ref_start,ref_end,query_start, query_end,current_reference_size,current_query_size,current_reference_name.replace(",","_"),current_query_name.replace(",","_"),csv_tag])) + "\n") alignment_counter[query] = alignment_counter[query] + 1 elif keep_printing == True: fout.write(line) fcoords_out_tab.close() fcoords_out_csv.close() print "Reading file and recording all the entries we decided to keep: %d seconds for %d total lines in file" % (time.time()-before,linecounter) ref_lengths.sort() query_lengths.sort() # Assembly statistics ref_lengths = np.array(ref_lengths) query_lengths = np.array(query_lengths) f_stats_out.write("Reference: %s\n" % (header1.split()[0].split("/")[-1])) f_stats_out.write( "Number of sequences: %s\n" % intWithCommas(len(ref_lengths))) f_stats_out.write( "Total sequence length: %s\n" % gig_meg(sum(ref_lengths))) f_stats_out.write( "Mean: %s\n" % gig_meg(np.mean(ref_lengths))) f_stats_out.write( "Min: %s\n" % gig_meg(np.min(ref_lengths))) f_stats_out.write( "Max: %s\n" % gig_meg(np.max(ref_lengths))) f_stats_out.write( "N50: %s\n" % gig_meg(N50(ref_lengths))) f_stats_out.write( "\n\n") f_stats_out.write( "Query: %s\n" % header1.split()[1].split("/")[-1]) f_stats_out.write( "Number of sequences: %s\n" % intWithCommas(len(query_lengths))) f_stats_out.write( "Total sequence length: %s\n" % gig_meg(sum(query_lengths))) f_stats_out.write( "Mean: %s\n" % gig_meg(np.mean(query_lengths))) f_stats_out.write( "Min: %s\n" % gig_meg(np.min(query_lengths))) 
f_stats_out.write( "Max: %s\n" % gig_meg(np.max(query_lengths))) f_stats_out.write( "N50: %s\n" % gig_meg(N50(query_lengths))) f.close() fout.close() f_stats_out.close() def N50(sorted_list): # List should be sorted as increasing # We flip the list around here so we start with the largest element cumsum = 0 for length in sorted_list[::-1]: cumsum += length if cumsum >= sum(sorted_list)/2: return length def gig_meg(number,digits = 2): gig = 1000000000. meg = 1000000. kil = 1000. if number > gig: return str(round(number/gig,digits)) + " Gbp" elif number > meg: return str(round(number/meg,digits)) + " Mbp" elif number > kil: return str(round(number/kil,digits)) + " Kbp" else: return str(number) + " bp" def intWithCommas(x): if type(x) not in [type(0), type(0L)]: raise TypeError("Parameter must be an integer.") if x < 0: return '-' + intWithCommas(-x) result = '' while x >= 1000: x, r = divmod(x, 1000) result = ",%03d%s" % (r, result) return "%d%s" % (x, result) def summarize_planesweep(lines,unique_length_required, keep_small_uniques=False): alignments_to_keep = [] # print len(lines) # If no alignments: if len(lines)==0: return [] # If only one alignment: if len(lines) == 1: if keep_small_uniques == True or abs(lines[0][1] - lines[0][0]) >= unique_length_required: return [0] else: return [] starts_and_stops = [] for query_min,query_max in lines: # print query_min, query_max starts_and_stops.append((query_min,"start")) starts_and_stops.append((query_max,"stop")) sorted_starts_and_stops = sorted(starts_and_stops,key=operator.itemgetter(0)) # print sorted_starts_and_stops current_coverage = 0 last_position = -1 # sorted_unique_intervals = [] sorted_unique_intervals_left = [] sorted_unique_intervals_right = [] for pos,change in sorted_starts_and_stops: # print sorted_starts_and_stops[i] # pos = sorted_starts_and_stops[i][0] # change = sorted_starts_and_stops[i][1] # print pos,change # First alignment only: # if last_position == -1: # last_position = pos # continue # print 
last_position,pos,current_coverage if current_coverage == 1: # sorted_unique_intervals.append((last_position,pos)) sorted_unique_intervals_left.append(last_position) sorted_unique_intervals_right.append(pos) if change == "start": current_coverage += 1 else: current_coverage -= 1 last_position = pos linecounter = 0 for query_min,query_max in lines: i = binary_search(query_min,sorted_unique_intervals_left,0,len(sorted_unique_intervals_left)) exact_match = False if sorted_unique_intervals_left[i] == query_min and sorted_unique_intervals_right[i] == query_max: exact_match = True sum_uniq = 0 while i < len(sorted_unique_intervals_left) and sorted_unique_intervals_left[i] >= query_min and sorted_unique_intervals_right[i] <= query_max: sum_uniq += sorted_unique_intervals_right[i] - sorted_unique_intervals_left[i] i += 1 # print query_min,query_max,sum_uniq if sum_uniq >= unique_length_required: alignments_to_keep.append(linecounter) elif keep_small_uniques == True and exact_match == True: alignments_to_keep.append(linecounter) # print "Keeping small alignment:", query_min, query_max # print sorted_unique_intervals_left[i-1],sorted_unique_intervals_right[i-1] linecounter += 1 return alignments_to_keep def binary_search(query, numbers, left, right): # Returns index of the matching element or the first element to the right if left >= right: return right mid = (right+left)/2 if query == numbers[mid]: return mid elif query < numbers[mid]: return binary_search(query,numbers,left,mid) else: # if query > numbers[mid]: return binary_search(query,numbers,mid+1,right) # def summarize_intervaltree(lines, unique_length_required): # alignments_to_keep = [] # # print len(lines) # if len(lines)==0: # return alignments_to_keep # if len(lines) == 1: # if abs(lines[0][1] - lines[0][0]) >= unique_length_required: # return [0] # starts_and_stops = [] # for query_min,query_max in lines: # starts_and_stops.append((query_min,query_max)) # # build full tree # tree = 
IntervalTree.from_tuples(starts_and_stops) # # for each interval (keeping the same order as the lines in the input file) # line_counter = 0 # for query_min,query_max in lines: # # create a tree object from the current interval # this_interval = IntervalTree.from_tuples([(query_min,query_max)]) # # create a copy of the tree without this one interval # rest_of_tree = tree - this_interval # # find difference between this interval and the rest of the tree by subtracting out the other intervals one by one # for other_interval in rest_of_tree: # this_interval.chop(other_interval.begin, other_interval.end) # # loop through to count the total number of unique basepairs # total_unique_length = 0 # for sub_interval in this_interval: # total_unique_length += sub_interval.end - sub_interval.begin # # if the total unique length is above our threshold, add the index to the list we are reporting # if total_unique_length >= unique_length_required: # alignments_to_keep.append(line_counter) # line_counter += 1 # return alignments_to_keep def main(): parser=argparse.ArgumentParser(description="Filters alignments in delta file based whether each alignment has a unique sequence anchoring it") parser.add_argument("--delta",help="delta file" ,dest="delta", type=str, required=True) parser.add_argument("--out",help="output file" ,dest="out", type=str, required=True) parser.add_argument("--unique-length",help="The total length of unique sequence an alignment must have on the query side to be retained. 
Default: 10000" ,dest="unique_length",type=int, default=10000) parser.add_argument("--keep-small-uniques",help="Keep small aligments (below the unique anchor length) if they are completely unique without any part of the alignment mapping multiple places" ,dest="keep_small_uniques",action="store_true") parser.set_defaults(func=run) args=parser.parse_args() args.func(args) if __name__=="__main__": main() Assemblytics_web-master/bin/Assemblytics_variant_charts.R000066400000000000000000000151421304413665500243050ustar00rootroot00000000000000library(ggplot2) library(plyr) args<-commandArgs(TRUE) output_prefix <- args[1] abs_min_var <- as.numeric(args[2]) abs_max_var <- as.numeric(args[3]) filename <- paste(output_prefix,".Assemblytics_structural_variants.bed",sep="") bed <- read.csv(filename, sep="\t", quote='', header=TRUE) names(bed)[1:11] <- c("chrom","start","stop","name","size","strand","type","ref.dist","query.dist","contig_position","method.found") # bed$type <- factor(bed$type, levels = c("Insertion","Deletion","Repeat_expansion","Repeat_contraction","Tandem_expansion","Tandem_contraction")) bed$type <- revalue(bed$type, c("Repeat_expansion"="Repeat expansion", "Repeat_contraction"="Repeat contraction", "Tandem_expansion"="Tandem expansion", "Tandem_contraction"="Tandem contraction")) types.allowed <- c("Insertion","Deletion","Repeat expansion","Repeat contraction","Tandem expansion","Tandem contraction") bed$type <- factor(bed$type, levels = types.allowed) theme_set(theme_bw(base_size = 12)) library(RColorBrewer) # display.brewer.all() color_palette_name <- "Set1" big_palette<-brewer.pal(9,"Set1")[c(1,2,3,4,5,7)] # Nature-style formatting for publication using commas (e.g.: 7,654,321) comma_format<-function(num) { formatC(abs(num),format="f",big.mark=",",drop0trailing = TRUE) } ############### FOR LOG PLOT ############### alt <- bed alt[alt$type=="Deletion",]$size <- -1*alt[alt$type=="Deletion",]$size alt[alt$type=="Repeat contraction",]$size <- 
-1*alt[alt$type=="Repeat contraction",]$size alt[alt$type=="Tandem contraction",]$size <- -1*alt[alt$type=="Tandem contraction",]$size alt$Type <- "None" if (nrow(alt[alt$type %in% c("Insertion","Deletion"),]) > 0) { alt[alt$type %in% c("Insertion","Deletion"),]$Type <- "Indel" } if (nrow(alt[alt$type %in% c("Tandem expansion","Tandem contraction"),]) > 0) { alt[alt$type %in% c("Tandem expansion","Tandem contraction"),]$Type <- "Tandem" } if (nrow(alt[alt$type %in% c("Repeat expansion","Repeat contraction"),]) > 0) { alt[alt$type %in% c("Repeat expansion","Repeat contraction"),]$Type <- "Repeat" } ############################################# ####### Run plotting with various size ranges and with either all variants or only indels ###### var_size_cutoffs <- c(abs_min_var,10,50,500,abs_max_var) var_size_cutoffs <- var_size_cutoffs[var_size_cutoffs>=abs_min_var & var_size_cutoffs<=abs_max_var] for (to_png in c(TRUE,FALSE)) { indels_only = FALSE # for (indels_only in c(TRUE,FALSE)) { var_type_filename <- "all_variants" if (indels_only) { var_type_filename <- "indels" } for (i in seq(1,length(var_size_cutoffs)-1)) { min_var <- var_size_cutoffs[i] max_var <- var_size_cutoffs[i+1] if (min_var < abs_max_var && max_var > abs_min_var) { types_to_plot = types.allowed if (indels_only) { types_to_plot <- c("Insertion","Deletion") } filtered_bed <- bed[bed$size>=min_var & bed$size<=max_var & bed$type %in% types_to_plot,] filtered_bed$type <- factor(filtered_bed$type,levels=types_to_plot) binwidth <- max_var/100 if (binwidth < 1) { binwidth <- 1 } if (nrow(filtered_bed)>0) { if (to_png) { png(paste(output_prefix,".Assemblytics.size_distributions.", var_type_filename, ".", min_var, "-",max_var, ".png", sep=""),1000,1000,res=200) } else { pdf(paste(output_prefix,".Assemblytics.size_distributions.", var_type_filename, ".", min_var, "-",max_var, ".pdf", sep="")) } print(ggplot(filtered_bed,aes(x=size, fill=type)) + geom_histogram(binwidth=binwidth) + 
scale_fill_manual(values=big_palette,drop=FALSE) + facet_grid(type ~ .,drop=FALSE) + labs(fill="Variant type",x="Variant size",y="Count",title=paste("Variants",comma_format(min_var),"to", comma_format(max_var),"bp")) + scale_x_continuous(labels=comma_format,expand=c(0,0),limits=c(min_var-1,max_var)) + scale_y_continuous(labels=comma_format,expand=c(0,0)) + theme( strip.text=element_blank(),strip.background=element_blank(), plot.title = element_text(vjust=3), axis.text=element_text(size=8), panel.grid.minor = element_line(colour = NA), panel.grid.major = element_line(colour = NA) ) ) dev.off() } else { print("No variants in plot:") print(paste("min_var=",min_var)) print(paste("max_var=",max_var)) } } } # LOG PLOT: if (to_png) { png(paste(output_prefix,".Assemblytics.size_distributions.", var_type_filename, ".log_all_sizes.png", sep=""),width=2000,height=1000,res=200) } else { pdf(paste(output_prefix,".Assemblytics.size_distributions.", var_type_filename, ".log_all_sizes.pdf", sep="")) } print(ggplot(alt,aes(x=size, fill=type,y=..count..+1)) + geom_histogram(binwidth=abs_max_var/100, position="identity",alpha=0.7) + scale_fill_manual(values=big_palette,drop=FALSE) + facet_grid(Type ~ .,drop=FALSE) + labs(fill="Variant type",x="Variant size",y="Log(count + 1)",title=paste("Variants",comma_format(abs_min_var),"to", comma_format(abs_max_var),"bp")) + scale_x_continuous(labels=comma_format,expand=c(0,0),limits=c(-1*abs_max_var,abs_max_var)) + # scale_y_continuous(labels=comma_format,expand=c(0,0)) + scale_y_log10(labels=comma_format,expand=c(0,0)) + annotation_logticks(sides="l") + theme( strip.text=element_blank(),strip.background=element_blank(), plot.title = element_text(vjust=3), axis.text=element_text(size=8), panel.grid.minor = element_line(colour = NA), panel.grid.major = element_line(colour = NA) ) ) dev.off() # } } ############################################################################## 
Assemblytics_web-master/bin/Assemblytics_within_alignment.py000077500000000000000000000117361304413665500250740ustar00rootroot00000000000000#! /usr/bin/env python import argparse import gzip # Author: Maria Nattestad # Email: mnattest@cshl.edu # This script is part of Assemblytics, a program to detect and analyze structural variants from an assembly aligned to a reference genome using MUMmer. def run(args): filename = args.delta minimum_variant_size = args.minimum_variant_size f = open(filename) header1 = f.readline() if header1[0:2]=="\x1f\x8b": f.close() f = gzip.open(filename) header1 = f.readline() # Ignore the first two lines for now f.readline() linecounter = 0 current_reference_name = "" current_reference_position = 0 current_query_name = "" current_query_position = 0 variants = [] for line in f: if line[0]==">": # linecounter += 1 # if linecounter > 1: # break fields = line.strip().split() current_reference_name = fields[0][1:] current_query_name = fields[1] else: fields = line.strip().split() if len(fields) > 4: # current_reference_position = int(fields[0]) current_reference_position = min(int(fields[0]),int(fields[1])) # fields[1] is the reference position at the end of the alignment # current_query_position = int(fields[2]) current_query_position = min(int(fields[2]),int(fields[3])) # fields[3] is the query position at the end of the alignment else: tick = int(fields[0]) if abs(tick) == 1: # then go back and edit the last entry to add 1 more to its size report = variants[-1] report[4] = report[4] + 1 # size if tick > 0: # deletion, moves in reference report[2] = report[2] + 1 # reference end position report[7] = report[7] + 1 # reference gap size current_reference_position += 1 # update reference position after deletion elif tick < 0: # insertion, moves in query report[8] = report[8] + 1 # query gap size report[12] = report[12] + 1 # query end position current_query_position += 1 # update query position after insertion else: # report the last one and 
continue current_reference_position += abs(tick) - 1 current_query_position += abs(tick) - 1 if tick > 0: size = 1 # report = "%s\t%d\t%d\tAssemblytics_%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\n" % (current_reference_name,current_reference_position,current_reference_position+size,len(variants)+1,size,"+","Deletion",size,0,current_query_name,"within_alignment") report = [current_reference_name,current_reference_position,current_reference_position+size,"Assemblytics_w_"+str(len(variants)+1),size,"+","Deletion",size,0,current_query_name,"within_alignment",current_query_position,current_query_position] current_reference_position += size # update reference position after deletion variants.append(report) elif tick < 0: size = 1 # report = "%s\t%d\t%d\tAssemblytics_%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\n" % (current_reference_name,current_reference_position,current_reference_position,len(variants)+1,size,"+","Insertion",0,size,current_query_name,"within_alignment") report = [current_reference_name,current_reference_position,current_reference_position,"Assemblytics_w_"+str(len(variants)+1),size,"+","Insertion",0,size,current_query_name,"within_alignment",current_query_position,current_query_position+size] current_query_position += size # update query position after insertion variants.append(report) # TESTING # print line, report f.close() newcounter = 1 for line in variants: # report = "%s\t%d\t%d\tAssemblytics_%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\n" % line if line[4] >= minimum_variant_size: line[3] = "Assemblytics_w_%d" % (newcounter) print "\t".join(map(str,line[0:10])) + ":" + str(line[11]) + "-" + str(line[12]) + ":+\t" + line[10] # print "\t".join(map(str,line)) newcounter += 1 def main(): parser=argparse.ArgumentParser(description="Outputs MUMmer coordinates annotated with length of unique sequence for each alignment") parser.add_argument("--delta",help="delta file" ,dest="delta", type=str, required=True) parser.add_argument("--min",help="Minimum size (bp) of variant to include, default = 
50" ,dest="minimum_variant_size",type=int, default=50) parser.set_defaults(func=run) args=parser.parse_args() args.func(args) if __name__=="__main__": main() Assemblytics_web-master/bin/web_pipeline000077500000000000000000000121171304413665500210170ustar00rootroot00000000000000#!/bin/bash USAGE="web_pipeline delta output_prefix unique_length_required min_size" if [ -z "$1" ] then echo "ERROR in Assemblytics: No delta file given" echo "Usage:" echo $USAGE exit fi if [ -z "$2" ] then echo "ERROR in Assemblytics: No output prefix given" echo "Usage:" echo $USAGE exit fi if [ -z "$3" ] then echo "ERROR in Assemblytics: No unique length requirement parameter given" echo "Usage:" echo $USAGE exit fi if [ -z "$4" ] then echo "ERROR in Assemblytics: No minimum size parameter given" echo "Usage:" echo $USAGE exit fi if [ -z "$5" ] then echo "ERROR in Assemblytics: No maximum size parameter given" echo "Usage:" echo $USAGE exit fi # Author: Maria Nattestad # Email: mnattest@cshl.edu DELTA=${1?"$USAGE"} OUTPUT_PREFIX=${2?"$USAGE"} UNIQUE_LENGTH=${3?"$USAGE"} MINIMUM_SIZE=${4?"$USAGE"} MAXIMUM_SIZE=${5?"$USAGE"} >&2 echo Input delta file: $DELTA >&2 echo Output prefix: $OUTPUT_PREFIX LOG_FILE=${OUTPUT_PREFIX%/*}/progress.log echo "${OUTPUT_PREFIX##*/}" >> $LOG_FILE echo "STARTING,DONE,Starting unique anchor filtering." >> $LOG_FILE >&2 echo "1. Filter delta file" ./bin/Assemblytics_uniq_anchor.py --delta $DELTA --unique-length $UNIQUE_LENGTH --out $OUTPUT_PREFIX --keep-small-uniques if [ -e $OUTPUT_PREFIX.Assemblytics.unique_length_filtered_l$UNIQUE_LENGTH.delta.gz ]; then echo "UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments." >> $LOG_FILE >&2 echo "2. 
Finding variants between alignments" ./bin/Assemblytics_between_alignments.pl $OUTPUT_PREFIX.coords.tab $MINIMUM_SIZE $MAXIMUM_SIZE all-chromosomes exclude-longrange bed > $OUTPUT_PREFIX.variants_between_alignments.bed if [ -e $OUTPUT_PREFIX.variants_between_alignments.bed ]; then echo "BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments." >> $LOG_FILE >&2 echo "3. Finding variants within alignments" ./bin/Assemblytics_within_alignment.py --delta $OUTPUT_PREFIX.Assemblytics.unique_length_filtered_l$UNIQUE_LENGTH.delta.gz --min $MINIMUM_SIZE > $OUTPUT_PREFIX.variants_within_alignments.bed if [ -e $OUTPUT_PREFIX.variants_within_alignments.bed ]; then echo "WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together." >> $LOG_FILE >&2 echo "4. Combine variants between and within alignments"; HEADER="reference\tref_start\tref_stop\tID\tsize\tstrand\ttype\tref_gap_size\tquery_gap_size\tquery_coordinates\tmethod" cat <(echo -e $HEADER) $OUTPUT_PREFIX.variants_within_alignments.bed $OUTPUT_PREFIX.variants_between_alignments.bed > $OUTPUT_PREFIX.Assemblytics_structural_variants.bed if [ -e $OUTPUT_PREFIX.Assemblytics_structural_variants.bed ]; then echo "COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics." 
>> $LOG_FILE Rscript ./bin/Assemblytics_variant_charts.R $OUTPUT_PREFIX $MINIMUM_SIZE $MAXIMUM_SIZE ./bin/Assemblytics_index.py -coords $OUTPUT_PREFIX.coords.csv -out $OUTPUT_PREFIX # Rscript ./bin/Assemblytics_dotplot.R $OUTPUT_PREFIX cat $OUTPUT_PREFIX.coords.tab | awk '{print $7,$5}' OFS='\t' | sort | uniq | sort -k2,2nr > $OUTPUT_PREFIX.coords.ref.genome cat $OUTPUT_PREFIX.coords.tab | awk '{print $8,$6}' OFS='\t' | sort | uniq | sort -k2,2nr > $OUTPUT_PREFIX.coords.query.genome Rscript ./bin/Assemblytics_Nchart.R $OUTPUT_PREFIX ./bin/Assemblytics_summary.py -i $OUTPUT_PREFIX.Assemblytics_structural_variants.bed -min $MINIMUM_SIZE -max $MAXIMUM_SIZE > $OUTPUT_PREFIX.Assemblytics_structural_variants.summary zip $OUTPUT_PREFIX.Assemblytics_results.zip $OUTPUT_PREFIX.Assemblytics* head $OUTPUT_PREFIX.Assemblytics_structural_variants.bed | column -t > $OUTPUT_PREFIX.variant_preview.txt if grep -q "Total" $OUTPUT_PREFIX.Assemblytics_structural_variants.summary; then echo "SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully" >> $LOG_FILE else echo "SUMMARY,FAIL,Step 5: Assemblytics_summary.py failed" >> $LOG_FILE fi else echo "COMBINE,FAIL,Step 4: combining variants failed" >> $LOG_FILE fi else echo "WITHIN,FAIL,Step 3: Assemblytics_within_alignment.py failed: Possible problem before this step or with Python on server." >> $LOG_FILE fi else echo "BETWEEN,FAIL,Step 2: Assemblytics_between_alignments.pl failed: Possible problem with Perl or show-coords on server." >> $LOG_FILE fi else echo "UNIQFILTER,FAIL,Step 1: Assemblytics_uniq_anchor.py failed: Possible problem with Python or Python packages on server." >> $LOG_FILE fi Assemblytics_web-master/check_progress.php000077500000000000000000000005371304413665500213770ustar00rootroot00000000000000 Assemblytics_web-master/cite.php000066400000000000000000000020261304413665500173120ustar00rootroot00000000000000

How to cite Assemblytics

Please cite Assemblytics at Bioinformatics: http://www.ncbi.nlm.nih.gov/pubmed/27318204


The code is open-source at https://github.com/MariaNattestad/assemblytics (command-line version)

https://github.com/MariaNattestad/Assemblytics_web (full web application)

Assemblytics_web-master/contact.php000077500000000000000000000015631304413665500200310ustar00rootroot00000000000000

Contact information

Science and web application by Maria Nattestad

Principal investigator / The Boss: Mike Schatz


For questions, email: mnattest@cshl.edu

Assemblytics_web-master/css/000077500000000000000000000000001304413665500164455ustar00rootroot00000000000000Assemblytics_web-master/css/custom_styles.css000066400000000000000000000063221304413665500220770ustar00rootroot00000000000000
body {
    padding-top: 70px;
    padding-bottom: 30px;
}

/* Title jumbotron */
#header_bar {
    margin-top: -20px;
    padding-top: 5px;
    padding-bottom: 5px;
    background-color: #3094FF;
}

#header_bar>h1 {
    color: black;
    font-size: 3.5em;
}

#header_bar>h3 {
    color: black;
    font-size: 1.5em;
}

/* View this analysis later url */
#codepanel {
    word-break: break-all;
}

#submit_button {
    /*float:right;*/
    /*margin: 20px 40px 40px 40px;*/
    margin: 2% 4% 4% 4%;
}

/* Dropzone text */
.dz-message {
    font-size: 2em;
}

#results {
    visibility: hidden;
}

.frame {
    margin: 0% 2% 2% 2%;
}

.plot_frame {
    margin: 2% 2% 2% 2%;
}

/* Navigation bar */
.navbar-default {
    /*background-color: #ffffff;*/
    /*border-color: #d2d2d2;*/
}

.navbar-default .navbar-brand {
    color: #000000;
}

.navbar-default .navbar-brand:hover,
.navbar-default .navbar-brand:focus {
    color: #000000;
}

.navbar-default .navbar-text {
    color: #000000;
}

.navbar-default .navbar-nav > li > a {
    color: #000000;
}

.navbar-default .navbar-nav > li > a:hover,
.navbar-default .navbar-nav > li > a:focus {
    color: #000000;
}

.navbar-default .navbar-nav > .active > a,
.navbar-default .navbar-nav > .active > a:hover,
.navbar-default .navbar-nav > .active > a:focus {
    color: #000000;
    background-color: #d2d2d2;
}

.navbar-default .navbar-nav > .open > a,
.navbar-default .navbar-nav > .open > a:hover,
.navbar-default .navbar-nav > .open > a:focus {
    color: #000000;
    background-color: #d2d2d2;
}

.navbar-default .navbar-toggle {
    border-color: #d2d2d2;
}

.navbar-default .navbar-toggle:hover,
.navbar-default .navbar-toggle:focus {
    background-color: #d2d2d2;
}

.navbar-default .navbar-toggle .icon-bar {
    background-color: #000000;
}

.navbar-default .navbar-collapse,
.navbar-default .navbar-form {
    border-color: #000000;
}

.navbar-default .navbar-link {
    color: #000000;
}

.navbar-default .navbar-link:hover {
    color: #000000;
}

@media (max-width: 767px) {
    .navbar-default .navbar-nav .open .dropdown-menu > li > a {
        color: #000000;
    }

    .navbar-default .navbar-nav .open .dropdown-menu > li > a:hover,
    .navbar-default .navbar-nav .open .dropdown-menu > li > a:focus {
        color: #000000;
    }

    .navbar-default .navbar-nav .open .dropdown-menu > .active > a,
    .navbar-default .navbar-nav .open .dropdown-menu > .active > a:hover,
    .navbar-default .navbar-nav .open .dropdown-menu > .active > a:focus {
        color: #000000;
        background-color: #d2d2d2;
    }
}

/* The declarations below were disabled with a leading "#", which is not CSS
   comment syntax and only worked through parser error recovery. They are
   kept here as real comments; rendering is unchanged. */
.panel {
    margin: 2% 2% 2% 2%;
    /*border-color:#E8E8E8;*/
}

.panel > .panel-heading {
    /*background-color: #E8E8E8;*/
    /*background-image:none;*/
    /*border-color:#E8E8E8;*/
    /*color:black;*/
}

a.fancybox img {
    border: none;
    -o-transform: scale(1,1);
    -ms-transform: scale(1,1);
    -moz-transform: scale(1,1);
    -webkit-transform: scale(1,1);
    transform: scale(1,1);
    -o-transition: all 0.2s ease-in-out;
    -ms-transition: all 0.2s ease-in-out;
    -moz-transition: all 0.2s ease-in-out;
    -webkit-transition: all 0.2s ease-in-out;
    transition: all 0.2s ease-in-out;
}

/*a.fancybox:hover img {
    position: relative;
    z-index: 999;
    -o-transform: scale(1.03,1.03);
    -ms-transform: scale(1.03,1.03);
    -moz-transform: scale(1.03,1.03);
    -webkit-transform: scale(1.03,1.03);
    transform: scale(1.03,1.03);
}*/
Assemblytics_web-master/file_upload.php000077500000000000000000000005421304413665500206550ustar00rootroot00000000000000
Assemblytics_web-master/header.html000077500000000000000000000136171304413665500200060ustar00rootroot00000000000000
Assemblytics Assemblytics
Assemblytics_web-master/index.php000077500000000000000000000172571304413665500175140ustar00rootroot00000000000000

Instructions

Upload a delta file to analyze alignments of an assembly to another assembly or a reference genome

  1. Download and install MUMmer
  2. Align your assembly to a reference genome using nucmer (from MUMmer package)

    $ nucmer -maxmatch -l 100 -c 500 REFERENCE.fa ASSEMBLY.fa -prefix OUT

    Consult the MUMmer manual if you encounter problems

  3. Optional: Gzip the delta file to speed up upload (usually 2-4X faster)

    $ gzip OUT.delta
    Then use the OUT.delta.gz file for upload.

  4. Upload the .delta or .delta.gz file (view example) to Assemblytics

Important: Use only contigs rather than scaffolds from the assembly. This prevents false positives that arise when the number of Ns in the scaffolded sequence does not exactly match the corresponding distance in the reference.

The unique sequence length required acts as an anchor: it determines whether a sequence is unique enough to safely call variants from. It is an alternative to the mapping quality filter used for read alignment.

Run Assemblytics

Description

Unique sequence length required

Maximum variant size

Minimum variant size

Assemblytics_web-master/input_validation.php000077500000000000000000000111401304413665500217370ustar00rootroot00000000000000
> user_data/$code/run.log');} // $kmer_length = $_POST["kmer_length"]; // $read_length = $_POST["read_length"]; $url="analysis.php?code=$code"; $run_url="run_algorithm.php"; $filename="user_uploads/$code"; $back_button= "
"; //$continue_button= "
"; $continue_button= "
"; // if (!file_exists ($filename)) { echo "
No file uploaded
"; echo "$back_button"; exit; } $consistent=true; // $myfile = fopen($filename, "r") or die("Unable to open file!"); // $line1 = fgets($myfile); $myfile = gzopen($filename, "r") or die("Unable to open file!"); $line1 = gzgets($myfile); $line1 = trim(preg_replace( '/\s+/', ' ', $line1 )); $array=array_map("trim",explode(' ',$line1)); if (count($array)==2) { // echo "GOOD first line"; } else { echo "Bad first line. \n"; $consistent=false; } $line2 = fgets($myfile); $line2 = trim(preg_replace( '/\s+/', ' ', $line2 )); if ($line2 == "NUCMER") { // echo "GOOD second line"; } else { echo "Bad second line\n"; $consistent=false; } // fclose($myfile); gzclose($myfile); if ($consistent) { // if ($previous_bins > 500) { echo "
Great! File was uploaded and looks like a real delta file
"; // } else { // echo "
File was uploaded and has acceptable dimensions: $line_counter samples by $previous_bins bins, but the analysis is unlikely to work optimally without more bins. We recommend at least 500 bins for higher accuracy.
"; // } if (!file_exists("user_data/$code")) { } else { echo "
File already submitted once. Please continue.
"; } echo "
"; echo "$back_button"; echo "
"; echo "$continue_button"; echo "
"; } else { echo "
This doesn't look like a delta file. Are you sure it is the output from nucmer? The first line should list two file names separated by a space, and the second line should be the word 'NUCMER'. The remaining lines in the file specify alignments."; echo "
"; echo "$back_button"; } ?>
Assemblytics_web-master/interactive_dotplot.php000066400000000000000000000023101304413665500224440ustar00rootroot00000000000000

Assemblytics_web-master/js/000077500000000000000000000000001304413665500162715ustar00rootroot00000000000000Assemblytics_web-master/js/analysis_page_script.js000077500000000000000000000237211304413665500230420ustar00rootroot00000000000000var analysis_path="analysis.php?code="; ////////////////////////////////////////////////////////////// /////// For analysis page: ////////////////////////////////////////////////////////////// function showProgress() { var run_id_code=getUrlVars()["code"]; var prog=0; // remember ajax is asynchronous, so only the stuff inside the success: part will be called after retrieving information. If I put something after the statement, it can't use the info from check_progress.php because it is executed before this php script is called //alert('before ajax'); jQuery.ajax({ type:"POST", url: "check_progress.php", dataType: 'json', data: {code: run_id_code}, success: function (obj) { // alert("inside success"); // alert(obj); prog=obj; last_line = prog[prog.length-1]; console.log(last_line) nickname = prog[0].slice(0,prog[0].length-1); // this cuts off the last character, which is a carriage return we don't want to pass on to the visualizer as GET document.getElementById("nickname_header").innerHTML = nickname.replace(/_/g," "); output_array = prog.slice(1,prog.length); output_info = "" for (var i=0;i < output_array.length; i++) { sub_array = output_array[i].split(","); output_info += "

" + sub_array.slice(2,sub_array.length) + "

"; } document.getElementById("plot_info").innerHTML = output_info; if (last_line.indexOf('SUMMARY,DONE') > -1) { document.getElementById("plot_info").innerHTML = "Analysis completed successfully"; document.getElementById("progress_panel").className = "panel panel-success center"; check_plot_exists(0,nickname); } else if (last_line.indexOf("FAIL") > -1) { // SOMETHING FAILED document.getElementById("progress_panel").className = "panel panel-danger center"; } else { setTimeout(function(){showProgress();},500); } } }); } function getUrlVars() { var vars = {}; var parts = window.location.href.replace(/[?&]+([^=&]+)=([^&]*)/gi, function(m,key,value) { vars[key] = value; }); return vars; } var done_making_images = false; function imageresize() { console.log("resizing") var size_fraction = 3; // 1 means fit one plot on the page, 3 means fit 3 plots on the page var top_padding = 200; var side_padding = 0.05; var aspect_ratio = 1; //$(".fluidimage").height()/$(".fluidimage").width(); var height = Math.min(content_width/aspect_ratio*(1-side_padding), $( window ).height()-top_padding)/size_fraction; // console.log($(".fluidimage").height()); // console.log($(".fluidimage").width()); $(".fluidimage").height(height + "px"); // $(".fluidimage").width(height*aspect_ratio + "px"); // Fancybox plot zooming // http://www.dwuser.com/education/content/click-to-zoom-for-photos-adding-lightbox-effect-to-your-images/ var addToAll = true; var gallery = true; var titlePosition = 'inside'; $(addToAll ? 
'img' : 'img.fancybox').each(function(){ var $this = $(this); var title = $this.attr('title'); var src = $this.attr('data-big') || $this.attr('src'); var a = $('').attr('href', src).attr('title', title); $this.wrap(a); }); if (gallery) $('a.fancybox').attr('rel', 'fancyboxgallery'); $('a.fancybox').fancybox({ titlePosition: titlePosition }); $.noConflict(); } var content_width = $( window ).width(); function check_plot_exists(counter,nickname) { document.title = "Assemblytics: " + nickname; var run_id_code=getUrlVars()["code"]; var plot_url_prefix="user_data/"+run_id_code + "/" + nickname + ".Assemblytics."; var summary_table_url="user_data/"+run_id_code + "/" + nickname + ".Assemblytics_structural_variants.summary"; var variant_preview_url="user_data/"+run_id_code + "/" + nickname + ".variant_preview.txt"; var assembly_stats_url="user_data/"+run_id_code + "/" + nickname + ".Assemblytics_assembly_stats.txt"; var zip_file_url="user_data/"+run_id_code + "/" + nickname + ".Assemblytics_results.zip"; var file_to_wait_for=plot_url_prefix + "size_distributions.png"; console.log(nickname) if (counter>=100) { alert("Taking too long to find "+ file_to_wait_for) } else { wait_then_resize(); jQuery.ajax({ type:"POST", url: "list_plots.php", dataType: 'json', data: {code: run_id_code}, error: function() { console.log(counter+1); setTimeout(function(){check_plot_exists(counter+1,nickname);},500); }, success: function (obj) { console.log("SUCCESS:") var plot_filenames = obj; console.log(plot_filenames) for (i in plot_filenames) { plot_filename = plot_filenames[i]; console.log(plot_filename); document.getElementById("container_for_all_plots").innerHTML += '
'; } document.getElementById("container_for_all_plots").innerHTML += '

'; // plot_names = ["unfiltered_dotplot","dotplot","Nchart","size_distributions_all_variants_full_view","size_distributions_zoom","size_distributions"] // // document.getElementById("landing_for_plot1").innerHTML=''; // // document.getElementById("landing_for_plot2").innerHTML=''; // // document.getElementById("landing_for_plot3").innerHTML=''; // document.getElementById("landing_for_plot4").innerHTML=''; // document.getElementById("landing_for_plot5").innerHTML=''; // document.getElementById("landing_for_plot6").innerHTML=''; // document.getElementById("landing_for_plot7").innerHTML=''; // document.getElementById("landing_for_plot8").innerHTML=''; document.getElementById("landing_for_summary_statistics").innerHTML=''; document.getElementById("landing_for_variant_file_preview").innerHTML='
'; document.getElementById("landing_for_assembly_stats").innerHTML=''; document.getElementById("download_zip").href = zip_file_url; console.log("done_making_images") // Show all results document.getElementById("results").style.visibility= 'visible'; done_making_images = true; } }); } } function wait_then_resize() { if (done_making_images == true) { imageresize(); } else { setTimeout(wait_then_resize,50); } } function imgError(image) { image.onerror = ""; image.src = "resources/error_image.png"; // document.getElementById("missing_plots").innerHTML="Assemblytics has been updated, and new plots are available if you re-run this dataset"; var parent = image.parentNode; parent.parentNode.removeChild(parent); imageresize(); // console.log("Assemblytics has been updated, and new plots are available if you re-run this dataset"); return true; } $(document).ready(function() { showProgress(); $(window).bind("resize", function(){ //Adjusts image when browser resized imageresize(); }); }); // How to execute code after getting info from multiple files: // //$.when( // $.get(filename_input, function(csvString) { // array_input = $.csv.toArrays(csvString, {onParseValue: $.csv.hooks.castToScalar}); // }), // $.get(filename_output, function(csvString) { // array_ouput = $.csv.toArrays(csvString, {onParseValue: $.csv.hooks.castToScalar} ); // }) //).then(function() { // console.log(array_input) // console.log(typeof array_input) // console.log(array_input.length) // console.log(array_input[0].length) // var diff=[] // for (i=0; i'; document.getElementById("myAwesomeDropzone").innerHTML = ''; } window.onload = showCode(); Assemblytics_web-master/js/interactive_dotplot.js000066400000000000000000000703601304413665500227170ustar00rootroot00000000000000 function getUrlVars() { var vars = {}; var parts = window.location.href.replace(/[?&]+([^=&]+)=([^&]*)/gi, function(m,key,value) { vars[key] = value; }); return vars; } var run_id_code=getUrlVars()["code"]; var directory="user_data/" + 
run_id_code + "/"; var nickname=getUrlVars()["nickname"]; document.getElementById("nickname_header_dotplot").innerHTML = nickname.replace(/_/g," "); console.log(run_id_code) console.log(nickname) ////////// Positions and sizes for drawing ////////// var w = window, d = document, e = d.documentElement, g = d.getElementsByTagName('body')[0]; var svg_width; var svg_height; var top_edge_padding; var bottom_edge_padding; var left_edge_padding; var right_edge_padding; var dotplot_canvas_width; var dotplot_canvas_height; var chrom_label_y_offset; var contig_label_x_offset; var min_pixels_to_draw = 1; var max_num_alignments = 100000; ////////// Drawing/D3 objects ////////// var svg = null; var dotplot_container = null; var dotplot_canvas = null; var dotplot_ref_axis; var dotplot_query_axis; ////////// Scales ////////// var dotplot_ref_scale = d3.scale.linear(); var dotplot_query_scale = d3.scale.linear(); ////////// Behavior /////////// var zoom = null; ////////// reference x query selection /////////// var refs_selected = null; var queries_selected = null; ////////// Data ////////// var loaded_ref_index = false; var loaded_query_index = false; var loaded_alignments = false; var ref_index = null; var query_index = null; var matching_queries_by_ref = {}; var matching_refs_by_query = {}; var coords_data = null; var ref_chrom_start_positions = {}; // ref_chrom_start_positions["chr1"] = 234793761 // absolute position on the dot plot var query_chrom_start_positions = {}; // query_chrom_start_positions["JSAC01000015.1"] = 8237493 // absolute position on the dot plot var ref_chrom_label_data = []; var query_chrom_label_data = []; var cumulative_ref_size = 0; var cumulative_query_size = 0; console.log("Starting"); load_data(); responsive_sizing(); function responsive_sizing() { // top_banner_height = 120; // without title top_banner_height = 170; // with title window_width = (w.innerWidth || e.clientWidth || g.clientWidth);//*0.98; svg_width = window_width*0.7; svg_height = 
(w.innerHeight || e.clientHeight || g.clientHeight) - top_banner_height; var right_panel_width = window_width-svg_width; // console.log(svg_width) top_edge_padding = svg_height*0.04; bottom_edge_padding = svg_height*0.15; left_edge_padding = svg_width*0.10; right_edge_padding = svg_width*0.03; //////// Create the SVG //////// svg = d3.select("svg") .attr("width", svg_width) .attr("height", svg_height); d3.select("#panel") .attr("width",right_panel_width) .attr("height",svg_height); svg.append("rect") .attr("width",svg_width) .attr("height",svg_height) .attr("class","background") .style('fill',"none"); dotplot_canvas_width = svg_width - left_edge_padding - right_edge_padding; dotplot_canvas_height = svg_height - top_edge_padding - bottom_edge_padding; // TEMPORARY: // dotplot_canvas_width = dotplot_canvas_width/2; // Make it into a square // dotplot_canvas_width = Math.min(dotplot_canvas_height,dotplot_canvas_width); // dotplot_canvas_height = dotplot_canvas_width; // Calculate positions/padding for labels, etc. 
chrom_label_y_offset = bottom_edge_padding/10; contig_label_x_offset = -left_edge_padding/10; } var info_stats = ""; function load_data() { console.log("Starting to load data from file"); d3.select("#panel").style("visibility",'visible'); message_to_user("Loading data"); d3.csv(directory + nickname + ".info.csv", function(error,info_input) { if (error) throw error; for (var i = 0; i cumulative_ref_size) { var xdom0 = dotplot_ref_scale.domain()[0] - dotplot_ref_scale.domain()[1] + cumulative_ref_size; dotplot_ref_scale.domain([xdom0, cumulative_ref_size]); } if (dotplot_query_scale.domain()[0] < 0) { dotplot_query_scale.domain([0, dotplot_query_scale.domain()[1] - dotplot_query_scale.domain()[0] + 0]); } if (dotplot_query_scale.domain()[1] > cumulative_query_size) { var ydom0 = dotplot_query_scale.domain()[0] - dotplot_query_scale.domain()[1] + cumulative_query_size; dotplot_query_scale.domain([ydom0, cumulative_query_size]); } redraw_on_zoom(); dotplot_container.select("#ref_axis").call(dotplot_ref_axis); dotplot_container.select("#query_axis").call(dotplot_query_axis); }); dotplot_canvas.call(zoom); draw_chromosome_labels(); } function redraw_on_zoom() { draw_alignments(); draw_chromosome_labels(); } function hover_alignment(d) { d3.select("#hover_message").selectAll("p").remove(); d3.select("#hover_message").append("p").text("Reference = " + d.ref + ": " + d.ref_start + " - " + d.ref_end) d3.select("#hover_message").append("p").text("Query = " + d.query + ": " + d.query_start + " - " + d.query_end) } function draw_alignment(updateSelection) { updateSelection .filter(filter_to_view) .style("stroke-width",1) .style("stroke", function(d) { if (d.tag=="repetitive") { return "red"; } else {return "black";} }) .attr("fill","none") .style("cursor", "crosshair") .on("mouseover", hover_alignment) .each(function (d) { var x1 = dotplot_ref_scale(d.abs_ref_start); var x2 = dotplot_ref_scale(d.abs_ref_end); var y1 = dotplot_query_scale(d.abs_query_start); var y2 = 
dotplot_query_scale(d.abs_query_end); var tangent = (y2-y1)/(x2-x1); var new_x1 = x1; var new_y1 = y1; var found_solution_1 = true; /////////////////// point 1 /////////////////// if (x1 < 0 || y1 > dotplot_canvas_height || x1 > dotplot_canvas_width || y1 < 0) { found_solution_1 = false if (x1 < 0) { // left wall var new_x = 0; var new_y = y1 - x1 * tangent; if (new_x >= 0 && new_x <= dotplot_canvas_width && new_y >= 0 && new_y <= dotplot_canvas_height) { new_x1 = new_x; new_y1 = new_y; found_solution_1 = true; } } if (found_solution_1 == false && y1 > dotplot_canvas_height) { // floor var new_x = (dotplot_canvas_height-y1)/tangent + x1; var new_y = dotplot_canvas_height; if (new_x >= 0 && new_x <= dotplot_canvas_width && new_y >= 0 && new_y <= dotplot_canvas_height) { new_x1 = new_x; new_y1 = new_y; found_solution_1 = true; } } if (found_solution_1 == false && x1 > dotplot_canvas_width) { // right wall var new_x = dotplot_canvas_width; var new_y = y1+tangent*(dotplot_canvas_width-x1); if (new_x >= 0 && new_x <= dotplot_canvas_width && new_y >= 0 && new_y <= dotplot_canvas_height) { new_x1 = new_x; new_y1 = new_y; found_solution_1 = true; } } if (found_solution_1 == false && y1 < 0) { // ceiling var new_y = 0; var new_x = x1 + (0-y1)/tangent; if (new_x >= 0 && new_x <= dotplot_canvas_width && new_y >= 0 && new_y <= dotplot_canvas_height) { new_x1 = new_x; new_y1 = new_y; found_solution_1 = true; } } } /////////////////// point 2 /////////////////// var new_x2 = x2; var new_y2 = y2; var found_solution_2 = true; if (x2 < 0 || y2 > dotplot_canvas_height || x2 > dotplot_canvas_width || y2 < 0) { found_solution_2 = false; if (x2 < 0) { // left wall var new_x = 0; var new_y = y1 - x1 * tangent; if (new_x >= 0 && new_x <= dotplot_canvas_width && new_y >= 0 && new_y <= dotplot_canvas_height) { new_x2 = new_x; new_y2 = new_y; found_solution_2 = true; } } if (found_solution_2 == false && y2 > dotplot_canvas_height) { // floor var new_x = (dotplot_canvas_height-y1)/tangent + 
x1; var new_y = dotplot_canvas_height; if (new_x >= 0 && new_x <= dotplot_canvas_width && new_y >= 0 && new_y <= dotplot_canvas_height) { new_x2 = new_x; new_y2 = new_y; found_solution_2 = true; } } if (found_solution_2 == false && x2 > dotplot_canvas_width) { // right wall var new_x = dotplot_canvas_width; var new_y = y1+tangent*(dotplot_canvas_width-x1); if (new_x >= 0 && new_x <= dotplot_canvas_width && new_y >= 0 && new_y <= dotplot_canvas_height) { new_x2 = new_x; new_y2 = new_y; found_solution_2 = true; } } if (found_solution_2 == false && y2 < 0) { // ceiling var new_y = 0; var new_x = x1 + (0-y1)/tangent; if (new_x >= 0 && new_x <= dotplot_canvas_width && new_y >= 0 && new_y <= dotplot_canvas_height) { new_x2 = new_x; new_y2 = new_y; found_solution_2 = true; } } } // console.log(found_solution_1 + " -- " + found_solution_2); if (!(found_solution_1 && found_solution_2)) { // Don't draw if it new_x2 = new_x1; new_y2 = new_y1; } d3.select(this).attr({ x1:new_x1, y1:new_y1, x2:new_x2, y2:new_y2 }) }) } function filter_to_view(d) { if (refs_selected != null && refs_selected.indexOf(d.ref) == -1) { return false; } if (queries_selected != null && queries_selected.indexOf(d.query) == -1) { return false; } var x1 = dotplot_ref_scale(d.abs_ref_start); var x2 = dotplot_ref_scale(d.abs_ref_end); var y1 = dotplot_query_scale(d.abs_query_start); var y2 = dotplot_query_scale(d.abs_query_end); // if (num_alignments_in_view >= max_num_alignments) { // return false; // } else { if (!((x1 < 0 && x2 < 0) || (x1 > dotplot_canvas_width && x2 > dotplot_canvas_width) || (y1 < 0 && y2 < 0) || (y1 > dotplot_canvas_height && y2 > dotplot_canvas_height))) { num_alignments_in_view += 1; return true; } else { return false; } // } } var current_draw_ID = 0; function draw_lines(svg, data, batchSize) { num_alignments_in_view = 0; var filtered_data = data.filter(filter_to_view); var alignments = svg.selectAll('line.alignment').data(filtered_data); current_draw_ID += 1; var this_draw_ID = 
current_draw_ID; function drawBatch(batchNumber) { return function() { console.log("drawBatch"); var startIndex = batchNumber * batchSize; var stopIndex = Math.min(filtered_data.length, startIndex + batchSize); var updateSelection = d3.selectAll(alignments[0].slice(startIndex, stopIndex)); var enterSelection = d3.selectAll(alignments.enter()[0].slice(startIndex, stopIndex)); var exitSelection = d3.selectAll(alignments.exit()[0].slice(startIndex, stopIndex)); enterSelection.each(function(d, i) { var newElement = svg.append('line')[0][0]; enterSelection[0][i] = newElement; updateSelection[0][i] = newElement; newElement.__data__ = this.__data__; }).attr("class","alignment"); exitSelection.remove(); draw_alignment(updateSelection); if (stopIndex >= filtered_data.length) { message_to_user("Done"); } else { if (current_draw_ID == this_draw_ID) { setTimeout(drawBatch(batchNumber + 1), 0); } } }; } setTimeout(drawBatch(0), 0); } function draw_alignments() { message_to_user("Drawing alignments"); calculate_positions(); dotplot_canvas.selectAll("line.alignment").remove(); var BATCH_SIZE = 1000; draw_lines(dotplot_canvas.data([0]), coords_data, BATCH_SIZE); } function clear_chromosome_labels() { dotplot_canvas.selectAll("line.chromosome").remove(); dotplot_container.selectAll("text.chromosome").remove(); dotplot_canvas.selectAll("line.contig").remove(); dotplot_container.selectAll("text.contig").remove(); } function draw_chromosome_labels() { clear_chromosome_labels(); ////////////////////////////// Reference labels ////////////////////////////// dotplot_canvas.selectAll("line.chromosome") .data(ref_chrom_label_data) .enter() .append("line") .filter(function(d) {return ((refs_selected == null || refs_selected.indexOf(d.chrom)!=-1) && (dotplot_ref_scale(d.pos) > 0 && dotplot_ref_scale(d.pos) < dotplot_canvas_width))}) .attr("class","chromosome") .style("stroke-width",1) .style("stroke", "gray") .attr("fill","none") .attr("x1",function(d){ return dotplot_ref_scale(d.pos); }) 
.attr("y1",0) .attr("x2",function(d){ return dotplot_ref_scale(d.pos); }) .attr("y2",dotplot_canvas_height); dotplot_container.selectAll("text.chromosome") .data(ref_chrom_label_data) .enter() .append("text") .filter(function(d) {return ((refs_selected == null || refs_selected.indexOf(d.chrom)!=-1) && (!(dotplot_ref_scale(d.pos + d.length) < 0 || dotplot_ref_scale(d.pos) > dotplot_canvas_width)))}) .attr("class","chromosome") .attr("text-anchor", "end") .attr("dominant-baseline","middle") .attr("y",function(d) { if (dotplot_ref_scale(d.pos) > 0 && dotplot_ref_scale(d.pos + d.length) < dotplot_canvas_width) { return dotplot_ref_scale(d.pos+d.length/2); } else if (dotplot_ref_scale(d.pos + d.length) < dotplot_canvas_width) { // If end of chromosome is showing, put label at average of left wall and end of chromosome return (dotplot_ref_scale(d.pos+d.length) + 0)/2; } else if (dotplot_ref_scale(d.pos) > 0) { // If start of chromosome is showing, put label at average of start and the right wall return (dotplot_ref_scale(d.pos)+dotplot_canvas_width)/2; } else { return dotplot_canvas_width/2; } }) .attr("x",function(d) {return -dotplot_canvas_height-chrom_label_y_offset;}) .text(function(d) {return d.chrom; }) .style("fill","gray") .style("font-size",function(d) { return Math.min(dotplot_ref_scale(d.length) , ((bottom_edge_padding*0.8) / this.getComputedTextLength() * 14)) + "px"; }) .attr("transform", "rotate(-90)") .on("click",zoom_to_chromosome) .on("contextmenu", function (d, i) { d3.event.preventDefault(); reset_selections(); }); ////////////////////////////// Query labels ////////////////////////////// if (query_chrom_label_data.length < 200 || queries_selected != null) { dotplot_canvas.selectAll("line.contig") .data(query_chrom_label_data) .enter() .append("line") .filter(function(d) {return (dotplot_query_scale(d.pos) > 0 && dotplot_query_scale(d.pos) < dotplot_canvas_height)}) .attr("class","contig") .style("stroke-width",1) .style("stroke", "gray") 
.attr("fill","none") .attr("x1",0) .attr("y1",function(d){ return dotplot_query_scale(d.pos); }) .attr("x2",dotplot_canvas_width) .attr("y2",function(d){ return dotplot_query_scale(d.pos); }); dotplot_container.selectAll("text.contig") .data(query_chrom_label_data) .enter() .append("text") .filter(function(d) {return !(dotplot_query_scale(d.pos) < 0 || dotplot_query_scale(d.pos + d.length) > dotplot_canvas_height)}) .attr("class","contig") .attr("text-anchor", "end") .attr("dominant-baseline","middle") .attr("y",function(d) { if (dotplot_query_scale(d.pos) < dotplot_canvas_height && dotplot_query_scale(d.pos + d.length) > 0) { return dotplot_query_scale(d.pos+d.length/2); } else if (dotplot_query_scale(d.pos + d.length) > 0) { // If end of chromosome is showing, put label at average of floor and end of chromosome return (dotplot_query_scale(d.pos+d.length) + dotplot_canvas_height)/2; } else if (dotplot_query_scale(d.pos) < dotplot_canvas_height) { // If start of chromosome is showing, put label at average of start and the ceiling return (dotplot_query_scale(d.pos)+0)/2; } else { return dotplot_canvas_height/2; } }) .attr("x",function(d) {return contig_label_x_offset;}) .text(function(d) {return d.chrom; }) .style("fill","gray") .style("font-size",function(d) {return Math.min(dotplot_query_scale(d.pos)-dotplot_query_scale(d.pos + d.length), left_edge_padding*0.8 / this.getComputedTextLength() * 14) + "px"; }) .on("click",zoom_to_contig) .on("contextmenu", function (d, i) { d3.event.preventDefault(); reset_selections(); }); } } function reset_selections(){ refs_selected = null; queries_selected = null; clear_chromosome_labels(); use_indices(); draw_dotplot(); draw_alignments(); } function measure_shared_sequence_ref(query,ref) { var shared_sequence = 0; for (var i = 0; i < coords_data.length; i++) { if (coords_data[i].ref == ref && coords_data[i].query == query) { shared_sequence += Math.abs(dotplot_ref_scale(coords_data[i].abs_ref_end) - 
dotplot_ref_scale(coords_data[i].abs_ref_start)); } } return shared_sequence; } function measure_shared_sequence_query(query,ref) { var shared_sequence = 0; for (var i = 0; i < coords_data.length; i++) { if (coords_data[i].ref == ref && coords_data[i].query == query) { shared_sequence += Math.abs(dotplot_query_scale(coords_data[i].abs_query_end) - dotplot_query_scale(coords_data[i].abs_query_start)); } } return shared_sequence; } var min_shared_seq_in_pixels = 5; function zoom_to_chromosome(d) { console.log("zoom to chromosome"); console.log(d.chrom); // console.log(matching_queries_by_ref[d.chrom]); refs_selected = [d.chrom]; var potential_queries_selected = matching_queries_by_ref[d.chrom]; // console.log(queries_selected); queries_selected = potential_queries_selected; use_indices(); // Narrow down to queries with at least a small shared sequence queries_selected = []; for (var i = 0; i < potential_queries_selected.length; i++) { if (measure_shared_sequence_ref(potential_queries_selected[i],d.chrom) >= min_shared_seq_in_pixels) { queries_selected.push(potential_queries_selected[i]); } } clear_chromosome_labels(); use_indices() draw_dotplot(); draw_alignments(); } function zoom_to_contig(d) { console.log(d.chrom); console.log(matching_refs_by_query[d.chrom]); queries_selected = [d.chrom]; var potential_refs_selected = matching_refs_by_query[d.chrom]; refs_selected = potential_refs_selected; use_indices(); console.log(potential_refs_selected); // Narrow down to queries with at least a small shared sequence refs_selected = []; for (var i = 0; i < potential_refs_selected.length; i++) { // console.log(potential_refs_selected[i]); // console.log(measure_shared_sequence_query(d.chrom,potential_refs_selected[i])); if (measure_shared_sequence_query(d.chrom,potential_refs_selected[i]) >= min_shared_seq_in_pixels) { refs_selected.push(potential_refs_selected[i]); } } console.log(refs_selected); clear_chromosome_labels(); use_indices(); draw_dotplot(); draw_alignments(); } 
function message_to_user(message) { d3.select("#user_message") .text(message) } window.onresize = resizeWindow; function resizeWindow() { clear_chromosome_labels(); responsive_sizing(); draw_dotplot(); draw_alignments(); } Assemblytics_web-master/js/render_queue.js000066400000000000000000000030061304413665500213110ustar00rootroot00000000000000var renderQueue = (function(func) { var _queue = [], // data to be rendered _rate = 1000, // number of calls per frame _invalidate = function() {}, // invalidate last render queue _clear = function() {}; // clearing function var rq = function(data) { if (data) rq.data(data); _invalidate(); _clear(); rq.render(); }; rq.render = function() { var valid = true; _invalidate = rq.invalidate = function() { valid = false; }; function doFrame() { if (!valid) return true; var chunk = _queue.splice(0,_rate); chunk.map(func); timer_frame(doFrame); } doFrame(); }; rq.data = function(data) { _invalidate(); _queue = data.slice(0); // creates a copy of the data return rq; }; rq.add = function(data) { _queue = _queue.concat(data); }; rq.rate = function(value) { if (!arguments.length) return _rate; _rate = value; return rq; }; rq.remaining = function() { return _queue.length; }; // clear the canvas rq.clear = function(func) { if (!arguments.length) { _clear(); return rq; } _clear = func; return rq; }; rq.invalidate = _invalidate; var timer_frame = window.requestAnimationFrame || window.webkitRequestAnimationFrame || window.mozRequestAnimationFrame || window.oRequestAnimationFrame || window.msRequestAnimationFrame || function(callback) { setTimeout(callback, 17); }; return rq; });Assemblytics_web-master/list_plots.php000066400000000000000000000003611304413665500205620ustar00rootroot00000000000000 Assemblytics_web-master/resources/000077500000000000000000000000001304413665500176675ustar00rootroot00000000000000Assemblytics_web-master/resources/error_image.png000066400000000000000000001016111304413665500226700ustar00rootroot00000000000000PNG  IHDRM 
iCCPICC Profile8U]hU>sg#$Sl4t? % V46nI6"dΘ83OEP|1Ŀ (>/ % (>P苦;3ie|{g蹪X-2s=+WQ+]L6O w[C{_F qb Uvz?Zb1@/zcs>~if,ӈUSjF 1_Mjbuݠpamhmçϙ>a\+5%QKFkm}ۖ?ޚD\!~6,-7SثŜvķ5Z;[rmS5{yDyH}r9|-ăFAJjI.[/]mK 7KRDrYQO-Q||6 (0 MXd(@h2_f<:”_δ*d>e\c?~,7?& ك^2Iq2"y@g|UP`oYiTXtXML:com.adobe.xmp 1 L'Y@IDATx m|7f(C ATī͠**i(ZۗUhVcZ5T)-چ 1Cb!A"^sysܛ|k=sNn~g$ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @vYCM6^ Tds9\V*Qy@l]xWej} lE[Sql-ܰvn|ߝKZ{z\gW⒒ɵ[%:˱$ߴtM*3jMܡY|s܎ءo/j{+]z[;{oG"V$΋W*~X!]_.XX ?ːjiOv<|Nf?k|vB"ju^3bܿe//c\'D]"@K@ӤUoF _Rvt6afe}YMyߴ<:EҢ-.'oK1$ӼU{ؖU񲊏W|"??6WEK*򏈝QqR*S1WV<"+"g^q3*NH*V\lWL>\kIWFﯲ#vM|W&< 4uR~"l*Xv‡T+c5Χ_XKɟW[[*׬r9"?'ٗ/VwS*_1-m>Eo|[YU95OڏxEoŤR9ת+^SqZ)͊Eғj, &|D^"&?x~*fk揭xm'**V[l<˵}?OEic/ךT*#Fw+>Y_u4N+Uuic6ަ6nn 'LwBݲyj9 
5}c暴K\MUΊV{x~7+]*zNm:ͽCMOs!ypסm΋wTȽ*گ+;wڭӊUϳ+NNJ[T<}jU|gVWĕ+MrkϽ>χTLJ\/V<WP+j"nyNKL:F"P5k{q纟2;@n gU"uKLݵN޿ΩHI*o/ -\:c֗c+&=P8,[~P幹}qJ۫}:fZ?Zow< v25N yʤy@MJy 1Jl/ ̏URP+9^9n|]df-z:{w+X*Qe<=b*M\ zc*.3x=Χehs.4Ӥ{Ck)ۧe6lSf}<,?gN}"< oc?UqIi]UfNC]ێGUS3۽)y5Uw>LkwD_1䳍yε6i?Ⱦ2cm]V杮gڧE~YEy_>"羜{ݢn=]+o~έEwToowm}[/H^,(C _`PlX>nRgUاb߫ZENME*KtE.{WUdx>lW}"0 Բ*)y/7"<>}\Uq׊ZϺo^ѧ5~O ζú>{Hw*ՊVQWH+ZjYonY6/sxLER5+ZzQeRÊ?8w_Q󤶝Y+-*MxCE֛Hq ];VTZşW|"Vi)C֙$Wv m_]qT^؊++ϊW.֏~5Ӗ;c"V'Ws'nW1OR\ZzdUm)j*x~EʲlT)7+Re_qߊW#˵uP>-zT<vO*ZZeh۝+Vh&W1On5vEfV\݊7Uua1eTmWv,e_ui~eO^I#&3fWMoYU]'?:JS܊a\T)'m\ uW̘?}X&,\gycR3{T$y?30׼G߿A90r?"}_cOzfZްR=7w oS;~YǴfJTOk&'w{Vˋ>r9W*'[YġOGL[b:!ƓsIm;+4Z8~KEXjjvϮSLHyIL}v]F:BzTd~\?]\M"y!ܭ*O|OfZ5 .;@8>g\yԴQS{[A7{cR ~+YΑWm۲7\ΏmsT1m΅f7 ۜ}w}^~̱m8NS\gTͮ{'MGK\3M߹s4;~WEfRa:Q]*֚NY&[2~V*ӖOϩ强C_1c S̺,>6ZEf7To"c*V1N^/󹎲+i[ egNYCj{fzϕ7]l2}I&efER^r厬>i)/ikM}Őò )gG)~UY^],6zV̊/}uʹ}Y̹r9FR}Irm]Zr?(_\jfIU+ъUL[vϪkܧӇu溹zŤow |y nWmyx_eZ{Zh~8Qj[NClvGu] ׫ie&%7>kyֺi} el9+ҭ0mMj01Uu3]u6˜OѦ\dvS4C/Rӂܗ?-Z~&<"sJfFi}JW+UE3S?lJ?GvVf>[ѧ*LWY+vXkjj|=)ݰ Sk2D]~S;-SF_e{,;+-soXM/T?xXy>feyvJv///S2+ӟ[is黥y{o4%C <>A{Oؓ Ov!<4^tqXGyYW^_e;u-Wer GӿĬQ`L?Y]|+gs\nY.#+p[ԶS:쿵m4酊sڷvOzu뛔}]2Is[.5 eY]8o-U ˝0Zud6 ҕCinULk4|y֚{e{[0]}awh5|wܸbȾ}r2FM6gmw?;icc8пÍX'{2]Cyw' 0SyRBm`kRlGD_i2UdQi4{BE[oeI +qۦMYz\/S^Sjv{]krkY;ۏ~*f[\{[ mvյOUzC[MKy}q*raf8Ue﬊qZtz}n~] 錚Ma~_܅3??-}ۖn~RUO3C;-TZu8NK&Z>Y+XҊgWdߎϊ1RG|BK%WV%letIi[&+˹r'--s eWNt4/W ˴ iǢۨgtܟ9^TˮvٌmZV=c{}y*r <.WZ|Zcڶ[cc=6nk9ٮL}繘{;UTk?ϕ|poA9uH哎92YtPvlZg8ʴtNU~6m_yXEy>f嗽7?k1ǸZ}?]yַ_Tܫ-w|[ZhۨUgY_ygXCeZy6m[vl[? 
ߐס;]A pz*䡖wTMBxTEye].\Oٚ[&4К熳H7*J5 UA{V$"/OY/*̾q*vh*^P"zf-Hګۊ4 wٍާ/Ms?OڲYQS9.tM>)jiq_wv0Œ/sv6{rl魓)Y6e ry%?^v>?-|Zc6qC,v>.rXsݸPitF {f3?U\"%T/AyF]ί,H3o4M+kNyJTe{j45yFL*]0΍khE[_}=؊*U񤊤]+~cky0{d=.l e׷.zݯa5Y@NXi ٭n.6/zxz\gTG]+8"Td`tC*W"˵CUUP eUt*$/*WV}1η[*hԯGjY tZ%KAC>AnQx1K{*;Ԛ"ϧ+vاW$MZF_Կ\ϓelVM2 ]-}sJyߔn.rDIٓ tu9ھ\٩eFqeYۯ2)*uwTD +t}Uok+5m7jf03\ޱ̱i}Zv飙,H׮T~2MrdoyIʹ{Z2 ՞^r}ﲆmy"c^iiF}MZz_Vgg> :"?γ'vۈ{o;^}4@,хEC74i)?Y΋nxzŤ*ͦp~]؊Wj%d=R=a^H`9O*OQ;w\6${ל/~?wl}u~a{--s e׬c_V䘶r$]ĿL,paw^ۋof;M[Ry6 9fT/z_W6~אjC+& S׮J2icں۹viejei-oDf3ÆXӃkڞS!ad=hNZG;sd}"O?Yqu- Fy^fkġGԴ &T~ҍ|hX>5K8Y^|[܏.fi<0u{C6xI%У*&eγquV 9fVp˚<ǓVUƅko}_4\ )1[m3>V JsiiZvcuJӬ~7n]ۮc+AKiI[Ο\!{GߩxPdZ7,UޮwMig[=I92o{a->6e[AYpZuKE|0}_y޼b:Sv˺}}o={A!pʟ 1&>ޑܠrCr_ص"芔qqگ 2Kt..Z]k}ZKE3kEX3;/&I鯪0gwר|Ʃˠkߊ>mԝyA֦YdҧGLI<޼O\ӫV$]Ѷ鵕=]@U5v]*6uwdoP--]ڧӫ޳*ms*)m^U1N\wںc>^3߬HwW\~i 'L1k/L,۰\֑rC+^6"NۆܗZ_/|RO9x>=fZ/|ǥMVzg֔k4C2,V5u}][.6Hii5ߧeϧeYٷk{Veu6!l)ݧt"}Xϓrr mҮjvg~O:"}YYu}ӻ~+ d\ѧipD_Q'tu9;^2N)Kߓ/GuǍ/{oX)<׳5/jE[O{O:]ZOlYwzmmcL \6]mn6ŗ|W|gҢeT|+i|zimEʟX^2ͷt㴹 6mǃήl~4ue^+ry8G?OZ3m/U['rw@yΕS+x\ŧ+|]>iiյ^ FEۗV>8e_VuOTS]5{z^hwTE?nZ)LoۖuU*Ӗ{ߐ}oE\8m}z\˹Rvf;ާTYErY66oȵڼ?Rm{W$WyicuVk۾Ls_k]sͼ"gVi{2ٞIi}\YϨI\=_[o]x|EN|b.W6,NGv d}ohuϳ}Z#grk?Ss]y=**hˎ/˼/soXwlѯ#eq_|λ*ۼ}_xvzֻrd6w>MmDb/sm9q?WSZ=a;_-t煬Սya˅6N.~2σi̿⠊Ii^tZ+b} qR\iʡFCˀn|7$dv x&9/"/Yw*i*&շo <U19Wmgαi.U3~ݹ9dC+^FS{QZQ·Wi>$u|ۭ;i9~O`y}n&7r,gX++8*:~u@;]*&TW~*˹\,^ϺNE9fmWKm4{V-sߊ3_ҳ9U}&eEs6@Yt6}zFqzcqE7&V-{JR?b]{BZڽxڳ'[Nlʲsپ֮MS\j[B9>t{ܤue +ݶLhhEcL3Lg-S:@_f}ڧx]B~~Y'Yb[ ݶbVFtj+>\1--\o_qoU|t1!ˀ}ȃtO˯S+TW|습ݩb<kI\BE+PԯTl+RI(" ?5RC~gU=r?:3`>"8-s.[4lzW';Q=⼊rߦ"x~b=S9fkنlsô&e6l}Z_~>29S0ϕ+N-,o5DrNvMvI} "3Ӷ7l}}&Όm6aַ p)8;RM`@ڸڤ > ^  Mv&k;#Ϸڱw @ pqavkkTy @K%ڵߴt|e͘ @ @` [TlG>Wql*$.,Cy2Gm"{gnY  @ @ @ @ @ @ @ @ @ @ @UjK=IDAT @ @ @%ӥkw-=ְTTd m5!0M`ϪrY_hՇerp7ej7qE-IWص߭hi\Gj(\3.Q{fg @حΠ{-qnts*_!Wॵ@ε]pFk3m[J?mGPʮ,ٟ+KkrOd"@G K ۾}*Y[WH.O;i#_\ɰi;ֲmmܑܑm%@+W;;kß~q{eT$ݲMvqIOHi?O ?m b+Vs~xk*NȯoV̓mi[ﯲ#i~ЧjO+V+'WcEIfU}I vYhwڿxOWEE~itjm}řQ}+֚r b#ift%?sc*>[/Vd۟R1?xZ&+Z:2 juekͮQ[ZdNU۶t>xS 
*˵^j~@8l<{k8uF,YU}W͍3jʤ`niOɢF}Z=?4*ݜT]Ii{M^sBmC*t߹?Pimc[]+ngъnJU~  @`۔"eS-mT芼D߻9?H+I݇+3k+lPii-3j+/+[*֚O5l6eC6ӵ u߫*W]n}gfZݡ}(}[m;rl_q?tE2tء.?v`c*m)@Q}Z%H:"Ǵ-a>etʴe|ӕ_7,e+֒{|xtbeRV0L74'dQq*ȶͲZo<*8"||Ń*ؿ"_HiS,zI?Sq9\\xCEi;)-Ug7iR0OW^uv}YU @k_:SşU+{En{?oA+f=mP^ R{<႒fYE9M+}i>X%gշzє TҵyN Е+󾊴Ku+ZT 4iӕ8"}"ۧ+;+vWV]FuRŻ+|9o[ZL7>{ͤ}a#/lkof3N'VA:@_hI󳬖9?'/9q5qbX]AwۧW>wooHPf^.]m`}+2 @|rKb{!Kc̑/`J*O ffRRi=/RjO({Ke۾^.5fEۻ [mSM]]8mʲUlS7|Ef?XH|"y1-ݮ*&xWy8bRڳ :ȷIò)Hۿ Te32n5|?yb~@̽f{zür?ii==(NJ: C|+H z2F" 20ɷiEܻ"ߴ曕|s r 'LWĭl3P7}\~ʷoh}7š33@ηR^SEsvjEu~jN|*n=%TklU*rάg(?·E+C[^OuPڞF}~DVq|Г8\?UU1Nd{M[6}ns.>sZ}h==럔6=aI(#@E^\v @`/Y_U?WW jLW_lf4ofv4g*WqxUdgER% \i;+R9єLm띆'i~ ΙuA̤sfk⿭ȱjZfΊ;8ݞF}~~\7UMEC[ fzLşUh&kڲ6?Ζo[j gͪTGA`a#m#Z-ae:)86h* ֐O\tMܵZUͯQ{TX 6Nt9'sz\u{?&5ۚͷΙujf,T\wԚ"+>\ڇ?J˨ߍ߫"]+8zT",ri˦mye}}ZZm_d @b/`~?D6uh/*? S7I3momڭ;fy͊ :2@?")~'ls#pZ{`rZXf[2H7{)I9gZmfO38 !/=V(-_6D~+[qVJERLlOϭğyۗm @`1H ? mb=|;vtWiWIyR*Ay~a=i>4)ߞj4Ki&qNYy;?Uz 5I[QYyExM' S}M&9>+m/,/1턆96xPןd{M[6}`/ّ}ݶ(0@_RX67?Sɀ5/ky҇t Yew{ ēxBŵ3SO.7߰g(y̅j.|3xiϛtSWos\l3yfW2)*+߆޻ALc;tuo*tx|"˾YSwDEb{>V1qvղG4NCY?5yڼ2s˼o?-h =PÁA)~E5W֟.bKE-\O+5Ӗ;]y{U$<=-{j峾Ӈ 0ågYѶwl,eYׇyΊh{F_r yeפ}mzmʆJ @kح ֚>Y rϘP^3im{IisjżO8Ⱞ.Z^No@2kI7FmWWYhתχh4eϫLŴAz?̲Y{ʞYѧ5eo__hݦ9.{V Ǥsml7MZX@U @{WvU|"-rx,ms="ًKʷשxIŃ׸Q{T[ qvMOhw*VKT}+2ZEt[rLn[u烔V5_ؿ"jdW*Iy6Uά|E2ظcENȀ~lv*1~b{6jU-s~Lf9gaExV1X--sٞjW= @J ze`S"@ @kY @؆WoS*2@{D @lC/oن* @ @`@Mϩ$ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ ۡA P0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 
0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 
0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0` 0`9bzoGIENDB`Assemblytics_web-master/run_algorithm.php000066400000000000000000000034721304413665500212460ustar00rootroot00000000000000 > user_data/ERRORS/run_algorithm.log');} $code=$_POST["code"]; if( !isset($_POST['nickname']) ) { echo shell_exec('echo ERROR: No nickname passed to run_algorithm.php >> user_data/$code/run_algorithm.log');} if( !isset($_POST['uniqlength']) ) { echo shell_exec('echo ERROR: No uniqlength passed to run_algorithm.php >> user_data/$code/run_algorithm.log');} if( !isset($_POST['min_size']) ) { echo shell_exec('echo ERROR: No min_size passed to run_algorithm.php >> user_data/$code/run_algorithm.log');} if( !isset($_POST['max_size']) ) { echo shell_exec('echo ERROR: No max_size passed to run_algorithm.php >> user_data/$code/run_algorithm.log');} $nickname = $_POST["nickname"]; $uniqlength = $_POST["uniqlength"]; $min_size = $_POST["min_size"]; $max_size = $_POST["max_size"]; $url="analysis.php?code=$code"; $filename="user_uploads/$code"; $oldmask = umask(0); mkdir("user_data/$code"); umask($oldmask); echo shell_exec("./bin/web_pipeline $filename user_data/$code/$nickname $uniqlength $min_size $max_size &> user_data/$code/run_algorithm_errors.log &"); $new_dataset = array( "date"=>time(), "codename"=>$code, "description"=> $nickname ); $my_datasets = array(); if(isset($_COOKIE["results"])) { // echo "cookie is already there, adding to it."; $my_datasets = 
json_decode($_COOKIE["results"], true); } else { // echo "cookie not set, creating new one"; } array_push($my_datasets, $new_dataset); setcookie("results", json_encode($my_datasets)); header('Location: '.$url); ?> Assemblytics_web-master/title.html000066400000000000000000000003321304413665500176620ustar00rootroot00000000000000

Assemblytics

Analyze your assembly by comparing it to a reference genome

Assemblytics_web-master/user_data/000077500000000000000000000000001304413665500176245ustar00rootroot00000000000000Assemblytics_web-master/user_data/.htaccess000066400000000000000000000000251304413665500214170ustar00rootroot00000000000000php_flag engine off Assemblytics_web-master/user_uploads/000077500000000000000000000000001304413665500203625ustar00rootroot00000000000000Assemblytics_web-master/user_uploads/.htaccess000066400000000000000000000000251304413665500221550ustar00rootroot00000000000000php_flag engine off