queue : threadSpecificWaitQueues) {
OutputItem item = queue.peek();
while (item != null && item.rank == nextRank) {
if (allEmpty) {
allEmpty = false;
if (queue.size() > queueHighWaterMark)
queueHighWaterMark = queue.size();
}
try {
outputQueue.put(item);
} catch (InterruptedException e) {
Basic.caught(e);
}
nextRank++;
item = queue.poll(); // don't use take(), don't want to block here...
}
}
if (allEmpty) {
if (isClosing) {
outputQueue.put(SENTINEL);
return;
} else
try {
Thread.sleep(1);
} catch (InterruptedException e) {
Basic.caught(e);
}
}
}
} catch (InterruptedException ex) {
Basic.caught(ex);
}
});
thread1.start();
// this thread writes output to file
final Thread thread2 = new Thread(() -> {
try {
while (true) {
OutputItem item = outputQueue.take();
if (item == SENTINEL) {
hasFinishedOutput.countDown();
return;
}
byte[][] strings = item.strings;
if (strings != null) {
for (byte[] string : strings) {
byte b = 0;
for (byte aString : string) {
b = aString;
if (b == 0)
break; // zero-terminated byte string
writer.write((char) b);
}
if (b != '\t') // if this ends on a tab, don't add new line, it is the query-name for BlastTab or SAM
writer.write('\n');
}
}
}
} catch (Exception ex) {
Basic.caught(ex);
}
});
thread2.start();
}
/**
 * Finishes the output.
 * Sets the closing flag, waits until the output thread has counted down the
 * {@code hasFinishedOutput} latch, then writes the collected footer (if any) and flushes the writer.
 * The writer itself is closed only when {@code isFile} is set.
 *
 * @throws java.io.IOException if writing the footer, flushing or closing the writer fails
 */
public void close() throws IOException {
    isClosing = true;

    try {
        hasFinishedOutput.await();
    } catch (InterruptedException ex) {
        Basic.caught(ex);
    }

    final boolean hasFooter = (fileFooter.length() > 0);
    if (hasFooter) {
        writer.write(fileFooter.toString());
    }
    writer.flush();

    if (isFile) {
        writer.close();
    }
}
/**
 * Submits byte strings for output by rank.
 * By rank means that an item is only passed on for output once all items of lower rank
 * have been passed on.
 * The byte arrays are not copied, so the caller must not recycle or overwrite them,
 * because it is unclear when they will actually be written.
 *
 * @param threadId index of the calling thread's wait queue
 * @param rank     each call must have a different rank and no rank can be skipped
 * @param strings  can be null
 */
public void writeByRank(int threadId, long rank, byte[][] strings) {
    final OutputItem item = new OutputItem(rank, strings);
    try {
        threadSpecificWaitQueues[threadId].put(item);
    } catch (InterruptedException ex) {
        Basic.caught(ex);
    }
}
/**
 * Submits a header and a body for output by rank.
 * By rank means that an item is only passed on for output once all items of lower rank
 * have been passed on.
 * The byte arrays are not copied, so the caller must not recycle or overwrite them.
 *
 * @param threadId index of the calling thread's wait queue
 * @param rank     rank of this item
 * @param header   first byte string of the item
 * @param body     second byte string of the item
 */
public void writeByRank(int threadId, long rank, byte[] header, byte[] body) {
    final byte[][] headerAndBody = {header, body};
    writeByRank(threadId, rank, headerAndBody);
}
/**
 * Skips a rank: submits an item without any strings, so that items of higher rank
 * are not held back waiting for this one.
 *
 * @param threadId index of the calling thread's wait queue
 * @param rank     the rank to skip
 */
public void skipByRank(int threadId, int rank) {
    // an item whose strings are null carries only its rank
    writeByRank(threadId, rank, (byte[][]) null);
}
/**
 * Writes the given string directly to the underlying writer, bypassing the rank queues.
 * Intended for text that belongs at the top of the file.
 * NOTE(review): the text only ends up at the top if this is called before any ranked output
 * has been written - confirm against callers.
 *
 * @param string the text to write
 * @throws java.io.IOException if the writer fails
 */
public void writeFirst(String string) throws IOException {
writer.write(string);
}
/**
 * Appends the given string to the file footer.
 * Nothing is written here; the footer buffer is written out by {@code close()}.
 *
 * @param string the text to append to the footer
 */
public void writeLast(String string) {
fileFooter.append(string);
}
}
/**
 * An output item: a rank plus the byte strings to be written for that rank.
 * The strings may be null (an item that only occupies its rank).
 */
class OutputItem {
    final long rank;
    final byte[][] strings;

    /**
     * @param rank    rank of this item
     * @param strings byte strings to write, may be null; not copied
     */
    OutputItem(long rank, byte[][] strings) {
        this.rank = rank;
        this.strings = strings;
    }

    /**
     * @return "rank=" followed by the rank and the concatenated strings (if any)
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder("rank=").append(rank);
        if (strings == null) {
            return text.toString();
        }
        for (final byte[] bytes : strings) {
            text.append(Basic.toString(bytes));
        }
        return text.toString();
    }
}
malt-0.5.2/src/malt/io/RMA6Writer.java 0000664 0000000 0000000 00000027263 14004551276 0017371 0 ustar 00root root 0000000 0000000 /*
* RMA6Writer.java Copyright (C) 2020. Daniel H. Huson
*
* (Some files contain contributions from other authors, who are then mentioned separately.)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package malt.io;
import jloda.util.Basic;
import jloda.util.CanceledException;
import jloda.util.ProgressPercentage;
import malt.MaltOptions;
import malt.Version;
import malt.data.ReadMatch;
import malt.mapping.Mapping;
import malt.mapping.MappingManager;
import megan.classification.Classification;
import megan.core.ContaminantManager;
import megan.core.Document;
import megan.core.SyncArchiveAndDataTable;
import megan.data.IReadBlock;
import megan.data.IReadBlockIterator;
import megan.io.InputOutputReaderWriter;
import megan.rma6.MatchLineRMA6;
import megan.rma6.RMA6Connector;
import megan.rma6.RMA6FileCreator;
import java.io.IOException;
import java.util.Arrays;
/**
* Create an RMA6 file from SAM data in Malt
*
* Daniel Huson, 6.2015
*/
public class RMA6Writer {
private final RMA6FileCreator rma6FileCreator;
private final String rma6File;
private final boolean parseHeaders;
private final String[] cNames;
private final int maxMatchesPerQuery;
private final MaltOptions maltOptions;
private final MatchLineRMA6[] matches;
private final int[][] match2classification2id;
private byte[] queryText = new byte[10000];
private byte[] matchesText = new byte[10000];
/**
 * Sets up the writer: announces the file on stderr, allocates one reusable match line and
 * one classification-id row per possible match of a query, creates the RMA6 file creator,
 * writes the file header and starts the queries section.
 *
 * @param maltOptions options; parse-headers flag, maximum alignments per query and mode are read here
 * @param rma6File    name of the RMA6 file to create
 * @throws IOException if creating the file or writing its header fails
 */
public RMA6Writer(final MaltOptions maltOptions, String rma6File) throws IOException {
    System.err.println("Starting file: " + rma6File);

    this.maltOptions = maltOptions;
    this.rma6File = rma6File;
    this.parseHeaders = maltOptions.isParseHeaders();
    this.maxMatchesPerQuery = maltOptions.getMaxAlignmentsPerQuery();
    this.cNames = MappingManager.getCNames();

    final int numberOfClassifications = this.cNames.length;
    final int taxonMapperIndex = Basic.getIndex(Classification.Taxonomy, Arrays.asList(this.cNames));

    this.matches = new MatchLineRMA6[this.maxMatchesPerQuery];
    int slot = 0;
    while (slot < this.matches.length) {
        this.matches[slot] = new MatchLineRMA6(numberOfClassifications, taxonMapperIndex);
        slot++;
    }
    this.match2classification2id = new int[this.maxMatchesPerQuery][numberOfClassifications];

    this.rma6FileCreator = new RMA6FileCreator(rma6File, true);
    this.rma6FileCreator.writeHeader(Version.SHORT_DESCRIPTION, maltOptions.getMode(), this.cNames, false);
    this.rma6FileCreator.startAddingQueries();
}
/**
 * Processes the matches associated with a given query and adds the query to the RMA6 file.
 * This is used in malt1.
 * <p>
 * The query text is the header line followed by the sequence line. The matches text contains
 * one line per match, consisting of the query name, a tab and the match's RMA6 text.
 * For each match and each classification an id is determined: first from the reference header
 * (only if header parsing is enabled and the match text has a second word), otherwise from the
 * loaded mapping for that classification, if there is one.
 *
 * @param queryHeader     header line of the query
 * @param querySequence   sequence of the query
 * @param matchesArray    the matches; only the first numberOfMatches entries are used
 * @param numberOfMatches number of matches; capped at the maximum number of matches per query
 * @throws IOException if adding the query to the file fails
 */
public synchronized void processMatches(String queryHeader, String querySequence, ReadMatch[] matchesArray, int numberOfMatches) throws IOException {
    // setup query text:
    final byte[] queryName = Basic.swallowLeadingGreaterSign(Basic.getFirstWord(queryHeader)).getBytes();
    final byte[] queryHeaderText = queryHeader.getBytes();
    final byte[] querySequenceText = querySequence.getBytes();
    if (queryHeaderText.length + querySequenceText.length + 100 > queryText.length) {
        queryText = new byte[100 + queryHeaderText.length + querySequenceText.length];
    }
    System.arraycopy(queryHeaderText, 0, queryText, 0, queryHeaderText.length);
    int queryTextLength = queryHeaderText.length;
    queryText[queryTextLength++] = '\n';
    System.arraycopy(querySequenceText, 0, queryText, queryTextLength, querySequenceText.length);
    queryTextLength += querySequenceText.length;
    queryText[queryTextLength++] = '\n';

    final String[] key = new String[cNames.length];
    for (int i = 0; i < cNames.length; i++) {
        key[i] = getKey(cNames[i]);
    }

    // setup matches text:
    int matchesTextLength = 0;
    numberOfMatches = Math.min(maxMatchesPerQuery, numberOfMatches);
    for (int m = 0; m < numberOfMatches; m++) {
        final ReadMatch match = matchesArray[m];
        final byte[] matchText = match.getRMA6Text();

        // grow the shared buffer if this line might not fit (100 bytes of slack for separators)
        final int approximateLengthToAdd = matchesTextLength + matchText.length + queryName.length;
        if (approximateLengthToAdd + 100 > matchesText.length) {
            final byte[] tmp = new byte[approximateLengthToAdd + 10000];
            System.arraycopy(matchesText, 0, tmp, 0, matchesTextLength);
            matchesText = tmp;
        }
        System.arraycopy(queryName, 0, matchesText, matchesTextLength, queryName.length);
        matchesTextLength += queryName.length;
        matchesText[matchesTextLength++] = '\t';
        System.arraycopy(matchText, 0, matchesText, matchesTextLength, matchText.length);
        matchesTextLength += matchText.length;
        matchesText[matchesTextLength++] = '\n';

        matches[m].setBitScore(match.getBitScore());
        matches[m].setExpected(match.getExpected());
        matches[m].setPercentIdentity(match.getPercentIdentity());

        // null when header parsing is off, or when the match text has no second word
        final String refHeader = (parseHeaders ? getWordAsString(matchText, 2) : null);

        for (int i = 0; i < cNames.length; i++) {
            int id = 0;
            if (refHeader != null)
                id = parseIdInHeader(key[i], refHeader);
            if (id == 0) {
                // fall back to the loaded mapping for this classification, if any
                final Mapping mapping = MappingManager.getMapping(i);
                if (mapping != null)
                    id = mapping.get(match.getReferenceId());
            }
            match2classification2id[m][i] = id;
            matches[m].setFId(i, id);
        }
    }
    rma6FileCreator.addQuery(queryText, queryTextLength, numberOfMatches, matchesText, matchesTextLength, match2classification2id, 0);
}
/**
 * Looks for the given key in a word and, if everything following the first occurrence of the
 * key is an integer, returns that integer.
 *
 * @param key  the key to look for, e.g. "ipr|"
 * @param word the word to search; may be null
 * @return the parsed id, or 0 if the word or key is null, the key does not occur,
 * or the text after the key is not an integer
 */
private int parseIdInHeader(String key, String word) {
    if (key == null || word == null)
        return 0; // nothing to parse; callers treat 0 as "no id found"
    final int pos = word.indexOf(key);
    if (pos == -1)
        return 0;
    final String suffix = word.substring(pos + key.length());
    return Basic.isInteger(suffix) ? Basic.parseInt(suffix) : 0;
}
/**
 * Finishes generation of the RMA6 file.
 * Ends the queries section, writes (empty) classifications and closes the file creator.
 * For paired reads, the file is then reopened and, for each read block that has a mate uid,
 * the read's own uid is written at the position given by the mate uid.
 * Finally the file is loaded as a MEGAN document, the read hits are processed using the
 * LCA settings from the options, and the auxiliary data is saved.
 *
 * @param contaminantsFile file of contaminants; only used if it exists and is non-empty
 * @throws IOException if reading or writing fails; a CanceledException is wrapped in an IOException
 */
public void close(String contaminantsFile) throws IOException {
    try {
        System.err.println("Finishing file: " + rma6File);
        rma6FileCreator.endAddingQueries();
        rma6FileCreator.writeClassifications(new String[0], null, null);
        rma6FileCreator.close();

        final boolean pairedReads = maltOptions.isPairedReads();
        if (pairedReads) { // update paired reads info and then run dataprocessor
            long count = 0;
            try (InputOutputReaderWriter raf = new InputOutputReaderWriter(rma6File, "rw");
                 IReadBlockIterator it = (new RMA6Connector(rma6File)).getAllReadsIterator(0, 1000, false, false)) {
                final ProgressPercentage progress = new ProgressPercentage("Linking paired reads");
                // set the maximum first, then reset the current progress
                progress.setMaximum(it.getMaximumProgress());
                progress.setProgress(0);
                while (it.hasNext()) {
                    final IReadBlock readBlock = it.next();
                    if (readBlock.getMateUId() > 0) {
                        if (readBlock.getMateUId() > readBlock.getUId())
                            throw new IOException("Mate uid=" + readBlock.getMateUId() + ": too big");
                        raf.seek(readBlock.getMateUId());
                        raf.writeLong(readBlock.getUId());
                        count++;
                    }
                    progress.setProgress(it.getProgress());
                }
                progress.close();
                System.err.printf("Number of pairs:%,14d%n", count);
            }
        }

        // we need to run data processor
        final Document doc = new Document();
        doc.setTopPercent(maltOptions.getTopPercentLCA());
        doc.setLcaAlgorithm(maltOptions.isUseWeightedLCA() ? Document.LCAAlgorithm.weighted : Document.LCAAlgorithm.naive);
        doc.setLcaCoveragePercent(maltOptions.getLcaCoveragePercent());
        doc.setMinSupportPercent(maltOptions.getMinSupportPercentLCA());
        doc.setMinSupport(maltOptions.getMinSupportLCA());
        doc.setMaxExpected((float) maltOptions.getMaxExpected());
        doc.setMinScore((float) maltOptions.getMinBitScore());
        doc.setPairedReads(pairedReads);
        doc.setMinPercentIdentity(maltOptions.getMinPercentIdentityLCA());
        doc.setUseIdentityFilter(maltOptions.isUsePercentIdentityFilterLCA());
        doc.getActiveViewers().addAll(Arrays.asList(MappingManager.getCNames()));
        doc.setReadAssignmentMode(Document.ReadAssignmentMode.readCount); // todo: make this an option

        if (Basic.fileExistsAndIsNonEmpty(contaminantsFile)) {
            final ContaminantManager contaminantManager = new ContaminantManager();
            contaminantManager.read(contaminantsFile);
            doc.getDataTable().setContaminants(contaminantManager.getTaxonIdsString());
        }

        doc.getMeganFile().setFileFromExistingFile(rma6File, false);
        doc.loadMeganFile();
        doc.processReadHits();

        // update and then save auxiliary data:
        final String sampleName = Basic.replaceFileSuffix(Basic.getFileNameWithoutPath(rma6File), "");
        SyncArchiveAndDataTable.syncRecomputedArchive2Summary(doc.getReadAssignmentMode(), sampleName, "LCA", doc.getBlastMode(), doc.getParameterString(), new RMA6Connector(rma6File), doc.getDataTable(), 0);
        doc.saveAuxiliaryData();
    } catch (CanceledException ex) {
        throw new IOException(ex); // this can't happen because ProgressPercent never throws CanceledException
    }
}
/**
 * Gets the key that introduces an id of the named classification in a reference header.
 * The key is the lower-case classification name followed by '|', except for
 * "interpro2go" (key "ipr|") and "eggnog" (key "cog|").
 * Lower-casing is done independently of the default locale, so that names containing
 * 'I' are also matched when the default locale is, for example, Turkish.
 *
 * @param fName name of the classification
 * @return key
 */
private static String getKey(String fName) {
    final String lowerCaseName = fName.toLowerCase(java.util.Locale.ROOT);
    switch (lowerCaseName) {
        case "interpro2go":
            return "ipr|";
        case "eggnog":
            return "cog|";
        default:
            return lowerCaseName + "|";
    }
}
/**
 * Gets a word of the given text as a string.
 * Words are separated by single whitespace bytes, so two adjacent whitespace bytes
 * enclose an empty word. The returned word does not contain the separators.
 *
 * @param text      the text
 * @param whichWord number of the word to get, starting at 1
 * @return the word, or null if the text is null or has fewer than whichWord words
 */
private static String getWordAsString(byte[] text, int whichWord) {
    if (text == null || whichWord < 1)
        return null;
    int wordNumber = 1; // number of the word that starts at position start
    int start = 0;
    for (int i = 0; i < text.length; i++) {
        if (Character.isWhitespace(text[i])) {
            if (wordNumber == whichWord)
                return new String(text, start, i - start);
            wordNumber++;
            start = i + 1; // next word begins after the separator
        }
    }
    // the requested word may be the last one, which is ended by the end of the text
    if (wordNumber == whichWord)
        return new String(text, start, text.length - start);
    return null;
}
}
malt-0.5.2/src/malt/io/SAMHelper.java 0000664 0000000 0000000 00000041054 14004551276 0017241 0 ustar 00root root 0000000 0000000 /*
* SAMHelper.java Copyright (C) 2020. Daniel H. Huson
*
* (Some files contain contributions from other authors, who are then mentioned separately.)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package malt.io;
import jloda.util.BlastMode;
import malt.data.DNA5;
/**
* helps to create a SAM line from an alignment
* Daniel Huson, 8.2014
*/
public class SAMHelper {
private static final String FILE_HEADER_BLASTN_TEMPLATE = "@HD\tVN:1.5\tSO:unsorted\tGO:query\n@PG\tID:1\tPN:MALT\tCL:%s\tDS:BlastN\n@RG\tID:1\tPL:unknown\tSM:unknown\n@CO\tBlastN-like alignments\n" +
"@CO\tReporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length\n";
private static final String FILE_HEADER_BLASTP_TEMPLATE = "@HD\tVN:1.5\tSO:unsorted\tGO:query\n@PG\tID:1\tPN:MALT\tCL:%s\tDS:BlastP\n@RG\tID:1\tPL:unknown\tSM:unknown\n@CO\tBlastP-like alignments\n" +
"@CO\tReporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length\n";
private static final String FILE_HEADER_BLASTX_TEMPLATE = "@HD\tVN:1.5\tSO:unsorted\tGO:query\n@PG\tID:1\tPN:MALT\tCL:%s\tDS:BlastX\n@RG\tID:1\tPL:unknown\tSM:unknown\n@CO\tBlastX-like alignments\n" +
"@CO\tReporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length, ZF: frame, ZS: query start DNA coordinate\n";
/*
Mandatory SAM fields (column, name, type, regexp/range):
0 QNAME String [!-?A-~]{1,255}
1 FLAG Int [0,2^16-1]
2 RNAME String \*|[!-()+-<>-~][!-~]*
3 POS Int [0,2^29-1]
4 MAPQ Int [0,2^8-1]
5 CIGAR String \*|([0-9]+[MIDNSHPX=])+
6 RNEXT String \*|=|[!-()+-<>-~][!-~]*
7 PNEXT Int [0,2^29-1]
8 TLEN Int [-2^29+1,2^29-1]
9 SEQ String \*|[A-Za-z=.]+
10 QUAL String [!-~]+
11 additional stuff including score and MD
*/
/**
 * Creates a SAM line. If queryHeader==null, does not output the initial query token.
 * <p>
 * The line consists of QNAME (optional), FLAG, RNAME, POS, MAPQ (always 255), CIGAR,
 * RNEXT (always *), PNEXT (always 0), TLEN (always 0), SEQ, QUAL and the optional fields
 * AS, NM, ZL, ZR, ZE, ZI, (for BlastX also ZF and ZS) and MD.
 * Names are cut at the first zero byte or whitespace (including tab) and a single leading
 * '>' is removed. Numbers are formatted independently of the default locale.
 *
 * @param mode             alignment mode; determines the FLAG, the query offset used for clipping and the optional fields
 * @param queryHeader      query header, or null to omit the QNAME field
 * @param querySequence    full query sequence, or null; if null, soft clipping is turned off
 * @param queryStart       start of the alignment in the query
 * @param queryStartBlastX value reported as ZS in BlastX mode
 * @param queryEnd         end of the alignment in the query
 * @param queryLength      length of the query
 * @param alignedQuery     aligned query, with '-' for gaps
 * @param referenceHeader  reference header
 * @param referenceStart   start of the alignment in the reference
 * @param referenceEnd     end of the alignment in the reference
 * @param alignedReference aligned reference, with '-' for gaps
 * @param referenceLength  value reported as ZL
 * @param bitScore         reported rounded as AS
 * @param rawScore         reported as ZR
 * @param expected         reported as ZE
 * @param percentIdentity  reported rounded as ZI
 * @param frame            reported as ZF in BlastX mode
 * @param qualityValues    quality values of the query, or null to report *
 * @param softClipped      if true, report the full query and use soft clipping, otherwise hard clipping
 * @return the SAM line, without a terminating new-line
 */
public static String createSAMLine(final BlastMode mode, final byte[] queryHeader, final byte[] querySequence, final int queryStart, final int queryStartBlastX, final int queryEnd, final int queryLength, final byte[] alignedQuery,
                                   final byte[] referenceHeader, final int referenceStart, final int referenceEnd, final byte[] alignedReference, final int referenceLength,
                                   final double bitScore, final int rawScore, final double expected, final float percentIdentity, int frame, final byte[] qualityValues, boolean softClipped) {
    if (querySequence == null)
        softClipped = false; // cannot report the full query without its sequence

    final StringBuilder buffer = new StringBuilder();

    // QNAME:
    if (queryHeader != null) {
        appendFirstWord(queryHeader, buffer);
        buffer.append('\t');
    }

    // FLAG
    final boolean reverseComplemented = ((queryStart < queryEnd) != (referenceStart < referenceEnd));
    final int queryOffset;
    switch (mode) {
        case BlastN:
            if (reverseComplemented) {
                queryOffset = queryLength - queryEnd;
                buffer.append(0x10); // SEQ is reverse complemented
            } else {
                queryOffset = queryStart;
                buffer.append(0);
            }
            break;
        case BlastX:
            if (reverseComplemented)
                buffer.append(0x10); // SEQ is reverse complemented
            else
                buffer.append(0);
            queryOffset = 0; // will explicitly save query start and query end
            break;
        default:
        case BlastP:
            queryOffset = queryStart;
            buffer.append(0);
    }
    buffer.append('\t');

    // RNAME:
    appendFirstWord(referenceHeader, buffer);
    buffer.append('\t');

    // POS:
    buffer.append(Math.min(referenceStart, referenceEnd));
    buffer.append('\t');

    // MAPQ
    buffer.append("255"); // unknown
    buffer.append('\t');

    // CIGAR
    appendCigar(alignedQuery, queryOffset, queryLength, alignedReference, reverseComplemented, softClipped, buffer);
    buffer.append('\t');

    // RNEXT
    buffer.append("*"); // unknown
    buffer.append('\t');

    // PNEXT
    buffer.append("0"); // unknown
    buffer.append('\t');

    // TLEN
    buffer.append("0");
    buffer.append('\t');

    // SEQ
    if (softClipped) { // implies querySequence != null
        if (reverseComplemented) {
            for (int i = queryLength - 1; i >= 0; i--) {
                buffer.append((char) DNA5.getInstance().getBaseComplement(querySequence[i]));
            }
        } else {
            for (int i = 0; i < queryLength; i++)
                buffer.append((char) querySequence[i]);
        }
    } else {
        if (reverseComplemented) {
            for (int i = alignedQuery.length - 1; i >= 0; i--) {
                final byte a = alignedQuery[i];
                if (a != '-')
                    buffer.append((char) DNA5.getInstance().getBaseComplement(a));
            }
        } else {
            for (byte a : alignedQuery) {
                if (a != '-')
                    buffer.append((char) a);
            }
        }
    }
    buffer.append('\t');

    // QUAL
    if (qualityValues == null)
        buffer.append("*");
    else {
        if (softClipped) {
            if (reverseComplemented) {
                for (int i = queryLength - 1; i >= 0; i--)
                    buffer.append((char) qualityValues[i]);
            } else {
                for (int i = 0; i < queryLength; i++)
                    buffer.append((char) qualityValues[i]);
            }
        } else {
            if (reverseComplemented) {
                for (int i = queryStart; i < queryEnd; i++)
                    buffer.append((char) qualityValues[queryLength - (i + 1)]);
            } else {
                for (int i = queryStart; i < queryEnd; i++)
                    buffer.append((char) qualityValues[i]);
            }
        }
    }
    buffer.append('\t');

    // optional stuff (integers are appended directly, the float is formatted with the root
    // locale, so that the output does not depend on the default locale):
    buffer.append("AS:i:").append((int) Math.round(bitScore)).append('\t');
    buffer.append("NM:i:").append(computeEditDistance(alignedQuery, alignedReference)).append('\t');
    buffer.append("ZL:i:").append(referenceLength).append('\t');
    buffer.append("ZR:i:").append(rawScore).append('\t');
    buffer.append(String.format(java.util.Locale.ROOT, "ZE:f:%g\t", (float) expected));
    buffer.append("ZI:i:").append(Math.round(percentIdentity)).append('\t');
    if (mode == BlastMode.BlastX) {
        buffer.append("ZF:i:").append(frame).append('\t');
        buffer.append("ZS:i:").append(queryStartBlastX).append('\t');
    }
    appendMDString(alignedQuery, alignedReference, reverseComplemented, buffer);

    return buffer.toString();
}

/**
 * Appends the first word of a header, without a leading '>'.
 * The word ends at the first zero byte or whitespace byte.
 *
 * @param header the header bytes
 * @param buffer the buffer to append to
 */
private static void appendFirstWord(final byte[] header, final StringBuilder buffer) {
    // only a '>' in the very first position is a FastA marker, later ones belong to the name
    int i = (header.length > 0 && header[0] == '>' ? 1 : 0);
    for (; i < header.length; i++) {
        final byte a = header[i];
        if (a == 0 || Character.isWhitespace(a))
            break;
        buffer.append((char) a);
    }
}
/**
 * Appends the cigar string.
 * Columns with a gap in the query are reported as D, columns with a gap in the reference
 * as I and all other columns as M. For a reverse complemented alignment the columns are
 * reported in reverse order. Unaligned query positions before and after the alignment are
 * reported as clipping.
 * NOTE(review): the clip lengths are computed from alignedQuery.length, which includes gap
 * columns - confirm that this is intended when the aligned query contains gaps.
 *
 * @param alignedQuery        aligned query, with '-' for gaps
 * @param queryOffset         number of query positions before the alignment
 * @param queryLength         length of the query
 * @param alignedReference    aligned reference, with '-' for gaps
 * @param reverseComplemented if true, columns are reported in reverse order and the clips are swapped
 * @param softClipped         if true, clipping is reported as S, otherwise as H
 * @param buffer              the buffer to append to
 */
private static void appendCigar(byte[] alignedQuery, int queryOffset, int queryLength, byte[] alignedReference, boolean reverseComplemented, boolean softClipped, StringBuilder buffer) {
    final int numberOfColumns = alignedQuery.length;
    final String clipOperation = (softClipped ? "S" : "H");
    final int remainder = queryLength - queryOffset - numberOfColumns;
    final int leadingClip = (reverseComplemented ? remainder : queryOffset);
    final int trailingClip = (reverseComplemented ? queryOffset : remainder);

    if (leadingClip > 0) {
        buffer.append(leadingClip).append(clipOperation);
    }

    char state = 'M'; // M in match, I insert, D deletion
    int count = 0;
    for (int k = 0; k < numberOfColumns; k++) {
        final int i = (reverseComplemented ? numberOfColumns - 1 - k : k);
        final char operation;
        if (alignedQuery[i] == '-')
            operation = 'D';
        else if (alignedReference[i] == '-')
            operation = 'I';
        else
            operation = 'M'; // match or mismatch

        if (operation == state) {
            count++;
        } else {
            // flush the previous run, if any; a gap in the very first column starts a new
            // run as well, rather than being dropped
            if (count > 0)
                buffer.append(count).append(state);
            state = operation;
            count = 1;
        }
    }
    if (count > 0) {
        buffer.append(count).append(state);
    }

    if (trailingClip > 0) {
        buffer.append(trailingClip).append(clipOperation);
    }
}
/**
 * Appends the MD string, that is, "MD:Z:" followed by a description of the reference
 * bases at mismatches and deletions.
 * Runs of matching columns are reported as their length, a mismatch as the reference base,
 * and a run of columns with a gap in the query as '^' followed by the reference bases.
 * A mismatch that directly follows such a run is preceded by 0. Columns with a gap in the
 * reference are ignored. For a reverse complemented alignment the columns are visited in
 * reverse order and reference bases are complemented.
 * NOTE(review): no 0 is written between two adjacent mismatches or before a leading
 * mismatch - check against the SAM specification if strict MD syntax is required.
 *
 * @param alignedQuery        aligned query, with '-' for gaps
 * @param alignedReference    aligned reference, with '-' for gaps
 * @param reverseComplemented whether the alignment is reverse complemented
 * @param buffer              the buffer to append to
 */
private static void appendMDString(final byte[] alignedQuery, final byte[] alignedReference, final boolean reverseComplemented, final StringBuilder buffer) {
    buffer.append("MD:Z:");

    final int numberOfColumns = alignedQuery.length;
    int matchRun = 0;
    boolean deleting = false;

    for (int step = 0; step < numberOfColumns; step++) {
        final int i = (reverseComplemented ? numberOfColumns - 1 - step : step);

        if (alignedQuery[i] == '-') { // gap in query
            if (matchRun > 0) {
                buffer.append(matchRun);
                matchRun = 0;
            }
            if (!deleting) {
                buffer.append('^');
                deleting = true;
            }
            buffer.append(reverseComplemented ? (char) DNA5.getInstance().getBaseComplement(alignedReference[i]) : (char) alignedReference[i]);
        } else if (alignedReference[i] != '-') { // match or mismatch
            if (alignedQuery[i] != alignedReference[i]) {
                if (deleting)
                    buffer.append('0'); // separates the deleted bases from the mismatched base
                if (matchRun > 0) {
                    buffer.append(matchRun);
                    matchRun = 0;
                }
                buffer.append(reverseComplemented ? (char) DNA5.getInstance().getBaseComplement(alignedReference[i]) : (char) alignedReference[i]);
            } else {
                matchRun++;
            }
            deleting = false;
        }
        // else gap in reference: this has no effect
    }

    if (matchRun > 0)
        buffer.append(matchRun);
    else if (deleting)
        buffer.append('0');
}
/**
 * Computes the edit distance from an alignment: the number of columns that contain a gap
 * or in which query and reference differ.
 *
 * @param alignedQuery     aligned query, with '-' for gaps
 * @param alignedReference aligned reference, with '-' for gaps
 * @return edit distance
 */
private static int computeEditDistance(byte[] alignedQuery, byte[] alignedReference) {
    int edits = 0;
    int column = 0;
    while (column < alignedQuery.length) {
        final byte q = alignedQuery[column];
        // a query gap always counts; otherwise any difference (including a reference gap) counts
        if (q == '-' || q != alignedReference[column]) {
            edits++;
        }
        column++;
    }
    return edits;
}
/**
 * Gets the SAM header lines for the given mode, with the command line inserted
 * into the @PG line.
 *
 * @param mode        the alignment mode
 * @param commandLine the command line to report; null is treated as the empty string
 * @return SAM header for BlastN, BlastP or BlastX, or "???" for any other mode
 */
public static String getSAMHeader(BlastMode mode, String commandLine) {
    final String template;
    switch (mode) {
        case BlastN:
            template = FILE_HEADER_BLASTN_TEMPLATE;
            break;
        case BlastP:
            template = FILE_HEADER_BLASTP_TEMPLATE;
            break;
        case BlastX:
            template = FILE_HEADER_BLASTX_TEMPLATE;
            break;
        default:
            return "???";
    }
    final String commandLineText = (commandLine == null ? "" : commandLine);
    return String.format(template, commandLineText);
}
}
malt-0.5.2/src/malt/mapping/ 0000775 0000000 0000000 00000000000 14004551276 0015636 5 ustar 00root root 0000000 0000000 malt-0.5.2/src/malt/mapping/Mapping.java 0000664 0000000 0000000 00000016274 14004551276 0020106 0 ustar 00root root 0000000 0000000 /*
* Mapping.java Copyright (C) 2020. Daniel H. Huson
*
* (Some files contain contributions from other authors, who are then mentioned separately.)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package malt.mapping;
import jloda.util.Basic;
import jloda.util.CanceledException;
import jloda.util.ProgressListener;
import jloda.util.ProgressPercentage;
import malt.data.ISequenceAccessor;
import malt.data.RefIndex2ClassId;
import megan.accessiondb.AccessAccessionMappingDatabase;
import megan.classification.Classification;
import megan.classification.IdParser;
import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
/**
* Maintains mapping from Reference indices to classification
* Daniel Huson, 2.2016
*/
public class Mapping extends RefIndex2ClassId {
private final static String version = "V1.1";
private final String fName;
/**
 * construct a table
 *
 * @param fName    name of the classification that this mapping is for; also used to build
 *                 the magic number when saving
 * @param maxIndex passed on to the super class; the create methods pass the number of
 *                 reference sequences here
 */
public Mapping(String fName, int maxIndex) {
super(maxIndex);
this.fName = fName;
}
/**
 * Computes the mapping for the given reference database.
 * For each reference, the class id is determined from its header line. If the id is
 * non-zero, it is stored in the mapping and the header is extended by the short tag of
 * the classification and the id.
 *
 * @param fName                name of the classification
 * @param referencesDB         the references
 * @param classificationMapper parses class ids from header lines
 * @param progress             progress listener; it is closed if it is a ProgressPercentage
 * @return the mapping
 * @throws IOException if the progress listener or the references report a failure
 */
public static Mapping create(String fName, ISequenceAccessor referencesDB, IdParser classificationMapper, ProgressListener progress) throws IOException {
    final int numberOfSequences = referencesDB.getNumberOfSequences();
    final Mapping result = new Mapping(fName, numberOfSequences);
    final String shortTag = Classification.createShortTag(fName);

    progress.setMaximum(numberOfSequences);
    progress.setProgress(0);

    for (int refIndex = 0; refIndex < numberOfSequences; refIndex++) {
        final int classId = classificationMapper.getIdFromHeaderLine(Basic.toString(referencesDB.getHeader(refIndex)));
        if (classId != 0) {
            result.put(refIndex, classId);
            referencesDB.extendHeader(refIndex, shortTag, classId);
        }
        progress.incrementProgress();
    }

    if (progress instanceof ProgressPercentage) {
        progress.close();
    }
    return result;
}
/**
 * Computes the mappings for the given reference database using a mapping database.
 * One mapping is created for each classification in the database that is contained in
 * namesToUse. The references are processed in chunks: for each reference, the accession is
 * taken from the first word of its header and looked up in the database. Each non-zero id
 * found is stored in the mapping of the corresponding classification and the header of the
 * reference is extended by the short tag of the classification and the id.
 *
 * @param namesToUse      names of the classifications to create mappings for
 * @param referencesDB    the references
 * @param mappingDatabase the database that maps accessions to class ids
 * @param progress        progress listener
 * @return classification name to mapping
 * @throws IOException  if the progress listener or the references report a failure
 * @throws SQLException if the database access fails
 */
public static Map<String, Mapping> create(Collection<String> namesToUse, ISequenceAccessor referencesDB, AccessAccessionMappingDatabase mappingDatabase, ProgressListener progress) throws IOException, SQLException {
    final Collection<String> cNames = mappingDatabase.getClassificationNames();
    final int maxIndex = cNames.stream().mapToInt(name -> {
        try {
            return mappingDatabase.getClassificationIndex(name);
        } catch (SQLException throwables) {
            throwables.printStackTrace();
            return 0;
        }
    }).max().orElse(-1);

    final int numberOfSequences = referencesDB.getNumberOfSequences();

    final Map<String, Mapping> mappings = new HashMap<>();
    // both arrays are indexed by the position of the classification in the id arrays
    // returned by the database, which is the classification index minus 2
    final String[] cIndex2Name = new String[maxIndex + 1];
    final String[] tags = new String[maxIndex + 1];
    for (String cName : cNames) {
        if (namesToUse.contains(cName)) {
            final int index = mappingDatabase.getClassificationIndex(cName) - 2;
            mappings.put(cName, new Mapping(cName, numberOfSequences));
            cIndex2Name[index] = cName;
            tags[index] = Classification.createShortTag(cName);
        }
    }

    progress.setMaximum(numberOfSequences);
    progress.setProgress(0);

    final int chunkSize = 10000;
    final String[] accessions = new String[chunkSize];

    for (int offset = 0; offset < numberOfSequences; offset += chunkSize) {
        // offset already advances in steps of chunkSize, so the remainder is simply total - offset
        final int numberInChunk = Math.min(chunkSize, numberOfSequences - offset);
        for (int r = 0; r < numberInChunk; r++) {
            accessions[r] = getFirstWordAccession(referencesDB.getHeader(offset + r));
        }
        final Map<String, int[]> accession2ids = mappingDatabase.getValues(accessions, numberInChunk);

        for (int r = 0; r < numberInChunk; r++) {
            if (accessions[r].length() > 0) {
                final int[] ids = accession2ids.get(accessions[r]);
                if (ids != null) {
                    final int refIndex = offset + r;
                    for (int c = 0; c < cIndex2Name.length; c++) {
                        if (cIndex2Name[c] != null) {
                            final int id = ids[c];
                            if (id != 0) {
                                mappings.get(cIndex2Name[c]).put(refIndex, id);
                                // extend the header of the reference, not of entry c
                                referencesDB.extendHeader(refIndex, tags[c], id);
                            }
                        }
                    }
                }
            }
        }
        progress.setProgress(offset + numberInChunk);
    }
    return mappings;
}
/**
 * Gets the accession at the start of a header line.
 * Leading '>', '@' and whitespace characters are skipped. The accession consists of the
 * next character (whatever it is) plus all directly following letters, digits and
 * underscores. It is only returned if it has more than four characters.
 *
 * @param bytes the header line
 * @return the accession, or the empty string if none of sufficient length was found
 */
public static String getFirstWordAccession(byte[] bytes) {
    final String line = Basic.toString(bytes);
    final int length = line.length();

    int start = 0;
    while (start < length) {
        final char ch = line.charAt(start);
        if (ch != '>' && ch != '@' && !Character.isWhitespace(ch))
            break;
        start++;
    }

    int end = start + 1;
    while (end < length && (Character.isLetterOrDigit(line.charAt(end)) || line.charAt(end) == '_')) {
        end++;
    }

    return (end - start > 4 ? line.substring(start, end) : "");
}
/**
 * Saves this mapping to a file. Delegates to the super class, passing the magic number
 * that is built from the classification name and the format version.
 *
 * @param file the file to save to
 * @throws IOException if saving fails
 */
public void save(File file) throws IOException {
super.save(file, makeMagicNumber(fName));
}
/**
 * construct from an existing file
 *
 * @param fName name of the classification; used to build the magic number that is passed
 *              to the super class together with the file
 * @param file  the file to load from
 * @throws IOException
 * @throws CanceledException
 */
public Mapping(String fName, File file) throws IOException, CanceledException {
super(file, makeMagicNumber(fName));
this.fName = fName;
}
/**
 * Builds the magic number for a mapping file: the bytes of "MA", followed by the
 * classification name and the format version.
 * NOTE(review): getBytes() uses the platform default charset - confirm that classification
 * names are always ASCII.
 *
 * @param fName name of the classification
 * @return the magic number
 */
private static byte[] makeMagicNumber(String fName) {
return ("MA" + fName + version).getBytes();
}
}
malt-0.5.2/src/malt/mapping/MappingManager.java 0000664 0000000 0000000 00000007264 14004551276 0021400 0 ustar 00root root 0000000 0000000 /*
* MappingManager.java Copyright (C) 2020. Daniel H. Huson
*
* (Some files contain contributions from other authors, who are then mentioned separately.)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package malt.mapping;
import jloda.util.Basic;
import jloda.util.CanceledException;
import megan.classification.Classification;
import megan.classification.ClassificationManager;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
/**
* manages MALT mapping files
* Daniel Huson, 2.2016
*/
public class MappingManager {
private static String[] cNames;
private static int taxonomyIndex;
private static Mapping[] mappings;
/**
 * Loads all mappings.
 * For each classification name, the classification tree is loaded and the mapping is read
 * from the file named after the lower-case classification name with suffix ".idx" in the
 * index directory. If there is no such file, the mapping for that classification is null.
 * Also records which of the names, if any, is the taxonomy.
 *
 * @param cNames         names of the classifications
 * @param indexDirectory directory containing the index files
 * @throws IOException
 * @throws CanceledException
 */
public static void loadMappings(String[] cNames, String indexDirectory) throws IOException, CanceledException {
    MappingManager.cNames = cNames;
    mappings = new Mapping[cNames.length];
    taxonomyIndex = -1;

    int position = 0;
    for (final String cName : cNames) {
        if (cName.equals(Classification.Taxonomy)) {
            taxonomyIndex = position;
        }
        final String fileName = cName.toLowerCase() + ".idx";
        ClassificationManager.ensureTreeIsLoaded(cName);
        final File file = new File(indexDirectory, fileName);
        mappings[position] = (file.exists() ? new Mapping(cName, file) : null);
        position++;
    }
}
/**
 * get the classification names that were passed to the most recent call of loadMappings
 *
 * @return names; this is the stored array itself, not a copy, and is null if loadMappings
 * has not been called
 */
public static String[] getCNames() {
return cNames;
}
/**
 * gets the appropriate mapping for the given fID
 *
 * @param fID index of the classification in the array of names passed to loadMappings
 * @return mapping, or null if no index file was found for that classification
 */
public static Mapping getMapping(int fID) {
return mappings[fID];
}
/**
 * Gets the taxonomy mapping.
 *
 * @return taxonomy mapping, or null if the taxonomy is not among the loaded classifications
 */
public static Mapping getTaxonomyMapping() {
    return (taxonomyIndex < 0 ? null : getMapping(taxonomyIndex));
}
/**
 * Determines all available classifications.
 * A classification is available if the index directory contains a file with suffix ".tre"
 * whose name without the suffix equals, ignoring case, the name of a supported classification.
 *
 * @param indexDirectory the directory to look in
 * @return names of the available classifications; empty if the directory cannot be listed
 */
public static String[] determineAvailableMappings(String indexDirectory) {
    final File[] files = (new File(indexDirectory)).listFiles();
    if (files == null)
        return new String[0]; // not a directory, or it cannot be read

    // typed list, so that toArray(new String[0]) yields a String[]
    final ArrayList<String> availableNames = new ArrayList<>(files.length);
    for (File file : files) {
        final String fileName = file.getName();
        if (fileName.endsWith(".tre")) {
            final String name = Basic.replaceFileSuffix(fileName, "");
            for (String cName : ClassificationManager.getAllSupportedClassifications()) {
                if (cName.equalsIgnoreCase(name))
                    availableNames.add(cName);
            }
        }
    }
    return availableNames.toArray(new String[0]);
}
}
malt-0.5.2/src/malt/resources/ 0000775 0000000 0000000 00000000000 14004551276 0016215 5 ustar 00root root 0000000 0000000 malt-0.5.2/src/malt/resources/icons/ 0000775 0000000 0000000 00000000000 14004551276 0017330 5 ustar 00root root 0000000 0000000 malt-0.5.2/src/malt/resources/icons/malt-build.icns 0000664 0000000 0000000 00000024217 14004551276 0022246 0 ustar 00root root 0000000 0000000 icns (is32 ]tkW k f۳G
T _6 ( [+RL Wﵵ7>
_;nM2 `0 n ZL5 W
_ݯA װ bT 0t eS Rs aR`|*7zoJǟZE{sux颙iFؘJ}^CĚʩGvrzֲۨ栙ᣌPƢsnmxlc XEz5BbWERiKK}ZeNYJSrhTFXF__Rl?_TQrQgnW]YfeQMrVcSdSUfHtTy}CT`dGh<[GN[QDnPcHARCr_KIfYM~EpSvYExbs8mk il32 ˝ݢ53@0c2=>( L<