/*
chip_center.c
Tag centering Tool.
The program moves observed ChIP-tags to
estimate center-position of DNA fragments.
# Arguments:
# feature type, relative tag shift
Giovanna Ambrosini, ISREC, Giovanna.Ambrosini@isrec.ch
Copyright (c) 2014 EPFL and Swiss Institute of Bioinformatics.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
#define DEBUG
*/
#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "hashtable.h"
#ifdef DEBUG
#include <mcheck.h>
#endif
#include "version.h"
/*#define BUF_SIZE 4096 */
/* Initial capacity (in tags) of the per-strand tag arrays in process_sga */
#define BUF_SIZE 8192
/* Size of the chr_size line buffer and initial size of the SGA line buffer */
#define LINE_SIZE 1024
/* Size of the feature-name field buffer; also the limit checked on -f / -r */
#define FT_MAX 64
/* Size of every sequence-ID buffer (seq_id, seq_id_prev, feature_t.seq_id) */
#define SEQ_ID 32
/* Size of the textual position field buffer */
#define POS_MAX 16
/* Size of the textual count field buffer */
#define CNT_MAX 16
/* Size of the SGA extension field buffer */
#define EXT_MAX 256
/* Only used as a bound when terminating the chr_size string in process_size */
#define CHR_NB 18
/* Size of the accession (hash key) buffer in process_size */
#define AC_MAX 18
/* Size of the textual chromosome-size buffer in process_size */
#define CHR_SIZE 10
/* Command-line switches filled in by main() from getopt. */
typedef struct _options_t {
/* -h: print the usage text and exit */
int help;
/* -d: print debug information on stderr */
int debug;
/* -i: directory that contains the chr_size file */
char *dbPath;
/* set to 1 together with dbPath when -i is given */
int db;
/* -z: output strand '0' instead of '+' / '-' */
int strand;
} options_t;
/* Zero-initialised (static storage) until main() parses the options */
static options_t options;
/*
 * Tags of one strand for the chromosome currently being read.
 * The arrays are parallel (same index = same tag); process_sga allocates
 * and grows them, merge() prints them.
 */
typedef struct _feature_t {
/* Sequence ID of the stored tags */
char seq_id[SEQ_ID];
/* Shifted tag positions */
unsigned long *pos;
/* Per-tag feature names; only allocated when no -f feature is selected */
char **feature;
/* Strand character printed for every tag of this list */
char strand;
/* Per-tag counts, capped at the cut-off Coff */
int *cnt;
/* Per-tag SGA extension strings; only allocated once an extension is seen */
char **ext;
} feature_t, *feature_p_t;
/* Tag lists for the '+' and '-' strands */
feature_t ft_plus, ft_minus;
/*
 * Copy of a chromosome "END" line; process_sga fills it in and merge()
 * prints it after the tags when end_line_flag is set.
 */
typedef struct _end_line_t {
/* Sequence ID of the END line */
char seq_id[SEQ_ID];
/* Position given on the END line */
unsigned long pos;
/* Feature name; process_sga always stores "END" here */
char feature[5];
/* Strand; process_sga always stores '0' */
char strand;
/* Count; process_sga always stores 1 */
int cnt;
} _end_line_t;
/* The single pending END line */
_end_line_t end_line;
/* Sequence ID -> chromosome size (as text); filled by process_size */
static hash_table_t *size_table = NULL;
/* Feature selected with -f; NULL means all features are processed */
char *Feature = NULL;
/* Replacement feature name given with -r; NULL means keep the name */
char *newFeature = NULL;
/* Tag shift given with -s: added on '+' tags, subtracted on '-' tags */
int Shift = 0;
int ft_specs = 1; /* if = 0 Process all features */
/* Count cut-off given with -c: larger counts are replaced by this value */
int Coff = 1;
/* Set to 1 as soon as an input line carries an extension field */
int ext_flag = 0;
/* Set to 1 when end_line holds an END line to be printed by merge() */
int end_line_flag = 0;
unsigned long Len = 0; /* Total Sequence Length */
unsigned long Counts = 0; /* Total Target Counts */
/*
 * process_size
 *
 * Read the chromosome size table "<dir>/chr_size" into the global hash table
 * size_table. <dir> is options.dbPath when -i was given (options.db set),
 * otherwise "/local/db/genome".
 *
 * File format as consumed here: the first line is skipped unconditionally,
 * lines starting with '#' are ignored, every other line is
 * "<accession> <whitespace> <size>". Both key and value are stored with
 * their terminating NUL included in the length, which is what the lookup in
 * process_sga expects (strlen(seq_id) + 1).
 *
 * Returns 0 on success, 1 if the file cannot be opened. Exits the process
 * on allocation failure or on an over-long field.
 */
int
process_size()
{
  static const char chr_size_name[] = "/chr_size";
  const char *dbDir = options.db ? options.dbPath : "/local/db/genome";
  FILE *input;
  int c;
  char buf[LINE_SIZE];
  char *chrSizeFile;

  /* sizeof chr_size_name already counts the terminating NUL */
  chrSizeFile = malloc(strlen(dbDir) + sizeof chr_size_name);
  if (chrSizeFile == NULL) {
    perror("process_ac: malloc");
    exit(1);
  }
  strcpy(chrSizeFile, dbDir);
  strcat(chrSizeFile, chr_size_name);

  input = fopen(chrSizeFile, "r");
  if (input == NULL) {
    fprintf(stderr, "Could not open file %s: %s(%d)\n",
            chrSizeFile, strerror(errno), errno);
    free(chrSizeFile);
    return 1;
  }
  free(chrSizeFile);

  /* Skip the first line; also stop at EOF so a file without '\n' cannot
     spin forever */
  do {
    c = fgetc(input);
  } while (c != '\n' && c != EOF);

  size_table = hash_table_new(MODE_COPY);
  while (fgets(buf, LINE_SIZE, input) != NULL) {
    char *s = buf;
    /* One extra byte each so a maximum-length field can still be
       NUL-terminated */
    char chr_size[CHR_SIZE + 1] = "";
    char ncbi_ac[AC_MAX + 1] = "";
    int i;
    int ac_len;
    int size_len;

    /* Comment line */
    if (*s == '#')
      continue;

    /* Chromosome NCBI AC */
    i = 0;
    while (*s != 0 && !isspace((unsigned char)*s)) {
      if (i >= AC_MAX) {
        fprintf(stderr, "AC too long \"%s\" \n", s);
        fclose(input);
        exit(1);
      }
      ncbi_ac[i++] = *s++;
    }
    ncbi_ac[i] = 0;
    ac_len = i + 1;           /* key length includes the NUL */

    while (isspace((unsigned char)*s))
      s++;

    /* Chrom SIZE */
    i = 0;
    while (*s != 0 && !isspace((unsigned char)*s)) {
      if (i >= CHR_SIZE) {
        fprintf(stderr, "Size too long in %s\n", s);
        fclose(input);
        exit(1);
      }
      chr_size[i++] = *s++;
    }
    chr_size[i] = 0;
    size_len = i + 1;         /* value length includes the NUL */

    hash_table_add(size_table, ncbi_ac, (size_t)ac_len, chr_size, (size_t)size_len);
    if (options.debug) {
      char *csize = hash_table_lookup(size_table, ncbi_ac, (size_t)ac_len);
      fprintf (stderr, " SIZE Hash table: %s (len = %d) -> %s (len = %d)\n", ncbi_ac, ac_len, csize, size_len);
    }
  }
  fclose(input);
  return 0;
}
void
merge(unsigned int size1, unsigned int size2)
{
/* Merge the two sorted sub-lists and print the result*/
unsigned int j = 0;
unsigned int k = 0;
while (j < size1 && k < size2) {
if (ft_plus.pos[j] < ft_minus.pos[k]) {
if (ext_flag) {
if (newFeature != NULL) {
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, newFeature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]);
} else {
if (Feature != NULL)
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, Feature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]);
else
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, ft_plus.feature[j], ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]);
}
} else {
if (newFeature != NULL) {
printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, newFeature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]);
} else {
if (Feature != NULL)
printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, Feature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]);
else
printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, ft_plus.feature[j], ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]);
}
}
j++;
} else {
if (ext_flag) {
if (newFeature != NULL) {
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, newFeature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]);
} else {
if (Feature != NULL)
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, Feature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]);
else
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, ft_minus.feature[k], ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]);
}
} else {
if (newFeature != NULL) {
printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, newFeature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]);
} else {
if (Feature != NULL)
printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, Feature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]);
else
printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, ft_minus.feature[k], ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]);
}
}
k++;
}
}
while (j < size1) {
if (ext_flag) {
if (newFeature != NULL) {
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, newFeature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]);
} else {
if (Feature != NULL)
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, Feature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]);
else
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, ft_plus.feature[j], ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]);
}
} else {
if (newFeature != NULL) {
printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, newFeature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]);
} else {
if (Feature != NULL)
printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, Feature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]);
else
printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, ft_plus.feature[j], ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]);
}
}
j++;
}
while (k < size2) {
if (ext_flag) {
if (newFeature != NULL) {
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, newFeature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]);
} else {
if (Feature != NULL)
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, Feature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]);
else
printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, ft_minus.feature[k], ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]);
}
} else {
if (newFeature != NULL) {
printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, newFeature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]);
} else {
if (Feature != NULL)
printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, Feature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]);
else
printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, ft_minus.feature[k], ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]);
}
}
k++;
}
if (end_line_flag)
printf("%s\t%s\t%lu\t%c\t%d\n",end_line.seq_id, end_line.feature, end_line.pos, end_line.strand, end_line.cnt);
}
/* Heap copy of a string for process_sga; exits on allocation failure. */
static char *
sga_strdup(const char *src)
{
  char *copy = malloc(strlen(src) + 1);
  if (copy == NULL) {
    perror("process_sga: malloc");
    exit(1);
  }
  strcpy(copy, src);
  return copy;
}

/*
 * process_sga
 *
 * Read SGA lines (seq_id, feature, position, strand, count [, extension])
 * from `input`, shift the tag positions by Shift (added on '+',
 * subtracted on '-'), cap counts at Coff and print the result chromosome by
 * chromosome through merge().
 *
 * - When Feature is set only lines with that feature are kept, otherwise
 *   all '+' / '-' lines are kept.
 * - Tags whose shifted position is <= 0 or beyond the chromosome size found
 *   in size_table are dropped with a warning (an unknown chromosome has
 *   size 0, so all of its tags are dropped).
 * - An "END" line removes already-stored tags lying beyond its position and
 *   is remembered in end_line for merge().
 *
 * Every field buffer keeps one byte for the terminating NUL; over-long
 * fields terminate the program with an error message.
 *
 * Closes `input` unless it is stdin. Returns 0; exits on allocation failure.
 */
int
process_sga(FILE *input)
{
  char seq_id_prev[SEQ_ID] = "";
  int pos, cnt, last_pos = 0;
  int first = 1;
  size_t mLen1 = BUF_SIZE;
  size_t mLen2 = BUF_SIZE;
  unsigned int k = 0;
  unsigned int j = 0;
  char *s, *res, *buf;
  size_t mLen = LINE_SIZE;
  int ac_len = 0;
  char *csize = NULL;
  int chr_size = 0;

  if ((ft_plus.pos = calloc(mLen1, sizeof(unsigned long))) == NULL) {
    perror("process_sga: malloc");
    exit(1);
  }
  if ((ft_minus.pos = calloc(mLen2, sizeof(unsigned long))) == NULL) {
    perror("process_sga: malloc");
    exit(1);
  }
  if ((ft_plus.cnt = calloc(mLen1, sizeof(int))) == NULL) {
    perror("process_sga: malloc");
    exit(1);
  }
  if ((ft_minus.cnt = calloc(mLen2, sizeof(int))) == NULL) {
    perror("process_sga: malloc");
    exit(1);
  }
  /* Per-tag feature names are only needed when no feature was selected */
  if (Feature == NULL) {
    if ((ft_plus.feature = calloc(mLen1, sizeof(*(ft_plus.feature)))) == NULL) {
      perror("process_sga: malloc");
      exit(1);
    }
    if ((ft_minus.feature = calloc(mLen2, sizeof(*(ft_minus.feature)))) == NULL) {
      perror("process_sga: malloc");
      exit(1);
    }
  }
  if ((s = malloc(mLen * sizeof(char))) == NULL) {
    perror("process_sga: malloc");
    exit(1);
  }
#ifdef DEBUG
  int c = 1;
#endif
  if (options.strand) {
    ft_minus.strand = '0';
    ft_plus.strand = '0';
  } else {
    ft_minus.strand = '-';
    ft_plus.strand = '+';
  }

  while ((res = fgets(s, (int) mLen, input)) != NULL) {
    size_t cLen = strlen(s);
    char seq_id[SEQ_ID] = "";
    /* These buffers get one extra byte so a maximum-length field is still
       NUL-terminated; seq_id is instead limited to SEQ_ID - 1 characters
       because it is copied into other SEQ_ID-sized buffers */
    char ft[FT_MAX + 1] = "";
    char position[POS_MAX + 1] = "";
    char count[CNT_MAX + 1] = "";
    char strand = '\0';
    char ext[EXT_MAX + 1];
    unsigned int i = 0;
    int selected;

    memset(ext, 0, sizeof(ext));

    /* Line longer than the buffer: grow it and keep reading */
    while (cLen + 1 == mLen && s[cLen - 1] != '\n') {
      mLen *= 2;
      if ((s = realloc(s, mLen)) == NULL) {
        perror("process_file: realloc");
        exit(1);
      }
      res = fgets(s + cLen, (int) (mLen - cLen), input);
      cLen = strlen(s);
    }
    if (cLen > 0 && s[cLen - 1] == '\n')
      s[cLen - 1] = 0;

    buf = s;
    /* Get SGA fields */
    /* SEQ ID */
    while (*buf != 0 && !isspace((unsigned char)*buf)) {
      if (i >= SEQ_ID - 1) {
        fprintf(stderr, "Seq ID is too long \"%s\" \n", buf);
        exit(1);
      }
      seq_id[i++] = *buf++;
    }
    while (isspace((unsigned char)*buf))
      buf++;
    /* FEATURE */
    i = 0;
    while (*buf != 0 && !isspace((unsigned char)*buf)) {
      if (i >= FT_MAX) {
        fprintf(stderr, "Feature is too long \"%s\" \n", buf);
        exit(1);
      }
      ft[i++] = *buf++;
    }
    while (isspace((unsigned char)*buf))
      buf++;
    /* Position */
    i = 0;
    while (isdigit((unsigned char)*buf)) {
      if (i >= POS_MAX) {
        fprintf(stderr, "Position is too large \"%s\" \n", buf);
        exit(1);
      }
      position[i++] = *buf++;
    }
    position[i] = 0;
    pos = atoi(position);
    while (isspace((unsigned char)*buf))
      buf++;
    /* Strand: do not step over the terminator of a short line */
    if (*buf != 0)
      strand = *buf++;
    while (isspace((unsigned char)*buf))
      buf++;
    /* Counts */
    i = 0;
    while (isdigit((unsigned char)*buf) || *buf == '+' || *buf == '-') {
      if (i >= CNT_MAX) {
        fprintf(stderr, "Count is too large \"%s\" \n", buf);
        exit(1);
      }
      count[i++] = *buf++;
    }
    count[i] = 0;
    cnt = atoi(count);
    while (isspace((unsigned char)*buf))
      buf++;
    /* SGA Extension: everything that is left on the line */
    i = 0;
    while (*buf != 0) {
      if (i >= EXT_MAX) {
        fprintf(stderr, "Extension is too long \"%s\" \n", buf);
        exit(1);
      }
      ext[i++] = *buf++;
      ext_flag = 1;
    }
#ifdef DEBUG
    printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, ft, strand, pos, cnt, ext);
#endif

    /* First extension seen: create the (zeroed) extension arrays */
    if (ext_flag && first) {
      if ((ft_plus.ext = calloc(mLen1, sizeof(*(ft_plus.ext)))) == NULL) {
        perror("process_sga: malloc");
        exit(1);
      }
      if ((ft_minus.ext = calloc(mLen2, sizeof(*(ft_minus.ext)))) == NULL) {
        perror("process_sga: malloc");
        exit(1);
      }
      first = 0;
    }

    /* Grow the plus-strand arrays when nearly full */
    if (j >= mLen1 - 1) {
      mLen1 *= 2;
#ifdef DEBUG
      fprintf(stderr, "reallocating memory for ft_plus (j=%d, size=%d)\n", j, (int)mLen1);
#endif
      if ((ft_plus.pos = realloc(ft_plus.pos, mLen1 * sizeof(unsigned long))) == NULL) {
        perror("process_sga: realloc");
        exit(1);
      }
      if ((ft_plus.cnt = realloc(ft_plus.cnt, mLen1 * sizeof(int))) == NULL) {
        perror("process_sga: realloc");
        exit(1);
      }
      if (ext_flag) {
        if ((ft_plus.ext = realloc(ft_plus.ext, mLen1 * sizeof(*(ft_plus.ext)))) == NULL) {
          perror("process_sga: realloc");
          exit(1);
        }
      }
      if (Feature == NULL) {
        if ((ft_plus.feature = realloc(ft_plus.feature, mLen1 * sizeof(*(ft_plus.feature)))) == NULL) {
          perror("process_sga: realloc");
          exit(1);
        }
      }
    }
    /* Grow the minus-strand arrays when nearly full */
    if (k >= mLen2 - 1) {
      mLen2 *= 2;
#ifdef DEBUG
      fprintf(stderr, "reallocating memory for ft_minus (k=%d, size=%d)\n", k, (int)mLen2);
#endif
      if ((ft_minus.pos = realloc(ft_minus.pos, mLen2 * sizeof(unsigned long))) == NULL) {
        perror("process_sga: realloc");
        exit(1);
      }
      if ((ft_minus.cnt = realloc(ft_minus.cnt, mLen2 * sizeof(int))) == NULL) {
        perror("process_sga: realloc");
        exit(1);
      }
      if (ext_flag) {
        if ((ft_minus.ext = realloc(ft_minus.ext, mLen2 * sizeof(*(ft_minus.ext)))) == NULL) {
          perror("process_sga: realloc");
          exit(1);
        }
      }
      if (Feature == NULL) {
        if ((ft_minus.feature = realloc(ft_minus.feature, mLen2 * sizeof(*(ft_minus.feature)))) == NULL) {
          perror("process_sga: realloc");
          exit(1);
        }
      }
    }

    /* Check Chromosome BEGINNING and Merge tag lists of previous Chrom */
    if (strcmp(seq_id, seq_id_prev) != 0) {
      /* Get Chromosome size; the hash key includes the terminating NUL */
      ac_len = (int)strlen(seq_id) + 1;
      csize = hash_table_lookup(size_table, seq_id, (size_t)ac_len);
      if (csize != NULL) {
        chr_size = (int) atoi(csize);
      } else {
        chr_size = 0;
      }
      merge(j, k);
      strcpy(seq_id_prev, seq_id);
      /* Release the per-tag strings of the chromosome just printed */
      if (Feature == NULL) {
        for (i = 0; i < j; i++)
          free(ft_plus.feature[i]);
        for (i = 0; i < k; i++)
          free(ft_minus.feature[i]);
      }
      if (ext_flag) {
        for (i = 0; i < j; i++)
          free(ft_plus.ext[i]);
        for (i = 0; i < k; i++)
          free(ft_minus.ext[i]);
      }
      Len += last_pos;
      j = 0;
      k = 0;
    }

    /* Check tag positions at Chromosome END and store END line */
    if (strcmp(ft, "END") == 0 && (j > 0 || k > 0)) {
      /* Drop plus-strand tags stored beyond the END position */
      while ((j > 0) && (ft_plus.pos[j-1] > (unsigned int)pos)) {
        if (Feature != NULL)
          fprintf (stderr, "WARNING: TAG: %s\t%s\t%lu\t%c\t%d\t goes beyond chrom bounds (END=%d)\n", ft_plus.seq_id, Feature, ft_plus.pos[j-1], '+', ft_plus.cnt[j-1], pos);
        else
          fprintf (stderr, "WARNING: TAG: %s\t%s\t%lu\t%c\t%d\t goes beyond chrom bounds (END=%d)\n", ft_plus.seq_id, ft_plus.feature[j-1], ft_plus.pos[j-1], '+', ft_plus.cnt[j-1], pos);
        /* skip tag and release its strings, which would otherwise leak */
        if (Feature == NULL) {
          free(ft_plus.feature[j-1]);
          ft_plus.feature[j-1] = NULL;
        }
        if (ext_flag) {
          free(ft_plus.ext[j-1]);
          ft_plus.ext[j-1] = NULL;
        }
        j--;
      }
      /* Drop minus-strand tags stored beyond the END position */
      while ((k > 0) && (ft_minus.pos[k-1] > (unsigned int)pos)) {
        if (Feature != NULL)
          fprintf (stderr, "WARNING: TAG: %s\t%s\t%lu\t%c\t%d\t goes beyond chrom bounds (Chrom END=%d)\n", ft_minus.seq_id, Feature, ft_minus.pos[k-1], '-', ft_minus.cnt[k-1], pos);
        else
          fprintf (stderr, "WARNING: TAG: %s\t%s\t%lu\t%c\t%d\t goes beyond chrom bounds (Chrom END=%d)\n", ft_minus.seq_id, ft_minus.feature[k-1], ft_minus.pos[k-1], '-', ft_minus.cnt[k-1], pos);
        /* skip tag and release its strings, which would otherwise leak */
        if (Feature == NULL) {
          free(ft_minus.feature[k-1]);
          ft_minus.feature[k-1] = NULL;
        }
        if (ext_flag) {
          free(ft_minus.ext[k-1]);
          ft_minus.ext[k-1] = NULL;
        }
        k--;
      }
      /* Add END Line */
      strcpy(end_line.seq_id, seq_id);
      strcpy(end_line.feature, "END");
      end_line.pos = (unsigned long)pos;
      end_line.strand = '0';
      end_line.cnt = 1;
      end_line_flag = 1;
    } /* Chromosome END */

    /* Keep every line when all features are processed, otherwise only
       lines whose feature matches the selected one */
    selected = !ft_specs || strcmp(ft, Feature) == 0;

    if (selected && strand == '+') {
      /* Check Chromosome Boundaries */
      if ((pos + Shift <= 0) || (pos + Shift > chr_size)) {
        fprintf (stderr, "WARNING: TAG: %s\t%s\t%d\t%c\t%d\t goes beyond chromosome boundaries (pos after TAG Shift=%d, chron size = %d)\n", seq_id, ft, pos, '+', cnt, pos + Shift, chr_size);
        continue;
      }
      strcpy(ft_plus.seq_id, seq_id);
      if (!ft_specs)
        ft_plus.feature[j] = sga_strdup(ft);
      if (ext_flag)
        ft_plus.ext[j] = sga_strdup(ext);
      ft_plus.pos[j] = pos + Shift;
      ft_plus.cnt[j] = (cnt > Coff) ? Coff : cnt;
      Counts += ft_plus.cnt[j];
      j++;
    }
    if (selected && strand == '-') {
      /* Check Chromosome Boundaries */
      if ((pos - Shift <= 0) || (pos - Shift > chr_size)) {
        fprintf (stderr, "WARNING: TAG: %s\t%s\t%d\t%c\t%d\t goes beyond chromosome boundaries (pos after TAG Shift=%d, chrom size = %d)\n", seq_id, ft, pos, '-', cnt, pos - Shift, chr_size);
        continue;
      }
      strcpy(ft_minus.seq_id, seq_id);
      if (!ft_specs)
        ft_minus.feature[k] = sga_strdup(ft);
      if (ext_flag)
        ft_minus.ext[k] = sga_strdup(ext);
      ft_minus.pos[k] = pos - Shift;
      ft_minus.cnt[k] = (cnt > Coff) ? Coff : cnt;
      Counts += ft_minus.cnt[k];
      k++;
    }
    last_pos = pos;
  } /* End of While */
  free(s);

  /* Merge tag lists from last Chromosome */
  Len += last_pos;
  merge(j, k);
  fprintf (stderr, "Total Tag Counts : %lu , Total Sequence Len : %lu\n", Counts, Len);

  if (Feature == NULL) {
    for (unsigned int i = 0; i < j; i++)
      free(ft_plus.feature[i]);
    for (unsigned int i = 0; i < k; i++)
      free(ft_minus.feature[i]);
    free(ft_plus.feature);
    free(ft_minus.feature);
  }
  if (ext_flag) {
    for (unsigned int i = 0; i < j; i++)
      free(ft_plus.ext[i]);
    for (unsigned int i = 0; i < k; i++)
      free(ft_minus.ext[i]);
    free(ft_plus.ext);
    free(ft_minus.ext);
  }
  free(ft_plus.pos);
  free(ft_plus.cnt);
  free(ft_minus.pos);
  free(ft_minus.cnt);
  if (input != stdin) {
    fclose(input);
  }
  return 0;
}
/*
 * main
 *
 * Parse the command line, open the SGA input (a file name argument, "-" or
 * no argument for stdin), load the chromosome sizes with process_size() and
 * run process_sga() on the input.
 *
 * Options: -f feature to select, -s shift (required, non-zero), -c count
 * cut-off, -r replacement feature name, -i chr_size directory, -z strand
 * set to zero, -d debug output, -h help.
 *
 * Returns 0 on success, 1 on usage error or processing failure; exits with
 * EXIT_FAILURE if the input file cannot be opened.
 */
int
main(int argc, char *argv[])
{
#ifdef DEBUG
  mcheck(NULL);
  mtrace();
#endif
  FILE *input;

  while (1) {
    int c = getopt(argc, argv, "f:dhi:zs:c:r:");
    if (c == -1)
      break;
    switch (c) {
      case 'c':
        Coff = atoi(optarg);
        break;
      case 'd':
        options.debug = 1;
        break;
      case 'f':
        Feature = optarg;
        break;
      case 'h':
        options.help = 1;
        break;
      case 'i':
        options.dbPath = optarg;
        options.db = 1;
        break;
      case 'r':
        newFeature = optarg;
        break;
      case 's':
        Shift = atoi(optarg);
        break;
      case 'z':
        options.strand = 1;
        break;
      default:
        /* Diagnostics go to stderr: stdout carries the SGA output */
        fprintf (stderr, "?? getopt returned character code 0%o ??\n", c);
    }
  }

  /* A shift of 0 (or a missing -s) and a negative cut-off are usage errors */
  if (optind > argc || options.help == 1 || Shift == 0 || Coff < 0) {
    fprintf(stderr, "Usage: %s [options] [-f <feature>] -s <shift> [<] <SGA file>\n"
            " - version %s\n"
            " where options are:\n"
            " \t\t -h Show this help text\n"
            " \t\t -d Print debug information\n"
            " \t\t -i <path> Use <path> to locate the chr_size file\n"
            " \t\t [Default=/local/db/genome]\n"
            " \t\t -z Set strand to zero\n"
            " \t\t -c Count Cut-off (default is %d)\n"
            " \t\t -r New feature name (for feature replacement)\n"
            "\n\tFeature Centering Tool for ChIP-seq data analysis.\n"
            "\tThe program reads a ChIP-seq data file (or from stdin [<]) in SGA format (<SGA file>), and\n"
            "\tshifts (by <shift>) ChIP-tag positions corresponding to feature <feature> to the estimated\n"
            "\tcenter-positions of DNA fragments. If no feature specification is set, the program accepts\n"
            "\tall lines of the input SGA. If -r is specified, the feature field is replaced\n"
            "\twith the new string. The program checks whether, after shifting, the new genome\n"
            "\tcoordinates are still within chromosome boundaries. Consequently, the file chr_size must be read.\n\n",
            argv[0], VERSION, Coff);
    return 1;
  }

  /* Select the input stream */
  if (argc > optind) {
    if(!strcmp(argv[optind],"-")) {
      input = stdin;
    } else {
      input = fopen(argv[optind], "r");
      if (NULL == input) {
        fprintf(stderr, "Unable to open '%s': %s(%d)\n",
                argv[optind], strerror(errno), errno);
        exit(EXIT_FAILURE);
      }
      if (options.debug)
        fprintf(stderr, "Processing file %s\n", argv[optind]);
    }
  } else {
    input = stdin;
  }

  if (options.debug) {
    fprintf(stderr, " Arguments:\n");
    fprintf(stderr, " Selected Feature : %s\n", Feature);
    fprintf(stderr, " Relative Shift : %d\n\n", Shift);
  }

  /* Check Feature Specs */
  if (Feature == NULL) {
    ft_specs = 0; /* Process all features */
  } else {
    int i;
    /* The name ends at the first whitespace and may not exceed FT_MAX
       characters */
    for (i = 0; Feature[i] != 0 && !isspace((unsigned char)Feature[i]); i++) {
      if (i >= FT_MAX) {
        fprintf(stderr, "Feature Description too long \"%s\" \n", Feature);
        return 1;
      }
    }
    Feature[i] = '\0';
  }

  /* Check newFeature Specs: length check only, the string is not cut */
  if (newFeature != NULL) {
    int i;
    for (i = 0; newFeature[i] != 0 && !isspace((unsigned char)newFeature[i]); i++) {
      if (i >= FT_MAX) {
        fprintf(stderr, "New Feature Name too long \"%s\" \n", newFeature);
        return 1;
      }
    }
  }

  if (options.debug) {
    if (!ft_specs) {
      fprintf(stderr, "Feature Specs: ALL -> Process all features\n");
    } else {
      fprintf(stderr, "Feature Specs: Feature name : %s\n", Feature);
    }
    if (newFeature != NULL) {
      fprintf(stderr, "Replace feature name with : %s\n", newFeature);
    }
  }

  if (process_size() == 0) {
    if (options.debug)
      fprintf(stderr, " HASH Table for chromosome size initialized\n");
  } else {
    return 1;
  }
  if (process_sga(input) != 0) {
    return 1;
  }
  return 0;
}
chip-seq/chipcenter.1.gz 0000744 0227442 0026227 00000001600 13046354254 015710 0 ustar ambrosin gr-bucher ؙX chipcenter.1 }UMo8Wt)Л I[mTC@S#XTI*Y;Cv~\lxșy~xr&s(ߍz<]tDPC+eQ7oM"sCezxVZ#+5nuSW/<5#TWʈ^+Vu!Fc16_B4NQB榹XAEAlXT;UW_@߱z
byF3yXl2z95-s,J) yŜ%_[AQsl!Pʂ-rљa݁