chip-seq/0000744022744200262270000000000013433535475013071 5ustar ambrosingr-bucherchip-seq/chipcenter.c0000744022744200262270000006253413351136302015357 0ustar ambrosingr-bucher/* chip_center.c Tag centering Tool. The program moves observed ChIP-tags to estimate center-position of DNA fragments. # Arguments: # feature type, relative tag shift Giovanna Ambrosini, ISREC, Giovanna.Ambrosini@isrec.ch Copyright (c) 2014 EPFL and Swiss Institute of Bioinformatics. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ /* #define DEBUG */ #define _GNU_SOURCE #include #include #include #include #include #include #include "hashtable.h" #ifdef DEBUG #include #endif #include "version.h" /*#define BUF_SIZE 4096 */ #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 #define CHR_NB 18 #define AC_MAX 18 #define CHR_SIZE 10 typedef struct _options_t { int help; int debug; char *dbPath; int db; int strand; } options_t; static options_t options; typedef struct _feature_t { char seq_id[SEQ_ID]; unsigned long *pos; char **feature; char strand; int *cnt; char **ext; } feature_t, *feature_p_t; feature_t ft_plus, ft_minus; typedef struct _end_line_t { char seq_id[SEQ_ID]; unsigned long pos; char feature[5]; char strand; int cnt; } _end_line_t; _end_line_t end_line; static hash_table_t *size_table = NULL; char *Feature = NULL; char *newFeature = NULL; int Shift = 0; int ft_specs = 1; /* if = 0 Process all features */ int Coff = 1; int ext_flag = 0; int end_line_flag = 0; unsigned long Len = 0; /* Total Sequence Length */ unsigned long Counts = 0; /* Total Target Counts */ int process_size() { FILE *input; int c; char buf[LINE_SIZE]; char *chrSizeFile; int cLen; if (options.db) { cLen = (int)strlen(options.dbPath) + 10; if ((chrSizeFile = (char*)malloc(cLen * sizeof(char))) == NULL) { perror("process_ac: malloc"); exit(1); } strcpy(chrSizeFile, options.dbPath); } else { cLen = 16 + 10; if ((chrSizeFile = (char*)malloc(cLen * sizeof(char))) == NULL) { perror("process_ac: malloc"); exit(1); } strcpy(chrSizeFile, "/local/db/genome"); } strcat(chrSizeFile, "/chr_size"); input = fopen(chrSizeFile, "r"); if (input == NULL) { fprintf(stderr, "Could not open file %s: %s(%d)\n", chrSizeFile, strerror(errno), errno); return 1; } do { c = fgetc(input); } while(c != '\n'); size_table = hash_table_new(MODE_COPY); while (fgets(buf, LINE_SIZE, input) != NULL) { char *s; char chr_size[CHR_SIZE] = ""; char ncbi_ac[AC_MAX] = 
""; int i = 0; int ac_len = 0; int size_len = 0; /*int valid = 1;*/ s = buf; /* Check line */ /* Get first character: if # skip line */ if (*s == '#') continue; /* Chromosome NCBI AC */ i = 0; while (*s != 0 && !isspace(*s)) { if (i >= AC_MAX) { fprintf(stderr, "AC too long \"%s\" \n", s); fclose(input); exit(1); } ncbi_ac[i++] = *s++; } if (i < AC_MAX) ncbi_ac[i] = 0; ac_len = i + 1; while (isspace(*s)) s++; i = 0; /* Chrom SIZE */ while (*s != 0 && !isspace(*s)) { if (i >= CHR_SIZE) { fprintf(stderr, "Size too long in %s\n", s); fclose(input); exit(1); } chr_size[i++] = *s++; } if (i < CHR_NB) chr_size[i] = 0; size_len = i + 1; /* printf("adding key %s (len = %d) value %s (ac) (len = %d) to hash table\n", ncbi_ac, ac_len, chr_size, size_len); */ hash_table_add(size_table, ncbi_ac, (size_t)ac_len, chr_size, (size_t)size_len); if (options.debug) { char *csize = hash_table_lookup(size_table, ncbi_ac, (size_t)ac_len); fprintf (stderr, " SIZE Hash table: %s (len = %d) -> %s (len = %d)\n", ncbi_ac, ac_len, csize, size_len); } } return 0; } void merge(unsigned int size1, unsigned int size2) { /* Merge the two sorted sub-lists and print the result*/ unsigned int j = 0; unsigned int k = 0; while (j < size1 && k < size2) { if (ft_plus.pos[j] < ft_minus.pos[k]) { if (ext_flag) { if (newFeature != NULL) { printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, newFeature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]); } else { if (Feature != NULL) printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, Feature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]); else printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, ft_plus.feature[j], ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]); } } else { if (newFeature != NULL) { printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, newFeature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]); } else { if (Feature != NULL) printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, Feature, ft_plus.pos[j], 
ft_plus.strand, ft_plus.cnt[j]); else printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, ft_plus.feature[j], ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]); } } j++; } else { if (ext_flag) { if (newFeature != NULL) { printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, newFeature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]); } else { if (Feature != NULL) printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, Feature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]); else printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, ft_minus.feature[k], ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]); } } else { if (newFeature != NULL) { printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, newFeature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]); } else { if (Feature != NULL) printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, Feature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]); else printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, ft_minus.feature[k], ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]); } } k++; } } while (j < size1) { if (ext_flag) { if (newFeature != NULL) { printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, newFeature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]); } else { if (Feature != NULL) printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, Feature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]); else printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_plus.seq_id, ft_plus.feature[j], ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j], ft_plus.ext[j]); } } else { if (newFeature != NULL) { printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, newFeature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]); } else { if (Feature != NULL) printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, Feature, ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]); else printf("%s\t%s\t%lu\t%c\t%d\n",ft_plus.seq_id, ft_plus.feature[j], ft_plus.pos[j], ft_plus.strand, ft_plus.cnt[j]); } } j++; } while (k < size2) { 
if (ext_flag) { if (newFeature != NULL) { printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, newFeature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]); } else { if (Feature != NULL) printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, Feature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]); else printf("%s\t%s\t%lu\t%c\t%d\t%s\n",ft_minus.seq_id, ft_minus.feature[k], ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k], ft_minus.ext[k]); } } else { if (newFeature != NULL) { printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, newFeature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]); } else { if (Feature != NULL) printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, Feature, ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]); else printf("%s\t%s\t%lu\t%c\t%d\n",ft_minus.seq_id, ft_minus.feature[k], ft_minus.pos[k], ft_minus.strand, ft_minus.cnt[k]); } } k++; } if (end_line_flag) printf("%s\t%s\t%lu\t%c\t%d\n",end_line.seq_id, end_line.feature, end_line.pos, end_line.strand, end_line.cnt); } int process_sga(FILE *input) { char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; int first = 1; size_t mLen1 = BUF_SIZE; size_t mLen2 = BUF_SIZE; unsigned int k = 0; unsigned int j = 0; char *s, *res, *buf; size_t mLen = LINE_SIZE; int ac_len = 0; char *csize = NULL; int chr_size = 0; if ((ft_plus.pos = (unsigned long*)calloc(mLen1, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ft_minus.pos = (unsigned long*)calloc(mLen2, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ft_plus.cnt = (int*)calloc(mLen1, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ft_minus.cnt = (int*)calloc(mLen2, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if (Feature == NULL) { if ((ft_plus.feature = (char**)calloc(mLen1, sizeof(*(ft_plus.feature)))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ft_minus.feature = (char**)calloc(mLen2, 
sizeof(*(ft_minus.feature)))) == NULL) { perror("process_sga: malloc"); exit(1); } } if ((s = malloc(mLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } #ifdef DEBUG int c = 1; #endif if (options.strand) { ft_minus.strand = '0'; ft_plus.strand = '0'; } else { ft_minus.strand = '-'; ft_plus.strand = '+'; } /* while (fscanf(f,"%s %s %d %c %d", seq_id, ft, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char ft[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("process_file: realloc"); exit(1); } res = fgets(s + cLen, (int) (mLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); exit(1); } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); exit(1); } ft[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); exit(1); } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf) || *buf == '+' || *buf == '-') { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); exit(1); } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" 
\n", buf); exit(1); } ext[i++] = *buf++; ext_flag = 1; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, ft, strand, pos, cnt, ext); #endif if (ext_flag && first) { if ((ft_plus.ext = (char**)calloc(mLen1, sizeof(*(ft_plus.ext)))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ft_minus.ext = (char**)calloc(mLen2, sizeof(*(ft_minus.ext)))) == NULL) { perror("process_sga: malloc"); exit(1); } first = 0; } if (j >= mLen1 - 1) { mLen1 *= 2; #ifdef DEBUG fprintf(stderr, "reallocating memory for ft_plus (j=%d, size=%d)\n", j, (int)mLen1); #endif if ((ft_plus.pos = (unsigned long*)realloc(ft_plus.pos, mLen1 * sizeof(unsigned long))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ft_plus.cnt = (int*)realloc(ft_plus.cnt, mLen1 * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if (ext_flag) { if ((ft_plus.ext = (char**)realloc(ft_plus.ext, mLen1 * sizeof(*(ft_minus.ext)))) == NULL) { perror("process_sga: realloc"); exit(1); } } if (Feature == NULL) { if ((ft_plus.feature = (char**)realloc(ft_plus.feature, mLen1 * sizeof(*(ft_minus.feature)))) == NULL) { perror("process_sga: realloc"); exit(1); } } } if (k >= mLen2 - 1) { mLen2 *= 2; #ifdef DEBUG fprintf(stderr, "reallocating memory for ft_minus (k=%d, size=%d)\n", k, (int)mLen2); #endif if ((ft_minus.pos = (unsigned long*)realloc(ft_minus.pos, mLen2 * sizeof(unsigned long))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ft_minus.cnt = (int*)realloc(ft_minus.cnt, mLen2 * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if (ext_flag) { if ((ft_minus.ext = (char**)realloc(ft_minus.ext, mLen2 * sizeof(*(ft_minus.ext)))) == NULL) { perror("process_sga: realloc"); exit(1); } } if (Feature == NULL) { if ((ft_minus.feature = (char**)realloc(ft_minus.feature, mLen2 * sizeof(*(ft_minus.feature)))) == NULL) { perror("process_sga: realloc"); exit(1); } } } /* Check Chromosome BEGINNING and Merge tag lists of previous 
Chrom */ if (strcmp(seq_id, seq_id_prev) != 0) { /* Get Chromosome size */ ac_len = (int)strlen(seq_id) + 1; //printf ("Chr ID: SeqID %s, len %d\n", seq_id, ac_len); csize = hash_table_lookup(size_table, seq_id, (size_t)ac_len); //printf ("Chr SIZE: csize %s\n", csize); if (csize != NULL) { chr_size = (int) atoi(csize); } else { chr_size = 0; } merge(j, k); strcpy(seq_id_prev, seq_id); if (Feature == NULL) { for (i = 0; i < j; i++) { if (ft_plus.feature[i] != NULL) free(ft_plus.feature[i]); } for (i = 0; i < k; i++) { if (ft_minus.feature[i] != NULL) free(ft_minus.feature[i]); } } if (ext_flag) { for (i = 0; i < j; i++) { if (ft_plus.ext[i] != NULL) free(ft_plus.ext[i]); } for (i = 0; i < k; i++) { if (ft_minus.ext[i] != NULL) free(ft_minus.ext[i]); } } Len += last_pos; j = 0; k = 0; } /* Check tag positions at Chromosome END and store END line */ if (strcmp(ft, "END") == 0 && (j > 0 || k > 0)) { /* Check if last positions in ft_plus array go beyond END position */ while ((j > 0) && (ft_plus.pos[j-1] > (unsigned int)pos)) { if (Feature != NULL) fprintf (stderr, "WARNING: TAG: %s\t%s\t%lu\t%c\t%d\t goes beyond chrom bounds (END=%d)\n", ft_plus.seq_id, Feature, ft_plus.pos[j-1], '+', ft_plus.cnt[j-1], pos); else fprintf (stderr, "WARNING: TAG: %s\t%s\t%lu\t%c\t%d\t goes beyond chrom bounds (END=%d)\n", ft_plus.seq_id, ft_plus.feature[j-1], ft_plus.pos[j-1], '+', ft_plus.cnt[j-1], pos); /* skip tag */ j--; } /* Check if last positions in ft_minus array go beyond END position */ while (((unsigned int)k > 0) && (ft_minus.pos[k-1] > (unsigned int)pos)) { if (Feature != NULL) fprintf (stderr, "WARNING: TAG: %s\t%s\t%lu\t%c\t%d\t goes beyond chrom bounds (Chrom END=%d)\n", ft_minus.seq_id, Feature, ft_minus.pos[k-1], '-', ft_minus.cnt[k-1], pos); else fprintf (stderr, "WARNING: TAG: %s\t%s\t%lu\t%c\t%d\t goes beyond chrom bounds (Chrom END=%d)\n", ft_minus.seq_id, ft_minus.feature[k-1], ft_minus.pos[k-1], '-', ft_minus.cnt[k-1], pos); /* skip tag */ k--; } /* Add END Line 
*/ strcpy(end_line.seq_id, seq_id); strcpy(end_line.feature, "END"); end_line.pos = (unsigned long)pos; end_line.strand = '0'; end_line.cnt = 1; end_line_flag = 1; } /* Chromosome END */ if (!ft_specs) { /* Process all features */ if (strand == '+') { /* Check Chromosome Boundaries */ if ((pos + Shift <= 0) || (pos + Shift > chr_size)) { fprintf (stderr, "WARNING: TAG: %s\t%s\t%d\t%c\t%d\t goes beyond chromosome boundaries (pos after TAG Shift=%d, chron size = %d)\n", seq_id, ft, pos, '+', cnt, pos + Shift, chr_size); continue; } strcpy(ft_plus.seq_id, seq_id); ft_plus.feature[j] = malloc(strlen(ft) + 1); strcpy(ft_plus.feature[j], ft); if (ext_flag) { ft_plus.ext[j] = malloc(strlen(ext) + 1); strcpy(ft_plus.ext[j], ext); } ft_plus.pos[j] = pos + Shift; if (cnt > Coff) ft_plus.cnt[j] = Coff; else ft_plus.cnt[j] = cnt; Counts += ft_plus.cnt[j]; j++; } if (strand == '-') { /* Check Chromosome Boundaries */ if ((pos - Shift <= 0) || (pos - Shift > chr_size)) { fprintf (stderr, "WARNING: TAG: %s\t%s\t%d\t%c\t%d\t goes beyond chromosome boundaries (pos after TAG Shift=%d, chrom size = %d)\n", seq_id, ft, pos, '-', cnt, pos - Shift, chr_size); continue; } strcpy(ft_minus.seq_id, seq_id); ft_minus.feature[k] = malloc(strlen(ft) + 1); strcpy(ft_minus.feature[k], ft); if (ext_flag) { ft_minus.ext[k] = malloc(strlen(ext) + 1); strcpy(ft_minus.ext[k], ext); } ft_minus.pos[k] = pos - Shift; if (cnt > Coff) ft_minus.cnt[k] = Coff; else ft_minus.cnt[k] = cnt; Counts += ft_minus.cnt[k]; k++; } } else { if (strcmp(ft, Feature) == 0 && strand == '+') { /* Check Chromosome Boundary */ if ((pos + Shift <= 0) || (pos + Shift > chr_size)) { fprintf (stderr, "WARNING: TAG: %s\t%s\t%d\t%c\t%d\t goes beyond chromosome boundaries (pos after TAG Shift=%d, chron size = %d)\n", seq_id, ft, pos, '+', cnt, pos + Shift, chr_size); continue; } strcpy(ft_plus.seq_id, seq_id); if (ext_flag) { ft_plus.ext[j] = malloc(strlen(ext) + 1); strcpy(ft_plus.ext[j], ext); } ft_plus.pos[j] = pos + Shift; if 
(cnt > Coff) ft_plus.cnt[j] = Coff; else ft_plus.cnt[j] = cnt; Counts += ft_plus.cnt[j]; j++; } if (strcmp(ft, Feature) == 0 && strand == '-') { /* Check Chromosome Boundary */ if ((pos - Shift <= 0) || (pos - Shift > chr_size)) { fprintf (stderr, "WARNING: TAG: %s\t%s\t%d\t%c\t%d\t goes beyond chromosome boundaries (pos after TAG Shift=%d, chrom size = %d)\n", seq_id, ft, pos, '-', cnt, pos - Shift, chr_size); continue; } strcpy(ft_minus.seq_id, seq_id); if (ext_flag) { ft_minus.ext[k] = malloc(strlen(ext) + 1); strcpy(ft_minus.ext[k], ext); } ft_minus.pos[k] = pos - Shift; if (cnt > Coff) ft_minus.cnt[k] = Coff; else ft_minus.cnt[k] = cnt; Counts += ft_minus.cnt[k]; k++; } } last_pos = pos; } /* End of While */ free(s); /* Merge tag lists from last Chromosome */ Len += last_pos; merge(j, k); fprintf (stderr, "Total Tag Counts : %lu , Total Sequence Len : %lu\n", Counts, Len); if (Feature == NULL) { for (unsigned int i = 0; i < j; i++) { if (ft_plus.feature[i] != NULL) free(ft_plus.feature[i]); } for (unsigned int i = 0; i < k; i++) { if (ft_minus.feature[i] != NULL) free(ft_minus.feature[i]); } free(ft_plus.feature); free(ft_minus.feature); } if (ext_flag) { for (unsigned int i = 0; i < j; i++) { if (ft_plus.ext[i] != NULL) free(ft_plus.ext[i]); } for (unsigned int i = 0; i < k; i++) { if (ft_minus.ext[i] != NULL) free(ft_minus.ext[i]); } free(ft_plus.ext); free(ft_minus.ext); } free(ft_plus.pos); free(ft_plus.cnt); free(ft_minus.pos); free(ft_minus.cnt); if (input != stdin) { fclose(input); } return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; while (1) { int c = getopt(argc, argv, "f:dhi:zs:c:r:"); if (c == -1) break; switch (c) { case 'c': Coff = atoi(optarg); break; case 'd': options.debug = 1; break; case 'f': Feature = optarg; break; case 'h': options.help = 1; break; case 'i': options.dbPath = optarg; options.db = 1; break; case 'r': newFeature = optarg; break; case 's': Shift = atoi(optarg); break; case 
'z': options.strand = 1; break; default: printf ("?? getopt returned character code 0%o ??\n", c); } } if (optind > argc || options.help == 1 || Shift == 0 || Coff < 0) { fprintf(stderr, "Usage: %s [options] [-f ] -s [<] \n" " - version %s\n" " where options are:\n" " \t\t -h Show this help text\n" " \t\t -d Print debug information\n" " \t\t -i Use to locate the chr_size file\n" " \t\t [Default=/local/db/genome]\n" " \t\t -z Set strand to zero\n" " \t\t -c Count Cut-off (default is %d)\n" " \t\t -r New feature name (for feature replacement)\n" "\n\tFeature Centering Tool for ChIP-seq data analysis.\n" "\tThe program reads a ChIP-seq data file (or from stdin [<]) in SGA format (), and\n" "\tshifts (by ) ChIP-tag positions corresponding to feature to the estimated\n" "\tcenter-positions of DNA fragments. If no feature specification is set, the program accepts\n" "\tall lines of the input SGA. If -r is specified, the feature field is replaced\n" "\twith the new string. The program checks whether, after shifting, the new genome\n" "\tcoordinates are still within chromosome boundaries. 
Consequently, the file chr_size must be read.\n\n", argv[0], VERSION, Coff); return 1; } if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.debug) { fprintf(stderr, " Arguments:\n"); fprintf(stderr, " Selected Feature : %s\n", Feature); fprintf(stderr, " Relative Shift : %d\n\n", Shift); } /* Check Feature Specs */ if (Feature == NULL) { ft_specs = 0; /* Process all features */ } else { char *s = Feature; int i = 0; while (*s != 0 && !isspace(*s++)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Description too long \"%s\" \n", Feature); return 1; } i++; } Feature[i] = '\0'; } /* Check newFeature Specs */ if (newFeature != NULL) { char *s = newFeature; int i = 0; while (*s != 0 && !isspace(*s++)) { if (i >= FT_MAX) { fprintf(stderr, "New Feature Name too long \"%s\" \n", newFeature); return 1; } i++; } } if (options.debug) { if (!ft_specs) { fprintf(stderr, "Feature Specs: ALL -> Process all features\n"); } else { fprintf(stderr, "Feature Specs: Feature name : %s\n", Feature); } if (newFeature != NULL) { fprintf(stderr, "Replace feature name with : %s\n", newFeature); } } if (process_size() == 0) { if (options.debug) fprintf(stderr, " HASH Table for chromosome size initialized\n"); } else { return 1; } if (process_sga(input) != 0) { return 1; } return 0; } chip-seq/chipcenter.1.gz0000744022744200262270000000160013046354254015710 0ustar ambrosingr-bucherؙXchipcenter.1}UMo8W t)Л I[mTC@S#XTI*Y;Cv~\lxșy~x r&s(ߍz<]•tDPC+eQ7oM"sCezxVZ#+ 5nuSW/<5#TWʈ^+Vu!Fc16_B4NQB榹XAEAlXT;UW_@߱z byF3yXl2z95-s,J) yŜ%_ [AQsl!Pʂ-rљa݁+QilXة~&@Hzl1eGO[ZRL^Bҫ'4fI,^TISĵA{cl]e0d x^\)Ҏl. 
*/ //#define DEBUG #define _GNU_SOURCE #include #include #include #include #include #include #include #ifdef DEBUG #include #endif #include "version.h" /*#define BUF_SIZE 4096 */ #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define FT_MAX 64 #define SEQ_ID 32 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 typedef struct _options_t { int help; int debug; } options_t; static options_t options; typedef struct _feature_t { char seq_id[SEQ_ID]; char *ft; char ft_str; char **name; char *strand; int *pos; int *cnt; int *npo; int *nct; } feature_t, *feature_p_t; feature_t ref_ft; int strand_flag = 0; char *Feature = NULL; float Dthres = 0; int Tpen = 0; int Coff = 1; unsigned long TotLen = 0; unsigned long TotCnts = 0; int TotSeqs = 0; int NumSegs = 0; unsigned long TotFragLen = 0; unsigned long AvFragLen = 0; unsigned long AvCntsFrag = 0; unsigned long TotCntsFrag = 0; void split_seq(int len, int end, int last_seq) { /* Partitioning Algorithm */ /* X(i) partitioning ends in over-representation state Y(i) partitioning ends in under-representation state q(i) trace-back codes Recursion X(i) = max X(i-1) + cnt(i) - (pos(i)-pos(i-1))*Dthres, (XX) Y(i-1) + Tpen + cnt(i) - Dthres + (pos(i)-pos(i-1)-1)*Dthres, (YX) X(i-1) + 2*Tpen + cnt(i) - Dthres + (pos(i)-pos(i-1)-2)*Dthres (ZX) Y(i) = max X(i-1) + Tpen + (pos(i)-pos(i-1))*Dthres - cnt(i), (XY) Y(i-1) + (pos(i)-pos(i-1))*Dthres - cnt(i) (YY) */ int i, j, k = 1; size_t mLen1 = BUF_SIZE; size_t mLen2 = BUF_SIZE; size_t mLen3 = BUF_SIZE; float Xmax = 0; float Ymax = 0; float XX, YX, ZX, YY, XY; int *q; int **Reg; int *cnt; int seq_len = 0; int tot_cnts = 0; if (( q = (int*)calloc(mLen1, sizeof(int))) == NULL) { perror("split_seq: malloc"); exit(1); } if (( cnt = (int*)calloc(mLen2, sizeof(int))) == NULL) { perror("split_seq: malloc"); exit(1); } if (( Reg = (int**)calloc((size_t)2, sizeof(int*))) == NULL) { perror("split_seq: malloc"); exit(1); } for(i = 0; i < 2; i++) { Reg[i] = 
(int*)calloc(mLen3, sizeof(int)); } if (end == 0) { ref_ft.pos[len + 1] = ref_ft.pos[len]; ref_ft.name[len + 1] = ref_ft.name[len]; } else ref_ft.pos[len + 1] = end; ref_ft.cnt[len + 1] = 0; q[0] = 0; /* Fill in trace-back codes q[i] */ /* 0 Xmax = XX ; Ymax = YY 1 Xmax = ZX ; Ymax = YY 2 Xmax = YX ; Ymax = YY 3 Xmax = XX ; Ymax = XY 4 Xmax = ZX ; Ymax = XY 5 Xmax = YX ; Ymax = XY */ for (i = 1; i <= len + 1; i++) { XX = Xmax + ref_ft.cnt[i] - (ref_ft.pos[i] - ref_ft.pos[i - 1])*Dthres; YX = Ymax + Tpen + ref_ft.cnt[i] - Dthres + (ref_ft.pos[i] - ref_ft.pos[i - 1] - 1)*Dthres; ZX = Xmax + 2*Tpen + ref_ft.cnt[i] - Dthres + (ref_ft.pos[i] - ref_ft.pos[i - 1] - 1)*Dthres; YY = Ymax - ref_ft.cnt[i] + (ref_ft.pos[i] - ref_ft.pos[i - 1])*Dthres; XY = Xmax + Tpen - ref_ft.cnt[i] + (ref_ft.pos[i] - ref_ft.pos[i - 1])*Dthres; if ((unsigned int)i >= mLen1) { mLen1 *= 2; if (( q = (int *)realloc(q, mLen1 * sizeof(int))) == NULL) { perror("split_seq: realloc"); exit(1); } } q[i] = 0; if ((YX < XX) && (ZX < XX)) { Xmax = XX; } else if (YX < ZX) { Xmax = ZX; q[i] += 1; } else { Xmax = YX; q[i] += 2; } if (XY < YY) { Ymax = YY; } else { Ymax = XY; q[i] += 3; } } /* Trace-back */ k = 1; if (Xmax > Ymax) { Reg[0][k] = 0; Reg[1][k] = ref_ft.pos[len + 1]; } else { Reg[0][k] = 0; Reg[1][k] = 0; } for (i = len + 1; i > 0; i--) { if (Reg[1][k] == 0) { if (q[i] > 2) Reg[1][k] = ref_ft.pos[i - 1]; /* Enter a signal rich region (we are in a poor region) */ } else { if (q[i] > 2) q[i] -= 3; if ((unsigned int)k >= mLen2) { mLen2 *= 2; if ((cnt = (int*)realloc(cnt , mLen2 * sizeof(int))) == NULL) { perror("split_seq: realloc"); exit(1); } memset((void *)&cnt[k], 0, mLen2 * sizeof(int) / 2); } if (q[i] == 1) { /* Enter signal-rich region for the first time. 
We first close the previous one */ Reg[0][k] = ref_ft.pos[i]; cnt[k] += ref_ft.cnt[i]; k++; if ((unsigned int)k >= mLen3) { mLen3 *= 2; for(j = 0; j < 2; j++) { if ((Reg[j] = (int*)realloc(Reg[j], mLen3 * sizeof(int))) == NULL) { perror("split_seq: realloc"); exit(1); } } } Reg[0][k] = 0; Reg[1][k] = ref_ft.pos[i - 1]; } else if (q[i] == 2) { /* We are at the end of a signal-rich region we are closing it, without creating a new one (we're going into a signal-poor region) */ Reg[0][k] = ref_ft.pos[i]; cnt[k] += ref_ft.cnt[i]; k++; if ((unsigned int)k >= mLen3) { mLen3 *= 2; for(j = 0; j < 2; j++) { if ((Reg[j] = (int*)realloc(Reg[j], mLen3 * sizeof(int))) == NULL) { perror("split_seq: realloc"); exit(1); } } } Reg[0][k] = 0; Reg[1][k] = 0; } else { /* Extend signal-rich region */ cnt[k] += ref_ft.cnt[i]; } } #ifdef DEBUG fprintf (stderr, "%d, %d, %d, %d, %d\n", i, k, Reg[0][k], Reg[1][k], cnt[k]); #endif } /* Print out signal enriched regions */ if (Reg[1][k] == 0) k--; if (Reg[0][k] == 0) Reg[0][k] = 1; #ifdef DEBUG fprintf(stderr, "Printout number of Regions: k= %d\n", k); #endif for (i = k; i >= 1; i--) { /* Print out (SGA format) */ /* printf("%s\t%s\t%d\t%c\t%d\n", ref_ft.seq_id, ref_ft.name[i], Reg[0][i], '+', cnt[i]); printf("%s\t%s\t%d\t%c\t%d\n", ref_ft.seq_id, ref_ft.name[i], Reg[1][i], '-', cnt[i]); */ printf("%s\t%s\t%d\t%c\t%d\n", ref_ft.seq_id, ref_ft.name[i], Reg[0][i], '+', cnt[i]); printf("%s\t%s\t%d\t%c\t%d\n", ref_ft.seq_id, ref_ft.name[i], Reg[1][i], '-', cnt[i]); seq_len += Reg[1][i] - Reg[0][i]; tot_cnts += cnt[i]; } if (end != 0) printf("%s\t%s\t%d\t%c\t%d\n", ref_ft.seq_id, ref_ft.name[len + 1], ref_ft.pos[len + 1], '0', 1); if (k && (len != 0)) { /* fprintf(stderr,"# %s : Number of segments %d, Tot. length %d, Ave. length %d, Tot. 
counts %d\n", ref_ft.seq_id, k, seq_len, seq_len/k, tot_cnts); */ NumSegs += k; TotFragLen += seq_len; AvFragLen += seq_len/k; TotCntsFrag += tot_cnts; AvCntsFrag += tot_cnts/k; } if (last_seq) { fprintf(stderr,"# Num of Sequences : %d , Total Sequence Length : %lu (bp) , Total Counts : %lu , Total Num of Fragments : %d\n\n", TotSeqs, TotLen, TotCnts, NumSegs); fprintf(stderr,"# Total Fragment Length : %lu , Average Fragment Length : %lu (bp) , Percentage of Total Length : %2.5f\n\n", TotFragLen, AvFragLen/TotSeqs, (float)TotFragLen/(float)TotLen); fprintf(stderr,"# Percentage of Total Counts : %2.5f , Average Num of Counts per Fragment : %lu , Num of Counts per bp : %2.5f \n", (float)TotCntsFrag/(float)TotCnts, AvCntsFrag/TotSeqs, (float)TotCntsFrag/(float)TotFragLen); } free(q); free(cnt); for(j = 0; j < 2; j++) { if (Reg[j] != NULL) free(Reg[j]); } free(Reg); } int process_sga(FILE *input, char *iFile) { char seq_id_prev[SEQ_ID] = ""; int pos, cnt, end = 0, last_pos = 0; size_t mLen = BUF_SIZE; char *s, *res, *buf; size_t bLen = LINE_SIZE; unsigned int k = 0; unsigned int i = 0; int end_flag = 0; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = 
(int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft.strand = (char*)calloc(mLen, sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft.cnt = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft.name = (char**)calloc(mLen, sizeof(*(ref_ft.name)))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((s = malloc(bLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } #ifdef DEBUG int c = 1; #endif /* while (fscanf(f,"%s %s %d %c %d", seq_id, ft, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) bLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char ft[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int j = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == bLen && s[cLen - 1] != '\n') { bLen *= 2; if ((s = realloc(s, bLen)) == NULL) { perror("process_file: realloc"); exit(1); } res = fgets(s + cLen, (int) (bLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (j >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); exit(1); } seq_id[j++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ j = 0; while (*buf != 0 && !isspace(*buf)) { if (j >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); exit(1); } ft[j++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ j = 0; while (isdigit(*buf)) { if (j >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); exit(1); } position[j++] = *buf++; } position[j] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ j = 0; while (isdigit(*buf)) { if (j >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); exit(1); } count[j++] = *buf++; 
} count[j] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ j = 0; while (*buf != 0) { if (j >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); exit(1); } ext[j++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s (%c) Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, ft, strand, pos, cnt, ext); #endif if (k >= mLen - 1) { mLen *= 2; #ifdef DEBUG fprintf(stderr, "reallocating memory for ref_ft.pos ref_ft.strand ref_ft.cnt (k=%d, size=%d)\n", c, mLen); #endif if ((ref_ft.pos = (int *)realloc(ref_ft.pos, mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ref_ft.cnt = (int *)realloc(ref_ft.cnt, mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ref_ft.strand = (char *)realloc(ref_ft.strand, mLen * sizeof(char))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ref_ft.name = (char**)realloc(ref_ft.name, mLen * sizeof(*(ref_ft.name)))) == NULL) { perror("process_sga: malloc"); exit(1); } } /* Check Chromosome BEGINNING, split sequences in regions and printout results*/ if (strcmp(seq_id, seq_id_prev) != 0) { split_seq((int)k, end, 0); for (i = 1; i <= k; i++) { if (ref_ft.name[i] != NULL) free(ref_ft.name[i]); } if (end_flag) { if (ref_ft.name[k + 1] != NULL) free(ref_ft.name[k + 1]); } k = 0; end = 0; end_flag = 0; TotLen += last_pos; TotSeqs ++; ref_ft.pos[0] = 1; ref_ft.cnt[0] = 0; strcpy(seq_id_prev, seq_id); strcpy(ref_ft.seq_id, seq_id); } if (ref_ft.ft == NULL) { k++; ref_ft.name[k] = malloc(strlen(ft) + 1); strcpy(ref_ft.name[k], ft); ref_ft.strand[k] = strand; ref_ft.pos[k] = pos; if (cnt > Coff) ref_ft.cnt[k] = Coff; else ref_ft.cnt[k] = cnt; TotCnts += ref_ft.cnt[k]; } else if (ref_ft.ft_str == '\0') { if (strcmp(ft, ref_ft.ft) == 0) { k++; ref_ft.name[k] = malloc(strlen(ft) + 1); strcpy(ref_ft.name[k], ft); ref_ft.strand[k] = strand; ref_ft.pos[k] = pos; if (cnt > Coff) ref_ft.cnt[k] = Coff; else ref_ft.cnt[k] = cnt; TotCnts += ref_ft.cnt[k]; } } 
else if (strand_flag == 1) { if (strand == ref_ft.ft_str) { k++; ref_ft.name[k] = malloc(strlen(ft) + 1); strcpy(ref_ft.name[k], ft); ref_ft.strand[k] = strand; ref_ft.pos[k] = pos; if (cnt > Coff) ref_ft.cnt[k] = Coff; else ref_ft.cnt[k] = cnt; TotCnts += ref_ft.cnt[k]; } } else if (strcmp(ft, ref_ft.ft) == 0 && strand == ref_ft.ft_str) { k++; ref_ft.name[k] = malloc(strlen(ft) + 1); strcpy(ref_ft.name[k], ft); ref_ft.strand[k] = strand; ref_ft.pos[k] = pos; if (cnt > Coff) ref_ft.cnt[k] = Coff; else ref_ft.cnt[k] = cnt; TotCnts += ref_ft.cnt[k]; } if (strcmp(ft, "END") == 0 ) { ref_ft.name[k + 1] = malloc(strlen(ft) + 1); strcpy(ref_ft.name[k + 1], ft); end = pos; end_flag = 1; } last_pos = pos; #ifdef DEBUG fprintf(stderr,"k = [%d] pos = %d cnts = %d\n", k, ref_ft.pos[k], ref_ft.cnt[k]); #endif } /* End of While */ free(s); /* Partition last chromosome */ TotLen += last_pos; split_seq((int)k, end, 1); for (i = 1; i <= k; i++) { if (ref_ft.name[i] != NULL) free(ref_ft.name[i]); } if (end_flag) { if (ref_ft.name[k + 1] != NULL) free(ref_ft.name[k + 1]); } free(ref_ft.name); free(ref_ft.pos); free(ref_ft.cnt); free(ref_ft.strand); if (input != stdin) { fclose(input); } return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; while (1) { int c = getopt(argc, argv, "f:dhs:p:c:"); if (c == -1) break; switch (c) { case 'f': Feature = optarg; break; case 'd': options.debug = 1; break; case 'h': options.help = 1; break; case 's': Dthres = atof(optarg); break; case 'p': Tpen = atoi(optarg); break; case 'c': Coff = atoi(optarg); break; default: printf ("?? getopt returned character code 0%o ??\n", c); } } if (optind > argc || options.help == 1 || Dthres == 0 || Tpen == 0 || Coff < 0) { fprintf(stderr, "Usage: %s [options] [-f ] -s -p [<] \n" " - version %s\n" " where options are:\n" " \t\t -h Show this help text\n" " \t\t -d Print debug information and check SGA file\n" " \t\t -c Count Cut-off (default is %d). 
It must be >= 0.\n" "\n\tPartitioning Tool.\n" "\n\tThe program reads a ChIP-seq data file (or from stdin [<]) in SGA format (),\n" "\tand heuristically partitions the data corresponding to a specific feature\n" "\t, if the latter is given.\n" "\tThe parameter is a name that corresponds to the second field of the SGA file.\n" "\tIt might optionally include the strand specification (+|-).\n" "\tIf no feature is given then all input tags are processed.\n" "\tThe SGA input file MUST BE sorted by sequence name (or chromosome id), position,\n" "\tand strand.\n" "\tOne should check the input SGA file with the following command:\n" "\tsort -s -c -k1,1 -k3,3n -k4,4 .\n\n" "\tIn debug mode (-d), the program performs the sorting order check.\n\n" "\tInput parameters are the density threshold (),\n" "\tthe transition penalty (), which are used to\n" "\tsplit the data in regions of high density count.\n" "\tA value can be optionally specified as a cut-off for the feature counts.\n" "\tThe output is an SGA-formatted list containing the regions of interest.\n" "\tThe program also generates (to stderr) a statistical report with the\n" "\tfollowing information:\n" "\t - Total number of processed sequences, total DNA length, and\n" "\t total number of fragments;\n" "\t - Total length of fragments, average fragment length, and\n" "\t percentage of total DNA length;\n" "\t - Percentage of total counts, average number of counts, and\n" "\t number of count per bp (count density).\n\n", argv[0], VERSION, Coff); return 1; } if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.debug) { fprintf(stderr, " Arguments:\n"); fprintf(stderr, " Selected Feature : %s\n", Feature); fprintf(stderr, " 
Density Threshold : %f\n\n", Dthres); fprintf(stderr, " Transition Penalty : %d\n\n", Tpen); fprintf(stderr, " Count Cut-off : %d\n\n", Coff); } /* Process Feature Specs */ if (Feature == NULL) { ref_ft.ft = NULL; /* Process all features */ ref_ft.ft_str = '\0'; } else { ref_ft.ft = malloc(FT_MAX * sizeof(char)); char *s = Feature; int i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Description too long \"%s\" \n", Feature); return 1; } ref_ft.ft[i++] = *s++; } ref_ft.ft[i] = '\0'; ref_ft.ft_str = '\0'; while (isspace(*s++)) ref_ft.ft_str = *s; } if (options.debug) { if (ref_ft.ft_str == '\0' && ref_ft.ft == NULL) { fprintf(stderr, "Feature Specs: ALL -> Process all features\n"); } else if (ref_ft.ft_str == '\0') { fprintf(stderr, "Feature Specs: Feature name : %s\n", ref_ft.ft); } else { fprintf(stderr, "Feature Specs: Feature name/str : %s %c\n", ref_ft.ft, ref_ft.ft_str); } } if ( ref_ft.ft != NULL && (strcmp(ref_ft.ft, "+") == 0 || strcmp(ref_ft.ft, "-") == 0)) { strcpy(&ref_ft.ft_str, ref_ft.ft); strand_flag = 1; if (options.debug) fprintf(stderr, "Feature Specs: Process all features on str : %c\n", ref_ft.ft_str); } if (process_sga(input, argv[optind++]) != 0) { return 1; } free(ref_ft.ft); return 0; } chip-seq/tools/0000744022744200262270000000000013433535237014225 5ustar ambrosingr-bucherchip-seq/tools/sga2gff.pl0000744022744200262270000002446711553014432016107 0ustar ambrosingr-bucher#!/usr/bin/perl # converts sga format to gff format # usage: ./sga2gff [-l taglen -x -e] file.sga use strict; use Getopt::Long; use Storable qw (retrieve nstore); # package to store persistently variables in files [http://search.cpan.org/author/AMS/Storable-2.07/Storable.pm] #my %opts; #getopt('lf:', \%opts); my %opt; my @options = ("help", "h", "taglen=i", "l=i", "x", "ext", "e", "db=s"); my $file = ""; my $taglen = 0; my $expand = 0; my $ext_flag = 0; #my $DB = "/home/local/db/genome/"; my $DB = "/db/genome/"; if( ! 
GetOptions( \%opt, @options ) ) { &Usage(); } &Usage() if defined($opt{'help'}) || defined($opt{'h'}); &Usage() if $#ARGV < 0; #define options if ($opt{'db'} ne '') { $DB = $opt{'db'}; } open FH, $DB."chro_idx.nstorage" or die "Wrong Chrom Id Storable file $DB.\"chro_idx.nstorage\": $!"; # hash defining chromosome from SV of chromosomes in current genome assemblies my $chr2SV = retrieve($DB."chro_idx.nstorage"); if ($opt{'l'} ne '') { $taglen = $opt{'l'}; } if ($opt{'taglen'} ne '') { $taglen = $opt{'taglen'}; } if ($opt{'x'} ne '') { $expand = 1; } if ($opt{'e'} ne '') { $ext_flag = 1; } if ($opt{'ext'} ne '') { $ext_flag = 1; } $file = $ARGV[0]; #print "SGA file : $file\n"; #print "Options: taglen : $taglen, Expand : $expand\n"; # open the SGA file open (my $SGA, "$file") || die "can't open $file : $!"; my $start; my $end; my $source = "ChIPSeq"; my $firstline = <$SGA>; my @f = split(/\t/,$firstline); chomp $f[0]; chomp $f[4]; if ($ext_flag) { chomp $f[5]; if (not defined($f[5])) { print STDERR "The extension option is incompatible with the format of your SGA file : it must have the 'description' field defined!/n"; exit(1); } } if ($f[0] =~ /^chr/) { print "##gff-version 3\n"; if ($f[3] eq '0') { $f[3] = '.'; $start=$f[2]; $end=$start; } if ($f[3] eq '+') { $start=$f[2]; $end=$start + $taglen; } elsif($f[3] eq '-') { $start=$f[2] - $taglen; $end=$f[2]; } if ($ext_flag) { if ($expand) { for (my $i = $f[4]; $i > 0; $i--) { print "$f[0]\t$f[1]\t$f[5]\t$start\t$end\t.\t$f[3]\t.\t1\n"; } } else { print "$f[0]\t$f[1]\t$f[5]\t$start\t$end\t$f[4]\t$f[3]\t.\t1\n"; } print_gff_1_ext($SGA); } else { if ($expand) { for (my $i = $f[4]; $i > 0; $i--) { print "$f[0]\t$source\t$f[1]\t$start\t$end\t.\t$f[3]\t.\t1\n"; } } else { print "$f[0]\t$source\t$f[1]\t$start\t$end\t$f[4]\t$f[3]\t.\t1\n"; } print_gff_1($SGA); } } elsif ($f[0] =~ /N[CT]_\S+\.\d+/ && exists($chr2SV->{$f[0]})) { print "##gff-version 3\n"; if ($f[3] eq '0') { $f[3] = '.'; $start=$f[2]; $end=$start; } if ($f[3] 
eq '+') { $start=$f[2]; $end=$start + $taglen; } elsif($f[3] eq '-') { $start=$f[2] - $taglen; $end=$f[2]; } if ($ext_flag) { if ($expand) { for (my $i = $f[4]; $i > 0; $i--) { print $chr2SV->{$f[0]},"\t$f[1]\t$f[5]\t$start\t$end\t.\t$f[3]\t.\t1\n"; } } else { print $chr2SV->{$f[0]},"\t$f[1]\t$f[5]\t$start\t$end\t$f[4]\t$f[3]\t.\t1\n"; } print_gff_2_ext($SGA); } else { if ($expand) { for (my $i = $f[4]; $i > 0; $i--) { print $chr2SV->{$f[0]},"\t$source\t$f[1]\t$start\t$end\t.\t$f[3]\t.\t1\n"; } } else { print $chr2SV->{$f[0]},"\t$source\t$f[1]\t$start\t$end\t$f[4]\t$f[3]\t.\t1\n"; } print_gff_2($SGA); } } else { print STDERR "Unrecognized sequence version $f[0] : please, check the chromosome identifier!\n"; exit(1); } close ($SGA); sub print_gff_1 { my ($fh) = @_; my $count = 1; if ($expand) { while(<$fh>){ my $lin=$_; chomp $lin; my @ar=split(/\t/,$lin); $count++; if ($ar[3] eq '0') { $ar[3] = '.'; $start=$ar[2]; $end=$start; } if ($ar[3] eq '+') { $start=$ar[2]; $end=$start + $taglen; } elsif($ar[3] eq '-') { $start=$ar[2] - $taglen; $end=$ar[2]; } for (my $i = $ar[4]; $i > 0; $i--) { print "$ar[0]\t$source\t$ar[1]\t$start\t$end\t.\t$ar[3]\t.\t$count\n"; } } } else { while(<$fh>){ my $lin=$_; chomp $lin; my @ar=split(/\t/,$lin); # if (exists($chr2SV->{$species}->{$ar[0]})){ $count++; if ($ar[3] eq '0') { $ar[3] = '.'; $start=$ar[2]; $end=$start; } if ($ar[3] eq '+') { $start=$ar[2]; $end=$start + $taglen; } elsif($ar[3] eq '-') { $start=$ar[2] - $taglen; $end=$ar[2]; } print "$ar[0]\t$source\t$ar[1]\t$start\t$end\t$ar[4]\t$ar[3]\t.\t$count\n"; # } # else { # my $line = $count + 1; # print STDERR "line: $line - Sequence version $ar[0] not in current genome assembly!\n"; # } } } } sub print_gff_1_ext { my ($fh) = @_; my $count = 1; if ($expand) { while(<$fh>){ my $lin=$_; chomp $lin; my @ar=split(/\t/,$lin); $count++; if ($ar[3] eq '0') { $ar[3] = '.'; $start=$ar[2]; $end=$start; } if ($ar[3] eq '+') { $start=$ar[2]; $end=$start + $taglen; } elsif($ar[3] eq '-') { 
$start=$ar[2] - $taglen; $end=$ar[2]; } for (my $i = $ar[4]; $i > 0; $i--) { print "$ar[0]\t$ar[1]\t$ar[5]\t$start\t$end\t.\t$ar[3]\t.\t$count\n"; } } } else { while(<$fh>){ my $lin=$_; chomp $lin; my @ar=split(/\t/,$lin); # if (exists($chr2SV->{$species}->{$ar[0]})){ $count++; if ($ar[3] eq '0') { $ar[3] = '.'; $start=$ar[2]; $end=$start; } if ($ar[3] eq '+') { $start=$ar[2]; $end=$start + $taglen; } elsif($ar[3] eq '-') { $start=$ar[2] - $taglen; $end=$ar[2]; } print "$ar[0]\t$ar[1]\t$ar[5]\t$start\t$end\t$ar[4]\t$ar[3]\t.\t$count\n"; # } # else { # my $line = $count + 1; # print STDERR "line: $line - Sequence version $ar[0] not in current genome assembly!\n"; # } } } } sub print_gff_2 { my ($fh) = @_; my $count = 1; if ($expand) { while(<$fh>){ my $lin=$_; chomp $lin; my @ar=split(/\t/,$lin); $count++; if ($ar[3] eq '0') { $ar[3] = '.'; $start=$ar[2]; $end=$start; } if ($ar[3] eq '+') { $start=$ar[2]; $end=$start + $taglen; } elsif($ar[3] eq '-') { $start=$ar[2] - $taglen; $end=$ar[2]; } for (my $i = $ar[4]; $i > 0; $i--) { print $chr2SV->{$ar[0]},"\t$source\t$ar[1]\t$start\t$end\t.\t$ar[3]\t.\t$count\n"; } } } else { while(<$fh>){ my $lin=$_; chomp $lin; my @ar=split(/\t/,$lin); # if (exists($chr2SV->{$ar[0]})){ $count++; if ($ar[3] eq '0') { $ar[3] = '.'; $start=$ar[2]; $end=$start; } if ($ar[3] eq '+') { $start=$ar[2]; $end=$start + $taglen; } elsif($ar[3] eq '-') { $start=$ar[2] - $taglen; $end=$ar[2]; } print $chr2SV->{$ar[0]},"\t$source\t$ar[1]\t$start\t$end\t$ar[4]\t$ar[3]\t.\t$count\n"; # } # else { # my $line = $count + 1; # print STDERR "line: $line - Sequence version $ar[0] not in current genome assembly!\n"; # } } } } sub print_gff_2_ext { my ($fh) = @_; my $count = 1; if ($expand) { while(<$fh>){ my $lin=$_; chomp $lin; my @ar=split(/\t/,$lin); $count++; if ($ar[3] eq '0') { $ar[3] = '.'; $start=$ar[2]; $end=$start; } if ($ar[3] eq '+') { $start=$ar[2]; $end=$start + $taglen; } elsif($ar[3] eq '-') { $start=$ar[2] - $taglen; $end=$ar[2]; } for (my 
$i = $ar[4]; $i > 0; $i--) { print $chr2SV->{$ar[0]},"\t$ar[1]\t$ar[5]\t$start\t$end\t.\t$ar[3]\t.\t$count\n"; } } } else { while(<$fh>){ my $lin=$_; chomp $lin; my @ar=split(/\t/,$lin); # if (exists($chr2SV->{$ar[0]})){ $count++; if ($ar[3] eq '0') { $ar[3] = '.'; $start=$ar[2]; $end=$start; } if ($ar[3] eq '+') { $start=$ar[2]; $end=$start + $taglen; } elsif($ar[3] eq '-') { $start=$ar[2] - $taglen; $end=$ar[2]; } print $chr2SV->{$ar[0]},"\t$ar[1]\t$ar[5]\t$start\t$end\t$ar[4]\t$ar[3]\t.\t$count\n"; # } # else { # my $line = $count + 1; # print STDERR "line: $line - Sequence version $ar[0] not in current genome assembly!\n"; # } } } } sub Usage { print STDERR <<"_USAGE_"; sga2gff.pl [options] where options are: -h|--help Show this stuff --db Use to locate Chrom Id Storable File 'chro_idx.nstorage' -l|--taglen Set Read length -x Expand SGA lines into multiple GFF lines -e|--ext Use SGA 6th field to set GFF feature field and store SGA feature into GFF source field _USAGE_ exit(1); } 1; chip-seq/tools/gff2sga.pl0000744022744200262270000004601712276126030016103 0ustar ambrosingr-bucher#!/usr/bin/perl # converts gff format to sga format # usage: ./gff2sga.pl [<-a feature> <-s species> <-c centered> <-u unoriented> <-x extended SGA>] use strict; use Getopt::Long; use Storable qw (retrieve nstore); # package to store persistently variables in files [http://search.cpan.org/author/AMS/Storable-2.07/Storable.pm] use Math::Round; #my %opts; #getopt('asf:', \%opts); # -a, -s, & -f take arg. Values in %opts, Hash keys will be the switch names my %opt; my @options = ( "help", "h", "species=s", "s=s", "feature=s", "f=s", "c", "u", "x", "db=s"); my $file = ""; my $species = ""; my $feature = ""; my $centered = 0; my $unoriented = 0; my $extended = 0; my $ext_sc = 0; my $score = 0; #my $DB = "/home/local/db/genome/"; my $DB = "/db/genome/"; if( ! 
GetOptions( \%opt, @options ) ) { &Usage(); } &Usage() if defined($opt{'help'}) || defined($opt{'h'}); &Usage() if $#ARGV < 0; if ($opt{'db'} ne '') { $DB = $opt{'db'}; } open FH, $DB."chro_idx.nstorage" or die "Wrong Chrom Id Storable file $DB.\"chro_idx.nstorage\": $!"; my $chr2SV = retrieve($DB."chro_idx.nstorage"); if ($opt{'f'} ne '') { $feature = $opt{'f'}; } if ($opt{'feature'} ne '') { $feature = $opt{'feature'}; } if ($opt{'s'} ne '') { $species = $opt{'s'}; } if ($opt{'species'} ne '') { $species = $opt{'species'}; } if ($opt{'c'} ne '') { $centered = 1; } if ($opt{'u'} ne '') { $unoriented = 1; } if ($opt{'x'} ne '') { $extended = 1; } $file = $ARGV[0]; #print "GFF file : $file\n"; #print "Options: feature : $feature, Species $species, Centered: $centered, Unoriented : $unoriented\n"; my $wrong_seqs = 0; # open the GFF file open (my $GFF3, "$file") || die "can't open $file : $!"; if ($centered) { if ($unoriented) { if ($extended) { print_peak_sga_ext($GFF3); } else { print_peak_sga($GFF3); } } else { if ($extended) { print_peak_sga_oriented_ext($GFF3); } else { print_peak_sga_oriented($GFF3); } } } else { if ($extended) { print_sga_ext($GFF3); } else { print_sga($GFF3); } } close ($GFF3); sub print_sga { my ($fh) = @_; if ($species ne '') { while(<$fh>){ # chr2 NimbleScan 1859802:RajiIPH3/SJOIPH3:BLOCK1 132843487 132843531 -0.11 + . seq_id=18S_CHR2:132843487-132849513;probe_id=CHR0200P132843487;count=1 chomp; ### skip comments next if (/^\#/); ### skip blank lines next if (/^\s*$/); ################### ## syntax checks ## ################### my @cols = split(/\t/); ### we should have at least 8 columns. 
if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; #$wrong_seqs++; next; } # $cols[5]=1; if (($cols[5] != int($cols[5])) || ($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } unless (exists($chr2SV->{$species}->{$cols[0]}) || $cols[0] =~ /N[CT]_\S+\.\d+/){ print STDERR "Chromosome $cols[0] without sequence accession, line $.\n"; next; } my $id = ""; if ($cols[0] =~ /N[CT]_\S+\.\d+/) { $id = $cols[0]; } else { $id = $chr2SV->{$species}->{$cols[0]}; } if ($feature ne '') { $cols[2] = $feature; } if ($cols[6] eq '+'){ if ($ext_sc) { print $id,"\t$cols[2]\t$cols[3]\t$cols[6]\t$cols[5]\t$score\n"; } else { print $id,"\t$cols[2]\t$cols[3]\t$cols[6]\t$cols[5]\n"; } } elsif ($cols[6] eq '-'){ if ($ext_sc) { print $id,"\t$cols[2]\t$cols[4]\t$cols[6]\t$cols[5]\t$score\n"; } else { print $id,"\t$cols[2]\t$cols[4]\t$cols[6]\t$cols[5]\n"; } } elsif ($cols[6] eq '.' || $cols[6] eq '?'){ if ($ext_sc) { print $id,"\t$cols[2]\t$cols[4]\t0\t1\t$score\n"; } else { print $id,"\t$cols[2]\t$cols[4]\t0\t1\n"; } } undef @cols; } } else { #if (species) not defined while(<$fh>){ # chr2 NimbleScan 1859802:RajiIPH3/SJOIPH3:BLOCK1 132843487 132843531 -0.11 + . seq_id=18S_CHR2:132843487-132849513;probe_id=CHR0200P132843487;count=1 chomp; ### skip comments next if (/^\#/); ### skip blank lines next if (/^\s*$/); ################### ## syntax checks ## ################### my @cols = split(/\t/); ### we should have 8 columns. 
if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; next; } if (($cols[5] != int($cols[5])) || ($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } if ($feature ne '') { $cols[2] = $feature; } if ($cols[6] eq '+'){ if ($ext_sc) { print $cols[0],"\t$cols[2]\t$cols[3]\t$cols[6]\t$cols[5]\t$score\n"; } else { print $cols[0],"\t$cols[2]\t$cols[3]\t$cols[6]\t$cols[5]\n"; } } elsif ($cols[6] eq '-'){ if ($ext_sc) { print $cols[0],"\t$cols[2]\t$cols[4]\t$cols[6]\t$cols[5]\t$score\n"; } else { print $cols[0],"\t$cols[2]\t$cols[4]\t$cols[6]\t$cols[5]\n"; } } elsif ($cols[6] eq '.' || $cols[6] eq '?'){ if ($ext_sc) { print $cols[0],"\t$cols[2]\t$cols[4]\t0\t1\t$score\n"; } else { print $cols[0],"\t$cols[2]\t$cols[4]\t0\t1\n"; } } undef @cols; } } } sub print_sga_ext { my ($fh) = @_; if ($species ne '') { while(<$fh>){ # chr2 NimbleScan 1859802:RajiIPH3/SJOIPH3:BLOCK1 132843487 132843531 -0.11 + . seq_id=18S_CHR2:132843487-132849513;probe_id=CHR0200P132843487;count=1 chomp; ### skip comments next if (/^\#/); ### skip blank lines next if (/^\s*$/); ################### ## syntax checks ## ################### my @cols = split(/\t/); ### we should have at least 8 columns. 
if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; #$wrong_seqs++; next; } # $cols[5]=1; if (($cols[5] != int($cols[5])) || ($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } unless (exists($chr2SV->{$species}->{$cols[0]}) || $cols[0] =~ /N[CT]_\S+\.\d+/){ print STDERR "Chromosome $cols[0] without sequence accession, line $.\n"; next; } my $id = ""; if ($cols[0] =~ /N[CT]_\S+\.\d+/) { $id = $cols[0]; } else { $id = $chr2SV->{$species}->{$cols[0]}; } if ($feature ne '') { $cols[1] = $feature; } if ($cols[6] eq '+'){ print $id,"\t$cols[1]\t$cols[3]\t$cols[6]\t$cols[5]\t$cols[2] $score\n"; } elsif ($cols[6] eq '-'){ print $id,"\t$cols[1]\t$cols[4]\t$cols[6]\t$cols[5]\t$cols[2] $score\n"; } elsif ($cols[6] eq '.' || $cols[6] eq '?'){ print $id,"\t$cols[1]\t$cols[4]\t0\t1\t$cols[2] $score\n"; } undef @cols; } } else { #if (species) not defined while(<$fh>){ # chr2 NimbleScan 1859802:RajiIPH3/SJOIPH3:BLOCK1 132843487 132843531 -0.11 + . seq_id=18S_CHR2:132843487-132849513;probe_id=CHR0200P132843487;count=1 chomp; ### skip comments next if (/^\#/); ### skip blank lines next if (/^\s*$/); ################### ## syntax checks ## ################### my @cols = split(/\t/); ### we should have 8 columns. if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; next; } if (($cols[5] != int($cols[5])) || ($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } if ($feature ne '') { $cols[1] = $feature; } if ($cols[6] eq '+'){ print $cols[0],"\t$cols[1]\t$cols[3]\t$cols[6]\t$cols[5]\t$cols[2] $score\n"; } elsif ($cols[6] eq '-'){ print $cols[0],"\t$cols[1]\t$cols[4]\t$cols[6]\t$cols[5]\t$cols[2] $score\n"; } elsif ($cols[6] eq '.' 
|| $cols[6] eq '?'){ print $cols[0],"\t$cols[1]\t$cols[4]\t0\t1\t$cols[2] $score\n"; } undef @cols; } } } sub print_peak_sga { my ($fh) = @_; if ($species ne '') { while(<$fh>){ # chr2 NimbleScan 1859802:RajiIPH3/SJOIPH3:BLOCK1 132843487 132843531 -0.11 + . seq_id=18S_CHR2:132843487-132849513;probe_id=CHR0200P132843487;count=1 chomp; next if (/^\#/); next if (/^\s*$/); my @cols = split(/\t/); ### we should have at least 8 columns. if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; #$wrong_seqs++; next; } if (($cols[5] != int($cols[5])) || ($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } unless (exists($chr2SV->{$species}->{$cols[0]}) || $cols[0] =~ /N[CT]_\S+\.\d+/){ print STDERR "Chromosome $cols[0] without sequence accession, line $.\n"; next; } my $id = ""; if ($cols[0] =~ /N[CT]_\S+\.\d+/) { $id = $cols[0]; } else { $id = $chr2SV->{$species}->{$cols[0]}; } if ($feature ne '') { $cols[2] = $feature; } my $center = int(($cols[3]+$cols[4])/2); if ($ext_sc) { print $id,"\t$cols[2]\t$center\t0\t$cols[5]\t$score\n"; } else { print $id,"\t$cols[2]\t$center\t0\t$cols[5]\n"; } undef @cols; } } else { #if (species) not defined while(<$fh>){ chomp; next if (/^\#/); next if (/^\s*$/); my @cols = split(/\t/); ### we should have 8 columns. if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; next; } if (($cols[5] != int($cols[5]))||($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } if ($feature ne '') { $cols[2] = $feature; } my $center = int(($cols[3]+$cols[4])/2); if ($ext_sc) { print "$cols[0]\t$cols[2]\t$center\t0\t$cols[5]\t$score\n"; } else { print "$cols[0]\t$cols[2]\t$center\t0\t$cols[5]\n"; } undef @cols; } } } sub print_peak_sga_ext { my ($fh) = @_; if ($species ne '') { while(<$fh>){ # chr2 NimbleScan 1859802:RajiIPH3/SJOIPH3:BLOCK1 132843487 132843531 -0.11 + . 
seq_id=18S_CHR2:132843487-132849513;probe_id=CHR0200P132843487;count=1 chomp; next if (/^\#/); next if (/^\s*$/); my @cols = split(/\t/); ### we should have at least 8 columns. if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; #$wrong_seqs++; next; } if (($cols[5] != int($cols[5]))||($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } unless (exists($chr2SV->{$species}->{$cols[0]}) || $cols[0] =~ /N[CT]_\S+\.\d+/){ print STDERR "Chromosome $cols[0] without sequence accession, line $.\n"; next; } my $id = ""; if ($cols[0] =~ /N[CT]_\S+\.\d+/) { $id = $cols[0]; } else { $id = $chr2SV->{$species}->{$cols[0]}; } if ($feature ne '') { $cols[1] = $feature; } my $center = int(($cols[3]+$cols[4])/2); print $id,"\t$cols[1]\t$center\t0\t$cols[5]\t$cols[2] $score\n"; undef @cols; } } else { #if (species) not defined while(<$fh>){ chomp; next if (/^\#/); next if (/^\s*$/); my @cols = split(/\t/); ### we should have 8 columns. if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; next; } if (($cols[5] != int($cols[5]))||($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } if ($feature ne '') { $cols[1] = $feature; } my $center = int(($cols[3]+$cols[4])/2); print "$cols[0]\t$cols[1]\t$center\t0\t$cols[5]\t$cols[2] $score\n"; undef @cols; } } } sub print_peak_sga_oriented { my ($fh) = @_; if ($species ne '') { while(<$fh>){ # chr2 NimbleScan 1859802:RajiIPH3/SJOIPH3:BLOCK1 132843487 132843531 -0.11 + . seq_id=18S_CHR2:132843487-132849513;probe_id=CHR0200P132843487;count=1 chomp; next if (/^\#/); next if (/^\s*$/); my @cols = split(/\t/); ### we should have at least 8 columns. 
if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; #$wrong_seqs++; next; } if (($cols[5] != int($cols[5]))||($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } unless (exists($chr2SV->{$species}->{$cols[0]}) || $cols[0] =~ /N[CT]_\S+\.\d+/){ print STDERR "Chromosome $cols[0] without sequence accession, line $.\n"; next; } my $id = ""; if ($cols[0] =~ /N[CT]_\S+\.\d+/) { $id = $cols[0]; } else { $id = $chr2SV->{$species}->{$cols[0]}; } if ($feature ne '') { $cols[2] = $feature; } my $center = int(($cols[3]+$cols[4])/2); if ($ext_sc) { print $id,"\t$cols[2]\t$center\t$cols[6]\t$cols[5]\t$score\n"; } else { print $id,"\t$cols[2]\t$center\t$cols[6]\t$cols[5]\n"; } undef @cols; } } else { #if (species) not defined while(<$fh>){ chomp; next if (/^\#/); next if (/^\s*$/); my @cols = split(/\t/); ### we should have 8 columns. if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; next; } if (($cols[5] != int($cols[5]))||($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } if ($feature ne '') { $cols[2] = $feature; } my $center = int(($cols[3]+$cols[4])/2); if ($ext_sc) { print "$cols[0]\t$cols[2]\t$center\t$cols[6]\t$cols[5]\t$score\n"; } else { print "$cols[0]\t$cols[2]\t$center\t$cols[6]\t$cols[5]\n"; } undef @cols; } } } sub print_peak_sga_oriented_ext { my ($fh) = @_; if ($species ne '') { while(<$fh>){ # chr2 NimbleScan 1859802:RajiIPH3/SJOIPH3:BLOCK1 132843487 132843531 -0.11 + . seq_id=18S_CHR2:132843487-132849513;probe_id=CHR0200P132843487;count=1 chomp; next if (/^\#/); next if (/^\s*$/); my @cols = split(/\t/); ### we should have at least 8 columns. 
if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; #$wrong_seqs++; next; } if (($cols[5] != int($cols[5]))||($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } unless (exists($chr2SV->{$species}->{$cols[0]}) || $cols[0] =~ /N[CT]_\S+\.\d+/){ print STDERR "Chromosome $cols[0] without sequence accession, line $.\n"; next; } my $id = ""; if ($cols[0] =~ /N[CT]_\S+\.\d+/) { $id = $cols[0]; } else { $id = $chr2SV->{$species}->{$cols[0]}; } if ($feature ne '') { $cols[1] = $feature; } my $center = int(($cols[3]+$cols[4])/2); print $id,"\t$cols[1]\t$center\t$cols[6]\t$cols[5]\t$cols[2] $score\n"; undef @cols; } } else { #if (species) not defined while(<$fh>){ chomp; next if (/^\#/); next if (/^\s*$/); my @cols = split(/\t/); ### we should have 8 columns. if (scalar(@cols) < 8) { print STDERR "Incorrect column count, line $.\n"; next; } if (($cols[5] != int($cols[5]))||($cols[5] eq '.') || ($cols[5] < 0) || ($ext_sc)) { $score = $cols[5]; $cols[5] = 1; $ext_sc = 1; } if ($feature ne '') { $cols[1] = $feature; } my $center = int(($cols[3]+$cols[4])/2); print "$cols[0]\t$cols[1]\t$center\t$cols[6]\t$cols[5]\t$cols[2] $score\n"; undef @cols; } } } sub Usage { print STDERR <<"_USAGE_"; gff2sga.pl [options] where options are: -h|--help Show this stuff --db Use to locate Chrom Id Storable File 'chro_idx.nstorage' -f|--feature Set Feature name -s|--species Assembly (i.e hg18) -c Generate a Centered SGA file -u Generate an Unoriented SGA file -x Generate an extended SGA file with the 6th field equal to the GFF 'feature' field, and the feature field equal to the GFF 'source' _USAGE_ exit(1); } 1; chip-seq/tools/check_bed.pl0000744022744200262270000000326012166546052016454 0ustar ambrosingr-bucher#!/usr/bin/perl # check BED file : check whether tag mapping on chromosomes is correct # usage: ./check_bed.pl <-s species>] -f use strict; use Getopt::Std; use Storable qw (retrieve nstore); # package to store 
persistently variables in files [http://search.cpan.org/author/AMS/Storable-2.07/Storable.pm] my %opts; getopt('sf:', \%opts); # -s, & -f take arg. Values in %opts, Hash keys will be the switch names my $DB = "/db/genome/"; #my $DB = "/home/local/db/genome/"; my $chr2SV = retrieve($DB."chro_idx.nstorage"); &Usage() unless ($opts{'f'} && $opts{'s'}); # open the BED file open(my $BED, "grep \'\^chr\' $opts{'f'}|") || die "can't open $opts{'f'} : $!"; #open(my $BED, "$opts{'f'}") || die "can't open $opts{'f'} : $!"; my $cnt = 0; my $err_cnt = 0; while(<$BED>) { chomp; my @f=split/\t/; my $chr_len = $chr2SV->{'length'}->{$chr2SV->{$opts{'s'}}->{$f[0]}}; $cnt++; #print "chrom: $f[0] length: $chr_len\n"; if ($f[1] > $chr_len or $f[2] > $chr_len) { print STDERR "Error line $cnt : position $f[1] exceeds chrom length $chr_len (chrom: $f[0] )!\n"; $err_cnt++; } } close($BED); if ($err_cnt) { print STDERR "\n\nWarning : Out of a total of $cnt BED lines, $err_cnt lines could not be mapped to the given genome assembly.\nPlease, check that the assembly you're using is the correct one.\n\n"; } else { print STDERR "\ncheck_bed: correct.\nTotal number of BED lines : ($cnt)\n\n"; } sub Usage { print STDERR <<"_USAGE_"; check_bed.pl -s -f Check whether tag positions on chromosomes are correctly mapped (e.g. positions do not exceed cromosome length). _USAGE_ exit(1); } 1; chip-seq/tools/sga2wig.c0000744022744200262270000007663713401230654015750 0ustar ambrosingr-bucher/* sga2wig.c Convert SGA file to Wiggle Track (WIG) format. There are two options for formatting wiggle data: - variableStep [0] - fixedStep [1] variableStep is for data with irregular intervals between new data points. It begins with a declaration line and is followed by two columns containing chromosome positions and data values: variableStep chrom=chrN [span=windowSize] chromStartA dataValueA chromStartB dataValueB ... etc ... ... etc ... fixedStep is for data with regular intervals between new data values. 
It begins with a declaration line and is followed by a single column of data values: fixedStep chrom=chrN start=position step=stepInterval [span=windowSize = stepInterval] dataValue1 dataValue2 ... etc ... (For more details on WIG format see https://genome.ucsc.edu/goldenPath/help/wiggle.html) # Arguments: # SGA File # Options: # Set Path to locate chr_NC_gi file # (used for NCBI id to chromosome name conversion) # Set WIG format option: variableStep[0]/fixedStep[1] (def=0) # Set Chromosome number (def=0, i.e. take all chromosomes) # Set chromosome start coordinate (def=-1, i.e. entire chromosome region) # Set chromosome end coordinate (def=-1, i.e. entire chromosome region) # Set count cut-off for SGA count field (def=99999) # Set normalisation factor for total read counts within a step interval # - For fixedStep WIG format (def=0, i.e. take sum of total counts) # Set Wiggle span parameter (def=150) # Set Wiggle step parameter (fixedStep Format) (def=150 step=span) # SGA input file is a peak file, expand peak coordinates by span value # on both upstream and downstream directions # Set data viewing parameter: autoscale (def=OFF) # Set data viewing parameter: always0 (include zero) (def=OFF) # Set data viewing parameter: wfunction function (def=mean+whiskers, maximum|mean|minimum) # Set data viewing parameter: smoothing window (def=OFF[0], =0,2..16) # Set data viewing parameter: visibility mode (def=full, dense|hide) # Set Wiggle track name (def=Custom-Wig) # Set Wiggle track description (def=ChIP-Seq) # Set Wiggle track color (def= 0,200,100) Giovanna Ambrosini, EPFL/ISREC, Giovanna.Ambrosini@epfl.ch Copyright (c) 2014 EPFL and Swiss Institute of Bioinformatics. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* #define DEBUG */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include "hashtable.h" #ifdef DEBUG #include #endif #include "version.h" #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define CHR_NB 18 #define AC_MAX 18 #define CHR_SIZE 10 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 #define FIELD_MAX 64 typedef struct _options_t { char *dbPath; int help; int debug; int db; int wigFormat; char *chrNb; char *chrName; int chrFlag; unsigned long chrStart; unsigned long chrEnd; int cutOff; int norm; int span; int step; int peakFlag; char *autoscale; char *always0; char *wfunction; char *smoothing; char *visibility; char *trackName; char *trackDesc; char *trackColor; } options_t; static options_t options; typedef struct _feature_t { char seq_id[SEQ_ID]; unsigned long *pos; int *cnt; } feature_t, *feature_p_t; static feature_t sga_ft; static hash_table_t *ac_table = NULL; static hash_table_t *size_table = NULL; unsigned long endPos = 0; unsigned long startPos = 0; int process_ac() { FILE *input; int c; char buf[LINE_SIZE]; char *chrFile; char chrom[12]; int cLen; if (options.db) { cLen = (int)strlen(options.dbPath) + 12; if ((chrFile = (char*)malloc(cLen * sizeof(char))) == NULL) { perror("process_ac: malloc"); exit(1); } strcpy(chrFile, options.dbPath); } else { cLen = 21 + 12; if ((chrFile = (char*)malloc(cLen * sizeof(char))) == NULL) { perror("process_ac: malloc"); exit(1); } strcpy(chrFile, "/home/local/db/genome"); } strcat(chrFile, "/chr_NC_gi"); input = fopen(chrFile, "r"); if (input == NULL) { fprintf(stderr, "Could 
not open file %s: %s(%d)\n", chrFile, strerror(errno), errno); return 1; } do { c = fgetc(input); } while(c != '\n'); ac_table = hash_table_new(MODE_COPY); while (fgets(buf, LINE_SIZE, input) != NULL) { char *s; char chr_nb[CHR_NB] = ""; char ncbi_ac[AC_MAX] = ""; int i = 0; int nb_len = 0; int ac_len = 0; /*int valid = 1;*/ s = buf; /* Check line */ /* Get first character: if # skip line */ if (*s == '#') continue; /* Chrom NB */ while (*s != 0 && !isspace(*s)) { if (i >= CHR_NB) { fprintf(stderr, "AC too long in %s\n", s); fclose(input); exit(1); } chr_nb[i++] = *s++; } if (i < CHR_NB) chr_nb[i] = 0; nb_len = i + 1; while (isspace(*s)) s++; /* Chromosome NCBI AC */ i = 0; while (*s != 0 && !isspace(*s)) { if (i >= AC_MAX) { fprintf(stderr, "AC too long \"%s\" \n", s); fclose(input); exit(1); } ncbi_ac[i++] = *s++; } if (i < AC_MAX) ncbi_ac[i] = 0; ac_len = i + 1; strcpy(chrom, "chr"); strcat(chrom, chr_nb); nb_len = (int)strlen(chrom) + 1; /* printf("adding key %s (len = %d) value %s (ac) (len = %d) to hash table\n", chrom, nb_len, ncbi_ac, ac_len); */ /* Store both NCBI identifier to chrom number and chrom number to chrom number keys */ hash_table_add(ac_table, ncbi_ac, (size_t)ac_len, chrom, (size_t)nb_len); if (options.debug) { char *cn = hash_table_lookup(ac_table, ncbi_ac, (size_t)ac_len); fprintf (stderr, " AC Hash table: %s (len = %d) -> %s (len = %d)\n", ncbi_ac, ac_len, cn, nb_len); } hash_table_add(ac_table, chrom, (size_t)nb_len, chrom, (size_t)nb_len); if (options.debug) { char *cn = hash_table_lookup(ac_table, chrom, (size_t)nb_len); fprintf (stderr, " AC Hash table: %s (len = %d) -> %s (len = %d)\n", chrom, nb_len, cn, nb_len); } } return 0; } int process_size() { FILE *input; int c; char buf[LINE_SIZE]; char *chrSizeFile; int cLen; if (options.db) { cLen = (int)strlen(options.dbPath) + 10; if ((chrSizeFile = (char*)malloc(cLen * sizeof(char))) == NULL) { perror("process_ac: malloc"); exit(1); } strcpy(chrSizeFile, options.dbPath); } else { cLen = 
/*
 * str_split
 *
 * Split a_str at every occurrence of a_delim and return the pieces as a
 * NULL-terminated array of newly allocated strings.
 *
 * Empty pieces are not reported: leading, trailing and consecutive
 * delimiters are skipped, which is what the strtok()-based predecessor
 * produced for the inputs it could handle.  An empty string, or a string
 * made only of delimiters, yields an array holding just the NULL sentinel.
 *
 * a_str is only read; unlike the strtok() version it is left intact.
 *
 * Returns NULL if a_str is NULL or if memory cannot be allocated.  The
 * caller owns the result and must free() every element and the array.
 */
char** str_split(char* a_str, const char a_delim)
{
  char **result;
  const char *p;
  size_t count = 0;
  size_t idx = 0;

  if (a_str == NULL)
    return NULL;

  /* First pass: count the non-empty tokens so the array is sized exactly */
  p = a_str;
  while (*p) {
    while (*p && *p == a_delim)
      p++;
    if (*p == '\0')
      break;
    count++;
    while (*p && *p != a_delim)
      p++;
  }

  /* One extra slot for the terminating NULL */
  result = malloc((count + 1) * sizeof *result);
  if (result == NULL)
    return NULL;

  /* Second pass: copy each token */
  p = a_str;
  while (idx < count) {
    const char *tok;
    size_t len;

    while (*p && *p == a_delim)
      p++;
    tok = p;
    while (*p && *p != a_delim)
      p++;
    len = (size_t)(p - tok);

    result[idx] = malloc(len + 1);
    if (result[idx] == NULL) {
      /* Undo the partial work so nothing leaks */
      while (idx > 0)
        free(result[--idx]);
      free(result);
      return NULL;
    }
    memcpy(result[idx], tok, len);
    result[idx][len] = '\0';
    idx++;
  }
  result[idx] = NULL;

  return result;
}
return; if (end_loop > chr_size) { end_loop = chr_size - options.span + 1; } printf ("fixedStep chrom=%s start=%lu step=%d span=%d\n", chr_nb, startPos, options.step, options.span); for (i = startPos; i <= end_loop; i += options.step) { while ( (k < len) && (pos[k] <= i + options.step) ){ sum += counts[k++]; } if (options.norm) { sum = (long)((sum * options.norm)/options.step); } printf("%ld\n", sum); sum = 0; } } int process_sga(FILE *input, char *iFile) { unsigned long start, end; unsigned long pos; int cnt; char *name; int first = 1; int wig_hdr = 1; char *s, *res, *buf; size_t bLen = LINE_SIZE; size_t sga_mLen = BUF_SIZE; char seq_id[SEQ_ID] = ""; char seq_id_prev[SEQ_ID] = ""; unsigned int k = 0; if (options.debug) fprintf(stderr, " Processing SGA file %s\n", iFile); if ((s = malloc(bLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } /* Malloc position and count arrays */ if ((sga_ft.pos = (unsigned long *)calloc(sga_mLen, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((sga_ft.cnt = (int *)calloc(sga_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } #ifdef DEBUG int c = 1; #endif while ((res = fgets(s, (int) bLen, input)) != NULL) { size_t cLen = strlen(s); char ft[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; int i = 0; int id_len = 0; char *cn = NULL; memset(ext, 0, (size_t)EXT_MAX); memset(seq_id, 0, (size_t)SEQ_ID); while (cLen + 1 == bLen && s[cLen - 1] != '\n') { bLen *= 2; if ((s = realloc(s, bLen)) == NULL) { perror("process_file: realloc"); exit(1); } res = fgets(s + cLen, (int) (bLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); exit(1); } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 
0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); exit(1); } ft[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); exit(1); } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf) || *buf == '-') { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); exit(1); } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); exit(1); } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s (%c) Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, ft, strand, pos, cnt, ext); #endif //printf(" seq ID: %s Feat: %s (%c) Pos: %d Cnts: %d Ext: %s\n", seq_id, ft, strand, pos, cnt, ext); if (k > sga_mLen - 1) { sga_mLen *= 2; if ((sga_ft.pos = (unsigned long *)realloc(sga_ft.pos, sga_mLen * sizeof(unsigned long))) == NULL) { perror("process_sga: realloc pos array"); exit(1); } if ((sga_ft.cnt = (int *)realloc(sga_ft.cnt, sga_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc count array"); exit(1); } } if (strcmp(seq_id, seq_id_prev) != 0) { if (k > 0) endPos = sga_ft.pos[k - 1]; else endPos = 0; if (options.wigFormat) { write_wig_fs(sga_ft.pos, sga_ft.cnt, k, seq_id_prev); } else { write_wig_vs(sga_ft.pos, sga_ft.cnt, k, seq_id_prev); } strcpy(seq_id_prev, seq_id); k = 0; wig_hdr = 1; } /* Set WIG annotation track line */ if (first) { if (options.trackName == NULL) { if ((options.trackName = malloc((strlen(ft) + 1) * sizeof(char))) == NULL) { perror("process_sga: malloc Feature"); exit(1); } strcpy(options.trackName, ft); } if (options.chrName != NULL) { //printf("chr : %s start : %d end : %d\n", 
options.chrName, options.chrStart, options.chrEnd); if ((options.chrStart != -1) && (options.chrEnd != -1)) { printf("browser position %s:%d-%d\n", options.chrName, options.chrStart, options.chrEnd); } else if (options.chrStart != -1) { printf("browser position %s:%d-%d\n", options.chrName, options.chrStart, options.chrStart + 100000); } else { printf("browser position %s:%d-%d\n", options.chrName, 1, 100000); } } else { printf("browser full refGene\n"); } printf("track type=wiggle_0 name=\"%s\" description=\"%s\" visibility=%s color=%s autoScale=%s alwaysZero=%s maxHeightPixels=100:50:20 graphType=bar priority=30 windowingFunction=%s smoothingWindow=%s\n", options.trackName, options.trackDesc, options.visibility, options.trackColor, options.autoscale, options.always0, options.wfunction, options.smoothing); first = 0; } /* Get Chromosome name */ //printf("Seq ID: %s pos: %lu cnt: %d\n", seq_id, pos, cnt); id_len = (int)strlen(seq_id) + 1; cn = hash_table_lookup(ac_table, seq_id, (size_t)id_len); //printf("Chr name: %s\n", cn); if (options.chrFlag) { /* Chromosome name has been specified */ if ((options.chrStart != -1) && (options.chrEnd != -1)) { if (cn != NULL) { //printf ("chr: %s\tselected chr: %s\n", cn, options.chrName); if (strcmp(cn, options.chrName) == 0 && pos >= options.chrStart && pos <= options.chrEnd) { //printf("Adding chr: %s pos: %lu ([%lu-%lu])\n", cn, pos, options.chrStart, options.chrEnd); if (cnt > options.cutOff) sga_ft.cnt[k] = options.cutOff; else sga_ft.cnt[k] = cnt; sga_ft.pos[k] = pos; k++; if (wig_hdr) { startPos = options.chrStart; wig_hdr = 0; } } } } else if (options.chrStart != -1) { if (cn != NULL) { if (strcmp(cn, options.chrName) == 0 && pos >= options.chrStart) { if (cnt > options.cutOff) sga_ft.cnt[k] = options.cutOff; else sga_ft.cnt[k] = cnt; sga_ft.pos[k] = pos; k++; if (wig_hdr) { startPos = options.chrStart; wig_hdr = 0; } } } } else { if (cn != NULL) { if (strcmp(cn, options.chrName) == 0) { if (cnt > options.cutOff) 
sga_ft.cnt[k] = options.cutOff; else sga_ft.cnt[k] = cnt; sga_ft.pos[k] = pos; k++; if (wig_hdr) { startPos = pos; wig_hdr = 0; } } } } } else { /* Scan entire SGA */ if (cnt > options.cutOff) sga_ft.cnt[k] = options.cutOff; else sga_ft.cnt[k] = cnt; sga_ft.pos[k] = pos; k++; if (wig_hdr) { startPos = pos; wig_hdr = 0; } } } /* End of While */ if (input != stdin) { fclose(input); } /* The last time (at EOF) */ if (k > 0) endPos = sga_ft.pos[k - 1]; else return 0; //printf("The last time: SEQ_ID %s, endPos: %lu len: %d\n", seq_id, endPos, k); if (options.wigFormat) { write_wig_fs(sga_ft.pos, sga_ft.cnt, k, seq_id); } else { write_wig_vs(sga_ft.pos, sga_ft.cnt, k, seq_id); } free(sga_ft.pos); free(sga_ft.cnt); return 0; } int main(int argc, char *argv[]) { options.wigFormat = 0; options.peakFlag = 0; options.chrNb = NULL; options.chrName = NULL; options.chrFlag = 0; options.chrStart = -1; options.chrEnd = -1; options.cutOff = 99999; options.span = 150; options.step = 150; #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; int i = 0; int j = 0; int usage = 0; static struct option long_options[] = { /* These options may or may not set a flag. We distinguish them by their indices. */ {"debug", no_argument, 0, 'd'}, {"help", no_argument, 0, 'h'}, {"db", required_argument, 0, 'i'}, {"format", required_argument, 0, 'o'}, {"chrnb", required_argument, 0, 'n'}, {"start", required_argument, 0, 'b'}, {"end", required_argument, 0, 'e'}, {"coff", required_argument, 0, 'c'}, {"norm", required_argument, 0, 'f'}, {"span", required_argument, 0, 's'}, {"name", required_argument, 0, 0 }, {"desc", required_argument, 0, 0 }, {"color", required_argument, 0, 0 }, {"autoscale", required_argument, 0, 0 }, {"always0", required_argument, 0, 0 }, {"wfunction", required_argument, 0, 0 }, {"smoothing", required_argument, 0, 0 }, {"visibility", required_argument, 0, 0 }, /* These option only sets a flag. 
*/ {"peakf", no_argument, &options.peakFlag, 1}, {0, 0, 0, 0} }; int option_index = 0; while (1) { int c = getopt_long(argc, argv, "dhi:o:n:b:e:c:f:s:", long_options, &option_index); if (c == -1) break; switch (c) { case 'd': options.debug = 1; break; case 'h': options.help = 1; break; case 'i': options.dbPath = optarg; options.db = 1; break; case 'o': options.wigFormat = atoi(optarg); break; case 'n': options.chrNb = optarg; if (strcmp(options.chrNb, "0") != 0) { if ( (options.chrName = malloc((strlen(options.chrNb) + 4) * sizeof(char))) == NULL) { perror("process_sga: malloc Chr Name"); exit(1); } strcpy(options.chrName, "chr"); strcat(options.chrName, options.chrNb); options.chrFlag = 1; } break; case 'b': options.chrStart = atoi(optarg); break; case 'e': options.chrEnd = atoi(optarg); if (options.chrEnd != -1 && options.chrStart == -1) options.chrStart = 1; break; case 'c': options.cutOff = atoi(optarg); break; case 'f': options.norm = atoi(optarg); break; case 's': options.span = atoi(optarg); options.step = options.span; break; case 0: /* This option is to set the annotation track line */ if (strcmp(long_options[option_index].name, "name") == 0) { options.trackName = optarg; } if (strcmp(long_options[option_index].name, "desc") == 0) { options.trackDesc = optarg; } if (strcmp(long_options[option_index].name, "color") == 0) { options.trackColor = optarg; } if (strcmp(long_options[option_index].name, "autoscale") == 0) { options.autoscale = optarg; } if (strcmp(long_options[option_index].name, "always0") == 0) { options.always0 = optarg; } if (strcmp(long_options[option_index].name, "wfunction") == 0) { options.wfunction = optarg; } if (strcmp(long_options[option_index].name, "smoothing") == 0) { options.smoothing = optarg; } if (strcmp(long_options[option_index].name, "visibility") == 0) { options.visibility = optarg; } break; default: printf ("?? 
getopt returned character code 0%o ??\n", c); usage = 1; } } /* printf("optind: %d argc: %d\n", optind, argc); */ if (optind > argc || options.help == 1 || usage) { fprintf(stderr, "Usage: %s [options] [<] \n" " - version %s\n" " where options are:\n" " \t\t -d|--debug Produce Debug information\n" " \t\t -h|--help Show this Help text\n" " \t\t -i|--db Use to locate the chr_NC_gi and chr_size files\n" " \t\t [default is: $HOME/db/genome]\n" " \t\t -o|--format <0|1> Set Wiggle Track data format: variableStep[def=0]/fixedStep[1]\n" " \t\t -n|--chrnb Chromosome number [def: 0 (all chromosomes)]\n" " \t\t -b|--start Chromosome start [def: -1 (entire chromosome)]\n" " \t\t -e|--end Chromosome end [def: -1 (entire chromosome)]\n" " \t\t -c|--coff Count cut-off for the SGA input file [def=99999]\n" " \t\t -s|--span Wiggle Track Span(/stepInterval) parameter [def=%d]\n" " \t\t For fixedStep data format, it defines the step parameter\n" " \t\t -f|--norm Normalization factor for total tag counts within step intervals [def=0]\n" " \t\t This option is only valid for fixedStep data format\n" " \t\t --peakf Indicate that the The SGA input file represents a peak file\n" " \t\t [i.e. coordinates are peak centers]\n" " \t\t In such case, the span range begins upstream of [span=]%d bp\n" " \t\t chromosome position specified, and ends [span=]%d bp downstream\n" " \t\t --name Set name for track name field [def. name=SGA-feature]\n" " \t\t --desc Set track description field [def. desc=\"ChIP-Seq Custom data\"]\n" " \t\t --color Define the track color in comma-separated RGB values [def. 
100,100,100]\n" " \t\t --autoscale Data viewing paramenter: set auto-scale to UCSC data view [def=OFF]\n" " \t\t --always0 Data viewing paramenter: always include zero [def=OFF]\n" " \t\t --wfunction Data viewing paramenter: windowing function [def=mean+whiskers|maximum|mean|minimum]\n" " \t\t --smoothing Data viewing paramenter: smoothing window [def=OFF[0], =0,2..16]\n" " \t\t --visibility Display mode: [def=full|dense|hide]\n" "\n\tConvert SGA format into Wiggle Track format (WIG).\n" "\tWIG format is line-oriented, and is composed of declaration lines and data lines.\n\n" "\t- variableStep is for data with irregular intervals between new data points.\n" "\t It begins with a declaration line and is followed by two columns containing chromosome positions and data values:\n\n" "\t variableStep chrom=chrN [span=windowSize]\n" "\t chromStartA dataValueA\n" "\t chromStartB dataValueB\n" "\t ... etc ... ... etc ...\n\n" "\t- fixedStep is for data with regular intervals between new data values.\n" "\t It begins with a declaration line and is followed by a single column of data values:\n\n" "\t fixedStep chrom=chrN start=position step=stepInterval [span=windowSize = stepInterval]\n" "\t dataValue1\n" "\t dataValue2\n" "\t ... 
etc ...\n\n", argv[0], VERSION, options.span, options.span, options.span); return 1; } /* printf("argc: %d optind: %d\n", argc, optind); */ if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.trackDesc == NULL) { if ((options.trackDesc = malloc(32 * sizeof(char))) == NULL) { perror("main: malloc trackDesc"); exit(1); } strcpy(options.trackDesc, "ChIP-Seq Custom data"); } if (options.trackColor == NULL) { if ((options.trackColor = malloc(12 * sizeof(char))) == NULL) { perror("main: malloc trackColor"); exit(1); } strcpy(options.trackColor, "0,200,100"); } if (options.autoscale == NULL) { if ((options.autoscale = malloc(4 * sizeof(char))) == NULL) { perror("main: malloc autoscale"); exit(1); } strcpy(options.autoscale, "off"); } if (options.always0 == NULL) { if ((options.always0 = malloc(4 * sizeof(char))) == NULL) { perror("main: malloc always0"); exit(1); } strcpy(options.always0, "off"); } if (options.wfunction == NULL) { if ((options.wfunction = malloc(5 * sizeof(char))) == NULL) { perror("main: malloc wfunction"); exit(1); } strcpy(options.wfunction, "mean"); } if (options.smoothing == NULL) { if ((options.smoothing = malloc(4 * sizeof(char))) == NULL) { perror("main: malloc smoothing"); exit(1); } strcpy(options.smoothing, "off"); } if (options.visibility == NULL) { if ((options.visibility = malloc(5 * sizeof(char))) == NULL) { perror("main: malloc visibility"); exit(1); } strcpy(options.visibility, "full"); } if (options.peakFlag) { options.span *= 2; } if (options.debug) { fprintf(stderr, " Arguments:\n"); fprintf(stderr, " Wiggle data format : %d\n", options.wigFormat); fprintf(stderr, " Chromosome Nb : %s Name : %s\n", options.chrNb, options.chrName); 
#!/usr/local/bin/perl
# partit2gff.pl
# converts sga format from partitioning to gff format
# usage: ./partit2gff.pl file.sga
#
# The partitioning algorithm reports every region as a pair of
# tab-separated lines, a start line ('+') followed by an end line ('-'):
#   NC_000001.9  CTCF  227595  +  1    Start Line
#   NC_000001.9  CTCF  227633  -  1    End Line
# Each pair becomes one GFF line:
#   <chrom> ChIPSeq <feature> <start> <end> <count> + . <running number>

use strict;
use warnings;
use Storable qw(retrieve);   # package to store persistently variables in files

Usage() if @ARGV < 1;

# hash defining chromosome from SV of chromosomes in current genome assemblies
my $chr2SV = retrieve("/db/genome/chro_idx.nstorage");

my $start;        # position of the pending start line, undef when none
my $count = 0;    # running number written to the last GFF column

print "##gff-version 3\n";
while (my $lin = <>) {
    $lin =~ s/\r?\n\z//;
    my ($seq, $feature, $position, $strand, $score) = split(/\t/, $lin);
    # Blank or truncated lines cannot be converted
    next unless defined $score;

    if ($strand eq '+') {
        $start = $position;
    }
    elsif ($strand eq '-') {
        # An end line is only meaningful after a start line
        unless (defined $start) {
            warn "End line without a start line skipped: $lin\n";
            next;
        }
        $count++;
        # Fall back to the sequence version when it is not in the current
        # genome assembly
        my $chrom = exists($chr2SV->{$seq}) ? $chr2SV->{$seq} : $seq;
        print join("\t", $chrom, 'ChIPSeq', $feature, $start, $position,
                   $score, '+', '.', $count), "\n";
        undef $start;
    }
}

sub Usage {
    print STDERR "Usage: partit2gff.pl file.sga\n";
    exit(1);
}

1;
/* Score range of the input BED file; -1 means "not set" */
int minScore = -1;
int maxScore = -1;

/*
 * rescale_score
 *
 * Map a score linearly from [minScore, maxScore] onto [167, 1000]:
 * minScore becomes 167, maxScore becomes 1000, and the division truncates
 * toward zero.  Scores outside the input range are extrapolated, not
 * clamped.  When maxScore is not greater than minScore the score is
 * returned unchanged.
 *
 * The arithmetic is done in long long: 833 * (s - minScore) exceeds the
 * range of int once the score is a few millions above minScore.
 */
int rescale_score(int s)
{
  const long long lo = 167;
  const long long hi = 1000;
  long long range = (long long)maxScore - (long long)minScore;

  if (range <= 0)
    return s;
  return (int)((hi - lo) * ((long long)s - (long long)minScore) / range + lo);
}
(options.debug) fprintf(stderr, " Processing BED file %s\n", iFile); if ((s = malloc(bLen * sizeof(char))) == NULL) { perror("process_bed: malloc"); exit(1); } /* Print Track Header */ if (options.oformat == 1) { printf("track name=\"%s\" description=\"%s\" visibility=%s useScore=1\n", options.trackName, options.trackDesc, options.visibility); } else if (options.oformat == 2) { printf("track type=bedGraph name=\"%s\" description=\"%s\" visibility=%s color=%s autoScale=%s alwaysZero=%s maxHeightPixels=100:50:20 graphType=bar priority=30 windowingFunction=%s smoothingWindow=%s\n", options.trackName, options.trackDesc, options.visibility, options.trackColor, options.autoscale, options.always0, options.wfunction, options.smoothing); } #ifdef DEBUG int c = 1; #endif while ((res = fgets(s, (int) bLen, input)) != NULL) { size_t cLen = strlen(s); char bed_fld[12][FIELD_MAX]; char strand = '\0'; char field[32]; int i = 0; while (cLen + 1 == bLen && s[cLen - 1] != '\n') { bLen *= 2; if ((s = realloc(s, bLen)) == NULL) { perror("process_file: realloc"); exit(1); } res = fgets(s + cLen, (int) (bLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; else if (s[cLen - 1] == '\n' && s[cLen - 2] == '\r') s[cLen - 2] = 0; buf = s; /* printf ("BED line : %s\n", s); */ /* Check line */ /* Get first word/field */ if ( sscanf(s, "%31[^ ]", field) == 1 ) { if ( (strcmp(field, "track") == 0) || (strcmp(field, "browser") == 0) || (strcmp(field, "itemRgb") == 0) ) continue; } /* Get BED fields */ /* Chrom NB */ while (*buf != 0 && !isspace(*buf)) { if (i >= CHR_NB) { bed_fld[0][i] = 0; fprintf(stderr, "Chrom NB too long \"%s\" Skipping line...\n", bed_fld[0]); break; } bed_fld[0][i++] = *buf++; } if (i < AC_MAX) bed_fld[0][i] = 0; else continue; while (isspace(*buf)) buf++; /* Start Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Start position too large \"%s\" \n", buf); exit(1); } bed_fld[1][i++] = *buf++; } bed_fld[1][i] = 0; 
start = (unsigned long)atoi(bed_fld[1]); while (isspace(*buf)) buf++; /* End Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "End position too large \"%s\" \n", buf); exit(1); } bed_fld[2][i++] = *buf++; } bed_fld[2][i] = 0; end = (unsigned long)atoi(bed_fld[2]); while (isspace(*buf)) buf++; /* Name/Feature */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FIELD_MAX) { fprintf(stderr, "Field Name too long \"%s\" \n", buf); fclose(input); exit(1); } bed_fld[3][i++] = *buf++; } bed_fld[3][i] = 0; while (isspace(*buf)) buf++; /* Score */ i = 0; while (isdigit(*buf) || *buf == '+' || *buf == '-' || *buf == '.') { if (i >= SCORE_MAX) { fprintf(stderr, "Score too large \"%s\" \n", buf); exit(1); } bed_fld[4][i++] = *buf++; } bed_fld[4][i] = 0; score = atoi(bed_fld[4]); while (isspace(*buf)) buf++; /* Field 6/Strand */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FIELD_MAX) { fprintf(stderr, "Field 6 too long \"%s\" \n", buf); fclose(input); exit(1); } bed_fld[5][i++] = *buf++; } bed_fld[5][i] = 0; while (isspace(*buf)) buf++; /* Field 7 */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FIELD_MAX) { fprintf(stderr, "Field 7 too long \"%s\" \n", buf); fclose(input); exit(1); } bed_fld[6][i++] = *buf++; } bed_fld[6][i] = 0; while (isspace(*buf)) buf++; /* Field 8 */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FIELD_MAX) { fprintf(stderr, "Field 8 too long \"%s\" \n", buf); fclose(input); exit(1); } bed_fld[7][i++] = *buf++; } bed_fld[7][i] = 0; while (isspace(*buf)) buf++; /* Field 9 */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FIELD_MAX) { fprintf(stderr, "Field 9 too long \"%s\" \n", buf); fclose(input); exit(1); } bed_fld[8][i++] = *buf++; } bed_fld[8][i] = 0; while (isspace(*buf)) buf++; /* Field 10 */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FIELD_MAX) { fprintf(stderr, "Field 10 too long \"%s\" \n", buf); fclose(input); exit(1); } bed_fld[9][i++] = *buf++; } bed_fld[9][i] = 0; 
while (isspace(*buf)) buf++; /* Field 11 */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FIELD_MAX) { fprintf(stderr, "Field 11 too long \"%s\" \n", buf); fclose(input); exit(1); } bed_fld[10][i++] = *buf++; } bed_fld[10][i] = 0; while (isspace(*buf)) buf++; /* Field 12 */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FIELD_MAX) { fprintf(stderr, "Field 12 too long \"%s\" \n", buf); fclose(input); exit(1); } bed_fld[11][i++] = *buf++; } bed_fld[11][i] = 0; while (isspace(*buf)) buf++; #ifdef DEBUG printf(" [%d] Chr nb: %s Start: %lu End: %lu Name: %s Score: %d Strand: %c Field #7 %s Field #8 %s Field #9 %s Field #10 %s Field #11 %s Field #12 %s\n", c++, bed_fld[0], start, end, bed_fld[3], score, bed_fld[5], bed_fld[6], bed_fld[7], bed_fld[8], bed_fld[9], bed_fld[10], bed_fld[11]); #endif /* Set Strand field */ if (bed_fld[5][0] == '+' || bed_fld[5][0] == '-') strand = bed_fld[5][0]; else strand = '.'; /* Set Feature/Name field */ if (options.feature == NULL) { if (bed_fld[3][0] != '\0') { options.feature = malloc((strlen(bed_fld[3])+1) * sizeof(char)); strcpy(options.feature, bed_fld[3]); } } /* Print out BED line */ if (options.oformat == 1) { /* Rescale Score */ new_score = rescale_score(score); printf("%s\t%lu\t%lu\t%s%d\t%d\t%c\n", bed_fld[0], start, end, options.feature, count, new_score, strand); count++; } else if (options.oformat == 2) { printf("%s\t%lu\t%lu\t%d\n", bed_fld[0], start, end, score); } } /* End of While */ if (input != stdin) { fclose(input); } return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; int i = 0; int usage = 0; options.oformat = 1; static struct option long_options[] = { /* These options may or may not set a flag. We distinguish them by their indices. 
*/ {"debug", no_argument, 0, 'd'}, {"help", no_argument, 0, 'h'}, {"feature", required_argument, 0, 'f'}, {"oformat", required_argument, 0, 'o'}, {"minscore", required_argument, 0, 'a'}, {"maxscore", required_argument, 0, 'b'}, {"name", required_argument, 0, 0 }, {"desc", required_argument, 0, 0 }, {"color", required_argument, 0, 0 }, {"autoscale", required_argument, 0, 0 }, {"always0", required_argument, 0, 0 }, {"wfunction", required_argument, 0, 0 }, {"smoothing", required_argument, 0, 0 }, {"visibility", required_argument, 0, 0 }, {0, 0, 0, 0} }; int option_index = 0; while (1) { int c = getopt_long(argc, argv, "dhf:a:b:o:", long_options, &option_index); if (c == -1) break; switch (c) { case 'd': options.debug = 1; break; case 'h': options.help = 1; break; case 'f': options.feature = optarg; break; case 'a': minScore = atoi(optarg); break; case 'b': maxScore = atoi(optarg); break; case 'o': options.oformat = atoi(optarg); break; case 0: /* This option is to set the annotation track line */ if (strcmp(long_options[option_index].name, "name") == 0) { options.trackName = optarg; } if (strcmp(long_options[option_index].name, "desc") == 0) { options.trackDesc = optarg; } if (strcmp(long_options[option_index].name, "color") == 0) { options.trackColor = optarg; } if (strcmp(long_options[option_index].name, "autoscale") == 0) { options.autoscale = optarg; } if (strcmp(long_options[option_index].name, "always0") == 0) { options.always0 = optarg; } if (strcmp(long_options[option_index].name, "wfunction") == 0) { options.wfunction = optarg; } if (strcmp(long_options[option_index].name, "smoothing") == 0) { options.smoothing = optarg; } if (strcmp(long_options[option_index].name, "visibility") == 0) { options.visibility = optarg; } break; default: printf ("?? 
getopt returned character code 0%o ??\n", c); usage = 1; } } if (optind > argc || options.help == 1 || usage || minScore == -1 || maxScore == -1) { fprintf(stderr, "Usage: %s [options] [-a ] [-b ] [<] \n" " - version %s\n" " where options are:\n" " \t\t -d|--debug Produce Debug information\n" " \t\t -h|--help Show this Help text\n" " \t\t -f|--feature Set Feature name \n" " \t\t -o|--oformat <1|2> Set output Format (1:BED[Def] 2:BedGraph)\n" " \t\t --name Set name for track name field [def. name=SGA-feature]\n" " \t\t --desc Set track description field [def. desc=\"ChIP-Seq Custom data\"]\n" " \t\t --color Define the track color in comma-separated RGB values [def. 100,100,100]\n" " \t\t --autoscale Data viewing paramenter: set auto-scale to UCSC data view [def=OFF]\n" " \t\t --always0 Data viewing paramenter: always include zero [def=OFF]\n" " \t\t --wfunction Data viewing paramenter: windowing function [def=mean+whiskers|maximum|mean|minimum]\n" " \t\t --smoothing Data viewing paramenter: smoothing window [def=OFF[0], =0,2..16]\n" " \t\t --visibility Display mode: [def=dense|full|hide] or [def=1|2|3]\n" "\n\tConvert BED file to BED format suitable for displaying ChIP-seq peaks.\n" " \tTwo formats are supported: 1) BED Track with Score field; 2) BedGraph.\n" " \tFor BED track format, only the graphical parameter 'visibility' can be set.\n\n", argv[0], VERSION); return 1; } if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.feature != NULL) { char *s = options.feature; i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Name too long \"%s\" \n", options.feature); return 1; } s++; } } if (options.trackName == NULL) { if 
(options.feature != NULL) { if ((options.trackName = malloc((strlen(options.feature) + 1) * sizeof(char))) == NULL) { perror("process_sga: malloc Feature"); exit(1); } strcpy(options.trackName, options.feature); } else { if ((options.trackDesc = malloc(24 * sizeof(char))) == NULL) { perror("main: malloc trackDesc"); exit(1); } strcpy(options.trackName, "ChIP-SeqPeaks"); } } if (options.trackDesc == NULL) { if ((options.trackDesc = malloc(32 * sizeof(char))) == NULL) { perror("main: malloc trackDesc"); exit(1); } strcpy(options.trackDesc, "ChIP-Seq Custom data"); } if (options.trackColor == NULL) { if ((options.trackColor = malloc(12 * sizeof(char))) == NULL) { perror("main: malloc trackColor"); exit(1); } strcpy(options.trackColor, "0,200,100"); } if (options.autoscale == NULL) { if ((options.autoscale = malloc(4 * sizeof(char))) == NULL) { perror("main: malloc autoscale"); exit(1); } strcpy(options.autoscale, "off"); } if (options.always0 == NULL) { if ((options.always0 = malloc(4 * sizeof(char))) == NULL) { perror("main: malloc always0"); exit(1); } strcpy(options.always0, "off"); } if (options.wfunction == NULL) { if ((options.wfunction = malloc(5 * sizeof(char))) == NULL) { perror("main: malloc wfunction"); exit(1); } strcpy(options.wfunction, "mean"); } if (options.smoothing == NULL) { if ((options.smoothing = malloc(4 * sizeof(char))) == NULL) { perror("main: malloc smoothing"); exit(1); } strcpy(options.smoothing, "off"); } if (options.visibility == NULL) { if ((options.visibility = malloc(5 * sizeof(char))) == NULL) { perror("main: malloc visibility"); exit(1); } if (options.oformat == 1) { strcpy(options.visibility, "1"); } else if (options.oformat == 2) { strcpy(options.visibility, "dense"); } } if (options.debug) { fprintf(stderr, " Arguments:\n"); if (options.feature != NULL) fprintf(stderr, " Feature : %s\n", options.feature); fprintf(stderr, " Minimal BED Score: %d\n", minScore); fprintf(stderr, " maximal BED score: %d\n", maxScore); if 
(options.oformat == 1) { fprintf(stderr, " BED Track Display options:\n"); fprintf(stderr, " Visibility: %s\n", options.visibility); fprintf(stderr, " useScore=1\n"); } else if (options.oformat == 2) { fprintf(stderr, " BedGraph Track Display options:\n"); fprintf(stderr, " -----------------------------\n"); fprintf(stderr, " Track name: %s\n", options.trackName); fprintf(stderr, " Track description: %s\n", options.trackDesc); fprintf(stderr, " Track color: %s\n", options.trackColor); fprintf(stderr, " Autoscale: %s\n", options.autoscale); fprintf(stderr, " Always0: %s\n", options.always0); fprintf(stderr, " Wfunction: %s\n", options.wfunction); fprintf(stderr, " Smoothing: %s\n", options.smoothing); fprintf(stderr, " Visibility: %s\n", options.visibility); } } if (process_bed(input, argv[optind++]) != 0) { return 1; } return 0; } chip-seq/tools/eland2sga.pl0000744022744200262270000001045211553015221016412 0ustar ambrosingr-bucher#!/usr/local/bin/perl # transform ChIP-seq eland mappings via hash variable into sorted sga # usage: eland2sga.pl -a feature -s species -f eland_file use strict; use Storable qw (retrieve nstore); # package to store persistently variables in files [http://search.cpan.org/author/AMS/Storable-2.07/Storable.pm] use Getopt::Std; my %opts; getopt('asf:', \%opts); # -a, -s, & -f take arg. Values in %opts, Hash keys will be the switch names die("\nusage: eland2sga.pl -a feature -s species -f eland_file\n\n") unless (($opts{'f'})&&($opts{'a'})&&($opts{'s'})); # hash defining chromosome from SV of chromosomes in current genome assemblies my $DB = "/db/genome/"; #my $DB = "/home/local/db/genome/"; my $chr2AC = retrieve($DB."chro_idx.nstorage"); # eventually add older assemblies? 
my $species=$opts{'s'};
die ("Sorry, $species is not a supported species!\n\n") unless ($chr2AC->{$species});

# Tag counts, keyed by sequence accession, then strand ('+'/'-'), then position.
my ($score_ref);
open(FI, "$opts{'f'}") or die ("Cannot open $opts{'f'}: $!\n") ;
# Read the eland file line by line (the <FI> read had been lost from the loop header).
while(<FI>){
  my $lin=$_;
  chomp $lin;
  my @ar=split(/\t/,$lin);
  # Reduce the reference file name in column 7 to the bare chromosome name.
  $ar[6] =~ s/^.+hromosome\.//;
  $ar[6] =~ s/\.fa$//;
  # my ($chr)= $ar[6] =~ /hromosome.([0-9RLYX]+).fa/;
  my $chr = 'chr'.$ar[6];
  if (exists($chr2AC->{$species}->{$chr})){
    # attribute each tag to its start position and increment counter (multiple column formats)
    # >EAS38_3_1_526_564 TGTTCTCTGCATGCTATTTTTTAGTATTGCCGTAA U0 1 0 0 Mus_musculus.NCBIM37.48.dna.chromosome.15.fa 15876793 R ..
    #.bed: chr12 119248213 119248236 U0 0 +
    if ($ar[8] eq 'F'){
      $score_ref->{$chr2AC->{$species}->{$chr}}->{'+'}->{$ar[7]} +=1;
    }
    elsif ($ar[8] eq 'R'){
      # Reverse-strand tags are recorded at start + read length.
      my $pos=$ar[7] + length($ar[1]);
      $score_ref->{$chr2AC->{$species}->{$chr}}->{'-'}->{$pos} +=1;
    }
    else{
      warn "parsing error of orientation in following line:\n$lin\n";
    }
    # attribute counts to its center position and record in hash (force into numeric)
    # $score_ref->{$chr2AC{$ar[0]}}->{int(($ar[1]+$ar[2])/2)}=eval($ar[3]);
  }
  else{
    # warn "parsing error of chromosome number in following line:\n$lin\n";
  }
}
close(FI);
#nstore (\$score_ref, "/scratch/frt/cschmid/EMO-4_bed_ori.nstorage");

my @chr;
my $spec='T'; # flag
# NOTE(review): $spec is never reassigned below, so the mixed-assembly check
# cannot trigger as written -- confirm the intended logic before changing it.
foreach my $ac(keys %$score_ref){
  unless (($spec eq 'T')||($spec eq $chr2AC->{'assembly'}->{$ac})){
    die "The file $opts{'f'} contains entries from both $spec and from ",$chr2AC->{'assembly'}->{$ac}," (chromosome $ac)\n";
  }
  $species = $chr2AC->{'assembly'}->{$ac};
  $chr2AC->{$ac} =~ s/chr//;
  push(@chr, $chr2AC->{$ac});
}

# order chromosome array as in sga files
my @chrb = sort {$a<=>$b} @chr;
my (@spl,$sc);
# Non-numeric names sort first under <=>; move a leading X and then Y to the end.
foreach $sc('X','Y'){
  if ($chrb[0] eq $sc){
    push(@spl, shift(@chrb));
  }
}
push(@chrb,@spl);

foreach my $chro(@chrb){
  # foreach position with data, print out counts in sga format:
  # versioned genome sequence ID
  # feature type
  # position
  # strand (+, -, or 0 for un-oriented features)
  # tag count (or signal intensity, positive int)
  my @arr=keys %{$score_ref->{$chr2AC->{$species}->{'chr'.$chro}}->{'+'}};
  push @arr,keys %{$score_ref->{$chr2AC->{$species}->{'chr'.$chro}}->{'-'}};
  my %double; # hash to control for positions with tags in both ori
  foreach my $pos(sort {$a <=> $b} @arr){
    if ((exists($score_ref->{$chr2AC->{$species}->{'chr'.$chro}}->{'+'}->{$pos}))&&(!$double{$pos})){
      print $chr2AC->{$species}->{'chr'.$chro},"\t$opts{'a'}\t$pos\t+\t",$score_ref->{$chr2AC->{$species}->{'chr'.$chro}}->{'+'}->{$pos},"\n";
      $double{$pos}='T';
    }
    elsif (exists($score_ref->{$chr2AC->{$species}->{'chr'.$chro}}->{'-'}->{$pos})){
      print $chr2AC->{$species}->{'chr'.$chro},"\t$opts{'a'}\t$pos\t-\t",$score_ref->{$chr2AC->{$species}->{'chr'.$chro}}->{'-'}->{$pos},"\n";
    }
    else{
      print "trouble for position $pos on chro 'chr'.$chro: no tags in either orientation??\n";
      die;
    }
  }
  # read length of chromosome from /db/genome/eukaryote.ptr
  my $line = `grep $chr2AC->{$species}->{'chr'.$chro} /db/genome/eukaryote.ptr`;
  my @ar = split(/\s+/, $line);
  print "$ar[0]\tEND\t$ar[2]\t0\t1\n";
  # grep 'Mm/ch' /db/genome/eukaryote.ptr| perl -ane 'if (/^\w+\_\d+\.\d+\s+/){@ar=split/\s+/;print "$ar[0]\tEND\t$ar[2]\t0\t1\n";}'
}
1;
chip-seq/tools/fps2sga.pl0000744022744200262270000000567512632304700016134 0ustar ambrosingr-bucher#!/usr/bin/perl
# converts fps format to sga format
# usage: ./fps2sga.pl [-f -s <-x extended SGA>]
use strict;
use Getopt::Long;

my %opt;
my @options = ("help", "h", "species=s", "s=s", "feature=s", "f=s", "x", "ignore0flag");
my $file = "";
my $species = "";
my $feature = "CHIP";
my $extended = 0;
my $ignore0flag = 0;

if( ! GetOptions( \%opt, @options ) ) { &Usage(); }
&Usage() if defined($opt{'help'}) || defined($opt{'h'});
&Usage() if $#ARGV < 0;

if ($opt{'f'} ne '') { $feature = $opt{'f'}; }
if ($opt{'feature'} ne '') { $feature = $opt{'feature'}; }
if ($opt{'s'} ne '') { $species = $opt{'s'}; }
if ($opt{'species'} ne '') { $species = $opt{'species'}; }
if ($opt{'x'} ne '') { $extended = 1; }
if ($opt{'ignore0flag'} ne '') { $ignore0flag = 1; }

$file = $ARGV[0];
#print "FPS file : $file\n";
#print "Options: feature : $feature, Species $species\n";
#my $err_cnt = 0;
my $tot_lines = 0;

open(IN, "$file") or die ("Cannot open $file: $!\n") ;
# Read the FPS file line by line (the <IN> read had been lost from the loop header).
while(<IN>){
  #FP H3K36me1_1 :+U EU:NC_000001.9 1- 532614; 99999.
  # if (/FP (.{20}):[-+][SMRU ] EU:(N[TC]_\d+\.?\d*)\s+[01]([-+])\s*(\d+)(;.+)/){
  next if /#/;
  next if (!/^FP/);
  # Fixed-width FP record: keyword(5) description(20) flags(5) db(3) id(18)
  # flag(1) strand(1) position(10) rest(7).
  my @fps_fields = unpack('a5 a20 a5 a3 a18 a1 a1 a10 a7', $_);
  #if (/FP (.{20}):[-+][SMRU ](\d|\s) EU:(N[TC]_\d+\.?\d*)\s+[01]([-+])\s*(\d{10})(.+)/) {
  #my $counts=(split(/_/,$1))[1];
  my $counts = "";
  my $strand = "";
  my $desc = $fps_fields[1];
  my $id = $fps_fields[4];
  my $pos = $fps_fields[7];
  $desc =~ s/\s+//g;
  $id =~ s/\s+//g;
  $pos =~ s/\s+//g;
  $counts = 1 if ($counts eq '');
  if ($ignore0flag) {
    $strand = $fps_fields[6];
  } else {
    # The original tested $2, a capture of the regex that is now commented
    # out, so the flag was never seen.  That capture was the character at
    # column 29 of the FP line, i.e. the 4th character of the 5-character
    # flags field.
    my $zero_flag = substr($fps_fields[2], 3, 1);
    if ($zero_flag eq '0') {
      $strand = '0';
    } else {
      $strand = $fps_fields[6];
    }
  }
  if ($extended) {
    $desc =~ s/\s+$//;
    print "$id\t".$feature."\t$pos\t$strand\t$counts\t$desc\n";
  } else {
    print "$id\t".$feature."\t$pos\t$strand\t$counts\n";
  }
  $tot_lines++;
}
close(IN);

#if ($err_cnt) {
# print STDERR "\nWarning : Out of a total of $tot_lines FP lines, $err_cnt lines could not be mapped to the genome.\n\nPlease, check that the functional position reference is a genomic coordinate.\nThe accepted genome db code and entry number are the following:\n EU:N[TC]_\d+\.?\d*\n";
#}

# Print the usage text on STDERR and exit with status 1.
sub Usage {
  print STDERR <<"_USAGE_";

 fps2sga.pl [options]

 where options are:
   -h|--help Show this stuff
   -f|--feature Set Feature name
   -s|--species Assembly (i.e hg18)
   -x Generate an extended SGA file with the 6th field equal to the FPS 'description' field
   --ignore0flag Ignore 0 flag at position 29 of the FP line, so keep orientation the same as original

_USAGE_
  exit(1);
}
1;
chip-seq/tools/version.h0000777022744200262270000000000013354703337020133 2../version.hustar ambrosingr-bucherchip-seq/tools/bed2sga.c0000744022744200262270000005334613350730441015706 0ustar ambrosingr-bucher/* bed2sga.c Convert BED file to SGA format. # Arguments: # BED File # Species (e.g. hg19) # Options: # Set Path to locate chr_NC_gi file # (used for chrom name to NCBI id conversion) # Set Feature name # Center SGA file # Unoriented SGA file # Extended SGA file # Use BED score field to set SGA count field # Input is ENCODE narrowPeak format Giovanna Ambrosini, EPFL/ISREC, Giovanna.Ambrosini@epfl.ch Copyright (c) 2014 EPFL and Swiss Institute of Bioinformatics. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ /* #define DEBUG */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include "hashtable.h" #ifdef DEBUG #include #endif #include "version.h" #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define CHR_NB 18 #define AC_MAX 18 #define POS_MAX 16 #define SCORE_MAX 12 #define NAME_MAX 128 #define FIELD_MAX 64 typedef struct _options_t { char *dbPath; char *extField; int help; int debug; int db; int useScore; int useSigVal; int center; int unoriented; int extend; int narrowPeak; int regional; } options_t; static options_t options; static hash_table_t *ac_table = NULL; char *Species = NULL; char *Feature = NULL; int check_name_flag = 0; int extIdx[10]; int extLen = 0;

/*
 * process_ac: load the chromosome-name -> NCBI accession table.
 *
 * Opens <db path>/chr_NC_gi (options.dbPath when options.db is set,
 * otherwise /home/local/db/genome), skips its first line, then scans for
 * the '#' header whose first tab-delimited word equals Species.  The lines
 * from there up to the next '#' header are read as "chromosome accession"
 * pairs and stored in ac_table twice: "chr<name>" -> accession and
 * accession -> accession.
 *
 * Returns 0 on success, 1 when the file cannot be opened.  Over-long
 * tokens terminate the program.
 */
int process_ac() {
  FILE *input;
  int c;
  char buf[LINE_SIZE];
  char *chrFile;
  char chrom[CHR_NB + 3]; /* "chr" + at most CHR_NB-1 characters + NUL */
  int cLen;
  int break_flag = 0;
  int read_chr_nc = 0;

  if (options.db) {
    cLen = (int)strlen(options.dbPath) + 12;
    if ((chrFile = (char*)malloc(cLen * sizeof(char))) == NULL) {
      perror("process_ac: malloc");
      exit(1);
    }
    strcpy(chrFile, options.dbPath);
  } else {
    cLen = 21 + 12;
    if ((chrFile = (char*)malloc(cLen * sizeof(char))) == NULL) {
      perror("process_ac: malloc");
      exit(1);
    }
    strcpy(chrFile, "/home/local/db/genome");
  }
  strcat(chrFile, "/chr_NC_gi");

  input = fopen(chrFile, "r");
  if (input == NULL) {
    fprintf(stderr, "Could not open file %s: %s(%d)\n", chrFile, strerror(errno), errno);
    free(chrFile);
    return 1;
  }
  free(chrFile);

  /* Skip the first line; stop at EOF too so an empty file cannot hang us. */
  do {
    c = fgetc(input);
  } while (c != '\n' && c != EOF);

  ac_table = hash_table_new(MODE_COPY);
  while (fgets(buf, LINE_SIZE, input) != NULL) {
    char *s;
    char chr_nb[CHR_NB] = "";
    char ncbi_ac[AC_MAX] = "";
    char field[128];
    int i = 0;
    int nb_len = 0;
    int ac_len = 0;

    s = buf;
    /* Check line */
    /* Get first character: if (char=# and not break_flag) check species */
    if (*s == '#') {
      if (!break_flag) {
        /* Get first word */
        s++;
        if ( sscanf(s, "%127[^\t]", field) == 1 ) {
          if ((strcmp(field, Species) == 0)) {
            read_chr_nc = 1;
            break_flag = 1;
          } else {
            continue;
          }
        }
      } else {
        /* Next assembly header: our section is finished. */
        break;
      }
    }
    if (read_chr_nc) {
      /* Chrom NB -- keep one byte free for the terminator */
      while (*s != 0 && !isspace((unsigned char)*s)) {
        if (i >= CHR_NB - 1) {
          fprintf(stderr, "Chrom NB too long in %s\n", s);
          fclose(input);
          exit(1);
        }
        chr_nb[i++] = *s++;
      }
      chr_nb[i] = 0;
      while (isspace((unsigned char)*s))
        s++;
      /* Chromosome NCBI AC */
      i = 0;
      while (*s != 0 && !isspace((unsigned char)*s)) {
        if (i >= AC_MAX - 1) {
          fprintf(stderr, "AC too long \"%s\" \n", s);
          fclose(input);
          exit(1);
        }
        ncbi_ac[i++] = *s++;
      }
      ncbi_ac[i] = 0;
      ac_len = i + 1;
      strcpy(chrom, "chr");
      strcat(chrom, chr_nb);
      nb_len = (int)strlen(chrom) + 1;
      /* Store both chromosome number to NCBI identifier and NCBI identifier to NCBI identifier keys */
      hash_table_add(ac_table, chrom, (size_t)nb_len, ncbi_ac, (size_t)ac_len);
      if (options.debug) {
        char *ac = hash_table_lookup(ac_table, chrom, (size_t)nb_len);
        fprintf (stderr, " AC Hash table: %s (len = %d) -> %s (len = %d)\n", chrom, nb_len, ac, ac_len);
      }
      hash_table_add(ac_table, ncbi_ac, (size_t)ac_len, ncbi_ac, (size_t)ac_len);
      if (options.debug) {
        char *ac = hash_table_lookup(ac_table, ncbi_ac, (size_t)ac_len);
        fprintf (stderr, " AC Hash table: %s (len = %d) -> %s (len = %d)\n", ncbi_ac, ac_len, ac, ac_len);
      }
    }
  }
  fclose(input);
  return 0;
}

/*
 * Copy the next whitespace-delimited token at *cursor into dst (a buffer
 * of FIELD_MAX bytes), terminate it, and advance *cursor past the token
 * and any whitespace that follows.  A token that does not fit is fatal:
 * the message is printed with the given label, input is closed and the
 * program exits.
 */
static void read_bed_field(char **cursor, char *dst, const char *label, FILE *input) {
  char *p = *cursor;
  int n = 0;

  while (*p != 0 && !isspace((unsigned char)*p)) {
    if (n >= FIELD_MAX - 1) { /* leave room for the terminator */
      fprintf(stderr, "%s too long \"%s\" \n", label, p);
      fclose(input);
      exit(1);
    }
    dst[n++] = *p++;
  }
  dst[n] = 0;
  while (isspace((unsigned char)*p))
    p++;
  *cursor = p;
}

/*
 * process_bed: convert every BED line read from input into SGA line(s)
 * on stdout.
 *
 * input  stream to read; closed before returning unless it is stdin.
 * iFile  name used in the debug message only (may be NULL).
 *
 * "track", "browser" and "itemRgb" lines are skipped, as are lines whose
 * chromosome name is too long or (when Species is set) not present in
 * ac_table.  Up to 12 columns are parsed.  The SGA position, strand and
 * count are derived from the regional / center / unoriented / useScore /
 * narrowPeak / useSigVal options; with options.extend the BED columns
 * listed in extIdx are appended as extra tab-separated fields.
 *
 * Always returns 0; malformed over-long fields terminate the program.
 */
int process_bed(FILE *input, char *iFile) {
  static const char *const field_label[12] = {
    "", "", "", "Field Name", "", "Field 6", "Field 7", "Field 8",
    "Field 9", "Field 10", "Field 11", "Field 12"
  };
  unsigned long start, end;
  int score;
  char *s, *res, *buf;
  size_t bLen = LINE_SIZE;

  if (options.debug)
    fprintf(stderr, " Processing BED file %s\n", iFile != NULL ? iFile : "stdin");
  if ((s = malloc(bLen * sizeof(char))) == NULL) {
    perror("process_bed: malloc");
    exit(1);
  }
#ifdef DEBUG
  int c = 1;
#endif
  while ((res = fgets(s, (int) bLen, input)) != NULL) {
    size_t cLen = strlen(s);
    char bed_fld[12][FIELD_MAX];
    char ext_buf[LINE_SIZE];
    char strand = '\0';
    char field[32];
    float signal_val = 0.0;
    unsigned long pos = 0;
    unsigned long pos2 = 0;
    int count = 0;
    int id_len = 0;
    char *ac;
    int i = 0;
    int k;

    /* The line did not fit: keep doubling the buffer until it does. */
    while (cLen + 1 == bLen && s[cLen - 1] != '\n') {
      char *grown;
      bLen *= 2;
      if ((grown = realloc(s, bLen)) == NULL) {
        perror("process_file: realloc");
        exit(1);
      }
      s = grown;
      res = fgets(s + cLen, (int) (bLen - cLen), input);
      cLen = strlen(s);
    }
    /* Strip the line terminator (LF or CRLF). */
    if (cLen > 0 && s[cLen - 1] == '\n')
      s[--cLen] = 0;
    if (cLen > 0 && s[cLen - 1] == '\r')
      s[--cLen] = 0;
    buf = s;

    /* Check line */
    /* Get first word/field */
    if ( sscanf(s, "%31[^ ]", field) == 1 ) {
      if ( (strcmp(field, "track") == 0) || (strcmp(field, "browser") == 0) || (strcmp(field, "itemRgb") == 0) )
        continue;
    }
    /* Get BED fields */
    /* Chrom NB */
    while (*buf != 0 && !isspace((unsigned char)*buf)) {
      if (i >= CHR_NB) {
        bed_fld[0][i] = 0;
        fprintf(stderr, "Chrom NB too long \"%s\" Skipping line...\n", bed_fld[0]);
        break;
      }
      bed_fld[0][i++] = *buf++;
    }
    if (i < AC_MAX)
      bed_fld[0][i] = 0;
    else
      continue;
    while (isspace((unsigned char)*buf))
      buf++;
    /* Start Position */
    i = 0;
    while (isdigit((unsigned char)*buf)) {
      if (i >= POS_MAX) {
        fprintf(stderr, "Start position too large \"%s\" \n", buf);
        exit(1);
      }
      bed_fld[1][i++] = *buf++;
    }
    bed_fld[1][i] = 0;
    /* strtoul: the digit string can exceed the range of int */
    start = strtoul(bed_fld[1], NULL, 10);
    while (isspace((unsigned char)*buf))
      buf++;
    /* End Position */
    i = 0;
    while (isdigit((unsigned char)*buf)) {
      if (i >= POS_MAX) {
        fprintf(stderr, "End position too large \"%s\" \n", buf);
        exit(1);
      }
      bed_fld[2][i++] = *buf++;
    }
    bed_fld[2][i] = 0;
    end = strtoul(bed_fld[2], NULL, 10);
    while (isspace((unsigned char)*buf))
      buf++;
    /* Name */
    read_bed_field(&buf, bed_fld[3], field_label[3], input);
    /* Score */
    i = 0;
    while (isdigit((unsigned char)*buf) || *buf == '+' || *buf == '-' || *buf == '.') {
      if (i >= SCORE_MAX) {
        fprintf(stderr, "Score too large \"%s\" \n", buf);
        exit(1);
      }
      bed_fld[4][i++] = *buf++;
    }
    bed_fld[4][i] = 0;
    score = atoi(bed_fld[4]);
    while (isspace((unsigned char)*buf))
      buf++;
    /* Field 6/Strand up to Field 12; missing columns become empty strings */
    for (k = 5; k < 12; k++)
      read_bed_field(&buf, bed_fld[k], field_label[k], input);
#ifdef DEBUG
    printf(" [%d] Chr nb: %s Start: %lu End: %lu Name: %s Score: %d Strand: %c Field #7 %s Field #8 %s Field #9 %s Field #10 %s Field #11 %s Field #12 %s\n", c++, bed_fld[0], start, end, bed_fld[3], score, bed_fld[5][0], bed_fld[6], bed_fld[7], bed_fld[8], bed_fld[9], bed_fld[10], bed_fld[11]);
#endif
    /* Set SGA Strand field */
    if (!options.regional) {
      if (bed_fld[5][0] == '+' || bed_fld[5][0] == '-')
        strand = bed_fld[5][0];
      else
        strand = '0';
    }
    /* Set SGA Feature field */
    if (check_name_flag) {
      if (bed_fld[3][0] != '\0') {
        /* check_name_flag is only set when main() allocated Feature itself,
           so the buffer can be resized to hold the BED name. */
        char *grown = realloc(Feature, strlen(bed_fld[3]) + 1);
        if (grown == NULL) {
          perror("process_bed: realloc Feature");
          exit(1);
        }
        Feature = grown;
        strcpy(Feature, bed_fld[3]);
      }
    }
    count = 1;
    if (!options.narrowPeak) {
      /* Set SGA Position field */
      if (options.regional) {
        /* Set start (line +) and end positions (line -) */
        pos = start + 1;
        pos2 = end;
      } else {
        if (options.center) {
          pos = (unsigned long)((long double)(start + end)/2 + 0.5);
          /* Set strand to zero - no strand */
          if (options.unoriented) {
            strand = '0';
          }
        } else {
          if (strand == '+') {
            pos = start + 1;
          } else if (strand == '-') {
            pos = end;
          } else {
            /* BED file has no strand: Center position */
            pos = (unsigned long)((long double)(start + end)/2 + 0.5);
            strand = '0';
          }
        }
      }
      /* Set SGA Count field */
      if (options.useScore) {
        /* Use BED score field to set SGA count field */
        count = score;
      }
    } else {
      /* ENCODE narrowPeak format */
      /* Set SGA Position field */
      long peak_offset = (long)atoi(bed_fld[9]);
      if (peak_offset == -1) {
        pos = (unsigned long)((long double)(start + end)/2 + 0.5);
        strand = '0';
      } else {
        pos = start + peak_offset + 1;
      }
      /* Set SGA Count field */
      if (options.useSigVal) {
        /* Use BED signalValue field to set SGA count field */
        signal_val = atof(bed_fld[6]);
        count = (int)(signal_val + 0.5);
      }
    }
    /* Check for extension option */
    if (options.extend) {
      ext_buf[0] = 0;
      for (i = 0; i < extLen; i++) {
        /* Column numbers come from the command line: anything outside
           1..12 is emitted as an empty field instead of indexing bed_fld
           out of bounds. */
        int col = extIdx[i];
        if (i > 0)
          strcat(ext_buf, "\t");
        if (col >= 1 && col <= 12)
          strcat(ext_buf, bed_fld[col - 1]);
      }
      if (options.debug)
        fprintf(stderr, " Extended fields: %s\n", ext_buf);
    }
    if (Species != NULL) {
      id_len = (int)strlen(bed_fld[0]) + 1;
      ac = hash_table_lookup(ac_table, bed_fld[0], (size_t)id_len);
      if (ac == NULL)
        continue;
    } else {
      /* No translation requested: print the BED name as is (the original
         leaked a fresh copy for every line). */
      ac = bed_fld[0];
    }
    /* Print out SGA line(s) */
    if (options.regional) {
      /* 2-line SGA format */
      if (options.extend) {
        printf("%s\t%s\t%lu\t+\t%d\t%s\n", ac, Feature, pos, count, ext_buf);
        printf("%s\t%s\t%lu\t-\t%d\t%s\n", ac, Feature, pos2, count, ext_buf);
      } else {
        printf("%s\t%s\t%lu\t+\t%d\n", ac, Feature, pos, count);
        /* the '-' line marks the region end, as in the extended branch */
        printf("%s\t%s\t%lu\t-\t%d\n", ac, Feature, pos2, count);
      }
    } else {
      if (options.extend) {
        printf("%s\t%s\t%lu\t%c\t%d\t%s\n", ac, Feature, pos, strand, count, ext_buf);
      } else {
        printf("%s\t%s\t%lu\t%c\t%d\n", ac, Feature, pos, strand, count);
      }
    }
  } /* End of While */
  free(s);
  if (input != stdin) {
    fclose(input);
  }
  return 0;
}
char** str_split(char* a_str, const char a_delim) { char** result = 0; size_t count = 0; char* tmp = a_str; char* last_comma = 0; char delim[2]; delim[0] = a_delim; delim[1] = 0; /* Count how many elements will be extracted. */ while (*tmp) { if (a_delim == *tmp) { count++; last_comma = tmp; } tmp++; } /* Add space for trailing token. */ count += last_comma < (a_str + strlen(a_str) - 1); /* Add space for terminating null string so caller knows where the list of returned strings ends. 
*/ count++; result = malloc(sizeof(char*) *count); if (result) { size_t idx = 0; char* token = strtok(a_str, delim); while (token) { assert(idx < count); *(result + idx++) = strdup(token); token = strtok(0, delim); } assert(idx == count - 1); *(result + idx) = 0; } return result; } int main(int argc, char *argv[]) { char** tokens; #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; int i = 0; int usage = 0; static struct option long_options[] = { /* These options may or may not set a flag. We distinguish them by their indices. */ {"debug", no_argument, 0, 'd'}, {"help", no_argument, 0, 'h'}, {"db", required_argument, 0, 'i'}, {"feature", required_argument, 0, 'f'}, {"species", required_argument, 0, 's'}, {"extend", required_argument, 0, 'e'}, {"center", no_argument, 0, 'c'}, {"unoriented", no_argument, 0, 'u'}, {"regional", no_argument, 0, 'r'}, /* These options only set a flag. */ {"useScore", no_argument, &options.useScore, 1}, {"useSigVal", no_argument, &options.useSigVal, 1}, {"narrowPeak", no_argument, &options.narrowPeak, 1}, {0, 0, 0, 0} }; int option_index = 0; while (1) { int c = getopt_long(argc, argv, "dhf:i:s:cure:", long_options, &option_index); if (c == -1) break; switch (c) { case 'd': options.debug = 1; break; case 'h': options.help = 1; break; case 'i': options.dbPath = optarg; options.db = 1; break; case 'f': Feature = optarg; break; case 's': Species = optarg; break; case 'c': options.center = 1; break; case 'e': options.extField = optarg; options.extend = 1; break; case 'u': options.unoriented = 1; break; case 'r': options.regional = 1; break; case 0: /* If this option set a flag, do nothing else now. */ if (long_options[option_index].flag != 0) break; default: printf ("?? 
getopt returned character code 0%o ??\n", c); usage = 1; } } if (optind > argc || options.help == 1 || usage) { fprintf(stderr, "Usage: %s [options] [-s ] [<] \n" " - version %s\n" " where options are:\n" " \t\t -d|--debug Produce Debug information\n" " \t\t -h|--help Show this Help text\n" " \t\t -i|--db Use to locate the assembly-specific chr_NC_gi file\n" " \t\t [default is: $HOME/db/genome]\n" " \t\t -f|--feature Set Feature name \n" " \t\t -s|--species Assembly (i.e hg19)\n" " \t\t -c|--center Generate a Centered SGA file\n" " \t\t -u|--unoriented Generate an unoriented SGA file\n" " \t\t -r|--regional Generate a 2-line[+/-] SGA file representing BED regions\n" " \t\t (e.g. RepeatMasker regions)\n" " \t\t -e|--extend Produce an extended SGA file with additional fields specified\n" " \t\t by a comma-separated list of BED column numbers (from 1 to n)\n" " \t\t --useScore Use the BED 'score' field (#5) to set the SGA 'count' field\n" " \t\t --useSigVal Use the BED 'Signal Value' field (#7) to set the SGA 'count' field\n" " \t\t [This option is only valid for ENCODE narrowPeak]\n" " \t\t --narrowPeak Use this option for ENCODE narrowPeak format\n" " \t\t [Options --useScore, -c, and -u are ignored]\n" "\n\tConvert BED format into SGA format.\n\n", argv[0], VERSION); return 1; } if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (Feature == NULL) { if ((Feature = malloc(6 * sizeof(char))) == NULL) { perror("main: malloc Feature"); exit(1); } strcpy(Feature, "chIP"); check_name_flag = 1; } else { char *s = Feature; i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Name too long \"%s\" \n", Feature); return 1; } s++; } } if 
(options.narrowPeak) { options.center = 0; options.unoriented = 0; options.useScore = 0; } else { options.useSigVal = 0; } if (options.regional) { options.center = 0; options.unoriented = 0; } if (options.extend) { tokens = str_split(options.extField, ','); if (tokens) { for (i = 0; *(tokens + i); i++) { extIdx[i] = atoi(*(tokens + i)); free(*(tokens + i)); } extLen = i; if (options.debug) { fprintf(stderr, " Number of TOKENS: %d\n", extLen); } free(tokens); } } if (options.debug) { fprintf(stderr, " Arguments:\n"); fprintf(stderr, " Feature : %s\n", Feature); if (Species != NULL) fprintf(stderr, " Species : %s\n", Species); fprintf(stderr, " Centered SGA [on/off]: %d\n", options.center); fprintf(stderr, " Unoriented SGA [on/off]: %d\n", options.unoriented); fprintf(stderr, " Extended SGA [on/off]: %d. BED fields:", options.extend); for (i = 0; i < extLen; i++) { if (i == (extLen - 1)) { fprintf(stderr, " %d", extIdx[i]); } else { fprintf(stderr, " %d,", extIdx[i]); } } fprintf(stderr, "\n"); fprintf(stderr, " Use Score [on/off]: %d\n", options.useScore); fprintf(stderr, " NarrowPeak format [on/off]: %d\n", options.narrowPeak); fprintf(stderr, " Use Signal Value [on/off]: %d\n", options.useSigVal); } if (Species != NULL) { if (process_ac() == 0) { if (options.debug) fprintf(stderr, " HASH Table for chromosome access identifier initialized\n"); } else { return 1; } } if (process_bed(input, argv[optind++]) != 0) { return 1; } return 0; } chip-seq/tools/compactsga.c0000744022744200262270000001406213301537674016521 0ustar ambrosingr-bucher/* compactsga.c Merge equal tag positions within a SGA File # Arguments: # SGA file Giovanna Ambrosini, EPFL/ISREC, Giovanna.Ambrosini@epfl.ch Copyright (c) 2014 EPFL and Swiss Institute of Bioinformatics. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* #define DEBUG */ #define _GNU_SOURCE #include #include #include #include #include #include #ifdef DEBUG #include #endif #include "version.h" /*#define BUF_SIZE 4096 */ #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 typedef struct _options_t { int help; int debug; } options_t; static options_t options; /* int process_sga(char *iFile) */ int process_sga() { /* FILE *f = fopen(iFile, "r"); */ unsigned long pos; char chr[SEQ_ID] = ""; unsigned long p = 0; int tc = 0; int cnt = 0; char ft[FT_MAX] = ""; char str = '\0'; char desc[EXT_MAX] = ""; char *s, *res, *buf; size_t bLen = LINE_SIZE; /* if (f == NULL) { fprintf(stderr, "Could not open file %s: %s(%d)\n", iFile, strerror(errno), errno); return 1; } */ if ((s = malloc(bLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } /* if (options.debug) fprintf(stderr, "Processing file %s\n", iFile); */ #ifdef DEBUG int lc = 1; #endif /* while ((res = fgets(s, (int) bLen, f)) != NULL) { */ while ((res = fgets(s, (int) bLen, stdin)) != NULL) { char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX] = ""; size_t cLen = strlen(s); unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == bLen && s[cLen - 1] != '\n') { bLen *= 2; if ((s = 
realloc(s, bLen)) == NULL) { perror("process_file: realloc"); exit(1); } /* res = fgets(s + cLen, (int) (bLen - cLen), f); */ res = fgets(s + cLen, (int) (bLen - cLen), stdin); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); exit(1); } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); exit(1); } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); exit(1); } position[i++] = *buf++; } position[i] = 0; pos = (unsigned long)atol(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); exit(1); } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); exit(1); } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s (%c) Pos: %lu Cnts: %d Ext: %s\n", lc++, seq_id, feature, strand, pos, cnt, ext); #endif if (!strcmp(feature, "END")) { continue; } if (pos == p && strcmp(seq_id, chr) == 0 && strand == str) { tc += cnt; } else { if (tc > 0) { if (strcmp(desc, "")) { printf("%s\t%s\t%lu\t%c\t%d\t%s\n", chr, ft , p, str, tc, desc); } else { printf("%s\t%s\t%lu\t%c\t%d\n", chr, ft , p, str, tc); } } strcpy(chr, seq_id); strcpy(ft, feature); strcpy(desc, ext); p = pos; str = strand; tc = cnt; } } /* End of While */ free(s); if (tc > 0) { if (strcmp(desc, "")) { printf("%s\t%s\t%lu\t%c\t%d\t%s\n", chr, ft , p, str, tc, desc); } else { 
printf("%s\t%s\t%lu\t%c\t%d\n", chr, ft , p, str, tc); } } return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG mcheck(NULL); mtrace(); #endif while (1) { int c = getopt(argc, argv, "dh"); if (c == -1) break; switch (c) { case 'd': options.debug = 1; break; case 'h': options.help = 1; break; default: printf ("?? getopt returned character code 0%o ??\n", c); } } /* if (optind == argc || options.help == 1) { */ if (options.help == 1) { fprintf(stderr, "Usage: %s [options] < \n" " - version %s\n" " where options are:\n" " \t\t -h Show this help text\n" " \t\t -d Print debug information\n" "\n\tThe program reads a ChIP-seq data file in SGA format (),\n" "\tand merges equal tag positions into a single line adjusting the count\n" "\tfield accordingly\n\n", argv[0], VERSION); return 1; } /* if (process_sga(argv[optind++]) != 0) { return 1; } */ if (process_sga() != 0) { return 1; } return 0; } chip-seq/tools/featreplace.c0000744022744200262270000001503713362373530016652 0ustar ambrosingr-bucher/* featreplace.c Replace Feature field within an SGA File # Arguments: # feature name Giovanna Ambrosini, EPFL/ISREC, Giovanna.Ambrosini@epfl.ch Copyright (c) 2014 EPFL and Swiss Institute of Bioinformatics. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ /* #define DEBUG */ #define _GNU_SOURCE #include #include #include #include #include #include #ifdef DEBUG #include #endif #include "version.h" /*#define BUF_SIZE 4096 */ #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 typedef struct _options_t { int help; int debug; } options_t; static options_t options; char *Feature = NULL; int process_sga(FILE *input, char *iFile) { unsigned long pos; int cnt; char *s, *res, *buf; size_t bLen = LINE_SIZE; if ((s = malloc(bLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } if (options.debug) fprintf(stderr, "Processing file %s\n", iFile); #ifdef DEBUG int c = 1; #endif while ((res = fgets(s, (int) bLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char ft[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == bLen && s[cLen - 1] != '\n') { bLen *= 2; if ((s = realloc(s, bLen)) == NULL) { perror("process_file: realloc"); exit(1); } res = fgets(s + cLen, (int) (bLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); exit(1); } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); exit(1); } ft[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); exit(1); } position[i++] = *buf++; } position[i] = 0; pos = (unsigned long)atol(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while 
(isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); exit(1); } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); exit(1); } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s (%c) Pos: %lu Cnts: %d Ext: %s\n", c++, seq_id, ft, strand, pos, cnt, ext); #endif if (!strcmp(ext, "")) { if (!strcmp(ft, "END")) { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, ft , pos, strand, cnt); } else { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, Feature, pos, strand, cnt); } } else { if (!strcmp(ft, "END")) { printf("%s\t%s\t%lu\t%c\t%d\t%s\n", seq_id, ft, pos, strand, cnt, ext); } else { printf("%s\t%s\t%lu\t%c\t%d\t%s\n", seq_id, Feature, pos, strand, cnt, ext); } } } /* End of While */ free(s); if (input != stdin) { fclose(input); } return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; int free_flag = 0; while (1) { int c = getopt(argc, argv, "f:dh"); if (c == -1) break; switch (c) { case 'f': Feature = optarg; break; case 'd': options.debug = 1; break; case 'h': options.help = 1; break; default: printf ("?? 
getopt returned character code 0%o ??\n", c); } } if (optind > argc || options.help == 1) { fprintf(stderr, "Usage: %s [options] -f [<] \n" " - version %s\n" " where options are:\n" " \t\t -h Show this help text\n" " \t\t -d Print debug information\n" "\n\tThe program reads a ChIP-seq data file (or from stdin [<]) in SGA format (),\n" "\tand changes the name of the feature field according to the specified parameter \n" "\t(by default ='FT').\n\n", argv[0], VERSION); return 1; } if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.debug) { fprintf(stderr, " Feature name: %s\n\n", Feature); } /* Process Feature Specs */ if (Feature == NULL) { if ((Feature = malloc(3 * sizeof(char))) == NULL) { perror("process_sga: malloc Feature"); exit(1); } strcpy(Feature, "FT"); free_flag = 1; } else { char *s = Feature; int i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Name too long \"%s\" \n", Feature); return 1; } s++; i++; } } if (process_sga(input, argv[optind++]) != 0) { return 1; } if (free_flag) free(Feature); return 0; } chip-seq/tools/sga2bed.c0000744022744200262270000005337713401230641015704 0ustar ambrosingr-bucher/* sga2bed.c Convert SGA file to BED format. 
# Arguments: # SGA File # Options: # Set Path to locate chr_NC_gi file # (used for NCBI id to chromosome name conversion) # Set Read length (def=0) # Set Score factor for BED score field (5th field) (def=1) # Create BED format without annotation track header lines # Transfer SGA optional field(s) to BED field(s) # Expand SGA lines into multiple BED lines # Set BED track name # Set BED track description # Set BED track color Giovanna Ambrosini, EPFL/ISREC, Giovanna.Ambrosini@epfl.ch Copyright (c) 2014 EPFL and Swiss Institute of Bioinformatics. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ /* #define DEBUG */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include "hashtable.h" #ifdef DEBUG #include #endif #include "version.h" #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define CHR_NB 18 #define AC_MAX 18 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 #define FIELD_MAX 64 typedef struct _options_t { char *dbPath; char *extField; int help; int debug; int db; int readLen; int scoreFact; int normFact; int noHdr; int expand; int extend; char *trackName; char *trackDesc; char *trackColor; } options_t; static options_t options; static hash_table_t *ac_table = NULL; static hash_table_t *ext_fields = NULL; int extIdx[10]; int extLen = 0; int process_ac() { FILE *input; int c; char buf[LINE_SIZE]; char *chrFile; char chrom[12]; int cLen; if (options.db) { cLen = (int)strlen(options.dbPath) + 12; if ((chrFile = (char*)malloc(cLen * sizeof(char))) == NULL) { perror("process_ac: malloc"); exit(1); } strcpy(chrFile, options.dbPath); } else { cLen = 21 + 12; if ((chrFile = (char*)malloc(cLen * sizeof(char))) == NULL) { perror("process_ac: malloc"); exit(1); } strcpy(chrFile, "/home/local/db/genome"); } strcat(chrFile, "/chr_NC_gi"); input = fopen(chrFile, "r"); if (input == NULL) { fprintf(stderr, "Could not open file %s: %s(%d)\n", chrFile, strerror(errno), errno); return 1; } do { c = fgetc(input); } while(c != '\n'); ac_table = hash_table_new(MODE_COPY); while (fgets(buf, LINE_SIZE, input) != NULL) { char *s; char chr_nb[CHR_NB] = ""; char ncbi_ac[AC_MAX] = ""; int i = 0; int nb_len = 0; int ac_len = 0; /*int valid = 1;*/ s = buf; /* Check line */ /* Get first character: if # skip line */ if (*s == '#') continue; /* Chrom NB */ while (*s != 0 && !isspace(*s)) { if (i >= CHR_NB) { fprintf(stderr, "AC too long in %s\n", s); fclose(input); exit(1); } chr_nb[i++] = *s++; } if (i < CHR_NB) chr_nb[i] = 0; nb_len = i + 1; while (isspace(*s)) s++; 
/* Chromosome NCBI AC */ i = 0; while (*s != 0 && !isspace(*s)) { if (i >= AC_MAX) { fprintf(stderr, "AC too long \"%s\" \n", s); fclose(input); exit(1); } ncbi_ac[i++] = *s++; } if (i < AC_MAX) ncbi_ac[i] = 0; ac_len = i + 1; /* Check if chrom contains valid numeric characters */ /* for (i = 0; i < strlen(chr_nb); ++i) { if (!isdigit(chr_nb[i])) { valid = 0; break; } } if (valid) { strcpy(chrom, "chr"); strcat(chrom, chr_nb); } else { strcpy(chrom, chr_nb); } */ strcpy(chrom, "chr"); strcat(chrom, chr_nb); nb_len = (int)strlen(chrom) + 1; /* printf("adding key %s (len = %d) value %s (ac) (len = %d) to hash table\n", chrom, nb_len, ncbi_ac, ac_len); */ /* Store both NCBI identifier to chrom number and chrom number to chrom number keys */ hash_table_add(ac_table, ncbi_ac, (size_t)ac_len, chrom, (size_t)nb_len); if (options.debug) { char *cn = hash_table_lookup(ac_table, ncbi_ac, (size_t)ac_len); fprintf (stderr, " AC Hash table: %s (len = %d) -> %s (len = %d)\n", ncbi_ac, ac_len, cn, nb_len); } hash_table_add(ac_table, chrom, (size_t)nb_len, chrom, (size_t)nb_len); if (options.debug) { char *cn = hash_table_lookup(ac_table, chrom, (size_t)nb_len); fprintf (stderr, " AC Hash table: %s (len = %d) -> %s (len = %d)\n", chrom, nb_len, cn, nb_len); } } return 0; } char** str_split(char* a_str, const char a_delim) { char** result = 0; size_t count = 0; char* tmp = a_str; char* last_comma = 0; char delim[2]; delim[0] = a_delim; delim[1] = 0; /* Count how many elements will be extracted. */ while (*tmp) { if (a_delim == *tmp) { count++; last_comma = tmp; } tmp++; } /* Add space for trailing token. */ count += last_comma < (a_str + strlen(a_str) - 1); /* Add space for terminating null string so caller knows where the list of returned strings ends. 
*/ count++; result = malloc(sizeof(char*) *count); if (result) { size_t idx = 0; char* token = strtok(a_str, delim); while (token) { assert(idx < count); *(result + idx++) = strdup(token); token = strtok(0, delim); } assert(idx == count - 1); *(result + idx) = 0; } return result; } int process_sga(FILE *input, char *iFile) { unsigned long start, end; unsigned long pos; int cnt; char *name = NULL; int score; int value; char *ext_str = NULL; int first = 1; char *s, *res, *buf; size_t bLen = LINE_SIZE; if (options.debug) fprintf(stderr, " Processing SGA file %s\n", iFile); if ((s = malloc(bLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } #ifdef DEBUG int c = 1; #endif while ((res = fgets(s, (int) bLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char ft[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; char sga_ext_f[12][FIELD_MAX]; int first_name_ext = 1; int first_val_ext = 1; int i = 0; int id_len = 0; char *cn; char** tokens; int no_strand = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == bLen && s[cLen - 1] != '\n') { bLen *= 2; if ((s = realloc(s, bLen)) == NULL) { perror("process_file: realloc"); exit(1); } res = fgets(s + cLen, (int) (bLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); exit(1); } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); exit(1); } ft[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); exit(1); } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* 
Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); exit(1); } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); exit(1); } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s (%c) Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, ft, strand, pos, cnt, ext); #endif //printf(" seq ID: %s Feat: %s (%c) Pos: %d Cnts: %d Ext: %s\n", seq_id, ft, strand, pos, cnt, ext); /* Set BED annotation track line */ if (!options.noHdr) { if (first) { if (options.trackName == NULL) { if ( (options.trackName = malloc((strlen(ft) + 1) * sizeof(char))) == NULL) { perror("process_sga: malloc Feature"); exit(1); } strcpy(options.trackName, ft); } printf("track name=\"%s\" description=\"%s\" visibility=1 color=%s\n", options.trackName, options.trackDesc, options.trackColor); first = 0; } } /* Set BED start/end positions and strand */ if (options.readLen == 0) { start = pos - 1; end = pos; } else { if (strand == '+') { start = pos - 1; end = start + options.readLen; } else if (strand == '-') { start = pos - options.readLen; end = pos; } else { start = pos - 1 - (unsigned long)((long double)options.readLen/2 + 0.5); end = pos - 1 + (unsigned long)((long double)options.readLen/2 + 0.5); } } if (strand == '0') { no_strand = 1; strand ='.'; } /* Set BED name field (4th) with SGA feature */ if ((name = malloc((strlen(ft) + 1) * sizeof(char))) == NULL) { perror("process_sga: malloc Name field"); exit(1); } strcpy(name, ft); /* Set BED score field */ score = cnt * options.scoreFact; /* Check extension flag - if present only redefine BED fields 4 and/or 5 and/or 7 */ if (options.extend) { /* Extract SGA extended fields */ tokens = str_split(ext, '\t'); if (tokens) { for (i = 0; *(tokens + i); i++) { strcpy 
(sga_ext_f[i], *(tokens + i)); //printf(" SGA ext field %s\n", sga_ext_f[i]); free(*(tokens + i)); } free(tokens); } /* Extract extension keys from ext_fields hash table */ void **keys = NULL; size_t key_num = hash_table_get_keys(ext_fields, &keys); //for (i = 0; i < key_num; i++) { for (i = key_num - 1; i >= 0; i--) { hash_table_element_t *el = keys[i]; int *sga_f = el->key; int *bed_f = NULL; bed_f = (int* ) HT_LOOKUP(ext_fields, sga_f); //fprintf(stderr, " Ext fields: found SGA field %d -> BED field %d\n", *sga_f, *bed_f); if (*bed_f == 4) { if (*sga_f > 5) { if (sga_ext_f[*sga_f - 6] != NULL) { if ((name = realloc(name, (strlen(sga_ext_f[*sga_f - 6]) + 1) * sizeof(char))) == NULL) { perror("process_sga: malloc SGA field"); exit(1); } } } else { perror("process_sga: SGA extension field must be > 5"); exit(1); } if (first_name_ext) { strcpy(name, sga_ext_f[*sga_f - 6]); first_name_ext = 0; } else { strcat(name, " "); strcat(name, sga_ext_f[*sga_f - 6]); } } else if (*bed_f == 5) { /* Set BED score field */ if (*sga_f > 5) { if (sga_ext_f[*sga_f - 6] != NULL) { score = round(atof(sga_ext_f[*sga_f - 6])) * options.scoreFact; } } else { perror("process_sga: SGA extension field must be > 5"); exit(1); } } else if (*bed_f == 7) { /* Set Extention fields, from 7 and on... 
*/ if (*sga_f > 5) { if (sga_ext_f[*sga_f - 6] != NULL) { value = round(atof(sga_ext_f[*sga_f - 6])*options.normFact); if (value == INT_MIN || value == INT_MAX) value = 0; //printf("value = %d \n", value); } if (first_val_ext) { if ((ext_str = malloc((strlen(sga_ext_f[*sga_f - 6]) + 1) * sizeof(char))) == NULL) { perror("process_sga: malloc ext String"); exit(1); } sprintf(ext_str, "%d", value); first_val_ext = 0; //printf("Extension String: %s\n", ext_str); } else { if ((ext_str = realloc(ext_str, (strlen(sga_ext_f[*sga_f - 6]) + 1) * sizeof(char))) == NULL) { perror("process_sga: malloc SGA field"); exit(1); } sprintf(ext_str, "%s\t%d", ext_str, value); } } else { perror("process_sga: SGA extension field must be > 5"); exit(1); } } else { perror("process_sga: Allowed BED extension fields are 4, 5, and 7"); exit(1); } } } id_len = (int)strlen(seq_id) + 1; cn = hash_table_lookup(ac_table, seq_id, (size_t)id_len); if (cn == NULL) continue; /* Print out BED line */ if (options.expand) { for (i = 0; i < cnt; i++) { if (ext_str == NULL) { if (no_strand) printf("%s\t%lu\t%lu\t%s\t%d\n", cn, start, end, name, 0); else printf("%s\t%lu\t%lu\t%s\t%d\t%c\n", cn, start, end, name, 0, strand); } else { printf("%s\t%lu\t%lu\t%s\t%d\t%c\t%s\n", cn, start, end, name, 0, strand, ext_str); } } } else { if (ext_str == NULL) { if (no_strand) printf("%s\t%lu\t%lu\t%s\t%d\n", cn, start, end, name, score); else printf("%s\t%lu\t%lu\t%s\t%d\t%c\n", cn, start, end, name, score, strand); } else { printf("%s\t%lu\t%lu\t%s\t%d\t%c\t%s\n", cn, start, end, name, score, strand, ext_str); } } } /* End of While */ if (input != stdin) { fclose(input); } return 0; } int main(int argc, char *argv[]) { char** tokens_1; char** tokens_2; #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; int i = 0; int j = 0; int usage = 0; options.scoreFact = 1; options.normFact = 1; static struct option long_options[] = { /* These options may or may not set a flag. We distinguish them by their indices. 
*/ {"debug", no_argument, 0, 'd'}, {"help", no_argument, 0, 'h'}, {"db", required_argument, 0, 'i'}, {"readlen", required_argument, 0, 'l'}, {"score", required_argument, 0, 'c'}, {"norm", required_argument, 0, 'n'}, {"extend", required_argument, 0, 'e'}, {"nohdr", no_argument, 0, 'r'}, {"expand", no_argument, 0, 'x'}, {"name", required_argument, 0, 0 }, {"desc", required_argument, 0, 0 }, {"color", required_argument, 0, 0 }, {0, 0, 0, 0} }; int option_index = 0; while (1) { int c = getopt_long(argc, argv, "dhi:l:c:e:n:rx", long_options, &option_index); if (c == -1) break; switch (c) { case 'd': options.debug = 1; break; case 'h': options.help = 1; break; case 'i': options.dbPath = optarg; options.db = 1; break; case 'l': options.readLen = atoi(optarg); break; case 'c': options.scoreFact = atoi(optarg); break; case 'e': options.extField = optarg; options.extend = 1; break; case 'n': options.normFact = atoi(optarg); break; case 'r': options.noHdr = 1; break; case 'x': options.expand = 1; break; case 0: /* This option is to set the annotation track line */ if (strcmp(long_options[option_index].name, "name") == 0) { options.trackName = optarg; } if (strcmp(long_options[option_index].name, "desc") == 0) { options.trackDesc = optarg; } if (strcmp(long_options[option_index].name, "color") == 0) { options.trackColor = optarg; } break; default: printf ("?? 
getopt returned character code 0%o ??\n", c); usage = 1; } } /*printf("optind: %d argc: %d\n", optind, argc); */ if (optind > argc || options.help == 1 || usage) { fprintf(stderr, "Usage: %s [options] [<] \n" " - version %s\n" " where options are:\n" " \t\t -d|--debug Produce Debug information\n" " \t\t -h|--help Show this Help text\n" " \t\t -i|--db Use to locate the chr_NC_gi file\n" " \t\t [default is: $HOME/db/genome]\n" " \t\t -l|--readlen Set Read length \n" " \t\t Unoriented SGA files are extended by +/-/2\n" " \t\t -c|--score Normalisation factor for BED score field (5th) [score=1]\n" " \t\t -n|--norm Normalisation factor for BED score field (7th) [normf=1]\n" " \t\t -e|--extend f1:F1[,f2:F2] Set SGA optional field(s) f1(,f2,...) to BED field(s) F1,(F2,..)\n" " \t\t Fields are specified by column numbers\n" " \t\t Accepted BED field values are 4, 5, and 7\n" " \t\t Except BED field 5 (score field), BED fields 4 and 7\n" " \t\t can be used to set multiple extension values from SGA\n" " \t\t Fields 5 and 7 convert into numerical values whereas\n" " \t\t BED field 4 takes character strings as they are\n" " \t\t -r|--nohdr BED format without annotation track header lines\n" " \t\t -x|--expand Expand SGA lines into multiple BED lines\n" " \t\t --name Set name for track name field [def. name=SGA-feature]\n" " \t\t --desc Set track description field [def. desc=\"ChIP-Seq Custom data\"]\n" " \t\t --color Define the track color in comma-separated RGB values [def. 
100,100,100]\n" "\n\tConvert SGA format into BED format.\n\n", argv[0], VERSION); return 1; } /*printf("argc: %d optind: %d\n", argc, optind);*/ if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.trackDesc == NULL) { if ((options.trackDesc = malloc(32 * sizeof(char))) == NULL) { perror("main: malloc trackDesc"); exit(1); } strcpy(options.trackDesc, "ChIP-Seq Custom data"); } if (options.trackColor == NULL) { if ((options.trackColor = malloc(12 * sizeof(char))) == NULL) { perror("main: malloc trackColor"); exit(1); } strcpy(options.trackColor, "100,100,100"); } if (options.extend) { int ht_el[2]; ext_fields = hash_table_new(MODE_COPY); if (options.debug) fprintf(stderr, " Extend SGA specs: %s\n", options.extField); tokens_1 = str_split(options.extField, ','); if (tokens_1) { for (i = 0; *(tokens_1 + i); i++) { tokens_2 = str_split(*(tokens_1 + i), ':'); if (tokens_2) { for (j = 0; *(tokens_2 + j); j++) { ht_el[j] = atoi(*(tokens_2 + j)); } HT_ADD(ext_fields, &ht_el[0], &ht_el[1]); free(*(tokens_2 + j)); } free(*(tokens_1 + i)); } free(tokens_1); } if (options.debug) { void **keys = NULL; size_t key_num = hash_table_get_keys(ext_fields, &keys); for (i = 0; i < key_num; i++) { hash_table_element_t *el = keys[i]; int *key = el->key; int *val = NULL; val = (int* ) HT_LOOKUP(ext_fields, key); fprintf(stderr, " Ext fields: found (key) SGA field %d -> (val) BED field %d\n", *key, *val); } } } if (options.debug) { fprintf(stderr, " Arguments:\n"); fprintf(stderr, " Read length : %d\n", options.readLen); fprintf(stderr, " Score factor : %d\n", options.scoreFact); fprintf(stderr, " Normalization factor (BED 7th fiels) : %d\n", options.normFact); fprintf(stderr, " Extend SGA 
[on/off]: %d\n", options.extend); fprintf(stderr, " Expand SGA [on/off]: %d\n", options.expand); fprintf(stderr, " Track name: %s\n", options.trackName); fprintf(stderr, " Track description: %s\n", options.trackDesc); fprintf(stderr, " Track color: %s\n", options.trackColor); fprintf(stderr, "\n"); } if (process_ac() == 0) { if (options.debug) fprintf(stderr, " HASH Table for chromosome access identifier initialized\n"); } else { return 1; } if (process_sga(input, argv[optind++]) != 0) { return 1; } return 0; } chip-seq/tools/README0000744022744200262270000000353313362375067015120 0ustar ambrosingr-bucherChIP-Seq auxiliary Perl and C applications ============================================================================ In this directory we have collected a series of Perl scripts and C programs that can be used to perform format conversion tasks as well as other auxiliary tasks such as read counts filtering or SAG file compression. The ChIP-seq main programs use as a format a simplified BED format, called SGA (Simplified Genome Annotation), which is sorted by sequence name and position. In a typical data analysis pipeline, the SGA file is often generated from a variety of richer formats, such as the Solexa genome mapping foramt, BAM BED, or FPS (Functional Position Set). The latter is used by the Signal Search Analysis programs at SIB (SSA). We therefore provide simple and fast tools to convert SGA data files to other formats, especially BED, WIG (Wiggle Track Format) and FPS, and vice-versa. WIG and BED files are used for viewing ChIP-seq data and results at the UCSC genome browser. The binary file chro_idx.nstorage includes a Perl hash table that, for each supported assembly, stores chromosome number-NCBI identifier pairs as well as chromosome lengths indexed by chromosome NCBI identifiers. This file is used by most conversion scripts. When required, its location () must by set by using the --db option. 
The text file chr_NC_gi is used by the C format conversion programs (such as sga2bed, bed2sga, etc) to generate a hash table to link chromosome numbers to NCBI RefSeq identifiers and vice versa. When required, its location (<path>) must be set by using the -i|--db option. The text file chr_size is used by a few C programs to fetch the chromosome size based on the corresponding chromosome NCBI identifier. In this way, it is possible to check whether the genome coordinates of the data files go beyond the chromosome boundaries. chip-seq/tools/fetch_sga.pl0000744022744200262270000000147311537713130016506 0ustar ambrosingr-bucher#!/usr/local/bin/perl # This tool is used to extract SGA lines # $fn = $ARGV[0]; if($ARGV[1] =~ m/(\S+):(\d+)\-(\d+)/) { $sq=$1; $b=$2; $e=$3} search(); while() { if(/(\S+)\s+\S+\s+(\S+)/) { if ($1 gt $sq) {last} elsif($2 > $e) {last} elsif($2 >= $b) {print} } } exit; sub search { # file initialization $size = -s $fn; open(SGA, "<$fn"); # dichotomy search $low = 0; $high = $size; for ($i=0; $i<20; $i++) { $n = $low + int(($high-$low)/2); seek SGA, $n, 0; ; $line = ; #print $line; if($line =~ m/(\S+)\s+\S+\s+(\S+)/) { if ($1 lt $sq) {$low = $n} elsif($1 gt $sq) {$high = $n} elsif($2 < $b-1000) {$low = $n} elsif($2 > $b) {$high = $n} else {last} } else {last} } } chip-seq/tools/chr_replace_sga.pl0000744022744200262270000000161711553015066017665 0ustar ambrosingr-bucher#!/usr/bin/perl # converts chrom names into RefSeq ids within SGA # usage: ./chr_replace_sga.pl <-s species> -f use strict; use Getopt::Std; use Storable qw (retrieve nstore); # package to store persistently variables in files [http://search.cpan.org/author/AMS/Storable-2.07/Storable.pm] my %opts; getopt('sf:', \%opts); # -s, & -f take arg. 
Values in %opts, Hash keys will be the switch names my $DB = "/db/genome/"; #my $DB = "/home/local/db/genome/"; my $chr2SV = retrieve($DB."chro_idx.nstorage"); &Usage() unless ($opts{'f'} and $opts{'s'}); # open the SGA file open(my $SGA, "$opts{'f'}") || die "can't open $opts{'f'} : $!"; while(<$SGA>) { chomp; if (/(\S+)\t(.*)/) { print "$chr2SV->{$opts{'s'}}->{$1}\t$2\n"; } } close($SGA); sub Usage { print STDERR <<"_USAGE_"; chr_replace_sga.pl -s -f _USAGE_ exit(1); } 1; chip-seq/tools/hashtable.h0000777022744200262270000000000013354703337020647 2../hashtable.hustar ambrosingr-bucherchip-seq/tools/partit2sga.pl0000744022744200262270000000505513354407153016647 0ustar ambrosingr-bucher#!/usr/bin/perl # # reformats partitioning output files (SGA) into bed format # usage: partit2bed.pl [-t track_name -d desc -n chr_nb -b chr_start -e -chr_end] use strict; use Getopt::Long; use Storable qw (retrieve nstore); # package to store persistently variables in files [http://search.cpan.org/author/AMS/Storable-2.07/Storable.pm] my %opt; my @options = ("help", "h", "db=s"); my $DB = "/home/local/db/genome/"; #my $DB = "/db/genome/"; if( ! 
GetOptions( \%opt, @options ) ) { &Usage(); } &Usage() if defined($opt{'help'}) || defined($opt{'h'}); #define options if ($opt{'db'} ne '') { $DB = $opt{'db'}; } open FH, $DB."chro_idx.nstorage" or die "Wrong Chrom Id Storable file $DB.\"chro_idx.nstorage\": $!"; my $chroac = retrieve($DB."chro_idx.nstorage"); &Usage() if $#ARGV < 0; my $file = $ARGV[0]; # Chromosome size table my %chr_size = (); my @pos = (); # Read First line of Partition File # # Get chrom sizes my $assembly = ""; # Read Firstline and get assembly from chrom identifier open(PART, "<$file") || die "can't read $file : $!"; my $firstline = ; chomp $firstline; my @field=split(/\t/,$firstline); if (defined($field[0])){ $assembly = $chroac->{'assembly'}->{$field[0]}; } if ($assembly ne "") { open (CHR, "/home/local/db/genome/$assembly/$assembly".".chrom.sizes") || die "can't open /home/local/db/genome/$assembly/$assembly'.'.chrom.sizes' : $!"; while () { chomp; my @f=split(/\t/,$_); $chr_size{$f[0]} = $f[1]; } close (CHR); } seek (PART, 0, 0); while (){ # Partitioning algorithm reports positions of start and end of tag, #NC_000001.9 CTCF 227595 + 1 Start Line #NC_000001.9 CTCF 227633 - 1 End Line #Generate centered SGA file my $lin=$_; chomp $lin; my @field=split(/\t/,$lin); if (defined($field[0])){ if ($field[3] eq '+' || $field[3] eq '0') { $pos[0] = $field[2]; } elsif ($field[3] eq '-') { $pos[1] = $field[2]; my $center = int(($pos[0] + $pos[1])/2); next if (defined($chr_size{$field[0]}) && $center > $chr_size{$field[0]}); print $field[0],"\t",$field[1],"\t",$center,"\t"."0"."\t$field[4]\n"; @pos = (); } } } close (PART); sub Usage { print STDERR <<"_USAGE_"; partit2sga.pl [options] where options are: -h|--help Show this stuff --db Use to locate Chrom Id Storable File 'chro_idx.nstorage' The program converts the output of the ChIP-Seq partitioning program to centered SGA format. 
_USAGE_
    exit(1);
}

1;
chip-seq/tools/sga2fps.pl0000744022744200262270000001144512453211136016125 0ustar ambrosingr-bucher#!/usr/bin/perl
# converts sga format to fps format
# usage: ./sga2fps.pl [<-s species>] file.sga
#
# Reads an SGA file (or STDIN when no file is given) and prints one FP line
# per SGA line. Sequence ids that look like RefSeq accessions are printed as
# they are; ids starting with "chr" are converted through the chro_idx
# storable table, which requires an assembly (-s).
#
# NOTE(review): this copy of the script had its whitespace collapsed, so runs
# of blanks inside string literals (printf formats, help text) may have been
# wider in the upstream source - confirm before relying on column positions.

use strict;
use Getopt::Long;
use Storable qw (retrieve nstore); # package to store persistently variables in files [http://search.cpan.org/author/AMS/Storable-2.07/Storable.pm]

my %opt;
my @options = ("help", "h", "species=s", "s=s", "f=s", "set0flag", "db=s");

my $file = "";
my $species = "";
my $feature = "";
my $ft_flag = 0;
my $set0flag = 0;
my $DB = "/db/genome/";

if( ! GetOptions( \%opt, @options ) ) { &Usage(); }

&Usage() if defined($opt{'help'}) || defined($opt{'h'});

if (defined($opt{'db'}) && $opt{'db'} ne '') {
    $DB = $opt{'db'};
}

# Probe the storable file first so a wrong --db yields a readable message;
# the probe handle is lexical and closed again before Storable opens the file.
my $idx_file = $DB."chro_idx.nstorage";
open(my $idx_fh, '<', $idx_file) or die "Wrong Chrom Id Storable file $DB.\"chro_idx.nstorage\": $!";
close($idx_fh);
my $chr2SV = retrieve($idx_file);

if (defined($opt{'s'}) && $opt{'s'} ne '') {
    $species = $opt{'s'};
}
if (defined($opt{'species'}) && $opt{'species'} ne '') {
    $species = $opt{'species'};
}
if (defined($opt{'f'}) && $opt{'f'} ne '') {
    $feature = $opt{'f'};
    $ft_flag = 1;
}
if (defined($opt{'set0flag'}) && $opt{'set0flag'} ne '') {
    $set0flag = 1;
}

$file = defined($ARGV[0]) ? $ARGV[0] : "";

# Character printed after ":+U" on the FP line; switched to "0" (and left
# there) the first time an unoriented tag is seen with --set0flag.
my $char29 = " ";
# Running FP line number.
my $count = 1;

# open the SGA file
my $SGA;
if ($file ne "") {
    # three-argument open: the file name is never interpreted as a mode or pipe
    open ($SGA, '<', $file) || die "can't open $file : $!";
} else {
    # a glob reference, not the string "STDIN": a string handle is a symbolic
    # reference and is rejected under "use strict"
    $SGA = \*STDIN;
}

# The first line decides which naming scheme the whole file uses.
my $firstline = <$SGA>;
$firstline = "" unless defined($firstline);
chomp $firstline;
my @f = split(/\t/,$firstline);

if ($f[0] =~ /[NA][CT]_\S+\.\d+/ && exists($chr2SV->{$f[0]})) {
    print_fp_line($f[0], \@f);
    print_fps_1($SGA);
} elsif ($f[0] =~ /^chr/) {
    if ($species && exists($chr2SV->{$species}->{$f[0]})) {
        print_fp_line($chr2SV->{$species}->{$f[0]}, \@f);
        print_fps_2($SGA, $species);
    } else {
        print STDERR "Please, provide a valid genome assembly (e.g. -s hg18) for chrom name to RefSeq id conversion!\n";
        exit(1);
    }
} else {
    print STDERR "Unrecognized sequence version $f[0] : please, check the chromosome identifier (only valid RefSeq ids are accepted)!\n";
    exit(1);
}
close ($SGA);

# Print one FP line.
#   $seq  sequence identifier to print in the EU field
#   $f    reference to the tab-split SGA fields of the line
# Reads and updates the file-scoped $feature and $char29; reads $count.
sub print_fp_line {
    my ($seq, $f) = @_;
    if ($f->[3] eq "0") {
        $f->[3] = "+";
        if ($set0flag) {
            $char29 = "0";
        }
    }
    # truncate description to fit field length
    if (!$ft_flag) {
        if ($f->[5] ne "") {
            $feature=substr($f->[5], 0, 19);
        } else {
            $feature=substr($f->[1], 0, 19-length($f->[4])).'_'.$f->[4];
        }
    }
    printf("FP %-20s:+U%1s EU:%-18s1%1s%10s;%6d.\n", $feature, $char29, $seq, $f->[3], $f->[2], $count);
}

# Convert the remaining lines of $fh, printing sequence ids unchanged.
sub print_fps_1 {
    my ($fh) = @_;
    while (my $lin = <$fh>) {
        $count++;
        chomp $lin;
        my @ar=split(/\t/,$lin);
        print_fp_line($ar[0], \@ar);
    }
}

# Convert the remaining lines of $fh, mapping chromosome names of assembly
# $species to their sequence versions through $chr2SV.
sub print_fps_2 {
    my ($fh, $species) = @_;
    while (my $lin = <$fh>) {
        $count++;
        chomp $lin;
        my @ar=split(/\t/,$lin);
        # convert chr to chromosome SV
        print_fp_line($chr2SV->{$species}->{$ar[0]}, \@ar);
    }
}

# NOTE(review): the option placeholders in the help text below (e.g. the
# argument names after --db, -f and -s) were stripped from this copy; the
# wording is kept exactly as found.
sub Usage {
    print STDERR <<"_USAGE_";

	sga2fps.pl [options]

	where options are:
		-h|--help Show this stuff
		--db Use to locate Chrom Id Storable File 'chro_idx.nstorage'
		-f Set field
		-s|--species Assembly (i.e hg18)
		--set0flag Set 0 flag at postion 29 of the FP line, forcing unoriented output (default : blank)

_USAGE_
    exit(1);
}

1;
chip-seq/tools/filter_counts.c0000744022744200262270000001672713362373540017267 0ustar ambrosingr-bucher/*
  filter_counts.c

  Filter Feature read counts that occur within user-defined
  regions of interest such as Repeat Mask regions.
  If the -r option is set, the program retains read counts that
  occur within user-defined regions of interest.

  # Arguments:
  # Feature name of the user-defined regions of interest (e.g. RMSK)

  Giovanna Ambrosini, EPFL/ISREC, Giovanna.Ambrosini@epfl.ch

  Copyright (c) 2014 EPFL and Swiss Institute of Bioinformatics.

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ /* #define DEBUG */ #define _GNU_SOURCE #include #include #include #include #include #include #ifdef DEBUG #include #endif #include "version.h" /*#define BUF_SIZE 4096 */ #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 typedef struct _options_t { int help; int debug; int retain; } options_t; static options_t options; char *Feature = NULL; int process_sga(FILE *input, char *iFile) { unsigned long pos; int cnt; char *s, *res, *buf; size_t bLen = LINE_SIZE; int prt = 0; char annotation[EXT_MAX]; if ((s = malloc(bLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } if (options.debug) fprintf(stderr, "Processing file %s\n", iFile); #ifdef DEBUG int c = 1; #endif while ((res = fgets(s, (int) bLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char ft[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == bLen && s[cLen - 1] != '\n') { bLen *= 2; if ((s = realloc(s, bLen)) == NULL) { perror("process_file: realloc"); exit(1); } res = fgets(s + cLen, (int) (bLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); exit(1); } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); exit(1); } ft[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); exit(1); } position[i++] = *buf++; } position[i] = 0; pos = (unsigned long)atol(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; 
while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); exit(1); } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); exit(1); } ext[i++] = *buf++; } #ifdef DEBUG fprintf(stderr, " [%d] seq ID: %s Feat: %s (%c) Pos: %lu Cnts: %d Ext: %s\n", c++, seq_id, ft, strand, pos, cnt, ext); #endif if (!strcmp(ft, Feature)) { if (strand == '+') { prt++; strcpy(annotation, ext); } else if (strand == '-') { prt--; } } else { if (options.retain) { if (prt >= 1) { if (!strcmp(ext, "")) { printf("%s\t%s\t%lu\t%c\t%d\t%s\n", seq_id, ft , pos, strand, cnt, annotation); } else { printf("%s\t%s\t%lu\t%c\t%d\t%s\t%s\n", seq_id, ft, pos, strand, cnt, ext, annotation); } } } else { if (prt < 1) { if (!strcmp(ext, "")) { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, ft , pos, strand, cnt); } else { printf("%s\t%s\t%lu\t%c\t%d\t%s\n", seq_id, ft, pos, strand, cnt, ext); } } } } } /* End of While */ free(s); return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; int free_flag = 0; while (1) { int c = getopt(argc, argv, "f:dhr"); if (c == -1) break; switch (c) { case 'f': Feature = optarg; break; case 'd': options.debug = 1; break; case 'h': options.help = 1; break; case 'r': options.retain = 1; break; default: printf ("?? 
getopt returned character code 0%o ??\n", c); } } if (optind > argc || options.help == 1) { fprintf(stderr, "Usage: %s [options] [<] \n" " - version %s\n" " where options are:\n" " \t\t -h Show this help text\n" " \t\t -d Print debug information\n" " \t\t -f Feature defining the user-defined regions of interest (default=RMSK)\n" " \t\t -r Retain Mode on\n" "\n\tThe program reads a ChIP-seq data file (or from stdin [<]) in SGA format ()\n" "\tand filters out all read counts that occur within user-defined regions of interest, such\n" "\tas Repeat Mask regions (feature=RMSK).\n" "\tIf 'Retain Mode' is on, it only retains those lines that are within the user-defined regions,\n" "\tin which case the annotation of the user-defined regions is added to the output SGA file.\n\n", argv[0], VERSION); return 1; } if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.debug) { fprintf(stderr, " Feature name: %s\n\n", Feature); } /* Process Feature */ if (Feature == NULL) { if ((Feature = malloc(5 * sizeof(char))) == NULL) { perror("process_sga: malloc Feature"); exit(1); } strcpy(Feature, "RMSK"); free_flag = 1; } else { char *s = Feature; int i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Name too long \"%s\" \n", Feature); return 1; } s++; } } if (process_sga(input, argv[optind++]) != 0) { return 1; } if (free_flag) free(Feature); return 0; } chip-seq/tools/countsga.c0000744022744200262270000002056313046350703016216 0ustar ambrosingr-bucher/* countsga.c Count tags and compute total sequence length of an SGA file # Arguments: # feature type, count cut-off # SGA file Giovanna Ambrosini, EPFL/ISREC, Giovanna.Ambrosini@epfl.ch Copyright (c) 2014 
EPFL and Swiss Institute of Bioinformatics. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* #define DEBUG */ #define _GNU_SOURCE #include #include #include #include #include #include #ifdef DEBUG #include #endif #include "version.h" /*#define BUF_SIZE 4096 */ #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 typedef struct _options_t { int help; int debug; int cutOff; char *ftName; char strand; } options_t; static options_t options; char *Feature = NULL; int ft_specs = 1; unsigned long Counts = 0; /* Total Target Counts */ unsigned long Len = 0; /* Total Sequence Length */ int process_sga(FILE *input) { char seq_id_prev[SEQ_ID] = ""; unsigned long pos; int cnt = 0; unsigned long last_pos = 0; char *s, *res, *buf; size_t bLen = LINE_SIZE; if ((s = malloc(bLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } /* if (options.debug) fprintf(stderr, "Processing file %s\n", iFile); */ #ifdef DEBUG int lc = 1; #endif /* while ((res = fgets(s, (int) bLen, f)) != NULL) { */ while ((res = fgets(s, (int) bLen, input)) != NULL) { char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; size_t cLen = strlen(s); unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == bLen && s[cLen - 1] != '\n') { bLen *= 2; if ((s = realloc(s, bLen)) == 
NULL) { perror("process_file: realloc"); exit(1); } /* res = fgets(s + cLen, (int) (bLen - cLen), f); */ res = fgets(s + cLen, (int) (bLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); exit(1); } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); exit(1); } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); exit(1); } position[i++] = *buf++; } position[i] = 0; pos = (unsigned long)atol(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); exit(1); } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); exit(1); } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s (%c) Pos: %lu Cnts: %d Ext: %s\n", lc++, seq_id, feature, strand, pos, cnt, ext); #endif if (!strcmp(feature, "END")) { continue; } if (strcmp(seq_id, seq_id_prev) != 0) { Len += last_pos; strcpy(seq_id_prev, seq_id); } if (ft_specs == 0) { if (cnt > options.cutOff) Counts += (unsigned int)options.cutOff; else Counts += (unsigned int)cnt; } else { if (options.strand == '\0') { if (strcmp(feature, options.ftName) == 0) { if (cnt > options.cutOff) Counts += (unsigned int)options.cutOff; else Counts += (unsigned int)cnt; } } else { if (strcmp(feature, options.ftName) == 0 && strand == options.strand) { if (cnt > options.cutOff) Counts += 
(unsigned int)options.cutOff; else Counts += (unsigned int)cnt; } } } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ Len += last_pos; /* fprintf (stderr, "Total Tag Counts : %lu , Total Sequence Len : %lu\n", Counts, Len); */ printf ("Total Tag Counts : %lu , Total Sequence Len : %lu\n", Counts, Len); return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; options.cutOff = 1; options.ftName = NULL; options.strand = '\0'; while (1) { int c = getopt(argc, argv, "f:dhc:"); if (c == -1) break; switch (c) { case 'd': options.debug = 1; break; case 'h': options.help = 1; break; case 'c': options.cutOff = atoi(optarg); break; case 'f': Feature = optarg; break; default: printf ("?? getopt returned character code 0%o ??\n", c); } } /* if (optind == argc || options.help == 1) { */ if (optind > argc || options.help == 1) { fprintf(stderr, "Usage: %s [options] [<] \n" " - version %s\n" " where options are:\n" " \t\t -h Show this help text\n" " \t\t -d Print debug information\n" " \t\t -f Feature specifications [name [+|-]]\n" " \t\t -c Count Cut-off (default is %d)\n" "\n\tThe program reads a ChIP-seq data file (or from stdin [<]) in SGA format (),\n" "\tand computes the total number of tag counts as well as the total sequence length.\n" "\tThe parameter is a name that corresponds to the second field of the SGA file.\n" "\tIt might optionally include the strand specification (+|-).\n" "\tIf no feature is given then all input tags are processed.\n\n", argv[0], VERSION, options.cutOff); return 1; } if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.debug) { fprintf(stderr, " Arguments:\n"); 
fprintf(stderr, " Selected Feature : %s\n", Feature); } /* Process Feature Specs */ if (Feature == NULL) { ft_specs = 0; /* Process all features */ } else { options.ftName = malloc((FT_MAX + 2) * sizeof(char)); char *s = Feature; int i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Description too long \"%s\" \n", Feature); return 1; } options.ftName[i++] = *s++; } options.ftName[i] = '\0'; while (isspace(*s++)) options.strand = *s; } if (options.debug) { if (!ft_specs) { fprintf(stderr, "Feature Specs: ALL -> Process all features\n"); } else { if (options.strand == '\0') fprintf(stderr, "Feature Specs: Name : %s\n", options.ftName); else fprintf(stderr, "Feature Specs: Name : %s Strand %c\n", options.ftName, options.strand); } } if (process_sga(input) != 0) { return 1; } if (input != stdin) { fclose(input); } return 0; } chip-seq/tools/partit2bed.pl0000744022744200262270000002255612705363767016647 0ustar ambrosingr-bucher#!/usr/bin/perl # # reformats partitioning output files (SGA) into bed format # usage: partit2bed.pl [-t track_name -d desc -n chr_nb -b chr_start -e -chr_end] use strict; use Getopt::Long; use Storable qw (retrieve nstore); # package to store persistently variables in files [http://search.cpan.org/author/AMS/Storable-2.07/Storable.pm] my %opt; my @options = ("help", "h", "desc=s", "d=s", "track=s", "t=s", "n=i", "b=i", "e=i", "db=s"); my $desc = "ChIP-Seq"; my $track_name = "Test-BED"; my $chr_nb = "chr0"; my $chr_start = -1; my $chr_end = -1; #my $DB = "/home/local/db/genome/"; my $DB = "/db/genome/"; if( ! 
GetOptions( \%opt, @options ) ) { &Usage(); } &Usage() if defined($opt{'help'}) || defined($opt{'h'}); #define options if ($opt{'db'} ne '') { $DB = $opt{'db'}; } open FH, $DB."chro_idx.nstorage" or die "Wrong Chrom Id Storable file $DB.\"chro_idx.nstorage\": $!"; my $chroac = retrieve($DB."chro_idx.nstorage"); if ($opt{'d'} ne '') { $desc = $opt{'d'}; } if ($opt{'desc'} ne '') { $desc = $opt{'desc'}; } if ($opt{'t'} ne '') { $track_name = $opt{'t'}; } if ($opt{'track'} ne '') { $track_name = $opt{'track'}; } if ($opt{'n'} ne '') { $chr_nb = "chr".$opt{'n'}; } if ($opt{'b'} ne '') { $chr_start = $opt{'b'}; } if ($opt{'e'} ne '') { $chr_end = $opt{'e'}; } &Usage() if $#ARGV < 0; my $file = $ARGV[0]; my $start_reg = -1; my $end_reg = -1; my $chr_start_flag = 0; my $chr_end_flag = 0; my $chr_nb_flag = 0; my $chr_name_flag = 1; my $assembly = ""; my @pos = (); # Read First line of Partition File # Print out BED File Header open(PART, "<$file") || die "can't read $file : $!"; my $lin=; my @field=split(/\t/,$lin); if ( $chr_start != -1) { $start_reg = $chr_start; $chr_start_flag = 1; } else { $start_reg = $field[2]; } if ( $chr_end != -1) { $end_reg = $chr_end; $chr_end_flag = 1; } else { $end_reg = $field[2] + 1000000; } if ($chr_nb ne "chr0") { $chr_nb_flag = 1; } if (exists($chroac->{$field[0]})) { $chr_name_flag = 0; } #if ($chr_nb_flag) { # print "browser position $chr_nb:$start_reg-$end_reg\n"; #} elsif (!$chr_name_flag) { # print "browser position $chroac->{$field[0]}:$start_reg-$end_reg\n"; #} else { # print "browser position $field[0]:$start_reg-$end_reg\n"; #} #print "browser full refGene\n"; chomp $track_name; chomp $desc; print "track name=$track_name description=\"$desc\" visibility=1 color=200,100,0\n"; #if ($chr_name_flag) { # print "track name=$track_name description=\"ChIP-Seq-$experiment/$exp_data Partitioning\" visibility=1 color=200,100,0\n"; #} else { # print "track name=$track_name 
description=\"",$chroac->{'assembly'}->{$field[0]},"-ChIP-Seq-$experiment/$exp_data Partitioning\" visibility=1 color=200,100,0\n"; #} close(PART); # generate BED-file open(PART, "<$file") || die "can't read $file : $!"; if ($chr_name_flag) { # we deal with chromosome names (chrx) my $i = 0; while (){ # Partitioning algorithm reports positions of start and end of tag, #NC_000001.9 CTCF 227595 + 1 Start Line #NC_000001.9 CTCF 227633 - 1 End Line my $lin=$_; chomp $lin; my @field=split(/\t/,$lin); if (defined($field[0])){ if ($field[3] eq '+' || $field[3] eq '0') { $pos[0] = $field[2] - 1; } elsif ($field[3] eq '-') { $i++; $pos[1] = $field[2]; if ( $chr_nb_flag && !$chr_start_flag && !$chr_end_flag ) { if ($field[0] eq $chr_nb) { #print $field[0],"\t",$pos[0],"\t",$pos[1],"\t\t\t\+\n"; print $field[0],"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } } elsif ($chr_nb_flag && $chr_start_flag && !$chr_end_flag) { if ($field[0] eq $chr_nb && ($pos[0] >= $chr_start || $pos[1] >= $chr_start)) { #print $field[0],"\t",$pos[0],"\t",$pos[1],"\t\t\t\+\n"; print $field[0],"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } } elsif ($chr_nb_flag && $chr_start_flag && $chr_end_flag) { if ($field[0] eq $chr_nb && (($pos[0] >= $chr_start || $pos[1] >= $chr_start) && ($pos[0] <= $chr_end || $pos[1] <= $chr_end))) { #print $field[0],"\t",$pos[0],"\t",$pos[1],"\t\t\t\+\n"; print $field[0],"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } } else { #print $field[0],"\t",$pos[0],"\t",$pos[1],"\t\t\t\+\n"; print $field[0],"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } @pos = (); } } } } else { # Chrom name Flag = 0 (NCBI IDs) my $i = 1; my $firstline = ; chomp $firstline; my @field=split(/\t/,$firstline); if (defined($field[0])){ $assembly = $chroac->{'assembly'}->{$field[0]}; } close (CHR); $pos[0] = $field[2] - 1; my $secondline = ; chomp $secondline; my @field=split(/\t/,$secondline); $pos[1] = $field[2]; if ( $chr_nb_flag && !$chr_start_flag && 
!$chr_end_flag ) { if ($chroac->{$field[0]} eq $chr_nb) { print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } } elsif ($chr_nb_flag && $chr_start_flag && !$chr_end_flag) { if ($chroac->{$field[0]} eq $chr_nb && ($pos[0] >= $chr_start || $pos[1] >= $chr_start)) { print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } } elsif ($chr_nb_flag && $chr_start_flag && $chr_end_flag) { if ($chroac->{$field[0]} eq $chr_nb && (($pos[0] >= $chr_start || $pos[1] >= $chr_start) && ($pos[0] <= $chr_end || $pos[1] <= $chr_end))) { print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } } else { print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } @pos = (); while (){ # Partitioning algorithm reports positions of start and end of tag, #NC_000001.9 CTCF 227595 + 1 Start Line #NC_000001.9 CTCF 227633 - 1 End Line my $lin=$_; chomp $lin; my @field=split(/\t/,$lin); if (defined($field[0])){ if ($assembly ne $chroac->{'assembly'}->{$field[0]}) { if ($assembly) { print STDERR "The partition output $file appears to contain entries from different species: $field[0] from ",$chroac->{'assembly'}->{$field[0]}," vs. 
preceeding sequences from $assembly\n"; } $assembly = $chroac->{'assembly'}->{$field[0]}; } if ($field[3] eq '+' || $field[3] eq '0') { $pos[0] = $field[2] - 1; # check chromosome boundaries next if ($pos[0] > $chroac->{'length'}->{$field[0]}); } elsif ($field[3] eq '-') { $i++; $pos[1] = $field[2]; # check chromosome boundaries next if ($pos[1] > $chroac->{'length'}->{$field[0]}); if ( $chr_nb_flag && !$chr_start_flag && !$chr_end_flag ) { if ($chroac->{$field[0]} eq $chr_nb) { #print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t\t\t\+\n"; print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } } elsif ($chr_nb_flag && $chr_start_flag && !$chr_end_flag) { if ($chroac->{$field[0]} eq $chr_nb && ($pos[0] >= $chr_start || $pos[1] >= $chr_start)) { #print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t\t\t\+\n"; print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } } elsif ($chr_nb_flag && $chr_start_flag && $chr_end_flag) { if ($chroac->{$field[0]} eq $chr_nb && (($pos[0] >= $chr_start || $pos[1] >= $chr_start) && ($pos[0] <= $chr_end || $pos[1] <= $chr_end))) { #print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t\t\t\+\n"; print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } } else { #print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t\t\t\+\n"; print $chroac->{$field[0]},"\t",$pos[0],"\t",$pos[1],"\t"."R:$i"."\t$field[4]\n"; } @pos = (); } } } } close (PART); sub Usage { print STDERR <<"_USAGE_"; partit2bed.pl [options] where options are: -h|--help Show this stuff --db Use to locate Chrom Id Storable File 'chro_idx.nstorage' -t|--track BED track name [def: $track_name] -d|--desc Description field of the BED header line [def: $desc] -n Chromosome number (BED declaration lines) [def: 0 - all chromosomes] -b Chromosome start [def: $chr_start - entire chrom region] -e Chromosome end [def: $chr_start - entire chrom region] The program converts the output of the 
ChIP-Seq partitioning program to BED format. _USAGE_ exit(1); } 1; chip-seq/chippart.1.gz0000744022744200262270000000226413046354237015406 0ustar ambrosingr-bucherؙXchippart.1VMoFW tPhTC".#?ovERr1͛>PS.=ٯ3Vʺ 4э2j,7:o{꠭Ѧk[}z׬WQ"eTn7M_M0dl'ql|U%K;&v|!;.ycnɲ8m˸KtˇU{yi}=9V'4KsT\9#Jm7Ǝd|HtX7u +6N DHS gGLϨS۠jlbSa ) Е.h`eHmmgGkꓺuhlzOMqsڄ:j!eSJm'1C3 iaX"hRd\H%* 攭t݄"TG+/9]#;**h?E+2v PjFH6x{AV|<5y:M:ES4P\,I" kFP0DR_@H_9S$n9dx#i<}~v vp^aeb]0PEϙ'Ɠ.C%';]NkY~QβMcqbۧt$O(Xf ~@V<^E-!Nx"~ ϣK:m˦fi yj=C?ɀ`"ÁWfK$]1gO ]rxfztEJknxȤ'P<4Cd@︒b3ζI? ΙP.(:㤾*O1DXHi x,RǓ'!lq9{%$i9рR0yB z3q<+6tqs?ۮ>ݤb*&r\X=J`ЬK chip-seq/chipextract.1.gz0000744022744200262270000000222012555637341016106 0ustar ambrosingr-bucher>Uchipextract.1VM7Wtq P&v.[d`CbRޏ2C s3T;HIcLj|^K3_#u~4Ǐt'@#]GMfc^y|ѭ4,|G nr;/|ПJ)ΩG!ir-| 8aտ KG}ta1_R dK݊6&ži4.h8Okٴ:A%H mtj+1[|?r}&u2kFdpS&S'H[2IduŔSUuS[L\D7Jf;pIm@ġF9dN"]2g`Vo8j#W$'YTJsagGj @4bѣ˺ר WX%>/ȪMC!w"Rez4IF{lQX4ڶ6il_`ճ\.ӀJI J[kDH~P7\M{264<FqZ&fQ) ٥ZǶ5^GZ9ل1/)Q8J_4)Z;!FQv-:G×k.LjڔPG[ rSS "5NQ8%CAFQt]GN\:0M L\jrO]gqL:ͯgk^fGC^f$탯Iv3[T{{>jTy@Rv;yȟGKz47ר#m&>\\,uabz@:rn+_7VKpx6.$p'؏xk!IL*u ں#^@UF%HxU;Fs֛Zj  n[S 8n6=IDD;6?Kj^j7o|ih~=jpv-ΐ-Qaʇ;˕š't7wSykųpyhlB *ӄvytbYf4sЬgyV*ĖAB^WD)jY~wF.%m.A*[FV}D:Iŋͭf}Ymxq^W+bEs^R2EN_NއgiO}qҼP*\ݭnM0J&٭0]ಜKC·ɤii6bQA#! wUFjWq5 aIN7,8!wJN8M.SB>+͙1+IuH ؁Ti8S %LaA#$Ct]:Z׮m3@txJ_Dn8⋬K0\T3||A]WTQPecnuBEn(-4{ˊD.=ˢVّQ&&OXzk( 㦺(jwI%wFhZ=~O'=*0GőVY40 s貜űKh.)f2kbR BJ\P1l*i q+fK%YmjJgy2VUA߁X\P!Z]NG{qR@(i2Ct+w;A4[Ǡ'3r9bOz/5aKߘ+ѱ`rI/5-w1 ?b8X)+i1/wL chip-seq/chipcor.c0000744022744200262270000006030413243024607014657 0ustar ambrosingr-bucher/* chip_cor.c Feature correlation Tool. The program generates histograms showing the positional relationship of two features. # Arguments: # feature_1 type, feature_1 strand, feature_2 type, feature_2 strand, # beginning of range, end of range, window width (histogram step_size), # count cut-off value Giovanna Ambrosini, EPFL, Giovanna.Ambrosini@epfl.ch Copyright (c) 2014 EPFL and Swiss Institute of Bioinformatics. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . #define DEBUG 1 */ #define _GNU_SOURCE #include #include #include #include #include #include #include #ifdef DEBUG #include #endif #include "version.h" /*#define BUF_SIZE 4096 */ #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 typedef struct _options_t { int cutOff; int normFact; int help; int debug; int oriented; } options_t; static options_t options; typedef struct _feature_t { char ft[FT_MAX]; char strand; int *pos; int *cnt; int *ptr; char *str; } feature_t, *feature_p_t; feature_t ref_ft; feature_t tar_ft; typedef struct _histo_t { int *pos; long *val; } histo_t, *histo_p_t; int l5_p, l3_p; char *RefFeature = NULL; char *TarFeature = NULL; int From = 0; int To = 0; int Win = 0; int auto_cor = 0; /* Autocorrelation Flag */ /* For Histogram Normalization */ unsigned long Rtot = 0; /* Total Reference Counts */ unsigned long Ttot = 0; /* Total Target Counts */ unsigned long Len = 0; /* Total Sequence Length */ int ref_specs = 1; /* if = 1 feature specs : AND */ int tar_specs = 1; int or_same_strand = 0; int or_opposite_strand = 0; int or_any_strand = 0; char *norm_opt[3] = {"Raw Counts", "Count density", "Global Normalization"}; int histo_print(histo_p_t histo) { int i = 0; int j; if (options.debug && Rtot != 0) fprintf(stderr," Len %lu Rtot %lu Ttot %lu Len/Rtot %lu\n", Len, Rtot, Ttot, Len/Rtot); 
printf("#Norm : %s , Reference Feature : %s, Target feature : %s , Total Sequence Len : %lu , Total Reference Counts : %lu , Total Target Counts : %lu\n", norm_opt[options.normFact],RefFeature, TarFeature, Len, Rtot, Ttot); for (j = l5_p; j <= l3_p; j++) { if (auto_cor && (histo->pos[i] == 0)) { if (options.normFact == 1) /* Count density */ printf("%9.1f %9.3e\n", (float)histo->pos[i], ((float)histo->val[i] - Rtot)/((float)Rtot * Win)); else if (options.normFact == 2) /* Global Normalization */ printf("%9.1f %9.3e\n", (float)histo->pos[i], ((float)histo->val[i] - Rtot) * Len/((float)Rtot * Ttot * Win)); else printf("%9.1f %10ld\n", (float)histo->pos[i], histo->val[i] - Rtot); } else { if (options.normFact == 1) /* Count density */ printf("%9.1f %9.3e\n", (float)histo->pos[i], (float)histo->val[i]/((float)Rtot * Win)); else if (options.normFact == 2) /* Global Normalization */ printf("%9.1f %9.3e\n", (float)histo->pos[i], ((float)histo->val[i] * Len / ((float)Rtot * Ttot * Win))); else printf("%9.1f %10ld\n", (float)histo->pos[i], histo->val[i]); } i++; } free(histo->pos); free(histo->val); return 0; } int histo_init(histo_p_t histo) { /* Fit Range to accomodate an integer number of window sizes and make sure that one bin is always centered at 0 */ int xb, xc, xe; xb = -Win/2; xe = xb + Win - 1; xc = (xb + xe)/2; if (options.debug) fprintf(stderr, " xb %d, xe %d, xc %d\n", xb, xe, xc); if (From > xb) { l5_p = (From - xb)/Win + 1; } else { l5_p = -(xb - From)/Win; } if (To >= xe) { l3_p = (To - xe)/Win; } else { l3_p = -(xe - To)/Win + 1; } if (options.debug) fprintf(stderr, " l5_p: %d l3_p: %d\n", l5_p, l3_p); /* New range */ From = xb + l5_p * Win; To = xe + l3_p * Win; if (options.debug) fprintf(stderr, " New range: [%d, %d]\n\n", From, To); /* Initialize Histogram */ histo->pos = (int *) malloc((l3_p - l5_p + 1) * sizeof(int)); if (histo->pos == NULL) { fprintf(stderr, "Out of memory: %s(%d)\n",strerror(errno), errno); return 1; } histo->val = (long *) 
malloc((l3_p - l5_p + 1) * sizeof(long)); if (histo->val == NULL) { fprintf(stderr, "Out of memory: %s(%d)\n",strerror(errno), errno); return 1; } int i = 0; int j; for (j = l5_p; j <= l3_p; j++) { histo->pos[i] = xc + j * Win; histo->val[i] = 0; i++; } return 0; } /* Feature correlation measurement. For each Reference Feature (e.g.: A+), which is found in the same set of sequences (e.g : NC_000001.9), examine all Target Features whose positions lie within the histogram range [From, To]. The number of ref features found in the seq set being under examination is given by the ft_nb variable. j= 1 .. ft_nb (nr of ref. features) For each ref. feature: k = ref_ft.ptr[j] is the index of the target feature which is located at its closest upstream position (with respect to the feature itself), that is the preceding target feature index. The index k is then decreased in order to examine further upstream (left) tar features within the given window range. k = (ref_ft.ptr[j] + 1) is the index of the target feature which is located at its closest downstream position (with respect to the ref. feature itself), namely the subsequent target feature. The index k is then increased in order to examine all possible downstream target features within the given window. 
*/ int histo_update(histo_p_t histo, unsigned int ft_nb, unsigned int tar_nb) { long j, k, n; #ifdef DEBUG printf(" histo_update: nb of ref features : %d\n", ft_nb); printf(" histo_update: nb of tar features : %d\n", tar_nb); printf(" histo_update: From: %d To: %d\n", From, To); #endif for (j = 1; j <= (long)ft_nb; j++) { k = ref_ft.ptr[j]; if (!options.oriented || (ref_ft.str[j] == '+')) { /* Ref Feat Un-oriented or Oriented on Pos Strand Examin Upstream Tar pos */ while (tar_ft.pos[k] > ref_ft.pos[j] + To) {k--;} while ((k >= 0) && tar_ft.pos[k] >= ref_ft.pos[j] + From) { if (!options.oriented || (or_same_strand && tar_ft.str[k] == '+') || (or_opposite_strand && tar_ft.str[k] == '-') || or_any_strand) { n = (tar_ft.pos[k] - ref_ft.pos[j] - From)/Win; } else { n = -1; Ttot--; /* decrease Target feature count (oriented mode) */ } if (n >= 0) { histo->val[n] += ref_ft.cnt[j] * tar_ft.cnt[k]; } #ifdef DEBUG printf(" histo_update feat idx:%d (1): bin:%d hval:%ld\n", j, n, histo->val[n]); #endif k--; /* examine further upstream positions within the window range */ } } else { /* Reverse Loop - Ref Feature Oriented on Neg Strand (Examin Upstream Tar pos) */ while (tar_ft.pos[k] > ref_ft.pos[j] - From) {k--;} while ((k >= 0) && (tar_ft.pos[k] >= ref_ft.pos[j] - To)) { if ((or_same_strand && tar_ft.str[k] == '-') || (or_opposite_strand && tar_ft.str[k] == '+') || or_any_strand) { n = (ref_ft.pos[j] - tar_ft.pos[k] - From)/Win; } else { n = -1; Ttot--; /* decrease Target feature count (oriented mode) */ } if (n >= 0) { histo->val[n] += ref_ft.cnt[j] * tar_ft.cnt[k]; } #ifdef DEBUG printf(" histo_update feat idx:%d (1): bin:%d hval:%d\n", j, n, histo->val[n]); #endif k--; /* examine further upstream positions within the window range */ } } k = ref_ft.ptr[j] + 1; if (!options.oriented || (ref_ft.str[j] == '+')) { /* Ref Feat Un-oriented or Oriented on Pos Strand (Examin Downstream Tar pos) */ while ((tar_ft.pos[k] < ref_ft.pos[j] + From) && k <= (long)tar_nb) {k++;} while 
((tar_ft.pos[k] <= ref_ft.pos[j] + To) && k <= (long)tar_nb) { if (!options.oriented || (or_same_strand && tar_ft.str[k] == '+') || (or_opposite_strand && tar_ft.str[k] == '-') || or_any_strand) { n = (tar_ft.pos[k] - ref_ft.pos[j] - From)/Win; } else { n = -1; } if (n >= 0) { histo->val[n] += ref_ft.cnt[j] * tar_ft.cnt[k]; } #ifdef DEBUG printf(" histo_update feat idx:%d (2): bin:%d hval:%ld\n", j, n, histo->val[n]); #endif k++; /* examine further downstream positions within the window range */ } } else { /* Reverse Loop - Ref Feature Oriented on Neg Strand (Examin Downstream Tar pos) */ while ((tar_ft.pos[k] < ref_ft.pos[j] - To) && k <= (long)tar_nb) {k++;} while ((tar_ft.pos[k] <= ref_ft.pos[j] - From) && k <= (long)tar_nb) { if ((or_same_strand && tar_ft.str[k] == '-') || (or_opposite_strand && tar_ft.str[k] == '+') || or_any_strand) { n = (ref_ft.pos[j] - tar_ft.pos[k] - From)/Win; } else { n = -1; } if (n >= 0) { histo->val[n] += ref_ft.cnt[j] * tar_ft.cnt[k]; } #ifdef DEBUG printf(" histo_update feat idx:%d (2): bin:%d hval:%ld\n", j, n, histo->val[n]); #endif k++; /* examine further downstream positions within the window range */ } } } return 0; } int process_sga(FILE *input, char *iFile, histo_p_t histo) { unsigned int k = 0; unsigned int j = 0; char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; char *s, *res, *buf; size_t rf_mLen = BUF_SIZE; size_t tf_mLen = BUF_SIZE; size_t mLen = LINE_SIZE; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command 
failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); return 1; } if ((ref_ft.cnt = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); return 1; } if ((ref_ft.ptr = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); return 1; } if ((ref_ft.str = (char *)calloc(rf_mLen, sizeof(char))) == NULL) { perror("process_sga: malloc"); return 1; } if ((tar_ft.pos = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); return 1; } if ((tar_ft.cnt = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); return 1; } if ((tar_ft.str = (char *)calloc(tf_mLen, sizeof(char))) == NULL) { perror("process_sga: malloc"); return 1; } if ((s = malloc(mLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); return 1; } tar_ft.pos[0] = From - 1; #ifdef DEBUG int c = 1; #endif /* while (fscanf(f,"%s %s %d %c %d %*s", seq_id, feature, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("process_file: realloc"); return 1; } res = fgets(s + cLen, (int) (mLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", 
buf); return 1; } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); return 1; } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); return 1; } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); return 1; } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); return 1; } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, feature, strand, pos, cnt, ext); #endif if (j >= rf_mLen - 1) { rf_mLen *= 2; if (( ref_ft.pos = (int *)realloc(ref_ft.pos, rf_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); return 1; } if ((ref_ft.cnt = (int *)realloc(ref_ft.cnt, rf_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); return 1; } if ((ref_ft.ptr = (int *)realloc(ref_ft.ptr, rf_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); return 1; } if ((ref_ft.str = (char *)realloc(ref_ft.str, rf_mLen * sizeof(char))) == NULL) { perror("process_sga: realloc"); return 1; } } if (k >= tf_mLen - 1) { tf_mLen *= 2; if ((tar_ft.pos = (int *)realloc(tar_ft.pos, tf_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); return 1; } if ((tar_ft.cnt = (int *)realloc(tar_ft.cnt, tf_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); return 1; } if ((tar_ft.str = (char *)realloc(tar_ft.str, tf_mLen * sizeof(char))) == NULL) { perror("process_sga: realloc"); 
return 1; } } if (strcmp(seq_id, seq_id_prev) != 0) { tar_ft.pos[k + 1] = ref_ft.pos[j] + To + Win + 1; histo_update(histo, j, k); strcpy(seq_id_prev, seq_id); j = 0; k = 0; Len += last_pos; } if (ref_specs) { if (strcmp(feature, ref_ft.ft) == 0 && strand == ref_ft.strand) { j++; ref_ft.pos[j] = pos; ref_ft.str[j] = strand; ref_ft.ptr[j] = k; if (cnt > options.cutOff) ref_ft.cnt[j] = options.cutOff; else ref_ft.cnt[j] = cnt; Rtot += ref_ft.cnt[j]; } } else { if (strcmp(feature, ref_ft.ft) == 0) { if (options.oriented && strand == '0') { fprintf(stderr, "Wrong reference feature strand (0). If oriented strand processing is set, reference features must be oriented (+|-).\n"); return 1; } j++; ref_ft.pos[j] = pos; ref_ft.str[j] = strand; ref_ft.ptr[j] = k; if (cnt > options.cutOff) ref_ft.cnt[j] = options.cutOff; else ref_ft.cnt[j] = cnt; Rtot += ref_ft.cnt[j]; } } if (tar_specs) { if (strcmp(feature, tar_ft.ft) == 0 && strand == tar_ft.strand) { k++; tar_ft.pos[k] = pos; tar_ft.str[k] = strand; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; Ttot += tar_ft.cnt[k]; } } else { if (strcmp(feature, tar_ft.ft) == 0) { k++; tar_ft.pos[k] = pos; tar_ft.str[k] = strand; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; Ttot += tar_ft.cnt[k]; } } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ Len += last_pos; tar_ft.pos[k + 1] = ref_ft.pos[j] + To + Win + 1; histo_update(histo, j, k); histo_print(histo); free(ref_ft.pos); free(ref_ft.cnt); free(ref_ft.str); free(ref_ft.ptr); free(tar_ft.pos); free(tar_ft.cnt); free(tar_ft.str); return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; histo_t histo; options.cutOff = 1; options.normFact = 0; int i = 0; while (1) { int c = getopt(argc, argv, "c:n:dhoA:B:b:e:w:"); if (c == -1) break; switch (c) { case 'c': options.cutOff = atoi(optarg); break; case 'n': options.normFact = atoi(optarg); 
break; case 'd': options.debug = 1; break; case 'h': options.help = 1; break; case 'o': options.oriented = 1; break; case 'A': RefFeature = optarg; break; case 'B': TarFeature = optarg; break; case 'b': From = atoi(optarg); break; case 'e': To = atoi(optarg); break; case 'w': Win = atoi(optarg); break; default: printf ("?? getopt returned character code 0%o ??\n", c); } } if (optind > argc || options.help == 1 || RefFeature == NULL || TarFeature == NULL || From == To || Win == 0 || options.cutOff < 0) { fprintf(stderr, "Usage: %s [options] -A -B -b -e -w [<] \n" " - version %s\n" " where options are:\n" " \t\t -h Show this help text\n" " \t\t -d Print debug information and check SGA file\n" " \t\t -c Cut-Off value for feature counts (default is %d)\n" " \t\t -o Oriented strand processing\n" " \t\t -n Histogram Normalization (default is %d)\n" "\n\tFeature Correlation Tool for ChIP-seq data analysis.\n" "\tThe program reads a ChIP-seq data file (or from stdin [<]) in SGA format (),\n" "\tand generates histograms showing the positional relationship of two features,\n" "\ta reference feature () and a target feature () respectively.\n" "\tThe feature specification must have the following format:\n" " \t = [<+|->]\n\n" "\tthe strand specification (+|-) being optional.\n" "\tThe parameter is a name that corresponds to the second field of the SGA file.\n" "\tIf no feature is given then all input tags are processed.\n" "\tThe SGA input file MUST BE sorted by sequence name (or chromosome id), position,\n" "\tand strand.\n" "\tOne should check the input SGA file with the following command:\n" "\tsort -s -c -k1,1 -k3,3n -k4,4 .\n\n" "\tIn debug mode (-d), the program performs the sorting order check.\n\n" "\tThe relative distance between the two features is analysed within a\n" "\tgiven range: - that should be greater than 0.\n" "\tA value can be optionally specified as a cut-off for the feature counts.\n" "\tThe window width (-w) defines the histogram step size or bin. 
It must be\n" "\tan integer greater than 0.\n" "\tFor Histogram Normalization the following options are available:\n" " \t\t Off - Raw Counts (default)\n" " \t\t Show Count Density (-n 1)\n" " \t\t Show Global Normalization (-n 2)\n\n", argv[0], VERSION, options.cutOff, options.normFact); return 1; } /*printf("\noptind: %d; argc: %d; argv[0]: %s; argv[optind]: %s; argv[optind + 1]: %s; argv[optind + 2]: %s\n", optind, argc, argv[0], argv[optind], argv[optind + 1], argv[optind + 2]); **/ if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.debug) { fprintf(stderr, " Arguments:\n"); fprintf(stderr, " Feature A (ref): %s\n", RefFeature); fprintf(stderr, " Feature B (tar): %s\n", TarFeature); fprintf(stderr, " Range : [%d, %d]\n", From, To); fprintf(stderr, " Sliding Window : %d\n", Win); fprintf(stderr, " Cut-off : %d\n", options.cutOff); fprintf(stderr, " Normalization type : %d\n\n", options.normFact); } char *s = RefFeature; i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Description too long \"%s\" \n", RefFeature); return 1; } ref_ft.ft[i++] = *s++; } ref_ft.strand = '\0'; if (!options.oriented) { while (isspace(*s++)) ref_ft.strand = *s; } s = TarFeature; i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, " Feature Description too long \"%s\" \n", TarFeature); return 1; } tar_ft.ft[i++] = *s++; } tar_ft.strand = '\0'; while (isspace(*s++)) tar_ft.strand = *s; if (options.debug) fprintf(stderr, " Ref feature : %s %c (R)\n Tar Feature: %s %c (T)\n", ref_ft.ft, ref_ft.strand, tar_ft.ft, tar_ft.strand); size_t cLen = strlen(ref_ft.ft); if (!cLen) { fprintf(stderr, "Wrong Feature Description (ref) \"%s\". 
You must at least provide a name for your Ref Feature!\n Feature Specs Format: = []\n", RefFeature); return 1; } cLen = strlen(tar_ft.ft); if (!cLen) { fprintf(stderr, "Wrong Feature Description (tar) \"%s\". You must at least provide a name for you Target Feature! \n Feature Specs Format: []\n", TarFeature); return 1; } if (ref_ft.strand == '\0') { /* fprintf(stderr, " Warning Ref Feature \"%s\" !\n Feature Format: = []\n", RefFeature); */ ref_specs = 0; } if (tar_ft.strand == '\0') { /* fprintf(stderr, " Warning Tar Feature \"%s\" !\n Feature Format: = []\n", TarFeature); */ tar_specs = 0; } if ((strcmp(tar_ft.ft, ref_ft.ft) == 0)) { /* Check For Auto-Correlation */ if (ref_ft.strand == '\0' && tar_ft.strand == '\0' && (!options.oriented)) { auto_cor = 1; if (options.debug) fprintf(stderr, " Autocorrelation ON\n"); } else if ((ref_ft.strand == '+' && tar_ft.strand == '+') || (ref_ft.strand == '-' && tar_ft.strand == '-')) { auto_cor = 1; if (options.debug) fprintf(stderr, " Autocorrelation ON\n"); } else { auto_cor = 0; if (options.debug) fprintf(stderr, " Autocorrelation OFF\n"); } } if (options.oriented && !auto_cor) { if (tar_ft.strand == '+') { or_same_strand = 1; } else if (tar_ft.strand == '-') { or_opposite_strand = 1; } else { or_any_strand = 1; } tar_specs = 0; } if (histo_init(&histo) != 0) { return 1; } if (process_sga(input, argv[optind++], &histo) != 0) { return 1; } if (input != stdin) { fclose(input); } return 0; } chip-seq/Makefile0000744022744200262270000000755013433535234014534 0ustar ambrosingr-bucher# # Makefile for ChIP-Seq package # CC = gcc CFLAGS = -O3 -std=gnu99 -W -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 LDFLAGS = -O3 -std=gnu99 -fPIC -lm #CFLAGS = -O3 -std=gnu99 -W -pedantic #LDFLAGS = -O3 -std=gnu99 -fPIC -lm #binDir = $(PWD)/bin #manDir = /usr/share/man/man1/ ifeq ($(prefix),) prefix := $(DESTDIR). 
endif binDir = $(prefix)/bin datDir = $(prefix)/share/chip-seq docDir = $(prefix)/share/chip-seq/doc manDir = $(prefix)/share/man/chip-seq/man1 CHIPSEQ_SRC = chipcor.c chipextract.c chippart.c chipcenter.c chippeak.c chipscore.c tools/compactsga.c tools/featreplace.c tools/filter_counts.c tools/countsga.c tools/bed2sga.c tools/bed2bed_display.c tools/sga2bed.c tools/sga2wig.c PROGS = chipcor chipextract chippart chippeak chipcenter chipscore TOOLS = tools/compactsga tools/countsga tools/featreplace tools/filter_counts tools/bed2sga tools/bed2bed_display tools/sga2bed tools/sga2wig SCRIPTS = tools/check_bed.pl tools/chr_replace_sga.pl tools/eland2sga.pl tools/fps2sga.pl tools/gff2sga.pl tools/partit2bed.pl tools/partit2gff.pl tools/sga2fps.pl tools/sga2gff.pl MAN_PAGES = chipcenter.1.gz chipcor.1.gz chipextract.1.gz chippart.1.gz chippeak.1.gz chipscore.1.gz DATA_FILES = share/chr_NC_gi share/chro_idx.nstorage share/chr_size DOC_FILES = doc/ChIP-Seq_Tools-UsersGuide.pdf OBJS = hashtable.o all: $(PROGS) $(TOOLS) tools : $(TOOLS) CHIPCOR_SRC = chipcor.c CHIPEXTRACT_SRC = chipextract.c CHIPCENTER_SRC = chipcenter.c CHIPPEAK_SRC = chippeak.c CHIPPART_SCR = chippart.c CHIPSCORE_SRC = chipscore.c COMPACTSGA_SRC = tools/compactsga.c COUNTSGA_SRC = tools/countsga.c FEATREPLACE_SRC = tools/featreplace.c FILTER_COUNTS_SRC = tools/filter_counts.c BED2BED_DISPLAY_SRC = tools/bed2bed_display.c BED2SGA_SRC = tools/bed2sga.c SGA2BED_SRC = tools/sga2bed.c SGA2WIG_SRC = tools/sga2wig.c chipcenter : $(CHIPCENTER_SRC) $(OBJS) $(CC) $(LDFLAGS) -o chipcenter $^ chippeak : $(CHIPPEAK_SRC) $(OBJS) $(CC) $(LDFLAGS) -o chippeak $^ chipcor : $(CHIPCOR_SRC) $(CC) $(CFLAGS) -o chipcor $^ chipextract : $(CHIPEXTRACT_SRC) $(CC) $(CFLAGS) -o chipextract $^ chippart : $(CHIPPART_SCR) $(CC) $(CFLAGS) -o chippart $^ chipscore : $(CHIPSCORE_SRC) $(CC) $(CFLAGS) -o chipscore $^ tools/compactsga : $(COMPACTSGA_SRC) $(CC) $(CFLAGS) -o tools/compactsga $^ tools/countsga : $(COUNTSGA_SRC) $(CC) $(CFLAGS) 
-o tools/countsga $^ tools/featreplace : $(FEATREPLACE_SRC) $(CC) $(CFLAGS) -o tools/featreplace $^ tools/filter_counts : $(FILTER_COUNTS_SRC) $(CC) $(CFLAGS) -o tools/filter_counts $^ tools/bed2bed_display : $(BED2BED_DISPLAY_SRC) $(OBJS) $(CC) $(LDFLAGS) -o tools/bed2bed_display $^ tools/bed2sga : $(BED2SGA_SRC) $(OBJS) $(CC) $(LDFLAGS) -o tools/bed2sga $^ tools/sga2bed : $(SGA2BED_SRC) $(OBJS) $(CC) $(LDFLAGS) -o tools/sga2bed $^ tools/sga2wig : $(SGA2WIG_SRC) $(OBJS) $(CC) $(LDFLAGS) -o tools/sga2wig $^ install: install-bin install-bin: mkdir -p $(binDir) mv $(PROGS) $(TOOLS) $(binDir) cp -p $(SCRIPTS) $(binDir) install-man: mkdir -p $(manDir) cp -p $(MAN_PAGES) $(manDir) install-dat: mkdir -p $(datDir) cp -p $(DATA_FILES) $(datDir) install-doc: mkdir -p $(docDir) cp -p $(DOC_FILES) $(docDir) clean: rm -rf $(PROGS) $(TOOLS) *.o *~ uninstall: rm -f $(binDir)/chipcor $(binDir)/chipextract $(binDir)/chippeak $(binDir)/chippart $(binDir)/chippeak \ $(binDir)/chipcenter $(binDir)/chipscore $(binDir)/compactsga $(binDir)/countsga $(binDir)/bed2sga $(binDir)/sga2wig \ $(binDir)/bed2bed_display $(binDir)/featreplace $(binDir)/filter_counts $(binDir)/sga2bed $(binDir)/check_bed.pl \ $(binDir)/chr_replace_sga.pl $(binDir)/eland2sga.pl $(binDir)/fps2sga.pl $(binDir)/gff2sga.pl $(binDir)/partit2bed.pl \ $(binDir)/partit2gff.pl $(binDir)/sga2fps.pl $(binDir)/sga2gff.pl uninstall-man: rm -rf $(manDir)/* uninstall-dat: rm -rf $(datDir) uninstall-doc: rm -rf $(docDir) chip-seq/debug.h0000744022744200262270000000054013106272470014320 0ustar ambrosingr-bucher#ifndef _DEBUG_H #define _DEBUG_H #ifdef DEBUG #include #define INFO(format) fprintf(stderr,"%s:%d:%s -> " format "\n", __FILE__, __LINE__, __func__) #define LOG(format, ...) fprintf(stderr,"%s:%d:%s -> " format "\n", __FILE__, __LINE__, __func__, __VA_ARGS__) #else #define LOG(...) #define INFO(...) 
#endif // debug #endif // debug.h chip-seq/chipextract.c0000744022744200262270000022077013356611506015560 0ustar ambrosingr-bucher/* chipextract.c Feature correlation and extraction Tool. The program correlates tag count distributions for two features (reference, target), and extracts target feature reads/tags that fall into a distance range relative to the reference feature. # Arguments: # reference feature name, reference feature strand, # target feature name, target feature strand, # beginning of range, end of range, SGA file with merged tag count distributions. # # Optional arguments : # Count cut-off value for target feature [default=1] # Window width (histogram bin size) [default=1] Giovanna Ambrosini, EPFL, Giovanna.Ambrosini@epfl.ch Copyright (c) 2015 EPFL and Swiss Institute of Bioinformatics. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #define _GNU_SOURCE #include #include #include #include #include #include #include #ifdef DEBUG #include #endif #include "version.h" /*#define BUF_SIZE 4096 */ #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 typedef struct _options_t { int cutOff; int help; int debug; } options_t; static options_t options; typedef struct _feature_t { char ft[FT_MAX]; char strand; int *pos; int *cnt; int *ptr; char *str; } feature_t, *feature_p_t; feature_t ref_ft; feature_t tar_ft; feature_t tar_ft_plus; feature_t tar_ft_minus; typedef struct _table_t { int *pos; int *bin; long *val; } table_t, *table_p_t; int l5_p, l3_p; char *RefFeature = NULL; char *TarFeature = NULL; int From = 0; int To = 0; int Win = 1; unsigned long TarOffset = 0; unsigned long ref_cnt = 0; /* Table row count (reference) */ unsigned long Rtot = 0; /* Total Reference Counts */ unsigned long Ttot = 0; /* Total Target Counts */ unsigned long Len = 0; /* Total Sequence Length */ int table_init(table_p_t table) { /* Initialize Histogram (table) */ /* The windows or bins are placed such that one window will be centered at pos 0 (odd window size), -0.5 even (window size). The whole range [$from,$to] will be shortened to an integer number of window sizes. 
Example: $from = -20, $to = 20, $ win =5; Windows: [-17,-13], [-12,-8], [-7,-3], [-2,2], [3,7], [8,12], [13,17] New range: $from = -17, $to =17 */ /* begin (xb), end (xe), and center position (xe) of window near 0 */ int xb, xc, xe; xb = -Win/2; xe = xb + Win - 1; xc = (xb + xe)/2; if (options.debug) fprintf(stderr, " xb %d, xc %d, xe %d\n", xb, xe, xc); if (From > xb) { l5_p = (From - xb)/Win + 1; } else { l5_p = -(xb - From)/Win; } if (To >= xe) { l3_p = (To - xe)/Win; } else { l3_p = -(xe - To)/Win + 1; } if (options.debug) fprintf(stderr, " l5_p: %d l3_p: %d\n", l5_p, l3_p); /* New range */ From = xb + l5_p * Win; To = xe + l3_p * Win; fprintf(stderr, "New range: %d - %d\n", From, To); /* Allocate Table genomic positions (bp) */ table->pos = (int *) malloc((l3_p - l5_p + 1) * sizeof(int)); if (table->pos == NULL) { fprintf(stderr, "Out of memory: %s(%d)\n",strerror(errno), errno); return 1; } /* Allocate Table bins */ table->bin = (int *) malloc((To - From + 1) * sizeof(int)); if (table->bin == NULL) { fprintf(stderr, "Out of memory: %s(%d)\n",strerror(errno), errno); return 1; } /* Allocate Table count values */ table->val = (long *) malloc((l3_p - l5_p + 1) * sizeof(long)); if (table->val == NULL) { fprintf(stderr, "Out of memory: %s(%d)\n",strerror(errno), errno); return 1; } int i; for (i = 0; i <= To-From; i++) { table->bin[i] = (int)(i/Win); } int j; i = 0; for (j = l5_p; j <= l3_p; j++) { table->val[i] = 0; i++; } i = 0; for (j = l5_p; j <= l3_p; j++) { table->pos[i] = xc + j * Win;; i++; } /* Print out first table row (x coordinates) */ i = 0; char blanks[10]; memset(blanks, ' ', 10); blanks[9] = '\0'; printf("%10s", blanks); for (j = l5_p; j <= l3_p; j++) { printf(" %9.1f", (float)table->pos[i]); i++; } printf("\n"); TarOffset = abs(From) + abs(To) + 1; return 0; } void print_counts_basic_basic(table_p_t table, unsigned int ref_nb, unsigned int tar_nb) { long j, k, i, n; int from, to; int m; /* fprintf(stderr, "print_counts_basic_basic: ref feat : %u 
tar feat : %u\n", ref_nb, tar_nb); */ //printf("print_counts_basic_basic: ref feat : %u tar feat : %u\n", ref_nb, tar_nb); for (j = 1; j <= (long)ref_nb; j++) { i = 0; for (m = l5_p; m <= l3_p; m++) { table->val[i] = 0; i++; } k = ref_ft.ptr[j]; from = ref_ft.pos[j] + From; to = ref_ft.pos[j] + To; /*printf("UPSTREAM: k = %ld, Ref pos = %ld, Tar pos = %ld, FROM = %d, TO =%d\n", k, ref_ft.pos[j], tar_ft.pos[k], from, to); */ while (tar_ft.pos[k] > to) {k--;} while ((k >= 0) && tar_ft.pos[k] >= from) { n = tar_ft.pos[k] - from; /*printf("UPSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k--; /* examine further upstream positions within the window range */ } k = ref_ft.ptr[j] + 1; while ((tar_ft.pos[k] < from)) {k++;} while ((tar_ft.pos[k] <= to) && k <= (long)tar_nb) { n = tar_ft.pos[k] - from; /*printf("DOWNSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k++; /* examine further downstream positions within the window range */ } i = 0; ref_cnt++; printf("%10ld ", ref_cnt); for (m = l5_p; m <= l3_p; m++) { printf(" %ld", table->val[i]); i++; } printf("\n"); } } void print_counts_oriented_plus(table_p_t table, unsigned int ref_nb, unsigned int tar_p_nb, unsigned int tar_m_nb) { long j, k, i, n; int from, to; int m; /*printf("print_counts_oriented_plus: ref feat : %u tar feat (+): %u tar feat (-): %u\n", ref_nb, tar_p_nb, tar_m_nb); */ for (j = 1; j <= (long)ref_nb; j++) { i = 0; for (m = l5_p; m <= l3_p; m++) { table->val[i] = 0; i++; } if (ref_ft.str[j] == '+') { k = ref_ft.ptr[j]; from = ref_ft.pos[j] + From; to = ref_ft.pos[j] + To; /*printf("UPSTREAM: k = %ld, Ref pos = %ld, Tar pos = %ld, FROM = %d, TO =%d\n", k, ref_ft.pos[j], 
tar_ft_plus.pos[k], from, to); */ while (tar_ft_plus.pos[k] > to) {k--;} while ((k >= 0) && tar_ft_plus.pos[k] >= from) { n = tar_ft_plus.pos[k] - from; /*printf("UPSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft_plus.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft_plus.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k--; /* examine further upstream positions within the window range */ } k = ref_ft.ptr[j] + 1; while ((tar_ft_plus.pos[k] < from)) {k++;} while ((tar_ft_plus.pos[k] <= to) && k <= (long)tar_p_nb) { n = tar_ft_plus.pos[k] - from; /*printf("DOWNSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft_plus.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft_plus.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k++; /* examine further downstream positions within the window range */ } } else { /* Ref strand = '-' */ k = ref_ft.ptr[j]; from = ref_ft.pos[j] - To; to = ref_ft.pos[j] - From; /*printf("UPSTREAM: k = %ld, Ref pos = %ld, Tar pos = %ld, FROM = %d, TO =%d\n", k, ref_ft.pos[j], tar_ft_minus.pos[k], from, to); */ while (tar_ft_minus.pos[k] > to) {k--;} while ((k >= 0) && tar_ft_minus.pos[k] >= from) { n = to - tar_ft_minus.pos[k]; /*printf("UPSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft_minus.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft_minus.cnt[k]; k--; /* examine further upstream positions within the window range */ } k = ref_ft.ptr[j] + 1; while ((tar_ft_minus.pos[k] < from)) {k++;} while ((tar_ft_minus.pos[k] <= to) && k <= (long)tar_m_nb) { n = to - tar_ft_minus.pos[k]; /*printf("DOWNSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft_minus.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft_minus.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k++; /* examine further downstream positions within the 
window range */ } } i = 0; ref_cnt++; printf("%10ld ", ref_cnt); for (m = l5_p; m <= l3_p; m++) { printf(" %ld", table->val[i]); i++; } printf("\n"); } } void print_counts_oriented_minus(table_p_t table, unsigned int ref_nb, unsigned int tar_p_nb, unsigned int tar_m_nb) { long j, k, i, n; int from, to; int m; /*printf("print_counts_oriented_minus: ref feat : %u tar feat (+): %u tar feat (-): %u\n", ref_nb, tar_p_nb, tar_m_nb); */ for (j = 1; j <= (long)ref_nb; j++) { i = 0; for (m = l5_p; m <= l3_p; m++) { table->val[i] = 0; i++; } if (ref_ft.str[j] == '+') { k = ref_ft.ptr[j]; from = ref_ft.pos[j] + From; to = ref_ft.pos[j] + To; /*printf("UPSTREAM: k = %ld, Ref pos = %ld, Tar pos = %ld, FROM = %d, TO =%d\n", k, ref_ft.pos[j], tar_ft_minus.pos[k], from, to); */ while (tar_ft_minus.pos[k] > to) {k--;} while ((k >= 0) && tar_ft_minus.pos[k] >= from) { n = tar_ft_minus.pos[k] - from; /*printf("UPSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft_minus.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft_minus.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k--; /* examine further upstream positions within the window range */ } k = ref_ft.ptr[j] + 1; while ((tar_ft_minus.pos[k] < from)) {k++;} while ((tar_ft_minus.pos[k] <= to) && k <= (long)tar_m_nb) { n = tar_ft_minus.pos[k] - from; /*printf("DOWNSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft_minus.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft_minus.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k++; /* examine further downstream positions within the window range */ } } else { /* Ref strand = '-' */ k = ref_ft.ptr[j]; from = ref_ft.pos[j] - To; to = ref_ft.pos[j] - From; /*printf("UPSTREAM: k = %ld, Ref pos = %ld, Tar pos = %ld, FROM = %d, TO =%d\n", k, ref_ft.pos[j], tar_ft_plus.pos[k], from, to); */ while (tar_ft_plus.pos[k] > to) {k--;} while ((k >= 0) && 
tar_ft_plus.pos[k] >= from) { n = to - tar_ft_plus.pos[k]; /*printf("UPSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft_plus.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft_plus.cnt[k]; k--; /* examine further upstream positions within the window range */ } k = ref_ft.ptr[j] + 1; while ((tar_ft_plus.pos[k] < from)) {k++;} while ((tar_ft_plus.pos[k] <= to) && k <= (long)tar_p_nb) { n = to - tar_ft_plus.pos[k]; /*printf("DOWNSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft_plus.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft_plus.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k++; /* examine further downstream positions within the window range */ } } i = 0; ref_cnt++; printf("%10ld ", ref_cnt); for (m = l5_p; m <= l3_p; m++) { printf(" %ld", table->val[i]); i++; } printf("\n"); } } void print_counts_oriented_both(table_p_t table, unsigned int ref_nb, unsigned int tar_nb) { long j, k, i, n; int from, to; int m; /*printf("print_counts_oriented_both: ref feat : %u tar feat : %u\n", ref_nb, tar_nb); */ for (j = 1; j <= (long)ref_nb; j++) { i = 0; for (m = l5_p; m <= l3_p; m++) { table->val[i] = 0; i++; } if (ref_ft.str[j] == '+') { k = ref_ft.ptr[j]; from = ref_ft.pos[j] + From; to = ref_ft.pos[j] + To; /*printf("UPSTREAM: k = %ld, Ref pos = %ld, Tar pos = %ld, FROM = %d, TO =%d\n", k, ref_ft.pos[j], tar_ft.pos[k], from, to); */ while (tar_ft.pos[k] > to) {k--;} while ((k >= 0) && tar_ft.pos[k] >= from) { n = tar_ft.pos[k] - from; /*printf("UPSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k--; /* examine further upstream positions within the window range */ } k = ref_ft.ptr[j] + 1; while ((tar_ft.pos[k] < from)) {k++;} while ((tar_ft.pos[k] <= to) && k <= (long)tar_nb) { n = tar_ft.pos[k] - 
from; /*printf("DOWNSTREAM: n = %ld, TAR pos = %d, FROM = %d, TO = %d, REF pos = %d, bin= %d\n", n, tar_ft.pos[k], from, to, ref_ft.pos[j], table->bin[n]);*/ table->val[table->bin[n]] += tar_ft.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k++; /* examine further downstream positions within the window range */ } } else { /* Ref strand = '-' */ k = ref_ft.ptr[j]; from = ref_ft.pos[j] - To; to = ref_ft.pos[j] - From; /*printf("UPSTREAM: k = %ld, Ref pos = %ld, Tar pos = %ld, FROM = %d, TO =%d\n", k, ref_ft.pos[j], tar_ft.pos[k], from, to); */ while (tar_ft.pos[k] > to) {k--;} while ((k >= 0) && tar_ft.pos[k] >= from) { n = to - tar_ft.pos[k]; /*printf("UPSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft.cnt[k]; k--; /* examine further upstream positions within the window range */ } k = ref_ft.ptr[j] + 1; while ((tar_ft.pos[k] < from)) {k++;} while ((tar_ft.pos[k] <= to) && k <= (long)tar_nb) { n = to - tar_ft.pos[k]; /*printf("DOWNSTREAM: n = %ld, TAR pos = %ld, FROM = %d bin= %d\n", n, tar_ft_plus.pos[k], from, table->bin[n]); */ table->val[table->bin[n]] += tar_ft.cnt[k]; /*printf("table val[%d] = %d\n", table->bin[n], table->val[table->bin[n]]); */ k++; /* examine further downstream positions within the window range */ } } i = 0; ref_cnt++; printf("%10ld ", ref_cnt); for (m = l5_p; m <= l3_p; m++) { printf(" %ld", table->val[i]); i++; } printf("\n"); } } int extract_basic_basic(FILE *input, char *iFile, table_p_t table) { unsigned int k = 0; unsigned int j = 0; char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; char *s, *res, *buf; size_t rf_mLen = BUF_SIZE; size_t tf_mLen = BUF_SIZE; size_t mLen = LINE_SIZE; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat 
failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_basic_basic: malloc"); return 1; } if ((ref_ft.ptr = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_basic_basic: malloc"); return 1; } if ((tar_ft.pos = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_basic_basic: malloc"); return 1; } if ((tar_ft.cnt = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_basic_basic: malloc"); return 1; } if ((s = malloc(mLen * sizeof(char))) == NULL) { perror("extract_basic_basic: malloc"); return 1; } tar_ft.pos[0] = -TarOffset; #ifdef DEBUG int c = 1; #endif /* while (fscanf(f,"%s %s %d %c %d %*s", seq_id, feature, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char ext[EXT_MAX]; char strand = '\0'; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("extract_basic_basic: realloc"); return 1; } res = fgets(s + cLen, (int) (mLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) 
{ fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); return 1; } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); return 1; } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); return 1; } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); return 1; } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); return 1; } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, feature, strand, pos, cnt, ext); #endif if (j >= rf_mLen - 1) { rf_mLen *= 2; if (( ref_ft.pos = (int *)realloc(ref_ft.pos, rf_mLen * sizeof(int))) == NULL) { perror("extract_basic_basic: realloc"); return 1; } if ((ref_ft.ptr = (int *)realloc(ref_ft.ptr, rf_mLen * sizeof(int))) == NULL) { perror("extract_basic_basic: realloc"); return 1; } } if (k >= tf_mLen - 1) { tf_mLen *= 2; if ((tar_ft.pos = (int *)realloc(tar_ft.pos, tf_mLen * sizeof(int))) == NULL) { perror("extract_basic_basic: realloc"); return 1; } if ((tar_ft.cnt = (int *)realloc(tar_ft.cnt, tf_mLen * sizeof(int))) == NULL) { perror("extract_basic_basic: realloc"); return 1; } } if (strcmp(seq_id, seq_id_prev) != 0) { tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_basic_basic(table, j, k); strcpy(seq_id_prev, seq_id); j = 0; k = 0; tar_ft.pos[0] = -TarOffset; Len += last_pos; } if (strcmp(feature, ref_ft.ft) == 0 && strand == ref_ft.strand) 
{ j++; ref_ft.ptr[j] = k; ref_ft.pos[j] = pos; } else if (strcmp(feature, tar_ft.ft) == 0 && strand == tar_ft.strand) { k++; tar_ft.pos[k] = pos; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ Len += last_pos; tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_basic_basic(table, j, k); free(ref_ft.ptr); free(ref_ft.pos); free(tar_ft.pos); free(tar_ft.cnt); return 0; } int extract_basic_both(FILE *input, char *iFile, table_p_t table) { unsigned int k = 0; unsigned int j = 0; char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; char *s, *res, *buf; size_t rf_mLen = BUF_SIZE; size_t tf_mLen = BUF_SIZE; size_t mLen = LINE_SIZE; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_basic_both: malloc"); return 1; } if ((ref_ft.ptr = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_basic_both: malloc"); return 1; } if ((tar_ft.pos = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_basic_both: malloc"); return 1; } if ((tar_ft.cnt = (int 
*)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_basic_both: malloc"); return 1; } if ((s = malloc(mLen * sizeof(char))) == NULL) { perror("extract_basic_both: malloc"); return 1; } tar_ft.pos[0] = -TarOffset; #ifdef DEBUG int c = 1; #endif /* while (fscanf(f,"%s %s %d %c %d %*s", seq_id, feature, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("extract_basic_both: realloc"); return 1; } res = fgets(s + cLen, (int) (mLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); return 1; } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); return 1; } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); return 1; } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); return 1; } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); return 1; } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: 
%s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, feature, strand, pos, cnt, ext); #endif if (j >= rf_mLen - 1) { rf_mLen *= 2; if (( ref_ft.pos = (int *)realloc(ref_ft.pos, rf_mLen * sizeof(int))) == NULL) { perror("extract_basic_both: realloc"); return 1; } if ((ref_ft.ptr = (int *)realloc(ref_ft.ptr, rf_mLen * sizeof(int))) == NULL) { perror("extract_basic_both: realloc"); return 1; } } if (k >= tf_mLen - 1) { tf_mLen *= 2; if ((tar_ft.pos = (int *)realloc(tar_ft.pos, tf_mLen * sizeof(int))) == NULL) { perror("extract_basic_both: realloc"); return 1; } if ((tar_ft.cnt = (int *)realloc(tar_ft.cnt, tf_mLen * sizeof(int))) == NULL) { perror("extract_basic_both: realloc"); return 1; } } if (strcmp(seq_id, seq_id_prev) != 0) { tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_basic_basic(table, j, k); strcpy(seq_id_prev, seq_id); j = 0; k = 0; tar_ft.pos[0] = -TarOffset; Len += last_pos; } if (strcmp(feature, ref_ft.ft) == 0 && strand == ref_ft.strand) { j++; ref_ft.ptr[j] = k; ref_ft.pos[j] = pos; } else if (strcmp(feature, tar_ft.ft) == 0) { k++; tar_ft.pos[k] = pos; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ Len += last_pos; tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_basic_basic(table, j, k); free(ref_ft.ptr); free(ref_ft.pos); free(tar_ft.pos); free(tar_ft.cnt); return 0; } int extract_both_basic(FILE *input, char *iFile, table_p_t table) { unsigned int k = 0; unsigned int j = 0; char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; char *s, *res, *buf; size_t rf_mLen = BUF_SIZE; size_t tf_mLen = BUF_SIZE; size_t mLen = LINE_SIZE; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 
2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_both_basic: malloc"); return 1; } if ((ref_ft.ptr = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_both_basic: malloc"); return 1; } if ((tar_ft.pos = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_both_basic: malloc"); return 1; } if ((tar_ft.cnt = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_both_basic: malloc"); return 1; } if ((s = malloc(mLen * sizeof(char))) == NULL) { perror("extract_both_basic: malloc"); return 1; } tar_ft.pos[0] = -TarOffset; #ifdef DEBUG int c = 1; #endif /* while (fscanf(f,"%s %s %d %c %d %*s", seq_id, feature, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("extract_both_basic: realloc"); return 1; } res = fgets(s + cLen, (int) (mLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", 
buf); return 1; } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); return 1; } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); return 1; } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); return 1; } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); return 1; } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, feature, strand, pos, cnt, ext); #endif if (j >= rf_mLen - 1) { rf_mLen *= 2; if (( ref_ft.pos = (int *)realloc(ref_ft.pos, rf_mLen * sizeof(int))) == NULL) { perror("extract_both_basic: realloc"); return 1; } if ((ref_ft.ptr = (int *)realloc(ref_ft.ptr, rf_mLen * sizeof(int))) == NULL) { perror("extract_both_basic: realloc"); return 1; } } if (k >= tf_mLen - 1) { tf_mLen *= 2; if ((tar_ft.pos = (int *)realloc(tar_ft.pos, tf_mLen * sizeof(int))) == NULL) { perror("extract_both_basic: realloc"); return 1; } if ((tar_ft.cnt = (int *)realloc(tar_ft.cnt, tf_mLen * sizeof(int))) == NULL) { perror("extract_both_basic: realloc"); return 1; } } if (strcmp(seq_id, seq_id_prev) != 0) { tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_basic_basic(table, j, k); strcpy(seq_id_prev, seq_id); j = 0; k = 0; tar_ft.pos[0] = -TarOffset; Len += last_pos; } if (strcmp(feature, ref_ft.ft) == 0) { j++; ref_ft.ptr[j] = k; ref_ft.pos[j] = pos; } else if (strcmp(feature, 
tar_ft.ft) == 0 && strand == tar_ft.strand) { k++; tar_ft.pos[k] = pos; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ Len += last_pos; tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_basic_basic(table, j, k); free(ref_ft.ptr); free(ref_ft.pos); free(tar_ft.pos); free(tar_ft.cnt); return 0; } int extract_both_both(FILE *input, char *iFile, table_p_t table) { unsigned int k = 0; unsigned int j = 0; char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; char *s, *res, *buf; size_t rf_mLen = BUF_SIZE; size_t tf_mLen = BUF_SIZE; size_t mLen = LINE_SIZE; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_both_both: malloc"); return 1; } if ((ref_ft.ptr = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_both_both: malloc"); return 1; } if ((tar_ft.pos = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_both_both: malloc"); return 1; } if ((tar_ft.cnt = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_both_both: malloc"); 
return 1; } if ((s = malloc(mLen * sizeof(char))) == NULL) { perror("extract_both_both: malloc"); return 1; } tar_ft.pos[0] = -TarOffset; #ifdef DEBUG int c = 1; #endif while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("extract_both_both: realloc"); return 1; } res = fgets(s + cLen, (int) (mLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); return 1; } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); return 1; } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); return 1; } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); return 1; } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); return 1; } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, feature, strand, pos, cnt, ext); #endif if (j >= rf_mLen - 1) { rf_mLen *= 2; if (( ref_ft.pos = (int 
*)realloc(ref_ft.pos, rf_mLen * sizeof(int))) == NULL) { perror("extract_both_both: realloc"); return 1; } if ((ref_ft.ptr = (int *)realloc(ref_ft.ptr, rf_mLen * sizeof(int))) == NULL) { perror("extract_both_both: realloc"); return 1; } } if (k >= tf_mLen - 1) { tf_mLen *= 2; if ((tar_ft.pos = (int *)realloc(tar_ft.pos, tf_mLen * sizeof(int))) == NULL) { perror("extract_both_both: realloc"); return 1; } if ((tar_ft.cnt = (int *)realloc(tar_ft.cnt, tf_mLen * sizeof(int))) == NULL) { perror("extract_both_both: realloc"); return 1; } } if (strcmp(seq_id, seq_id_prev) != 0) { tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_basic_basic(table, j, k); strcpy(seq_id_prev, seq_id); j = 0; k = 0; tar_ft.pos[0] = -TarOffset; Len += last_pos; } if (strcmp(feature, ref_ft.ft) == 0) { j++; ref_ft.ptr[j] = k; ref_ft.pos[j] = pos; } else if (strcmp(feature, tar_ft.ft) == 0) { k++; tar_ft.pos[k] = pos; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ Len += last_pos; tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_basic_basic(table, j, k); free(ref_ft.ptr); free(ref_ft.pos); free(tar_ft.pos); free(tar_ft.cnt); return 0; } int extract_oriented_plus(FILE *input, char *iFile, table_p_t table) { unsigned int k1 = 0; unsigned int k2 = 0; unsigned int j = 0; char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; char *s, *res, *buf; size_t rf_mLen = BUF_SIZE; size_t tf_p_mLen = BUF_SIZE; size_t tf_m_mLen = BUF_SIZE; size_t mLen = LINE_SIZE; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = 
system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_plus: malloc"); return 1; } if ((ref_ft.ptr = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_plus: malloc"); return 1; } if ((ref_ft.str = (char *)calloc(rf_mLen, sizeof(char))) == NULL) { perror("extract_oriented_plus: malloc"); return 1; } if ((tar_ft_plus.pos = (int *)calloc(tf_p_mLen, sizeof(int))) == NULL) { perror("extract_oriented_plus: malloc"); return 1; } if ((tar_ft_plus.cnt = (int *)calloc(tf_p_mLen, sizeof(int))) == NULL) { perror("extract_oriented_plus: malloc"); return 1; } if ((tar_ft_minus.pos = (int *)calloc(tf_m_mLen, sizeof(int))) == NULL) { perror("extract_oriented_plus: malloc"); return 1; } if ((tar_ft_minus.cnt = (int *)calloc(tf_m_mLen, sizeof(int))) == NULL) { perror("extract_oriented_plus: malloc"); return 1; } if ((s = malloc(mLen * sizeof(char))) == NULL) { perror("extract_oriented_plus: malloc"); return 1; } tar_ft_plus.pos[0] = -TarOffset; tar_ft_minus.pos[0] = -TarOffset; #ifdef DEBUG int c = 1; #endif while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("extract_oriented_plus: realloc"); return 1; } res = fgets(s + cLen, (int) (mLen - cLen), input); 
cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); return 1; } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); return 1; } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); return 1; } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); return 1; } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); return 1; } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, feature, strand, pos, cnt, ext); #endif if (j >= rf_mLen - 1) { rf_mLen *= 2; if (( ref_ft.pos = (int *)realloc(ref_ft.pos, rf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_plus: realloc"); return 1; } if ((ref_ft.ptr = (int *)realloc(ref_ft.ptr, rf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_plus: realloc"); return 1; } if ((ref_ft.str = (char *)realloc(ref_ft.str, rf_mLen * sizeof(char))) == NULL) { perror("extract_oriented_plus: realloc"); return 1; } } if (k1 >= tf_p_mLen - 1) { tf_p_mLen *= 2; if ((tar_ft_plus.pos = (int *)realloc(tar_ft_plus.pos, tf_p_mLen * sizeof(int))) == NULL) { perror("extract_oriented_plus: realloc"); return 1; } if ((tar_ft_plus.cnt = (int *)realloc(tar_ft_plus.cnt, tf_p_mLen * sizeof(int))) == NULL) { 
perror("extract_oriented_plus: realloc"); return 1; } } if (k2 >= tf_m_mLen - 1) { tf_m_mLen *= 2; if ((tar_ft_minus.pos = (int *)realloc(tar_ft_minus.pos, tf_m_mLen * sizeof(int))) == NULL) { perror("extract_oriented_plus: realloc"); return 1; } if ((tar_ft_minus.cnt = (int *)realloc(tar_ft_minus.cnt, tf_m_mLen * sizeof(int))) == NULL) { perror("extract_oriented_plus: realloc"); return 1; } } if (strcmp(seq_id, seq_id_prev) != 0) { tar_ft_plus.pos[k1 + 1] = ref_ft.pos[j] + TarOffset; tar_ft_minus.pos[k2 + 1] = ref_ft.pos[j] + TarOffset; print_counts_oriented_plus(table, j, k1, k2); strcpy(seq_id_prev, seq_id); j = 0; k1 = 0; k2 = 0; tar_ft_plus.pos[0] = -TarOffset; tar_ft_minus.pos[0] = -TarOffset; Len += last_pos; } if (strcmp(feature, ref_ft.ft) == 0) { j++; if (strand == '+') { ref_ft.ptr[j] = k1; } else if (strand == '-') { ref_ft.ptr[j] = k2; } ref_ft.pos[j] = pos; ref_ft.str[j] = strand; } else if (strcmp(feature, tar_ft.ft) == 0 && strand == '+') { k1++; tar_ft_plus.pos[k1] = pos; if (cnt > options.cutOff) tar_ft_plus.cnt[k1] = options.cutOff; else tar_ft_plus.cnt[k1] = cnt; } else if (strcmp(feature, tar_ft.ft) == 0 && strand == '-') { k2++; tar_ft_minus.pos[k2] = pos; if (cnt > options.cutOff) tar_ft_minus.cnt[k2] = options.cutOff; else tar_ft_minus.cnt[k2] = cnt; } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ Len += last_pos; tar_ft_plus.pos[k1 + 1] = ref_ft.pos[j] + TarOffset; tar_ft_minus.pos[k2 + 1] = ref_ft.pos[j] + TarOffset; print_counts_oriented_plus(table, j, k1, k2); free(ref_ft.ptr); free(ref_ft.pos); free(ref_ft.str); free(tar_ft_plus.pos); free(tar_ft_plus.cnt); free(tar_ft_minus.pos); free(tar_ft_minus.cnt); return 0; } int extract_oriented_minus(FILE *input, char *iFile, table_p_t table) { unsigned int k1 = 0; unsigned int k2 = 0; unsigned int j = 0; char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; char *s, *res, *buf; size_t rf_mLen = BUF_SIZE; size_t tf_p_mLen = BUF_SIZE; size_t tf_m_mLen = 
BUF_SIZE; size_t mLen = LINE_SIZE; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_minus: malloc"); return 1; } if ((ref_ft.ptr = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_minus: malloc"); return 1; } if ((ref_ft.str = (char *)calloc(rf_mLen, sizeof(char))) == NULL) { perror("extract_oriented_minus: malloc"); return 1; } if ((tar_ft_plus.pos = (int *)calloc(tf_p_mLen, sizeof(int))) == NULL) { perror("extract_oriented_minus: malloc"); return 1; } if ((tar_ft_plus.cnt = (int *)calloc(tf_p_mLen, sizeof(int))) == NULL) { perror("extract_oriented_minus: malloc"); return 1; } if ((tar_ft_minus.pos = (int *)calloc(tf_m_mLen, sizeof(int))) == NULL) { perror("extract_oriented_minus: malloc"); return 1; } if ((tar_ft_minus.cnt = (int *)calloc(tf_m_mLen, sizeof(int))) == NULL) { perror("extract_oriented_minus: malloc"); return 1; } if ((s = malloc(mLen * sizeof(char))) == NULL) { perror("extract_oriented_minus: malloc"); return 1; } tar_ft_plus.pos[0] = -TarOffset; tar_ft_minus.pos[0] = -TarOffset; #ifdef DEBUG int c = 1; #endif /* while (fscanf(f,"%s %s %d 
%c %d %*s", seq_id, feature, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("extract_oriented_minus: realloc"); return 1; } res = fgets(s + cLen, (int) (mLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); return 1; } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); return 1; } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); return 1; } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); return 1; } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); return 1; } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, feature, strand, pos, cnt, ext); #endif if (j >= rf_mLen - 1) { rf_mLen *= 2; if (( ref_ft.pos = (int *)realloc(ref_ft.pos, rf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_minus: realloc"); return 1; } if 
((ref_ft.ptr = (int *)realloc(ref_ft.ptr, rf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_minus: realloc"); return 1; } if ((ref_ft.str = (char *)realloc(ref_ft.str, rf_mLen * sizeof(char))) == NULL) { perror("extract_oriented_minus: realloc"); return 1; } } if (k1 >= tf_p_mLen - 1) { tf_p_mLen *= 2; if ((tar_ft_plus.pos = (int *)realloc(tar_ft_plus.pos, tf_p_mLen * sizeof(int))) == NULL) { perror("extract_oriented_minus: realloc"); return 1; } if ((tar_ft_plus.cnt = (int *)realloc(tar_ft_plus.cnt, tf_p_mLen * sizeof(int))) == NULL) { perror("extract_oriented_minus: realloc"); return 1; } } if (k2 >= tf_m_mLen - 1) { tf_m_mLen *= 2; if ((tar_ft_minus.pos = (int *)realloc(tar_ft_minus.pos, tf_m_mLen * sizeof(int))) == NULL) { perror("extract_oriented_minus: realloc"); return 1; } if ((tar_ft_minus.cnt = (int *)realloc(tar_ft_minus.cnt, tf_m_mLen * sizeof(int))) == NULL) { perror("extract_oriented_minus: realloc"); return 1; } } if (strcmp(seq_id, seq_id_prev) != 0) { tar_ft_plus.pos[k1 + 1] = ref_ft.pos[j] + TarOffset; tar_ft_minus.pos[k2 + 1] = ref_ft.pos[j] + TarOffset; print_counts_oriented_minus(table, j, k1, k2); strcpy(seq_id_prev, seq_id); j = 0; k1 = 0; k2 = 0; tar_ft_plus.pos[0] = -TarOffset; tar_ft_minus.pos[0] = -TarOffset; Len += last_pos; } if (strcmp(feature, ref_ft.ft) == 0) { j++; if (strand == '+') { ref_ft.ptr[j] = k2; } else if (strand == '-') { ref_ft.ptr[j] = k1; } ref_ft.pos[j] = pos; ref_ft.str[j] = strand; } else if (strcmp(feature, tar_ft.ft) == 0 && strand == '+') { k1++; tar_ft_plus.pos[k1] = pos; if (cnt > options.cutOff) tar_ft_plus.cnt[k1] = options.cutOff; else tar_ft_plus.cnt[k1] = cnt; } else if (strcmp(feature, tar_ft.ft) == 0 && strand == '-') { k2++; tar_ft_minus.pos[k2] = pos; if (cnt > options.cutOff) tar_ft_minus.cnt[k2] = options.cutOff; else tar_ft_minus.cnt[k2] = cnt; } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ Len += last_pos; tar_ft_plus.pos[k1 + 1] = ref_ft.pos[j] + TarOffset; 
tar_ft_minus.pos[k2 + 1] = ref_ft.pos[j] + TarOffset; print_counts_oriented_minus(table, j, k1, k2); free(ref_ft.ptr); free(ref_ft.pos); free(ref_ft.str); free(tar_ft_plus.pos); free(tar_ft_plus.cnt); free(tar_ft_minus.pos); free(tar_ft_minus.cnt); return 0; } int extract_oriented_zero(FILE *input, char *iFile, table_p_t table) { unsigned int k = 0; unsigned int j = 0; char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; char *s, *res, *buf; size_t rf_mLen = BUF_SIZE; size_t tf_mLen = BUF_SIZE; size_t mLen = LINE_SIZE; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_zero: malloc"); return 1; } if ((ref_ft.ptr = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_zero: malloc"); return 1; } if ((ref_ft.str = (char *)calloc(rf_mLen, sizeof(char))) == NULL) { perror("extract_oriented_zero: malloc"); return 1; } if ((tar_ft.pos = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_zero: malloc"); return 1; } if ((tar_ft.cnt = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_zero: malloc"); return 1; } if ((s = 
malloc(mLen * sizeof(char))) == NULL) { perror("extract_oriented_zero: malloc"); return 1; } tar_ft.pos[0] = -TarOffset; #ifdef DEBUG int c = 1; #endif /* while (fscanf(f,"%s %s %d %c %d %*s", seq_id, feature, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("extract_oriented_zero: realloc"); return 1; } res = fgets(s + cLen, (int) (mLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); return 1; } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); return 1; } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); return 1; } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); return 1; } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); return 1; } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, feature, strand, pos, cnt, ext); #endif 
if (j >= rf_mLen - 1) { rf_mLen *= 2; if (( ref_ft.pos = (int *)realloc(ref_ft.pos, rf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_zero: realloc"); return 1; } if ((ref_ft.ptr = (int *)realloc(ref_ft.ptr, rf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_zero: realloc"); return 1; } if ((ref_ft.str = (char *)realloc(ref_ft.str, rf_mLen * sizeof(char))) == NULL) { perror("extract_oriented_zero: realloc"); return 1; } } if (k >= tf_mLen - 1) { tf_mLen *= 2; if ((tar_ft.pos = (int *)realloc(tar_ft.pos, tf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_zero: realloc"); return 1; } if ((tar_ft.cnt = (int *)realloc(tar_ft.cnt, tf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_zero: realloc"); return 1; } } if (strcmp(seq_id, seq_id_prev) != 0) { tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_oriented_both(table, j, k); strcpy(seq_id_prev, seq_id); j = 0; k = 0; tar_ft.pos[0] = -TarOffset; Len += last_pos; } if (strcmp(feature, ref_ft.ft) == 0) { if (strand == '+' || strand == '-') { j++; ref_ft.ptr[j] = k; ref_ft.pos[j] = pos; ref_ft.str[j] = strand; } } else if (strcmp(feature, tar_ft.ft) == 0 && strand == tar_ft.strand) { k++; tar_ft.pos[k] = pos; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ Len += last_pos; tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_oriented_both(table, j, k); free(ref_ft.ptr); free(ref_ft.pos); free(ref_ft.str); free(tar_ft.pos); free(tar_ft.cnt); return 0; } int extract_oriented_both(FILE *input, char *iFile, table_p_t table) { unsigned int k = 0; unsigned int j = 0; char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; char *s, *res, *buf; size_t rf_mLen = BUF_SIZE; size_t tf_mLen = BUF_SIZE; size_t mLen = LINE_SIZE; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly 
sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_both: malloc"); return 1; } if ((ref_ft.ptr = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_both: malloc"); return 1; } if ((ref_ft.str = (char *)calloc(rf_mLen, sizeof(char))) == NULL) { perror("extract_oriented_both: malloc"); return 1; } if ((tar_ft.pos = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_both: malloc"); return 1; } if ((tar_ft.cnt = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("extract_oriented_both: malloc"); return 1; } if ((s = malloc(mLen * sizeof(char))) == NULL) { perror("extract_oriented_both: malloc"); return 1; } tar_ft.pos[0] = -TarOffset; #ifdef DEBUG int c = 1; #endif /* while (fscanf(f,"%s %s %d %c %d %*s", seq_id, feature, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("extract_oriented_both: realloc"); return 
1; } res = fgets(s + cLen, (int) (mLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); return 1; } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); return 1; } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); return 1; } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); return 1; } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); return 1; } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, feature, strand, pos, cnt, ext); #endif if (j >= rf_mLen - 1) { rf_mLen *= 2; if (( ref_ft.pos = (int *)realloc(ref_ft.pos, rf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_both: realloc"); return 1; } if ((ref_ft.ptr = (int *)realloc(ref_ft.ptr, rf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_both: realloc"); return 1; } if ((ref_ft.str = (char *)realloc(ref_ft.str, rf_mLen * sizeof(char))) == NULL) { perror("extract_oriented_both: realloc"); return 1; } } if (k >= tf_mLen - 1) { tf_mLen *= 2; if ((tar_ft.pos = (int *)realloc(tar_ft.pos, tf_mLen * sizeof(int))) == NULL) { perror("extract_oriented_both: realloc"); return 1; } if ((tar_ft.cnt = (int *)realloc(tar_ft.cnt, tf_mLen * 
sizeof(int))) == NULL) { perror("extract_oriented_both: realloc"); return 1; } } if (strcmp(seq_id, seq_id_prev) != 0) { tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_oriented_both(table, j, k); strcpy(seq_id_prev, seq_id); j = 0; k = 0; tar_ft.pos[0] = -TarOffset; Len += last_pos; } if (strcmp(feature, ref_ft.ft) == 0) { if (strand == '+' || strand == '-') { j++; ref_ft.ptr[j] = k; ref_ft.pos[j] = pos; ref_ft.str[j] = strand; } } else if (strcmp(feature, tar_ft.ft) == 0) { k++; tar_ft.pos[k] = pos; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ Len += last_pos; tar_ft.pos[k + 1] = ref_ft.pos[j] + TarOffset; print_counts_oriented_both(table, j, k); free(ref_ft.ptr); free(ref_ft.pos); free(ref_ft.str); free(tar_ft.pos); free(tar_ft.cnt); return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; table_t table; options.cutOff = 1; int i = 0; while (1) { int c = getopt(argc, argv, "c:dhA:B:b:e:w:"); if (c == -1) break; switch (c) { case 'c': options.cutOff = atoi(optarg); break; case 'd': options.debug = 1; break; case 'h': options.help = 1; break; case 'A': RefFeature = optarg; break; case 'B': TarFeature = optarg; break; case 'b': From = atoi(optarg); break; case 'e': To = atoi(optarg); break; case 'w': Win = atoi(optarg); break; default: printf ("?? 
getopt returned character code 0%o ??\n", c); } } if (optind > argc || options.help == 1 || RefFeature == NULL || TarFeature == NULL || From == To || Win == 0 || options.cutOff < 0) { fprintf(stderr, "Usage: %s [options] -A -B -b -e -w [<] \n" " - version %s\n" " where options are:\n" " \t\t -h Show this help text\n" " \t\t -d Print debug information and check SGA file\n" " \t\t -c Cut-Off value for target feature counts (default is %d)\n" "\n\tFeature Correlation and Extraction Tool for ChIP-seq data analysis.\n" "\tThe program reads a ChIP-seq data file (or from stdin [<]) in SGA format (),\n" "\tcontaining two features, a reference feature () and a target feature (),\n" "\tcorrelates the genomic tag count distributions for the two features, and for each reference \n" "\tfeature, it extracts target feature tags that fall into a distance range ([from,to]) relative\n" "\tto the reference feature.\n" "\tThe feature specification must have the following format:\n" " \t = [<+|-|0[strandless]|a[any]|o[oriented]>]\n\n" "\tmeaning that the feature field has a name and a strand specification.\n" "\tAccepted strand values are the following: +|-|0[strandless]|a[any]|o[oriented].\n" "\tIf the strand is not specified, it is set to a[any] by default.\n" "\tThe name corresponds to the second field of the SGA file.\n" "\tThe SGA input file MUST BE sorted by sequence name (or chromosome id), position, and strand.\n" "\tOne should check the input SGA file with the following command:\n" "\tsort -s -c -k1,1 -k3,3n -k4,4 .\n\n" "\tIn debug mode (-d), the program performs the sorting order check.\n\n" "\tThe relative distance between the two features is analysed within a given range: -\n" "\tthat should be greater than 0. A cut-off value may be specified for the target feature counts.\n" "\tThe window width (-w) defines the histogram step size or bin. 
It must be an integer greater than 0.\n" "\tThe output is a table in text format consisting of all reference features (rows) with relative\n" "\ttarget tag counts in bins of a given size defined by window width (-w) (columns).\n\n", argv[0], VERSION, options.cutOff); return 1; } if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.debug) { fprintf(stderr, " Arguments:\n"); fprintf(stderr, " Feature A (ref): %s\n", RefFeature); fprintf(stderr, " Feature B (tar): %s\n", TarFeature); fprintf(stderr, " Range : [%d, %d]\n", From, To); fprintf(stderr, " Sliding Window : %d\n", Win); fprintf(stderr, " Cut-off : %d\n", options.cutOff); } char *s = RefFeature; i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Description too long \"%s\" \n", RefFeature); return 1; } ref_ft.ft[i++] = *s++; } ref_ft.strand = '\0'; while (isspace(*s++)) ref_ft.strand = *s; s = TarFeature; i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, " Feature Description too long \"%s\" \n", TarFeature); return 1; } tar_ft.ft[i++] = *s++; } tar_ft.strand = '\0'; while (isspace(*s++)) tar_ft.strand = *s; if (options.debug) fprintf(stderr, " Ref feature : %s %c (R)\n Tar Feature: %s %c (T)\n", ref_ft.ft, ref_ft.strand, tar_ft.ft, tar_ft.strand); size_t cLen = strlen(ref_ft.ft); if (!cLen) { fprintf(stderr, "Wrong Feature Description (ref) \"%s\". You must at least provide a name for your Ref Feature!\n Feature Specs Format: = []\n", RefFeature); return 1; } cLen = strlen(tar_ft.ft); if (!cLen) { fprintf(stderr, "Wrong Feature Description (tar) \"%s\". You must at least provide a name for you Target Feature! 
\n Feature Specs Format: []\n", TarFeature); return 1; } if (ref_ft.strand == '\0') { ref_ft.strand = 'a'; } if (tar_ft.strand == '\0') { tar_ft.strand = 'a'; } if (table_init(&table) != 0) { return 1; } int status = 0; if (ref_ft.strand == '+' || ref_ft.strand == '-' || ref_ft.strand == '0') { if (tar_ft.strand == '+' || tar_ft.strand == '-' || tar_ft.strand == '0') status = extract_basic_basic(input, argv[optind++], &table); else if (tar_ft.strand == 'a') status = extract_basic_both(input, argv[optind++], &table); } else if (ref_ft.strand == 'a') { if (tar_ft.strand == '+' || tar_ft.strand == '-' || tar_ft.strand == '0') status = extract_both_basic(input, argv[optind++], &table); else if (tar_ft.strand == 'a') status = extract_both_both(input, argv[optind++], &table); } else if (ref_ft.strand == 'o') { if (tar_ft.strand == '+') status = extract_oriented_plus(input, argv[optind++], &table); else if (tar_ft.strand == '-') status = extract_oriented_minus(input, argv[optind++], &table); else if (tar_ft.strand == '0') status = extract_oriented_zero(input, argv[optind++], &table); else if (tar_ft.strand == 'a') status = extract_oriented_both(input, argv[optind++], &table); } fprintf(stderr, "Total Sequence Length: %ld\n", Len); if (input != stdin) { fclose(input); } free(table.bin); free(table.val); return status; } chip-seq/bin/0000755022744200262270000000000013433535261013634 5ustar ambrosingr-bucherchip-seq/hashtable.c0000744022744200262270000004436113107337006015167 0ustar ambrosingr-bucher/** * License GPLv3+ * @file hashtable.c * @brief a simple hash table implementation * @author Ankur Shrivastava * @author Giovanna Ambrosini * @modif Change hash function with efficient Murmur3 algorithm */ #include "hashtable.h" #include "debug.h" #include #include // element operations /** * Function to create a now hash_table element * @returns hash_table_element_t object when success * @returns NULL when no memory */ hash_table_element_t * hash_table_element_new() { 
INFO("creating a new hash table element"); return calloc(1, hash_table_element_s); } /** * Function to delete an hash table element * @param table table from which element has to be deleted * @param element hash table element to be deleted */ void hash_table_element_delete(hash_table_t * table, hash_table_element_t * element) { INFO("Deleting an hash table element"); if (table->mode == MODE_COPY) { free(element->value); free(element->key); } else if (table->mode == MODE_VALUEREF) { free(element->key); } free(element); } // hash table operations /** * Fuction to create a new hash table * @param mode hash_table_mode which the hash table should follow * @returns hash_table_t object which references the hash table * @returns NULL when no memory */ hash_table_t * hash_table_new(hash_table_mode_t mode) { INFO("Creating a new hash table"); hash_table_t *table = calloc(1, hash_table_s); if (!table) { INFO("No Memory while allocating hash_table"); return NULL; } table->mode = mode; table->key_num = 128; table->key_ratio = 4; table->store_house = (hash_table_element_t **) calloc(table->key_num, sizeof(hash_table_element_t *)); if (!table->store_house) { INFO("No Memory while allocating hash_table store house"); free(table); return NULL; } return table; } /** * Function to delete the hash table * @param table hash table to be deleted */ void hash_table_delete(hash_table_t * table) { INFO("Deleating a hash table"); size_t i=0; for (;istore_house[i]) { hash_table_element_t * temp = table->store_house[i]; table->store_house[i] = table->store_house[i]->next; hash_table_element_delete(table, temp); } } free(table->store_house); free(table); } /** * Function to add a key - value pair to the hash table, use HT_ADD macro * @param table hash table to add element to * @param key pointer to the key for the hash table * @param key_len length of the key in bytes * @param value pointer to the value to be added against the key * @param value_len length of the value in bytes * @returns 0 on 
sucess * @returns -1 when no memory */ int hash_table_add(hash_table_t * table, void * key, size_t key_len, void * value, size_t value_len) { if ((table->key_count / table->key_num) >= table->key_ratio) { LOG("Ratio(%d) reached the set limit %d\nExpanding hash_table", (int)(table->key_count / table->key_num), (int)table->key_ratio); hash_table_resize(table, table->key_num*2); //exit(0); } size_t hash = HASH(key, key_len); hash_table_element_t * element = hash_table_element_new(); if (!element) { INFO("Cannot allocate memory for element"); return -1; // No Memory } if (table->mode == MODE_COPY) { LOG("Adding a key-value pair to the hash table with hash -> %d, in COPY MODE", (int)hash); element->key = malloc(key_len); element->value = malloc(value_len); if (element->key && element->value) { memcpy(element->key, key, key_len); memcpy(element->value, value, value_len); } else { if (element->key) { free(element->key); INFO("Cannot allocate memory for value"); } if (element->value) { free(element->value); INFO("Cannot allocate memory for key"); } free(element); return -1; //No Memory } } else if (table->mode == MODE_VALUEREF) { LOG("Adding a key-value pair to the hash table with hash -> %d, in VALUEREF MODE", (int)hash); element->key = malloc(key_len); if (element->key) { memcpy(element->key, key, key_len); } else { INFO("Cannot allocate memory for key"); free(element); return -1; //No Memory } element->value = value; } else if (table->mode == MODE_ALLREF) { LOG("Adding a key-value pair to the hash table with hash -> %d, in ALLREF MODE", (int)hash); element->key = key; element->value = value; } element->key_len = key_len; element->value_len = value_len; element->next = NULL; // find the key position for chaining if (!table->store_house[hash]) { LOG("No Conflicts adding the first element at %d", (int)hash); table->store_house[hash] = element; table->key_count++; } else { LOG("Conflicts adding element at %d", (int)hash); hash_table_element_t * temp = 
table->store_house[hash]; while(temp->next) { while(temp->next && temp->next->key_len!=key_len) { temp = temp->next; } if(temp->next) { if (!memcmp(temp->next->key, key, key_len)) { LOG("Found Key at hash -> %d", (int)hash); hash_table_element_t *to_delete = temp->next; temp->next = element; element->next = to_delete->next; hash_table_element_delete(table, to_delete); // since we are replacing values no need to change key_count return 0; } else { temp = temp->next; } } } temp->next = element; table->key_count++; } return 0; } /** * Function to remove an hash table element (for a given key) from a given hash table * @param table hash table from which element has to be removed * @param key pointer to the key which has to be removed * @param key_len size of the key in bytes * @returns 0 on sucess * @returns -1 when key is not found */ int hash_table_remove(hash_table_t * table, void * key, size_t key_len) { INFO("Deleting a key-value pair from the hash table"); if ((table->key_num/ table->key_count) >= table->key_ratio) { LOG("Ratio(%d) reached the set limit %d\nContracting hash_table", (int)(table->key_num / table->key_count), (int)table->key_ratio); hash_table_resize(table, table->key_num/2); //exit(0); } size_t hash = HASH(key, key_len); if (!table->store_house[hash]) { LOG("Key Not Found -> No element at %d", (int)hash); return -1; // key not found } hash_table_element_t *temp = table->store_house[hash]; hash_table_element_t *prev = temp; while(temp) { while(temp && temp->key_len!=key_len) { prev = temp; temp = temp->next; } if(temp) { if (!memcmp(temp->key, key, key_len)) { if (prev == table->store_house[hash]) { table->store_house[hash] = temp->next; } else { prev->next = temp->next; } hash_table_element_delete(table, temp); INFO("Deleted a key-value pair from the hash table"); table->key_count--; return 0; } prev=temp; temp=temp->next; } } INFO("Key Not Found"); return -1; // key not found } /** * Function to lookup a key in a particular table * @param table 
table to look key in * @param key pointer to key to be looked for * @param key_len size of the key to be searched * @returns NULL when key is not found in the hash table * @returns void* pointer to the value in the table */ void * hash_table_lookup(hash_table_t * table, void * key, size_t key_len) { size_t hash = HASH(key, key_len); LOG("Looking up a key-value pair for hash -> %d", (int)hash); if (!table->store_house[hash]) { LOG("Key not found at hash %d, no entries", (int)hash); return NULL; // key not found } hash_table_element_t *temp = table->store_house[hash]; while(temp) { while(temp && temp->key_len!=key_len) { temp = temp->next; } if(temp) { if (!memcmp(temp->key, key, key_len)) { LOG("Found Key at hash -> %d", (int)hash); LOG("Value %s value_len %d", (char *)temp->value, (int)temp->value_len); return temp->value; } else { temp = temp->next; } } } LOG("Key not found at hash %d", (int)hash); return NULL; // key not found } /** * Function to look if the exists in the hash table * @param key pointer to key to be looked for * @param key_len size of the key to be searched * @returns 0 when key is not found * @returns 1 when key is found */ int hash_table_has_key(hash_table_t * table, void * key, size_t key_len) { size_t hash = HASH(key, key_len); LOG("Searching for key with hash -> %d", (int)hash); if (!table->store_house[hash]) { LOG("Key not found with hash -> %d, no entries", (int)hash); return 0; // key not found } hash_table_element_t *temp = table->store_house[hash]; while(temp) { while(temp && temp->key_len!=key_len) { temp = temp->next; } if(temp) { if (!memcmp(temp->key, key, key_len)) { LOG("Key Found with hash -> %d", (int)hash); return 1; // key found } temp=temp->next; } } LOG("Key not found with hash -> %d", (int)hash); return 0; // key not found } /** * Function to return all the keys in a given hash table * @param table hash table from which key are to be reterived * @param keys a void** pointer where keys are filled in (memory allocated 
internally and must be freed) * @return total number of keys filled in keys */ size_t hash_table_get_keys(hash_table_t * table, void *** keys) { size_t i = 0; size_t count = 0; (*keys) = calloc(table->key_count, sizeof(void *)); for(i=0;istore_house[i]) { (*keys)[count++] = table->store_house[i]; hash_table_element_t *temp = table->store_house[i]; #ifdef DEBUG size_t num = 1; #endif while(temp->next) { (*keys)[count++] = temp->next; temp = temp->next; #ifdef DEBUG num++; #endif } #ifdef DEBUG LOG("found %d key(s) at hash -> %d", (int)num, (int)i); #endif } } return count; } /** * Function to get all elements (key - value pairs) from the given hash table * @param table hash table from which elements have to be retrieved * @param elements a pointer to an array of hash_table_element_t pointer (malloced by function) * @returns 1 when no memory * @returns count of elements */ size_t hash_table_get_elements(hash_table_t * table, hash_table_element_t *** elements) { size_t i = 0; size_t count = 0; (*elements) = (hash_table_element_t **) calloc(table->key_count, sizeof(hash_table_element_t *)); if (!*elements) { INFO("No Memory to allocate elements array"); return 1; } for(i=0;istore_house[i]) { (*elements)[count++] = table->store_house[i]; hash_table_element_t *temp = table->store_house[i]; #ifdef DEBUG size_t num = 1; #endif while(temp->next) { (*elements)[count++] = temp->next; temp = temp->next; #ifdef DEBUG num++; #endif } #ifdef DEBUG LOG("found %d key(s) at hash -> %d", (int)num, (int)i); #endif } } #ifdef DEBUG LOG("hash_table_get_elements: total counts (nb of keys) %d", (int)count); #endif return count; } /** * Function that returns a hash value for a given key and key_len * @param key pointer to the key * @param key_len length of the key * @param max_key max value of the hash to be returned by the function * @returns hash value belonging to [0, max_key) */ /* uint32_t hash_table_do_hash(void * key, size_t key_len, uint32_t max_key) { uint16_t *ptr = (uint16_t *) 
key; uint32_t hash = 0xbabe; // WHY NOT size_t i = 0; for(;i<(key_len/2);i++) { hash^=(i<<4 ^ *ptr<<8 ^ *ptr); ptr++; } hash = hash % max_key; return hash; } */ /** * MurmurHash3_x86_32 Algorithm * */ //----------------------------------------------------------------------------- // Platform-specific functions and macros #ifdef __GNUC__ #define FORCE_INLINE __attribute__((always_inline)) inline #else #define FORCE_INLINE #endif static inline FORCE_INLINE uint32_t rotl32 ( uint32_t x, int8_t r ) { return (x << r) | (x >> (32 - r)); } #define ROTL32(x,y) rotl32(x,y) #define BIG_CONSTANT(x) (x##LLU) //----------------------------------------------------------------------------- // Block read - if your platform needs to do endian-swapping or can only // handle aligned reads, do the conversion here #define getblock(p, i) (p[i]) //----------------------------------------------------------------------------- // Finalization mix - force all bits of a hash block to avalanche static inline FORCE_INLINE uint32_t fmix32 ( uint32_t h ) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; return h; } //---------- //----------------------------------------------------------------------------- void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; const int nblocks = len / 4; int i; uint32_t h1 = seed; uint32_t c1 = 0xcc9e2d51; uint32_t c2 = 0x1b873593; //---------- // body const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); for(i = -nblocks; i; i++) { uint32_t k1 = getblock(blocks,i); k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; h1 = ROTL32(h1,13); h1 = h1*5+0xe6546b64; } //---------- // tail const uint8_t * tail = (const uint8_t*)(data + nblocks*4); uint32_t k1 = 0; switch(len & 3) { case 3: k1 ^= tail[2] << 16; case 2: k1 ^= tail[1] << 8; case 1: k1 ^= tail[0]; k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; }; //---------- // finalization h1 ^= len; h1 = 
fmix32(h1); *(uint32_t*)out = h1; } uint32_t hash_table_do_hash(void * key, size_t key_len, uint32_t max_key) { uint32_t hash; uint32_t seed = 42; MurmurHash3_x86_32(key, key_len, seed, &hash); hash = hash % max_key; return hash; } /** * Function to resize the hash table store house * @param table hash table to be resized * @param len new length of the hash table * @returns -1 when no elements in hash table * @returns -2 when no emmory for new store house * @returns 0 when sucess */ int hash_table_resize(hash_table_t *table, size_t len) { LOG("resizing hash table from %d to %d", table->key_num, (int)len); hash_table_element_t ** elements; size_t count; // FIXME traversing the elements twice, change it some time soon count = hash_table_get_elements(table, &elements); if (!count) { INFO("Got No Elements from the hash table"); return -1; } #ifdef DEBUG LOG("hash_table_resize: count %d", (int)count); #endif // keep the current store house in case we dont get more memory hash_table_element_t ** temp = table->store_house; table->store_house = calloc(len, sizeof(hash_table_element_t *)); if (!table->store_house) { table->store_house = temp; INFO("No Memory for new store house"); return -2; } table->key_num = len; // fool the new hash table so it refers even previously copied values int mode = table->mode; table->mode = MODE_ALLREF; // the new table starts from scratch table->key_count = 0; while(count>0) { hash_table_element_t *elem = elements[--count]; hash_table_add(table, elem->key, elem->key_len, elem->value, elem->value_len); } table->mode = mode; // free old store house free(temp); // free elements array free(elements); return 0; } /** * Function to iterate through all elements of the hashtable * @param table hash table to be iterated * @param fct pointer to a function returning 1 if the element has to be removed * @param user arbitrary user pointer passed to the fct callback * @returns 0 when success */ int hash_table_iterate(hash_table_t *table, int (*fct)(void 
*user, void *value, void *key, size_t key_len), void *user) { INFO("iterating hash table"); unsigned int i; for(i=0;i. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #ifdef DEBUG #include #endif #include "version.h" /*#define BUF_SIZE 4096 */ #define BUF_SIZE 8192 #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 typedef struct _options_t { int cutOff; int report; int help; int debug; int oriented; int reverse; } options_t; static options_t options; typedef struct _feature_t { char seq_id[SEQ_ID]; char ft[FT_MAX]; char strand; int *pos; int *cnt; int *ptr; char *str; char **ext; } feature_t, *feature_p_t; feature_t ref_ft; feature_t tar_ft; char *RefFeature = NULL; char *TarFeature = NULL; int From = 0; int To = 0; int Tmp = 0; int Thres = 0; int Rtot = 0; /* Total Reference Counts */ int Ttot = 0; /* Total Target Counts */ unsigned long Len = 0; /* Total Sequence Length */ int ref_specs = 1; /* if = 1 feature specs : AND */ int tar_specs = 1; int no_tar_ft = 0; int or_same_strand = 0; int or_opposite_strand = 0; int or_any_strand = 0; void score_update(unsigned int ft_nb, unsigned int tar_nb) { int j, k; unsigned long long sum = 0; char B_score[128]; #ifdef DEBUG printf(" score_update: nb of ref features : %d\n", ft_nb); #endif for (j = 1; j <= (int)ft_nb; j++) { sum = 0; k = ref_ft.ptr[j]; if (!options.oriented || (ref_ft.str[j] == '+')) { /* Ref Feat Un-oriented or Oriented on Pos Strand Examin Upstream Tar pos */ while (tar_ft.pos[k] > ref_ft.pos[j] + To) {k--;} while ((k >= 0) && (tar_ft.pos[k] >= ref_ft.pos[j] + From)) { if (!options.oriented || (or_same_strand && tar_ft.str[k] == '+') || (or_opposite_strand && tar_ft.str[k] == '-') || or_any_strand) { sum += tar_ft.cnt[k]; } #ifdef DEBUG printf(" score_update : Sum = %llu Tar Cnts = %d\n", sum, tar_ft.cnt[k]); #endif k--; /* examine further upstream positions within the window range */ } } else 
{ /* Reverse Loop - Ref Feature Oriented on Neg Strand (Examin Upstream Tar pos) */ while (tar_ft.pos[k] > ref_ft.pos[j] - From) {k--;} while ((k >= 0) && (tar_ft.pos[k] >= ref_ft.pos[j] - To)) { if ((or_same_strand && tar_ft.str[k] == '-') || (or_opposite_strand && tar_ft.str[k] == '+') || or_any_strand) { sum += tar_ft.cnt[k]; } k--; /* examine further upstream positions within the window range */ } } k = ref_ft.ptr[j] + 1; if (!options.oriented || (ref_ft.str[j] == '+')) { /* Ref Feat Un-oriented or Oriented on Pos Strand (Examin Downstream Tar pos) */ while ((tar_ft.pos[k] < ref_ft.pos[j] + From) && k <= (int)tar_nb) {k++;} while ((tar_ft.pos[k] <= ref_ft.pos[j] + To) && k <= (int)tar_nb) { if (!options.oriented || (or_same_strand && tar_ft.str[k] == '+') || (or_opposite_strand && tar_ft.str[k] == '-') || or_any_strand) { sum += tar_ft.cnt[k]; } #ifdef DEBUG printf(" score_update : Sum = %llu Tar Cnts = %d\n", sum, tar_ft.cnt[k]); #endif k++; /* examine further downstream positions within the window range */ } } else { /* Reverse Loop - Ref Feature Oriented on Neg Strand (Examin Downstream Tar pos) */ while ((tar_ft.pos[k] < ref_ft.pos[j] - To) && k <= (int)tar_nb) {k++;} while ((tar_ft.pos[k] <= ref_ft.pos[j] - From) && k <= (int)tar_nb) { if ((or_same_strand && tar_ft.str[k] == '-') || (or_opposite_strand && tar_ft.str[k] == '+') || or_any_strand) { sum += tar_ft.cnt[k]; } k++; /* examine further downstream positions within the window range */ } } if (options.reverse) { if ( sum < (unsigned long long)Thres) { if (options.report) { sprintf(B_score," %s=%llu", tar_ft.ft, sum); } else { sprintf(B_score," %llu", sum); } if (ref_ft.ext[j] != NULL) { printf("%s\t%s\t%d\t%c\t%d\t%s\t%s\n", ref_ft.seq_id, ref_ft.ft, ref_ft.pos[j], ref_ft.str[j], ref_ft.cnt[j], ref_ft.ext[j], B_score); } else { printf("%s\t%s\t%d\t%c\t%d\t%s\n", ref_ft.seq_id, ref_ft.ft, ref_ft.pos[j], ref_ft.str[j], ref_ft.cnt[j], B_score); } } } else { if ( sum >= (unsigned long long)Thres) { if 
(options.report) { sprintf(B_score," %s=%llu", tar_ft.ft, sum); } else { sprintf(B_score," %llu", sum); } if (ref_ft.ext[j] != NULL) { printf("%s\t%s\t%d\t%c\t%d\t%s\t%s\n", ref_ft.seq_id, ref_ft.ft, ref_ft.pos[j], ref_ft.str[j], ref_ft.cnt[j], ref_ft.ext[j], B_score); } else { printf("%s\t%s\t%d\t%c\t%d\t%s\n", ref_ft.seq_id, ref_ft.ft, ref_ft.pos[j], ref_ft.str[j], ref_ft.cnt[j], B_score); } } } } /* Loop over Reference positions (j) */ return; } int process_sga(FILE *input, char *iFile) { unsigned int l = 0; unsigned int m = 0; unsigned int k = 0; unsigned int j = 0; char seq_id_prev[SEQ_ID] = ""; int pos, cnt, last_pos = 0; char *s, *res, *buf; size_t rf_mLen = BUF_SIZE; size_t tf_mLen = BUF_SIZE; size_t mLen = LINE_SIZE; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); /*fprintf(stderr,"system command sort : return code %d\n", sys_code);*/ if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if ((ref_ft.pos = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft.cnt = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft.ptr = (int *)calloc(rf_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft.str = (char *)calloc(rf_mLen, sizeof(char))) == NULL) { 
perror("process_sga: malloc"); exit(1); } if ((ref_ft.ext = (char**)calloc(rf_mLen, sizeof(*(ref_ft.ext)))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((tar_ft.pos = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((tar_ft.cnt = (int *)calloc(tf_mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((tar_ft.str = (char *)calloc(tf_mLen, sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((s = malloc(mLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } tar_ft.pos[0] = From - 1; if (options.debug) fprintf(stderr, "Processing file %s\n", iFile); #ifdef DEBUG int c = 1; #endif /* while (fscanf(f,"%s %s %d %c %d %*s", seq_id, feature, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) mLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char feature[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == mLen && s[cLen - 1] != '\n') { mLen *= 2; if ((s = realloc(s, mLen)) == NULL) { perror("process_file: realloc"); exit(1); } res = fgets(s + cLen, (int) (mLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); exit(1); } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); exit(1); } feature[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i = 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); exit(1); } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = 
*buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); exit(1); } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); exit(1); } ext[i++] = *buf++; } ext[i] = 0; #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s%c Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, feature, strand, pos, cnt, ext); #endif if (j >= rf_mLen - 1) { rf_mLen *= 2; if ((ref_ft.pos = (int *)realloc(ref_ft.pos, rf_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ref_ft.cnt = (int *)realloc(ref_ft.cnt, rf_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ref_ft.ptr = (int *)realloc(ref_ft.ptr, rf_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ref_ft.str = (char *)realloc(ref_ft.str, rf_mLen * sizeof(char))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ref_ft.ext = (char**)realloc(ref_ft.ext, rf_mLen * sizeof(*(ref_ft.ext)))) == NULL) { perror("process_sga: realloc"); exit(1); } } if (k >= tf_mLen - 1) { tf_mLen *= 2; if ((tar_ft.pos = (int *)realloc(tar_ft.pos, tf_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((tar_ft.cnt = (int *)realloc(tar_ft.cnt, tf_mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((tar_ft.str = (char *)realloc(tar_ft.str, tf_mLen * sizeof(char))) == NULL) { perror("process_sga: realloc"); exit(1); } } if (strcmp(seq_id, seq_id_prev) != 0) { l = j; tar_ft.pos[k + 1] = ref_ft.pos[l] + To + 1; m = k + 1; score_update(l, m); strcpy(seq_id_prev, seq_id); for (unsigned int i = 1; i <= j; i++) { if (ref_ft.ext[i] != NULL) free(ref_ft.ext[i]); } j = 0; k = 0; Len += last_pos; } if (ref_specs) { if (strcmp(feature, ref_ft.ft) == 0 && strand == ref_ft.strand) { j++; 
strcpy(ref_ft.seq_id, seq_id); ref_ft.pos[j] = pos; ref_ft.str[j] = strand; ref_ft.ptr[j] = k; if (cnt > options.cutOff) ref_ft.cnt[j] = options.cutOff; else ref_ft.cnt[j] = cnt; if (ext[0] == 0) { ref_ft.ext[j] = NULL; } else { ref_ft.ext[j] = (char *)malloc(strlen(ext) + 1); strcpy(ref_ft.ext[j], ext); } Rtot += ref_ft.cnt[j]; } } else { if (strcmp(feature, ref_ft.ft) == 0) { j++; strcpy(ref_ft.seq_id, seq_id); ref_ft.pos[j] = pos; ref_ft.str[j] = strand; ref_ft.ptr[j] = k; if (cnt > options.cutOff) ref_ft.cnt[j] = options.cutOff; else ref_ft.cnt[j] = cnt; if (ext[0] == 0) { ref_ft.ext[j] = NULL; } else { ref_ft.ext[j] = (char *)malloc(strlen(ext) + 1); strcpy(ref_ft.ext[j], ext); } Rtot += ref_ft.cnt[j]; } } if (tar_specs) { if (strcmp(feature, tar_ft.ft) == 0 && strand == tar_ft.strand) { k++; tar_ft.pos[k] = pos; tar_ft.str[k] = strand; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; Ttot += tar_ft.cnt[k]; } } else if (strcmp(feature, tar_ft.ft) == 0) { k++; tar_ft.pos[k] = pos; tar_ft.str[k] = strand; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; Ttot += tar_ft.cnt[k]; } else if (no_tar_ft) { if (strcmp(feature, ref_ft.ft) != 0) { k++; tar_ft.pos[k] = pos; tar_ft.str[k] = strand; if (cnt > options.cutOff) tar_ft.cnt[k] = options.cutOff; else tar_ft.cnt[k] = cnt; Ttot += tar_ft.cnt[k]; } } last_pos = pos; } /* End of While */ free(s); /* The last time (at EOF) */ if (input != stdin) { fclose(input); } Len += last_pos; l = j; tar_ft.pos[k + 1] = ref_ft.pos[l] + To + 1; m = k + 1; score_update(l, m); fprintf(stderr,"# Total Sequence Len : %lu , Total Reference Counts : %d , Total Target Counts : %d\n", Len, Rtot, Ttot); for (unsigned int i = 1; i <= j; i++) { free(ref_ft.ext[i]); } free(ref_ft.pos); free(ref_ft.cnt); free(ref_ft.str); free(ref_ft.ptr); free(ref_ft.ext); free(tar_ft.pos); free(tar_ft.cnt); free(tar_ft.str); return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG 
mcheck(NULL); mtrace(); #endif FILE *input; options.cutOff = 9999; options.report = 0; options.reverse = 0; int i = 0; while (1) { int c = getopt(argc, argv, "c:qdhorA:B:b:e:t:"); if (c == -1) break; switch (c) { case 'c': options.cutOff = atoi(optarg); break; case 'd': options.debug = 1; break; case 'h': options.help = 1; break; case 'A': RefFeature = optarg; break; case 'B': TarFeature = optarg; break; case 'b': From = atoi(optarg); break; case 'e': To = atoi(optarg); break; case 'o': options.oriented = 1; break; case 'q': options.report = 1; break; case 'r': options.reverse = 1; break; case 't': Thres = atoi(optarg); break; default: printf ("?? getopt returned character code 0%o ??\n", c); } } if (optind > argc || options.help == 1 || RefFeature == NULL || TarFeature == NULL || From > To || options.cutOff < 0) { fprintf(stderr, "Usage: %s [options] -A -B -b -e [-t ] [<] \n" " - version %s\n" " where options are:\n" " \t\t -h Show this help text\n" " \t\t -d Print debug information and check SGA file\n" " \t\t -o Oriented strand processing\n" " \t\t -r Reverse extraction process\n" " \t\t -c Cut-Off value for feature counts (default is %d)\n" " \t\t -q Report feature B tag counts as 'feature_name='\n" "\n\tFeature Extraction Tool for ChIP-seq data analysis.\n" "\tThe program reads a ChIP-seq data file (or from stdin [<]) in SGA format (),\n" "\tand compares two features, a reference feature () and a target feature\n" "\t(), with respect to their relative position.\n" "\tBy default, the program extracts feature A tags that are enriched in feature B\n" "\ttags according to a given count output threshold or score() that is %d by default.\n" "\tIf option -r is specified, the program extracts sites that are depleted in feature B.\n" "\tThe feature specification must have the following format:\n" " \t = [<+|->]\n\n" "\tthe strand specification being optional.\n" "\tThe parameter is a name that corresponds to the second field of the SGA file.\n" "\tIf no feature is 
given then all input tags are processed.\n" "\tThe SGA input file MUST BE sorted by sequence name (or chromosome id), position,\n" "\tand strand.\n" "\tOne should check the input SGA file with the following command:\n" "\tsort -s -c -k1,1 -k3,3n -k4,4 .\n\n" "\tIn debug mode (-d), the program performs the sorting order check.\n\n" "\tThe relative distance between the two features is analysed within a\n" "\tgiven range: -.\n" "\tA value can be optionally specified as a cut-off for the feature counts.\n\n", argv[0], VERSION, options.cutOff, Thres); return 1; } if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.debug) { fprintf(stderr, " Arguments:\n"); fprintf(stderr, " Feature A (ref): %s\n", RefFeature); fprintf(stderr, " Feature B (tar): %s\n", TarFeature); fprintf(stderr, " Range : [%d, %d]\n", From, To); fprintf(stderr, " Cut-off : %d\n", options.cutOff); fprintf(stderr, " Output Threshold : %d\n\n", Thres); fprintf(stderr, " Feature B tag count report : %d (1:ON, 0:OFF)\n", options.report); } char *s = RefFeature; i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Description too long \"%s\" \n", RefFeature); return 1; } ref_ft.ft[i++] = *s++; } ref_ft.strand = '\0'; if (!options.oriented) { while (isspace(*s++)) ref_ft.strand = *s; } s = TarFeature; i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, " Feature Description too long \"%s\" \n", TarFeature); return 1; } tar_ft.ft[i++] = *s++; } tar_ft.strand = '\0'; while (isspace(*s++)) tar_ft.strand = *s; if (options.debug) fprintf(stderr, " Ref feature : %s %c (R)\n Tar Feature: %s %c (T)\n", ref_ft.ft, ref_ft.strand, tar_ft.ft, tar_ft.strand); size_t 
cLen = strlen(ref_ft.ft); if (!cLen) { fprintf(stderr, " Wrong Reference Feature Description (ref) \"%s\". You must at least provide a name for your Ref Feature!\n Feature Specs Format: = []\n", RefFeature); return 1; } cLen = strlen(tar_ft.ft); if (!cLen) { fprintf(stderr, " No Target Feature Description (tar) has been provided. Define Feature B name as 'score'\n"); strcpy(tar_ft.ft, "score"); no_tar_ft = 1; } if (ref_ft.strand == '\0') { /* fprintf(stderr, " Warning Ref Feature \"%s\" !\n Feature Format: = []\n", RefFeature); */ ref_specs = 0; } if (tar_ft.strand == '\0') { /* fprintf(stderr, " Warning Tar Feature \"%s\" !\n Feature Format: = []\n", TarFeature); */ tar_specs = 0; } if ((strcmp(tar_ft.ft, ref_ft.ft) == 0)) { /* Check For Auto-Correlation */ if (ref_ft.strand == '\0' && tar_ft.strand == '\0') { fprintf(stderr, " Autocorrelation: Reference and Target Feature are the same!\n"); return 1; } else if ((ref_ft.strand == '+' && tar_ft.strand == '+') || (ref_ft.strand == '-' && tar_ft.strand == '-')) { fprintf(stderr, " Autocorrelation: Reference and Target Feature are the same!\n"); return 1; } } if (options.oriented) { if (tar_ft.strand == '+') { or_same_strand = 1; } else if (tar_ft.strand == '-') { or_opposite_strand = 1; } else { or_any_strand = 1; } tar_specs = 0; } if (process_sga(input, argv[optind++]) != 0) { return 1; } return 0; } chip-seq/doc/0000744022744200262270000000000013354703556013635 5ustar ambrosingr-bucherchip-seq/doc/ChIP-Seq_Tools-UsersGuide.pdf0000744022744200262270000103770513354421002021075 0ustar ambrosingr-bucher%PDF-1.3 % 4 0 obj << /Length 5 0 R /Filter /FlateDecode >> stream x]Kׯng2ɪ,M2YLNv_dr{@}(VFsnxq/\?O0C?hz2?Gl^rosST;Sݷ߳nLcv}sSOs揷Us(gY_\]Tfg/؟O7%>yxcŤ^1wC?$A卹X%Ps`DVAu|r4|RhIyto7gwSeW%bP'ϟO/ݛGrL^w34eiª[Ju `*NO N{EGmTR9ڪ)C,ٶ]<98eBKaXU?NMHN䤳"bw~h_,Y}y8ݴs2s"a]fa(ہ`OKh+&ʛ"h-8:^m8mzx S`x5;X}>j?g$b~y%W .;tXMc0{خg$d nu]̛Tuժ!,mMO|ܞNpJ˸qʦiY(O&EZẠAxuk&GcL3dE p;'8UӗMLoWILdr}} lUnF-agv܈YRTRx>J4vf@/[|>/gLs&H"٦iӠLZ 
tS㽡?93F}~©rlHvLL>XL TImjjLS:djtRV% p8B`A'U#5>yR T/G!oT&yӏIw tV#i %Fk#Fç"<(zP4C[-y F8z=s|s'n&oCQ?wSμ`&g H.qt$#:ѭzwd8«^JsT{}H9/r.H \!n r|7QT 5̇kZ1YJ}_}@ɐtH;IyUuPYK1|{QFO T`*S fbwZZxʎ(>|w5"LڃZdQv$_ KcYOuhvox05Ǿbi{WuH.rv] _. 6%br$ĭ"?$(CrОUYB Q]rASM@`A97qB m_Oj!ZOOG}`K?M)=ryRZ u_v쑦iP=S|5FL5U(AZpWjsaKt[G /.imrڕLn_"˃+lV*{ɸ =VmSBWiVZFu@lJ H)mL.IL eRt 0f+rFڰF6_Xن|iSu(VoL!ms骆ueBЫybp(IL i糯|O_U.Fv\.O#W܇7~, ;jJ9@sQT wr+儚{ n+zsT|J0jЭ~"KR#t&Ut|"rokuT,\0u6q Nkv!䣆 XCBt֞I2^# %s57>.ǣ%^':sm`Xa@@_Keʄh$F?~/Nb`YϦezӅ k)LSD>y*LRM}^>Lmb,di$lD }&,Zs}Ju񄡺LۦB]~',1P$V6ЖmەO5P\O@\[2Qa(*hs-Q61z f 3&;מZJ' ړqU@luyI 赟X|'Zl ̸: KUfɆ9uwG YU9(ܺpt)9_;bՈ,ȴY?q( カ:h&ۜ mtlZƉT3LL8xXAdhh {ʏj} P}oFWº3km]onpQU"P|KƜ捠dr)(-vؽ_[2 A7\z9i)0PIlP{';%[QqoP2d`O:?c0`T$:IP7 byЬa8ePŦܽz{f~) \f'tJ3!_uÈF iK9)Pv%4s0yӨrj<ں13᰾ـv8n`8" PO)΢W$K䅃3Dnv}:?I߾_ ·XU0j'[_}ޒ9LC;퓅,(6 )HGVHlw#xWND-Ӽ;>yp`~DɜvcU!JLרSҝ6لnod&lOOIyAAkFZ.Soso{T1&=7V'M -Tf2)h StRTP]/ c&hZ hӶ$6{^ ȷHC'r ;xӍ7%MI:ά|/kMhub_i+̫'sE@P%GfucŰh'42+_]\X5TBf/: {}7^Lyxq7>u`bԞzE6ͥM6tTgS*LJ?: @n .HA}< S- ] PYS0-CX'.}}*ܑD ٟE!BeDa#wy>fDZ^K)ǒiʳ@ ̝zio|c &$8p#q]{;9/pN$2˷f"4n2r u ک'AB[fQg,m¦ Zvеڑp+$K ufi{CzYxn)YKȰ|0Vv\J[k%>.'G,j&EWH~Ɇj.k$%G_DVt2aE 6v ,7wtaFj3Ub7Gser5/Z(d-z'^RRCǴh͵Ҽ5@r~Q2>O.pE҄I!5XBFSGzr|x['5}[{Q^VIu;1%#u)\D[Z wܒ6du8hsMöM8%vC.1K *` 48t0; dL WxXi)ֹcZf>ۦ zrwW\ [mϳwlr)8v v0+UAFNy?kzVuVï`84o‹,7@<}@bA?'[BbWpgfCh8H:3G$l'bZgTV *82P7>Dɀorб-tJd?8x^D'q-9LFVm+`y_l~`LLRST7 ph+ x,.Ѻg%ݴ`%րߍ;LdNn$P80#Y2ÎJu\+$^v :ΫTi";41f;jIvpW+7:mA_C ӑvs|f1}v j4Gt93\L|z [Jԇ * VRRpn&ɺB1H56TÝB5_38 S:s@YfCK"ٺ*)F ?/Y({{WHz|@l#7(JS'qUhuݢ07-*+4&11W؞wcKϻE)\}0(5ٴV=b{vRrRz za^jҖ Tm]qre"нLL B<1vǘT;Y- lqukQ! v,q1A>>C.|1Lw0J)  W._6:#øtb xRmSj#V"r+Q#^hv3qT&u?`e5rEq60X[(0qTv <8p-bb>kL`)^PMmU3,i_GKEYiUձI3 A*fz`l5X:U%XBGiȳ26 flZ븦XנWG|H<7plF4_d >VZ4y{y<*AܔZ CFksNyܮ&J1nS&,mZ[>>'ŁV 2 k}BBm ;qq_J],٧ ] cϕC^S eUvmWbidm``)h}GjQS乯qνMo4Oe;)+zVJi؟V=*$Ύ-X+ԑvгdA^s~zvB-7ʈe/$`& }EҴVD-W*[xO!i;+#\65.B~r|B+]sLSZeSVis`! 
s)> GA˝%m?% aO`~a.- 6RU|BE /1]>rwͳS^Dz-8xXYfCT؁X?C|}DyV0-NhQb{C Z1bLwp(y)+oL\ .ji_R7 endstream endobj 5 0 obj 5991 endobj 2 0 obj << /Type /Page /Parent 3 0 R /Resources 6 0 R /Contents 4 0 R /MediaBox [0 0 612 792] >> endobj 6 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT2 9 0 R /TT1 8 0 R /TT4 11 0 R /TT3 10 0 R /TT5 12 0 R >> >> endobj 13 0 obj << /Length 14 0 R /N 3 /Alternate /DeviceRGB /Filter /FlateDecode >> stream xwTSϽ7" %z ;HQIP&vDF)VdTG"cE b PQDE݌k 5ޚYg}׺PtX4X\XffGD=HƳ.d,P&s"7C$ E6<~&S2)212 "įl+ɘ&Y4Pޚ%ᣌ\%g|eTI(L0_&l2E9r9hxgIbטifSb1+MxL 0oE%YmhYh~S=zU&ϞAYl/$ZUm@O ޜl^ ' lsk.+7oʿ9V;?#I3eE妧KD d9i,UQ h A1vjpԁzN6p\W p G@ K0ށiABZyCAP8C@&*CP=#t] 4}a ٰ;GDxJ>,_“@FXDBX$!k"EHqaYbVabJ0՘cVL6f3bձX'?v 6-V``[a;p~\2n5׌ &x*sb|! ߏƿ' Zk! $l$T4QOt"y\b)AI&NI$R$)TIj"]&=&!:dGrY@^O$ _%?P(&OJEBN9J@y@yCR nXZOD}J}/G3ɭk{%Oחw_.'_!JQ@SVF=IEbbbb5Q%O@%!BӥyҸM:e0G7ӓ e%e[(R0`3R46i^)*n*|"fLUo՝mO0j&jajj.ϧwϝ_4갺zj=U45nɚ4ǴhZ ZZ^0Tf%9->ݫ=cXgN].[7A\SwBOK/X/_Q>QG[ `Aaac#*Z;8cq>[&IIMST`ϴ kh&45ǢYYF֠9<|y+ =X_,,S-,Y)YXmĚk]c}džjcΦ浭-v};]N"&1=xtv(}'{'IߝY) Σ -rqr.d._xpUەZM׍vm=+KGǔ ^WWbj>:>>>v}/avO8 FV> 2 u/_$\BCv< 5 ]s.,4&yUx~xw-bEDCĻHGKwFGEGME{EEKX,YFZ ={$vrK .3\rϮ_Yq*©L_wד+]eD]cIIIOAu_䩔)3ѩiB%a+]3='/40CiU@ёL(sYfLH$%Y jgGeQn~5f5wugv5k֮\۹Nw]m mHFˍenQQ`hBBQ-[lllfjۗ"^bO%ܒY}WwvwXbY^Ю]WVa[q`id2JjGէ{׿m>PkAma꺿g_DHGGu;776ƱqoC{P38!9 ҝˁ^r۽Ug9];}}_~imp㭎}]/}.{^=}^?z8hc' O*?f`ϳgC/Oϩ+FFGGόzˌㅿ)ѫ~wgbk?Jި9mdwi獵ޫ?cǑOO?w| x&mf endstream endobj 14 0 obj 2612 endobj 7 0 obj [ /ICCBased 13 0 R ] endobj 16 0 obj << /Length 17 0 R /Filter /FlateDecode >> stream x]mq޿gi/a@K @ҞvmfV#Iʯ ?|&Gs3a뽊"3E)ޜ~6e7߼/&5/Cn|׵y<;}ܾ5Lanߘ卼nvo?/y:jner;"NX;m"֤=~F?t.b~z=9N|g Spo*ɿ8}?X>ۉp|ȴɕN7ߑeQ?H$"bII)x/P-&<.{Ibqubhj1 ёcPKm<9Z2ͯOɪRDWyXNLv+,0$΍:nY-6z{oC#oVV{T>(9x@^oqq1j~ؖhRpr?y0w/zN2TAt.>K`q1<FN(1US#y WbZ,+i鴞|2~rD:u(}M9ngj@<Gb#d }hN( {]N)A 4Ҭe| ~.lEOr큡.?3}b)TeX!FEi)dύl*qel qXG&VeQk7s9 )lijQSD1˙^ Ψd1Wp:W1l("/dASd R%̪a\)5yJ׫$DSIlJ׫V?eM2BeoJ4X׍>x#ˆ;S/S\KB2wmg+.gluwI‹[UvlɵF ni[Ä@ L9]VU{H2'W WFEߎ6)hZ>\KkA*٬hWDڊgsCaC C2u% 
G"ZgkqgU&Z}. H;L>K3iG/I $JHF5dϧ |OkOttW3)yXM \٣~S|%LXB;zӯd>F%Uk|'/Z8)M"zOj`eas_%*8&"$G_d>P bP#oD8S)eW6ڨRIFg,`f(^ \aŐMOh;4Yh(=qA{FظDqPNZkCs߈&X_q>opCƷ 9)sIT+IE8;$(f&rߤNh1HwXIýta ]]/6(̬@7T0PHCMTMԸ肫Q 'HMdEeRE-q<˪ pZ{ Tze@$LT@"3Qӣ^ p+⬩T|&^6p|+=u!ϧ|V4U|4JZ|#>N> /U٤ɑkՋ$!͸cUh!_`#(ms*$q`^GJq_#_z9:9Ntd ,Båm69WMG}k} lߋh =P Y>y dS`Pq5:d!"fjO'^UN,cy4$jpdL8{oǧ;q}+Pu,7tu?fQ"R&[u>!YE9E9\)3>qh|ٳq)\R8uGfZ3z궪G[aB=[Aj}-˅jvIol[."lg#+MM0聯%4:)L7)YTBd'B#dtR^F(F%N)0c$,)&ok>>E| @;B:Ru4u,н3t2*pZl{N=a/=xHfo불HLwdĮj;甗ǷIޑH"rgJ٧na/'3%|ex6[yWVi *y. Igf[>&_3fݼjz+9iؗ-`OZ#JcVѭ-buiO",b¡Bt7AO8U+ 8VURGT?y&k<p/Y'-aKC(JM LlU-%w'ǰROq2[>F9Dǁ힅$ĉwV'v4A@[Gr=w%Z`$3^8 ?1ٹL?';<ѨM`^?˗$Di6LSc2r6G u T$0d|F+;ZUWq_Ib 馞s' +Ll+-6`Y9gb3%.k&ZAdeEKaP.sgsh!UBZ o'ͦFi ?)0<\Z!r2_3-*-ַ qu~Oz\$we{`?#e*>QhVbA/S_d3F&JPE96a,!{:FEd!hnVLdzL&U'WU(1q#q GԫfJ6_. W|+0UK*Eo/ߪm2*Q윉2}o߾ k7}nΉ\ .l{c)/CPlk?<@mCh™+_j bĆ˷*::8MO.^ڶ.YVX3g[m]e&[vN ` !EIaLQSvEAN,C8L*>hNhf'YOR]x1D0S0 =K޿Z.*P>tk AZ!tjތemn)aW|y&7r9-[ ,G<ZJ!\|v =aBKV,!u}Y,Wo%wlIqiٺI rO+nq=d3ָPY <5>u< _lWu<U>^x5] Jh":z\:ρf?R(mGJA(_P @)+([AEoܩP\z|JHebq襺d BvTo`D@޸f$?y-dm] |hmZZ5ayIB0*nH(Iȁ -! 
t fb0BB=bgcN|ahl+l \n!I& I#΅zB^H!e*va d@~tTtraBZμ⊓#/-`*1j$|@WB#Kt3 2؉կ[ ^RJo}ǜa1&קػE(s}Yz[4reK錎,mY7ksrmX"Xq0Y5?5_9D`0NqRj*p%$(6-<)8T@ AUMGQ+4] lPW6ݍeg$y}8 q ׊Hq7 4 e;[+iæ,8-hiZ:B- .˴BG)&Mk6pP>*4d[Y+a$^wdcgKhݘӳ7U Y opnoIrkԀuJo LO/GjwT6* %M)^wWk] o<$SzOiRFA~όi,cxJuc#1ui ?ue$<( ENĞ2=h(34 " endstream endobj 17 0 obj 5820 endobj 15 0 obj << /Type /Page /Parent 3 0 R /Resources 18 0 R /Contents 16 0 R /MediaBox [0 0 612 792] >> endobj 18 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT5 12 0 R /TT2 9 0 R /TT3 10 0 R /TT1 8 0 R /TT4 11 0 R /TT6 19 0 R /TT7 20 0 R >> >> endobj 22 0 obj << /Length 23 0 R /Filter /FlateDecode >> stream xŝ[mSDRڵm.JDN6Cˋ|]2(>?ꪞH;3]B(TU__WF?Vo_T'OV߷i6~]u؝]W=fkKsM榭eruuiW}xq ϫ/oyulze Օ2W>.\=QU.?%79l.uqcCF9~?U/7 Q?þ)kb5Wk7ےֻZO-0ZKXӴ u~#vMu$ū+Pm{Oaش Rd_FhM-N͘l]QLi/~Ddw~cܿur~n+ot+ersu]>5 jQ$VoWoܛhn~%l2{:pi~nqM]jx*E=5x(aןA>Epf*kqfۈH3h]PwۂrjTyPǙ.e}v>PˏVP ?pmO/ͼoۺ;adTf+j z;P{^&T-^n{dflZ1@B=lLh 䄪U"SjqS9tXJj,hfI|YL~#U!MpMRo늙dgxu/_FL|~r'E#U S Kpy >*|#82~/f7^訰wWNdӑΘRQuDYu+Z,~ՙWJJL` `d3Ե->W#bljc?|) / xv_c%L&MGHvm >{M_oWOU.2Pc,CWKŐpIeQūL8[Hx-hkߏxu3yyoTϮ &Ύ?.r*O'$1%M-r9tl!݋QGxsR;p ق/ "o I]'ggXb=^YfUA!b6# 6`# 5.|TtE9<ß @8gT)M=V՗(O6|QG -٠TS/.QbYi{6c+ &6LXޥlr_dDf78 ޿|k;ldAѵ~>Z:a$j$ME/s//S{ʾ^.2~GHlCA1s].iR=t-вJe HXrU@Ֆ2rD)3 }#Ia L9VV~RqPǐDkB--Jd. 
f^rkw 튀f1S1)1zm2Hs_Zf+^oEXnӥhtqZ;Aevt9$6M65MwICjU9]#N`FRASʖ2-Fdy_Ez'%ƛĊq"kS2> P l> lX 64`s͵A}JL}$FK)GFc|W?RQ=䭒d,8aSX_usvp-3(Q+E!:T HWNi ɔƁYfkqJGܟWŢw4a3 Af`Oxy M˿?p%6U}?Y+:ӻ` Efw7dYvP Ϋ=׶{MBGi!h͸¥|& 5 ,R}F P+2ʍt<͠\)(zE u5buU_zԂcO.6ؑ RqLjUzc\ ^mto&{W[M!mޔ@`]N fM--"P%u9ZlU UXYc)#GJq euޯz zcOAkFLrՊ= WYY{?Jn;E}D@+ ZzU%Yu8;"UBK* w3Vׂ r|gD0Lިy /lp?AxLW *?et~:FChpH}XHymO'%$A009Z"[>@2Xv}֗[d( vj/ ƺf4gaR#v3nڙNՖyO(͋j58 1B qM;jJDb;s78d]S%H7=dn#f9)4Dt$3=-Qcw57a4z24d\O#31x1<3C3MP Kjf0 2)daryoJ(SHج k[V.d"zvQ,4k4'>17MӬU eqZly4k8T2Yz5,ً M5H{BAZׁHK:xq18U<O9hݎۇ .б,\z3[J6R(0i"xUJ3ZD jMY{lj'PE@mx<~VuW3@d84^~ j_a$/4r6[a3 Y%K0ZsviWd@jD9zYH9F XIW=$ 0!@Nh}'nLLdc:J5 -&Jh0`4a]7)|PSl7`.8fltb6˃{J*FС螓 ڇ섂?$-E)TgEj齅n'Ky 性waA7@B ! g[j7(OP+Ⳃ lǜX3c\w[nU:KYWaKǜ z}r>;cRqnOF2}I>ףfIyGmx`LFraᧄL)j0 _rT")+Z%[D%JKfEIJ=Glz,)9%cy@Hcj7䀸=OJv[D<8O"t[ s~4-4ZHB_Kb~xfiqq!X@K dd_.=Qz'P7wݧ::8b)sdHY,aPU$V˕bLlȲS}{a{bH0Ig̫||5OG7uvCM‘cD$1!nO0NTi.qCS5!]Ntk}:vZbŵ2iLE/h0=e[J5W@ ēY֜wecqxi \_p o Zo+m[.iL^t_@F#"!pA~_K0!n*)w@T?BnO{֛7"bF!JE,X8/p1_$:vc&>USFq82mjfӠJxI 7BNh$AkeH ߎ)JFz6&!a rHmMƺcI43$8Ok,Ij`_{dX#ĘwYmz1yɿEp[VCyK4*PqO8"4χ\ ?F1M4Z#"gjC▇{:BJA &e5d(0EԨv28Ƌf>K,f CT5KN-h5"m[4,K)Z狀DB.P0Ƃ 5QЅG`a"NQټQU3tu8.>9 |Ĥµb2HJA.A)5OOCC)9GџtSW4zAo6zfZÐ k)W*iT,;FgS JU׃)06۴<L+BcABUZ[$ĩ/ `,0AM\-V+tJe[#̿cSBi{좬#vзn,&#S$+#boP>< 0ʄItҍl+[Y>U3r()Lo(5#(;yݠ}炳Zsq&h OjNA KIbDϯ=!ks t3kʵ;*vUz,,}D1\!mR`/ O7MjHv'$ƚͧRR\ɦg:}!M=W;h|+jV?m7n~C 9}v wC_y}s _o~Ϳ`~Up7Fl^i@70q߀_2n}<إ_>+H%!v9D֕6o4JLl,7_xϚ3碊^3obrA-UC;xoJ3#R=5F+ E $|s7A0.+Y]\s\:kX9d#ϽrĤK%1+U:zu)z|Tk៷+ Rx (<vfe9[4}LjP\DK4J9A0Ut+9 keW_n4fOwqFL(x9 K)̔&4K[w.TQn8WO":s.ZwCiQUZql<*¤^8=id:b6 \|Ʋt!y,X^ʃz``% вrOmԳdRk!0C"}1 7o7v)I"L-=da#ωZڱn[;L$;Y[16 =Cеc06F=qw{ܮ춠M }NL n`uIЇ97dڳ Pu 19VbGT:2Z8?x dD6 X$k=xY/ٲ7t|;|'۪moKdv7 np::Ϯ겎Rl_-Z'xhDfJ|f)oG2a&[T%}d-s}ZZ!  
g'eL$![nXGyP†Dth+{CU%ds$4[.5ev|)bZHةF0?Ջdr%|BP4RԠX|5j .u맥rh (5.1>y ߈>ȸXXZV*yA 94P|PB7A#b%ZGkp/6i{a zqq#[N ҆4_B=ʤ*Pq<' {gP<@69M@caB*k 8)Y6JͲp6ͲZŐMA8͖YBh_CvLmP |z%k'n:9?yv&F#o}LTsw.+)o18Le=eTOgے7 C̜%fL91,df*gkJK#Ipn;nцvv^G›9x~|[%Iͫs Jke;7lKX,h%RܱԳRPV.Vew\A5Vox  rf2%%D$o~b+nd2EVcFfٜu&*> endobj 24 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT9 26 0 R /TT5 12 0 R /TT12 29 0 R /TT3 10 0 R /TT11 28 0 R >> >> endobj 31 0 obj << /Length 32 0 R /Filter /FlateDecode >> stream x]kq>bX^gAMKؑ8(&/]Ҕt+?9էz=f ;S]]usHMHB.m.}!=ӣs;fEK^4i{쳪JM~s\M^_i^LJvMY7'<n;ENb,]'Vˬ~3O+Szx>p_ҿ+_/|*qw߼O">o} O,}r2{#SoezxNN 5jv6w{ru~^۲b.z]ښm jxtM?3?`mUnv0®AGy :2E;"kG4-a6<K0 ~S0] *Y?ݥwlndl{j=0fȢr?:l>Yuf/KeFsLSe2p_3zv /?&5m8c  [4 9Px)9n u֢u=>϶l*x6xiQ'd ,aLg+a fBS;pݼ,&e fp@γLѐ,ΠԐY&)89Իd[[c,&*ORQ)"i@Qp )m>Les6Pp+* %EOSЉ2杻z7bTjnLͺXHzJ}}eCA|? ^M'Ԁ\/YNL1!y֪*պ< a7uTȃ"*ͳۻ3(* \%:}̼!oG 2lf0:z"M}4H2„LW˴xU \&+M$1`R$Upm"pJl@ug})At܃.Wqb J}(qRcdIr>\j)tDg&}8J(˨41r6fE '͖܊.S3~1ّ2rr䢄;<ʽi]Dz3V{Ђcp*k0Q^'<+yAvCM2]e, " [$V[C$ļD/ rBGBk͙>)9 $[2QcLE*x-ӅE!Ax7}d8_axP*Op;]nZKS V Ja-V3 R]\% Bͧ&\-}`r?'Kx*q5R5u "TP)&1 r E8c" #<8?,I9#`Ug(-?@GA돬. 6,urDfl@Y[5_F>l{j%|xH[Y;!,Z9i,+˱MN )e$<3HC `^4ѝμ@AVИ#9s̪+$u]P@DK/J\h=#rU+YW.v#JN`{}d x BͩnS[ye'$ۢ ~mV2٠hVc۵Jvxcl²cRޛ}Bxnˁ$5Kx1v o!V%e@Q((`B^ejI(6SS@5汎;[W.{5ԧK|`ߛL! 1 MIuIG-eib3(Rcz#ڱ|޹6BcLo*;X]YsYݵ1e:bO{ , ^#h,]3[v`VCbf#yB7,m bJEE- hX+7΃WnwB9[;ȬN(VL / | q m,/k5$9/f\5>1Zuqy妣BF z(󟜢Դ_%0TLqD˱x0H. 0e]`"ZҰ7a-B qBL"+JlbdCkha%2mndnPC\,a/iCLmwmvP1 aW|`]t-l neHm`=$KZ >W-s(=5r #,Vbc \#A kU^Œ&]e|(L.98=+or e MnUb@r-} 1/rJY_B,(5HR(`-dE\,YـDPݗ]J=-_@(;@0/sJp2 /_Mw#Ďm? 1%pa#.9PVRCflxn/VXDBCTvdʱ +0)S_O j!k0#EfQ׃>B=HVDty2tcPnYE0uZgd?_)Fh-,%f%O"U ]]Jwpxs/8D.2/ʬ.qɡy#ҪPJ5@߰\~2Zy]NLB:aQ8}E1&dr~,9xlY#1ayr #E! 
0] ՓIFE%[5>"٪y1i6& >!M2L 2ZHV2m Z虜e"6-d6M3PzHb""W7 Hsf c 3q ; i1؆7iL\".)HZGgc2ΰ@a3uDcS+.ZYuJ{5Hm:ى,šlӸe0 ۛ-hlknW&_BJ[18X$uJkK,mXK :òQm.:&^ժAgA)vg{ڒ"=Tt؟Jh;-l;.!D\ti)V*o-m,"bPBA~NOAgV88ȟ v f&oY)l5:=1 ^c2NI+>Y(E[Ǵ yr !<|1RXhOB/%qLyNM:xËePxDtuB# 1uL52fA+M)6kp I̡ñox_2R[EPj:3q%Kv^LU<.t)C!AWn9r2[^`&l0,a)Jp\G˛2a|&)_כ>q{ŵ]˶1q3- |z4(psCkgCcRlUKXqCAO2}/X6XA 3d fjIZEjwhk(Ur7DhC(I-nF\;+x9۷P LlL32I fKaF RB ( `hVTh'1;xgڕi˚ +b3!y'[ ixeIgvry?]h S>*Jupb48jʃ[#D@e$E;oc2gBʧ80;;:>\05{np:13 h2N0ȗo a gW\ʂ } YީKJ7u#uLbʞ*RҨ | f{9&ǺEd&b5?%AR3NM1!csuLUF7K(֎'丒inJ.ӗ"aj*1vxK[9:9.T !^OM\O,'=ù`cUoi`0-x _ߡ[-2E1URERaBQ];{>}I Ǒ7v _84rLɱ̽?W{tln^+I^bͭnZ,p.%; ÚއwF}O=|Fi2G"|P!2ul8͈~9ymL8m`@tϟ^eQ|]j>ivpLk/`L)B_ 8D5&83^Zb%Y5ZdCYΫo{a\QS]³ʪKM_w0\4J'B:0lTW^Xۡ>'H< kd-0GU$ߢ*y+8M)UBA$^FD^:"IVij %dKQH4CZVqHH̥`D3,DJ$>akաG x췈-xlHtp:rk@**_WQa9'||V'@tP.NG5jҢ2Z;/8Co]]Wf3yNt,XF1nT˲qKE+9w ;A|WeUZPS&xy$n%2}}cw5$-HSTĎT z(TfXD·YgQ˖(xLj(Ӷ y|X'0;(|*>qf.BtExDt1p||׸AA\-!V<wwz&|~!D!?}l^* >Oye$@[@r0EJƨ-FOmݗN[5exR9GW,7> endobj 33 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT11 28 0 R /TT9 26 0 R /TT4 11 0 R /TT3 10 0 R /TT5 12 0 R /TT12 29 0 R >> >> endobj 35 0 obj << /Length 36 0 R /Filter /FlateDecode >> stream xŝ{qO1%zO:-繳($e8H$EڈN'S)OO~x3=oo9NUmwe⻼t}"?7Ǯʻ==V缬M#΋sV}Sr|>U]<]ctlO!0L1?Y{l ׺>@YH} rCyϝ0(&yC?7ӛRsuy~8>?,Wy]1}W/XCU,'X ·s]7Ze+]FY}Cm*: ˮ AO]'>9vcBN Ռd?QgtA 9 ( Q]׹#0ɏuδ:n&W/ҡoXIW;<ʜ*${`Nqz3W?r[U~x~c'f9۲.1Sz(bZh ex! r.UJ_g//q0p5T 3?F ut/H\< s"4 g""z$~^Abd_]w%f$Ź&( 2rzN6tz%~@@AG ڂ敻ӫdPO,}E!z?sW[Scx(0l G[T}dAߟ\cݕC` ESvxn$ݵS*V_z;y&?| ?|> B,k]meDk_% j%SZIEBY/&*,7XJ?ы~*a32s\6L FvZ)i&{%ɓd=ГIP4yl퉲(@Qዓ$p(`s0FAH\j1wqxERsa{=R0#=@6<'F1 V2I*#kQ+` :7GDk߻^~ȿ^xu78_s4e[{U:ޢ׉ I8=GT8e*,xfS$e1.HHGI]'p7%L'? 
Ғuw+e__V%Mbʤi~ĮwGS֐n,1ȘX?x>M)1smq4ۜI֦cS ^*AҤlJE3O|j =|QB r#3FrYzܮ(%6I&ܮ\ 1CƌS Św3/Q/Yb Q3nrMyu#/0 #hةcE<)l-/p;S(SLFt2:bO!ye{x% 3PQɠ>ŧP&G甙S_t?Lfޥ~ϓU~ı'&dY rlI%xN?IX:K<&MZ߰{nSޕ>GG8h{;ǢGtm)`0{+ A ),zs͎r٢[VTRm*Ui+9mȤZO n%[JG٤&L&R}7tK:;a; & ս~' 0sK ;WN_<}3p3cD]i\ܓ^'o++l(bSid_< V|)B]L}$5pG~̃%·yGemqMG!bRޱW φ+ hxG 19P)fU m$y]؉`ZmcU-V+.Jſ|.Z]dYds)OѬ}J?#CKuxT=,W<^h@(ȋIEBY3ei)ޤ8f堐CA/tq) rNraT ޭ! (A]1ʝE(7 aNT|vtQҲU-d7w/!]uOٶ 7s\4ĭg.q:'3op6QK&I}VNz+9"PF0Q__,e[UO8O׵/3+} C_nkBɣ0޳]tAe$Wsg?(z>ʯObf@\^sGi*90UʀC♂'ML,0l cWt~\8+:zLrr=Fw~OIz#u1bzpi{2#9{"{RfcUNwzv?M^Xrէr̥aTRyR`]qnm9w=dx-Ng羮mvxD OE(1A8\R=v?Vwׯ'mk]T$evaq<Ĺid9 1}")X(d*o37~5gnXXٞ\ޘŦAě[Ι)n&9:8[P,!jr, 9#R69:zr%'Hi, *hƨ^}8_dÐ +\.NAٲD FHrPvpfrQ9A(Z`?R\M૸'hh>Ӻu Paɭ#J& yq"`LΗNV(uA c1R)y{X,V6TJ´vlWwGe7i=y(U#9Pt2O-q|Z-=6rC2ȆHx:|p6DlJlvR1Z6P uLNjٛ?\IKO~,^֢atlHMƉk/]O([#Il#S/qDi߆aFvn+/=S җk}hR0@  y'0è?]2Z IAXv04HؤfiD%dz7oSoT mB†6H+80ivM 6"JR^~?& dݪ\b[~$1oq F8ўAE}"`nw/]n۽_ Vb/Pb[oK h/choV?B|N䯙pU(.F4<*쌲u /*yր\ngd6L$k~DIJ4 6GR{Q#zjB4CaIKi]IEuy] 㔂xu{&ݽ~yV?\}EeHLc8ztY1p 4# PI034AK Jg;S׻>1$X9Ch| ߹K slϺi/d a gHǛZėեxɒtDl`߱mݰVXLb4RXSvϔgIQ FwDxէ>xHm8QX\ɫB9`+\4JSc^Z񏼜Fo1u׵_Q.$ ֥9Ft4cuSԕdg>K},Lm,z0F/+@]JCS%gر4uW{. Z^YJ"Oɛ{ȟo-k컩E#if}>5!v:4-6P} 'J΋4ygcvI'荰3)fߛX6< ó>XUxw2| o|dЊbAj܎`W^ە{ߌJ!gF_\RSM\_Uk:XlsbKv%u``z4K:+ek1cGf1dcWEITpLmޭڊWr+߽P:3 }Y7ϿƢAYylܽ|Er[s.߻H$NJ6IpH|uJza75ͪ&Ֆ`mjt-gl;0= k i~5^+%Ĭ|6ggR٪>3&e3scc'޴:Sٷ=Tzn(^$ rF}e򕊽-kQVtCn7$47QF |yyw0_ɛx(.,Eb4C2r+*>-Nb]t /jߢݖlWaLH"׈xkNL'E]77Xl,!5J {U3Qe=-B' 9A]HG2R`RR'T*u *i] (t u$I pB KJfʠCVpZyShbHlᬷzޚ\Whet֯@P,Jʦɚ8d }ݲ/ Y˻P1|JBMd-%ob!wE@MMON#'i %jx$9* 1^ݮilsbOoS^c%14`&Lp~}6yKŹ^ y3|b_C&_xmŻ= 5A~me(Q8^-YN\9'(HJ"(b/gyX!4}!^^1V&$24  t1=):cVy& w)BAx_Y%B5f32&ʼnBPF\d)uIߒ+TnD7h-ɶxTp$kK{;V =5+\ mH1XY] osfUVxڤ8dUenp,˶Fɟ(Z AUAen0ҿE7M'*_5LgYђX`U %u}jg 7j̄E-p)T֤{~bAfbDΡ&Ѹ;"Ŧcz6Pu(Kl}dpno ' s^$O6MJA e35BTy:w`/kO!Py:J 6!&oL`AQ]&z`6ڠDvGCERFh$(64Y"lj,(^@uԖYK3;͜pq+$U-`]q L6=-Abm0d|inɏ A7: afdOwSn "gm#gD& |&Ӄm5j i ZҜ3E`̚-޽ N? 
XKQdkA([H`mfd3yLwMk=ݠ0'&[-vp91I䁟suD.jk ͉^Ep7of8ָoZ-l޿}HfLe褯L.vaFŒBxG/>_ͷޮlk\-|&eRw v/kk08e1 9v0$ IfZ2mO~6G'8dk84Dwv/Im{b44 B')/ KX#ʆ$mZ6Θj!p0_esR0όԊ\o3bXWӥr8}'uVQc€̎3HGxgE"Y $,&ۻ]ή{O\x-&+Up҂'+vղδrz(_JO_bCV&m{JAGh܉#LL8`"~GӺc1';{Ҭ|J9@~Ƌ=ڽEhhBL k@'Mx0%lul "6a$tL!hdgxQ/23,XOIAgG3<176:Zs&k4x!XTR}Ytr;\e\y .MX۬+ŹG8/H]#/*˶f@9H%Qս/3w^D-UjZ*Deܺѱ6H?sM؎hhmG <~Tܥ-.MEoCv尫k}ϑI~c4:] !@aic+GO}'FV~ [h*E(3SҪQ#t¶!q79e[xlIs;YE0Ntt2aNN;[懑Z¬І C\A% @QJbP*zxaGr7NAyEv T>__ endstream endobj 36 0 obj 7637 endobj 34 0 obj << /Type /Page /Parent 3 0 R /Resources 37 0 R /Contents 35 0 R /MediaBox [0 0 612 792] >> endobj 37 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT4 11 0 R /TT9 26 0 R /TT5 12 0 R /TT12 29 0 R /TT3 10 0 R /TT11 28 0 R >> >> endobj 39 0 obj << /Length 40 0 R /Filter /FlateDecode >> stream xmqS")IzصcDzL'e;*0]*GI[hV>e*|z{<)JIŽ===~΋RoN=mOy{*?̏r_y!2;u{js]_cMϟ.$񴡳 yUY^T?bYw8c"o'o_\p8WȳqmBte|w{*WS}xp7'"=z'm~ϯ׿Ο]A .}{C:SǼ=^w_>O:EIO ~ƹ|Б?zr>Rq|v'1Vr{Hق0!V6P_\)硍 nc՞&SR7m&EE[Zl=mAO][3g014| `l f)WqcʞbPLdz#>)KSW>7CYbBG|@ŕr,dn;ko,8+Z*sihdO#A'~h\78S-.3d8Y]Oqgj8"g(Kq^LѶuwV|ѽ!R2*op]}7gc+f-Sy(2z7<M΁Nl#l}W-+9FԬ?csܡ:MKNQ;f b#ط}ub@,8󒁟Up, &bh`{ UwR>,$C>gJ (Q!6(AuN@|UU84On><s@}kO~AXP[.RU|L$unWTy_bHMoNK[Qy_ ? 
k *@"sۃ #^ "·[uPZJQ=X~`_c Tm!Me[/acq"{ya> D"6-jxq̓#`|>\V q€aLՒ6-@FQ%0ۢcEQEueiX18 1u ~XFC˿e;" yO=R3%&@q>%PΌ2Jv)> 9BlLœKܧBDqPc$g]=@d&0 mb5}Eo}WcmuoSڦ&,>RLS;rpD u56ĶwEi<%o>qJH"+Bf+mBVcGYJ"]ǀVil|.(}x)E3 ޺{_ݩMoyo%{ZUVE Ub!lϳQ7uEiL"p[.|:Z!ZL*~ʱ'RWٲUM%AHLGU,n%Pd8eWrulf&"1NDBR^', L돟,0O5Db-q`OI7PEX֋Ann')oG0 ,^<7x&A0w;GYw&%*=}38Xk4C jB )D36 6A{1 AHZKn6Fv* O=f/;e._&c?+Z[m<$g¡Դ%'VZ-xChY.63[硏 ,QkKڪ]XYݩΌJTZћl(QQ@>5,LSŽ`uI: ũT u1lS1hqZzAA^OٜrzP{ՠK-.M|i =8m+@Mdg/HuX2SILXVeڧfw7uWdnIbx/ %D$a/fJۦ!.ݲ.Y% %Th}p\)p>'8:eEN9QbFAJ-wڟ2l=@NOBՎPPV: 찢 yY9[0/"p|]Դh33OմYG<2G2$j]= qJ&BkqSAse]7tzi| (SX'YOZuTt1%*X#/oݽ?8O)??cʟaEmVq\dW[17WS8+P O{XBb#'˝8…&"GwZk],T*l HTFyLV]B sRg;dSvMNa8I(zNr ی$ O*ox[9d)#5++NDaݟCx6"tޠxdeEC.:#n/X.A[k3xT PungiH@YoFbycn5Z9؃/2jXP/;S?UdnɼҪ:W(au`_;g/6Ofd4CUM#MFI +}{qV~绎nI8խEb+4"K)9jh]vm_S8I3'"0O&}oGwqFa .cЌ:jI9wF ~)jKo8p6sD1V,>xV*J8BF-.h>NS:YdЅ) ;iRڰ9Җ9%h#_!刦r.+,yB"nJ1*6^ΰPBBĹs$Rur5LcR=(](乛J@u9άn!)ʤBڤ! pUمd|Yo_Pž>zެi)Og+IMۈuӴI<"璵m"9*a!{ΊoXD8"憱ͿOrr_AxoX :N~yveQYQLsܬFrEI+s h{\dE7DS艹ePJ8$=~ɻ|ʆ#*M#p*a@QԹG77=׺y|E8% D~/l?0"h ruGG.5dKx. )N= bqr'm!7ΔGiSNJv3ZX--_khź.kzбOm^& txqW@;᫊ANl*T"cr3F% r=E~ {(FNg|2!7e9j{^kBXm,G-GFia^}Mㄮֱqͭ:"Sc@Z\L@s{iu9<ջ/9ˑJ3F~.ZH+_gE]R_"2p&$FVDe 1=`&;ڴntEje*댩Yll΂785ፔp{>s{ ۋ,eɐ!/מlJ+\p pR&c`ۈ-!ۻTU_"eHH:1ELo/g9fIʫu5HrK6SbNrph$떪ڀ q +⽤*˪|%'=%[Co$c/0Oax1D<LJ:gc~- 1) Wəw?K孎m["15 MuV@=%9=\c>ihE}$f{ L"oS,=S1h\A[Ixm5^f"u H.\"m[Y 2t6STbge٬l5[7 zגv,-9VlDV[+΁m5j[h4{6]/wXɚCMQb U/]Hk3ď rMۉ1% cq]:N߫80"O*%ȼǬj&{x,^ҟ^iTRZ(4񎲴QLۉK&؂|::&'~eArl^^.kuOΞQ1A*қv ¨%%dĬTleݔ'!-L0:JZD?@ϔFUeF* O&z Vr7>C~nHL]BLnj"r>JbAGjK׾֓D{`^#LOy!8lk<@dp"Q89jT/X)ⷃ%\jpT N볉,ĸ%?rq?Dq{,/b&`zʤ0 z@cf1cS=6!wAq'Gz?WJ:f&n,M*m*TmF'T6ΊF(0{p:1|. 
M0唏Pfů}L~y RPʼnԈZb %ly[ lE@NB@j~lCI4(@&sXA~>[G$pv%cJm?$%/>}0X'=m=`L[I-qdvOv^l<~$F{}NΗJ<ŭɷ7XmEB}}F=K ׈Y_Y#˫KRlOA8ٹr8S#s4#%ikAcӂ=u@@{xP8y1e\ i^ddGT{(u j鸲U^߼} 㒾wB endstream endobj 40 0 obj 8324 endobj 38 0 obj << /Type /Page /Parent 3 0 R /Resources 41 0 R /Contents 39 0 R /MediaBox [0 0 612 792] >> endobj 41 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT9 26 0 R /TT5 12 0 R /TT12 29 0 R /TT3 10 0 R /TT11 28 0 R >> >> endobj 43 0 obj << /Length 44 0 R /Filter /FlateDecode >> stream x]{qgEx.DvK;Öt{:OJP~=<XS)n??UMa߽O>{S9ڕE5}cޗcѶ]7'WW]^W/oW)c%?^$5]uX.qX:?Ȕ\ݗ'hrs^v]~Xhw_}LzhspV0y\a7j_n؇IL$CLޭsjH%gyrsn<6pMnK")qt 4%u #xju?}z߶hEu;Ű-@s!Wŕ*lӴ[mmj0u `,ÌYli[fl)0{do+MXg~0X!590pkXjN1b 9UZ} ͗ .8?XZm 60 .˰O73ca7~fCxBN`NP5a"cTܑr'X"Nk>[7c1[9+Td`!t9F`4͍7i+[7*J#~Vr(^+Wje5ñ8:̖"1XP!)2X007C\شźDCmq|U YKCpO8keIߥ8Qu .u[4 b{a0r$׻lh/ǪzLy/]f8HH;d*$C[|j*^8u,@;9ZPM~ x-6.uֵB,vM|ѽ=ug݋h1!i1~Ra(~_ XkњGEG[α.-;k(),G~FjSCF")%t{G0x%ҷ4M_u]Y0DRfݼ]ٔ5~T";#$wi)'bC"ŀԁDllDg'n=55PLgT6}bM@Te; 9:켃eA\_f;k)]SXSU*\sC263@$hO~ƈ?@\{bKW!6A$H$2A#${; Lר_+*,zH?/Q yF߷Bj&i% !sݢzԂrid=@xE^Si_FI2T[ysҋ@BwD( SEV~_sS0'>WJ2l܊NE1tPU{-* /( SMdD8Ym 4e;1: R<rHIAmHo!Ibb$-ž^`}&mM%E.ЦXBV;QY9b5,g:f5,6uIy-:Օ ⹛yq=2uj^Jf-QXr}$cBEyG hY,+u] <YǤ@m`]J06NT9 x=)rG0R񗉍aE8o 4S(0 8a"\}X0C{R b(gRSkI!ye5r*QX; 3ڗG; 3J6ky0e,C0y0F{J(ObW0kLp7e!>m1EO=oY,-3(ī(8vI|)ۃ8e/8J+iO0EJǪ_RPȉ @›k̖%*t[\hcY@G&Ҭ#`k)܉04V[U'm#ӝ 4ׯ%?:>)rLUS VӸ,'?L Uz, b٢w0lT*U9T] <-(ղzlUvN{gf!l)%5=h$K<vR * PTr̨ra!qC.V~3Z[PM;5c<-R3o4c9ůd=7=?BޅYܔ}h`" LaLhEn5f=2GG35&*Xj4#6+T4lI:1D[„i _!ǃ/!4*ۤ"0Vt<ղRZ nĞ b&)^\l(vt9aIh`mC' ^@NE͈"Iڪ77xx8vI.q墛Ҽw2n0yh5ӾV^tM:G /? L$|Q`HI`% +EDHQEh1U?chĠ] :VxuSOQ hBL+Pޅ2dk3"ڢ vd⋩$xL 7F2s~IF+À%d1X&K~jQ2#m\ H/H%\< eJ>B;3|yDi'="Y>YϢvBP\9CX! 
G<‡F)5,},8| ) no}X?!Ҭ .ΐ9#PvrRP&>Ӄ[H'L~)3*8(9q!11a$a@fvwm=o=k!D) l[sB0Ti ܐ`lqziw GeA$q_0%/6``aRxՉl,K'w 0cOj {Ihq[5nC9z6M&I@(D+1԰8~@6dfa!+2S3 ҩn,AnYRXy&X\)[\\܃G>𺹧C.X2"Pc<%T f*삘:dki HA>H.<4𗴅=y{C$[ؾ)QWkth:`ch.KMX KD|P8=G,{/ |4fAbv^$K8]gAN^TBlOQLSFw }m L&".ӂ-tǚS&+9{P"Y|a(P G,qPhV$[C#hN~Ub 0ӄ;6Xg&oGPF&+:n(6ReYD;T*.5f{ ~9F-؝w`I=q6cעF 9FV'Ik׍I,N ~ %k.$bD,ʚ5c"L#ɧlJ jV7 K`wr::0H8Ϝ&I˦@ o L<V *lJ"'`j8$iCgl o>i%|Q3V&YHbN]CVwGPIh~XQ NG {U)#d;Le)p-m W8`;mF2_)WSFD 4i79^x($eD yˮAz1 d0]]XSA#…E $'%8H(>✆.G\X$<:[ɱƔKy1u Q|$a² oS>,_g_fcZ"K[ue+++y_ea7#ICM۩RZ#1ll5RA@haW4i&|jI3,raZ@Bp) oي#xőoP,bmQQ~fRtI5ٳЈ&v aGk*5>B|S řxp:lCg Key9.Ywb3B#"΋Z+DB Sr2#\x%U{"O/[$9"J24}Y14wf4UчvI H*D f"!vȋ*[@Iv@c̱J%Zri P=ic_A|K߻F'Y 4/aU 'qI밺%6O\#*(3Ug\ q9z9b&TF%!EHfX~ٰxCj ' ZD&4WPIWy}`׫;eݻȍc 4P|"?z!-loA>o//Am|InܽO k8ZHa峀aІ豯]ItmPVnz‰ċcEu4,`;8(T]錀yH`/];Xʂ~ _ў~o[Zvxd{[OS"8m?:Ň.v)PH)sd*wd#';#?Bv[t~۬rGٟ ;}m9B_311]]Yw kݩ{mǨQaAR3btH*"zD\⟈3!qv`gpA۾_<˦0:!c{Sq,ب "s",HfJwbMYTqv*La T`+bՀ #0q To:}D/1Fp3-֞߾uT_ Z{O(ĭE-T$u &e0@wKS ? X`pXe &Q ,v6QyiVSK9J "1,6W|xJn5l;u v`+tj9q-nt2^1Ñy '0;o|!edYMJ[Se cMN2yĭ_82<`1#G? X^!a'*+KHzngqWVܝԶEQQR:mEqv/7'ǽ ["g>&]u\[KB?!+*Kgp+=f*j:,^`($T'!S9Ϯݯ@mRT"=dQl$Gh/dƭ%@Q5X*Jq8m:7~G,ЎT] MҮ<>gpd-*&iψX|flYl$5):0z֣,|Gni + C~t]=YSd"Z%{hZ˜7ˤEJ'ZD6 &<_onª9;,iLНc mUjb29$8#_̅aj,NZt9K@ge6VmC&jEw#9<]V~5ta:!xɚ '`da[۳8 x1P@ք Z@`hm ;~ 6bPx#϶Xa[#:ʝ.,Lkkaݱƒr&k~G.YcSsj}}Z i/0lxضO{gvl[Adcԗ'fQ&j+u"|:yK>΍m30UWRZIPH*gg%IȨz< ΛlLF;V`cJɣ{ME#FpVH6!ܥ|m߹z'q7g8tFU '8-~c1 $iy P^F4'|,v=̧#'PȂBOS8 {<4l2Y+ 9GPY^T@c@ [{Pߍ69 Sx6nD8}ԁ aB='<7.'F6.<>G>$ V_hN#?TGNC^5.jqJ,]waLe"3A>WxѾg+L]5>$!ьUɽ$FJ,oA kx,mˇvp8l+p(o'q`w0:j^oZ,qn_ 9."v Jl[é { ݘr}|! 
'[tn v1Xڈ XZ{`Le۱vuA endstream endobj 44 0 obj 8970 endobj 42 0 obj << /Type /Page /Parent 3 0 R /Resources 45 0 R /Contents 43 0 R /MediaBox [0 0 612 792] >> endobj 45 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT11 28 0 R /TT9 26 0 R /TT4 11 0 R /TT3 10 0 R /TT5 12 0 R /TT12 29 0 R >> >> endobj 47 0 obj << /Length 48 0 R /Filter /FlateDecode >> stream xmqW,QOb(R9VXU9)Rx&)ڣ|O `"߻ϿʿϋRo?Ϗ_y{,?۶]]=yQmZ?ϿʾO5 ڡ`rnw۲wG <.ۼn;~?R~<a2`=USv`n U7 ʊCF˪.~pɀ9Zg@ W1r[:!]1n+xvl|Gyop6/eYo>5C1Zﵱ^o:H) #lkn@Q$ƲOyJ9ˇ qoE.S<@pqɶk ٶ.!?A '|}B?1 W\8Ja5.׾ ʯVw5bN^,ը4 dVj)]UUBi6E{p QvN#t_ .Ggbzǘ_](f>~3lL {;֎lOlűG08Vi߅QhEէ2ƱHKm-e?]o4ړ$ؓc}g^:[d=C1U~< yD<%[v:61GF)LlH/Wo;D>l!G{(l6(2``~Ǚo^:6s!z4d$yCIL?c("*prtI:fջj1Cfss ABʞ&T++BLz cXQQKHc=jDϾ[)(h Kas5,xz+˟_P졯^7* [ŶbKFL@y7qo N;k2IYj ȬΜ؆$Do\\bfA n!V:g8ک%my$Z=f@aQN1HOx7e1OYնbw096ŞLc AXE.gF#ɨIƇLx ʻ1V^h ڶTnl,6n䏸0_q_JoBnKZm FK)XnFFT>שw`&eTzC0{FL&q))u ,^v;3a.qUD˷%VkhgE"JLT;1r pL;dT4U38Ιm_-_:QZ[+%B1 1T5!+Gfj}EWoe<u=`#/5'G:@o1eHE)uW"abze_GԤքM\9мe`\ꓤUW`*Wu#(4.0~)ѷ(.csT^v4@aoQ' DVښwIjysB%k&Y#112O&\l27̲hQHlb@fM4g.jl͛(yoEW=1|Qftfxs*D4"Y.j[Q=tEy'\vV'qiېUg֢WkYH%/}͌_zR{ww޵R&M$nIf۔b#_QI0SBPa.#RLfzi#Bp)JI mC!_غn~򢅧D֜ +az};KAݔ?0t=lnYcEMI4 Nlߘk:wn})Q&Z*k5<]n.G/Xrͪ֜ ;, `Q׀]&?z2%$&S.)?_Cf9#`)z0,4H3Y[&zʼn4x{N԰WNHA`"o%O'?,9Ν6xȳy38 E٩1 L6Z1pY 9ɍV;U=t0^ӃItr0sFvb!Od:,qtK:jĚĕ \op XKY"XGȕI=9<@2k֑ПC lяs,~;O\ K e|2@Qk3OM;ށMzǁlDjq #'+̊38%8k"(˺^C n(iD>{rN^v}j$,#JL_^ R¢p]sv!lյϝݹOxdM\7p FחtTÚǰ6m%Jw\"{Ms`AZw'[oIõVAJ{棌78>ıozM ]pWCiq oXbIlRO[cR -;6abͱO{,B2:,oqŷr>ZTj3r^`}?X亄m]<0t"DO .*zY]yAիb#pFp52m^rnZ,8Zc -W0Q:a~E[C)K"8[l.p#kfW*L|^s-1!Uw5*Q-%}*X)HQAyӨ6|~dS%NCd+Y`*J)Ŀ> _IYDZSҹaϐY%B+9jB}آ,ғ&2.A&`a=)B >0!󀀽ܟl! 
`W#e $L*`QP8B87M7}BՕmdL&́} CBd'l`n:JLyY18aėx" [ Z/9G,ʚqФtfs^TL(B@4"RPQ^tfa?ESkܻ"}V]aPaRʚrmƍ>ƈpAG]ς>PTJCJ,5;" sxc gk) ²]m{qF"!C㲄n F_^uz*+P -`Os$&vow |dV+9egv8= Աg"j7%c##_ښQ qT\tS08ϸM?VcCLvVIeb`xb؝c\QdF Y(ԚKl QaRj}˨ݜh/&`~t?i%*FJVQ21/QjDe #hL}O72NPPD'=Z 45gx(H/d0Gh` Ex&ґ!Ba_#v>iJhˌ %?ͤ 7*z-bD,dIT]6qDX5Dna`RayG45,n ?8`1ō%[Y;h$}Yrgtl9EsRIVGa=2$[3uB8ln$sa.gX@8/,,<): xiY?Ip jGKn[=Mߣߏ-Sp/K|9'vL>B,R}c*>%y%Fºu$#BhYd3ud(,sY$t֓f>a7 /+!5&+)Lbv#g7wVWrǕdU#=RUd}~Bǩx2RN$KG‘bZr "ϿhMH <%P*b ߐ.&ovlSAGaGKHx2||R"k2MF)JD#@)QHMgDF9W+Tl^Mk&M_̵$.v{Ic8ϧƥφXjOU~EOV% !wO a"|gE{3ioDxItj,DQPaϼhf['t>8o;6`ϰ8SdIx~Y^Y{5ܵ,A|bRq%#bҲ ӿxX/7HM.`; ?2"0Ә<_V"rP2ǹ +{=$3HI7`hcAxHk2Q$A鍳B9 k& ]@#q3UG'X k@[Ki R }# J}Ol-1(hsY*hR:(Nd'Z&4i c(pnU؍W f.'-~gy1g_JC \خOMhW˒SBkf#f x}YE|Tfixmȇ_lgi>QNAi52!cr_|>C`L PޜdEHV%Z)!MrpTZ 的Vٮ*АXc]Y>M6uڅCB>3tNI('(Q\G{|}d*-;L{[b/?@m~$Fz`#3|XG5&g?#C"̡y~xڬ6rdpV h[0?ԯ4@n0˧$x_d[Fܬ"us d..6ְJ|OlSl'|Idd/"XŊWR3H$M;ĸafw9(.w+? G6./.ӿۅ! ~o:Fr/Nߴ:F9$]4̜1Q?q/ 煓(Ǥa)Q& YyK`!Zp`%pπT)$\jʅ` `7>GF͉\> endobj 49 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT4 11 0 R /TT9 26 0 R /TT5 12 0 R /TT12 29 0 R /TT3 10 0 R /TT11 28 0 R >> >> endobj 52 0 obj << /Length 53 0 R /Filter /FlateDecode >> stream x]mq>1i&yT*ZflJU^?)uGh\t f9vwv4~AFtMw㨆~TwQ=VWTe}UY3U]VZjmח˷e|Ϟ=+/Rh֐R :uﺤ.APuxuAT+/2K ouedXM f+s3aDfH:(s\7<R#uWN cf#x1YݽNv^7M CwY`N;W0Brd|NjjxV7BQzk%zvqSD9Uz `2FˋCbX. tÜ3I~ş\nN7+Y W07u-=>-7h;Z_D0%p:2㍒| ޼?s0Z͂ȷ UC&X,'I80ݍm|087{|E[u}/~f2L%0ssI{ÏEa%u̖(! 
y1J=Gk0Hp1L>T8A9i'_K[z˦tn瀆EwFrM;d|'m"Z:a99Kl%8Jh]R&RΟ!Ո2G)PgKl?gdbS,f$ɋ2ycR Gi\b EiYٖܓ-CXN-KQTbߪ)ͅ dqaӕ^%\%N>s{ᓽY%eZofPCGF ӽ \ NVX3`ᤣ5M刈XCymbEHR,=ji IFc豾.Y K3H^^)`3=^l4)q^F>P:BMF!pL5]2MǽxK3+Nzcb /v=ve2VS<`|^*h+ͲL`fݴ4M]YsfR_K0"3c8'/, ժ4f_cz3s(߀)Bg]Ϝ3gS3g&n~hj99u|U0}U=4UUפnw@C]Vx~-/$#{yVޘrn*?V+}c:rN(FIF FSyG(3Ȱw Yv^}Biݸ%1% ,V @rl:Uz75djz2d*c#K,h j@=8 =zKj]0ؗOR5}w{m}vQ+Pur  "@`']iL+7<jE' F4ۢ4nX8ANL6rYcQR(0 .F8g]+R'Lɬ)%+D@ٚt&Lo( 4FΑ{#uEN?9$ȶlPjJF/Y+P%v("2`L1xV;gg19l Wi1 MvB_' Bh3K )e8H8Pr_=|[ zpc: BZs grݟ8 h15qG#̔.8İa9R3j˫bFűp~`8 /Hqgx:ه9~CU^S LZu$J{#M_QnX_26+ |e)8<ŀ>뵵(VլNHѺ;PTQS|L!$-PL:$vЯZO~Zn6Ն{vs11G@539;jZk tkσvft6Mq3?4٨i~XڕE :ʚDIT%u,Z].DD ;x) i.&ysKXX(ç 6|_{Huڤ ,)qƐaэEg΀e^HaŶLۀѡ iჳ E|c_˾|aO(gʟ ٰ}"| OX@Rh6,Ϭ9hػPIolx(;PlBKHQa,LȇxDtx4QJ7 ʸmgm " 25KDǂxťYp dO-8<-WܛۗRHe8vh(YʵqVx~-VqtםG ,+ 8Mߺdً|]{> [9k@.m[@HQ%/}k[ߴ\hySx/s+Ũ.o$VϹkkjU>nc2žgeNh*7f٪0V'A1c)~s7T6w)m!f5AgRzڵ0 Yas+[q "wCl߷H3iXUSL,[-QXN%UE~!5J0(ů;1[y#讶6l GȮ'̃^<$hGᤶ5p(% h ?-G~K=Up|<:-3H._MhsË7l/lQ؆2& tR-kD |iv[" +abiTHk{Cvmr*xhgԭGX:066x*m(m' ,}4=h cdo\T$fj ~[;t5c )~zמ^CZ">*n's{c.?ҳA"C, Z᯻F@{n3Del&g3O$ =K04a@9$zD4:;~4`:p0G2TXq}phF×mjnXCQ0: g-rIJ݉F bf~3}PK5̙G@;sDsD`EF$Sdp$2+djp*H'{^Z '.[I+CN"j7]/3"EW4`0gHQ튶9d`#䧮WT6RFB !&Jx^~+M4T\7 |h ́], .>QMc>k}$J(Tr۩eZm'ոoᚴMeY.j%/n}V00A<.}NKSQ=H<@VhVC.cG>N[6)a[_`,3@uW4Ǚ81YqűE?ZH&[IpwqIMvCylJfPVa_t8muZ)I49$0 (@lb"y1&=ۦ3JBHF$qKSl0yĸ\Yľݎ1DYVB:s lab6Y`g3^J2 hx&cq3^V8F j Ss')H 7l:KDں#:rgܚS5iQ}C茎lj ;1=bPmlrJ)PdXLY^˿k;hßm) J Π2GFƎfLC,=G C!}MuHa1]ݧԏAL$5$3m&%X,?|GyvRZ@cH9Aeyqtʪ7պ^yՓz3;,~뫰W a3$bs:DK&;~A(=,(P;/*npN-ߧJ]y.1t$!e\fv7l,2%vO@`b쎎!(\&zn ҪUHv38LaH=g/I jl˷$PPn $/s߳b %|""'RldVLͬțނͦ$@wW}+RRt&* @g#l$ )GiJ$E)(^?>y8@Me1<ICviMYc mV`Ch92&EQg/]wEe8#E|l 翅yZJI m783BDL,.[;\2ƤόqƲ 6fwtɼceQ_ԯ: 鬔8I9#`r1DUxʙ8EIDѓxg7߮6Cs5‚V2#HAΩN89t( c&C.0T'U49 i,bJxl+W=[YyߜQ~B$e77ϾNf 4p.xbäxssYy?n{W-r=_bVvmIܨ2]_BpC D#]xc6N 8qŅ\Lc\ V$ I.F+O0[ڬ-O8@,xfةVu̱jZ<%e܏Qd 4Ve7x'#!}њ.(f$MTۭsh70Yl=ğX69`崰'᭬Җ5aƙe[HLM8/`Q&1?WX0a(@\G7 [)AL,bXd?i;(CVq6DRjSaA9uU7b1lmEi.T.H^I#qM9,"ۇ=9D3[ eoP4D:ٜu€T:h R e8nnF)yM /Kwg gJ\ 
E@㇜yHVU볚q_7SF .H_@i%\j ^u$y"7\ҁµ\eHm"zuYBM+ \0Izwgɺ ˅Sr^ֱʊ7m0ph4Y"hՓglP\"urPMȂXvj_oRs5~Y0)90OM]s7pW;ePz VBHjP A2 { qׅ+\:# ynSy&mt~z C 0()9D=e6'ɲGUZfq$\qۙ{vTђ\BJe~$5@GMI/Fv9VV{~ $g> endobj 54 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT11 28 0 R /TT7 20 0 R /TT9 26 0 R /TT4 11 0 R /TT3 10 0 R /TT5 12 0 R /TT12 29 0 R >> >> endobj 56 0 obj << /Length 57 0 R /Filter /FlateDecode >> stream x՝[$GqW4Jcӗ  D2EY"@̪[Mc7tWgU=\sU7 `?}z]=Ϯz?~_l3a=:C7atAW|4ab7"ԟv%`Q+:'| ~}p_Z-Oi>p7fH,OiwvLHwJ 2 Bt:~5^gI j{OR7+a/^J |k~{ J+Cͱ'~X CY~ =þb0j6cnL oD0@~)5C,R]o 0]#c 6!豺j3-O`vO`Tpx(PoO`kU_(hg"0=7˺;F?6WO.+ToC fQ:F-wTmuv\؄%D!Xvם11e xlLߙp0~EG8l pusK}y'+G9vDz<է~+~hIY;5Ҧ2teA!P`EEquTAm? g:\y[4P7"7Tb\|w}o]*uElOK Kyo>9ŗqRջ8B iz/ѱ ~ _7rHq$O>/PX׷d2Ah#j|.6Wle}O<6l8Oz꓈> 6ʞd8}c#GKpB( U}}yS4rXO\(cEE0Ȳ:H+Y֮=^ ItƢ;bvhՖ˒!qd:dfsUVCN+[Hȼԉwج"VyOjXSi(ʿ&'qFlWm]MԪ;SOycfξv*^{ය1+G:i}$~Ww1+ff6QfX5E`t:WwrL*& Pe{Q)K7Km(]A2-KɚZ2$ك'#]m8\<VP~Ir^'pV?b ,q^OlFk,aOW ٫Ⱥ;v)m}"$JLH @ jl%Ē3l3Ho S9[-KDak`N'֡Z&$iVh1( eD H8,8+UQ WXS+N[g>|!C-n-spI78tscnd;IIōԌx:u5a,vno|=k¢rQJ%j $ZJggSCJJKkwT3t S D;aqQ X+ t 4$ fpՄ ~@\aB^ Q!o$Kׁ$Q$a{VI 'F'nd=s6J`5R҈\,o"&@wtGfGCd\&>h̽0;ɮTC h-iu]e^xmY0QG ha#1 5XYM`ro_om@w^nw  ]@~|fasn_2w gNZ/Y_lv XF-Ȼ_,i'D(rt@w]'XA`[Źя8qqę8R{GDRݼ%9U,bݽ*yx&NZsmAmncÈ Pl"@ʚ&PĎNY&ηE"'vxǂ2X \ЅۣͱȨ{!No_ob^tl8}ХQF/&xr ,v}w aL 5>3r(fMj)""2lO&k IěˀOH>R9u^\VK| .M7M}S$e)8dnH:m@TPDy27rq.Jwdq@5Am-HyW*ڬl rP[3v"xj m!p2M? d4 仔m$ K6rñ_Va' +Uv!&c9 JI'WP?lp܅܅|Uuuw"j҉S J2(?hI1Ĵw{ Y9șh8Xl;3z ./:V:)ߟ$߿džLѰOfv=*n{U/Ӱ8f!(//bkaI- CWb7o4x̆b덳+z3aVDGvN,\6PyL E`݌=ճ`aU{F.M?=iN6;8T1~Q@m) Al #zj(W[GSy* S9AnXm +Z|w~u4f2匃VBFH y#d ts%'^W6K' QcHyN@nn^@W+0UIEY; _*C0f+Ttе|RGD`3lƳvC0JX.mv&Liy>//\]f# Kpi ZWp# /m>n6WTqp[ő z9߱4۹I Va Pl'Q(UPy590^$',Qm§JCסD!/q }ʕv^|Fv 9Ӟ.],#R .*{)m|L6|Udo^A)d=  D>OŰ NfRm`'-6_7\~\,ۀh09`#s¿ "R6G& +]pMb}Z"e8GSsԄ4 Ž"O8v f5+4m &%%3P4b9A@N]P] 4ܢ>mLkgu)6 g 3XM s^&ܚIeu'j7惛S sXQQ{H1u}B0TއA1>I`;(YիBlS=,=`s3SX"x?  
k0kٲkYE(3 Y󚱬kB8ҬEBCG5YWu#ô| qOWF\rBhp8{"0WK] ˸//fi3UOg IK#^5ԓ)WOӍS3U+i,5qpL1٭.)AaU9)[d^S5e ΦµnXN8gwl" YnNA+ s< C(F6Te [LdHg kb|xl mc2ZcɫQ^;b A^MM 8DBP@9"N!қ=[d`Ca˫/D(d|{" q#˪K*= uygz}m.H'h_+%܋ KZ^X_uxg?d9Zjd㷡۠ nCS -Մ>$;,ZCKԕlВ\[XpNI WCS,q=Rlz6F.ܒ/@'1cK(yBUK؊jQ 5~sh{*zA?% ۇ&[[I:q=]M׿R:eFxfW% *kG^X0&toA8.g ZULjZRH_}k d?Y! ])"KHGhvqR ڧq!i2(7ĂW%ȴ6y EcsoQH k]DOlȍoy}@KLYkJ™`@W8 VI\ m71?ɠ =惉$4xΖE|6ol+R Vs@ >\"gGxsax٤8|z-Hr()/Љ,JyǘN9">n^k)cledޏ?C[C-_$S <)đ}N/!lPB0P|dݑ1\}^Uٗn9m Xݽ ]`/Ku |gőPoɆrRG[bypOv] !2QWY տH[Iό&:z^ P\Z?=qcBtZr np/&[Nt.0>t0{D#gEP8Gvmniޕ q<`.5q.CK|& QN ;L>o_Ly%[bOX$y@ITQ笡 i⪬ӻxHzrczB_V^|3L5&]w-G5:s yiN*+6O$Dk-uɰUdV +WoI. vzsHpE1f0ie x-ca0+0|\s>%EzfL+@K`fAl6A!Ҵy9b+@;ѶJ{$w_nQI}'M{0%^ VNGse8Y3 j4GԢ )|8>ֲ QoK"9r!)U]L-ǧ; >7L-%X ez[vy͇0ˢ vl3Aw x]JXH^d ']̄v6p@T%h"̌Vڭnt\(s%b-ӫA X7UtAMteaK3|Hvru\)er@ Wg TDe"(v/pEI(|# z)q%,.jy%mW|ĺ >e(jD9=_,֘#Ͽ-|PSl5?^NVH&+6PR|AQX k"!Ǵ_Ӌb]rDeh ؏[VdU| =Uoc~d?'Va?+ZL- 9~ yWD-<Ⱥ4eq3aeR(EF{'0*_@ 2I 9Fb* )#C-=o zUhg⌋Ƅ/|gR%๫mORPrK^:4ץeEׁ YetLF}A[c endstream endobj 57 0 obj 9994 endobj 55 0 obj << /Type /Page /Parent 51 0 R /Resources 58 0 R /Contents 56 0 R /MediaBox [0 0 612 792] >> endobj 58 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT11 28 0 R /TT9 26 0 R /TT4 11 0 R /TT3 10 0 R /TT5 12 0 R /TT12 29 0 R >> >> endobj 60 0 obj << /Length 61 0 R /Filter /FlateDecode >> stream x]ێɍ}싫)'3n, Hal ?XBRK걬*MK=?{d-Qj-fb2K_O; sݜw>U'S>uea;lWt8*yzm]~\0]_2o\=ߙR*~FZj 0oLkwW3wW1G+3ɧ/?ˢk!ꊡ-+ӍCQuߊ r!2+sQ@0ts7ʀ K Kpmf%ϞkVX} ͺustSbK–t=(W}tſCp@?P5XoYvCuF9%Hl+ysn"|Y$޿vS{жci:so` `;??oamxhK^ո{*c鋖X?`3d`P`T=blYKEAneYR)tUU4e τJ' <|J00s{мu D .b̘󉿄`WN❁pŻ2A2drֽ& @oq8~|ܨت˅;t¢ᥞn3 z%g,Pu-󭽣7P{p8r}4aǔk,HK1]ȓ \d*u@ }B iT$ i8M%U7 iaĔ$(Lb`ޤE"4z#c#ybO(HQ~q/_V͢PGm-U {^wmlRr&JN[ !4r쁀]NA65؏"vg;ImNLptMT潅 CW;۫j:ɭWd;Xx[{|^!>FdSV(rF z <+vYیV%k\rK,b7=ifqʄ-#`S Ʈ=~_šߐåaX8o, ?F|mdPxp!8T +6QcǬ\N~Mz5 :t8C\ A`1m:òbC0$X߳{4dF9(aδ^NH(=(^b84#iZMl(j!IbV^;q k_Q BڄtGMD9L5s;IQ9 WMWcZdG{+k}d3|bxYbǢ~: )Dcҝ Kr0zISc(4QEXrhQgSZ$_mmC(f:8Mfl=48THh&n(j)pH"BV:jĮ=>q 
fI'eOt'Xl6"a Bh߻dFE{:z`;Ns{:Vnxx:9=<moR۪~`x[&ny[#wNg4#B+Ȑ+_wcÔOp'L7$\VM,]':t͜^-t3~44; 8 RYn|_ /oWŗi*4.EiwSm=a؛{vHU8v@[X<ΥVyE;6i=Ԗ9; y $eV[K˚ -aG (l{h}~=Ӹd\3hFhΥGľ@eSjG~fkF.f x"0QFN

4#s+<>xK~"eK:nq CgkҔ\NjG3H-^+*w$1.45V4bHk&u+DEJk-R Ex Tƽ@r?.O Hɷh^D.!O(p^,$/WI;;aYJ5&6~{/)_'W*zEeўu?5HSںK:ȸS;1/#@}p{~C ET ˸M9\ {MbT/ ɺSOM+ʉ3^lE ҍE>iG\VG!3Cʢ`m3DKY^,U!pDqrhEGI %0{&'cIkZ^5MWF~[e_pO@0Y9A!޲g,k .<r{`T \jO/LI7Lb{63/C5]+҈ǘ,=L*jVK{4yC&s9%_%Nxz<>]9PоI(P\>r1K ltĴ *3ؾ#;!'yC NG=Y[ď@s VVY5vZ8QLlؿU(]D l\(N55h']SWv cyWGϩ.̀2)"]ioPnC |VtKңpsI%)=->KMҗ^3uMQL:`.Yט w_<_)xT׃VR;ZPy6 |yէŇ|#plR)6xZ!0;"(c{ e}U?>kAMZ \î҆Orr;O#4̜gPS*ώg.#lmx,,^k endstream endobj 61 0 obj 5968 endobj 59 0 obj << /Type /Page /Parent 51 0 R /Resources 62 0 R /Contents 60 0 R /MediaBox [0 0 612 792] >> endobj 62 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT2 9 0 R /TT1 8 0 R /TT5 12 0 R /TT12 29 0 R /TT3 10 0 R /TT4 11 0 R >> >> endobj 64 0 obj << /Length 65 0 R /Filter /FlateDecode >> stream x][u~ǯ<,Q L,:UT*e\KBڵcKP+w;=}gɊ`,pߺCCQVx,ݱ~_{q.y,,ÿ{]ո/o}q)X|{i7͏777uQ7?*o^_w|+)~޵r񅬲'  ?+^YF68EڗP&1[E(x["x_s 5vwU|{]ӓl-;m~7M?zIMzn0$rz'i]{4x Ń=ݍ$ Dy9_2v9[281-XW~֓-]]q_es<T]:JxXT`0,uv!j<Ю0m#z!8\ϲրU @ݪ0?P})$- V l JGx(/r 2WDhWe=rBY h.X"f됻| \t>( aٿуzmG CJ燐u9 df,f,f@dyI+5lU,z`ϙMpd]p;YwOC{\eӦwmz1gv,Ir?'X`vDjN4}ߤq٪:vvu)"zĂ'1vwDK&2@~Tk`ߕǗ¬d9*E8}m1z/ RFz]%%6K"pG|诬p{]᎔_ RQXD ,UUY%%P4++qnB^rp(~!5π"? 
Xl2Zڕ, 31 Ve5'P0v+˭Ue .5T5JG'ʬKjRPqU0< ZI$ntMdر{I:hͬZ҇D=[Q6D%K,Dwh:<AZumJw0tRv"V"fS !Ro5}UD:M58lAiwM#SYA,b^іFqiu#&`@shŭl#i Qσ2_'f*i vqW&0ZJzOvὯ`ƿ?:[3~KͽpD/)4>(6+ FciIH^^W7T%Pi-:p6(lf+53՚R-&r&?0w!~95&TUKCr~+Ū.щb& mQQUٰ\`lq+G夅óĖfT"K4T| coh5(ǡ`4\dZK{ }u@MPvUO(P|T>1(潿I-ײ=p6lO'E?a)o\h́6҈KC˹HP'|l 5mQ~Y3a`,uߕ݂.c@=LeFW()l KũMC)|u2Lm}3jnGuJpi0Yixwzm\p,m)@w~dC%K9MYbgbd|+\f^s6o_6LʀWH)3#3H،@ `VƹuѽG9uP [`׊ Fco04Cr ,_ fSt %|ɉf{"V0<٣Q&`Hyv d`tQL:S`319֣F *!B_Waޝ=C5 .*D|6^ *}g/1rS뀁4zbʱ 2 }F_WJ_U+ o>^bJF tC x T V 4,I%!:3~ /] 0|!YNQ`.̳RRd< 3in@s 2j(Z:!ۃ*qr4)LM;'x2o4jS~$+B(FK"S?79J!QhwjL"{=լ;/e.oANgp5Sk!M˷TqR;Öw4} 1,fͬdޙ\Bq4_7a*q Ia %T’+`EdNmUIQi= ginha~D=>Uc:3WeUsqՒ?V^+(b%K(vq0ip8E`OiAJZ1i8B1@l t╌6FV(Q3v8EʲIR$Y KqJ\GeX조%]i 3>T7fm~+Zdn7;&ҽX$L?h08"?[m#:2%\@w[3JOv,{F̐p7r;R{>}y2"\b҇ZÒ8V% ٨j,mb/C R,fyu#%N elpXnFAKk9Vvb0 M'HVT䘄AϾDU㝩&=fhτEGu)¨hG*b~0?>0)+O}<0O8JY%'<#ŌO{|huᮛn$/a۵߽3N废I2vXC+9 ٢8<m!qr=h?H(5R P$N1>Vqz,y}kym:b(=xbK]3-(R+ǀuni:իvIy'8mJ{} T&Z DPڟQ1 CL*URiX0 3 !ypzT7*\1E8F &{"CUxo`󆳵ا?pFz|p'*`JY9\,x.+TZ8ܶU2:w5.Y k0y'h>4Gm;b~ޖq,ka T;*]~/.ݰۭ:r{R DiezUr3c `$?E}^&sPnHnp5nKEoKȨ8"Zq6b7?OH|ےy8;(W3 ١#^j!tҋRnK^LJ;hl+Pdv 00n~jԕ,emaYj%7 "s[kЍйr3 !='Ԅ#;9Qtd8u3^<8+{ ,g5;:ΣGeW-Ia擽OJOnxuD+v-g/h&pw( Ċ[b.8;U\.qxj y.m+qrC= bntgBrX س%FgĈy0^k$ BO1 1lC>L%fxB`<-8k#7Ur QLn<٠UnȢxf[ yG#,;D;E0h".MeiMLLUPA2onY#ZȪ PY{Ɛ fVCgC=2/WFvw=G $Vz68sYJGM#G=B-` U&㊣>/j6k[[_r]޳dεxMщz#aus<|o#-&H >9d I˨jIQ:^(] a]G^`yƠ20pW#;ԣF.8@-ƃF2Ak9#O yef0lZ(ԡ;t8:A@xzxZLQtHqoD̾zdsZYz(HBu_ǎ84%|hOZ^pۂ |:$TFJx>a6*X&Ck`7agurLƠЎ1vr,6-`/f2EJ ls5>@Dg봄A!G\`!1/J `pBFLZQEp W;l(&OLGNV$,1O0h =/1*Z endstream endobj 65 0 obj 6827 endobj 63 0 obj << /Type /Page /Parent 51 0 R /Resources 66 0 R /Contents 64 0 R /MediaBox [0 0 612 792] >> endobj 66 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT6 19 0 R /TT1 8 0 R /TT5 12 0 R /TT12 29 0 R /TT3 10 0 R /TT13 67 0 R >> >> endobj 69 0 obj << /Length 70 0 R /Filter /FlateDecode >> stream x\ێ}W,"Qdl/~c+*Yl|RNCeTWWtUqީ;kj[ԭ:P?z.Ww*}YK7uU&ҺL.ߪ/qO*^{9*W/f/O;W+%tӺ.OZ߿W?J*-݌7 
-kt58i$e)YIRt*%Ih4k;h?T$\`iҺ˺xbPM\m_aB\QhWHkDeZUB Tyۺъ5<4Ӷv0d'2UC].&bha K"d~Vgg1<}/>F_ndHW+~_{=SVXJ,U=pnNI٤NT'VTXz[/6c4]T},] :-Rj΢L0pitO{y9huaXJQ(_v3aVFDADoXl7߹;KLH/n%(WD|˚=jMlQ~9ymLqŜxK=04{;~܍D[Hywc<,:߻|^jϹʙN``0q^-ز'\EcWFk/X􀓪a"5WfĂZ kUf @b_^9D\B'PF+XY,WU\@Je&ì꿃';Ч i?QmuuEVفP4g '&l.ؿgx+:KDVgW|XEle|u\¥mSEK Vt6bNEa=mFmʸM^(e=.h5qxP_- @.-4_'yO4A#Ys1ۘp|-ӝPk>G];UEni> `nWn8F ڲ+*t4Y7eUve|%|͟a&^ \86? 4s)?x7S>1d'X0]udz~2+FL\8N\}fb:/I ?,5]3ҝO}T*ˮXj|\445C` ~);WT ~q¼No!6q^ e// l\?_Wt֥yY5IPx]olޚR;0-Q{ɓ1; n5ߣ3B"=Wr _8@-3?} vR W]#mDQ7(P ̠F>{*<11٢6?X5ʶ$Gg X ٢A|Wy_* FBEȹco!RfCH!=X `Pg ,ļ$e ZV;#`^ (Dn)&͸ Iۘ+{F~Gc~⇫sw+SxsDLv)D}3 {K&ePg.gd_b>:& G.s-;2ǼhIPY|2u o|CG[A)ͰO4 WZv)lU*dr\}! /Xbʟ2/ YQΤgZCi-fxfiQ;7nkNγmwp?a\%yvA_}Lj6qw3P`ux{8c{́9"3ƤǤ+bJ2O8F "^z`dGG :FamVmܘ( Sh͠B^P(d0g`wݑ4p+ԉUirQ?'HC 27)]T>̛MU"^LI'$FOEC%fs:RћRiȧ(=V2d8 KC俴KXE+. DC}j5\ RQlB^Z>w&kV/@!`~sL};(]Xb?Ę|9c0rG$]QgtHQ_$8ay7&5RCyqy޳4n{>#c"^6';2xtuegw:9G#IbV)uXW\|?@T\>;QVŘ.IMĠ{{3V<C3Jм&dir^faHJd;M^EKgB{>"LgR^ @!l:-ʸ 79sZ c>7߼4|&kAlp0[?x\,4?jSc>&'-OdτG⑖6L-}UZkџ0Bu v8,ʉ5|maubn?1հe88FxN>Op^>4KYaA뉥3<26G-5ln7>E9Іlꯞçj䴭k<̂~lH4p) OgܓsDGtIh7^X/aYhnMfH3- frY)"lfxWs 7+3&C+Do ha/Q iEΗyP'BRv%14Լn+3B 㓉!4 g.&671Ϧ~:Zq`6/!Iͨ]vs1bFyO.B:*;,0?a5|)x. sn S v8t{B!4RIK+:DI+Pl3M)kR$+e`$Hݫ"et50F%[Y}ܞ!?2.<χ겊}>sU]9?{l) >wvfñ8C:0m%ӈ ClId+ϑ+HU9\_hWA"x=i\EbYg8*TYNpQHp4&umULc,)/=cְA[tD: c :/TtRRҦ!3 ϩM<MYzklk pL\Ʊm#3۸I 8C+#c;ɯ  g-osCFSWuzJhEi=Hؿf=ŜpřcԈ]y`#AC9Հ(gaJE_Jb(k8y&Ʈ!E ,N&0SܺhGS{\_s@ 6w(TH *(YFnlXBNjc1 W< 1[8cVYIl] MYۓEgHUUZpғXz%o*+auC`x/+<觸=H.Dd+AMtlN"ƆUqyp;(|HM`Edt$ b׌cyZd/p * \\|\4II HPE33R1$Mh%D BppT沶K*r;b\%lHR XbBh]9DAW7kc~0)wQ—!~`Ȝ>@E\tn9$J&J"Dl;GXƻ<"USNrٷY.޽]Yobݳ[Y❬#+Qk:<ȰcH%G#ӛoqJ%JddeMEfKLj1gd߉Y|72r 5Զ}Z>KT)"dҌאd-iؿ` EX̙d@K~? 
e endstream endobj 70 0 obj 4858 endobj 68 0 obj << /Type /Page /Parent 51 0 R /Resources 71 0 R /Contents 69 0 R /MediaBox [0 0 612 792] >> endobj 71 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT5 12 0 R /TT12 29 0 R /TT3 10 0 R /TT13 67 0 R /TT6 19 0 R >> >> endobj 73 0 obj << /Length 74 0 R /Filter /FlateDecode >> stream x}kqmQΑkS^2|#)S_w(t!%眙BUPmǮ]uܽ>|C}Cw %Na۩ni̔w݇ϟ]==}R}yax:]cGp=iLYA* lsO $_w}/m*W̢Ý1gu`?ww.+  );ex2W_췦Ϸ3R1otf(}+7hW?P?8SVW)W/06_6xa>U\F{qgRcBMW?(/5ӝ9΍ҼxT+akJlUZ!`Q Re8p,@}g[b1to^*3Q^L2}}ӾMW Qp;q|a&"^7=_󸜸ʹWk%\Jmә5W_]JK-#aMk!O*ʼ9x۫_w߼~pYAR3+A=ҏ]*2a@ΧkW*zſU6H8Pˇ.r2I2³awOe/7SS2UoUQK(^U8peb x^^ k l6[Ul-y-pݎ'Rr2Poi8h݄@GAMtmhŦ#OƇ" aAA`{꺚\# ^JTWHfF/uO/oE _8)3q>-;4DHN8 74amU &f/ݿP]&)t9N4ƖEۖ6@}9I'ZKFXuHgqcC|-ɋ͸7#󜆝[km"c OvDq:NWGtݮ) /$)pd>8|FcPwFc>D-$h{pFesNb]Ɗ*'Yv! KH+R yi&<.l5@kQ 2Ŷ(tZm͉{[l?C9~wy2$Y84x;PR<q s@:!ϗ8H$#C5㟇 iI!uH-R qCqJ-ϋ3yj ؋Dn,_;xDB}?M3_Uc9c,1މT1hc5d !PYVcXK, ^ütX6Gyk0aeDJ8<Ǜ)HB6!Z3t.o ?>WT` #JYjaL(*,)CXoD*\}^;.1$P E!bw^oc  #>>bw~O&>̯ r"KS%j:HL.\TM@\ac̛fb8]v}WgѸ£AEçOk5Q=D:K$YS,TϞ3tAh}4k5k c3/ _aSpk?di̗R .q Ґ 6E!]o/WWy\:,(^pmx: T:-/Dl)%i6 %Zx:a2=k1F9XV)_@#˭Fkhڞn%z~i -mCt1Wv$:6qaG=~+(#t~YX=U⏫V\2rZ 5=mK$KZvUK(M`]}hKlP`p_=y+{nS?fS=_c<~:XhZiixښ&b`M@4ù*% ,ep/)3=ąw4(P<*^"u`uA0F`p .~8R%ׯE瘭* >ĨJ$B2Hq[ӈs2N=z!~< ]ɢ͚3- E%1}2Ɗ7ՖLҕn%Ubz?ƃ0=jA{$`g:n۹u!%yy߸4>S51┘kYLc+QiN.y (Z1j}$au;TE,,NJ!QbS:"B5ٜ FoD9/e5uö&1jO y kz%vxFw ST#ɍgyrV?`<@!j X}~ 8APH_gVLSdx|/h,O~<pI̋_W]d1kz|B0e}i,!r$$;@d.]!j#x'0ե' gꆶk35;.,4P#!hw(׃+"|E6J}iZhTVhֿ4Jqr <΀lIcaIi Jr~]U1nX;T"IfmXn371%~D ,g\XX5XP`&][¨ڜą\HEJ1ja9ňJrFŦtDik)3Q΍b9=8^Pі OQg0i-+k.":)Ev '~<>€7hEftE_% Fq?xEYEGRl}^zp4$Љ$nCጿWst%Fq3#S>HZ8q=Y8Gc|40#KT$,f(g)/Ք2i9t hMbFU>x<Ǣ σSX) #JG^Q8nsD?*"Wc3W8 -@ J`q`4g^"X{_p#8|q',@"y gRBy SGK5Rfj9 #@QjS?2Kk4N~ĭ,k^"bK.G )zt=Ubp @5nvq|+ʅ2}CP_toM Okۙ%n-doSAO` 47xH?ֺo$˅h廁|jJ `^fq.k\{IhZWzbOY29a#i_H5y_Lb=@D= `;|tZ9[S! 
H&.s`xς_` ¼ghj`(2DT?U3nk|Q"Qï]-b= -SgÎ.M1:Q mPgL@v`e않O`<0۰ĵ'T[2w|OCˊvOAHZ1v~N(IỲa;bF |:4[,rFEKd=ji&ںlGsm#׍׻ڪ R#%'=4)!Xx~Mޒ0ޢϊ /'\gYV<\m'm]* v!v/Q5,r .EVA\VDdP`= ($ǩc6'8)QQh: %Z8(dqR{awHf+ Pz St;Ϥ6~邆J-I16 &Vu4 hQl!PhslDB*RR#OzV8Bdb$J("+HlmPC+& 4x@}6[Ѧ @  ViD36cz>E0 i?!'~: 5pRD*n5@#țC#rJ`[G%DB+2 A8+lOg yBo8 'xs鲎p =O8chv^YS0yL7Ulv؅*gRpR1CD+-ٴЄד/=׋bRԢɗwɁ?,c L嚦DSK}:5 6*ʱQ_Px2 )Ys{zUTBTQP -G&P׳,6S1 28 8pU,QPß*#j*j:T-"TFp_{<ս<|>'QO ^`T vw ?}䞾p5`0|#DdOA ;mN#pOh|H[>#YS@ًsC~?H|9nx_1nz8xb6~ &PA9^{LOciS [KeCvHXەLp; 6;^/fnxqIo:aF<;n{%""}nʑf]d~etvOdP&Wv/\˸!vu`FO)@'pnbty DЙhfb¸>`"lJ` ݡDtDJ,m(E3Q ̤ $N"*I$3Ά:(}94K( QC*ɐjjHgL"^Jc0aزބ7u `Gr^a3L\1 "a pt)_lYvck`i Tɮ.3%G7lY*z1\'o-xSٚ.ZOo<|idz]I:i`z &ImDFۈ> 1RA \ JTu'|K!jni9:L I--肤x)LEؗ{m1{?/hu0 '- PK`+kA̬sqFebnNzc—&},S3d_UC e u|v"N7.ew!NJ888Kq2Uq)4B "281VZ ZARDWHH-!zh9HD,bNbS*s3ihRPgU3Q΍NJ64;5Q!tp:pB@f#Pg=P!{+-4@E\Μ@y0 I[~k*#LϭݖiR(qT9hۥs^rc-9NN\$p"8T8@/`@n7>2fr;~W>>G)2Bǝ)>&r)Iorlr>^"Xp|Iq׻W<Q^|Ng$Mz"*:n-{>l OV=9=?.= zZR.yQCOCz^-݉{4m#[:)j( Cfh¼@ DQ8K@^`b@ 0%l X@cqH,s.;^{faN);|q8')q)As:HsM:&",^N`̅M=zH&ls:jeJVb*I(؎&fS"f k]W͹9tMSvmyCw`Biŕ/W+k |]Foq̦9tPXې#nCBA7 qU5>p~@ga@nc8g~# .-Xmwҝq"j R<m`Vߵ[и nbKHLl9&D6N@آ-[~4vA'jZUWڶ-|e ݉4mc6]%t{ MpA@e)nc[CTdyN>H ޼DE.)MNX&ߤRiҊ Pweb)QV'>HOY(=Cp;ZE#G}QlI n@l͹"`u"-vBhgy)BK=x7 8l!D8>EyEPE|p׃{'H0r6(SDx J+fk&!2REc"3{dE##& d@%xJ pIhGp}/\^obҡ 0"'Z_=n WWjl}'I*8ӏ2 INv jv&V/0-щ޸]*7G[HްXHYu d H 7rEw1 >g|YPHձ@$wH .1ŀߣ o1\M3| |!wxb:-M+/b C兙{h*cY6|SI8Y[ix98/wꖺکCJZXU0'n837oﴶAJ/DgLNB)Le(E2d1Sg|i8"N+FNTV6׸Ma尊G4u;C1|2*¼̗24++p{<%x`}QSn2èբE^V-E$Hdph Rxv p y71:Sv=0}+0a 0ƋW1oc<7 0F-0Kk`[0u, c=QFYs6umnNĨ*ʼeY/6qctVh0obk!B/pB^uYoҩg"?./r'upi|9]°릥Kwc؏\%\-5{sy?suF5D9ڱ/ JYt3iwKvK&=vC5:$ɛ"- 8it0YeK&B&jñPҒisRV #b7C82ĊpY 0*vW^֒InDbvhHѨRتBÌ%S'B+I3.ɁwSX.QsWVbl#}r;SȅzK-&yHC1DzV7d&q/RǨǬR6+L`A3 s t?暳kȆfX-š aEP#9(MM727(e;@'\AÙZMrfJ|qS:L%Y.ƍL⊞m*Դu,r~T& қk&! 
sD823hb3+0& Ba0J /AvL,M`@.0Iln4^R"HS:F1  G xe=MSg'X=s} +p2RGq ߷z}4߂PA ]<\?1J?4-ZZ!Hf`p߯ryw1ϿW t6Zhj4Դ,Wc}q+K.C sGei-._I~KS\D \2>.KLH/-|%SQEDMlI|IᒰrpspVx \b*sL,`K6bK%#IS TG &#JL0S{>2*ĉ,P8<f)zJPKg¼~3a3DL]č[tׯ= \bյ_VK yc0{L}}7 oI(^6PdӵFż3dw]9[K3֎`;`y8"tw~O9{'/?> S/^o~/K#-}΋lPR9p8T;8,7ތ'wtЇ> endstream endobj 74 0 obj 13883 endobj 72 0 obj << /Type /Page /Parent 51 0 R /Resources 75 0 R /Contents 73 0 R /MediaBox [0 0 612 792] >> endobj 75 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /Font << /TT9 26 0 R /TT2 9 0 R /TT1 8 0 R /TT5 12 0 R /TT12 29 0 R /TT3 10 0 R /TT13 67 0 R /TT11 28 0 R >> >> endobj 3 0 obj << /Type /Pages /Parent 76 0 R /Count 8 /Kids [ 2 0 R 15 0 R 21 0 R 30 0 R 34 0 R 38 0 R 42 0 R 46 0 R ] >> endobj 51 0 obj << /Type /Pages /Parent 76 0 R /Count 6 /Kids [ 50 0 R 55 0 R 59 0 R 63 0 R 68 0 R 72 0 R ] >> endobj 76 0 obj << /Type /Pages /MediaBox [0 0 612 792] /Count 14 /Kids [ 3 0 R 51 0 R ] >> endobj 77 0 obj << /Type /Catalog /Pages 76 0 R >> endobj 28 0 obj << /Type /Font /Subtype /TrueType /BaseFont /QUZOGI+Calibri /FontDescriptor 78 0 R /ToUnicode 79 0 R /FirstChar 33 /LastChar 100 /Widths [ 226 459 498 479 335 525 349 305 229 229 525 525 799 391 423 471 525 459 631 579 252 487 527 525 525 303 498 498 498 307 498 460 306 307 303 544 453 452 488 715 395 517 455 662 252 433 498 533 507 507 623 250 498 386 507 615 646 268 525 507 543 250 250 507 855 239 890 567 ] >> endobj 79 0 obj << /Length 80 0 R /Filter /FlateDecode >> stream x]n0E .E`J")xd2Ԓ }i1p= nVL>nI鲴qpЮokO͜mؼtڍ*sn-un:Oҥe_w:qu>kץt_ksJnc[wa޳ߊ99NĎvvynڴ41euZgi>8oKΪ@a+Y"m H@|H@H`zB 5H@Z|@RH@&E;$ ;Ʉ =J/A7hQܔ(X79 rTHxO~${usB$R~K N%1'H(9})H+ KsI RI )7jo`IKU0_9Uʢxe2X j]@6xԕWAs 6Р 2[]@+52,6heD*2dJ C^<~"m*!Z+Z*[ *6i`: e ZM#J}̴!:TA! 
{gdҀ ${gAs^8{ cziVJ endstream endobj 80 0 obj 626 endobj 78 0 obj << /Type /FontDescriptor /FontName /QUZOGI+Calibri /Flags 4 /FontBBox [-503 -307 1240 964] /ItalicAngle 0 /Ascent 952 /Descent -269 /CapHeight 632 /StemV 0 /XHeight 464 /AvgWidth 521 /MaxWidth 1328 /FontFile2 81 0 R >> endobj 81 0 obj << /Length 82 0 R /Length1 30896 /Filter /FlateDecode >> stream xսw|\6BCO!>BE3B|$ğP?  G! #{B+oxGWBR_o  B&υxUWxYBL !^E!^BHSB?y!N  !O q\A! qT#B)!)!Bq?9#|@硿q_y/p3y8}}N~CwrzG7~W_/8ERLy:oNq9W)pzG3N?~K^/p1q:?9?,g84x(#s:4!ѩg>hS?8=t)Z}ˣ=Npz7^Nct78}_p'vNc|-͜n N7ry=tk9]{ ~Uʽt%+R]鲔)7:/]i/|n7])rr |'.洝6N[7sڔr/]6mg紎ErZï[i3qZi ŜqZǟl<{pwn~ٜfqɩ+J"a3R.f)R+@SS~dN)9Mm)ץ8֚r]jIS)Gh$&N)t<K{˩!egS]> ڔTUcU*SbDV3Ǥ,a);k8K7sJq*7+))/egVE]œBlNY)[K)"_`xigxgxID9ui!m].8 |!3?Owo8 AGqC|Ѳ*wF7_~ |Q-7¯^9~xxO#gB%k/ׄl^yU/@r$>~`~ƴ%|´5i[80CQƱCK@?]0 4^~x xxx$]Cwp̓^o^w^w^Bܝm7q-͆i 0 h^#r] Z{ew}ƽԸ7w{ޤCcӽ{]ݗtypGdtUr\wo.N[T"m#eӶ-[n&[flٿjlwHd 5 <%j'l16wotpcԭ^}pUʺ+.^V{IEu} u/=޺8Nguu<=nZ4O=κݓں[xeˊd6Ӳ$$H'wU$<@8 Zyn䗭W|RWXf^3-,m#',mYyZ8V:Yְ9+6jVUJZqHc"'-cj۬Ybfٓ4#=|i61lӍRܖ4F(V~l YǞmP57̞HLԑu3kfd׼~͵{J;@]xo$'g wr~$${C&&nݾ5ض nK(,m ;i8oV(w_p_8@PD{& KWe~`p)v;v` l6:"`-X V+2`)X ,}`>0z f] `:0 L&@LڀVh&$4qXZJe@)P ( | @ @! 
@> x76 X3`:@ h50O "!pO߁> | x#~  5+/77ׁ^~  ~ "cG)࿀'g' p 8 0OO <|xx6p7p-Nv6V-M7u5UdJ+ˁˀ>R`/ .v;6`+ l6:"`-X V+2`)X ,}`>0z f34` 0:v`-@3YL?^?oBc7 d-JFr+y,%W@E H?yD޺%$#$d2*Va_öA ʵf5th,W 1װt UOJ?rAf.2' HYL j#%VADhB[gm$Fl# zk&ĎmVI.!n|Pb.%v'\J!g.#+J0\IB]C%!}躑 oȿ7^pfr3|6r;| /ީM#̰+nGI!?&Gȓ)rT2ؖ[DebMQO̭cZ,2 ]>ꊋ3vdֻg2LHlpWbYe@_}h}5A|22hWF}@9.”`0!n?N'''$EɣTcO52J8y@ yDKC1?@sSYpEq?ÃM9_gEcg>SYAA3%B<Fٳ\?$K4"ɲDo9)ّclkuqվK#%Y7_~9xՌ8kA aK2N䒌1[pHfئ~Wl"6"p5zKxlk,^ע`+sT;P $U=Go=г|exqp9HO0ڟzc=+zY.l TBھt8"=RPeH>bိ_Ã,G' [2g0u}q16(HAn5Hfvid|yK덈Չq/]b8c,&]Λ=8HX$(ؤ#'Ju_5+_<.V5k,$1<򄪅,Q}D?OP|9"OhBBrpnr$Ī:@r!2^~,PUdx6<f]E?~C~HwiXN9Op}tIO(· wB~#^\r;o`aK=Or|n_w9`|"`p \A[!Ą7Q҂-'$I /DHxHHIqUԓ}IûPŤw-"%HԠ7x%e$EJƐ j$j\iTzE,VW}nS] FOW;_ð6>ij0ܼXWZk[i'fG㈳+VGwomDmY Y>4|[#y ;: ϣaf2lTOid3 4#G--pI$Pڜ$@)vZsl%7•4eCqԗeo{O^חU{cʩ=jWHZK)5RuU4.6ҩxާ&ɗ3Ge"&Iz*\았3L왎nu7ijjrחٽvH{LOq?V <b|9*[XN<^SKژUmQ[^8ԫ6qlpƲTGS*??) XTo{5Kz^=Ѣexn2(BM#/&aN ۬Ì ; KwIwbvr1;\N.~ZhLLJp8CD93+%[Q2?3F;HjIә&ӲU9FuYThN^USEtiUYav*lsGIoaƷݶ“PT =י:n?ukWUkl>UO}S ܑp֫߳&{5Phw$J iv:`u`%x dl0+%!85=lcJY4>DuQ"P$ vTi>~8%ޮ#U=o~=Zssϳ ) e@~&GJ*+OSJ^8#x %d?AR߷y LiFl_,%vQc\{TnVBiάSДMJ=M:A5txjuk,#`ӦA%gHw>Y0uf Vҭ0L,o#l"iNf2#g1R3mtbDo#*< Ӧa=-^I+tHj{ʙTkmA3hAk֪P=FR4/IfgeY}X g@ ِ =3y.F|k&`%`&ǕApY R᜜Ԁ>@ S\x Y$Xi^;\Pkjc]z;kYJS3iMu w7zlŸv}ķѰ`1w|МyրAЙK|`!ш֔܈R5Cn<+珺tT,B`}9NjЪ_ 5tV_sǚӽOye1!X1° X1r"OM4 <Kl39OHϭwGGfJ@GH7nj޲2Cϧ44F)_d1 , , , , , ,=$,95]F\S tŀɁT%P0g׏/d#Q9lTU*ب:Ԣ9c~p7XSg G}/ ܝLBJBFJS#!VlflflLdz6G:јL!wiT X^X}F ;V3gIQY?ع}{ڪ\%l.ji%s }^0ia֏oIlZVIE-KU-ZB6?)l3@)siiӤJmݔɤw<,p$ب &j" 25'ri^M:<ސvxɇʨqQQ]8{Va,?c&&o>RURfѥZf*oye0M_lyª .Kbܴ1fWMYjScg4|F^zA?N&`l$w3L0L0jX1ՄA8H:]tJEҎoEn)cY kx,;OcZCA:yȟaVqJO|s 44#6i4)ȟISZgƋG&Յ<0SglRHP-G0Yt#T%*(wgj2/O4M8w׈R9csg]_sn5VϬىS95*IƲUYk7UI/ L"#90ڜXvFec*go޵oNv>Xvvļi**ڌ-i*l5ֽ }Co"45QC& ,ثf+t'P)E ֧&!'nCMuT.`mJ=z*=df$yfG}:?Vs(iXQߨ\vs_-73cX0`s tWIF-{{j'l 7R0R)}$>PXM{;&V5wܲݨ`\i d֕ ﲲSa mp@q&`exޝ3!nc\fS:-~CŘo'&QA5bz:IŠZ|P*Զ+̤5jdIklNn|dKø,[{⒇Kv_И_~t9[k;N9w v֖q^~[ӫ;s.Oyl*/i+2+<ʋj@y353)v6=Sф*8U vN%*WhVZlaͼhݕJ}F"tFa}I > 
w&0&:ިUv5,ȹ?sH3`-fNjr§*uXݲ&ݎIMҡL؜tʤ&*XVݣPv:dlu)Ӄ*k\ղ҄{L!J*DlB"=+z"6\S4ZWY݋?k/Le]_Fga̛OTN$N%t Ñ̒gof$<^>=^L0qNٲM̻b2E]W^Bvv7iAPqzxvOEySt]/bF;Ưn k)>KejF ǚbccUf$Tn% -X)`+(E|DWTJmf:jJxoPDk,O־YaD)_6xlCaur@HJ#Ź=]}hz[;V ࿱z ;lI+ T9$K~RQX4*w`A5Vj)Rj$O-ҪnTXFe$+\>)RtH굚7ZF٠(1vlu lwU!9x=؃zPEnOjh:&S1lÏ%lM?M7F֚oiWɹ^F7lyvw$JJ*"1RFlmJ-_-#| qP`~i+`Ԉ[-JӠ0.? +jR;1$:J:J46G[`2k$T^*MW"2m;eUޔ:u`]A)n.6ҲjY^OM;妎ޖr[IIs/R8c(BznG@ل1-EN4SD %<Y6f/RfGerMCF6rl0zmXcbRPYffx3l}i}4b; -f  ՄGYMA 77ѸƵH(+0X)`_25Z`QKO `xIYM&Ӕ~x)3f3LlƑ߲{j>\dq5-`k["_=y⥇;{:._Z_򩝗/Zx9lsW6 ؆3Bmb6X,4JqA As:l_ _mޭͅ-Faqmᔩ]%K0BVho[+&rbFMP K S|j{e9 Ǥ{XIZ kݓ 6&X`gnKs$PMe8V,M)(̝yʆ._ tֱm*~W#jh[qYFa#b- Z'-v,+aa)z>fsh{tI4w` G`Jjf XI:.&Xl@'AX">#nNP{h1ؼkiG$^f Q%Gi&4g&L奞]\SjE%* EA5|Fz) /'Me&OoSգaĵO°X Q AdŰ"dE,AD` )x N&_)d*}g b&̗\~[:=p'[2K-ش8j),> 7zEzr綖{ *hSWROJ=3Э1:2h95eEBwArfd>C~{ܰ`$ū 99㪗tn6qʳ "9EfMtzxBbd*°)X R!KX!4y%gb31lUgNή2lxw=9/;$h72&gSZE Km˓ٗZl^1{k'ys\:^c5yX<,X. ҆Ez^mt|3|^*jg%(|64Rlh%l8F™*V30i0Ž'n>GۛmLC.񖵗7nW~d0{ڸU-rxi鋚z"Vnا87cK5ae-'/LC\2EE@l'Gx'bTa6Z9CsshI̍҈oGi9vw{4ZI=hUt q҄{D :@72!>e`+c} B{֓l( 3uL:Nޯʻ$K*s *cemfǰ'9 zCv|Jo0i=T:AkreL%|&G=eȬ]=|V}%J9%HXϯJiS!Tz=rȌ!I: Y))D혌*#W'c`;J:`-RۏBV()m*JYi-a®Gh]1¦,W11=CYkVz _V<ۏq~QRٺ/up(6~&z;S_3uGc.*=sHIٹH$MN =F8*YcЦt2酬(pKf'Aɬ -5)_OKnPx;gaj:̼Ű_/k%2_ubW,n;ǩǥ*wjT"vIGolPKTƞʵkGlve*A^Yx=֛6ѨNKxr5|B)CTeC>[‹GhV01yFcDEA塇Eh\k46=uvƊ3ح>If&mjְL)O.enz-z=yuϺ,E^F}*+b IxE=s#3jͼ*߆y<3{656Vݕ⚊%A'(:mRs䡹F@-G3JnFO۬Nάj]ސMCIjkB&ՅXN뵅Tky}YVUS-f[Դ`94PͩvU>Qqֽ+](͎P$pv5gGaj /xNViNDW6&ZDshӠԜtLj:6TD5Ɓp Fl؊V63 Ō^jj./W}2HEeZ$/=eW0`EO֥ AjKP]{ϳ9JxxCޔdE(.񚩕}Wwʁ SYssoIoͼyMh¦lPtt;݅UVluZ-S?YW\a\75%]xX" ʾ#_r 7-'y%t:6@K:NH+~82zR$*+!|jTI6Ee'6,ꃖ:W4!i4( )%|͛f&Q6~kɨ9j9z`L͇:^QZ[/n7!g3iUط㋛a-=(ӍX1DfYnUi:6yA1c'b0cd\#J^WԣhJgg/=f!s86*>l8𥍏ֵ)xa5FCU+/殩wdUϨR=^4v}IWo<-UbǨBW5SÂo\HLm)9Bn8,ܘz7=MC@pK7Z_,ȊR7lt[$a*䊯]iBPYFư][pBCc[VNvT'M FrT]܅#;. 
~ԲLS}}fTtdv5Hcw;Omqfנ;Ũ5MZ޼4kϜ=,_8[.G:;Q`,&{a[}[iYٸ)l IhD,-H߮=R8uQ~W(7^^5 U:^pg]n,)/xG50nW6ԛ 5fPfN=FQGa 2 (+YH{bL&0hgcd;>}A.2;5G=^<.5ZrA8\\by99&OSߕ0Z}N)l2X5 B26X>2B#۽vx`LC{k7\ti F_YvÏ5c;(FC,Fw2kH[F)xlàJcМѦWalڇrQvR=2ml,ϒۤiec,i[(J9`!']OH4/Yf5kq endstream endobj 82 0 obj 17692 endobj 8 0 obj << /Type /Font /Subtype /TrueType /BaseFont /PAQCUR+TimesNewRomanPS-BoldMT /FontDescriptor 83 0 R /Encoding /MacRomanEncoding /FirstChar 32 /LastChar 121 /Widths [ 250 0 0 0 0 0 0 278 0 0 0 0 0 333 250 0 0 500 500 500 500 0 0 0 0 0 333 0 0 0 0 0 0 722 667 722 722 667 0 0 0 389 0 0 0 0 0 0 611 0 0 556 667 722 0 0 0 0 0 0 0 0 0 0 0 500 556 444 556 444 333 500 556 278 0 0 278 833 556 500 556 556 444 389 333 556 0 0 500 500 ] >> endobj 83 0 obj << /Type /FontDescriptor /FontName /PAQCUR+TimesNewRomanPS-BoldMT /Flags 32 /FontBBox [-558 -307 2000 1026] /ItalicAngle 0 /Ascent 891 /Descent -216 /CapHeight 662 /StemV 0 /Leading 42 /XHeight 457 /AvgWidth 427 /MaxWidth 2000 /FontFile2 84 0 R >> endobj 84 0 obj << /Length 85 0 R /Length1 26032 /Filter /FlateDecode >> stream xy|T?|νw̚;3wfd2Y& YHna EMHAZ}V2LhM%m_-VMϝ^&|v=sg=W$&2Hx"|Y?Q.i +6m · W_ry>_!\rU|ZB;V\vq>OY|f/xu>|u+/|S'G>tŲWۯ,_aTʩGˆ+]NQWPb!eJ?H#E4#I&q5^mٴZ?yu( g2bU^i*p6?&jN_%U!]92DkAX7#,EYe]]%σ+`m,_B9jrDer{*Y!rAmUr^ev6rnX/\'FfKfj7M5[hB!|m$aAX"Gа!tUJ@@D1tW-_mbbj15*e2*;V|FY+1ǕRbk5d֐Of Ҍ j)xBzL#/ " |%GBrăYc*U)Q:;"*"=(ӭd+m巪 [m::VZ)eQz\/ɑ @k~;y{nn-wDm'<9Jy;X'MctG;'GSRx__Y[.i6T_ow*OX >cqȗec!#۸ט5GfD39Ti5ݚMkFiK.q#w+)1%#G<+t W2.%%!.g) }F)y q9#HE~Ļ8ra<*G9!r$J?(Dzߋ8Fd) q9K!HJю Jj/.CYR+#RRrI7=.E17#Sr"bA,s Bs<Aʥ-V!t|]r#܃탹yP_uR A# ?҈:"u\GWIX/OsAX-("9rAstapn]<1xL?V*#J|_V(#(-nOC%^+[ףoG͏EG$Fa1)bňوyIļ0ba]1sӋPv_28d~4d ChNA2h߯/fbȘsLœpyC.$zp\g (usrλ=g1h%n@1tEȩs:qNUsr21'Yšܪo{WR̺!q9\g3Z:=@h 9hG%%Os Gs[u77sN<+=AR+pCӏ\'?יX435K~MW2eU T/z^RT/!Jϳr %3Y'-f>"3hh+ݯ"h֔K1ט[DnZos5S(ab#)xܪ`nU;݉1;j'Mʠlk%!{HV)C68}8~ha͉*8(~Թ\[04^&$e U' 2fq8c`U}m9a$qLA.4ΞnNlo ^s!/5sxKpUw}ʈ/JL<%sUnR*:V#P*0JZV}Jj_ zLzr"m+M6 kZήtIgtJ鈎prlNbi`P3bQC+rsm&k'NK.O=tnvd<=u4L 
gR+璹Z=hndQ0dwۺJKo765{M3O)>#*'7쳁l%KLzfK ->]ƭmqCt5wY9уf J3]f4㖐& KjFx&D5,P-Vmg7omJ66ѷL`xR_F˔fl<ǭBӽ2J¯WUSU}R<8V6_CϙHo}'D޶i';<&V+V3levrF iFhtfӥIwviٌ7Lڳvyִg:ƞ5_￞5g g g͗+Ϛ{N+ս_GZ{0 qFiu M 4=}=YԚ5#0JZXHUYPl\OOU (Id} oq㕸0'6'ձdRB -f_o$+̷%m;;=kf1;ٳ$hܹa#2T)bdݳ* VAD<%{A&p჉9_/%:SEryBoleב@%]pE!fTҙ6 Ho!wt]COSycYH&_3i j5Pi=l'o$YLPE/]rd ]L/,$zyLN[J=Grj7N>Mѿ0(WaT͜\ %Awy$Zͷ %ŵd_1vxL>%+V,Vtr 2N8>y ͤ''j&!ZEkf.I\7ߧ}r7HkĝkMf"9JHFƨwqg3߀w_ߧUS@ڦ/&묟r҉^l*婟t:=-t7~>IQ!pspGrGQ>'#w/L|2iLNVM|x1e I46K ~7|8?@~F~uwL;A>|A5XM^(B%ZDKvnzI憐 %G8_pK/y/-/UՕ{NS mL{ߛ k&3Ys2knfb2d 8?c&pc!f$'|cfڰZJmz#A-( K=FߧZYx.BK m .Gs^m%N>ZW:T~޼C=[TB}Q?PD}B=[fX֠vA-ݡ}F{D{/S M/Tt{>wV މ\ cz浻~ tp7KM[G| ~xLmT7ٌq~;i}h7Rr'G;ә{9uν8w'+i-Fw1y|N=uƫJrIS:DN+f.9t>M'_ U*?8_  D}FXu5= F&zktνqc}upF~F#'oi~G"a'An*DKpzS!&5nhdFdj:HprI"yruΣp/xZ?1@z@xhVb57wUPO3MTGrR#DW%Zo~Z0Ȱ˸eF}<|8X܀^n$ 9 t yug.9< xBRH>:ۈ1>{{gpIe\ b'Ԑ.d Iǚ@Z_}}XlUԩߧ)?9[ÿL]EFa{TOBoρuMB2p aܿ$9Y?M҂MVg*+ed(JpH  >*p:6j1NQc׈vif_(˪RGGe(XvVA_6_o k-e\_-|KLK*Ic4.?!Hcʎ)N%[IqCݳzF(KBٙVl*6m!UJHʺDnI3ڳ^ >־lp'Um+Y”҄)jڲZ15Y -tdYޗ4],]lIw_>ڳ$;#rUC]~v 8ܹ=ݻ{aCO\lfΙx혩r7tgx$LVC^jV\ۇ̒s6s>|h8v.fԳlF~'y!~&U_bJg'V:%4g,ecfge![;Mcidi\=we/ƌv H v~F^lD>#3K-KNgl" s16)Ta_$]4 mX&ˑ.Cd4.Ռ)8 9s{|yZHAV?o\Y^{4wPξU;wrzP uSs(c)+XKiL)_,ꋇ:Jff|chv.|ukdS;׆g]]xNf9S ٷsr)$H;A)6÷3oXi/[.>XE9rm}=>UJ3M`Ya8R?$2U)J~bJY(Y1]OmQncIr>:r;,Dza*SIp!>@JTjY7| p+;$gĽKOM|Ó/Lҝc+\6fuG]'Bi$dSl kZeo' fCH6Jf.[޾l(}9o;BzKBC>q0TNQq-E V/(ZqŪ#jքB"6db鲮ZNuù1O'NaA/gIڛĀmN \]UYSSmRD[fv5 vO/4+9ȽrQ:P7zoTH>r"ӽPJðw0s©ށΓ'Hss27d׊\X=@7n`4R_P`֬=abkVgCVx(%p5Wg\հV*?u߼΋~%Y9<]֒D]Z{mG}C/ؓ cEm]ܜfǽgO;BΩM+װZ$lUP{UZf h/~~P}fĞjaz/1R`üE[D(JmV7uR ʎL[zΓ@Xɱ^中.5Oso,O'u18.Zĝ-Ј#EyucV֠ M7/}y;y^y&&_Pך Wϛv{Z ]xUz so{_M]W\b=rb.|4=f(X7:tf8LNLPh*r~ n٤jLGzt}'jVqVT>+a("^gW$SooFN5bd^T]'fZKAlUb6lmډeLk(-?ʹ}E_?/rCċS&^#Mv/}x=e9A0;q6:\qSExFY`3Z.U&*ipEA{yѨđ$ c{n9]@\4j *$\X)\X& h%mNEݒ5dc`"Z]uH$Z&K!<ƏǡbCq9ޅl\r h@=ub6 kn1>9lu햲aג=y0F|{vF+46j!@4IZMrقM`1ZxQp]a?93S~K(q[k/b1=/V\Bз5ΩB=O7o6?yppߧ6iWrpʟ?*U韃DNwCFEP@XCgD*() W$H,wbzg%t:zMWE\QjVV SPxz76G3Tg[AR&qeu@l c,c^+H!*w: Fre2\1M4&BiaQR[aOEʤ:&$)4b. 
ش1abRa&"=FaU=0CaEhԬTki)9~_s3z?ܱcֶhmM);xsLvE]$v/H謬ܥ72ٱp 3EiqM B jPAɧ8:}n(]jԾٶپi7h\$j)tſEՑ!_-^dqتY(DSF2nQ z4m6_+JEduzA4pN$|jg{, I1s7azBلzN㏛]"2z,1cI6dhP$h)٬hE!ѪJRykz=ۉ}/`#KV}Db6įqU~]2MCp;P/I /H`QE8V{.=T2\ĶmwhVXDt%"Y-\':{\IW]Zt yN?HZ+)8˝(;˝TɍLu1Α98MqQIQ|4w[DcI! I>DEB%QDbgS]yjbU4M0Q1ʬmeվ+I.lN.H.MKnMJ>%o\.哫hE3gn SCYS;;> @zLX㔼 U^}D)p: )EMhM*㤼5<(jRT3C+߰QM92 /.K5D}T5}RMط7S5qce({`ba]R aM{0h/1W$;TX-l6\$ 쑈?7dg~d4@I=3Zn j)" YH+pM=`2 M dfbA0 $bGAxCUCmudk;r,FG~ M;g O[^b[{NOQ%tBm$BCMD23\H2;)+,8 :\G|vy2e&&uLX%l ]?pVDtsc~1ݧV XJP>43Q72{`-Y4lj1߾]@^m˶m7q&ne+9"ܝ;8 CZє\)0PUԔ{dʚJlH(-I$/)Q#Lf8q8;#fdFjC1$R Z\L|=SPVZh,6Y7>a|U,U ?⭦8Ay`Չ0BPt@(ӺPw/$Oa"N@[bz+H^WWG6%HYZI0⍼ēkk[ܜ'ƟX=LCz*j,^13*x;@/*DaU% s&rWoJ=v/6}A3DlG/eEW^}˳\8L-lV@@!!e"~{&#] V%O( D"P D9~X4!GtVwOWА˳{BHCbU-4wW`BS`$ɼ&iNzA %gS,p ɦ6ECgėtէy\80RFyQQ =dй66:^5VVB&KdiIcsUGxUΪ몾QHw^yTUY48uaJݕ:km={RcouzίNb=J7tpt%Ɍ#zzy|-BaIbiTU:c\p[U!0=" r`'75qMON]͓~OZTR,N+W.uZNvgVܬIc']nSBCJ{LS?yv(-jʱנNhJzt}0y}bVN/ p[ 1A&|m 5DMS5tx̯ce)ÖY?K@ ,%\h1fk2x480cҊPu/ŎVUnZ;LrqKW|7ybp}] T,;.Z[[{ dqEf&W 1ϜҺY kj$@INPܤ.:xmQߏekqѲjn5#4"k}fnn5陮D.*/[-)іX,7a-OKF-?q f yB ħXu]Z]dC$ߔ=5U2Ԕ#`6[硽9Tfu, }FbH Ab%M E_cZ']F=~RJwf= [ŭG5pCPb6BÑ/gI}n{o˛7T'5$6a^C슚V->M5+UO,t}[I'b|gr?ޡ- UKMM;`ј-8sq ǞaG^yFmCe l"* ~-4;|bGX BR;Վ(dvdhiw(Rrb:OW9jm : 2F+ @+ >"K62R:[Luazዺ6IgT7N-~s]C?<' LZ!KA2@d ¾dңi?7ͥE2zF-vÔ b $H\8}'(;Oy9C4őI1Nc3"xIl"ZdM*^H"Ik-[[kRw0S RˠukPw:OY̭@]W^FЖTP,X2/5Դ+g9zVnЛ[Ϲ4扥3)wq|/޼ߴƗdK.u ~tq*{Ac$Dn}׹ι`gS3<~o~N? 
[ /q>_WnEuz}-C.BgiܡU{!Qbkdo$ZC?&dђaPت/i,|QEB%ysRh=e՘FczS)NS$[,d~[L [ Fd] Ӆm)?H$}4Ƽc[LvP6*~|~q"7^zwc)V e ×vk {ox寴ZW쑀V*vLѤDNSCrV5VHb[&M&;~R5Y>& hrS)@lҚ|QuןK"2`dwIwR4XQlf7f& 1(K*3!Өa*7 v`ѳSwfy+K$|^ Xpg-6 'N @RRN%O~R)cc $s=\AI 6mRuUu~=[ fY 1g6W;ouLLרL;PǯQ/Xޞ=dZ&/Y }W-4Ήr2gߔ4]kTeF?a0T5u|d[/{Oj3ְăFM%I}XMIe@Tɔ7QV=ddy\t2uj7i~/=LVܟֶv  mvtx'Gk8*^4nF.)mq"ksZ[V#,BD(Ua jϔP f d){R$Sfk]|Wh)WzU'eEݞhd-ށ/ZƔb|T#l&VS:SÙdn 0\@̀3ØJp 0v $*OsIjn@<q|Jm VIЗo҆ g+s4SkϻG_t \}<;kAMcW*tpݦo95 }$m+n WT\P^6+obo4]9|+j)69m^[Ry KCOsƺ26L*-jƥkTpDHh"BƾS∊a]))fI*aw 8*JVYFalЉؔM8r!8  DEň)D("n,2G!Q'8b2*e˸tY?DS5{!t@@t@2Gɫq" iMD뭶>@Ў1(>yS[YXR`\`i0_;s7+S*10eea6=Wr0ma'L,2X*v*GXO=X\ z^Y9ligAsJݞ+:& ^[Q]}ݗVkQmط>!\>aERڌ."RR_]YhBL%$OT,<|¯25}Q.m=H!J̗L}-{v&W_ I: өLMwb ]AN AU:܅HxP?ƃÙ fBMc|J5#KA׏8|ie񅲼xemڦU}R N4WԪQ.^E:EAfZ?AT h MwKaVdd11P*Q 0Ġ,EY#݅SF{Y;7wGj;"a`,cS0tz#"חWhŦ3/x9{y<߼s&7_gqneCVgu৺fi35a޽} "miR$HT,yQhmTl@ZNHQq$i#TU~Kj0tȐI0@S IsvhI">+V|#W茊ּCfK"qwriPjԼJ뇑?46(&_T$i D&ҝXV|RĽ_tEYE:Xɼ/,Y=P8Hod~F:[@,hl底u%UƘ\2nEu뙝 w!Wh=2?f_6«dQ['})(ZZ9{.Sz̻f¸fZd tq^g *`*9|{  ari$YX^Gp:j]5REp_k7z#c@Q4x_gM_-ٙieqrd.&CANt&1to!Ex@ hEf o R$jwi'̝ fXQLlaIݯj*{5LAEV-g/ζHeO,ƅ}L#O5[}􏞆 0+ ]5ӎ +݂G$&I\ .z~Mm*mj" 86ъs׾AOdͭf-rrb1-YqjDŧ,˜:ozʿ,<v*3=L-k"P$i83,ŚT#]ѐ%%RelzId|?[}x⚲e=W6~2 j:My~)*o ⴓ7x l84e) ic5^^]ץ| O4M+ҞؼDk]QS/{˿h_Xl}긣풉޾isឺ^BNg` & >`NV`*(P.r2Bgܝ^JĀJ4vv P )*0 V99WXlD ,/q,}qvT{\˃ FBb+*(g- ̌Dh?6#~wyҊ wdؘ܈Xx4hmYiRͫ]0OcE#{im-esWRZSzڄvzRgOΥmsf4H+ߞV'p(p.W!ΏbN~9ɪY%KBDq^VA["ӄvҽgߖ$#Nu%Yƹ&lGGJDJ7?T8ڎ/(Ȥ=~^nwwv:ڃqq{^hOSsK;/F5EBp,,ͬlh2v]uby#3TWRŸx뉱boL΃C(/BPN-:n5H\1dnl- 3z]N,e=_b#CBEܙ='lWR$s* ~D7"+(98a;GT>~=k\h.~>-Iߞ~ ׿T/_Vv#|`i+F=1[ vOsSSoT)j7۝5-ǿ6a~U/oU6|ʌ|o!.2NY#Xִn,ŴXOĀLu3)oMuTx=185//ZmFA[zQgzwePM ? 
"yr8xK29/uβ$;ߎ^_<6V);<gbk4ك֘PDq _``u)i|,h*st,, H8gv˒ץ{=ﱭkӽI/B|$1^\[}^|>tHCv,@;6SeEkJZ)YT[jJFxlQ?=FN5cG:&0l$umv4F76f%Dmi#mR_hL =/Mz^D$zAx Ml,8@.$3oǐz>m>Q;6J~xi { *6%YXQ}~6fqz =ZZ &Tn_my@];JQAi  ϱ 좾iyt!`X|b5*۲K>6UdQDC)5i߶L 1d9{|Ge4kGM2cz<0K(1r|u\Y6!;GF$"\1qjc>{{vyAS1&aB<N1!uXo~?@O@:h"Ev(9\սQpq8sEg9EOf&MZoUyJ@X7s aٜz4ݠxZ_ϩ#'gB10^c(DNIH#8Q%81?^qlEЩ-`'Kq _uRY,,/Y۝Kkghm2 endstream endobj 85 0 obj 19189 endobj 12 0 obj << /Type /Font /Subtype /TrueType /BaseFont /MCNSMA+TimesNewRomanPS-ItalicMT /FontDescriptor 86 0 R /Encoding /MacRomanEncoding /FirstChar 32 /LastChar 211 /Widths [ 250 0 420 0 0 0 0 214 333 333 0 675 250 333 250 278 500 500 500 500 500 500 500 500 500 500 333 0 675 0 675 0 0 611 611 667 722 611 611 722 722 333 0 667 0 833 0 722 611 0 611 500 556 0 0 0 0 0 0 0 278 0 0 500 0 500 500 444 500 444 278 500 500 278 0 444 278 722 500 500 500 500 389 389 278 500 444 667 444 444 389 0 275 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 500 0 556 556 ] >> endobj 86 0 obj << /Type /FontDescriptor /FontName /MCNSMA+TimesNewRomanPS-ItalicMT /Flags 96 /FontBBox [-498 -307 1333 1023] /ItalicAngle -8 /Ascent 891 /Descent -216 /CapHeight 662 /StemV 0 /Leading 42 /XHeight 430 /AvgWidth 402 /MaxWidth 1278 /FontFile2 87 0 R >> endobj 87 0 obj << /Length 88 0 R /Length1 33000 /Filter /FlateDecode >> stream x̼y|?|}}Mnro{BdU qdQ@$źஸ/aKVmm?jmj%>37]~̙̙s>s+V'ZIx"νl2"_'v]2X,ks˖XxŲτ̍/?g^LN#YbG]bՍ{/]:w;?lUC'x ^>{-]r, ^vt:!^FѰbes)(CIȵDF8b$r!»t"e7~E.%*y4<_/OoRTVJp"o!O3I85;g42R&H"CHH1PoKK؏cHw5tQ3x[Z^H5ON"q|?$w&U_4_'w#Q:퉖V{D 8% D* ?Gz/z7]IEm`Am6EnB Ǩǵpw{IW멃 pSĊD1Ŋ7(=boWØn>^C%x?{#ײ+I_@{cE01G]Y2UF6yfN *=1Y52Q?=]Sefͬk*!rLND/WTUax7:PUAt6 țn7w{vnE0_x iT d"x"@&骉ArOZ~PptSDw-%TDE9$٤Gju\䈎ε#s[rsE9]{N7-ӕtɜn&Av{ m:JI{06l=\ě){#]2]HPΓ)dT~G"|Kb=s>En8.z"Y6RE# +duOíoPW북Y7@ۅӀYXH@On3oϮ@KN#,#>e>1?#3>ˌRhĩ}pDR#R7g;;\?vk⩣h{dThL eFtzR/.ѤڻՑQݚ(LhC햣Nfxy@0嬻պ#Y5Qmjr^3<<5dD 
/./i.,N-?ߪUVXU!>{-ݮ$RnR(ZEܴbEJ/ZUOV+R$(r!~uRYg<=P߸ǜe 6>܏5] i yIHCOYʏ/7 1F5d=RE#K22%}2^W"AbsZ6(ϸAo!!" s/ K$JɈBbԐŸBȑ .X#mSNY#%Waj"ϓcQMKd! ADvd+ πpg9IkT Rᤉl"w;4MG7!i-O8,&r'I~E~Etg˞ f)Fl%'jz7z /Z4* _$GcNH}t]X)ܘMx^1`Er܈y@#'7TJj/p{^xP(fZHUd3قxifJNKk]"*|R]x"3hKq2v #oKr 3n{h7g~/QeilH Ca,{R7ȟkfENB|7K UVqdŜ{ : 4bW&<(ołt ^M7[C]n,[-)#|"]odke[|{!-F5a8=Ed޲Vnr 3@W;:kh8>^HEt=}ѿsFɅ[;Gr^w|^(4*v|.?oTR\xR`/k|"?5}PazauÅ' v1Dp|8$ i%ȅ%d9`܊I|C/K^O>1+M_&'JK-E/ 2V:w{}kwGzY W5sp)\n> }~ 2V;G\b%Z|!fd T?`nQH1^X(\'%1OdKd;p ȗ@SJBH**&)P( 6`qOn&Fv# pzN+mcF>Fx|3C=wMgdlIR7@DiE- HL%:Q :i+=Z-&捀w7sN,.hAv;Vi ri2oJ @?n^` |AK!>N^~H9}N0-\Xr**pܿ?w:n_F)Tv NgGzr La/&\p[!&_C뺩H.΋̛3kz7?h6nx8D?l]݂sW`C^O=\l1xMX)`plS5?jf/ v""zɔ5[d;Fì㯝yXݪѳSIe1c$Gq͜ya 8W7uRݥ .cT./[=D# <b_$9yz${zf-δc-gZ>; [2~`[ZwS<:5 !5{T[TlgyCUet1V@g0]-/ y^B PjhpLq] !—.)O 8?*hx.u :mFWGmc@Dcfw/t^ #] vw-k)hVO)h=#D{De`n6٣D6@Vֲ>AV")&N UH0mHuNdn?W3J !:#,InvBG \pzZHAf$iFOE{;YZ7I‹-үem(w_xU<֢M +-\yO (W 7aE.^EΊ|"a}G^H)+ }G2)\GW,@ +h*L @jؔx2\ʡ(QCd$eoœ@*"؟xү8R)t8$%b ΥAJCg q >Z p#q4z(-S3BH᫞"BOOwwWkD.xF9^ycUz Ϳ=?_5t6XM3ʺ{2}O ~z>؄gɷJn),L[J+齙;*xPevO^H^,7no~nh!27r7G O5 6&\OTY2L1W Ӣ YP8ġ=X#&ƨ x$AƧB?Aެm5$6~z}RiB&|pkkb=jd˪M#8SQ*6-?&Mm j2ῲbyG頖Z5*rTmi8"I㮛t:9~V>BWXŰ+?䆲ڤ+:.wI= *!}e84GKM&-!QBK*Z֍ה5&o|#^IM|KFҰ_Ԣ2^yӖ^z8y~aeu3O&鵊x"^m5S\!lV1|e.}?y`m3}nKU'qۃO}}nP~<?z 7,X~Qmd̍}gm?FbQV\+% aYa_1\;y‹VŸE(KT;BdwygxKki:NxO)ǩc1S:dʚB S鿦 S:y@Vj=٤To:HWO5.e_g$ӱ|je_WK٤犫-WZ3QP+'Kna8 ɞjUվ YCri%q7q.m-'ڌA"T/O -lڄZ a=r Pzeʥ=3@pg-Cy$35tľPC^1\?IR%G5|qhv~(Nּ ihj,K6I=9xݠ|:WmTٳ-|K&JnϭWO LNsγs\j{cwcx4| OnεSGuXxW'<'/ڥ!3a4 4 HeÁz\[9pƞs/uv &^{‡_WԊ)f_ue~ 4+73w~haR*Jt1Vm=̂&nz>[ͭd2: ( @aJE n(1rz4PvAtD4 ' =EܼNTy!jԋ:X{\A*Ր2;N4|Ř@f`>Zo|)e2;@4T|aM_YNs p0"\GŠD ěP=8.Ÿ8>rd٢v-%1Wq%F/\1W0W>'nQޠ_y.QAvO>ty̑ s%a;]Ef9sU2\ΏZQfo9r 1au;YƯO`> M'0`@: Aj|u=m2!{tWV7uBrf"JxSUMm{qh\pH+6v*6?@MYHK^yݢ67޲魫n}?As1/ib-ǘM嗕/V~o꧝q}ΣzX,)7ߺ8A[y;>PHW6`Z9A> zR} hg٣w53zY9hhh5BS4ћ a %? 
S p kPS*:40` `s|9]ޑtvFCtUlNW?erY}ۍO.Zﺮn1&>.x­W|鬝S/_ݍedJ)wv:ӏQ# \1uj,j5+ԛeīUvzZ TƜHWN\lG"(y"S(y\4r"},;`fRGjUJFRk:9i z!z7JꤚL@'q\/Q?4*BT2Y?S*A̾}Z ZC| }^IQտ<QTjqXM0HZkdmȾD4H1Ґ2`ُ$lجLdNF. aHX2CV#P-i(?Hŷ/Km譲 :&|~8asq1Σd*E7vܧ{_|{;F覇uGcN- e)fY[.p˞"L*ӻă:]mv&UEgA[e$( 5E aWșR)wv7}Oۛ^gn+Wt_+mgW?}3/_˶t쉥5ƞle릝h|eZ:0l8t@Œ9<@.!<^8y+N/mAj@sD!E*bZ63mDs];vٳ|9_\:-.:o''U~e/-%\;xU}jI  Q V`lgs9/I Ί?Pb\h4 ;K뇗@ỠְeKKJ`z6y`7%<瀯˗eV9etJa2Jxd1!B8hrԐmrsG=80:0HgJ`̅D,o?!N_? //AS cn|XOU/y> W; \IQ!?%2U& 2+862:?H1rNVRRǎ1Kcc{ ^B ڿ{qznΧϧ;gK "̆$LbVq#o#vɇauDUh#4iL\Y&-roPA,$Ȣl"k 7/ ں*TmCD [O(!e7>-9K,bF#hڬ}P?LJC~OE>y#6[$B9b33YcE&U3ȇMa.L"!5*6jcASG .O.jP ED9QC?/5& ~$Hu*uTLX_C?T̾p~2cCSu}Tn{ ]AmgjR)755zw&Ry_9ʢu< gb5Sˡ㲎8Pt[?Z|V$"V-֓s>lX;RlDe_4IPK;0NQ[UCu $i86T%! rڽ$Y-|ˌX{[Stz&g_6ᑾ~u#۟e؄-V=_{NSә-|Xj\ߖ+~x]bWCà, rGkFgjVWk`¿juOa6Qަ&7sbmՓ°ub1+Ӛֺ^ꝟh_dK4v9xa # L|$1#HmV.DQpYs4[% GYt{o5g[\?ut殫q:K.o%qWoӷO2cf@c_QoS1k\YXښuVOQڠ"i]Yr:5Y% D^QAd`SPYeh Ook6XA}ZeuXK o.-:E `*:?ϊ}F\3N"fsO@QV4?-"BU\|A7ѓ_zsh[_{wqߦwAӚ|ܜDi /S;蘷>b Z6w7$UCBϻ\л{ aϊH:Wbkt{„h$2"OዂAsG2|,͑/>PFb)_1`Rh6]ƥs}d,j?#榌*O{D3dܖע(?YΕ븝4ixcdK:JEj,#Z$x?"xGJP x,g/ ;񩏙Ěj;u::H1 `A+@tHv2k$n6efB-v_Xc IG۰emYlkq,.SU/~Sݹͦ=-pGV<+?,om,K\aUߵ+e\WSt:vDб}X&:32MWBi\AzBހ29I_ VMј?[}>&:6"V6?po*>2qW,$oDf/Nڿb_ ojŞC$`+.o\">DmXgZ?f9_ EG ^\Y JK }C=W]9it`%A;iN Z;/W*?&onH_xY4dlPNr0w?ݘfT[m+nUC^'XN:k]5Sk #}H11ֱ5GARo'0Dq&j 's5ֶ+I;/7*[ X?,`OoKÆ:;mM wK RN8 FrUjJ68lZSjy9 "f˨6nQTXuV'UͲjOصHVwRW}6֊}ʡ H28Pc6h4D''Iz2MMV ZX.Dz@({E-Ҥb+5Jr lSTUb*%iLgOEP¼о7p:t,Ob?pֆ@N V?Sf Ehh5b<<認I şYW-ȹ$Nzy`_6־I㣼Mi6:d1*ư*)hF6V:)y^˧9(Q< ܁Gİ:0|21_6SEp_on3VlHgFBe"|f1V2ANgʍZ+ҰD"9v\7%FlN_%r_9=kR&34 ~3 XUʊhUmG̚nV8)'l`0ТNZ(Df:"oh?+̈l[3w7306$Q^SFb_) f (K6:iٚ#:*?)4r#i'=V-T7Rb~s,=sd {y~ۮXL s|ӥxQ΁ bta]T"]?F/[궙òXL'/J(B+9ocb|:ϙj91 |Yzo:[}wrU)top?CVLFʀh4V=v<yg/rф'r;wJp%Z6X!=Gg> +yuϰfxj_1K&&׉q"pu8p.@r" ɒFlX2RRWr7-R8=)NjτdϒYɏx#4jMS(t.=Xk*abvc==.Ia(UѫXo$w#l`B 89W>m O.Og?.%Ֆޫ|w׷vߣ+-)KxwVZlMZZqVVP&2N&jAgbBAg3MHQxKI5QD4alR,/c[{wisínk|6%>Z^>tzK pWie7ҧ֧7a/'䜝;'%vG'}!\MKh_]V, ]\{ڧ;:AMaїT}ɤlB^1i $5aj/Oe60?$E%Ia&,:zKok,{͝vjgH I]UWً 1@K 
Mg%۳f36+l3ga`l>0c|B?dObAXY8Ҩ ɾh:3o]?}ׅ^YHdjdݜ_,ow4ϑMv kmחKظyak& >뛱w&@d/UtvUt,P:>5,!ȕ5w-Yzqh|{y>QY)h-:V#}:j< Rbg~4BC$vtsq-bR2RZells[zD =Ko lA„D {0\Ta,ՈSGX.e3A.[nxԄtZAHTI DBӐ^¹{ :E8PŢ+COY% 0v~&b4%`d:FKҫ}fpX/ׯqSc~RY +HcOxݻۦcܟTG-]}C|п8%;D%V()tJp@8tE/ +v|ݖ@O La[tT~k]pUËMͼ^Wbk];ĭ\%aU!XJgQId2k --\pKKrhS%BL1Յ=YWFQ *>a_.< jY'=Fo͒tr<aV,E½47,qXΐ8?h(V qKyKvG>ԗ)ɫn&8.==$^L?_lW'kgDR6A2#@DbgQ K eQCnQa]BS5kic1҈!3IF9#xH5}WtP5^_3})MZkc*ANvUkdNpZE-wy(mZ=B,"O$n[mx? ,ϧE@5dfuiϳN?_Gb9b3*l$~gjvR3@& ; Ω;|'}q5,cȺ&I&vS/@LʝCeuR )@R,qDb"B8:#MQn#Ա@D?63.P>*p%ՅI)/I>[bI4HruAjk!GJWff޵;jIlԍɕ?ΫZ-12r[c*e]w }łmїű v4]ֹk:Q2f:]*Ժr͒tΒi+ {67O|MH =@?`l0P*TC^!& rL2hmQ8ZK+mN Aj΀No.F%:#<ѠzߢxSP18hҊ/IVKY\YQto~:/sb 3T A1AZ_V\W z6Hr`XpkJ`$d0$&~4 ˜Ym;v.{K%⫾X|M9,[VMop l e/*v4 j$XӐSƙiĪ,U9ڭY^@Fdhv^Vp}zқY=A~uooZ?8Z'E44b$ o5 .K7d~/1LWd"l]ES_{A>`CcgAE]=X8z!!>|^Ճ9M5 vb1Z8[􈾨 2hU?WuՓE2~+Azj=2~D|83 r̰tI:*Q(B.W+X碐~IBH$rKi&{.'nrJG_Gg|5-ݼ~YmwuEY쫏?|tʒʲtV̎sI\!}u"}]Cb9pߍļXc0jbN3&`ג3OUNVqIUiljbrjzyۅG-=¾1}DZU_DT)U2QlAFLJǣD YB C d CkKLNjҋxsS C[Kuf5͖N vJZ‚'~Iݤ,WiYfrƐg^!gJc7]gx;% F]XrGtMU}nÞ= sGM%UhQij9jQuD0 FXS PE#9\ 誇ڐ 4ӈDn1%rb Rl oQ1 '=JD7:1 W KhS-id"`Ǧ~.sh@g@OCw qn=#0 c@C/ CGie0_<'zp!MlMp;C݄X8ة2pHyQ B4*b! Wpsͷ]>n5f51O _UEC=:lxGNnT53_>*ߚu%n+T2E%K?~ըHEe( uz4^vf~Uu~ؼӲg6ůV1iV`466nr뺔|n_de`6YrAi !xw0<__dU%ȥ%#뽇!0x2g,oВm [&~ƶ.`+*b&4O!upqC7f2a]kXgo:ᷜM%gض]?Y]vqWVjӖcdžNL35txyGKŪK&֨%S%qnj5 -Hr2bִg$+jhYOZmzYN]GS`M9)g"ئ0\Ļ {\!(C1׿}Gj[ @b uUQ. }+@RsQ.=WTzld-W@첊j=ɾ;*DZI3 \e"+.8۝[J=t\$0lmyQ;&(f]ٹ XUg)^zEOc!\u;)#^KAQBbqy}}u}9%S,]K]jwf$%mZL/;> אG/MFG,Txm?=-%멖ËX,+*_ %ŞRCkQg/pbp>ˇtv1u[Cz`h氢=!¤΁c}:h8[(?X$;$KQKq$ƑZt[]}yVscIum0౷]$ET(s>=v9 OY'Vh]T- jV]-֩qiMǍZGSs[$dT ImUE "OBSW,J2_dl+ ~juLE&JD2aJ@gW贂BͧW%GZ@4 8~ڑtXeJXMMe$Y 'e-)IڿݝkR%F߅_>y"o=?g7$=S;F=^b/Jb W!I߀811N(+[ޫ]RwKjV  ! 
0&MƎ-cx;o1 3&gl2ɱLN$x2IqgQ3W@N2jQ{ևp(Xek =CSsSl(-BA\t!CP??KF6Wd#h!<胩]ߵA\1r2 >6{dGpu_]$F"hdm?̑1a <~K}b:A:1ad2X}'̸B+2)tm+cL "6ŴboEy}n1]-|2Jư3PkBz5ނe0ZuZ`OSҀֲP =;v4Zv\*%,`~_)#XIg"9.]87`N6!,~h6@FK?'=~@Š5#|K$HO#{[c nz{EBqP44aƁ(@1NCI&P7UGtq;W:2@!H2"A%߳w|sGw-74*V(v;WF*=q0Õo儫Snww=ԙm}sJ4jkjRg&{ҿUoFcN^3zQܪqt怵ct%71tuKգQRVr[n bj:VjVzW2m^ͧoRƻ ː`0Vg$ӝm)L΀_ԞA{ϵ0'.%T򡔧G'G,Y/AF?_ #NG/ǭ< =^ ? ;3QvFo&JT&.?u>tSߘ\[_U3rh{$<: wYyv6WB@m>1c@_R t*-e~11?:U½/9X:%a&hYnz9SsJ%R>7E}XN}\PP PulFin{H<4Fb3J Y1T-8<LS e䐲ٽ;~݇_"]j߭=;vy1;Uو6kxL'{Q,ڇ2㭮CZ^82 0ˍ H G l'jG.>! d2H#q#o#gA:T+h%tw3jHv1|a56s3j4^S\;<%g_;.'E.dsB`eА3mBD3ْmT~3ɰ03f'ieV?iJdrh)$X TjּNLQ_u.|nNyؖ)eh-_gۿ׋OwᰢNn˘UG K~d'/H~hUU 9jq؛KÁ"E8l3_xYLP_O)O ؐ}"ZD0>G Z\kc~єH 9J{;?'RtEQD)^؋-*F'!'^/*>L| Pu үd f/Gr&)tJ&a2ԡˌ\h'6Hd#/Gvޯ`-:s+?.M'_[m~{"[9vϪji1J<=gl;{ޤӿ"gWP'!| թʘ.$ǥIj .FQve2je(N!:y][ȬcbF✱kS|"{\SY4g;8Yd\ ` Biry! "@A|%.)O@_F7nq֖g]W-|7ھ~фs˸ќlvd{& _cY5|s~}J.J +FՉ5i/?e)淄 UFUl;1X/cl% ZʹynyJ=& B=j@9&֊=Kйl#)i >]̝q`Ag!rQS+I:(~{@Q FtƚTmPC? @}Rѓ&()!(d XےyL:!)zJ_M[1[}H!ٽi-QoAЛ򄴫$XK> ;^վqfYxy3+ivكRņckbVTm$S,9xebDݽ]]M8E 6V XM)Z|a+0Ur 9+-uغuUiEt(:HGXcyc%9zNOc*'2nɽI?W+tESЁR+D֞XQ|~mнAٷB9 z~RS$խ7{:n*$s;?j{`$Ma$E?J'xU+qOكM9z(vD ى'|%jkIYڐ=;dE]D&q|_d8&FDy Ï]{ |".q?\%C7zhO>?o,!mW#M 3i ˄ktD4 3M6et]dr=Ua"$tpCՅ=OS|v $y]]E?> {.u2_4$[!j nŮqPD .ežvW5Չ[|GO$u9)5+rZ^=k$YPAO ɹq2d$ݨ6աaR4 ӥu}:N+Tӊ'S{\)*)Ox8&ĆNqc<+FE vtG?HfAz6LD ,H>^ӓ0Pc]QNˎit S dwN1)r\}NM BUIŰd 5l|au|]VQptVQקԡ֠N|JAX&]?[e{bz9" },<:f.>i(TP3K6b³%@GL&~F9R'FBpD"Y)./&b1iϙH>{IehGGDHUԾhsw,`$\(|lWP*A2aQ!YV87T2],/ˌ{C\؆@.6!fkVD5i m1+XW8ے]_]>juB4ط<}uo1wb˨&TABOʬ0i;1yr{_ -7/IEIK:aJM? 
Qˮ2>XXUOeIYkzi׉ةγY.C=،&lKz]YYA7I<VR@[xE~笍Q%VgB,,V̚c#ɬ3ZqM~ FnQj ;PE.}qXۆ 3-NQ΋ *J RŸf!p?R\_e~<{3W< gh:.E s:Oޗx|NrP=yV&m#z,2"qK v躀6h`  ":Z8q$%#oöG xPBs}:{NgIӓGX.?FDUiR+%TWtJyH_$}ºjDJ) 3Ri>4#u9v?ݤj>ZF1'H" QMM?bo _דn )EmtƔ}O̙g󙏑/Z77jOqmxqXAcʇ>9ĸyKaF7f'oڐ)VT8/ 7^tp)":Zڊj~3{MLD{3:ݠ{Oi,𚼀 FxBۀ0ʆ@SWٗu\J]AڛlCkWB|"_՛yٜϗo |=%5[R}}˟wwvr ,ΌIF>8C[gnh _n&R xgSLM+nWn w5s_2 S"ÍYW4Vmmfn+,/rz㸭lW;=`FodIV o݁(x6r̵ۗ/ 8;8=Cw*sj~%=O9ɷ?; .aR{7ƌ3˰ 8Va?Iv =Db| xMoqɆ[F^MlZ5`+ endstream endobj 88 0 obj 24533 endobj 29 0 obj << /Type /Font /Subtype /TrueType /BaseFont /OBMPZC+TimesNewRomanPS-ItalicMT /FontDescriptor 89 0 R /ToUnicode 90 0 R /FirstChar 33 /LastChar 33 /Widths [ 675 ] >> endobj 90 0 obj << /Length 91 0 R /Filter /FlateDecode >> stream x]n D{b˻!E")N>Bƅ>@b f?;;`ɓc\aٓh;pަT5(d}M4PJw8=0ho=pUpAJᔯ{1,eߧrL?lpFc (ThuiY0NGkӵ]:-_|TsnSPby7op endstream endobj 91 0 obj 222 endobj 89 0 obj << /Type /FontDescriptor /FontName /OBMPZC+TimesNewRomanPS-ItalicMT /Flags 68 /FontBBox [-498 -307 1333 1023] /ItalicAngle -8 /Ascent 891 /Descent -216 /CapHeight 662 /StemV 0 /Leading 42 /XHeight 430 /AvgWidth 402 /MaxWidth 1278 /FontFile2 92 0 R >> endobj 92 0 obj << /Length 93 0 R /Length1 6872 /Filter /FlateDecode >> stream xY xTյ^k3 ϼ g&  $G& h PH@@(` Z+ *mU꣊%%hN&@Im֊OE(b@ϙ_}zk2nBiAB6e&ݸfvh[6?Id9rI< [['q8$Υyk7lJ}ͺem4?k^ے1`iۺ-nhQʙ׭e0/_i0-dAI4HyMDHn-i^|׹%CgA xʀs-HgAT\|ڐ2f0.QB~YC}HO8%E&5Y@Gpy w!T<"h,6ʤd'0VULЇB&d/Kj+ F!$F*o z!-^J=qr!7 !Dºz CdYaroO;ԧ@vJEv)Ӓvwz}+^ %L?holI8e¸')|(!]eHǣ{*|G-F ;TCEƹh9>G5&fr|384xs x͜@g SB|3ü5S(:0ClZ`48?B#ӂމ N4`By i}xw@f MGy˽QHSȩQ MHU'1zTsTT˖غlb*rC~btF(eCj pJ{<Džd?ȁ3m?ف X i2vWT]쩬 =iR}_P1W]]j@8`Pژ`oB :vbO+F"VbkU]xQ({d<&_:HRŲRΖKeHDqS䷐%D^gb]!("6bf҈bKc}˸8!PV`A,N/ R7J̲,CY]o:ZKRGccf1QEm|,a fYr9κO].G˱u9CB91uĄ51:AA"KnP軕L0BPsVr`tY}o\4s\W^Btw} xS%BKN*١K>zܛP $$ ldd}ɶcyЭpfghz sJmC3=7M}cZ0<؃gM5>ꌒjS)9N,V+TNeӄ"NulI:(lClml+l6Vh˵m#N`{=n[]>" pi+N)&Ŭ;QG]MpY/UkeMA #wE^[FK]'< N@xYVOSc$q[6&CCݙmmwF4ʌaӇN?&Tj2hE}cZcڌyE^U|3\+ :5<נs]mj X;bvSlVCkCl/{ 
1!ݩkUQڨӔTڒ2^Sd,N:mʜ8M<4S"̓@w^ɞ9ۓdoM>g}I>7RTëUs}CNŀm8zpZGa-_TTKTkj;r׭i46S7gV.7@v ` y+)ntwy(:AC\^/8kPngU7h<f2`wQ<8 i767h 8HyTNiJn9AtG):L!}?`/4 ' ~r(G^"&K^Jǘ(cHEE` (egan })5$J}ޣ? 2S=w=F~1 m??!u>uN;0?3:󬰝3bx]VߎYs-Uomi;R/4~.AF,:"E꿀kɃ䣹խ]=Gxr5n>#~Rm/ <8-qz>O9'|;.w[6.ogR)Ֆ9p] #&~?I:ImOṼ6ŏkbX-։6rȱr*-7Yvzy6 Z^nB ?tQ֯!&0ϩ<D^/Jɿn>.1|$Q giCzyXwUB‡CbbRm^ľSt Jկբߏ܇5R\/\ND5iRm} ?mQx,mf(t܎vS&+k,c(w x*]΅ bgm]XyI#v.Zb>%7b9D"ڷ`o.i幸:1^TH >RVf?ӴOC_GމU7SLV"4g-Kj UN6uJ򲉥@Ʉqc}jÆ: v ۬EiT54_<3g/4HuWh.C+$C\%PR2tY4W̉~g'ΘYf;f݁ W8֥q+ 7pwjJ%SRQMEMKust6+"=\эCԲ}hR ֊ӐfWjXPo\e5-ߘnmu мfղYP% endstream endobj 93 0 obj 4826 endobj 9 0 obj << /Type /Font /Subtype /TrueType /BaseFont /KCVKCG+Arial-BoldMT /FontDescriptor 94 0 R /Encoding /MacRomanEncoding /FirstChar 32 /LastChar 32 /Widths [ 278 ] >> endobj 94 0 obj << /Type /FontDescriptor /FontName /KCVKCG+Arial-BoldMT /Flags 32 /FontBBox [-628 -376 2000 1018] /ItalicAngle 0 /Ascent 905 /Descent -212 /CapHeight 716 /StemV 0 /Leading 33 /XHeight 519 /AvgWidth 479 /MaxWidth 2000 /FontFile2 95 0 R >> endobj 95 0 obj << /Length 96 0 R /Length1 8520 /Filter /FlateDecode >> stream xZ xTյ^{3bZZX 2굟`m{ۚնZ䶵z) /{kֿk?uKw;eP 2[Wtz`kT}t)g:*ULz+oZnry{K[Ng,GCΦOZT݈{Wn^r5-(&wc|br-,L4ݫW1(:V2"N*zܐ|4c$Uer.ϻ>y@U$uŧN>#$ئ){3wѩSR{BYcDg 3>jvNbIgٗG+zt]bټ;nIޔ3B6){Xh0ȍv{ $.nt#v`&# $e}<ܢ!iS U *:rj@H@vbxPx6e3F:zvܧrx<{toQ%QÀ=LvTxzV[.P2U7!I-92 w]>-%}M­DO_&(SiƳ}=W j1.:$rNS2붳R~%qU&"+P51/o'x"طڙ9r;8eX¸aaTNE(_/xt$x&O.z >%_h&xΒ!XG=[mJ՚|79(]IzeOG,>:&9JI}x/5s+̀YfeF8@q_e%dz}Vi`Lr\zP))CIҧ$=j8XEuAwT-]AkqtD D@t(DBD"Q @D*7 DT!h &h &  & TS!L L L(r r ʁ(Wr ʁ(WC! 000>O!|@) @ 1@ 1ĠB 1 _/^( G9 G9 Q@*Q@rtx2r$M6I`&M*lI` kaaa)BD} D}@)D}@)D/@ѫ@ DB DB^vwRwNoJRjŻH2[ tQH>ݜe{{yx{\{<>vq*BhʏAAYj> ~᜝i|9q ;R=%JW2MtUr$͌1PeQ\L;>Nf v0.5v6*A2P*J <^ *Jk⨑^sgW3)M) ,a{.^>*oEl{l}Kgmjl}ve`oLtMB"L/ka/ EҺ^7!+'ρD[%T,'L,">`a##qg$]# $صf~ t;.?-_wӷ/ا?_?X}ram{r}Sޢ/ԯ֗eaa{ބ(~e 6ԋYA_)]c%R$ﶾ2F%^u3sggwfxӽ^۫ycΠ csɊd2 ęӗ-yh%[qa}?,\fjkf1qZFt]#hp9iS5J@1HrsTV;rVCQ r/'X5. 
[LXRp&DKoE-kZ= }T(.k].yKo 9+R=_O7_1zK]$^]]sU_JvsBR]-}UmV+_+o {&Ry)`'S:kvT'cTO^[et|w˚|R" VXjIpU+zkE"4y_$$ Ǧg6gh}TS|O3-|7clMbi lovEyG"-~1+/vP|@;{i͡,Zm}8G3xC_(䜦_uK{:+4bEqK?_Vwc3r$ ;D>wF<,^Ҧk5Mگ\i 9sCo:/:obd"\ଠ*vaz K%6GsU&&9^W[(7 3DD1C|C<),G4V]MkK43Sҵȵ˵뤻pɳsgJvYCq]/V:d % rq^GFo|VȊ,<^Mmmgc E{e۸P9 jD\'npw2xFo??q%ZN{\{Z۫k7]]ܝѽ;3 _l<+Aa^Ÿc '##baW%KXpYxbh:{66H65^W/YiO]BڍӨX UZ]A*~~+=nb`ٝm_CUv4v5;I2;p>%1b.8N7ӨXHzcm~ }27+sut>tNJiǡJ ; va-+c>*9-NOYGj%<Î|׹۹YM?+eXvD*{].ۂ}xmj$}rYU`?pqqu{*}mŊVs:FJoG)bn򨔦!ޙ=L+yDZOسq lD|{$ΉCܟq6#j/iDoNډ҆Sqg(%==S+~M@W)΅:v-Bm0X?f`Z'~|Ob>abf{S5LBo>z"8Cc|>1ńf(νbJ sbjk偆hL5O*o- .PJ/%hXi؍QLM~̢3HF9/ c(xi])+L%?+{XȼPhGS:%)s)5.> endobj 97 0 obj << /Type /FontDescriptor /FontName /RAKCTO+ArialMT /Flags 32 /FontBBox [-665 -325 2000 1006] /ItalicAngle 0 /Ascent 905 /Descent -212 /CapHeight 716 /StemV 0 /Leading 33 /XHeight 519 /AvgWidth 441 /MaxWidth 2000 /FontFile2 98 0 R >> endobj 98 0 obj << /Length 99 0 R /Length1 6780 /Filter /FlateDecode >> stream xY |T?7K62 uyÐA2 $L ؈lAgh I$ H" URK\Vy (j+)պh?"-79sλI%mL!mYIƕXNn5VO"27^9oQ>x![x5z5VEƀo_Խ,V~ܺ%nϑjeIIvvtuUʑ]Թ-DIZұպ "YM{F%t%G%LK){7'u—\ ?Q( wϽ]?̳5_R9p]d$-g/G^w~1N\x}bȋT8QMZZ],TXbP#aB.p0J t;Y {DuتGlzSE&t@6;'5Jp0WdF3# ֻ`aQmUfޫ1>yZ]s+S_\#b<4{SJ3Dw2<2F_ % S54^;]P .C,VZu~?b~wPZ_ 8~'dAp0(QS*J*`8,=PWfA\p.e߆6[GƗ$.8 Bfn\H("ߌDFqQ0bX`#*";^㸿~4c&01R@̐ހ; @^ޠK/0 G"Gu2h  <CH`'K`7g{;4~tU`0|XՑN!<O =h%]T%q_ dͽPĽi $Iܷm$U$q/\ IwHǵ${J#$(OG8ʦ\T~ttt)YEHa!"Ye,4d6ZBXh ]BP> yYhPw,:BOP YʼQ\gjqg凌}Ru"t慓:,朝/ªXҎI m8H7 :9RA9@?p3a ZTsilL4©TNq1*`(Pͳm$Rٔ|=QF4kZ:_PBu7Snq9M#9Sc QQKvԏ!;4bR#"^6HS{CľUX7h8f_x$jf;k}C*G+$>q0 WwMuLwrLBuo㨲_+q\)xbb!&;n 7:Ye"V2 K8,y\kfdM&ZVUr+YD^| 1/?3b60Ln3ę/I,xÌ֠P\Uj+L7PCc6ӵ2Of+cj|mQ?tZs%h@2VeUW4:ϏW֏'˓mm hR ogM>;cKJoԋʺ@!ʮ4He?+^ҏTk~o{̯7\2%$PW`)Lt OJ]OWzϡ'#D C!Up !bg91å$:c$# I9q'| j<[hi|A j뗶giUܖvɛ۴Nkqթ=FUCMFO.R)`u3gƚ*;#ǚhXe\' Zj,m.]Y]R/ǔCY.8(u.!!` jD}̛Ϻ+ Fq4˿g) `]ú&auKR<qnvyu6$g`ĈU[i/e9G)[qO$"bAdI:@ϱ3hh7#PCNZ,htwl}7L : ߫h J+ixVS jJ]/&:Jet9]G,M0?PP a3w-~G8ے4y1Jb.f+L8AtszoY[.jC?/;ͦvN{X6;MMd0e`euEhJY 
eS]WYa@fBFxX::\YaJ6c4FLQ_5_R5[MѦJv%;}b1Y1(Vx߅GxHyBΜ7pB;⦻^z`*bfoy-'ŝckfjZD qlkgfG'7ki.(5(3.VOz~;MC>Gae}tB9N'%A(*sfl#{d(;>+KǛy.?q¼Ï߈L1LxX1ADfF܁@Q(:\jjaiz9k_ xl ?x{lfߌ{+2nΒV*]ϖ!acOқ4Ɯ/cy r5o0ooE$T1TbhFUh8)ߣJP)nţLT(KMMM0'o7G͟TSijfmlc9f ";'d๋O[Zw:x-*hfczOHuBP`V7 g4ncvi-#J"/Ot֎'Wэ&G]I nE_] {<{Td!s.G^. 2h>񫰋J͍+kVX(SR{9VEKY~4TĶ9P^;zi.*Zh?VWinn%8[ )-s̓.dn=KC5_-c\c7p5hx64^Pe@76/r9tR°Ng]p\'kJFRF-Vdajf N@fFQleŗ{M[~â[>NT8CCߩj4Of1N"y2X@nzcnQzpKQ=4ץ\>.nKw}V!r<jz\l/[;c?qkxm&36J5\jRB p% VPabN1jv֏Ct^C''F !=nCW29 RERbBBLsW^{MJ?w8šQz|d)Nn?, endstream endobj 99 0 obj 4537 endobj 20 0 obj << /Type /Font /Subtype /TrueType /BaseFont /SXWZRN+TimesNewRomanPS-BoldItalicMT /FontDescriptor 100 0 R /Encoding /MacRomanEncoding /FirstChar 32 /LastChar 121 /Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 500 0 0 500 444 0 0 556 0 0 0 278 0 556 500 500 0 389 389 278 0 0 0 0 444 ] >> endobj 100 0 obj << /Type /FontDescriptor /FontName /SXWZRN+TimesNewRomanPS-BoldItalicMT /Flags 96 /FontBBox [-547 -307 1401 1032] /ItalicAngle -8 /Ascent 891 /Descent -216 /CapHeight 662 /StemV 0 /Leading 42 /XHeight 439 /AvgWidth 412 /MaxWidth 1333 /FontFile2 101 0 R >> endobj 101 0 obj << /Length 102 0 R /Length1 11972 /Filter /FlateDecode >> stream xzi`[ŵ𙙫Z,˖dK%َ$ݺ8g8/lBb BXZi_deSHu  }چDν2Y{jrf;g2sշfBH-@wHՄ9SyV-^g-iT>i Ry0 RyBYT;7?Ro=yNJFys)څQ媕k,G(Zptk׭\Ab ČpȀaUy^vn'3kVZXIl_.M飒 lp%ao}Ӣ) (Ċ$Le?gy^ d}ax <nn.E*;!VIJʞggk<ʑj54$•a_aGyeϳ pAQWqQGGAO$bA!N_;l F=Va` QKRG0L 0$ U֔ =oxYǛxx"O7xֹߞ;=rй3sεl'Ϸs2nfT[#%K2l;0n"V͌G3-ʥ*b\Y1<!WJ\)().b^R/Ř-*=Ca.0|A1 +>:tcc`:!Lw1])Nɣ-,1+.bLߥ][Ҏr`E^)VvG;ˣmhs=bVp`# )M;ڿ:8:ph9څd5A/h*Vh + mf7AufBv\kqZkT}iZ 5b}h O#'ki`=yB*w@c [N,8AVUOcgb!x!~?MNF"cb~Qb{"&#§աX NXck>s1W c98cP...|v[ $ F7!\52JbԚXNqECsř7"8L AU`'p b51w"5E"s5qc(ccAXx 8(_^LV 8?:?^' z1\CH]P9rUoo׋8{5DC'Q4\hÏmsoSqck.߈y,}2?!7E>cy?CTn|q&1~Zi:] ;"(ULW+B(E«p*LJҠ)ӔjR)WrJ)KK^-wbY<@g1/8a.z2胷0H=ү` TTY!耯|>@yONA+z;}ٰ}p?}~ Q8,HTfvʝV\s B: >,O$D 
i6dIHEM=UH{`'E6$$%yq%]r'+'&!yn8Ζȍi;̄%0AO/mze<Þ`؛e_%Oޖ?=HiЉcmE\)x ^F ^'j~dH$7/7OWٽS wKeI r`jqqՋVdSx^?5&iHr|pő^%_:A;q0|=゜>>㾕.%;M$Bg$cx*:wD?81Rg|qH GEd&';c%Q9ͥaZChCo#2e|gk.v~鹻e2YWv!3|X|"yegɟ'?L~% ;ǀkQm0{3JaA!\相ނ_x$b=o+ ed#V\o?MAK3>ӿЯUfb?Cد٧\7spe6-'de/)ϒ)lGѡxV+ERH##ۆ8 ܽ>Jq^)ȏi/jc5 Ooj^(DZ6xh$xQ&봕k"\8 - q nr3s,ܽm.8=*[_rZ ½uNaӏ۠wQʈ rty~ Y S.y'B~ bx<'Α}4IYtـgCm{gK)`-yUhi 2VNא]\#J4tG@})AX'ɄnDqd.;nx4O88^/Zrr#xd ;B}!W~6}޼< l.-'bU$j 42 r+VRJX8ro˺˽̇^x xk砤oG)6,q؞m]h"hPm@.j{jOPO&`,qT8CIHű[X騟[!@=UTWUVe%EoTA~u9yݖm2Mt^MӨUJ\1J nqFz\{B1ł zN,j'b b.OB SI Z- 8&3NfN&w3:,ۥ)Ŵ˅ ͖%M(q6G[nY2T uq0j &5ݫH jnk -.1njZaюIM9.WWa JEA 4JDQ4siW;O[^;3zhm7\\b6 gv8'u^6%Յ}`[mhw!''N_\JjQ bI2gTnp/XփXvp"yS;ݮh$d4zcMa`АN?H^XNI) ]LMJN"pwL:ݸJ1ZX +tl]lXU5 ܀$Qv| v_n,){ _X)n+JzKG' E#2X/ GU'$ttbb%rug\ynԙ;a^N btiXsib͖j6q="!3̻2TGIzammNg@VmzC.U/u#hFc'ˡaR-3`3-y'vB[Q*!Ζgl*R\#jO~%f#ˈVG&v K`mSQж37Եhq;[zz-N{;NjFݓh0Y?lpysLEh$&'yk4ySZ5xqZ+*Egj.T}@1։L;lD]lUy 9U6v!n152܍hYu"[7_0\^=,UbN7\[%f#ם8TYrgkGg;жxO~4>.\ǡTĶ;L{S>4KcHMȸ)-uܗ8]`euWUJZw!(6,p¤q29O(J* UFQũi\j5t`#Dž RyG͓Q6<-Vd,d²e>c/0Ey=ѻGYDY\|@L0\wI,SMDg FMD2mś9FGU(~JK{M1=(t&0#'jFknr>=u:uMgowo):6iZ[/}}yZɧ-l k"'G*'=aWo2zy~\"} ,[ \UPS FBmk+Zܽu[nfkKpK̓G\ = sr-|.W(ʥ,T< z>7tڒcI] tmru8x.lqq 8T^+~9b'Aq ܎Awq{~a9` NV4)~a"*Z̢l0GEzMPPp %$^[ ɮ 5VWdȞ17t~!},qyGݬj:UZӣ2OŲ1-]lO2d+Hl_mY8]OJLP -{˄$\I={ܲU jp8h?;|-4nH'*3*ˠ1y5ih;u9u?ƒ1[37@!$ǙETz͐ZR z4SDPQWG՜:a rr̢:5Mff1gȶnGBT)|ZNG;V: lt\Օ' oDwnd.GL^|\G,^5&'FCIMّ1 ,DiҐI^`uGƔy'[-:)@M"^cݨtNuƩ W9"mLZ˨e( <|.p\w>,& ^ tl5^e(\SH:=ׇrQ7٢"7(CPpS( /%{͜@^^k/\l'8 FWyQo-f{&/^@zHY"gQJ>,&%4HUG2H`^?Ӓ֧_=&\B*T'eʮ)Fqo~ֶ8^ָQz{1-/'Kc!_5 Kl eYhd('sF9;M@+~B%Zl\nx|$Qzo U'Ʊ7Bnϑ,tw8S_HKY[m;Ld('d0Pfi(a\I~c`M7>Ols?n;f`!#r9U,&%kb 6Ca@%P$ڇ UJPVΨ%aG<v6hl+G5v#.}4ע R}( ( 虉"po0=͌`eEV8ޠ"e) *ʗr…#e%Y_uI[m s}*TWC-L^18+8+9"ֱŕO4޻4_Y :ZNO7䗺˞io1fA-[%Efc8w'a]e-n͌ӟO],3ZrsvYoFadmX] Pii4g~P-N޹ɜNd j1)\G$Ac TCL:[Xs-uxbÕ pB#B92Ĕ1d~ET܍D*/nJG}CrS ?3G E!5k))ŊcU9 (JX%s(jc UeE䜊:_-(0Ԛ1KC{ʘ Hw䑹u qZyZ{E8t|D+ r)H€\#ת ~4_i-W-mJgsKsK7rK/}hץgˉ5 
@u|\(4ΙSJJeYNcҗs[96lNJAYMhQŔ!]eMQ])|֡M!1МϕUz[>%ň,ȧYC'$nt]>VJEզFO<'<ҺE2ޖYTqV<ʑA .W>!RTeW[Zg*gZZ毷lVUQ VF'[L}~A. drS $S: YI"=迯ߣ߯zdD<%Z+'ehIʲUe[8px`n.Y-&[4A1À IBAwƜ JJJD1RPD* PPHȫd*z-Imϭ>﫚ZZna2]gEq)ݟeV5~}jWFGEx5T!L!ciQ.C _>b҅_M{"L¯S? a@|1\pS:g̜> endobj 104 0 obj << /Length 105 0 R /Filter /FlateDecode >> stream x]j@FzYfl% !!ERԒ߾IS,ܙI0|_Vct]tlnhg eMO,36?h=uSٷK0ݯ=9^w:qu>*ץk}NncmNؾN]uz<*__,h*;_ee~+ (x_҈NCI(8J V}DA2mP@E6d^ 4I{gMVޅ\% dއVJVA^JVAJ*XY[m zM,oQJVAUyY9UP}Upn-%jC5X†$+h^N @0UЫ1mp"07Uڙ/c:7U(,1U(Up"D md,VHV]E ^)Y9;R%`MEV2ߟWijIduY*15 endstream endobj 105 0 obj 501 endobj 103 0 obj << /Type /FontDescriptor /FontName /DQDYYF+Calibri-Bold /Flags 4 /FontBBox [-519 -306 1240 971] /ItalicAngle 0 /Ascent 952 /Descent -269 /CapHeight 632 /StemV 0 /XHeight 469 /AvgWidth 536 /MaxWidth 1328 /FontFile2 106 0 R >> endobj 106 0 obj << /Length 107 0 R /Length1 22688 /Filter /FlateDecode >> stream xy`T}LfddLf&$3dBA|s""A ψ?!ފP?b7L|#~wF#^CfyCE %įB%#~8x<0O"Aq4)ďObO }{ ģ=GqH<' !vs 0!ăsJ!}#C܋q7.;w nG܆Eߊo=M7}#!C|q-XV*ĕ-+ !.E\qxv9f"AlDl@X8 1XXX8*J|qtiS++ #!" #E|" qT(,[Dύ[A.F8v aEXf|B>#€#t-BP#T%P`!C!(H$b KQ!>K*~">A|q!(}{w FN?!ފ[`?"TCMq >nix j[ ~x %,Xaį/ba~9gC}?ŢC+,|ޏAi|SXaaO"@<؏؇x ދE'G=GAq>6؉x~ b⇈Ͱ7n 'n;n +n,wbr;݆9o-f7͝P xmJŜbkW]pVyJĖxV]\Z,54$5p1]9/,;!}C] lS ?9$wd$a@C<r?} r7] d;[@nM 7r=6@ rrA\2]}%'7%pq&X2yL6!ƍlԮAXX "jSVD QDT ehD&ˆ0 BNIP BP! "e]-?y=wAށ= ;W@~ 2o@yd?>@n $A77ƍlrl@圍X8 1h@=LGzDb6ٌB^#O o oByy<gA1AA Byd7.[YscMs+F0 )e%ň! bb!b11яC8 ыADaTu1B"  "c!|B@AqF.q$۠ؗ@~ rW / ?(z/%}1v_D [6^cs-z۱WiMzS6M;6nܖ+lȩ6={^z9egO놯> {w͛ #TϢj]5kw֔~r%kh5ڵ&/Xcq4֔g]cUUVj'W_u*n'\q*;-g JsrU$z|%qz(TP)+vһ ,|KѬng??ضh!;%$mҁrA!_O Ί/!O?wK j9`}e"D rb%d;ҺI1 db3GOKnK&?A=Gz0^eZzɝ5`5YCdH!y|r\F.'W.·+Ud+\C%%בmzrDGn&[A;T nRY]> nrOA!c0IA>A bd. 
P {ȣ$ABo# гOIq,&ir'#_Ѓ_I,e$ ͣ}NĽn0RXZB \rZAh C8 )V"ttrT6+ Vyt3cV{[k iSTWUVDE/m2ZJ<cBR¾!1@Tybv$SO)bNq"'5xjImqjyt^W\om{but-^/ih蠧)|֊-MEtDᛱLU\DFTjTU,߷zQo2‘ -{l7 -uv55f{R!ψ)<+cPgrgx0Y[:/M[-[.C_c`[6PX)ANJ%=Dž0bӗkIʺU ϔ[ɞ--T3:Gh:&k3,!i2߼xˈ(nY4b L-֥[|}ЗҼߔ=:"X{F|^=o/ز{x #yַC(r,E,`%ͅ@?{Hf)U"p8q,IpgH N8Qa7y9Ŗ~6~iHՁ.Te 1׳zx!F-5iˠ)r}$0=>~/L b2dg6/b 0l]s!] di)e,Æ#ܴ:P3bc!оF!FZ|S۱LY=(ҿ%eTP7RL6a)4P%>HZ2Ȓn긖XA2X2IT٩DšZUL]P *A)RTx!&2uhZY]2<#VLW K#($Ǵ!X~57CY~8 r $m`+@660I^ؤˉ⢌cR-o B ~ >dPKd+ &Sh '+y|?i ]N$io$')/n^{KI+ #cBe`g'{i{ϬgJZ]25e^mOTWMO7Es{"ݍUtΩK2xf#yx[挿'hd>W9$,,66:U @MyVH*+IqQyz4v j[ʟ<涜u \0q ^n;xʥ*9/tJ}uYsdžޢt4/ohU gϴy-s{)yx^3q:4~k?›ɰc6H˄Lӓ h[JK J]W퉋Ic)8rFl890&Fɬ>Т6\r/)nَkw/c>G%Ц ً-QE$."_O;6Rp=Ək.O ra2,y-v٥ 6BDkFl_,wB3ly=!=ɠj2O*ujtVe.mb]&yʖr;IڠpCQPm/Dm`%7:CU4+w(c9W3A‡k@0 *Ϥؼ=<3t(s2'@\O .9Ğ/~ktp JUH+](`7qU-H3nVA~8Wc`W\ lX3`'&{YMFF VL끭'A}ì [lNJHվP=D+'p/PUo 䙾59ܰ]gSQnrԖkfWR WF"ZWE0 uww7[2hd2AkN;2YckN9."ԚcO>iXa!EYǎ9LT%g+w7yc;moԑ|ӱypNg!F'Gut_ξ=_?uO-VVxXr݌ci$Oְ$F.0Br R ]mذceϿvcѾ,-oͧ&4׭)J4w>2&gpU$-:qc_:ٚ;7hy%i$cw>p>n|6me(Z#ٲGvYӖ [z%EHanLu{g ,|K%bu)KjXhy #5%ZujuRpnZq`)kȞW\X,+7W%ҐUV|֥=9j fQCIӄ>o* O3kZT˔ɓ.mdkh;U0Z>^2鑼h^TlC ŤhcԦ[Ţfz" @Kf>1B*1cb"'RA3<>4Wͺ%3Mu^+\Z2"c%M :2WhU!1dYbz[N)sawCZ Eiu6Ws8o_ aZл,wadԌ&r,v̴ٌz3%{9iFi!6PnF&m:xJLg;U:LkA; /G'Ic/p jC-";oljݑ:yvne6ZsmYS0L5Z y\b~QX`drVZ&H;)x̐WͯɷB|^[.QTJ+2.3)4aQ!Cc 0OIPeroUe&ଣ -آ"EBg {@Q.Q\ic.,l#U0{N`YΈ,ZYş*,.0Vm=iKmؽdcpzIYeQ-W4/\5u EN;>? 
n-׶Uf;s z/]Jf^Z#獿#j8;FnL[4] [YUYd%Z e7fHYMb֑v뙿k-CuM]:X-z>%CZwܪlX9 3ܐo}|D"Qe6kق0=`hݢC6U1GX`We͚e{+\kvС1Y59ѼRz$Uy뻂: | ?54WewVE̟g^>bʙoUϿb~A8INnEtP/}=ƻhۛi(6sMmݴCjږBOQxWJUS?37`2Iv3 8؃du?چ8WpX aXSNCX 2e=ez yn ԁ{$q]J|芞lT])v)T|U/[FsV?sfoR6ɗ-]mӼ7t:m2چᓢyb*jΉL >X,J~iո N*.d&e[f`_D85(-Ý]`(OvF;DXܖl;Ώ̔ ]O^ō#Zh259y)&P"m+9:rsӃMop6ۙl'+\Əҭ`HyIIbង 3W*GD[eNWv|R0=2tcizzN;8ԫhɬ)խ֤ziR9Yg)0h/*.ܣN$\,58Mૂ'+x }M;7eԈ >IDAS;eLE7VI32['a ޚL`/z^Jw.dJKZkJ,rbFhཚך)gj ;mƮJJҦ'5^c WIv`F}.+ _ 2< ?t&V\\Z<,IJ0j6˜"Rc֜Ux7˕ĩ6* *ṋ /*GShUmk;ۃ='L̯禇)P?T4՞cb> R}PhbJwIqbudҢ\UZ'{KSiE+wo_&Tvs?XS c֯  gҙg83n83Ι S{Igh?D;[Όvcu6>zٱ1s2RSWSy[y<~Y4.j&`*hy{+=Kj{I~}4 Nij] gwG3`FbLG?yiv'|Y? sK~eFkk9=yBJiέ11oUO C8m ëmՒ[ ݢCbq^ݿ[$V#7֔aֱ̙e !u;3`+_zݠHm[>\䁴R@r`< 'ߝ67Iv4XF eЫܪ*nECmj[8]OR 35L&ϤȲ2ͅ`,TK:G (Ƿk,4N7"*:=Jއ} o%`zxlQKD[U!b"\x]_K;}Y >1'@?!`h@:m_42qGӎ>>Yquwt~KEgJuH̭futuLG^S8G,2e^0+Y Bamm`']3`Kj01[$MŭA /ta` ZlFK쨅O&YIeD[e 6uxƃݕ7h*}~5a^TDD2Y v~(&nȽ7 jDzՎVujb<$b;2捂*)[M*.˄2cpFeŌQ uN>|^. ͉.-~v4f"A4jδ۹k4Fp+Tzf#Ə~k"7fA33\Td(8Q4T ?e6DJ)_EbV,|R?>`3H oge~OJA|el}0I_3>JK|F"J RßQ#wuDp9id;1]r.r4CSF N-#hh+h9BB-'5+a#V& zHa_+J~O>ozO8[v@[(3Np|mz/8{QT{ؒvN52p: AI_$ x#D ;>,_YM&+bJ "f_UT/"*r UP2xlIp)?RQ CTϫ%=&;e'3J+EmE6LkǾavh3 ^:t\VzdxtiQ{\|~›c9OEexr=s~A]MaR![3%.iim֖57$Ӝu}#kuy]~&'ku[6V6SKƭ;μ>rk$6{+W[ie$r!d rVHVJpmrI?FuZAVpIipzJx_)yee]/suڂ> endobj 108 0 obj << /Type /FontDescriptor /FontName /JLXLRQ+TimesNewRomanPSMT /Flags 32 /FontBBox [-568 -307 2000 1006] /ItalicAngle 0 /Ascent 891 /Descent -216 /CapHeight 662 /StemV 0 /Leading 42 /XHeight 447 /AvgWidth 401 /MaxWidth 2000 /FontFile2 109 0 R >> endobj 109 0 obj << /Length 110 0 R /Length1 50548 /Filter /FlateDecode >> stream xԼy|U7|-Uw'-}K:I7 ! 
$lA!avT$, Dn@EQphθuY`ttAt{~ݫ9{ιjeתE@@W oBh+}KKQXlL> 3yy 3yrqRdqh+=)\Ҳ Q?](I:XɿЍ]B A_ @礆h#IYyX*w'k:(%y˭߮U"Z\i2["߮愔9)|/zkrS]>$'"P"!U @z:&T@/QXA!^+gD'rJ͟ƍNT$ KOe "0O ˜+)=SgD~B̔/HRFdPtQ5Կ"L*g₿&V k,nppr qIH -}z @/{]8ߌ0X'P~$so&9oRG=B; ߇|.ۇ{QF~@ u7Rwuw#GRa҃Kq+31Th,J끹z"ͺme fڬCuJRXB  F[<'\ <Wb^0ے*-tYLñ]]ԯ3ELVWԙZ#Cr@=@4r&A nPzj=*f@/ Ť skv]['H:.u:raEBBj`X ǀ\(' r8'3(5ԌR4^Ps DQk•L:&#9#rF:ΡB (e' <_oG& ): s*tkkjJ:787<76w<<<|l 7Śvpmv&‰Xb zޘwe꾩G1U;u S ]_<\;'"Lj"ۇ˞D(}4T8IoFy/ P)Hc2d{˚@^WiISSO*My^ psx?z!0_98f ٓ8Bl%mT'1xHw+v%ܬJMS_N1>;xc #u8n%:SuƦ:cAW?1%T~ӔH7o70v,[ <۔pFd8k5>qJQB7ODw~Nq=WS`DP_Mwj&"ف蛾Я2/Bu9,,bI'=ȟA񣤆?Wso0οiVI'+?>}EkpI+:һ6#h-}aj uo;N•!?1srC_?` @|Ir;o_Py|T:GJ$&jV:o$%UOOy9|A};y7 =7qty( NJ8Z4~ 9+Z}2 YS} 8{_>uEn+ijPZ4;fwBxZ,ou[QtrJs[;FJW.yU ]%iVjkfikFk4&_Z֤5hZZ+jhNqeM M%FhsjN&4hHgK7h.N7?jOȥlCh;erPx6JSԗ3YTqQkkPڕw?wIm3ZRO䷦Jyb(15qҖC-o?:xre rB3`hFjxěoF_i6U4P?7zN>+(ZTOLppEkfѸWpAâ`C{a=uꥎT|o? _J_\TZퟧ x`~afyG7o6_ {mrܫW?rƋZ2u<~xhw[Jc1H0m)Cp\㦸Wat*UU-q$N _Xb%UWH`g4&LӒI4Vʩj7EּQÖlRf_jժV[ -}5\3 DWo[#ƟgWBgV @vU-u.f^Ll 0"D[_wX(np\V؊+"?YQ:jřxƜL0-ԕ:PW[UgMa @% gk]AV)EYɃ4^"GGr^XE )+i b퀩 ȃu&dπSSΩ C'?9 6:b"kfn /b*ry\Jȇ$F'juHƪO6UO~Net $VLɖcNbCOɇ44LB#bv3r|]G?nNnŞ+hrMQ;0cC'J\W܌cD%>!c42גиPKd.<lCNA SǛwѷF%8w/9DG %Bv8m}NإJkr:4D I z<{IR+U q&' oZ'lC/Zt2,'gO-ӡ⋪3Cw2}oB-RB}xQt.[6:@ߣ15c^ VTCոR.%RP'xE2͡Z'zFz7؟oz{ߵO CAϷ ABf ¯=ɪy=yo5\PjpR)'#?MW8:r!_$wQ2y| G/ݯm㸏K~dl<8f;c!OXރ!U)I[T_44󵯝;=X8:4I?JoK?Ь5tzyxp'K5;SFUx @Z:dz|M7m؎gEq͖ | sT;RcExnV'T;؛'d6rz'BZrշ2q5Xv3-w m)bU=CNNA!2=BHSJ1# g낟%dVGa9aϨ 0{ߐTI_\p7B5@l.Un=$IWI5Ƈ8ZMLl;Y7MB_6zHK;E. @ŭ ~#;0kn !o! 
S}@[D턈pȏ1wC}$Cbzd'MUzy,y8 wx0GMŜ2l^2x膡[ܡ.;ch7>RI6Z,U\,}I}ȣ0u9?Vu􈿃umxYc93JwmpCP'dž4,Z ٥QAtjxq1K$&Pzj'!KfʵcԌ5tD2QR\/,E#P0y=ynaͶY-d4tZZ% 0}H{J'M*<^A{ʇ ?l-e\-LKBK*jHMq!K^ 9[>KV)Jڈߏ| m5&^^_\D/*."HJك},UPrR Nepü- n(E/O5Ǖ&drz|JwY OCn/:sD ]ڒ )KOמr|šo~[聆{z6R[wϯڊkXxB{VЩo)%E7↰03e.c/tq=0x $_̖?UΫ۟Mz.^},7LRTJya4| |IK4F~Z)J-=.KƷH(4 KA_%Ka +9\`&aKB (8VW``C4Z@{ˀL#ޒ|wX;9v&^}AS Ii#f)ְ:Es7Ջ23| =<8L=G(͜pJ] c OPCS-<܂r)s_gZ ZbX P߄>)ft~}w3Oy=C8҉5Μӓ {== ==}R"t4@be?0twj­xLȸAz2yƜCp~n(>uu-|J)㥼7 <>UڻɄt+Rep*ev ebLVZb̖a|)8'V<Fd&D$xU `UD@@`hLLGv~ _I5S! Gze|Ї}q7ڏE`F܈ hn#E2- =(oBz ԰H[" 5z7 d!곑g aDp;9 a D;w"%G[CK}GFXNd%6 '8\GHb7`PD h%qX/H$ RRH{UzFC;ݧЎơ&Mi ,o/MWFɟ&_t[ʎcsp_">zD}hw\Ym%5[tϭ~}*g=ZέΛ[O~y##"7n(UVјGfQG[WVgLxu)wM=9iLSkмmd4D_\#Y3hQ٧8jQ3"WG\fftRtⷄ< *-13=?ί a`p'd`r>N(_`7%F35˄/[ BSFchBBu얬+_گ$jλZlZv8lhʲܜlare.UU^4鬭qɂilm\=zD{|NG m ew۳Rj@k?jdta,Awmity԰#R H.V:6>$wd03=gF6cvaemYv6Tݝ=mkj$Pth͉D#B"#sjm0E[_^c5e}zyx;h[]UXYzۋy~}JI*76,t /sZa'h"מk͑ZQ-a%my0Wl\ZXظo_Iӫ +^|;()&ߖ3sGLG] /'Hh(sktbd儲R@ɮ=]0=a{Z\܆5y+Ưt㦼7Oؖw퓞py|X:|BGCvIfaѿT̊ڒu,=-M4l7S};Z$܏Ont&g֜  PIf`j frdek`qu0&QdҺr$58u$?xҿB=~\_<Ƌ4{<a9;)1FMɂwѕ{S++K6E|K۔;.)*<"^5~u<62?$48W*sNٕO 7&?nJtb$_N뱃6N4Bd!;Kn-R:aA(lC!4mR~@.U;nwǒL8S]jhɌγmH)TT#=OeP[,&y3=6Fu1g|qȟ 9Z/ :{SQEJZe^6xssZ5Z\33;b@:]469}Zp Fa`DV$%:FfndKzOQ`KͨUbw3h7kavVnPw1 IVVKS5\[͜Yw[ ]ۀL5-Y9t.EOrzA:b1FCftBg s9=̪$互異$! CX{\,UJNP#C1+!rOH B+${UPYКښMz>qƦs駪ۿz"S>V-"inPTht PlչHD09鍜 /6ͅ|PS@[>~<@pؘZs=nAmiw2_'^k>6!k@*D}[*',|S~ (^g+Z0e0Pؔ[Wip7v]BCUgJOI2TUP]_16]\g^^Z+13Fg0X.WΖ^IHClA酆*(i-T鿱12\JeU&`+4,[M&lH7{͢i?B0r5I\ *sx|Xe\{b,C \fr4aWx5a?b-݅>aqCk*#oLCFٓ[LU趋0$>NuB!ܯ[t #mSDYU2MZ͉VS&I$j&&q*+(Ԛ܇i#=sϝƷ㪅;#acrʣ_͞ࢪa:nGWuz魭K@(i_Q> ,S}*Xc5dM77t`T\^F46ז<::\rɉK$l-b`'ir@(? 
\;@wȗ|0ꗌ#t) : @lBP?elkqo1+F x%c 6l;jc6g88i;݆ )=]NsZA% O$,?E*lD \b|1#G,]:9z\H5KͅuuN(L rD+Z/'/>|rjV3 e.\|ook9S6syӜw8xm"V߱qT~ΓlxpF!P7 XIQ6 h :61dFhQj#AXzS6ZU*``@t)0@V^fyu,ղjB5@H~$qk*ͣF:- vX,|T1_lDBEs tZ |jW:Dޞ.?w+{h$z̳AA!V|M_"\"ojM&{Xح ,Y\63/2OrE4Qҝ>v!d~@~?t,qӉ5 Az/"|+18O>1/AAYҘORf$dl]w=]SCpA9B\n]PuZdiQѧ/(бkTX3-8+֬]^tEڿ+j;n`wtҼ/߿ڷ5̹t;sN.\[uz{Oj@ kŵb6V Ag̩|sBvEh$brv[ro6bC 9dE B&, $ϖɶ^,NE9԰ t]@ m=YVGQ^Xg1%luAH=MH"i7 }*\QCh(" "*b)*˨:'!m7>wڒu2;W f8l7V- ~ NKiA:)2XiF%پFԡS11Zs~{,H8T%n5B_fwI#P(Q%-L*ybF}{T4*QfWTϰaX5:m,AxF?dZ0VVoTݨ3dfHhP_4bK`7}KgrfYjjg%zB~,ߜ ѵVe޲򲵁n}>){ܱXȑY/gՐ,62.kưJo0n4=NLI5vM:96(zirz[ #+i[Y~}XaA>u6sAU %W qM|Z]8X1#DLs瓄wxϏ\߅9=:pηy4<[#^'U5-F^xqwňͷ]DY7뚺㊛-y+gˮ^T4]=v!c$OIk~x/*.רk6\c\cwVZU:T',0]@T~*ڌ I%М TNuIuSv;1:rQ3^h>` A.h/-8Y `u4e9c?gs? &:EZ\TUi-΋#^>7sIOdAЅ/ 2'عwjZqd lkre(Lˮ?fO7lym/=vzgYuܚZoj[0[EC[5]]?mޟ۫g )X[Do7.',.ߚӛraS"I ~Ϝ}/M0i]c!KmJAI8>3ŀ ~β% f0m9pa^쀝oyaWj _!nCq7:9:!qWUB* o;tre0%%7wwG PaVCeH"WzWS<^F[T}Xq}AXQ=r^.UV\AխUR*UEU*֎b&:T SaO9ޢfL}FqLmrx\u*>D[isp@$> }ǡ!jl3T,?7_kyvN+,Y\. Z#H5uޞ ee˕c"UL(-t'S@\*؉a6$L&d++xfL؅׍9R#OA1 tQ.!ׄt,z't!V^RX {7̱ 3Mz=̓O|/4,ٮey'ShLK\z_ ?OZ$[vkMfQ~u&,.;ϑd W?R%+"b'SlWrHWz\<|muמ"ͼpI$әܺ<;W4nm%۝s,$O^^"e[:mGla8<] z@6h%yxI֙qe-,䡞lɦf`zwE"+."_ -dX LN;tJ{IZBŌP4Z.~2=ﴌfNVτui`%8?Um3?^K4E M8lv$/vUX/K,4xЈtXVuGrrLnl.^'E0"kk/lW色w]Fߍe%՜iÄt+,gۺxxK1xы94qB I`NxC5.H*YY$戔фfD z"eB9Q&De4*)#@ epS]x@@HW[-hFUeT=p.b3ƒhJ#6a^. ),ضcUw5 Wvxv[ƶSqn\tpeKXQz5/zw_tLQ)CV!CtM`e)faAVփؾ6ۄܟ \{# D? 
[U d aҩ07xw*'vITl142【:kTڻBZ d0f {8&.Px[EܟeW'LN-owӟ y}9n>VuĐ@T,v.P.vuQ[PA:}#j1+qʍz~Цׯ>WxI;s-VwK"]N ?ڹ;o{<2)NY2Yr rޭϿIGz[RVgoyr[af{MbZM`zFwzc&9cxVWd!i*П1ecqlm|I rەhWF]AAhT+j?_#0o|,ZK( C,1Hǜ7r[uYÁhX'pfr]1}.f}LKufΊH a_Q'o><:;>:)`VʵrŒH>؛< ٓZ8@ Z܁X5¼l>%MpF2f#'gF,/QƠa ڧMSYKN#[+,ˤmqzMlfr&'0gcCr2_\^yἦpc3H9ʼnMzQYj쨼U|R#^(sЗ8ECd+O+_܆h@]@/lZ.HP\u,|@ (f|RS?B%cXI^# Ų|g)Mcrǰ-wvLx|ɉm<I;(K{?,uAL`={1z}+).skD@G7p?s9t[ 9O&Y"!g3"ja$BqiF0إNJП|^3.3ʠ,]~e+cN1;13 Z>BAku@)Vp1;MNG6n+\7n'O>$)I#vڂ;::a pSqH&krڨq$?PS7]( v0fGWGSB&c6:m/3d9:UCԕFl k]tL::jFŢc "{]VEJ[shJ&bv`%dcui +k(aVf"̊4301qox@͙G7.{ K.3m  ."rx};/pR.|i@(+K+/pV#XˮwNdT77$"I5C>|R9W9&?=xmu"] g'ƕ$U`K?/tjv&miሔ7ҍߓB˖\ֹ0=϶J?yAhB>ԟc%f54B1ctǽīOF&:9!)T:*Bn2$Cۅ8)@*G"Y[TﮮvqL͎ȽɣTkJRKOdW0>cC[ۯqx1䇎oue`o_+\y>}Pd";'x wlrڙNo%ј6#Hlg${)?'jR/a>yF(rg0' H1ЇY_]iA ßn'xU٬NHE3TpXhUUdvUfalh m~ s0a;ܜLǒMM7J?hwb6lpsbXrv0w qչ뽅pKX=' {QɞP-C zT8>'7Xu b |F6vm;v 1-fYhekٜAB XVk 0@&{EIGۥ '%%Y$ ܧ, g!O(.iG{% vd+es=db  qؼ +ǘ Aj܊3&)z5 4({S`T)$ `PL%sֵJz*Z(ׇ[يvBZ"lhwZ [P?>xo8r+*h]dlH A[Iv 2Pva .йݑ&0gcFVM=LYb_ l:xu ,,w/׺[xx5]Doֈrbj,&sj+|r>˷̲a]$u-$ۥ[J|7+b bl`rl R*,c1u2VmN1 ڮ>pd%BYxi7oѯ^+~*"o_M/:7N?~u=7s$W4"z!RG"Qʱҽ2o]<ӡñ߻~HJ%HU*::,,Q]PW^A^cwv+ê!ȕi69 jM_ôlswIˮ5^:RBT YrxPRD*JU06GPӎzu;ya[vG2sD[-5+[ (^ԼP=E pDe4U7@o5 hK(؝ԁe|8Cz~% AE.*ʷnBp<i칀ٕs=ԉ:'VW4H:(W<ƒoqetAICI_i%7ao=|wՔM4eWŸ& y ARUR2C4+4$i>)߶߱k&_W7R|*gHy Spo4oyxu,?7Ɩ{ŝ7`n-֯ӫȚZ?I_{SMw_a'>EIПE(6[vZ)ҾiNoM,t|rTKpsss]sjQe"ǪŕƕO#ёܬIɸtyS|LˌƊ S,<>oAA1XDzZ1Lz^P>bqRL,liV8V]pWttt^ZviVi]G$]͡b?V}:{vEuo`q^';˽np>VrxA`6LqCIyF_?'PslI쳜Xw'.hYBd";7H$ A+Тh\%&Ue>ͶRwzn^yXͣ;e[^Oό.»wO.[3ga&oc/Щjw8tq+oyo+^bM4SʜOMbsxW+hZ^nգn IjAl1Z;&%Tk5/v0cKSNi:E\oçZ?x$\OG7hs>\U_.[FgCӛˁ8^Qd}|g@+J/Jnmd0eCq-b2G6G &jscS4I55PoϞWEɶRVC'Uz![(O3scjThC!+)VEYsrZѣG5 ?Pxt0 1Op̨+mϧ8֐H1 tZPŋn>m>Y![TSܺZ5?̸x )I;TdRLz} 1} 1} 1}u.Q2wٰ oLvB$a:8_uYn} ;?RI)eY{LmG!xKP_|_ m mXi'=w*3 Nja7t&IO5s~O%} ||fș?uҍ3*vڟ/up"5鵮)+XXa 7<=fwY=ueFXsZ6X qHc57np 6ƆPĉJs Lܔ2}3>s=d9)CPΤF2s4QR,`50MtTOCIk|ݮ{^tdh%Ή|t|TaҔks_`*eҿ5 MdtA`cp$uJɴYl/rMZju7֚Mj8_",wT *f쉔: Δ*"΀! 
9N3"n-ɞ}Yr F'XT@:rZpp2&٥YH!8&iٔtAzdZ`*p:` Vk~iI'j8"lX11!Dhr,Iv`NE޸/|h;gCz>X{hRj?HsȂQd"C]JBc yl-~%̏M>3R)@ynH5IJw9x[<j *8o8-JfXr1s;{peL'g$q׎9c{^9߸[j/LbV DCnbN24;HQ`łEE$Ō f57nnH<`hN2|Me@a0{7wLnqopᎇT=~*TɁígϪ7Gᘧ5S*TkJSWRf:\>1l|>Qvgiw͸4jMp:520o,S47ƶ0F;WBjfQD#戅E47kǬգV?izt 7X+HuKAB{3A  4Y8 +=A05y-?OR5R#"0<"!n~6ʥY VmU^c|3CAy.ſN4w(&+EHNz!s)R _Dm:b.m8~l=hh,w~.X9—%jk(}}5 9s [r6[.g.]ɾ~Q5VF6&<4{$ϵF`)ǵꬨe Ak &t"m4L}fe2g;.BuLuZ׃P͟#@ Q 그S2vdc*@HNT8σ~MZ.iKYćpW?ag_ŽW4佊5xv/2}ܒ(͍k[ٜXek|GhⲮAEVqwSW-Zt::ʃʆbҝ?}Or |iP<\j|YddOXmm3BC W}x CA45zIw(hjKD%5)Psp*eeZ}>0r1Dt9*o=A\\gğ'1J^[ao c}j:ƻ iJl; ߢPWҕ$n<`zÎnfݮ5r'@š:N;N8$fC x+u&9@;AZ-w-@!lRN5$TC1!cl%)mQJ*O'E[#Er^RiR%sQTr0yw?=u:FA|_4pexQFJENk:HkbdmٗnkۖN/:}ǿ̿z9r듉߾z~љ}~!M[!#w&gX'T~4с,5$q؊'=9լND|RLs/A8L#c}P[*&L,4a }V)R$nFyo=8̵*jK:s#ycc[zQwҥ54AxV㭦O'k֦kVB-/7+}IB+;դ钨zԎ@xH]^^&_ެHG:m97o$|`8/Jc6cϟZl);\yCq1FqČkEw^Z?/ wQ`OBKfGGdQY:&#ђ,EU((HYuʜ18\2*r; LL{m8 qFJY 7[hX_zݴ2Rr0wŕ D+U%M7fKl3^1wzK&kxMs֢V)` @)UZv֏ Lk$ΣǙi+i fcѬ[l1iA؅(a J9'X#vy sO t `/BF=-C$~ ǡ8 sƯQJ­L}|[=-VC sqch׊+%rbKh,<.^4j jZXA/\kIMԒvV{uTP,Jɿܼ[yʝzs/4x=Ҹewyi Nr!4O̘'+Kĥ5u:ۻ;|o+P wG[Qo{V9M?@>at=麻58/699~nu7}b%}koFco0量 n h}h Rn6}F(Vh-W]E4ifgYH1o5t&О6hRx&[2@~Ex1NM+ӭQB "iZ9׃י.ToAƴ D[IנS V `k-%?lelLt(箍B!A9 ϚgIJ μ\,>p`ۯ4؛H&sU-aLbeCED#qY.1_ڧH jlUn]Ne:+F|wY46 ۉJBi]1ti f#ITw}Z)jY ץf)|;\vC.eSXmkI`L ׺^Ѭ&ױutom o{  -0R"zؚDap`,8H1xKnZN\߬mއgnL콛{-^%]j9{M I1k:i{/:1;jA+T^4k뷹}O' W^IRL~l7:VŎ*8nv$OoGj WIDS-cBCT@JIW)r9(eIWY15ǙKY֋]psʍ3X<bp twtvù5j3!.bPIhec*Z0Ԋ #1:- -1ac|0ãU]+TrEO C1vnx*,.D\b~&D:?J?$FΝM\U@ TA!ߜ $[4 G~" ,x*4ASLq`^>E\aYc>ݒ Ǿhi9&~<lU_};wҖRWl];=[>͞[4&þ\V%duLm X/Iv%kq y{6e;F}T kOӖ=$n B9-.ar9#i$bϥz K>ϑ ^ .+ 9>^0Xa(3%xU+/ X>!@yxN0mrrn*]6ڼ|/NJte.~e.re.嶧jB,ŏL)~d ws{~1@z0 G @[jsÆ,f$cGڀِơ7ԀqCF&h6TKJ;*D~E\L(+ҩb"wԪF:ܛh,bjOw Vp(c~ y0rL5YW&*TE_aT*crb?f(YB52q t=|.} h.f HqmOw ScI.YL9faH6K4*4zۧϝzr]]׺eCrQi_ȕkN1}voP{dgd9\Kz.&3<0_)'e &tQ!A'HqOF-!oq^+1xO$ ~|9>G,n/>NGTrפvB wlG rf6"1eݔYpS<@/8v5w@w  e|xq>S6LeeNgLjf$#jȐA} LFZkpĻQLx0-a5Zd۽S&ҋ#O!$|`!VjVp;2!ą#7;b#!6M-)t }ot˦~^~JWSat/ M]DuJZz5)@9o^gмOg.P 
N3}7Q74Aȑc_,aWfQKf==";󶂗A$~o"D脇KWOegG0x*i4+}$)yW{;>+p'qQf9.a0TcZk7`Zi4UzP|_ɬbcdPDp>QX[2FZKD$$|k/ f$%{T]4AdrJe.ӪAUW#Pt)E9O"wLoׇjCNEv%tO؁3I%p٘+c ' <F_ &Q2V\x#tS( @ۼ Kg44'Iƿ?RSC6Ԓ7<ӦR,J3vcrK! kFhc9`IVx + U.Gf'`$+kVRU;iɨ M_,<m!'4@{>- =ό $#ǡįJJm%bji+ F$!N$%Վɦ1+ ?o4l:Nd%uZkC8=nLҺPR7xj%?̷*oPMxNr9450=~|@A]jtS{ K+/ dRNw3D&,ƫWkGBȄ Q Q-+]0 smDFaT@u*&pAՇҥb/ǡp1mAz}~EbjY,X)tFaelFt R(ObMU6WEfX-elO7gVJY-NZ- jþ@MG vwr!y>@. P/ 9l\1<ݶށbl:ru8فFnMpoU|ڍh{?=n_ٕ˷ow^*( Wplת͔*kVkuGw1_I|s+׹uˍ+Wu j~PIGا'Zc4[Jc_//苢ܼ/a;f8Yx58'#G/W M,JLYRfw3W9.@f‰=oi9!ΙI(O/rZ+9RxzOZvqˬAݩ _^i֌+-E:JV5c>aC(ks#XvQ-@Rw1. 30pNa\ -q~d7F|@_6uA7lfi7Fge10UzL4&a878vL7 qKp鎷b1Ya)&+b";޺DעO4N#)Q2Ҕ0{-?zZCx3YA#,N BQAVLx|# fc6lZ"s%R,5?y0O)t6k;VV^sfy8mN%]yx+7 :{d\k ml|,L[Ʋ\iy⢽3Ġ҄sPۦS mIӛ4м3V(B%aJqph*bqQ >89 =A\4,_BaŞI?{0p8,XV72[f]迓}ƺԺF؛d$[nnZ+A_R41'SFMj{a. kW9%Ͽ`ӄFiI.m!bF zC(9D4S8,`($au(*ۂ6aA"45P::Λ=7 ] kH`)f/0N?f5 Q=T C`g‰O^}Ovo>yRã_-O~㞿on}g{ofFǙ?ή0< Z MMkyo-wF,Cް^΃*Cq`>QPABCAf}fd8 D4p?BG\.+lB'B얉&3y:N^#{6 fg✸] ]-ECP^7AD՟,W6+ƻl^5oeys*Uk-7o*aVG}ަkz`m8puglw m{|k緶&͵.4[+^4|U[&K͏zz3KM/N/=Aec4w~0x[&zpuI+eL&9xcS ߐ`CRVq#`jRT1f ǕtIz֭םr-@S5]~g]m=ͲҢۚ qYT^+i^rtG,NZ@ =ݜZ9ǠC;6XK9k㢈tr,zcc19G& L (f f0 '{Cx V*N8l>F5Q?_|UU4C:cbRUBtqb.Zb€AIS*qNǿ'~x|`bgx/+5OES{;[C=Ӆgo-]S?s~qsjrSċj0lܬbW ȪSTH z*@) ZٱBI] ҹPWi@pi@>Us N#1x/IngC9{sir3Ss]څP;nCmS .٠&|O_?Hb_xƏR=nc7<:ZQ(K/m&[Ϟ\|`s]xCFLeh}ě6;w^f 2Z qi6Yʮ ~l Nkh_©7_Rǹ1~׸7>c TqEJ[$&}Rwۢčw4):? 
} J<5sbllZd{/\mV^g_ K{lwIVk[[Q G >fWkfޠ{ұ.E$(WV!<~$NhFlҋww{}I4B-a og)pLB:IAork\( F M{ |zrQuS__[ ߙ%3]P,\|IϖK+R+.%(·1cm 4tfS @[Ry4s'uLF<GP[rP]\"ݠ`t=sՋ&Q2})baȩ LU*z;r;BPii+spW=Hhy*꤀y e R{EMrsxqS]ī@@r )fĝ>nh,_Šņ"^_:6TqhtwͩYt ĝV L݉%e0qfKp=[r)X&.3y Zs9bߐ+b%$OxhYB&u ס:3˕TZ^KycP+rXtgjuKJDwto|4ٛV[aI+Uʹfn}s[?ӟ_#b喥%ՅŻIeٻ w')_O)GG T>_yդ`2"Uf`.6 ҆ݶ=ݡ黳w&]ֻruݠ6bNfeJ0%N(j:Pb9!dg• '0:H#gnӴP6)|bԐ-VZB; 3r'Y􄪇¾P(\̡j %SML{`ܴN&^DɄ2$ =Ů= .C6}ץ;4-\ZLS֌_Fl_=IWz0O,j0lSL͏nږryz[fyOMNS<~|M/ע#E6I]𔢊>xE8^95BpI@s":*E/4թ#1$X C$x&`n =2^TW5!Ä;r N=Eή|u@gyo=YMsPt\BNBی8$g{C;,MXqgE|`Ę%3~PIg~S 1u\' Qĝ3`}[gPoXp̡W08,Tn @bB)(Y,===]]/^ "Ȏ-c j.3V[rZfo}pmƽ$RmЛ1z' r {&FErި v =ccViVtц9%B'H ר yi֘@@:; rTDo4%*GA<6n{Nv5>CJ+Xucu؁7|y'rJo N0$Wn]e:  \ Z[;\$~jC\ ZA ioYr[M(Lh@7oҷEeZ͇#89G/p&bkdeSi[`x岎teّ w}~Q2L4wV7) u+W\qI[ uxpM]L ǎLpk1Wp}li ܘg8,/(>D͇TRȇ" Cݘ"MkՎ6EZ>x3T ȡ6op|PX`r}ͮ~hQ]!|A@YV&7,}(z{}4Vc ["֒/f<T`}Ft "D(hI)|C˻_~9S‰7JNqM=L},j8w>P.b_G:#Vl)caIU/?!oX0عW *6MNw;B^[Y[t !c& B.7.]῟#V\4{v9b ֢d@ a` dAf; `@AQZ}{.g94 ܩYE1'U/:N3cB"]py~ln-ٴJ:eOq)n9(_=pUw.VK;O bN2Եl+^8sf)0prF=SpQ`b{֣Ko"F-xHdlaQ1Y|`#R$chۈzaMzsLS:/<>E )ۗevKD݅RsU普.-h%Y*=܎uLkZhqkIǯ4 &g*gN]4楲s(ŵ/˳iD;^=:pim~#@Xn;DP1Zy!,{_~pH5낟v41`{:ܩelafub#$0T89qO';j;(%XéOdYW}ڔh_{iK9@ 9wK6nA}sXgsl>b*՗x2K祝eA:&!^ߙU6&AFYN -xP#'1AQUꛥQG)PE.eB/ *C茣 a U>V+Uj\ԟ8_X+ &aT@,\Ca Y~?3HXjӪ+^{7 ANi endstream endobj 110 0 obj 37035 endobj 67 0 obj << /Type /Font /Subtype /TrueType /BaseFont /VUHJMZ+TimesNewRomanPSMT /FontDescriptor 111 0 R /ToUnicode 112 0 R /FirstChar 33 /LastChar 33 /Widths [ 564 ] >> endobj 112 0 obj << /Length 113 0 R /Filter /FlateDecode >> stream x]n D{b˻!E")N>Bƅ>@b f?;;`ɓc\aٓh;pަT5(d}M4PJw8=0ho=pUpAJᔯ{1,eߧrL?lpFc (ThuiY0NGkӵ]:-_|TsnSPby7op endstream endobj 113 0 obj 222 endobj 111 0 obj << /Type /FontDescriptor /FontName /VUHJMZ+TimesNewRomanPSMT /Flags 4 /FontBBox [-568 -307 2000 1006] /ItalicAngle 0 /Ascent 891 /Descent -216 /CapHeight 662 /StemV 0 /Leading 42 /XHeight 447 /AvgWidth 401 /MaxWidth 2000 /FontFile2 114 0 R >> endobj 114 0 obj << /Length 115 0 R /Length1 8172 
/Filter /FlateDecode >> stream xY |TŹfM&;B -<6#@]DK/DQ0ŀJQZԂTlnJ`}TVR_-ssvA鯿{ww}|9ߙ9i~m#vhXk&뗛ڰ՝]{o-k^&g'YeI>mc+cK<|I_$k:V75ݘdkckZ5Z|}cʞ{Vlb{x&/O5 UD*xW0{GI}k VVf[]@gfK=AZ袳8J$5~ğhbx#>S\P+4nTG Y4^0[*CRH 8ciމWV*OLVFH. ٪3N#?ou(s\j|ٷF&GR#]}KE܉($66aBY0[JAq~C,b@`@AvAA{-SС<!I?y%Q_'ZZSthj\݀@mj;bA'h)z\X qnцCJ" Dn)Yf}ҦHzجzDD`,n@[w)7{'~WrqChl˻&J˟X5dxotuehmlHicI k*2E}ip>`x~vT̢5 djmM)mRRT'P~0لhZsZ{pJiijhۄD(EPfO^6$plFFO հNNTڢf[mm-mmG333+ÝQȨP5;[I,4 hl(q=nq5Q] 98$AUpY˂]YfAJRS RkFڟHh3S&q 9j:CP 4j5t%)}FRvR~.3Ql(WW{ )Mzi2[]0{R{ )zxW+)fq|ƉJm3,]WԢy^Ić -U1tNiŀr@@ᇁ58C8Tx^|@jP2ecgWLGʭ}YaKn䳁^6 nXNx9,=kfa", (! 8 h mu*\dNw~3tmӝ#QT;, dT8pήpp!'haOea_`wKu<0n k͹PsNМNps$f_}wJWl9si@^.=q*z^ d`'[U*)>>rd:8͡8@< Y66#Ū@4Ǚ1ͫdF* ݜkL, +Ýg Fu3hSѯ*Mo3GDXi<#}"C+iк~SN(j Յ=61Je Ws/wxS7K wuRn\4< 3K"ͺ(OQfXa;ѳl4y2Zna_'f_Z E`Z%j)le0֖BHp^P`A@`d[d3T wfU.}ǽs52O0~<D^I{(ΰ1, QvtS&^D4u0 _h54ޤt=}@qk; R3nOCU:U([BM>DKLKXISQtFw^0W|ZBEtd:Bj3M`4?2MMax_`'Z\U-=+f2xqFomB|q0"jut=U}~Eegy,O(ϩ7g{T gzނUSu̲/sc,6-gN`o7{TEreic,X[{0=3zy5#Gl۔sm{{j09FUϷ'Dp ]NDfCrrPyKԃXﵽ/!]-;Gh2k_3n ڋn +1M.]{=Ǟgody(r^Ϋx _7'c1L4ݎDVS-EnQh/O/qgGz^ٻޏMKEt<^܇vk- ƙtV]46A-B%lJkg?`ٝGVs~ƞD;;>d/861^>IisQ&f~=_zwn/ȶ1q%~.+\)Ŋ_Y,W6*'7nY[mmm~aǶsvWË_kQ(7Sx.Ez;xX-W3bb|D&gyBS)g+ ?R: +PfMr+@e1ß×/LNVNt O>4_ɷPX)|3-^UvB_vY%6]WI 2yK먙)Gx}Zw WE:Elj>_q9I`%;~t-|$rZVJtWdVP`=$|4J*"%LQTf;[?mT2-÷6/xFf-JEi|V̻\;jމJ>57[|\<:3Nj/P qȱϳ_}l Tz ˲S)Sԡj yf: 6UYӬxC9棦i=픫ݢ,%w bŐ.V*ʧ|?l 7tLIeEѣ Fy;|X!ك ߯++'#=aUkncDPFSI^A j!pv1. 
r?Ynjݝ`QZϬLݪ;Qx^Q6X4j֭F|3#JjL/QgzN6x *|pÉ)C1DGSt#RvN8XD|jЗ$Oͅ UY*n ^i`6ؚpђha!b#h-ĸE8]m -;:nw{焿6#{DЇ5ю 7o 攜]㍮rizcU 3à7yC44 > endobj 116 0 obj << /Type /FontDescriptor /FontName /IUYLJN+Courier /Flags 32 /FontBBox [-655 -409 1063 1090] /ItalicAngle 0 /Ascent 754 /Descent -246 /CapHeight 587 /StemV 76 /XHeight 457 /StemH 70 /MaxWidth 823 /FontFile2 117 0 R >> endobj 117 0 obj << /Length 118 0 R /Length1 12572 /Filter /FlateDecode >> stream x tՕϑd!۲dYޖdɖ'qB'!y $iCf(mB(4h B KY0 ]K3Ni(O֬ OG{-ͫUڡj*_JY_qfz+l }1[l4 k6ܑ{kW/_e]k0ktM̾\meeuq7.ߖ|u~W7?&[o}Su٭ݯUXݩ R%<&x^mp϶ΩhoP4ot?<7\yxT:EYn=ó\sR**KJ+xF2*Wz-U4ݣ^b4mKfUs/~۰* S,&8uս|bOmUPJY *}qkHKolR: ^{X/cI3eM|?23o՞bƕVខꁁcyx `CT 6,ޚ=5Pp~&40]byzsͅE,W99ꏄسfϑ h5~'7%K;3HF"Hu龫g@\WoUG3qfs]fy`.~0o{iˀ|*Yw=T)TW1Y&^誈]UL78+ϥ4V:KT\'횑jGZudG=-'?]]PRpjQyqiIIIֱe'_{bes,j06XOV.G*'+rѲ-+T]lc\-Ŗjit\+4hSn֜evoafOʢZ m_Uu1hSN|tr>FK3 RK'r\}'ZZu[GB.:]!tt\%a}S)>7e4P8a'6G"{o4DYoQY߷F[%òxB^ 7L2{Nz|>uoB}z+׻\5A cQ 5jy`w{)pe7ݏ25G1^vIfk|<σe7(M5fͰ>ځ 7y>fv7nfw3fv74 ʕ)}GHTݙ"vvsž='9a {Nsžy4wgqꞮMNG^^e3]*zYRsE˶\w{+-OSwo*̘Rbthʹ;^سn7GNҚS<!3⋰hTpm4G,A/HJDHqDJXŎr#ϰņ?C"%;Jbn+6bPqH hL0딮pvv#M&m]U,w;Nmvzj-֢š#=x%@2:HX~/h]a/{EC(!A һfdyc䷷-y]O--m)j3͘\_ΒdP]<Kz3[1>'Xڰv.+b"+b"+b<1^1^\ x;\I5bpe+;\WvÕp%wP^Mf-v/6XjXa unaJE/j kNZaMyT++\I"*VNVn 4q!"2ؒbipW-E~'_QO߲(W|s˝oΜ xEg/`I 7c9j3Ub/f9Шwȹ1KA,V/o]A :O!.ˁ -+]{|V7kF#% jJ"g-i GKJlKj5xHneY3TaHw7&L1\G; Q6.ylQTU~pc8+FM*nhU-6Vё;duDB@I@=TVe']Vt)+K fb]7߲kv3{֬Rh=V6_ޱ~λ_{ղG~oۖc^/*G9@c~#9_J 6cl尙P P P P P P  Pʜ~)9MW´(Z0{C4 ]ScQ 9q8?`E~nT< NA^zzzzz7oC/6qLNF,lV He\${%Hc.k6x ~ oG3T '$$&Ҙ Z !0^9$ ܌~ínp[?~í?ϭnEČ\vJJ̩+Y_ߝ7 ]W/k5|jizd6]8"d H뢑h&{ $ѐqXH"Gjl?2;}`a:9\~ܯ9CNvpA+a0. 
mJ,> ڸ=5ʵk׹#U0FQDŽ1HM24fP MqI*wL5v6"}G1(Azˣ Aؐ@/}KFɵ:{d$[gMiiߛbTwԦX;MXcWut7Lwk͆ ƒh4^oL"d"}B_n0FChz\aYbY{h7Ƭ~`ߌ~̞ErGixIMcc1 ü11T,:˺wa*hCz5744Y"KȃQ2&綋s=sL)x7*i+gzkY7xb?ƉD:*qN*qN*qN*qN*qN*qN*qN*qN*qN*qN*qJ7Nk3 S(s-%5V.=%ڞ&6f|Ty_,b(V$FtN"nvBKQS0+Y4Ty Skf/aO>M Y k4/ :fg߻`x0~/'H,(sI u!"Pvxx$V/a%kCǶ{ʃeT` N)1´.b1rGzcJ䎒1[́"c#I@JmAV)]hMcFQ{KӮmSF|Q(k}MԼ6֔JS'%<76LY&ִjs>q>!KUܷވs: >BVᣈp0*ٗRM]HF!1B0j ̬ly`w{)pgZ );µqπX:0 4 $x]HYďȤX9n1ct?zg/HlT4sg?͝mٴM綇Hξ)6=kC"aR9?Iu|"y`=,zzzzz1'yʪ EQhIF~TuOT:9 tr4i@'NN:9 t/i|T^z' /@cC"˨&wXH%z6걙dqi4Y\,.M&Kťdq|&O*9Atê]?H nm9rnh6Hc.k6x KGF?ReE4Tb~_|򉿦즩wL? \G9ξJM˚>+yW,mz"&/G,}X.0kܖ 5ץZ[vJ۲Nf%_mW, ڋ dRfggnnnnn<ݰmod0+)TgNge(VM7-%>&n f˶"m,`0D@D@D@D@D@!giv `aё >\ւm`7!xH50'e&PDd9i`-v}wP&6 9Oog|w2S…%~̂h1i-#P}k"M_[פsܹe垺Ǿ@wB[%ƪmWx|O~ Kztv*Nq)xPԟPjم_ٚ?PixXqn]tk xjtJЦ# v.Q3/ʠKj\ \6m.hsa.hsAː2d`3e*U2U9|s0`>9ρ @4+;9>j">dC>dC>dC>dC>dC>65FEDe$|1ײF= 5ږ{ Έٷ=.xغ b+/bxhڗdXH"}3stqFBsF*FiR? HYQ"tZ !aB\6iaO7) Iʮ=EfO46z{/u7-|j#ڡ {_/#a^P&i1;Gj R<v<I+-gq|K=<}1{Qep /WoeF6Q͘lMd)f Um:'N N3s9qqqqq7vZI[T,E;M =r &tkދ~=WS?=iK'O3kLm el*/8)3N>ZPۼ}`v`_$=pQۮZ g-15)J,0\?Cv YyZd8rw*;&61L==x8TeI8euƛ/߹bzc4rGtfo|3E}vvO4%,;HK7N~CdzShKSn!6%qT[}$f:/ruQ~rh*) UABNf "_--exBM/"s[_.[n6=(_mZvRuTy޺iؤilLG@3fENJӇ_԰)tLܬʘOK>jKy5`Ԭ[6߶nmW@~ ߐo?pT\6/38 8 >xRP `*Xp!H[U/[[c95=?q\Ҹq /y}_9ոq70^Bm^)~ endstream endobj 118 0 obj 7365 endobj 119 0 obj (Microsoft Word - ChIP-Seq_Tools-UsersGuide.doc) endobj 120 0 obj (Mac OS X 10.13.6 Quartz PDFContext) endobj 121 0 obj (Word) endobj 122 0 obj (D:20181001133000Z00'00') endobj 123 0 obj () endobj 124 0 obj [ ] endobj 1 0 obj << /Title 119 0 R /Producer 120 0 R /Creator 121 0 R /CreationDate 122 0 R /ModDate 122 0 R /Keywords 123 0 R /AAPL:Keywords 124 0 R >> endobj xref 0 125 0000000000 65535 f 0000275657 00000 n 0000006107 00000 n 0000117145 00000 n 0000000022 00000 n 0000006087 00000 n 0000006211 00000 n 0000009091 00000 n 0000136716 00000 n 0000188028 00000 n 0000222770 00000 n 0000194133 00000 n 0000156726 00000 n 0000006355 00000 n 0000009070 00000 n 0000015044 
00000 n 0000009127 00000 n 0000015023 00000 n 0000015151 00000 n 0000267319 00000 n 0000199203 00000 n 0000024468 00000 n 0000015320 00000 n 0000024447 00000 n 0000024575 00000 n 0000000000 00000 n 0000208848 00000 n 0000000000 00000 n 0000117522 00000 n 0000182326 00000 n 0000031929 00000 n 0000024724 00000 n 0000031908 00000 n 0000032036 00000 n 0000039931 00000 n 0000032197 00000 n 0000039910 00000 n 0000040038 00000 n 0000048620 00000 n 0000040199 00000 n 0000048599 00000 n 0000048727 00000 n 0000057943 00000 n 0000048876 00000 n 0000057922 00000 n 0000058050 00000 n 0000066053 00000 n 0000058211 00000 n 0000066032 00000 n 0000066160 00000 n 0000073496 00000 n 0000117268 00000 n 0000066321 00000 n 0000073475 00000 n 0000073604 00000 n 0000083868 00000 n 0000073777 00000 n 0000083847 00000 n 0000083976 00000 n 0000090202 00000 n 0000084137 00000 n 0000090181 00000 n 0000090310 00000 n 0000097392 00000 n 0000090468 00000 n 0000097371 00000 n 0000097500 00000 n 0000260887 00000 n 0000102615 00000 n 0000097660 00000 n 0000102594 00000 n 0000102723 00000 n 0000116853 00000 n 0000102872 00000 n 0000116831 00000 n 0000116961 00000 n 0000117379 00000 n 0000117471 00000 n 0000118676 00000 n 0000117954 00000 n 0000118656 00000 n 0000118911 00000 n 0000136694 00000 n 0000137160 00000 n 0000137424 00000 n 0000156704 00000 n 0000157413 00000 n 0000157680 00000 n 0000182304 00000 n 0000182824 00000 n 0000182506 00000 n 0000182804 00000 n 0000183091 00000 n 0000188007 00000 n 0000188205 00000 n 0000188459 00000 n 0000194112 00000 n 0000194306 00000 n 0000194555 00000 n 0000199182 00000 n 0000199601 00000 n 0000199874 00000 n 0000208826 00000 n 0000209798 00000 n 0000209198 00000 n 0000209777 00000 n 0000210040 00000 n 0000222747 00000 n 0000223475 00000 n 0000223736 00000 n 0000260864 00000 n 0000261383 00000 n 0000261062 00000 n 0000261362 00000 n 0000261643 00000 n 0000267297 00000 n 0000267714 00000 n 0000267949 00000 n 0000275407 00000 n 0000275429 00000 n 0000275495 
00000 n 0000275549 00000 n 0000275573 00000 n 0000275616 00000 n 0000275636 00000 n trailer << /Size 125 /Root 77 0 R /Info 1 0 R /ID [ <876debf88b4df41e34f1eb18b08a898e> <876debf88b4df41e34f1eb18b08a898e> ] >> startxref 275808 %%EOF chip-seq/README0000744022744200262270000001710413431523361013744 0ustar ambrosingr-bucherChIP-Seq ============================================================================ The ChIP-seq software provides methods for the analysis of ChIP-seq data and other types of mass genome annotation data. Description ============================================================================ DNA sequencing has recently been pushed to a new era with the development of massively parallel sequencing technologies. Chromatin Immuno Precipitation (ChIP) allows the enrichment of genomic DNA fragments based on their interaction with specific proteins. In combination with high-throughput sequencing (ChIP-seq) of these fragments, the technique generates millions of short sequence reads (generally 30 to 50 bp in length) that are subsequently mapped back to the reference genome. The ChIP-seq protocol generates thereby a comprehensive definition of genomic loci sharing a common binding site or a particular epigenetic modification. The exploitation of such high-throughput experiments calls consequently for the development of new computational tools for handling ChIP-Seq data as well as other types of next generation sequencing (NGS) data. We propose a set of useful tools performing common ChIP-Seq data analysis tasks, including positional correlation analysis, peak detection, and genome partitioning into signal-rich and signal-poor regions. These tools exist as stand-alone programs and perform the following tasks: 1. Positional correlation and generation of an aggregation plot for two genomic features (chipcor); 2. Extraction of specific genome annotation features around reference genomic anchor points (chipextract); 3. Read shifting (chipcenter); 4. 
Narrow peak caller that uses a fixed width peak size (chippeak); 5. Broad peak caller algorithm used for broad regions of enrichment (i.e. histone marks) (chippart); 6. Feature selection tool based on a read count threshold (chipscore). The C programs are primarily optimized for speed. For this reason, they use their own compact format for ChIP-Seq data representation called SGA (Simplified Genome Annotation). SGA is a single-line-oriented and tab-delimited format, very similar to BED, with the following five obligatory fields: 1. Sequence name/ID (Char String), 2. Feature (Char String), 3. Sequence Position (Integer), 4. Strand (+/- or 0), 5. Read Counts (Integer). Additional fields may be added containing application-specific information used by other programs. In the case of ChIP-seq data, SGA files represent genome-wide read count distributions from one or several experiments. The 'feature' field (identified by field 2) contains a short code which identifies an experiment. It often corresponds to the name of the molecular target of a ChIP-seq experiment. Sequences are identified by NCBI/RefSeq chromosome IDs, which are assembly specific in order to prevent mixing of different assemblies. The position field (field 3) represents the start position of the sequence read. The strand field indicates the strand to which the feature has been mapped. Read counts represent the number of sequence reads that have been mapped to a specific position in the genome. Input features may be ChIP-seq read positions, peaks identified by ChIP-peak, or any type of genome annotation that can be mapped to a single base on a chromosome. 
An example of an SGA-formatted file is shown below: NC_000001.9 H3K4me3 4794 + 1 NC_000001.9 H3K4me3 6090 + 1 NC_000001.9 H3K4me3 6099 + 1 NC_000001.9 H3K4me3 6655 + 1 NC_000001.9 H3K4me3 18453 - 1 NC_000001.9 H3K4me3 19285 + 1 NC_000001.9 H3K4me3 44529 + 1 NC_000001.9 H3K4me3 46333 + 1 NC_000001.9 H3K4me3 46349 - 1 NC_000001.9 H3K4me3 52929 + 1 NC_000001.9 H3K4me3 59412 + 1 ... ChIP-Seq programs require SGA input files to be sorted by sequence name, position, and strand. In the UNIX environment, the command to properly sort SGA files is the following: sort -s -k1,1 -k3,3n -k4,4 SGA is a generic format that can be used to represent a large variety of genome annotations, e.g. the location of transcription start sites (TSS), matches to consensus sequences, or sequence conservation scores. Orientation-less features will be associated with a strand value of 0. An example of use of the ChIP-Seq correlation tool (chipcor) is the following: chipcor -A "H3K4me3 +" -B "H3K4me3 -" -b -1000 -e 1000 -w 1 -c 20 -n 1 H3K4me3.sga > H3K4me3_fc_n1.out Where 'H3K4me3.sga' is the file containing the ChIP-Seq sequence read distribution, which corresponds to the H3K4me3 histone modification data. The '-c' option specifies the cut-off in input counts. Reads corresponding to histone modifications along the positive strand (option '-A "H3K4me3 +"') are correlated with reads corresponding to the same histone modification pattern on the opposite strand (option '-B "H3K4me3 -"'), and their relative distances are distributed in a histogram within the range [-1000; +1000] (options: '-b -1000', '-e 1000'). The output file (H3K4me3_fc_n1.out) contains all histogram entries in simple text format. Histogram entries show count density values (option '-n 1') of the target feature at relative distances to the reference features, namely all bin entries are normalized by the total number of reference read counts and the histogram window width.
Such types of histograms are also called aggregation plots (APs). An aggregation plot shows the distribution of a particular genomic feature (e.g. a ChIP-seq signal) relative to a specified anchor point (e.g. a transcription start site) within a set of genomic regions. ---------------------------------------------------------------------------- ChIP-Seq has a web interface which is freely available at: http://ccg.vital-it.ch/chipseq/ Program Installation ============================================================================ For code compilation and data/code installation a suitable makefile is provided. - To create the executable files, type: make - To install the man pages you should have root permissions and type: make man - To install the executable files (default $binDir is ./bin), type: make install - To install the executable files system-wide (e.g. in /usr/local/bin), type: sudo make prefix=/usr/local install - To delete the executable files and all the object files from the compilation directory, type: make clean - To delete the executable files and all the object files from the $binDir directory, type: make uninstall # Man Pages - To install man pages system-wide, type: sudo make prefix=/usr/local install-man This command installs the chip-seq man pages in /usr/local/share/man/chip-seq/man1. # Data files needed for format-conversion tasks - To install data files needed for some format conversion programs, type: sudo make prefix=/usr/local install-dat This command installs the chr_NC_gi, chro_idx.nstorage, and chr_size files in /usr/local/share/chip-seq/. # ChIP-Seq User's Manual - To install the User's manual system-wide, please type: sudo make prefix=/usr/local install-doc This command will install the ChIP-Seq_Tools-UsersGuide.pdf file in /usr/local/share/chip-seq/doc/. The DATA Sub-directory ============================================================================ This directory contains a few data sets that can be used to run some tests.
Examples on how to use the ChIP-Seq tools with these data are described in the user's guide (doc/ChipSeq_Tools-UsersGuide.pdf). From release 1.5.5, the data referred to in the user's guide have been put on our FTP-Site at: ftp://ccg.epfl.ch/chip-seq/data chip-seq/chippeak.1.gz0000744022744200262270000000230113046354223015343 0ustar ambrosingr-bucherؙXchippeak.1VMo6W t+!nKa]"s(hHJR17m_z]ޛ8J)cWosVFAxU9?;iГ]wmv8WAM%tTEOlvWS94Eh4Nye4~YSO>7ʿI~U>~fRcVnȌ!T4lh𓕤 / ^Jיo}0>ZvS?[Hw$WԨ^VTasJQԡC"3cf k xeEtv]$tMPgĕv4[eUQB.wRSvR٦!mE;yv}RGZ@h%ueŤnO`EvbY'5Z=NRG˽A|_*ղSi'I:z&.i}x(V1 Hp&'+<]A%ѽA BZ2 PNt?!0]L>|>-9c}hUtdW, &Zfwu[uR!7{VNe2. GEE9>__k|YEp͡"`vW{9C#9So".O]*=|QM0MsgWQT!Ld `g?NyF*3AznTw"DwIDnlۙ=Ks>;Ήeooo?W#ď}rla`CS'H2 j` chip-seq/chipcor.1.gz0000744022744200262270000000242113046354175015217 0ustar ambrosingr-bucher}ؙXchipcor.1VMo7W tPKMT$a9"́j ɵ ?o9^,k̛7;OY:ɟMu/.~J[m 2W>JO|󞮗Ddh9U5tomEBo̫ IYz#g1_ن/4[©foov>UfWmr{;"zꗛwK*t] jn}{>+'$F~JgA6yeJcnl#19QNIsrɗxwJES#+2큅lA`Dxy2vI-QS3\Y L* QUǹdcٝCisGfLtUE&DmABXWYP;B6UGY5}9fYվJVUNJ~{yˏdǡ͞”M%jq jv\u (e5`CW(XuuA/[!dž/dSj4BlWy +k?miuE޺rkk12db蘔r27F1`J=DSI3!<2=\_W?_x&v}Kr b 8G~p#2H98'q+ &Fed2X~'WaplT'+7( #~jY;VRRh.`9TH"}$E xSoZ(}'lcv({}\1?یYf({T2io^p8tiD fdf46)> Nƶ9qVйT+ B&BrFk%B$T역}xS ෻pux~UM^ z=3HByCV{TFd4;yhɓ-BgԿ1+g& = FŸAi^"Mw!=.$6`fEX >2tyEA*U0FP{2;fct?q}p/̻nQC.<ܬqP5**I[A{ƱsPxNgceX5QpNwjڥh$ #include #define HASH_LEN table->key_num #define HASH(x,y) hash_table_do_hash(x,y,HASH_LEN) /* forward declaration */ typedef struct hash_table_element hash_table_element_t; /** * @struct hash_table_element "hashtable.h" * @brief stores an hash table element for use in the hash table */ struct hash_table_element { /** * store the length in bytes of the key */ size_t key_len; /** * stores the length in bytes of the key (only for copy mode) */ size_t value_len; /** * pointer to the key */ void * key; /** * pointer to the value */ void * value; /** * next chained key for this hash */ hash_table_element_t * next; }; #define hash_table_element_s 
sizeof(hash_table_element_t) /** * @enum hash_table_mode defines the mode of operation of hash table */ typedef enum hash_table_mode{ /** copy mode: here values as well as keys are copied */ MODE_COPY, /** value reference mode, here ONLY the key is copied and the value is always referred */ MODE_VALUEREF, /** in this mode all keys and values are referred */ MODE_ALLREF } hash_table_mode_t; /** * @struct hash_table "hashtable.h" * @brief identifies the hashtable for which operations are to be performed */ typedef struct hash_table { /** * the hash table array where all values are stored */ hash_table_element_t ** store_house; /** * mode of the hash table */ hash_table_mode_t mode; /** * number of keys in the hash table */ size_t key_count; /** * number of keys allocated in the hash table */ uint32_t key_num; /** * the ratio of key_count / key_num at which the hash table should be expanded */ size_t key_ratio; } hash_table_t; #define hash_table_s sizeof(hash_table_t) /* element operations */ /** * Function to create a new hash_table element * @returns hash_table_element_t object when success * @returns NULL when no memory */ hash_table_element_t * hash_table_element_new(); /** * Function to delete a hash table element * @param table table from which element has to be deleted * @param element hash table element to be deleted */ void hash_table_element_delete(hash_table_t *, hash_table_element_t *); /** * Function that returns a hash value for a given key and key_len * @param key pointer to the key * @param key_len length of the key * @param max_key max value of the hash to be returned by the function * @returns hash value belonging to [0, max_key) */ uint32_t hash_table_do_hash(void * key, size_t key_len, uint32_t max_key); void MurmurHash3_x86_32 (const void *key, int len, uint32_t seed, void *out); /* hash table operations */ /** * Function to create a new hash table * @param mode hash_table_mode which the hash table should follow * @returns hash_table_t object which
references the hash table * @returns NULL when no memory */ hash_table_t * hash_table_new(hash_table_mode_t); /** * Function to delete the hash table * @param table hash table to be deleted */ void hash_table_delete(hash_table_t *); /** * macro to add a key - value pair to the hash table * @note use this macro when size of key and/or value can be given by sizeof * @param table hash table to add element to * @param key pointer to the key for the hash table * @param value pointer to the value to be added against the key * @returns 0 on success * @returns -1 when no memory */ #define HT_ADD(table, key, value) hash_table_add(table, (void *) key, sizeof(*key), (void *) value, sizeof(*value)) /** * Function to add a key - value pair to the hash table, use HT_ADD macro * @param table hash table to add element to * @param key pointer to the key for the hash table * @param key_len length of the key in bytes * @param value pointer to the value to be added against the key * @param value_len length of the value in bytes * @returns 0 on success * @returns -1 when no memory */ int hash_table_add(hash_table_t *, void *, size_t, void *, size_t); /** * macro to remove a hash table element (for a given key) from a given hash table * @note use this macro when size of key and/or value can be given by sizeof * @param table hash table from which element has to be removed * @param key pointer to the key which has to be removed * @returns 0 on success * @returns -1 when key is not found */ #define HT_REMOVE(table, key) hash_table_remove(table, key, sizeof(*key)) /** * Function to remove a hash table element (for a given key) from a given hash table * @param table hash table from which element has to be removed * @param key pointer to the key which has to be removed * @param key_len size of the key in bytes * @returns 0 on success * @returns -1 when key is not found */ int hash_table_remove(hash_table_t *, void *, size_t); /** * macro to lookup a key in a particular table * @param table
table to look key in * @param key pointer to key to be looked for * @returns NULL when key is not found in the hash table * @returns void* pointer to the value in the table */ #define HT_LOOKUP(table, key) hash_table_lookup(table, key, sizeof(*key)) /** * Function to lookup a key in a particular table * @note use the HT_LOOKUP macro when size of key can be given by sizeof * @param table table to look key in * @param key pointer to key to be looked for * @param key_len size of the key to be searched * @returns NULL when key is not found in the hash table * @returns void* pointer to the value in the table */ void * hash_table_lookup(hash_table_t *, void *, size_t); /** * macro to check whether the key exists in the hash table * @note use this macro when size of key can be given by sizeof * @param table table to look key in * @param key pointer to key to be looked for * @returns 0 when key is not found * @returns 1 when key is found */ #define HT_HAS_KEY(table, key) hash_table_has_key(table, key, sizeof(*key)) /** * Function to check whether the key exists in the hash table * @param table table to look key in * @param key pointer to key to be looked for * @param key_len size of the key to be searched * @returns 0 when key is not found * @returns 1 when key is found */ int hash_table_has_key(hash_table_t *, void *, size_t); /** * Function to return all the keys in a given hash table * @param table hash table from which keys are to be retrieved * @param keys a void** pointer where keys are filled in (memory allocated internally and must be freed) * @return total number of keys filled in keys */ size_t hash_table_get_keys(hash_table_t *, void ***); /** * Function to get all elements (key - value pairs) from the given hash table * @param table hash table from which elements have to be retrieved * @param elements a pointer to an array of hash_table_element_t pointer (malloced by function) * @returns 1 when no memory * @returns count of elements */ size_t hash_table_get_elements(hash_table_t *, hash_table_element_t ***); /** * Function to resize
the hash table store house * @param table hash table to be resized * @param len new length of the hash table * @returns -1 when no elements in hash table * @returns -2 when no emmory for new store house * @returns 0 when sucess */ int hash_table_resize(hash_table_t *, size_t); /** * Function to iterate through all elements of the hashtable * @param table hash table to be iterated * @param fct pointer to a function returning 1 if the element has to be removed * @param user arbitrary user pointer passed to the fct callback * @returns 0 when success */ int hash_table_iterate(hash_table_t *table, int (*fct)(void *user, void *value, void *key, size_t key_len), void *user); #endif chip-seq/COPYING0000744022744200262270000010451312453222524014120 0ustar ambrosingr-bucher GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. 
If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. 
Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <http://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <http://www.gnu.org/philosophy/why-not-lgpl.html>. chip-seq/INSTALL0000744022744200262270000000160713351221324014111 0ustar ambrosingr-bucherProgram Installation ============================================================================ For code compilation a suitable makefile is provided. - To create the executable files, type: make - To install the man pages (in /usr/share/man/man1/) you should have root permissions and type: make man - To install the executable files and perl scripts (default $binDir is ./bin.x86_64), type: make install - To delete the executable files and all the object files from the root and tools directories, type: make clean - To delete all installed applications in $binDir, type: make cleanbin External packages ============================================================================ For Bowtie or fetchGWI, refer to the following links: - bowtie http://bowtie-bio.sourceforge.net/index.shtml - fetchGWI http://sourceforge.net/projects/tagger/files/fetchGWI-tagger/ chip-seq/chippeak.c0000744022744200262270000012123713351174534015024 0ustar ambrosingr-bucher/* chip_peak.c Signal Peak Tool. The program locates signal peaks within a SGA file.
# Arguments: # feature type, strand, integration range (Win1) # minimal distance (Win2), counts threshold (Thres) Giovanna Ambrosini, EPFL/ISREC, Giovanna.Ambrosini@epfl.ch Copyright (c) 2014 EPFL and Swiss Institute of Bioinformatics. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ //#define DEBUG #define _GNU_SOURCE #include #include #include #include #include #include #include #include "hashtable.h" #ifdef DEBUG #include #endif #include "version.h" #define BUF_SIZE 1024 /*#define BUF_SIZE 4096*/ /*#define BUF_SIZE 8192 */ #define LINE_SIZE 1024 #define FT_MAX 64 #define SEQ_ID 32 #define POS_MAX 16 #define CNT_MAX 16 #define EXT_MAX 256 #define CHR_NB 18 #define AC_MAX 18 #define CHR_SIZE 10 typedef struct _options_t { int help; int debug; int db; char *dbPath; int refine; int oriented; } options_t; static options_t options; typedef struct _feature_t { char seq_id[SEQ_ID]; char *ft; char ft_str; char **name; unsigned long *pos; int *cnt; unsigned long *npo; int *nct; int *ptr; } feature_t, *feature_p_t; feature_t ref_ft; feature_t ref_ft_plus; feature_t ref_ft_minus; typedef struct _locmax_t { unsigned long *pos; int *cnt; char **name; } locmax_t, *locmax_p_t; locmax_t lm_plus; locmax_t lm_minus; int strand_flag = 0; static hash_table_t *size_table = NULL; char *Feature = NULL; int Win1 = 0; int Win2 = 0; int Thres = 50; int Coff = 1; int process_size() { FILE *input; int c; char buf[LINE_SIZE]; char *chrSizeFile; int cLen; if (options.db) 
{ cLen = (int)strlen(options.dbPath) + 10; if ((chrSizeFile = (char*)malloc(cLen * sizeof(char))) == NULL) { perror("process_ac: malloc"); exit(1); } strcpy(chrSizeFile, options.dbPath); } else { cLen = 16 + 10; if ((chrSizeFile = (char*)malloc(cLen * sizeof(char))) == NULL) { perror("process_ac: malloc"); exit(1); } strcpy(chrSizeFile, "/local/db/genome"); } strcat(chrSizeFile, "/chr_size"); input = fopen(chrSizeFile, "r"); if (input == NULL) { fprintf(stderr, "Could not open file %s: %s(%d)\n", chrSizeFile, strerror(errno), errno); return 1; } do { c = fgetc(input); } while(c != '\n'); size_table = hash_table_new(MODE_COPY); while (fgets(buf, LINE_SIZE, input) != NULL) { char *s; char chr_size[CHR_SIZE] = ""; char ncbi_ac[AC_MAX] = ""; int i = 0; int ac_len = 0; int size_len = 0; /*int valid = 1;*/ s = buf; /* Check line */ /* Get first character: if # skip line */ if (*s == '#') continue; /* Chromosome NCBI AC */ i = 0; while (*s != 0 && !isspace(*s)) { if (i >= AC_MAX) { fprintf(stderr, "AC too long \"%s\" \n", s); fclose(input); exit(1); } ncbi_ac[i++] = *s++; } if (i < AC_MAX) ncbi_ac[i] = 0; ac_len = i + 1; while (isspace(*s)) s++; i = 0; /* Chrom SIZE */ while (*s != 0 && !isspace(*s)) { if (i >= CHR_SIZE) { fprintf(stderr, "Size too long in %s\n", s); fclose(input); exit(1); } chr_size[i++] = *s++; } if (i < CHR_NB) chr_size[i] = 0; size_len = i + 1; /* printf("adding key %s (len = %d) value %s (ac) (len = %d) to hash table\n", ncbi_ac, ac_len, chr_size, size_len); */ hash_table_add(size_table, ncbi_ac, (size_t)ac_len, chr_size, (size_t)size_len); if (options.debug) { char *csize = hash_table_lookup(size_table, ncbi_ac, (size_t)ac_len); fprintf (stderr, " SIZE Hash table: %s (len = %d) -> %s (len = %d)\n", ncbi_ac, ac_len, csize, size_len); } } return 0; } void locate_peaks(int len) { /* Compute sum in window Win1; store high values in shorter arrays (npo, nct) */ unsigned long long sum = 0; int cnts = 0; int i, k; int j = 0; size_t mLen = BUF_SIZE; 
unsigned int size; int *lm; char ft_name[FT_MAX + 4]; char str = '0'; int ac_len = 0; char *csize = NULL; int chr_size = 0; if (ref_ft.ft_str != '\0') { str = ref_ft.ft_str; } if (ref_ft.ft != NULL) { strcpy(ft_name, ref_ft.ft); strcat(ft_name, "_p"); } if (options.refine) { /* Get Chromosome size */ ac_len = (int)strlen(ref_ft.seq_id) + 1; csize = hash_table_lookup(size_table, ref_ft.seq_id, (size_t)ac_len); if (csize != NULL) { chr_size = (int) atoi(csize); } } for (i = 1; i <= len; i++) { sum = ref_ft.cnt[i]; /* Sum up all tag counts within the window range +-Win1/2 */ for (k = i - 1; (long)ref_ft.pos[k] >= (long)(ref_ft.pos[i] - Win1/2); k--) { sum += ref_ft.cnt[k]; } for (k = i + 1; ref_ft.pos[k] <= ref_ft.pos[i] + Win1/2; k++) { sum += ref_ft.cnt[k]; } if ((unsigned int)j >= mLen - 1) { mLen *= 2; if (( ref_ft.npo = (unsigned long *)realloc(ref_ft.npo, mLen * sizeof(unsigned long))) == NULL) { perror("locate_peaks: realloc"); exit(1); } if (( ref_ft.nct = (int *)realloc(ref_ft.nct, mLen * sizeof(int))) == NULL) { perror("locate_peaks: realloc"); exit(1); } if (( ref_ft.ptr = (int *)realloc(ref_ft.ptr, mLen * sizeof(int))) == NULL) { perror("locate_peaks: realloc"); exit(1); } } if ( sum >= (unsigned long long)Thres) { j++; ref_ft.npo[j] = ref_ft.pos[i]; ref_ft.nct[j] = sum; ref_ft.ptr[j] = i; } } /* Initialize Local Maxima Array lm */ size = (unsigned int)j + 1; if (( lm = (int*)calloc((size_t)size, sizeof(size_t))) == NULL) { perror("locate_peaks: calloc"); exit(1); } /* Keep only one maximum value (peak) within a vicinity range +-Win2 */ /* Record local maxima in lm flag Array */ /* We distinguish three different cases : - local maxima within Win2 distance in forward direction (lm = 1) - local maxima within Win2 distance in backward direction (lm = 2) - local maxima within +-Win2 in both forw/back directions (lm = 3) */ /* Select maxima forward path (alike segmentation algorithm) */ int max = 1; for (i = 2; i <= j; i++) { if (ref_ft.npo[i] > ref_ft.npo[max] 
+ Win2) { /* if the distance between two local maxima (i, max) is greater than Win2 keep pos max as a local maxima and increment lm flag */ lm[max]++; max = i; } else if (ref_ft.nct[i] > ref_ft.nct[max]) { /* Else, max is not a local maxima */ max = i; } } lm[max]++; /* Select maxima backward path */ max = j; for (i = j - 1; i >= 0; i--) { if (ref_ft.npo[i] < ref_ft.npo[max] - Win2) { /* if the distance between two local maxima (i, max) is greater than Win2 keep pos max as a local maxima and increment by 2 lm flag */ lm[max] += 2; max = i; } else if (ref_ft.nct[i] >= ref_ft.nct[max]) { /* Else, max is not a local maxima */ max = i; } } lm[max] += 2; /* Print out local maxima positions */ /* Only positions with lm[i]=3 should be considered as signal peaks */ for (i = 1; i <= j; i++) { #ifdef DEBUG if (ref_ft.ft != NULL) { fprintf(stderr,"dbg: %s\t%s\t%lu\t%c\t%d\t%d\n", ref_ft.seq_id, ft_name, ref_ft.npo[i], str, ref_ft.nct[i], lm[i]); } else { fprintf(stderr,"dbg: %s\t%s\t%lu\t%c\t%d\t%d\n", ref_ft.seq_id, ref_ft.name[i], ref_ft.npo[i], str, ref_ft.nct[i], lm[i]); } #endif if (lm[i] == 3) { if (!options.refine) { if (ref_ft.ft != NULL) { printf("%s\t%s\t%lu\t%c\t%d\n", ref_ft.seq_id, ft_name, ref_ft.npo[i], str, ref_ft.nct[i]); } else { printf("%s\t%s\t%lu\t%c\t%d\n", ref_ft.seq_id, ref_ft.name[i], ref_ft.npo[i], str, ref_ft.nct[i]); } } else { double sum2; /* Refine peak positions */ /* printf("\ndbg: Refine peak pos for POS %d i=%d : \tk=%d : ref_ft.cnt %d\tref_ft.pos %d\n", ref_ft.npo[i], i, ref_ft.ptr[i], ref_ft.cnt[ref_ft.ptr[i]], ref_ft.pos[ref_ft.ptr[i]]); */ sum2 = (double)(ref_ft.npo[i] * ref_ft.cnt[ref_ft.ptr[i]]); cnts = ref_ft.cnt[ref_ft.ptr[i]]; /* printf("\ndbg: INIT: sum %llu cnts %d \n\n", sum, cnts); */ /* Recompute peak position within the window range +-(Win1/2) */ for (k = ref_ft.ptr[i] - 1; ref_ft.pos[k] >= ref_ft.npo[i] - Win1/2; k--) { /* printf("i=%d ref_ft.npo %d\tk=%d : ref_ft.cnt %d\tref_ft.pos %d\n", i, ref_ft.npo[i], k, ref_ft.cnt[k], 
ref_ft.pos[k]); */ sum2 += (double)(ref_ft.cnt[k]*ref_ft.pos[k]); cnts += ref_ft.cnt[k]; } for (k = ref_ft.ptr[i] + 1; ref_ft.pos[k] <= ref_ft.npo[i] + Win1/2; k++) { /* printf("i=%d ref_ft.npo %d\tk=%d : ref_ft.cnt %d\tref_ft.pos %d\n", i, ref_ft.npo[i], k, ref_ft.cnt[k], ref_ft.pos[k]); */ sum2 += (double)(ref_ft.cnt[k]*ref_ft.pos[k]); cnts += ref_ft.cnt[k]; } /* printf("\ndbg: OLD POS %d : \t", ref_ft.npo[i]); printf("SUM = %llu CNTS = %d NEW POS : %int \n\n", sum2, cnts, (int)(sum2/cnts)); */ ref_ft.npo[i] = (unsigned long)(sum2/cnts); /* Check Chromosome Boundaries */ if (chr_size) { if (ref_ft.npo[i] > (unsigned int) chr_size) { fprintf(stderr, "WARNING: peak position %s\t%lut%c\t%d goes beyond chromosome boundaries (chrom size = %d)\n", ref_ft.seq_id, ref_ft.npo[i], str, ref_ft.nct[i], chr_size); continue; } } if (ref_ft.ft != NULL) { printf("%s\t%s\t%lu\t%c\t%d\n", ref_ft.seq_id, ft_name, ref_ft.npo[i], str, ref_ft.nct[i]); } else { printf("%s\t%s\t%lu\t%c\t%d\n", ref_ft.seq_id, ref_ft.name[i], ref_ft.npo[i], str, ref_ft.nct[i]); } } /* If refine peak pos */ } } free(lm); } int locate_peaks_oriented(feature_p_t feat, locmax_p_t lmax, char strand, int len) { /* Compute sum in window Win1; store high values in shorter arrays (npo, nct) */ unsigned long long sum = 0; int cnts = 0; int i, k = 0; int j = 0; size_t mLen = BUF_SIZE; unsigned int size; int *lm; char ft_name[FT_MAX + 4]; int ac_len = 0; char *csize = NULL; int chr_size = 0; /* Feature name and strand are only passed for debugging reasons: they can be omitted */ if (ref_ft.ft != NULL) { strcpy(ft_name, ref_ft.ft); strcat(ft_name, "_p"); } if (options.refine) { /* Get Chromosome size */ ac_len = (int)strlen(feat->seq_id) + 1; csize = hash_table_lookup(size_table, feat->seq_id, (size_t)ac_len); if (csize != NULL) { chr_size = (int) atoi(csize); } } /*printf("locate_peaks_oriented : strand %c len %d feature %s Win %d\n", strand, len, ft_name, Win1);*/ /* for (i = 1; i <=10; i++) { printf ("%s ft pos: 
%lu ft cnts: %d\n", ft_name, feat->pos[i], feat->cnt[i]); } */ for (i = 1; i <= len; i++) { /* Loop on Feature Array */ sum = feat->cnt[i]; /* Sum up all tag counts within the window range +-Win1/2 */ for (k = i - 1; (long)feat->pos[k] >= (long)(feat->pos[i] - Win1/2); k--) { sum += feat->cnt[k]; } for (k = i + 1; feat->pos[k] <= feat->pos[i] + Win1/2; k++) { sum += feat->cnt[k]; } if ((unsigned int)j >= mLen - 1) { mLen *= 2; if (( feat->npo = (unsigned long *)realloc(feat->npo, mLen * sizeof(unsigned long))) == NULL) { perror("locate_peaks_oriented: realloc"); exit(1); } if (( feat->nct = (int *)realloc(feat->nct, mLen * sizeof(int))) == NULL) { perror("locate_peaks_oriented: realloc"); exit(1); } if (( feat->ptr = (int *)realloc(feat->ptr, mLen * sizeof(int))) == NULL) { perror("locate_peaks_oriented: realloc"); exit(1); } } /*printf ("feature %d : pos: %lu j: %d SUM: %llu Thres: %d\n", i, feat->pos[i], j, sum, Thres);*/ if ( sum >= (unsigned long long)Thres) { j++; feat->npo[j] = feat->pos[i]; feat->nct[j] = sum; feat->ptr[j] = i; } } /* Initialize Local Maxima Array lm */ size = (unsigned int)j + 1; if (( lm = (int*)calloc((size_t)size, sizeof(size_t))) == NULL) { perror("locate_peaks: calloc"); exit(1); } /* Keep only one maximum value (peak) within a vicinity range +-Win2 */ /* Record local maxima in lm flag Array */ /* We distinguish three different cases : - local maxima within Win2 distance in forward direction (lm = 1) - local maxima within Win2 distance in backward direction (lm = 2) - local maxima within +-Win2 in both forw/back directions (lm = 3) */ /* Select maxima forward path (alike segmentation algorithm) */ int max = 1; for (i = 2; i <= j; i++) { if (feat->npo[i] > feat->npo[max] + Win2) { /* If the distance between two local maxima (i, max) is greater than Win2 keep pos max as a local maxima and increment lm flag */ lm[max]++; max = i; } else if (feat->nct[i] > feat->nct[max]) { /* Else, max is not a local maxima */ max = i; } } lm[max]++; /* 
Select maxima backward path */ max = j; for (i = j - 1; i >= 0; i--) { if (feat->npo[i] < feat->npo[max] - Win2) { /* If the distance between two local maxima (i, max) is greater than Win2 keep pos max as a local maxima and increment by 2 lm flag */ lm[max] += 2; max = i; } else if (feat->nct[i] >= feat->nct[max]) { /* Else, max is not a local maxima */ max = i; } } lm[max] += 2; /* Store local maxima positions into locmax Array */ /* Only positions with lm[i]=3 should be considered as signal peaks */ unsigned int n = 0; mLen = BUF_SIZE; for (i = 1; i <= j; i++) { #ifdef DEBUG2 if (ref_ft.ft != NULL) { fprintf(stderr,"dbg: %s\t%s\t%lu\t%c\t%d\t%d\n", feat->seq_id, ft_name, feat->npo[i], strand, feat->nct[i], lm[i]); } else { fprintf(stderr,"dbg: %s\t%s\t%lu\t%c\t%d\t%d\n", feat->seq_id, feat->name[i], feat->npo[i], strand, feat->nct[i], lm[i]); } #endif if (n >= mLen - 1) { mLen *= 2; if ((lmax->pos = (unsigned long *)realloc(lmax->pos, mLen * sizeof(unsigned long))) == NULL) { perror("locate_peaks_oriented: realloc"); exit(1); } if ((lmax->cnt = (int *)realloc(lmax->cnt, mLen * sizeof(int))) == NULL) { perror("locate_peaks_oriented: realloc"); exit(1); } if (ref_ft.ft == NULL) { if ((lmax->name = (char**)realloc(lmax->name, mLen * sizeof(*(lmax->name)))) == NULL) { perror("process_sga: malloc"); exit(1); } } } if (lm[i] == 3) { if (!options.refine) { if (options.debug) { if (ref_ft.ft != NULL) { printf("%s\t%lu\t%c\t%d\n", ft_name, feat->npo[i], strand, feat->nct[i]); } else { printf("%s\t%lu\t%c\t%d\n", feat->name[i], feat->npo[i], strand, feat->nct[i]); } } lmax->pos[n] = feat->npo[i]; lmax->cnt[n] = feat->nct[i]; if (ref_ft.ft == NULL) { lmax->name[n] = malloc(strlen(feat->name[i]) + 1); strcpy (lmax->name[n], feat->name[i]); } n++; } else { double sum2; /* Refine peak positions */ sum2 = (double)(feat->npo[i] * feat->cnt[feat->ptr[i]]); cnts = feat->cnt[feat->ptr[i]]; /* printf("\ndbg: INIT: sum %llu cnts %d \n\n", sum, cnts); */ /* Recompute peak position 
within the window range +-(Win1/2) */ for (k = feat->ptr[i] - 1; feat->pos[k] >= feat->npo[i] - Win1/2; k--) { sum2 += (double)(feat->cnt[k]*feat->pos[k]); cnts += feat->cnt[k]; } for (k = feat->ptr[i] + 1; feat->pos[k] <= feat->npo[i] + Win1/2; k++) { sum2 += (double)(feat->cnt[k]*feat->pos[k]); cnts += feat->cnt[k]; } /* printf("\ndbg: OLD POS %d : \t", feat->npo[i]); printf("SUM = %llu CNTS = %d NEW POS : %int \n\n", sum2, cnts, (int)(sum2/cnts)); */ feat->npo[i] = (unsigned long)(sum2/cnts); /* Check Chromosome Boundaries */ if (chr_size) { if (feat->npo[i] > (unsigned int) chr_size) { fprintf(stderr, "WARNING: peak position %s\t%lu\t%c\t%d goes beyond chromosome boundaries (chrom size = %d)\n", feat->seq_id, feat->npo[i], strand, feat->nct[i], chr_size); continue; } } if (options.debug) { if (ref_ft.ft != NULL) { printf("%s\t%lu\t%c\t%d\n", ft_name, feat->npo[i], strand, feat->nct[i]); } else { printf("%s\t%lu\t%c\t%d\n", feat->name[i], feat->npo[i], strand, feat->nct[i]); } } lmax->pos[n] = feat->npo[i]; lmax->cnt[n] = feat->nct[i]; if (ref_ft.ft == NULL) { lmax->name[n] = malloc(strlen(feat->name[i]) + 1); strcpy (lmax->name[n], feat->name[i]); } n++; } /* If refine peak pos */ } } free(lm); return n; } void merge_peaks(int m, int n, char *seq_id, char *ftname) { char ft_name[FT_MAX + 4]; int i, k, j; if (ftname != NULL) { strcpy(ft_name, ftname); strcat(ft_name, "_p"); } /*printf ("merge_peaks: m %d, n %d, seq_id %s , feature %s\n", m, n, seq_id, ft_name); */ k = j = 0; for (i = 0; i < m + n;) { if (k < m && j < n) { if (lm_plus.pos[k] < lm_minus.pos[j]) { if (ftname != NULL) { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, ft_name, lm_plus.pos[k], '+', lm_plus.cnt[k]); } else { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, lm_plus.name[k], lm_plus.pos[k], '+', lm_plus.cnt[k]); } k++; } else { if (ftname != NULL) { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, ft_name, lm_minus.pos[j], '-', lm_minus.cnt[j]); } else { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, lm_minus.name[j], 
lm_minus.pos[j], '-', lm_minus.cnt[j]); } j++; } i++; } else if (k == m) { /* print lm_minus array */ for (; i < m + n;) { if (ftname != NULL) { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, ft_name, lm_minus.pos[j], '-', lm_minus.cnt[j]); } else { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, lm_minus.name[j], lm_minus.pos[j], '-', lm_minus.cnt[j]); } j++; i++; } } else { /* j = n print lm_plus array */ for (; i < m + n;) { if (ftname != NULL) { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, ft_name, lm_plus.pos[k], '+', lm_plus.cnt[k]); } else { printf("%s\t%s\t%lu\t%c\t%d\n", seq_id, lm_plus.name[k], lm_plus.pos[k], '+', lm_plus.cnt[k]); } k++; i++; } } } } int process_sga(FILE *input, char *iFile) { char seq_id_prev[SEQ_ID] = ""; int pos, cnt; size_t mLen = BUF_SIZE; size_t mLen1 = BUF_SIZE; char *s, *res, *buf; size_t bLen = LINE_SIZE; unsigned int k = 0; unsigned int j = 0; int k1 = 0, j1 = 0; if (options.debug && input != stdin) { char sort_cmd[1024] = "sort -s -c -k1,1 -k3,3n "; fprintf(stderr, "Check whether file %s is properly sorted\n", iFile); if (strcat(sort_cmd, iFile) == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } if (strcat(sort_cmd, " 2>/tmp/sortcheck.out") == NULL) { fprintf(stderr, "strcat failed\n"); return 1; } fprintf(stderr, "executing : %s\n", sort_cmd); int sys_code = system(sort_cmd); if (sys_code != 0) { fprintf(stderr, "system command failed\n"); return 1; } struct stat file_status; if(stat("/tmp/sortcheck.out", &file_status) != 0){ fprintf(stderr, "could not stat\n"); return 1; } if (file_status.st_size != 0) { fprintf(stderr, "SGA file %s is not properly sorted\n", iFile); return 1; } else { system("/bin/rm /tmp/sortcheck.out"); } } if (!options.oriented) { if ((ref_ft.pos = (unsigned long*)calloc(mLen, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft.cnt = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } /* Peak Array (position, count, feature name) */ if 
((ref_ft.npo = (unsigned long*)calloc(mLen, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft.nct = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft.ptr = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if (ref_ft.ft == NULL) { if (( ref_ft.name = (char**)calloc(mLen, sizeof(*(ref_ft.name)))) == NULL) { perror("process_sga: malloc"); exit(1); } } } else { /* Peak Arrays on both strands (position, count, feature name) */ if ((ref_ft_plus.pos = (unsigned long*)calloc(mLen, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft_plus.cnt = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft_plus.npo = (unsigned long*)calloc(mLen, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft_plus.nct = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft_plus.ptr = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if (ref_ft.ft == NULL) { if (( ref_ft_plus.name = (char**)calloc(mLen, sizeof(*(ref_ft_plus.name)))) == NULL) { perror("process_sga: malloc"); exit(1); } } if ((ref_ft_minus.pos = (unsigned long*)calloc(mLen, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft_minus.cnt = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft_minus.npo = (unsigned long*)calloc(mLen, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft_minus.nct = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((ref_ft_minus.ptr = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if (ref_ft.ft == NULL) { if (( ref_ft_minus.name = (char**)calloc(mLen, sizeof(*(ref_ft_minus.name)))) == NULL) { 
perror("process_sga: malloc"); exit(1); } } /* Local Maxima Arrays (position, count, feature name) */ if ((lm_plus.cnt = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((lm_plus.pos = (unsigned long*)calloc(mLen, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((lm_minus.cnt = (int*)calloc(mLen, sizeof(int))) == NULL) { perror("process_sga: malloc"); exit(1); } if ((lm_minus.pos = (unsigned long*)calloc(mLen, sizeof(unsigned long))) == NULL) { perror("process_sga: malloc"); exit(1); } if (ref_ft.ft == NULL) { if (( lm_plus.name = (char**)calloc(mLen, sizeof(*(lm_plus.name)))) == NULL) { perror("process_sga: malloc"); exit(1); } if (( lm_minus.name = (char**)calloc(mLen, sizeof(*(lm_minus.name)))) == NULL) { perror("process_sga: malloc"); exit(1); } } } if ((s = malloc(bLen * sizeof(char))) == NULL) { perror("process_sga: malloc"); exit(1); } #ifdef DEBUG int c = 1; #endif /* while (fscanf(f,"%s %s %d %c %d", seq_id, ft, &pos, &strand, &cnt) != EOF) { */ while ((res = fgets(s, (int) bLen, input)) != NULL) { size_t cLen = strlen(s); char seq_id[SEQ_ID] = ""; char ft[FT_MAX] = ""; char position[POS_MAX] = ""; char count[CNT_MAX] = ""; char strand = '\0'; char ext[EXT_MAX]; unsigned int i = 0; memset(ext, 0, (size_t)EXT_MAX); while (cLen + 1 == bLen && s[cLen - 1] != '\n') { bLen *= 2; if ((s = realloc(s, bLen)) == NULL) { perror("process_file: realloc"); exit(1); } res = fgets(s + cLen, (int) (bLen - cLen), input); cLen = strlen(s); } if (s[cLen - 1] == '\n') s[cLen - 1] = 0; buf = s; /* Get SGA fields */ /* SEQ ID */ while (*buf != 0 && !isspace(*buf)) { if (i >= SEQ_ID) { fprintf(stderr, "Seq ID is too long \"%s\" \n", buf); exit(1); } seq_id[i++] = *buf++; } while (isspace(*buf)) buf++; /* FEATURE */ i = 0; while (*buf != 0 && !isspace(*buf)) { if (i >= FT_MAX) { fprintf(stderr, "Feature is too long \"%s\" \n", buf); exit(1); } ft[i++] = *buf++; } while (isspace(*buf)) buf++; /* Position */ i 
= 0; while (isdigit(*buf)) { if (i >= POS_MAX) { fprintf(stderr, "Position is too large \"%s\" \n", buf); exit(1); } position[i++] = *buf++; } position[i] = 0; pos = atoi(position); while (isspace(*buf)) buf++; /* Strand */ strand = *buf++; while (isspace(*buf)) buf++; /* Counts */ i = 0; while (isdigit(*buf)) { if (i >= CNT_MAX) { fprintf(stderr, "Count is too large \"%s\" \n", buf); exit(1); } count[i++] = *buf++; } count[i] = 0; cnt = atoi(count); while (isspace(*buf)) buf++; /* SGA Extension */ i = 0; while (*buf != 0) { if (i >= EXT_MAX) { fprintf(stderr, "Extension is too long \"%s\" \n", buf); exit(1); } ext[i++] = *buf++; } #ifdef DEBUG printf(" [%d] seq ID: %s Feat: %s (%c) Pos: %d Cnts: %d Ext: %s\n", c++, seq_id, ft, strand, pos, cnt, ext); #endif if (!options.oriented) { if (k >= mLen - 1) { mLen *= 2; #ifdef DEBUG fprintf(stderr, "reallocating memory for ref_ft.pos ref_ft.name ref_ft.cnt (k=%d, size=%d)\n", k, mLen); #endif if ((ref_ft.pos = (unsigned long *)realloc(ref_ft.pos, mLen * sizeof(unsigned long))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ref_ft.cnt = (int *)realloc(ref_ft.cnt, mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if (ref_ft.ft == NULL) { if ((ref_ft.name = (char**)realloc(ref_ft.name, mLen * sizeof(*(ref_ft.name)))) == NULL) { perror("process_sga: malloc"); exit(1); } } } } else { if (k >= mLen - 1) { mLen *= 2; #ifdef DEBUG fprintf(stderr, "reallocating memory for ref_ft_plus.pos ref_ft_plus.name ref_ft_plus.cnt (k=%d, size=%d)\n", k, mLen); #endif if ((ref_ft_plus.pos = (unsigned long *)realloc(ref_ft_plus.pos, mLen * sizeof(unsigned long))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ref_ft_plus.cnt = (int *)realloc(ref_ft_plus.cnt, mLen * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if (ref_ft.ft == NULL) { if ((ref_ft_plus.name = (char**)realloc(ref_ft_plus.name, mLen * sizeof(*(ref_ft_plus.name)))) == NULL) { perror("process_sga: malloc"); 
exit(1); } } } if (j >= mLen1 - 1) { mLen1 *= 2; #ifdef DEBUG fprintf(stderr, "reallocating memory for ref_ft_minus.pos ref_ft_minus.name ref_ft_minus.cnt (j=%d, size=%d)\n", j, mLen1); #endif if ((ref_ft_minus.pos = (unsigned long *)realloc(ref_ft_minus.pos, mLen1 * sizeof(unsigned long))) == NULL) { perror("process_sga: realloc"); exit(1); } if ((ref_ft_minus.cnt = (int *)realloc(ref_ft_minus.cnt, mLen1 * sizeof(int))) == NULL) { perror("process_sga: realloc"); exit(1); } if (ref_ft.ft == NULL) { if ((ref_ft_minus.name = (char**)realloc(ref_ft_minus.name, mLen1 * sizeof(*(ref_ft_minus.name)))) == NULL) { perror("process_sga: malloc"); exit(1); } } } } /* Check Chromosome BEGINNING, process previous signal peaks and printout results*/ if (strcmp(seq_id, seq_id_prev) != 0) { if (!options.oriented) { ref_ft.pos[0] = ref_ft.pos[1] - Win1/2 - 1; ref_ft.pos[k + 1] = ref_ft.pos[k] + Win1/2 + 1; locate_peaks((int)k); if (ref_ft.ft == NULL) { for (int i = 1; i <= (int)k; i++) { free(ref_ft.name[i]); } } k = 0; } else { /*printf("SEQID %s : k = %d j = %d calling locate_peaks_oriented\n", seq_id_prev, k, j);*/ ref_ft_plus.pos[0] = ref_ft_plus.pos[1] - Win1/2 - 1; ref_ft_plus.pos[k + 1] = ref_ft_plus.pos[k] + Win1/2 + 1; k1 = locate_peaks_oriented(&ref_ft_plus, &lm_plus, '+', (int)k); /*printf("after locate_peaks_oriented + : k1 = %d\n", k1); */ ref_ft_minus.pos[0] = ref_ft_minus.pos[1] - Win1/2 - 1; ref_ft_minus.pos[j + 1] = ref_ft_minus.pos[j] + Win1/2 + 1; j1 = locate_peaks_oriented(&ref_ft_minus, &lm_minus, '-', (int)j); /*printf("after locate_peaks_oriented - : j1 = %d\n", j1); */ /*printf("calling merge_peaks: k1 %d, j1 %d, seq_id %s feat %s\n", k1, j1, seq_id_prev, ref_ft.ft);*/ merge_peaks((int)k1, (int)j1, seq_id_prev, ref_ft.ft); if (ref_ft.ft == NULL) { for (int i = 1; i <= (int)k; i++) { free(ref_ft_plus.name[i]); } for (int i = 1; i <= (int)j; i++) { free(ref_ft_minus.name[i]); } for (int i = 0; i < (int)k1; i++) { free(lm_plus.name[i]); } for (int i = 0; i < 
(int)j1; i++) { free(lm_minus.name[i]); } } k = 0; j = 0; } strcpy(seq_id_prev, seq_id); } if (!options.oriented) { if (ref_ft.ft == NULL) { k++; strcpy(ref_ft.seq_id, seq_id); ref_ft.name[k] = malloc(strlen(ft) + 3); strcpy(ref_ft.name[k], ft); strcat(ref_ft.name[k], "_p"); ref_ft.pos[k] = (unsigned long)pos; if (cnt > Coff) ref_ft.cnt[k] = Coff; else ref_ft.cnt[k] = cnt; } else if (ref_ft.ft_str == '\0') { if (strcmp(ft, ref_ft.ft) == 0) { k++; strcpy(ref_ft.seq_id, seq_id); /*strcpy(ref_ft.ft, ft); */ ref_ft.pos[k] = (unsigned long)pos; if (cnt > Coff) ref_ft.cnt[k] = Coff; else ref_ft.cnt[k] = cnt; } } else if (strand_flag == 1) { if (strand == ref_ft.ft_str) { k++; strcpy(ref_ft.seq_id, seq_id); strcpy(ref_ft.ft, ft); ref_ft.pos[k] = (unsigned long)pos; if (cnt > Coff) ref_ft.cnt[k] = Coff; else ref_ft.cnt[k] = cnt; } } else { if (strcmp(ft, ref_ft.ft) == 0 && strand == ref_ft.ft_str) { k++; strcpy(ref_ft.seq_id, seq_id); /*strcpy(ref_ft.ft, ft); */ ref_ft.pos[k] = (unsigned long)pos; if (cnt > Coff) ref_ft.cnt[k] = Coff; else ref_ft.cnt[k] = cnt; } } } else { /* Oriented */ if (ref_ft.ft == NULL) { if (strand == '+') { k++; strcpy(ref_ft_plus.seq_id, seq_id); ref_ft_plus.name[k] = malloc(strlen(ft) + 3); strcpy(ref_ft_plus.name[k], ft); strcat(ref_ft_plus.name[k], "_p"); ref_ft_plus.pos[k] = (unsigned long)pos; if (cnt > Coff) ref_ft_plus.cnt[k] = Coff; else ref_ft_plus.cnt[k] = cnt; } else if (strand == '-') { j++; strcpy(ref_ft_minus.seq_id, seq_id); ref_ft_minus.name[j] = malloc(strlen(ft) + 3); strcpy(ref_ft_minus.name[j], ft); strcat(ref_ft_minus.name[j], "_p"); ref_ft_minus.pos[j] = (unsigned long)pos; if (cnt > Coff) ref_ft_minus.cnt[j] = Coff; else ref_ft_minus.cnt[j] = cnt; } } else { if (strcmp(ft, ref_ft.ft) == 0 && strand == '+') { k++; strcpy(ref_ft_plus.seq_id, seq_id); ref_ft_plus.pos[k] = (unsigned long)pos; if (cnt > Coff) ref_ft_plus.cnt[k] = Coff; else ref_ft_plus.cnt[k] = cnt; } else if (strcmp(ft, ref_ft.ft) == 0 && strand == '-') { j++; 
strcpy(ref_ft_minus.seq_id, seq_id); ref_ft_minus.pos[j] = (unsigned long)pos; if (cnt > Coff) ref_ft_minus.cnt[j] = Coff; else ref_ft_minus.cnt[j] = cnt; } } } } /* End of While */ free(s); /* Locate signal peaks for the last chromosome */ if (!options.oriented) { ref_ft.pos[0] = ref_ft.pos[1] - Win1/2 - 1; ref_ft.pos[k + 1] = ref_ft.pos[k] + Win1/2 + 1; locate_peaks((int)k); if (ref_ft.ft == NULL) { for (int i = 1; i <= (int)k; i++) { free(ref_ft.name[i]); } } free(ref_ft.name); free(ref_ft.pos); free(ref_ft.cnt); free(ref_ft.nct); free(ref_ft.npo); free(ref_ft.ptr); free(lm_plus.pos); free(lm_plus.cnt); free(lm_minus.pos); free(lm_minus.cnt); } else { ref_ft_plus.pos[0] = ref_ft_plus.pos[1] - Win1/2 - 1; ref_ft_plus.pos[k + 1] = ref_ft_plus.pos[k] + Win1/2 + 1; k1 = locate_peaks_oriented(&ref_ft_plus, &lm_plus, '+', (int)k); ref_ft_minus.pos[0] = ref_ft_minus.pos[1] - Win1/2 - 1; ref_ft_minus.pos[j + 1] = ref_ft_minus.pos[j] + Win1/2 + 1; j1 = locate_peaks_oriented(&ref_ft_minus, &lm_minus, '-', (int)j); merge_peaks((int)k1, (int)j1, seq_id_prev, ref_ft.ft); if (ref_ft.ft == NULL) { for (int i = 1; i <= (int)k; i++) { free(ref_ft_plus.name[i]); } for (int i = 1; i <= (int)j; i++) { free(ref_ft_minus.name[i]); } for (int i = 0; i < (int)k1; i++) { free(lm_plus.name[i]); } for (int i = 0; i < (int)j1; i++) { free(lm_minus.name[i]); } } free(ref_ft_plus.name); free(ref_ft_minus.name); free(ref_ft_plus.pos); free(ref_ft_plus.cnt); free(ref_ft_minus.pos); free(ref_ft_minus.cnt); free(ref_ft_plus.nct); free(ref_ft_plus.npo); free(ref_ft_plus.ptr); free(ref_ft_minus.nct); free(ref_ft_minus.npo); free(ref_ft_minus.ptr); free(lm_plus.name); free(lm_plus.pos); free(lm_plus.cnt); free(lm_minus.name); free(lm_minus.pos); free(lm_minus.cnt); } if (input != stdin) { fclose(input); } return 0; } int main(int argc, char *argv[]) { #ifdef DEBUG mcheck(NULL); mtrace(); #endif FILE *input; while (1) { int c = getopt(argc, argv, "f:di:horw:v:t:c:"); if (c == -1) break; switch (c) { 
case 'c': Coff = atoi(optarg); break; case 'd': options.debug = 1; break; case 'f': Feature = optarg; break; case 'h': options.help = 1; break; case 'i': options.dbPath = optarg; options.db = 1; break; case 'o': options.oriented = 1; break; case 'r': options.refine = 1; break; case 'w': Win1 = atoi(optarg); break; case 'v': Win2 = atoi(optarg); break; case 't': Thres = atoi(optarg); break; default: printf ("?? getopt returned character code 0%o ??\n", c); } } if (optind > argc || options.help == 1 || Win1 == 0 || Win2 == 0 || Coff < 0) { fprintf(stderr, "Usage: %s [options] [-f ] -t -w -v [<] \n" " - version %s\n" " where options are:\n" " \t\t -h Show this help text\n" " \t\t -d Print debug information and check SGA file\n" " \t\t -i Use to locate the chr_size file\n" " \t\t [Default=/local/db/genome]\n" " \t\t -o Oriented strand processing\n" " \t\t -r Refine Peak Positions\n" " \t\t -c Count Cut-off (default is %d)\n" "\n\tLocate signal peaks within SGA files.\n" "\n\tThe program reads a ChIP-seq data file (or from stdin [<]) in SGA format (),\n" "\tand detects signal peaks for ChIP-tag positions corresponding to a specific genomic\n" "\tfeature (). The parameter is a name that corresponds to the second \n" "\tfield of the input SGA file. It might optionally include the strand specification (+|-).\n" "\tIf no feature name is given then all input tags are processed.\n" "\tIf the oriented option is specified (-o), peaks are separately detected on plus and minus\n" "\tstrands, respectively.\n" "\tThe SGA input file MUST BE sorted by sequence name (or chromosome id), position, and strand.\n" "\tOne should check the input SGA file with the following command:\n" "\tsort -s -c -k1,1 -k3,3n -k4,4 .\n\n" "\tIn debug mode (-d), the program performs the sorting order check.\n\n" "\tAdditional input parameters are the integration range (), the minimal distance\n" "\tamongst a group of high count local peaks (), and the peak threshold ()\n" "\twhose default is %d. 
A value can be optionally specified as a cut-off for the feature counts.\n" "\tIf peak refinement is required (-r option), the program recomputes the position of each\n" "\tpeak by taking the center of gravity of the read counts within the peak region ().\n" "\tThe program also checks whether, after peak refinement, the new peak coordinates are still\n" "\twithin chromosome boundaries. For that reason, the file chr_size must be read.\n" "\tThe output is an SGA-formatted list containing signal peak locations.\n\n", argv[0], VERSION, Coff, Thres); return 1; } if (argc > optind) { if(!strcmp(argv[optind],"-")) { input = stdin; } else { input = fopen(argv[optind], "r"); if (NULL == input) { fprintf(stderr, "Unable to open '%s': %s(%d)\n", argv[optind], strerror(errno), errno); exit(EXIT_FAILURE); } if (options.debug) fprintf(stderr, "Processing file %s\n", argv[optind]); } } else { input = stdin; } if (options.debug) { fprintf(stderr, " Arguments:\n"); fprintf(stderr, " Selected Feature : %s\n", Feature); fprintf(stderr, " Integration range (Window) : %d\n\n", Win1); fprintf(stderr, " Minimal distance (Vicinity) : %d\n\n", Win2); fprintf(stderr, " Peak Threshold : %d\n\n", Thres); } /* Process Feature Specs */ if (Feature == NULL) { ref_ft.ft = NULL; /* Process all features */ ref_ft.ft_str = '\0'; } else { ref_ft.ft = malloc((FT_MAX + 2) * sizeof(char)); char *s = Feature; int i = 0; while (*s != 0 && !isspace(*s)) { if (i >= FT_MAX) { fprintf(stderr, "Feature Description too long \"%s\" \n", Feature); return 1; } ref_ft.ft[i++] = *s++; } ref_ft.ft[i] = '\0'; ref_ft.ft_str = '\0'; if (!options.oriented) { while (isspace(*s++)) ref_ft.ft_str = *s; } } if (options.debug) { if (ref_ft.ft_str == '\0' && ref_ft.ft == NULL) { fprintf(stderr, "Feature Specs: ALL -> Process all features\n"); } else if (ref_ft.ft_str == '\0') { fprintf(stderr, "Feature Specs: Feature name : %s\n", ref_ft.ft); } else { fprintf(stderr, "Feature Specs: Feature name/str : %s %c\n", ref_ft.ft, 
ref_ft.ft_str); } } if (!options.oriented) { if ( ref_ft.ft != NULL && (strcmp(ref_ft.ft, "+") == 0 || strcmp(ref_ft.ft, "-") == 0)) { strcpy(&ref_ft.ft_str, ref_ft.ft); strand_flag = 1; if (options.debug) fprintf(stderr, "Feature Specs: Process all features on str : %c\n", ref_ft.ft_str); } } if (process_size() == 0) { if (options.debug) fprintf(stderr, " HASH Table for chromosome size initialized\n"); } else { return 1; } if (process_sga(input, argv[optind++]) != 0) { return 1; } if (ref_ft.ft != NULL) { free(ref_ft.ft); } return 0; }