SAINT_v2.3.4/0000755000000000000000000000000012145226162011345 5ustar rootrootSAINT_v2.3.4/Makefile0000646000000000000000000000422011707255540013012 0ustar rootroot#--------------------------------------------------------------------------------- # TARGET is the name of the output # BUILD is the directory where object files & intermediate files will be placed # SOURCES is a list of directories containing source code # INCLUDES is a list of directories containing extra header files #--------------------------------------------------------------------------------- ## top directory where everything happens TOPDIR := $(shell pwd) TARGET := $(TOPDIR)/bin BUILD := $(TOPDIR)/build SRC := $(TOPDIR)/src CC = gcc CXX = g++ CFLAGS = -g -O3 CXXFLAGS = $(CFLAGS) #--------------------------------------------------------------------------------- # any extra libraries we wish to link with the projects #--------------------------------------------------------------------------------- LIBS := -lm -lgsl -lgslcblas ## create directory structure makeDirs: @mkdir -p $(TARGET) $(BUILD) clean: rm -rf $(BUILD) rm $(SRC)/*/*.gch all: makeDirs \ saint-reformat \ saint-spc-noctrl-matrix \ saint-spc-noctrl \ saint-spc-ctrl \ saint-int-ctrl \ clean @echo -e "\n\n### All executables are in $(TOPDIR)/bin ###\n\n" saint-reformat: $(CC) $(CFLAGS) -c $(SRC)/SAINTreformat/*.c $(SRC)/SAINTreformat/*.h mv *.o $(BUILD) $(CC) $(LIBS) $(BUILD)/*.o -o $(TARGET)/saint-reformat rm $(BUILD)/*.o @echo @echo saint-spc-noctrl-matrix: $(CC) $(CFLAGS) -c $(SRC)/SAINTspc-noctrl-matrix/*.c $(SRC)/SAINTspc-noctrl-matrix/*.h mv *.o $(BUILD) $(CC) $(LIBS) $(BUILD)/*.o -o $(TARGET)/saint-spc-noctrl-matrix rm $(BUILD)/*.o @echo @echo saint-spc-noctrl: $(CC) $(CFLAGS) -c $(SRC)/SAINTspc-noctrl/*.c $(SRC)/SAINTspc-noctrl/*.h mv *.o $(BUILD) $(CC) $(LIBS) $(BUILD)/*.o -o $(TARGET)/saint-spc-noctrl rm $(BUILD)/*.o @echo @echo saint-spc-ctrl: $(CC) $(CFLAGS) -c $(SRC)/SAINTspc-ctrl/*.c $(SRC)/SAINTspc-ctrl/*.h mv *.o $(BUILD) $(CC) $(LIBS) $(BUILD)/*.o -o $(TARGET)/saint-spc-ctrl rm $(BUILD)/*.o @echo @echo saint-int-ctrl: $(CC) $(CFLAGS) -c $(SRC)/SAINTint-ctrl/*.c $(SRC)/SAINTint-ctrl/*.h mv *.o $(BUILD) $(CC) $(LIBS) $(BUILD)/*.o -o $(TARGET)/saint-int-ctrl rm $(BUILD)/*.o @echo @echo SAINT_v2.3.4/src/0000755000000000000000000000000012145226162012134 5ustar rootrootSAINT_v2.3.4/src/SAINTreformat/0000775000000000000000000000000012014654452014556 5ustar rootrootSAINT_v2.3.4/src/SAINTreformat/append.c0000644000000000000000000000673712014654447016210 0ustar rootroot#include "saint.h" int intersect(int *x, int *y, int *res, int nx, int ny) { int i,j; int n = 0; int found[nx]; for(i=0;inbait;j++) { nIP = data->baitNIP[j]; for(i=0;iIP[data->b2IP[j][i]]); isCtrl = 0; for(i=0;ictrl[data->b2IP[j][i]] == 1) isCtrl = 1; } /* check if this is test bait or control bait */ if(isCtrl == 0) { for(i=0;inprey;i++) { ninter = intersect(data->b2i[j], data->p2i[i], common, data->baitNinter[j], data->preyNinter[i]); if(ninter > 0) { for(k=0;kip[common[l]]) == 0) { isMatch = common[l]; break; } } if(isMatch >= 0) { if(strcmp(data->BAIT[j], data->bait[isMatch]) != 0) fprintf(stderr, "match error - bait\n"); if(strcmp(data->PREY[i], data->prey[isMatch]) != 0) fprintf(stderr, "match error - prey\n"); if(data->type) fprintf(fp, "%s\t%s\t%s\t%f\n", data->ip[isMatch], data->bait[isMatch], data->prey[isMatch], data->d[isMatch]); else fprintf(fp, "%s\t%s\t%s\t%d\n", data->ip[isMatch], data->bait[isMatch], data->prey[isMatch], (int) data->d[isMatch]); } else { if(strcmp(data->BAIT[j], data->PREY[i]) != 0) { if(data->type) fprintf(fp, "%s\t%s\t%s\t%.1f\n", IP[k], data->BAIT[j], data->PREY[i], 0.0); else fprintf(fp, "%s\t%s\t%s\t%d\n", IP[k], data->BAIT[j], data->PREY[i], 0); } } } } } } else { for(i=0;inprey;i++) { ninter = intersect(data->b2i[j], data->p2i[i], common, data->baitNinter[j], data->preyNinter[i]); for(k=0;kip[common[l]]) == 0) { isMatch = common[l]; break; } } if(isMatch >= 0) { if(strcmp(data->BAIT[j], data->bait[isMatch]) != 0) fprintf(stderr, "match error - bait\n"); if(strcmp(data->PREY[i], data->prey[isMatch]) != 0) fprintf(stderr, "match error - prey\n"); if(strcmp(data->bait[isMatch], data->prey[isMatch]) != 0) { if(data->type) fprintf(fp, "%s\t%s\t%s\t%f\n", data->ip[isMatch], data->bait[isMatch], data->prey[isMatch], data->d[isMatch]); else fprintf(fp, "%s\t%s\t%s\t%d\n", data->ip[isMatch], data->bait[isMatch], data->prey[isMatch], (int) data->d[isMatch]); } } else { if(strcmp(data->BAIT[j], data->PREY[i]) != 0) { if(data->type) fprintf(fp, "%s\t%s\t%s\t%.1f\n", IP[k], data->BAIT[j], data->PREY[i], 0.0); else fprintf(fp, "%s\t%s\t%s\t%d\n", IP[k], data->BAIT[j], data->PREY[i], 0); } } } } } } fclose(fp); } SAINT_v2.3.4/src/SAINTreformat/compile0000755000000000000000000000016112014654447016134 0ustar rootroot#! /bin/sh gcc -Wall -c *.c *.h gcc *.o -lgsl -lgslcblas -lm -o ../../bin/saint-reformat rm -rf *.o rm -rf *.gch SAINT_v2.3.4/src/SAINTreformat/initdata.c0000644000000000000000000002605512014654447016531 0ustar rootroot#include "saint.h" int takeMIN(int a, int b) { int res = a; if(b < a) res = b; return res; } /*************************/ /* read interaction data */ /*************************/ int read_all_data(FILE *fpinter, FILE *fpprey, FILE *fpbait, DATA *data) { int i,j; char buf[10000]; int nIP, nprey, nuIP, nuprey, ninter, nuinter, cur; int *uPreyCount; int *uPreyLen; char **uniquePrey; char **uniquePreyGene; char **uniqueIP; int *uniqueInter; char **allInter; double resid; int *prey_appear; char **t_prey; char **t_bait; char **t_ip; double *t_d; char **t_preyname; int *t_preylen; /******************************/ /* read interaction text file */ /******************************/ ninter = nrow(fpinter); rewind(fpinter); data->ninter = ninter; assert(allInter = (char **) calloc(ninter, sizeof(char *))); for(i=0;ininter;i++) assert(allInter[i] = (char *) calloc(500, sizeof(char))); assert(uniqueInter = (int *) calloc(ninter, sizeof(int))); /* assert(uniqueInter = (char **) calloc(ninter, sizeof(char *))); for(i=0;ininter;i++) assert(uniqueInter[i] = (char *) calloc(500, sizeof(char))); */ assert(data->prey = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->prey[i] = (char *) calloc(500, sizeof(char))); assert(data->bait = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->bait[i] = (char *) calloc(500, sizeof(char))); assert(data->ip = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->ip[i] = (char *) calloc(500, sizeof(char))); assert(data->d = (double *) calloc(data->ninter, sizeof(double))); assert(data->iprob = (double *) calloc(data->ninter, sizeof(double))); assert(data->a2u = (int *) calloc(data->ninter, sizeof(int))); assert(t_prey = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(t_prey[i] = (char *) calloc(500, sizeof(char))); assert(t_bait = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(t_bait[i] = (char *) calloc(500, sizeof(char))); assert(t_ip = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(t_ip[i] = (char *) calloc(500, sizeof(char))); assert(t_d = (double *) calloc(data->ninter, sizeof(double))); for(i=0;ininter;i++) { fscanf(fpinter, "%s", buf); strcpy(data->ip[i], buf); strcpy(allInter[i], buf); strcat(allInter[i], " "); fscanf(fpinter, "%s", buf); strcpy(data->bait[i], buf); fscanf(fpinter, "%s", buf); strcpy(data->prey[i], buf); strcat(allInter[i], buf); fscanf(fpinter, "%s", buf); data->d[i] = atof(buf); } data->type = 0; for(i=0;ininter;i++) { resid = data->d[i] - ((double) ((int) data->d[i])); if(resid > 0.0) { data->type = 1; break; } } nuinter = unique_elements(allInter, uniqueInter, ninter); if(nuinter != ninter) { fprintf(stderr, "Duplicate Warning: IP-prey pair must be unique in the interaction file..."); cur = 0; for(i=0;ininter;i++) { if(uniqueInter[i]) { strcpy(t_prey[cur], data->prey[i]); strcpy(t_bait[cur], data->bait[i]); strcpy(t_ip[cur], data->ip[i]); t_d[cur] = data->d[i]; cur++; } } data->ninter = nuinter; for(i=0;ininter;i++) { strcpy(data->prey[i], t_prey[i]); strcpy(data->bait[i], t_bait[i]); strcpy(data->ip[i], t_ip[i]); data->d[i] = t_d[i]; } fprintf(stderr, "fixed.\n"); } for(i=0;inprey = nprey; assert(uPreyCount = (int *) calloc(nprey, sizeof(int))); assert(uPreyLen = (int *) calloc(nprey, sizeof(int))); assert(data->PREY = (char **) calloc(nprey, sizeof(char *))); for(i=0;iPREY[i] = (char *) calloc(500, sizeof(char))); assert(data->PREYGENE = (char **) calloc(nprey, sizeof(char *))); for(i=0;iPREYGENE[i] = (char *) calloc(500, sizeof(char))); assert(uniquePrey = (char **) calloc(nprey, sizeof(char *))); for(i=0;ipreyLen = (int *) calloc(nprey, sizeof(int))); for(i=0;i 500) { fprintf(stderr, "Prey name %s is longer than 500 characters.\n", buf); return 1; } strcpy(data->PREY[i], buf); if(data->type == 0) { fscanf(fpprey, "%s", buf); data->preyLen[i] = atoi(buf); /* not unique at this point */ } fscanf(fpprey, "%s", buf); strcpy(data->PREYGENE[i], buf); } /* if preys are not unique, then write out prey file and reread. */ nuprey = unique_elements_copy(data->PREY, uniquePrey, nprey); if(nuprey != nprey) { fprintf(stderr, "Duplicate Warning: Preys must be unique in the prey file...fixed.\n"); } for(i=0;iPREY[i], uniquePrey[j]) == 0) { (uPreyCount[j])++; break; } } } for(j=0;jPREY[i]) == 0) { if(data->type == 0) { uPreyLen[j] = data->preyLen[i]; } strcpy(uniquePreyGene[j], data->PREYGENE[i]); break; } } } data->nprey = nuprey; assert(prey_appear = (int *) calloc(nuprey, sizeof(int))); for(i=0;ininter;j++) { if(strcmp(uniquePrey[i], data->prey[j]) == 0) { prey_appear[i] = 1; break; } } } assert(t_preyname = (char **) calloc(nuprey, sizeof(char *))); for(i=0;itype) fprintf(fpp, "%s\t%s\n", uniquePrey[j], uniquePreyGene[j]); else fprintf(fpp, "%s\t%d\t%s\n", uniquePrey[j], uPreyLen[j], uniquePreyGene[j]); strcpy(t_preyname[cur], uniquePrey[j]); if(data->type == 0) t_preylen[cur] = uPreyLen[j]; cur++; } } fclose(fpp); data->nprey = cur; for(i=0;iPREY[i], t_preyname[i]); if(data->type == 0) data->preyLen[i] = t_preylen[i]; } free(prey_appear); for(i=0;inIP = nIP; data->nctrl = 0; data->ntest = 0; assert(data->BAIT = (char **) calloc(nIP, sizeof(char *))); for(i=0;iBAIT[i] = (char *) calloc(500, sizeof(char))); assert(data->IP = (char **) calloc(nIP, sizeof(char *))); for(i=0;iIP[i] = (char *) calloc(500, sizeof(char))); assert(uniqueIP = (char **) calloc(nIP, sizeof(char *))); for(i=0;ictrl = (int *) calloc(nIP, sizeof(int))); assert(data->IPNinter = (int *) calloc(nIP, sizeof(int))); for(i=0;iIP[i], buf); fscanf(fpbait, "%s", buf); strcpy(data->BAIT[i], buf); /* not unique at this point */ fscanf(fpbait, "%s", buf); if(buf[0] == 'C' || buf[0] == 'c') { data->ctrl[i] = 1; /* note that control is marked as 1, test is as 0 */ (data->nctrl)++; } else { data->ctrl[i] = 0; (data->ntest)++; } } /* if IP names are duplicated, return 1 */ nuIP = unique_elements_copy(data->IP, uniqueIP, nIP); if(nuIP != nIP) { fprintf(stderr, "Duplicate Warning: IP names must be unique in the bait file. User must resolve this.\n"); return 1; } for(i=0;inprey][data->_K_]; double allspec[data->nctrl]; int ctrlCounts[data->nprey]; // fprintf(stderr, "%d\n", data->nctrl); /* reformat control data: do this only if(K > 5) */ for(i=0;inprey;i++) { for(j=0;j_K_;j++) spec[i][j] = 0.0; } if(data->nctrl > data->_K_) { for(i=0;inprey;i++) { cur = 0; for(j=0;jnctrl;j++) allspec[j] = 0; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; // fprintf(stderr, "%s %s\n", data->bait[id], data->prey[id]); if(data->ctrl[data->i2IP[id]]) { allspec[cur] = data->d[id]; cur++; } } ctrlCounts[i] = cur; if(cur > 0) { gsl_sort(allspec, 1, data->nctrl); for(j=0;jnctrl;j++) { spec[i][j] = allspec[data->nctrl-1-j]; if(j >= data->_K_) break; } } } } /* write interaction_intermediate */ FILE *fpi = fopen("interaction.intermediate", "w"); if(data->nctrl > data->_K_) { for(i=0;ininter;i++) { if(data->ctrl[data->i2IP[i]] == 0) { if(data->type) fprintf(fpi, "%s\t%s\t%s\t%f\n", data->ip[i], data->bait[i], data->prey[i], data->d[i]); else fprintf(fpi, "%s\t%s\t%s\t%d\n", data->ip[i], data->bait[i], data->prey[i], ((int) data->d[i])); } } for(i=0;inprey;i++) { if(ctrlCounts[i] > 0) { for(j=0;j_K_;j++) { if(spec[i][j] > 0.0) { if(data->type) fprintf(fpi, "CTRL%d\tCTRL%d\t%s\t%.3f\n", j+1, j+1, data->PREY[i], spec[i][j]); else fprintf(fpi, "CTRL%d\tCTRL%d\t%s\t%d\n", j+1, j+1, data->PREY[i], (int) spec[i][j]); } } } } } else { for(i=0;ininter;i++) { if(data->type) fprintf(fpi, "%s\t%s\t%s\t%f\n", data->ip[i], data->bait[i], data->prey[i], data->d[i]); else fprintf(fpi, "%s\t%s\t%s\t%d\n", data->ip[i], data->bait[i], data->prey[i], ((int) data->d[i])); } } fclose(fpi); /* write bait.new */ FILE *fpb = fopen("bait.new", "w"); if(data->nctrl > data->_K_) { for(i=0;inIP;i++) { if(data->ctrl[i] == 0) { fprintf(fpb, "%s\t%s\tT\n", data->IP[i], data->BAIT[data->IP2b[i]]); } } for(j=0;j_K_;j++) fprintf(fpb, "CTRL%d\tCTRL%d\tC\n", j+1, j+1); } else { for(i=0;inIP;i++) { fprintf(fpb, "%s\t%s\t%s\n", data->IP[i], data->BAIT[data->IP2b[i]], data->ctrl[i] ? "C" : "T"); } } fclose(fpb); return 0; } SAINT_v2.3.4/src/SAINTreformat/mapping.c0000644000000000000000000002700312014654447016361 0ustar rootroot#include "saint.h" /*********************************************************************************/ /* make unique interaction data and identify mapping between unique and all data */ /*********************************************************************************/ void find_unique_interaction(DATA *data) { int i,j,cur; int baitCompare, preyCompare; int isUnique[data->ninter]; int nInstance[data->ninter]; /* this counts at the level of unique interactions */ int counter[data->ninter]; /* same as above, used for mapping unique->individual */ for(i=0;ininter;i++) { isUnique[i] = 1; nInstance[i] = 0; counter[i] = 0; } /* scan 1~n to mark unique interactions and count instances of each */ cur = 0; for(i=0;i<(data->ninter-1);i++) { if(isUnique[i]) { (nInstance[cur])++; for(j=(i+1);jninter;j++) { if(isUnique[j]) { baitCompare = strcmp(data->bait[i], data->bait[j]); preyCompare = strcmp(data->prey[i], data->prey[j]); if(baitCompare == 0 && preyCompare == 0) { isUnique[j] = 0; (nInstance[cur])++; } } } cur++; } } /* count # unique interactions */ data->nuinter = 0; for(i=0;ininter;i++) { if(isUnique[i]) (data->nuinter)++; } /* memory business for unique interactions */ assert(data->uprey = (char **) calloc(data->nuinter, sizeof(char *))); for(i=0;inuinter;i++) assert(data->uprey[i] = (char *) calloc(500, sizeof(char))); assert(data->ubait = (char **) calloc(data->nuinter, sizeof(char *))); for(i=0;inuinter;i++) assert(data->ubait[i] = (char *) calloc(500, sizeof(char))); assert(data->prob = (double *) calloc(data->nuinter, sizeof(double))); /* copy unique interactions */ cur = 0; for(i=0;ininter;i++) { if(isUnique[i]) { strcpy(data->uprey[cur], data->prey[i]); strcpy(data->ubait[cur], data->bait[i]); data->prob[cur] = 0.0; cur++; } } if(data->nuinter > cur) fprintf(stderr, "Warning: possibly missed some unique interactions\n"); else if(data->nuinter < cur) fprintf(stderr, "Warning: too many unique interactions, check mapping\n"); else {} /* mapping between individual and unique interactions */ assert(data->n_u2a = (int *) calloc(data->nuinter, sizeof(int))); assert(data->u2a = (int **) calloc(data->nuinter, sizeof(int *))); for(i=0;inuinter;i++) data->n_u2a[i] = nInstance[i]; for(i=0;inuinter;i++) { assert(data->u2a[i] = (int *) calloc(data->n_u2a[i], sizeof(int))); } cur = 0; /* current index of unique */ for(i=0;ininter;i++) { if(isUnique[i]) { data->a2u[i] = cur; data->u2a[cur][counter[cur]] = i; (counter[cur])++; for(j=(i+1);jninter;j++) { if(isUnique[j] == 0) { baitCompare = strcmp(data->bait[i], data->bait[j]); preyCompare = strcmp(data->prey[i], data->prey[j]); if(baitCompare == 0 && preyCompare == 0) { data->a2u[j] = cur; data->u2a[cur][counter[cur]] = j; (counter[cur])++; } } } cur++; } } } /***********************************************************************************************************/ /*****************************************************/ /* make indicators of uniqueness in character arrays */ /* returns the number of unique elements */ /*****************************************************/ int unique_elements(char **x, int *unique, int nx) { int i,j; int nunique = nx; for(i=0;ipreyNinter = (int *) calloc(data->nprey, sizeof(int))); for(i=0;inprey;i++) data->preyNinter[i] = 0; assert(data->i2p = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2p[i] = -1; for(j=0;jninter;j++) { for(i=0;inprey;i++) { if(strcmp(data->PREY[i], data->prey[j]) == 0) { (data->preyNinter[i])++; data->i2p[j] = i; break; } } } assert(data->p2i = (int **) calloc(data->nprey, sizeof(int *))); for(i=0;inprey;i++) assert(data->p2i[i] = (int *) calloc(data->preyNinter[i], sizeof(int))); for(i=0;inprey;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->PREY[i], data->prey[j]) == 0) { data->p2i[i][cur] = j; cur++; } if(cur >= data->preyNinter[i]) { break; } } } assert(data->ui2p = (int *) calloc(data->nuinter, sizeof(int))); for(i=0;inprey;i++) { for(j=0;jnuinter;j++) { if(strcmp(data->PREY[i], data->uprey[j]) == 0) { data->ui2p[j] = i; } } } /* report which prey in the prey file did not show up in the interaction file */ cur = 0; for(i=0;inprey;i++) { if(data->preyNinter[i] == 0) { cur = 1; break; } } if(cur) { chdir("reformat_log"); FILE *fptemp1 = fopen("PreysNotInData", "w"); for(i=0;inprey;i++) { if(data->preyNinter[i] == 0) fprintf(fptemp1, "%s\n", data->PREY[i]); } fclose(fptemp1); chdir(".."); return 1; } /* report which prey in the interaction file did not show up in the prey file */ cur = 0; for(i=0;ininter;i++) { if(data->i2p[i] == -1) { cur = 1; break; } } if(cur) { chdir("reformat_log"); FILE *fptemp2 = fopen("PreysNotInList", "w"); fprintf(stderr, "Some prey(s) are missing from the prey file.\nCheck PreysNotInList file in reformat_log folder and insert them in the prey file.\nThere may be duplicates in this list.\n"); for(i=0;ininter;i++) { if(data->i2p[i] == -1) fprintf(fptemp2, "%d\t%s\n", i+1, data->prey[i]); } fclose(fptemp2); chdir(".."); return 1; } return 0; } /***********************************************************************************************************/ void mapIPtoBait(DATA *data) { int i,j; int nbait, nIP, cur; char temp[data->nIP][256]; int uniqueBaits[data->nIP]; nIP = data->nIP; nbait = unique_elements(data->BAIT, uniqueBaits, nIP); data->nbait = nbait; assert(data->baitNIP = (int *) calloc(nbait, sizeof(int))); for(i=0;inbait;i++) data->baitNIP[i] = 0; cur = 0; for(i=0;inIP;i++) { if(uniqueBaits[i]) { strcpy(temp[cur], data->BAIT[i]); cur++; } } if(cur != data->nbait) fprintf(stderr, "check bait-IP file\n"); for(i=0;inbait;i++) { for(j=0;jnIP;j++) { if(strcmp(temp[i], data->BAIT[j]) == 0) (data->baitNIP[i])++; } } assert(data->IP2b = (int *) calloc(data->nIP, sizeof(int))); assert(data->b2IP = (int **) calloc(data->nbait, sizeof(int *))); for(i=0;inbait;i++) assert(data->b2IP[i] = (int *) calloc(data->baitNIP[i], sizeof(int))); for(i=0;inbait;i++) { cur = 0; for(j=0;jnIP;j++) { if(strcmp(temp[i], data->BAIT[j]) == 0) { data->IP2b[j] = i; data->b2IP[i][cur] = j; cur++; } } data->baitNIP[i] = cur; } for(i=0;inbait;i++) strcpy(data->BAIT[i], temp[i]); } int mapIPBaitToData(DATA *data) { /* Part I: bait to data */ int i,j; int cur; assert(data->baitNinter = (int *) calloc(data->nbait, sizeof(int))); for(i=0;inbait;i++) data->baitNinter[i] = 0; for(i=0;inIP;i++) data->IPNinter[i] = 0; assert(data->i2b = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2b[i] = -1; assert(data->i2IP = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2IP[i] = -1; for(i=0;inIP;i++) { for(j=0;jninter;j++) { if(strcmp(data->IP[i], data->ip[j]) == 0) { (data->IPNinter[i])++; data->i2IP[j] = i; } } } for(i=0;inbait;i++) { for(j=0;jninter;j++) { if(strcmp(data->BAIT[i], data->bait[j]) == 0) { (data->baitNinter[i])++; data->i2b[j] = i; } } } assert(data->IP2i = (int **) calloc(data->nIP, sizeof(int *))); for(i=0;inIP;i++) assert(data->IP2i[i] = (int *) calloc(data->IPNinter[i], sizeof(int))); for(i=0;inIP;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->IP[i], data->ip[j]) == 0) { data->IP2i[i][cur] = j; cur++; } if(cur >= data->IPNinter[i]) break; } } assert(data->b2i = (int **) calloc(data->nbait, sizeof(int *))); for(i=0;inbait;i++) assert(data->b2i[i] = (int *) calloc(data->baitNinter[i], sizeof(int))); for(i=0;inbait;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->BAIT[i], data->bait[j]) == 0) { data->b2i[i][cur] = j; cur++; } if(cur >= data->baitNinter[i]) break; } } /* from unique interactions to bait/IP */ assert(data->ui2b = (int *) calloc(data->nuinter, sizeof(int))); for(i=0;inbait;i++) { for(j=0;jnuinter;j++) { if(strcmp(data->BAIT[i], data->ubait[j]) == 0) data->ui2b[j] = i; } } /* report which bait/IP in the bait file did not show up in the interaction file */ cur = 0; for(i=0;inIP;i++) { if(data->IPNinter[i] == 0) { cur = 1; break; } } if(cur) { chdir("reformat_log"); FILE *fptemp1 = fopen("IPNotInData", "w"); for(i=0;inIP;i++) { if(data->IPNinter[i] == 0) fprintf(fptemp1, "%s\t%s\n", data->IP[i], data->BAIT[data->IP2b[i]]); } fclose(fptemp1); chdir(".."); return 1; } /* report which baits/IPs in the interaction file did not show up in the bait/IP file */ cur = 0; for(i=0;ininter;i++) { if(data->i2IP[i] == -1) { cur = 1; break; } } if(cur) { chdir("reformat_log"); FILE *fptemp2 = fopen("IPNotInList", "w"); for(i=0;ininter;i++) { if(data->i2IP[i] == -1) fprintf(fptemp2, "%d\t%s\n", i+1, data->ip[i]); } fclose(fptemp2); chdir(".."); return 1; } return 0; } /**************************************************************/ /* read bait data and check discrepancy with interaction data */ /**************************************************************/ /***********************************************************************************************************/ SAINT_v2.3.4/src/SAINTreformat/saint.c0000644000000000000000000000630312014654447016044 0ustar rootroot#include "saint.h" int nrow(FILE *fp) { char buf[100000]; int n = 0; while(fgets(buf, sizeof(buf), fp) != NULL) n++; return n; } int newlinechar(char *buf, int k) { int i; int found = 0; for(i=0;i_K_ = atoi(argv[4]); else data->_K_ = 100; if(fpinter == NULL) { fprintf(stderr, "Cannot locate interaction data %s.\n", argv[1]); return 1; } if(fpprey == NULL) { fprintf(stderr, "Cannot locate prey data %s.\n", argv[2]); return 1; } if(fpbait == NULL) { fprintf(stderr, "Cannot locate bait data %s.\n", argv[3]); return 1; } fclose(fpprey); fclose(fpbait); fclose(fpinter); return 0; } /**************************************************************/ /* master function for reading the data */ /**************************************************************/ int read_data(FILE *fpinter, FILE *fpprey, FILE *fpbait, DATA *data, DATA *newdata) { int read_well, map_well; read_well = read_all_data(fpinter, fpprey, fpbait, data); if(read_well != 0) return 1; find_unique_interaction(data); map_well = mapPreyToData(data); mapIPtoBait(data); mapIPBaitToData(data); read_well = reformat_data(data); /* here I take the maximum K counts */ newdata->type = data->type; reread_data(newdata); find_unique_interaction(newdata); map_well = mapPreyToData(newdata); mapIPtoBait(newdata); mapIPBaitToData(newdata); system("rm -rf interaction.intermediate"); return 0; } /***************************** MAIN ***************************/ int main(int argc, char **argv) { int progress; DATA data; DATA newdata; /* Command Line */ if(commandLine(&data, argc, argv)) return 1; FILE *fpinter = fopen(argv[1], "r"); FILE *fpprey = fopen(argv[2], "r"); FILE *fpbait = fopen(argv[3], "r"); /* Read interaction data, identify baits, preys, and IPs, make unique interaction data frame, identify the mapping between different levels of data */ system("mkdir reformat_log"); /* mapping logs */ progress = read_data(fpinter, fpprey, fpbait, &data, &newdata); if(progress != 0) return 1; /* printMap(&data); */ /* we can also check if preys are only from controls here */ append(&newdata); fclose(fpinter); fclose(fpprey); fclose(fpbait); return 0; } SAINT_v2.3.4/src/SAINTreformat/printmap.c0000644000000000000000000000544612014654447016567 0ustar rootroot#include "saint.h" void printInter(DATA *data) { int i; FILE *fp = fopen("interaction","w"); fprintf(fp, "ip\tbait\tprey\tIP\tBAIT\tPREY\tubait\tuprey\n"); for(i=0;ininter;i++) { fprintf(fp, "%s\t%s\t%s\t", data->ip[i], data->bait[i], data->prey[i]); fprintf(fp, "%s\t%s\t%s\t", data->IP[data->i2IP[i]], data->BAIT[data->i2b[i]], data->PREY[data->i2p[i]]); fprintf(fp, "%s\t%s\n", data->ubait[data->a2u[i]], data->uprey[data->a2u[i]]); } fclose(fp); } void printUInter(DATA *data) { int i,j,k; FILE *fp = fopen("unique_interaction","w"); fprintf(fp, "ubait\tuprey\tubait\tuprey\tip\tbait\tprey\n"); for(i=0;inuinter;i++) { for(j=0;jn_u2a[i];j++) { k = data->u2a[i][j]; fprintf(fp, "%s\t%s\t", data->ubait[i], data->uprey[i]); fprintf(fp, "%s\t%s\t%s\n", data->ip[k], data->bait[k], data->prey[k]); } } fprintf(fp, "\n\n************************\n\n"); fprintf(fp, "ubait\tuprey\tBAIT\tPREY\n"); for(i=0;inuinter;i++) { fprintf(fp, "%s\t%s\t%s\t%s\n", data->ubait[i], data->uprey[i], data->BAIT[data->ui2b[i]], data->PREY[data->ui2p[i]]); } fclose(fp); } void printIP(DATA *data) { int i,j,k; /* IP to bait */ FILE *fp = fopen("IP","w"); fprintf(fp, "IP\tBAIT\n"); for(i=0;inIP;i++) { fprintf(fp, "%s\t%s\n", data->IP[i], data->BAIT[data->IP2b[i]]); } fprintf(fp, "\n\n************************\n\n"); /* IP to interactions */ fprintf(fp, "IP\tip\tbait\tprey\n"); for(i=0;inIP;i++) { for(j=0;jIPNinter[i];j++) { k = data->IP2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->IP[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printBait(DATA *data) { int i,j,k; FILE *fp = fopen("bait","w"); /* bait to IP */ fprintf(fp, "BAIT\tIP\n"); for(i=0;inbait;i++) { for(j=0;jbaitNIP[i];j++) { k = data->b2IP[i][j]; fprintf(fp, "%s\t%s\n", data->BAIT[i], data->IP[k]); } } fprintf(fp, "\n\n************************\n\n"); /* bait to interaction */ fprintf(fp, "BAIT\tip\tbait\tprey\n"); for(i=0;inbait;i++) { for(j=0;jbaitNinter[i];j++) { k = data->b2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->BAIT[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printPrey(DATA *data) { int i,j,k; FILE *fp = fopen("prey","w"); /* prey to interaction */ for(i=0;inprey;i++) { for(j=0;jpreyNinter[i];j++) { k = data->p2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->PREY[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printMap(DATA *data) { chdir("FILTER"); printInter(data); printUInter(data); printIP(data); printBait(data); printPrey(data); chdir(".."); } SAINT_v2.3.4/src/SAINTreformat/saint.h0000644000000000000000000001033512014654447016051 0ustar rootroot /* Copyright (C) <2011> For troubleshooting, contact hyung_won_choi@nuhs.edu.sg. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You can obtain a copy of the GNU General Public License from . */ #include #include #include #include #include #include #include #include #include #include #define _MAX_BUF_ 500 typedef struct tagDATA { int _K_; int type; /* 0: count data; 1: intensity data */ /*************/ /* logistics */ /*************/ int ninter; int nuinter; int nprey; int nIP; int nbait; /**************************/ /* interaction level data */ /**************************/ char **prey; char **bait; char **ip; /* raw data, each row corresponds to one interaction, case-sensitive */ double *d; double *iprob; /*********************************/ /* unique interaction level data */ /*********************************/ char **uprey; char **ubait; double *prob; int *n_u2a; /* number of individual interactions per unique interactions */ int **u2a; /* unique interactions to individual interactions */ int *a2u; /* individual interactions to unique interactions */ /* crucial indicator for probability calculation */ /***********************************/ /* unique bait and prey level data */ /***********************************/ char **PREY; /* unique preys */ char **PREYGENE; char **BAIT; /* unique baits */ char **IP; /* unique IP #s */ int nctrl; int ntest; int *ctrl; /* index: control IPs or not: 'C' = control, 'T' = test */ int *preyNinter; /* # interaction for prey */ int *baitNinter; /* # interaction for bait */ int *IPNinter; /* # interaction in an IP */ int *baitNIP; /* # IPs per bait */ int *preyLen; /****************/ /* mapping data */ /****************/ int *i2p; /* index: interaction to prey */ int *i2b; /* index: interaction to bait */ int *i2IP; /* index: interaction to IP */ int **p2i; /* index: prey to interaction */ int **b2i; /* index: bait to interaction */ int **IP2i; /* index: IP to interaction */ int *ui2p; /* index: unique interaction to prey */ int *ui2b; /* index: unique interaction to bait */ /* no need to build reverse mapping for unique interactions */ /* perhaps this mapping is unnecessary */ int **b2IP; /* index: bait to IP */ int *IP2b; /* index: IP to bait */ } DATA; /*************/ /* functions */ /*************/ int takeMIN(int a, int b); int nrow(FILE *fp); int newlinechar(char *buf, int k); int ncol(FILE *fp); int commandLine(DATA *data, int argc, char **argv); int read_all_data(FILE *fpinter, FILE *fpprey, FILE *fpbait, DATA *data); void find_unique_interaction(DATA *data); int unique_elements(char **x, int *unique, int nx); int unique_elements_copy(char **x, char **uniq, int nx); int count_unique_elements(char **x, int nx); int mapPreyToData(DATA *data); void prey_data(DATA *data); void mapIPtoBait(DATA *data); int mapIPBaitToData(DATA *data); void map_bait_data(DATA *data); int read_data(FILE *fpinter, FILE *fpprey, FILE *fpbait, DATA *data, DATA *newdata); int reformat_data(DATA *data); void reread_data(DATA *newdata); /* new addition of files here for remapping */ void find_unique_interaction_anew(DATA *data); void mapPreyToData_anew(DATA *data); void mapIPtoBait_anew(DATA *data); void mapIPBaitToData_anew(DATA *data); void remap_data(DATA *data); void printInter(DATA *data); void printUInter(DATA *data); void printIP(DATA *data); void printBait(DATA *data); void printPrey(DATA *data); void printMap(DATA *data); int intersect(int *x, int *y, int *res, int nx, int ny); void append(DATA *data); SAINT_v2.3.4/src/SAINTreformat/remap.c0000644000000000000000000000721112014654447016031 0ustar rootroot#include "saint.h" /***********************************************************************************************************/ void reread_data(DATA *data) { int i; char buf[10000]; int nIP, nprey, ninter; /***********************/ /* read prey text file */ /***********************/ FILE *fpprey = fopen("prey.new", "r"); nprey = nrow(fpprey); rewind(fpprey); data->nprey = nprey; assert(data->PREY = (char **) calloc(nprey, sizeof(char *))); for(i=0;iPREY[i] = (char *) calloc(500, sizeof(char))); assert(data->preyLen = (int *) calloc(nprey, sizeof(int))); assert(data->PREYGENE = (char **) calloc(nprey, sizeof(char *))); for(i=0;iPREYGENE[i] = (char *) calloc(500, sizeof(char))); for(i=0;i 500) { fprintf(stderr, "Prey name %s is longer than 500 characters.\n", buf); } strcpy(data->PREY[i], buf); if(data->type == 0) { fscanf(fpprey, "%s", buf); data->preyLen[i] = atoi(buf); /* not unique at this point */ } fscanf(fpprey, "%s", buf); strcpy(data->PREYGENE[i], buf); } fclose(fpprey); /***********************/ /* read bait text file */ /***********************/ FILE *fpbait = fopen("bait.new", "r"); nIP = nrow(fpbait); rewind(fpbait); data->nIP = nIP; data->nbait = data->nIP; data->nctrl = 0; data->ntest = 0; assert(data->BAIT = (char **) calloc(nIP, sizeof(char *))); for(i=0;iBAIT[i] = (char *) calloc(500, sizeof(char))); assert(data->IP = (char **) calloc(nIP, sizeof(char *))); for(i=0;iIP[i] = (char *) calloc(500, sizeof(char))); assert(data->ctrl = (int *) calloc(nIP, sizeof(int))); assert(data->IPNinter = (int *) calloc(nIP, sizeof(int))); for(i=0;iIP[i], buf); fscanf(fpbait, "%s", buf); strcpy(data->BAIT[i], buf); /* not unique at this point */ fscanf(fpbait, "%s", buf); if(buf[0] == 'C' || buf[0] == 'c') { data->ctrl[i] = 1; /* note that control is marked as 1, test is as 0 */ (data->nctrl)++; } else { data->ctrl[i] = 0; (data->ntest)++; } } /******************************/ /* read interaction text file */ /******************************/ FILE *fpinter = fopen("interaction.intermediate", "r"); ninter = nrow(fpinter); rewind(fpinter); data->ninter = ninter; data->nuinter = data->ninter; assert(data->prey = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->prey[i] = (char *) calloc(250, sizeof(char))); assert(data->bait = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->bait[i] = (char *) calloc(250, sizeof(char))); assert(data->ip = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->ip[i] = (char *) calloc(250, sizeof(char))); assert(data->d = (double *) calloc(data->ninter, sizeof(double))); assert(data->iprob = (double *) calloc(data->ninter, sizeof(double))); assert(data->a2u = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) { fscanf(fpinter, "%s", buf); strcpy(data->ip[i], buf); fscanf(fpinter, "%s", buf); strcpy(data->bait[i], buf); fscanf(fpinter, "%s", buf); strcpy(data->prey[i], buf); fscanf(fpinter, "%s", buf); data->d[i] = atof(buf); } fclose(fpinter); } /* void remap_data(DATA *data) { int map_well; find_unique_interaction(data); map_well = mapPreyToData(data); mapIPtoBait(data); mapIPBaitToData(data); } */ SAINT_v2.3.4/src/SAINTspc-ctrl/0000775000000000000000000000000012145226162014464 5ustar rootrootSAINT_v2.3.4/src/SAINTspc-ctrl/mmath.c0000644000000000000000000000411712014654467015747 0ustar rootroot#include "saint.h" float vec_sum(const float *vec, int len) { int i; float res; res=vec[0]; for(i=1;ivec[i]) res=vec[i]; } return res; } float vec_mean(const float *vec, int len) { float tmp=0.0; int i; for(i=0;i sum) { rr++; sum += p[rr]; } if(rr >= K) rr = K-1; return rr; } float geometric_mean(float *x, int n) { int i; float res = 0.0; for(i=0;inprey;i++) (wsum[prior->w_mu[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_mu) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_mu[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_mu[i] = gammap[i]; for(j=0;jgamma_mu[i] *= (1.0 - gammap[j]); } } void DP_mu_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_mu, tmp_lambda, maxl, tmp; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_mu[i]); cur_mu = param->mu[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(data->ctrl[data->i2IP[id]]) { tmp_lambda = param->lambda_false[id] + prior->theta_mu[i] - cur_mu; tmp = data->d[id]; prob[i] += log_poisson_g_prop(tmp, exp(tmp_lambda), param->eta0[data->i2p[id]]); } } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_mu[pid] = ranMultinom(r, prob, _MAX_COMP_); param->mu[pid] = prior->theta_mu[prior->w_mu[pid]]; for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; param->lambda_false[id] += param->mu[pid] - cur_mu; } } void DP_mu_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale, tmp; scale = prior->gamma_mu[pid] / (1.0 - prior->gamma_mu[pid]); if(inuse[pid] == 0) { newval = gsl_ran_gaussian(r, sqrt(prior->v_mu)) + prior->m_mu; Delta = newval - prior->theta_mu[pid]; prior->theta_mu[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 1.0); for(i=0;inprey;i++) { if(prior->w_mu[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(data->ctrl[data->i2IP[id]]) { tmp = data->d[id]; param->lambda_false_tmp[id] = param->lambda_false[id] + Delta; mhratio += log_poisson_g_prop(tmp, exp(param->lambda_false_tmp[id]), param->eta0[i]) - log_poisson_g_prop(tmp, exp(param->lambda_false[id]), param->eta0[i]); } } } } mhratio += log_gaussian(prior->theta_mu[pid] + Delta, prior->m_mu, prior->v_mu) - log_gaussian(prior->theta_mu[pid], prior->m_mu, prior->v_mu); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; /* if accepted, update param and lambda */ if(accept) { prior->theta_mu[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_mu[i] == pid) { param->mu[i] += Delta; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; param->lambda_false[id] += Delta; } } } } } } void DP_mu(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) inuse[prior->w_mu[i]] = 1; DP_mu_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_mu_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_mu_theta(param, prior, data, r, i, inuse); /* loglik update */ mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_mu[i] * prior->theta_mu[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_mu[i] -= mean; for(i=0;inprey;i++) param->mu[i] -= mean; param->betac += mean; } SAINT_v2.3.4/src/SAINTspc-ctrl/compile0000755000000000000000000000016212014654467016047 0ustar rootroot#! /bin/sh gcc -Wall -c *.c *.h gcc *.o -lgsl -lgslcblas -lm -o ../../bin/saint-spc-ctrl rm -rf *.o rm -rf *.gch SAINT_v2.3.4/src/SAINTspc-ctrl/likelihood.c0000644000000000000000000000602312014654467016762 0ustar rootroot#include "saint.h" /***************************************************/ /* computing likelihoods in log scale */ /***************************************************/ float log_poisson_prop(float N, float lambda) { float res = -lambda + N * log(lambda); return res; } float log_poisson_g_prop(float N, float lambda, float theta) { float lambda1, lambda2, out; lambda2 = 1.0 - 1.0 / sqrt(theta); lambda1 = lambda / sqrt(theta); out = log(lambda1) + (N - 1.0) * log(lambda1 + N * lambda2) - (lambda1 + N * lambda2); return out; } /*************************/ /* all interactions */ /*************************/ float LRprop(PARAM *param, PRIOR *prior, DATA *data) { int i,j,id; float pos, neg, maxl; float lik_new, lik_old; lik_new = 0.0; lik_old = 0.0; for(i=0;inuinter;i++) { pos = 0.0; neg = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]] == 0) { pos += log_poisson_g_prop(data->d[id], exp(param->lambda_true[id]), param->eta[data->i2p[id]]); neg += log_poisson_g_prop(data->d[id], exp(param->lambda_false[id]), param->eta0[data->i2p[id]]); } } maxl = pos > neg ? pos : neg; pos = exp(pos - maxl); neg = exp(neg - maxl); lik_new += log(param->ptrue_tmp * pos + (1.0-param->ptrue_tmp) * neg); lik_old += log(param->ptrue * pos + (1.0-param->ptrue) * neg); } return lik_new - lik_old; } float loglik_all(PARAM *param, PRIOR *prior, DATA *data) { int i; float lambda; float lik = 0.0; for(i=0;ininter;i++) { if(param->Z[data->a2u[i]]) { lambda = exp(param->lambda_true[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta[data->i2p[i]]); } else { lambda = exp(param->lambda_false[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta0[data->i2p[i]]); } } return lik; } float loglik_all_class(PARAM *param, PRIOR *prior, DATA *data, int cl) { /* loglik by class */ int i; float lambda; float lik = 0.0; for(i=0;ininter;i++) { if(cl) { if(param->Z[data->a2u[i]]) { lambda = exp(param->lambda_true[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta[data->i2p[i]]); } } else { if(param->Z[data->a2u[i]] == 0) { lambda = exp(param->lambda_false[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta0[data->i2p[i]]); } } } return lik; } float loglik_all_class_tmp(PARAM *param, PRIOR *prior, DATA *data, int cl) { /* loglik by class */ int i; float lambda; float lik = 0.0; for(i=0;ininter;i++) { if(cl) { if(param->Z[data->a2u[i]]) { lambda = exp(param->lambda_true_tmp[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta[data->i2p[i]]); } } else { if(param->Z[data->a2u[i]] == 0) { lambda = exp(param->lambda_false_tmp[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta0[data->i2p[i]]); } } } return lik; } SAINT_v2.3.4/src/SAINTspc-ctrl/initdata.c0000644000000000000000000004277412014654467016451 0ustar rootroot#include "saint.h" /*************************/ /* read interaction data */ /*************************/ void read_interaction_data(FILE *fpinter, DATA *data) { int i; char buf[100]; data->ninter = nrow(fpinter); rewind(fpinter); assert(data->prey = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->prey[i] = (char *) calloc(250, sizeof(char))); assert(data->bait = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->bait[i] = (char *) calloc(250, sizeof(char))); assert(data->ip = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->ip[i] = (char *) calloc(250, sizeof(char))); assert(data->d = (float *) calloc(data->ninter, sizeof(float))); assert(data->d2 = (float *) calloc(data->ninter, sizeof(float))); assert(data->iprob = (float *) calloc(data->ninter, sizeof(float))); assert(data->l = (float *) calloc(data->ninter, sizeof(float))); assert(data->c = (float *) calloc(data->ninter, sizeof(float))); assert(data->a2u = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) { fscanf(fpinter, "%s", buf); strcpy(data->ip[i], buf); fscanf(fpinter, "%s", buf); strcpy(data->bait[i], buf); fscanf(fpinter, "%s", buf); strcpy(data->prey[i], buf); fscanf(fpinter, "%s", buf); data->d2[i] = atof(buf); if(data->d2[i] >= _TRUNC_) data->d[i] = _TRUNC_; else data->d[i] = data->d2[i]; /* fprintf(stderr, "%s\t%s\t%s\t%f\n", data->ip[i], data->bait[i], data->prey[i], data->d[i]); */ } } /***********************************************************************************************************/ /*********************************************************************************/ /* make unique interaction data and identify mapping between unique and all data */ /*********************************************************************************/ void find_unique_interaction(DATA *data) { int i,j,cur; int baitCompare, preyCompare; int isUnique[data->ninter]; int nInstance[data->ninter]; /* this counts at the level of unique interactions */ int counter[data->ninter]; /* same as above, used for mapping unique->individual */ for(i=0;ininter;i++) { isUnique[i] = 1; nInstance[i] = 0; counter[i] = 0; } /* scan 1~n to mark unique interactions and count instances of each */ cur = 0; for(i=0;i<(data->ninter-1);i++) { if(isUnique[i]) { (nInstance[cur])++; for(j=(i+1);jninter;j++) { if(isUnique[j]) { baitCompare = strcmp(data->bait[i], data->bait[j]); preyCompare = strcmp(data->prey[i], data->prey[j]); if(baitCompare == 0 && preyCompare == 0) { isUnique[j] = 0; (nInstance[cur])++; } } } cur++; } } if(isUnique[data->ninter-1]) { (nInstance[cur])++; cur++; } /* count # unique interactions */ data->nuinter = 0; for(i=0;ininter;i++) { if(isUnique[i]) (data->nuinter)++; } /* memory business for unique interactions */ assert(data->uprey = (char **) calloc(data->nuinter, sizeof(char *))); for(i=0;inuinter;i++) assert(data->uprey[i] = (char *) calloc(250, sizeof(char))); assert(data->ubait = (char **) calloc(data->nuinter, sizeof(char *))); for(i=0;inuinter;i++) assert(data->ubait[i] = (char *) calloc(250, sizeof(char))); assert(data->prob = (float *) calloc(data->nuinter, sizeof(float))); /* copy unique interactions */ cur = 0; for(i=0;ininter;i++) { if(isUnique[i]) { strcpy(data->uprey[cur], data->prey[i]); strcpy(data->ubait[cur], data->bait[i]); data->prob[cur] = 0.0; cur++; } } if(data->nuinter > cur) fprintf(stderr, "Warning: possibly missed some unique interactions\n"); else if(data->nuinter < cur) fprintf(stderr, "Warning: too many unique interactions, check mapping\n"); else {} /* mapping between individual and unique interactions */ assert(data->n_u2a = (int *) calloc(data->nuinter, sizeof(int))); assert(data->u2a = (int **) calloc(data->nuinter, sizeof(int *))); for(i=0;inuinter;i++) data->n_u2a[i] = nInstance[i]; for(i=0;inuinter;i++) { assert(data->u2a[i] = (int *) calloc(data->n_u2a[i], sizeof(int))); } cur = 0; /* current index of unique */ for(i=0;ininter;i++) { if(isUnique[i]) { data->a2u[i] = cur; data->u2a[cur][counter[cur]] = i; (counter[cur])++; for(j=(i+1);jninter;j++) { if(isUnique[j] == 0) { baitCompare = strcmp(data->bait[i], data->bait[j]); preyCompare = strcmp(data->prey[i], data->prey[j]); if(baitCompare == 0 && preyCompare == 0) { data->a2u[j] = cur; data->u2a[cur][counter[cur]] = j; (counter[cur])++; } } } cur++; } } } /***********************************************************************************************************/ /*****************************************************/ /* make indicators of uniqueness in character arrays */ /* returns the number of unique elements */ /*****************************************************/ int unique_elements(char **x, int *unique, int nx) { int i,j; int nunique = nx; for(i=0;inprey;i++) data->preyNinter[i] = 0; assert(data->i2p = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2p[i] = -1; for(i=0;inprey;i++) { for(j=0;jninter;j++) { if(strcmp(data->PREY[i], data->prey[j]) == 0) { (data->preyNinter[i])++; data->i2p[j] = i; } } } assert(data->p2i = (int **) calloc(data->nprey, sizeof(int *))); for(i=0;inprey;i++) assert(data->p2i[i] = (int *) calloc(data->preyNinter[i], sizeof(int))); for(i=0;inprey;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->PREY[i], data->prey[j]) == 0) { data->p2i[i][cur] = j; cur++; } if(cur >= data->preyNinter[i]) break; } } assert(data->ui2p = (int *) calloc(data->nuinter, sizeof(int))); for(i=0;inprey;i++) { for(j=0;jnuinter;j++) { if(strcmp(data->PREY[i], data->uprey[j]) == 0) { data->ui2p[j] = i; } } } /* report which prey in the prey file did not show up in the interaction file */ cur = 0; for(i=0;inprey;i++) { if(data->preyNinter[i] == 0) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp1 = fopen("PreysNotInData", "w"); for(i=0;inprey;i++) { if(data->preyNinter[i] == 0) fprintf(fptemp1, "%s\n", data->PREY[i]); } fclose(fptemp1); chdir(".."); return 1; } /* report which prey in the interaction file did not show up in the prey file */ cur = 0; for(i=0;ininter;i++) { if(data->i2p[i] == -1) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp2 = fopen("PreysNotInList", "w"); for(i=0;ininter;i++) { if(data->i2p[i] == -1) fprintf(fptemp2, "%d\t%s\t%s\t%s\n", i+1, data->ip[i], data->bait[i], data->prey[i]); } fclose(fptemp2); chdir(".."); return 1; } return 0; } /**************************************************************/ /* read prey data and check discrepancy with interaction data */ /**************************************************************/ void read_prey_data(FILE *fpprey, DATA *data) { int i, nprey; char buf[256]; nprey = nrow(fpprey); rewind(fpprey); data->nprey = nprey; assert(data->PREY = (char **) calloc(nprey, sizeof(char *))); for(i=0;iPREY[i] = (char *) calloc(250, sizeof(char))); assert(data->PREYGENE = (char **) calloc(nprey, sizeof(char *))); for(i=0;iPREYGENE[i] = (char *) calloc(250, sizeof(char))); assert(data->preyLength = (float *) calloc(nprey, sizeof(float))); assert(data->preyNinter = (int *) calloc(nprey, sizeof(int))); assert(data->ctrlavg = (float *) calloc(nprey, sizeof(float))); for(i=0;iPREY[i], buf); fscanf(fpprey, "%s", buf); data->preyLength[i] = atof(buf); /* fprintf(stderr, "%s\t%f\n", data->PREY[i], data->preyLength[i]); */ data->ctrlavg[i] = 0.0; fscanf(fpprey, "%s", buf); strcpy(data->PREYGENE[i], buf); } centerData(data->preyLength, nprey, 1); mapPreyToData(data); for(i=0;ininter;i++) data->l[i] = data->preyLength[data->i2p[i]]; } /***********************************************************************************************************/ void mapIPtoBait(DATA *data) { int i,j; int nbait, nIP, cur; char temp[data->nIP][256]; int uniqueBaits[data->nIP]; nIP = data->nIP; nbait = unique_elements(data->BAIT, uniqueBaits, nIP); data->nbait = nbait; assert(data->baitNIP = (int *) calloc(nbait, sizeof(int))); for(i=0;inbait;i++) data->baitNIP[i] = 0; cur = 0; for(i=0;inIP;i++) { if(uniqueBaits[i]) { strcpy(temp[cur], data->BAIT[i]); cur++; } } if(cur != data->nbait) fprintf(stderr, "check bait-IP file\n"); for(i=0;inbait;i++) { cur = 0; for(j=0;jnIP;j++) { if(strcmp(temp[i], data->BAIT[j]) == 0) { cur++; } } data->baitNIP[i] = cur; } assert(data->IP2b = (int *) calloc(data->nIP, sizeof(int))); assert(data->b2IP = (int **) calloc(data->nbait, sizeof(int *))); for(i=0;inbait;i++) assert(data->b2IP[i] = (int *) calloc(data->baitNIP[i], sizeof(int))); for(i=0;inbait;i++) { cur = 0; for(j=0;jnIP;j++) { if(strcmp(temp[i], data->BAIT[j]) == 0) { data->IP2b[j] = i; data->b2IP[i][cur] = j; cur++; } } data->baitNIP[i] = cur; } for(i=0;inbait;i++) strcpy(data->BAIT[i], temp[i]); } int mapIPBaitToData(DATA *data) { /* Part I: bait to data */ int i,j; int cur; assert(data->baitNinter = (int *) calloc(data->nbait, sizeof(int))); for(i=0;inbait;i++) data->baitNinter[i] = 0; for(i=0;inIP;i++) data->IPNinter[i] = 0; assert(data->i2b = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2b[i] = -1; assert(data->i2IP = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2IP[i] = -1; for(i=0;inIP;i++) { for(j=0;jninter;j++) { if(strcmp(data->IP[i], data->ip[j]) == 0) { (data->IPNinter[i])++; data->i2IP[j] = i; } } } for(i=0;inbait;i++) { for(j=0;jninter;j++) { if(strcmp(data->BAIT[i], data->bait[j]) == 0) { (data->baitNinter[i])++; data->i2b[j] = i; } } } assert(data->IP2i = (int **) calloc(data->nIP, sizeof(int *))); for(i=0;inIP;i++) assert(data->IP2i[i] = (int *) calloc(data->IPNinter[i], sizeof(int))); for(i=0;inIP;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->IP[i], data->ip[j]) == 0) { data->IP2i[i][cur] = j; cur++; } if(cur >= data->IPNinter[i]) break; } } assert(data->b2i = (int **) calloc(data->nbait, sizeof(int *))); for(i=0;inbait;i++) assert(data->b2i[i] = (int *) calloc(data->baitNinter[i], sizeof(int))); for(i=0;inbait;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->BAIT[i], data->bait[j]) == 0) { data->b2i[i][cur] = j; cur++; } if(cur >= data->baitNinter[i]) break; } } /* from unique interactions to bait/IP */ assert(data->ui2b = (int *) calloc(data->nuinter, sizeof(int))); for(i=0;inbait;i++) { for(j=0;jnuinter;j++) { if(strcmp(data->BAIT[i], data->ubait[j]) == 0) data->ui2b[j] = i; } } /* report which bait/IP in the bait file did not show up in the interaction file */ cur = 0; for(i=0;inbait;i++) { if(data->IPNinter[i] == 0) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp1 = fopen("IPNotInData", "w"); for(i=0;inIP;i++) { if(data->IPNinter[i] == 0) fprintf(fptemp1, "%s\t%s\n", data->IP[i], data->BAIT[i]); } fclose(fptemp1); chdir(".."); return 1; } /* report which baits/IPs in the interaction file did not show up in the bait/IP file */ cur = 0; for(i=0;ininter;i++) { if(data->i2IP[i] == -1) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp2 = fopen("IPNotInList", "w"); for(i=0;ininter;i++) { if(data->i2IP[i] == -1) fprintf(fptemp2, "%d\t%s\t%s\t%s\n", i+1, data->ip[i], data->bait[i], data->prey[i]); } fclose(fptemp2); chdir(".."); return 1; } return 0; } void getIPinfo(DATA *data) { int i,j; int IPmatch, BAITmatch, PREYmatch; char buf[256]; assert(data->IPbaitCoverage = (float *) calloc(data->nIP, sizeof(float))); assert(data->IPtotalAbundance = (float *) calloc(data->nIP, sizeof(float))); for(i=0;inIP;i++) { data->IPbaitCoverage[i] = 0.0; data->IPtotalAbundance[i] = 0.0; } for(i=0;inIP;i++) { strcpy(buf, data->BAIT[data->IP2b[i]]); for(j=0;jninter;j++) { IPmatch = strcmp(data->ip[j], data->IP[i]); BAITmatch = strcmp(data->bait[j], buf); PREYmatch = strcmp(data->prey[j], buf); if(IPmatch == 0) { data->IPtotalAbundance[i] += data->d[j]; if(BAITmatch == 0 && PREYmatch == 0) data->IPbaitCoverage[i] = data->d[j] / data->preyLength[data->i2p[j]]; } } /* if(data->IPbaitCoverage[i] == 0.0) { fprintf(stderr, "IP %s (bait %s) has no bait-bait interaction\n", data->IP[i], data->BAIT[data->IP2b[i]]); } */ } /* for(i=0;inIP;i++) { fprintf(stderr, "%d: %s %s %.2f %.2f\n", i+1, data->IP[i], data->BAIT[data->IP2b[i]], data->IPbaitCoverage[i], data->IPtotalAbundance[i]); } */ } /**************************************************************/ /* read bait data and check discrepancy with interaction data */ /**************************************************************/ void read_bait_data(FILE *fpbait, DATA *data) { int i, nbait, nIP; char buf[256]; nIP = nrow(fpbait); rewind(fpbait); data->nIP = nIP; data->nctrl = 0; data->ntest = 0; assert(data->BAIT = (char **) calloc(nIP, sizeof(char *))); for(i=0;iBAIT[i] = (char *) calloc(250, sizeof(char))); assert(data->IP = (char **) calloc(nIP, sizeof(char *))); for(i=0;iIP[i] = (char *) calloc(250, sizeof(char))); assert(data->ctrl = (int *) calloc(nIP, sizeof(int))); assert(data->IPNinter = (int *) calloc(nIP, sizeof(int))); for(i=0;iIP[i], buf); fscanf(fpbait, "%s", buf); strcpy(data->BAIT[i], buf); /* not unique at this point */ fscanf(fpbait, "%s", buf); if(buf[0] == 'C' || buf[0] == 'c') { data->ctrl[i] = 1; /* note that control is marked as 1, test is as 0 */ (data->nctrl)++; } else { data->ctrl[i] = 0; (data->ntest)++; } /* fprintf(stderr, "%s\t%s\t%d\n", data->IP[i], data->BAIT[i], data->ctrl[i]); */ } /* check whether IPs are unique or not */ mapIPtoBait(data); nbait = data->nbait; mapIPBaitToData(data); getIPinfo(data); /* bait coverage and total abundance */ centerData(data->IPbaitCoverage, nIP, 1); centerData(data->IPtotalAbundance, nIP, 1); /* these quantities are on log scale, mean centered now. */ for(i=0;ininter;i++) { data->c[i] = data->IPtotalAbundance[data->i2IP[i]]; } } /***********************************************************************************************************/ void set_ctrlavg(DATA *data) { int i; for(i=0;ininter;i++) { if(data->ctrl[data->i2IP[i]]) { /* if(data->ctrlavg[data->i2p[i]] < data->d[i]) data->ctrlavg[data->i2p[i]] = data->d[i]; */ data->ctrlavg[data->i2p[i]] += data->d[i]; } } for(i=0;inprey;i++) data->ctrlavg[i] /= ((float) data->nctrl); } /**************************************************************/ /* master function for reading the data */ /**************************************************************/ void read_data(FILE *fpinter, FILE *fpprey, FILE *fpbait, DATA *data) { read_interaction_data(fpinter, data); find_unique_interaction(data); read_prey_data(fpprey, data); read_bait_data(fpbait, data); /* make a function to filter out interactions with no matching preys and baits */ set_ctrlavg(data); } SAINT_v2.3.4/src/SAINTspc-ctrl/mcmc.c0000644000000000000000000001532012014654467015556 0ustar rootroot#include "saint.h" float log_gaussian(float x, float mu, float var) { float res = - .5 * pow(x-mu,2.0) / var - .5 * log(2.0 * M_PI * var); return res; } void sampleBeta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,accept; float likratio, lr_new, lr_old; float diff = gsl_ran_gaussian(r,0.2); for(i=0;ininter;i++) param->lambda_true_tmp[i] = param->lambda_true[i] + diff; lr_new = loglik_all_class_tmp(param, prior, data, 1); lr_old = loglik_all_class(param, prior, data, 1); likratio = lr_new - lr_old; likratio += log_gaussian(param->beta0 + diff, prior->m_beta, prior->v_beta) - log_gaussian(param->beta0, prior->m_beta, prior->v_beta); likratio = GSL_MIN(1.0, exp(likratio)); accept = gsl_ran_flat(r,0.0,1.0) <= likratio ? 1 : 0; if(accept) { param->beta0 += diff; for(i=0;ininter;i++) param->lambda_true[i] = param->lambda_true_tmp[i]; param->loglikTotal += (lr_new - lr_old); } } void sampleBetac(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,accept; float likratio, lr_new, lr_old; float diff = gsl_ran_gaussian(r,0.2); for(i=0;ininter;i++) param->lambda_false_tmp[i] = param->lambda_false[i] + diff; lr_new = loglik_all_class_tmp(param, prior, data, 0); lr_old = loglik_all_class(param, prior, data, 0); likratio = lr_new - lr_old; likratio += log_gaussian(param->betac + diff, prior->m_beta, prior->v_beta) - log_gaussian(param->betac, prior->m_beta, prior->v_beta); likratio = GSL_MIN(1.0, exp(likratio)); accept = gsl_ran_flat(r,0.0,1.0) <= likratio ? 1 : 0; if(accept) { param->betac += diff; for(i=0;ininter;i++) param->lambda_false[i] = param->lambda_false_tmp[i]; param->loglikTotal += (lr_new - lr_old); } } void sampleZ(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { /* Z and iZ */ int i,j,id; int indiv, total; int isCtrl, isReverse, isMaxOne; float prob, maxl; float posi, negi; float pos, neg, tmp, tmp_lambda, tmp_neg; int cond1; for(i=0;inuinter;i++) { isCtrl = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]] == 1) { isCtrl = 1; break; } } isReverse = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; cond1 = data->d[id] < GSL_MAX(data->ctrlavg[data->i2p[id]], exp(param->lambda_false[id])) ? 1 : 0; if(cond1) { // isReverse = 1; break; } } isMaxOne = 1; for(j=0;jn_u2a[i];j++) { if(data->d2[data->u2a[i][j]] > 1.0) isMaxOne = 0; } if(isCtrl || isReverse || isMaxOne) { param->Z[i] = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; param->iZ[id] = 0; } } else { pos = 0.0; neg = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; tmp = data->d2[id]; tmp_lambda = exp(param->lambda_true[id]); if(minFold) { if(tmp_lambda < GSL_MAX(exp(param->lambda_false[id]), data->ctrlavg[data->i2p[id]]) * _fold_) { tmp_lambda = GSL_MAX(exp(param->lambda_false[id]), data->ctrlavg[data->i2p[id]]) * _fold_; } } if(tmp_lambda < GSL_MAX(data->ctrlavg[data->i2p[id]], exp(param->lambda_false[id]))) { param->iZ[id] = 0; } else { tmp = data->d2[id]; tmp_neg = GSL_MAX(0.1, exp(param->lambda_false[id])); if(tmp > tmp_lambda && tmp_lambda > tmp_neg) tmp = tmp_lambda; if(lowMode) tmp = GSL_MIN(_LM_, tmp); posi = log_poisson_g_prop(tmp, tmp_lambda, param->eta[data->i2p[id]]); negi = log_poisson_g_prop(tmp, tmp_neg, param->eta0[data->i2p[id]]); pos += posi; neg += negi; maxl = posi > negi ? posi : negi; posi -= maxl; negi -= maxl; prob = param->ptrue * exp(posi) / (param->ptrue * exp(posi) + (1.0-param->ptrue) * exp(negi)); param->iZ[id] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; if(data->d2[id] == 0.0) param->iZ[id] = 0.0; } } /* Z */ if(data->n_u2a[i] == 1) { id = data->u2a[i][0]; param->Z[i] = param->iZ[id]; } else { maxl = pos > neg ? pos : neg; pos -= maxl; neg -= maxl; prob = param->ptrue * exp(pos) / (param->ptrue * exp(pos) + (1.0-param->ptrue) * exp(neg)); param->Z[i] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; indiv = 0; total = data->n_u2a[i]; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(param->iZ[id]) indiv++; } pos = ((double) indiv) / ((double) total); param->Z[i] = gsl_ran_bernoulli(r, pos); } } } } float logit(float x) { return log(x) - log(1-x); } float inverseLogit(float x) { return exp(x) / (1.0 + exp(x)); } void sampleProportion(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int accept; float mhratio; param->ptrue_tmp = inverseLogit(logit(param->ptrue) + gsl_ran_gaussian(r, 0.1)); mhratio = LRprop(param, prior, data); /* uniform prior, so no prior ratio, indep. symetric random walk, so no proposal ratio */ accept = gsl_ran_flat(r,0.0,1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0; if(accept) param->ptrue = param->ptrue_tmp; } /**************************************/ /*** Metropolis-Hastings with Gibbs ***/ /**************************************/ void mhgibbs(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary, const gsl_rng *r, int updateSum) { if(gsl_ran_flat(r,0.0,1.0) <= 0.33) sampleBeta0(param, prior, data, r); if(gsl_ran_flat(r,0.0,1.0) <= 0.33) sampleBetac(param, prior, data, r); DP_alpha_prey(param, prior, data, r); /* DP_alpha_IP(param, prior, data, r); */ DP_mu(param, prior, data, r); //if(gsl_ran_flat(r,0.0,1.0) <= 0.33) DP_eta(param, prior, data, r); //if(gsl_ran_flat(r,0.0,1.0) <= 0.33) DP_eta0(param, prior, data, r); sampleZ(param, prior, data, r); compute_lambda_all(param, prior, data); if(gsl_ran_flat(r,0.0,1.0) <= 0.33) sampleProportion(param, prior, data, r); } void write_mcmc(PARAM *param, PRIOR *prior, DATA *data, FILE *fp1, FILE *fp2, FILE *fp3, int ct) { int i; fprintf(fp1, "%d\t", ct+1); for(i=0;inprey-1;i++) { fprintf(fp1, "%.3f\t", param->alpha_prey[i]); } fprintf(fp1, "%.3f\n", param->alpha_prey[data->nprey-1]); fprintf(fp2, "%d\t", ct+1); for(i=0;inIP-1;i++) { fprintf(fp2, "%.3f\t", param->alpha_IP[i]); } fprintf(fp2, "%.3f\n", param->alpha_IP[data->nIP-1]); fprintf(fp3, "%d\t", ct+1); for(i=0;inprey-1;i++) { fprintf(fp3, "%.3f\t", param->mu[i]); } fprintf(fp3, "%.3f\n", param->mu[data->nprey-1]); } SAINT_v2.3.4/src/SAINTspc-ctrl/setparam.c0000644000000000000000000001142112014654470016443 0ustar rootroot#include "saint.h" /**************************************************************/ /* initializing the model parameters */ /**************************************************************/ void memory_param(PARAM *param, PRIOR *prior, DATA *data) { assert(param->loglik_prey = (float *) calloc(data->nprey, sizeof(float))); assert(param->loglik_IP = (float *) calloc(data->nIP, sizeof(float))); assert(param->alpha_prey = (float *) calloc(data->nprey, sizeof(float))); assert(param->alpha_IP = (float *) calloc(data->nIP, sizeof(float))); assert(param->mu = (float *) calloc(data->nprey, sizeof(float))); assert(param->eta = (float *) calloc(data->nprey, sizeof(float))); assert(param->eta0 = (float *) calloc(data->nprey, sizeof(float))); assert(param->iZ = (int *) calloc(data->ninter, sizeof(int))); assert(param->Z = (int *) calloc(data->nuinter, sizeof(int))); assert(param->lambda_true = (float *) calloc(data->ninter, sizeof(float))); assert(param->lambda_false = (float *) calloc(data->ninter, sizeof(float))); assert(param->lambda_true_tmp = (float *) calloc(data->ninter, sizeof(float))); assert(param->lambda_false_tmp = (float *) calloc(data->ninter, sizeof(float))); } void set_Z(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { /* Z and iZ */ int i,j,id; int indiv, total; int isCtrl, isReverse, isLarge; float prob, maxl; float posi, negi; float pos, neg, tmp; for(i=0;inuinter;i++) { isCtrl = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]] == 1) { isCtrl = 1; break; } } isReverse = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(param->lambda_true[id] < param->lambda_false[id] || exp(param->lambda_false[id]) >= 10.0) { isReverse = 1; break; } } isLarge = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->d[id] >= 100.0) { isLarge = 1; break; } } if(isLarge && !isCtrl) { param->Z[i] = 1; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; param->iZ[id] = 1; } } else if(isCtrl || isReverse) { param->Z[i] = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; param->iZ[id] = 0; } } else { pos = 0.0; neg = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; tmp = data->d[id]; posi = log_poisson_g_prop(tmp, exp(param->lambda_true[id]), param->eta[data->i2p[id]]); /* tmp = data->d[id] < exp(param->lambda_false[id]) ? exp(param->lambda_false[id]) : data->d[id]; */ negi = log_poisson_g_prop(tmp, exp(param->lambda_false[id]), param->eta0[data->i2p[id]]); pos += posi; neg += negi; maxl = posi > negi ? posi : negi; posi -= maxl; negi -= maxl; prob = param->ptrue * exp(posi) / (param->ptrue * exp(posi) + (1.0-param->ptrue) * exp(negi)); param->iZ[id] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; } /* Z */ if(data->n_u2a[i] == 1) { id = data->u2a[i][0]; param->Z[i] = param->iZ[id]; } else { /* maxl = pos > neg ? pos : neg; pos -= maxl; neg -= maxl; prob = param->ptrue * exp(pos) / (param->ptrue * exp(pos) + (1.0-param->ptrue) * exp(neg)); param->Z[i] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; */ indiv = 0; total = data->n_u2a[i]; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(param->iZ[id]) indiv++; } pos = ((double) indiv) / ((double) total); // param->Z[i] = gsl_ran_bernoulli(r, pos); param->Z[i] = gsl_ran_bernoulli(r, prob); } } } } void initialize_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; param->beta0 = 0.0; param->betac = 0.0; for(i=0;inprey;i++) param->alpha_prey[i] = prior->theta_alpha_prey[prior->w_alpha_prey[i]]; for(i=0;inIP;i++) { param->alpha_IP[i] = 0.0; /* if(data->ctrl[i] == 0) param->alpha_IP[i] = prior->theta_alpha_IP[prior->w_alpha_IP[i]]; else param->alpha_IP[i] = 0.0; */ } for(i=0;inprey;i++) param->mu[i] = prior->theta_mu[prior->w_mu[i]]; for(i=0;inprey;i++) param->eta[i] = prior->theta_eta[prior->w_eta[i]]; for(i=0;inprey;i++) param->eta0[i] = prior->theta_eta0[prior->w_eta0[i]]; compute_lambda_all(param, prior, data); set_Z(param, prior, data, r); /* param->loglikTotal = loglik_all(param, prior, data); */ param->ptrue = 0.1; param->ptrue_tmp = 0.1; } void set_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { memory_param(param, prior, data); initialize_param(param, prior, data, r); } SAINT_v2.3.4/src/SAINTspc-ctrl/dpeta.c0000644000000000000000000001122412014654467015733 0ustar rootroot#include "saint.h" /********* ALPHA_prey *********/ void DP_eta_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inprey;i++) (wsum[prior->w_eta[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_eta) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_eta[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_eta[i] = gammap[i]; for(j=0;jgamma_eta[i] *= (1.0 - gammap[j]); } } void DP_eta_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_eta, tmp_lambda, maxl, tmp; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_eta[i]); cur_eta = param->eta[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(param->Z[data->a2u[id]] && data->d[id] > 0.0) { tmp_lambda = param->lambda_true[id]; //else tmp_lambda = param->lambda_false[id]; if(lowMode) tmp = GSL_MIN(_LM_, data->d[id]); else tmp = data->d[id]; if(data->d[id] > 0.0) prob[i] += log_poisson_g_prop(tmp, exp(tmp_lambda), prior->theta_eta[i]); } } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_eta[pid] = ranMultinom(r, prob, _MAX_COMP_); param->eta[pid] = prior->theta_eta[prior->w_eta[pid]]; } float log_exponential(float x, float mean) { float res = -log(mean) - x / mean; return res; } void DP_eta_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale, tmp_lambda, tmp; scale = prior->gamma_eta[pid] / (1.0 - prior->gamma_eta[pid]); if(inuse[pid] == 0) { newval = gsl_ran_exponential(r, prior->mean_eta) + 1.0; Delta = newval - prior->theta_eta[pid]; prior->theta_eta[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 1.0); if(prior->theta_eta[pid] + Delta <= 1.0 || prior->theta_eta[pid] + Delta > 100.0) { accept = 0; } else { for(i=0;inprey;i++) { if(prior->w_eta[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(param->Z[data->a2u[id]] && data->d[id] > 0.0) { tmp_lambda = param->lambda_true[id]; // else tmp_lambda = param->lambda_false[id]; /* if(param->Z[data->a2u[id]]) */ tmp = data->d[id] < exp(param->lambda_false[id]) && param->lambda_false[id] < param->lambda_true[id] ? exp(param->lambda_false[id]) : data->d[id]; if(lowMode) { mhratio += log_poisson_g_prop(GSL_MIN(_LM_,data->d[id]), exp(tmp_lambda), prior->theta_eta[pid]+Delta) - log_poisson_g_prop(GSL_MIN(_LM_,data->d[id]), exp(tmp_lambda), prior->theta_eta[pid]); } else { mhratio += log_poisson_g_prop(data->d[id], exp(tmp_lambda), prior->theta_eta[pid]+Delta) - log_poisson_g_prop(data->d[id], exp(tmp_lambda), prior->theta_eta[pid]); } /* mhratio += log_poisson_g_prop(tmp, exp(tmp_lambda), prior->theta_eta[pid]+Delta) - log_poisson_g_prop(tmp, exp(tmp_lambda), prior->theta_eta[pid]); */ } } } } mhratio += log(gsl_ran_exponential_pdf(prior->theta_eta[pid]+Delta-1.0, prior->mean_eta)) - log(gsl_ran_exponential_pdf(prior->theta_eta[pid]-1.0, prior->mean_eta)); // mhratio += -2.0 * (log(prior->theta_eta[pid]+ Delta) - log(prior->theta_eta[pid])); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; } /* if accepted, update param and lambda */ if(accept) { prior->theta_eta[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_eta[i] == pid) { param->eta[i] += Delta; } } } } } void DP_eta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) inuse[prior->w_eta[i]] = 1; DP_eta_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_eta_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_eta_theta(param, prior, data, r, i, inuse); /* loglik update */ } SAINT_v2.3.4/src/SAINTspc-ctrl/setsummary.c0000644000000000000000000001625712014654470017054 0ustar rootroot#include "saint.h" /**************************************************************/ /* initializing the model summaryeters */ /**************************************************************/ void memory_summary(SUMMARY *summary, DATA *data) { assert(summary->iZ = (float *) calloc(data->ninter, sizeof(float))); assert(summary->Z = (float *) calloc(data->nuinter, sizeof(float))); assert(summary->alpha_prey = (float *) calloc(data->nprey, sizeof(float))); assert(summary->alpha_IP = (float *) calloc(data->nIP, sizeof(float))); assert(summary->mu = (float *) calloc(data->nprey, sizeof(float))); assert(summary->eta = (float *) calloc(data->nprey, sizeof(float))); assert(summary->eta0 = (float *) calloc(data->nprey, sizeof(float))); assert(summary->lambda_true = (float *) calloc(data->ninter, sizeof(float))); assert(summary->lambda_false = (float *) calloc(data->ninter, sizeof(float))); } void initialize_summary(SUMMARY *summary, DATA *data) { int i; for(i=0;ininter;i++) summary->iZ[i] = 0.0; for(i=0;inuinter;i++) summary->Z[i] = 0.0; for(i=0;inprey;i++) summary->alpha_prey[i] = 0.0; for(i=0;inIP;i++) summary->alpha_IP[i] = 0.0; for(i=0;inprey;i++) summary->mu[i] = 0.0; for(i=0;inprey;i++) summary->eta[i] = 0.0; for(i=0;inprey;i++) summary->eta0[i] = 0.0; for(i=0;ininter;i++) summary->lambda_true[i] = 0.0; for(i=0;ininter;i++) summary->lambda_false[i] = 0.0; } void initialize_histogram(HISTOGRAM *hist) { int i; float binsize = ((float) (_HISTO_END_ - _HISTO_START_)) / ((float) _HISTO_BIN_); for(i=0;i<_HISTO_BIN_;i++) { hist->start[i] = _HISTO_START_ + ((float) i) * binsize; hist->end[i] = _HISTO_START_ + ((float) (i+1)) * binsize; } for(i=0;i<(_HISTO_BIN_+2);i++) hist->count[i] = 0.0; } void initialize_histogram2(HISTOGRAM2 *hist) { int i; float binsize = ((float) (_HISTO_END2_ - _HISTO_START2_)) / ((float) _HISTO_BIN2_); for(i=0;i<_HISTO_BIN2_;i++) { hist->start[i] = _HISTO_START2_ + ((float) i) * binsize; hist->end[i] = _HISTO_START2_ + ((float) (i+1)) * binsize; } for(i=0;i<(_HISTO_BIN2_+2);i++) hist->count[i] = 0.0; } void set_summary(SUMMARY *summary, DATA *data) { memory_summary(summary, data); initialize_summary(summary, data); initialize_histogram(&(summary->hist_alpha_prey)); initialize_histogram(&(summary->hist_alpha_IP)); initialize_histogram(&(summary->hist_mu)); initialize_histogram2(&(summary->hist_eta)); initialize_histogram2(&(summary->hist_eta0)); } void updateSummary(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary) { int i; for(i=0;ininter;i++) summary->iZ[i] += ((float) param->iZ[i]); for(i=0;inuinter;i++) summary->Z[i] += ((float) param->Z[i]); for(i=0;inprey;i++) summary->alpha_prey[i] += param->alpha_prey[i]; for(i=0;inIP;i++) summary->alpha_IP[i] += param->alpha_IP[i]; for(i=0;inprey;i++) summary->mu[i] += param->mu[i]; for(i=0;inprey;i++) summary->eta[i] += param->eta[i]; for(i=0;inprey;i++) summary->eta0[i] += param->eta0[i]; for(i=0;ininter;i++) summary->lambda_true[i] += param->lambda_true[i]; for(i=0;ininter;i++) summary->lambda_false[i] += param->lambda_false[i]; updateHistogram(param, prior, data, summary); } void scaleSummary(SUMMARY *summary, DATA *data, int iter) { int i; float scale = 1.0 / ((float) iter); for(i=0;ininter;i++) summary->iZ[i] *= scale; for(i=0;inuinter;i++) summary->Z[i] *= scale; for(i=0;inprey;i++) summary->alpha_prey[i] *= scale; for(i=0;inIP;i++) summary->alpha_IP[i] *= scale; for(i=0;inprey;i++) summary->mu[i] *= scale; for(i=0;inprey;i++) summary->eta[i] *= scale; for(i=0;inprey;i++) summary->eta0[i] *= scale; for(i=0;ininter;i++) summary->lambda_true[i] *= scale; for(i=0;ininter;i++) summary->lambda_false[i] *= scale; } /*************************************/ /** Histogram updates **/ /*************************************/ void updateHist_alpha_prey(HISTOGRAM *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_alpha_prey[i] < hist->start[0]) { hist->count[0] += prior->gamma_alpha_prey[i]; } else if(prior->theta_alpha_prey[i] >= hist->end[_HISTO_BIN_-1]) { hist->count[_HISTO_BIN_ + 1] += prior->gamma_alpha_prey[i]; } else { for(j=0;j<_HISTO_BIN_;j++) { if(prior->theta_alpha_prey[i] >= hist->start[j] && prior->theta_alpha_prey[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_alpha_prey[i]; break; } } } } } void updateHist_alpha_IP(HISTOGRAM *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_alpha_IP[i] < hist->start[0]) { hist->count[0] += prior->gamma_alpha_IP[i]; } else if(prior->theta_alpha_IP[i] >= hist->end[_HISTO_BIN_-1]) { hist->count[_HISTO_BIN_ + 1] += prior->gamma_alpha_IP[i]; } else { for(j=0;j<_HISTO_BIN_;j++) { if(prior->theta_alpha_IP[i] >= hist->start[j] && prior->theta_alpha_IP[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_alpha_IP[i]; break; } } } } } void updateHist_mu(HISTOGRAM *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_mu[i] < hist->start[0]) { hist->count[0] += prior->gamma_mu[i]; } else if(prior->theta_mu[i] >= hist->end[_HISTO_BIN_-1]) { hist->count[_HISTO_BIN_ + 1] += prior->gamma_mu[i]; } else { for(j=0;j<_HISTO_BIN_;j++) { if(prior->theta_mu[i] >= hist->start[j] && prior->theta_mu[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_mu[i]; break; } } } } } void updateHist_eta(HISTOGRAM2 *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_eta[i] < hist->start[0]) { hist->count[0] += prior->gamma_eta[i]; } else if(prior->theta_eta[i] >= hist->end[_HISTO_BIN2_-1]) { hist->count[_HISTO_BIN2_ + 1] += prior->gamma_eta[i]; } else { for(j=0;j<_HISTO_BIN2_;j++) { if(prior->theta_eta[i] >= hist->start[j] && prior->theta_eta[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_eta[i]; break; } } } } } void updateHist_eta0(HISTOGRAM2 *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_eta0[i] < hist->start[0]) { hist->count[0] += prior->gamma_eta0[i]; } else if(prior->theta_eta0[i] >= hist->end[_HISTO_BIN2_-1]) { hist->count[_HISTO_BIN2_ + 1] += prior->gamma_eta0[i]; } else { for(j=0;j<_HISTO_BIN2_;j++) { if(prior->theta_eta0[i] >= hist->start[j] && prior->theta_eta0[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_eta0[i]; break; } } } } } void updateHistogram(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary) { updateHist_alpha_prey(&(summary->hist_alpha_prey), prior); updateHist_alpha_IP(&(summary->hist_alpha_IP), prior); updateHist_mu(&(summary->hist_mu), prior); updateHist_eta(&(summary->hist_eta), prior); updateHist_eta0(&(summary->hist_eta0), prior); } SAINT_v2.3.4/src/SAINTspc-ctrl/saint.c0000644000000000000000000001505512014654467015762 0ustar rootroot#include "saint.h" int nrow(FILE *fp) { char buf[100000]; int n = 0; while(fgets(buf, sizeof(buf), fp) != NULL) n++; return n; } int newlinechar(char *buf, int k) { int i; int found = 0; for(i=0;igamma_alpha_IP[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_alpha_IP[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_alpha_prey\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_alpha_prey[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_alpha_prey[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_mu\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_mu[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_mu[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_eta\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_eta[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_eta[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_eta0\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_eta0[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_eta0[i]); fprintf(stderr, "\n\n"); } int commandLine(int argc, char **argv) { if (argc < 9) { fprintf(stderr, "usage: saint-spc-ctrl [interactionFile] [preyFile] [baitFile] [nburnin] [niter] [lowMode] [minFold] [normalize]\n"); fprintf(stderr, "-----------------------------------------------------------------------------------\n"); fprintf(stderr, " nburnin = 2000: number of burn-in iterations in MCMC.\n"); fprintf(stderr, " niter = 10000: number of main iterations in MCMC.\n"); fprintf(stderr, "-----------------------------------------------------------------------------------\n"); fprintf(stderr, " lowMode = 0/1 : exclude extremely high counts in the model.\n"); fprintf(stderr, " - If baits are densely connected or dataset is small (few baits), use 1.\n"); fprintf(stderr, " - otherwise, use 0.\n"); fprintf(stderr, "-----------------------------------------------------------------------------------\n"); fprintf(stderr, " minFold = 0/1 : forcing separation between true and false distributions.\n"); fprintf(stderr, " - If user wishes to allow typical contaminants with significant\n"); fprintf(stderr, " differential enrichment over control purifications, use 0.\n"); fprintf(stderr, " - otherwise, use 1.\n"); fprintf(stderr, "-----------------------------------------------------------------------------------\n"); fprintf(stderr, " normalize = 0/1 : divide the counts by the total spectral counts in each IP.\n"); fprintf(stderr, "-----------------------------------------------------------------------------------\n"); return 1; } /* interaction file: IPnumber \t bait \t prey \t spectralCount \n */ /* prey file: prey \t sequenceLength \n */ /* bait file: bait \t IPnumber \t isControl \n */ FILE *fpinter = fopen(argv[1], "r"); FILE *fpprey = fopen(argv[2], "r"); FILE *fpbait = fopen(argv[3], "r"); if(fpinter == NULL) { fprintf(stderr, "Cannot locate interaction data %s.\n", argv[1]); return 1; } if(fpprey == NULL) { fprintf(stderr, "Cannot locate prey data %s.\n", argv[2]); return 1; } if(fpbait == NULL) { fprintf(stderr, "Cannot locate bait data %s.\n", argv[3]); return 1; } burn = atoi(argv[4]); iter = atoi(argv[5]); lowMode = atoi(argv[6]); minFold = atoi(argv[7]); NORMALIZE = atoi(argv[8]); fclose(fpinter); fclose(fpprey); fclose(fpbait); return 0; } /***************************** MAIN ***************************/ int main(int argc, char **argv) { int i, ct; DATA data; PARAM param; PRIOR prior; SUMMARY summary; const gsl_rng_type *T; gsl_rng *r; gsl_rng_env_setup(); T = gsl_rng_default; r = gsl_rng_alloc(T); /* Command Line */ if(commandLine(argc, argv)) return 1; FILE *fpinter = fopen(argv[1], "r"); FILE *fpprey = fopen(argv[2], "r"); FILE *fpbait = fopen(argv[3], "r"); /* Read interaction data, identify baits, preys, and IPs, make unique interaction data frame, identify the mapping between different levels of data */ system("mkdir LOG"); /* error logs */ system("mkdir MAPPING"); /* mapping logs */ system("mkdir MCMC"); /* posterior samples */ system("mkdir RESULT"); /* posterior probabilities, other summaries */ fprintf(stderr, "Reading data and mapping interactions\n"); read_data(fpinter, fpprey, fpbait, &data); printMap(&data); /* Set up model parameters and prior elicitation */ set_prior(¶m, &prior, &data, r); set_param(¶m, &prior, &data, r); set_summary(&summary, &data); /* updates and summary */ chdir("MCMC"); FILE *fp1 = fopen("alpha_prey","w"); FILE *fp2 = fopen("alpha_IP","w"); FILE *fp3 = fopen("mu","w"); /* burnin */ ct = 0; fprintf(stderr, "Burn-in Period\n"); for(i=0;ininter;i++) { if(data->ctrl[data->i2IP[i]] == 0) fprintf(fp, "%s\t%s\t%s\t%s\t%d\t%.3f\t%.3f\n", data->ip[i], data->bait[i], data->prey[i], data->PREYGENE[data->i2p[i]], (int) data->d2[i], summary->Z[data->a2u[i]], summary->iZ[i]); /* exp(summary->lambda_true[i]), exp(summary->lambda_false[i])); */ } fclose(fp); } void write_unique_interactions(DATA *data, SUMMARY *summary) { int i,j; int isCtrl; int id, ct, pid; int countsum; float maxp, avgp, geop, tmp; FILE *fp = fopen("unique_interactions", "w"); fprintf(fp, "Bait\tPrey\tPreyGene\tIP\tSpec\tSpecSum\tNumRep\tProb\tiProb\tctrlCounts\tAvgP\tMaxP\n"); for(i=0;inuinter;i++) { isCtrl = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]]) isCtrl = 1; } if(isCtrl == 0) { fprintf(fp, "%s\t%s\t%s\t", data->ubait[i], data->uprey[i], data->PREYGENE[data->ui2p[i]]); for(j=0;j<(data->n_u2a[i]-1);j++) fprintf(fp, "%s|", data->ip[data->u2a[i][j]]); fprintf(fp, "%s\t", data->ip[data->u2a[i][data->n_u2a[i]-1]]); countsum = 0; for(j=0;j<(data->n_u2a[i]-1);j++) { countsum += ((int) data->d2[data->u2a[i][j]]); fprintf(fp, "%d|", (int) data->d2[data->u2a[i][j]]); } countsum += ((int) data->d2[data->u2a[i][data->n_u2a[i]-1]]); fprintf(fp, "%d\t", (int) data->d2[data->u2a[i][data->n_u2a[i]-1]]); fprintf(fp, "%d\t%d\t", countsum, data->n_u2a[i]); fprintf(fp, "%.2f\t", summary->Z[i]); for(j=0;j<(data->n_u2a[i]-1);j++) fprintf(fp, "%.2f|", summary->iZ[data->u2a[i][j]]); fprintf(fp, "%.2f\t", summary->iZ[data->u2a[i][data->n_u2a[i]-1]]); pid = data->ui2p[i]; ct = 0; for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(data->ctrl[data->i2IP[id]]) { fprintf(fp, "%d", (int) data->d2[id]); if(ct < data->nctrl-1) fprintf(fp, "|"); else fprintf(fp, "\t"); ct++; } } /* maxp */ maxp = 0.0; avgp = 0.0; geop = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(summary->iZ[id] > maxp) maxp = summary->iZ[id]; avgp += summary->iZ[id] / ((float) data->n_u2a[i]); tmp = data->d[id] == 0.0 ? 0.001 : summary->iZ[id]; geop += log(tmp) / ((float) data->n_u2a[i]); } geop = exp(geop); fprintf(fp, "%.4f\t%.4f\n", avgp, maxp); /* fprintf(fp, "%.2f\n", ((float) data->preyNinter[data->ui2p[i]]) / ((float) data->nIP)); */ } } fclose(fp); } void write_prey(DATA *data, SUMMARY *summary) { int i; FILE *fp = fopen("preys", "w"); fprintf(fp, "Prey\tAlpha_prey\tMu\n"); for(i=0;inprey;i++) { fprintf(fp, "%s\t%.2f\t%.2f\n", data->PREY[i], summary->alpha_prey[i], summary->mu[i]); } fclose(fp); } void write_IP(DATA *data, SUMMARY *summary) { int i; FILE *fp = fopen("IPs", "w"); fprintf(fp, "IP\tBait\tAlpha_IP\n"); for(i=0;inIP;i++) { fprintf(fp, "%s\t%s\t%.2f\n", data->IP[i], data->BAIT[data->IP2b[i]], summary->alpha_IP[i]); } fclose(fp); } void write_bait(DATA *data, SUMMARY *summary) { int i,j; FILE *fp = fopen("baits", "w"); fprintf(fp, "Bait\tIP\tAlpha_IP\n"); for(i=0;inbait;i++) { fprintf(fp, "%s\t", data->BAIT[i]); for(j=0;jbaitNIP[i]-1;j++) { fprintf(fp, "%s|", data->IP[data->b2IP[i][j]]); } fprintf(fp, "%s\t", data->IP[data->b2IP[i][data->baitNIP[i]-1]]); for(j=0;jbaitNIP[i]-1;j++) { fprintf(fp, "%.2f|", summary->alpha_IP[data->b2IP[i][j]]); } fprintf(fp, "%.2f\n", summary->alpha_IP[data->b2IP[i][data->baitNIP[i]-1]]); } fclose(fp); } void write_histogram(FILE *fp, HISTOGRAM *hist) { int i; fprintf(fp, "-inf\t%.2f\t%.2f\n", hist->start[0], hist->count[0]); for(i=0;i<_HISTO_BIN_;i++) { fprintf(fp, "%.2f\t%.2f\t%.2f\n", hist->start[i], hist->end[i], hist->count[i+1]); } fprintf(fp, "%.2f\tinf\t%.2f\n", hist->end[_HISTO_BIN_-1], hist->count[_HISTO_BIN_+1]); } void write_histogram2(FILE *fp, HISTOGRAM2 *hist) { int i; fprintf(fp, "-inf\t%.2f\t%.2f\n", hist->start[0], hist->count[0]); for(i=0;i<_HISTO_BIN2_;i++) { fprintf(fp, "%.2f\t%.2f\t%.2f\n", hist->start[i], hist->end[i], hist->count[i+1]); } fprintf(fp, "%.2f\tinf\t%.2f\n", hist->end[_HISTO_BIN2_-1], hist->count[_HISTO_BIN2_+1]); } void write_hyperprior(DATA *data, SUMMARY *summary) { FILE *fp1 = fopen("hist_alpha_prey", "w"); FILE *fp2 = fopen("hist_alpha_IP", "w"); FILE *fp3 = fopen("hist_mu", "w"); FILE *fp4 = fopen("hist_eta", "w"); FILE *fp5 = fopen("hist_eta0", "w"); write_histogram(fp1, &(summary->hist_alpha_prey)); write_histogram(fp2, &(summary->hist_alpha_IP)); write_histogram(fp3, &(summary->hist_mu)); write_histogram2(fp4, &(summary->hist_eta)); write_histogram2(fp5, &(summary->hist_eta0)); fclose(fp1); fclose(fp2); fclose(fp3); fclose(fp4); fclose(fp5); } void write_result(DATA *data, SUMMARY *summary) { chdir("RESULT"); write_interactions(data, summary); write_unique_interactions(data, summary); write_prey(data, summary); write_IP(data, summary); write_bait(data, summary); write_hyperprior(data, summary); write_matrix_data(data, summary); write_matrix_data2(data, summary); chdir(".."); } /******************************/ void write_matrix_data(DATA *data, SUMMARY *summary) { int i,j,k,id; int endLine, isMatch; FILE *fp = fopen("matrix_form","w"); endLine = -1; for(j=0;jnIP;j++) { if(data->ctrl[j] != 1) endLine = j; } /* header line 1 */ fprintf(fp, "\t\t\t\tBait\t"); for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j]) { fprintf(fp, "%s\t", data->BAIT[id]); } } for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->BAIT[id]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 2 */ fprintf(fp, "\t\t\t\tIP\t"); for(j=0;jnIP;j++) { if(data->ctrl[j]) { fprintf(fp, "%s\t", data->IP[j]); } } for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 3 */ fprintf(fp, "Prey\tmean_s\tvar_s\tprey_ns\tvar_ns\t"); for(j=0;jnIP;j++) { if(data->ctrl[j]) { fprintf(fp, "\t"); } } for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%.2f", summary->alpha_IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } for(i=0;inprey;i++) { fprintf(fp, "%s\t%.2f\t%.2f\t%.2f\t%.2f\t", data->PREY[i], summary->alpha_prey[i], summary->eta[i], summary->mu[i], summary->eta0[i]); /* Control runs first */ for(j=0;jnIP;j++) { if(data->ctrl[j]) { /* find if prey wise data has this IP */ /* if not, leave the space blank */ /* else, biz as usual: (count | prob | lambda_s, lambda_ns) */ isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { fprintf(fp, "\t"); } else { fprintf(fp, "%d|%.2f\t", (int) data->d2[isMatch], exp(summary->lambda_false[isMatch])); } } } /* Rest of the IPs */ for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { /* fprintf(fp, "\t"); */ } else { fprintf(fp, "%d|%.2f|%.2f|%.2f", (int) data->d2[isMatch], summary->Z[data->a2u[isMatch]], exp(summary->lambda_true[isMatch]), exp(summary->lambda_false[isMatch])); } if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } } fclose(fp); } void write_matrix_data2(DATA *data, SUMMARY *summary) { int i,j,k,id; int endLine, isMatch; FILE *fp = fopen("matrix_form_short","w"); endLine = -1; for(j=0;jnIP;j++) { if(data->ctrl[j] != 1) endLine = j; } /* header line 1 */ fprintf(fp, "\t\t\t\tBait\t"); for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j]) { fprintf(fp, "%s\t", data->BAIT[id]); } } for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->BAIT[id]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 2 */ fprintf(fp, "\t\t\t\tIP\t"); for(j=0;jnIP;j++) { if(data->ctrl[j]) { fprintf(fp, "%s\t", data->IP[j]); } } for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 3 */ fprintf(fp, "Prey\tmean_s\tvar_s\tmean_ns\tvar_ns\t"); for(j=0;jnIP;j++) { if(data->ctrl[j]) { fprintf(fp, "\t"); } } for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%.2f", summary->alpha_IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } for(i=0;inprey;i++) { fprintf(fp, "%s\t%.2f\t%.2f\t%.2f\t%.2f\t", data->PREY[i], summary->alpha_prey[i], summary->eta[i], summary->mu[i], summary->eta0[i]); /* Control runs first */ for(j=0;jnIP;j++) { if(data->ctrl[j]) { /* find if prey wise data has this IP */ /* if not, leave the space blank */ /* else, biz as usual: (count | prob | lambda_s, lambda_ns) */ isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { fprintf(fp, "\t"); } else { fprintf(fp, "%d\t", (int) data->d2[isMatch]); } } } /* Rest of the IPs */ for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { /* fprintf(fp, "\t"); */ } else { fprintf(fp, "(%d|%.2f|%.2f)", (int) data->d2[isMatch], summary->Z[data->a2u[isMatch]], summary->iZ[isMatch]); } if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } } fclose(fp); } SAINT_v2.3.4/src/SAINTspc-ctrl/printmap.c0000644000000000000000000000545512014654467016501 0ustar rootroot#include "saint.h" void printInter(DATA *data) { int i; FILE *fp = fopen("interaction","w"); fprintf(fp, "ip\tbait\tprey\tIP\tBAIT\tPREY\tubait\tuprey\n"); for(i=0;ininter;i++) { fprintf(fp, "%s\t%s\t%s\t", data->ip[i], data->bait[i], data->prey[i]); fprintf(fp, "%s\t%s\t%s\t", data->IP[data->i2IP[i]], data->BAIT[data->i2b[i]], data->PREY[data->i2p[i]]); fprintf(fp, "%s\t%s\n", data->ubait[data->a2u[i]], data->uprey[data->a2u[i]]); } fclose(fp); } void printUInter(DATA *data) { int i,j,k; FILE *fp = fopen("unique_interaction","w"); fprintf(fp, "ubait\tuprey\tubait\tuprey\tip\tbait\tprey\n"); for(i=0;inuinter;i++) { for(j=0;jn_u2a[i];j++) { k = data->u2a[i][j]; fprintf(fp, "%s\t%s\t", data->ubait[i], data->uprey[i]); fprintf(fp, "%s\t%s\t%s\n", data->ip[k], data->bait[k], data->prey[k]); } } fprintf(fp, "\n\n************************\n\n"); fprintf(fp, "ubait\tuprey\tBAIT\tPREY\n"); for(i=0;inuinter;i++) { fprintf(fp, "%s\t%s\t%s\t%s\n", data->ubait[i], data->uprey[i], data->BAIT[data->ui2b[i]], data->PREY[data->ui2p[i]]); } fclose(fp); } void printIP(DATA *data) { int i,j,k; /* IP to bait */ FILE *fp = fopen("IP","w"); fprintf(fp, "IP\tBAIT\n"); for(i=0;inIP;i++) { fprintf(fp, "%s\t%s\n", data->IP[i], data->BAIT[data->IP2b[i]]); } fprintf(fp, "\n\n************************\n\n"); /* IP to interactions */ fprintf(fp, "IP\tip\tbait\tprey\n"); for(i=0;inIP;i++) { for(j=0;jIPNinter[i];j++) { k = data->IP2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->IP[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printBait(DATA *data) { int i,j,k; FILE *fp = fopen("bait","w"); /* bait to IP */ fprintf(fp, "BAIT\tIP\n"); for(i=0;inbait;i++) { for(j=0;jbaitNIP[i];j++) { k = data->b2IP[i][j]; fprintf(fp, "%s\t%s\n", data->BAIT[i], data->IP[k]); } } fprintf(fp, "\n\n************************\n\n"); /* bait to interaction */ fprintf(fp, "BAIT\tip\tbait\tprey\n"); for(i=0;inbait;i++) { for(j=0;jbaitNinter[i];j++) { k = data->b2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->BAIT[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printPrey(DATA *data) { int i,j,k; FILE *fp = fopen("prey","w"); /* prey to interaction */ for(i=0;inprey;i++) { for(j=0;jpreyNinter[i];j++) { k = data->p2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->PREY[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printMap(DATA *data) { chdir("MAPPING"); printInter(data); printUInter(data); /* printIP(data); printBait(data); printPrey(data); */ chdir(".."); } SAINT_v2.3.4/src/SAINTspc-ctrl/dpalphaIP.c0000644000000000000000000001043512014654467016503 0ustar rootroot#include "saint.h" /********* ALPHA_IP *********/ void DP_alpha_IP_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inIP;i++) { if(data->ctrl[i] == 0) (wsum[prior->w_alpha_IP[i]])++; } for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_alpha_IP) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_alpha_IP[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_alpha_IP[i] = gammap[i]; for(j=0;jgamma_alpha_IP[i] *= (1.0 - gammap[j]); } } void DP_alpha_IP_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_alpha_IP, tmp_lambda, maxl; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_alpha_IP[i]); cur_alpha_IP = param->alpha_IP[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jIPNinter[pid];j++) { id = data->IP2i[pid][j]; if(param->Z[data->a2u[id]] && data->d[id] > 0.0) { tmp_lambda = param->lambda_true[id] + prior->theta_alpha_IP[i] - cur_alpha_IP; prob[i] += log_poisson_g_prop(data->d[id], exp(tmp_lambda), param->eta[data->i2p[id]]); } } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_alpha_IP[pid] = ranMultinom(r, prob, _MAX_COMP_); param->alpha_IP[pid] = prior->theta_alpha_IP[prior->w_alpha_IP[pid]]; for(j=0;jIPNinter[pid];j++) { id = data->IP2i[pid][j]; param->lambda_true[id] += param->alpha_IP[pid] - cur_alpha_IP; } } void DP_alpha_IP_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale; scale = prior->gamma_alpha_IP[pid] / (1.0 - prior->gamma_alpha_IP[pid]); if(inuse[pid] == 0) { newval = gsl_ran_gaussian(r, sqrt(prior->v_alpha_IP)) + prior->m_alpha_IP; Delta = newval - prior->theta_alpha_IP[pid]; prior->theta_alpha_IP[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 0.25); for(i=0;inIP;i++) { if(prior->w_alpha_IP[i] == pid) { for(j=0;jIPNinter[i];j++) { id = data->IP2i[i][j]; if(param->Z[data->a2u[id]] && data->d[id] > 0.0) { param->lambda_true_tmp[id] = param->lambda_true[id] + Delta; mhratio += log_poisson_g_prop(data->d[id], exp(param->lambda_true_tmp[id]), param->eta[data->i2p[id]]) - log_poisson_g_prop(data->d[id], exp(param->lambda_true[id]), param->eta[data->i2p[id]]); } } } } mhratio += log_gaussian(prior->theta_alpha_IP[pid] + Delta, prior->m_alpha_IP, prior->v_alpha_IP) - log_gaussian(prior->theta_alpha_IP[pid], prior->m_alpha_IP, prior->v_alpha_IP); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; /* if accepted, update param and lambda */ if(accept) { prior->theta_alpha_IP[pid] += Delta; for(i=0;inIP;i++) { if(prior->w_alpha_IP[i] == pid && data->ctrl[i] == 0) { param->alpha_IP[i] += Delta; for(j=0;jIPNinter[i];j++) { id = data->IP2i[i][j]; param->lambda_true[id] += Delta; } } } } } } void DP_alpha_IP(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inIP;i++) inuse[prior->w_alpha_IP[i]] = 1; DP_alpha_IP_gamma(param, prior, data, r); for(i=0;inIP;i++) { if(data->ctrl[i] == 0) DP_alpha_IP_w(param, prior, data, r, i); } for(i=0;i<_MAX_COMP_;i++) DP_alpha_IP_theta(param, prior, data, r, i, inuse); /* loglik update */ mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_IP[i] * prior->theta_alpha_IP[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] -= mean; for(i=0;inIP;i++) param->alpha_IP[i] -= mean; param->beta0 += mean; } SAINT_v2.3.4/src/SAINTspc-ctrl/meancounts.c0000644000000000000000000000216412014654467017015 0ustar rootroot#include "saint.h" /**************************************************************/ /* computing expected counts in log scale (s/ns) */ /**************************************************************/ /*************************/ /* all interactions */ /*************************/ void compute_lambda_true_all(PARAM *param, PRIOR *prior, DATA *data) { int i; for(i=0;ininter;i++) { // data->c[i] // param->lambda_true[i] = data->l[i] + data->c[i] + param->beta0 + param->alpha_prey[data->i2p[i]]; param->lambda_true[i] = data->l[i] + param->beta0 + param->alpha_prey[data->i2p[i]] + param->alpha_IP[data->i2IP[i]]; if(NORMALIZE) param->lambda_true[i] += data->c[i]; } } void compute_lambda_false_all(PARAM *param, PRIOR *prior, DATA *data) { int i; for(i=0;ininter;i++) { param->lambda_false[i] = data->l[i] + param->betac + param->mu[data->i2p[i]]; if(NORMALIZE) param->lambda_false[i] += data->c[i]; } } void compute_lambda_all(PARAM *param, PRIOR *prior, DATA *data) { compute_lambda_true_all(param, prior, data); compute_lambda_false_all(param, prior, data); } SAINT_v2.3.4/src/SAINTspc-ctrl/setprior.c0000644000000000000000000000566512014654470016513 0ustar rootroot#include "saint.h" void memory_prior(PARAM *param, PRIOR *prior, DATA *data) { assert(prior->w_alpha_prey = (int *) calloc(data->nprey, sizeof(int))); assert(prior->w_alpha_IP = (int *) calloc(data->nIP, sizeof(int))); assert(prior->w_mu = (int *) calloc(data->nprey, sizeof(int))); assert(prior->w_eta = (int *) calloc(data->nprey, sizeof(int))); assert(prior->w_eta0 = (int *) calloc(data->nprey, sizeof(int))); } void initialize_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; double MAXC = ((double) _MAX_COMP_); prior->m_beta = 0.0; prior->v_beta = 100.0; prior->atrue = 0.1; prior->afalse = 1.0 - prior->atrue; prior->rho_alpha_prey = 1.0; prior->m_alpha_prey = 0.0; prior->v_alpha_prey = 100.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_alpha_prey[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_prey[i] = gsl_ran_gaussian(r, 2.0); for(i=0;inprey;i++) prior->w_alpha_prey[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_prey[i] * prior->theta_alpha_prey[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_prey[i] -= mean; prior->rho_alpha_IP = 1.0; prior->m_alpha_IP = 0.0; prior->v_alpha_IP = 100.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_alpha_IP[i] = 1.0 / MAXC; /* for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] = gsl_ran_gaussian(r, 2.0); */ for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] = 0.0; /* for(i=0;inIP;i++) prior->w_alpha_IP[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); */ for(i=0;inIP;i++) prior->w_alpha_IP[i] = 0; mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_IP[i] * prior->theta_alpha_IP[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] -= mean; prior->rho_mu = 1.0; prior->m_mu = 0.0; prior->v_mu = 100.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_mu[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) prior->theta_mu[i] = gsl_ran_gaussian(r, 2.0); for(i=0;inprey;i++) prior->w_mu[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_mu[i] * prior->theta_mu[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_mu[i] -= mean; prior->rho_eta = 1.0; prior->mean_eta = 0.1; for(i=0;i<_MAX_COMP_;i++) prior->gamma_eta[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) prior->theta_eta[i] = gsl_ran_exponential(r, prior->mean_eta) + 1.0; for(i=0;inprey;i++) prior->w_eta[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); prior->rho_eta0 = 1.0; prior->mean_eta0 = 0.1; for(i=0;i<_MAX_COMP_;i++) prior->gamma_eta0[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) prior->theta_eta0[i] = gsl_ran_exponential(r, prior->mean_eta0) + 1.0; for(i=0;inprey;i++) prior->w_eta0[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); } void set_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { memory_prior(param, prior, data); initialize_prior(param, prior, data, r); } SAINT_v2.3.4/src/SAINTspc-ctrl/dpalphaprey.c0000644000000000000000000001204112014654467017145 0ustar rootroot#include "saint.h" /********* ALPHA_prey *********/ void DP_alpha_prey_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inprey;i++) (wsum[prior->w_alpha_prey[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_alpha_prey) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_alpha_prey[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_alpha_prey[i] = gammap[i]; for(j=0;jgamma_alpha_prey[i] *= (1.0 - gammap[j]); } } void DP_alpha_prey_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_alpha_prey, tmp_lambda, maxl, tmp; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_alpha_prey[i]); cur_alpha_prey = param->alpha_prey[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(param->Z[data->a2u[id]] && data->d[id] > 0.0) { tmp_lambda = param->lambda_true[id] + prior->theta_alpha_prey[i] - cur_alpha_prey; if(lowMode) tmp = GSL_MIN(_LM_, data->d[id]); else tmp = data->d[id]; if(tmp > 0.0) prob[i] += log_poisson_g_prop(tmp, exp(tmp_lambda), param->eta[pid]); } else if (data->d[id] >= 2.0 && data->ctrlavg[pid] > 0.0 && (data->d[id] / data->ctrlavg[pid]) > 2.0) { tmp_lambda = param->lambda_true[id] + prior->theta_alpha_prey[i] - cur_alpha_prey; if(lowMode) tmp = GSL_MIN(_LM_, data->d[id]); else tmp = data->d[id]; if(tmp > 0.0) prob[i] += log_poisson_g_prop(tmp, exp(tmp_lambda), param->eta[pid]); } else {} } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_alpha_prey[pid] = ranMultinom(r, prob, _MAX_COMP_); param->alpha_prey[pid] = prior->theta_alpha_prey[prior->w_alpha_prey[pid]]; for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; param->lambda_true[id] += param->alpha_prey[pid] - cur_alpha_prey; } } void DP_alpha_prey_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale; scale = prior->gamma_alpha_prey[pid] / (1.0 - prior->gamma_alpha_prey[pid]); if(inuse[pid] == 0) { newval = gsl_ran_gaussian(r, sqrt(prior->v_alpha_prey)) + prior->m_alpha_prey; Delta = newval - prior->theta_alpha_prey[pid]; prior->theta_alpha_prey[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 1.0); for(i=0;inprey;i++) { if(prior->w_alpha_prey[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(param->Z[data->a2u[id]] && data->d[id] > 0.0) { param->lambda_true_tmp[id] = param->lambda_true[id] + Delta; if(lowMode) { mhratio += log_poisson_g_prop(GSL_MIN(_LM_,data->d[id]), exp(param->lambda_true_tmp[id]), param->eta[i]) - log_poisson_g_prop(GSL_MIN(_LM_,data->d[id]), exp(param->lambda_true[id]), param->eta[i]); } else { mhratio += log_poisson_g_prop(data->d[id], exp(param->lambda_true_tmp[id]), param->eta[i]) - log_poisson_g_prop(data->d[id], exp(param->lambda_true[id]), param->eta[i]); } } } } } mhratio += log_gaussian(prior->theta_alpha_prey[pid] + Delta, prior->m_alpha_prey, prior->v_alpha_prey) - log_gaussian(prior->theta_alpha_prey[pid], prior->m_alpha_prey, prior->v_alpha_prey); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; /* if accepted, update param and lambda */ if(accept) { prior->theta_alpha_prey[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_alpha_prey[i] == pid) { param->alpha_prey[i] += Delta; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; param->lambda_true[id] += Delta; } } } } } } void DP_alpha_prey(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) { inuse[prior->w_alpha_prey[i]] = 1; } DP_alpha_prey_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_alpha_prey_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_alpha_prey_theta(param, prior, data, r, i, inuse); /* loglik update */ mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_prey[i] * prior->theta_alpha_prey[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_prey[i] -= mean; for(i=0;inprey;i++) param->alpha_prey[i] -= mean; param->beta0 += mean; } SAINT_v2.3.4/src/SAINTspc-ctrl/saint.h0000644000000000000000000003014512014654467015764 0ustar rootroot/* Copyright (C) <2011> For troubleshooting, contact hyung_won_choi@nuhs.edu.sg. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You can obtain a copy of the GNU General Public License from . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _MAX_BUF_ 2000 #define _MAX_NAME_ 2000 #define _MAX_COUNT_ 250 #define _MAX_COMP_ 15 #define _SKIP_ 10 #define _PRINT_FREQ_ 100 #define _HISTO_START_ -10.0 #define _HISTO_END_ 10.0 #define _HISTO_BIN_ 100 #define _HISTO_START2_ 0.5 #define _HISTO_END2_ 20.5 #define _HISTO_BIN2_ 200 #define _TRUNC_ 500.0 #define _LM_ 100.0 #define _fold_ 5.0 typedef struct tagDATA { /*************/ /* logistics */ /*************/ int ninter; int nuinter; int nprey; int nIP; int nbait; /**************************/ /* interaction level data */ /**************************/ char **prey; char **bait; char **ip; /* raw data, each row corresponds to one interaction, case-sensitive */ float *d; float *d2; float *iprob; float *l; float *c; int *isCtrl; /*********************************/ /* unique interaction level data */ /*********************************/ char **uprey; char **ubait; float *prob; int *isAnyCtrl; int *n_u2a; /* number of individual interactions per unique interactions */ int **u2a; /* unique interactions to individual interactions */ int *a2u; /* individual interactions to unique interactions */ /* crucial indicator for probability calculation */ /***********************************/ /* unique bait and prey level data */ /***********************************/ float *IPtotalAbundance; float *IPbaitCoverage; float *preyLength; char *preyOverride; char **PREY; /* unique preys */ char **PREYGENE; char **BAIT; /* unique baits */ char **IP; /* unique IP #s */ int nctrl; int ntest; int *ctrl; /* index: control IPs or not: 'C' = control, 'T' = test */ float *ctrlavg; int *preyNinter; /* # interaction for prey */ int *baitNinter; /* # interaction for bait */ int *IPNinter; /* # interaction in an IP */ int *baitNIP; /* # IPs per bait */ /****************/ /* mapping data */ /****************/ int *i2p; /* index: interaction to prey */ int *i2b; /* index: interaction to bait */ int *i2IP; /* index: interaction to IP */ int **p2i; /* index: prey to interaction */ int **b2i; /* index: bait to interaction */ int **IP2i; /* index: IP to interaction */ int *ui2p; /* index: unique interaction to prey */ int *ui2b; /* index: unique interaction to bait */ /* no need to build reverse mapping for unique interactions */ /* perhaps this mapping is unnecessary */ int **b2IP; /* index: bait to IP */ int *IP2b; /* index: IP to bait */ } DATA; typedef struct tagPARAM{ float loglikTotal; float *loglik_prey; float *loglik_IP; float beta0; float betac; float *alpha_prey; float *alpha_IP; float *mu; float *eta; float *eta0; int *iZ; /* individual interactions */ int *Z; /* unique interactions */ float ptrue; float ptrue_tmp; float *lambda_true; float *lambda_false; float *lambda_true_tmp; float *lambda_false_tmp; } PARAM; typedef struct tagPRIOR{ /* parametric portion */ float m_beta; /* set to zero */ float v_beta; float atrue, afalse; /* nonparametric portion */ float rho_alpha_prey; float m_alpha_prey; float v_alpha_prey; int *w_alpha_prey; float gamma_alpha_prey[_MAX_COMP_]; float theta_alpha_prey[_MAX_COMP_]; float rho_alpha_IP; float m_alpha_IP; float v_alpha_IP; int *w_alpha_IP; float gamma_alpha_IP[_MAX_COMP_]; float theta_alpha_IP[_MAX_COMP_]; float rho_mu; float m_mu; float v_mu; int *w_mu; float gamma_mu[_MAX_COMP_]; float theta_mu[_MAX_COMP_]; float rho_eta; float mean_eta; int *w_eta; float gamma_eta[_MAX_COMP_]; float theta_eta[_MAX_COMP_]; float rho_eta0; float mean_eta0; int *w_eta0; float gamma_eta0[_MAX_COMP_]; float theta_eta0[_MAX_COMP_]; } PRIOR; typedef struct tagHISTOGRAM{ float start[_HISTO_BIN_]; float end[_HISTO_BIN_]; float count[_HISTO_BIN_ + 2]; } HISTOGRAM; typedef struct tagHISTOGRAM2{ float start[_HISTO_BIN2_]; float end[_HISTO_BIN2_]; float count[_HISTO_BIN2_ + 2]; } HISTOGRAM2; typedef struct tagSUMMARY{ float *iZ; float *Z; float *alpha_prey; float *alpha_IP; float *mu; float *eta; float *eta0; float *lambda_true; float *lambda_false; HISTOGRAM hist_alpha_prey; HISTOGRAM hist_alpha_IP; HISTOGRAM hist_mu; HISTOGRAM2 hist_eta; HISTOGRAM2 hist_eta0; } SUMMARY; int NORMALIZE; int lowMode; int minFold; int burn; int iter; /*************/ /* functions */ /*************/ int nrow(FILE *fp); int newlinechar(char *buf, int k); int ncol(FILE *fp); int commandLine(int argc, char **argv); void print_DP(PRIOR *prior, DATA *data); /* initdata.c */ void read_interaction_data(FILE *fpinter, DATA *data); void find_unique_interaction(DATA *data); int unique_elements(char **x, int *unique, int nx); int count_unique_elements(char **x, int nx); void centerData(float *x, int n, int takelog); int mapPreyToData(DATA *data); void read_prey_data(FILE *fpprey, DATA *data); void mapIPtoBait(DATA *data); int mapIPBaitToData(DATA *data); void getIPinfo(DATA *data); void read_bait_data(FILE *fpbait, DATA *data); void set_ctrlavg(DATA *data); void read_data(FILE *fpinter, FILE *fpprey, FILE *fpbait, DATA *data); /* meancounts.c */ void compute_lambda_true_all(PARAM *param, PRIOR *prior, DATA *data); void compute_lambda_false_all(PARAM *param, PRIOR *prior, DATA *data); void compute_lambda_all(PARAM *param, PRIOR *prior, DATA *data); /* void compute_lambda_true_prey(PARAM *param, PRIOR *prior, DATA *data, int pid); void compute_lambda_false_prey(PARAM *param, PRIOR *prior, DATA *data, int pid); void compute_lambda_prey(PARAM *param, PRIOR *prior, DATA *data, int pid); void compute_lambda_true_IP(PARAM *param, PRIOR *prior, DATA *data, int ipid); void compute_lambda_IP(PARAM *param, PRIOR *prior, DATA *data, int ipid); */ /* likelihood.c */ float log_poisson_prop(float N, float lambda); float log_poisson_g_prop(float N, float lambda, float theta); float LRprop(PARAM *param, PRIOR *prior, DATA *data); float loglik_all(PARAM *param, PRIOR *prior, DATA *data); float loglik_all_class(PARAM *param, PRIOR *prior, DATA *data, int cl); float loglik_all_class_tmp(PARAM *param, PRIOR *prior, DATA *data, int cl); /* mcmc.c */ float log_gaussian(float x, float mu, float var); void sampleBeta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void sampleBetac(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void sampleZ(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); float logit(float x); float inverseLogit(float x); void sampleProportion(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void mhgibbs(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary, const gsl_rng *r, int updateSum); void write_mcmc(PARAM *param, PRIOR *prior, DATA *data, FILE *fp1, FILE *fp2, FILE *fp3, int ct); /* printmap.c */ void printInter(DATA *data); void printUInter(DATA *data); void printIP(DATA *data); void printBait(DATA *data); void printPrey(DATA *data); void printMap(DATA *data); /* setprior.c */ void memory_prior(PARAM *param, PRIOR *prior, DATA *data); void initialize_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void set_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* setparam.c */ void memory_param(PARAM *param, PRIOR *prior, DATA *data); void initialize_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void set_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void set_Z(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* setsumamry.c */ void memory_summary(SUMMARY *summary, DATA *data); void initialize_summary(SUMMARY *summary, DATA *data); void initialize_histogram(HISTOGRAM *hist); void initialize_histogram2(HISTOGRAM2 *hist); void set_summary(SUMMARY *summary, DATA *data); void updateSummary(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary); void scaleSummary(SUMMARY *summary, DATA *data, int iter); void updateHist_alpha_prey(HISTOGRAM *hist, PRIOR *prior); void updateHist_alpha_IP(HISTOGRAM *hist, PRIOR *prior); void updateHist_mu(HISTOGRAM *hist, PRIOR *prior); void updateHist_eta(HISTOGRAM2 *hist, PRIOR *prior); void updateHist_eta0(HISTOGRAM2 *hist, PRIOR *prior); void updateHistogram(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary); /* dpalphaprey.c */ void DP_alpha_prey_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_alpha_prey_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_alpha_prey_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_alpha_prey(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpalphaIP.c */ void DP_alpha_IP_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_alpha_IP_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_alpha_IP_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_alpha_IP(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpmu.c */ void DP_mu_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_mu_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_mu_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_mu(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpmu.c */ void DP_eta_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_eta_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_eta_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_eta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpmu.c */ void DP_eta0_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_eta0_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_eta0_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_eta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* result.c */ void write_interactions(DATA *data, SUMMARY *summary); void write_unique_interactions(DATA *data, SUMMARY *summary); void write_prey(DATA *data, SUMMARY *summary); void write_IP(DATA *data, SUMMARY *summary); void write_bait(DATA *data, SUMMARY *summary); void write_histogram(FILE *fp, HISTOGRAM *hist); void write_histogram2(FILE *fp, HISTOGRAM2 *hist); void write_hyperprior(DATA *data, SUMMARY *summary); void write_result(DATA *data, SUMMARY *summary); void write_matrix_data(DATA *data, SUMMARY *summary); void write_matrix_data2(DATA *data, SUMMARY *summary); /**************** mmath.c ******************************/ float vec_sum(const float *vec, int len); float vec_max(const float *vec, int len); float vec_min(const float *vec, int len); float vec_mean(const float *vec, int len); float vec_var(const float *vec, int len); float vec_med(const float *vec, int len); float vec_mad(const float *vec, int len); float geometric_mean(float *x, int n); int ranMultinom(const gsl_rng *r, float *p, int K); SAINT_v2.3.4/src/SAINTspc-ctrl/dpeta0.c0000644000000000000000000000725312014654467016022 0ustar rootroot#include "saint.h" /********* ALPHA_prey *********/ void DP_eta0_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inprey;i++) (wsum[prior->w_eta0[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_eta0) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_eta0[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_eta0[i] = gammap[i]; for(j=0;jgamma_eta0[i] *= (1.0 - gammap[j]); } } void DP_eta0_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_eta, tmp_lambda, maxl; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_eta0[i]); cur_eta = param->eta0[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(data->ctrl[data->i2IP[id]]) { tmp_lambda = param->lambda_false[id]; prob[i] += log_poisson_g_prop(data->d[id], exp(tmp_lambda), prior->theta_eta0[i]); } } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_eta0[pid] = ranMultinom(r, prob, _MAX_COMP_); param->eta0[pid] = prior->theta_eta0[prior->w_eta0[pid]]; } void DP_eta0_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale, tmp_lambda; scale = prior->gamma_eta0[pid] / (1.0 - prior->gamma_eta0[pid]); if(inuse[pid] == 0) { newval = gsl_ran_exponential(r, prior->mean_eta) + 1.0; Delta = newval - prior->theta_eta0[pid]; prior->theta_eta0[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 1.0); if(prior->theta_eta0[pid] + Delta <= 1.0 || prior->theta_eta0[pid] + Delta > 100.0) { accept = 0; } else { for(i=0;inprey;i++) { if(prior->w_eta0[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(data->ctrl[data->i2IP[id]]) { tmp_lambda = param->lambda_false[id]; mhratio += log_poisson_g_prop(data->d[id], exp(tmp_lambda), prior->theta_eta0[pid]+Delta) - log_poisson_g_prop(data->d[id], exp(tmp_lambda), prior->theta_eta0[pid]); } } } } mhratio += log(gsl_ran_exponential_pdf(prior->theta_eta0[pid]+Delta-1.0, prior->mean_eta0)) - log(gsl_ran_exponential_pdf(prior->theta_eta0[pid]-1.0, prior->mean_eta0)); // mhratio += -2.0 * (log(prior->theta_eta0[pid]+ Delta) - log(prior->theta_eta0[pid])); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; } /* if accepted, update param and lambda */ if(accept) { prior->theta_eta0[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_eta0[i] == pid) { param->eta0[i] += Delta; } } } } } void DP_eta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) inuse[prior->w_eta0[i]] = 1; DP_eta0_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_eta0_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_eta0_theta(param, prior, data, r, i, inuse); /* loglik update */ } SAINT_v2.3.4/src/SAINTspc-noctrl-matrix/0000777000000000000000000000000012145226162016325 5ustar rootrootSAINT_v2.3.4/src/SAINTspc-noctrl-matrix/mmath.c0000666000000000000000000000323611746171441017610 0ustar rootroot#include "saint.h" double vec_sum(const double *vec, int len) { int i; double res; res=vec[0]; for(i=1;ivec[i]) res=vec[i]; } return res; } double vec_mean(const double *vec, int len) { double tmp=0.0; int i; for(i=0;i 9)) { fprintf(stderr, "usage: saint-spc-noctrl-matrix [interactomeData] [output file] [nburnin] [niter] [ff]\n saint-spc-noctrl-matrix [interactomeData] [output file] [nburnin] [niter] [ff] [useAbundance(0/1)] [useLength(0/1)] [useCoverage(0/1)]\n"); return 1; } FILE *fpi = fopen(argv[1], "r"); p = nrow(fpi)-3; q = ncol(fpi)-3; fclose(fpi); fprintf(stderr, "%d proteins, %d IPs\n", p, q); FILE *fp = fopen(argv[1], "r"); FILE *fp_output = fopen(argv[2], "w"); strcpy(prob, argv[2]); strcat(prob, "_prob"); FILE *fp_outprob = fopen(prob, "w"); strcpy(list, argv[2]); strcat(list, "_list"); FILE *fp_list = fopen(list, "w"); strcpy(fbait, argv[2]); strcat(fbait, "_alpha_bait"); FILE *fpbait = fopen(fbait, "w"); strcpy(fprey, argv[2]); strcat(fprey, "_alpha_prey"); FILE *fpprey = fopen(fprey, "w"); strcpy(fmu, argv[2]); strcat(fmu, "_mu_prey"); FILE *fpmu = fopen(fmu, "w"); strcpy(iprob, argv[2]); strcat(iprob, "_iprob"); FILE *fp_iprob = fopen(iprob, "w"); burn = atoi(argv[3]); iter = atoi(argv[4]); ff = atof(argv[5]); if(argc > 7) { tmp = atoi(argv[6]); if(&tmp != NULL) { param.useAbun = tmp; prior.useAbun = tmp; } tmp = atoi(argv[7]); if(&tmp != NULL) { param.useLen = tmp; prior.useLen = tmp; } tmp = atoi(argv[8]); if(&tmp != NULL) { param.useCov = tmp; prior.useCov = tmp; } } if(fp == NULL) { fprintf(stderr, "Interactome data %s does not exist.\n", argv[1]); return 1; } if(&p == NULL) { fprintf(stderr, "The number of prey proteins was not provided.\n"); return 1; } if(&q == NULL) { fprintf(stderr, "The number of bait proteins was not provided.\n"); return 1; } if(&burn == NULL) { fprintf(stderr, "The number of burnin iterations was not provided.\n"); return 1; } if(&iter == NULL) { fprintf(stderr, "The number of main interations was not provided.\n"); return 1; } /* fprintf(stderr, "\n*************************************************\n"); fprintf(stderr, "************ Welcome to SAInt ***********\n"); fprintf(stderr, "*** Significance Analysis of Interactome Data ***\n"); fprintf(stderr, "*************************************************\n"); if(param.useAbun) fprintf(stderr, "Background abundance is used in this model\n"); if(param.useLen) fprintf(stderr, "Sequence length is used in this model\n"); if(param.useCov) fprintf(stderr, "Bait Coverage is used in this model\n"); fprintf(stderr, "*************************************************\n"); */ /* Read Data */ read_data(fp, &data, &p, &q); init_prior(&prior, &p, &q); init_param(&data, ¶m, &p, &q); param.ff_prop = ff; init_summary(&data, &summary, &p, &q); set_summary(&data, &summary); set_prior(&prior); set_param(¶m, &prior, &data, iter, r); u = data.uniqueNum; for(j=0;j<(p-1);j++) fprintf(fpprey, "%s\t", data.prey[j]); fprintf(fpprey, "%s\n", data.prey[p-1]); for(j=0;j<(q-1);j++) fprintf(fpbait, "%s\t", data.bait[j]); fprintf(fpbait, "%s\n", data.bait[q-1]); for(j=0;j<(p-1);j++) fprintf(fpmu, "%s\t", data.prey[j]); fprintf(fpmu, "%s\n", data.prey[p-1]); /* Estimation */ fprintf(stderr, "Burnin:\n"); for(i=0;i= ff ? "yes" : "no")); } } } /* Output */ fprintf(stderr,"\nOutput summary.."); fprintf(stderr, ".."); /* First line */ fprintf(fp_outprob, "\t"); for(j=0;j<(u-1);j++) fprintf(fp_outprob, "%s\t", data.unique[j]); fprintf(fp_outprob, "%s\n", data.unique[u-1]); /* First line */ fprintf(fp_iprob, "\t"); for(j=0;j<(q-1);j++) fprintf(fp_iprob, "%s\t", data.experiment[j]); fprintf(fp_iprob, "%s\n", data.experiment[q-1]); /* First line */ fprintf(fp_output, "\t"); for(j=0;j<(q-1);j++) fprintf(fp_output, "%s\t", data.experiment[j]); fprintf(fp_output, "%s\n", data.experiment[q-1]); /* Second line */ fprintf(fp_output, "\t"); for(j=0;j<(q-1);j++) fprintf(fp_output, "%s\t", data.bait[j]); fprintf(fp_output, "%s\n", data.bait[q-1]); for(j=0;j tmpmaxint) tmpmaxint = ((int) data.d[j][data.mfu[l][k]]); } if(tmpmaxint >= 1) count++; } fprintf(fp_output, "%s\t", data.prey[j]); fprintf(fp_outprob, "%s\t", data.prey[j]); for(k=0;k<(u-1);k++) { fprintf(fp_outprob, "%.3f\t", summary.Z[j][k]); } for(k=0;k<(q-1);k++) { fprintf(fp_output, "%d|%.3f|%.3f\t", ((int) data.d[j][k]), summary.Z[j][data.mtu[k]],summary.iZ[j][k]); } fprintf(fp_output, "%d|%.3f|%.3f\n", ((int) data.d[j][k]), summary.Z[j][data.mtu[q-1]], summary.iZ[j][q-1]); fprintf(fp_outprob, "%.3f\n", summary.Z[j][u-1]); } /* Matrix Output */ for(j=0;jnrow = *p; data->ncol = *q; assert(data->d = (double **) calloc(*p, sizeof(double *))); for(i=0; i<*p; i++) { assert(data->d[i] = (double *) calloc(*q, sizeof(double))); } assert(data->maxRow = (double *) calloc(*p, sizeof(double))); assert(data->preyORF = (char **) calloc(*p, sizeof(char *))); assert(data->override = (char *) calloc(*p, sizeof(char))); for(i=0; i<*p; i++) { assert(data->preyORF[i] = (char *) calloc(_MAX_NAME_, sizeof(char))); } assert(data->prey = (char **) calloc(*p, sizeof(char *))); for(i=0; i<*p; i++) { assert(data->prey[i] = (char *) calloc(_MAX_NAME_, sizeof(char))); } assert(data->experiment = (char **) calloc(*q, sizeof(char *))); for(i=0; i<*q; i++) { assert(data->experiment[i] = (char *) calloc(_MAX_NAME_, sizeof(char))); } assert(data->bait = (char **) calloc(*q, sizeof(char *))); for(i=0; i<*q; i++) { assert(data->bait[i] = (char *) calloc(_MAX_NAME_, sizeof(char))); } assert(data->unique = (char **) calloc(*q, sizeof(char *))); for(i=0; i<*q; i++) { assert(data->unique[i] = (char *) calloc(_MAX_NAME_, sizeof(char))); } assert(data->uniqueSize = (int *) calloc(*q, sizeof(int))); assert(data->mtu = (int *) calloc(*q, sizeof(int))); assert(data->mfu = (int **) calloc(*q, sizeof(int *))); for(i=0; i<*q; i++) { assert(data->mfu[i] = (int *) calloc(_MAX_REPLICA_, sizeof(int))); } assert(data->baitCoverage = (double *) calloc(*q, sizeof(double))); assert(data->preyAbundance = (double *) calloc(*p, sizeof(double))); assert(data->preyLogLength = (double *) calloc(*p, sizeof(double))); } void free_data(DATA *data) { int i; for(i=0; inrow; i++) { free(data->d[i]); free(data->prey[i]); free(data->preyORF[i]); } free(data->d); free(data->maxRow); free(data->prey); free(data->preyORF); for(i=0; incol; i++) { free(data->bait[i]); free(data->experiment[i]); } for(i=0; incol; i++) { free(data->unique[i]); free(data->mfu[i]); } free(data->bait); free(data->experiment); free(data->unique); free(data->uniqueSize); free(data->mtu); free(data->mfu); free(data->baitCoverage); free(data->preyAbundance); free(data->preyLogLength); } void normalizeCoverage(double *cover, int *len) { int j; double tmp[*len]; double max, med; for(j=0;j<*len;j++) cover[j] = cover[j] + 1.0; max = vec_max(cover, *len); for(j=0;j<*len;j++) cover[j] /= max; for(j=0;j<*len;j++) cover[j] *= 0.99; for(j=0;j<*len;j++) tmp[j] = gsl_cdf_gaussian_Pinv(cover[j], 1.0); med = vec_med(tmp, *len); for(j=0;j<*len;j++) cover[j] = tmp[j] - med; } void read_data(FILE *fp, DATA *data, int *p, int *q) { int i,j,k,cur,curU,exist; char buf[_MAX_BUF_]; init_data(data, p, q); double med; /* Read first two lines of info */ for(j=0;j<4;j++) fscanf(fp,"%s",buf); for(j=0;jncol;j++) { fscanf(fp,"%s",buf); strcpy(data->experiment[j], buf); } for(j=0;j<4;j++) fscanf(fp,"%s",buf); for(j=0;jncol;j++) { fscanf(fp,"%s",buf); strcpy(data->bait[j], buf); } for(j=0;j<4;j++) fscanf(fp,"%s",buf); for(j=0;jncol;j++) { fscanf(fp,"%s",buf); data->baitCoverage[j] = atof(buf); } /* normalizeCoverage(data->baitCoverage, q); */ /* Read each row First three elements are: preyName, preyAbundance, preyLogLength */ for(i=0;i<*p;i++) { fscanf(fp,"%s",buf); strcpy(data->prey[i], buf); fscanf(fp,"%s",buf); data->preyAbundance[i] = atof(buf); fscanf(fp,"%s",buf); data->preyLogLength[i] = atof(buf); fscanf(fp,"%s",buf); data->override[i] = buf[0]; for(j=0;jncol;j++) { fscanf(fp,"%s",buf); data->d[i][j] = atof(buf); } } /* Unique Bait-Experiments Identification */ cur = 0; strcpy(data->unique[cur], data->bait[cur]); cur++; for(j=1;j<*q;j++) { exist = 0; for(i=0;iunique[i], data->bait[j]) == 0) exist = 1; } if(exist == 0) { strcpy(data->unique[cur], data->bait[j]); cur++; } } data->uniqueNum = cur; for(i=0;iuniqueNum;i++) { cur = 0; for(j=0;j<*q;j++) { if(strcmp(data->unique[i], data->bait[j]) == 0) { data->mtu[j] = i; data->mfu[i][cur] = j; cur++; } } data->uniqueSize[i] = cur; } /* use and useUnique indices */ /* by column */ assert(data->ninterUnique = (int *) calloc(data->uniqueNum, sizeof(int))); assert(data->useUnique = (int **) calloc(data->uniqueNum, sizeof(int *))); assert(data->ninter = (int *) calloc(*q, sizeof(int))); assert(data->use = (int **) calloc(*q, sizeof(int *))); /* by row */ assert(data->ninterRowUnique = (int *) calloc(*p, sizeof(int))); assert(data->useRowUnique = (int **) calloc(*p, sizeof(int *))); assert(data->ninterRow = (int *) calloc(*p, sizeof(int))); assert(data->useRow = (int **) calloc(*p, sizeof(int *))); /* column */ for(j=0;juniqueNum;j++) { /* Count first */ data->ninterUnique[j] = 0; for(i=0;i<*p;i++) { exist = 0; for(k=0;kuniqueSize[j];k++) { if(data->d[i][data->mfu[j][k]] > 0) exist = 1; } if(exist) { (data->ninterUnique[j])++; } } } for(j=0;juniqueNum;j++) { for(k=0;kuniqueSize[j];k++) data->ninter[data->mfu[j][k]] = data->ninterUnique[j]; } for(k=0;k<*q;k++) { assert(data->use[k] = (int *) calloc(data->ninterUnique[data->mtu[k]], sizeof(int))); } for(j=0;juniqueNum;j++) { assert(data->useUnique[j] = (int *) calloc(data->ninterUnique[j], sizeof(int))); } for(j=0;juniqueNum;j++) { cur = 0; for(i=0;i<*p;i++) { exist = 0; for(k=0;kuniqueSize[j];k++) { if(data->d[i][data->mfu[j][k]] > 0) exist = 1; } if(exist) { for(k=0;kuniqueSize[j];k++) data->use[data->mfu[j][k]][cur] = i; data->useUnique[j][cur] = i; cur++; } } } /* row - learn from column */ for(i=0;i<*p;i++) { data->ninterRow[i] = 0; data->ninterRowUnique[i] = 0; for(j=0;juniqueNum;j++) { exist = 0; for(k=0;kuniqueSize[j];k++) { if(data->d[i][data->mfu[j][k]] > 0) exist = 1; } if(exist) { data->ninterRow[i] += data->uniqueSize[j]; (data->ninterRowUnique[i])++; } } } for(i=0;i<*p;i++) assert(data->useRow[i] = (int *) calloc(data->ninterRow[i], sizeof(int))); for(i=0;i<*p;i++) assert(data->useRowUnique[i] = (int *) calloc(data->ninterRowUnique[i], sizeof(int))); for(i=0;i<*p;i++) { curU = 0; cur = 0; for(j=0;juniqueNum;j++) { exist = 0; for(k=0;kuniqueSize[j];k++) { if(data->d[i][data->mfu[j][k]] > 0) exist = 1; } if(exist) { data->useRowUnique[i][curU] = j; curU++; for(k=0;kuniqueSize[j];k++) { data->useRow[i][cur] = data->mfu[j][k]; cur++; } } } } /* Normalize Count Information */ for(j=0;jncol;j++) data->baitCoverage[j] = log(data->baitCoverage[j] + 1.0); med = vec_min(data->baitCoverage, data->ncol); for(j=0;jncol;j++) data->baitCoverage[j] -= med; for(j=0;jncol;j++) data->baitCoverage[j] *= 1.0; for(i=0;inrow;i++) data->preyAbundance[i] = log( (data->preyAbundance[i] + 1.0) / (data->preyLogLength[i]) ); med = vec_min(data->preyAbundance, data->nrow); for(i=0;inrow;i++) data->preyAbundance[i] -= med; for(i=0;inrow;i++) data->preyAbundance[i] *= 1.0; for(i=0;inrow;i++) data->preyLogLength[i] = log(data->preyLogLength[i] + 1.0); med = vec_min(data->preyLogLength, data->nrow); for(i=0;inrow;i++) data->preyLogLength[i] -= med; for(i=0;inrow;i++) data->preyLogLength[i] *= 1.0; for(i=0;inrow;i++) { data->maxRow[i] = 0.0; for(j=0;jncol;j++) { if(data->d[i][j] > data->maxRow[i]) { data->maxRow[i] = data->d[i][j]; } } } } void init_param(DATA *data, PARAM *param, int *p, int *q) { int i; param->np = *p; param->nb = *q; param->nvar = 1 + param->useAbun + param->useLen + param->useCov; assert(param->loglikRow = (double *) calloc(*p, sizeof(double))); assert(param->loglikCol = (double *) calloc(*q, sizeof(double))); assert(param->loglikRow_tmp = (double *) calloc(*p, sizeof(double))); assert(param->loglikCol_tmp = (double *) calloc(*q, sizeof(double))); /* Regression Parameters */ assert(param->beta = (double *) calloc(param->nvar, sizeof(double ))); assert(param->gamma = (double *) calloc(param->nvar, sizeof(double ))); /* for(j=0;jnb;j++) { assert(param->beta[j] = (double *) calloc(param->nvar, sizeof(double))); assert(param->gamma[j] = (double *) calloc(param->nvar, sizeof(double))); } */ assert(param->alpha_prey = (double *) calloc(*p, sizeof(double))); assert(param->delta_prey = (double *) calloc(*p, sizeof(double))); assert(param->alpha_bait = (double *) calloc(*q, sizeof(double))); assert(param->delta_bait = (double *) calloc(*q, sizeof(double))); assert(param->mu_prey = (double *) calloc(*p, sizeof(double))); assert(param->mu_prey_flag = (double *) calloc(*p, sizeof(double))); assert(param->beta_tmp = (double *) calloc(param->nvar, sizeof(double ))); assert(param->gamma_tmp = (double *) calloc(param->nvar, sizeof(double ))); /* for(j=0;jnb;j++) { assert(param->beta_tmp[j] = (double *) calloc(param->nvar, sizeof(double))); assert(param->gamma_tmp[j] = (double *) calloc(param->nvar, sizeof(double))); } */ assert(param->alpha_prey_tmp = (double *) calloc(*p, sizeof(double))); assert(param->delta_prey_tmp = (double *) calloc(*p, sizeof(double))); assert(param->alpha_bait_tmp = (double *) calloc(*q, sizeof(double))); assert(param->delta_bait_tmp = (double *) calloc(*q, sizeof(double))); assert(param->mu_prey_tmp = (double *) calloc(*p, sizeof(double))); assert(param->mu_prey_flag_tmp = (double *) calloc(*p, sizeof(double))); /* Mixture Indicators */ assert(param->iZ = (int **) calloc(*p, sizeof(int *))); for(i=0;i<*p;i++) assert(param->iZ[i] = (int *) calloc(*q, sizeof(int))); assert(param->Z = (int **) calloc(*p, sizeof(int *))); for(i=0;i<*p;i++) assert(param->Z[i] = (int *) calloc(data->uniqueNum, sizeof(int))); assert(param->Y = (int *) calloc(*p, sizeof(int))); /* Actual Mean Parameter for Real Interactors and Contaminants */ assert(param->lambda_real = (double **) calloc(*p, sizeof(double *))); for(i=0;i<*p;i++) assert(param->lambda_real[i] = (double *) calloc(*q, sizeof(double))); assert(param->lambda_cont = (double **) calloc(*p, sizeof(double *))); for(i=0;i<*p;i++) assert(param->lambda_cont[i] = (double *) calloc(*q, sizeof(double))); assert(param->r0 = (double *) calloc(*q, sizeof(double))); assert(param->lambda_real0 = (double *) calloc(*q, sizeof(double))); assert(param->lambda_real_tmp = (double **) calloc(*p, sizeof(double *))); for(i=0;i<*p;i++) assert(param->lambda_real_tmp[i] = (double *) calloc(*q, sizeof(double))); assert(param->lambda_cont_tmp = (double **) calloc(*p, sizeof(double *))); for(i=0;i<*p;i++) assert(param->lambda_cont_tmp[i] = (double *) calloc(*q, sizeof(double))); assert(param->lambda_real0_tmp = (double *) calloc(*q, sizeof(double))); assert(param->flagged = (double *) calloc(*p, sizeof(double))); assert(param->appearCont = (double *) calloc(*q, sizeof(double))); } void set_param(PARAM *param, PRIOR *prior, DATA *data, int iter, const gsl_rng *r) { int i,j,ct,anypos,k; double tmp; param->loglik = - ((double) (param->np * param->nb) ); param->loglik_tmp = param->loglik; for(i=0;inp;i++) param->loglikRow[i] = - ((double) param->np); for(j=0;jnb;j++) param->loglikCol[j] = - ((double) param->nb); /* Setting Initial Values for Poisson Regression Parameters */ for(i=0;invar;i++) param->beta[i] = 0.0; /* gsl_ran_gaussian(r, _PSD_BETA_); */ for(i=0;inp;i++) { tmp = vec_max(data->d[i], param->nb); param->alpha_prey[i] = 0.0; param->delta_prey[i] = 0.0; param->mu_prey[i] = 0.0; param->mu_prey_flag[i] = 0.0; } for(j=0;jnb;j++) { param->alpha_bait[j] = 0.0; param->delta_bait[j] = 0.0; } for(i=0;invar;i++) param->gamma[i] = 1.0; /* gsl_ran_gaussian(r, _PSD_GAMMA_); */ param->loglik_tmp = - ((double) (param->np * param->nb) ); for(i=0;inp;i++) param->loglikRow_tmp[i] = - ((double) param->np); for(j=0;jnb;j++) param->loglikCol_tmp[j] = - ((double) param->nb); /* Setting Initial Values for Poisson Regression Parameters */ for(i=0;invar;i++) param->beta_tmp[i] = 0.0; for(i=0;inp;i++) { param->alpha_prey_tmp[i] = 0.0; param->delta_prey_tmp[i] = 0.0; /* log(vec_med(data->d[i], data->ncol)+1.0); */ param->mu_prey_tmp[i] = param->alpha_prey_tmp[i]; param->mu_prey_flag_tmp[i] = param->mu_prey_tmp[i]; } for(j=0;jnb;j++) { param->alpha_bait_tmp[j] = 0.0; param->delta_bait_tmp[j] = 0.0; } for(i=0;invar;i++) param->gamma_tmp[i] = 0.0; /* Setting Initial Values for Mixture Indicators */ for(i=0;inp;i++) { ct = 0; if(gsl_ran_flat(r,0.0,1.0) < 0.5) param->Y[i] = 1; else param->Y[i] = 0; if(param->Y[i] == 1) { for(j=0;juniqueNum;j++) { anypos = 0; for(k=0;kuniqueSize[j];k++) if(data->d[i][data->mfu[j][k]] > 0.0) anypos = 1; if((gsl_ran_flat(r,0.0,1.0) < 0.5) && anypos) param->Z[i][j] = 1; else param->Z[i][j] = 0; for(k=0;kuniqueSize[j];k++) if(data->d[i][data->mfu[j][k]] > 20.0) param->Z[i][j] = 1; } } else { for(j=0;juniqueNum;j++) param->Z[i][j] = 0; } if(param->Y[i] == 1) { for(j=0;jnb;j++) { if(data->d[i][j] > 3.0) param->iZ[i][j] = 1; else param->iZ[i][j] = 0; } } else { for(j=0;jnb;j++) param->iZ[i][j] = 0; } } /* Mean Parameters for All Four Possible Mixture Components */ for(j=0;jnb;j++) { param->r0[j] = 0.95; param->lambda_real0[j] = log(1.0); } for(i=0;inp;i++) { param->flagged[i] = 0.0; } for(j=0;jnb;j++) { param->appearCont[j] = 0.0; } calcLambdaReal(data, prior, param); calcLambdaCont(data, prior, param); /* Mixture Proportions */ param->pcont[0] = 0.2; param->pcont[1] = 0.8; param->preal[0] = 0.9; param->preal[1] = 0.1; } void free_param(PARAM *param) { int i; free(param->loglikRow); free(param->loglikCol); free(param->loglikRow_tmp); free(param->loglikCol_tmp); free(param->beta); free(param->gamma); free(param->alpha_prey); free(param->delta_prey); free(param->alpha_bait); free(param->delta_bait); free(param->mu_prey); free(param->mu_prey_flag); free(param->beta_tmp); free(param->gamma_tmp); free(param->alpha_prey_tmp); free(param->delta_prey_tmp); free(param->alpha_bait_tmp); free(param->delta_bait_tmp); free(param->mu_prey_tmp); for(i=0;inp;i++) free(param->iZ[i]); free(param->iZ); for(i=0;inp;i++) free(param->Z[i]); free(param->Z); free(param->Y); for(i=0;inp;i++) free(param->lambda_real[i]); free(param->lambda_real); for(i=0;inp;i++) free(param->lambda_cont[i]); free(param->lambda_cont); free(param->r0); free(param->lambda_real0); for(i=0;inp;i++) free(param->lambda_real_tmp[i]); free(param->lambda_real_tmp); for(i=0;inp;i++) free(param->lambda_cont_tmp[i]); free(param->lambda_cont_tmp); free(param->lambda_real0_tmp); } void init_prior(PRIOR *prior, int *p, int *q) { prior->np = *p; prior->nb = *q; prior->nvar = 1 + prior->useAbun + prior->useLen + prior->useCov; assert(prior->mean_beta = (double *) calloc(prior->nvar, sizeof(double))); assert(prior->var_beta = (double *) calloc(prior->nvar * prior->nvar, sizeof(double))); assert(prior->mean_delta_bait = (double *) calloc(*p, sizeof(double))); assert(prior->var_delta_bait = (double *) calloc(*p, sizeof(double))); assert(prior->mean_alpha_prey = (double *) calloc(*p, sizeof(double))); assert(prior->var_alpha_prey = (double *) calloc(*p, sizeof(double))); assert(prior->mean_alpha_bait = (double *) calloc(*q, sizeof(double))); assert(prior->var_alpha_bait = (double *) calloc(*q, sizeof(double))); assert(prior->mean_gamma = (double *) calloc(prior->nvar, sizeof(double))); assert(prior->var_gamma = (double *) calloc(prior->nvar * prior->nvar, sizeof(double))); assert(prior->mean_mu_prey = (double *) calloc(*p, sizeof(double))); assert(prior->var_mu_prey = (double *) calloc(*p, sizeof(double))); assert(prior->epsilon_real = (double *) calloc(*q, sizeof(double))); assert(prior->epsilon_cont = (double *) calloc(*q, sizeof(double))); assert(prior->kappa_real = (double *) calloc(*q, sizeof(double))); assert(prior->kappa_cont = (double *) calloc(*q, sizeof(double))); } void set_prior(PRIOR *prior) { int i; /* Beta */ for(i=0;invar;i++) prior->mean_beta[i] = 1.0; for(i=0;invar*prior->nvar;i++) prior->var_beta[i] = 0.0; for(i=0;invar;i++) prior->var_beta[i*prior->nvar + i] = 2.0; for(i=0;invar;i++) prior->mean_gamma[i] = 2.0; for(i=0;invar*prior->nvar;i++) prior->var_gamma[i] = 0.0; for(i=0;invar;i++) prior->var_gamma[i*prior->nvar + i] = 2.0; prior->mean_gamma[0] = 1.0; prior->var_gamma[0] = 0.01; for(i=0;inp;i++) { prior->mean_alpha_prey[i] = 0.0; prior->var_alpha_prey[i] = 2.00; } for(i=0;inp;i++) { prior->mean_delta_bait[i] = 0.0; prior->var_delta_bait[i] = 2.00; } for(i=0;inb;i++) { prior->mean_alpha_bait[i] = 0.0; prior->var_alpha_bait[i] = 2.00; } for(i=0;inp;i++) { prior->mean_mu_prey[i] = 0.0; prior->var_mu_prey[i] = 2.00; } for(i=0;inb;i++) { prior->epsilon_real[i] = ((double) prior->np) * 0.015; prior->kappa_real[i] = ((double) prior->np) * 0.01; prior->epsilon_cont[i] = 1.0; prior->kappa_cont[i] = 1.0; } prior->acont[0] = 1.0; prior->acont[1] = 1.0; prior->areal[0] = 1.0; prior->areal[1] = 1.0; prior->sigmasq_alpha_prey = 1.0; prior->sigmasq_alpha_bait = 1.0; prior->sigmasq_mu_prey = 1.0; prior->shape_alpha_prey = 10.0 * (0.01 * ((double) prior->np)); prior->rate_alpha_bait = 10.0 * (0.01 * ((double) prior->np)); prior->shape_alpha_bait = 10.0 * (0.01 * ((double) prior->np)); prior->rate_alpha_bait = 10.0 * (0.01 * ((double) prior->np)); prior->shape_mu_prey = 10.0 * (0.01 * ((double) prior->np)); prior->rate_mu_prey = 10.0 * (0.01 * ((double) prior->np)); } void free_prior(PRIOR *prior) { free(prior->mean_beta); free(prior->var_beta); free(prior->mean_alpha_prey); free(prior->var_alpha_prey); free(prior->mean_delta_bait); free(prior->var_delta_bait); free(prior->mean_alpha_bait); free(prior->var_alpha_bait); free(prior->mean_gamma); free(prior->var_gamma); free(prior->mean_mu_prey); free(prior->var_mu_prey); free(prior->epsilon_real); free(prior->epsilon_cont); free(prior->kappa_real); free(prior->kappa_cont); } void init_summary(DATA *data, SUMMARY *summary, int *p, int *q) { int i; summary->np = *p; summary->nb = *q; assert(summary->iZ = (double **) calloc(*p, sizeof(double *))); for(i=0;i<*p;i++) assert(summary->iZ[i] = (double *) calloc(*q, sizeof(double))); assert(summary->Z = (double **) calloc(*p, sizeof(double *))); for(i=0;i<*p;i++) assert(summary->Z[i] = (double *) calloc(data->uniqueNum, sizeof(double))); /* assert(summary->lambda_real = (double **) calloc(*p, sizeof(double *))); for(i=0;i<*p;i++) assert(summary->lambda_real[i] = (double *) calloc(*q, sizeof(double))); assert(summary->lambda_cont = (double **) calloc(*p, sizeof(double *))); for(i=0;i<*p;i++) assert(summary->lambda_cont[i] = (double *) calloc(*q, sizeof(double))); */ assert(summary->expect = (double **) calloc(*p, sizeof(double *))); for(i=0;i<*p;i++) assert(summary->expect[i] = (double *) calloc(*q, sizeof(double))); /* assert(summary->lambda_real0 = (double *) calloc(*q, sizeof(double))); */ assert(summary->Y = (double *) calloc(*p, sizeof(double))); assert(summary->max_prob = (double *) calloc(*p, sizeof(double))); } void set_summary(DATA *data, SUMMARY *summary) { int i,j; /* Reproducibility score */ for(i=0;inp;i++) { for(j=0;jnb;j++) { summary->iZ[i][j] = 0.0; } } /* for(j=0;jnb;j++) summary->lambda_real0[j] = 0.0; */ for(i=0;inp;i++) { summary->Y[i] = 0.0; for(j=0;juniqueNum;j++) summary->Z[i][j] = 0.0; for(j=0;jnb;j++) { /* summary->lambda_real[i][j] = 0.0; summary->lambda_cont[i][j] = 0.0; */ summary->expect[i][j] = 0.0; } } } void free_summary(SUMMARY *summary) { int i; for(i=0;inp;i++) { free(summary->iZ[i]); free(summary->Z[i]); /* free(summary->lambda_real[i]); free(summary->lambda_cont[i]); */ free(summary->expect[i]); } free(summary->iZ); free(summary->Z); /* free(summary->lambda_real); free(summary->lambda_cont); */ free(summary->expect); free(summary->Y); /* free(summary->lambda_real0); */ free(summary->max_prob); } SAINT_v2.3.4/src/SAINTspc-noctrl-matrix/saint.h0000666000000000000000000001646611746171441017636 0ustar rootroot/* Copyright (C) <2011> For troubleshooting, contact hyung_won_choi@nuhs.edu.sg. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You can obtain a copy of the GNU General Public License from . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _MAX_PROT_ 10000 #define _MAX_BUF_ 2000 #define _MAX_NAME_ 2000 #define _MAX_SAMPLE_ 10000 #define _MAX_BAIT_ 10000 #define _MAX_REPLICA_ 100 #define _FF_PROP_ 0.10 #define _SKIP_ 1 #define _PRINT_FREQ_ 100 #define _PSD_BETA_ 1.0 #define _PSD_GAMMA_ 1.0 #define _PSD_ALPHA_PREY_ 2.0 #define _PSD_ALPHA_BAIT_ 2.0 #define _PSD_DELTA_BAIT_ 1.0 #define _PSD_MU_PREY_ 1.0 typedef struct tagDATA { char **preyORF; char **prey; char **experiment; char **bait; double *baitCoverage; double *preyAbundance; double *preyLogLength; double **d; double *maxRow; char *override; int nrow; int ncol; char **unique; int uniqueNum; int *uniqueSize; int *mtu; /* mapToUnique */ int **mfu; /* mapFromUnique */ int *ninter; int **use; int *ninterUnique; int **useUnique; int *ninterRow; int **useRow; int *ninterRowUnique; int **useRowUnique; } DATA; typedef struct tagPARAM{ int np; int nb; int useAbun; int useLen; int useCov; int nvar; double loglik; double *loglikRow; double *loglikCol; double loglik_tmp; double *loglikRow_tmp; double *loglikCol_tmp; double *beta; double *alpha_prey; double *delta_prey; double *alpha_bait; double *delta_bait; double *gamma; double *mu_prey; double *mu_prey_flag; int **iZ; int **Z; int *Y; double pcont[2]; double preal[2]; double **lambda_real; double **lambda_cont; double *r0; double *lambda_real0; double *beta_tmp; double *alpha_prey_tmp; double *delta_prey_tmp; double *alpha_bait_tmp; double *delta_bait_tmp; double *gamma_tmp; double *mu_prey_tmp; double *mu_prey_flag_tmp; double **lambda_real_tmp; double **lambda_cont_tmp; double *lambda_real0_tmp; double ff_prop; double *flagged; double *appearCont; } PARAM; typedef struct tagPRIOR{ int np; int nb; int useAbun; int useLen; int useCov; int nvar; double *mean_beta; double *var_beta; double *mean_alpha_prey; double *var_alpha_prey; double *mean_delta_bait; double *var_delta_bait; double *mean_alpha_bait; double *var_alpha_bait; double *mean_gamma; double *var_gamma; double *mean_mu_prey; double *var_mu_prey; double *epsilon_real; double *kappa_real; double *epsilon_cont; double *kappa_cont; double acont[2]; double areal[2]; double sigmasq_alpha_prey; double sigmasq_alpha_bait; double sigmasq_mu_prey; double shape_alpha_prey; double rate_alpha_prey; double shape_alpha_bait; double rate_alpha_bait; double shape_mu_prey; double rate_mu_prey; } PRIOR; typedef struct tagSUMMARY{ int np; int nb; double **iZ; double **Z; double **lambda_real; double **lambda_cont; double *lambda_real0; double **expect; double *Y; double *max_prob; } SUMMARY; /*************/ /* functions */ /*************/ int nrow(FILE *fp); int newlinechar(char *buf, int k); int ncol(FILE *fp); /**************** init_data.c ***************************/ void init_data(DATA *data, int *p, int *q); void free_data(DATA *data); void read_data(FILE *fp, DATA *data, int *p, int *q); void init_param(DATA *data, PARAM *param, int *p, int *q); void set_param(PARAM *param, PRIOR *prior, DATA *data, int iter, const gsl_rng *r); void free_param(PARAM *param); void init_prior(PRIOR *prior, int *p, int *q); void set_prior(PRIOR *prior); void free_prior(PRIOR *prior); void init_summary(DATA *data, SUMMARY *summary, int *p, int *q); void set_summary(DATA *data, SUMMARY *summary); void free_summary(SUMMARY *summary); void normalizeCoverage(double *cover, int *len); /**************** update.c *****************************/ double logZIP(double x, double r, double lambda); double loglikAll(DATA *data, PRIOR *prior, PARAM *param); double loglikAll_realtmp(DATA *data, PRIOR *prior, PARAM *param); double loglikAll_conttmp(DATA *data, PRIOR *prior, PARAM *param); double loglikRow(DATA *data, PRIOR *prior, PARAM *param, int r); double loglikCol(DATA *data, PRIOR *prior, PARAM *param, int c); void calcLambdaRealRow(DATA *data, PRIOR *prior, PARAM *param, int r); void calcLambdaRealCol(DATA *data, PRIOR *prior, PARAM *param, int c); void calcLambdaReal(DATA *data, PRIOR *prior, PARAM *param); void calcLambdaReal_tmp(DATA *data, PRIOR *prior, PARAM *param); void calcLambdaContRow(DATA *data, PRIOR *prior, PARAM *param, int r); void calcLambdaContCol(DATA *data, PRIOR *prior, PARAM *param, int c); void calcLambdaCont(DATA *data, PRIOR *prior, PARAM *param); void calcLambdaCont_tmp(DATA *data, PRIOR *prior, PARAM *param); double dMultGauss(double *x, double *mu, double *Sigma, int length); void updateBeta(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r, int k); void updateAlphaDeltaPrey(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void updateAlphaDeltaBait(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void updateGamma(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r, int k); void updateMuPrey(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void updateLambdaReal0(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void updatePriors(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void updateSigmasqAlphaPrey(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void updateSigmasqAlphaBait(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void updateSigmasqMuPrey(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void sampleY(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void sampleZ(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void sampleP(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); void mhgibbs(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r); double calc_dist(int calls[], int num); /**************** mmath.c ******************************/ double vec_sum(const double *vec, int len); double vec_max(const double *vec, int len); double vec_min(const double *vec, int len); double vec_mean(const double *vec, int len); double vec_var(const double *vec, int len); double vec_med(const double *vec, int len); double vec_mad(const double *vec, int len); double poisson_unscaled_pdf(const double c, const double lambda); SAINT_v2.3.4/src/SAINTspc-noctrl-matrix/update.c0000666000000000000000000007175211746171441017774 0ustar rootroot#include "saint.h" double logZIP(double x, double r, double lambda) { double res = 0.0; res = -exp(lambda) + x * lambda - gsl_sf_lngamma(x+1.0); res += (1.0-r) * exp(res); res += (x == 0 ? r : 0.0); return res; } double loglikAll(DATA *data, PRIOR *prior, PARAM *param) { int b, i, j; double tmp = 0.0; for(i=0;inp;i++) { for(b=0;bninterRow[i];b++) { j = data->useRow[i][b]; if(param->Y[i]==1 && param->Z[i][data->mtu[j]]==1) { tmp += ( data->d[i][j] * param->lambda_real[i][j] - exp(param->lambda_real[i][j]) ); } else if(param->Y[i]==1 && param->Z[i][data->mtu[j]]==0) { tmp += logZIP(data->d[i][j], param->r0[j], param->lambda_real0[j]); } else { if(data->d[i][j] > 0.0) tmp += ( data->d[i][j] * param->lambda_cont[i][j] - exp(param->lambda_cont[i][j]) ); else tmp += ( data->d[i][j] * param->lambda_real0[j] - exp(param->lambda_real0[j]) ); } } } return tmp; } double loglikAll_realtmp(DATA *data, PRIOR *prior, PARAM *param) { int b, i, j; double tmp = 0.0; for(i=0;inp;i++) { for(b=0;bninterRow[i];b++) { j = data->useRow[i][b]; if(param->Y[i]==1 && param->Z[i][data->mtu[j]]==1) { tmp += ( data->d[i][j] * param->lambda_real_tmp[i][j] - exp(param->lambda_real_tmp[i][j]) ); } else if(param->Y[i]==1 && param->Z[i][data->mtu[j]]==0) { tmp += logZIP(data->d[i][j], param->r0[j], param->lambda_real0[j]); } else { if(data->d[i][j] > 0.0) tmp += ( data->d[i][j] * param->lambda_cont[i][j] - exp(param->lambda_cont[i][j]) ); else tmp += ( data->d[i][j] * param->lambda_real0[j] - exp(param->lambda_real0[j]) ); } } } return tmp; } double loglikAll_conttmp(DATA *data, PRIOR *prior, PARAM *param) { int b, i, j; double tmp = 0.0; for(i=0;inp;i++) { for(b=0;bninterRow[i];b++) { j = data->useRow[i][b]; if(param->Y[i]==1 && param->Z[i][data->mtu[j]]==1) { tmp += ( data->d[i][j] * param->lambda_real[i][j] - exp(param->lambda_real[i][j]) ); } else if(param->Y[i]==1 && param->Z[i][data->mtu[j]]==0) { tmp += logZIP(data->d[i][j], param->r0[j], param->lambda_real0[j]); } else { if(data->d[i][j] > 0.0) tmp += ( data->d[i][j] * param->lambda_cont_tmp[i][j] - exp(param->lambda_cont_tmp[i][j]) ); else tmp += ( data->d[i][j] * param->lambda_real0[j] - exp(param->lambda_real0[j]) ); } } } return tmp; } double loglikRow(DATA *data, PRIOR *prior, PARAM *param, int r) { int b, j; double tmp = 0.0; for(b=0;bninterRow[r];b++) { j = data->useRow[r][b]; if(param->Y[r]==1 && param->Z[r][data->mtu[j]]==1) { tmp += ( data->d[r][j] * param->lambda_real[r][j] - exp(param->lambda_real[r][j]) ); } else if(param->Y[r]==1 && param->Z[r][data->mtu[j]]==0) { tmp += logZIP(data->d[r][j], param->r0[j], param->lambda_real0[j]); } else { if(data->d[r][j] > 0.0) tmp += ( data->d[r][j] * param->lambda_cont[r][j] - exp(param->lambda_cont[r][j]) ); else tmp += ( data->d[r][j] * param->lambda_real0[j] - exp(param->lambda_real0[j]) ); } } return tmp; } double loglikCol(DATA *data, PRIOR *prior, PARAM *param, int c) { int a,i; double tmp = 0.0; for(a=0;aninter[c];a++) { i = data->use[c][a]; if(param->Y[i]==1 && param->Z[i][data->mtu[c]]==1) { tmp += ( data->d[i][c] * param->lambda_real[i][c] - exp(param->lambda_real[i][c]) ); } else if(param->Y[i]==1 && param->Z[i][data->mtu[c]]==0) { tmp += logZIP(data->d[i][c], param->r0[c], param->lambda_real0[c]); } else { if(data->d[i][c] > 0.0) tmp += ( data->d[i][c] * param->lambda_cont[i][c] - exp(param->lambda_cont[i][c]) ); else tmp += ( data->d[i][c] * param->lambda_real0[c] - exp(param->lambda_real0[c]) ); } } return tmp; } void calcLambdaRealRow(DATA *data, PRIOR *prior, PARAM *param, int r) { int b, j, pos; double tmp; for(b=0;bninterRow[r];b++) { j = data->useRow[r][b]; tmp = param->beta[0]; pos = 1; if(param->useAbun) { tmp += param->beta[pos] * data->preyAbundance[r]; pos++; } if(param->useLen) { tmp += param->beta[pos] * data->preyLogLength[r]; pos++; } if(param->useCov) { tmp += param->beta[pos] * data->baitCoverage[j]; pos++; } tmp += ( param->alpha_prey[r] + param->alpha_bait[j] ); param->lambda_real[r][j] = tmp; } } void calcLambdaRealCol(DATA *data, PRIOR *prior, PARAM *param, int c) { int a, i, pos; double tmp; for(a=0;aninter[c];a++) { i = data->use[c][a]; tmp = param->beta[0]; pos = 1; if(param->useAbun) { tmp += param->beta[pos] * data->preyAbundance[i]; pos++; } if(param->useLen) { tmp += param->beta[pos] * data->preyLogLength[i]; pos++; } if(param->useCov) { tmp += param->beta[pos] * data->baitCoverage[c]; pos++; } tmp += ( param->alpha_prey[i] + param->alpha_bait[c] ) ; param->lambda_real[i][c] = tmp; } } void calcLambdaReal(DATA *data, PRIOR *prior, PARAM *param) { int b, i, j, pos; double tmp; for(i=0;inp;i++) { for(b=0;bninterRow[i];b++) { j = data->useRow[i][b]; tmp = param->beta[0]; pos = 1; if(param->useAbun) { tmp += param->beta[pos] * data->preyAbundance[i]; pos++; } if(param->useLen) { tmp += param->beta[pos] * data->preyLogLength[i]; pos++; } if(param->useCov) { tmp += param->beta[pos] * data->baitCoverage[j]; pos++; } tmp += ( param->alpha_prey[i] + param->alpha_bait[j] ); param->lambda_real[i][j] = tmp; } } } void calcLambdaReal_tmp(DATA *data, PRIOR *prior, PARAM *param) { int b, i, j, pos; double tmp; for(i=0;inp;i++) { for(b=0;bninterRow[i];b++) { j = data->useRow[i][b]; tmp = param->beta_tmp[0]; pos = 1; if(param->useAbun) { tmp += param->beta_tmp[pos] * data->preyAbundance[i]; pos++; } if(param->useLen) { tmp += param->beta_tmp[pos] * data->preyLogLength[i]; pos++; } if(param->useCov) { tmp += param->beta_tmp[pos] * data->baitCoverage[j]; pos++; } tmp += ( param->alpha_prey[i] + param->alpha_bait[j] ); param->lambda_real_tmp[i][j] = tmp; } } } void calcLambdaContRow(DATA *data, PRIOR *prior, PARAM *param, int r) { int b, j, pos; double tmp; for(b=0;bninterRow[r];b++) { j = data->useRow[r][b]; tmp = param->gamma[0]; pos = 1; if(param->useAbun) { tmp += param->gamma[pos] * data->preyAbundance[r]; pos++; } if(param->useLen) { tmp += param->gamma[pos] * data->preyLogLength[r]; pos++; } if(param->useCov) { tmp += param->gamma[pos] * data->baitCoverage[j]; pos++; } tmp += ( param->mu_prey[r]) ; param->lambda_cont[r][j] = tmp; } } void calcLambdaContCol(DATA *data, PRIOR *prior, PARAM *param, int c) { int a, i, pos; double tmp; for(a=0;aninter[c];a++) { i = data->use[c][a]; tmp = param->gamma[0]; pos = 1; if(param->useAbun) { tmp += param->gamma[pos] * data->preyAbundance[i]; pos++; } if(param->useLen) { tmp += param->gamma[pos] * data->preyLogLength[i]; pos++; } if(param->useCov) { tmp += param->gamma[pos] * data->baitCoverage[c]; pos++; } tmp += (param->mu_prey[i]) ; param->lambda_cont[i][c] = tmp; } } void calcLambdaCont(DATA *data, PRIOR *prior, PARAM *param) { int b, i,j, pos; double tmp; for(i=0;inp;i++) { for(b=0;bninterRow[i];b++) { j = data->useRow[i][b]; tmp = param->gamma[0]; pos = 1; if(param->useAbun) { tmp += param->gamma[pos] * data->preyAbundance[i]; pos++; } if(param->useLen) { tmp += param->gamma[pos] * data->preyLogLength[i]; pos++; } if(param->useCov) { tmp += param->gamma[pos] * data->baitCoverage[j]; pos++; } tmp += (param->mu_prey[i]) ; param->lambda_cont[i][j] = tmp; } } } void calcLambdaCont_tmp(DATA *data, PRIOR *prior, PARAM *param) { int b, i, j, pos; double tmp; for(i=0;inp;i++) { for(b=0;bninterRow[i];b++) { j = data->useRow[i][b]; tmp = param->gamma_tmp[0]; pos = 1; if(param->useAbun) { tmp += param->gamma_tmp[pos] * data->preyAbundance[i]; pos++; } if(param->useLen) { tmp += param->gamma_tmp[pos] * data->preyLogLength[i]; pos++; } if(param->useCov) { tmp += param->gamma_tmp[pos] * data->baitCoverage[j]; pos++; } tmp += (param->mu_prey[i]) ; param->lambda_cont_tmp[i][j] = tmp; } } } double dMultGauss(double *x, double *mu, double *Sigma, int length) { int i,j,s; double temp[length]; double dmult = 0.0; gsl_matrix *inv = gsl_matrix_alloc(length, length); gsl_matrix_view m = gsl_matrix_view_array(Sigma, length, length); gsl_permutation *p = gsl_permutation_alloc(length); gsl_linalg_LU_decomp(&m.matrix, p, &s); gsl_linalg_LU_invert(&m.matrix, p, inv); for(j=0;jloglik = loglikAll(data, prior, param); for(i=0;invar;i++) { param->beta_tmp[i] = param->beta[i]; } param->beta_tmp[k] += gsl_ran_gaussian(r, _PSD_BETA_ * gsl_ran_gamma(r, 0.5, 0.5)); calcLambdaReal_tmp(data, prior, param); param->loglik_tmp = loglikAll_realtmp(data, prior, param); /* fprintf(stderr, "likdiff=%f, oldbeta=%f, newbeta=%f\n", param->loglik_tmp - param->loglik, param->beta_tmp[k], param->beta[k]); */ /* Calculate Accept-Reject Probability and Flip a coin */ acceptProb = -param->loglik; if(param->nvar > 1) acceptProb -= dMultGauss(param->beta, prior->mean_beta, prior->var_beta, param->nvar); else acceptProb -= ( - pow(param->beta[0] - prior->mean_beta[0], 2.0) / (2.0 * prior->var_beta[0]) ); acceptProb += param->loglik_tmp; if(param->nvar > 1) acceptProb += dMultGauss(param->beta_tmp, prior->mean_beta, prior->var_beta, param->nvar); else acceptProb += ( - pow(param->beta_tmp[0] - prior->mean_beta[0], 2.0) / (2.0 * prior->var_beta[0]) ); acceptProb = exp(acceptProb); if(acceptProb > 1.0) acceptProb = 1.0; if(gsl_ran_flat(r, 0.0, 1.0) < acceptProb) { for(i=0;invar;i++) param->beta[i] = param->beta_tmp[i]; for(i=0;inp;i++) { for(j=0;jnb;j++) { param->lambda_real[i][j] = param->lambda_real_tmp[i][j]; } } } else { } } void updateAlphaPrey(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int i,j,prey; double acceptProb, tmp; double mean; /* Generate Proposal and Copy Current Values of beta, lambda_real, lik to Temp*/ for(prey=0;preynp;prey++) { param->loglikRow[prey] = loglikRow(data, prior, param, prey); for(i=0;inp;i++) param->alpha_prey_tmp[i] = param->alpha_prey[i]; tmp = gsl_ran_gaussian(r, _PSD_ALPHA_PREY_); param->alpha_prey[prey] += tmp; param->loglikRow_tmp[prey] = param->loglikRow[prey]; for(j=0;jnb;j++) { param->lambda_real_tmp[prey][j] = param->lambda_real[prey][j]; } calcLambdaRealRow(data, prior, param, prey); param->loglikRow[prey] = loglikRow(data, prior, param, prey); /* Calculate Accept-Reject Probability and Flip a coin */ acceptProb = param->loglikRow[prey]; acceptProb += (-.5 * pow(param->alpha_prey[prey], 2.0) / prior->sigmasq_alpha_prey); acceptProb -= param->loglikRow_tmp[prey]; acceptProb -= (-.5 * pow(param->alpha_prey_tmp[prey], 2.0) / prior->sigmasq_alpha_prey); acceptProb = exp(acceptProb); if(acceptProb > 1.0) acceptProb = 1.0; /* tmp = GSL_MAX(param->mu_prey[prey], param->mu_prey_flag[prey]); if(param->alpha_prey[prey] < tmp) acceptProb = 0.0; */ if(gsl_ran_flat(r, 0.0, 1.0) < acceptProb) { /* mean = vec_mean(param->alpha_prey, param->np); for(i=0;inp;i++) param->alpha_prey[i] -= mean; param->beta[0] += mean; */ } else { param->alpha_prey[prey] = param->alpha_prey_tmp[prey]; for(j=0;jnb;j++) { param->lambda_real[prey][j] = param->lambda_real_tmp[prey][j]; } } } mean = vec_mean(param->alpha_prey, param->np); for(prey=0;preynp;prey++) param->alpha_prey[prey] -= mean; param->beta[0] += mean; } void updateAlphaDeltaBait(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int i,bait; double acceptProb; double mean; for(bait=0;baitnb;bait++) { param->loglikCol[bait] = loglikCol(data, prior, param, bait); param->alpha_bait_tmp[bait] = param->alpha_bait[bait]; param->alpha_bait[bait] += gsl_ran_gaussian(r, _PSD_ALPHA_BAIT_); param->loglikCol_tmp[bait] = param->loglikCol[bait]; for(i=0;inp;i++) { param->lambda_real_tmp[i][bait] = param->lambda_real[i][bait]; } calcLambdaRealCol(data, prior, param, bait); param->loglikCol[bait] = loglikCol(data, prior, param, bait); /* Calculate Accept-Reject Probability and Flip a coin */ mean = vec_mean(param->alpha_bait, param->nb); acceptProb = param->loglikCol[bait]; acceptProb += (-.5 * pow(param->alpha_bait[bait]-mean, 2.0) / prior->sigmasq_alpha_bait); acceptProb -= param->loglikCol_tmp[bait]; acceptProb -= (-.5 * pow(param->alpha_bait_tmp[bait]-mean, 2.0) / prior->sigmasq_alpha_bait); acceptProb = exp(acceptProb); if(acceptProb > 1.0) acceptProb = 1.0; if(gsl_ran_flat(r, 0.0, 1.0) < acceptProb) { } else { param->alpha_bait[bait] = param->alpha_bait_tmp[bait]; for(i=0;inp;i++) { param->lambda_real[i][bait] = param->lambda_real_tmp[i][bait]; } } } mean = vec_mean(param->alpha_bait, param->np); for(bait=0;baitnb;bait++) param->alpha_bait[bait] -= mean; param->beta[0] += mean; } void updateGamma(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r, int k) { int i,j; double acceptProb; param->loglik = loglikAll(data, prior, param); for(i=0;invar;i++) { param->gamma_tmp[i] = param->gamma[i]; } param->gamma_tmp[k] += gsl_ran_gaussian(r, _PSD_GAMMA_ * gsl_ran_gamma(r, 0.5,0.5)); calcLambdaCont_tmp(data, prior, param); param->loglik_tmp = loglikAll_conttmp(data, prior, param); /* Calculate Accept-Reject Probability and Flip a coin */ acceptProb = - param->loglik; if(param->nvar > 1) acceptProb -= dMultGauss(param->gamma, prior->mean_gamma, prior->var_gamma, param->nvar); else acceptProb -= ( - pow(param->gamma[0] - prior->mean_gamma[0], 2.0) / (2.0 * prior->var_gamma[0]) ); acceptProb += param->loglik_tmp; if(param->nvar > 1) acceptProb += dMultGauss(param->gamma_tmp, prior->mean_gamma, prior->var_gamma, param->nvar); else acceptProb += ( - pow(param->gamma_tmp[0] - prior->mean_gamma[0], 2.0) / (2.0 * prior->var_gamma[0]) ); acceptProb = exp(acceptProb); if(acceptProb > 1.0) acceptProb = 1.0; if(gsl_ran_flat(r, 0.0, 1.0) < acceptProb) { for(i=0;invar;i++) param->gamma[i] = param->gamma_tmp[i]; for(i=0;inp;i++) { for(j=0;jnb;j++) { param->lambda_cont[i][j] = param->lambda_cont_tmp[i][j]; } } } else { } } void updateMuPrey(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int i,j,prey; double acceptProb, tmp; double mean; /* Generate Proposal and Copy Current Values of beta, lambda_real, lik to Temp*/ for(prey=0;preynp;prey++) { param->loglikRow[prey] = loglikRow(data, prior, param, prey); for(i=0;inp;i++) param->mu_prey_tmp[i] = param->mu_prey[i]; param->loglikRow_tmp[prey] = param->loglikRow[prey]; for(j=0;jnb;j++) { param->lambda_cont_tmp[prey][j] = param->lambda_cont[prey][j]; } tmp = gsl_ran_gaussian(r, _PSD_MU_PREY_); param->mu_prey[prey] += tmp; calcLambdaContRow(data, prior, param, prey); param->loglikRow[prey] = loglikRow(data, prior, param, prey); /* Calculate Accept-Reject Probability and Flip a coin */ acceptProb = param->loglikRow[prey]; acceptProb += (-.5 * pow(param->mu_prey[prey], 2.0) / prior->sigmasq_mu_prey); acceptProb -= param->loglikRow_tmp[prey]; acceptProb -= (-.5 * pow(param->mu_prey_tmp[prey], 2.0) / prior->sigmasq_mu_prey); acceptProb = exp(acceptProb); if(acceptProb > 1.0) acceptProb = 1.0; /* if(param->mu_prey[prey] > param->alpha_prey[prey]) acceptProb = 0.0; */ if(gsl_ran_flat(r, 0.0, 1.0) < acceptProb) { } else { param->mu_prey[prey] = param->mu_prey_tmp[prey]; for(j=0;jnb;j++) { param->lambda_cont[prey][j] = param->lambda_cont_tmp[prey][j]; } } } mean = vec_mean(param->mu_prey, param->np); for(prey=0;preynp;prey++) param->mu_prey[prey] -= mean; param->gamma[0] += mean; } void updateLambdaReal0(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int i,j; double shape, rate; double a, b; int n0; /* update r0's first */ for(j=0;jnb;j++) { n0 = param->np - data->ninter[j]; a = ((double) n0) / (1.0 + exp(-exp(param->lambda_real0[j]))); b = ((double) data->ninter[j]) + ((double) n0) * exp(-exp(param->lambda_real0[j])) / (1 + exp(-exp(param->lambda_real0[j]))) ; param->r0[j] = a / (a + b); } for(j=0;jnb;j++) { n0 = param->np - data->ninter[j]; shape = prior->epsilon_real[j]; rate = prior->kappa_real[j]; for(i=0;inp;i++) { if(param->Y[i] == 1 && param->Z[i][data->mtu[j]] == 0 && data->use[j][i]) { shape += ((double) data->d[i][j]); rate += 1.0; } } rate += ((double) n0) * exp(-param->lambda_real0[j]) / (1 + exp(-param->lambda_real0[j])); param->lambda_real0[j] = log(gsl_ran_gamma(r, shape, 1.0/rate)); } } /* void updateLambdaReal0(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int i,j; double shape, rate; for(j=0;jnb;j++) { shape = prior->epsilon_real[j] + 0.01 * ((double) param->np); rate = prior->kappa_real[j] + 0.002 * ((double) param->np); for(i=0;inp;i++) { if(param->Y[i] == 1 && param->Z[i][data->mtu[j]] == 0) { shape += ((double) data->d[i][j]); rate += 1.0; } } param->lambda_real0[j] = log(gsl_ran_gamma(r, shape, 1.0/rate)); } } */ void updatePriors(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r){ updateSigmasqAlphaPrey(data, prior, param, r); updateSigmasqAlphaBait(data, prior, param, r); updateSigmasqMuPrey(data, prior, param, r); } void sampleY(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int b, i,j,k,mfuk, tmpsum0, tmpsum, tmpmaxint, filterout1, filterout2, count; double comp1, comp2, ratio, tmp, dotprod; double tmp1, tmp2, tmp3, tmpmax; double tmp_prob[data->uniqueNum]; double preal[2]; int flagged[param->np]; for(i=0;inp;i++) { comp1 = param->pcont[1]; comp2 = param->pcont[0]; preal[0] = 0.0; preal[1] = 0.0; for(b=0;bninterRowUnique[i];b++) { j = data->useRowUnique[i][b]; preal[0] += ((double) (1-param->Z[i][j])) ; preal[1] += ((double) param->Z[i][j]) ; } preal[0] += ((double) (data->uniqueNum - data->ninterRowUnique[i])); preal[0] /= ((double) data->uniqueNum); preal[1] /= ((double) data->uniqueNum); for(b=0;bninterRowUnique[i];b++) { j = data->useRowUnique[i][b]; tmp1 = 0.0; tmp2 = 0.0; tmp3 = 0.0; for(k=0;kuniqueSize[j];k++) { mfuk = data->mfu[j][k]; tmp = param->lambda_real[i][mfuk]; tmp1 += -exp(param->lambda_real[i][mfuk]) + data->d[i][mfuk] * param->lambda_real[i][mfuk] - gsl_sf_lngamma(data->d[i][mfuk]+1.0); tmp2 += logZIP(data->d[i][mfuk], param->r0[mfuk], param->lambda_real0[mfuk]); /* tmp2 += -exp(param->lambda_real0[mfuk]) + data->d[i][mfuk] * param->lambda_real0[mfuk]; */ tmp3 += -exp(param->lambda_cont[i][mfuk]) + data->d[i][mfuk] * param->lambda_cont[i][mfuk] - gsl_sf_lngamma(data->d[i][mfuk]+1.0); } tmpmax = GSL_MAX(tmp1, tmp2); tmpmax = GSL_MAX(tmpmax, tmp3); tmp1 -= tmpmax; tmp2 -= tmpmax; tmp3 -= tmpmax; tmp1 = exp(tmp1); tmp2 = exp(tmp2); tmp3 = exp(tmp3); if(tmp1 == 0.0 && tmp2 == 0.0) { tmp_prob[j] = 0.0; } else tmp_prob[j] = (preal[1] * tmp1) / ( preal[1] * tmp1 + preal[0] * tmp2); comp1 *= (param->Z[i][j] ? tmp1 : tmp2 ); comp2 *= tmp3; } if(comp1 == 0.0 && comp2 == 0.0) ratio = 0.0; else ratio = comp1 / (comp1 + comp2); if(isnan(ratio)) ratio = 0.0; if(!finite(ratio)) ratio = 1.0; if(gsl_ran_flat(r, 0.0, 1.0) < ratio) param->Y[i] = 1; else param->Y[i] = 0; /* if(vec_max(param->lambda_cont[i], param->nb) <= 2.0 && vec_max(param->lambda_real[i], param->nb) <= 2.0) param->Y[i] = 1; */ tmpsum = 0; tmpsum0 = 0; for(b=0;bninterRowUnique[i];b++) { j = data->useRowUnique[i][b]; tmpmaxint = 0; for(k=0;kuniqueSize[j];k++) { if(((int) data->d[i][data->mfu[j][k]]) > tmpmaxint) tmpmaxint = ((int) data->d[i][data->mfu[j][k]]); } if(tmpmaxint >= 1) tmpsum0++; if(param->Z[i][j]) tmpsum++; } filterout1 = ( (int) ( ((double) tmpsum) / ((double) data->uniqueNum) > param->ff_prop) ); filterout2 = ( (int) ( ((double) tmpsum0) / ((double) data->uniqueNum) <= param->ff_prop) ); if(filterout2) { param->Y[i] = 1; param->flagged[i] += 1.0; } flagged[i] = 0; if(filterout1) { flagged[i] = 1; param->Y[i] = 0; } if(data->override[i] == 'C') param->Y[i] = 0; else if(data->override[i] == 'R') param->Y[i] = 1; else { } } /* Calculate Frequency Distribution -- appearCont */ count = 0; for(i=0;inp;i++) { if(param->Y[i]==0) count++; } for(j=0;jnb;j++) { param->appearCont[j] = 0.0; } for(j=0;jnb;j++) { for(i=0;inp;i++) { if(param->Y[i]==0 && data->d[i][j] > 0.0) param->appearCont[j] += (1.0 / ((double) count)); } } /* Flag unflagged ones with high nonzero counts > ff_prop */ for(i=0;inp;i++) { if(param->Y[i] && flagged[i] == 0) {} else { dotprod = 0.0; for(j=0;jnb;j++) { if(data->d[i][j] > 0.0) dotprod += param->appearCont[j] / ((double) param->nb); } if(dotprod >= param->ff_prop) { param->Y[i] = 0; } } } } void sampleZ(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int b, i,j,k,mfuk,anypos; double p1, p2; double tmp1, tmp2, tmp, tmpmax, prob, tp; for(i=0;inp;i++) { for(b=0;bninterRowUnique[i];b++) { j = data->useRowUnique[i][b]; if(data->uniqueSize[j] > 1) { anypos = 0; for(k=0;kuniqueSize[j];k++) { if(data->d[i][data->mfu[j][k]] > 0.0) anypos = 1; } if(anypos == 0) { for(k=0;kuniqueSize[j];k++) param->iZ[i][data->mfu[j][k]] = 0; param->Z[i][j] = 0; } else { p1 = p2 = 1.0; for(k=0;kuniqueSize[j];k++) { mfuk = data->mfu[j][k]; tmp = data->d[i][mfuk] <= exp(param->lambda_real[i][mfuk]) ? param->lambda_real[i][mfuk] : log(data->d[i][mfuk]); /* used to be masked -- nov-17-08 */ tmp = param->lambda_real[i][mfuk]; tmp1 = -exp(tmp) + data->d[i][mfuk] * tmp - gsl_sf_lngamma(data->d[i][mfuk]+1.0); tmp2 = logZIP(data->d[i][mfuk], param->r0[mfuk], param->lambda_real0[mfuk]); tmpmax = GSL_MAX(tmp1, tmp2); tmp1 -= tmpmax; tmp2 -= tmpmax; p1 *= exp(tmp1); p2 *= exp(tmp2); /* make calls and calculate distance within replicate */ tp = param->preal[1] * exp(tmp1) / (param->preal[1] * exp(tmp1) + param->preal[0] * exp(tmp2)); param->iZ[i][mfuk] = gsl_ran_flat(r, 0.0, 1.0) < tp ? 1 : 0; if(data->d[i][mfuk] == 0.0) param->iZ[i][mfuk] = 0; } p1 *= param->preal[1]; p2 *= param->preal[0]; prob = p1 / (p1 + p2); if(gsl_ran_flat(r, 0.0, 1.0) < prob) param->Z[i][j] = 1; else param->Z[i][j] = 0; } } else { anypos = 0; if(data->d[i][data->mfu[j][0]] > 0.0) anypos = 1; if(anypos == 0) { param->iZ[i][data->mfu[j][0]] = 0; param->Z[i][j] = 0; } else { p1 = p2 = 1.0; mfuk = data->mfu[j][0]; /* tmp = data->d[i][mfuk] <= exp(param->lambda_real[i][mfuk]) ? param->lambda_real[i][mfuk] : log(data->d[i][mfuk]); */ /* used to be masked -- nov-17-08 */ tmp = param->lambda_real[i][mfuk]; tmp1 = -exp(tmp) + data->d[i][mfuk] * tmp - gsl_sf_lngamma(data->d[i][mfuk]+1.0); tmp2 = logZIP(data->d[i][mfuk], param->r0[mfuk], param->lambda_real0[mfuk]); tmpmax = GSL_MAX(tmp1, tmp2); tmp1 -= tmpmax; tmp2 -= tmpmax; p1 *= exp(tmp1); p2 *= exp(tmp2); /* make calls and calculate distance within replicate */ p1 *= param->preal[1]; p2 *= param->preal[0]; prob = p1 / (p1 + p2); if(gsl_ran_flat(r, 0.0, 1.0) < prob) { param->Z[i][j] = 1; param->iZ[i][mfuk] = 1; } else { param->Z[i][j] = 0; param->iZ[i][mfuk] = 0; } } } } } } double calc_dist(int calls[], int num) { int i,j,count; double dist = 0.0; count = 0; for(i=0;i<(num-1);i++) { for(j=(i+1);jnp;i++) { if(param->Y[i]) { shape += 0.5; rate += 0.5 * pow(param->alpha_prey[i], 2.0); } } prior->sigmasq_alpha_prey = 1.0 / gsl_ran_gamma(r, shape, 1.0/rate); } void updateSigmasqAlphaBait(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int j; double shape, rate, mean; shape = 1.0; rate = 1.0; mean = vec_mean(param->alpha_bait, param->nb); for(j=0;jnb;j++) { shape += 0.5; rate += 0.5 * pow(param->alpha_bait[j]-mean, 2.0); } prior->sigmasq_alpha_bait = 1.0 / gsl_ran_gamma(r, shape, 1.0/rate); } void updateSigmasqMuPrey(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int i; double shape, rate; shape = 1.0; rate = 1.0; for(i=0;inp;i++) { if(param->Y[i]==0) { shape += 0.5; rate += 0.5 * pow(param->mu_prey[i], 2.0); } } prior->sigmasq_mu_prey = 1.0 / gsl_ran_gamma(r, shape, 1.0/rate); } void sampleP(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int b, i, j; int n[2]; double alpha[2]; for(i=0;i<2;i++) n[i] = 0; for(i=0;inp;i++) (n[param->Y[i]])++; for(i=0;i<2;i++) alpha[i] = prior->acont[i] + ((double) n[i]); gsl_ran_dirichlet(r, 2, alpha, param->pcont); for(i=0;i<2;i++) n[i] = 0; for(i=0;inp;i++) { if(param->Y[i]) { for(b=0;bninterRowUnique[i];b++) { j = data->useRowUnique[i][b]; (n[param->Z[i][j]])++; } n[0] += data->uniqueNum - data->ninterRowUnique[i]; } } for(i=0;i<2;i++) alpha[i] = prior->areal[i] + ((double) n[i]); gsl_ran_dirichlet(r, 2, alpha, param->preal); /* fprintf(stderr, "C-%.3f R-%.3f NR-%.3f\n", param->pcont[0], param->preal[1], param->preal[0]); */ } /**************************************/ /*** Metropolis-Hastings with Gibbs ***/ /**************************************/ void mhgibbs(DATA *data, PRIOR *prior, PARAM *param, const gsl_rng *r) { int k; /* fprintf(stderr, "beta:"); for(k=0;knvar;k++) fprintf(stderr, "%.5f ", param->beta[k]); fprintf(stderr, "gamma:"); for(k=0;knvar;k++) fprintf(stderr, "%.5f ", param->gamma[k]); fprintf(stderr, "\n"); */ for(k=0;knvar;k++) updateBeta(data, prior, param, r, k); updateAlphaPrey(data, prior, param, r); updateAlphaDeltaBait(data, prior, param, r); /* calcLambdaReal(data, prior, param); */ updateLambdaReal0(data, prior, param, r); for(k=0;knvar;k++) updateGamma(data, prior, param, r, k); updateMuPrey(data, prior, param, r); updatePriors(data, prior, param, r); sampleY(data, prior, param, r); sampleZ(data, prior, param, r); sampleP(data, prior, param, r); } SAINT_v2.3.4/src/SAINTspc-noctrl/0000777000000000000000000000000012145226162015023 5ustar rootrootSAINT_v2.3.4/src/SAINTspc-noctrl/mmath.c0000666000000000000000000000411711746171455016312 0ustar rootroot#include "saint.h" float vec_sum(const float *vec, int len) { int i; float res; res=vec[0]; for(i=1;ivec[i]) res=vec[i]; } return res; } float vec_mean(const float *vec, int len) { float tmp=0.0; int i; for(i=0;i sum) { rr++; sum += p[rr]; } if(rr >= K) rr = K-1; return rr; } float geometric_mean(float *x, int n) { int i; float res = 0.0; for(i=0;inprey;i++) (wsum[prior->w_mu[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_mu) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_mu[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_mu[i] = gammap[i]; for(j=0;jgamma_mu[i] *= (1.0 - gammap[j]); } } void DP_mu_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_mu, tmp_lambda, maxl, tmp, false_avg; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_mu[i]); cur_mu = param->mu[pid]; false_avg = 0.0; for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; false_avg += param->lambda_false[id]; } false_avg /= ((float) data->preyNinter[pid]); for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(data->ctrl[data->i2IP[id]] || param->Z[data->a2u[id]] == 0) { tmp_lambda = param->lambda_false[id] + prior->theta_mu[i] - cur_mu; tmp = data->d[id]; prob[i] += log_poisson_g_prop(tmp, exp(tmp_lambda), param->eta0[pid]); } } if( data->preyNinter[pid] < ((int) ((param->freqgroup) * ((float) data->nIP))) ) { tmp = ((float) data->nIP) - ((float) data->preyNinter[pid]); tmp_lambda = false_avg + prior->theta_mu[i] - cur_mu; tmp = tmp * log_poisson_g_prop(0.0, exp(tmp_lambda), param->eta0[pid]); prob[i] += tmp; } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_mu[pid] = ranMultinom(r, prob, _MAX_COMP_); param->mu[pid] = prior->theta_mu[prior->w_mu[pid]]; for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; param->lambda_false[id] += param->mu[pid] - cur_mu; } } void DP_mu_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale, tmp, false_avg, tmp_lambda; scale = prior->gamma_mu[pid] / (1.0 - prior->gamma_mu[pid]); if(inuse[pid] == 0) { newval = gsl_ran_gaussian(r, sqrt(prior->v_mu)) + prior->m_mu; Delta = newval - prior->theta_mu[pid]; prior->theta_mu[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 0.1); for(i=0;inprey;i++) { if(data->preyFlag[i] == 0) { false_avg = 0.0; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; false_avg += param->lambda_false[id]; } false_avg /= ((float) data->preyNinter[i]); if(prior->w_mu[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(data->ctrl[data->i2IP[id]] || param->Z[data->a2u[id]] == 0) { tmp = data->d[id]; param->lambda_false_tmp[id] = param->lambda_false[id] + Delta; mhratio += log_poisson_g_prop(tmp, exp(param->lambda_false_tmp[id]), param->eta0[i]) - log_poisson_g_prop(tmp, exp(param->lambda_false[id]), param->eta0[i]); } } if( data->preyNinter[i] < ((int) ((param->freqgroup) * ((float) data->nIP))) ) { tmp = ((float) data->nIP) - ((float) data->preyNinter[i]); tmp_lambda = false_avg + Delta; tmp = tmp * ( log_poisson_g_prop(0.0, exp(tmp_lambda), param->eta0[i]) - log_poisson_g_prop(0.0, exp(false_avg), param->eta0[i]) ); mhratio += tmp; } } } } mhratio += log_gaussian(prior->theta_mu[pid] + Delta, prior->m_mu, prior->v_mu) - log_gaussian(prior->theta_mu[pid], prior->m_mu, prior->v_mu); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; /* if accepted, update param and lambda */ if(accept) { prior->theta_mu[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_mu[i] == pid) { param->mu[i] += Delta; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; param->lambda_false[id] += Delta; } } } } } } void DP_mu(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) inuse[prior->w_mu[i]] = 1; DP_mu_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_mu_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_mu_theta(param, prior, data, r, i, inuse); /* loglik update */ mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_mu[i] * prior->theta_mu[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_mu[i] -= mean; for(i=0;inprey;i++) param->mu[i] -= mean; param->betac += mean; } SAINT_v2.3.4/src/SAINTspc-noctrl/compile0000777000000000000000000000015511746171455016414 0ustar rootroot#! /bin/sh gcc -Wall -c *.c *.h gcc *.o -lgsl -lgslcblas -lm -o ../saint-spc-noctrl rm -rf *.o rm -rf *.gch SAINT_v2.3.4/src/SAINTspc-noctrl/likelihood.c0000666000000000000000000000765411746171455017340 0ustar rootroot#include "saint.h" /***************************************************/ /* computing likelihoods in log scale */ /***************************************************/ float log_poisson_prop(float N, float lambda) { float res = -lambda + N * log(lambda); return res; } float log_poisson_g_prop(float N, float lambda, float theta) { float lambda1, lambda2, out; lambda2 = 1.0 - 1.0 / sqrt(theta); lambda1 = lambda / sqrt(theta); out = log(lambda1) + (N - 1.0) * log(lambda1 + N * lambda2) - (lambda1 + N * lambda2); return out; } /*************************/ /* all interactions */ /*************************/ float LRprop(PARAM *param, PRIOR *prior, DATA *data) { int i,j,id; float pos, neg, maxl, tmp; float false_avg, true_avg; float lik_new, lik_old; lik_new = 0.0; lik_old = 0.0; for(i=0;inuinter;i++) { pos = 0.0; neg = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(1) { pos += log_poisson_g_prop(data->d[id], exp(param->lambda_true[id]), param->eta[data->i2p[id]]); neg += log_poisson_g_prop(data->d[id], exp(param->lambda_false[id]), param->eta0[data->i2p[id]]); } } maxl = pos > neg ? pos : neg; pos = exp(pos - maxl); neg = exp(neg - maxl); lik_new += log(param->ptrue_tmp * pos + (1.0-param->ptrue_tmp) * neg); lik_old += log(param->ptrue * pos + (1.0-param->ptrue) * neg); } for(i=0;inprey;i++) { if(data->preyFlag[i] == 0) { false_avg = 0.0; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; false_avg += param->lambda_false[id]; } false_avg /= ((float) data->preyNinter[i]); true_avg = 0.0; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; true_avg += param->lambda_true[id]; } true_avg /= ((float) data->preyNinter[i]); tmp = ((float) (data->nIP - data->preyNinter[i])); pos = tmp * log_poisson_g_prop(0.0, exp(true_avg), param->eta[i]); neg = tmp * log_poisson_g_prop(0.0, exp(false_avg), param->eta0[i]); maxl = pos > neg ? pos : neg; pos = exp(pos - maxl); neg = exp(neg - maxl); lik_new += log(param->ptrue_tmp * pos + (1.0-param->ptrue_tmp) * neg); lik_old += log(param->ptrue * pos + (1.0-param->ptrue) * neg); } } return lik_new - lik_old; } float loglik_all(PARAM *param, PRIOR *prior, DATA *data) { int i; float lambda; float lik = 0.0; for(i=0;ininter;i++) { if(param->Z[data->a2u[i]]) { lambda = exp(param->lambda_true[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta[data->i2p[i]]); } else { lambda = exp(param->lambda_false[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta0[data->i2p[i]]); } } return lik; } float loglik_all_class(PARAM *param, PRIOR *prior, DATA *data, int cl) { /* loglik by class */ int i; float lambda; float lik = 0.0; for(i=0;ininter;i++) { if(cl) { if(param->Z[data->a2u[i]]) { lambda = exp(param->lambda_true[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta[data->i2p[i]]); } } else { if(param->Z[data->a2u[i]] == 0) { lambda = exp(param->lambda_false[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta0[data->i2p[i]]); } } } return lik; } float loglik_all_class_tmp(PARAM *param, PRIOR *prior, DATA *data, int cl) { /* loglik by class */ int i; float lambda; float lik = 0.0; for(i=0;ininter;i++) { if(cl) { if(param->Z[data->a2u[i]]) { lambda = exp(param->lambda_true_tmp[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta[data->i2p[i]]); } } else { if(param->Z[data->a2u[i]] == 0) { lambda = exp(param->lambda_false_tmp[i]); lik += log_poisson_g_prop(data->d[i], lambda, param->eta0[data->i2p[i]]); } } } return lik; } SAINT_v2.3.4/src/SAINTspc-noctrl/initdata.c0000666000000000000000000004402011746171455016776 0ustar rootroot#include "saint.h" /*************************/ /* read interaction data */ /*************************/ void read_interaction_data(FILE *fpinter, DATA *data) { int i; char buf[1000]; data->ninter = nrow(fpinter); rewind(fpinter); assert(data->prey = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->prey[i] = (char *) calloc(500, sizeof(char))); assert(data->bait = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->bait[i] = (char *) calloc(500, sizeof(char))); assert(data->ip = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->ip[i] = (char *) calloc(500, sizeof(char))); assert(data->d = (float *) calloc(data->ninter, sizeof(float))); assert(data->d2 = (float *) calloc(data->ninter, sizeof(float))); assert(data->iprob = (float *) calloc(data->ninter, sizeof(float))); assert(data->l = (float *) calloc(data->ninter, sizeof(float))); assert(data->c = (float *) calloc(data->ninter, sizeof(float))); assert(data->a2u = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) { fscanf(fpinter, "%s", buf); strcpy(data->ip[i], buf); fscanf(fpinter, "%s", buf); strcpy(data->bait[i], buf); fscanf(fpinter, "%s", buf); strcpy(data->prey[i], buf); fscanf(fpinter, "%s", buf); data->d2[i] = atof(buf); if(data->d2[i] >= _TRUNC_) data->d[i] = _TRUNC_; else data->d[i] = data->d2[i]; /* fprintf(stderr, "%s\t%s\t%s\t%f\n", data->ip[i], data->bait[i], data->prey[i], data->d[i]); */ } } /***********************************************************************************************************/ /*********************************************************************************/ /* make unique interaction data and identify mapping between unique and all data */ /*********************************************************************************/ void find_unique_interaction(DATA *data) { int i,j,cur; int baitCompare, preyCompare; int isUnique[data->ninter]; int nInstance[data->ninter]; /* this counts at the level of unique interactions */ int counter[data->ninter]; /* same as above, used for mapping unique->individual */ for(i=0;ininter;i++) { isUnique[i] = 1; nInstance[i] = 0; counter[i] = 0; } /* scan 1~n to mark unique interactions and count instances of each */ cur = 0; for(i=0;i<(data->ninter-1);i++) { if(isUnique[i]) { (nInstance[cur])++; for(j=(i+1);jninter;j++) { if(isUnique[j]) { baitCompare = strcmp(data->bait[i], data->bait[j]); preyCompare = strcmp(data->prey[i], data->prey[j]); if(baitCompare == 0 && preyCompare == 0) { isUnique[j] = 0; (nInstance[cur])++; } } } cur++; } } if(isUnique[data->ninter-1]) { (nInstance[cur])++; cur++; } /* count # unique interactions */ data->nuinter = 0; for(i=0;ininter;i++) { if(isUnique[i]) data->nuinter++; } fprintf(stdout, "%d interactions (%d observations)\n", data->nuinter, data->ninter); /* memory business for unique interactions */ assert(data->uprey = (char **) calloc(data->nuinter, sizeof(char *))); for(i=0;inuinter;i++) assert(data->uprey[i] = (char *) calloc(500, sizeof(char))); assert(data->ubait = (char **) calloc(data->nuinter, sizeof(char *))); for(i=0;inuinter;i++) assert(data->ubait[i] = (char *) calloc(500, sizeof(char))); assert(data->prob = (float *) calloc(data->nuinter, sizeof(float))); /* copy unique interactions */ cur = 0; for(i=0;ininter;i++) { if(isUnique[i]) { strcpy(data->uprey[cur], data->prey[i]); strcpy(data->ubait[cur], data->bait[i]); data->prob[cur] = 0.0; cur++; } } if(data->nuinter > cur) fprintf(stderr, "Warning: possibly missed some unique interactions\n"); else if(data->nuinter < cur) fprintf(stderr, "Warning: too many unique interactions, check mapping\n"); else {} /* mapping between individual and unique interactions */ assert(data->n_u2a = (int *) calloc(data->nuinter, sizeof(int))); assert(data->u2a = (int **) calloc(data->nuinter, sizeof(int *))); for(i=0;inuinter;i++) data->n_u2a[i] = nInstance[i]; for(i=0;inuinter;i++) { assert(data->u2a[i] = (int *) calloc(data->n_u2a[i], sizeof(int))); } cur = 0; /* current index of unique */ for(i=0;ininter;i++) { if(isUnique[i]) { data->a2u[i] = cur; data->u2a[cur][counter[cur]] = i; (counter[cur])++; for(j=(i+1);jninter;j++) { if(isUnique[j] == 0) { baitCompare = strcmp(data->bait[i], data->bait[j]); preyCompare = strcmp(data->prey[i], data->prey[j]); if(baitCompare == 0 && preyCompare == 0) { data->a2u[j] = cur; data->u2a[cur][counter[cur]] = j; (counter[cur])++; } } } cur++; } } } /***********************************************************************************************************/ /*****************************************************/ /* make indicators of uniqueness in character arrays */ /* returns the number of unique elements */ /*****************************************************/ int unique_elements(char **x, int *unique, int nx) { int i,j; int nunique = nx; for(i=0;inprey;i++) data->preyNinter[i] = 0; assert(data->i2p = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2p[i] = -1; for(i=0;inprey;i++) { for(j=0;jninter;j++) { if(strcmp(data->PREY[i], data->prey[j]) == 0) { (data->preyNinter[i])++; data->i2p[j] = i; } } } assert(data->p2i = (int **) calloc(data->nprey, sizeof(int *))); for(i=0;inprey;i++) assert(data->p2i[i] = (int *) calloc(data->preyNinter[i], sizeof(int))); for(i=0;inprey;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->PREY[i], data->prey[j]) == 0) { data->p2i[i][cur] = j; cur++; } if(cur >= data->preyNinter[i]) break; } } assert(data->ui2p = (int *) calloc(data->nuinter, sizeof(int))); for(i=0;inprey;i++) { for(j=0;jnuinter;j++) { if(strcmp(data->PREY[i], data->uprey[j]) == 0) { data->ui2p[j] = i; } } } /* report which prey in the prey file did not show up in the interaction file */ cur = 0; for(i=0;inprey;i++) { if(data->preyNinter[i] == 0) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp1 = fopen("PreysNotInData", "w"); for(i=0;inprey;i++) { if(data->preyNinter[i] == 0) fprintf(fptemp1, "%s\n", data->PREY[i]); } fclose(fptemp1); chdir(".."); return 1; } /* report which prey in the interaction file did not show up in the prey file */ cur = 0; for(i=0;ininter;i++) { if(data->i2p[i] == -1) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp2 = fopen("PreysNotInList", "w"); for(i=0;ininter;i++) { if(data->i2p[i] == -1) fprintf(fptemp2, "%d\t%s\t%s\t%s\n", i+1, data->ip[i], data->bait[i], data->prey[i]); } fclose(fptemp2); chdir(".."); return 1; } return 0; } /**************************************************************/ /* read prey data and check discrepancy with interaction data */ /**************************************************************/ void read_prey_data(FILE *fpprey, DATA *data) { int i, nprey; char buf[256]; nprey = nrow(fpprey); rewind(fpprey); data->nprey = nprey; assert(data->PREY = (char **) calloc(nprey, sizeof(char *))); for(i=0;iPREY[i] = (char *) calloc(500, sizeof(char))); assert(data->PREYGENE = (char **) calloc(nprey, sizeof(char *))); for(i=0;iPREYGENE[i] = (char *) calloc(500, sizeof(char))); assert(data->preyLength = (float *) calloc(nprey, sizeof(float))); assert(data->preyNinter = (int *) calloc(nprey, sizeof(int))); assert(data->preyFlag = (int *) calloc(nprey, sizeof(int))); assert(data->ctrlavg = (float *) calloc(nprey, sizeof(float))); for(i=0;iPREY[i], buf); fscanf(fpprey, "%s", buf); data->preyLength[i] = atof(buf); /* fprintf(stderr, "%s\t%f\n", data->PREY[i], data->preyLength[i]); */ data->ctrlavg[i] = 0.0; fscanf(fpprey, "%s", buf); strcpy(data->PREYGENE[i], buf); } centerData(data->preyLength, nprey, 1); mapPreyToData(data); for(i=0;ininter;i++) data->l[i] = data->preyLength[data->i2p[i]]; } /***********************************************************************************************************/ void mapIPtoBait(DATA *data) { int i,j; int nbait, nIP, cur; char temp[data->nIP][500]; int uniqueBaits[data->nIP]; nIP = data->nIP; nbait = unique_elements(data->BAIT, uniqueBaits, nIP); data->nbait = nbait; assert(data->baitNIP = (int *) calloc(nbait, sizeof(int))); for(i=0;inbait;i++) data->baitNIP[i] = 0; cur = 0; for(i=0;inIP;i++) { if(uniqueBaits[i]) { strcpy(temp[cur], data->BAIT[i]); cur++; } } if(cur != data->nbait) fprintf(stderr, "check bait-IP file\n"); for(i=0;inbait;i++) { cur = 0; for(j=0;jnIP;j++) { if(strcmp(temp[i], data->BAIT[j]) == 0) { cur++; } } data->baitNIP[i] = cur; } assert(data->IP2b = (int *) calloc(data->nIP, sizeof(int))); assert(data->b2IP = (int **) calloc(data->nbait, sizeof(int *))); for(i=0;inbait;i++) assert(data->b2IP[i] = (int *) calloc(data->baitNIP[i], sizeof(int))); for(i=0;inbait;i++) { cur = 0; for(j=0;jnIP;j++) { if(strcmp(temp[i], data->BAIT[j]) == 0) { data->IP2b[j] = i; data->b2IP[i][cur] = j; cur++; } } data->baitNIP[i] = cur; } for(i=0;inbait;i++) strcpy(data->BAIT[i], temp[i]); } int mapIPBaitToData(DATA *data) { /* Part I: bait to data */ int i,j; int cur; assert(data->baitNinter = (int *) calloc(data->nbait, sizeof(int))); for(i=0;inbait;i++) data->baitNinter[i] = 0; for(i=0;inIP;i++) data->IPNinter[i] = 0; assert(data->i2b = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2b[i] = -1; assert(data->i2IP = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2IP[i] = -1; for(i=0;inIP;i++) { for(j=0;jninter;j++) { if(strcmp(data->IP[i], data->ip[j]) == 0) { (data->IPNinter[i])++; data->i2IP[j] = i; } } } for(i=0;inbait;i++) { for(j=0;jninter;j++) { if(strcmp(data->BAIT[i], data->bait[j]) == 0) { (data->baitNinter[i])++; data->i2b[j] = i; } } } assert(data->IP2i = (int **) calloc(data->nIP, sizeof(int *))); for(i=0;inIP;i++) assert(data->IP2i[i] = (int *) calloc(data->IPNinter[i], sizeof(int))); for(i=0;inIP;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->IP[i], data->ip[j]) == 0) { data->IP2i[i][cur] = j; cur++; } if(cur >= data->IPNinter[i]) break; } } assert(data->b2i = (int **) calloc(data->nbait, sizeof(int *))); for(i=0;inbait;i++) assert(data->b2i[i] = (int *) calloc(data->baitNinter[i], sizeof(int))); for(i=0;inbait;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->BAIT[i], data->bait[j]) == 0) { data->b2i[i][cur] = j; cur++; } if(cur >= data->baitNinter[i]) break; } } /* from unique interactions to bait/IP */ assert(data->ui2b = (int *) calloc(data->nuinter, sizeof(int))); for(i=0;inbait;i++) { for(j=0;jnuinter;j++) { if(strcmp(data->BAIT[i], data->ubait[j]) == 0) data->ui2b[j] = i; } } /* report which bait/IP in the bait file did not show up in the interaction file */ cur = 0; for(i=0;inbait;i++) { if(data->IPNinter[i] == 0) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp1 = fopen("IPNotInData", "w"); for(i=0;inIP;i++) { if(data->IPNinter[i] == 0) fprintf(fptemp1, "%s\t%s\n", data->IP[i], data->BAIT[i]); } fclose(fptemp1); chdir(".."); return 1; } /* report which baits/IPs in the interaction file did not show up in the bait/IP file */ cur = 0; for(i=0;ininter;i++) { if(data->i2IP[i] == -1) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp2 = fopen("IPNotInList", "w"); for(i=0;ininter;i++) { if(data->i2IP[i] == -1) fprintf(fptemp2, "%d\t%s\t%s\t%s\n", i+1, data->ip[i], data->bait[i], data->prey[i]); } fclose(fptemp2); chdir(".."); return 1; } return 0; } void getIPinfo(DATA *data) { int i,j; int IPmatch, BAITmatch, PREYmatch; char buf[256]; assert(data->IPbaitCoverage = (float *) calloc(data->nIP, sizeof(float))); assert(data->IPtotalAbundance = (float *) calloc(data->nIP, sizeof(float))); for(i=0;inIP;i++) { data->IPbaitCoverage[i] = 0.0; data->IPtotalAbundance[i] = 0.0; } for(i=0;inIP;i++) { strcpy(buf, data->BAIT[data->IP2b[i]]); for(j=0;jninter;j++) { IPmatch = strcmp(data->ip[j], data->IP[i]); BAITmatch = strcmp(data->bait[j], buf); PREYmatch = strcmp(data->prey[j], buf); if(IPmatch == 0) { data->IPtotalAbundance[i] += data->d[j]; if(BAITmatch == 0 && PREYmatch == 0) data->IPbaitCoverage[i] = data->d[j] / data->preyLength[data->i2p[j]]; } } /* if(data->IPbaitCoverage[i] == 0.0) { fprintf(stderr, "IP %s (bait %s) has no bait-bait interaction\n", data->IP[i], data->BAIT[data->IP2b[i]]); } */ } /* for(i=0;inIP;i++) { fprintf(stderr, "%d: %s %s %.2f %.2f\n", i+1, data->IP[i], data->BAIT[data->IP2b[i]], data->IPbaitCoverage[i], data->IPtotalAbundance[i]); } */ } /**************************************************************/ /* read bait data and check discrepancy with interaction data */ /**************************************************************/ void read_bait_data(FILE *fpbait, DATA *data) { int i, nbait, nIP; char buf[256]; nIP = nrow(fpbait); rewind(fpbait); data->nIP = nIP; data->nctrl = 0; data->ntest = 0; assert(data->BAIT = (char **) calloc(nIP, sizeof(char *))); for(i=0;iBAIT[i] = (char *) calloc(500, sizeof(char))); assert(data->IP = (char **) calloc(nIP, sizeof(char *))); for(i=0;iIP[i] = (char *) calloc(500, sizeof(char))); assert(data->ctrl = (int *) calloc(nIP, sizeof(int))); assert(data->IPNinter = (int *) calloc(nIP, sizeof(int))); for(i=0;iIP[i], buf); fscanf(fpbait, "%s", buf); strcpy(data->BAIT[i], buf); /* not unique at this point */ fscanf(fpbait, "%s", buf); if(buf[0] == 'C' || buf[0] == 'c') { data->ctrl[i] = 1; /* note that control is marked as 1, test is as 0 */ (data->nctrl)++; } else { data->ctrl[i] = 0; (data->ntest)++; } /* fprintf(stderr, "%s\t%s\t%d\n", data->IP[i], data->BAIT[i], data->ctrl[i]); */ } /* check whether IPs are unique or not */ mapIPtoBait(data); nbait = data->nbait; mapIPBaitToData(data); getIPinfo(data); /* bait coverage and total abundance */ centerData(data->IPbaitCoverage, nIP, 1); centerData(data->IPtotalAbundance, nIP, 1); /* these quantities are on log scale, mean centered now. */ for(i=0;ininter;i++) { data->c[i] = data->IPtotalAbundance[data->i2IP[i]]; } } /***********************************************************************************************************/ void set_ctrlavg(DATA *data) { int i; for(i=0;ininter;i++) { if(data->ctrl[data->i2IP[i]]) { if(data->ctrlavg[data->i2p[i]] < data->d[i]) data->ctrlavg[data->i2p[i]] = data->d[i]; // data->ctrlavg[data->i2p[i]] += data->d[i]; } } // for(i=0;inprey;i++) data->ctrlavg[i] /= ((float) data->nctrl); } void prey_flag(DATA *data, float *freq, float *freqgroup) { int i, tt; /* tt = ( (int) (((float) data->nIP) * (*freq)) ); */ tt = ( (int) (((float) data->nIP) * (*freqgroup)) ); for(i=0;inprey;i++) { if( data->preyNinter[i] > tt ) data->preyFlag[i] = 1; else data->preyFlag[i] = 0; } } /**************************************************************/ /* master function for reading the data */ /**************************************************************/ void read_data(FILE *fpinter, FILE *fpprey, FILE *fpbait, DATA *data, float *freq, float *freqgroup) { read_interaction_data(fpinter, data); find_unique_interaction(data); read_prey_data(fpprey, data); read_bait_data(fpbait, data); /* make a function to filter out interactions with no matching preys and baits */ set_ctrlavg(data); prey_flag(data, freq, freqgroup); } SAINT_v2.3.4/src/SAINTspc-noctrl/mcmc.c0000666000000000000000000001567411746171455016135 0ustar rootroot#include "saint.h" float log_gaussian(float x, float mu, float var) { float res = - .5 * pow(x-mu,2.0) / var - .5 * log(2.0 * M_PI * var); return res; } void sampleBeta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,accept; float likratio, lr_new, lr_old; float diff = gsl_ran_gaussian(r,0.2); for(i=0;ininter;i++) param->lambda_true_tmp[i] = param->lambda_true[i] + diff; lr_new = loglik_all_class_tmp(param, prior, data, 1); lr_old = loglik_all_class(param, prior, data, 1); likratio = lr_new - lr_old; likratio += log_gaussian(param->beta0 + diff, prior->m_beta, prior->v_beta) - log_gaussian(param->beta0, prior->m_beta, prior->v_beta); likratio = GSL_MIN(1.0, exp(likratio)); accept = gsl_ran_flat(r,0.0,1.0) <= likratio ? 1 : 0; if(accept) { param->beta0 += diff; for(i=0;ininter;i++) param->lambda_true[i] = param->lambda_true_tmp[i]; param->loglikTotal += (lr_new - lr_old); } } void sampleBetac(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,accept; float likratio, lr_new, lr_old; float diff = gsl_ran_gaussian(r,0.2); for(i=0;ininter;i++) param->lambda_false_tmp[i] = param->lambda_false[i] + diff; lr_new = loglik_all_class_tmp(param, prior, data, 0); lr_old = loglik_all_class(param, prior, data, 0); likratio = lr_new - lr_old; likratio += log_gaussian(param->betac + diff, prior->m_beta, prior->v_beta) - log_gaussian(param->betac, prior->m_beta, prior->v_beta); likratio = GSL_MIN(1.0, exp(likratio)); accept = gsl_ran_flat(r,0.0,1.0) <= likratio ? 1 : 0; if(accept) { param->betac += diff; for(i=0;ininter;i++) param->lambda_false[i] = param->lambda_false_tmp[i]; param->loglikTotal += (lr_new - lr_old); } } void sampleZ(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { /* Z and iZ */ int i,j,id,ct; int isCtrl, isReverse, isMaxOne; float prob, maxl, pt; float posi, negi; float posprob, negprob; float pos, neg, tmp, tmp_lambda, tmp_neg; int cond1; for(i=0;inuinter;i++) { isCtrl = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]] == 1) { isCtrl = 1; break; } } isReverse = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; cond1 = param->lambda_true[id] < param->lambda_false[id] ? 1 : 0; if(cond1) { /* isReverse = 1; */ break; } } isMaxOne = 1; for(j=0;jn_u2a[i];j++) { if(data->d[data->u2a[i][j]] > 1.0) isMaxOne = 0; } if(isMaxOne) { ct = 0; for(j=0;jn_u2a[i];j++) { if(data->d[data->u2a[i][j]] == 1.0) ct++; } if(ct >= 3) isMaxOne = 0; } if(isCtrl || isReverse || isMaxOne) { param->Z[i] = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; param->iZ[id] = 0; } } else { pos = 0.0; neg = 0.0; posprob = 0.0; negprob = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; pt = param->ptrue; tmp_lambda = exp(param->lambda_true[id]); if(param->lambda_true[id] < param->lambda_false[id] || data->preyFlag[data->i2p[id]]) { tmp_lambda = GSL_MAX(5.0 * GSL_MAX(exp(param->lambda_false[id]),0.1), tmp_lambda); } tmp = data->d[id] > tmp_lambda ? tmp_lambda : data->d[id] ; posi = log_poisson_g_prop(data->d[id], tmp_lambda, param->eta[data->i2p[id]]); tmp_neg = GSL_MAX(exp(param->lambda_false[id]), 0.1); negi = log_poisson_g_prop(data->d[id], tmp_neg, param->eta0[data->i2p[id]]); pos += posi; neg += negi; maxl = posi > negi ? posi : negi; posi -= maxl; negi -= maxl; prob = pt * exp(posi) / (pt * exp(posi) + (1.0-pt) * exp(negi)); posprob += prob; negprob += (1.0 - prob); param->iZ[id] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; if(data->d[id] == 0.0) param->iZ[id] = 0.0; } /* Z */ if(data->n_u2a[i] == 1) { id = data->u2a[i][0]; param->Z[i] = param->iZ[id]; } else { maxl = pos > neg ? pos : neg; pos -= maxl; neg -= maxl; prob = pt * exp(pos) / (pt * exp(pos) + (1.0-pt) * exp(neg)); posprob /= ((float) data->n_u2a[i]); negprob /= ((float) data->n_u2a[i]); param->Z[i] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; } } } } float logit(float x) { return log(x) - log(1-x); } float inverseLogit(float x) { return exp(x) / (1.0 + exp(x)); } void sampleProportion(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int accept; float mhratio; param->ptrue_tmp = inverseLogit(logit(param->ptrue) + gsl_ran_gaussian(r, 0.1)); mhratio = LRprop(param, prior, data); /* uniform prior, so no prior ratio, indep. symetric random walk, so no proposal ratio */ accept = gsl_ran_flat(r,0.0,1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0; if(accept) param->ptrue = param->ptrue_tmp; } void contaminant(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j,id; float freq; for(i=0;inprey;i++) { freq = 0.0; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(param->Z[data->a2u[id]]) freq += 1.0; } freq /= ((float) data->nIP); if(freq > param->freq) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; param->Z[data->a2u[id]] = 0; param->iZ[id] = 0; } } } } /**************************************/ /*** Metropolis-Hastings with Gibbs ***/ /**************************************/ void mhgibbs(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary, const gsl_rng *r, int updateSum) { if(gsl_ran_flat(r,0.0,1.0) <= 0.2) sampleBeta0(param, prior, data, r); if(gsl_ran_flat(r,0.0,1.0) <= 0.2) sampleBetac(param, prior, data, r); DP_alpha_prey(param, prior, data, r); DP_alpha_IP(param, prior, data, r); DP_mu(param, prior, data, r); if(param->modelvar) { if(gsl_ran_flat(r,0.0,1.0) <= 0.2) DP_eta(param, prior, data, r); if(gsl_ran_flat(r,0.0,1.0) <= 0.2) DP_eta0(param, prior, data, r); } sampleZ(param, prior, data, r); contaminant(param, prior, data, r); compute_lambda_all(param, prior, data); if(gsl_ran_flat(r,0.0,1.0) <= 0.2) sampleProportion(param, prior, data, r); } void write_mcmc(PARAM *param, PRIOR *prior, DATA *data, FILE *fp1, FILE *fp2, FILE *fp3, int ct) { int i; fprintf(fp1, "%d\t", ct+1); for(i=0;inprey-1;i++) { fprintf(fp1, "%.3f\t", param->alpha_prey[i]); } fprintf(fp1, "%.3f\n", param->alpha_prey[data->nprey-1]); fprintf(fp2, "%d\t", ct+1); for(i=0;inIP-1;i++) { fprintf(fp2, "%.3f\t", param->alpha_IP[i]); } fprintf(fp2, "%.3f\n", param->alpha_IP[data->nIP-1]); fprintf(fp3, "%d\t", ct+1); for(i=0;inprey-1;i++) { fprintf(fp3, "%.3f\t", param->mu[i]); } fprintf(fp3, "%.3f\n", param->mu[data->nprey-1]); } SAINT_v2.3.4/src/SAINTspc-noctrl/setparam.c0000666000000000000000000001113511746171455017016 0ustar rootroot#include "saint.h" /**************************************************************/ /* initializing the model parameters */ /**************************************************************/ void memory_param(PARAM *param, PRIOR *prior, DATA *data) { assert(param->loglik_prey = (float *) calloc(data->nprey, sizeof(float))); assert(param->loglik_IP = (float *) calloc(data->nIP, sizeof(float))); assert(param->alpha_prey = (float *) calloc(data->nprey, sizeof(float))); assert(param->alpha_IP = (float *) calloc(data->nIP, sizeof(float))); assert(param->mu = (float *) calloc(data->nprey, sizeof(float))); assert(param->eta = (float *) calloc(data->nprey, sizeof(float))); assert(param->eta0 = (float *) calloc(data->nprey, sizeof(float))); assert(param->iZ = (int *) calloc(data->ninter, sizeof(int))); assert(param->Z = (int *) calloc(data->nuinter, sizeof(int))); assert(param->lambda_true = (float *) calloc(data->ninter, sizeof(float))); assert(param->lambda_false = (float *) calloc(data->ninter, sizeof(float))); assert(param->lambda_true_tmp = (float *) calloc(data->ninter, sizeof(float))); assert(param->lambda_false_tmp = (float *) calloc(data->ninter, sizeof(float))); } void set_Z(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { /* Z and iZ */ int i,j,id; int isCtrl, isReverse, isLarge; float prob, maxl; float posi, negi; float pos, neg, tmp; for(i=0;inuinter;i++) { isCtrl = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]] == 1) { isCtrl = 1; break; } } isReverse = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(param->lambda_true[id] < param->lambda_false[id] || exp(param->lambda_false[id]) >= 5.0) { isReverse = 1; break; } } isLarge = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->d[id] >= 5.0) { isLarge = 1; break; } } if(isLarge && !isCtrl) { param->Z[i] = 1; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->d[id] > 0.0) param->iZ[id] = 1; } } else if(isCtrl || isReverse) { param->Z[i] = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; param->iZ[id] = 0; } } else { pos = 0.0; neg = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; tmp = data->d[id]; posi = log_poisson_g_prop(tmp, exp(param->lambda_true[id]), param->eta[data->i2p[id]]); /* tmp = data->d[id] < exp(param->lambda_false[id]) ? exp(param->lambda_false[id]) : data->d[id]; */ negi = log_poisson_g_prop(tmp, exp(param->lambda_false[id]), param->eta0[data->i2p[id]]); pos += posi; neg += negi; maxl = posi > negi ? posi : negi; posi -= maxl; negi -= maxl; prob = param->ptrue * exp(posi) / (param->ptrue * exp(posi) + (1.0-param->ptrue) * exp(negi)); param->iZ[id] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; } /* Z */ if(data->n_u2a[i] == 1) { id = data->u2a[i][0]; param->Z[i] = param->iZ[id]; } else { maxl = pos > neg ? pos : neg; pos -= maxl; neg -= maxl; prob = param->ptrue * exp(pos) / (param->ptrue * exp(pos) + (1.0-param->ptrue) * exp(neg)); param->Z[i] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; } } } } void initialize_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; param->beta0 = 0.0; param->betac = 0.0; for(i=0;inprey;i++) param->alpha_prey[i] = prior->theta_alpha_prey[prior->w_alpha_prey[i]]; for(i=0;inIP;i++) { param->alpha_IP[i] = 0.0; /* if(data->ctrl[i] == 0) param->alpha_IP[i] = prior->theta_alpha_IP[prior->w_alpha_IP[i]]; else param->alpha_IP[i] = 0.0; */ } for(i=0;inprey;i++) param->mu[i] = prior->theta_mu[prior->w_mu[i]]; if(param->modelvar) { for(i=0;inprey;i++) param->eta[i] = prior->theta_eta[prior->w_eta[i]]; for(i=0;inprey;i++) param->eta0[i] = prior->theta_eta0[prior->w_eta0[i]]; } else { for(i=0;inprey;i++) param->eta[i] = 1.0; for(i=0;inprey;i++) param->eta0[i] = 1.0; } compute_lambda_all(param, prior, data); set_Z(param, prior, data, r); /* param->loglikTotal = loglik_all(param, prior, data); */ param->ptrue = 0.1; param->ptrue_tmp = 0.1; } void set_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { memory_param(param, prior, data); initialize_param(param, prior, data, r); } SAINT_v2.3.4/src/SAINTspc-noctrl/dpeta.c0000666000000000000000000000774611746171455016314 0ustar rootroot#include "saint.h" /********* ALPHA_prey *********/ void DP_eta_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inprey;i++) (wsum[prior->w_eta[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_eta) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_eta[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_eta[i] = gammap[i]; for(j=0;jgamma_eta[i] *= (1.0 - gammap[j]); } } void DP_eta_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_eta, tmp_lambda, maxl; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_eta[i]); cur_eta = param->eta[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(param->Z[data->a2u[id]]) tmp_lambda = param->lambda_true[id]; else tmp_lambda = param->lambda_false[id]; if(data->d[id] > 0.0) prob[i] += log_poisson_g_prop(data->d[id], exp(tmp_lambda), prior->theta_eta[i]); } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_eta[pid] = ranMultinom(r, prob, _MAX_COMP_); param->eta[pid] = prior->theta_eta[prior->w_eta[pid]]; } float log_exponential(float x, float mean) { float res = -log(mean) - x / mean; return res; } void DP_eta_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale, tmp_lambda, tmp; scale = prior->gamma_eta[pid] / (1.0 - prior->gamma_eta[pid]); if(inuse[pid] == 0) { newval = gsl_ran_exponential(r, prior->mean_eta) + 1.0; Delta = newval - prior->theta_eta[pid]; prior->theta_eta[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 1.0); if(prior->theta_eta[pid] + Delta <= 1.0 || prior->theta_eta[pid] + Delta > 100.0) { accept = 0; } else { for(i=0;inprey;i++) { if(prior->w_eta[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(param->Z[data->a2u[id]]) tmp_lambda = param->lambda_true[id]; else tmp_lambda = param->lambda_false[id]; /* if(param->Z[data->a2u[id]]) */ tmp = data->d[id] < exp(param->lambda_false[id]) && param->lambda_false[id] < param->lambda_true[id] ? exp(param->lambda_false[id]) : data->d[id]; mhratio += log_poisson_g_prop(tmp, exp(tmp_lambda), prior->theta_eta[pid]+Delta) - log_poisson_g_prop(tmp, exp(tmp_lambda), prior->theta_eta[pid]); } } } mhratio += log(gsl_ran_exponential_pdf(prior->theta_eta[pid]+Delta-1.0, prior->mean_eta)) - log(gsl_ran_exponential_pdf(prior->theta_eta[pid]-1.0, prior->mean_eta)); mhratio += -2.0 * (log(prior->theta_eta[pid]+ Delta) - log(prior->theta_eta[pid])); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; } /* if accepted, update param and lambda */ if(accept) { prior->theta_eta[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_eta[i] == pid) { param->eta[i] += Delta; } } } } } void DP_eta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) inuse[prior->w_eta[i]] = 1; DP_eta_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_eta_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_eta_theta(param, prior, data, r, i, inuse); /* loglik update */ } SAINT_v2.3.4/src/SAINTspc-noctrl/setsummary.c0000666000000000000000000001710611746171455017417 0ustar rootroot#include "saint.h" /**************************************************************/ /* initializing the model summaryeters */ /**************************************************************/ void memory_summary(SUMMARY *summary, DATA *data) { assert(summary->iZ = (float *) calloc(data->ninter, sizeof(float))); assert(summary->Z = (float *) calloc(data->nuinter, sizeof(float))); assert(summary->alpha_prey = (float *) calloc(data->nprey, sizeof(float))); assert(summary->alpha_IP = (float *) calloc(data->nIP, sizeof(float))); assert(summary->mu = (float *) calloc(data->nprey, sizeof(float))); assert(summary->eta = (float *) calloc(data->nprey, sizeof(float))); assert(summary->eta0 = (float *) calloc(data->nprey, sizeof(float))); assert(summary->lambda_true = (float *) calloc(data->ninter, sizeof(float))); assert(summary->lambda_false = (float *) calloc(data->ninter, sizeof(float))); } void initialize_summary(SUMMARY *summary, DATA *data) { int i; for(i=0;ininter;i++) summary->iZ[i] = 0.0; for(i=0;inuinter;i++) summary->Z[i] = 0.0; for(i=0;inprey;i++) summary->alpha_prey[i] = 0.0; for(i=0;inIP;i++) summary->alpha_IP[i] = 0.0; for(i=0;inprey;i++) summary->mu[i] = 0.0; for(i=0;inprey;i++) summary->eta[i] = 0.0; for(i=0;inprey;i++) summary->eta0[i] = 0.0; for(i=0;ininter;i++) summary->lambda_true[i] = 0.0; for(i=0;ininter;i++) summary->lambda_false[i] = 0.0; } void initialize_histogram(HISTOGRAM *hist) { int i; float binsize = ((float) (_HISTO_END_ - _HISTO_START_)) / ((float) _HISTO_BIN_); for(i=0;i<_HISTO_BIN_;i++) { hist->start[i] = _HISTO_START_ + ((float) i) * binsize; hist->end[i] = _HISTO_START_ + ((float) (i+1)) * binsize; } for(i=0;i<(_HISTO_BIN_+2);i++) hist->count[i] = 0.0; } void initialize_histogram2(HISTOGRAM2 *hist) { int i; float binsize = ((float) (_HISTO_END2_ - _HISTO_START2_)) / ((float) _HISTO_BIN2_); for(i=0;i<_HISTO_BIN2_;i++) { hist->start[i] = _HISTO_START2_ + ((float) i) * binsize; hist->end[i] = _HISTO_START2_ + ((float) (i+1)) * binsize; } for(i=0;i<(_HISTO_BIN2_+2);i++) hist->count[i] = 0.0; } void set_summary(SUMMARY *summary, DATA *data) { memory_summary(summary, data); initialize_summary(summary, data); initialize_histogram(&(summary->hist_alpha_prey)); initialize_histogram(&(summary->hist_alpha_IP)); initialize_histogram(&(summary->hist_mu)); initialize_histogram2(&(summary->hist_eta)); initialize_histogram2(&(summary->hist_eta0)); } void updateSummary(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary) { int i; for(i=0;ininter;i++) summary->iZ[i] += ((float) param->iZ[i]); for(i=0;inuinter;i++) summary->Z[i] += ((float) param->Z[i]); for(i=0;inprey;i++) summary->alpha_prey[i] += param->alpha_prey[i]; for(i=0;inIP;i++) summary->alpha_IP[i] += param->alpha_IP[i]; for(i=0;inprey;i++) summary->mu[i] += param->mu[i]; for(i=0;inprey;i++) summary->eta[i] += param->eta[i]; for(i=0;inprey;i++) summary->eta0[i] += param->eta0[i]; for(i=0;ininter;i++) summary->lambda_true[i] += param->lambda_true[i]; for(i=0;ininter;i++) summary->lambda_false[i] += param->lambda_false[i]; updateHistogram(param, prior, data, summary); } void scaleSummary(SUMMARY *summary, DATA *data, int iter) { int i,j,id; float sum; float scale = 1.0 / ((float) iter); for(i=0;ininter;i++) summary->iZ[i] *= scale; for(i=0;inuinter;i++) summary->Z[i] *= scale; for(i=0;inprey;i++) { sum = 0.0; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; sum += summary->Z[data->a2u[id]]; } sum /= ((float) data->nIP); if(sum >= summary->freq) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; summary->Z[data->a2u[id]] = 0.0; summary->iZ[id] = 0.0; } } } for(i=0;inprey;i++) summary->alpha_prey[i] *= scale; for(i=0;inIP;i++) summary->alpha_IP[i] *= scale; for(i=0;inprey;i++) summary->mu[i] *= scale; for(i=0;inprey;i++) summary->eta[i] *= scale; for(i=0;inprey;i++) summary->eta0[i] *= scale; for(i=0;ininter;i++) summary->lambda_true[i] *= scale; for(i=0;ininter;i++) summary->lambda_false[i] *= scale; } /*************************************/ /** Histogram updates **/ /*************************************/ void updateHist_alpha_prey(HISTOGRAM *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_alpha_prey[i] < hist->start[0]) { hist->count[0] += prior->gamma_alpha_prey[i]; } else if(prior->theta_alpha_prey[i] >= hist->end[_HISTO_BIN_-1]) { hist->count[_HISTO_BIN_ + 1] += prior->gamma_alpha_prey[i]; } else { for(j=0;j<_HISTO_BIN_;j++) { if(prior->theta_alpha_prey[i] >= hist->start[j] && prior->theta_alpha_prey[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_alpha_prey[i]; break; } } } } } void updateHist_alpha_IP(HISTOGRAM *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_alpha_IP[i] < hist->start[0]) { hist->count[0] += prior->gamma_alpha_IP[i]; } else if(prior->theta_alpha_IP[i] >= hist->end[_HISTO_BIN_-1]) { hist->count[_HISTO_BIN_ + 1] += prior->gamma_alpha_IP[i]; } else { for(j=0;j<_HISTO_BIN_;j++) { if(prior->theta_alpha_IP[i] >= hist->start[j] && prior->theta_alpha_IP[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_alpha_IP[i]; break; } } } } } void updateHist_mu(HISTOGRAM *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_mu[i] < hist->start[0]) { hist->count[0] += prior->gamma_mu[i]; } else if(prior->theta_mu[i] >= hist->end[_HISTO_BIN_-1]) { hist->count[_HISTO_BIN_ + 1] += prior->gamma_mu[i]; } else { for(j=0;j<_HISTO_BIN_;j++) { if(prior->theta_mu[i] >= hist->start[j] && prior->theta_mu[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_mu[i]; break; } } } } } void updateHist_eta(HISTOGRAM2 *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_eta[i] < hist->start[0]) { hist->count[0] += prior->gamma_eta[i]; } else if(prior->theta_eta[i] >= hist->end[_HISTO_BIN2_-1]) { hist->count[_HISTO_BIN2_ + 1] += prior->gamma_eta[i]; } else { for(j=0;j<_HISTO_BIN2_;j++) { if(prior->theta_eta[i] >= hist->start[j] && prior->theta_eta[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_eta[i]; break; } } } } } void updateHist_eta0(HISTOGRAM2 *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_eta0[i] < hist->start[0]) { hist->count[0] += prior->gamma_eta0[i]; } else if(prior->theta_eta0[i] >= hist->end[_HISTO_BIN2_-1]) { hist->count[_HISTO_BIN2_ + 1] += prior->gamma_eta0[i]; } else { for(j=0;j<_HISTO_BIN2_;j++) { if(prior->theta_eta0[i] >= hist->start[j] && prior->theta_eta0[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_eta0[i]; break; } } } } } void updateHistogram(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary) { updateHist_alpha_prey(&(summary->hist_alpha_prey), prior); updateHist_alpha_IP(&(summary->hist_alpha_IP), prior); updateHist_mu(&(summary->hist_mu), prior); updateHist_eta(&(summary->hist_eta), prior); updateHist_eta0(&(summary->hist_eta0), prior); } SAINT_v2.3.4/src/SAINTspc-noctrl/saint.c0000666000000000000000000001520211746171455016317 0ustar rootroot#include "saint.h" int nrow(FILE *fp) { char buf[10000]; int n = 0; while(fgets(buf, sizeof(buf), fp) != NULL) n++; return n; } int newlinechar(char *buf, int k) { int i; int found = 0; for(i=0;igamma_alpha_IP[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_alpha_IP[i]); fprintf(stderr, "\n"); */ fprintf(stderr, "\nDP_alpha_prey\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_alpha_prey[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_alpha_prey[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_mu\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_mu[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_mu[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_eta\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_eta[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_eta[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_eta0\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_eta0[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_eta0[i]); fprintf(stderr, "\n"); fprintf(stderr, "\n"); } int commandLine(int argc, char **argv) { if (argc < 7) { fprintf(stderr, "\nusage: saint-spc-noctrl [interactionFile] [preyFile] [baitFile] [nburnin] [niter] [fthres] [fgroup] [var] [normalize] \n\n-burnin and iter: burn-in period and main iteration of MCMC\n-fthres: frequency threshold above which probability is set to 0\n-fgroup: frequency boundary dividing high and low frequency groups\n-var: binary [0/1] indicator for modeling variance of the count data distributions\n-normalize: whether to normalize the counts by total spectral counts\n\n"); return 1; } /* interaction file: IPnumber \t bait \t prey \t spectralCount \n */ /* prey file: prey \t sequenceLength \n */ /* bait file: bait \t IPnumber \t isControl \n */ FILE *fpinter = fopen(argv[1], "r"); FILE *fpprey = fopen(argv[2], "r"); FILE *fpbait = fopen(argv[3], "r"); if(fpinter == NULL) { fprintf(stderr, "Cannot locate interaction data %s.\n", argv[1]); return 1; } if(fpprey == NULL) { fprintf(stderr, "Cannot locate prey data %s.\n", argv[2]); return 1; } if(fpbait == NULL) { fprintf(stderr, "Cannot locate bait data %s.\n", argv[3]); return 1; } if(argc < 5) { fprintf(stderr, "The number of burnin was not provided. Set to 2,000.\n"); burn = 2000; } else { burn = atoi(argv[4]); } if(argc < 6) { fprintf(stderr, "The number of main interations was not provided. Set to 10,000.\n"); iter = 10000; } else { iter = atoi(argv[5]); } if(argc < 7) { fprintf(stderr, "The frequency threshold was not provided. Set to 0.1.\n"); freq = 0.1; } else { freq = atof(argv[6]); } if(argc < 8) { fprintf(stderr, "The frequency group boundary was not provided. Set to 0.01.\n"); freqgroup = 0.01; } else { freqgroup = atof(argv[7]); } if(argc < 9) { fprintf(stderr, "The indicator for variance modelling was not provided. Set to 0 (Yes).\n"); modelvar = 0; } else { modelvar = atoi(argv[8]); } if(argc < 9) { fprintf(stderr, "The indicator for normalization was not provided. Set to 0 (Yes).\n"); NORMALIZE = 0; } else { NORMALIZE = atoi(argv[9]); } fclose(fpinter); fclose(fpprey); fclose(fpbait); return 0; } /***************************** MAIN ***************************/ int main(int argc, char **argv) { int i,ct; DATA data; PARAM param; PRIOR prior; SUMMARY summary; const gsl_rng_type *T; gsl_rng *r; gsl_rng_env_setup(); T = gsl_rng_default; r = gsl_rng_alloc(T); /* Command Line */ if(commandLine(argc, argv)) return 1; FILE *fpinter = fopen(argv[1], "r"); FILE *fpprey = fopen(argv[2], "r"); FILE *fpbait = fopen(argv[3], "r"); /* Read interaction data, identify baits, preys, and IPs, make unique interaction data frame, identify the mapping between different levels of data */ system("mkdir LOG"); /* error logs */ system("mkdir MAPPING"); /* mapping logs */ system("mkdir MCMC"); /* posterior samples */ system("mkdir RESULT"); /* posterior probabilities, other summaries */ fprintf(stderr, "Reading data and mapping interactions\n"); read_data(fpinter, fpprey, fpbait, &data, &freq, &freqgroup); printMap(&data); /* Set up model parameters and prior elicitation */ set_prior(¶m, &prior, &data, r); set_param(¶m, &prior, &data, r); set_summary(&summary, &data); param.freq = freq; param.freqgroup = freqgroup; param.modelvar = modelvar; summary.freq = freq; /* updates and summary */ chdir("MCMC"); FILE *fp1 = fopen("alpha_prey","w"); FILE *fp2 = fopen("alpha_IP","w"); FILE *fp3 = fopen("mu","w"); /* burnin */ ct = 0; fprintf(stderr, "Burn-in Period\n"); for(i=0;ininter;i++) { if(data->ctrl[data->i2IP[i]] == 0) fprintf(fp, "%s\t%s\t%s\t%d\t%.3f\t%.3f\n", data->ip[i], data->bait[i], data->prey[i], (int) data->d2[i], summary->Z[data->a2u[i]], summary->iZ[i]); } fclose(fp); } void write_unique_interactions(DATA *data, SUMMARY *summary) { int i,j; int isCtrl; int id; int countsum; float maxp, avgp, geop, tmp; FILE *fp = fopen("unique_interactions", "w"); fprintf(fp, "Bait\tPrey\tPreyGene\tIP\tSpec\tSpecSum\tNumRep\tProb\tiProb\tAvgP\tMaxP\n"); for(i=0;inuinter;i++) { isCtrl = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]]) isCtrl = 1; } if(isCtrl == 0) { fprintf(fp, "%s\t%s\t%s\t", data->ubait[i], data->uprey[i], data->PREYGENE[data->ui2p[i]]); for(j=0;j<(data->n_u2a[i]-1);j++) fprintf(fp, "%s|", data->ip[data->u2a[i][j]]); fprintf(fp, "%s\t", data->ip[data->u2a[i][data->n_u2a[i]-1]]); countsum = 0; for(j=0;j<(data->n_u2a[i]-1);j++) { countsum += ((int) data->d2[data->u2a[i][j]]); fprintf(fp, "%d|", (int) data->d2[data->u2a[i][j]]); } countsum += ((int) data->d2[data->u2a[i][data->n_u2a[i]-1]]); fprintf(fp, "%d\t", (int) data->d2[data->u2a[i][data->n_u2a[i]-1]]); fprintf(fp, "%d\t%d\t", countsum, data->n_u2a[i]); fprintf(fp, "%.2f\t", summary->Z[i]); for(j=0;j<(data->n_u2a[i]-1);j++) fprintf(fp, "%.2f|", summary->iZ[data->u2a[i][j]]); fprintf(fp, "%.2f\t", summary->iZ[data->u2a[i][data->n_u2a[i]-1]]); /* maxp */ maxp = 0.0; avgp = 0.0; geop = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(summary->iZ[id] > maxp) maxp = summary->iZ[id]; avgp += summary->iZ[id] / ((float) data->n_u2a[i]); tmp = data->d[id] == 0.0 ? 0.001 : summary->iZ[id]; geop += log(tmp) / ((float) data->n_u2a[i]); } geop = exp(geop); fprintf(fp, "%.4f\t%.4f\n", avgp, maxp); /* fprintf(fp, "%.2f\n", ((float) data->preyNinter[data->ui2p[i]]) / ((float) data->nIP)); */ } } fclose(fp); } void write_prey(DATA *data, SUMMARY *summary) { int i; FILE *fp = fopen("preys", "w"); fprintf(fp, "Prey\tAlpha_prey\tMu\n"); for(i=0;inprey;i++) { fprintf(fp, "%s\t%.2f\t%.2f\n", data->PREY[i], summary->alpha_prey[i], summary->mu[i]); } fclose(fp); } void write_IP(DATA *data, SUMMARY *summary) { int i; FILE *fp = fopen("IPs", "w"); fprintf(fp, "IP\tBait\tAlpha_IP\n"); for(i=0;inIP;i++) { fprintf(fp, "%s\t%s\t%.2f\n", data->IP[i], data->BAIT[data->IP2b[i]], summary->alpha_IP[i]); } fclose(fp); } void write_bait(DATA *data, SUMMARY *summary) { int i,j; FILE *fp = fopen("baits", "w"); fprintf(fp, "Bait\tIP\tAlpha_IP\n"); for(i=0;inbait;i++) { fprintf(fp, "%s\t", data->BAIT[i]); for(j=0;jbaitNIP[i]-1;j++) { fprintf(fp, "%s|", data->IP[data->b2IP[i][j]]); } fprintf(fp, "%s\t", data->IP[data->b2IP[i][data->baitNIP[i]-1]]); for(j=0;jbaitNIP[i]-1;j++) { fprintf(fp, "%.2f|", summary->alpha_IP[data->b2IP[i][j]]); } fprintf(fp, "%.2f\n", summary->alpha_IP[data->b2IP[i][data->baitNIP[i]-1]]); } fclose(fp); } void write_histogram(FILE *fp, HISTOGRAM *hist) { int i; fprintf(fp, "-inf\t%.2f\t%.2f\n", hist->start[0], hist->count[0]); for(i=0;i<_HISTO_BIN_;i++) { fprintf(fp, "%.2f\t%.2f\t%.2f\n", hist->start[i], hist->end[i], hist->count[i+1]); } fprintf(fp, "%.2f\tinf\t%.2f\n", hist->end[_HISTO_BIN_-1], hist->count[_HISTO_BIN_+1]); } void write_histogram2(FILE *fp, HISTOGRAM2 *hist) { int i; fprintf(fp, "-inf\t%.2f\t%.2f\n", hist->start[0], hist->count[0]); for(i=0;i<_HISTO_BIN2_;i++) { fprintf(fp, "%.2f\t%.2f\t%.2f\n", hist->start[i], hist->end[i], hist->count[i+1]); } fprintf(fp, "%.2f\tinf\t%.2f\n", hist->end[_HISTO_BIN2_-1], hist->count[_HISTO_BIN2_+1]); } void write_hyperprior(DATA *data, SUMMARY *summary) { FILE *fp1 = fopen("hist_alpha_prey", "w"); FILE *fp2 = fopen("hist_alpha_IP", "w"); FILE *fp3 = fopen("hist_mu", "w"); FILE *fp4 = fopen("hist_eta", "w"); FILE *fp5 = fopen("hist_eta0", "w"); write_histogram(fp1, &(summary->hist_alpha_prey)); write_histogram(fp2, &(summary->hist_alpha_IP)); write_histogram(fp3, &(summary->hist_mu)); write_histogram2(fp4, &(summary->hist_eta)); write_histogram2(fp5, &(summary->hist_eta0)); fclose(fp1); fclose(fp2); fclose(fp3); fclose(fp4); fclose(fp5); } void write_result(DATA *data, SUMMARY *summary) { chdir("RESULT"); write_interactions(data, summary); write_unique_interactions(data, summary); /* write_prey(data, summary); write_IP(data, summary); write_bait(data, summary); write_hyperprior(data, summary); */ write_matrix_data(data, summary); /* write_matrix_data2(data, summary); */ chdir(".."); } /******************************/ void write_matrix_data(DATA *data, SUMMARY *summary) { int i,j,k,id; int endLine, isMatch; FILE *fp = fopen("matrix_form","w"); endLine = -1; for(j=0;jnIP;j++) { if(data->ctrl[j] != 1) endLine = j; } /* header line 1 */ fprintf(fp, "Bait\t"); for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->BAIT[id]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 2 */ fprintf(fp, "IP\t"); for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 3 */ /* fprintf(fp, "\t"); for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%.2f", summary->alpha_IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } */ for(i=0;inprey;i++) { fprintf(fp, "%s\t", data->PREY[i]); /* Rest of the IPs */ for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { /* fprintf(fp, "\t"); */ } else { fprintf(fp, "%d|%.2f|%.2f|%.2f", (int) data->d2[isMatch], summary->Z[data->a2u[isMatch]], exp(summary->lambda_true[isMatch]), exp(summary->lambda_false[isMatch])); } if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } } fclose(fp); } void write_matrix_data2(DATA *data, SUMMARY *summary) { int i,j,k,id; int endLine, isMatch; FILE *fp = fopen("matrix_form_short","w"); endLine = -1; for(j=0;jnIP;j++) { if(data->ctrl[j] != 1) endLine = j; } /* header line 1 */ fprintf(fp, "\t\t\t\tBait\t"); for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j]) { fprintf(fp, "%s\t", data->BAIT[id]); } } for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->BAIT[id]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 2 */ fprintf(fp, "\t\t\t\tIP\t"); for(j=0;jnIP;j++) { if(data->ctrl[j]) { fprintf(fp, "%s\t", data->IP[j]); } } for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 3 */ fprintf(fp, "Prey\tmean_s\tvar_s\tmean_ns\tvar_ns\t"); for(j=0;jnIP;j++) { if(data->ctrl[j]) { fprintf(fp, "\t"); } } for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%.2f", summary->alpha_IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } for(i=0;inprey;i++) { fprintf(fp, "%s\t%.2f\t%.2f\t%.2f\t%.2f\t", data->PREY[i], summary->alpha_prey[i], summary->eta[i], summary->mu[i], summary->eta0[i]); /* Control runs first */ for(j=0;jnIP;j++) { if(data->ctrl[j]) { /* find if prey wise data has this IP */ /* if not, leave the space blank */ /* else, biz as usual: (count | prob | lambda_s, lambda_ns) */ isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { fprintf(fp, "\t"); } else { fprintf(fp, "%d\t", (int) data->d2[isMatch]); } } } /* Rest of the IPs */ for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { /* fprintf(fp, "\t"); */ } else { fprintf(fp, "(%d|%.2f|%.2f)", (int) data->d2[isMatch], summary->Z[data->a2u[isMatch]], summary->iZ[isMatch]); } if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } } fclose(fp); } SAINT_v2.3.4/src/SAINTspc-noctrl/printmap.c0000666000000000000000000000544711746171455017045 0ustar rootroot#include "saint.h" void printInter(DATA *data) { int i; FILE *fp = fopen("interaction","w"); fprintf(fp, "ip\tbait\tprey\tIP\tBAIT\tPREY\tubait\tuprey\n"); for(i=0;ininter;i++) { fprintf(fp, "%s\t%s\t%s\t", data->ip[i], data->bait[i], data->prey[i]); fprintf(fp, "%s\t%s\t%s\t", data->IP[data->i2IP[i]], data->BAIT[data->i2b[i]], data->PREY[data->i2p[i]]); fprintf(fp, "%s\t%s\n", data->ubait[data->a2u[i]], data->uprey[data->a2u[i]]); } fclose(fp); } void printUInter(DATA *data) { int i,j,k; FILE *fp = fopen("unique_interaction","w"); fprintf(fp, "ubait\tuprey\tubait\tuprey\tip\tbait\tprey\n"); for(i=0;inuinter;i++) { for(j=0;jn_u2a[i];j++) { k = data->u2a[i][j]; fprintf(fp, "%s\t%s\t", data->ubait[i], data->uprey[i]); fprintf(fp, "%s\t%s\t%s\n", data->ip[k], data->bait[k], data->prey[k]); } } fprintf(fp, "\n\n************************\n\n"); fprintf(fp, "ubait\tuprey\tBAIT\tPREY\n"); for(i=0;inuinter;i++) { fprintf(fp, "%s\t%s\t%s\t%s\n", data->ubait[i], data->uprey[i], data->BAIT[data->ui2b[i]], data->PREY[data->ui2p[i]]); } fclose(fp); } void printIP(DATA *data) { int i,j,k; /* IP to bait */ FILE *fp = fopen("IP","w"); fprintf(fp, "IP\tBAIT\n"); for(i=0;inIP;i++) { fprintf(fp, "%s\t%s\n", data->IP[i], data->BAIT[data->IP2b[i]]); } fprintf(fp, "\n\n************************\n\n"); /* IP to interactions */ fprintf(fp, "IP\tip\tbait\tprey\n"); for(i=0;inIP;i++) { for(j=0;jIPNinter[i];j++) { k = data->IP2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->IP[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printBait(DATA *data) { int i,j,k; FILE *fp = fopen("bait","w"); /* bait to IP */ fprintf(fp, "BAIT\tIP\n"); for(i=0;inbait;i++) { for(j=0;jbaitNIP[i];j++) { k = data->b2IP[i][j]; fprintf(fp, "%s\t%s\n", data->BAIT[i], data->IP[k]); } } fprintf(fp, "\n\n************************\n\n"); /* bait to interaction */ fprintf(fp, "BAIT\tip\tbait\tprey\n"); for(i=0;inbait;i++) { for(j=0;jbaitNinter[i];j++) { k = data->b2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->BAIT[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printPrey(DATA *data) { int i,j,k; FILE *fp = fopen("prey","w"); /* prey to interaction */ for(i=0;inprey;i++) { for(j=0;jpreyNinter[i];j++) { k = data->p2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->PREY[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printMap(DATA *data) { chdir("MAPPING"); printInter(data); printUInter(data); printIP(data); printBait(data); printPrey(data); chdir(".."); } SAINT_v2.3.4/src/SAINTspc-noctrl/dpalphaIP.c0000666000000000000000000001043511746171455017046 0ustar rootroot#include "saint.h" /********* ALPHA_IP *********/ void DP_alpha_IP_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inIP;i++) { if(data->ctrl[i] == 0) (wsum[prior->w_alpha_IP[i]])++; } for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_alpha_IP) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_alpha_IP[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_alpha_IP[i] = gammap[i]; for(j=0;jgamma_alpha_IP[i] *= (1.0 - gammap[j]); } } void DP_alpha_IP_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_alpha_IP, tmp_lambda, maxl; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_alpha_IP[i]); cur_alpha_IP = param->alpha_IP[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jIPNinter[pid];j++) { id = data->IP2i[pid][j]; if(param->Z[data->a2u[id]] && data->d[id] > 0.0) { tmp_lambda = param->lambda_true[id] + prior->theta_alpha_IP[i] - cur_alpha_IP; prob[i] += log_poisson_g_prop(data->d[id], exp(tmp_lambda), param->eta[data->i2p[id]]); } } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_alpha_IP[pid] = ranMultinom(r, prob, _MAX_COMP_); param->alpha_IP[pid] = prior->theta_alpha_IP[prior->w_alpha_IP[pid]]; for(j=0;jIPNinter[pid];j++) { id = data->IP2i[pid][j]; param->lambda_true[id] += param->alpha_IP[pid] - cur_alpha_IP; } } void DP_alpha_IP_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale; scale = prior->gamma_alpha_IP[pid] / (1.0 - prior->gamma_alpha_IP[pid]); if(inuse[pid] == 0) { newval = gsl_ran_gaussian(r, sqrt(prior->v_alpha_IP)) + prior->m_alpha_IP; Delta = newval - prior->theta_alpha_IP[pid]; prior->theta_alpha_IP[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 0.25); for(i=0;inIP;i++) { if(prior->w_alpha_IP[i] == pid) { for(j=0;jIPNinter[i];j++) { id = data->IP2i[i][j]; if(param->Z[data->a2u[id]] && data->d[id] > 0.0) { param->lambda_true_tmp[id] = param->lambda_true[id] + Delta; mhratio += log_poisson_g_prop(data->d[id], exp(param->lambda_true_tmp[id]), param->eta[data->i2p[id]]) - log_poisson_g_prop(data->d[id], exp(param->lambda_true[id]), param->eta[data->i2p[id]]); } } } } mhratio += log_gaussian(prior->theta_alpha_IP[pid] + Delta, prior->m_alpha_IP, prior->v_alpha_IP) - log_gaussian(prior->theta_alpha_IP[pid], prior->m_alpha_IP, prior->v_alpha_IP); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; /* if accepted, update param and lambda */ if(accept) { prior->theta_alpha_IP[pid] += Delta; for(i=0;inIP;i++) { if(prior->w_alpha_IP[i] == pid && data->ctrl[i] == 0) { param->alpha_IP[i] += Delta; for(j=0;jIPNinter[i];j++) { id = data->IP2i[i][j]; param->lambda_true[id] += Delta; } } } } } } void DP_alpha_IP(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inIP;i++) inuse[prior->w_alpha_IP[i]] = 1; DP_alpha_IP_gamma(param, prior, data, r); for(i=0;inIP;i++) { if(data->ctrl[i] == 0) DP_alpha_IP_w(param, prior, data, r, i); } for(i=0;i<_MAX_COMP_;i++) DP_alpha_IP_theta(param, prior, data, r, i, inuse); /* loglik update */ mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_IP[i] * prior->theta_alpha_IP[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] -= mean; for(i=0;inIP;i++) param->alpha_IP[i] -= mean; param->beta0 += mean; } SAINT_v2.3.4/src/SAINTspc-noctrl/meancounts.c0000666000000000000000000000210111746171455017347 0ustar rootroot#include "saint.h" /**************************************************************/ /* computing expected counts in log scale (s/ns) */ /**************************************************************/ /*************************/ /* all interactions */ /*************************/ void compute_lambda_true_all(PARAM *param, PRIOR *prior, DATA *data) { int i; for(i=0;ininter;i++) { param->lambda_true[i] = data->l[i] + param->beta0 + param->alpha_prey[data->i2p[i]] + param->alpha_IP[data->i2IP[i]]; if(NORMALIZE) param->lambda_true[i] += data->c[i]; } } void compute_lambda_false_all(PARAM *param, PRIOR *prior, DATA *data) { int i; for(i=0;ininter;i++) { param->lambda_false[i] = data->l[i] + param->betac + param->mu[data->i2p[i]]; if(NORMALIZE) param->lambda_false[i] += data->c[i]; // param->lambda_false[i] = param->betac + param->mu[data->i2p[i]]; } } void compute_lambda_all(PARAM *param, PRIOR *prior, DATA *data) { compute_lambda_true_all(param, prior, data); compute_lambda_false_all(param, prior, data); } SAINT_v2.3.4/src/SAINTspc-noctrl/setprior.c0000666000000000000000000000577111746171455017062 0ustar rootroot#include "saint.h" void memory_prior(PARAM *param, PRIOR *prior, DATA *data) { assert(prior->w_alpha_prey = (int *) calloc(data->nprey, sizeof(int))); assert(prior->w_alpha_IP = (int *) calloc(data->nIP, sizeof(int))); assert(prior->w_mu = (int *) calloc(data->nprey, sizeof(int))); assert(prior->w_eta = (int *) calloc(data->nprey, sizeof(int))); assert(prior->w_eta0 = (int *) calloc(data->nprey, sizeof(int))); } void initialize_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; double MAXC = ((double) _MAX_COMP_); prior->m_beta = 0.0; prior->v_beta = 1000.0; prior->atrue = 0.1; prior->afalse = 1.0 - prior->atrue; prior->rho_alpha_prey = data->nprey / 10.0; prior->m_alpha_prey = 0.0; prior->v_alpha_prey = 100.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_alpha_prey[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_prey[i] = gsl_ran_gaussian(r, 2.0); for(i=0;inprey;i++) prior->w_alpha_prey[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_prey[i] * prior->theta_alpha_prey[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_prey[i] -= mean; prior->rho_alpha_IP = data->nIP / 10.0; prior->m_alpha_IP = 0.0; prior->v_alpha_IP = 10.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_alpha_IP[i] = 1.0 / MAXC; /* for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] = gsl_ran_gaussian(r, 2.0); */ for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] = 0.0; for(i=0;inIP;i++) prior->w_alpha_IP[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_IP[i] * prior->theta_alpha_IP[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] -= mean; prior->rho_mu = data->nprey / 10.0; prior->m_mu = -1.0; prior->v_mu = 10.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_mu[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) prior->theta_mu[i] = gsl_ran_gaussian(r, 2.0); for(i=0;inprey;i++) prior->w_mu[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_mu[i] * prior->theta_mu[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_mu[i] -= mean; prior->rho_eta = data->nprey / 10.0; prior->mean_eta = 0.01; for(i=0;i<_MAX_COMP_;i++) prior->gamma_eta[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) prior->theta_eta[i] = (param->modelvar ? gsl_ran_exponential(r, prior->mean_eta) : 0.0) + 1.0; for(i=0;inprey;i++) prior->w_eta[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); prior->rho_eta0 = data->nprey / 10.0; prior->mean_eta0 = 0.01; for(i=0;i<_MAX_COMP_;i++) prior->gamma_eta0[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) prior->theta_eta0[i] = (param->modelvar ? gsl_ran_exponential(r, prior->mean_eta0) : 0.0) + 1.0; for(i=0;inprey;i++) prior->w_eta0[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); } void set_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { memory_prior(param, prior, data); initialize_prior(param, prior, data, r); } SAINT_v2.3.4/src/SAINTspc-noctrl/dpalphaprey.c0000666000000000000000000001046211746171455017515 0ustar rootroot#include "saint.h" /********* ALPHA_prey *********/ void DP_alpha_prey_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inprey;i++) (wsum[prior->w_alpha_prey[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_alpha_prey) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_alpha_prey[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_alpha_prey[i] = gammap[i]; for(j=0;jgamma_alpha_prey[i] *= (1.0 - gammap[j]); } } void DP_alpha_prey_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_alpha_prey, tmp_lambda, maxl, tmp; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_alpha_prey[i]); cur_alpha_prey = param->alpha_prey[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(param->Z[data->a2u[id]]) { tmp_lambda = param->lambda_true[id] + prior->theta_alpha_prey[i] - cur_alpha_prey; tmp = GSL_MIN(50.0, data->d[id]); if(tmp > 0.0) prob[i] += log_poisson_g_prop(tmp, exp(tmp_lambda), param->eta[pid]); } } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_alpha_prey[pid] = ranMultinom(r, prob, _MAX_COMP_); param->alpha_prey[pid] = prior->theta_alpha_prey[prior->w_alpha_prey[pid]]; for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; param->lambda_true[id] += param->alpha_prey[pid] - cur_alpha_prey; } } void DP_alpha_prey_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale; scale = prior->gamma_alpha_prey[pid] / (1.0 - prior->gamma_alpha_prey[pid]); if(inuse[pid] == 0) { newval = gsl_ran_gaussian(r, sqrt(prior->v_alpha_prey)) + prior->m_alpha_prey; Delta = newval - prior->theta_alpha_prey[pid]; prior->theta_alpha_prey[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 1.0); for(i=0;inprey;i++) { if(prior->w_alpha_prey[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(param->Z[data->a2u[id]]) { param->lambda_true_tmp[id] = param->lambda_true[id] + Delta; mhratio += log_poisson_g_prop(GSL_MIN(50.0, data->d[id]), exp(param->lambda_true_tmp[id]), param->eta[i]) - log_poisson_g_prop(GSL_MIN(50.0, data->d[id]), exp(param->lambda_true[id]), param->eta[i]); } } } } mhratio += log_gaussian(prior->theta_alpha_prey[pid] + Delta, prior->m_alpha_prey, prior->v_alpha_prey) - log_gaussian(prior->theta_alpha_prey[pid], prior->m_alpha_prey, prior->v_alpha_prey); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; /* if accepted, update param and lambda */ if(accept) { prior->theta_alpha_prey[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_alpha_prey[i] == pid) { param->alpha_prey[i] += Delta; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; param->lambda_true[id] += Delta; } } } } } } void DP_alpha_prey(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) { inuse[prior->w_alpha_prey[i]] = 1; } DP_alpha_prey_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_alpha_prey_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_alpha_prey_theta(param, prior, data, r, i, inuse); /* loglik update */ mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_prey[i] * prior->theta_alpha_prey[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_prey[i] -= mean; for(i=0;inprey;i++) param->alpha_prey[i] -= mean; param->beta0 += mean; } SAINT_v2.3.4/src/SAINTspc-noctrl/saint.h0000666000000000000000000003065511746171455016335 0ustar rootroot/* Copyright (C) <2011> For troubleshooting, contact hyung_won_choi@nuhs.edu.sg. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You can obtain a copy of the GNU General Public License from . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _MAX_BUF_ 2000 #define _MAX_NAME_ 2000 #define _MAX_COUNT_ 250 #define _MAX_COMP_ 15 #define _SKIP_ 10 #define _PRINT_FREQ_ 100 #define _HISTO_START_ -5.0 #define _HISTO_END_ 5.0 #define _HISTO_BIN_ 100 #define _HISTO_START2_ 0.5 #define _HISTO_END2_ 20.5 #define _HISTO_BIN2_ 200 #define _TRUNC_ 1000.0 #define _FR_ 0.01 typedef struct tagDATA { /*************/ /* logistics */ /*************/ int ninter; int nuinter; int nprey; int nIP; int nbait; /**************************/ /* interaction level data */ /**************************/ char **prey; char **bait; char **ip; /* raw data, each row corresponds to one interaction, case-sensitive */ float *d; float *d2; float *iprob; float *l; float *c; int *isCtrl; /*********************************/ /* unique interaction level data */ /*********************************/ char **uprey; char **ubait; float *prob; int *isAnyCtrl; int *n_u2a; /* number of individual interactions per unique interactions */ int **u2a; /* unique interactions to individual interactions */ int *a2u; /* individual interactions to unique interactions */ /* crucial indicator for probability calculation */ /***********************************/ /* unique bait and prey level data */ /***********************************/ float *IPtotalAbundance; float *IPbaitCoverage; float *preyLength; char *preyOverride; char **PREY; /* unique preys */ char **PREYGENE; /* unique preys */ char **BAIT; /* unique baits */ char **IP; /* unique IP #s */ int nctrl; int ntest; int *ctrl; /* index: control IPs or not: 'C' = control, 'T' = test */ float *ctrlavg; int *preyNinter; /* # interaction for prey */ int *preyFlag; /* if preyNinter is larger than freq */ int *baitNinter; /* # interaction for bait */ int *IPNinter; /* # interaction in an IP */ int *baitNIP; /* # IPs per bait */ /****************/ /* mapping data */ /****************/ int *i2p; /* index: interaction to prey */ int *i2b; /* index: interaction to bait */ int *i2IP; /* index: interaction to IP */ int **p2i; /* index: prey to interaction */ int **b2i; /* index: bait to interaction */ int **IP2i; /* index: IP to interaction */ int *ui2p; /* index: unique interaction to prey */ int *ui2b; /* index: unique interaction to bait */ /* no need to build reverse mapping for unique interactions */ /* perhaps this mapping is unnecessary */ int **b2IP; /* index: bait to IP */ int *IP2b; /* index: IP to bait */ } DATA; typedef struct tagPARAM{ float freq; float freqgroup; float modelvar; float loglikTotal; float *loglik_prey; float *loglik_IP; float beta0; float betac; float *alpha_prey; float *alpha_IP; float *mu; float *eta; float *eta0; int *iZ; /* individual interactions */ int *Z; /* unique interactions */ float ptrue; float ptrue_tmp; float *lambda_true; float *lambda_false; float *lambda_true_tmp; float *lambda_false_tmp; } PARAM; typedef struct tagPRIOR{ /* parametric portion */ float m_beta; /* set to zero */ float v_beta; float atrue, afalse; /* nonparametric portion */ float rho_alpha_prey; float m_alpha_prey; float v_alpha_prey; int *w_alpha_prey; float gamma_alpha_prey[_MAX_COMP_]; float theta_alpha_prey[_MAX_COMP_]; float rho_alpha_IP; float m_alpha_IP; float v_alpha_IP; int *w_alpha_IP; float gamma_alpha_IP[_MAX_COMP_]; float theta_alpha_IP[_MAX_COMP_]; float rho_mu; float m_mu; float v_mu; int *w_mu; float gamma_mu[_MAX_COMP_]; float theta_mu[_MAX_COMP_]; float rho_eta; float mean_eta; int *w_eta; float gamma_eta[_MAX_COMP_]; float theta_eta[_MAX_COMP_]; float rho_eta0; float mean_eta0; int *w_eta0; float gamma_eta0[_MAX_COMP_]; float theta_eta0[_MAX_COMP_]; } PRIOR; typedef struct tagHISTOGRAM{ float start[_HISTO_BIN_]; float end[_HISTO_BIN_]; float count[_HISTO_BIN_ + 2]; } HISTOGRAM; typedef struct tagHISTOGRAM2{ float start[_HISTO_BIN2_]; float end[_HISTO_BIN2_]; float count[_HISTO_BIN2_ + 2]; } HISTOGRAM2; typedef struct tagSUMMARY{ float freq; float *iZ; float *Z; float *alpha_prey; float *alpha_IP; float *mu; float *eta; float *eta0; float *lambda_true; float *lambda_false; HISTOGRAM hist_alpha_prey; HISTOGRAM hist_alpha_IP; HISTOGRAM hist_mu; HISTOGRAM2 hist_eta; HISTOGRAM2 hist_eta0; } SUMMARY; /* GLOBAL VARIABLES */ int burn; int iter; float freq; float freqgroup; int modelvar; int NORMALIZE; /*************/ /* functions */ /*************/ int nrow(FILE *fp); int newlinechar(char *buf, int k); int ncol(FILE *fp); int commandLine(int argc, char **argv); void print_DP(PRIOR *prior, DATA *data); /* initdata.c */ void read_interaction_data(FILE *fpinter, DATA *data); void find_unique_interaction(DATA *data); int unique_elements(char **x, int *unique, int nx); int count_unique_elements(char **x, int nx); void centerData(float *x, int n, int takelog); int mapPreyToData(DATA *data); void read_prey_data(FILE *fpprey, DATA *data); void mapIPtoBait(DATA *data); int mapIPBaitToData(DATA *data); void getIPinfo(DATA *data); void read_bait_data(FILE *fpbait, DATA *data); void set_ctrlavg(DATA *data); void read_data(FILE *fpinter, FILE *fpprey, FILE *fpbait, DATA *data, float *freq, float *freqgroup); void prey_flag(DATA *data, float *freq, float *freqgroup); /* meancounts.c */ void compute_lambda_true_all(PARAM *param, PRIOR *prior, DATA *data); void compute_lambda_false_all(PARAM *param, PRIOR *prior, DATA *data); void compute_lambda_all(PARAM *param, PRIOR *prior, DATA *data); /* void compute_lambda_true_prey(PARAM *param, PRIOR *prior, DATA *data, int pid); void compute_lambda_false_prey(PARAM *param, PRIOR *prior, DATA *data, int pid); void compute_lambda_prey(PARAM *param, PRIOR *prior, DATA *data, int pid); void compute_lambda_true_IP(PARAM *param, PRIOR *prior, DATA *data, int ipid); void compute_lambda_IP(PARAM *param, PRIOR *prior, DATA *data, int ipid); */ /* likelihood.c */ float log_poisson_prop(float N, float lambda); float log_poisson_g_prop(float N, float lambda, float theta); float LRprop(PARAM *param, PRIOR *prior, DATA *data); float loglik_all(PARAM *param, PRIOR *prior, DATA *data); float loglik_all_class(PARAM *param, PRIOR *prior, DATA *data, int cl); float loglik_all_class_tmp(PARAM *param, PRIOR *prior, DATA *data, int cl); /* mcmc.c */ float log_gaussian(float x, float mu, float var); void sampleBeta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void sampleBetac(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void sampleZ(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void contaminant(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); float logit(float x); float inverseLogit(float x); void sampleProportion(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void mhgibbs(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary, const gsl_rng *r, int updateSum); void write_mcmc(PARAM *param, PRIOR *prior, DATA *data, FILE *fp1, FILE *fp2, FILE *fp3, int ct); /* printmap.c */ void printInter(DATA *data); void printUInter(DATA *data); void printIP(DATA *data); void printBait(DATA *data); void printPrey(DATA *data); void printMap(DATA *data); /* setprior.c */ void memory_prior(PARAM *param, PRIOR *prior, DATA *data); void initialize_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void set_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* setparam.c */ void memory_param(PARAM *param, PRIOR *prior, DATA *data); void initialize_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void set_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void set_Z(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* setsumamry.c */ void memory_summary(SUMMARY *summary, DATA *data); void initialize_summary(SUMMARY *summary, DATA *data); void initialize_histogram(HISTOGRAM *hist); void initialize_histogram2(HISTOGRAM2 *hist); void set_summary(SUMMARY *summary, DATA *data); void updateSummary(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary); void scaleSummary(SUMMARY *summary, DATA *data, int iter); void updateHist_alpha_prey(HISTOGRAM *hist, PRIOR *prior); void updateHist_alpha_IP(HISTOGRAM *hist, PRIOR *prior); void updateHist_mu(HISTOGRAM *hist, PRIOR *prior); void updateHist_eta(HISTOGRAM2 *hist, PRIOR *prior); void updateHist_eta0(HISTOGRAM2 *hist, PRIOR *prior); void updateHistogram(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary); /* dpalphaprey.c */ void DP_alpha_prey_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_alpha_prey_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_alpha_prey_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_alpha_prey(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpalphaIP.c */ void DP_alpha_IP_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_alpha_IP_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_alpha_IP_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_alpha_IP(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpmu.c */ void DP_mu_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_mu_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_mu_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_mu(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpmu.c */ void DP_eta_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_eta_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_eta_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_eta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpmu.c */ void DP_eta0_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_eta0_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_eta0_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_eta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* result.c */ void write_interactions(DATA *data, SUMMARY *summary); void write_unique_interactions(DATA *data, SUMMARY *summary); void write_prey(DATA *data, SUMMARY *summary); void write_IP(DATA *data, SUMMARY *summary); void write_bait(DATA *data, SUMMARY *summary); void write_histogram(FILE *fp, HISTOGRAM *hist); void write_histogram2(FILE *fp, HISTOGRAM2 *hist); void write_hyperprior(DATA *data, SUMMARY *summary); void write_result(DATA *data, SUMMARY *summary); void write_matrix_data(DATA *data, SUMMARY *summary); void write_matrix_data2(DATA *data, SUMMARY *summary); /**************** mmath.c ******************************/ float vec_sum(const float *vec, int len); float vec_max(const float *vec, int len); float vec_min(const float *vec, int len); float vec_mean(const float *vec, int len); float vec_var(const float *vec, int len); float vec_med(const float *vec, int len); float vec_mad(const float *vec, int len); float geometric_mean(float *x, int n); int ranMultinom(const gsl_rng *r, float *p, int K); SAINT_v2.3.4/src/SAINTspc-noctrl/dpeta0.c0000666000000000000000000001161011746171455016355 0ustar rootroot#include "saint.h" /********* ALPHA_prey *********/ void DP_eta0_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inprey;i++) (wsum[prior->w_eta0[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_eta0) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_eta0[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_eta0[i] = gammap[i]; for(j=0;jgamma_eta0[i] *= (1.0 - gammap[j]); } } void DP_eta0_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_eta, tmp_lambda, maxl, false_avg, tmp; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_eta0[i]); false_avg = 0.0; for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; false_avg += param->lambda_false[id]; } false_avg /= ((float) data->preyNinter[pid]); cur_eta = param->eta0[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(data->ctrl[data->i2IP[id]] || param->Z[data->a2u[id]] == 0) { tmp_lambda = param->lambda_false[id]; prob[i] += log_poisson_g_prop(data->d[id], exp(tmp_lambda), prior->theta_eta0[i]); } } if( data->preyNinter[pid] < ((int) ((param->freqgroup) * ((float) data->nIP))) ) { tmp = ((float) data->nIP) - ((float) data->preyNinter[pid]); tmp_lambda = false_avg; tmp = tmp * log_poisson_g_prop(0.0, exp(tmp_lambda), prior->theta_eta0[i]); prob[i] += tmp; } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_eta0[pid] = ranMultinom(r, prob, _MAX_COMP_); param->eta0[pid] = prior->theta_eta0[prior->w_eta0[pid]]; } void DP_eta0_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale, tmp_lambda, false_avg,tmp; scale = prior->gamma_eta0[pid] / (1.0 - prior->gamma_eta0[pid]); if(inuse[pid] == 0) { newval = gsl_ran_exponential(r, prior->mean_eta) + 1.0; Delta = newval - prior->theta_eta0[pid]; prior->theta_eta0[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 1.0); if(prior->theta_eta0[pid] + Delta <= 1.0 || prior->theta_eta0[pid] + Delta > 100.0) { accept = 0; } else { for(i=0;inprey;i++) { if(data->preyFlag[i] == 0) { false_avg = 0.0; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; false_avg += param->lambda_false[id]; } false_avg /= ((float) data->preyNinter[i]); if(prior->w_eta0[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(data->ctrl[data->i2IP[id]] || param->Z[data->a2u[id]] == 0) { tmp_lambda = param->lambda_false[id]; mhratio += log_poisson_g_prop(data->d[id], exp(tmp_lambda), prior->theta_eta0[pid]+Delta) - log_poisson_g_prop(data->d[id], exp(tmp_lambda), prior->theta_eta0[pid]); } } if( data->preyNinter[i] < ((int) ((param->freqgroup) * ((float) data->nIP))) ) { tmp = ((float) data->nIP) - ((float) data->preyNinter[i]); tmp_lambda = false_avg; tmp = tmp * ( log_poisson_g_prop(0.0, exp(tmp_lambda), param->eta0[i]+Delta) - log_poisson_g_prop(0.0, exp(false_avg), param->eta0[i]) ); mhratio += tmp; } } } } mhratio += log(gsl_ran_exponential_pdf(prior->theta_eta0[pid]+Delta-1.0, prior->mean_eta0)) - log(gsl_ran_exponential_pdf(prior->theta_eta0[pid]-1.0, prior->mean_eta0)); mhratio += -2.0 * (log(prior->theta_eta0[pid]+ Delta) - log(prior->theta_eta0[pid])); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; } /* if accepted, update param and lambda */ if(accept) { prior->theta_eta0[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_eta0[i] == pid) { param->eta0[i] += Delta; } } } } } void DP_eta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) inuse[prior->w_eta0[i]] = 1; DP_eta0_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_eta0_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_eta0_theta(param, prior, data, r, i, inuse); /* loglik update */ } SAINT_v2.3.4/src/SAINTint-ctrl/0000777000000000000000000000000012145226162014473 5ustar rootrootSAINT_v2.3.4/src/SAINTint-ctrl/mmath.c0000666000000000000000000000411711746171473015762 0ustar rootroot#include "saint.h" float vec_sum(const float *vec, int len) { int i; float res; res=vec[0]; for(i=1;ivec[i]) res=vec[i]; } return res; } float vec_mean(const float *vec, int len) { float tmp=0.0; int i; for(i=0;i sum) { rr++; sum += p[rr]; } if(rr >= K) rr = K-1; return rr; } float geometric_mean(float *x, int n) { int i; float res = 0.0; for(i=0;inprey;i++) (wsum[prior->w_mu[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_mu) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_mu[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_mu[i] = gammap[i]; for(j=0;jgamma_mu[i] *= (1.0 - gammap[j]); } } void DP_mu_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_mu, tmp_lambda, maxl, tmp; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_mu[i]); cur_mu = param->mu[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(data->ctrl[data->i2IP[id]]) { tmp_lambda = param->lambda_false[id] + prior->theta_mu[i] - cur_mu; tmp = data->d[id]; prob[i] += log_gaussian(tmp, (tmp_lambda), param->eta0[data->i2p[id]]); } } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_mu[pid] = ranMultinom(r, prob, _MAX_COMP_); param->mu[pid] = prior->theta_mu[prior->w_mu[pid]]; for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; param->lambda_false[id] += param->mu[pid] - cur_mu; } } void DP_mu_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale, tmp; scale = prior->gamma_mu[pid] / (1.0 - prior->gamma_mu[pid]); if(inuse[pid] == 0) { newval = gsl_ran_gaussian(r, sqrt(prior->v_mu)) + prior->m_mu; Delta = newval - prior->theta_mu[pid]; prior->theta_mu[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 0.25); for(i=0;inprey;i++) { if(prior->w_mu[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(data->ctrl[data->i2IP[id]]) { tmp = data->d[id]; param->lambda_false_tmp[id] = param->lambda_false[id] + Delta; mhratio += log_gaussian(tmp, (param->lambda_false_tmp[id]), param->eta0[i]) - log_gaussian(tmp, (param->lambda_false[id]), param->eta0[i]); } } } } mhratio += log_gaussian(prior->theta_mu[pid] + Delta, prior->m_mu, prior->v_mu) - log_gaussian(prior->theta_mu[pid], prior->m_mu, prior->v_mu); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; /* if accepted, update param and lambda */ if(accept) { prior->theta_mu[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_mu[i] == pid) { param->mu[i] += Delta; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; param->lambda_false[id] += Delta; } } } } } } void DP_mu(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) inuse[prior->w_mu[i]] = 1; DP_mu_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_mu_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_mu_theta(param, prior, data, r, i, inuse); /* loglik update */ mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_mu[i] * prior->theta_mu[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_mu[i] -= mean; for(i=0;inprey;i++) param->mu[i] -= mean; param->betac += mean; } SAINT_v2.3.4/src/SAINTint-ctrl/compile0000777000000000000000000000015211746171473016061 0ustar rootroot#! /bin/sh gcc -Wall -c *.c *.h gcc *.o -lgsl -lgslcblas -lm -o ../saint-int-ctrl rm -rf *.o rm -rf *.gch SAINT_v2.3.4/src/SAINTint-ctrl/likelihood.c0000666000000000000000000000530111746171473016773 0ustar rootroot#include "saint.h" /***************************************************/ /* computing likelihoods in log scale */ /***************************************************/ /*************************/ /* all interactions */ /*************************/ float LRprop(PARAM *param, PRIOR *prior, DATA *data) { int i,j,id; float pos, neg, maxl; float lik_new, lik_old; lik_new = 0.0; lik_old = 0.0; for(i=0;inuinter;i++) { pos = 0.0; neg = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]] == 0) { pos += log_gaussian(data->d[id], (param->lambda_true[id]), param->eta[data->i2p[id]]); neg += log_gaussian(data->d[id], (param->lambda_false[id]), param->eta0[data->i2p[id]]); } } maxl = pos > neg ? pos : neg; pos = exp(pos - maxl); neg = exp(neg - maxl); lik_new += log(param->ptrue_tmp * pos + (1.0-param->ptrue_tmp) * neg); lik_old += log(param->ptrue * pos + (1.0-param->ptrue) * neg); } return lik_new - lik_old; } float loglik_all(PARAM *param, PRIOR *prior, DATA *data) { int i; float lambda; float lik = 0.0; for(i=0;ininter;i++) { if(param->Z[data->a2u[i]] && data->miss[i] == 0) { lambda = (param->lambda_true[i]); lik += log_gaussian(data->d[i], lambda, param->eta[data->i2p[i]]); } else if (param->Z[data->a2u[i]] == 0) { lambda = (param->lambda_false[i]); lik += log_gaussian(data->d[i], lambda, param->eta0[data->i2p[i]]); } else {} } return lik; } float loglik_all_class(PARAM *param, PRIOR *prior, DATA *data, int cl) { /* loglik by class */ int i; float lambda; float lik = 0.0; for(i=0;ininter;i++) { if(cl) { if(param->Z[data->a2u[i]] && data->miss[i] == 0) { lambda = (param->lambda_true[i]); lik += log_gaussian(data->d[i], lambda, param->eta[data->i2p[i]]); } } else { if(param->Z[data->a2u[i]] == 0) { lambda = (param->lambda_false[i]); lik += log_gaussian(data->d[i], lambda, param->eta0[data->i2p[i]]); } } } return lik; } float loglik_all_class_tmp(PARAM *param, PRIOR *prior, DATA *data, int cl) { /* loglik by class */ int i; float lambda; float lik = 0.0; for(i=0;ininter;i++) { if(cl) { if(param->Z[data->a2u[i]] && data->miss[i] == 0) { lambda = (param->lambda_true_tmp[i]); lik += log_gaussian(data->d[i], lambda, param->eta[data->i2p[i]]); } } else { if(param->Z[data->a2u[i]] == 0) { lambda = (param->lambda_false_tmp[i]); lik += log_gaussian(data->d[i], lambda, param->eta0[data->i2p[i]]); } } } return lik; } SAINT_v2.3.4/src/SAINTint-ctrl/initdata.c0000666000000000000000000004713611746171473016461 0ustar rootroot#include "saint.h" /*************************/ /* read interaction data */ /*************************/ void read_interaction_data(FILE *fpinter, DATA *data) { int i,ct; float tmp; float tmp_mean, tmp_var; char buf[1000]; data->ninter = nrow(fpinter); rewind(fpinter); assert(data->prey = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->prey[i] = (char *) calloc(500, sizeof(char))); assert(data->bait = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->bait[i] = (char *) calloc(500, sizeof(char))); assert(data->ip = (char **) calloc(data->ninter, sizeof(char *))); for(i=0;ininter;i++) assert(data->ip[i] = (char *) calloc(500, sizeof(char))); assert(data->d = (float *) calloc(data->ninter, sizeof(float))); assert(data->miss = (int *) calloc(data->ninter, sizeof(int))); assert(data->iprob = (float *) calloc(data->ninter, sizeof(float))); assert(data->a2u = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) { fscanf(fpinter, "%s", buf); strcpy(data->ip[i], buf); fscanf(fpinter, "%s", buf); strcpy(data->bait[i], buf); fscanf(fpinter, "%s", buf); strcpy(data->prey[i], buf); fscanf(fpinter, "%s", buf); tmp = atof(buf); if(tmp == 0.0) { data->miss[i] = 1; data->d[i] = GSL_NEGINF; } else { data->miss[i] = 0; data->d[i] = log(tmp); } } /* centering and normalizing */ tmp_mean = 0.0; ct = 0; for(i=0;ininter;i++) { if(data->miss[i] == 0) { tmp_mean += data->d[i]; ct++; } } tmp_mean /= ((float) ct); for(i=0;ininter;i++) { if(data->miss[i] == 0) data->d[i] -= tmp_mean; } tmp_var = 0.0; for(i=0;ininter;i++) { if(data->miss[i] == 0) { tmp_var += pow(data->d[i],2.0); } } tmp_var /= ((float) ct); for(i=0;ininter;i++) { if(data->miss[i] == 0) data->d[i] /= sqrt(tmp_var); } } /**********************************************************************************************************/ /*********************************************************************************/ /* make unique interaction data and identify mapping between unique and all data */ /*********************************************************************************/ void find_unique_interaction(DATA *data) { int i,j,cur; int baitCompare, preyCompare; int isUnique[data->ninter]; int nInstance[data->ninter]; /* this counts at the level of unique interactions */ int counter[data->ninter]; /* same as above, used for mapping unique->individual */ for(i=0;ininter;i++) { isUnique[i] = 1; nInstance[i] = 0; counter[i] = 0; } /* scan 1~n to mark unique interactions and count instances of each */ cur = 0; for(i=0;i<(data->ninter-1);i++) { if(isUnique[i]) { (nInstance[cur])++; for(j=(i+1);jninter;j++) { if(isUnique[j]) { baitCompare = strcmp(data->bait[i], data->bait[j]); preyCompare = strcmp(data->prey[i], data->prey[j]); if(baitCompare == 0 && preyCompare == 0) { isUnique[j] = 0; (nInstance[cur])++; } } } cur++; } } if(isUnique[data->ninter-1]) { (nInstance[cur])++; cur++; } /* count # unique interactions */ data->nuinter = 0; for(i=0;ininter;i++) { if(isUnique[i]) (data->nuinter)++; } /* memory business for unique interactions */ assert(data->uprey = (char **) calloc(data->nuinter, sizeof(char *))); for(i=0;inuinter;i++) assert(data->uprey[i] = (char *) calloc(500, sizeof(char))); assert(data->ubait = (char **) calloc(data->nuinter, sizeof(char *))); for(i=0;inuinter;i++) assert(data->ubait[i] = (char *) calloc(500, sizeof(char))); assert(data->prob = (float *) calloc(data->nuinter, sizeof(float))); /* copy unique interactions */ cur = 0; for(i=0;ininter;i++) { if(isUnique[i]) { strcpy(data->uprey[cur], data->prey[i]); strcpy(data->ubait[cur], data->bait[i]); data->prob[cur] = 0.0; cur++; } } if(data->nuinter > cur) fprintf(stderr, "Warning: possibly missed some unique interactions\n"); else if(data->nuinter < cur) fprintf(stderr, "Warning: too many unique interactions, check mapping\n"); else {} /* mapping between individual and unique interactions */ assert(data->n_u2a = (int *) calloc(data->nuinter, sizeof(int))); assert(data->u2a = (int **) calloc(data->nuinter, sizeof(int *))); for(i=0;inuinter;i++) data->n_u2a[i] = nInstance[i]; for(i=0;inuinter;i++) { assert(data->u2a[i] = (int *) calloc(data->n_u2a[i], sizeof(int))); } cur = 0; /* current index of unique */ for(i=0;ininter;i++) { if(isUnique[i]) { data->a2u[i] = cur; data->u2a[cur][counter[cur]] = i; (counter[cur])++; for(j=(i+1);jninter;j++) { if(isUnique[j] == 0) { baitCompare = strcmp(data->bait[i], data->bait[j]); preyCompare = strcmp(data->prey[i], data->prey[j]); if(baitCompare == 0 && preyCompare == 0) { data->a2u[j] = cur; data->u2a[cur][counter[cur]] = j; (counter[cur])++; } } } cur++; } } } /***********************************************************************************************************/ /*****************************************************/ /* make indicators of uniqueness in character arrays */ /* returns the number of unique elements */ /*****************************************************/ int unique_elements(char **x, int *unique, int nx) { int i,j; int nunique = nx; for(i=0;inprey;i++) data->preyNinter[i] = 0; assert(data->i2p = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2p[i] = -1; for(i=0;inprey;i++) { for(j=0;jninter;j++) { if(strcmp(data->PREY[i], data->prey[j]) == 0) { (data->preyNinter[i])++; data->i2p[j] = i; } } } assert(data->p2i = (int **) calloc(data->nprey, sizeof(int *))); for(i=0;inprey;i++) assert(data->p2i[i] = (int *) calloc(data->preyNinter[i], sizeof(int))); for(i=0;inprey;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->PREY[i], data->prey[j]) == 0) { data->p2i[i][cur] = j; cur++; } if(cur >= data->preyNinter[i]) break; } } assert(data->ui2p = (int *) calloc(data->nuinter, sizeof(int))); for(i=0;inprey;i++) { for(j=0;jnuinter;j++) { if(strcmp(data->PREY[i], data->uprey[j]) == 0) { data->ui2p[j] = i; } } } /* report which prey in the prey file did not show up in the interaction file */ cur = 0; for(i=0;inprey;i++) { if(data->preyNinter[i] == 0) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp1 = fopen("PreysNotInData", "w"); for(i=0;inprey;i++) { if(data->preyNinter[i] == 0) fprintf(fptemp1, "%s\n", data->PREY[i]); } fclose(fptemp1); chdir(".."); return 1; } /* report which prey in the interaction file did not show up in the prey file */ cur = 0; for(i=0;ininter;i++) { if(data->i2p[i] == -1) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp2 = fopen("PreysNotInList", "w"); for(i=0;ininter;i++) { if(data->i2p[i] == -1) fprintf(fptemp2, "%d\t%s\t%s\t%s\n", i+1, data->ip[i], data->bait[i], data->prey[i]); } fclose(fptemp2); chdir(".."); return 1; } return 0; } /**************************************************************/ /* read prey data and check discrepancy with interaction data */ /**************************************************************/ void read_prey_data(FILE *fpprey, DATA *data) { int i, nprey; char buf[256]; nprey = nrow(fpprey); rewind(fpprey); data->nprey = nprey; assert(data->PREY = (char **) calloc(nprey, sizeof(char *))); for(i=0;iPREY[i] = (char *) calloc(500, sizeof(char))); assert(data->PREYGENE = (char **) calloc(nprey, sizeof(char *))); for(i=0;iPREYGENE[i] = (char *) calloc(500, sizeof(char))); assert(data->preyLength = (float *) calloc(nprey, sizeof(float))); assert(data->preyNinter = (int *) calloc(nprey, sizeof(int))); assert(data->ctrlavg = (float *) calloc(nprey, sizeof(float))); for(i=0;iPREY[i], buf); data->ctrlavg[i] = 0.0; fscanf(fpprey, "%s", buf); strcpy(data->PREYGENE[i], buf); } centerData(data->preyLength, nprey, 1); mapPreyToData(data); } /***********************************************************************************************************/ void mapIPtoBait(DATA *data) { int i,j; int nbait, nIP, cur; char temp[data->nIP][256]; int uniqueBaits[data->nIP]; nIP = data->nIP; nbait = unique_elements(data->BAIT, uniqueBaits, nIP); data->nbait = nbait; assert(data->baitNIP = (int *) calloc(nbait, sizeof(int))); for(i=0;inbait;i++) data->baitNIP[i] = 0; cur = 0; for(i=0;inIP;i++) { if(uniqueBaits[i]) { strcpy(temp[cur], data->BAIT[i]); cur++; } } if(cur != data->nbait) fprintf(stderr, "check bait-IP file\n"); for(i=0;inbait;i++) { cur = 0; for(j=0;jnIP;j++) { if(strcmp(temp[i], data->BAIT[j]) == 0) { cur++; } } data->baitNIP[i] = cur; } assert(data->IP2b = (int *) calloc(data->nIP, sizeof(int))); assert(data->b2IP = (int **) calloc(data->nbait, sizeof(int *))); for(i=0;inbait;i++) assert(data->b2IP[i] = (int *) calloc(data->baitNIP[i], sizeof(int))); for(i=0;inbait;i++) { cur = 0; for(j=0;jnIP;j++) { if(strcmp(temp[i], data->BAIT[j]) == 0) { data->IP2b[j] = i; data->b2IP[i][cur] = j; cur++; } } data->baitNIP[i] = cur; } for(i=0;inbait;i++) strcpy(data->BAIT[i], temp[i]); } int mapIPBaitToData(DATA *data) { /* Part I: bait to data */ int i,j; int cur; assert(data->baitNinter = (int *) calloc(data->nbait, sizeof(int))); for(i=0;inbait;i++) data->baitNinter[i] = 0; for(i=0;inIP;i++) data->IPNinter[i] = 0; assert(data->i2b = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2b[i] = -1; assert(data->i2IP = (int *) calloc(data->ninter, sizeof(int))); for(i=0;ininter;i++) data->i2IP[i] = -1; for(i=0;inIP;i++) { for(j=0;jninter;j++) { if(strcmp(data->IP[i], data->ip[j]) == 0) { (data->IPNinter[i])++; data->i2IP[j] = i; } } } for(i=0;inbait;i++) { for(j=0;jninter;j++) { if(strcmp(data->BAIT[i], data->bait[j]) == 0) { (data->baitNinter[i])++; data->i2b[j] = i; } } } assert(data->IP2i = (int **) calloc(data->nIP, sizeof(int *))); for(i=0;inIP;i++) assert(data->IP2i[i] = (int *) calloc(data->IPNinter[i], sizeof(int))); for(i=0;inIP;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->IP[i], data->ip[j]) == 0) { data->IP2i[i][cur] = j; cur++; } if(cur >= data->IPNinter[i]) break; } } assert(data->b2i = (int **) calloc(data->nbait, sizeof(int *))); for(i=0;inbait;i++) assert(data->b2i[i] = (int *) calloc(data->baitNinter[i], sizeof(int))); for(i=0;inbait;i++) { cur = 0; for(j=0;jninter;j++) { if(strcmp(data->BAIT[i], data->bait[j]) == 0) { data->b2i[i][cur] = j; cur++; } if(cur >= data->baitNinter[i]) break; } } /* from unique interactions to bait/IP */ assert(data->ui2b = (int *) calloc(data->nuinter, sizeof(int))); for(i=0;inbait;i++) { for(j=0;jnuinter;j++) { if(strcmp(data->BAIT[i], data->ubait[j]) == 0) data->ui2b[j] = i; } } /* report which bait/IP in the bait file did not show up in the interaction file */ cur = 0; for(i=0;inbait;i++) { if(data->IPNinter[i] == 0) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp1 = fopen("IPNotInData", "w"); for(i=0;inIP;i++) { if(data->IPNinter[i] == 0) fprintf(fptemp1, "%s\t%s\n", data->IP[i], data->BAIT[i]); } fclose(fptemp1); chdir(".."); return 1; } /* report which baits/IPs in the interaction file did not show up in the bait/IP file */ cur = 0; for(i=0;ininter;i++) { if(data->i2IP[i] == -1) { cur = 1; break; } } if(cur) { chdir("LOG"); FILE *fptemp2 = fopen("IPNotInList", "w"); for(i=0;ininter;i++) { if(data->i2IP[i] == -1) fprintf(fptemp2, "%d\t%s\t%s\t%s\n", i+1, data->ip[i], data->bait[i], data->prey[i]); } fclose(fptemp2); chdir(".."); return 1; } return 0; } void getIPinfo(DATA *data) { int i,j; int IPmatch, BAITmatch, PREYmatch; char buf[256]; assert(data->IPbaitCoverage = (float *) calloc(data->nIP, sizeof(float))); assert(data->IPtotalAbundance = (float *) calloc(data->nIP, sizeof(float))); for(i=0;inIP;i++) { data->IPbaitCoverage[i] = 0.0; data->IPtotalAbundance[i] = 0.0; } for(i=0;inIP;i++) { strcpy(buf, data->BAIT[data->IP2b[i]]); for(j=0;jninter;j++) { IPmatch = strcmp(data->ip[j], data->IP[i]); BAITmatch = strcmp(data->bait[j], buf); PREYmatch = strcmp(data->prey[j], buf); if(IPmatch == 0) { data->IPtotalAbundance[i] += data->d[j]; if(BAITmatch == 0 && PREYmatch == 0) data->IPbaitCoverage[i] = data->d[j] / data->preyLength[data->i2p[j]]; } } /* if(data->IPbaitCoverage[i] == 0.0) { fprintf(stderr, "IP %s (bait %s) has no bait-bait interaction\n", data->IP[i], data->BAIT[data->IP2b[i]]); } */ } /* for(i=0;inIP;i++) { fprintf(stderr, "%d: %s %s %.2f %.2f\n", i+1, data->IP[i], data->BAIT[data->IP2b[i]], data->IPbaitCoverage[i], data->IPtotalAbundance[i]); } */ } /**************************************************************/ /* read bait data and check discrepancy with interaction data */ /**************************************************************/ void read_bait_data(FILE *fpbait, DATA *data) { int i, nbait, nIP; char buf[256]; nIP = nrow(fpbait); rewind(fpbait); data->nIP = nIP; data->nctrl = 0; data->ntest = 0; assert(data->BAIT = (char **) calloc(nIP, sizeof(char *))); for(i=0;iBAIT[i] = (char *) calloc(500, sizeof(char))); assert(data->IP = (char **) calloc(nIP, sizeof(char *))); for(i=0;iIP[i] = (char *) calloc(500, sizeof(char))); assert(data->ctrl = (int *) calloc(nIP, sizeof(int))); assert(data->IPNinter = (int *) calloc(nIP, sizeof(int))); for(i=0;iIP[i], buf); fscanf(fpbait, "%s", buf); strcpy(data->BAIT[i], buf); /* not unique at this point */ fscanf(fpbait, "%s", buf); if(buf[0] == 'C' || buf[0] == 'c') { data->ctrl[i] = 1; /* note that control is marked as 1, test is as 0 */ (data->nctrl)++; } else { data->ctrl[i] = 0; (data->ntest)++; } /* fprintf(stderr, "%s\t%s\t%d\n", data->IP[i], data->BAIT[i], data->ctrl[i]); */ } /* check whether IPs are unique or not */ mapIPtoBait(data); nbait = data->nbait; mapIPBaitToData(data); getIPinfo(data); /* bait coverage and total abundance */ centerData(data->IPbaitCoverage, nIP, 1); centerData(data->IPtotalAbundance, nIP, 1); /* these quantities are on log scale, mean centered now. */ } /***********************************************************************************************************/ void set_ctrlavg(DATA *data) { int i; for(i=0;inprey;i++) data->ctrlavg[i] = GSL_NEGINF; for(i=0;ininter;i++) { if(data->ctrl[data->i2IP[i]]) { if(data->ctrlavg[data->i2p[i]] < data->d[i] && data->miss[i] == 0) data->ctrlavg[data->i2p[i]] = data->d[i]; } } /* for(i=0;inprey;i++) data->ctrlavg[i] /= ((float) data->nctrl); */ } void get_dmin(DATA *data) { int i,j,id,uid; float tmp; dmin = GSL_POSINF; for(i=0;ininter;i++) { if(dmin > data->d[i] && data->miss[i] == 0) dmin = data->d[i]; } assert(data->dmin_ctrl = (float *) calloc(data->nprey, sizeof(float))); assert(data->ctrl_obs = (int *) calloc(data->nprey, sizeof(int))); assert(data->dmin_inter = (float *) calloc(data->ninter, sizeof(float))); /* control */ data->dmin__ctrl = GSL_POSINF; for(i=0;inprey;i++) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; tmp = data->d[id]; if(data->dmin__ctrl > tmp && data->miss[id] == 0 && data->ctrl[data->i2IP[id]]) data->dmin__ctrl = tmp; } } // fprintf(stderr, "%.3f\n", data->dmin__ctrl); for(i=0;inprey;i++) { data->ctrl_obs[i] = 0; data->dmin_ctrl[i] = GSL_POSINF; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; tmp = data->d[id]; if(data->dmin_ctrl[i] > tmp && data->miss[id] == 0 && data->ctrl[data->i2IP[id]]) data->dmin_ctrl[i] = tmp; if(data->miss[id] == 0 && data->ctrl[data->i2IP[id]]) (data->ctrl_obs[i])++; } if(data->dmin_ctrl[i] == GSL_POSINF) data->dmin_ctrl[i] = data->dmin__ctrl; } /* interaction - non-control */ for(i=0;ininter;i++) { data->dmin_inter[i] = GSL_POSINF; uid = data->a2u[i]; if(data->ctrl[data->i2IP[i]] == 0) { for(j=0;jn_u2a[uid];j++) { id = data->u2a[uid][j]; tmp = data->d[id]; if(data->dmin_inter[i] > tmp && data->miss[id] == 0) data->dmin_inter[i] = tmp; } } if(data->dmin_inter[i] == GSL_POSINF) data->dmin_inter[i] = dmin; } } /**************************************************************/ /* master function for reading the data */ /**************************************************************/ void read_data(FILE *fpinter, FILE *fpprey, FILE *fpbait, DATA *data) { read_interaction_data(fpinter, data); find_unique_interaction(data); read_prey_data(fpprey, data); read_bait_data(fpbait, data); get_dmin(data); /* make a function to filter out interactions with no matching preys and baits */ } /***********************************************************************************************************/ /* void initial_impute(PARAM *param, DATA *data, const gsl_rng *r) { int i; for(i=0;ininter;i++) { if(data->miss[i]) { data->d[i] = gsl_ran_gaussian(r, sqrt(param->missVar)) + param->missMean; } } } */ SAINT_v2.3.4/src/SAINTint-ctrl/mcmc.c0000666000000000000000000002256211746171473015577 0ustar rootroot#include "saint.h" float gaussian(float x, float mu, float var) { float res = exp( - .5 * pow(x-mu,2.0) / var ) / sqrt(2.0 * M_PI * var); return res; } float log_gaussian(float x, float mu, float var) { float res; if(x == GSL_NEGINF) { res = gsl_cdf_gaussian_P(dmin - log(2.0) - mu, sqrt(var)); res = log(GSL_MAX(res, _tiny_)); } else res = - .5 * pow(x-mu,2.0) / var - .5 * log(2.0 * M_PI * var); return res; } float log_inv_gamma(float x, float sh, float sc) { float res = 0.0; res = sh * log(sc) - gsl_sf_lngamma(sh) - (sh + 1.0) * log(x) - sc / x; return res; } void sampleBeta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,accept; float likratio, lr_new, lr_old; float diff = gsl_ran_gaussian(r,0.2); for(i=0;ininter;i++) param->lambda_true_tmp[i] = param->lambda_true[i] + diff; lr_new = loglik_all_class_tmp(param, prior, data, 1); lr_old = loglik_all_class(param, prior, data, 1); likratio = lr_new - lr_old; likratio += log_gaussian(param->beta0 + diff, prior->m_beta, prior->v_beta) - log_gaussian(param->beta0, prior->m_beta, prior->v_beta); likratio = GSL_MIN(1.0, exp(likratio)); accept = gsl_ran_flat(r,0.0,1.0) <= likratio ? 1 : 0; if(accept) { param->beta0 += diff; for(i=0;ininter;i++) param->lambda_true[i] = param->lambda_true_tmp[i]; param->loglikTotal += (lr_new - lr_old); } } void sampleBetac(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,accept; float likratio, lr_new, lr_old; float diff = gsl_ran_gaussian(r,0.2); for(i=0;ininter;i++) param->lambda_false_tmp[i] = param->lambda_false[i] + diff; lr_new = loglik_all_class_tmp(param, prior, data, 0); lr_old = loglik_all_class(param, prior, data, 0); likratio = lr_new - lr_old; likratio += log_gaussian(param->betac + diff, prior->m_beta, prior->v_beta) - log_gaussian(param->betac, prior->m_beta, prior->v_beta); likratio = GSL_MIN(1.0, exp(likratio)); accept = gsl_ran_flat(r,0.0,1.0) <= likratio ? 1 : 0; if(accept) { param->betac += diff; for(i=0;ininter;i++) param->lambda_false[i] = param->lambda_false_tmp[i]; param->loglikTotal += (lr_new - lr_old); } } void sampleZ(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { /* Z and iZ */ int i,j,id; int indiv, total; int isCtrl, isReverse; float prob, maxl; float posi, negi; float pos, neg, tmp, tmp_lambda, tmp_neg, scale; int cond1; for(i=0;inuinter;i++) { isCtrl = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]] == 1) { isCtrl = 1; break; } } isReverse = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; cond1 = data->d[id] < GSL_MAX(param->lambda_false[id], data->ctrlavg[data->i2p[id]]) ? 1 : 0; if(cond1) { isReverse = 1; break; } } if(isCtrl || isReverse) { param->Z[i] = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; param->iZ[id] = 0; } } else { pos = 0.0; neg = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; tmp = data->d[id]; tmp_lambda = (param->lambda_true[id]); scale = 1.0; if(tmp > tmp_lambda && tmp_lambda > param->lambda_false[id]) tmp = tmp_lambda; if(data->d[id] < data->ctrlavg[data->i2p[id]] + log(10.0) && data->ctrl_obs[data->i2p[id]] > 0 ) { tmp_lambda = data->ctrlavg[data->i2p[id]] + 2.0 * (data->ctrlavg[data->i2p[id]] + log(10.0) - data->d[id]); scale = 1.0; } posi = log_gaussian(tmp, tmp_lambda, param->eta[data->i2p[id]]); tmp_neg = (param->lambda_false[id]); tmp = GSL_MAX(data->d[id], tmp_neg); negi = log_gaussian(tmp, tmp_neg, param->eta0[data->i2p[id]] * scale); pos += posi; neg += negi; maxl = posi > negi ? posi : negi; posi -= maxl; negi -= maxl; prob = param->ptrue * exp(posi) / (param->ptrue * exp(posi) + (1.0-param->ptrue) * exp(negi)); param->iZ[id] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; if(data->miss[id]) param->iZ[id] = 0; } /* Z */ if(data->n_u2a[i] == 1) { id = data->u2a[i][0]; param->Z[i] = param->iZ[id]; } else { maxl = pos > neg ? pos : neg; pos -= maxl; neg -= maxl; prob = param->ptrue * exp(pos) / (param->ptrue * exp(pos) + (1.0-param->ptrue) * exp(neg)); param->Z[i] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; indiv = 0; total = data->n_u2a[i]; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(param->iZ[id]) indiv++; } pos = ((double) indiv) / ((double) total); param->Z[i] = pos; param->Z[i] = gsl_ran_bernoulli(r, pos); } } } } float logit(float x) { return log(x) - log(1-x); } float inverseLogit(float x) { return exp(x) / (1.0 + exp(x)); } void sampleProportion(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int accept; float mhratio; param->ptrue_tmp = inverseLogit(logit(param->ptrue) + gsl_ran_gaussian(r, 0.1)); mhratio = LRprop(param, prior, data); /* uniform prior, so no prior ratio, indep. symetric random walk, so no proposal ratio */ accept = gsl_ran_flat(r,0.0,1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0; if(accept) param->ptrue = param->ptrue_tmp; } void updateMiss(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,p,ctrl_status; float new_miss, mhratio, maxval, tmpeta, tmplambda; for(i=0;ininter;i++) { if(data->miss[i]) { ctrl_status = data->ctrl[data->i2IP[i]]; p = data->i2p[i]; maxval = data->dmin_ctrl[p]; new_miss = data->d[i] + gsl_ran_gaussian(r, 1.0); if(ctrl_status) { tmplambda = param->lambda_false[i]; tmpeta = param->eta0[p]; mhratio = 0.0; if(data->ctrl_obs[p] > 0) { // mhratio += log_gaussian(new_miss, tmplambda, tmpeta); // mhratio -= log_gaussian(data->d[i], tmplambda, tmpeta); } mhratio += log_gaussian(new_miss, param->missMean, param->missVar); mhratio -= log_gaussian(data->d[i], param->missMean, param->missVar); mhratio = GSL_MIN(1.0, exp(mhratio)); if(gsl_ran_flat(r,0.0,1.0) < mhratio) { data->d[i] = new_miss; } } } } } /**************************************/ /*** Metropolis-Hastings with Gibbs ***/ /**************************************/ void mhgibbs(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary, const gsl_rng *r, int updateSum) { // updateMiss(param, prior, data, r); // set_ctrlavg(data); if(gsl_ran_flat(r,0.0,1.0) <= 0.2) sampleBeta0(param, prior, data, r); if(gsl_ran_flat(r,0.0,1.0) <= 0.2) sampleBetac(param, prior, data, r); DP_alpha_prey(param, prior, data, r); // DP_alpha_IP(param, prior, data, r); DP_mu(param, prior, data, r); if(gsl_ran_flat(r,0.0,1.0) <= 0.2) DP_eta(param, prior, data, r); if(gsl_ran_flat(r,0.0,1.0) <= 0.2) DP_eta0(param, prior, data, r); sampleZ(param, prior, data, r); // fprintf(stderr, "%.3f\t(%.3f\t%.3f)\t(%.3f\t%.3f)\n", data->d[186], param->lambda_true[186], param->eta[data->i2p[186]], param->lambda_false[186], param->eta0[data->i2p[186]]); // fprintf(stderr, "%.3f\n", param->eta0[3]); compute_lambda_all(param, prior, data); if(gsl_ran_flat(r,0.0,1.0) <= 0.2) sampleProportion(param, prior, data, r); } void updateMissMean(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float new_miss, mhratio; new_miss = param->missMean + gsl_ran_gaussian(r, 0.1); mhratio = 0.0; for(i=0;ininter;i++) { if(data->miss[i]) { mhratio += log_gaussian(data->d[i], new_miss, param->missVar); mhratio -= log_gaussian(data->d[i], param->missMean, param->missVar); } } mhratio += log_gaussian(new_miss, 0.0, 10.0); mhratio -= log_gaussian(param->missMean, 0.0, 10.0); mhratio = GSL_MIN(1.0, exp(mhratio)); if(gsl_ran_flat(r,0.0,1.0) < mhratio) { param->missMean = new_miss; } } void updateMissVar(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float new_miss, mhratio; new_miss = param->missVar + gsl_ran_gaussian(r, 0.1); if(new_miss > 0.0) { mhratio = 0.0; for(i=0;ininter;i++) { if(data->miss[i]) { mhratio += log_gaussian(data->d[i], param->missMean, new_miss); mhratio -= log_gaussian(data->d[i], param->missMean, param->missVar); } } mhratio -= log(gsl_ran_gamma_pdf(new_miss, 1.0, 1.0)); mhratio += log(gsl_ran_gamma_pdf(param->missVar, 1.0, 1.0)); mhratio = GSL_MIN(1.0, exp(mhratio)); if(gsl_ran_flat(r,0.0,1.0) < mhratio) { param->missVar = new_miss; } } } void write_mcmc(PARAM *param, PRIOR *prior, DATA *data, FILE *fp1, FILE *fp2, FILE *fp3, int ct) { int i; fprintf(fp1, "%d\t", ct+1); for(i=0;inprey-1;i++) { fprintf(fp1, "%.3f\t", param->alpha_prey[i]); } fprintf(fp1, "%.3f\n", param->alpha_prey[data->nprey-1]); fprintf(fp2, "%d\t", ct+1); for(i=0;inIP-1;i++) { fprintf(fp2, "%.3f\t", param->alpha_IP[i]); } fprintf(fp2, "%.3f\n", param->alpha_IP[data->nIP-1]); fprintf(fp3, "%d\t", ct+1); for(i=0;inprey-1;i++) { fprintf(fp3, "%.3f\t", param->mu[i]); } fprintf(fp3, "%.3f\n", param->mu[data->nprey-1]); } SAINT_v2.3.4/src/SAINTint-ctrl/setparam.c0000666000000000000000000001272611746171473016475 0ustar rootroot#include "saint.h" /**************************************************************/ /* initializing the model parameters */ /**************************************************************/ void memory_param(PARAM *param, PRIOR *prior, DATA *data) { assert(param->loglik_prey = (float *) calloc(data->nprey, sizeof(float))); assert(param->loglik_IP = (float *) calloc(data->nIP, sizeof(float))); assert(param->alpha_prey = (float *) calloc(data->nprey, sizeof(float))); assert(param->alpha_IP = (float *) calloc(data->nIP, sizeof(float))); assert(param->mu = (float *) calloc(data->nprey, sizeof(float))); assert(param->eta = (float *) calloc(data->nprey, sizeof(float))); assert(param->eta0 = (float *) calloc(data->nprey, sizeof(float))); assert(param->iZ = (int *) calloc(data->ninter, sizeof(int))); assert(param->Z = (int *) calloc(data->nuinter, sizeof(int))); assert(param->lambda_true = (float *) calloc(data->ninter, sizeof(float))); assert(param->lambda_false = (float *) calloc(data->ninter, sizeof(float))); assert(param->lambda_true_tmp = (float *) calloc(data->ninter, sizeof(float))); assert(param->lambda_false_tmp = (float *) calloc(data->ninter, sizeof(float))); } void set_Z(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { /* Z and iZ */ int i,j,id; int indiv, total; int isCtrl, isReverse; float prob, maxl; float posi, negi; float pos, neg, tmp; for(i=0;inuinter;i++) { isCtrl = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]] == 1) { isCtrl = 1; break; } } isReverse = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->d[id] < param->lambda_false[id]) { isReverse = 0; break; } } if(isCtrl || isReverse) { param->Z[i] = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; param->iZ[id] = 0; } } else { pos = 0.0; neg = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; tmp = data->d[id]; posi = log_gaussian(tmp, param->lambda_true[id], param->eta[data->i2p[id]]); negi = log_gaussian(tmp, param->lambda_false[id], param->eta0[data->i2p[id]]); pos += posi; neg += negi; maxl = posi > negi ? posi : negi; posi -= maxl; negi -= maxl; prob = param->ptrue * exp(posi) / (param->ptrue * exp(posi) + (1.0-param->ptrue) * exp(negi)); param->iZ[id] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; } /* Z */ if(data->n_u2a[i] == 1) { id = data->u2a[i][0]; param->Z[i] = param->iZ[id]; } else { /* maxl = pos > neg ? pos : neg; pos -= maxl; neg -= maxl; prob = param->ptrue * exp(pos) / (param->ptrue * exp(pos) + (1.0-param->ptrue) * exp(neg)); param->Z[i] = gsl_ran_flat(r,0.0,1.0) <= prob ? 1 : 0; */ indiv = 0; total = data->n_u2a[i]; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(param->iZ[id]) indiv++; } pos = ((double) indiv) / ((double) total); param->Z[i] = gsl_ran_bernoulli(r, pos); } } } } float vec_obs_mean(PARAM *param, DATA *data) { int i,ct; float m = 0.0; ct = 0; for(i=0;ininter;i++) { if(data->miss[i] == 0) { m += data->d[i]; ct++; } } m /= ((float) ct); return m; } float vec_obs_var(PARAM *param, DATA *data, float m) { int i,ct; float v = 0.0; ct = 0; for(i=0;ininter;i++) { if(data->miss[i] == 0) { v += pow(data->d[i] - m, 2.0); ct++; } } v /= ((float) (ct-1)); return v; } void initialize_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; param->beta0 = 0.0; param->betac = 0.0; for(i=0;inprey;i++) param->alpha_prey[i] = prior->theta_alpha_prey[prior->w_alpha_prey[i]]; for(i=0;inIP;i++) { param->alpha_IP[i] = 0.0; /* if(data->ctrl[i] == 0) param->alpha_IP[i] = prior->theta_alpha_IP[prior->w_alpha_IP[i]]; else param->alpha_IP[i] = 0.0; */ } for(i=0;inprey;i++) param->mu[i] = prior->theta_mu[prior->w_mu[i]]; for(i=0;inprey;i++) param->eta[i] = prior->theta_eta[prior->w_eta[i]]; for(i=0;inprey;i++) param->eta0[i] = prior->theta_eta0[prior->w_eta0[i]]; compute_lambda_all(param, prior, data); set_Z(param, prior, data, r); /* param->loglikTotal = loglik_all(param, prior, data); */ param->ptrue = 0.2; param->ptrue_tmp = 0.2; param->missMean = 0.0; j = 0; for(i=0;ininter;i++) { if(data->ctrl[data->i2IP[i]] && data->miss[i] == 0) { param->missMean += data->d[i]; j++; } } param->missMean /= ((double) j); param->missVar = 0.0; for(i=0;ininter;i++) { if(data->ctrl[data->i2IP[i]] && data->miss[i] == 0) { param->missVar += pow(data->d[i] - param->missMean, 2.0); } } param->missVar /= ((double) (j-1)); data->dvar = param->missVar; param->missMean = data->dmin__ctrl - 0.0 * sqrt(param->missVar); param->missVar *= 0.5; for(i=0;ininter;i++) { if(data->miss[i]) data->d[i] = gsl_ran_gaussian(r, sqrt(param->missVar)) + param->missMean; } //fprintf(stderr, "%.3f\n", param->missMean); //fprintf(stderr, "%.3f\n", param->missVar); } void set_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { memory_param(param, prior, data); initialize_param(param, prior, data, r); } SAINT_v2.3.4/src/SAINTint-ctrl/dpeta.c0000666000000000000000000000737011746171473015755 0ustar rootroot#include "saint.h" /********* ALPHA_prey *********/ void DP_eta_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inprey;i++) (wsum[prior->w_eta[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_eta) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_eta[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_eta[i] = gammap[i]; for(j=0;jgamma_eta[i] *= (1.0 - gammap[j]); } } void DP_eta_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_eta, tmp_lambda, maxl; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_eta[i]); cur_eta = param->eta[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(param->Z[data->a2u[id]]) tmp_lambda = param->lambda_true[id]; else tmp_lambda = param->lambda_false[id]; prob[i] += log_gaussian(data->d[id], (tmp_lambda), prior->theta_eta[i]); } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_eta[pid] = ranMultinom(r, prob, _MAX_COMP_); param->eta[pid] = prior->theta_eta[prior->w_eta[pid]]; } float log_exponential(float x, float mean) { float res = -log(mean) - x / mean; return res; } void DP_eta_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept, pass; float Delta, mhratio, newval, scale, tmp_lambda, tmp; scale = prior->gamma_eta[pid] / (1.0 - prior->gamma_eta[pid]); if(inuse[pid] == 0) { pass = 0; while(!pass) { newval = 1.0 / gsl_ran_gamma(r, 100.0, 1.0); if(newval < 2.0) pass = 1; } Delta = newval - prior->theta_eta[pid]; prior->theta_eta[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 0.1); if(prior->theta_eta[pid] + Delta <= 0.0 || prior->theta_eta[pid] + Delta > 2.0) { accept = 0; } else { for(i=0;inprey;i++) { if(prior->w_eta[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(param->Z[data->a2u[id]] && data->miss[id] == 0) { tmp_lambda = param->lambda_true[id]; tmp = data->d[id]; mhratio += log_gaussian(tmp, (tmp_lambda), prior->theta_eta[pid]+Delta) - log_gaussian(tmp, (tmp_lambda), prior->theta_eta[pid]); } } } } mhratio += log_inv_gamma( (prior->theta_eta[pid]+Delta), prior->shape_eta, prior->scale_eta) - log_inv_gamma( prior->theta_eta[pid], prior->shape_eta, prior->scale_eta); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; } /* if accepted, update param and lambda */ if(accept) { prior->theta_eta[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_eta[i] == pid) { param->eta[i] += Delta; } } } } } void DP_eta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) inuse[prior->w_eta[i]] = 1; DP_eta_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_eta_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_eta_theta(param, prior, data, r, i, inuse); /* loglik update */ } SAINT_v2.3.4/src/SAINTint-ctrl/setsummary.c0000666000000000000000000001625711746171473017075 0ustar rootroot#include "saint.h" /**************************************************************/ /* initializing the model summaryeters */ /**************************************************************/ void memory_summary(SUMMARY *summary, DATA *data) { assert(summary->iZ = (float *) calloc(data->ninter, sizeof(float))); assert(summary->Z = (float *) calloc(data->nuinter, sizeof(float))); assert(summary->alpha_prey = (float *) calloc(data->nprey, sizeof(float))); assert(summary->alpha_IP = (float *) calloc(data->nIP, sizeof(float))); assert(summary->mu = (float *) calloc(data->nprey, sizeof(float))); assert(summary->eta = (float *) calloc(data->nprey, sizeof(float))); assert(summary->eta0 = (float *) calloc(data->nprey, sizeof(float))); assert(summary->lambda_true = (float *) calloc(data->ninter, sizeof(float))); assert(summary->lambda_false = (float *) calloc(data->ninter, sizeof(float))); } void initialize_summary(SUMMARY *summary, DATA *data) { int i; for(i=0;ininter;i++) summary->iZ[i] = 0.0; for(i=0;inuinter;i++) summary->Z[i] = 0.0; for(i=0;inprey;i++) summary->alpha_prey[i] = 0.0; for(i=0;inIP;i++) summary->alpha_IP[i] = 0.0; for(i=0;inprey;i++) summary->mu[i] = 0.0; for(i=0;inprey;i++) summary->eta[i] = 0.0; for(i=0;inprey;i++) summary->eta0[i] = 0.0; for(i=0;ininter;i++) summary->lambda_true[i] = 0.0; for(i=0;ininter;i++) summary->lambda_false[i] = 0.0; } void initialize_histogram(HISTOGRAM *hist) { int i; float binsize = ((float) (_HISTO_END_ - _HISTO_START_)) / ((float) _HISTO_BIN_); for(i=0;i<_HISTO_BIN_;i++) { hist->start[i] = _HISTO_START_ + ((float) i) * binsize; hist->end[i] = _HISTO_START_ + ((float) (i+1)) * binsize; } for(i=0;i<(_HISTO_BIN_+2);i++) hist->count[i] = 0.0; } void initialize_histogram2(HISTOGRAM2 *hist) { int i; float binsize = ((float) (_HISTO_END2_ - _HISTO_START2_)) / ((float) _HISTO_BIN2_); for(i=0;i<_HISTO_BIN2_;i++) { hist->start[i] = _HISTO_START2_ + ((float) i) * binsize; hist->end[i] = _HISTO_START2_ + ((float) (i+1)) * binsize; } for(i=0;i<(_HISTO_BIN2_+2);i++) hist->count[i] = 0.0; } void set_summary(SUMMARY *summary, DATA *data) { memory_summary(summary, data); initialize_summary(summary, data); initialize_histogram(&(summary->hist_alpha_prey)); initialize_histogram(&(summary->hist_alpha_IP)); initialize_histogram(&(summary->hist_mu)); initialize_histogram2(&(summary->hist_eta)); initialize_histogram2(&(summary->hist_eta0)); } void updateSummary(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary) { int i; for(i=0;ininter;i++) summary->iZ[i] += ((float) param->iZ[i]); for(i=0;inuinter;i++) summary->Z[i] += ((float) param->Z[i]); for(i=0;inprey;i++) summary->alpha_prey[i] += param->alpha_prey[i]; for(i=0;inIP;i++) summary->alpha_IP[i] += param->alpha_IP[i]; for(i=0;inprey;i++) summary->mu[i] += param->mu[i]; for(i=0;inprey;i++) summary->eta[i] += param->eta[i]; for(i=0;inprey;i++) summary->eta0[i] += param->eta0[i]; for(i=0;ininter;i++) summary->lambda_true[i] += param->lambda_true[i]; for(i=0;ininter;i++) summary->lambda_false[i] += param->lambda_false[i]; updateHistogram(param, prior, data, summary); } void scaleSummary(SUMMARY *summary, DATA *data, int iter) { int i; float scale = 1.0 / ((float) iter); for(i=0;ininter;i++) summary->iZ[i] *= scale; for(i=0;inuinter;i++) summary->Z[i] *= scale; for(i=0;inprey;i++) summary->alpha_prey[i] *= scale; for(i=0;inIP;i++) summary->alpha_IP[i] *= scale; for(i=0;inprey;i++) summary->mu[i] *= scale; for(i=0;inprey;i++) summary->eta[i] *= scale; for(i=0;inprey;i++) summary->eta0[i] *= scale; for(i=0;ininter;i++) summary->lambda_true[i] *= scale; for(i=0;ininter;i++) summary->lambda_false[i] *= scale; } /*************************************/ /** Histogram updates **/ /*************************************/ void updateHist_alpha_prey(HISTOGRAM *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_alpha_prey[i] < hist->start[0]) { hist->count[0] += prior->gamma_alpha_prey[i]; } else if(prior->theta_alpha_prey[i] >= hist->end[_HISTO_BIN_-1]) { hist->count[_HISTO_BIN_ + 1] += prior->gamma_alpha_prey[i]; } else { for(j=0;j<_HISTO_BIN_;j++) { if(prior->theta_alpha_prey[i] >= hist->start[j] && prior->theta_alpha_prey[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_alpha_prey[i]; break; } } } } } void updateHist_alpha_IP(HISTOGRAM *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_alpha_IP[i] < hist->start[0]) { hist->count[0] += prior->gamma_alpha_IP[i]; } else if(prior->theta_alpha_IP[i] >= hist->end[_HISTO_BIN_-1]) { hist->count[_HISTO_BIN_ + 1] += prior->gamma_alpha_IP[i]; } else { for(j=0;j<_HISTO_BIN_;j++) { if(prior->theta_alpha_IP[i] >= hist->start[j] && prior->theta_alpha_IP[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_alpha_IP[i]; break; } } } } } void updateHist_mu(HISTOGRAM *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_mu[i] < hist->start[0]) { hist->count[0] += prior->gamma_mu[i]; } else if(prior->theta_mu[i] >= hist->end[_HISTO_BIN_-1]) { hist->count[_HISTO_BIN_ + 1] += prior->gamma_mu[i]; } else { for(j=0;j<_HISTO_BIN_;j++) { if(prior->theta_mu[i] >= hist->start[j] && prior->theta_mu[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_mu[i]; break; } } } } } void updateHist_eta(HISTOGRAM2 *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_eta[i] < hist->start[0]) { hist->count[0] += prior->gamma_eta[i]; } else if(prior->theta_eta[i] >= hist->end[_HISTO_BIN2_-1]) { hist->count[_HISTO_BIN2_ + 1] += prior->gamma_eta[i]; } else { for(j=0;j<_HISTO_BIN2_;j++) { if(prior->theta_eta[i] >= hist->start[j] && prior->theta_eta[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_eta[i]; break; } } } } } void updateHist_eta0(HISTOGRAM2 *hist, PRIOR *prior) { int i,j; for(i=0;i<_MAX_COMP_;i++) { if(prior->theta_eta0[i] < hist->start[0]) { hist->count[0] += prior->gamma_eta0[i]; } else if(prior->theta_eta0[i] >= hist->end[_HISTO_BIN2_-1]) { hist->count[_HISTO_BIN2_ + 1] += prior->gamma_eta0[i]; } else { for(j=0;j<_HISTO_BIN2_;j++) { if(prior->theta_eta0[i] >= hist->start[j] && prior->theta_eta0[i] < hist->end[j]) { hist->count[j+1] += prior->gamma_eta0[i]; break; } } } } } void updateHistogram(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary) { updateHist_alpha_prey(&(summary->hist_alpha_prey), prior); updateHist_alpha_IP(&(summary->hist_alpha_IP), prior); updateHist_mu(&(summary->hist_mu), prior); updateHist_eta(&(summary->hist_eta), prior); updateHist_eta0(&(summary->hist_eta0), prior); } SAINT_v2.3.4/src/SAINTint-ctrl/saint.c0000666000000000000000000001272111746171473015772 0ustar rootroot#include "saint.h" int nrow(FILE *fp) { char buf[100000]; int n = 0; while(fgets(buf, sizeof(buf), fp) != NULL) n++; return n; } int newlinechar(char *buf, int k) { int i; int found = 0; for(i=0;igamma_alpha_IP[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_alpha_IP[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_alpha_prey\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_alpha_prey[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_alpha_prey[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_mu\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_mu[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_mu[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_eta\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_eta[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_eta[i]); fprintf(stderr, "\n"); fprintf(stderr, "DP_eta0\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->gamma_eta0[i]); fprintf(stderr, "\n"); for(i=0;i<_MAX_COMP_;i++) fprintf(stderr, "%.2f\t", prior->theta_eta0[i]); fprintf(stderr, "\n\n"); } int commandLine(int argc, char **argv, int *burn, int *iter) { if (argc < 4) { fprintf(stderr, "usage: saint-int-ctrl [interactionFile] [preyFile] [baitFile] [nburnin] [niter]\n"); return 1; } /* interaction file: IPnumber \t bait \t prey \t spectralCount \n */ /* prey file: prey \t sequenceLength \n */ /* bait file: bait \t IPnumber \t isControl \n */ FILE *fpinter = fopen(argv[1], "r"); FILE *fpprey = fopen(argv[2], "r"); FILE *fpbait = fopen(argv[3], "r"); if(fpinter == NULL) { fprintf(stderr, "Cannot locate interaction data %s.\n", argv[1]); return 1; } if(fpprey == NULL) { fprintf(stderr, "Cannot locate prey data %s.\n", argv[2]); return 1; } if(fpbait == NULL) { fprintf(stderr, "Cannot locate bait data %s.\n", argv[3]); return 1; } if(argc < 5) { fprintf(stderr, "The number of burnin was not provided. Set to 5,000.\n"); *burn = 5000; } else { *burn = atoi(argv[4]); } if(argc < 6) { fprintf(stderr, "The number of main interations was not provided. Set to 20,000.\n"); *iter = 20000; } else { *iter = atoi(argv[5]); } fclose(fpinter); fclose(fpprey); fclose(fpbait); return 0; } /***************************** MAIN ***************************/ int main(int argc, char **argv) { int i, burn, iter, ct; DATA data; PARAM param; PRIOR prior; SUMMARY summary; const gsl_rng_type *T; gsl_rng *r; gsl_rng_env_setup(); T = gsl_rng_default; r = gsl_rng_alloc(T); /* Command Line */ if(commandLine(argc, argv, &burn, &iter)) return 1; FILE *fpinter = fopen(argv[1], "r"); FILE *fpprey = fopen(argv[2], "r"); FILE *fpbait = fopen(argv[3], "r"); /* Read interaction data, identify baits, preys, and IPs, make unique interaction data frame, identify the mapping between different levels of data */ system("mkdir LOG"); /* error logs */ system("mkdir MAPPING"); /* mapping logs */ system("mkdir MCMC"); /* posterior samples */ system("mkdir RESULT"); /* posterior probabilities, other summaries */ fprintf(stderr, "Reading data and mapping interactions\n"); read_data(fpinter, fpprey, fpbait, &data); // initial_impute(¶m, &data, r); set_ctrlavg(&data); // printMap(&data); /* Set up model parameters and prior elicitation */ set_prior(¶m, &prior, &data, r); set_param(¶m, &prior, &data, r); set_summary(&summary, &data); /* updates and summary */ chdir("MCMC"); FILE *fp1 = fopen("alpha_prey","w"); FILE *fp2 = fopen("alpha_IP","w"); FILE *fp3 = fopen("mu","w"); /* burnin */ ct = 0; fprintf(stderr, "Burn-in Period\n"); for(i=0;ininter;i++) { if(data->ctrl[data->i2IP[i]] == 0 && data->miss[i] == 0) fprintf(fp, "%s\t%s\t%s\t%s\t%.3f\t%.3f\t%.3f\n", data->ip[i], data->bait[i], data->prey[i], data->PREYGENE[data->i2p[i]], data->d[i], summary->Z[data->a2u[i]], summary->iZ[i]); } fclose(fp); } void write_unique_interactions(DATA *data, SUMMARY *summary) { int i,j; int isCtrl; int id, ct, pid; float intsum; float maxp, avgp, geop, tmp; FILE *fp = fopen("unique_interactions", "w"); fprintf(fp, "Bait\tPrey\tPreyGene\tIP\tIntensity\tIntensitySum\tNumRep\tProb\tiProb\tctrlIntensity\tAvgP\tMaxP\n"); for(i=0;inuinter;i++) { isCtrl = 0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(data->ctrl[data->i2IP[id]]) isCtrl = 1; } if(isCtrl == 0) { fprintf(fp, "%s\t%s\t%s\t", data->ubait[i], data->uprey[i], data->PREYGENE[data->ui2p[i]]); for(j=0;j<(data->n_u2a[i]-1);j++) fprintf(fp, "%s|", data->ip[data->u2a[i][j]]); fprintf(fp, "%s\t", data->ip[data->u2a[i][data->n_u2a[i]-1]]); intsum = 0.0; for(j=0;j<(data->n_u2a[i]-1);j++) { id = data->u2a[i][j]; if(data->miss[id]==0) intsum += exp(data->d[id]); if(data->miss[id]==0) fprintf(fp, "%.3f", exp(data->d[id])); else fprintf(fp, "."); fprintf(fp, "|"); } id = data->u2a[i][data->n_u2a[i]-1]; if(data->miss[id]==0) intsum += exp(data->d[id]); if(data->miss[id]==0) fprintf(fp, "%.3f", exp(data->d[id])); else fprintf(fp, "."); fprintf(fp, "\t"); fprintf(fp, "%.3f\t%d\t", intsum, data->n_u2a[i]); fprintf(fp, "%.2f\t", summary->Z[i]); for(j=0;j<(data->n_u2a[i]-1);j++) { id = data->u2a[i][j]; fprintf(fp, "%.2f", summary->iZ[id]); fprintf(fp, "|"); } id = data->u2a[i][data->n_u2a[i]-1]; fprintf(fp, "%.2f", summary->iZ[id]); fprintf(fp, "\t"); pid = data->ui2p[i]; ct = 0; for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(data->ctrl[data->i2IP[id]]) { if(data->miss[id]==0) fprintf(fp, "%.2f", exp(data->d[id])); else fprintf(fp, "."); if(ct < data->nctrl-1) fprintf(fp, "|"); else fprintf(fp, "\t"); ct++; } } /* maxp */ maxp = 0.0; avgp = 0.0; geop = 0.0; for(j=0;jn_u2a[i];j++) { id = data->u2a[i][j]; if(summary->iZ[id] > maxp) maxp = summary->iZ[id]; avgp += summary->iZ[id] / ((float) data->n_u2a[i]); tmp = data->d[id] == 0.0 ? 0.001 : summary->iZ[id]; geop += log(tmp) / ((float) data->n_u2a[i]); } geop = exp(geop); fprintf(fp, "%.4f\t%.4f\n", avgp, maxp); /* fprintf(fp, "%.2f\n", ((float) data->preyNinter[data->ui2p[i]]) / ((float) data->nIP)); */ } } fclose(fp); } void write_prey(DATA *data, SUMMARY *summary) { int i; FILE *fp = fopen("preys", "w"); fprintf(fp, "Prey\tAlpha_prey\tMu\n"); for(i=0;inprey;i++) { fprintf(fp, "%s\t%.2f\t%.2f\n", data->PREY[i], summary->alpha_prey[i], summary->mu[i]); } fclose(fp); } void write_IP(DATA *data, SUMMARY *summary) { int i; FILE *fp = fopen("IPs", "w"); fprintf(fp, "IP\tBait\tAlpha_IP\n"); for(i=0;inIP;i++) { fprintf(fp, "%s\t%s\t%.2f\n", data->IP[i], data->BAIT[data->IP2b[i]], summary->alpha_IP[i]); } fclose(fp); } void write_bait(DATA *data, SUMMARY *summary) { int i,j; FILE *fp = fopen("baits", "w"); fprintf(fp, "Bait\tIP\tAlpha_IP\n"); for(i=0;inbait;i++) { fprintf(fp, "%s\t", data->BAIT[i]); for(j=0;jbaitNIP[i]-1;j++) { fprintf(fp, "%s|", data->IP[data->b2IP[i][j]]); } fprintf(fp, "%s\t", data->IP[data->b2IP[i][data->baitNIP[i]-1]]); for(j=0;jbaitNIP[i]-1;j++) { fprintf(fp, "%.2f|", summary->alpha_IP[data->b2IP[i][j]]); } fprintf(fp, "%.2f\n", summary->alpha_IP[data->b2IP[i][data->baitNIP[i]-1]]); } fclose(fp); } void write_histogram(FILE *fp, HISTOGRAM *hist) { int i; fprintf(fp, "-inf\t%.2f\t%.2f\n", hist->start[0], hist->count[0]); for(i=0;i<_HISTO_BIN_;i++) { fprintf(fp, "%.2f\t%.2f\t%.2f\n", hist->start[i], hist->end[i], hist->count[i+1]); } fprintf(fp, "%.2f\tinf\t%.2f\n", hist->end[_HISTO_BIN_-1], hist->count[_HISTO_BIN_+1]); } void write_histogram2(FILE *fp, HISTOGRAM2 *hist) { int i; fprintf(fp, "-inf\t%.2f\t%.2f\n", hist->start[0], hist->count[0]); for(i=0;i<_HISTO_BIN2_;i++) { fprintf(fp, "%.2f\t%.2f\t%.2f\n", hist->start[i], hist->end[i], hist->count[i+1]); } fprintf(fp, "%.2f\tinf\t%.2f\n", hist->end[_HISTO_BIN2_-1], hist->count[_HISTO_BIN2_+1]); } void write_hyperprior(DATA *data, SUMMARY *summary) { FILE *fp1 = fopen("hist_alpha_prey", "w"); FILE *fp2 = fopen("hist_alpha_IP", "w"); FILE *fp3 = fopen("hist_mu", "w"); FILE *fp4 = fopen("hist_eta", "w"); FILE *fp5 = fopen("hist_eta0", "w"); write_histogram(fp1, &(summary->hist_alpha_prey)); write_histogram(fp2, &(summary->hist_alpha_IP)); write_histogram(fp3, &(summary->hist_mu)); write_histogram2(fp4, &(summary->hist_eta)); write_histogram2(fp5, &(summary->hist_eta0)); fclose(fp1); fclose(fp2); fclose(fp3); fclose(fp4); fclose(fp5); } void write_result(DATA *data, SUMMARY *summary) { chdir("RESULT"); write_interactions(data, summary); write_unique_interactions(data, summary); write_prey(data, summary); write_IP(data, summary); write_bait(data, summary); write_hyperprior(data, summary); write_matrix_data(data, summary); write_matrix_data2(data, summary); chdir(".."); } /******************************/ void write_matrix_data(DATA *data, SUMMARY *summary) { int i,j,k,id; int endLine, isMatch; FILE *fp = fopen("matrix_form","w"); endLine = -1; for(j=0;jnIP;j++) { if(data->ctrl[j] != 1) endLine = j; } /* header line 1 */ fprintf(fp, "Bait\t"); for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j]) { fprintf(fp, "%s\t", data->BAIT[id]); } } for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->BAIT[id]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 2 */ fprintf(fp, "IP\t"); for(j=0;jnIP;j++) { if(data->ctrl[j]) { fprintf(fp, "%s\t", data->IP[j]); } } for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 3 */ fprintf(fp, "Prey\t"); for(j=0;jnIP;j++) { if(data->ctrl[j]) { fprintf(fp, "\t"); } } for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%.2f", summary->alpha_IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } for(i=0;inprey;i++) { fprintf(fp, "%s\t", data->PREY[i]); /* Control runs first */ for(j=0;jnIP;j++) { if(data->ctrl[j]) { /* find if prey wise data has this IP */ /* if not, leave the space blank */ /* else, biz as usual: (count | prob | lambda_s, lambda_ns) */ isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { fprintf(fp, "\t"); } else { if(data->miss[isMatch]==0) fprintf(fp, "%.2f|%.2f\t", data->d[isMatch], (summary->lambda_false[isMatch])); else fprintf(fp, "\t"); } } } /* Rest of the IPs */ for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { /* fprintf(fp, "\t"); */ } else { fprintf(fp, "%.2f|%.2f|%.2f|%.2f", data->d[isMatch], summary->Z[data->a2u[isMatch]], (summary->lambda_true[isMatch]), (summary->lambda_false[isMatch])); } if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } } fclose(fp); } void write_matrix_data2(DATA *data, SUMMARY *summary) { int i,j,k,id; int endLine, isMatch; FILE *fp = fopen("matrix_form_short","w"); endLine = -1; for(j=0;jnIP;j++) { if(data->ctrl[j] != 1) endLine = j; } /* header line 1 */ fprintf(fp, "Bait\t"); for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j]) { fprintf(fp, "%s\t", data->BAIT[id]); } } for(j=0;jnIP;j++) { id = data->IP2b[j]; if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->BAIT[id]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 2 */ fprintf(fp, "IP\t"); for(j=0;jnIP;j++) { if(data->ctrl[j]) { fprintf(fp, "%s\t", data->IP[j]); } } for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%s", data->IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } /* header line 3 */ fprintf(fp, "Prey\t"); for(j=0;jnIP;j++) { if(data->ctrl[j]) { fprintf(fp, "\t"); } } for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { fprintf(fp, "%.2f", summary->alpha_IP[j]); if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } for(i=0;inprey;i++) { fprintf(fp, "%s\t", data->PREY[i]); /* Control runs first */ for(j=0;jnIP;j++) { if(data->ctrl[j]) { /* find if prey wise data has this IP */ /* if not, leave the space blank */ /* else, biz as usual: (count | prob | lambda_s, lambda_ns) */ isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { fprintf(fp, "\t"); } else { fprintf(fp, "%.2f\t", data->d[isMatch]); } } } /* Rest of the IPs */ for(j=0;jnIP;j++) { if(data->ctrl[j] == 0) { isMatch = -1; for(k=0;kpreyNinter[i];k++) { id = data->p2i[i][k]; if(strcmp(data->ip[id], data->IP[j]) == 0) isMatch = id; } if(isMatch == -1) { /* fprintf(fp, "\t"); */ } else { fprintf(fp, "(%.2f|%.2f|%.2f)", data->d[isMatch], summary->Z[data->a2u[isMatch]], summary->iZ[isMatch]); } if(j==endLine) fprintf(fp, "\n"); else fprintf(fp, "\t"); } } } fclose(fp); } SAINT_v2.3.4/src/SAINTint-ctrl/printmap.c0000666000000000000000000000545511746171473016514 0ustar rootroot#include "saint.h" void printInter(DATA *data) { int i; FILE *fp = fopen("interaction","w"); fprintf(fp, "ip\tbait\tprey\tIP\tBAIT\tPREY\tubait\tuprey\n"); for(i=0;ininter;i++) { fprintf(fp, "%s\t%s\t%s\t", data->ip[i], data->bait[i], data->prey[i]); fprintf(fp, "%s\t%s\t%s\t", data->IP[data->i2IP[i]], data->BAIT[data->i2b[i]], data->PREY[data->i2p[i]]); fprintf(fp, "%s\t%s\n", data->ubait[data->a2u[i]], data->uprey[data->a2u[i]]); } fclose(fp); } void printUInter(DATA *data) { int i,j,k; FILE *fp = fopen("unique_interaction","w"); fprintf(fp, "ubait\tuprey\tubait\tuprey\tip\tbait\tprey\n"); for(i=0;inuinter;i++) { for(j=0;jn_u2a[i];j++) { k = data->u2a[i][j]; fprintf(fp, "%s\t%s\t", data->ubait[i], data->uprey[i]); fprintf(fp, "%s\t%s\t%s\n", data->ip[k], data->bait[k], data->prey[k]); } } fprintf(fp, "\n\n************************\n\n"); fprintf(fp, "ubait\tuprey\tBAIT\tPREY\n"); for(i=0;inuinter;i++) { fprintf(fp, "%s\t%s\t%s\t%s\n", data->ubait[i], data->uprey[i], data->BAIT[data->ui2b[i]], data->PREY[data->ui2p[i]]); } fclose(fp); } void printIP(DATA *data) { int i,j,k; /* IP to bait */ FILE *fp = fopen("IP","w"); fprintf(fp, "IP\tBAIT\n"); for(i=0;inIP;i++) { fprintf(fp, "%s\t%s\n", data->IP[i], data->BAIT[data->IP2b[i]]); } fprintf(fp, "\n\n************************\n\n"); /* IP to interactions */ fprintf(fp, "IP\tip\tbait\tprey\n"); for(i=0;inIP;i++) { for(j=0;jIPNinter[i];j++) { k = data->IP2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->IP[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printBait(DATA *data) { int i,j,k; FILE *fp = fopen("bait","w"); /* bait to IP */ fprintf(fp, "BAIT\tIP\n"); for(i=0;inbait;i++) { for(j=0;jbaitNIP[i];j++) { k = data->b2IP[i][j]; fprintf(fp, "%s\t%s\n", data->BAIT[i], data->IP[k]); } } fprintf(fp, "\n\n************************\n\n"); /* bait to interaction */ fprintf(fp, "BAIT\tip\tbait\tprey\n"); for(i=0;inbait;i++) { for(j=0;jbaitNinter[i];j++) { k = data->b2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->BAIT[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printPrey(DATA *data) { int i,j,k; FILE *fp = fopen("prey","w"); /* prey to interaction */ for(i=0;inprey;i++) { for(j=0;jpreyNinter[i];j++) { k = data->p2i[i][j]; fprintf(fp, "%s\t%s\t%s\t%s\n", data->PREY[i], data->ip[k], data->bait[k], data->prey[k]); } } fclose(fp); } void printMap(DATA *data) { chdir("MAPPING"); printInter(data); printUInter(data); /* printIP(data); printBait(data); printPrey(data); */ chdir(".."); } SAINT_v2.3.4/src/SAINTint-ctrl/dpalphaIP.c0000666000000000000000000001035711746171473016521 0ustar rootroot#include "saint.h" /********* ALPHA_IP *********/ void DP_alpha_IP_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inIP;i++) { if(data->ctrl[i] == 0) (wsum[prior->w_alpha_IP[i]])++; } for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_alpha_IP) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_alpha_IP[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_alpha_IP[i] = gammap[i]; for(j=0;jgamma_alpha_IP[i] *= (1.0 - gammap[j]); } } void DP_alpha_IP_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_alpha_IP, tmp_lambda, maxl; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_alpha_IP[i]); cur_alpha_IP = param->alpha_IP[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jIPNinter[pid];j++) { id = data->IP2i[pid][j]; if(param->Z[data->a2u[id]]) { tmp_lambda = param->lambda_true[id] + prior->theta_alpha_IP[i] - cur_alpha_IP; prob[i] += log_gaussian(data->d[id], (tmp_lambda), param->eta[data->i2p[id]]); } } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_alpha_IP[pid] = ranMultinom(r, prob, _MAX_COMP_); param->alpha_IP[pid] = prior->theta_alpha_IP[prior->w_alpha_IP[pid]]; for(j=0;jIPNinter[pid];j++) { id = data->IP2i[pid][j]; param->lambda_true[id] += param->alpha_IP[pid] - cur_alpha_IP; } } void DP_alpha_IP_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale; scale = prior->gamma_alpha_IP[pid] / (1.0 - prior->gamma_alpha_IP[pid]); if(inuse[pid] == 0) { newval = gsl_ran_gaussian(r, sqrt(prior->v_alpha_IP)) + prior->m_alpha_IP; Delta = newval - prior->theta_alpha_IP[pid]; prior->theta_alpha_IP[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 0.25); for(i=0;inIP;i++) { if(prior->w_alpha_IP[i] == pid) { for(j=0;jIPNinter[i];j++) { id = data->IP2i[i][j]; if(param->Z[data->a2u[id]] && data->miss[id] == 0) { param->lambda_true_tmp[id] = param->lambda_true[id] + Delta; mhratio += log_gaussian(data->d[id], (param->lambda_true_tmp[id]), param->eta[data->i2p[id]]) - log_gaussian(data->d[id], (param->lambda_true[id]), param->eta[data->i2p[id]]); } } } } mhratio += log_gaussian(prior->theta_alpha_IP[pid] + Delta, prior->m_alpha_IP, prior->v_alpha_IP) - log_gaussian(prior->theta_alpha_IP[pid], prior->m_alpha_IP, prior->v_alpha_IP); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; /* if accepted, update param and lambda */ if(accept) { prior->theta_alpha_IP[pid] += Delta; for(i=0;inIP;i++) { if(prior->w_alpha_IP[i] == pid && data->ctrl[i] == 0) { param->alpha_IP[i] += Delta; for(j=0;jIPNinter[i];j++) { id = data->IP2i[i][j]; param->lambda_true[id] += Delta; } } } } } } void DP_alpha_IP(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inIP;i++) inuse[prior->w_alpha_IP[i]] = 1; DP_alpha_IP_gamma(param, prior, data, r); for(i=0;inIP;i++) { if(data->ctrl[i] == 0) DP_alpha_IP_w(param, prior, data, r, i); } for(i=0;i<_MAX_COMP_;i++) DP_alpha_IP_theta(param, prior, data, r, i, inuse); /* loglik update */ mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_IP[i] * prior->theta_alpha_IP[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] -= mean; for(i=0;inIP;i++) param->alpha_IP[i] -= mean; param->beta0 += mean; } SAINT_v2.3.4/src/SAINTint-ctrl/meancounts.c0000666000000000000000000000151711746171473017031 0ustar rootroot#include "saint.h" /**************************************************************/ /* computing expected counts in log scale (s/ns) */ /**************************************************************/ /*************************/ /* all interactions */ /*************************/ void compute_lambda_true_all(PARAM *param, PRIOR *prior, DATA *data) { int i; for(i=0;ininter;i++) { param->lambda_true[i] = param->beta0 + param->alpha_prey[data->i2p[i]]; } } void compute_lambda_false_all(PARAM *param, PRIOR *prior, DATA *data) { int i; for(i=0;ininter;i++) { param->lambda_false[i] = param->betac + param->mu[data->i2p[i]]; } } void compute_lambda_all(PARAM *param, PRIOR *prior, DATA *data) { compute_lambda_true_all(param, prior, data); compute_lambda_false_all(param, prior, data); } SAINT_v2.3.4/src/SAINTint-ctrl/setprior.c0000666000000000000000000000645611746171473016533 0ustar rootroot#include "saint.h" void memory_prior(PARAM *param, PRIOR *prior, DATA *data) { assert(prior->w_alpha_prey = (int *) calloc(data->nprey, sizeof(int))); assert(prior->w_alpha_IP = (int *) calloc(data->nIP, sizeof(int))); assert(prior->w_mu = (int *) calloc(data->nprey, sizeof(int))); assert(prior->w_eta = (int *) calloc(data->nprey, sizeof(int))); assert(prior->w_eta0 = (int *) calloc(data->nprey, sizeof(int))); } void initialize_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,pass; float mean; float MAXC = ((float) _MAX_COMP_); prior->m_beta = 0.0; prior->v_beta = 100.0; prior->atrue = 0.1; prior->afalse = 1.0 - prior->atrue; prior->rho_alpha_prey = GSL_MAX(((float) data->nprey) * 0.01, 1.0); prior->m_alpha_prey = 0.0; prior->v_alpha_prey = 10.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_alpha_prey[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_prey[i] = gsl_ran_gaussian(r, 2.0) + 2.0; for(i=0;inprey;i++) prior->w_alpha_prey[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_prey[i] * prior->theta_alpha_prey[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_prey[i] -= mean; prior->rho_alpha_IP = 1.0; prior->m_alpha_IP = 0.0; prior->v_alpha_IP = 10.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_alpha_IP[i] = 1.0 / MAXC; /* for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] = gsl_ran_gaussian(r, 2.0); */ for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] = 0.0; /* for(i=0;inIP;i++) prior->w_alpha_IP[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); */ for(i=0;inIP;i++) prior->w_alpha_IP[i] = 0; mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_IP[i] * prior->theta_alpha_IP[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_IP[i] -= mean; prior->rho_mu = GSL_MAX(((float) data->nprey) * 0.01, 1.0); prior->m_mu = 0.0; prior->v_mu = 10.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_mu[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) prior->theta_mu[i] = gsl_ran_gaussian(r, 2.0) - 2.0; for(i=0;inprey;i++) prior->w_mu[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_mu[i] * prior->theta_mu[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_mu[i] -= mean; prior->rho_eta = GSL_MAX(((float) data->nprey) * 0.01, 1.0); prior->shape_eta = 1.0; prior->scale_eta = 1.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_eta[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) { pass = 0; while(!pass){ prior->theta_eta[i] = gsl_ran_flat(r, 0.0, 1.0) ; if(prior->theta_eta[i] < 1.0) pass = 1; } } for(i=0;inprey;i++) prior->w_eta[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); prior->rho_eta0 = GSL_MAX(((float) data->nprey) * 0.01, 1.0); prior->shape_eta0 = 1.0; prior->scale_eta0 = 1.0; for(i=0;i<_MAX_COMP_;i++) prior->gamma_eta0[i] = 1.0 / MAXC; for(i=0;i<_MAX_COMP_;i++) { pass = 0; while(!pass){ prior->theta_eta0[i] = gsl_ran_flat(r, 0.0, 1.0) ; if(prior->theta_eta0[i] < 1.0) pass = 1; } } for(i=0;inprey;i++) prior->w_eta0[i] = ((int) gsl_ran_flat(r,0.0,MAXC)); } void set_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { memory_prior(param, prior, data); initialize_prior(param, prior, data, r); } SAINT_v2.3.4/src/SAINTint-ctrl/dpalphaprey.c0000666000000000000000000001036411746171473017166 0ustar rootroot#include "saint.h" /********* ALPHA_prey *********/ void DP_alpha_prey_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inprey;i++) (wsum[prior->w_alpha_prey[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_alpha_prey) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_alpha_prey[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_alpha_prey[i] = gammap[i]; for(j=0;jgamma_alpha_prey[i] *= (1.0 - gammap[j]); } } void DP_alpha_prey_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_alpha_prey, tmp_lambda, maxl, tmp; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_alpha_prey[i]); cur_alpha_prey = param->alpha_prey[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(param->Z[data->a2u[id]]) { tmp_lambda = param->lambda_true[id] + prior->theta_alpha_prey[i] - cur_alpha_prey; tmp = data->d[id]; prob[i] += log_gaussian(tmp, (tmp_lambda), param->eta[pid]); } } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_alpha_prey[pid] = ranMultinom(r, prob, _MAX_COMP_); param->alpha_prey[pid] = prior->theta_alpha_prey[prior->w_alpha_prey[pid]]; for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; param->lambda_true[id] += param->alpha_prey[pid] - cur_alpha_prey; } } void DP_alpha_prey_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept; float Delta, mhratio, newval, scale; scale = prior->gamma_alpha_prey[pid] / (1.0 - prior->gamma_alpha_prey[pid]); if(inuse[pid] == 0) { newval = gsl_ran_gaussian(r, sqrt(prior->v_alpha_prey)) + prior->m_alpha_prey; Delta = newval - prior->theta_alpha_prey[pid]; prior->theta_alpha_prey[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 1.0); for(i=0;inprey;i++) { if(prior->w_alpha_prey[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(param->Z[data->a2u[id]] && data->miss[id] == 0) { param->lambda_true_tmp[id] = param->lambda_true[id] + Delta; mhratio += log_gaussian(data->d[id], (param->lambda_true_tmp[id]), param->eta[i]) - log_gaussian(data->d[id], (param->lambda_true[id]), param->eta[i]); } } } } mhratio += log_gaussian(prior->theta_alpha_prey[pid] + Delta, prior->m_alpha_prey, prior->v_alpha_prey) - log_gaussian(prior->theta_alpha_prey[pid], prior->m_alpha_prey, prior->v_alpha_prey); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; /* if accepted, update param and lambda */ if(accept) { prior->theta_alpha_prey[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_alpha_prey[i] == pid) { param->alpha_prey[i] += Delta; for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; param->lambda_true[id] += Delta; } } } } } } void DP_alpha_prey(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; float mean; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) { inuse[prior->w_alpha_prey[i]] = 1; } DP_alpha_prey_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_alpha_prey_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_alpha_prey_theta(param, prior, data, r, i, inuse); /* loglik update */ mean = 0.0; for(i=0;i<_MAX_COMP_;i++) mean += prior->gamma_alpha_prey[i] * prior->theta_alpha_prey[i]; for(i=0;i<_MAX_COMP_;i++) prior->theta_alpha_prey[i] -= mean; for(i=0;inprey;i++) param->alpha_prey[i] -= mean; param->beta0 += mean; } SAINT_v2.3.4/src/SAINTint-ctrl/saint.h0000666000000000000000000003131111746171473015773 0ustar rootroot/* Copyright (C) <2011> For troubleshooting, contact hyung_won_choi@nuhs.edu.sg. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You can obtain a copy of the GNU General Public License from . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _MAX_BUF_ 2000 #define _MAX_NAME_ 2000 #define _MAX_COUNT_ 250 #define _MAX_COMP_ 20 #define _SKIP_ 10 #define _PRINT_FREQ_ 100 #define _HISTO_START_ -10.0 #define _HISTO_END_ 10.0 #define _HISTO_BIN_ 100 #define _HISTO_START2_ 0.5 #define _HISTO_END2_ 20.5 #define _HISTO_BIN2_ 200 #define _TRUNC_ 1000.0 #define _tiny_ 1e-100 /* global variable */ float dmin; typedef struct tagDATA { /*************/ /* logistics */ /*************/ int ninter; int nuinter; int nprey; int nIP; int nbait; /**************************/ /* interaction level data */ /**************************/ char **prey; char **bait; char **ip; /* raw data, each row corresponds to one interaction, case-sensitive */ float *d; float *iprob; int *miss; int *isCtrl; float *dmin_ctrl; float *dmin_inter; float dmin__ctrl; float dvar; int *ctrl_obs; /*********************************/ /* unique interaction level data */ /*********************************/ char **uprey; char **ubait; float *prob; int *isAnyCtrl; int *n_u2a; /* number of individual interactions per unique interactions */ int **u2a; /* unique interactions to individual interactions */ int *a2u; /* individual interactions to unique interactions */ /* crucial indicator for probability calculation */ /***********************************/ /* unique bait and prey level data */ /***********************************/ float *IPtotalAbundance; float *IPbaitCoverage; float *preyLength; char *preyOverride; char **PREY; /* unique preys */ char **PREYGENE; /* unique preys */ char **BAIT; /* unique baits */ char **IP; /* unique IP #s */ int nctrl; int ntest; int *ctrl; /* index: control IPs or not: 'C' = control, 'T' = test */ float *ctrlavg; int *preyNinter; /* # interaction for prey */ int *baitNinter; /* # interaction for bait */ int *IPNinter; /* # interaction in an IP */ int *baitNIP; /* # IPs per bait */ /****************/ /* mapping data */ /****************/ int *i2p; /* index: interaction to prey */ int *i2b; /* index: interaction to bait */ int *i2IP; /* index: interaction to IP */ int **p2i; /* index: prey to interaction */ int **b2i; /* index: bait to interaction */ int **IP2i; /* index: IP to interaction */ int *ui2p; /* index: unique interaction to prey */ int *ui2b; /* index: unique interaction to bait */ /* no need to build reverse mapping for unique interactions */ /* perhaps this mapping is unnecessary */ int **b2IP; /* index: bait to IP */ int *IP2b; /* index: IP to bait */ } DATA; typedef struct tagPARAM{ float loglikTotal; float *loglik_prey; float *loglik_IP; float missMean; /* new parameters; estimated empirically and fixed */ float missVar; float beta0; float betac; float *alpha_prey; float *alpha_IP; float *mu; float *eta; float *eta0; int *iZ; /* individual interactions */ int *Z; /* unique interactions */ float ptrue; float ptrue_tmp; float *lambda_true; float *lambda_false; float *lambda_true_tmp; float *lambda_false_tmp; } PARAM; typedef struct tagPRIOR{ /* parametric portion */ float m_beta; /* set to zero */ float v_beta; float atrue, afalse; /* nonparametric portion */ float rho_alpha_prey; float m_alpha_prey; float v_alpha_prey; int *w_alpha_prey; float gamma_alpha_prey[_MAX_COMP_]; float theta_alpha_prey[_MAX_COMP_]; float rho_alpha_IP; float m_alpha_IP; float v_alpha_IP; int *w_alpha_IP; float gamma_alpha_IP[_MAX_COMP_]; float theta_alpha_IP[_MAX_COMP_]; float rho_mu; float m_mu; float v_mu; int *w_mu; float gamma_mu[_MAX_COMP_]; float theta_mu[_MAX_COMP_]; float rho_eta; float shape_eta; float scale_eta; int *w_eta; float gamma_eta[_MAX_COMP_]; float theta_eta[_MAX_COMP_]; float rho_eta0; float shape_eta0; float scale_eta0; int *w_eta0; float gamma_eta0[_MAX_COMP_]; float theta_eta0[_MAX_COMP_]; } PRIOR; typedef struct tagHISTOGRAM{ float start[_HISTO_BIN_]; float end[_HISTO_BIN_]; float count[_HISTO_BIN_ + 2]; } HISTOGRAM; typedef struct tagHISTOGRAM2{ float start[_HISTO_BIN2_]; float end[_HISTO_BIN2_]; float count[_HISTO_BIN2_ + 2]; } HISTOGRAM2; typedef struct tagSUMMARY{ float *iZ; float *Z; float *alpha_prey; float *alpha_IP; float *mu; float *eta; float *eta0; float *lambda_true; float *lambda_false; HISTOGRAM hist_alpha_prey; HISTOGRAM hist_alpha_IP; HISTOGRAM hist_mu; HISTOGRAM2 hist_eta; HISTOGRAM2 hist_eta0; } SUMMARY; /*************/ /* functions */ /*************/ int nrow(FILE *fp); int newlinechar(char *buf, int k); int ncol(FILE *fp); int commandLine(int argc, char **argv, int *burn, int *iter); void print_DP(PRIOR *prior, DATA *data); void initial_impute(PARAM *param, DATA *data, const gsl_rng *r); /* initdata.c */ void read_interaction_data(FILE *fpinter, DATA *data); void find_unique_interaction(DATA *data); int unique_elements(char **x, int *unique, int nx); int count_unique_elements(char **x, int nx); void centerData(float *x, int n, int takelog); int mapPreyToData(DATA *data); void read_prey_data(FILE *fpprey, DATA *data); void mapIPtoBait(DATA *data); int mapIPBaitToData(DATA *data); void getIPinfo(DATA *data); void read_bait_data(FILE *fpbait, DATA *data); void get_dmin(DATA *data); void set_ctrlavg(DATA *data); void read_data(FILE *fpinter, FILE *fpprey, FILE *fpbait, DATA *data); /* meancounts.c */ void compute_lambda_true_all(PARAM *param, PRIOR *prior, DATA *data); void compute_lambda_false_all(PARAM *param, PRIOR *prior, DATA *data); void compute_lambda_all(PARAM *param, PRIOR *prior, DATA *data); /* void compute_lambda_true_prey(PARAM *param, PRIOR *prior, DATA *data, int pid); void compute_lambda_false_prey(PARAM *param, PRIOR *prior, DATA *data, int pid); void compute_lambda_prey(PARAM *param, PRIOR *prior, DATA *data, int pid); void compute_lambda_true_IP(PARAM *param, PRIOR *prior, DATA *data, int ipid); void compute_lambda_IP(PARAM *param, PRIOR *prior, DATA *data, int ipid); */ /* likelihood.c */ float LRprop(PARAM *param, PRIOR *prior, DATA *data); float loglik_all(PARAM *param, PRIOR *prior, DATA *data); float loglik_all_class(PARAM *param, PRIOR *prior, DATA *data, int cl); float loglik_all_class_tmp(PARAM *param, PRIOR *prior, DATA *data, int cl); /* mcmc.c */ float gaussian(float x, float mu, float var); float log_gaussian(float x, float mu, float var); float log_inv_gamma(float x, float sh, float sc); void sampleBeta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void sampleBetac(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void sampleZ(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); float logit(float x); float inverseLogit(float x); void sampleProportion(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void updateMiss(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void mhgibbs(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary, const gsl_rng *r, int updateSum); void write_mcmc(PARAM *param, PRIOR *prior, DATA *data, FILE *fp1, FILE *fp2, FILE *fp3, int ct); void updateMissMean(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void updateMissVar(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* printmap.c */ void printInter(DATA *data); void printUInter(DATA *data); void printIP(DATA *data); void printBait(DATA *data); void printPrey(DATA *data); void printMap(DATA *data); /* setprior.c */ void memory_prior(PARAM *param, PRIOR *prior, DATA *data); void initialize_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void set_prior(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* setparam.c */ void memory_param(PARAM *param, PRIOR *prior, DATA *data); void initialize_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void set_param(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void set_Z(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); float vec_obs_mean(PARAM *param, DATA *data); float vec_obs_var(PARAM *param, DATA *data, float m); /* setsumamry.c */ void memory_summary(SUMMARY *summary, DATA *data); void initialize_summary(SUMMARY *summary, DATA *data); void initialize_histogram(HISTOGRAM *hist); void initialize_histogram2(HISTOGRAM2 *hist); void set_summary(SUMMARY *summary, DATA *data); void updateSummary(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary); void scaleSummary(SUMMARY *summary, DATA *data, int iter); void updateHist_alpha_prey(HISTOGRAM *hist, PRIOR *prior); void updateHist_alpha_IP(HISTOGRAM *hist, PRIOR *prior); void updateHist_mu(HISTOGRAM *hist, PRIOR *prior); void updateHist_eta(HISTOGRAM2 *hist, PRIOR *prior); void updateHist_eta0(HISTOGRAM2 *hist, PRIOR *prior); void updateHistogram(PARAM *param, PRIOR *prior, DATA *data, SUMMARY *summary); /* dpalphaprey.c */ void DP_alpha_prey_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_alpha_prey_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_alpha_prey_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_alpha_prey(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpalphaIP.c */ void DP_alpha_IP_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_alpha_IP_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_alpha_IP_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_alpha_IP(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpmu.c */ void DP_mu_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_mu_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_mu_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_mu(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpmu.c */ void DP_eta_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_eta_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_eta_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_eta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* dpmu.c */ void DP_eta0_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); void DP_eta0_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid); void DP_eta0_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int cid, int *inuse); void DP_eta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r); /* result.c */ void write_interactions(DATA *data, SUMMARY *summary); void write_unique_interactions(DATA *data, SUMMARY *summary); void write_prey(DATA *data, SUMMARY *summary); void write_IP(DATA *data, SUMMARY *summary); void write_bait(DATA *data, SUMMARY *summary); void write_histogram(FILE *fp, HISTOGRAM *hist); void write_histogram2(FILE *fp, HISTOGRAM2 *hist); void write_hyperprior(DATA *data, SUMMARY *summary); void write_result(DATA *data, SUMMARY *summary); void write_matrix_data(DATA *data, SUMMARY *summary); void write_matrix_data2(DATA *data, SUMMARY *summary); /**************** mmath.c ******************************/ float vec_sum(const float *vec, int len); float vec_max(const float *vec, int len); float vec_min(const float *vec, int len); float vec_mean(const float *vec, int len); float vec_var(const float *vec, int len); float vec_med(const float *vec, int len); float vec_mad(const float *vec, int len); float geometric_mean(float *x, int n); int ranMultinom(const gsl_rng *r, float *p, int K); SAINT_v2.3.4/src/SAINTint-ctrl/dpeta0.c0000666000000000000000000000717211746171473016035 0ustar rootroot#include "saint.h" /********* ALPHA_prey *********/ void DP_eta0_gamma(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i,j; int wsum[_MAX_COMP_]; int wrevsum[_MAX_COMP_]; float gammap[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) { wsum[i] = 0; wrevsum[i] = 0; } for(i=0;inprey;i++) (wsum[prior->w_eta0[i]])++; for(i=_MAX_COMP_-1;i>=0;i--) { for(j=i;j<_MAX_COMP_;j++) wrevsum[i] += wsum[j]; } for(i=0;i<_MAX_COMP_-1;i++) gammap[i] = gsl_ran_beta(r, (1.0 + (double) wsum[i]), ((double) prior->rho_eta0) + ((double) wrevsum[i+1])); gammap[_MAX_COMP_-1] = 1.0; prior->gamma_eta0[0] = gammap[0]; for(i=1;i<_MAX_COMP_;i++) { prior->gamma_eta0[i] = gammap[i]; for(j=0;jgamma_eta0[i] *= (1.0 - gammap[j]); } } void DP_eta0_w(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid) { int i,j,id; float cur_eta, tmp_lambda, maxl; float prob[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) prob[i] = log(prior->gamma_eta0[i]); cur_eta = param->eta0[pid]; for(i=0;i<_MAX_COMP_;i++) { for(j=0;jpreyNinter[pid];j++) { id = data->p2i[pid][j]; if(data->ctrl[data->i2IP[id]]) { tmp_lambda = param->lambda_false[id]; prob[i] += log_gaussian(data->d[id], (tmp_lambda), prior->theta_eta0[i]); } } } maxl = vec_max(prob, _MAX_COMP_); for(i=0;i<_MAX_COMP_;i++) prob[i] -= maxl; for(i=0;i<_MAX_COMP_;i++) prob[i] = exp(prob[i]); prior->w_eta0[pid] = ranMultinom(r, prob, _MAX_COMP_); param->eta0[pid] = prior->theta_eta0[prior->w_eta0[pid]]; } void DP_eta0_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) { int i, j, id, accept, pass; float Delta, mhratio, newval, scale, tmp_lambda; scale = prior->gamma_eta0[pid] / (1.0 - prior->gamma_eta0[pid]); if(inuse[pid] == 0) { pass = 0; while(!pass) { newval = 1.0 / gsl_ran_gamma(r, 100.0, 1.0); if(newval < 2.0) pass = 1; } Delta = newval - prior->theta_eta0[pid]; prior->theta_eta0[pid] = newval; } else { /* metropolis-hastings */ mhratio = 0.0; Delta = gsl_ran_gaussian(r, 0.1); if(prior->theta_eta0[pid] + Delta <= 0.0 || prior->theta_eta0[pid] + Delta > 2.0) { accept = 0; } else { for(i=0;inprey;i++) { if(prior->w_eta0[i] == pid) { for(j=0;jpreyNinter[i];j++) { id = data->p2i[i][j]; if(data->ctrl[data->i2IP[id]]) { tmp_lambda = param->lambda_false[id]; mhratio += log_gaussian(data->d[id], (tmp_lambda), prior->theta_eta0[pid]+Delta) - log_gaussian(data->d[id], (tmp_lambda), prior->theta_eta0[pid]); } } } } mhratio += log_inv_gamma( (prior->theta_eta0[pid]+Delta), prior->shape_eta0, prior->scale_eta0) - log_inv_gamma( prior->theta_eta0[pid], prior->shape_eta0, prior->scale_eta0); accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ; } /* if accepted, update param and lambda */ if(accept) { prior->theta_eta0[pid] += Delta; for(i=0;inprey;i++) { if(prior->w_eta0[i] == pid) { param->eta0[i] += Delta; } } } } } void DP_eta0(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r) { int i; int inuse[_MAX_COMP_]; for(i=0;i<_MAX_COMP_;i++) inuse[i] = 0; for(i=0;inprey;i++) inuse[prior->w_eta0[i]] = 1; DP_eta0_gamma(param, prior, data, r); for(i=0;inprey;i++) DP_eta0_w(param, prior, data, r, i); for(i=0;i<_MAX_COMP_;i++) DP_eta0_theta(param, prior, data, r, i, inuse); /* loglik update */ }