/*
 * NOTE(review): condition of this copy of the DEDUP source
 *
 * This copy appears to have lost its original line breaks, and some spans
 * that began with a less-than sign seem to be missing: the #include
 * directives carry no header names, and several for-loop headers stop
 * partway through their condition. Because the lines are joined, every
 * double-slash comment now runs to the end of its physical line and hides
 * the code that follows it. Presumably this is extraction damage; restore
 * the file from a pristine copy before building it or changing any logic.
 * The code text below is left exactly as found.
 *
 * What the visible text establishes:
 *
 *   Globals
 *     N, G, P (int) and data (imatrix) hold the dimensions and contents of
 *     the data set for the whole program.
 *
 *   read_data(f, N, P, G)
 *     Reads N, P and G from the stream f (in that order), then declares an
 *     imatrix with N+1 rows and P columns. If end of file is seen while the
 *     matrix is being filled, a diagnostic goes to cerr and the program
 *     calls exit(37). A heading for an echo of the data is written to cout;
 *     the loop that follows it is incomplete in this copy. A branch reports
 *     values other than 0 and 1 and calls exit(7); its condition is among
 *     the missing text. Every entry of row 0 must lie in 0 .. G-1,
 *     otherwise a diagnostic goes to cerr and the program calls exit(7).
 *     Returns the matrix by value.
 *
 *   process_command_line(argc, argv)
 *     Writes program name, version and build date to cerr. With fewer than
 *     two arguments it prints a usage message to cout and calls exit(0).
 *     Otherwise it opens argv[1] for input, calls exit(11) if the open
 *     failed, prints a banner and the file name to cout, stores the result
 *     of read_data in the global data, and closes the stream.
 *
 *   main(argc, argv)
 *     Sets the showpoint, fixed and left flags and a precision of 4 on cout,
 *     calls process_command_line, and inserts 0 into the LList named Good.
 *     The loop over l0 that follows is incomplete in this copy, so the
 *     column comparison itself is not visible. The remaining text prints
 *     entries of data, then the input file name and the LList named Bad
 *     (labelled as the eliminated columns), and starts a reminder when
 *     Bad.Length() is greater than 0. The text stops in the middle of that
 *     statement, so the end of main is not visible here.
 *
 *   Exit codes seen in the visible text
 *     0  usage message shown      7  invalid data value or group label
 *     11 input file not opened    37 end of file before all data was read
 */
/********************************************************* DEDUP source code and documentation copyright 1995,1996,1997 Stephan Pelikan, Steven H. Rogstad and the University of Cincinnati. We're giving this program away. You alone must decide if it is correct and suitable for your purposes. If you do use it, you do so at your own risk. You may redistribute the source code or executable versions of this program provided that you 1) include this copyright notice, 2) do not charge money for it, and 3) distribute the program without modifications. **********************************************************/ /* CHANGES and HISTORY */ //#include //not needed with include of fstream.h #include #include #include #include #include #include "arrays.h" //#include "maths.h" #include "llist.h" #define MIN(A,B) ((A <= B)? A:B) #define PROGRAM_NAME "DEDUP" #define VERSION "0.1" #define BUILD_DATE "14 AUG 97" /*********************************** ***********************************/ /*********************************** some global variables where data is stored **************************************/ int N,G,P; imatrix data; /* iVector number_of_bands; matrix frequency_of_bands; Vector avg_number_bands; iVector number_in_group; */ /*********************************** FORMAT OF DATA FILE There are N "population" bands and P lanes. The data is grouped into G groups. The data is stored as an ascii file with N+1 rows and P columns that follow a line on which N, P, and G appear (in that order). The first row is used to specify which group the lane belongs to. You must include the first row. If you don't want to define subgroups, make G=1 and the first row all 0's. The groups are identified by consecutive integers starting with 0 and ending with G-1. For this program there are three groups, offspring (group 0) maternal parent (group 1) and potential paternal parent (group 2). 
All the program does is this: Read in the data, generate a list of distinct columns in the data, and write a new dataset consisting of the distinct columns to standard out. ***********************************/ /* read_data() is called by process_command_line() It reads in data from fstream f, tests it for validity, stores the dimensions of the data (N,P,G) in global variables, and returns an imatrix of the data. */ imatrix read_data(fstream& f,int& N,int& P,int& G) { int i,j; f >> N >> P >> G; imatrix data(N+1,P); for(i=0;i> data[i][j]; if(f.eof()) { cerr << "Problem in read_data(): can't seem to read enough data" << endl; cerr << "EOF occurred before entire data array was read." << endl; exit(37); } } } cout << "Here's the data I've just read\n"; cout << "\n\n-----------------------------------------------\n"; for(i=0;i 1) { cerr << "Problem: your data file contains values other that 0 and 1\n"; cerr << "For example, data["<< i << "][" << j << "] = " << data[i][j] << endl; cerr << "Check this.\n"; exit(7); } } } /// Check to make sure labels for groups in first row of data array are okay. for(j=0;j<(P);j++) { if(data[0][j] <0 || data[0][j] >(G)-1) { cerr << "Problem: your data file mentions more groups than expected\n"; cerr << "With G = " << G << " only the values 0,...," << (G-1) << " should appear in\n"; cerr << "the first row of data. 
But I found data[0][" << j << "]=" << data[0][j] << endl; cerr << "You'd better fix it before running the program on this data\n"; exit(7); } } return data; } void process_command_line(int argc,char *argv[]) { cerr << PROGRAM_NAME << " version " << VERSION << " built on " << BUILD_DATE << endl; if(argc < 2) { cout << "\n----------------------------------------------------------\n"; cout << "Use this program by giving the command "<< endl; cout << argv[0] << " " << endl; cout << "\n----------------------------------------------------------\n"; exit( 0); } fstream fin(argv[1],ios::in); if( fin.fail()) { cout << "Sorry: can't seem to open file " << argv[1] << endl;; exit(11); } cout << "Output from program " << PROGRAM_NAME << " version " << VERSION << " built " << BUILD_DATE << endl;; cout << "Reading data from file: " << argv[1] << endl; data=read_data(fin,N,P,G); fin.close(); return; } int main(int argc, char *argv[]) { int l0,i,j, equal,notequal,l,k; LList Good,Bad; cout << setiosflags(ios::showpoint| ios::fixed| ios::left) ; cout << setprecision(4); process_command_line(argc,argv) ; Good.Insert(0); for(l0=1;l0=0 ) cout << data[i][j] << " "; } cout << endl; } cout << "\n\nThis dataset generated from " << argv[1] << endl; cout << " by eliminating these columns:" << Bad << endl; if(Bad.Length()>0) { cout << "Don't forget to check the number of groups since some"<