/****************************************************************************** FILE: PROFSTAT.C USAGE: PROFSTAT CONTROL_FILE_NAME This is a set of routines for making statistical summaries of profile time series, especially ADCP data. For any variable selected, the mean, standard deviation, and standard error of the mean are calculated. A histogram is generated and output in any of four styles: raw, cumulative, normalized (divided by the number of points), and normalized-cumulative (which ranges from zero to one). This version calculates single variable statistics only. Before the statistics are calculated, the profile may be referenced to a bin range, and/or its first or second difference may be calculated. Each time range read from a list can be analyzed as an independent ensemble, or all the ranges can be combined into a single ensemble. If the profiles are to be analyzed by CODAS block, use \CODAS\UTIL\LSTBTIM (List Block Time) to generate a list of time ranges for each block. This set of routines may be used as a template for generating other programs that process CODAS profile data by time range. Useful extensions could include multivariate statistics, time series analysis, and an option to produce Matlab binary file output. Eric Firing 88-03-17 89-08-08: HUI ZHU 1) (Minor change) The ASCII output file is automatically given the extension .PRS 2) (Minor change) The control file prompts have been converted to one-word equivalents by using the underscore to connect words (specifically, step_size and time_ranges). 3) (Major addition) Matlab data file(s) is (are) written with the following items for each variable (VAR; for example U or V): ARRAYS: pstat_VAR: n x 4, with columns [Npts, depth, mean, std_dev] n is the number of rows with Npts > 0; rows with Npts = 0 are deleted from the Matlab file. ref_VAR: 1 x 2, with columns [first_bin, last_bin] (bin range of reference layer; if none was used, this will be [0,0].) Others: label_VAR: data label (as specified in control file) type_VAR : data name (as specified in control file) diff_VAR : difference (0, 1, or 2 for none, first, second dif.) j_npts: always 1; column with Npts in pstat_VAR j_depth 2 depth j_mean 3 mean j_std 4 std_dev If "time_ranges" are "combined", only one *.MAT file will be generated. Otherwise, a number of MAT files will be generated, one for each time range, with sequential file extension M01, M02, ... Thu 12-21-1989 Eric Firing: Flexibility added: if depth information is lacking, the array index will be substituted. Only adjust_depth() was modified, and there is no effect on the control file. Tue 12-26-89 JR: The program was updated so that it does not fail with search before beginning. Now that the large memory model is required anyway, we can open the database with directories in memory. Mon 07-02-90 JR: Program revised to use pressure if depth not available, and to use indexes if both are not available. Depth array initialized to BADFLOAT after allocating memory to make default to indexing work. Wed 08-15-90 JR: Program revised to use PROFILE_FLAGS, if available, to set database array(s) to BADFLOAT according to some mask specified in the control file. 96/01/25 EF: Revised to call get_data_list_index in dbget_n.c instead of using the fixed list in include/data_lst.h. As a result, profstat can now access any named array. ---------------------------------------------------------------- control file structure: dbname: output: step_size: ndepth: time_ranges: (*combined or separate*) (*variables list, terminated by an unrecognized data name (e.g. 'end'):*) (*option list, terminated by an unrecognized option (e.g. 'end'):*) [reference: to ] (*referencing range; negative for no referencing*) [difference: ] (*n=0, 1, or 2 for none, first, second difference*) (*either or both of the following:*) [statistics: scale= ] (*e.g., 0.01 for output in cm instead of m*) [histogram: nbins= origin= increment= style= ] (*style is a string with one or two lower case characters; the presence of n and/or c indicates that the display will be normalized and/or cumulative; use h if neither option is desired.*) [flag_mask: ... end] (*bit-mask is any one or more of those defined in profmask.h; these cumulatively indicate which criteria to consider in flagging profile bins as bad prior to calculations*) (*list of YMDHMS time pairs:*) to . . ******************************************************************************/ #include "geninc.h" /* DB*() */ #include "data_id.h" /* DEPTH, ... */ #include "ioserv.h" /* check_error() */ #include "vector.h" #include "use_db.h" /* get_time_range(), print_time_range(), ... */ #include "vstat.h" #include "matfile.h" #include "profmask.h" /* PROFILE_FLAGS mask #defines */ #define NVAR 10 /* max number of datatypes for which stats are calculated */ #define TRUE 1 #define FALSE 0 /* prototypes for functions in this file */ void take_difference(int i, float *data, int n); void adjust_depth(int i, float *depth, float *depth_a, int n); void create_mat_file(FILE *fpt, UNISTAT_TYPE *s, float *d, int bin_1st, int bin_last, char *data_type, int *diff); void initialize_ensemble(void); int get_depth(void); int get_pressure(void); void increment_ensemble(void); void calculate_ensemble(void); void print_ensemble(void); /* global variables for this file */ static FILE *fp_out, *fp_mat; static FILE_NAME_TYPE dbname, out_filename, mat_filename; static int steps, ierr, type; static int save_j_flag; /* 1, to save j_npts,j_depth,j_mean and j_std in MAT file; 0, otherwise. */ static unsigned int n; static int data_id[NVAR+1]; /* list of data types by ID */ static char data_type[NVAR+1][20]; /* list of data types */ static HISTOGRAM_TYPE hist[NVAR]; static UNISTAT_TYPE ustat[NVAR]; static int combined; /* flag: 1 if all in one ensemble */ static int mflag[NVAR]; /* 1 if mean, std dev are desired */ static int hflag[NVAR]; /* 1 if histogram is desired */ static int nvariables; /* number of variables analyzed */ static int ndepth; /* max number of depth bins to process */ static int difference[NVAR]; /* 0th, 1st, or 2nd difference: flag */ static int ref_bins[NVAR][2]; /* bin range for reference calc. */ static UBYTE flag_mask[NVAR]; /* profile flags mask */ static float *data; static UBYTE *profile_flags; static float *depth; /* allocated for ndepth elements */ static float *adj_depth; /* adjusted when taking differences */ static char h_style[NVAR][3]; /* printout style for each variable */ static double stat_scale[NVAR]; /* scale for stat printout */ static YMDHMS_TIME_TYPE start, end, /* for time range */ ptime; /* current profile time */ /* ----------------------------------------------------------------------- */ void create_mat_file(FILE *fpt, UNISTAT_TYPE *s, float *d, int bin_1st, int bin_last, char *data_type, int *diff) { double *std_dev, *temp_n, *temp_dep, *temp_sum, temp_double[2]; int i, j1=1, j2=2, j3=3, j4=4, k, bin_range[2]; static MAT_ARRAY_FP_TYPE *mat_pstat_ptr; char pstatname[20], refname[20], labname[20], typename[20], diffname[20]; strcpy(pstatname, "pstat_"); strcpy(refname, "ref_"); strcpy(labname, "label_"); strcpy(typename, "type_"); strcpy(diffname, "diff_"); strncat(pstatname,data_type,13); strncat(refname,data_type,13); strncat(labname,data_type,13); strncat(typename,data_type,13); strncat(diffname,data_type,13); if (bin_1st < 0) bin_range[0] = bin_range[1] = 0; else { bin_range[0] = bin_1st+1; bin_range[1] = bin_last+1; } check_error( ((std_dev = (double *) calloc(s->n, sizeof(double))) == NULL), "allocating memory"); check_error( ((temp_sum = (double *) calloc(s->n, sizeof(double))) == NULL), "allocating memory"); check_error( ((temp_n = (double *) calloc(s->n, sizeof(double))) == NULL), "allocating memory"); check_error( ((temp_dep = (double *) calloc(s->n, sizeof(double))) == NULL), "allocating memory"); k = 0; for(i = 0; i < s->n; i++) { if((s->npts)[i] > 0) { /* keep those values with npts > 0. */ *(std_dev+k) = sqrt((s->sumsq)[i]); *(temp_sum+k) = s->sum[i]; *(temp_n+k) = (double) (s->npts)[i]; *(temp_dep+k) = (double) d[i]; k++; } } if(k > 0) { mat_pstat_ptr = start_mat_array(fpt, 'd',pstatname,'c', k); add_to_mat_array(mat_pstat_ptr, (char *)temp_n); add_to_mat_array(mat_pstat_ptr, (char *)temp_dep); add_to_mat_array(mat_pstat_ptr, (char *)temp_sum); add_to_mat_array(mat_pstat_ptr, (char *)std_dev); end_mat_array(mat_pstat_ptr); /* 91/11/18 jr+: save all non-double as double in mat-file */ temp_double[0] = (double) bin_range[0]; temp_double[1] = (double) bin_range[1]; savemat(fpt, 'd', refname, 1, 2, 0, (char *)temp_double, (char *)NULL); savemat(fpt, 't', labname, 1, 1, 0, (char *)s->name, (char *)NULL); savemat(fpt, 't', typename, 1, 1, 0, (char *)data_type, (char *)NULL); temp_double[0] = (double) *diff; savemat(fpt, 'd', diffname, 1, 1, 0, (char *)temp_double, (char *)NULL); if (save_j_flag) { temp_double[0] = (double) j1; savemat(fpt, 'd', "j_npts",1, 1, 0, (char *)temp_double, (char *)NULL); temp_double[0] = (double) j2; savemat(fpt, 'd', "j_depth",1, 1, 0, (char *)temp_double, (char *)NULL); temp_double[0] = (double) j3; savemat(fpt, 'd', "j_mean",1, 1, 0, (char *)temp_double, (char *)NULL); temp_double[0] = (double) j4; savemat(fpt, 'd', "j_std",1, 1, 0, (char *)temp_double, (char *)NULL); } } free(std_dev); free(temp_sum); free(temp_n); free(temp_dep); } /* In these two functions, take_difference and adjust_depth, the first argument gives the order of differencing to be done: 0 for none, 1 for first, 2 for second. */ void take_difference(int i, float *data, int n) { switch (i) { case 0: break; case 1: dif1(data, data, n); break; case 2: dif2(data, data, n); break; default: check_error( i, "Invalid difference flag."); } return; } /* take_difference */ void adjust_depth(int i, float *depth, float *depth_a, int n) { int j; switch (i) { case 0: for (j=0; j= ADJ_BADFLOAT) return(0); return(n); } /* get_depth */ int get_pressure(void) /* jr+ */ { int i; type = P; /* assuming it is the same throughout the range */ n = ndepth; DBGET_F(&type, depth, &n, &ierr); check_error( (ierr && (ierr != INVALID_TYPE_REQUEST)), "getting pressure" ); for (i = 0; i= ADJ_BADFLOAT) return(0); return(n); } /* get_pressure */ void initialize_ensemble(void) { int i; for (i=0; i UBYTE */ int nd, id; FILE *fp_cnt; fp_cnt = get_fpcnt(argc, argv); check_error(get_parameters(fp_cnt, profstat_params, profstat_name_lists), "getting profstat parameters"); check_dbopen(1, dbname, READ_ONLY, DIR_IN_MEMORY); printf("\n\n DATABASE: %s SUCCESSFULLY OPENED", dbname); new_extension(prs_filename, out_filename, ".prs"); fp_out = check_fopen(prs_filename,"w"); depth = (float *)calloc(ndepth, sizeof(float)); for (i = 0; i < ndepth; i++) depth[i] = BADFLOAT; /* jr+ */ adj_depth = (float *)calloc(ndepth, sizeof(float)); data = (float *)calloc(ndepth, sizeof(float)); profile_flags = (UBYTE *)calloc(ndepth, sizeof(UBYTE)); check_error( (data == NULL || depth == NULL || adj_depth == NULL), "Not enough memory for depth or data arrays."); /* ---> Turn off all the option flags. */ for (i=0; i Read the list of variable id's, labels, and options. */ for (i=0, finished=0; i Read the list of options for each variable. */ end_options = 0; while (!end_options) /* process an option */ { check_error( (fscanf(fp_cnt, " %19s", option) != 1), "reading data name or operation"); if (!strcmp(option, "reference:")) { check_error( (fscanf(fp_cnt, " %d to %d", &(ref_bins[i][0]), &(ref_bins[i][1])) !=2), "Can't read ref bin range"); (ref_bins[i][0])--; /* bin 1 is element 0, etc. */ (ref_bins[i][1])--; } else if (!strcmp(option, "difference:")) { check_error( (fscanf(fp_cnt, " %d", difference+i) != 1), "Difference flag is missing"); check_error( (difference[i]<0 || difference[i]>2), "Difference must be 0, 1, or 2."); } else if (!strcmp(option, "statistics:")) { check_error( (fscanf(fp_cnt, " scale= %lf", stat_scale + i) != 1), "reading scale"); mflag[i] = 1; allocate_unistat(ustat+i, ndepth, title, U_ALL); } else if (!strcmp(option, "histogram:")) { int nbins; check_error( (fscanf(fp_cnt, " nbins= %d origin= %lf increment= %lf style= %2s", &(nbins), &(hist[i].origin), &(hist[i].bin_width), &(h_style[i][0]) ) != 4), "reading histogram parameters"); hflag[i] = 1; allocate_histogram(hist+i, ndepth, nbins, title); } else if (!strcmp(option, "flag_mask:")) { flag_mask[i] = 0; /* reset to cumulate instead of default */ do { check_error( !fscanf(fp_cnt, " %19s", flag_name), "reading flag_mask"); if (!strcmp(flag_name, "end")) break; /* normal loop exit */ check_error( (BADINT == (bit_flag = get_code(profmask_list, flag_name))), "unrecognized flag_mask"); flag_mask[i] |= (UBYTE) bit_flag; /* cumulate flag masks bit-by-bit */ } while (1); } else if (!strcmp(option, "end")) end_options = 1; else check_error( 1, "unrecognized option"); } /* finished processing the option */ } /* all options processed for that variable */ } /* end of loop through variable list */ check_error( (!finished), "Too many variables or missing list terminator."); nvariables = i-1; /* ---> Start the loop through time ranges listed in the control file. */ initialize_ensemble(); while (get_time_range(fp_cnt, &start, &end) == 1) { DBSRCH(&search_mode, (char *)&start, &ierr); check_error( ( (ierr) && (ierr != SEARCH_BEFORE_BEGINNING) ), "DBSRCH" ); if(combined) print_time_range(fp_out, &start, &end); nd = get_depth(); if (nd == 0) nd = get_pressure(); if (nd != ndepth) { for (id = 0; id < ndepth; id++) depth[id] = id + 1; } if (!combined) initialize_ensemble(); /* ---> Start the loop through profiles within the time range. */ /* printf ("\n ierr = %d\n", ierr); */ ierr = 0; while (!ierr && check_time(&ptime, &start, &end)) { fprintf(stdout, "\n"); write_ymdhms_time(stdout, &ptime); increment_ensemble(); DBMOVE(&steps, &ierr); /* ierr is checked at the start of the loop */ } /* end of loop through profiles within a time range */ if (!combined) { nt_range++; sprintf(matext,".m%02d",nt_range); new_extension(mat_filename, out_filename, matext); fp_mat = check_fopen(mat_filename, "wb"); calculate_ensemble(); print_ensemble(); fclose(fp_mat); } } /* end of loop through time ranges read from control file */ if (combined) { new_extension(mat_filename, out_filename, ".mat"); fp_mat = check_fopen(mat_filename, "wb"); calculate_ensemble(); print_ensemble(); fclose(fp_mat); } fclose(fp_out); DBCLOSE(&ierr); check_error(ierr, "DBCLOSE"); free(depth); free(adj_depth); free(data); for (i=0; i