function lookAheadBin(hdrPath,root,cruise)
%LOOKAHEADBIN Average corrected cruise data into fixed-width time bins.
%
%usage: lookAheadBin(hdrPath,root,cruiseStrWithShipCode)
%
%analyzes a group of files, finding the first and last time values. It
%then bins the data into 600 second bins (10 minutes).
%
%inputs:
%   hdrPath - path of the comma delimited header file that names the data
%             columns. It must contain the names Date, Time, Elapsed_Time,
%             Latitude, Longitude, TSG_Temp90, Salinity, CorrectedFluor and
%             ChloroA. The original author notes that it should always be
%             'Z:\SCIMS Data and Analysis\corrected_hdr.csv'
%   root    - cruise root directory; the input files are read from its
%             'CorrectedData' subdirectory ('*corr_data.csv' and the
%             matching '*corr_flags.csv')
%   cruise  - string prepended to the names of the two output files
%
%outputs (written to the parent of the data directory):
%   [cruise '10mindata.csv']  - binned mean values
%   [cruise '10minflags.csv'] - one flag per binned value
%
%side effect: the current directory is changed (to the data directory and
%then to its parent) and is not restored.

%size to be binned
%default is 600;
binsize = 600;

%assume that the data is nominally spaced 30 seconds apart
%this can be changed if necessary
spacing = 30;

%load header and create header mapping between name and column number
header = textread(hdrPath,'%s','delimiter',',');
headmap = cell2struct(num2cell(1:size(header,1)),header',2);

%one output record: date,time,elapsed time,lat,lon,temp,salinity,fluor,chl
fmtstr = '%8.0f,%06.0f,%1.0f,%3.6f,%3.6f,%2.3f,%2.3f,%1.3f,%1.3f\n';

%create index array of needed data
indarr = [headmap.Date, headmap.Time, headmap.Elapsed_Time, ...
    headmap.Latitude, headmap.Longitude, headmap.TSG_Temp90, ...
    headmap.Salinity, headmap.CorrectedFluor, headmap.ChloroA];
%one extra column is appended to hold the time expressed in seconds
indsz = size(indarr,2) + 1;

%create the path to the flags and the data
datapath = fullfile(root,'CorrectedData');

%move to data root
cd (datapath);

%get list of data files
files = ls('*corr_data.csv');
if isempty(files)
    error('lookAheadBin:noData', ...
        'No *corr_data.csv files found in %s',datapath);
end

%begin analysis
% lineTot = 0;
% for i = 1:size(files,1)
%     nlines = length(textread(files(i,:),'%[^\n]'));
%     lineTot(i) = lineTot(i) + nlines;
% end

%boundary values: date, time and elapsed time of the very first and the
%very last record of the whole file set
data = load('-ascii',fullfile(datapath,files(1,:)));
bcVal(1,:) = data(1,1:3);
data = load('-ascii',fullfile(datapath,files(end,:)));
bcVal(2,:) = data(end,1:3);

%column 1: boundary times in seconds, column 2: boundary elapsed times
%(the original referenced an undefined variable lVal here)
bc = [86400*datenum([num2str(bcVal(:,1),'%06.0f') num2str(bcVal(:,2),'%06.0f')],'yyyymmddHHMMSS') ...
    bcVal(:,3)];

%bin edges, in seconds, spanning the whole data set
endPts = bc(1,1):binsize:bc(2,1);

%lop off the dates
dates = files(:,1:6);

%count is 1 only while the first file is being processed
count = 1;

%iterate over dates
for day = 1:size(dates,1)
    data = load('-ascii',files(day,:));
    data = data(:,indarr);
    %create the field, seconds since epoch.  This allows us to average time
    %values
    data(:,end + 1) = datenum([num2str(data(:,1),'%06.0f') num2str(data(:,2),'%06.0f')], ...
        'yyyymmddHHMMSS')*86400;
    flags = load('-ascii',fullfile(datapath,[dates(day,:) 'corr_flags.csv']));
    %flag columns: the selected variables, the Time flag again (it lines up
    %with the appended seconds column of data) and the last flag column
    flags = flags(:,[indarr headmap.Time end]);

    %NOTE(review): edges is computed but not used by the binning below
    edges = find(endPts <= data(end,end));

    if (count == 1)
        %look ahead: index of the last record whose elapsed time is still
        %within binsize of the first record
        endPt = find(data(:,3) > data(1,3) + binsize, 1) - 1;
        if isempty(endPt)
            %no record lies beyond the first bin, use the whole file
            endPt = size(data,1);
        end
        %NOTE(review): this preview of the first bin is overwritten by the
        %reset of meanval/meanflag that follows, so it does not affect the
        %output yet. It has only been repaired so that it runs.
        for j=1:size(data,2)
            meandata = data(1:endPt,j);
            fInd = (flags(1:endPt,j) == 1) | (flags(1:endPt,end) == 1);
            meanval(1,j) = sum(meandata(fInd))/sum(fInd);
            meanflag(1,j) = mode(flags(1:endPt,j));
        end

        %the first bin closes binsize seconds of elapsed time after the
        %first record (threshold was never initialised in the original)
        threshold = data(1,3) + binsize;

        %avg_cnt is the 1-based position of the next record within the
        %current bin, bin_cnt is the index of the current bin
        avg_cnt = 1;
        bin_cnt = 1;
        meanval(1,:) = zeros(1,indsz);
        meanflag(1,:) = ones(1,indsz);
        goodvals = zeros(1,indsz);
    end

    for i=1:size(data,1)
        %the spacing/2 term is necessary for records that are very close to
        %the threshold value but are just shy (ie- we are looking for 145
        %seconds and get 144s, but the next record is 175s.)
        if ((data(i,3) + spacing/2) >= threshold)
            %compute a new threshold
            threshold = data(i,3) + binsize;
            %compute the goodvals metric (fraction of non-flagged values in
            %the average). avg_cnt - 1 records went into the bin.
            goodvals = goodvals/max(avg_cnt - 1,1);
            %if this metric is less than .5, then the data is (arbitrarily)
            %useless and will be flagged with a 9
            meanflag(bin_cnt,(goodvals < .5)) = 9;

            %start the next bin
            avg_cnt = 1;
            bin_cnt = bin_cnt + 1;
            meanval(bin_cnt,:) = zeros(1,indsz);
            meanflag(bin_cnt,:) = ones(1,indsz);
            goodvals = zeros(1,indsz);
        end
        %we compute the averages here.  Also here is where we skip
        %values if the value is flagged with anything but a 1.
        %we can generalize the averaging for all values in data
        for j=3:size(data,2)
            if ((flags(i,j) == 1) || (flags(i,end-1) == 1))
                %flag is ok, update the running mean. The divisor is the
                %number of values accepted so far for this column, so that
                %skipped (flagged) records do not bias the mean.
                meanval(bin_cnt,j) = meanval(bin_cnt,j) + ...
                    (data(i,j) - meanval(bin_cnt,j))/(goodvals(j) + 1);
                goodvals(j) = goodvals(j) + 1;
            else
                %at least one value of this bin was rejected
                meanflag(bin_cnt,j) = 3;
            end
        end
        avg_cnt = avg_cnt + 1;
    end
    count = count + 1;
end

%do some last minute processing on the last bin
%compute the goodvals metric (fraction of non-flagged values in
%the average)
goodvals = goodvals/max(avg_cnt - 1,1);
%if this metric is less than .5, then the data is (arbitrarily)
%useless and will be flagged with a 9
meanflag(bin_cnt,(goodvals < .5)) = 9;

%write dates proper-like, derived from the averaged seconds column
meanval(:,1) = str2num(datestr(meanval(:,end)/86400,'yyyymmdd'));
meanflag(:,1) = meanflag(:,end);

%write times proper-like
meanval(:,2) = str2num(datestr(meanval(:,end)/86400,'HHMMSS'));
%the time gets the same flag as the date since they were both flagged
%at the same time
meanflag(:,2) = meanflag(:,1);

%create outfile name
flagoutfile = [cruise '10minflags.csv'];
dataoutfile = [cruise '10mindata.csv'];

%now we need to check to make sure that if Date, Time or Lat and Long
%values have a non 1 flag, all values get that flag
for i=1:size(meanflag,1)
    if (sum(meanflag(i,[1 2 4 5])) ~= 4)
        %the sum of the values is not 4, therefore they aren't all 1 flags,
        %so we need to flag the row with the most meaningful flag of the
        %bunch (3 if there are only 3 flags, 9 otherwise).  The corner case
        %is when you have a variable with a 9 flag but the "row" flag is
        %only 3, 9 is a more serious flag, so it should be retained.
        meanflag(i,(meanflag(i,:)~=9)) = max(meanflag(i,[1 2 4 5]));
    end
end

%moves to the cruise root for writing.
cd ('..');

%open a filehandle for the output data file
fid = fopen(dataoutfile,'wt');
if (fid == -1)
    error('lookAheadBin:cannotOpen', ...
        'Unable to open %s for writing',dataoutfile);
end

%print out data (the helper seconds column is dropped)
fprintf(fid,fmtstr,meanval(:,1:end-1)');

%close the file
fclose(fid);

%write flags file
csvwrite(flagoutfile,meanflag(:,1:end-1));


function secs = time2Seconds(time)
%converts an integer-ized time (HHMMSS) to seconds since midnight
x1 = floor(time/10000);                 %hours
x2 = floor((time - x1*10000)/100);      %minutes
x3 = ((time - x1*10000) - x2*100);      %seconds
secs = x1*3600 + x2*60 + x3;