Adds length assertion and logic for dynamic cols (0c47eff6) · Commits · aflab / astrogeology / Plio

plio/io/io_gpf.py

+14 −267

Original line number	Diff line number	Diff line
		@@ -24,6 +24,17 @@ def read_gpf(input_data):
		containing the gfp data with appropriate column names and indices
		"""

		# Check that the number of rows is matching the expected number
		with open(input_data, 'r') as f:
		for i, l in enumerate(f):
		if i == 1:
		cnt = int(l)
		elif i == 2:
		col = l
		break



		# Mixed types requires read as unicode - let pandas soft convert
		d = np.genfromtxt(input_data, skip_header=3, dtype='unicode')
		d = d.reshape(-1, 12)
		@@ -36,272 +47,8 @@ def read_gpf(input_data):
		'res0', 'res1', 'res2'])
		# Soft conversion of numeric types to numerics
		df = df.apply(pd.to_numeric, errors='ignore')
		return df




		# def GPF_SAV(input_data, ave=True):
		# # read the IDL .SAV file

		# data = io.readsav(input_data, python_dict=True)

		# # put the spectra into data frames and combine them
		# df_UV = pd.DataFrame(data['uv'], index=data['defuv'])
		# df_VIS = pd.DataFrame(data['vis'], index=data['defvis'])
		# df_VNIR = pd.DataFrame(data['vnir'], index=data['defvnir'])
		# df_spect = pd.concat([df_UV, df_VIS, df_VNIR])
		# df_spect.columns = ['shot' + str(i + 1) for i in
		# df_spect.columns] # add 1 to the columns so they correspond to shot number

		# df_aUV = pd.DataFrame(data['auv'], index=data['defuv'], columns=['average'])
		# df_aVIS = pd.DataFrame(data['avis'], index=data['defvis'], columns=['average'])
		# df_aVNIR = pd.DataFrame(data['avnir'], index=data['defvnir'], columns=['average'])
		# df_ave = pd.concat([df_aUV, df_aVIS, df_aVNIR])

		# df_mUV = pd.DataFrame(data['muv'], index=data['defuv'], columns=['median'])
		# df_mVIS = pd.DataFrame(data['mvis'], index=data['defvis'], columns=['median'])
		# df_mVNIR = pd.DataFrame(data['mvnir'], index=data['defvnir'], columns=['median'])
		# df_med = pd.concat([df_mUV, df_mVIS, df_mVNIR])

		# df = pd.concat([df_spect, df_ave, df_med], axis=1)
		# # create multiindex to access wavelength values
		# # also, round the wavlength values to a more reasonable level of precision
		# df.index = [['wvl'] * len(df.index), df.index.values.round(4)]
		# # transpose so that spectra are rows rather than columns
		# df = df.T

		# # extract metadata from the file name and add it to the data frame
		# # use the multiindex label "meta" for all metadata

		# fname = os.path.basename(input_data)

		# # for some reason, some ChemCam files have the 'darkname' key, others call it 'darkspect'
		# # this try-except pair converts to 'darkname' when needed
		# try:
		# data['darkname']
		# except:
		# data['darkname'] = data['darkspec']

		# metadata = [fname,
		# fname[4:13],
		# fname[25:34].upper(),
		# fname[34:36],
		# data['continuumvismin'],
		# data['continuumvnirmin'],
		# data['continuumuvmin'],
		# data['continuumvnirend'],
		# data['distt'],
		# data['darkname'],
		# data['nshots'],
		# data['dnoiseiter'],
		# data['dnoisesig'],
		# data['matchedfilter']]
		# metadata = np.tile(metadata, (len(df.index), 1))
		# metadata_cols = list(zip(['meta'] * len(df.index), ['file',
		# 'sclock',
		# 'seqid',
		# 'Pversion',
		# 'continuumvismin',
		# 'continuumvnirmin',
		# 'continuumuvmin',
		# 'continuumvnirend',
		# 'distt',
		# 'dark',
		# 'nshots',
		# 'dnoiseiter',
		# 'dnoisesig',
		# 'matchedfilter']))
		# metadata = pd.DataFrame(metadata, columns=pd.MultiIndex.from_tuples(metadata_cols), index=df.index)

		# df = pd.concat([metadata, df], axis=1)
		# if ave == True:
		# df = df.loc['average']
		# df = df.to_frame().T
		# else:
		# pass

		# return df


		# def ccam_batch(directory, searchstring='*.csv', to_csv=None, lookupfile=None, ave=True, progressbar=None):
		# # Determine if the file is a .csv or .SAV
		# if '.sav' in searchstring.lower():
		# is_sav = True
		# else:
		# is_sav = False
		# filelist = file_search(directory, searchstring)
		# basenames = np.zeros_like(filelist)
		# sclocks = np.zeros_like(filelist)
		# P_version = np.zeros_like(filelist, dtype='int')

		# # Extract the sclock and version for each file and ensure that only one
		# # file per sclock is being read, and that it is the one with the highest version number
		# for i, name in enumerate(filelist):
		# basenames[i] = os.path.basename(name)
		# sclocks[i] = basenames[i][4:13] # extract the sclock
		# P_version[i] = basenames[i][-5:-4] # extract the version

		# sclocks_unique = np.unique(sclocks) # find unique sclocks
		# filelist_new = np.array([], dtype='str')
		# for i in sclocks_unique:
		# match = (sclocks == i) # find all instances with matching sclocks
		# maxP = P_version[match] == max(P_version[match]) # find the highest version among these files
		# filelist_new = np.append(filelist_new, filelist[match][maxP]) # keep only the file with thei highest version

		# filelist = filelist_new
		# # Should add a progress bar for importing large numbers of files
		# dt = []

		# for i, file in enumerate(filelist):
		# print(file)
		# if is_sav:
		# tmp = CCAM_SAV(file, ave=ave)
		# else:
		# tmp = CCAM_CSV(file, ave=ave)
		# if i == 0:
		# combined = tmp
		# else:
		# # This ensures that rounding errors are not causing mismatches in columns
		# cols1 = list(combined['wvl'].columns)
		# cols2 = list(tmp['wvl'].columns)
		# if set(cols1) == set(cols2):
		# combined = pd.concat([combined, tmp])
		# else:
		# print("Wavelengths don't match!")

		# combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])

		# if lookupfile is not None:

		# combined = lookup(combined, lookupfile=lookupfile.replace('[','').replace(']','').replace("'",'').replace(' ','').split(','))
		# if to_csv is not None:
		# combined.to_csv(to_csv)
		# return combined

		# Validate the read data with the header point count
		assert int(cnt) == len(df)


		# main(int argc, char *argv[])
		# {

		# char gpfFile[FILELEN];
		# char csvFile[FILELEN];
		# char pointIDsFile[FILELEN];

		# FILE *gpfFp; // file pointer to input csv file
		# FILE *csvFp; // file pointer to output csv file
		# FILE *ptsFp; // file pointer to output point ids list file

		# char gpfLine[LINELENGTH];

		# // check number of command line args and issue help if needed
		# //-----------------------------------------------------------
		# if (argc != 2) {
		# printf ("\nrun %s as follows:\n",argv[0]);
		# printf (" %s SSgpfFile\n",
		# argv[0]);
		# printf ("\nwhere:\n");
		# printf (" SSgpfFile = Socet Set *.gpf file, from a geographic project\n\n");
		# printf (" This program will convert a Socet Set ground point file into a CSV\n");
		# printf (" of lat,lon,height. The output file will have the same core name\n");
		# printf (" of the input *.gpf file, but with a .csv extension\n\n");
		# printf (" Also output is the list of point IDs that were converted. This file\n");
		# printf (" will be used to port the points back to Socet Set later on. The output\n");
		# printf (" file will have the same core name as the input file, but with a .pointids\n");
		# printf (" .tiePointIds.txt extension.\n");
		# exit(1);
		# }

		# //------------------------------------------------
		# // get input arguments entered at the command line
		# //------------------------------------------------

		# strcpy (gpfFile,argv[1]);

		# //-----------------------------
		# // generate ouput file names
		# //-----------------------------

		# char corename[FILELEN];
		# strcpy (corename,gpfFile);
		# int len = strlen(corename);
		# corename[len-4] = '\0';

		# strcpy (csvFile,corename);
		# strcat (csvFile,".csv");

		# strcpy (pointIDsFile,corename);
		# strcat (pointIDsFile,".tiePointIds.txt");

		# /////////////////////////////////////////////////////////////////////////////
		# // open files
		# /////////////////////////////////////////////////////////////////////////////

		# gpfFp = fopen (gpfFile,"r");
		# if (gpfFp == NULL) {
		# printf ("unable to open input gpf file: %s\n",gpfFile);
		# exit (1);
		# }

		# csvFp = fopen (csvFile,"w");
		# if (csvFp == NULL) {
		# printf ("unable to open output csv file: %s\n",csvFile);
		# fclose(gpfFp);
		# exit (1);
		# }

		# ptsFp = fopen (pointIDsFile,"w");
		# if (csvFp == NULL) {
		# printf ("unable to open output list file of tie point ids: %s\n",pointIDsFile);
		# fclose(gpfFp);
		# fclose(csvFp);
		# exit (1);
		# }

		# //------------------------------------------------
		# //------------------------------------------------

		# // skip the header
		# fgets(gpfLine,LINELENGTH,gpfFp);

		# // read in number of points in *.gpf file
		# char value[50];
		# fgets(gpfLine,LINELENGTH,gpfFp);
		# sscanf (gpfLine,"%s",value);
		# int numpts = atoi(value);

		# // skip next header
		# fgets(gpfLine,LINELENGTH,gpfFp);

		# // Parse gpf, output csv
		# char pointID[50], valStat[2], valKnown[2];
		# char valLon[50], valLat[50], Height[50];
		# double rad2dd = 57.295779513082320876798154814105;

		# for (int i=0; i<numpts; i++) {
		# fgets(gpfLine,LINELENGTH,gpfFp);
		# sscanf (gpfLine,"%s %s %s",pointID,valStat,valKnown);
		# int stat = atoi(valStat);
		# int known = atoi(valKnown);

		# // get coordinate
		# fgets(gpfLine,LINELENGTH,gpfFp);

		# //only output tie points that are on
		# if (stat == 1 && known == 0) {
		# sscanf (gpfLine,"%s %s %s",valLat,valLon,Height);
		# double radLat = atof(valLat);
		# double radLon = atof(valLon);
		# fprintf(csvFp,"%.14lf,%.14lf,%s\n",rad2ddradLat,rad2ddradLon,Height);
		# fprintf(ptsFp,"%s\n",pointID);
		# }

		# // skip next three lines in input *.gpf file
		# for (int j=0; j<3; j++)
		# fgets(gpfLine,LINELENGTH,gpfFp);
		# }

		# fclose(gpfFp);
		# fclose(csvFp);

		# } // end of program
		return df