Commit 0c47eff6 authored by jay's avatar jay
Browse files

Adds length assertion and logic for dynamic cols

parent 83136d03
Loading
Loading
Loading
Loading
+14 −267
Original line number Diff line number Diff line
@@ -24,6 +24,17 @@ def read_gpf(input_data):
         containing the gfp data with appropriate column names and indices
    """

    # Check that the number of rows is matching the expected number
    with open(input_data, 'r') as f:
        for i, l in enumerate(f):
            if i == 1:
                cnt = int(l)
            elif i == 2:
                col = l
                break



    # Mixed types requires read as unicode - let pandas soft convert
    d = np.genfromtxt(input_data, skip_header=3, dtype='unicode')
    d = d.reshape(-1, 12)
@@ -36,272 +47,8 @@ def read_gpf(input_data):
                              'res0', 'res1', 'res2'])
    # Soft conversion of numeric types to numerics
    df = df.apply(pd.to_numeric, errors='ignore')
    return df




# def GPF_SAV(input_data, ave=True):
    # # read the IDL .SAV file

    # data = io.readsav(input_data, python_dict=True)

    # # put the spectra into data frames and combine them
    # df_UV = pd.DataFrame(data['uv'], index=data['defuv'])
    # df_VIS = pd.DataFrame(data['vis'], index=data['defvis'])
    # df_VNIR = pd.DataFrame(data['vnir'], index=data['defvnir'])
    # df_spect = pd.concat([df_UV, df_VIS, df_VNIR])
    # df_spect.columns = ['shot' + str(i + 1) for i in
                        # df_spect.columns]  # add 1 to the columns so they correspond to shot number

    # df_aUV = pd.DataFrame(data['auv'], index=data['defuv'], columns=['average'])
    # df_aVIS = pd.DataFrame(data['avis'], index=data['defvis'], columns=['average'])
    # df_aVNIR = pd.DataFrame(data['avnir'], index=data['defvnir'], columns=['average'])
    # df_ave = pd.concat([df_aUV, df_aVIS, df_aVNIR])

    # df_mUV = pd.DataFrame(data['muv'], index=data['defuv'], columns=['median'])
    # df_mVIS = pd.DataFrame(data['mvis'], index=data['defvis'], columns=['median'])
    # df_mVNIR = pd.DataFrame(data['mvnir'], index=data['defvnir'], columns=['median'])
    # df_med = pd.concat([df_mUV, df_mVIS, df_mVNIR])

    # df = pd.concat([df_spect, df_ave, df_med], axis=1)
    # # create multiindex to access wavelength values
    # # also, round the wavlength values to a more reasonable level of precision
    # df.index = [['wvl'] * len(df.index), df.index.values.round(4)]
    # # transpose so that spectra are rows rather than columns
    # df = df.T

    # # extract metadata from the file name and add it to the data frame
    # # use the multiindex label "meta" for all metadata

    # fname = os.path.basename(input_data)

    # # for some reason, some ChemCam files have the 'darkname' key, others call it 'darkspect'
    # # this try-except pair converts to 'darkname' when needed
    # try:
        # data['darkname']
    # except:
        # data['darkname'] = data['darkspec']

    # metadata = [fname,
                # fname[4:13],
                # fname[25:34].upper(),
                # fname[34:36],
                # data['continuumvismin'],
                # data['continuumvnirmin'],
                # data['continuumuvmin'],
                # data['continuumvnirend'],
                # data['distt'],
                # data['darkname'],
                # data['nshots'],
                # data['dnoiseiter'],
                # data['dnoisesig'],
                # data['matchedfilter']]
    # metadata = np.tile(metadata, (len(df.index), 1))
    # metadata_cols = list(zip(['meta'] * len(df.index), ['file',
                                                        # 'sclock',
                                                        # 'seqid',
                                                        # 'Pversion',
                                                        # 'continuumvismin',
                                                        # 'continuumvnirmin',
                                                        # 'continuumuvmin',
                                                        # 'continuumvnirend',
                                                        # 'distt',
                                                        # 'dark',
                                                        # 'nshots',
                                                        # 'dnoiseiter',
                                                        # 'dnoisesig',
                                                        # 'matchedfilter']))
    # metadata = pd.DataFrame(metadata, columns=pd.MultiIndex.from_tuples(metadata_cols), index=df.index)

    # df = pd.concat([metadata, df], axis=1)
    # if ave == True:
        # df = df.loc['average']
        # df = df.to_frame().T
    # else:
        # pass

    # return df


# def ccam_batch(directory, searchstring='*.csv', to_csv=None, lookupfile=None, ave=True, progressbar=None):
    # # Determine if the file is a .csv or .SAV
    # if '.sav' in searchstring.lower():
        # is_sav = True
    # else:
        # is_sav = False
    # filelist = file_search(directory, searchstring)
    # basenames = np.zeros_like(filelist)
    # sclocks = np.zeros_like(filelist)
    # P_version = np.zeros_like(filelist, dtype='int')

    # # Extract the sclock and version for each file and ensure that only one
    # # file per sclock is being read, and that it is the one with the highest version number
    # for i, name in enumerate(filelist):
        # basenames[i] = os.path.basename(name)
        # sclocks[i] = basenames[i][4:13]  # extract the sclock
        # P_version[i] = basenames[i][-5:-4]  # extract the version

    # sclocks_unique = np.unique(sclocks)  # find unique sclocks
    # filelist_new = np.array([], dtype='str')
    # for i in sclocks_unique:
        # match = (sclocks == i)  # find all instances with matching sclocks
        # maxP = P_version[match] == max(P_version[match])  # find the highest version among these files
        # filelist_new = np.append(filelist_new, filelist[match][maxP])  # keep only the file with thei highest version

    # filelist = filelist_new
    # # Should add a progress bar for importing large numbers of files
    # dt = []

    # for i, file in enumerate(filelist):
        # print(file)
        # if is_sav:
            # tmp = CCAM_SAV(file, ave=ave)
        # else:
            # tmp = CCAM_CSV(file, ave=ave)
        # if i == 0:
            # combined = tmp
        # else:
            # # This ensures that rounding errors are not causing mismatches in columns
            # cols1 = list(combined['wvl'].columns)
            # cols2 = list(tmp['wvl'].columns)
            # if set(cols1) == set(cols2):
                # combined = pd.concat([combined, tmp])
            # else:
                # print("Wavelengths don't match!")

    # combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])

    # if lookupfile is not None:

        # combined = lookup(combined, lookupfile=lookupfile.replace('[','').replace(']','').replace("'",'').replace(' ','').split(','))
    # if to_csv is not None:
        # combined.to_csv(to_csv)
    # return combined

    # Validate the read data with the header point count
    assert int(cnt) == len(df)


# main(int argc, char *argv[])
# {

  # char     gpfFile[FILELEN];
  # char     csvFile[FILELEN];
  # char     pointIDsFile[FILELEN];

  # FILE     *gpfFp;     // file pointer to input csv file
  # FILE     *csvFp;     // file pointer to output csv file
  # FILE     *ptsFp;     // file pointer to output point ids list file

  # char     gpfLine[LINELENGTH];

  # // check number of command line args and issue help if needed
  # //-----------------------------------------------------------
  # if (argc != 2) {
     # printf ("\nrun %s as follows:\n",argv[0]);
     # printf ("   %s SSgpfFile\n",
             # argv[0]);
     # printf ("\nwhere:\n");
     # printf ("  SSgpfFile = Socet Set *.gpf file, from a geographic project\n\n");
     # printf ("  This program will convert a Socet Set ground point file into a CSV\n");
     # printf ("  of lat,lon,height.  The output file will have the same core name\n");
     # printf ("  of the input *.gpf file, but with a .csv extension\n\n");
     # printf ("  Also output is the list of point IDs that were converted.  This file\n");
     # printf ("  will be used to port the points back to Socet Set later on.  The output\n");
     # printf ("  file will have the same core name as the input file, but with a .pointids\n");
     # printf ("  .tiePointIds.txt extension.\n");
     # exit(1);
  # }

  # //------------------------------------------------
  # // get input arguments entered at the command line
  # //------------------------------------------------

  # strcpy (gpfFile,argv[1]);

  # //-----------------------------
  # // generate ouput file names
  # //-----------------------------

  # char corename[FILELEN];
  # strcpy (corename,gpfFile);
  # int len = strlen(corename);
  # corename[len-4] = '\0';

  # strcpy (csvFile,corename);
  # strcat (csvFile,".csv");

  # strcpy (pointIDsFile,corename);
  # strcat (pointIDsFile,".tiePointIds.txt");

  # /////////////////////////////////////////////////////////////////////////////
  # // open files
  # /////////////////////////////////////////////////////////////////////////////

  # gpfFp = fopen (gpfFile,"r");
  # if (gpfFp == NULL) {
    # printf ("unable to open input gpf file: %s\n",gpfFile);
    # exit (1);
  # }

  # csvFp = fopen (csvFile,"w");
  # if (csvFp == NULL) {
    # printf ("unable to open output csv file: %s\n",csvFile);
    # fclose(gpfFp);
    # exit (1);
  # }

  # ptsFp = fopen (pointIDsFile,"w");
  # if (csvFp == NULL) {
    # printf ("unable to open output list file of tie point ids: %s\n",pointIDsFile);
    # fclose(gpfFp);
    # fclose(csvFp);
    # exit (1);
  # }

  # //------------------------------------------------
  # //------------------------------------------------

  # // skip the header
  # fgets(gpfLine,LINELENGTH,gpfFp);

  # // read in number of points in *.gpf file
  # char value[50];
  # fgets(gpfLine,LINELENGTH,gpfFp);
  # sscanf (gpfLine,"%s",value);
  # int numpts = atoi(value);

  # // skip next header
  # fgets(gpfLine,LINELENGTH,gpfFp);

  # // Parse gpf, output csv
  # char pointID[50], valStat[2], valKnown[2];
  # char valLon[50], valLat[50], Height[50];
  # double rad2dd = 57.295779513082320876798154814105;

  # for (int i=0; i<numpts; i++) {
    # fgets(gpfLine,LINELENGTH,gpfFp);
    # sscanf (gpfLine,"%s %s %s",pointID,valStat,valKnown);
    # int stat = atoi(valStat);
    # int known = atoi(valKnown);

    # // get coordinate
    # fgets(gpfLine,LINELENGTH,gpfFp);

    # //only output tie points that are on
    # if (stat == 1 && known == 0) {
      # sscanf (gpfLine,"%s %s %s",valLat,valLon,Height);
      # double radLat = atof(valLat);
      # double radLon = atof(valLon);
      # fprintf(csvFp,"%.14lf,%.14lf,%s\n",rad2dd*radLat,rad2dd*radLon,Height);
      # fprintf(ptsFp,"%s\n",pointID);
    # }

    # // skip next three lines in input *.gpf file
    # for (int j=0; j<3; j++)
      # fgets(gpfLine,LINELENGTH,gpfFp);
  # }

  # fclose(gpfFp);
  # fclose(csvFp);

# } // end of program
    return df