# coding: utf-8
"""
Source Name:   SSForest.py
Version:       ArcGIS Pro 2.2
Author:        Environmental Systems Research Institute Inc.
Description:   Python file with utilities classes for running forest.
"""

################### Imports ########################
import os as OS
import sys as SYS
import operator as OP
import numpy as NUM
import arcgisscripting as ARC
import scipy.stats as SCPS
import arcpy as ARCPY
import arcpy.management as DM
import arcpy.conversion as CONV
import arcpy.da as DA
import arcpy.sa as ASA
import SSDataObject as SSDO
import SSUtilities as UTILS
import Stats as STATS
import ErrorUtils as ERROR
import time as TIME
import json as JSON
import pdb
import locale as LOCALE
import collections as COLL
import gc as GC
import netCDF4 as NET

##### Global Variables #####
DBGFILES = None
DBGPAR = False
NULL = -999999
MaxCategoriesAllowed = 60
predictivePowerVariableSize = 15
minNumObs = 20
minNumElemePerClassStrat = 5
maxSizeCells = 1e9
randSeed = None
HDBG = False
MEMSIZE = 1e8
MEMPOINT = 1e8

#### Feature-class field type -> Add Field keyword ####
convertType = {'SmallInteger': 'SHORT', 'Integer': 'LONG',
               'String': 'TEXT', 'Single': 'FLOAT',
               'Double': 'FLOAT', 'Date': 'DATE'}

#### Output field type -> numpy scalar type ####
# NOTE: the builtins float/int are used instead of the deprecated
# NUM.float/NUM.int aliases (removed in NumPy >= 1.24); the aliases were
# exactly these builtins, so behavior is unchanged.
convert2Numpy = {"DOUBLE": float, "LONG": int,
                 "SHORT": int, "FLOAT": float}

convertTypeSupportedByFastOutput = {'SmallInteger': 'LONG', 'Integer': 'LONG',
                                    'String': 'TEXT', 'Single': 'DOUBLE',
                                    'Double': 'DOUBLE', 'Date': 'DATE',
                                    'SMALLINTEGER': 'LONG', 'INTEGER': 'LONG',
                                    'STRING': 'TEXT', 'SINGLE': 'DOUBLE',
                                    'DOUBLE': 'DOUBLE', 'DATE': 'DATE'}

#### Variable-type labels used in UI strings and model headers ####
continuous = "(CNT)"
discrete = "(CAT)"
categoricalVar = 'Categorical'
numericVar = 'Numeric'
userTypeType = {numericVar: 1, categoricalVar: 2}
varType = {numericVar: continuous, categoricalVar: discrete}
varTypeRev = {continuous: numericVar, discrete: categoricalVar}


def numpy2FormatSupport(typeNumpy, size = None):
    """
    Numpy dtype supported by Fast Write candidate field.

    INPUT:
        typeNumpy (str): type (INT, LONG...)
        size {int}: string length, required only for "TEXT"

    OUTPUT:
        return (Numpy type): a unicode dtype string for TEXT, otherwise
        the scalar type mapped by convert2Numpy
    """
    if typeNumpy == "TEXT":
        return 'U' + str(size)
    return convert2Numpy[typeNumpy]

################## Classes #########################

class RFField(object):
    """This class stores the information of each field in RF."""

    def __init__(self):
        #### BUG FIX: was misspelled `_init__`, so the constructor never ####
        #### ran and fresh instances had no attributes (toDict crashed) ####
        self.name = None
        self.alias = None
        self.source = None
        self.dataSource = None
        self.fieldType = None
        self.rfType = None
        self.info = None
        self.data = None
        self.infoCount = None

    def toDict(self):
        """This method helps to serialize properties of the RFField."""
        return {"name": self.name,
                "alias": self.alias,
                "source": self.source,
                "fieldType": self.fieldType,
                "rfType": self.rfType,
                "info": list(self.info)}

    def fromDict(self, dictRF):
        """This method helps to deserialize properties of the RFField."""
        self.name = dictRF["name"]
        self.alias = dictRF["alias"]
        self.source = dictRF["source"]
        self.fieldType = dictRF["fieldType"]
        self.rfType = dictRF["rfType"]
        self.info = dictRF["info"]
class InfoForestField(object):
    """This class stores information about the forest source data.

    INPUTS:
        splitPercentage {float}: percentage to split test dataset
        seed {int}: seed
        ssdo {SSDataObject}: Data Object
        numXVariables {int}: number of X's variables
        isTraining {bool, True}: the dataset is for training

    ATTRIBUTES:
        name (str): name of the field
        baseName (str): name of the field on disk, i.e. without table joins
        type (str): type of data {'Single', 'Double', 'Integer', etc...}
        length (int): length of the field

    METHODS:
        generateXVariable()
        getIndexField(name)
        get_fieldType(id_field)
        get_categories(id_field)
        add_field(names, type, source)
    """

    def __init__(self, splitPercentage = 0, seed = 0, ssdo = None,
                 numXVariables = None, isTraining = True):
        self.seed = seed
        self.splitPercentage = splitPercentage
        self.isTraining = isTraining
        self.ssdo = ssdo
        self.filteredRowsIds = []
        self.filterApplied = False
        self.y = None
        self.yField = None
        self.fields = []
        self.explodeRaster = False
        self.properties = {"version": "1.0",
                           "date": TIME.asctime(TIME.localtime(TIME.time())),
                           "leafSize": 1,
                           "sampleSize": -1,
                           "numFieldPerm": 1,
                           "explodeRaster": False}
        self.viewSplitMessage = False
        self.infoMissTrain = []
        self.infoMissTest = []
        #### Get original Indices master SSDO FC ####
        self.masterIndexFC = {}
        self.getIndicesBadRecords()
        self.numXVariables = 0
        #### Split Data ####
        self.x_train = None
        self.y_train = None
        self.x_test = None
        self.y_test = None
        self.indices = None
        self.train_split_indices = None
        self.test_split_indices = None
        self.sparseMessage = False
        if type(ssdo) == SSDO.SSDataObject:
            self.ssdo = ssdo
        self.N = 0
        #self.y = None
        self.x = None

    def loadDataRasterForTraining(self, infoRaster, ssdo, variableToPredict,
                                  typeYVarUserRF = "Categorical",
                                  checkBalance = True):
        """
        This method loads data from a RasterInfo object to generate the
        variables required for training the forest model.
        """
        dtypeV = ssdo.fields[variableToPredict.upper()].data.dtype
        dataX = None
        mask = None
        if ssdo.shapeType == "Polygon":
            dataX, mask = infoRaster.extractDataFromPolygons(ssdo)
            infoRaster.ssdo = None
            self.createVariablesFromRasterList(infoRaster, dataX)
            infoRaster.idPoly = infoRaster.idPoly[mask]
            infoRaster.polygonMask = infoRaster.polygonMask[mask]
            infoRaster.centroids = infoRaster.centroids[mask]
            #### Map each exploded raster cell back to its polygon's Y value ####
            yVar = NUM.zeros(len(infoRaster.polygonMask), dtype = dtypeV)
            y = ssdo.fields[variableToPredict.upper()].data
            for id, i in enumerate(infoRaster.idPoly):
                if i in ssdo.master2Order:
                    orderId = ssdo.master2Order[i]
                    idP = NUM.argwhere(infoRaster.idPoly == i)
                    yVar[idP] = y[orderId]
            #### Add Y Field ####
            self.defineYFromArray(name = variableToPredict.upper(),
                                  userType = typeYVarUserRF,
                                  inputType = ssdo.fields[variableToPredict.upper()].type,
                                  dataInfo = yVar, source = "FC")
            self.dataObject = UTILS.DataContainer(infoRaster.srf,
                                                  infoRaster.centroids)
        if ssdo.shapeType == "Point":
            dataX, mask = infoRaster.extractDataUsingPoints(ssdo)
            infoRaster.ssdo = None
            infoRaster.srfOut = ssdo.spatialRef
            self.createVariablesFromRasterList(infoRaster, dataX)
            centroids = ssdo.xyCoords[mask, :]
            y = ssdo.fields[variableToPredict.upper()].data[mask]
            ssdo.fields[variableToPredict.upper()].data = y
            self.defineYFromDataObject(variableToPredict.upper(), typeYVarUserRF,
                                       ignoreValue = True,
                                       checkBalance = checkBalance)
            self.dataObject = UTILS.DataContainer(infoRaster.srf, centroids)

    def loadDataRasterForPredicting(self, trainingField, infoRaster, indexRange):
        """
        This method loads data from a RasterInfo object to generate the
        variables required for predicting with a forest model.

        OUTPUT:
            mask (1D array)/None: mask of valid cells, None when the zone
            produced no data.
        """
        dataX, mask = infoRaster.extractZone(indexRange)
        if len(dataX) == 0:
            return None
        #### Clean Memory ####
        self.fields = None
        GC.collect()
        self.fields = []
        self.createVariablesFromRasterList(infoRaster, dataX, trainingField)
        return mask

    def createVariablesFromRasterList(self, infoRaster, dataBlock,
                                      trainingField = None):
        """Check variables extracted from a list of rasters and register
        them as RF fields. `dataBlock` becomes self.x."""
        messages = []
        for id, val in enumerate(infoRaster.newNames):
            name = val
            userType = numericVar if infoRaster.typeRF[id] else categoricalVar
            self.addVariable(name, userType, "DOUBLE", "RASTER", None)
            if trainingField is not None:
                errors, info = self.variableTest(name, dataBlock, userType, id,
                                                 trainingField.fields[id])
            else:
                errors, info = self.variableTest(name, dataBlock, userType, id)
            self.fields[id].info = info
            messages.extend(errors)
        processErrors(messages)
        self.x = dataBlock

    def variableTest(self, name, dataBlock, userType, id, field = None):
        """Test variability of one raster column; categorical columns are
        remapped in place to category indices.

        OUTPUT:
            errors (list), info (list): category list for categorical data,
            [min, max] for numeric data.
        """
        errors = None
        info = ""
        data = dataBlock.T[id]
        if userType == 'Categorical':
            unique = NUM.unique(data)
            data1 = NUM.zeros(len(data), dtype = float)
            if field is not None:
                #### Prediction: every category must exist in training ####
                t = NUM.in1d(unique, NUM.array(field.info)).sum()
                if t != len(unique):
                    ARCPY.AddIDMessage("ERROR", 110194, name + "(DOUBLE)",
                                       field.name + "(DOUBLE)")
                    raise SystemExit
                for ind, ele in enumerate(field.info):
                    data1[data == ele] = float(ind)
                info = field.info
            else:
                for ind, ele in enumerate(unique):
                    data1[data == ele] = float(ind)
                #### BUG FIX: was `info = field.info`, which raised ####
                #### AttributeError whenever no training field was given ####
                info = unique.ravel().tolist()
            dataBlock.T[id] = data1
            errors = self.checkVariabilityCat(name, info)
        else:
            errors = self.checkVariability(name, data)
            info = [data.min(), data.max()]
        return errors, info

    def checkAllCategoriesUsed(self, yTrain, yTest):
        """
        This function checks if all categories were selected to create the
        subset training/testing.

        INPUT:
            yTrain (1D array): Id Categories in Training
            yTest (1D array): Id Categories in testing
        """
        categories = self.yField.info
        nCatTrain = len(yTrain)
        nCatTest = len(yTest)
        nCatData = len(categories)
        self.infoMissTrain = []
        maxN = 30  # cap on how many missing categories are reported
        if nCatTrain < nCatData:
            noContain = []
            for i, e in enumerate(categories):
                if i not in yTrain:
                    noContain.append(str(e))
                    if len(noContain) > maxN - 1:
                        break
            self.infoMissTrain = noContain
        if nCatTest < nCatData:
            noContain = []
            for i, e in enumerate(categories):
                if i not in yTest:
                    noContain.append(str(e))
                    if len(noContain) > maxN - 1:
                        break
            self.infoMissTest = noContain

    def areAllCategoriesUsed(self):
        """Raise an error / warning when not all categories were used in
        the train / test split."""
        if self.viewSplitMessage:
            if len(self.infoMissTrain) > 0:
                self.applySparseMessage("ERROR")
                ARCPY.AddIDMessage("ERROR", 110234,
                                   (100 - self.splitPercentage),
                                   ", ".join(self.infoMissTrain))
                raise SystemExit
            if len(self.infoMissTest) > 0:
                self.applySparseMessage()
                ARCPY.AddIDMessage("WARNING", 110235,
                                   ", ".join(self.infoMissTest))

    def splitDataForTesting(self, balance = False):
        """
        Split dataset for training.

        INPUT:
            balance {bool, False}: apply stratified (per-category) split
        """
        if self.isTraining:
            if self.splitPercentage > 0:
                #### Set Seed To Control Split Process ####
                NUM.random.seed(self.seed)
                flag = True
                timesSearch = 0
                maxTimesToTry = 30
                if balance:
                    counts = self.yField.infoCount
                    minSizeClass = NUM.floor(counts * (100 - self.splitPercentage) / 100)
                    minClass = NUM.where(minSizeClass < minNumElemePerClassStrat)[0]
                    if len(minClass) > 0:
                        ARCPY.AddIDMessage("ERROR", 110230)
                        idsMinElem = minClass.tolist()
                        listErr = [str(self.yField.info[id]) for id in idsMinElem]
                        listCat = ", ".join(listErr if len(listErr) < 30
                                            else listErr[0:30])
                        ARCPY.AddIDMessage("ERROR", 110231,
                                           minNumElemePerClassStrat, listCat)
                        raise SystemExit
                    else:
                        #### Stratified split: shuffle inside each category ####
                        self.indices = NUM.arange(len(self.x), dtype = int)
                        indices = NUM.zeros(len(self.y), dtype = int)
                        for id, elem in enumerate(self.yField.info):
                            ind = self.y == id
                            #### Verify Count ####
                            if counts[id] != ind.sum():
                                self.yField.infoCount[id] = ind.sum()
                                counts[id] = ind.sum()
                                minSizeClass[id] = NUM.floor(
                                    counts[id] * (100 - self.splitPercentage) / 100)
                            valueIndCat = NUM.zeros(counts[id], dtype = int)
                            indicesCat = NUM.arange(counts[id], dtype = int)
                            NUM.random.shuffle(indicesCat)
                            train_split_indices = indicesCat[slice(0, int(minSizeClass[id]))]
                            valueIndCat[train_split_indices] = 1
                            indices[ind] = valueIndCat
                        self.train_split_indices = NUM.where(indices == 1)[0]
                        self.test_split_indices = NUM.where(indices == 0)[0]
                        self.indicesTrain = indices
                        self.x_train = self.x[self.train_split_indices,]
                        self.y_train = self.y[self.train_split_indices]
                        self.x_test = self.x[self.test_split_indices,]
                        self.y_test = self.y[self.test_split_indices]
                else:
                    #### Random split; retry until every test category is ####
                    #### represented in training (up to maxTimesToTry) ####
                    while flag:
                        timesSearch += 1
                        n = len(self.x)
                        split = int(n * self.splitPercentage / 100)
                        indices = NUM.arange(len(self.x), dtype = int)
                        self.indicesTrain = NUM.ones(len(self.x), dtype = int)
                        NUM.random.shuffle(indices)
                        self.train_split_indices = indices[split:n]
                        self.test_split_indices = indices[0:split]
                        self.indices = NUM.arange(len(self.x), dtype = int)
                        self.indicesTrain[self.test_split_indices] = 0
                        self.x_train = self.x[self.train_split_indices,]
                        self.y_train = self.y[self.train_split_indices]
                        self.x_test = self.x[self.test_split_indices,]
                        self.y_test = self.y[self.test_split_indices]
                        flag = False
                        if self.yField.rfType == categoricalVar:
                            uniq_y_tr, cnt_tr = NUM.unique(self.y_train,
                                                           return_counts = True)
                            uniq_y_test, cnt_test = NUM.unique(self.y_test,
                                                               return_counts = True)
                            #### Categories in Testing Subset Must Exist In The Training Subset ####
                            if not NUM.in1d(uniq_y_test, uniq_y_tr).all():
                                flag = True
                                if timesSearch > maxTimesToTry:
                                    ARCPY.AddIDMessage("ERROR", 110205,
                                                       self.yField.alias)
                                    raise SystemExit()
                        if not flag:
                            for id, field in enumerate(self.fields):
                                if field.rfType == categoricalVar:
                                    uniq_y_tr = NUM.unique(self.x_train.T[id])
                                    uniq_y_test = NUM.unique(self.x_test.T[id])
                                    if not NUM.in1d(uniq_y_test, uniq_y_tr).all():
                                        flag = True
                                        if timesSearch > maxTimesToTry:
                                            ARCPY.AddIDMessage("ERROR", 110205,
                                                               field.alias)
                                            raise SystemExit()
                        if not flag:
                            if self.yField.rfType == categoricalVar:
                                uniq_y_tr = NUM.unique(self.y_train)
                                uniq_y_test = NUM.unique(self.y_test)
                                self.checkAllCategoriesUsed(uniq_y_tr, uniq_y_test)
            else:
                #### No split: everything is used for training ####
                self.indices = NUM.arange(len(self.x), dtype = int)
                self.indicesTrain = NUM.ones(len(self.x), dtype = int)
                self.train_split_indices = self.indices
                self.test_split_indices = NUM.array([], dtype = float)
                self.x_train = self.x
                self.y_train = self.y
                self.x_test = NUM.array([], dtype = float)
                self.y_test = NUM.array([], dtype = float)
            self.y_test = NUM.asarray(self.y_test, dtype = float)
            self.y_train = NUM.asarray(self.y_train, dtype = float)

    def generateXVariable(self, dictOrder = None, fillVar = True,
                          balance = False):
        """This method creates an array of all variables and then splits
        it into train/test subsets."""
        if fillVar:
            if self.ssdo is not None:
                self.N = len(self.ssdo.xyCoords)
            else:
                #### All Fields Contain the Same Size ####
                self.N = len(self.fields[0].data)
            self.x = NUM.empty((self.N, self.numXVariables), dtype = float)
            #### dictOrder remaps prediction columns into training order ####
            for i in NUM.arange(self.numXVariables):
                src = i if dictOrder is None else dictOrder[i]
                self.x.T[i] = self.fields[src].data
        self.splitDataForTesting(balance)

    def getIndexField(self, name):
        """
        This method returns the field index.

        INPUT:
            name {str}: RF Name

        OUTPUT:
            index {int}/None
        """
        for ind, fieldName in enumerate(self.fields):
            if fieldName.name.upper() == name.upper():
                return ind
        return None

    def getVars(self):
        """
        This method generates the RF field type 1 -> Numeric 2 -> Categorical.

        OUTPUT:
            indices {1D array, int32}: indices of all fields including Y field
        """
        fields = [2 if f.rfType == "Categorical" else 1 for f in self.fields]
        if self.yField is not None:
            fields.append(1 if self.yField.rfType == "Numeric" else 2)
        return NUM.array(fields, dtype = NUM.int32)

    def getOperationsInRaster(self, listRasterName):
        """Get the resampling/zonal operation encoded in each raster
        field's source string ("RASTER-<OP>")."""
        dNameOpe = []
        for rasterName in listRasterName:
            for ind, field in enumerate(self.fields):
                #### BUG FIX: was `fieldName == rasterName` (NameError) ####
                if "RASTER" in field.source and field.name == rasterName:
                    typeOp = field.source.split("-")[1]
                    #### BUG FIX: was `typeOpe` (NameError) ####
                    dNameOpe.append(typeOp)
        return dNameOpe

    def getVariables(self):
        """
        This method returns all variables as [info-dict, {}] pairs for the
        UI value table.

        OUTPUT:
            variablesNames {list}
        """
        variables = []
        for ind, field in enumerate(self.fields):
            variables.append([{"name": field.name,
                               "alias": field.alias,
                               "rfType": field.rfType,
                               "fieldType": field.fieldType,
                               "source": field.source,
                               "sourceData": None}, {}])
        return variables

    def getVariablesList(self, sourceType, alias = False):
        """
        This method returns the variables of a specific source type
        {FC, DIST, RASTER}. Categorical names are suffixed with the
        discrete "(CAT)" label.

        OUTPUT:
            variablesNames {list}
        """
        variables = []
        #### Choose which label to display ####
        attr = (lambda f: f.alias) if alias else (lambda f: f.name)
        for ind, field in enumerate(self.fields):
            if field.source == sourceType:
                if field.rfType == "Categorical":
                    variables.append([None, "{0}{1}".format(attr(field),
                                                            discrete)])
                else:
                    variables.append([None, attr(field)])
        return variables

    def saveInfo(self, outputFileName):
        """Save header (all field dictionaries + properties) in output file."""
        with open(outputFileName, 'w') as outfile:
            data = {i.name: i.toDict() for i in self.fields}
            if self.yField is not None:
                data["YRandomForest"] = self.yField.toDict()
            data["properties"] = self.properties
            JSON.dump(data, outfile)

    def getHeader(self):
        """
        This method returns the information of all fields in the instance
        of InfoForestField as string.

        OUTPUT:
            header {str}: information used in the model header
        """
        data = {i.name: i.toDict() for i in self.fields}
        if self.yField is not None:
            data["YRandomForest"] = self.yField.toDict()
        data["properties"] = self.properties
        return JSON.dumps(data)

    def readModelHeader(self, filename):
        """Read RFM model header: 8-byte little-endian length prefix
        followed by a UTF-8 JSON payload. Returns None on failure."""
        textData = None
        try:
            with open(filename, "rb") as f:
                index = 8
                chunk = f.read(index)
                f.seek(index)
                size = int.from_bytes(chunk, byteorder = 'little')
                data = f.read(size)
                index += size
                textData = data.decode('utf-8')
        except Exception:
            # best-effort: caller handles the None result
            pass
        return textData

    def loadInfo(self, inputFilename):
        """
        This method extracts the information from the model, loading it
        into the InfoForestField.

        INPUT:
            inputFilename {str}: inputFilename path
        """
        try:
            data = self.readModelHeader(inputFilename)
            data_loaded = JSON.loads(data)
            self.fields = []
            self.numXVariables = 0
            self.yField = None
            for info in data_loaded:
                if info == "properties":
                    self.properties = data_loaded[info]
                else:
                    rf = RFField()
                    rf.fromDict(data_loaded[info])
                    if info == "YRandomForest":
                        self.yField = rf
                    else:
                        self.fields.append(rf)
                        self.numXVariables += 1
        except Exception:
            ARCPY.AddIDMessage("ERROR", 110184)
            raise SystemExit

    def loadHeaderfromTextFile(self, inputFilename):
        """
        This method loads information from a text file into the
        InfoForestField instance.

        INPUT:
            inputFilename {str}: inputFilename path
        """
        try:
            data = ""
            with open(inputFilename, 'r') as file:
                data = file.read()
            data_loaded = JSON.loads(data)
            self.fields = []
            self.numXVariables = 0
            self.yField = None
            for info in data_loaded:
                if info == "properties":
                    self.properties = data_loaded[info]
                else:
                    rf = RFField()
                    rf.fromDict(data_loaded[info])
                    if info == "YRandomForest":
                        self.yField = rf
                    else:
                        self.fields.append(rf)
                        self.numXVariables += 1
        except Exception:
            ARCPY.AddError("Problem reading model")
            raise SystemExit

    def isBalanced(self, name, categories, counts, checkBalance = True):
        """
        Check if the Y variable is unbalanced, throwing a warning.

        INPUT:
            name (str): field name
            categories (list): categories
            counts (1D array): count per category
        """
        timesThreshold = 2
        n = len(categories)
        total = counts.sum()
        perc = 1 / n
        values = counts / total
        areHigh = values > perc * timesThreshold
        areLow = values < perc / timesThreshold
        highCategories = ["{0} [{1}]".format(i, counts[id])
                          for id, i in enumerate(categories) if areHigh[id]]
        lowCategories = ["{0} [{1}]".format(i, counts[id])
                         for id, i in enumerate(categories) if areLow[id]]
        if areLow.sum() > 0 or areHigh.sum() > 0:
            msgHig = ""
            if len(highCategories) > 0:
                msgHig = "high number of features: " + ", ".join(highCategories)
            msgLow = ""
            if len(lowCategories) > 0:
                msgLow = "low number of features: " + ", ".join(lowCategories)
            if msgHig != "" and msgLow != "":
                msgHig += " and"
            ARCPY.AddIDMessage("WARNING", 110236,
                               (msgHig + " " + msgLow).strip())
            if checkBalance:
                self.applySparseMessage()

    def applySparseMessage(self, typeE = "WARNING"):
        """Suggest the sparse option (emitted at most once)."""
        if not self.sparseMessage:
            ARCPY.AddIDMessage(typeE, 110233)
            self.sparseMessage = True

    def defineYFromDataObject(self, name, userType, alias = None,
                              ignoreValue = False, checkBalance = True):
        """
        Use a field from a SSDO instance in RF as dependent variable (Y).

        INPUT:
            name {str}: field name
            userType {str}: RF Type (Numeric/Categorical)
            alias {str}: field alias name
            ignoreValue {bool}: avoid including the null value as a class
        """
        errors = None
        rf = RFField()
        rf.name = name
        rf.rfType = userType
        rf.fieldType = self.ssdo.fields[name].type
        rf.source = "FC"
        rf.info = ""
        rf.alias = alias if alias is not None else name
        dataY = self.ssdo.fields[name].data
        if userType == 'Categorical':
            data = NUM.ones(len(dataY), dtype = float) * NULL
            unique, counts = NUM.unique(dataY, return_counts = True)
            if not ignoreValue:
                # NOTE(review): counts is not filtered alongside unique, so
                # infoCount can be misaligned when NULL is present — confirm
                # against callers before changing.
                idR = NUM.argwhere(unique != str(NULL))
                unique = unique[idR]
            for ind, ele in enumerate(unique):
                data[dataY == ele] = float(ind)
            rf.info = unique.ravel().tolist()
            rf.infoCount = counts
            errors = self.checkVariabilityCat(name, rf.info)
            self.y = data
            #### Warn On An Unbalanced Dataset ####
            self.isBalanced(rf.alias, rf.info, rf.infoCount, checkBalance)
        else:
            self.y = NUM.asarray(dataY.copy(), dtype = float)
            errors = self.checkVariability(name, self.y)
        self.yField = rf
        return errors

    def defineYFromArray(self, name, userType, dataInfo, inputType, source,
                         alias = None, ignoreValue = False,
                         checkBalance = True):
        """
        Use an array variable as dependent variable (Y).

        INPUT:
            name {str}: field name
            userType {str}: RF Type (Numeric/Categorical)
            dataInfo {array}: RF data
            alias {str}: field alias name
            ignoreValue {bool}: avoid including the null value as a class
        """
        errors = None
        rf = RFField()
        rf.name = name
        rf.rfType = userType
        rf.fieldType = inputType
        rf.source = source
        rf.info = ""
        rf.alias = alias if alias is not None else name
        if userType == 'Categorical':
            unique, counts = NUM.unique(dataInfo, return_counts = True)
            data = NUM.zeros(len(dataInfo), dtype = float)
            for ind, ele in enumerate(unique):
                data[dataInfo == ele] = float(ind)
            rf.info = unique.ravel().tolist()
            rf.infoCount = counts
            errors = self.checkVariabilityCat(name, rf.info)
            self.y = data
            #### Warn On An Unbalanced Dataset ####
            self.isBalanced(rf.alias, rf.info, rf.infoCount, checkBalance)
        else:
            #### BUG FIX: was `dataY.copy()` (NameError) and ####
            #### `checkVariability(name, rf.data)` with rf.data never set ####
            self.y = NUM.asarray(dataInfo.copy(), dtype = float)
            errors = self.checkVariability(name, self.y)
        self.yField = rf
        return errors

    def addVariable(self, name, userType, dataType, source, dataSource,
                    alias = None):
        """Register a new RF field (no data attached); duplicate names
        raise error 110182."""
        rf = RFField()
        rf.name = name
        rf.rfType = userType
        rf.fieldType = dataType
        rf.source = source
        rf.dataSource = dataSource
        rf.info = ""
        rf.alias = alias if alias is not None else name
        for ele in self.fields:
            if ele.name == name:
                ARCPY.AddIDMessage("ERROR", 110182, name)
                raise SystemExit
        self.fields.append(rf)

    def updateData(self, name, data):
        """Update data of a RF field."""
        index = self.getIndexField(name)
        self.fields[index].data = data

    def updataDataCheck(self, name, data):
        """Update data of a RF field, remapping categorical values to
        category indices."""
        index = self.getIndexField(name)
        self.fields[index].data = data
        #### BUG FIX: original referenced undefined names (userType, ####
        #### dataField, data1) — the field's own rfType drives the branch ####
        if self.fields[index].rfType == 'Categorical':
            dataField = data
            unique = NUM.unique(dataField)
            toValues = NUM.arange(unique.size, dtype = float)
            sortIdx = NUM.argsort(unique)
            idx = NUM.searchsorted(unique, dataField, sorter = sortIdx)
            data1 = toValues[sortIdx][idx]
            self.fields[index].info = unique.ravel().tolist()
            self.fields[index].data = data1
        else:
            self.fields[index].data = data

    def checkVariability(self, name, values):
        """Collect variability errors for a numeric field; empty data
        aborts with error 110170."""
        errors = []
        if self.isTraining:
            if len(values) == 0:
                ARCPY.AddIDMessage("ERROR", 110170, name)
                raise SystemExit
            if '<U' in str(values.dtype):
                #### String data cannot be used as a numeric variable ####
                errors.append((110179, name, "E"))
            else:
                #### BUG FIX: var() was computed even for string dtype, ####
                #### raising TypeError before errors were reported ####
                if values.var() == 0:
                    errors.append((110180, name, "E"))
            #### Check variability Threshold .95 Dataset Size ####
            uniq, counts = NUM.unique(values, return_counts = True)
            maxCat = counts.max()
            nLimit = len(values) * 0.95
            if maxCat > nLimit:
                errors.append((110180, name, "E"))
        return errors

    def checkVariabilityCat(self, name, values):
        """Collect variability errors/warnings for a categorical field;
        empty data aborts with error 110170."""
        errors = []
        if self.isTraining:
            if len(values) == 1:
                errors.append((110181, name, "E"))
            n = len(values)
            if n > MaxCategoriesAllowed:
                errors.append((110192, name, "E"))
            if n > predictivePowerVariableSize:
                errors.append((110214, name, "W"))
            if len(values) == 0:
                ARCPY.AddIDMessage("ERROR", 110170, name)
                raise SystemExit
        return errors

    def addXVariableFromDataObject(self, name, userType, source, alias = None):
        """
        Add a field from a SSDO instance to the RF fields.

        INPUT:
            name {str}: field name
            userType {str}: RF Type (Numeric/Categorical)
            source {str}: "FC", "DIST", "RASTER"
            alias {str}: field alias name
        """
        errors = None
        rf = RFField()
        rf.name = name
        if userType not in userTypeType:
            ARCPY.AddIDMessage("ERROR", 110178)
            raise SystemExit
        rf.rfType = userType
        rf.fieldType = self.ssdo.fields[name].type
        rf.source = source
        rf.info = ""
        rf.alias = alias if alias is not None else name
        for ele in self.fields:
            if ele.name == name:
                ARCPY.AddIDMessage("ERROR", 110182, name)
                raise SystemExit
        if userType == 'Categorical':
            #### Remap category values to their sorted-unique index ####
            dataField = self.ssdo.fields[name].data
            unique = NUM.unique(dataField)
            toValues = NUM.arange(unique.size, dtype = float)
            sortIdx = NUM.argsort(unique)
            idx = NUM.searchsorted(unique, dataField, sorter = sortIdx)
            data = toValues[sortIdx][idx]
            rf.info = unique.ravel().tolist()
            errors = self.checkVariabilityCat(name, rf.info)
            rf.data = data
        else:
            rf.data = self.ssdo.fields[name].data
            errors = self.checkVariability(name, rf.data)
        self.fields.append(rf)
        self.numXVariables += 1
        return errors

    def addXVariableFromArray(self, name, userType, dataInfo, source,
                              inputType, alias = None, maskedOut = None):
        """
        Add an array as RF field.

        dataInfo size depends on source: if dataInfo includes bad records
        then maskedOut should be None and dataInfo will be filtered by bad
        records from SSDO; if dataInfo does not include bad records then
        len(dataInfo) + len(ssdo.badRecords) + len(maskedOut) must equal the
        original FC size. The method verifies the real size.

        INPUT:
            name {str}: field name
            userType {str}: RF Type (Numeric/Categorical)
            dataInfo {array}: numpy array 1D
            source {str}: "FC", "DIST", "RASTER"
            inputType {str}: field type similar to feature class field type
                             'Double, Integer, String, SmallInteger, Single'
            alias {str}: field alias name
        """
        errors = None
        rf = RFField()
        rf.name = name
        rf.rfType = userType
        rf.fieldType = inputType
        rf.source = source
        rf.info = ""
        rf.alias = alias if alias is not None else name
        for ele in self.fields:
            if ele.name == name:
                ARCPY.AddIDMessage("ERROR", 110182, name)
                raise SystemExit
        if userType == 'Categorical':
            unique = NUM.unique(dataInfo)
            data = NUM.zeros(len(dataInfo), dtype = float)
            for ind, ele in enumerate(unique):
                data[dataInfo == ele] = float(ind)
            rf.info = unique.ravel().tolist()
            errors = self.checkVariabilityCat(name, rf.info)
            rf.data = data
        else:
            rf.data = dataInfo
            errors = self.checkVariability(name, rf.data)
        if self.ssdo is not None:
            nBadRec = len(self.ssdo.badRecords)
            if nBadRec > 0:
                total = nBadRec + self.ssdo.numObs
                if maskedOut is not None:
                    dataInfoLen = len(dataInfo) + len(maskedOut) + nBadRec
                    if dataInfoLen != total:
                        ARCPY.AddIDMessage("ERROR", 110193, name)
                        raise SystemExit
        self.fields.append(rf)
        self.numXVariables += 1
        return errors

    def getIndicesBadRecords(self, ssdo = None):
        """Get original indices of bad records, mapping every master ID of
        the source FC to its position."""
        self.masterIndexFC = {}
        if self.ssdo is not None:
            if len(self.ssdo.badRecords):
                ssdo = self.ssdo
                masterIndex = list(ssdo.master2Order.keys())
                masterIndex.extend(ssdo.badRecords)
                masterIndex = sorted(masterIndex)
                masterIndex = NUM.array(masterIndex, dtype = int)
                self.masterIndexFC = {i: id for id, i in enumerate(masterIndex)}


def getTypeFromDescribe(desc, dictVar):
    """Return {FIELDNAME: type} for describe fields whose raw name is in
    dictVar (case-sensitive membership test)."""
    typeF = {}
    for idF in desc.fields:
        if idF.name in dictVar:
            typeF[idF.name.upper()] = idF.type
    return typeF


def getName(desc, dictVar):
    """Return {FIELDNAME: type} for describe fields whose upper-cased name
    is in dictVar (case-insensitive variant of getTypeFromDescribe)."""
    typeF = {}
    for idF in desc.fields:
        if idF.name.upper() in dictVar:
            typeF[idF.name.upper()] = idF.type
    return typeF


def compareForestInfo(training: InfoForestField, test: InfoForestField,
                      dictFields: dict, updateTestT = True, balance = False):
    """
    Check if the categories of the trained dataset correspond with the test
    dataset:
        - Number of fields
        - Type
        - New categories in the test dataset are not allowed
        - Remap categories of test dataset using training dataset
        - Generate X variable of test dataset

    INPUT:
        training {InfoForestField}: training RF field
        test {InfoForestField}: test RF field
        dictFields {dict}: match fields from training to test

    OUTPUT:
        str/None: a str on error, None on success
    """
    if training.numXVariables != test.numXVariables:
        return "Different number of variables"
    #dictFields fieldTest : fieldTrain
    testedChanged = False
    namesTrained = [f.name for f in training.fields]
    dictFieldsTestTrain = {dictFields[i]: i for i in dictFields}
    dictOrder = {}
    for nameTra in dictFields:
        name = dictFields[nameTra]
        id = training.getIndexField(nameTra.upper())
        idt = test.getIndexField(name.upper())
        dictOrder[idt] = id
        # NOTE(review): `or` binds looser than `and`, so `id is not None`
        # only guards the second alternative — preserved as-is; confirm the
        # intended grouping before changing.
        if dictFields[nameTra] in namesTrained or dictFieldsTestTrain[name] in namesTrained and id is not None:
            if training.fields[id].fieldType.upper() != test.fields[idt].fieldType.upper():
                ARCPY.AddIDMessage("ERROR", 110194,
                                   training.fields[id].name + "(" + training.fields[id].fieldType + ")",
                                   test.fields[idt].name + "(" + test.fields[idt].fieldType + ")")
                return "error"
            if training.fields[id].rfType != test.fields[idt].rfType:
                ARCPY.AddIDMessage("ERROR", 110194,
                                   training.fields[id].name + "(" + training.fields[id].rfType + ")",
                                   test.fields[idt].name + "(" + test.fields[idt].rfType + ")")
                return "error"
            else:
                if training.fields[id].rfType == 'Categorical':
                    if not NUM.array_equal(training.fields[id].info,
                                           test.fields[idt].info):
                        testDict = {row: id for id, row in enumerate(test.fields[idt].info)}
                        trainingDict = {row: id for id, row in enumerate(training.fields[id].info)}
                        #### New Categories In Test Are Not Allowed ####
                        for ele in testDict:
                            if not ele in trainingDict:
                                ARCPY.AddIDMessage("ERROR", 110195, name)
                                return "error"
                        #### Remap via negated ids to avoid double mapping ####
                        data = test.fields[idt].data
                        for ele in testDict:
                            tid = testDict[ele]
                            trid = trainingDict[ele]
                            data[data == tid] = -trid
                        testedChanged = True
                        data = data * -1
                        test.fields[idt].data = data
    if updateTestT:
        test.generateXVariable(dictOrder, balance = balance)
        if test.x.shape[0] == 0:
            return "Empty X's variables"
        test.y = NUM.zeros(test.x.shape[0], dtype = float)
    return None
test.fields[idt].name + "("+test.fields[idt].fieldType+")") return "error" if training.fields[id].rfType != test.fields[idt].rfType: ARCPY.AddIDMessage("ERROR", 110194, training.fields[id].name + "("+ training.fields[id].rfType+ ")", test.fields[idt].name + "("+test.fields[idt].rfType+")") return "error" else: if training.fields[id].rfType == 'Categorical': if not NUM.array_equal(training.fields[id].info, test.fields[idt].info): testDict ={ row : id for id, row in enumerate(test.fields[idt].info) } trainingDict={ row : id for id, row in enumerate(training.fields[id].info) } for ele in testDict: if not ele in trainingDict: ARCPY.AddIDMessage("ERROR", 110195, name) return "error" data = test.fields[idt].data for ele in testDict: tid = testDict[ele] trid = trainingDict[ele] data[data == tid] = -trid testedChanged = True data= data *-1 test.fields[idt].data = data if updateTestT: test.generateXVariable(dictOrder, balance = balance) if test.x.shape[0] == 0: return "Empty X's variables" test.y = NUM.zeros(test.x.shape[0], dtype = float) return None def getNearFeature(inputFC, distancesFC): """ Get Near Feature Distances """ ARCPY.SetProgressor("default", ARCPY.GetIDMessage(84822), 0, len(distancesFC), 1) listArray = [] outputTable = UTILS.returnScratchName("DIST_RF_", fileType = "TABLE", scratchWS = ARCPY.env.scratchGDB) for pos, fc in enumerate(distancesFC): ARCPY.GenerateNearTable_analysis(inputFC, fc, outputTable +str(pos), None, "NO_LOCATION", "NO_ANGLE", "CLOSEST", 0, "PLANAR") ssdoTable = SSDO.table2RecArray(outputTable + str(pos),["NEAR_DIST"]) listArray.append(ssdoTable["NEAR_DIST"].copy()) neg = ssdoTable["NEAR_DIST"] == -1 if NUM.sum(neg) > 0: ARCPY.AddIDMessage("Warning", 110169, distancesFC) UTILS.passiveDelete(outputTable+ str(pos)) ARCPY.SetProgressorPosition(pos) return listArray def extractFeatureFromRaster(ssdo, rasters, names, typeVariables, newFC = None, typeSampling = "Tool", useCopy = True): """ This Method Extract values from multiple raster 
datasets
    INPUT:
    ssdo {SSDataObject}: data Object form inputFC
    rasters {list str}: List raster paths
    names {list str}: List names
    operation {list str}: AVG, MAJORITY, ETC
    model {InfoForestField obj}
    newFC {str} path feature class
    OUTPUT:
    data, IdsSelected, null {ndarray NxR} Data array where N->ssdo.numObs
    and R number of raster datasets
    """
    #### NOTE(review): this body belongs to a function whose def line is above this ####
    #### chunk; it dispatches raster extraction to the SA/NP implementations below. ####
    data = None
    if typeSampling == "Tool" or ssdo.shapeType == "Polygon":
        data = getRasterDataUsingSA(ssdo, rasters, typeVariables, newFC, useCopy)
    else:
        data = getRasterDataUsingNP(ssdo, rasters, names, typeVariables, newFC, useCopy)
    #### data[2] holds master ids of records null in at least one raster ####
    if data[2] is not None:
        if len(data[2]):
            #### Check Whether the Number of Features is Appropriate ####
            ERROR.reportBadRecords(ssdo.numObs, len(data[2]),
                                   [str(i) for i in data[2]],
                                   label = ssdo.oidName + " Null Values in Raster",
                                   explicitBadRecordID = None)
    return data

def getOperationType(shapeType, typeO):
    """ Select Operation depending on shape Type and RF Field type (CNT-DSC) """
    if shapeType == "Polygon":
        #### Polygons are zonal-summarized ####
        if typeO == numericVar:
            return "MEAN"
        else:
            return "MAJORITY"
    else:
        #### Points are sampled with a resampling type ####
        if typeO == numericVar:
            return "BILINEAR"
        else:
            return "NEAREST"

def checkLicense():
    """ Check License Spatial Analysis """
    prod = ARCPY.ProductInfo()
    extension = ARCPY.CheckExtension("Spatial")
    if extension == "Available":
        #### Server products already include SA; only check out otherwise ####
        if prod != "ArcServer":
            lic = ARCPY.CheckOutExtension("Spatial")
            if lic != 'CheckedOut':
                ARCPY.AddIDMessage("ERROR", 110188)
                raise SystemExit
    else:
        ARCPY.AddIDMessage("ERROR", 110188)
        raise SystemExit

def getRasterDataUsingSA(ssdo, rasters, typeVariables, newFC, useCopy):
    """ Get Raster Data using Zonation Tool
    INPUT:
    ssdo {SSDataObject}: ssdo instance
    rasters {list rasters}: list of rasters
    typeVariables {list str}: List to get the operation of current variable MEAN, MAJORITY
    newFC {str}: Path feature Class
    useCopy {bool}: Define type of oidField Name to be used
    """
    #### Check License SA ####
    checkLicense()
    if useCopy:
        #### Copied FCs carry a synthetic "IDS" field as join key ####
        oidName = "IDS"
        tempOidName = oidName
    else:
        oidName = ssdo.oidName
        tempOidName = oidName
        if oidName == "OBJECTID":
            tempOidName = "OBJECTID_1"
    featureClassPath = newFC
    shapeType = ssdo.shapeType
    masterIndex = []
    masterIndex = list(ssdo.master2Order.keys())
    masterIndex = NUM.array(masterIndex, dtype = int)
    msg = ARCPY.GetIDMessage(84823)
    msg = msg.format("", "")
    ARCPY.SetProgressor("default", msg, 0, len(rasters), 1)
    rasterBadIds = []
    masterIdsBadRecordFromRasters = []
    opr = []
    #### One column per raster, NaN marks "not extracted yet / null" ####
    data = NUM.ones((ssdo.numObs, len(rasters)), dtype = float) * NUM.nan
    bad = []
    for id, raster in enumerate(rasters):
        op = getOperationType(shapeType, typeVariables[id]).replace(" ", "_")
        opr.append(op)
        outputTable = UTILS.returnScratchName("EXTR_RF_", fileType = "TABLE",
                                             scratchWS = ARCPY.env.scratchGDB)
        inRaster = None
        if type(raster) == str:
            inRaster = ARCPY.sa.Raster(raster)
        else:
            inRaster = ARCPY.sa.Raster(raster.value)
        #### Remote (service) rasters are not supported ####
        if "HTTP:" in inRaster.catalogPath.upper():
            del inRaster
            ARCPY.AddIDMessage("ERROR", 110213)
            raise SystemExit
        array = None
        oidNameField = oidName.upper()
        nameDataField = op.upper()
        tempOidNameField = tempOidName.upper()
        if shapeType == "Polygon":
            #### MAJORITY is undefined for floating point rasters ####
            if inRaster.pixelType in ['F64', 'F32'] and op == "MAJORITY":
                ARCPY.AddIDMessage("ERROR", 110212, inRaster.name)
                del inRaster
                raise SystemExit
            if not UTILS.isGDB(raster) and inRaster.pixelType in ['UNKNOWN']:
                ARCPY.AddIDMessage("ERROR", 10160, inRaster.name)
                del inRaster
                raise SystemExit
            #### Check Join Field ####
            if "." in tempOidNameField:
                tableName, tempOidNameField = tempOidNameField.split('.')
            #### Check OBJECTID Name ####
            if tempOidNameField == "OBJECTID":
                tempOidNameField = "OBJECTID_1"
            ASA.ZonalStatisticsAsTable(featureClassPath, oidName, inRaster,
                                       outputTable + str(id), 'DATA', op)
            nameRaster = getNameRaster(inRaster.name.upper())
            array = SSDO.table2RecArray(outputTable + str(id), [tempOidNameField, op])
            #### Delete table ####
            UTILS.passiveDelete(outputTable + str(id))
        if shapeType == "Point":
            if not UTILS.isGDB(raster) and inRaster.pixelType in ['UNKNOWN']:
                ARCPY.AddIDMessage("ERROR", 10160, inRaster.name)
                del inRaster
                raise SystemExit
            ASA.Sample(in_rasters = inRaster, in_location_data = featureClassPath,
                       out_table = outputTable, resampling_type = op,
                       unique_id_field = oidName,
                       process_as_multidimensional = "CURRENT_SLICE")
            outPath, outName = OS.path.split(featureClassPath)
            nameRaster = getNameRaster(inRaster.name.upper())
            fieldsNam = getNameRasterFromBands([inRaster])
            desc = None
            try:
                desc = ARCPY.Describe(outputTable)
            except:
                pass
            #### Sample writes the value into a "Band..." field ####
            fields = [field.name for field in desc.fields if "Band" in field.name]
            fieldName = fields[0]
            array = SSDO.table2RecArray(inputTable = outputTable,
                                        fieldNames = [outName.upper(),
                                                      fieldName.upper().replace(" ", "_")],
                                        silentWarnings = True, includeNulls = False)
            tempOidNameField = outName.upper()
            oidNameField = outName.upper()
            nameDataField = fieldName.upper().replace(" ", "_")
            #### Delete table ####
            UTILS.passiveDelete(outputTable)
        ids = None
        sortId = None
        values = None
        if useCopy:
            sortId = NUM.argsort(array[oidNameField])
            ids = NUM.asarray(array[oidNameField][sortId], dtype = int)
        else:
            #### Map master ids back to internal order ####
            dtInfo = NUM.asarray(array[tempOidNameField], dtype = int)
            sortId = NUM.argsort(dtInfo)
            ids = dtInfo[sortId]
            ids = NUM.asarray([ssdo.master2Order[i] for i in ids], dtype = int)
        values = array[nameDataField][sortId]
        data.T[id][ids] = values
        #### Rows still NaN for this raster had no extracted value ####
        bad = NUM.where(NUM.isnan(data.T[id]))[0]
        if len(bad) > 0:
            rasterBadIds = NUM.union1d(rasterBadIds, bad)
        #### Every row null for this raster -> fatal ####
        if len(data) == len(bad):
# (continuation of getRasterDataUsingSA: every record was null for this raster)
            del inRaster
            ARCPY.AddIDMessage("ERROR", 110170, nameDataField)
            raise SystemExit
        try:
            del inRaster
        except:
            pass
        ARCPY.SetProgressorPosition(id)
    if len(rasterBadIds):
        #### Drop rows that were null in any raster ####
        rasterBadIds = NUM.asarray(rasterBadIds, dtype = int)
        masterIdsBadRecordFromRasters = masterIndex[rasterBadIds]
        goodData = NUM.arange(len(masterIndex))
        goodData[rasterBadIds] = -1
        goodData = NUM.where(goodData != -1)[0]
        indices = goodData
        data = data[goodData,:]
    else:
        index = NUM.arange(ssdo.numObs, dtype = int)
        indices = index
    return data, indices, masterIdsBadRecordFromRasters, opr

def getRasterDataUsingNP(ssdo, rasters, names, typeVariables, newFC, useCopy):
    """ Get Raster Data using Zonation Tool
    INPUT:
    ssdo {SSDataObject}: ssdo instance
    rasters {list rasters path}: list of rasters
    names {list str}: list raster names
    operations {list str}: operations MEAN, MAJORITY
    """
    #### Check License SA ####
    checkLicense()
    if useCopy:
        oidName = "IDS"
    else:
        oidName = ssdo.oidName
    featureClassPath = newFC
    shapeType = ssdo.shapeType
    masterIndex = []
    masterIndex = list(ssdo.master2Order.keys())
    masterIndex = NUM.array(masterIndex, dtype = int)
    centroids = ssdo.xyCoords
    rasterBadIds = []
    masterIdsBadRecordFromRasters = []
    opr = []
    data = NUM.ones((ssdo.numObs, len(rasters)), dtype = float) * NUM.nan
    bad = []
    ids = NUM.array(list(ssdo.master2Order.values()), dtype = int)
    #### Collect the per-raster operation names only; extraction is done in bulk ####
    for id, raster in enumerate(rasters):
        op = getOperationType(shapeType, typeVariables[id])
        opr.append(op)
    #### NOTE(review): the statements below are placed after the loop — a single ####
    #### RasterInfo handles all rasters at once; confirm against original layout. ####
    typeRF = [i == numericVar for i in typeVariables]
    infoRaster = RasterInfo(rasters, typeRF, printInfo = False, rasterNames = names,
                            maxSize = MEMSIZE*2)
    dataValues, mask = infoRaster.extractDataUsingPoints(ssdo)
    del infoRaster
    #### ids not present in the mask had no valid value in some raster ####
    badIndex = NUM.logical_not(NUM.in1d(ids, mask))
    badMasterIds = masterIndex[badIndex]
    indices = ids
    if len(ssdo.xyCoords) == len(badMasterIds):
        ARCPY.AddIDMessage("ERROR", 110170, ",".join(names))
        raise SystemExit
    return dataValues, mask, badMasterIds, opr

def getNameRaster(name):
    #### Strip a known raster file extension from the (uppercased) name. ####
    #### NOTE(review): uses substring match, so the extension is removed ####
    #### anywhere it occurs in the name, not only at the end.            ####
    listExts = [".ASC", ".IMG", ".TIFF", ".JPG", ".TIF", ".JP2", ".PNG", ".GIF",
                ".BIP", ".DAT", ".CRF", ".MRF", ".BMP", ".BIL", ".BSQ"]
    for i in listExts:
        if i in name:
            return name.replace(i, "")
    return name

def applyMaskFromRaster(RFField: InfoForestField, maskIds = None, allIndices = None):
    """ Resize data if there is maskIds(raster mask ids), and it updates the
    RFField Object
    INPUT:
    RFField {InfoForestField}: forest Object
    maskIds {array}: Arrays should be removed
    allIndices {array}: Arrays to use (ignore mask Ids)
    """
    RFField.filterApplied = False
    if RFField.ssdo is None:
        return
    if maskIds is None and allIndices is None:
        return
    N = RFField.ssdo.numObs
    if allIndices is not None:
        if len(allIndices) == 0:
            return
    #### Don't apply if all indices are included ####
    if allIndices is not None:
        if N == len(allIndices):
            return
    if allIndices is None:
        allIndices = NUM.arange(N)
        masked = NUM.setdiff1d(allIndices, maskIds)
    else:
        masked = allIndices ## [ RFField.ssdo.master2Order[i] for i in allIndices]
    #### ReShape SSDO ####
    if hasattr(RFField.ssdo, "shapes"):
        RFField.ssdo.shapes = NUM.array(RFField.ssdo.shapes)[masked].tolist()
    RFField.ssdo.numObs = len(masked)
    RFField.ssdo.xyCoords = RFField.ssdo.xyCoords[masked,:]
    #### Rebuild master2Order for the surviving rows ####
    orderIds = NUM.arange(len(RFField.ssdo.master2Order))
    newOrder = orderIds[masked]
    m2o = { RFField.ssdo.order2Master[int(i)]:id for id, i in enumerate(newOrder)}
    RFField.ssdo.master2Order = m2o
    ####
    RFField.filteredRowsIds = maskIds
    if not RFField.filterApplied:
        #### Raster-sourced fields are already aligned to the mask ####
        for field in RFField.fields:
            if "RASTER" not in field.source:
                field.data = field.data[masked]
        RFField.filterApplied = True
    if RFField.yField is not None:
        RFField.y = RFField.y[masked]
        if RFField.yField.rfType == categoricalVar:
            #### Filtering may have removed whole classes: re-number categories ####
            nClass = len(RFField.yField.info)
            uniqueY, countY = NUM.unique(RFField.y, return_counts = True)
            uniqueY = NUM.asarray(uniqueY, dtype = int)
            if nClass != len(uniqueY):
                dictClass = {ind:classInfo for ind, classInfo in enumerate(RFField.yField.info)}
                listInfo = []
                for newId, oldId in enumerate(uniqueY):
                    cl = dictClass[oldId]
listInfo.append(cl) RFField.y[RFField.y == oldId] = newId RFField.yField.info = listInfo RFField.yField.infoCount = countY def getRasterData(rasters): """ Get Information of a set raster datasets, extracting all values using rasterToNumpy INPUT: rasters {list}: list of rasters as strings OUTPUT: data {list}: list of ndarrays of each raster maskedTotal {array}: mask of all rasters shape {tuple}: (nRows, nCols) """ listMasked = [] dataValues = [] nSize= 0 shape = () for id, r in enumerate(rasters): dataO = ARCPY.RasterToNumPyArray(r, nodata_to_value = -9999.99) if len(shape) == 3: dataO = dataO[0,:] data = dataO.ravel() dataValues.append(data) listToMask = NUM.where(data == -9999.99) listMasked.append(listToMask) if id == 0: nSize = len(data) shape = dataO.shape else: if nSize != len(data): ARCPY.AddError("Rasters should have the same size") raise SystemExit() ids = NUM.arange(nSize) maskedTotal = NUM.array([]) for id, mask in enumerate(listMasked): maskedTotal = NUM.union1d(maskedTotal, mask) maskedTotal = NUM.setdiff1d(ids, maskedTotal) outputData = [] for id, data in enumerate(dataValues): outputData.append(data[maskedTotal].copy()) return data, maskedTotal, shape def checkProjectionRasters(rasterList): """ Get projection of list of raster paths """ listProjectections = [] for i in rasterList: d = ASA.Raster(i) spatialRef = d.spatialReference listProjectections.append(spatialRef.factoryCode) if "HTTP:" in d.catalogPath.upper(): ARCPY.AddIDMessage("ERROR", 110213) raise SystemExit del d ids, counts = NUM.unique(NUM.array(listProjectections), return_counts = True) #### All Projections are equals #### if len(ids) == 1: return False #### There are different projections #### if len(ids) != len(listProjectections): return True return True def getNameRasterFromBands(rasters, names = None, justNames = True): """ Get Names used in Sample when the list of rasters contain band names, The function also extracts the bands used in each raster. 
""" dictName = [] namesValues = [] pathRaster = [] for id, raster in enumerate(rasters): name = None desc = ARCPY.Describe(raster) isRasterBand = desc.dataType == 'RasterBand' isNameBand = False if names is None: raster = str(raster) name = str(desc.name) path = desc.path else: name = names[id] path = str(raster) if isRasterBand: isRasterBand = False isNameBand = True if name.upper().startswith("BAND_") or isRasterBand: if name.upper() in path.upper(): path = path.upper().replace(name.upper(),"") if path[-1] == "\\": path = path[:-1] descParent = ARCPY.Describe(path) if hasattr(descParent, "BandCount"): dictName.append(getNameRaster(descParent.name.upper())) namesValues.append(name) pathRaster.append(path) else: dictName.append(getNameRaster(name.upper())) namesValues.append("") pathRaster.append(raster) else: dictName.append(getNameRaster(name.upper())) if isNameBand: namesValues.append("RenamedBand") else: namesValues.append("") pathRaster.append(raster) namesRep = dictName unique, count = NUM.unique(dictName, return_counts = True) #### Dictionary Contain the Raster path and list order and bands #### valuesBands = {id:[] for id in NUM.unique(pathRaster)} values = {str(unique[id]):count[id] for id in NUM.arange(len(unique))} valuesIni = {str(unique[id]):1 for id in NUM.arange(len(unique))} #### Names used in Sample when Band are extracted #### newNames = [] #### Rasters path / Band name #### orderRaster = [] for id, name in enumerate(dictName): if values[name] == 1: newNames.append(name.upper().replace(" ", "_")) valuesBands[pathRaster[id]].append([id, namesValues[id]]) orderRaster.append([pathRaster[id],namesValues[id] ]) else: if valuesIni[name] <= values[name]: newNames.append((name + getNameId(valuesIni[name]-1)).upper()) valuesIni[name] += 1 valuesBands[pathRaster[id]].append([id, namesValues[id]]) orderRaster.append([pathRaster[id],namesValues[id] ]) if justNames: return newNames else: return newNames, valuesBands, orderRaster def getNameId(id): """ Get 
    Extra Name for Sample """
    #### Mirrors the suffix sequence Sample generates for duplicated names ####
    seq = ["", "_1", "_12"]
    if id <= 2:
        return seq[id]
    else:
        base = seq[2]
        for i in NUM.arange(3, id+1, 1):
            base += "_1" + str(i)
        return base

def updateCellsizeAccordingSpatialRef(srf, value):
    #### Validate a cell size against the spatial reference. Returns value for ####
    #### projected systems; errors for geographic cell sizes over 45 degrees.  ####
    #### NOTE(review): non-projected, non-failing paths return None — confirm  ####
    #### callers handle that.                                                  ####
    if srf.type.upper() != "PROJECTED":
        if srf.type.upper() == "GEOGRAPHIC":
            if value > 45:
                ARCPY.AddIDMessage("ERROR", 1775)
                raise SystemExit
    else:
        return value

def getCentroids(extentList, cellSize, srf, outputFC = None, fc = None,
                 outArea = False, getIdsCentroids = False, justIndex = 1):
    """ Get Feature Class of points over the area of interest, if fc is
    provided the function returns an array with fc IDs
    INPUT:
    extentList {list(Extend values)}
    cellSize {float}: cell size to be applied
    srf {Spatial Reference Object}: Spatial Reference
    outputFC {str}: Feature Class output
    fc {str}: Mask Feature class Feature Class
    outArea {Bool}: out area works together fc and ouputFC--
                    true -> get a fc of points over polygon mask
                    false -> get all points over extent
    """
    cols = int(NUM.ceil((extentList[2] - extentList[0]) / cellSize))
    rows = int(NUM.ceil((extentList[3] - extentList[1]) / cellSize))
    halfCell = cellSize / 2
    #### Verify Size Raster ####
    diff = ((extentList[0] + halfCell) + cols * cellSize) - extentList[2]
    part = diff / halfCell
    if not NUM.allclose(part, 1):
        cols = cols - 1
    diff = ((extentList[3] - halfCell) - rows * cellSize) - extentList[1]
    part = diff / halfCell
    if not NUM.allclose(part, 1):
        rows = rows - 1
    sizeNumber = cols * rows
    centroids = None
    polygonMask = ids = None
    if sizeNumber < 2*maxSizeCells and fc is not None:
        #### Define Cube For Simulating a Raster Area ####
        cubeInfo = ARC._ss.CubeInfo(rows, cols, 1, cellSize, use_hexagons = False)
        #### Get IDs of Polygons in the Current SRF-Extent ####
        polygonMask, ids = cubeInfo.get_polygon_ids_mask(fc, extentList[0],
                                                         extentList[3], srf,
                                                         just_index = justIndex)
        polygonMaskSort = NUM.argsort(polygonMask)
        polygonMask = polygonMask[polygonMaskSort]
        ids = ids[polygonMaskSort]
        return cols*rows, polygonMask, ids, cols, rows, centroids, srf
    else:
        if fc is not None:
            ARCPY.AddError("Explode polygons to points cannot be used when the raster dataset has more 2 billions cells." )
            raise SystemExit()
        centroids = [0, cols*rows-1]
        return cols*rows, polygonMask, ids, cols, rows, centroids, srf
    #### NOTE(review): everything below is unreachable — both branches above ####
    #### return. Kept verbatim as it documents the original intent.          ####
    #### Just Return Centroids Data ####
    if outputFC is None:
        return NUM.arange(len(centroids)), polygonMask, ids, cols, rows, centroids, srf
    ### It is generated a FC with centroids ####
    if outputFC is not None and not getIdsCentroids:
        dataObject = None
        dataObject = UTILS.DataContainer(srf)
        if not outArea:
            dataObject.renderType = "POINT"
            dataObject.requireGeometry = False
            dataObject.xyCoords = centroids
            dataObject.numObs = len(centroids)
            idvalues = NUM.arange(dataObject.numObs)
            field = SSDO.CandidateField(name = "IDS", type = "LONG", data = idvalues)
            fields = [field]
            #### Create Point Feature Class ####
            ARC._ss.output_featureclass_from_dataobject(dataObject, outputFC, fields)
            return idvalues, polygonMask, ids, cols, rows, centroids, srf
        else:
            dataObject.renderType = "POINT"
            dataObject.requireGeometry = False
            dataObject.xyCoords = centroids[polygonMask,:]
            dataObject.numObs = len(dataObject.xyCoords)
            idvalues = NUM.arange(dataObject.numObs)
            field = SSDO.CandidateField(name = "IDS", type = "LONG", data = idvalues)
            fields = [field]
            #### Create Point Feature Class ####
            ARC._ss.output_featureclass_from_dataobject(dataObject, outputFC, fields)
            return idvalues, polygonMask, ids, cols, rows, centroids, srf

def createRaster(data, sf, minX, minY, cellXSize, cellYSize, outputRaster):
    """ Create Raster Output from RF
    INPUT:
    data {array 1D}: data compute by RF
    sf {Spatial Reference}: Spatial Reference
    minX {float}: Xmin
    minY {float}: Ymin
    cellXSize {double}: cell size in X
    cellYSize: {double}: cell size in Y
    outputRaster: {str}: Output path
    """
    #### Temporarily force the output coordinate system to sf ####
    spatialRef2SetBack = ARCPY.env.outputCoordinateSystem
    ARCPY.env.outputCoordinateSystem = sf
    point = ARCPY.Point(minX, minY)
    raster = ARCPY.NumPyArrayToRaster (data, point,
                                       cellXSize , cellYSize, NULL)
    raster.save(outputRaster)
    #### Restore the environment setting changed above ####
    ARCPY.env.outputCoordinateSystem = spatialRef2SetBack

def reMapCategories(categories, typeField, dataInput):
    """ This function remap values from categories which can be Int, Long,
    Double, String..
    INPUT:
    categories {list}: All categories of this field
    typeField {str}: type of field from original dataset must match UTILS.numpyConvert
    dataInput {array /Double}: input array
    OUTPUT:
    data {array}
    """
    dictClasses = { id:value for id, value in enumerate(categories)}
    typeNumpyField = UTILS.numpyConvert[typeField]
    #### Calculate the maximum string size
    if typeNumpyField == 'U%i':
        size = max(map(len, categories))
        typeNumpyField = 'U' + str(size)
    data = NUM.zeros(len(dataInput), dtype = typeNumpyField)
    #### Introduce Predicted values in the output data array
    for i in NUM.arange(len(categories)):
        data[dataInput == i ] = dictClasses[i]
    return data

def reMapDataField(nRowsFC, typeField, dataFilteredMapped, validIndex):
    ##No Used This Idea it is needed if we want to have the same output of ssdo/but it is not possible if we include raster that
    ##Contain null values
    """ This Method restores the number of rows of the original dataset, it uses
    a filtered rows Ids to restore the original id
    INPUT:
    nRowsFC {int}: Number of Rows of original DataSet
    typeFiled {str}: Original type of field must match UTILS.numpyConvert
    dataFilterdMapped {array}: Data filtered -> it should contained the original type
    validIndex {array}: List of valid ids
    OUTPUT:
    data{array}
    """
    #### NOTE(review): dead code (see "No Used" above). `filterRowsIds`,     ####
    #### `categories` and `finalSelection` are undefined here, and           ####
    #### `max[finalSelection]` subscripts the builtin — this function would  ####
    #### raise if ever called.                                               ####
    if filterRowsIds == []:
        return dataFilteredMapped
    typeNumpyField = UTILS.numpyConvert[typeField]
    if typeNumpyField == 'U%i':
        size = max(map(len, categories))
        typeNumpyField = 'U' + str(size)
    maxId = max[finalSelection] + 1
    data = NUM.zeros(nRowsFC, dtype = typeNumpyField)
    data[validIndex] = dataFilteredMapped
    return data

def UpdataRFFields(rfTest):
    #### Push the (possibly remapped) X matrix columns back onto each field ####
    for id, fieldData in enumerate(rfTest.fields):
        dataN = rfTest.x.T[id]
        if fieldData.rfType == "Categorical":
            #### Categorical columns hold indices; translate back to labels ####
            fieldData.data = \
reMapCategories(fieldData.info, fieldData.fieldType, dataN) else: fieldData.data = dataN.copy() def statConfusionMatrix(className, matrix): """ Calculate Confusion Matrix Values """ matrix = matrix.T tp = matrix[0,0] fp = matrix[0,1] fn = matrix[1,0] tn = matrix[1,1] p = tp + fp n = fn + tn den1 = (2*tp + (fp + fn)) if den1 > 0: f1 = 2*tp / den1 else: f1 = -1 den = -1 try: value = (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn) den = NUM.sqrt(value) except: pass if den > 0: mcc = (tp*tn - (fp * fn) ) / den else: mcc = -1 acc = (tp + tn )/(p + n) sen = -1 try: if (tp + fp) > 0: sen = tp / (tp + fn) else: sen = -1 except: pass return f1, mcc, sen, acc def statisticsForClassification(rfField, trueData, predData, isTesting = True, hpar = None): """ This method generates a string table of the classification statistics """ msgCat = ARCPY.GetIDMessage(84825) msgF1 = ARCPY.GetIDMessage(84833) msgMCC = ARCPY.GetIDMessage(84832) msgAcc = ARCPY.GetIDMessage(84827) msgSen = ARCPY.GetIDMessage(84843) msgHeader = None mess = None #### Change Messages Depending on Data #### if isTesting: msgHeader = ARCPY.GetIDMessage(84828) mess = ARCPY.GetIDMessage(84830) else: msgHeader = ARCPY.GetIDMessage(84838) mess = ARCPY.GetIDMessage(84840) if hpar is not None: if hasattr(hpar, "outputConfusion"): #### Check Path Output Confusion Matrix Test Data #### if hpar.outputConfusion not in ["", None]: idClass = {name:id for id, name in enumerate(rfField.yField.info)} nTru = len(idClass) #### Matrix Confusion Matrix #### completeConfMat = NUM.zeros((nTru,nTru), dtype = NUM.int32) #### Accumulate Values in Confusion Matrix #### for index in NUM.arange(len(trueData)): trInd = idClass[trueData[index]] teInd = idClass[predData[index]] completeConfMat[trInd, teInd] += 1 namesCat = ["CAT_" + str(i) for i in idClass] arrayNames = NUM.array(namesCat) #### Column Categories #### fieldIni = SSDO.CandidateField(name = "CATEGORY", type = "TEXT", data = arrayNames) fields = [fieldIni] #### Create Column For Each Category 
#### for id, cat in enumerate(namesCat): field = SSDO.CandidateField(name = cat, alias = cat.replace("CAT_",""), type = "LONG", data = completeConfMat.T[id]) fields.append(field) #### Use Data Container To create Output Table #### dataC = UTILS.DataContainer(None) fields = UTILS.checkCandidateFieldName(fields, hpar.outputConfusion) dataC.generateOutput(hpar.outputConfusion, fields) trIds = NUM.unique(trueData) teIds = NUM.unique(predData) ids = NUM.intersect1d(trIds, teIds) n = len(ids) idClass = {name:id for id, name in enumerate(rfField.yField.info) if name in ids} listTable = [] header = msgHeader headTable = [ msgCat, msgF1, msgMCC, msgSen, msgAcc] listTable.append(headTable) for className in idClass: confMatrix = NUM.zeros((2,2), dtype = float) tClass = (trueData != className) * 1 pClass = (predData != className) * 1 for i in NUM.arange(len(tClass)): confMatrix[tClass[i],pClass[i]] += 1 f1, mcc, sen, acc = statConfusionMatrix(className, confMatrix) row = [className, "{:.2f}".format(f1) if f1 != -1 else "NA", "{:.2f}".format(mcc) if mcc != -1 else "NA", "{:.2f}".format(sen) if sen != -1 else "NA", "{:.2f}".format(acc)] listTable.append(row) outputReport = UTILS.outputTextTable(listTable, header = header, justify = ["left", "right", "right", "right", "right"], pad = 1, colPad = 3, titleFillToken = "-") return outputReport + mess def statisticsForRegression(trueData, predData, isTesting = True): """ This method generates string table of the regression statistics """ msgR = ARCPY.GetIDMessage(84826) msgPValue = ARCPY.GetIDMessage(84807) msgStdError = ARCPY.GetIDMessage(84831) msgHeader = None mess = None #### Change Messages Depending on Data #### if isTesting: msgHeader = ARCPY.GetIDMessage(84829) mess = ARCPY.GetIDMessage(84830) else: msgHeader = ARCPY.GetIDMessage(84839) mess = ARCPY.GetIDMessage(84840) try: slope, intercept, r_value, p_value, std_err = SCPS.linregress(trueData, predData) listTable=[] listTable.append([msgR, "{:.3f}".format(r_value**2)]) 
listTable.append([msgPValue,"{:.3f}".format(p_value) ]) listTable.append([msgStdError, "{:.3f}".format(std_err)]) outputReport = UTILS.outputTextTable(listTable, header = msgHeader, justify = ["left", "right"], colPad = 31, pad = 1, titleFillToken = "-") return outputReport + mess except: pass def getMat(data, nClasses): """ Get Matrices to calculate data workspace """ limit = 1000 if len(data) > limit: NUM.random.seed(0) ind = NUM.arange(len(data)) NUM.random.shuffle(ind) minV = data.min() maxV = data.max() ind=ind[0:1000] data = data[ind] data[0]= minV data[limit-1] = maxV data.sort() nData = len(data) lcl = NUM.zeros((nData+1, nClasses+1), dtype = float) lcl[1,1:nClasses+1] = 1.0 vc = NUM.zeros((nData+1, nClasses+1), dtype = float) hinf = NUM.float('inf') vc[2:nData+1, 1:nClasses+1] = hinf return data, lcl, vc def jMatrices(data1, nClasses): """ Generate Matrices to Calculate Natural Breaks """ data, lcls, vcs = getMat(data1, nClasses) sl = 2 nl = data.shape[0] + 1 sum = 0.0 sumSquares = 0.0 w = 0.0 variance = 0.0 for l in range(2, nl): sum = 0.0 sumSquares = 0.0 w = 0.0 for m in range(1, l+1): lcl = l - m + 1 i4 = lcl - 1 val = data[i4] w += 1.0 sum += val sumSquares += val * val variance = sumSquares - (sum * sum) / w if i4 != 0: for j in range(2, nClasses+1): jm1 = j - 1 if vcs[l, j] >= (variance + vcs[i4, jm1]): lcls[l, j] = lcl vcs[l, j] = variance + vcs[i4, jm1] lcls[l, 1] = 1. vcs[l, 1] = variance return data, lcls, vcs def breaksJenks(data1, nClasses): """ Calculate Natural Breaks """ data, lcls, _ = jMatrices(data1, nClasses) k = data.shape[0] - 1 kclass = [0.] 
* (nClasses+1) countNum = nClasses kclass[nClasses] = data[len(data) - 1] kclass[0] = data[0] while countNum > 1: elt = int(lcls[k][countNum] - 2) kclass[countNum - 1] = data[elt] k = int(lcls[k][countNum] - 1) countNum -= 1 classes = NUM.array(kclass) return classes[1:,] def getJenkBreaks(data, nClass): """ Equal Interval if Jenks is not calculated properly """ uq = NUM.unique(data) if len(uq) <= 5: return NUM.floor(min(uq)), uq try: data1 = breaksJenks(data, nClass) return NUM.floor(data.min()), data1 except: pass minV = data.min() maxV = data.max() interval = (maxV-minV)/nClass rang = NUM.arange(5)+1 return NUM.floor(minV), minV + rang*interval def genColor(nColors): """ Generate Random Colors """ diff = 100 NUM.random.seed(33) dictC = [] lastC = None while len(dictC) < nColors: cols = NUM.random.randint(60, 240, 500) c = 0 t = [] for i in cols: t.append(i) if len(t) == 3: col =",".join([str(i) for i in t]) if col not in dictC: if lastC is not None: v = NUM.sqrt(((NUM.array(lastC)-NUM.array(t))**2).sum()) if v > diff: dictC.append(col) lastC = t else: dictC.append(col) lastC = t if len(dictC) == nColors: return dictC t = [] def applyLayer(hpar, nameOutputPar, data, isClass, headingName, shapeType, isTrainingOutput, isRasterLayer = False): """ Use a template layer to generate a customized layer. The default symbol is described in a line 'decribeSymbolType', this position should be known from the template layer. 
OUTPUT : layer (lyrx) path """ import tempfile as TEMPFILE import random as RANDOM pos = [id for id, i in enumerate(hpar.parameters) if hpar.parameters[id].name == nameOutputPar][0] regcolors = ["237,248,251","178,226,226","102,194,164","44,162,95","0,109,44"] layerFileTemplate = None valueUnique = None uniqueValues = None typeSymbolLocation = None heading = None colors = None minV = None minBreak = None #### Check Template Layer #### if shapeType != "Raster": if isClass: uniqueValues = NUM.unique(data) if shapeType == "Polygon": layerFileTemplate = "RFPolygonClassification.lyrx" typeSymbolLocation = 323 heading = 326 if shapeType == "Point": layerFileTemplate = "RFPointClassification.lyrx" typeSymbolLocation = 376 heading = 379 colors = genColor(len(uniqueValues)) else: minV, uniqueValues = getJenkBreaks(data, 5) uniqueValues = NUM.round(uniqueValues,4) if shapeType == "Polygon": layerFileTemplate = "RFPolygonRegression.lyrx" typeSymbolLocation = 263 heading = 306 minBreak = 296 if shapeType == "Point": layerFileTemplate = "RFPointRegression.lyrx" typeSymbolLocation = 265 heading = 308 minBreak = 298 colors = regcolors else: if isClass: layerFileTemplate = "RFRasterClasses.lyrx" uniqueValues = data[0] #Label valueUnique = data[1] typeSymbolLocation = 64 heading = 67 colors = genColor(len(uniqueValues)) else: layerFileTemplate = "RFRasterRegression.lyrx" minV, uniqueValues = getJenkBreaks(data, 5) typeSymbolLocation = 51 heading = 0 colors = regcolors pathLayer = OS.path.join(UTILS.pathLayers, layerFileTemplate) lines = [] if OS.path.isfile(pathLayer): f = open(pathLayer, 'r') lines = f.readlines() f.close() else: return decribeSymbolType = lines[typeSymbolLocation] msg = ARCPY.GetIDMessage(84338) head = lines[heading].replace("NAME_FIELD", headingName +"("+msg+")") lines[heading] = head newElem = [] if minBreak is not None: minBreakV = lines[minBreak].replace("4", str(minV)) lines[minBreak] = minBreakV if type(uniqueValues) == NUM.float or type(uniqueValues) == 
NUM.int32: uniqueValues = NUM.array([uniqueValues]) #### Generate New Elements in the Template Replacing Values #### for id, i in enumerate(uniqueValues): ele = decribeSymbolType.replace("150,150,150", colors[id]) ele = ele.replace("99999", str(i)) ele = ele.replace("CATEGORY", str(i)) if str(type(i)).startswith('U'): ele = ele.replace('"VALUE2CHANGE"', str(i)) else: ele = ele.replace('VALUE2CHANGE', str(i)) if valueUnique is not None: ele = ele.replace("VALUETOSEARCH", str(valueUnique[id])) newElem.append(ele) replac = ",".join(newElem) lines[typeSymbolLocation] = replac iniText = lines[0:typeSymbolLocation] endText = lines[typeSymbolLocation:len(lines)] iniText += endText; tempFolder = TEMPFILE.gettempdir() pathLyrx = pathLayer if isTrainingOutput: pathLyrx = OS.path.join(tempFolder, "tempRFLayer.lyrx") else: if isRasterLayer: pathLyrx = OS.path.join(tempFolder, "tempRFPLayerR.lyrx") else: pathLyrx = OS.path.join(tempFolder, "tempRFPLayer.lyrx") #### Avoid to Use Previous Layer #### id = str(NUM.random.randint(10000000)) pathLyrx = pathLyrx.replace(".lyrx", id + ".lyrx") try: rf= open(pathLyrx, "w", encoding = "utf-8") rf.writelines(iniText) rf.close() except: ARCPY.AddIDMessage("WARNING", 515, pathLyrx) try: hpar.parameters[pos].symbology = pathLyrx except: ARCPY.AddIDMessage("WARNING", 973) def createDBGOutput(rfField, parameters = None, trainField = True): if DBGFILES is not None: import codecs if parameters is not None: with open(DBGFILES +"_pars.txt", 'w') as outfile: data = { p.name:p.valueAsText for id, p in enumerate(parameters)} JSON.dump(data, outfile) if trainField: createOutputRFRieldASNumpy(rfField, DBGFILES+"_TRAI") else: createOutputRFRieldASNumpy(rfField, DBGFILES+"_TEST") def createOutputRFRieldASNumpy(rfField, outputFC): """ Create Output binary Numpy file From a RFField instance """ rfField.saveInfo(outputFC + "_h.txt") NUM.save(outputFC + "_x", rfField.x) if rfField.yField is not None: NUM.save(outputFC + "_y", rfField.y) def 
def newFCFromSSDO(ssdo, outputFC):
    """ Create New Feature Dataset; returns (path, oidFieldName, wasCopied) """
    #### No bad records: the original FC can be used directly ####
    if len(ssdo.badRecords) == 0:
        return ssdo.inputFC, ssdo.oidName, False
    dataIds = NUM.array(list(ssdo.master2Order.keys()), dtype = int)
    #### Synthetic sequential id field used downstream as the join key ####
    field = SSDO.CandidateField(name = "IDS", type = "LONG",
                                data = NUM.arange(len(dataIds), dtype = int))
    if ssdo.shapeType == "Polygon":
        ARC._ss.copy_selected_features_polygons(ssdo = ssdo,
                                                input_feature_class = ssdo.inputFC,
                                                output_feature_class = outputFC,
                                                subset_ids = dataIds,
                                                candidate_fields = [field])
    else:
        ARC._ss.output_featureclass_from_dataobject(ssdo, outputFC, [field])
    return outputFC, "IDS", True

class HandleInputs():
    #### Parses/validates the geoprocessing tool parameters for the forest tool ####
    def __init__(self, parameters):
        #### Initialize Vars ####
        self.parameters = parameters
        self.par = {p.name:p.valueAsText for id, p in enumerate(parameters)}
        self.parValue = { p.name:p for id, p in enumerate(parameters)}
        self.parStatus = { id:False for id, p in enumerate(parameters)}
        self.discrete = discrete
        self.continuous = continuous
        self.fieldAliasInput = None
        self.fieldNameToAlias = None
        self.activeParameters = []
        self.isModel = False
        self.where = None
        self.what = None
        self.isClassification = False
        self.haveToPredict = False
        self.featuresToPredict = None
        self.rfM = None
        self.explodeDataRaster = False
        self.requireOutputModel = False
        #### NOTE(review): outputModel is assigned again below ("") — the None ####
        #### default here is immediately overwritten.                          ####
        self.outputModel = None
        self.typeRasters = []
        self.inputModel = ""
        self.outputModel = ""
        #### Get Basic Parameters ####
        self.predictionType = UTILS.getTextParameter(0, parameters)
        self.input = UTILS.getTextParameter(1, parameters)
        self.variableToPredict = UTILS.getTextParameter(2, parameters)
        self.classes = UTILS.getTextParameter(3, parameters)
        self.featuresToPredict = UTILS.getTextParameter(7, parameters)
        self.variableToPredictAlias = self.variableToPredict
        #### Evaluate Other Parameters and Check Contraints ####
        self.initialDecision()
        self.decision()
        self.selectedParameters()
        self.getParameters(self.parameters)
        self.iterationSeeds = None

    def aliasChange(self, desc):
        """ Check Alias of input feature class,
        matching variables" """
        if desc is None:
            return
        #### Check Explanatory Variables ####
        expVar = self.parameters[4]
        #### NOTE(review): expVar is a Parameter object, so this membership ####
        #### test is always False — presumably expVar.value was intended.   ####
        if expVar in ["", None]:
            return
        matchExVar = self.parameters[10]
        if desc is not None:
            self.fieldAliasInput = {field.aliasName: (field.name, field.type) for field in desc.fields}
        #### Add Elements to Replace Field Name with Category label ####
        for i in desc.fields:
            if i.name not in self.fieldAliasInput:
                self.fieldAliasInput[i.name] = (i.name, i.type)
        #### Replace Alias For Field Names ####
        if expVar.value:
            values = expVar.value
            valuesM = None
            if matchExVar.value:
                valuesM = matchExVar.value
            valuesE = []
            orderFieldVar = []
            for i in values:
                if str(i[0].value) not in self.fieldAliasInput:
                    ARCPY.AddIDMessage("ERROR", 544, str(i[0].value) )
                    raise SystemExit
                orderFieldVar.append(self.fieldAliasInput[str(i[0].value)][0])
                valuesE.append( [self.fieldAliasInput[str(i[0].value)][0], i[1]])
            values = valuesE
            valuesToM = []
            if valuesM is not None:
                #### Matched variables must pair one-to-one with explanatory ones ####
                if len(values) != len(valuesM):
                    ARCPY.AddIDMessage("ERROR", 110203)
                    raise SystemExit
                for idv, i in enumerate(valuesM):
                    valuesToM.append([i[0].value, orderFieldVar[idv]])
                valuesM = valuesToM
            expVar.value = values
            matchExVar.value = valuesM

    def getParameters(self, parameters):
        #### Read every remaining tool parameter and apply defaults/constraints ####
        self.nullValue = None
        self.explanatoryVariables = UTILS.getTextParameter(4, parameters)
        self.distanceFeatures = UTILS.getTextParameter(5, parameters)
        self.explanatoryRasters = UTILS.getTextParameter(6, parameters)
        self.outputFeatures = UTILS.getTextParameter(8, parameters)
        self.outputRaster = UTILS.getTextParameter(9, parameters)
        self.matchVariables = UTILS.getTextParameterMatch(10, parameters)
        self.matchDistances = UTILS.getTextParameterMatch(11, parameters, ["MappingLayerObject","mp.Layer"])
        self.matchRaster = UTILS.getTextParameterMatch(12, parameters)
        self.outputTrainedFeatures = UTILS.getTextParameter(13, parameters)
        self.outputDiagnostic = UTILS.getTextParameter(14, parameters)
        self.outputDiagnosticParameter = self.parameters[14]
        self.useAllRasterValues = UTILS.getTextParameter(15, parameters)
        self.numTrees = UTILS.getNumericParameter(16, parameters)
        self.leafSize = UTILS.getNumericParameter(17, parameters)
        self.maxLevel = UTILS.getNumericParameter(18, parameters)
        self.sample_size = UTILS.getNumericParameter(19, parameters)
        self.fields_to_try = UTILS.getNumericParameter(20, parameters)
        self.split = UTILS.getNumericParameter(21, parameters)
        self.outputConfusion = UTILS.getTextParameter(22, parameters)
        self.outputConfusionParameter = self.parameters[22]
        self.outputCrossValidation = UTILS.getTextParameter(23, parameters)
        self.outputCrossValidationParameter = self.parameters[23]
        balanceTree = UTILS.getTextParameter(24, parameters)
        self.balanceTree = False
        self.iterations = UTILS.getNumericParameter(25, parameters)
        #### Default Set Iterations ####
        if self.iterations is None:
            self.iterations = 1
        #### Max Level Null - Each Tree is Explored Completed ####
        if self.maxLevel is None:
            self.maxLevel = -1
        #### No test split -> a confusion matrix output makes no sense ####
        if self.split == 0:
            if self.outputConfusion not in ["", None]:
                ARCPY.AddIDMessage("WARNING", 110228)
        #### Check extension of output raster ####
        if self.outputRaster is not None:
            if not UTILS.isGDB(self.outputRaster):
                if ".TIF" not in self.outputRaster.upper():
                    self.outputRaster = self.outputRaster + ".tif"
        #### Balance Tree ####
        if balanceTree is not None:
            self.balanceTree = True if balanceTree in [True, "true", "TRUE"] else False
            #### Balancing forces full sampling and fully-grown trees ####
            if self.balanceTree:
                self.sample_size = 100
                self.maxLevel = -1
                self.leafSize = 1
        else:
            self.balanceTree = False
        if self.useAllRasterValues is not None:
            self.explodeDataRaster = True if self.useAllRasterValues == "true" else False
        if not self.isModel:
            #### At least one kind of explanatory input is required ####
            if self.explanatoryVariables is None and \
               self.distanceFeatures is None and \
               self.explanatoryRasters is None:
                ARCPY.AddIDMessage("ERROR", 110173)
                raise SystemExit
        if self.numTrees is None:
            self.numTrees = 100
        if self.numTrees == 0:
            self.what = "NONE"
        if not self.isModel:
            if self.sample_size is None:
                self.sample_size = 100
        #### Default leaf size: 1 for classification, 5 for regression ####
        #### NOTE(review): nesting reconstructed from mangled source —  ####
        #### the regression default is taken as the inner else branch.  ####
        if self.leafSize is None or self.leafSize < 0:
            if self.isClassification:
                self.leafSize = 1
            else:
                if self.leafSize is None or self.leafSize < 5:
                    self.leafSize = 5
        if self.outputModel is None:
            self.outputModel = ""
        if self.outputRaster is None:
            self.outputRaster = ""

    def initialDecision(self):
        #### First pass over the basic parameters: describe inputs, set flags ####
        par = self.par
        self.isPolygon = False
        self.isModel = False
        self.desc = None
        if self.input:
            try:
                self.desc = ARCPY.Describe(self.input)
                self.isPolygon = self.desc.shapeType == "Polygon"
            except:
                ARCPY.AddIDMessage("ERROR", 294, self.input)
                raise SystemExit
        if self.desc is not None:
            self.aliasChange(self.desc)
            alias = [self.fieldAliasInput[i] for i in self.fieldAliasInput if i.upper() == self.variableToPredict ]
            ### Set Alias Variable to Predict ####
            #### NOTE(review): fieldAliasInput values are (name, type) tuples,  ####
            #### so variableToPredictAlias receives a tuple here — presumably   ####
            #### the alias string was intended; confirm against callers.        ####
            if len(alias) > 0:
                self.variableToPredictAlias = alias[0]
        self.descF2P = None
        if self.featuresToPredict:
            try:
                self.descF2P = ARCPY.Describe(self.featuresToPredict)
            except:
                ARCPY.AddIDMessage("ERROR", 294, self.featuresToPredict)
                raise SystemExit
        userTypeRF = "CATEGORICAL" if self.classes in [ "CATEGORICAL", True, "True", "true"] else "NUMERIC"
        self.classes = userTypeRF == "CATEGORICAL"
        self.isClassification = self.classes
        self.requireOutputModel = False
        self.outputModel = ""

    def enableParametersBy(self, par , enb, disable ):
        #### Toggle parameter status flags: enable ids in enb, disable in disable ####
        for i in enb:
            par[i] = True
        for j in disable:
            par[j] = False

    def selectedParameters(self):
        #### Keep only active parameters; clear the values of inactive ones ####
        activeParameters = {}
        for id, i in enumerate(self.par):
            name = self.parameters[id].name
            if self.parStatus[id]:
                if DBGPAR:
                    ARCPY.AddMessage(str(id) + " " + name + ": " + str(self.par[name]))
                activeParameters[name] = self.parameters[id]
            else:
                if DBGPAR:
                    ARCPY.AddMessage(str(id) + " " + name + "- Cleaned >>>>>>>>>>>: " + str(self.par[name]))
                self.parameters[id].value = None
        self.activeParameters = activeParameters

    def decision(self):
        """ This Method checks the user selection..
It helps to clean unnecessary parameters """ par = self.parStatus iniEnabled = [] self.enableParametersBy(par, range(26),[]) if self.predictionType == "TRAIN": self.enableParametersBy(par, [4, 5, 6], [7, 8, 9, 10, 11, 12]) self.what = "NONE" self.haveToPredict = False elif self.predictionType == "PREDICT_FEATURES": self.enableParametersBy(par, [4, 5, 6, 7, 8, 10, 11, 12], [9]) self.what = "FEATURES" self.haveToPredict = True elif self.predictionType == "PREDICT_RASTER": self.enableParametersBy(par, [9, 10, 11, 12], [7, 8, 4, 5, 10, 11 ]) self.what = "RASTER" self.haveToPredict = False anotherCheck = self.parameters[4].value is None and self.parameters[5].value is None if self.isClassification and self.what in ["NONE", "RASTER"] and self.isPolygon and anotherCheck: self.enableParametersBy(par, [15], []) else: self.enableParametersBy(par, [], [15]) def splitFieldName(self, text, testId = 0): """ split Match Variable from fields INPUT: text {str}: It should be POP(DSC) POP; testId->1 OUTPUT: list: [Training, rFType, Test] """ if type(text) == str: textPart = text.split(" ") fieldTest = textPart[testId] fieldTraining = textPart[(not testId)*1] if self.discrete in fieldTraining: return fieldTraining.replace(self.discrete,"").strip(), varTypeRev[self.discrete], fieldTest else: return fieldTraining, varTypeRev[self.continuous], fieldTest elif type(text) == list: if self.discrete in text[1]: return text[1].replace(self.discrete,"").strip(), varTypeRev[self.discrete], text[0] else: return text[1], varTypeRev[self.continuous], text[0] def getIndexVarByNameSource(self, listVar, name, source): idVar = [id for id, i in enumerate(listVar) if source in i[0]["source"] and i[0]["name"].upper() == name] if len(idVar): return idVar[0] else: return None def getInfoValues(self,): """ get Info: FieldsToMatch, DistancesToMatch, RastersToMatch, eVariables, eDistances, eRasters FieldsToMatchVerify """ #### Get Input FC Path #### nameInputFC = self.desc.name pathInputFC = self.desc.path 
pathCompleteInputFC = OS.path.normpath(pathInputFC + "\\"+ nameInputFC) #### Get Input Feature To Predict Path #### pathCompleteInputF2P = None if self.descF2P is not None: nameInputF2P = self.desc.name pathInputF2P = self.desc.path pathCompleteInputF2P = OS.path.normpath(pathInputF2P + "\\"+ nameInputF2P) listVariables = [] if self.rfM is not None: listVariables = self.rfM.getVariables() varFC = {} eV = {} if self.explanatoryVariables is not None : error = None try: eVt = {} eV = {row.split(' ')[0]: row.split(' ')[1] for row in self.explanatoryVariables.split(";")} rowVar = {} for idF in self.desc.fields: if idF.name in eV or idF.aliasName in eV: typeValue = categoricalVar if eV[idF.name] in ['', '#', 'false', False, "NUMERIC"]: typeValue = numericVar varFC[idF.name.upper()]= typeValue rowVar = [{"name":idF.name.upper(), "alias":idF.aliasName, "rfType":typeValue, "fieldType":idF.type, "source":"FC", "sourceData": self.desc.path +"\\"+ self.desc.name},{}] eVt[idF.name.upper()] = idF.type if idF.type == "String": if eV[idF.name] in ["false", False, continuous]: ARCPY.AddIDMessage("ERROR", 110200,idF.name) raise SystemExit listVariables.append(rowVar) except: ARCPY.AddIDMessage("ERROR", 110199) raise SystemExit #### Check if Variable exists in the dataset #### doNotExist = [] if len(listVariables) != len(eV): lisV = [i[0]["name"] for i in listVariables] for id, i in enumerate(eV): if i.upper() not in lisV: doNotExist.append(i) if len(doNotExist): ARCPY.AddIDMessage("ERROR", 10269, ",".join(doNotExist)) raise SystemExit if self.matchVariables is not None : mfV = None if type(self.matchVariables) == str: mfV = [self.splitFieldName(field, 0) for field in self.matchVariables.split(";")] else: mfV = [self.splitFieldName(field, 0) for field in self.matchVariables] mfVt = [] val = list(eV.keys()) for id, i in enumerate(NUM.arange(len(mfV))): mfVt.append([val[id].upper(), varFC[val[id].upper()], mfV[id][2]]) mfV = mfVt tmfV = {field[2]:field[1] for field in mfV } 
nameTestFieldTrainField = {field[2].upper():field[0].upper() for field in mfV } fieldNamesPredict = [i[2] for i in mfV ] rowVar = [] for idF in self.descF2P.fields: nameToUse = None if idF.name in fieldNamesPredict: nameToUse = idF.name.upper() elif idF.aliasName in fieldNamesPredict: nameToUse = idF.aliasName.upper() if nameToUse is not None: idVar = self.getIndexVarByNameSource(listVariables, nameTestFieldTrainField[nameToUse], "FC") if idVar is not None: varToMatch = listVariables[idVar][0] if idF.type == varToMatch["fieldType"]: rowVar = {"name":idF.name.upper(), "alias":idF.aliasName, "rfType":varToMatch["rfType"], "fieldType":idF.type, "source":"FC", "sourceData": self.descF2P.path + "\\"+ self.descF2P.name} listVariables[idVar][1] = rowVar else: ARCPY.AddIDMessage("ERROR", 110176, listVariables[idVar][0]["name"], idF.name ) raise SystemExit else: ARCPY.AddIDMessage("ERROR", 110185, nameTestFieldTrainField[nameToUse] ) raise SystemExit if self.distanceFeatures is not None and self.rfM is None : unknownRF = [] cantBeUsed = False try: pathD = [ out.replace("'","") for out in self.distanceFeatures.split(";")] eD = { ARCPY.Describe(out).name:"false" for out in pathD} for distFeature in pathD: desc = ARCPY.Describe(distFeature) if desc.spatialReference.type.upper() == "UNKNOWN": unknownRF.append(distFeature) name = desc.name path = desc.path pathComplete = OS.path.normpath(path + "\\"+ name) if pathCompleteInputFC == pathComplete: cantBeUsed = True if desc.dataType == "FeatureLayer": pathComplete = desc.nameString rowVar = {"name":name.upper(), "alias":name.upper(), "rfType":numericVar, "fieldType":"Double", "source":"DIST", "sourceData": pathComplete} listVariables.append([rowVar,{}]) except: ARCPY.AddIDMessage("ERROR", 110201) raise SystemExit if cantBeUsed: ARCPY.AddIDMessage("ERROR", 110260) raise SystemExit if len(unknownRF): ARCPY.AddIDMessage("ERROR", 522, ",".join(unknownRF)) raise SystemExit if self.matchDistances is not None: cantBeUsed = False try: mdV 
= None if type(self.matchDistances) == str: mdV = [self.splitFieldName(field, 0) for field in self.matchDistances.split(";")] else: mdV = [self.splitFieldName(field, 0) for field in self.matchDistances] tmdV= {} cPmdV= {} for row in mdV: nameDistToMatch = None if self.rfM is None: nameDistToMatch = ARCPY.Describe(row[0]).name.upper() else: nameDistToMatch = row[0].upper() desc = ARCPY.Describe(row[2]) name = desc.name path = desc.path pathComplete = OS.path.normpath(path + "\\"+ name) if pathCompleteInputF2P is not None: if pathCompleteInputF2P == pathComplete: cantBeUsed = True if desc.dataType == "FeatureLayer": pathComplete = desc.nameString rowVar = {"name":name.upper(), "alias":name.upper(), "rfType":numericVar, "fieldType":"Double", "source":"DIST", "sourceData": pathComplete} idVar = self.getIndexVarByNameSource(listVariables, nameDistToMatch, "DIST") if idVar is not None: listVariables[idVar][1] = rowVar else: ARCPY.GetIDMessage("ERROR", 110176, nameDistToMatch , name ) raise SystemExit() except: ARCPY.AddIDMessage("ERROR", 110186) raise SystemExit if cantBeUsed: ARCPY.AddIDMessage("ERROR", 110261) raise SystemExit varRast = {} if self.explanatoryRasters is not None : try: valueRasters = self.parValue['explanatory_rasters'].value for i in valueRasters: val = None if hasattr(i[0], "value"): val = i[0].value elif hasattr(i[0], "name") : val = i[0].name else: val = i[0] typeVar = numericVar if i[1] in [discrete, True , "true", "CATEGORICAL"] : typeVar = categoricalVar desc = ARCPY.sa.Raster(val) name1 = str(desc.name) path = desc.path name = getNameRaster(name1.upper()) varRast[val] = typeVar pathAll = OS.path.normpath(path +"\\"+ desc.name) if desc.format== "NetCDF": pathAll = desc.name del desc rowVar = {"name":name, "alias":name1, "rfType":typeVar, "fieldType":"Double", "source":"RASTER", "sourceData": pathAll } listVariables.append([rowVar,{}]) except: ARCPY.AddIDMessage("ERROR", 110202) raise SystemExit if self.matchRaster is not None : try: mrV = None if 
type(self.matchRaster) == str: mrV = [self.splitFieldName(field, 0) for field in self.matchRaster.split(";")] else: mrV = [self.splitFieldName(field, 0) for field in self.matchRaster] mrVn = [] for id, el in enumerate(mrV): mrVn.append([mrV[id][0], varRast[mrV[id][0]],mrV[id][2]]) mrV = mrVn tmrV = { field[2]:field[1] for field in mrV } tmdV= {} cPmdV= {} for row in mrV: nameRasterToMatch = None if self.rfM is None: val = None if hasattr(row[0], "value"): val = row[0].value else: val = row[0] nameRasterToMatch = ARCPY.Describe(val).name.upper() nameRasterToMatch = getNameRaster(nameRasterToMatch) else: nameRasterToMatch = row[0].upper() typeVar = row[1] desc = ARCPY.Describe(row[2]) name1 = desc.name path = desc.path name = getNameRaster(name1.upper()) pathAll = OS.path.normpath(path +"\\"+ desc.name) if desc.format== "NetCDF": pathAll = desc.name del desc rowVar = {"name":name, "alias":name1, "rfType":typeVar, "fieldType":"Double", "source":"RASTER", "sourceData": pathAll } idVar = self.getIndexVarByNameSource(listVariables, nameRasterToMatch, "RASTER") if idVar is not None: listVariables[idVar][1] = rowVar else: ARCPY.GetIDMessage("ERROR", 110176, nameDistToMatch, name ) raise SystemExit() except: ARCPY.AddIDMessage("ERROR", 110187) raise SystemExit verifyOnltTraining = 1 if len(listVariables) == 0: ARCPY.AddIDMessage("ERROR", 110173) raise SystemExit() if self.what != "NONE": verifyOnltTraining = 2 for i in listVariables: if len(i[1]) == 0: ARCPY.AddIDMessage("ERROR", 110203) raise SystemExit() #### Repeat Names in Training and Testing #### for iv in range(verifyOnltTraining): names = NUM.array([i[iv]["name"] for i in listVariables]) uniqueNames, count = NUM.unique(names, return_counts = True) if len(count) and NUM.max(count) > 1: name = uniqueNames[NUM.where(count>1)[0][0]] ARCPY.AddIDMessage("ERROR", 110182, name ) raise SystemExit() #### Check String Field #### nl = [id for id, i in enumerate(listVariables) if i[iv]["fieldType"] == "String" and 
                  i[iv]["rfType"] != categoricalVar]
            #### String fields are only valid as categorical variables ####
            if len(nl):
                ARCPY.AddIDMessage("ERROR", 110200, self.getNamesList(listVariables, nl) )
                raise SystemExit()
        self.outArea = False
        if verifyOnltTraining == 2:
            #### Matched training/prediction variables must share a field type ####
            nl = [id for id, i in enumerate(listVariables) if i[0]["fieldType"] != i[1]["fieldType"]]
            if len(nl):
                ARCPY.AddIDMessage("ERROR", 110206, self.getNamesList(listVariables, nl))
                raise SystemExit()
            #### Flag when any matched distance/raster source differs from training ####
            outFC = [id for id, i in enumerate(listVariables)
                     if i[1]["source"] in ["DIST", "RASTER"] and i[1]["sourceData"] != i[0]["sourceData"]]
            if len(outFC):
                self.outArea = True
        self.listVariables = listVariables

    def getNamesList(self, listVar, ids):
        """ Return semicolon-separated "train-test" name pairs for the
        variable indices in ids; used to compose error messages. """
        idVar = [i[0]["name"] + "-" + i[1]["name"] for id, i in enumerate(listVar) if id in ids ]
        return ";".join(idVar)

    def getVarsBySourceOrigin(self, source, training = True, info = "name"):
        """ Return {info value: variable dict} for variables whose source
        contains the given tag (FC/DIST/RASTER).  Index 0 of each entry is
        the training side, index 1 the prediction side. """
        id = 0
        if not training:
            id = 1
        if self.listVariables is not None:
            return {i[id][info]: i[id] for i in self.listVariables if source in i[id]["source"]}

    def getDictOrder(self):
        """ Map each training variable name to its matched prediction name. """
        return {i[0]["name"].upper(): i[1]["name"].upper() for i in self.listVariables}

def processErrors(errors):
    """ Check Variability Messages

    Aggregates (messageID, fieldName, severity) entries and emits one
    combined ARCPY message per ID; aborts when any severity "E" exists. """
    #### Message IDs that take MaxCategoriesAllowed as an extra argument ####
    dictTwoAdd = {110192: MaxCategoriesAllowed}
    errorCounts = len([i for i in errors if len(i) and i[2] == "E"])
    warningCounts = len([i for i in errors if len(i) and i[2] == "W"])
    if errorCounts:
        typeOfErrors = [i[0] for i in errors if len(i) and i[2] == "E"]
        dictErrors = {i: [] for i in typeOfErrors}
        # NOTE: comprehension used only for its append side effect;
        # outputErrors itself is never read
        outputErrors = {i[0]: dictErrors[i[0]].append(i[1]) for i in errors if len(i) and i[2] == "E"}
        for i in dictErrors:
            uq = NUM.unique(dictErrors[i])
            if i in dictTwoAdd:
                ARCPY.AddIDMessage("ERROR", i, ", ".join(uq), dictTwoAdd[i] )
            else:
                ARCPY.AddIDMessage("ERROR", i, ", ".join(uq))
        raise SystemExit
    if warningCounts:
        typeOfErrors = [i[0] for i in errors if len(i) and i[2] == "W"]
        dictErrors = {i: [] for i in typeOfErrors}
        outputErrors = {i[0]: dictErrors[i[0]].append(i[1]) for i in errors if len(i) and i[2] == "W"}
        for i in dictErrors:
            uq = NUM.unique(dictErrors[i])
            if i
in dictTwoAdd: ARCPY.AddIDMessage("Warning", i, ", ".join(uq),dictTwoAdd[i] ) else: ARCPY.AddIDMessage("Warning", i, ", ".join(uq)) def AddDataN(rfField, hpar, typeSampling = "Tool"): """ Add data in the random Field """ vars = hpar.getVarsBySourceOrigin("FC") varsD = hpar.getVarsBySourceOrigin("DIST") varsR = hpar.getVarsBySourceOrigin("RASTER") noRequiredNewFC = (len(varsD) + len(varsR)) == 0 ssdo = rfField.ssdo maskTotal = None nFeatures = ssdo.inputFC messages = [] for f in vars: errors = rfField.addXVariableFromDataObject(name = f.upper(), userType = vars[f]["rfType"], source = "FC", alias = vars[f]["alias"] ) messages.extend(errors) processErrors(messages) if not noRequiredNewFC: #### Create Scratcth FC #### nFeatures = UTILS.returnScratchName("FC_RF_", scratchWS = ARCPY.env.scratchGDB) nFeatures, idField, useCopy = newFCFromSSDO(ssdo, nFeatures) messages = [] if len(varsD): pathD = [varsD[i]["sourceData"] for i in varsD] dataList = getNearFeature(nFeatures, pathD ) for id, f in enumerate(varsD): errors = rfField.addXVariableFromArray(name = f.upper(), userType = "Numeric", dataInfo = dataList[id], source = "DIST", inputType = "Double") messages.extend(errors) processErrors(messages) if len(varsR): pathR = [varsR[i]["sourceData"] for i in varsR] namesR = [varsR[i]["alias"] for i in varsR] typeRasters = [varsR[i]["rfType"] for i in varsR] dataListr, maskTotal, nullR, eROp = extractFeatureFromRaster(ssdo, pathR, namesR, typeRasters, nFeatures, typeSampling = typeSampling, useCopy = useCopy ) for id, rasterName in enumerate(varsR): errors = rfField.addXVariableFromArray(name = rasterName.upper(), userType = varsR[rasterName]["rfType"], dataInfo = dataListr.T[id], source = "RASTER", inputType = "Double", maskedOut = nullR) messages.extend(errors) processErrors(messages) if useCopy: UTILS.passiveDelete(nFeatures) return maskTotal def AddTestFieldN(rfTest, hpar): """ Get Values from each Type of Input """ featuresToPredict = hpar.featuresToPredict ssdo = 
rfTest.ssdo maskTotal = None dictFields = {} maskTotal = [] nFeaturesToPredict = None vars = hpar.getVarsBySourceOrigin("FC", False) varsD = hpar.getVarsBySourceOrigin("DIST", False) varsR = hpar.getVarsBySourceOrigin("RASTER", False) noRequiredNewFC = (len(varsD) + len(varsR)) == 0 if len(vars): for tField in vars: rfTest.addXVariableFromDataObject(name = tField.upper(), userType = vars[tField]["rfType"], source = "FC", alias = vars[tField]["alias"]) if not noRequiredNewFC: #### Create Scratcth FC #### nFeaturesToPredict = UTILS.returnScratchName("FC_RF_", scratchWS = ARCPY.env.scratchGDB) nFeaturesToPredict, idField, useCopy = newFCFromSSDO(ssdo, nFeaturesToPredict) if len(varsD): pathD = [varsD[i]["sourceData"] for i in varsD] dataList = getNearFeature(nFeaturesToPredict, pathD) for id, tField in enumerate(varsD): rfTest.addXVariableFromArray(name = tField.upper(), userType = "Numeric", dataInfo = dataList[id], source = "DIST", inputType = "Double") if len(varsR): pathR = [varsR[i]["sourceData"] for i in varsR] namesR = [varsR[i]["alias"] for i in varsR] typeRasters = [varsR[i]["rfType"] for i in varsR] dataList, maskTotal, nullR, opr = extractFeatureFromRaster(ssdo, pathR, namesR, typeRasters, nFeaturesToPredict, useCopy = useCopy) if dataList is None: ARCPY.AddIdMessage("ERROR", 110183) raise SystemExit for id, tField in enumerate(varsR): rfTest.addXVariableFromArray(name = tField.upper(), userType = varsR[tField]["rfType"], dataInfo = dataList.T[id], source = "RASTER", inputType = "Double", maskedOut = nullR) if useCopy: UTILS.passiveDelete(nFeaturesToPredict) return maskTotal def checkOutput(path, typeOutput): """ Check output path """ noAllowCharacters = ["!", "?", "*"] outPath = None if path not in ["", None]: try: outPath, outName = OS.path.split(path) ARCPY.Describe(outPath) except: if typeOutput == "RASTER": ARCPY.AddIDMessage("ERROR", 210, path ) if typeOutput == "FC": ARCPY.AddIDMessage("ERROR", 210, path ) raise SystemExit if typeOutput == "RASTER": 
            if UTILS.isGDB(path):
                #### A raster inside a GDB may not carry .tif or special characters ####
                if ".TIF" in path.upper() or len([i for i in outName if i in noAllowCharacters]) > 0:
                    ARCPY.AddIDMessage("ERROR", 210, path )
                    raise SystemExit
        if typeOutput == "FC":
            # NOTE(review): '==' only rejects an output literally named ".SHP";
            # presumably an extension check was intended - confirm before changing
            if ".SHP" == outName.upper():
                ARCPY.AddIDMessage("ERROR", 210, path )
                raise SystemExit
            if UTILS.isGDB(path):
                #### Shapefile extension is not valid inside a GDB ####
                if ".SHP" in path.upper():
                    ARCPY.AddIDMessage("ERROR", 210, path )
                    raise SystemExit

def evaluateRF(percentage, headerFile, xDataFile, yDataFile, tHeaderFile = None,
               xTestFile = None, numTrees = 100, leafSize = 1, sampleSize = 100,
               fields_to_try = 2, maxLevel = -1, what = "NONE", balance = False):
    """ Evaluate a RF from Numpy File Objects

    Builds an InfoForestField pair (train / optional test) from serialized
    header + .npy files and runs the model via RunRF.executeModel.
    """
    train = InfoForestField(percentage, 0)
    train.loadHeaderfromTextFile(headerFile)
    train.x = NUM.load(xDataFile)
    train.y = NUM.load(yDataFile)
    test = None
    if tHeaderFile is not None:
        test = InfoForestField(percentage, 0, isTraining = False)
        test.loadHeaderfromTextFile(tHeaderFile)
        test.x = NUM.load(xTestFile)
        test.generateXVariable(fillVar = False, balance = balance)
    train.generateXVariable(fillVar = False, balance = balance)
    train.isModel = False
    train.numTrees = numTrees
    train.leafSize = leafSize
    train.sample_size = sampleSize
    train.fields_to_try = fields_to_try
    # NOTE(review): both 'outputTrainedFeature' and 'outputTrainedFeatures'
    # are assigned below - the singular one looks like a typo'd attribute
    train.outputTrainedFeature = None
    train.what = what
    train.outputDiagnostic = None
    train.outputTrainedFeatures = None
    train.outputFeatures = None
    train.outputRaster = None
    train.maxLevel = maxLevel
    exr = RunRF(train, percentage, None)
    exr.executeModel(train, test )

class RunRF(object):
    """ Run Model depending on type of input """

    def __init__(self, hpar, randSeed, parameters):
        self.hpar = hpar                      # HandleInputs (or train object)
        self.randSeed = randSeed              # random seed / split percentage
        self.rasterDimensionInfo = None       # set later for raster prediction
        self.rfTraining = None
        self.parameters = parameters
        self.infoRasterInstances = None

    def createOutputRasterFromTemporalFile(self, outputRasterPath, rasterDimensionInfoSet, data):
        """ Create temporal file
        INPUT:
        outputRasterPath (str): output path
        rasterDimensionInfoSet (RasterInfo Instance): raster object
        data (2d array): array
        """
        #### Lower-left corner: cell centers shifted by half a cell ####
        xMin = rasterDimensionInfoSet.getXs().min() - rasterDimensionInfoSet.cellSize/2.0
        yMin = rasterDimensionInfoSet.getYs().min() - rasterDimensionInfoSet.cellSize/2.0
        nCols = rasterDimensionInfoSet.nCols
        nRows = rasterDimensionInfoSet.nRows
        cellSize = rasterDimensionInfoSet.cellSize
        wkspc, nameFile = self.createTemporalFLT(cellSize, (xMin, yMin), (nRows, nCols), data = data)
        flt = OS.path.join(wkspc, "{0}.flt".format(nameFile))
        hdr = OS.path.join(wkspc, "{0}.hdr".format(nameFile))
        ARCPY.env.outputCoordinateSystem = rasterDimensionInfoSet.srf
        ARCPY.management.CopyRaster(flt, outputRasterPath, None, None, NULL, "NONE",
                                    "NONE", None, "NONE", "NONE", None, "NONE")
        #### Best effort cleanup of the temporary FLT/HDR pair ####
        try:
            OS.remove(hdr)
            OS.remove(flt)
        except:
            ARCPY.AddWarning("Temporal File was not removed " + flt)
            pass
        return None

    def createTemporalFLT(self, cellSize = None, xMinYMin = None, nRowsCols = None,
                          data = None, pathFileAndName = None, srf = None):
        """ Create temporal file
        INPUT:
        cellSize (float): cell Size
        xMinYMin (tuple): lower-left corner point (required with nRowsCols)
        nRowsCols (tuple): (nRows, nCols)
        data (2d array, None): float32 array to write; None writes header only
        pathFileAndName {tuple, None}: (folder, base name) of an existing FLT
        srf {SpatialReference, None}: optional projection for a .prj sidecar
        OUTPUT:
        (str, str): workspace folder and base file name
        """
        xMin = yMin = nCols = nRows = None
        if xMinYMin is not None and nRowsCols is not None:
            xMin = xMinYMin[0]
            yMin = xMinYMin[1]
            nCols = nRowsCols[1]
            nRows = nRowsCols[0]
        if pathFileAndName is None:
            pathNC = UTILS.returnScratchFolder()
            randomName = str(NUM.random.randint(1000000))
            header = pathNC + r"\nc" + randomName + ".hdr"
            flt = pathNC + r"\nc" + randomName + ".flt"
            if srf is not None:
                prj = pathNC + r"\nc" + randomName + ".prj"
                with open(prj, 'w') as f:
                    f.write(srf.exportToString().split(";")[0])
            #### ESRI float-grid ASCII header ####
            lines = []
            lines.append("ncols\t\t{0}".format(nCols))
            lines.append("nrows\t\t{0}".format(nRows))
            lines.append("xllcorner\t{:.12f}".format(xMin))
            lines.append("yllcorner\t{:.12f}".format(yMin))
            lines.append("cellsize\t{:.12f}".format(cellSize))
            lines.append("NODATA_value\t{0}".format(NULL))
            lines.append("BYTEORDER\tLSBFIRST")
            with open(header, 'w') as f:
                for i in lines:
                    f.write(i + "\n")
            #### Create HDR, PRJ Files ###
            if data is None:
                return pathNC, r"nc" + randomName
        else:
            pathNC = pathFileAndName[0]
            randomName = pathFileAndName[1]
            flt = pathNC + "\\" + randomName + ".flt"
        #### Verify Dtype ####
        if data.dtype != NUM.float32:
            ARCPY.AddError("Data Array should be float 32 bits")
            raise SystemExit()
        done = ARC._ss.output_flt(flt, data)
        #### Exit - Function Was Cancelled / Couldn't be Created ####
        if not done:
            raise SystemExit()
        return pathNC, r"nc" + randomName

    def calculateChunckSize(self, infoRasterInstances, listBase):
        """Calculate Chunk Size using group of slices
        INPUT:
        infoRasterInstances (RasterInfo Instance): Raster Info
        listBase (list - int): List of Zones in one chunck
        OUTPUT:
        nCells (int): number of cells in the chunk
        newSliceCol (slice): column slice of the chunk
        newSliceRow (slice): row slice of the chunk
        iniIndex (NUM.int64): starting index of the chunk
        xMinYMin (tuple): xMin and yMin of the chunk
        nColsStep (int): number of columns
        nRowsStep (int): number of rows
        """
        iniBase = infoRasterInstances.getRasterInfoZone(listBase[0])[0]
        endBase = infoRasterInstances.getRasterInfoZone(listBase[-1])[0]
        #### Get Indice Info ####
        iniRow = iniBase[0].start
        endRow = endBase[0].stop
        iniCol = iniBase[1].start
        endCol = endBase[1].stop
        nColsStep = endCol - iniCol
        nRowsStep = endRow - iniRow
        newSliceCol = slice(iniCol, endCol)
        newSliceRow = slice(iniRow, endRow)
        iniIndex = infoRasterInstances.rangeZones[listBase[0]][0]
        xMinYMin = infoRasterInstances.getRasterInfoZone(listBase[-1])[2]
        nCells = nColsStep * nRowsStep
        return nCells, newSliceCol, newSliceRow, iniIndex, xMinYMin, nColsStep, nRowsStep

    def createTemporalNC(self, rasterDimensionInfoSet, chunkSizes = None, data = None,
                         outputRasterPath = None):
        """ Create temporal NetCDF file
        INPUT:
        rasterDimensionInfoSet (RasterInfo Instance): raster object
        data (2d array): array
        outputRasterPath (str): output path
        """
pathNC = UTILS.returnScratchFolder() pathNC = pathNC + r"\nc"+ str(NUM.random.randint(1000000)) + ".nc" f = NET.Dataset(pathNC, 'w') f.createDimension('y', rasterDimensionInfoSet.nRows) f.createDimension('x', rasterDimensionInfoSet.nCols) Y = None X = None isGCS = rasterDimensionInfoSet.isGCS if isGCS: Y = f.createVariable('y', 'f8', ('y' )) Y.long_name = 'latitude coordinate' Y.standard_name = 'latitude' Y.units = 'degrees_north' X = f.createVariable('x', 'f8', ('x' )) X.long_name = 'longitude coordinate' Y.standard_name = 'longitude' Y.units = 'degrees_east' else: Y = f.createVariable('y', 'f8', ('y' )) Y.long_name = 'y coordinate of projection' Y.standard_name = 'projection_y_coordinate' Y.units = rasterDimensionInfoSet.unitCell X = f.createVariable('x', 'f8', ('x' )) X.long_name = 'x coordinate of projection' X.standard_name = 'projection_x_coordinate' X.units = rasterDimensionInfoSet.unitCell variable = f.createVariable('PREDICTED', 'f4', ('y', 'x'), fill_value = NULL) variable.long_name= "prediction" variable.missing_value = NULL variable.coordinate = "x, y" variable.esri_pe_string = rasterDimensionInfoSet.srf.exportToString() #variable.unit = rasterInfo.srf.units Y[:] = rasterDimensionInfoSet.getYs() X[:] = rasterDimensionInfoSet.getXs() if data is None: return f , variable, pathNC variable[:] = data f.close() strOut = "RFNC_OUTPUT" ARCPY.md.MakeNetCDFRasterLayer(pathNC, "PREDICTED", "x", "y", strOut, None, None, "BY_VALUE") ARCPY.management.CopyRaster(strOut, outputRasterPath, None, None, NULL, "NONE", "NONE", None, "NONE", "NONE", None, "NONE") UTILS.passiveDelete(strOut) try: OS.remove(pathNC) except: ARCPY.AddWarning("Temporal File was not removed " + pathNC) pass return def outputRaster(self, info, newSRF = None, getNValues = None, dataBase = None): """Generate Output Raster INPUT: info (1D Array - float): 1D array with prediction values newSRF {Spatial Reference, None}: Spatial reference / use training Spatial Reference getNValues {int, None}: Number 
of Values to Calculate Stat- rederer dataBase {1D Array, None}: If this value is provided the array is updated with information predicted (info) using the mask goodPointsIdsTesting OUTPUT: values (list, Array, None) """ hpar = self.hpar if self.rasterDimensionInfo is not None: sliceData, goodPointsIdsTesting, intersection, cellSize, nRows, nCols = self.rasterDimensionInfo if newSRF is None: newSRF = self.rfTraining.ssdo.spatialRef #### Output Raster If Path is Defined #### if hpar.outputRaster != "": #### Check Type Output #### isClass = self.rfTraining.yField.rfType == categoricalVar infoCat = self.rfTraining.yField.info if dataBase is None: dataBase = NUM.ones((self.infoRasterInstances.nRows*self.infoRasterInstances.nCols), dtype = float)*NULL dataBase[goodPointsIdsTesting] = info else: dataBase[goodPointsIdsTesting] = NUM.asarray(info, dtype = NUM.float32) #### Accumulate Values #### if getNValues is not None: #### Get Current Classes in the prediction #### if isClass: return [id for id, i in enumerate(infoCat)] #### Get ids = NUM.random.randint(0, len(info), getNValues ) maxValue = info.max() values = list(info[ids]) values.append(info.min()) values.append(info.max()) return values #### Reshape Array #### dataBase.shape = (self.infoRasterInstances.nRows, self.infoRasterInstances.nCols) dataBase = dataBase[sliceData[0],sliceData[1]] #### Create Raster #### createRaster(dataBase, newSRF, intersection[0], intersection[1], cellSize, cellSize, hpar.outputRaster) dataV = None if isClass: dataV = (infoCat, [id for id, i in enumerate(infoCat)]) else: dataV = info #### Apply Raster #### applyLayer(hpar, "output_raster", data = dataV, isClass = isClass, headingName = self.rfTraining.yField.name, shapeType = "Raster", isTrainingOutput = False, isRasterLayer = True) def displayImportance(self, rfTraining, importance, report): """Get String with Importance information INPUT: rfTraining (InfoForestField instance): Trainig information importance (1D Array): Importance report 
(str): Initial String OUTPUT: str : Concatenate report and importance table """ numVarToDisp = 20 header = ARCPY.GetIDMessage(84834) head = [ARCPY.GetIDMessage(84622), ARCPY.GetIDMessage(84835), "%"] rows = [] rows.append(head) nVar = len(importance) total = importance.sum() values = NUM.zeros((len(importance),3), dtype= float) values.T[0] = NUM.arange(nVar) values.T[1] = importance values.T[2] = 100*importance/total indices = NUM.argsort(importance)[::-1] if nVar > numVarToDisp: indices = indices[0:numVarToDisp] values = values[indices,:] for i in values: row = [rfTraining.fields[int(i[0])].alias, LOCALE.format("%0.2f", i[1]), LOCALE.format("%0.0f", i[2]) ] rows.append(row) outputReport = UTILS.outputTextTable(rows, header = header, justify = ["left", "right", "right"], pad = 1, colPad = 14, titleFillToken = "-") return report + outputReport def report(self, rfTraining, data, hpar, randSeed, bestValue, listDecValues, maxIndexReach, ssdoPoints2Train = None, rfTest = None, displayShare = True): trainedData, testingData, predictData, errors, \ classErrors, varianceErrors, importance, depth = data if displayShare: shareTraining = self.getMinMaxX(rfTraining, rfTraining.x_train) shareTest = self.getMinMaxX(rfTraining, rfTraining.x_test) if rfTest is None: shareMsg = self.reportShare(rfTraining, shareTraining, shareTest) else: sharePredict = self.getMinMaxX(rfTraining, rfTest.x) shareMsg = self.reportShare(rfTraining, shareTraining, shareTest, sharePredict) reportParam = self.displayParameters(rfTraining, depth) report = self.displayInfoModel(errors, rfTraining, hpar.numTrees, reportParam, importance, varianceErrors, classErrors) self.createOutputDiagnostic(rfTraining, importance, hpar) staReport = self.createOutputFromRFField(rfTraining, hpar.outputTrainedFeatures, (trainedData, testingData), hpar, ssdoTrField = ssdoPoints2Train) info = self.reportCrosValidation(rfTraining, randSeed, bestValue, listDecValues, maxIndexReach) if displayShare: ARCPY.AddMessage(report + 
staReport + info + shareMsg) else: ARCPY.AddMessage(report + staReport + info ) def displayInfoModel(self, errors , rfTraining, numTrees, report , importance, varianceErrors, classErrors): """Display OOB Errors INPUT: errors (1D array): OOB errors rfTraining (InfoForestField instance): Trainig information numTrees (int): Number of Trees report (str): String to concatenate width importance (1D array): importance values varianceError(1D array): Variance Error OUTPUT: str : Concatenated report and OOB errors """ ini = (numTrees // 2) end = numTrees-1 error50 = errors[ini] error100 = errors[end] header = ARCPY.GetIDMessage(84817) rows = [] if end > 0: head = [ARCPY.GetIDMessage(84811), str(ini), str(end+1)] else: head = [ARCPY.GetIDMessage(84811), str(end+1)] rows.append(head) outputReportI = "" pad = 1 just = None colPad = 7 if rfTraining.yField.rfType == categoricalVar: just = ["left", "right", "right"] if end > 0: total = [ARCPY.GetIDMessage(84819), LOCALE.format("%0.3f", error50),LOCALE.format("%0.3f", error100) ] else: just = ["left", "right"] total = [ARCPY.GetIDMessage(84819), LOCALE.format("%0.3f", error100) ] colPad = 30 rows.append(total) clasError = classErrors.reshape(numTrees, len(rfTraining.yField.info)) if end > 0: valueError = [" ", " ", " "] else: valueError = ["" ," "] rows.append(valueError) for index, i in enumerate(rfTraining.yField.info): if end > 0: valueError = [i, LOCALE.format("%0.3f", clasError[ini, index]) if clasError[ini, index] >=0 else "NA" , LOCALE.format("%0.3f", clasError[end, index]) if clasError[end, index] >=0 else "NA"] else: valueError = [i, LOCALE.format("%0.3f", clasError[end, index]) if clasError[end, index] >=0 else "NA"] rows.append(valueError) pad = 1 else: outputReportI = "" just = ["left", "right", "right"] if end > 0: total = [ARCPY.GetIDMessage(84819), LOCALE.format("%0.3f", error50),LOCALE.format("%0.3f", error100) ] valueError = [ARCPY.GetIDMessage(84820), LOCALE.format("%0.3f", 100 - 
def displayInfoModel(self, errors , rfTraining, numTrees, report ,
                     importance, varianceErrors, classErrors):
    """Display OOB Errors.

    INPUT:
    errors (1D array): OOB errors per tree
    rfTraining (InfoForestField instance): training information
    numTrees (int): number of trees
    report (str): string to concatenate with
    importance (1D array): importance values
    varianceErrors (1D array): variance error per tree (regression)
    classErrors (array): per-class error per tree (classification)

    OUTPUT:
    str : concatenated report and OOB errors
    """
    #### Errors Are Reported at Half the Forest and at the Full Forest ####
    ini = (numTrees // 2)
    end = numTrees-1
    error50 = errors[ini]
    error100 = errors[end]
    header = ARCPY.GetIDMessage(84817)
    rows = []
    #### Single-Tree Forests Get a Single Error Column ####
    if end > 0:
        head = [ARCPY.GetIDMessage(84811), str(ini), str(end+1)]
    else:
        head = [ARCPY.GetIDMessage(84811), str(end+1)]
    rows.append(head)
    outputReportI = ""
    pad = 1
    just = None
    colPad = 7
    if rfTraining.yField.rfType == categoricalVar:
        #### Classification: Overall OOB Plus Per-Category Error Rows ####
        just = ["left", "right", "right"]
        if end > 0:
            total = [ARCPY.GetIDMessage(84819),
                     LOCALE.format("%0.3f", error50),
                     LOCALE.format("%0.3f", error100) ]
        else:
            just = ["left", "right"]
            total = [ARCPY.GetIDMessage(84819),
                     LOCALE.format("%0.3f", error100) ]
            colPad = 30
        rows.append(total)
        clasError = classErrors.reshape(numTrees, len(rfTraining.yField.info))
        #### Blank Spacer Row Between Total and Per-Category Errors ####
        if end > 0:
            valueError = [" ", " ", " "]
        else:
            valueError = ["" ," "]
        rows.append(valueError)
        for index, i in enumerate(rfTraining.yField.info):
            #### Negative Class Errors Are Reported as Not Available ####
            if end > 0:
                valueError = [i,
                              LOCALE.format("%0.3f", clasError[ini, index])
                              if clasError[ini, index] >=0 else "NA" ,
                              LOCALE.format("%0.3f", clasError[end, index])
                              if clasError[end, index] >=0 else "NA"]
            else:
                valueError = [i,
                              LOCALE.format("%0.3f", clasError[end, index])
                              if clasError[end, index] >=0 else "NA"]
            rows.append(valueError)
        pad = 1
    else:
        #### Regression: OOB (MSE) and Percent of Variation Explained ####
        outputReportI = ""
        just = ["left", "right", "right"]
        if end > 0:
            total = [ARCPY.GetIDMessage(84819),
                     LOCALE.format("%0.3f", error50),
                     LOCALE.format("%0.3f", error100) ]
            valueError = [ARCPY.GetIDMessage(84820),
                          LOCALE.format("%0.3f", 100 - varianceErrors[ini]),
                          LOCALE.format("%0.3f", 100 - varianceErrors[end]) ]
        else:
            just = ["left", "right"]
            colPad = 25
            total = [ARCPY.GetIDMessage(84819),
                     LOCALE.format("%0.3f", error100) ]
            valueError = [ARCPY.GetIDMessage(84820),
                          LOCALE.format("%0.3f", 100 - varianceErrors[end]) ]
        rows.append(total)
        rows.append(valueError)
    outputReport = UTILS.outputTextTable(rows, header = header,
                                         justify = just, pad = pad,
                                         colPad = colPad,
                                         titleFillToken = "-")
    reportVal = outputReportI + outputReport
    #### Chain Into the Importance Table Report ####
    return self.displayImportance(rfTraining, importance, report + reportVal)

def displayParameters(self, trained, depth):
    """Display Input Parameters as a formatted text table.

    INPUT:
    trained (InfoForestField instance): training information
    depth (1D array): per-tree depths; min/max/mean are reported

    OUTPUT:
    str: formatted parameter table
    """
    header = ARCPY.GetIDMessage(84810)
    rows = []
    rows.append([ARCPY.GetIDMessage(84811), str(self.hpar.numTrees)])
    rows.append([ARCPY.GetIDMessage(84812), str(self.hpar.leafSize)])
    rows.append([ARCPY.GetIDMessage(84813),
                 str(depth.min()) + "-" + str(depth.max())])
    rows.append([ARCPY.GetIDMessage(84836), str(int(depth.mean()))])
    rows.append([ARCPY.GetIDMessage(84814), str(self.hpar.sample_size)])
    rows.append([ARCPY.GetIDMessage(84815), str(self.hpar.fields_to_try)])
    rows.append([ARCPY.GetIDMessage(84816), str(trained.splitPercentage)])
    outputReport = UTILS.outputTextTable(rows, header = header,
                                         justify = ["left", "right"],
                                         pad = 1, colPad = 5,
                                         titleFillToken = "-")
    return outputReport
def createOutputDiagnostic(self, rfField, importance, hpar):
    """Create the diagnostic table and importance bar chart.

    INPUT:
    rfField (InfoForestField): RF Field (provides variable aliases)
    importance (1D array): importance values
    hpar (HandlePar instance): Parameters information (output table path
        and the derived-output parameter that receives the chart)
    """
    if hpar.outputDiagnostic not in [ "", None]:
        # NOTE(review): this container appears unused afterwards — kept in
        # case construction has side effects; confirm and remove.
        data = UTILS.DataContainer(None)
        fields = []
        field = SSDO.CandidateField(name = "VARIABLES", type = "TEXT",
                                    data = NUM.array([i.alias for i in
                                                      rfField.fields]))
        impField = SSDO.CandidateField(name = "IMPORTANCE", type = "DOUBLE",
                                       data = importance)
        total = importance.sum()
        fields.append(field)
        fields.append(impField)
        #### Percentage Column Only When Total Importance Is Positive ####
        if total > 0:
            perc = NUM.round(100* importance /total,3)
            perField = SSDO.CandidateField(name = "PERCENTAGE",
                                           type = "DOUBLE", data = perc)
            fields.append(perField)
        #### Get Output Table Name With Extension if Appropriate ####
        hpar.outputDiagnostic, dbf = UTILS.returnTableName(hpar.outputDiagnostic)
        ARC._ss.output_table_from_candidate_fields(hpar.outputDiagnostic,
                                                   len(importance), fields)
        #### Attach a Rotated Bar Chart of Importance (Best Effort) ####
        try:
            title = ARCPY.GetIDMessage(84841)
            chart = ARCPY.Chart(title)
            chart.type = "bar"
            chart.xAxis.field = "VARIABLES"
            chart.xAxis.title = ARCPY.GetIDMessage(84842)
            chart.bar.rotated = True
            chart.title = title
            chart.yAxis.field = "IMPORTANCE"
            chart.yAxis.title = ARCPY.GetIDMessage(84835)
            chart.yAxis.sort = True
            hpar.outputDiagnosticParameter.charts = [chart]
        except:
            #### Chart Creation Is Optional; Never Fail the Tool ####
            pass

def setResidualField(self, candFields, data, dataPred, fieldSupportY,
                     isClass, outputField):
    """Add a Residual / SAME_CLASS field to the list of candidate fields.

    INPUT:
    candFields (list of CandidateField): list to append the new field to
    data (1D array): true values
    dataPred (1D array): predicted values
    fieldSupportY (str): field type used for the residual field
    isClass (bool): classification (SAME_CLASS flag) vs regression (RESIDUAL)
    outputField (dtype): dtype for the residual values
    """
    #### Don't Create Residual Field ####
    if dataPred is None:
        return
    if isClass:
        #### Vectorized Flag: 1 When Predicted Class Matches, Else 0 ####
        #### (replaces an element-wise Python loop) ####
        diff = NUM.asarray(NUM.asarray(data) == NUM.asarray(dataPred),
                           dtype = int)
        pFieldDiff = SSDO.CandidateField(name = "SAME_CLASS",
                                         alias = "Correctly Classified",
                                         type = "LONG", data = diff)
        candFields.append(pFieldDiff)
    else:
        pFieldRes = SSDO.CandidateField(name = "RESIDUAL",
                                        alias = "Residual",
                                        type = fieldSupportY,
                                        data = NUM.asarray((data -dataPred),
                                                           dtype = outputField) )
        candFields.append(pFieldRes)
def extractStatistics(self, rfTrained, allInfoData):
    """Extract a single decision statistic from a forest run.

    INPUT:
    rfTrained (InfoForestField Instance): trained object; provides y, the
        train/test split indices and the y-field type
    allInfoData (tuple arrays or None): (Trained Output, Tested Output)

    OUTPUT:
    outputData (float/None): test-split accuracy for classification,
        R^2 for regression; None when there is no usable test split
    """
    #### No Forest Output -> No Statistic ####
    if allInfoData is None:
        return None
    sourceData = rfTrained.y
    predictedData = NUM.zeros(len(rfTrained.y), dtype = float)
    #### Scatter Predicted Values Back to Their Original Positions ####
    trainedData, testingData = allInfoData
    predictedData[rfTrained.train_split_indices] = trainedData
    #### If Trained Data Was Splitted ####
    if len(rfTrained.test_split_indices) > 0:
        predictedData[rfTrained.test_split_indices] = testingData
        #### At Least Two Test Observations Are Required ####
        if len(rfTrained.test_split_indices) < 2:
            return None
        testTrainedData = sourceData[rfTrained.test_split_indices]
        testPredData = predictedData[rfTrained.test_split_indices]
        if rfTrained.yField.rfType == "Categorical":
            #### Accuracy: Share of Correctly Classified Test Obs ####
            #### (vectorized; replaces an element-wise loop) ####
            matches = NUM.count_nonzero(NUM.asarray(testTrainedData) ==
                                        NUM.asarray(testPredData))
            return matches / len(testTrainedData)
        else:
            #### R^2 of Observed vs Predicted on the Test Split ####
            slope, intercept, r_value, p_value, std_err = \
                SCPS.linregress(testTrainedData, testPredData )
            return r_value**2
    return None
def createOutputFromRFField(self, rfTrained, outputFC, allInfoData, hpar,
                            ssdoTrField = None):
    """Create the trained-features output and the training/testing
    statistics report.

    INPUT:
    rfTrained {InfoForestField}: trained forest information
    outputFC {str}: output path; when None/"" only the report is returned
    allInfoData {tuple or None}: (trained output, tested output)
    hpar {HandlePar instance}: parameters
    ssdoTrField {SSDataObject, None}: exploded training points; when given
        the output is written from this object instead of rfTrained.ssdo

    OUTPUT:
    report (str): statistics report text
    """
    typeNumpyField = None
    dictClasses = {}
    dataP = None
    useExplodeData = False
    printStatistics = False
    trainedData = None
    testingData = None
    report = ""
    data = None
    if ssdoTrField is not None:
        useExplodeData = True
    infoData = NUM.zeros(len(rfTrained.y), dtype = float)
    if allInfoData is not None:
        #### Return Array with Predicted values from Training ####
        trainedData, testingData = allInfoData
        infoData[rfTrained.train_split_indices] = trainedData
        #### If Trained Data Was Splitted ####
        if len(rfTrained.test_split_indices) > 0:
            infoData[rfTrained.test_split_indices] = testingData
            #### Testing Statistics Need More Than Two Test Obs ####
            if len(rfTrained.test_split_indices) > 2:
                printStatistics = True
            else:
                ARCPY.AddIDMessage("Warning", 110198)
    #### Contain All Predicted Data in Trained Dataset ####
    rfTrained.trainedAndTested = infoData
    fieldSupportY = convertTypeSupportedByFastOutput[rfTrained.yField.fieldType]
    #### Check Type Field To be Predicted ####
    isClass = rfTrained.yField.rfType == "Categorical"
    if isClass:
        #### Creat a Dictionary With Categories ####
        dictClasses = { id:value for id, value in
                        enumerate(rfTrained.yField.info)}
        typeNumpyField = UTILS.numpyConvert[rfTrained.yField.fieldType]
        #### Calculate the maximum string size ####
        size = None
        if typeNumpyField =='U%i':
            size = max(map(len, rfTrained.yField.info))
            typeNumpyField = 'U'+str(size)
        #### Initialize Ouput Arrays ####
        data = NUM.zeros(len(rfTrained.y), dtype = typeNumpyField)
        dataP = NUM.zeros(len(rfTrained.y), dtype = typeNumpyField)
        #### Introduce Predicted values in the output data array ####
        #### (internal class ids are mapped back to category labels) ####
        for value in dictClasses:
            data[rfTrained.y == float(value)] = dictClasses[value]
            dataP[infoData == float(value)] = dictClasses[value]
        outputField = numpy2FormatSupport(fieldSupportY, size)
    else:
        outputField = numpy2FormatSupport(fieldSupportY)
        data = rfTrained.y
        dataP = infoData
    #### Print Statistics of Training Dataset ####
    trainData = data[rfTrained.train_split_indices]
    predData = dataP[rfTrained.train_split_indices]
    #### Generate Statistics Depending on Type Classification/Regression ####
    if rfTrained.yField.rfType == "Categorical":
        report += statisticsForClassification(rfTrained, trainData, predData,
                                              isTesting = False) + "\n"
    else:
        report += statisticsForRegression(trainData, predData,
                                          isTesting = False ) + "\n"
    #### Print Statistics of Testing Data Set ####
    if printStatistics:
        testTrainedData = data[rfTrained.test_split_indices]
        testPredData = dataP[rfTrained.test_split_indices]
        if rfTrained.yField.rfType == "Categorical":
            report += statisticsForClassification(rfTrained, testTrainedData,
                                                  testPredData, hpar = hpar)
        else:
            report += statisticsForRegression(testTrainedData, testPredData )
    #### Get Statistics Without Generating an Output ####
    if outputFC in [None, ""]:
        return report
    ### Updata RF fields ####
    candFields = []
    dataSourceId = None
    if not useExplodeData:
        ### Source ID ####
        dataSourceId = NUM.array(list(rfTrained.ssdo.master2Order.keys()),
                                 dtype = int)
        sourceField = SSDO.CandidateField(name = "SOURCE_ID",
                                          alias = "Source ID",
                                          type = "INTEGER",
                                          data = dataSourceId)
        candFields.append(sourceField)
    #### Get Infor of each Field ####
    for id, i in enumerate(rfTrained.fields):
        cfield = None
        fieldSupport = convertTypeSupportedByFastOutput[i.fieldType]
        if i.rfType == "Categorical":
            #### Map Internal Class Ids Back to Original Labels ####
            dictClassesV = { id:value for id, value in enumerate(i.info)}
            typeNumField = UTILS.numpyConvert[i.fieldType.title().replace("integer",
                                                                         "Integer")]
            #### Calculate the maximum string size ####
            size = None
            if typeNumField =='U%i':
                size = max(map(len, i.info))
                typeNumField = 'U'+str(size)
            ndata = NUM.zeros(len(rfTrained.x), dtype = typeNumField)
            for value in dictClassesV:
                tempIds = rfTrained.x.T[id] == float(value)
                ndata[tempIds] = dictClassesV[value]
            cfield = SSDO.CandidateField(name = i.name, alias = i.alias,
                                         type = fieldSupport ,
                                         data = NUM.asarray(ndata,
                                                dtype = numpy2FormatSupport(fieldSupport,
                                                                            size)))
        else:
            cfield = SSDO.CandidateField(name = i.name, alias = i.alias,
                                         type = fieldSupport,
                                         data = NUM.asarray(rfTrained.x.T[id],
                                                dtype = numpy2FormatSupport(fieldSupport)))
        candFields.append(cfield)
    pFieldT = SSDO.CandidateField(name = "TRAINED_ID",
                                  alias = "Trained Features",
                                  type = "LONG",
                                  data = NUM.asarray(rfTrained.indicesTrain,
                                                     dtype = int))
    candFields.append(pFieldT)
    pField = SSDO.CandidateField(name = rfTrained.yField.name,
                                 alias = rfTrained.yField.alias,
                                 type = fieldSupportY,
                                 data = NUM.asarray(data,
                                                    dtype = outputField))
    candFields.append(pField)
    dataPred = None
    #### Do not Generate Prdicted Field When allInfoData is None ####
    if allInfoData is not None:
        dataPred = NUM.asarray(dataP, dtype = outputField)
        pField2 = SSDO.CandidateField(name = "PREDICTED",
                                      alias = rfTrained.yField.alias+"(Predicted)",
                                      type = fieldSupportY,
                                      data = dataPred)
        candFields.append(pField2)
    #### Create Residual Field ####
    self.setResidualField(candFields, data, dataPred, fieldSupportY,
                          isClass, outputField)
    if not useExplodeData:
        if rfTrained.ssdo.shapeType == "Polygon":
            ARC._ss.copy_selected_features_polygons(ssdo = rfTrained.ssdo,
                        input_feature_class = rfTrained.ssdo.inputFC,
                        output_feature_class = outputFC,
                        subset_ids = dataSourceId,
                        candidate_fields = UTILS.checkCandidateFieldName(candFields,
                                                                         outputFC) )
        else:
            ARC._ss.output_featureclass_from_dataobject(rfTrained.ssdo,
                        outputFC,
                        UTILS.checkCandidateFieldName(candFields, outputFC))
        if allInfoData is not None:
            applyLayer(hpar, "output_trained_features", dataPred, isClass,
                       headingName = rfTrained.yField.alias,
                       shapeType = rfTrained.ssdo.shapeType,
                       isTrainingOutput = True)
    else:
        #### Exploded Training Points Are Written From ssdoTrField ####
        ARC._ss.output_featureclass_from_dataobject(ssdoTrField, outputFC,
                    UTILS.checkCandidateFieldName(candFields, outputFC))
        if allInfoData is not None:
            applyLayer(hpar, "output_trained_features", dataPred, isClass,
                       headingName = rfTrained.yField.alias,
                       shapeType = "Point",
                       isTrainingOutput = True)
    return report
def createOutput(self, info, rfTrained, rfTest, ssdoTesting, outputFC, hpar):
    """Create the prediction Output Feature Class from forest output.

    INPUT:
    info {array}: Predicted values
    rfTrained {InfoForestField}: trained forest information
    rfTest {InfoForestField}: forest object for the prediction data
    ssdoTesting {SSDataObject}: instance SSDO of testing dataset
    outputFC {str}: Output Path
    hpar {Hpar ins}: parameters
    """
    # NOTE(review): this local shadows the module-level dictionary of the
    # same name (and lacks its upper-case keys) — confirm intended.
    convertTypeSupportedByFastOutput = { 'SmallInteger': 'LONG',
                                         'Integer': 'LONG',
                                         'String': 'TEXT',
                                         'Single': 'DOUBLE',
                                         'Double': 'DOUBLE',
                                         'Date': 'DATE'}
    if info is None:
        ARCPY.AddMessage("No output Generated for testing")
        return
    typeNumpyField = None
    dictClasses = {}
    data = None
    fieldSupportY = convertTypeSupportedByFastOutput[rfTrained.yField.fieldType]
    isClass = rfTrained.yField.rfType == "Categorical"
    #### Check Type Field To be Predicted ####
    if isClass:
        dictClasses = { id:value for id, value in
                        enumerate(rfTrained.yField.info)}
        typeNumpyField = UTILS.numpyConvert[rfTrained.yField.fieldType]
        size = None
        #### Calculate the maximum string size ####
        if typeNumpyField =='U%i':
            size = max(map(len, rfTrained.yField.info))
            typeNumpyField = 'U'+str(size)
        data = NUM.zeros(len(rfTest.x), dtype = typeNumpyField)
        #### Introduce Predicted values in the output data array ####
        for value in dictClasses:
            data[info == float(value)] = dictClasses[value]
        outputField = numpy2FormatSupport(fieldSupportY, size)
    else:
        outputField = numpy2FormatSupport(fieldSupportY)
        data = info
    candFields = []
    ### Source ID ####
    dataSourceId = NUM.array(list(ssdoTesting.master2Order.keys()),
                             dtype = int)
    sourceField = SSDO.CandidateField(name = "SOURCE_ID",
                                      alias = "Source ID",
                                      type = "INTEGER",
                                      data = dataSourceId)
    candFields.append(sourceField)
    for id, i in enumerate(rfTest.fields):
        cfield = None
        fieldSupport = convertTypeSupportedByFastOutput[i.fieldType]
        if i.rfType == "Categorical":
            #### Categories Come From the TRAINED Field Definition ####
            dictClassesV = { id:value for id, value in
                             enumerate(rfTrained.fields[id].info)}
            typeNumField = UTILS.numpyConvert[i.fieldType]
            size = None
            #### Calculate the maximum string size ####
            if typeNumField =='U%i':
                size = max(map(len, i.info))
                typeNumField = 'U'+str(size)
            ndata = NUM.zeros(len(rfTest.x), dtype = typeNumField)
            for value in dictClassesV:
                tempIds = rfTest.x.T[id] == float(value)
                ndata[tempIds] = dictClassesV[value]
            cfield = SSDO.CandidateField(name = i.name, alias = i.alias,
                                         type = fieldSupport ,
                                         data = NUM.asarray(ndata,
                                                dtype = numpy2FormatSupport(fieldSupport,
                                                                            size)))
        else:
            cfield = SSDO.CandidateField(name = i.name, alias = i.alias,
                                         type = fieldSupport,
                                         data = NUM.asarray(rfTest.x.T[id],
                                                dtype = numpy2FormatSupport(fieldSupport)))
        candFields.append(cfield)
    pField = SSDO.CandidateField(name = "PREDICTED",
                                 alias = rfTrained.yField.alias + "(Predicted)",
                                 type = fieldSupportY,
                                 data = NUM.asarray(data,
                                                    dtype = outputField))
    candFields.append(pField)
    #### Write Output; Report Creation Failure Only When Nothing Written ####
    created = False
    try:
        if ssdoTesting.shapeType == "Polygon":
            ARC._ss.copy_selected_features_polygons(ssdo = ssdoTesting,
                        input_feature_class = ssdoTesting.inputFC,
                        output_feature_class = outputFC,
                        subset_ids = dataSourceId,
                        candidate_fields = UTILS.checkCandidateFieldName(candFields,
                                                                         outputFC))
            created = True
            #### Apply Layer ####
            applyLayer(hpar, "output_features", data, isClass,
                       headingName = rfTrained.yField.alias,
                       shapeType = "Polygon", isTrainingOutput = False)
        else:
            ARC._ss.output_featureclass_from_dataobject(ssdoTesting,
                        outputFC,
                        UTILS.checkCandidateFieldName(candFields, outputFC))
            created = True
            #### Apply Layer ####
            applyLayer(hpar, "output_features", data, isClass,
                       headingName = rfTrained.yField.alias,
                       shapeType = "Point", isTrainingOutput = False)
    except:
        #### Layer Styling Failures After a Successful Write Are Ignored ####
        if not created:
            ARCPY.AddIDMessage("ERROR", 210, outputFC)
        pass
def iterateTraining(self, origSeed, rfTraining):
    """Make RF Iteration over candidate seeds and pick the most
    representative one.

    INPUT:
    origSeed (int): Original Seed
    rfTraining (Forest Field Instance): Contain Input Original Data

    OUTPUT:
    (tuple): (Closest Seed to the Median, Best Mean R2/Acc,
              Array R2/Acc, max R2/Acc index); the last three are None
              when no iteration was requested
    """
    hpar = self.hpar
    if hpar.iterations > 1:
        header = rfTraining.getHeader()
        variab = rfTraining.getVars()
        #### -1 Sentinel Marks Iterations That Produced No Statistic ####
        decisionValues = NUM.ones(hpar.iterations, dtype = float) * -1
        for i in NUM.arange(hpar.iterations):
            rfTraining.seed = hpar.iterationSeeds[i]
            rfTraining.splitDataForTesting(hpar.balanceTree)
            ### Execute Forest For a Given Seed ###
            data = ARC._ss.forest(x = rfTraining.x_train,
                                  y = rfTraining.y_train,
                                  x_test = rfTraining.x_test,
                                  y_test = rfTraining.y_test,
                                  type_vars = variab,
                                  number_trees = hpar.numTrees,
                                  node_size = hpar.leafSize,
                                  sample_size = hpar.sample_size,
                                  model = "",
                                  max_nodes = hpar.maxLevel,
                                  header = header,
                                  permute_vars = hpar.fields_to_try,
                                  seed = rfTraining.seed,
                                  balance = int(hpar.balanceTree))
            trainedData, testingData, predictData, errors, classErrors, \
            varianceErrors, importance, depth = data
            valueDec = self.extractStatistics(rfTraining,
                                              (trainedData, testingData))
            decisionValues[i] = valueDec
        if len(NUM.where(decisionValues == -1)[0]) >0:
            ARCPY.AddError("Forest iteration return None")
            raise SystemExit
        #### Check if Number of Trees is Even ####
        # NOTE(review): parity is taken from numTrees, not from the number
        # of iterations — confirm this is intended.
        isEven = hpar.numTrees % 2 == 0
        #### Get Median ####
        median = NUM.median(decisionValues)
        diff = abs(decisionValues - median)
        orderValues = NUM.argsort(diff)
        bestOption = orderValues[0]
        #### Select Best Seed, In Even Number the Larger Decision Value
        #### is Selected ####
        if isEven:
            if decisionValues[orderValues[1]]>decisionValues[orderValues[0]]:
                bestOption = orderValues[1]
        return hpar.iterationSeeds[bestOption] , decisionValues[bestOption], \
               decisionValues, orderValues[-1]
    else:
        return origSeed, None, None, None
def reportCrosValidation(self, rfTraining, randSeed, bestValue,
                         listDecValues, maxReach):
    """Report and Create Ouput of Cross Validation.

    INPUT:
    rfTraining (InfoForestField Instance): Training Field
    randSeed (int): Best Seed
    bestValue (double): R2/Accuracy
    listDecValues (array): Values for each iteration (R2/Acc)
    maxReach (int): location Maximum R2/Acc

    OUTPUT:
    info (str): message (empty when no iteration was performed)
    """
    info = ""
    if bestValue is not None:
        isClass = rfTraining.yField.rfType == "Categorical"
        #### Column/Message Label: Accuracy vs R-Squared ####
        msg = ""
        if isClass:
            msg = ARCPY.GetIDMessage(84827)
        else:
            msg = ARCPY.GetIDMessage(84018)
        bestSeedMsg = ARCPY.GetIDMessage(84864)
        info +="\n\n"+ bestSeedMsg.format(msg,
                                          "{:.3f}".format(bestValue),
                                          str(randSeed))
        if self.hpar.outputCrossValidation not in ["", None]:
            # NOTE(review): this container appears unused afterwards —
            # kept in case construction has side effects; confirm.
            data = UTILS.DataContainer(None)
            fields = []
            field = SSDO.CandidateField(name = msg, type = "DOUBLE",
                                        data = NUM.asarray(listDecValues,
                                                           dtype = float))
            seedField = SSDO.CandidateField(name = "SEED", type = "LONG",
                                            data = NUM.asarray(self.hpar.iterationSeeds,
                                                               dtype = int))
            fields = [field, seedField]
            #### Get Output Table Name With Extension if Appropriate ####
            self.hpar.outputCrossValidation, dbf = \
                UTILS.returnTableName(self.hpar.outputCrossValidation)
            ARC._ss.output_table_from_candidate_fields(self.hpar.outputCrossValidation,
                                                       len(self.hpar.iterationSeeds),
                                                       fields)
            #### Histogram Bin Count: Smaller of Risk-Function/Rice Rules ####
            y = listDecValues
            numBreaks = int(STATS.riskFunBins(y, 8, 64, 1))
            riceBreaks = STATS.riceBins(len(y))
            if riceBreaks < numBreaks:
                numBreaks = riceBreaks
            chart = ARCPY.Chart("Validation")
            chart.type = "histogram"
            chart.xAxis.field = msg
            chart.title = ARCPY.GetIDMessage(84856)+" " + msg
            chart.histogram.binCount = numBreaks
            chart.histogram.showMean = True
            chart.histogram.showMedian = True
            chart.histogram.showStandardDeviation = True
            self.hpar.outputCrossValidationParameter.charts = [chart]
    return info

def getMinMaxX(self, rfTraining, dataX):
    """Get Max-Min Value of the continuous (numeric) X variables.

    INPUT:
    rfTraining (RFField instance): Information about each Variable
    dataX (n-array, float): X variable matrix (observations x variables)

    OUTPUT:
    list of [min, max] per continuous variable, or None when dataX is
    None/empty or there are no continuous variables
    """
    if dataX is None:
        return None
    variables = rfTraining.getVars()
    #### Last Entry Describes Y; Keep Only the X Variables ####
    variables = variables[0:len(variables)-1]
    continuousVar = list(variables).count(1)
    if continuousVar > 0 and len(dataX):
        #### Type Code 1 Marks a Continuous Variable ####
        minMax = [[dataX.T[i].min(), dataX.T[i].max()]
                  for i, e in enumerate(variables) if e == 1]
        return minMax
    else:
        return None
def reportShare(self, rfTraining, shareTraining, shareTest = None,
                sharePredict = None ):
    """Create the Share Report: per numeric variable, the min/max range in
    each dataset and the overlap ("share") between dataset ranges.

    INPUT:
    rfTraining (RFField instance): Information about each Variable
    shareTraining (list - tuples): min-max of each variable in Training
    shareTest (list - tuples): min-max of each variable in Validation
    sharePredict (list - tuples): min-max of each variable in Prediction

    OUTPUT:
    report (string): output ("" when there are no numeric variables)
    """
    header = ARCPY.GetIDMessage(84860)
    footMsg1 = "(a) " + ARCPY.GetIDMessage(84861)
    footMsg2 = "(b) " + ARCPY.GetIDMessage(84865)
    footMsg3 = "(c) " + ARCPY.GetIDMessage(84866)
    trainingMsg = ARCPY.GetIDMessage(84862)
    predictionMsg = ARCPY.GetIDMessage(84863)
    validationMsg = ARCPY.GetIDMessage(84856)
    maximumMsg = ARCPY.GetIDMessage(84413)
    minimumMsg = ARCPY.GetIDMessage(84412)
    shareMsg = ARCPY.GetIDMessage(84273)
    variableMsg = ARCPY.GetIDMessage(84068)
    descValues = ARCPY.GetIDMessage(84899)
    descValues2 = ARCPY.GetIDMessage(84900)
    minMaxMsg = [minimumMsg, maximumMsg]
    #### Two-Row Header: Dataset Group Names, Then Column Names ####
    headerColumns1 = [variableMsg, minimumMsg, maximumMsg]
    headerColumns = ["", trainingMsg, ""]
    justifyArray = ["left", "left", "left"]
    shareBase = self.getMinMaxX(rfTraining, rfTraining.x)
    if shareTest is not None:
        headerColumns.extend([validationMsg, "" ])
        justifyArray.extend(["left", "left"])
        headerColumns1.extend(minMaxMsg)
    if sharePredict is not None:
        headerColumns.extend([predictionMsg, ""])
        justifyArray.extend(["left", "left"])
        headerColumns1.extend(minMaxMsg)
    #### Share Columns (a)/(b)/(c) Follow the Min/Max Columns ####
    headerColumns.append(trainingMsg)
    headerColumns1.append(shareMsg+"(a)")
    justifyArray.append("right")
    if shareTest is not None:
        headerColumns.append(validationMsg)
        headerColumns1.append(shareMsg+"(b)")
        justifyArray.append("right")
    if sharePredict is not None:
        headerColumns.append(predictionMsg)
        headerColumns1.append(shareMsg+"(c)")
        justifyArray.append("right")
    variables = rfTraining.getVars()
    #### Last Entry Describes Y; Keep Only the X Variables ####
    variables = variables[0:len(variables)-1]
    cnt = 0
    listTable = []
    listTable.append(headerColumns)
    listTable.append(headerColumns1)
    for i, typeVar in enumerate(variables):
        #### Type Code 1 Marks a Continuous (Numeric) Variable ####
        if typeVar == 1:
            rows = []
            nameVar = rfTraining.fields[i].alias
            dataB = shareBase[cnt]
            dataT = shareTraining[cnt]
            rows.append(nameVar)
            #### Scientific Notation for Very Small Training Maxima ####
            stringFormat = "{:.2f}"
            if dataT[1] < 0.001:
                stringFormat = "{:.2e}"
            rows.extend([stringFormat.format(dataT[0]),
                         stringFormat.format(dataT[1])])
            baseShare = []
            if shareTest is not None:
                dataTest = shareTest[cnt]
                rows.extend([stringFormat.format(dataTest[0]),
                             stringFormat.format(dataTest[1])])
            if sharePredict is not None:
                dataPred = sharePredict[cnt]
                rows.extend([stringFormat.format(dataPred[0]),
                             stringFormat.format(dataPred[1])])
            share = STATS.calculateShare(dataB, dataT)
            baseShare.append(self.checkShare(share))
            if shareTest is not None:
                share = STATS.calculateShare(dataT, dataTest)
                baseShare.append(self.checkShare(share))
            if sharePredict is not None:
                share = STATS.calculateShare(dataT, dataPred)
                baseShare.append(self.checkShare(share, True))
            rows.extend(baseShare)
            listTable.append(rows)
            cnt += 1
    #### There Are No Numeric Variables ####
    if cnt == 0:
        return ""
    outputReport = UTILS.outputTextTable(listTable, header = header,
                                         justify = justifyArray, pad = 1,
                                         colPad = 3, titleFillToken = "-")
    outputReport = "\n"+outputReport + footMsg1
    if shareTest is not None:
        outputReport += "\n"+ footMsg2
    if sharePredict is not None:
        outputReport += "\n"+ footMsg3
    return outputReport + "\n" + descValues + "\n" + descValues2
def checkShare(self, share, isPrediction = False):
    """Format a share value, appending a marker when the share is
    incomplete, non-overlapping or extrapolated.

    INPUT:
    share (tuple): (value, type description)
    isPrediction (bool): True for prediction shares, where Incomplete
        does not matter but Extrapolation does

    OUTPUT:
    outputStr (str): value formatted to two decimals plus marker
    """
    value, typeValue = share
    suffix = ""
    if isPrediction:
        #### Prediction: "*" for no overlap, "+" for extrapolation ####
        if "No overlapping" in typeValue:
            suffix = "*"
        if "Extrapolation" in typeValue:
            suffix = "+"
    else:
        #### Training/Validation: "*" for incomplete or no overlap ####
        if "Incomplete" in typeValue or "No overlapping" in typeValue:
            suffix = "*"
    return "{0:.2f}{1}".format(value, suffix)
def processRasterByParts(self, rfTest, randSeed, hpar, newSRF,
                         useNetCDFAsOutput = False,
                         multipleFLTAreCreated = False):
    """This function process the raster in parts, dividing the raster
    output in chuncks and predicting each chunk using the same model
    parameters.

    INPUT:
    rfTest (InfoForestField instance): prediction-side forest data
    randSeed (int): seed for every forest call
    hpar (HandlePar instance): parameters (trees, leaf size, output path...)
    newSRF (Spatial Reference): output spatial reference
    useNetCDFAsOutput {bool, False}: buffer chunks through one netCDF file
    multipleFLTAreCreated {bool, False}: one FLT per chunk, mosaicked at end
    """
    rfTraining = self.rfTraining
    header = rfTraining.getHeader()
    #### Calculate Share from Training Data ####
    shareTraining = self.getMinMaxX(rfTraining, rfTraining.x_train)
    shareTest = self.getMinMaxX(rfTraining, rfTraining.x_test)
    #### Get Number of Zones ####
    nRasterOutput = len(self.infoRasterInstances.rangeZones)
    #### Create List Of Id Zones ####
    listIds = list(NUM.arange(nRasterOutput))
    #### Threshold Values to Calculate Render ####
    sampleVisSize = 1000
    #### Number Values To Extract in each Zone ####
    sampleVisSize = int(sampleVisSize / nRasterOutput) + 1
    #### Initialize Values To Render ####
    valuesToRender = []
    rasters = []
    #### Number Of Cells To Process ####
    sizeRaster = float(self.infoRasterInstances.nRows) * \
                 float(self.infoRasterInstances.nCols)
    #### Number of Cell per Chunck ####
    nOutputTempRasters = int(NUM.ceil(sizeRaster / (maxSizeCells/2)))
    ### Get Number of Zones Per Chuck (Output Raster to write) ####
    pn = int(NUM.ceil(nRasterOutput / nOutputTempRasters))
    #### Set Each Part to a Raster Zone ####
    #### Create a List of Ids In each Chuck ####
    oup = [NUM.asarray(NUM.arange(pn) + i*pn, dtype = int)
           for i in range(nOutputTempRasters)]
    oup = [ i[i<nRasterOutput] for i in oup ]
    #### Initialize Variables ####
    dataBase = dataset = variable = pathTempNC = pathFLT = nameFLT = None
    workspaceOutput = ""
    listRasterZones = []
    #### Calculate Initial Chuck Information ####
    infoSlice = self.calculateChunckSize(self.infoRasterInstances, oup[0])
    chunckSize , newSliceCol, newSliceRow, iniIndex, \
    xMinYMin, nColsStep, nRowsStep = infoSlice
    #### Raster Keep in Memory #####
    if sizeRaster < maxSizeCells:
        #### Create an array smaller than maxSizeCells ###
        #### Only one Temporal file is created ####
        dataBase = NUM.ones((self.infoRasterInstances.nRows*self.infoRasterInstances.nCols),
                            dtype = NUM.float32)*float(NULL)
    else:
        if useNetCDFAsOutput:
            chunckSize = (nRowsStep, nColsStep)
            #### Create on netCDF file ####
            dataset, variable, pathTempNC = \
                self.createTemporalNC(self.infoRasterInstances,
                                      chunkSizes = None)
        else:
            #### Calculate Last Chuck Information ####
            infoSlice = self.calculateChunckSize(self.infoRasterInstances,
                                                 oup[-1])
            chunckSize , newSliceCol, newSliceRow, iniIndex, \
            xMinYMin, nColsStep, nRowsStep = infoSlice
            #### Create FLT Using Info Last Chunck To set ####
            pathFLT, nameFLT = self.createTemporalFLT(
                cellSize = self.infoRasterInstances.cellSize,
                xMinYMin = xMinYMin,
                nRowsCols = (self.infoRasterInstances.nRows,
                             self.infoRasterInstances.nCols),
                data = None,
                srf = self.infoRasterInstances.srf)
    cnt = 0
    #### Iterate Number of Raster ####
    # NOTE(review): if every zone mask is None, cnt stays 0 and
    # sharePredictValues below is never bound — confirm upstream guarantees
    # at least one valid zone.
    for listBase in oup:
        #### Get Information of Current Chuck (Group of Zones) ####
        infoSlice = self.calculateChunckSize(self.infoRasterInstances,
                                             listBase)
        chunckSize , newSliceCol, newSliceRow, iniIndex, \
        xMinYMin, nColsStep, nRowsStep = infoSlice
        if not sizeRaster < maxSizeCells:
            #### Create Temporal Array Container ####
            dataBase = NUM.ones((nRowsStep*nColsStep),
                                dtype = NUM.float32)*float(NULL)
        #### Predict Each Zone in a Chunck ####
        for i in listBase:
            #### Extract Data From All Raster Dataset for Current Zone ####
            mask = rfTest.loadDataRasterForPredicting(rfTraining,
                                                      self.infoRasterInstances,
                                                      i)
            #### If Mask is None ####
            if mask is None:
                continue
            #### Get Dimension Raster Zones ####
            self.rasterDimensionInfo = \
                self.infoRasterInstances.getRasterInfoZone(i)
            if sizeRaster < maxSizeCells:
                #### Mask Contain Real Cell Ids ####
                self.rasterDimensionInfo[1] = mask
            else:
                #### Mask Id Cell are Relative to Current Container ####
                self.rasterDimensionInfo[1] = mask - iniIndex
            ##### Execute Forest For each Zone ####
            dataP = ARC._ss.forest(x = rfTraining.x_train,
                                   y = rfTraining.y_train,
                                   x_test = rfTraining.x_test,
                                   y_test = rfTraining.y_test,
                                   type_vars = rfTraining.getVars(),
                                   real_x_to_predict = rfTest.x,
                                   number_trees = hpar.numTrees,
                                   node_size = hpar.leafSize,
                                   sample_size = hpar.sample_size,
                                   max_nodes = hpar.maxLevel,
                                   model = "",
                                   header = header,
                                   permute_vars = hpar.fields_to_try,
                                   seed = randSeed,
                                   balance = int(hpar.balanceTree))
            #### Stop Process ###
            if dataP is None:
                return
            #### Aggregate Values To Calculate Share ####
            shareValues = self.getMinMaxX(rfTraining, rfTest.x)
            #### Get Information To Calculate Share from Each Zone ####
            if cnt == 0:
                sharePredictValues = shareValues
            else:
                # NOTE(review): this loop variable shadows the zone id `i`;
                # benign here because `i` is not used again this iteration.
                for i, ele in enumerate(shareValues):
                    sharePredictValues[i].extend(ele)
            cnt += 1
            #### Update Container dataBase ####
            listVal = self.outputRaster(dataP[2], newSRF = newSRF,
                                        getNValues = sampleVisSize,
                                        dataBase = dataBase)
            valuesToRender.extend(listVal)
            del dataP
            GC.collect()
        if not sizeRaster < maxSizeCells:
            #### One Temporal netCDF file ####
            if useNetCDFAsOutput:
                #### Reshape Array ####
                dataBase.shape = (nRowsStep, nColsStep)
                #### Set netCDF Slice / Insert Data ###
                variable[newSliceRow,newSliceCol] = dataBase
                #### Flush data To disk ####
                dataset.sync()
            else:
                ##### Update Temporal File FLT for each Chunck ####
                ARCPY.SetProgressor("default", "Adding prediction..." )
                #### Reshpae dataBase Array ####
                dataBase.shape = (nRowsStep, nColsStep)
                #### Update Temporal FLT File ####
                workspaceOutput, outputName = self.createTemporalFLT(
                    data = dataBase,
                    pathFileAndName = (pathFLT, nameFLT))
                listRasterZones.append(outputName)
                ARCPY.ResetProgressor()
    #### Calculate Min Max ####
    sharePredict = []
    for el in sharePredictValues:
        sharePredict.append([min(el), max(el)])
    #### Create Share Summary ####
    shareMsg = self.reportShare(rfTraining, shareTraining, shareTest,
                                sharePredict)
    ARCPY.AddMessage(shareMsg)
    if sizeRaster < maxSizeCells:
        #### Create Temporal File FLT For Container ####
        ARCPY.SetProgressor("default", ARCPY.GetIDMessage(84857))
        dataBase.shape = (self.infoRasterInstances.nRows,
                          self.infoRasterInstances.nCols)
        self.createOutputRasterFromTemporalFile(hpar.outputRaster,
                                                self.infoRasterInstances,
                                                dataBase )
        ARCPY.ResetProgressor()
    else:
        if useNetCDFAsOutput:
            #### Close NetCDF File ####
            dataset.close()
            strOut = "RFNC_OUTPUT"
            ARCPY.SetProgressor("default", ARCPY.GetIDMessage(84857))
            #### Make Raster Layer ####
            ARCPY.md.MakeNetCDFRasterLayer(pathTempNC, "PREDICTED", "x",
                                           "y", strOut, None, None,
                                           "BY_VALUE")
            #### Copy NetCDF Layer To Output Raster File ####
            ARCPY.management.CopyRaster(strOut, hpar.outputRaster, None,
                                        None, NULL, "NONE", "NONE", None,
                                        "NONE", "NONE", None, "NONE")
            #### Delete Raster Layer ####
            UTILS.passiveDelete(strOut)
            ARCPY.ResetProgressor()
            ARCPY.SetProgressor("default", "Removing Temporal NetCDF File")
            try:
                OS.remove(pathTempNC)
            except:
                ARCPY.AddWarning("Temporal NetCDF File was not removed " +
                                 pathTempNC)
                pass
            ARCPY.ResetProgressor()
        else:
            #### Suppress Pyramid/Statistics Building for Speed ####
            ARCPY.env.pyramid = None
            ARCPY.env.rasterStatistics = None
            if multipleFLTAreCreated :
                listRasterZonesFLT = [workspaceOutput+"\{0}.flt".format(i)
                                      for i in listRasterZones]
                #### Save Current WorkSpace ####
                currentWrksp = ARCPY.env.workspace
                ARCPY.env.workspace = workspaceOutput
                outPath, outName = OS.path.split(hpar.outputRaster)
                #### Merging Rasters ####
                ARCPY.SetProgressor("default", "Merging Rasters ")
                try:
                    ARCPY.MosaicToNewRaster_management(";".join(listRasterZonesFLT),
                                                       outPath, outName,
                                                       self.infoRasterInstances.srf,
                                                       "32_BIT_FLOAT",
                                                       self.infoRasterInstances.cellSize,
                                                       1, "LAST", "FIRST")
                except:
                    ARCPY.AddWarning("Assert - Merge Failed - please check temporal files " +
                                     ";".join(listRasterZonesFLT))
                    raise SystemExit
                ARCPY.ResetProgressor()
                #### Applying Previous WorkSpace ####
                ARCPY.env.workspace = currentWrksp
            else:
                strOut = OS.path.join(pathFLT, nameFLT + ".flt")
                ARCPY.SetProgressor("default", "Creating Raster...")
                #### Copy NetCDF Layer To Output Raster File ####
                ARCPY.management.CopyRaster(strOut, hpar.outputRaster,
                                            None, None, NULL, "NONE",
                                            "NONE", None, "NONE", "NONE",
                                            None, "NONE")
                ARCPY.ResetProgressor()
                listRasterZonesFLT = [strOut]
            ARCPY.SetProgressor("default", "Removing Temporal Files")
            ### Remove Temporal Files ####
            for pathFLT in listRasterZonesFLT:
                try:
                    OS.remove(pathFLT)
                    OS.remove(pathFLT.replace(".flt",".hdr"))
                    OS.remove(pathFLT.replace(".flt",".prj"))
                except:
                    pass
            ARCPY.ResetProgressor()
    isClass = self.rfTraining.yField.rfType == categoricalVar
    #### Required Output For Classification ####
    if isClass:
        valuesToRender = (self.rfTraining.yField.info,
                          list(set(valuesToRender)))
    else:
        valuesToRender = NUM.asarray(valuesToRender, dtype = float)
    ARCPY.SetProgressor("default",ARCPY.GetIDMessage(84858))
    #### Apply Layer In The Output Raster Merged ####
    applyLayer(hpar, "output_raster", data = valuesToRender,
               isClass = isClass,
               headingName = self.rfTraining.yField.name,
               shapeType = "Raster", isTrainingOutput = False,
               isRasterLayer = True)
    ARCPY.ResetProgressor()
    #### Delete Objects ####
    del dataBase
    GC.collect()
hpar.numTrees == 0: if hpar.outputTrainedFeatures is not None: self.createOutputFromRFField(rfTraining, hpar.outputTrainedFeatures, None, hpar, ssdoTrField = ssdoPoints2Train) return else: ARCPY.AddIDMessage("ERROR", 110229) return header = rfTraining.getHeader() randSeed1, bestValue, listDecValues, maxIndexReach = self.iterateTraining(randSeed, rfTraining ) if randSeed1 != randSeed: randSeed = randSeed1 rfTraining.seed = randSeed rfTraining.splitDataForTesting(hpar.balanceTree) #### Check All Categories Were Used #### rfTraining.viewSplitMessage = True rfTraining.areAllCategoriesUsed() data = ARC._ss.forest(x = rfTraining.x_train, y = rfTraining.y_train, x_test = rfTraining.x_test, y_test = rfTraining.y_test, type_vars = rfTraining.getVars(), number_trees = hpar.numTrees, node_size = hpar.leafSize, sample_size = hpar.sample_size, model = "", max_nodes = hpar.maxLevel, header = header, permute_vars = hpar.fields_to_try, seed = randSeed, balance = int(hpar.balanceTree)) #### RF does not return anything #### if data is None: return self.report(rfTraining, data, hpar, randSeed, bestValue, listDecValues, maxIndexReach, ssdoPoints2Train) return if hpar.what in ["RASTER", "FEATURES"]: header = rfTraining.getHeader() randSeed1, bestValue, listDecValues, maxIndexReach = self.iterateTraining(randSeed, rfTraining ) if randSeed1 != randSeed: randSeed = randSeed1 rfTraining.seed = randSeed rfTraining.splitDataForTesting(hpar.balanceTree) else: #### Raster Dataset Split Testing Subset #### if infoRasterInstances is not None: rfTraining.splitDataForTesting(hpar.balanceTree) #### Check All Categories Were Used #### rfTraining.viewSplitMessage = True rfTraining.areAllCategoriesUsed() data = ARC._ss.forest(x = rfTraining.x_train, y = rfTraining.y_train, x_test = rfTraining.x_test, y_test = rfTraining.y_test, type_vars = rfTraining.getVars(), real_x_to_predict = rfTest.x, number_trees = hpar.numTrees, node_size = hpar.leafSize, sample_size = hpar.sample_size, max_nodes = 
hpar.maxLevel, model = "", header = header, permute_vars = hpar.fields_to_try, seed = randSeed, balance = int(hpar.balanceTree)) #### RF does not return anything #### if data is None: return predictData = data[2] self.report(rfTraining, data, hpar, randSeed, bestValue, listDecValues, maxIndexReach, ssdoPoints2Train, rfTest = rfTest) if hpar.what == "FEATURES": if hpar.outputFeatures != "": self.createOutput(predictData, rfTraining, rfTest, rfTest.ssdo, hpar.outputFeatures, hpar) if hpar.what == "RASTER": if rfTraining.ssdo is None: self.outputRaster(predictData, newSRF = newSRF) else: self.outputRaster(predictData, newSRF = newSRF) class BaseRaster: """ Base Raster """ def __init__(self): self.nCols = None self.nRows = None self.xc = None self.yc = None def getXY(self, idIni = None, col = None, row = None): """Get XY Coordinates centroid from index (int) or array of indices [int] """ if col is None: col = idIni % self.nCols row = idIni // self.nCols if type(idIni) in [NUM.int32, int] or type(col) in [NUM.int32, int]: #return NUM.array([self.xMin + col*self.xc,self.yMax - self.yc - row*self.yc]) return NUM.array([self.xMin + col*self.xc + self.xc*0.5, self.yMax - self.yc*0.5 - row*self.yc]) else: col = idIni % self.nCols row = idIni // self.nCols coord = NUM.zeros((len(idIni),2), dtype = float) coord.T[0] = self.xMin + self.xc*.5 + col*self.xc coord.T[1] = self.yMax - self.yc*.5 - row*self.yc return coord def getColRow(self, x, y): """ get Col Row """ dx = x - self.xMin dy = self.yMax - y col = NUM.ceil(dx/self.xc) - 1 row = NUM.ceil(dy/self.yc) - 1 return int(col), int(row) class TestRaster(): """ TestRaster Class is used test the RasterMeta/RasterInfo Fuctions """ def __init__(self, cellSize, extent, srf, testName = ""): """ Initilize Class INPUT: cellSize (float): Cell Size extent (arcpy.Extent instance): Raster Extent srf (Spatial Reference instance): Spatial Reference testName {strin, ""}: Raster Name """ self.meanCellWidth = cellSize self.meanCellHeight = 
cellSize
        self.extent = extent
        self.spatialReference = srf
        self.bandCount = 1
        self.name = "Raster " + testName
        # NOTE(review): r"Raster\\" is a raw string, so the path prefix keeps
        # BOTH backslashes literally -- confirm this is the intended prefix.
        self.catalogPath = r"Raster\\" + testName
        self.width = int( (self.extent.XMax - self.extent.XMin)/cellSize)
        self.height = int( (self.extent.YMax - self.extent.YMin)/cellSize)

    def dummyLarge(size = 1, name = ''):
        """Create Dummy large Raster for testing.

        NOTE(review): defined at class level but takes no `self`; it appears
        to be a factory helper -- confirm how call sites invoke it.
        """
        if size == 1:
            srf = ARCPY.SpatialReference(26910)
            cellSize = 0.1
            xMin = yMax = 500000
            nCols = 150000
            nRows = 50000
            xMax = xMin + nCols*cellSize
            yMin = yMax - nRows*cellSize
            ext = ARCPY.Extent(XMin = xMin, YMin = yMin, XMax = xMax, YMax = yMax)
            return TestRaster(cellSize, ext, srf, testName = name)
        if size == 2:
            srf = ARCPY.SpatialReference(102003)
            cellSize = 10.4089398045887
            xMin = -2950438.4687
            yMax = 1733595.78
            nCols = 576358
            nRows = 310462
            xMax = xMin + nCols*cellSize
            yMin = yMax - nRows*cellSize
            ext = ARCPY.Extent(XMin = xMin, YMin = yMin, XMax = xMax, YMax = yMax)
            return TestRaster(cellSize, ext, srf, testName = name)

    def __repr__(self):
        # RasterInfo/RasterMeta dispatch on this string to detect test rasters.
        return "TestRaster"

class RasterInfo(BaseRaster):
    """ This Class extracts information of a list of raster datasets"""
    def __init__(self, listRasters, typeRF, bandList = None, maxSize = MEMSIZE,
                 srfOutput = None, ssdo = None, printInfo = True,
                 thresholdRaster = MEMPOINT, rasterNames = None ):
        """Constructor RasterInfo Class """
        self.printInfo = printInfo
        self.ssdo = ssdo
        self.srfOutput = srfOutput
        # When an ssdo is provided, its spatial reference takes precedence.
        if self.ssdo is not None:
            self.srfOutput = None
        #### Mask Extent to Predict ####
        self.maskExtentFC = ARCPY.env.mask
        self.maskDataFC = None
        #### Maximum data to Process by Batch ####
        self.MAXALLOWED = maxSize
        self.thresholdRaster = thresholdRaster
        if str(listRasters[0]) == "TestRaster":
            # Test path: TestRaster instances stand in for real rasters.
            self.sourceListRasters = listRasters
            self.listRasters = listRasters
            self.newNames = [i.name for i in listRasters]
            self.dictRasterBand = {i.catalogPath: [[order, ""]] for order, i in enumerate(listRasters)}
            self.orderRaster = [[listRasters[0].catalogPath, ""] * len(listRasters)]
        else:
            #### List of Raster Dataset Sources ####
            self.sourceListRasters = listRasters
            #### Detect Bands in List of Rasters ####
            groupRasterInfo = getNameRasterFromBands(listRasters, rasterNames, False)
            #### Names-Bands / Raster Path Dict([Order -Bands]) / Just Raster Path ####
            self.newNames, self.dictRasterBand, self.orderRasterN = groupRasterInfo
            #### List of Real Raster to Process (excluding Bands) ####
            self.listRasters = list(self.dictRasterBand.keys())
        #### Define Type of Extraction (True-> Bilinear/ False-> Nearest) ####
        self.typeRF = typeRF
        #### Number of Layers(Bands in all Raster Datasets) ####
        self.numTotalOfBandToUse = len(self.sourceListRasters)
        #### Reduce Threshold to Avoid Create Large Size of Centroids ####
        if self.numTotalOfBandToUse == 1:
            self.MAXALLOWED = self.MAXALLOWED/2
        #### List of Bands in each Raster ####
        self.bandList = []
        #### Index Order of each band respect source raster order ####
        self.orderBands= []
        #### 1-bil 0->near ####
        self.typeField = []
        for rasterPath in self.dictRasterBand:
            orderBandsArray = self.dictRasterBand[rasterPath]
            orderR, bandR = orderBandsArray[0]
            #### Check Number of Bands to Process in each Raster ####
            if len(orderBandsArray) == 1 and bandR in ["", "RenamedBand"]:
                if bandR == "RenamedBand":
                    # -1 marks a renamed single band; resolved in RasterMeta.
                    self.bandList.append(NUM.array([-1], dtype = int))
                else:
                    self.bandList.append(NUM.array([0], dtype = int))
                self.orderBands.append(NUM.array([orderR], dtype = int))
                typeFieldv = 1 if self.typeRF[orderR] else 0
                self.typeField.append(NUM.array([typeFieldv], dtype = int))
            else:
                bands = []
                orderv = []
                typeFieldv = []
                #### Generate list of index Bands and Order Index ####
                for order_band in orderBandsArray:
                    # "Band_N" -> zero-based band index N-1.
                    bands.append(int(order_band[1].upper().replace("BAND_",""))-1)
                    orderv.append(order_band[0])
                    typeFieldT = 1 if self.typeRF[order_band[0]] else 0
                    typeFieldv.append(typeFieldT)
                self.bandList.append(NUM.array(bands, dtype = int))
                self.orderBands.append(NUM.array(orderv, dtype = int))
                self.typeField.append(NUM.array(typeFieldv, dtype = int))
        self.grp = {}
        self.infoR = []
        self.gcsR = []
        #### Get Raster Properties ####
        self.infoAboutRasters()

    def getInfo(self, otherData):
        """ Get Info about raster -- writes a diagnostic table via AddMessage. """
        header = "Info Raster"
        justif = ["Left", "Left"]
        rows = []
        rows.append([".listRaster", str(self.newNames)])
        rows.append([".intersection", str(self.intersection)])
        rows.append([".cellSize", str(self.cellSize)])
        rows.append(["Number Zones", str(len(self.rangeZones))])
        for i, e in enumerate(self.rangeZones):
            rows.append(["Zones {0}".format(i), str(e[0])+ "-"+ str(e[1])])
        for i,listBase in enumerate(otherData):
            iniBase = self.getRasterInfoZone(listBase[0])[0]
            endBase = self.getRasterInfoZone(listBase[-1])[0]
            #### Get Indice Info ####
            iniRow = iniBase[0].start
            endRow = endBase[0].stop
            iniCol = iniBase[1].start
            endCol = endBase[1].stop
            nColsStep = endCol-iniCol
            nRowsStep = endRow-iniRow
            iniIndex = self.rangeZones[listBase[0]][0]
            xMinYMin = self.getRasterInfoZone(listBase[-1])[2]
            rows.append(["iniBase zone {0}/{1}".format(i, len(listBase)), str(iniBase)])
            rows.append(["endBase zone {0}/{1}".format(i, len(listBase)), str(endBase)])
        outputReport = UTILS.outputTextTable(rows, header = header,
                                             justify = justif, pad = 1,
                                             colPad = 5, titleFillToken = "-")
        ARCPY.AddMessage(outputReport)

    def getExt(self, ssdo):
        """ Return the extent of the polygon feature class in ssdo. """
        polyDict, polyAreas = UTILS.readPolygonFC(ssdo.inputFC,
                                                  spatialRef = ssdo.spatialRef,
                                                  useGeodesic = ssdo.useChordal)
        boundArea = sum(polyAreas.values())
        boundExtent = UTILS.getPolyExtent(polyDict)
        return UTILS.getExtent(boundExtent)

    def getMaskExtentIds(self):
        """ Get mask data using the intersection extent.

        Populates self.maskDataFC with the flat cell ids covered by the
        intersection of the analysis extent and the environment mask.
        """
        if self.intersection is not None and self.maskExtentFC is not None:
            pol = self.extentIntersection.polygon
            desc = ARCPY.Describe(self.maskExtentFC)
            polMask = desc.extent.polygon.projectAs(self.srf)
            base = pol.intersect(polMask, 4)
            if base.area > 0:
                ext = base.extent
                colTL, rowTL = self.getColRow(ext.XMin, ext.YMax)
                colBR, rowBR = self.getColRow(ext.XMax, ext.YMin)
                nCols = (colBR - colTL +1 )
                self.maskDataFC = NUM.zeros((rowBR - rowTL +
1)*(colBR - colTL + 1), dtype = NUM.int64)
                cnt = 0
                # Fill row by row: flat index = row * nCols + col.
                for row in NUM.arange(rowTL, rowBR + 1):
                    info = self.nCols*row + NUM.arange(colTL, colBR+1, dtype = int)
                    sliceV = slice(cnt*nCols,(cnt+1)*nCols)
                    cnt += 1
                    self.maskDataFC[sliceV] = info
        return

    def getRastersExtent(self, rasters, ssdo = None, spatialRef = None,
                         printCell = True, applyGetExtent = True,
                         maskExtent = None):
        """ Verify if Rasters overlap Feature Class (ssdo)

        INPUT:
        ssdo {SSDataObject instance /str fc}: object for training
        rasters {list RasterMeta}: list of rasters
        spatialRef {spatialReference instance}: Output spatial reference
        printCell {bool}: print information cell size
        applyGetExtent {bool}: return vector extent instead of Extent object
        maskExtent {extent}: mask extent (should be provided with the
            spatialRef parameter)

        OUTPUT:
        intersection {list floats}: extent of the intersection among all
            rasters -> [XMin, YMin, XMax, YMax]
        insideArea {boolean}: True if ssdo FC inside of intersection
        extent {list floats}: extent list of FC [XMin, YMin, XMax, YMax]
        cellSize {float}: resolved output cell size
        spatialRef {SpatialReference}: resolved output spatial reference
        """
        #extent = UTILS.getExtent(ssdo.extent)
        baseFC = None
        if ssdo is not None:
            baseFC = ssdo.extent.polygon
            spatialRef = ssdo.spatialRef
        if maskExtent is not None:
            if ssdo is None and spatialRef is not None:
                baseFC = maskExtent.polygon
                baseFC = baseFC.projectAs(spatialRef)
        extArr = None
        base = None
        cellMaximum = 0
        cellMinimum = 0
        #### Use Spatial Reference of First RAster ###
        if spatialRef is None:
            spatialRef = UTILS.returnOutputSpatialRef(None)
            if spatialRef is None:
                d = rasters[0]
                spatialRef = d.srf
                if printCell:
                    ARCPY.AddIDMessage("WARNING", 110196, d.name)
            if maskExtent is not None:
                baseFC = maskExtent.polygon
                baseFC = baseFC.projectAs(spatialRef)
        cellSizeInfo = ARCPY.env.cellSize
        #### Get Cell 0-> maximum, 1-> minimum, 2-> numeric env
        getCell = 0
        if cellSizeInfo not in ['MAXOF', 'MINOF']:
            numericCellSize = True
            try:
                cellSizeInfo = UTILS.strToFloat(cellSizeInfo)
                getCell = 2
            except:
                numericCellSize = False
                getCell = 0
            if getCell == 2:
                updateCellsizeAccordingSpatialRef(spatialRef, cellSizeInfo)
            #### Get Cell Size From Environment Raster Reference ####
            if not numericCellSize:
                try:
                    # env.cellSize may name a raster; derive cell size from it.
                    d = ASA.Raster(cellSizeInfo)
                    base = d.extent.polygon.projectAs(spatialRef)
                    cell = base.extent.width / d.width
                    cellSizeInfo = cell
                    getCell = 2
                except:
                    cellSizeInfo = 'MAXOF'
                    getCell = 0
        else:
            if cellSizeInfo != 'MAXOF':
                getCell = 1
            else:
                getCell = 0
        fcSR = spatialRef.factoryCode
        for id, raster in enumerate(rasters):
            rSRF = raster.srf.factoryCode
            if id == 0:
                base = raster.extr.polygon.projectAs(spatialRef) \
                       if fcSR != rSRF else raster.extr.polygon
                cellMaximum = base.extent.width / raster.width
                cellMinimum = cellMaximum
            else:
                pol = raster.extr.polygon.projectAs(spatialRef) \
                      if fcSR != rSRF else raster.extr.polygon
                cell = pol.extent.width / raster.width
                if cell > cellMaximum:
                    cellMaximum = cell
                if cell < cellMinimum:
                    cellMinimum = cell
                # Running intersection of all raster extents.
                base = base.intersect(pol, 4)
        if getCell == 0:
            cellSize = cellMaximum
        if getCell == 1:
            cellSize = cellMinimum
        if getCell == 2:
            cellSize = cellSizeInfo
        msg = ARCPY.GetIDMessage(84824)
        if printCell:
            ARCPY.AddMessage(msg.format(cellSize))
        intersection = base.extent
        if applyGetExtent:
            intersection = UTILS.getExtent(base.extent)
        insideArea = None
        if baseFC is not None:
            insideArea = not base.disjoint(baseFC)
            return intersection, insideArea, UTILS.getExtent(baseFC.extent), cellSize, spatialRef
        else:
            return intersection, None, None, cellSize, spatialRef

    def getWorkExtent(self):
        """ Obtain extent / centroids ids work area """
        ### ssdo obtains the srfOutput to use
        ssdo = self.ssdo
        data = None
        self.maskExtentFC = ARCPY.env.mask
        #### Ignore Mask if it does not exists ####
        if self.maskExtentFC not in ["", None]:
            if not ARCPY.Exists(self.maskExtentFC):
                ARCPY.AddIDMessage("ERROR", 10161 , self.maskExtentFC)
                raise SystemExit
        #### srfOutput from inputFC / enviroment ####
        if self.srfOutput is not None:
            #### Obtain Intersection Area / CellSize / Reference System ####
            if self.maskExtentFC is not None:
                ##### Predicting in Same Area Using Mask ####
                info = ARCPY.Describe(self.maskExtentFC)
                maskExtent = info.extent
                data = self.getRastersExtent(self.infoR,
                                             spatialRef = self.srfOutput,
                                             applyGetExtent = False,
                                             maskExtent = maskExtent,
                                             printCell = self.printInfo)
            else:
                ##### Predicting in Same Area Without Mask ####
                data = self.getRastersExtent(self.infoR,
                                             spatialRef = self.srfOutput,
                                             applyGetExtent = False,
                                             printCell = self.printInfo)
        else:
            if ssdo is not None:
                #### Only Training use SSDO srf (inputFC/ ####
                data = self.getRastersExtent(self.infoR, ssdo = ssdo,
                                             applyGetExtent = False,
                                             printCell = self.printInfo)
            elif self.maskExtentFC is not None:
                #### Predicting External Area using Mask ####
                info = ARCPY.Describe(self.maskExtentFC)
                maskExtent = info.extent
                data = self.getRastersExtent(self.infoR,
                                             applyGetExtent = False,
                                             maskExtent = maskExtent,
                                             printCell = self.printInfo)
            else:
                #### Predicting External Area Using Raster(0) SRF ####
                data = self.getRastersExtent(self.infoR,
                                             applyGetExtent = False,
                                             printCell = self.printInfo)
        intersection, insideArea, baseExt, cellSize, srf = data
        self.intersection = UTILS.getExtent(intersection)
        data = None
        ARCPY.SetProgressor("default", ARCPY.GetIDMessage(84859))
        if ssdo is not None:
            if ssdo.shapeType == "Polygon":
                #### Get Intersection Centroids Depending on Cell Size ####
                #### Explode Polygons in DataObject - output Indices ####
                #### The spatial reference is defined by inputFC in SSDO object ####
                data = getCentroids(self.intersection, cellSize,
                                    ssdo.spatialRef, None, ssdo.inputFC, None,
                                    getIdsCentroids = True, justIndex = 0)
            else:
                #### Centroids Intersection ####
                #### The spatial reference is defined by inputFC in SSDO object ####
                data = getCentroids(self.intersection, cellSize,
                                    ssdo.spatialRef, None, None, None,
                                    getIdsCentroids = True)
        else:
            #### Centroids Intersection ####
            #### This option is used in 'external prediction area' ####
            data = getCentroids(self.intersection, cellSize, srf, None, None,
                                None, getIdsCentroids = True)
        ARCPY.ResetProgressor()
        numIds, masked, idPoly, nCols, nRows, centroids, srf = data
        #### Create Extent To Fit Raster Mask ####
        xMin = intersection.XMin
        yMax = intersection.YMax
        xMax = xMin + cellSize * nCols
        yMin = yMax - cellSize * nRows
        self.extentIntersection = ARCPY.Extent(XMin = xMin, YMin = yMin,
                                               XMax = xMax, YMax = yMax)
        self.srf = srf
        self.nRows = nRows
        self.nCols = nCols
        ### Num cells ####
        self.nCells = numIds
        ### nRows*nCols Array Masked (true/false) ####
        self.polygonMask = masked
        self.centroids = None
        ### Id Range Centroids RangeZone [0, n-1] ####
        self.idCentroids = centroids
        ### Contain OID input Polygon FC for Each Cells ####
        self.idPoly = idPoly
        self.cellSize = cellSize
        self.xc = cellSize
        self.yc = cellSize
        self.xMin = self.intersection[0]
        self.yMin = self.intersection[1]
        self.yMax = self.intersection[3]
        self.xMax = self.intersection[2]
        dist = UTILS.DistanceInfo(self.srf, useChordalDistances = False)
        self.unitCell = dist.name
        self.isGCS = dist.type == "GEOGRAPHIC"

    def getXs(self):
        """ Return X's coordinates """
        rangeZone =
self.rangeZones[0]
        xMinYmax = self.getXY(rangeZone[0])
        rangeZone = self.rangeZones[len(self.rangeZones)-1]
        xMaxYmin = self.getXY(rangeZone[1])
        return xMinYmax[0] + NUM.arange(self.nCols)*self.cellSize

    def getYs(self):
        """ Return Y's coordinates (descending, top row first). """
        rangeZone = self.rangeZones[0]
        xMinYmax = self.getXY(rangeZone[0])
        rangeZone = self.rangeZones[len(self.rangeZones)-1]
        xMaxYmin = self.getXY(rangeZone[1])
        output = xMaxYmin[1] + NUM.arange(self.nRows)*self.cellSize
        return output[::-1]

    def infoAboutRasters(self):
        """Create Groups of raster paths with the same properties origin and cell size """
        infoR = []
        for id, i in enumerate(self.listRasters):
            infoR.append( RasterMeta(rasterPath = i,
                                     threshold = self.thresholdRaster,
                                     bandList = self.bandList[id],
                                     typeFields = self.typeField[id]))
        namePr = NUM.array([i.name for i in infoR ])
        self.gcsR = [i.srf.type.upper()=="GEOGRAPHIC" for i in infoR ]
        # Group key: (xMin, yMax, cell width) -- rasters sharing origin/cell.
        extX = [(i.ext[0], i.ext[3], i.xc) for i in infoR ]
        sameExt = COLL.defaultdict(NUM.int32)
        for i in extX:
            sameExt[i]+=1
        grp = {id:[] for id in sameExt}
        for i, el in UTILS.iteritems(sameExt):
            for id, e in enumerate(infoR):
                v = (e.ext[0], e.ext[3], e.xc)
                if i[0] == v[0] and i[1] == v[1] and i[2] == v[2]:
                    grp[i].append(id)
        self.grp = grp
        self.infoR = infoR

    def getNewStart(self, x, y, rasterId):
        """ Return the (xMin, yMax) origin of raster rasterId. """
        inputRast = self.infoR[rasterId]
        return inputRast.ext[0], inputRast.ext[3]

    def getNumZonesSizes(self, m = 1, plot = False ):
        """ This method use a threshold for dividing the centroid of the AOI
        in batches. The number of rasters-bands times the number of centroids
        in the study area is used to compare the threshold. The function
        generate an index array with centroids ids of each batch.
        """
        flag = True
        nRows = 0
        #### Decide The Correct Size to Split Intersection Area ####
        while flag:
            nRows = int(self.nRows/m)
            flag = False
            if self.nCols*nRows*self.numTotalOfBandToUse > self.MAXALLOWED:
                flag = True
                m += 2
        #ARCPY.AddMessage("Number of Zones = {0}".format(m))
        #ARCPY.AddMessage("Number of R/C = {0}/{1}".format(self.nRows,self.nCols))
        centroidsExt = NUM.zeros((m*2,2), dtype = float)
        self.rasterZoneExtentBasedCentroids = []
        rangeRows = []
        if m == 1:
            rangeRows.append([0, nRows*self.nCols -1])
            centroidsExt[0,:] = self.getXY(idIni = rangeRows[0][0])
            centroidsExt[1,:] = self.getXY(idIni = rangeRows[0][1])
            self.rasterZoneExtentBasedCentroids = [[centroidsExt[0,0],
                                                    centroidsExt[1,1],
                                                    centroidsExt[1,0],
                                                    centroidsExt[0,1]]]
            return [[0, self.idCentroids[-1]]]
        for i in NUM.arange(m):
            rangeRows.append([int(float(nRows)*float(i)*float(self.nCols)),
                              int(float(nRows)*float((i+1))*float(self.nCols) -1)])
            centroidsExt[i*2,:] = self.getXY(idIni = rangeRows[i][0])
            centroidsExt[i*2+1,:] = self.getXY(idIni = rangeRows[i][1])
        # Last zone absorbs the remainder rows.
        n = len(rangeRows)-1
        rangeRows[n][1] = self.idCentroids[-1]
        centroidsExt[n*2 ,:] = self.getXY(idIni = rangeRows[n][0])
        centroidsExt[n*2+1 ,:] = self.getXY(idIni = rangeRows[n][1])
        for i in NUM.arange(m):
            # NOTE(review): indexes below use n (last zone) instead of i, so
            # every appended extent is the LAST zone's extent -- confirm
            # whether i*2 / i*2+1 was intended.
            self.rasterZoneExtentBasedCentroids.append([centroidsExt[n*2,0],
                                                        centroidsExt[n*2+1,1],
                                                        centroidsExt[n*2+1,0],
                                                        centroidsExt[n*2,1]])
        return rangeRows

    def getMaskValues(self, rangeZone):
        """ Get mask ids inside of rangeZone """
        if self.maskDataFC is not None:
            ids = NUM.asarray(self.maskDataFC[NUM.where((self.maskDataFC >= rangeZone[0]) & (self.maskDataFC <= rangeZone[1]))[0]] - rangeZone[0], dtype = NUM.int64)
            ids.sort()
            return ids , True
        return None, False

    def getCentroids(self, rangeZone, srfTo = None , ids = None):
        """Generate centroids of a specific rangeZone [Start, End] index
        The centroids are generated in the projection srfTo.
        The current intersection projection is self.srf (intersection raster/mask)
        If srfTo is equal to self.srf the centroids are not projected.

        INPUT:
        rangeZone {1D array}: start - finish indices of intersection area
        srfTo {SpatialReference Instance}: Change centroid to this SRF
        ids {array}: id mask

        OUTPUT:
        centroids {2D array (n,2)}, bool (True when ids were applied)
        """
        #### Number of Columns and Rows from Intersection Area ####
        cols = self.nCols
        rows = self.nRows
        centroids = None
        sourceSRF = None
        targetSRF = srfTo
        #### Check Source Projection and Target Projection ####
        if self.srf.factoryCode != srfTo.factoryCode:
            sourceSRF = self.srf
            targetSRF = srfTo
        else:
            targetSRF = None
        #### Define CubeInfo For generating AOI depending rangeZone and mask ####
        cubeInfo = ARC._ss.CubeInfo(rows, cols, 1, self.cellSize,
                                    use_hexagons = False,
                                    is_mask_required = False)
        if sourceSRF is not None:
            ### No transformation is used in the projection process ####
            if self.srf.type.upper() == "PROJECTED" and srfTo.type.upper() == "GEOGRAPHIC":
                #### Create Centroids lon/lat using self.srf Ellipsoid ####
                #### Similar method xy_to_lonlat(xy, srf) ####
                centroids = cubeInfo.return_centroids_from_ids(self.xMin, self.yMax,
                                                               srf = sourceSRF,
                                                               start = NUM.int64(rangeZone[0]),
                                                               finish = NUM.int64(rangeZone[1]),
                                                               ids = ids, srfTo = None)
            elif self.srf.type.upper() in [ "GEOGRAPHIC", "PROJECTED"] \
                 and srfTo.type.upper() == "PROJECTED":
                #### Create centroids in the srfTo projection ####
                #### Similar method change_projection_xy(xy, srf, srfTo) ####
                centroids = cubeInfo.return_centroids_from_ids(self.xMin, self.yMax,
                                                               srf = sourceSRF,
                                                               start = NUM.int64(rangeZone[0]),
                                                               finish = NUM.int64(rangeZone[1]),
                                                               ids = ids, srfTo = targetSRF)
        else:
            #### Get Centroids Without Apply Projection ####
            # NOTE(review): a geographic->geographic pair with different
            # factory codes falls through the branch above with centroids
            # still None -- confirm that case cannot occur.
            centroids = cubeInfo.return_centroids_from_ids(self.xMin, self.yMax,
                                                           srf = None,
                                                           start = NUM.int64(rangeZone[0]),
                                                           finish = NUM.int64(rangeZone[1]),
                                                           ids = ids, srfTo = None)
        return centroids, True if ids is not None else False

    def preProcessZones(self):
        """ Get Zones using information all rasters """
        self.getWorkExtent()
        self.getMaskExtentIds()
        self.rangeZones = self.getNumZonesSizes()

    def getRasterInfoZone(self, index = None):
        """This method return the information required to create a raster
        using numpytoraster.

        OUTPUT: [(sliceRow, sliceCol), None, (minX, minY), cellSize,
                 nRows, nCols]
        """
        minX = 0
        minY = 0
        if index is not None:
            rangeZone = self.rangeZones[index]
            xMinYmax = self.getXY(rangeZone[0])
            xMaxYmin = self.getXY(rangeZone[1])
            xMinYMin = NUM.array([xMinYmax[0], xMaxYmin[1]])
            xMaxYMax = NUM.array([xMaxYmin[0], xMinYmax[1]])
            # Centroid -> lower-left corner of the zone.
            minX = xMinYMin[0] - self.cellSize/2.0
            minY = xMinYMin[1] - self.cellSize/2.0
            nCols = int(NUM.round((xMaxYMax[0] - xMinYMin[0])/self.cellSize)) + 1
            nRows = int(NUM.round((xMaxYMax[1] - xMinYMin[1])/self.cellSize)) + 1
            colRowLT = self.getColRow(xMinYMin[0], xMaxYMax[1])
            colRowRB = self.getColRow(xMaxYMax[0], xMinYMin[1])
            sliceRow = slice(colRowLT[1], colRowRB[1] + 1)
            sliceCol = slice(colRowLT[0], colRowRB[0] + 1)
            return [(sliceRow, sliceCol), None, (minX, minY), self.cellSize, nRows, nCols]
        else:
            if self.centroidsOutput is not None:
                xMinYMin = self.centroidsOutput.min(0)
                xMaxYMax = self.centroidsOutput.max(0)
                minX = xMinYMin[0] - self.cellSize/2.0
                minY = xMinYMin[1] - self.cellSize/2.0
                nCols = int(NUM.round((xMaxYMax[0] - xMinYMin[0])/self.cellSize)) + 1
                nRows = int(NUM.round((xMaxYMax[1] - xMinYMin[1])/self.cellSize)) + 1
                colRowLT = self.getColRow(xMinYMin[0], xMaxYMax[1])
                colRowRB = self.getColRow(xMaxYMax[0], xMinYMin[1])
                sliceRow = slice(colRowLT[1],colRowRB[1]+1)
                sliceCol = slice(colRowLT[0],colRowRB[0]+1)
                return [(sliceRow, sliceCol), None, (minX, minY), self.cellSize, nRows, nCols]
            else:
                # NOTE(review): ARCPY.Error looks like it should be
                # ARCPY.AddError -- confirm.
                ARCPY.Error("Centroids Output empty")
                raise SystemExit
        return [None, None, (minX, minY), self.cellSize, self.nRows, self.nCols]

    def getAOIData(self, index = 0):
        """ Get Information of AOI index Index -> rangeZone """
        X, mask = self.extractZone(self.rangeZones[index])
        return X, mask

    def extractDataFromPolygons(self, ssdo):
        """ Get X variables of polygon samples for training """
        self.ssdo
= ssdo
        self.srfOutput = None
        id = 0
        msg = ARCPY.GetIDMessage(84823)
        msg = msg.format("...","")
        ARCPY.SetProgressor("default", msg, 0, len(self.listRasters), 1)
        self.getWorkExtent()
        size = len(self.idPoly)
        nCells = self.nCols*self.nRows -1
        # NULL marks cells not covered by a raster; masked out below.
        dataX = NUM.ones((size, len(self.sourceListRasters)), dtype = float)*NULL
        maskedValuesSampling = NUM.ones(size, dtype = bool)
        self.centroids = None
        for i in self.grp:
            #### Projection Group ####
            indexRaster = self.grp[i][0]
            srfRaster = self.infoR[indexRaster].srf
            currentRaster = self.infoR[indexRaster]
            #### Centroids rangeZone ####
            centroids, maskedIds = self.getCentroids([0,nCells],
                                                     srfTo = srfRaster,
                                                     ids = self.polygonMask)
            if self.srf.factoryCode == srfRaster.factoryCode:
                self.centroids = centroids
            centroids = currentRaster.coordsToExt(centroids)
            for rasterId in self.grp[i]:
                informationRaster = self.infoR[rasterId]
                informationRaster.getValuesPerZone(centroids,
                                                   self.orderBands[rasterId],
                                                   dataX, maskedValuesSampling,
                                                   True)
        if self.centroids is None:
            self.centroids, maskedIds = self.getCentroids(self.idCentroids,
                                                          srfTo = self.srf,
                                                          ids = self.polygonMask)
        #### Output Sampled Data Using Mask Which Represent the Ids Based on rangeZone ####
        return dataX[maskedValuesSampling], NUM.where(maskedValuesSampling)[0]

    def extractZone(self, indexRangeZone):
        """ Get X variables in batches (depend on raster size) """
        rangeZone = self.rangeZones[indexRangeZone]
        id = 0
        msg = ARCPY.GetIDMessage(84823)
        zoneStr = "{0}/{1}".format(str(indexRangeZone + 1), len(self.rangeZones))
        msg = msg.format("...", zoneStr)
        ARCPY.SetProgressor("default", msg, 0, len(self.listRasters), 1)
        #### Masked Ids in the rangeZone ####
        idsMask, maskedIdsRangeZone = self.getMaskValues(rangeZone)
        size = 0
        if not maskedIdsRangeZone:
            size = rangeZone[1] - rangeZone[0] + 1
        else:
            size = len(idsMask)
        dataX = NUM.ones((size, len(self.sourceListRasters)), dtype = float)*NULL
        maskedValuesSampling = NUM.ones(size, dtype = bool)
        self.centroidsOutput = None
        for i in self.grp:
            #### Projection Group ####
            indexRaster = self.grp[i][0]
            srfRaster = self.infoR[indexRaster].srf
            currentRaster = self.infoR[indexRaster]
            #### Centroids rangeZone ####
            centroids, maskedIds = self.getCentroids(rangeZone,
                                                     srfTo = srfRaster,
                                                     ids = idsMask)
            if self.srf.factoryCode == srfRaster.factoryCode:
                self.centroidsOutput = centroids
            centroids = currentRaster.coordsToExt(centroids)
            for rasterId in self.grp[i]:
                informationRaster = self.infoR[rasterId]
                informationRaster.getValuesPerZone(centroids,
                                                   self.orderBands[rasterId],
                                                   dataX, maskedValuesSampling,
                                                   True)
                # NOTE(review): this inner loop rebinds `i`, shadowing the
                # group-loop variable above -- harmless for iteration but
                # worth renaming.
                for i in self.orderBands[rasterId]:
                    maskedValuesSampling = maskedValuesSampling*(dataX.T[i] != NULL)
        if self.centroidsOutput is None:
            self.centroidsOutput, maskedIds = self.getCentroids(rangeZone,
                                                                srfTo = self.srf,
                                                                ids = idsMask)
        self.centroidsOutput = self.centroidsOutput[maskedValuesSampling]
        #### Output Sampled Data Using Mask Which Represent the Ids Based on rangeZone ####
        if maskedIdsRangeZone:
            idsMask = idsMask + rangeZone[0]
            return dataX[maskedValuesSampling], idsMask[maskedValuesSampling]
        else:
            return dataX[maskedValuesSampling], NUM.where(maskedValuesSampling)[0] + rangeZone[0]

    def extractDataUsingPoints(self, ssdo):
        """ Get X variables of point samples for training """
        xy = ssdo.xyCoords
        self.srfOutput = ssdo.spatialRef
        self.ssdo = None
        self.getWorkExtent()
        id = 0
        msg = ARCPY.GetIDMessage(84823)
        msg = msg.format("...","")
        ARCPY.SetProgressor("default", msg, 0, len(self.listRasters), 1)
        size = len(xy)
        dataX = NUM.ones((size, len(self.sourceListRasters)), dtype = float)*NULL
        maskedValuesSampling = NUM.ones(size, dtype = bool)
        for i in self.grp:
            #### Projection Group ####
            indexRaster = self.grp[i][0]
            currentRaster = self.infoR[indexRaster]
            srfTo = self.infoR[indexRaster].srf
            xy1 = None
            #### Check Source Projection and Target Projection ####
            if self.srf.factoryCode != srfTo.factoryCode:
                if self.srf.type.upper() == "PROJECTED" and srfTo.type.upper() == "GEOGRAPHIC":
                    xy1 = ARC._ss.change_projection_xy(xy, self.srf, srfTo.GCS )
                elif self.srf.type.upper() == "PROJECTED" and srfTo.type.upper() == "PROJECTED":
                    xy1 = ARC._ss.change_projection_xy(xy, self.srf, srfTo )
                elif self.srf.type.upper() == "GEOGRAPHIC" and srfTo.type.upper() == "PROJECTED":
                    xy1 = ARC._ss.change_projection_xy(xy, self.srf, srfTo)
                else:
                    #### Assuming Same Ellipsoid Geo - Geo ###
                    xy1 = xy
            else:
                xy1 = xy
            xy1 = currentRaster.coordsToExt(xy1)
            for rasterId in self.grp[i]:
                informationRaster = self.infoR[rasterId]
                informationRaster.getValuesPerZone(xy1,
                                                   self.orderBands[rasterId],
                                                   dataX, maskedValuesSampling,
                                                   False)
                for i in self.orderBands[rasterId]:
                    maskedValuesSampling = maskedValuesSampling*(dataX.T[i] != NULL)
        #### Output Sampled Data Using Mask Which Represent the Ids Based on rangeZone ####
        return dataX[maskedValuesSampling], NUM.where(maskedValuesSampling)[0]

class RasterMeta(BaseRaster):
    """ This Class handles a basic geometry info of a raster"""
    def __init__(self, rasterPath = None, threshold = 12e6, bandList = None, typeFields = None ):
        """ Contructor of RasterMeta"""
        r = None
        #### Set Real Raster ####
        self.typeRaster = "REAL"
        #### Recive Test Raster Instance To Simulate a Raster ####
        if str(rasterPath) == "TestRaster":
            r = rasterPath
            self.catalogPath = r.catalogPath
            self.typeRaster = "DUMMY"
        else:
            #### USE SA Raster to Get Raster Properties ####
            r = ARCPY.sa.Raster(rasterPath)
            #### It is Not Support Raster Services ####
            if "HTTP:" in rasterPath.upper():
                ARCPY.AddIDMessage("ERROR", 110213)
                del r
                raise SystemExit
            #### Unknown Pixel Type is Not Supported ####
            if not UTILS.isGDB(rasterPath) and r.pixelType in ['UNKNOWN']:
                ARCPY.AddIDMessage("ERROR", 10160, r.name)
                del r
                raise SystemExit
            if r.format== "NetCDF":
                self.catalogPath = r.name
            else:
                self.catalogPath = rasterPath
        self.thresholdRaster = threshold
        self.xc = r.meanCellWidth
        self.yc = r.meanCellHeight
        self.ext = UTILS.getExtent(r.extent)
        self.extr = r.extent
        self.width = r.width
        self.height = r.height
        self.w = self.ext[2] - self.ext[0]
        self.h =
self.ext[3] - self.ext[1] self.nCols = int(self.w / self.xc) self.nRows = int(self.h / self.yc) self.srf = r.spatialReference self.name = r.name self.numBands = r.bandCount self.xMin = self.ext[0] self.yMin = self.ext[1] self.xMax = self.ext[2] self.yMax = self.ext[3] #### Definition Extent of Raster is Based on Azimuth #### self.isAzimuthBased = False #### Check if Raster Spatial Reference is Geographic #### self.isGCS = self.srf.type.upper() == "GEOGRAPHIC" threshold = 1.0 if self.isGCS: if self.xMin > 180.0 or self.xMax > 180.0 + threshold: self.isAzimuthBased = True self.bandList = bandList self.typeFields = typeFields #### bands To Process #### if bandList is None: self.bandList = [0] if typeFields is None: self.typeFields = NUM.zeros(len(self.bandList), dtype = int) #### Check Raster from NetCDF #### if r.format == "NetCDF": self.bandList[0] = 0 else: if len(self.bandList) == 1: if self.bandList[0] != -1: self.catalogPath = self.catalogPath + "\\BAND_{}".format(self.bandList[0] + 1) else: self.bandList[0] = 0 #### Divide Raster according with threshold #### self.zones = self.generateZones() self.idZonesToCheck = None del r def coordsToExt(self, xy): """ Convert Longitude to Azimuths """ if self.isAzimuthBased: xyCopy = xy.copy() x = xy.T[0] longN = xy.T[0] < 0 x[longN] = 360.0 + x[longN] xyCopy.T[0] = x return xyCopy else: return xy def reCalZones(self, threshold): """ Recalculate Zones changing the threshold """ self.thresholdRaster = threshold self.zones = self.generateZones() def generateZones(self): """ This method generates zones according with a memory threshold """ justOne = False if self.bandList is not None: justOne = len(self.bandList) == 1 nCells = self.nCols*self.nRows limit = self.thresholdRaster total = nCells*self.numBands if justOne: total = nCells if total > limit: d = NUM.ceil(total/limit) if d < 2: d = 2 divBy = self.nCols > self.nRows nG = [] div = int(NUM.ceil(self.nRows/d)) yMaxA = self.yMax - (NUM.arange(int(d))*div*self.yc) yMinA = 
self.yMax - (NUM.arange(int(d))*div*self.yc) - div*self.yc yMinA[-1] = self.yMin for id, i in enumerate(range(int(d))): grpN = None if i == 0: cExt = [self.xMin, yMinA[id] - self.yc , self.xMax, yMaxA[id]] grpN = cExt else: my = yMinA[id] - self.yc if my < self.yMin: my = self.yMin cExt = [self.xMin, my , self.xMax, yMaxA[id]] grpN = cExt nG.append(grpN) return nG else: return [[self.xMin, self.yMin, self.xMax, self.yMax]] def extractLocValuesZones(self, extCentroids, zones = None ): """ This function intersects the centroids area with raster zones Return zones that interset centroids extent """ ext1 = extCentroids rz = [] if zones is None: zones = self.zones for id, vz in enumerate(zones): ext2 = vz nZone = self.intersect(ext1, vz) if nZone is not None: rz.append(id) self.idZonesToCheck = rz return rz def intersect(self, ext1 , zone): """ Intersect two extension """ ext2 = zone e1 = ARCPY.Extent(XMin = ext1[0], YMin = ext1[1],XMax = ext1[2], YMax = ext1[3]).polygon e2 = ARCPY.Extent(XMin = ext2[0], YMin = ext2[1],XMax = ext2[2], YMax = ext2[3]).polygon inter = e1.intersect(e2,4) if hasattr(inter, "area") and inter.area > 0: return UTILS.getExtent(inter.extent) else: None def borderRaster(self, col, row): if col >= 0 and col <self.nCols and row >=0 and row < self.nRows: return True else: return False def getIndexFromXY(self, x, y, colRowOutput = False): dx = x - self.xMin dy = self.yMax - y col = NUM.ceil(dx/self.xc) - 1 row = NUM.ceil(dy/self.yc) - 1 if self.borderRaster(col, row): if not colRowOutput: return int(col + row*self.nCols) else: return int(col), int(row) else: if not colRowOutput: return -1 else: return -1, -1 def defineInputDataPerBand(self, xMin, yMin, cols, rows): """ This Method Get Values Per Raster Band INPUT: xMin (float): xMin To extract yMin (float): yMin To extract cols (int): Number of Columns rows (int) Number of Rows OUTPUT: data (array 1d): Raster Data flatten """ data = None #### Define Output Data Size #### data = 
NUM.ones((len(self.bandList),rows, cols), dtype = float)*NULL for id, index in enumerate(self.bandList): rasterPathBand = self.catalogPath + "\\BAND_" + str(int(index + 1 )) #### Get Raster Data Per Band Using Starting Point #### dataBand = ARCPY.RasterToNumPyArray(rasterPathBand, ARCPY.Point(xMin, yMin), cols, rows, nodata_to_value = NULL) data[id,] = dataBand #### Clean Objects #### del dataBand GC.collect() ARCPY.ResetProgressor() dataS = data.ravel() #### Clean Objects #### del data GC.collect() return dataS def createOutput(self, data_raster, centroids, cell_size_x, cell_size_y, x_min, y_max, n_cols, n_rows, bands, type_field, is_grid, values): outputWS = "" if is_grid: nc= len(centroids) index = NUM.random.randint(0,200, 2) output = outputWS+str(y_max).replace(".","_")+".shp" outP = UTILS.DataContainer(self.srf, xy = centroids[index,]) v,m = values v1 = v.copy() v1 = v1.reshape((len(m), len(bands))) f = [] for i in range(len(bands)): t= v1.T[i] f.append(t[index]) outP.generateOutput(output,f) ext = ARCPY.Extent(XMin = x_min, XMax = x_min +(n_cols*cell_size_x), YMin = y_max - (n_rows* cell_size_y), YMax = y_max) output = outputWS+str(y_max).replace(".","_")+"P.shp" outPol = UTILS.DataContainer(self.srf, shapes = [ext.polygon]) outPol.generateOutput(output,[[ext.JSON]]) def interpolate(self, centroids, ext = None, isGrid = False): """ Extract Values in centroids from a Matrix using bilinear method """ xMin, yMin, xMax, yMax = self.xMin, self.yMin, self.xMax, self.yMax cols, rows = self.nCols, self.nRows data = None #### Recalculate Cols and Rows #### if ext is not None: xMin, yMin, xMax, yMax = ext cols = int(NUM.round((xMax - xMin)/self.xc)) rows = int(NUM.round((yMax - yMin)/self.yc)) msg = ARCPY.GetIDMessage(84823) msg = msg.format(self.name.lower()," /") ARCPY.SetProgressor("default", msg) if self.typeRaster == "DUMMY": return NUM.ones(len(self.bandList)*len(centroids), dtype = float),\ NUM.ones(len(centroids), dtype = bool) try: #### Get Data Using 
Starting Point All Bands #### data = ARCPY.RasterToNumPyArray(self.catalogPath, ARCPY.Point(xMin, yMin), cols, rows, nodata_to_value = NULL) except: pass #### None -> Cancel Process #### if data is None: ARCPY.AddError("Problem extracting values from {0} using origin location [{1}/{2}].".format(self.name, xMin, yMin)) raise SystemExit ARCPY.ResetProgressor() data = data.ravel() msg = ARCPY.GetIDMessage(84823) msg = msg.format(self.name.lower()," \\") ARCPY.SetProgressor("default", msg) values = ARC._ss.sample_xy(data_raster = NUM.asarray(data, dtype = float), centroids = centroids, cell_size_x = self.xc, cell_size_y = self.yc, x_min= xMin, y_max = yMax, n_cols = cols, n_rows = rows, bands = NUM.asarray(self.bandList, dtype = int), type_field = NUM.asarray(self.typeFields, dtype = int), is_grid = int(isGrid)) #### None -> Cancel Process #### if values is None: raise SystemExit ARCPY.ResetProgressor() #### Clean Memory #### del data GC.collect() return values def interpolateXY(self, x, y): """ Interpolate a point - it uses the bands and typeFields defined in the class """ value = self.getPixelBlock(x, y) if value is not None: x1, y1, x2, y2 = value data1 = ARCPY.RasterToNumPyArray(self.catalogPath, ARCPY.Point(x1, y1), 3, 3, nodata_to_value = NULL) values = ARC._ss.sample_xy(data_raster = NUM.asarray(data1.ravel(),dtype = float), centroids = NUM.array([x, y], dtype = float).reshape(1,2), cell_size_x = self.xc, cell_size_y = self.yc, x_min= x1, y_max = y2, n_cols = 3,n_rows = 3, bands = NUM.asarray(self.bandList, dtype = int), type_field = NUM.asarray(self.typeFields, dtype = int)) return values, NUM.array([[x1, y1],[x1, y2], [x2, y1], [x2,y2]]) else: return None def getData(self, x, y, block = False): """Get data from one location """ if not block: index = self.getIndexFromXY(x, y) if index == -1: return None x1, y1 = self.getCornerCell(index) data1 = ARCPY.RasterToNumPyArray(self.catalogPath, ARCPY.Point(x1, y1), 1, 1, nodata_to_value = NULL) return data1.ravel() 
else: value = self.getPixelBlock(x, y) if value is not None: x1, y1, x2, y2 = value data1 = ARCPY.RasterToNumPyArray(self.catalogPath, ARCPY.Point(x1, y1), 3, 3, nodata_to_value = NULL) return data1.ravel() else: return None def getCornerCell(self, index): """ Get corner BL cell """ col = index % self.nCols row = index // self.nCols x = self.xMin + col*self.xc y = self.yMax - row*self.yc - self.yc return x, y def getPixelBlock(self, x, y): """Get XY Coordinates from int/array of int This function return 3x3 coordenates of LL-TR around cell of interest """ val0 = NUM.round((x - self.xMin) / self.xc) val1 = NUM.round((self.xMax - x) / self.xc) val2 = NUM.round((self.yMax - y) / self.yc) val3 = NUM.round((y - self.yMin) / self.yc) if val0 >= -1 and val1 >= -1 and val2 >= -1 and val3 >= -1: col, row = self.getColRow(x, y) if col >= 0 and col <= self.nCols and row >= 0 and row <= self.nRows : nx = self.xMin + (col-1)*self.xc ny = self.yMax - (row+2)*self.yc return nx, ny, nx + (self.xc * 3), ny + (self.yc * 3) else: return None else: return None def getExtCendroids(self, centroids): """ Get extent of Centroids """ ep = 1e-5 minV = centroids.min(0) maxV = centroids.max(0) if len(centroids) == 1: minV, maxV = minV - ep, maxV + ep return minV[0], minV[1], maxV[0], maxV[1] def getAlignedExtFromCentroidsExt(self, x, y, x1, y1): """ This method gets an aligned extent from ext (x,y,x1,y1) """ ex1 = ARCPY.Extent(self.xMin, self.yMin, self.xMax, self.yMax).polygon if x == x1 and y == y1: return self.getPixelBlock(x,y) if x != x1 and y == y1: extBL = self.getPixelBlock(x,y) extTR = self.getPixelBlock(x1,y1) if extBL is None or extTR is None: return None return extBL[0], extBL[1], extTR[2], extTR[3] if x == x1 and y != y1: extBL = self.getPixelBlock(x,y) extTR = self.getPixelBlock(x1,y1) if extBL is None or extTR is None: return None return extBL[0], extBL[1], extTR[2], extTR[3] ex2 = ARCPY.Extent(x, y, x1, y1).polygon inter = ex1.intersect(ex2,4) if hasattr(inter, "area") and 
inter.area > 0: x, y, x1, y1 = UTILS.getExtent(inter.extent) else: return None extBL = self.getPixelBlock(x,y) extTR = self.getPixelBlock(x1,y1) if extBL is None or extTR is None: return None return extBL[0], extBL[1], extTR[2], extTR[3] def test_extract(self, input): """Extract Values From Raster """ xy = None ARCPY.env.outputCoordinateSystem = self.srf if type(input) == str: ssdo = SSDO.SSDataObject(input) ssdo.obtainData() xy = ssdo.xyCoords else: xy = input n = len(xy) nb = len(self.bandList) orderBands = NUM.arange(nb, dtype = int) if nb == 1: values = NUM.ones((n,1), dtype = float)* NULL maskedValues = NUM.ones(n, dtype = bool) self.getValuesPerZone(xy, orderBands, values, maskedValues ) return values else: values = NUM.ones((n, nb), dtype = float)* NULL maskedValues = NUM.ones(n, dtype = bool) self.getValuesPerZone(xy, orderBands, values, maskedValues ) return values def getValuesPerZone(self, xy, orderBands, values, maskedValues, isGrid = False): """Get values from raster - it fills the values array, and uses the maskedValues to check the mask""" #### Check xy Size #### if xy is None or len(xy) == 0: return #### Calculation Extent XY Coordinates #### extCent = self.getExtCendroids(xy) #### Finding Real Zones That Intersect with XY Coordinates #### self.extractLocValuesZones(extCent) x = xy.T[0] y = xy.T[1] #print("Sampling {0} Zones:".format(len(self.idZonesToCheck)), end="\r", flush=True) #### Use Zones Define for id in self.idZonesToCheck: zone = self.zones[id] xMin, yMin, xMax, yMax = zone #### Obtain The Ids of XY Coordinates that Intersects with the zone #### ind = (y <= yMax)*(y >= yMin)*maskedValues cenBase = xy[ind,:] if len(cenBase) > 0: newExtCen = self.getExtCendroids(cenBase) #### Get Extent That Aligned With Raster #### ext = self.getAlignedExtFromCentroidsExt(*newExtCen) indRows = NUM.where(ind)[0] if ext is not None: #### Interpolate XY Coordinates Using Type Fields (Near/Bil) #### valuesP, maskP = self.interpolate(cenBase, ext = ext, isGrid 
= isGrid) #### Obtain Ids With Interpolated Values #### goodData = NUM.asarray(maskP, dtype = bool) badData = NUM.logical_not(maskP) gIdRows = indRows[goodData] bIdRows = indRows[badData] #### Update Mask #### maskedValues[bIdRows] = False if len(values) == 1: try: values[gIdRows] = valuesP[goodData] except: ARCPY.AddError("Problem updating data from the raster {0}.".format(self.name)) raise SystemExit else: valuesP = valuesP.reshape((len(maskP), len(orderBands))) valuesP = valuesP[goodData,] if len(valuesP): done = ARC._ss.update2DArray(values, valuesP, NUM.asarray(gIdRows, dtype = NUM.int32), NUM.asarray(orderBands, dtype = NUM.int32) ) #### If Function is Cancelled #### if done is None: ARCPY.AddError("Problem updating data from the raster {0}.".format(self.name)) raise SystemExit #### Clean Memory #### del goodData, badData, valuesP, maskP, cenBase, indRows GC.collect() else: pass def rasterWorkFlow(hpar, nRasters, explodeData, ssdoCheck, nVar, rfRun, globalExtent, randSeed): """This function is used for input Raster dataset only (X' variables). Prediction Raster or Training. 
""" if hpar.explodeDataRaster and explodeData : ssdoTraining = ssdoCheck fields = [hpar.variableToPredict.upper()] userTypeRF ="Categorical" if hpar.classes in [ True, "True", "true"] else "Numeric" ssdoTraining.obtainData(fields = fields, requireGeometry = False) uniqueV = NUM.unique(ssdoTraining.fields[hpar.variableToPredict.upper()].data) if len(uniqueV) == 1: alias = hpar.variableToPredict.upper(); ARCPY.AddIDMessage("ERROR", 110181, alias) raise SystemExit #### ConverT GP object To String #### pathR = [i[0]["sourceData"] for i in hpar.listVariables if i[0]["source"]== "RASTER"] nameR = [i[0]["alias"] for i in hpar.listVariables if i[0]["source"]== "RASTER"] typeRF = [i[0]["rfType"] == numericVar for i in hpar.listVariables if i[0]["source"]== "RASTER"] typeRFD = [i[0]["rfType"] for i in hpar.listVariables if i[0]["source"] == "RASTER"] infoRaster = RasterInfo(pathR, typeRF, rasterNames = nameR, printInfo = False) rfTraining = InfoForestField(splitPercentage = hpar.split, seed = randSeed) rfTraining.loadDataRasterForTraining(infoRaster, ssdoTraining, hpar.variableToPredict, userTypeRF, checkBalance = not hpar.balanceTree) if hpar.what == "NONE": rfTraining.splitDataForTesting(hpar.balanceTree) rfRun.executeModel(rfTraining, newSRF = infoRaster.srf, ssdoPoints2Train = rfTraining.dataObject) ARCPY.env.extent = globalExtent return True if hpar.what == "RASTER": if hpar.outArea: pathROut = [i[1]["sourceData"] for i in hpar.listVariables if i[1]["source"]== "RASTER"] nameROut = [i[1]["alias"] for i in hpar.listVariables if i[1]["source"]== "RASTER"] typeRFOut = [i[1]["rfType"] == numericVar for i in hpar.listVariables if i[1]["source"]== "RASTER"] typeRFD = [i[1]["rfType"] for i in hpar.listVariables if i[1]["source"] == "RASTER"] infoRasterOut = RasterInfo(pathROut, typeRFD, printInfo = True, rasterNames = nameROut) rfTest = InfoForestField(isTraining = False) infoRasterOut.srfOutput = None rfRun.executeModel(rfTraining, rfTest = rfTest, rasterInfo = None, 
ssdoPoints2Train = rfTraining.dataObject, infoRasterInstances = infoRasterOut) return True else: infoRasterIn = RasterInfo(pathR, typeRF, printInfo = True, rasterNames = nameR) #### Set SRF #### infoRasterIn.srfOutput = ssdoTraining.spatialRef rfTest = InfoForestField(isTraining = False) #### Execute Forest #### rfRun.executeModel(rfTraining, rfTest = rfTest, rasterInfo = None, ssdoPoints2Train = rfTraining.dataObject, newSRF = ssdoTraining.spatialRef, infoRasterInstances = infoRasterIn) return True if nRasters > 0 and nVar == 0 and hpar.what in ["NONE", "RASTER"]: ### Get Data From Input FC #### ssdoTraining = ssdoCheck checkLicense() ssdoTest = None rfTest = None fields = [hpar.variableToPredict.upper()] userTypeRF ="Categorical" if hpar.classes in [ "CATEGORICAL", True, "True", "true"] else "Numeric" ssdoTraining.obtainData(fields = fields, requireGeometry = False) #### Define Training Field #### rfTraining = InfoForestField(splitPercentage = hpar.split, seed = randSeed, ssdo = ssdoTraining) rfTraining.defineYFromDataObject(hpar.variableToPredict.upper(), userTypeRF, ignoreValue = True, checkBalance = not hpar.balanceTree) #### Extract Data To Train #### maskTotal = AddDataN(rfTraining, hpar, typeSampling = "NP") #### Apply mask in each RF Field #### applyMaskFromRaster(rfTraining, allIndices = maskTotal) rfTraining.generateXVariable(balance = hpar.balanceTree) srf = ssdoTraining.spatialRef applyZone = 0 if hpar.outArea: applyZone = 1 if hpar.what == "RASTER": #### ConverT GP object To String #### pathR = [i[applyZone]["sourceData"] for i in hpar.listVariables if i [applyZone]["source"] == "RASTER"] nameR = [i[applyZone]["alias"] for i in hpar.listVariables if i [applyZone]["source"] == "RASTER"] typeRF = [i[applyZone]["rfType"] == numericVar for i in hpar.listVariables if i[applyZone]["source"]== "RASTER"] typeRFD = [i[applyZone]["rfType"] for i in hpar.listVariables if i[applyZone]["source"] == "RASTER"] data = None intersection = NoUse = cellSize = srfOut = 
None infoRaster = RasterInfo(pathR, typeRF, rasterNames = nameR) #### Verify If The prediction is in external Area #### if applyZone: infoRaster.srfOutput = None srf = infoRaster.infoR[0].srf else: infoRaster.srfOutput = srf rfTest = InfoForestField(isTraining = False) #### Execute Forest #### rfRun.executeModel(rfTraining, rfTest = rfTest, rasterInfo = None, newSRF = srf, infoRasterInstances = infoRaster) return True #### Just Training Using only Raster Datasets / Points -Polygons Without Explode #### if hpar.what == "NONE": #### Execute Forest #### rfRun.executeModel(rfTraining) return True return False def execute(pytRF, parameters): """ Prepare information for executing forest """ #### Clean Global Extent #### globalExtent = ARCPY.env.extent ARCPY.env.extent = "" #### Get Seed #### randSeed = UTILS.getRandomSeed() #### Generate Seed if It is not Provided #### if randSeed == 0: randSeed = int(NUM.random.randint(1000000)) msg = ARCPY.GetIDMessage(84821) ARCPY.AddMessage(msg.format(randSeed)) hpar = HandleInputs(parameters) parameters = hpar.parameters #### Generate Iteration Seeds #### if hpar.iterations > 1 : NUM.random.seed(randSeed) hpar.iterationSeeds = NUM.random.randint(0,1000000, hpar.iterations) #### Check Output Feature Class #### if hpar.outputFeatures is None: hpar.outputFeatures="" ssdoTraining = None ssdoTesting = None rfTraining = None par = parameters model = hpar.isModel hpar.getInfoValues() #### This object decides how execute RF #### rfRun = RunRF(hpar, randSeed, parameters) ### Detect if Inputs in Matching Dist/Rasters are different #### outArea = hpar.outArea useTestData = hpar.haveToPredict totalVariables = len(hpar.listVariables) userTypeRF = "Categorical" if hpar.classes in [ "CATEGORICAL", True, "True", "true"] else "Numeric" if totalVariables > 0: tryIsNone = hpar.fields_to_try is None if tryIsNone or hpar.fields_to_try > totalVariables: n = 1 if userTypeRF == "Categorical": ### Set Number of Fields for Permutation in classification ### n 
= int(NUM.sqrt(totalVariables)) else: ### Set Number of Fields for Permutation in Regression ### n = int(totalVariables/3) #### fields_to_try Should be greater than zero #### hpar.fields_to_try = n if n == 0: hpar.fields_to_try = 1 if not tryIsNone: ARCPY.AddIDMessage("WARNING", 110177, str(hpar.fields_to_try)) checkOutput(hpar.outputRaster, "RASTER") checkOutput(hpar.outputFeatures, "FC") checkOutput(hpar.outputTrainedFeatures, "FC") ssdoCheck = None #### Handle All Explode Data Cases #### shapeInfo = None try: desc = ARCPY.Describe(hpar.input) shapeInfo = desc.shapeType except: pass #### Check Selection Variable #### explodeData = shapeInfo == "Polygon" and userTypeRF == "Categorical" and hpar.what in ["NONE", "RASTER"] varsF = hpar.getVarsBySourceOrigin("FC") varsD = hpar.getVarsBySourceOrigin("DIST") varsR = hpar.getVarsBySourceOrigin("RASTER", info = "sourceData") nVar = len(varsF) + len(varsD) nRasters = len(varsR) ssdoCheck = SSDO.SSDataObject(hpar.input) infoRasterArg = (hpar, nRasters, explodeData, ssdoCheck, nVar, rfRun, globalExtent, randSeed) created = rasterWorkFlow(*infoRasterArg) if created in [True, None]: return if not hpar.useAllRasterValues: ssdoTraining = ssdoCheck useTestData = hpar.what == "FEATURES" ssdoTest = None rfTest = None fields = [hpar.variableToPredict.upper()] userTypeRF ="Categorical" if hpar.classes in [ "CATEGORICAL", True, "True", "true"] else "Numeric" #### Fields In Exploratory Variables #### varsF = hpar.getVarsBySourceOrigin("FC") for f in varsF: fields.append(f) ssdoTraining.obtainData(fields = fields, requireGeometry = False, minNumObs = minNumObs) rfTraining = InfoForestField(splitPercentage = hpar.split, seed = randSeed, ssdo = ssdoTraining) rfTraining.defineYFromDataObject(hpar.variableToPredict.upper(), userTypeRF, ignoreValue = True, checkBalance = not hpar.balanceTree) maskTotal = AddDataN(rfTraining, hpar) #### Apply mask in each RF Field #### applyMaskFromRaster(rfTraining, allIndices = maskTotal) 
rfTraining.generateXVariable(balance = hpar.balanceTree) #### Using New Fatures #### if useTestData and hpar.what == "FEATURES" and not outArea: ssdoTest = SSDO.SSDataObject(hpar.featuresToPredict) fieldTest = [] varsF = hpar.getVarsBySourceOrigin("FC", False) for f in varsF: fieldTest.append(f.upper()) #### Obtain Geometry Data #### ssdoTest.obtainData(fields = fieldTest, requireGeometry = False) rfTest = InfoForestField(ssdo = ssdoTest, isTraining = False) maskTotalTest = AddTestFieldN(rfTest, hpar) #### Apply mask in each RF Field #### applyMaskFromRaster(rfTest, allIndices = maskTotalTest) #### Using New Fatures #### if useTestData and hpar.what == "FEATURES" and outArea: ssdoTest = SSDO.SSDataObject(hpar.featuresToPredict) fieldTest = [] varsF = hpar.getVarsBySourceOrigin("FC", False) for f in varsF: fieldTest.append(f.upper()) #### Obtain Geometry Data #### ssdoTest.obtainData(fields = fieldTest, requireGeometry = False) rfTest = InfoForestField(ssdo = ssdoTest, isTraining = False) maskTotalTest = AddTestFieldN(rfTest, hpar) #### Apply mask in each RF Field #### applyMaskFromRaster(rfTest, allIndices = maskTotalTest) if hpar.what == "NONE": rfRun.executeModel(rfTraining, rfTest = rfTest) ARCPY.env.extent = globalExtent return if hpar.what == "FEATURES": dictOrder = hpar.getDictOrder() information = compareForestInfo(rfTraining, rfTest, dictOrder, balance = hpar.balanceTree) if information is not None: raise SystemExit() rfRun.executeModel(rfTraining, rfTest = rfTest) ARCPY.env.extent = globalExtent return return
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question