The following code examples, collected from open-source Python projects, illustrate how to use numpy.number.
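Before the project examples, here is a minimal, self-contained sketch of the two most common np.number idioms — class checks on scalar types/instances and dtype checks. The values used are illustrative only:

import numpy as np

# np.number is the abstract base class of all NumPy numeric scalar types,
# so it works with issubclass() on concrete scalar types ...
assert issubclass(np.float64, np.number)
assert not issubclass(np.str_, np.number)

# ... and with isinstance() on scalar values
assert isinstance(np.int32(7), np.number)

# np.issubdtype() extends the same check to dtype objects, which is how
# pandas-style numeric-column selection works, e.g.
# df.select_dtypes(include=[np.number])
arr = np.arange(5)
assert np.issubdtype(arr.dtype, np.number)

Many of the examples below follow one of these patterns.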
def _check_annotations(value):
    """
    Recursively check that value is either of a "simple" type (number,
    string, date/time) or is a (possibly nested) dict, list or numpy array
    containing only simple types.
    """
    if isinstance(value, np.ndarray):
        if not issubclass(value.dtype.type, ALLOWED_ANNOTATION_TYPES):
            raise ValueError("Invalid annotation. NumPy arrays with dtype %s "
                             "are not allowed" % value.dtype.type)
    elif isinstance(value, dict):
        for element in value.values():
            _check_annotations(element)
    elif isinstance(value, (list, tuple)):
        for element in value:
            _check_annotations(element)
    elif not isinstance(value, ALLOWED_ANNOTATION_TYPES):
        raise ValueError("Invalid annotation. Annotations of type %s are not "
                         "allowed" % type(value))

def linear_trajectory_to(self, target_tf, traj_len):
    """Creates a trajectory of poses linearly interpolated from this tf to a target tf.

    Parameters
    ----------
    target_tf : :obj:`RigidTransform`
        The RigidTransform to interpolate to.
    traj_len : int
        The number of RigidTransforms in the returned trajectory.

    Returns
    -------
    :obj:`list` of :obj:`RigidTransform`
        A list of interpolated transforms from this transform to the target.
    """
    if traj_len < 0:
        raise ValueError('Traj len must be at least 0')
    delta_t = 1.0 / (traj_len + 1)
    t = 0.0
    traj = []
    while t < 1.0:
        traj.append(self.interpolate_with(target_tf, t))
        t += delta_t
    traj.append(target_tf)
    return traj

def drop_inconsistent_keys(self, columns, obj):
    """Drop inconsistent keys

    Drop inconsistent keys from a ValueCounts or Histogram object.

    :param list columns: columns key to retrieve desired datatypes
    :param object obj: ValueCounts or Histogram object to drop inconsistent keys from
    """
    # has array been converted first? if so, set correct comparison datatype
    comp_dtype = []
    for col in columns:
        dt = np.dtype(self.var_dtype[col]).type()
        is_converted = isinstance(dt, np.number) or isinstance(dt, np.datetime64)
        if is_converted:
            comp_dtype.append(np.int64)
        else:
            comp_dtype.append(self.var_dtype[col])
    # keep only keys of types in comp_dtype
    obj.remove_keys_of_inconsistent_type(prefered_key_type=comp_dtype)
    return obj

def categorize_columns(self, df):
    """Categorize columns of dataframe by data type

    :param df: input (pandas) data frame
    """
    # check presence and data type of requested columns
    # sort columns into numerical, timestamp and category based
    for c in self.columns:
        for col in c:
            if col not in df.columns:
                raise KeyError('column "{0:s}" not in dataframe "{1:s}"'.format(col, self.read_key))
            dt = self.get_data_type(df, col)
            if col not in self.var_dtype:
                self.var_dtype[col] = dt.type
                if (self.var_dtype[col] is np.string_) or (self.var_dtype[col] is np.object_):
                    self.var_dtype[col] = str
            if not any(dt in types for types in (STRING_SUBSTR, NUMERIC_SUBSTR, TIME_SUBSTR)):
                raise TypeError('cannot process column "{0:s}" of data type "{1:s}"'.format(col, str(dt)))
            is_number = isinstance(dt.type(), np.number)
            is_timestamp = isinstance(dt.type(), np.datetime64)
            colset = self.num_cols if is_number else self.dt_cols if is_timestamp else self.str_cols
            if col not in colset:
                colset.append(col)
            self.log().debug('Data type of column "%s" is "%s"', col, self.var_dtype[col])

def test_ticket_1539(self):
    dtypes = [x for x in np.typeDict.values()
              if (issubclass(x, np.number)
                  and not issubclass(x, np.timedelta64))]
    a = np.array([], dtypes[0])
    failures = []
    # ignore complex warnings
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', np.ComplexWarning)
        for x in dtypes:
            b = a.astype(x)
            for y in dtypes:
                c = a.astype(y)
                try:
                    np.dot(b, c)
                except TypeError:
                    failures.append((x, y))
    if failures:
        raise AssertionError("Failures: %r" % failures)

def round(self, decimals=0, out=None):
    """
    Return an array rounded to the given number of decimals.

    Refer to `numpy.around` for full documentation.

    See Also
    --------
    numpy.around : equivalent function
    """
    result = self._data.round(decimals=decimals, out=out).view(type(self))
    if result.ndim > 0:
        result._mask = self._mask
        result._update_from(self)
    elif self._mask:
        # Return masked when the scalar is masked
        result = masked
    # No explicit output: we're done
    if out is None:
        return result
    if isinstance(out, MaskedArray):
        out.__setmask__(self._mask)
    return out

def setup_class(cls):
    # Load a dataframe
    dataframe = pd.read_csv('tests/data/decathlon.csv', index_col=0)
    # Determine the categorical columns
    cls.df_categorical = dataframe.select_dtypes(exclude=[np.number])
    # Determine the numerical columns
    cls.df_numeric = dataframe.drop(cls.df_categorical.columns, axis='columns')
    # Determine the size of the numerical part of the dataframe
    (cls.n, cls.p) = cls.df_numeric.shape
    # Determine the covariance matrix
    X = cls.df_numeric.copy()
    cls.center_reduced = ((X - X.mean()) / X.std()).values
    cls.cov = cls.center_reduced.T @ cls.center_reduced
    # Calculate a full PCA
    cls.n_components = len(cls.df_numeric.columns)
    cls.pca = PCA(dataframe, n_components=cls.n_components, scaled=True)

def _filter(self, dataframe, supplementary_row_names, supplementary_column_names):
    # Extract the categorical columns
    self.categorical_columns = dataframe.select_dtypes(exclude=[np.number])
    # Extract the supplementary rows
    self.supplementary_rows = dataframe.loc[supplementary_row_names].copy()
    self.supplementary_rows.drop(supplementary_column_names, axis=1, inplace=True)
    # Extract the supplementary columns
    self.supplementary_columns = dataframe[supplementary_column_names].copy()
    self.supplementary_columns.drop(supplementary_row_names, axis=0, inplace=True)
    # Remove the supplementary columns and rows from the dataframe
    dataframe.drop(supplementary_row_names, axis=0, inplace=True)
    dataframe.drop(supplementary_column_names, axis=1, inplace=True)

def _filter(self, dataframe, supplementary_row_names, supplementary_column_names):
    # Extract the categorical columns
    self.categorical_columns = dataframe.select_dtypes(exclude=[np.number])
    # Extract the supplementary rows
    self.supplementary_rows = dataframe.loc[supplementary_row_names].copy()
    self.supplementary_rows.drop(self.categorical_columns.columns, axis='columns', inplace=True)
    # Extract the supplementary columns
    self.supplementary_columns = dataframe[supplementary_column_names].copy()
    self.supplementary_columns.drop(supplementary_row_names, axis='rows', inplace=True)
    # Remove the categorical columns and the supplementary columns and rows from the dataframe
    dataframe.drop(supplementary_row_names, axis='rows', inplace=True)
    dataframe.drop(supplementary_column_names, axis='columns', inplace=True)
    dataframe.drop(self.categorical_columns.columns, axis='columns', inplace=True)

def __init__(self, bin_type, *repr_args):
    """
    Constructor for a bin object.

    :param id: identifier (e.g. bin number) of the bin
    :param bin_type: "numerical" or "categorical"
    :param repr_args: arguments to represent this bin.
        args for numerical bin includes lower, upper, lower_closed, upper_closed
        args for categorical bin includes a list of categories for this bin.
    """
    if bin_type == "numerical" and len(repr_args) != 4:
        raise ValueError("args for numerical bin are lower, upper, lower_closed, upper_closed.")
    if bin_type == "categorical" and len(repr_args) != 1 and type(repr_args[0]) is not list:
        raise ValueError("args for categorical bin is a list of categorical values for this bin.")
    self.bin_type = bin_type
    if bin_type == "numerical":
        self.representation = NumericalRepresentation(*repr_args)
    elif bin_type == "categorical":
        self.representation = CategoricalRepresentation(*repr_args)

def _get_power(mean1, std1, n1, mean2, std2, n2, z_1_minus_alpha):
    """
    Compute statistical power.

    This is a helper function for compute_statistical_power(x, y, alpha=0.05)

    Args:
        mean1 (float): mean value of the treatment distribution
        std1 (float): standard deviation of the treatment distribution
        n1 (integer): number of samples of the treatment distribution
        mean2 (float): mean value of the control distribution
        std2 (float): standard deviation of the control distribution
        n2 (integer): number of samples of the control distribution
        z_1_minus_alpha (float): critical value for significance level alpha.
            That is, z-value for 1-alpha.

    Returns:
        float: statistical power --- that is, the probability of a test to
            detect an effect, if the effect actually exists.
    """
    effect_size = mean1 - mean2
    std = pooled_std(std1, n1, std2, n2)
    tmp = (n1 * n2 * effect_size**2) / ((n1 + n2) * std**2)
    z_beta = z_1_minus_alpha - np.sqrt(tmp)
    beta = stats.norm.cdf(z_beta)
    power = 1 - beta
    return power

def test_import_trajectory_interp_nans(self):
    fields = ['mdy', 'hms', 'lat', 'long', 'ell_ht', 'ortho_ht', 'num_sats', 'pdop']
    df = ti.import_trajectory(os.path.abspath('tests/sample_trajectory.txt'),
                              columns=fields, skiprows=1,
                              timeformat='hms', interp=True)
    # Test and verify an arbitrary line of data against the same line in the pandas DataFrame
    line11 = ['3/22/2017', '9:59:00.20', 76.5350241071, -68.7218956324,
              65.898, 82.778, 11, 2.00]
    sample_line = dict(zip(fields, line11))
    np.testing.assert_almost_equal(df.lat[10], sample_line['lat'], decimal=10)
    np.testing.assert_almost_equal(df.long[10], sample_line['long'], decimal=10)
    numeric = df.select_dtypes(include=[np.number])
    # check whether NaNs were interpolated for numeric type fields
    self.assertTrue(numeric.iloc[[2]].notnull().values.all())

def test_import_trajectory_fields(self):
    # test number of fields in data greater than number of fields named
    fields = ['mdy', 'hms', 'lat', 'long', 'ell_ht']
    df = ti.import_trajectory(os.path.abspath('tests/sample_trajectory.txt'),
                              columns=fields, skiprows=1, timeformat='hms')
    columns = [x for x in fields if x is not None]
    np.testing.assert_array_equal(df.columns, columns[2:])

    # test fields in the middle are dropped
    fields = ['mdy', 'hms', 'lat', 'long', 'ell_ht', None, 'num_sats', 'pdop']
    df = ti.import_trajectory(os.path.abspath('tests/sample_trajectory.txt'),
                              columns=fields, skiprows=1, timeformat='hms')
    columns = [x for x in fields if x is not None]
    np.testing.assert_array_equal(df.columns, columns[2:])

def get_binary_op_return_class(cls1, cls2):
    if cls1 is cls2:
        return cls1
    if cls1 in (np.ndarray, np.matrix, np.ma.masked_array) or \
            issubclass(cls1, (numeric_type, np.number, list, tuple)):
        return cls2
    if cls2 in (np.ndarray, np.matrix, np.ma.masked_array) or \
            issubclass(cls2, (numeric_type, np.number, list, tuple)):
        return cls1
    if issubclass(cls1, YTQuantity):
        return cls2
    if issubclass(cls2, YTQuantity):
        return cls1
    if issubclass(cls1, cls2):
        return cls1
    if issubclass(cls2, cls1):
        return cls2
    else:
        raise RuntimeError("Undefined operation for a YTArray subclass. "
                           "Received operand types (%s) and (%s)" % (cls1, cls2))

def transform(self, X, y=None):
    """Apply dimensionality reduction to X. X is masked.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    from sklearn.utils import check_array
    from sklearn.utils.validation import check_is_fitted
    check_is_fitted(self, ['mask_'], all_or_any=all)
    X = check_array(X)
    return X[:, self.mask_]

def transform(self, X, y=None):
    """Apply dimensionality reduction to X. X is masked.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    from sklearn.utils import check_array
    from sklearn.utils.validation import check_is_fitted
    check_is_fitted(self, ['mask_'], all_or_any=all)
    if hasattr(X, 'columns'):
        X = X.values
    X = check_array(X[:, self.mask_])
    return X

def fit(self, X, y=None):
    # Return if not imputing
    if self.impute is False:
        return self

    # Grab list of object column names before doing imputation
    self.object_columns = X.select_dtypes(include=['object']).columns.values

    self.fill = pd.Series([X[c].value_counts().index[0]
                           if X[c].dtype == np.dtype('O')
                           or pd.core.common.is_categorical_dtype(X[c])
                           else X[c].mean() for c in X], index=X.columns)

    if self.verbose:
        num_nans = sum(X.select_dtypes(include=[np.number]).isnull().sum())
        num_total = sum(X.select_dtypes(include=[np.number]).count())
        percentage_imputed = num_nans / num_total * 100
        print("Percentage Imputed: %.2f%%" % percentage_imputed)
        print("Note: Impute will always happen on prediction dataframe, otherwise rows are dropped, "
              "and will lead to missing predictions")

    # return self for scikit compatibility
    return self

def round(self, decimals=0, out=None):
    """
    Return an array rounded to the given number of decimals.

    Refer to `numpy.around` for full documentation.

    See Also
    --------
    numpy.around : equivalent function
    """
    result = self._data.round(decimals=decimals, out=out).view(type(self))
    result._mask = self._mask
    result._update_from(self)
    # No explicit output: we're done
    if out is None:
        return result
    if isinstance(out, MaskedArray):
        out.__setmask__(self._mask)
    return out

def load_MNIST_images(filename):
    """
    returns a 28x28x[number of MNIST images] matrix containing
    the raw MNIST images

    :param filename: input data file
    """
    with open(filename, "rb") as f:
        magic = np.fromfile(f, dtype=np.dtype('>i4'), count=1)
        num_images = int(np.fromfile(f, dtype=np.dtype('>i4'), count=1))
        num_rows = int(np.fromfile(f, dtype=np.dtype('>i4'), count=1))
        num_cols = int(np.fromfile(f, dtype=np.dtype('>i4'), count=1))
        images = np.fromfile(f, dtype=np.ubyte)
        images = images.reshape((num_images, num_rows * num_cols)).transpose()
        images = images.astype(np.float64) / 255
    return images

def test_ticket_1539(self):
    dtypes = [x for x in np.typeDict.values()
              if (issubclass(x, np.number)
                  and not issubclass(x, np.timedelta64))]
    a = np.array([], np.bool_)  # not x[0] because it is unordered
    failures = []

    for x in dtypes:
        b = a.astype(x)
        for y in dtypes:
            c = a.astype(y)
            try:
                np.dot(b, c)
            except TypeError:
                failures.append((x, y))
    if failures:
        raise AssertionError("Failures: %r" % failures)

def round(self, decimals=0, out=None):
    """
    Return each element rounded to the given number of decimals.

    Refer to `numpy.around` for full documentation.

    See Also
    --------
    ndarray.around : corresponding function for ndarrays
    numpy.around : equivalent function
    """
    result = self._data.round(decimals=decimals, out=out).view(type(self))
    if result.ndim > 0:
        result._mask = self._mask
        result._update_from(self)
    elif self._mask:
        # Return masked when the scalar is masked
        result = masked
    # No explicit output: we're done
    if out is None:
        return result
    if isinstance(out, MaskedArray):
        out.__setmask__(self._mask)
    return out

def get_numeric_subclasses(cls=numpy.number, ignore=None):
    """
    Return subclasses of `cls` in the numpy scalar hierarchy.

    We only return subclasses that correspond to unique data types.
    The hierarchy can be seen here:
        http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html
    """
    if ignore is None:
        ignore = []
    rval = []
    dtype = numpy.dtype(cls)
    dtype_num = dtype.num
    if dtype_num not in ignore:
        # Safety check: we should be able to represent 0 with this data type.
        numpy.array(0, dtype=dtype)
        rval.append(cls)
        ignore.append(dtype_num)
    for sub in cls.__subclasses__():
        rval += [c for c in get_numeric_subclasses(sub, ignore=ignore)]
    return rval

def largest(*args):
    """
    Return the [elementwise] largest of a variable number of arguments.

    Like python's max.
    """
    if len(args) == 2:
        a, b = args
        return switch(a > b, a, b)
    else:
        return max(stack(args), axis=0)


##########################
# Comparison
##########################

def reshape(x, newshape, ndim=None):
    if ndim is None:
        newshape = as_tensor_variable(newshape)
        if newshape.ndim != 1:
            raise TypeError(
                "New shape in reshape must be a vector or a list/tuple of"
                " scalar. Got %s after conversion to a vector." % newshape)
        try:
            ndim = get_vector_length(newshape)
        except ValueError:
            raise ValueError(
                "The length of the provided shape (%s) cannot "
                "be automatically determined, so Theano is not able "
                "to know what the number of dimensions of the reshaped "
                "variable will be. You can provide the 'ndim' keyword "
                "argument to 'reshape' to avoid this problem." % newshape)
    op = Reshape(ndim)
    rval = op(x, newshape)
    return rval

def infer_shape(self, node, shapes):
    if isinstance(node.inputs[1], TensorVariable):
        # We have padded node.inputs[0] to the right number of
        # dimensions for the output
        l = []
        for sh1, sh2, b1 in zip(shapes[0], shapes[1][1:],
                                node.inputs[0].broadcastable):
            if b1:
                l.append(sh2)
            else:
                l.append(sh1)
        return [tuple(l)]
    else:
        import theano.typed_list
        assert isinstance(node.inputs[1],
                          theano.typed_list.TypedListVariable)
        raise ShapeError("Case not implemented")
        shape = shapes[0]
        for i in xrange(len(shapes[0]) - 1):
            shape[i] = shapes[1][i]
        return [(shape)]

def check_for_x_over_absX(numerators, denominators):
    """Convert x/abs(x) into sign(x). """
    # TODO: this function should dig/search through dimshuffles
    # This won't catch a dimshuffled absolute value
    for den in list(denominators):
        if (den.owner and den.owner.op == T.abs_ and
                den.owner.inputs[0] in numerators):
            if den.owner.inputs[0].type.dtype.startswith('complex'):
                # TODO: Make an Op that projects a complex number to
                #       have unit length but projects 0 to 0.  That
                #       would be a weird Op, but consistent with the
                #       special case below.  I heard there's some
                #       convention in Matlab that is similar to
                #       this... but not sure.
                pass
            else:
                denominators.remove(den)
                numerators.remove(den.owner.inputs[0])
                numerators.append(T.sgn(den.owner.inputs[0]))
    return numerators, denominators

def test_axis_statistics():
    adel_output_df = pd.read_csv(INPUTS_DIRPATH/ADEL_OUTPUT_FILENAME)
    adel_output_df['species'] = '0'
    axis_statistics_df, intermediate_df = pp.axis_statistics(adel_output_df, domain_area=1)
    axis_statistics_df.drop('species', 1, inplace=True)
    intermediate_df.drop('species', 1, inplace=True)
    axis_statistics_df.to_csv(OUTPUTS_DIRPATH/'actual_axis_statistics.csv', index=False, na_rep='NA')
    intermediate_df.to_csv(OUTPUTS_DIRPATH/'actual_intermediate.csv', index=False, na_rep='NA')

    desired_axis_statistics_df = pd.read_csv(OUTPUTS_DIRPATH/'desired_axis_statistics.csv')
    desired_axis_statistics_df.drop('has_ear', 1, inplace=True)
    axis_statistics_df = axis_statistics_df.select_dtypes(include=[np.number])
    desired_axis_statistics_df = desired_axis_statistics_df.select_dtypes(include=[np.number])
    np.testing.assert_allclose(axis_statistics_df.values, desired_axis_statistics_df.values,
                               RELATIVE_TOLERANCE, ABSOLUTE_TOLERANCE)

    desired_intermediate_df = pd.read_csv(OUTPUTS_DIRPATH/'desired_intermediate.csv')
    desired_intermediate_df.drop('has_ear', 1, inplace=True)
    intermediate_df = intermediate_df.select_dtypes(include=[np.number])
    desired_intermediate_df = desired_intermediate_df.select_dtypes(include=[np.number])
    np.testing.assert_allclose(intermediate_df.values, desired_intermediate_df.values,
                               RELATIVE_TOLERANCE, ABSOLUTE_TOLERANCE)

def _chart_csv_response(chart, name, data_set_name=None):
    "Respond with the data from a chart."
    if not data_set_name:
        data_set_name = name.split('_')[2]
    if not settings.DEBUG:
        response = HttpResponse(mimetype='text/csv')
        response['Content-Disposition'] = \
            'attachment; filename=%s.csv' % name
    else:
        response = HttpResponse(mimetype='text/html')
    writer = csv.writer(response)
    for row in chart.get_data(data_set_name):
        if isinstance(row, (float, int, numpy.number)):
            writer.writerow([row])
        else:
            writer.writerow(row)
    return response

def prefer_alignment(value_type):
    if np.issubdtype(value_type, np.number):
        return ALIGN.RIGHT
    else:
        return ALIGN.LEFT

def _check_valid_rotation(self, rotation):
    """Checks that the given rotation matrix is valid.
    """
    if not isinstance(rotation, np.ndarray) or not np.issubdtype(rotation.dtype, np.number):
        raise ValueError('Rotation must be specified as numeric numpy array')

    if len(rotation.shape) != 2 or rotation.shape[0] != 3 or rotation.shape[1] != 3:
        raise ValueError('Rotation must be specified as a 3x3 ndarray')

    if np.abs(np.linalg.det(rotation) - 1.0) > 1e-3:
        raise ValueError('Illegal rotation. Must have determinant == 1.0')

def _check_valid_translation(self, translation):
    """Checks that the translation vector is valid.
    """
    if not isinstance(translation, np.ndarray) or not np.issubdtype(translation.dtype, np.number):
        raise ValueError('Translation must be specified as numeric numpy array')

    t = translation.squeeze()
    if len(t.shape) != 1 or t.shape[0] != 3:
        raise ValueError('Translation must be specified as a 3-vector, 3x1 ndarray, or 1x3 ndarray')

def check(self, df):
    if self.objective == "regression" or self.objective == "classification":
        if self.input_type == "text":
            if not self.text_field:
                raise Exception("Please specify a text field")
        else:
            if not self.target:
                raise Exception("Please specify a target field")
            if len(self.fields) == 0:
                raise Exception("Please specify at least one predictor field")
            numericTarget = False
            if df[self.target].dtype == np.number:
                numericTarget = True
            if self.objective == "regression" and not numericTarget:
                raise Exception("Please use a numeric target field for the regression objective")
            if self.objective == "classification" and numericTarget:
                raise Exception("Please use a string target field for the classification objective")
    elif self.objective == "time_series":
        if not self.target:
            raise Exception("Please specify a target field")
        if not self.order_field:
            raise Exception("Please specify an index field")
        if df[self.target].dtype != np.number:
            raise Exception("Please use a numeric target field for the time series objective")
    else:
        if len(self.fields) == 0:
            raise Exception("Please specify at least one predictor field")

def process_columns(self, df):
    """Process columns before histogram filling

    Specifically, timestamp columns are converted to integers
    and numeric variables are converted to indices.

    :param df: input (pandas) data frame
    :returns: output (pandas) data frame with converted timestamp columns
    :rtype: pandas DataFrame
    """
    # timestamp variables are converted to ns here
    # make temp df for value counting (used below)
    idf = df[self.str_cols].copy(deep=False)
    for col in self.dt_cols:
        self.log().debug('Converting column "%s" of type "%s" to nanosec', col, self.var_dtype[col])
        idf[col] = df[col].apply(hf.to_ns)

    # numerical variables are converted to indices here
    for col in self.num_cols + self.dt_cols:
        self.log().debug('Converting column "%s" of type "%s" to index', col, self.var_dtype[col])
        # find column specific bin_specs. if not found, use dict of default values.
        dt = df[col].dtype
        is_number = isinstance(dt.type(), np.number)
        is_timestamp = isinstance(dt.type(), np.datetime64)
        sf = idf if is_timestamp else df
        bin_specs = self.bin_specs.get(col, self._unit_bin_specs if is_number
                                       else self._unit_timestamp_specs)
        idf[col] = sf[col].apply(hf.value_to_bin_index, **bin_specs)
    return idf

def bioenv(output_dir: str, distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata) -> None:
    # convert metadata to numeric values where applicable, drop the non-numeric
    # values, and then drop samples that contain NaNs
    df = metadata.to_dataframe()
    df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))

    # filter categorical columns
    pre_filtered_cols = set(df.columns)
    df = df.select_dtypes([numpy.number]).dropna()
    filtered_categorical_cols = pre_filtered_cols - set(df.columns)

    # filter 0 variance numerical columns
    pre_filtered_cols = set(df.columns)
    df = df.loc[:, df.var() != 0]
    filtered_zero_variance_cols = pre_filtered_cols - set(df.columns)

    # filter the distance matrix to exclude samples that were dropped from
    # the metadata, and keep track of how many samples survived the filtering
    # so that information can be presented to the user.
    initial_dm_length = distance_matrix.shape[0]
    distance_matrix = distance_matrix.filter(df.index, strict=False)
    filtered_dm_length = distance_matrix.shape[0]

    result = skbio.stats.distance.bioenv(distance_matrix, df)
    result = q2templates.df_to_html(result)

    index = os.path.join(TEMPLATES, 'bioenv_assets', 'index.html')
    q2templates.render(index, output_dir, context={
        'initial_dm_length': initial_dm_length,
        'filtered_dm_length': filtered_dm_length,
        'filtered_categorical_cols': ', '.join(filtered_categorical_cols),
        'filtered_zero_variance_cols': ', '.join(filtered_zero_variance_cols),
        'result': result})

def sanitize(x: Any) -> Any:  # pylint: disable=invalid-name,too-many-return-statements
    """
    Sanitize turns PyTorch and Numpy types into basic Python types so they
    can be serialized into JSON.
    """
    if isinstance(x, (str, float, int, bool)):
        # x is already serializable
        return x
    elif isinstance(x, torch.autograd.Variable):
        return sanitize(x.data)
    elif isinstance(x, torch._TensorBase):  # pylint: disable=protected-access
        # tensor needs to be converted to a list (and moved to cpu if necessary)
        return x.cpu().tolist()
    elif isinstance(x, numpy.ndarray):
        # array needs to be converted to a list
        return x.tolist()
    elif isinstance(x, numpy.number):
        # NumPy numbers need to be converted to Python numbers
        return x.item()
    elif isinstance(x, dict):
        # Dicts need their values sanitized
        return {key: sanitize(value) for key, value in x.items()}
    elif isinstance(x, (list, tuple)):
        # Lists and Tuples need their values sanitized
        return [sanitize(x_i) for x_i in x]
    else:
        raise ValueError("cannot sanitize {} of type {}".format(x, type(x)))

def test_array_side_effect(self):
    # The second use of itemsize was throwing an exception because in
    # ctors.c, discover_itemsize was calling PyObject_Length without
    # checking the return code.  This failed to get the length of the
    # number 2, and the exception hung around until something checked
    # PyErr_Occurred() and returned an error.
    assert_equal(np.dtype('S10').itemsize, 10)
    np.array([['abc', 2], ['long ', '0123456789']], dtype=np.string_)
    assert_equal(np.dtype('S10').itemsize, 10)

def test_simple(self):
    a = [[1, 2], [3, 4]]
    a_str = [[b'1', b'2'], [b'3', b'4']]
    modes = ['raise', 'wrap', 'clip']
    indices = [-1, 4]
    index_arrays = [np.empty(0, dtype=np.intp),
                    np.empty(tuple(), dtype=np.intp),
                    np.empty((1, 1), dtype=np.intp)]
    real_indices = {'raise': {-1: 1, 4: IndexError},
                    'wrap': {-1: 1, 4: 0},
                    'clip': {-1: 0, 4: 1}}
    # Currently all types but object, use the same function generation.
    # So it should not be necessary to test all. However test also a non
    # refcounted struct on top of object.
    types = np.int, np.object, np.dtype([('', 'i', 2)])
    for t in types:
        # ta works, even if the array may be odd if buffer interface is used
        ta = np.array(a if np.issubdtype(t, np.number) else a_str, dtype=t)
        tresult = list(ta.T.copy())
        for index_array in index_arrays:
            if index_array.size != 0:
                tresult[0].shape = (2,) + index_array.shape
                tresult[1].shape = (2,) + index_array.shape
            for mode in modes:
                for index in indices:
                    real_index = real_indices[mode][index]
                    if real_index is IndexError and index_array.size != 0:
                        index_array.put(0, index)
                        assert_raises(IndexError, ta.take, index_array,
                                      mode=mode, axis=1)
                    elif index_array.size != 0:
                        index_array.put(0, index)
                        res = ta.take(index_array, mode=mode, axis=1)
                        assert_array_equal(res, tresult[real_index])
                    else:
                        res = ta.take(index_array, mode=mode, axis=1)
                        assert_(res.shape == (2,) + index_array.shape)

def _delegate_binop(self, other):
    # This emulates the logic in
    # multiarray/number.c:PyArray_GenericBinaryFunction
    if (not isinstance(other, np.ndarray)
            and not hasattr(other, "__numpy_ufunc__")):
        other_priority = getattr(other, "__array_priority__", -1000000)
        if self.__array_priority__ < other_priority:
            return True
    return False

def _convert_array(self, array):
    try:
        global np
        import numpy as np
    except ImportError as ex:
        raise ImportError('DataFrameClient requires Numpy, '
                          '"{ex}" problem importing'.format(ex=str(ex)))

    if self.ignore_nan:
        number_types = (int, float, np.number)
        condition = (all(isinstance(el, number_types) for el in array)
                     and np.isnan(array))
        return list(np.where(condition, None, array))
    else:
        return list(array)

def maybe_format(item):
    """Pretty-format a string, integer, float, or percent

    Parameters
    ----------
    item : pandas.Series
        A single-item series containing a .name attribute and a value in the
        first (0th) index
    """
    value = item[0]
    if pd.isnull(value):
        return 'N/A'
    elif isinstance(value, str):
        return value
    elif 'percent' in item.name.lower():
        return '{:.2f}%'.format(value)
    elif isinstance(value, pd.Timestamp):
        return str(np.datetime64(value, 'D'))
    elif (isinstance(value, float)  # this must go before ints!
          or np.issubdtype(value, np.number)):
        if value >= 1e3:
            return locale.format("%d", int(value), grouping=True)
        else:
            return locale.format("%.3g", value, grouping=True)
    elif (isinstance(value, int)
          or np.issubdtype(value, np.integer)):
        return locale.format("%d", value, grouping=True)
    else:
        raise TypeError

def q(self):
    """The number of columns in the initial dataframe

    As opposed to `p` which is the number of columns in the indicator matrix
    of the initial dataframe.
    """
    return self.initial_dataframe.shape[1]

def n_supplementary_rows(self):
    """The number of supplementary rows."""
    return self.supplementary_rows.shape[0]