If you want to learn about example source code for the Python numpy function array_split() and about the numpy module in Python, this article is for you. We will look at example usages of numpy.array_split() from several angles, explain how the numpy module is used, and walk through concrete cases; we hope it helps!
Contents of this article:
- Python numpy module: array_split() example source code (the numpy module in Python)
- Are the chunks returned by 'np.array_split()' sorted in descending order of size?
- Numpy error when printing in Jupyter (Python 3.8.8): TypeError: 'numpy.ndarray' object is not callable
- np.split() and np.array_split()
- numpy.random.random & numpy.ndarray.astype & numpy.arange
Python numpy module: array_split() example source code (the numpy module in Python)
The following 50 code examples, extracted from open-source Python projects, illustrate how numpy.array_split() is used.
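As a quick orientation before the project snippets, here is a minimal usage sketch (the values are illustrative; the comments show what NumPy produces):
import numpy as np
a = np.arange(10)
chunks = np.array_split(a, 3)  # 3 does not evenly divide 10; array_split still works
print([c.tolist() for c in chunks])  # [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9]]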
- def create_agents(self, generator):
- """
- Given information on a set of countries and a generator function,
- generate the agents and assign the results to ``self.agents``.
- :type generator: DataFrame,str,int
- :param generator: A function which generates the agents.
- """
- self.generator = generator
- country_array = pd.concat([pd.Series([c] * k["Population"]) for c, k in self.df.iterrows()])
- country_array.index = range(len(country_array))
- # Garbage collect before creating new processes.
- gc.collect()
- self.agents = pd.concat(
- self.pool.imap(self._gen_agents,
- np.array_split(country_array, self.processes * self.splits))
- )
- self.agents.index = range(len(self.agents))
- def test_latlon2pix_internals(pix_size_single, origin_point, is_flipped,
- num_chunks, chunk_position):
- img = make_image(pix_size_single, origin_point, is_flipped,
- num_chunks, chunk_position)
- chunk_idx = img.chunk_idx
- res_x = img._full_res[0]
- res_y = img._full_res[1]
- pix_size = (img.pixsize_x, img.pixsize_y)
- origin = (img._start_lon, img._start_lat)
- # +0.5 for centre of pixels
- lons = (np.arange(res_x) + 0.5) * pix_size[0] + origin[0]
- all_lats = (np.arange(res_y) + 0.5) * pix_size[1] + origin[1]
- lats = np.array_split(all_lats, num_chunks)[chunk_idx]
- pix_x = np.arange(res_x)
- pix_y = np.arange(lats.shape[0])
- d = np.array([[a, b] for a in lons for b in lats])
- xy = img.lonlat2pix(d)
- true_xy = np.array([[a, b] for a in pix_x for b in pix_y])
- assert np.all(xy == true_xy)
- def test_pix2latlong(pix_size_single, origin_point, is_flipped,
- num_chunks, chunk_position):
- img = make_image(pix_size_single, origin_point, is_flipped,
- num_chunks, chunk_position)
- chunk_idx = img.chunk_idx
- res_x = img._full_res[0]
- res_y = img._full_res[1]
- pix_size = (img.pixsize_x, img.pixsize_y)
- origin = (img._start_lon, img._start_lat)
- true_lons = np.arange(res_x) * pix_size[0] + origin[0]
- all_lats = np.arange(res_y) * pix_size[1] + origin[1]
- true_lats = np.array_split(all_lats, num_chunks)[chunk_idx]
- true_d = np.array([[a, b] for a in true_lons for b in true_lats])
- pix_x = np.arange(res_x)
- pix_y = np.arange(img.resolution[1]) # chunk resolution
- xy = np.array([[a, b] for a in pix_x for b in pix_y])
- lonlats = img.pix2lonlat(xy)
- assert np.all(lonlats == true_d)
- def transform(self, X):
- if self.tagger is None:
- raise ValueError("Must find_motifs before you can tag anything")
- logging.info("Tagging %s data with motifs using %d workers..." % (
- str(X.shape), self.n_jobs))
- if self.n_jobs > 1:
- pool = mp.ProcessingPool(self.n_jobs)
- splits = np.array_split(X, self.n_jobs)
- tag_lists = pool.map(self._tag_motifs, splits)
- tags = list(itertools.chain.from_iterable(tag_lists))
- else:
- tags = self._tag_motifs(X)
- logging.info("All motifs have been tagged")
- return self._sparsify_tags(tags)
- def subset_iterator(X, m, repeats=1):
- ''''''
- Iterates over array X in chunks of m,repeat number of times.
- Each time the order of the repeat is randomly generated.
- ''''''
- N, dim = X.shape
- progress = tqdm(total=repeats * int(N / m))
- for i in range(repeats):
- indices = np.random.permutation(N)
- for idx in np.array_split(indices, N // m):
- yield X[idx][:]
- progress.update()
- progress.close()
- def _split_into_groups(y, num_groups):
- groups = [[] for _ in range(num_groups)]
- group_index = 0
- for cls in set(y):
- this_cls_indices = np.where(y == cls)[0]
- num_cls_samples = len(this_cls_indices)
- num_cls_split_groups = ceil(num_cls_samples / 500)
- split = np.array_split(this_cls_indices, num_cls_split_groups)
- for cls_group in split:
- groups[group_index] = np.hstack((groups[group_index], cls_group))
- group_index = (group_index + 1) % num_groups
- return groups
- def get_embedding_X(img):
- ''''''
- Args : Numpy Images vector
- Returns : Embedded Matrix of length Samples,4096
- ''''''
- img = img.reshape((img.shape[0], img.shape[1], img.shape[2], 1))
- sess = tf.Session()
- imgs = tf.placeholder(tf.float32, [None, None, None])
- vgg = vgg16(imgs, ''/tmp/vgg16_weights.npz'', sess)
- embs = []
- cnt = 0
- for img_batch in np.array_split(img, img.shape[0] / 1000):
- emb = sess.run(vgg.emb, feed_dict={vgg.imgs: img_batch})
- embs.extend(emb)
- cnt += 1
- progress = round(100 * (cnt * 1000 / img.shape[0]),2)
- if(progress%10 == 0):
- print progress
- embs = np.array(embs)
- print embs.shape
- embs = np.reshape(embs,(embs.shape[0],embs.shape[1] * embs.shape[2] * embs.shape[3]))
- return embs
- def __init__(self, pobj, just_list = False, attr=''_grids'',
- round_robin=False):
- ObjectIterator.__init__(self, just_list, attr=attr)
- # pobj has to be a ParallelAnalysisInterface,so it must have a .comm
- # object.
- self._offset = pobj.comm.rank
- self._skip = pobj.comm.size
- # Note that we''re doing this in advance,and with a simple means
- # of choosing them; more advanced methods will be explored later.
- if self._use_all:
- self.my_obj_ids = np.arange(len(self._objs))
- else:
- if not round_robin:
- self.my_obj_ids = np.array_split(
- np.arange(len(self._objs)), self._skip)[self._offset]
- else:
- self.my_obj_ids = np.arange(len(self._objs))[self._offset::self._skip]
- def iter_combinatorial_pairs(queue, num_examples, batch_size, interval,
- num_classes, augment_positive=False):
- num_examples_per_class = num_examples // num_classes
- pairs = np.array(list(itertools.combinations(range(num_examples), 2)))
- if augment_positive:
- additional_positive_pairs = make_positive_pairs(
- num_classes, num_examples_per_class, num_classes - 1)
- pairs = np.concatenate((pairs, additional_positive_pairs))
- num_pairs = len(pairs)
- num_batches = num_pairs // batch_size
- perm = np.random.permutation(num_pairs)
- for i, batch_indexes in enumerate(np.array_split(perm, num_batches)):
- if i % interval == 0:
- x, c = queue.get()
- x = x.astype(np.float32) / 255.0
- c = c.ravel()
- indexes0, indexes1 = pairs[batch_indexes].T
- x0, x1, c0, c1 = x[indexes0], x[indexes1], c[indexes0], c[indexes1]
- t = np.int32(c0 == c1) # 1 if x0 and x1 are same class,0 otherwise
- yield x0, t
- def get_epoch_indexes(self):
- B = self.batch_size
- K = self.num_classes
- M = self.num_per_class
- N = K * M # number of total examples
- num_batches = M * int(K // B) # number of batches per epoch
- indexes = np.arange(N, dtype=np.int32).reshape(K, M)
- epoch_indexes = []
- for m in range(M):
- perm = np.random.permutation(K)
- c_batches = np.array_split(perm, num_batches // M)
- for c_batch in c_batches:
- b = len(c_batch) # actual number of examples of this batch
- indexes_anchor = M * c_batch + m
- positive_candidates = np.delete(indexes[c_batch], axis=1)
- indexes_positive = positive_candidates[
- range(b), np.random.choice(M - 1, size=b)]
- epoch_indexes.append((indexes_anchor, indexes_positive))
- return epoch_indexes
- def pre_processing(self):
- """Provide same API as Model,we split data to K folds here.
- """
- if self.random:
- mask = np.random.permutation(self.train_x.shape[0])
- train_x = self.train_x[mask]
- train_y = self.train_y[mask]
- else:
- train_x = self.train_x[:]
- train_y = self.train_y[:]
- if self.select_train_method == ''step'':
- self.x_folds = [train_x[i::self.k_folds] for i in range(0, self.k_folds)]
- self.y_folds = [train_y[i::self.k_folds] for i in range(0, self.k_folds)]
- else:
- self.x_folds = np.array_split(train_x, self.k_folds)
- self.y_folds = np.array_split(train_y, self.k_folds)
- # for i in range(self.k_folds):
- # self.x_folds[i] = self.train_x[0] + self.x_folds[i] + self.train_x[-1]
- # self.y_folds[i] = self.train_y[0] + self.y_folds[i] + self.train_y[-1]
- def Train(self, C, A, Y, SF):
- ''''''
- Train the classifier using the sample matrix A and target matrix Y
- ''''''
- C.fit(A, Y)
- YH = np.zeros(Y.shape, dtype = np.object)
- for i in np.array_split(np.arange(A.shape[0]), 32): #Split up verification into chunks to prevent out of memory
- YH[i] = C.predict(A[i])
- s1 = SF(Y, YH)
- print(''All:{:8.6f}''.format(s1))
- ''''''
- ss = ShuffleSplit(random_state = 1151) #Use fixed state for so training can be repeated later
- trn,tst = next(ss.split(A,Y)) #Make train/test split
- mi = [8] * 1 #Maximum number of iterations at each iter
- YH = np.zeros((A.shape[0]),dtype = np.object)
- for mic in mi: #Chunk size to split dataset for CV results
- #C.SetMaxIter(mic) #Set the maximum number of iterations to run
- #C.fit(A[trn],Y[trn]) #Perform training iterations
- ''''''
- def add_point(self, t, alt, az):
- self.window.append((t, az))
- if self._current_window_size() < self.window_duration:
- return
- points = np.array(self.window)
- steady, current = np.array_split(points, 2)
- _, steady_cube = self.create_cube(steady)
- timestamps, current_cube = self.create_cube(current)
- t = self.denoise_and_compare_cubes(steady_cube, current_cube)
- self.trigger_criterion.append(list(t))
- self.trigger_criterion_timestamps.append(list(timestamps))
- has_triggered = self.check_trigger(t)
- new_duration = self.window_duration - self.step
- self._reduce_to_duration(new_duration)
- def predict(self):
- if os.path.exists(DATA_QUERIES_VECTOR_NPZ) and not FORCE_LOAD:
- print(''{}: loading precomputed data''.format(self.__class__.__name__))
- self.load_precomputed_data()
- else:
- self.precomputed_similarity()
- batch_size = 100
- batch_elements = math.ceil(self.queries_vector.shape[0] / batch_size)
- batch_queue = np.array_split(self.queries_vector.A, batch_elements)
- print("starting batch computation of Similarity and KNN calculation")
- # # multiple versions of calculating the prediction,some faster,some use more mem
- # prediction = self.multiprocessor_batch_calc(batch_queue)
- prediction = self.batch_calculation(batch_queue)
- # prediction = self.individual_calculation()
- # prediction = self.cosine_knn_calc()
- # prediction = self.custom_knn_calculation(prediction)
- train_avg_salary = sum(self.y_train) / len(self.y_train)
- cleaned_predictions = [x if str(x) != ''nan'' else train_avg_salary for x in prediction]
- return self.y_train, cleaned_predictions
- def load_test_data(self):
- # Remove non-mat files,and perform ascending sort
- allfiles = os.listdir(self.data_dir)
- npzfiles = []
- for idx, f in enumerate(allfiles):
- if ".npz" in f:
- npzfiles.append(os.path.join(self.data_dir, f))
- npzfiles.sort()
- # Files for validation sets
- val_files = np.array_split(npzfiles, self.n_folds)
- val_files = val_files[self.fold_idx]
- print "\\n========== [Fold-{}] ==========\\n".format(self.fold_idx)
- print "Load validation set:"
- data_val, label_val = self._load_npz_list_files(val_files)
- return data_val, label_val
- def __init__(self, X, kern, Xm):
- super(PITC, self).__init__("PITC")
- M = np.shape(Xm)[0]
- self.M = M
- start = time.time()
- X_split = np.array_split(X, M)
- self.kern = kern
- kern_blocks = np.zeros((M),dtype=object)
- for t in xrange(M):
- nyst = Nystrom(X_split[t], Xm, False)
- size = np.shape(X_split[t])[0]
- kern_blocks[t] = kern.K(X_split[t], X_split[t]) - nyst.precon + (kern.noise)*np.identity(size)
- self.blocks = kern_blocks
- blocked = block_diag(*kern_blocks)
- self.nyst = Nystrom(X, Xm, False)
- self.precon = self.nyst.precon + blocked
- self.duration = time.time() - start
- def _read_image_as_array(path, dtype, load_size, crop_size, flip):
- f = Image.open(path)
- A, B = numpy.array_split(numpy.asarray(f), 2, axis=1)
- if hasattr(f, ''close''):
- f.close()
- A = _resize(A, Image.BILINEAR, dtype)
- B = _resize(B, Image.NEAREST, dtype)
- sx, sy = numpy.random.randint(0, load_size-crop_size, 2)
- A = _crop(A, sx, sy, crop_size)
- B = _crop(B, sx, sy, crop_size)
- if flip and numpy.random.rand() > 0.5:
- A = numpy.fliplr(A)
- B = numpy.fliplr(B)
- return A.transpose(2, 0, 1), B.transpose(2, 0, 1)
- def setup_figure():
- f = plt.figure(figsize=(7, 5))
- mat_grid = plt.GridSpec(2, 6, .07, .52, .98, .95, .15, .20)
- mat_axes = [f.add_subplot(spec) for spec in mat_grid]
- sticks_axes, rest_axes = np.array_split(mat_axes, 2)
- scatter_grid = plt.GridSpec(1, .30, .49, .05)
- scatter_axes = [f.add_subplot(spec) for spec in scatter_grid]
- kde_grid = plt.GridSpec(1, .21, .05)
- kde_axes = [f.add_subplot(spec) for spec in kde_grid]
- cbar_ax = f.add_axes([.04, .62, .015, .26])
- return f, sticks_axes, rest_axes, scatter_axes, kde_axes, cbar_ax
- def partitions(min_val, max_val, n):
- """
- Get start/stop boundaries for N partitions.
- Args:
- min_val (int): The starting value.
- max_val (int): The last value.
- n (int): The number of partitions.
- """
- pts = np.array_split(np.arange(min_val, max_val+1), n)
- bounds = []
- for pt in pts:
- bounds.append((int(pt[0]), int(pt[-1])))
- return bounds
- def fit(self, X, y):
- """Fit a series of independent estimators to the dataset.
- Parameters
- ----------
- X : array, shape (n_samples, n_features, n_estimators)
- The training input samples. For each data slice, a clone estimator
- is fitted independently.
- y : array, shape (n_samples,)
- The target values.
- Returns
- -------
- self : object
- Return self.
- """
- self._check_Xy(X, y)
- self.estimators_ = list()
- # For fitting,the parallelization is across estimators.
- parallel, p_func, n_jobs = parallel_func(_sl_fit, self.n_jobs)
- estimators = parallel(
- p_func(self.base_estimator, split, y)
- for split in np.array_split(X, n_jobs, axis=-1))
- self.estimators_ = np.concatenate(estimators, 0)
- return self
- def _transform(self, X, method):
- """Aux. function to make parallel predictions/transformation."""
- self._check_Xy(X)
- method = _check_method(self.base_estimator, method)
- if X.shape[-1] != len(self.estimators_):
- raise ValueError(''The number of estimators does not match ''
- ''X.shape[2]'')
- # For predictions/transforms the parallelization is across the data and
- # not across the estimators to avoid memory load.
- parallel, p_func, n_jobs = parallel_func(_sl_transform, self.n_jobs)
- X_splits = np.array_split(X, n_jobs, axis=-1)
- est_splits = np.array_split(self.estimators_, n_jobs)
- y_pred = parallel(p_func(est, x, method)
- for (est, x) in zip(est_splits, X_splits))
- if n_jobs > 1:
- y_pred = np.concatenate(y_pred, axis=1)
- else:
- y_pred = y_pred[0]
- return y_pred
- def _yield_minibatches_idx(self, n_batches, data_ary, shuffle=True):
- indices = np.arange(data_ary.shape[0])
- if shuffle:
- indices = np.random.permutation(indices)
- if n_batches > 1:
- remainder = data_ary.shape[0] % n_batches
- if remainder:
- minis = np.array_split(indices[:-remainder], n_batches)
- minis[-1] = np.concatenate((minis[-1],
- indices[-remainder:]),
- axis=0)
- else:
- minis = np.array_split(indices, n_batches)
- else:
- minis = (indices,)
- for idx_batch in minis:
- yield idx_batch
- def test_mini_batch_k_means_random_init_partial_fit():
- km = MiniBatchKMeans(n_clusters=n_clusters, init="random", random_state=42)
- # use the partial_fit API for online learning
- for X_minibatch in np.array_split(X, 10):
- km.partial_fit(X_minibatch)
- # compute the labeling on the complete dataset
- labels = km.predict(X)
- assert_equal(v_measure_score(true_labels, labels), 1.0)
- def binned_batch_stream(target_statistics, batch_size, n_batches, n_bins=64):
- hist, bins = np.histogram(target_statistics, bins=n_bins)
- indx = np.argsort(target_statistics)
- indicies_categories = np.array_split(indx, np.cumsum(hist)[:-1])
- per_category = batch_size / n_bins
- weight_correction = (np.float64(hist) / per_category).astype(''float32'')
- wc = np.repeat(weight_correction, per_category)
- for i in xrange(n_batches):
- sample = [
- np.random.choice(ind, size=per_category, replace=True)
- for ind in indicies_categories
- ]
- yield np.hstack(sample), wc
- def binned_batch_stream(target_statistics, batch_size, n_batches, n_bins=64):
- hist, bins = np.histogram(target_statistics, bins=n_bins)
- indx = np.argsort(target_statistics)
- indicies_categories = np.array_split(indx, np.cumsum(hist)[:-1])
- n_samples = target_statistics.shape[0]
- per_category = batch_size / n_bins
- weight_correction = (n_bins * np.float64(hist) / n_samples).astype(''float32'')
- wc = np.repeat(weight_correction, per_category)
- for i in xrange(n_batches):
- sample = [
- np.random.choice(ind, size=per_category, replace=True)
- for ind in indicies_categories
- ]
- yield np.hstack(sample), wc
- def test_shape_factors(self):
- """
- Tests for :func:`array_split.split.shape_factors`.
- """
- f = shape_factors(4, 2)
- self.assertTrue(_np.all(f == 2))
- f = shape_factors(4, 1)
- self.assertTrue(_np.all(f == 4))
- f = shape_factors(5, 2)
- self.assertTrue(_np.all(f == [1, 5]))
- f = shape_factors(6, 2)
- self.assertTrue(_np.all(f == [2, 3]))
- f = shape_factors(6, 3)
- self.assertTrue(_np.all(f == [1, 3]))
- def scale(Boxlist, y_scale, x_scale):
- """Scale Box coordinates in x and y dimensions.
- Args:
- Boxlist: BoxList holding N Boxes
- y_scale: float
- x_scale: float
- Returns:
- Boxlist: BoxList holding N Boxes
- """
- y_min, x_min, y_max, x_max = np.array_split(Boxlist.get(), 4, axis=1)
- y_min = y_scale * y_min
- y_max = y_scale * y_max
- x_min = x_scale * x_min
- x_max = x_scale * x_max
- scaled_Boxlist = np_Box_list.BoxList(np.hstack([y_min, x_min, y_max, x_max]))
- fields = Boxlist.get_extra_fields()
- for field in fields:
- extra_field_data = Boxlist.get_field(field)
- scaled_Boxlist.add_field(field, extra_field_data)
- return scaled_Boxlist
- def iterbatches(arrays, num_batches=None, batch_size=None, shuffle=True, include_final_partial_batch=True):
- assert (num_batches is None) != (batch_size is None), ''Provide num_batches or batch_size,but not both''
- arrays = tuple(map(np.asarray, arrays))
- n = arrays[0].shape[0]
- assert all(a.shape[0] == n for a in arrays[1:])
- inds = np.arange(n)
- if shuffle: np.random.shuffle(inds)
- sections = np.arange(0, n, batch_size)[1:] if num_batches is None else num_batches
- for batch_inds in np.array_split(inds, sections):
- if include_final_partial_batch or len(batch_inds) == batch_size:
- yield tuple(a[batch_inds] for a in arrays)
- def _gen_init_n_blocks(na, nb, ka, kb):
- num_nodes_a = np.arange(na)
- n_blocks_a = map(len, np.array_split(num_nodes_a, ka))
- num_nodes_b = np.arange(nb)
- n_blocks_b = map(len, np.array_split(num_nodes_b, kb))
- n_blocks_ = " ".join(map(str, n_blocks_a)) + " " + " ".join(map(str, n_blocks_b))
- return n_blocks_
- def gen_equal_partition(n, total):
- all_nodes = np.arange(total)
- n_blocks = list(map(len, np.array_split(all_nodes, n)))
- return n_blocks
- def run_par(self, function, **kwargs):
- """
- Run a function on the agents in parallel.
- """
- columns = kwargs["columns"] if "columns" in kwargs else self.agents.columns
- # Garbage collect before creating new processes.
- gc.collect()
- return pd.concat(self.pool.imap(partial(function, **kwargs),
- np.array_split(self.agents[columns],
- self.processes * self.splits)))
- def split_in_chunks(minibatch, num_splits, flatten_keys=[''labels'']):
- ''''''Return the splits per device
- Return a list of dictionaries,one per device. Each dictionary
- contains,for each key,the values that should be allocated on its
- device.
- ''''''
- # Split the value of each key into chunks
- for k, v in minibatch.iteritems():
- minibatch[k] = np.array_split(v, num_splits)
- if any(k == v for v in flatten_keys):
- minibatch[k] = [el.flatten() for el in minibatch[k]]
- return map(dict, zip(*[[(k, v) for v in value]
- for k, value in minibatch.items()]))
- def chunk_iterator(dataset, chunk_size=1000):
- chunk_indices = np.array_split(np.arange(len(dataset)),
- len(dataset)/chunk_size)
- for chunk_ixs in chunk_indices:
- chunk = dataset[chunk_ixs]
- yield (chunk_ixs, chunk)
- raise StopIteration
- def array_split(ary, indices_or_sections, axis=0):
- """Splits an array into multiple sub arrays along a given axis.
- This function is almost equivalent to :func:`cupy.split`. The only
- difference is that this function allows an integer sections that does not
- evenly divide the axis.
- .. seealso:: :func:`cupy.split` for more detail,:func:`numpy.array_split`
- """
- return core.array_split(ary, indices_or_sections, axis)
- def split(ary, indices_or_sections, axis=0):
- """Splits an array into multiple sub arrays along a given axis.
- Args:
- ary (cupy.ndarray): Array to split.
- indices_or_sections (int or sequence of ints): A value indicating how
- to divide the axis. If it is an integer,then is treated as the
- number of sections,and the axis is evenly divided. Otherwise,
- the integers indicate indices to split at. Note that the sequence
- on the device memory is not allowed.
- axis (int): Axis along which the array is split.
- Returns:
- A list of sub arrays. Each array is a view of the corresponding input
- array.
- .. seealso:: :func:`numpy.split`
- """
- if ary.ndim <= axis:
- raise IndexError(''Axis exceeds ndim'')
- size = ary.shape[axis]
- if numpy.isscalar(indices_or_sections):
- if size % indices_or_sections != 0:
- raise ValueError(
- ''indices_or_sections must divide the size along the axes.\\n''
- ''If you want to split the array into non-equally-sized ''
- ''arrays,use array_split instead.'')
- return array_split(ary, indices_or_sections, axis)
- def trim_data(data, resolution):
- r = []
- for i in numpy.array_split(data, resolution):
- if len(i) > 0:
- r.append(numpy.average(i))
- return r
- def test_latlon2pix_edges(pix_size_single, origin_point, is_flipped,
- num_chunks, chunk_position):
- img = make_image(pix_size_single, origin_point, is_flipped,
- num_chunks, chunk_position)
- chunk_idx = img.chunk_idx
- res_x = img._full_res[0]
- res_y = img._full_res[1]
- pix_size = (img.pixsize_x, img.pixsize_y)
- origin = (img._start_lon, img._start_lat)
- # compute chunks
- lons = np.arange(res_x + 1) * pix_size[0] + origin[0] # right edge +1
- all_lats = np.arange(res_y) * pix_size[1] + origin[1]
- lats_chunks = np.array_split(all_lats, num_chunks)[chunk_idx]
- pix_x = np.concatenate((np.arange(res_x), [res_x - 1]))
- pix_y_chunks = range(lats_chunks.shape[0])
- if chunk_position == ''end'':
- pix_y = np.concatenate((pix_y_chunks, [pix_y_chunks[-1]]))
- lats = np.concatenate((lats_chunks, [res_y * pix_size[1] + origin[1]]))
- else:
- pix_y = pix_y_chunks
- lats = lats_chunks
- d = np.array([[a, b] for a in pix_x for b in pix_y])
- assert np.all(xy == true_xy)
- def split_cfold(nsamples, k=5, seed=None):
- """
- Function that returns indices for splitting data into random folds.
- Parameters
- ----------
- nsamples: int
- the number of samples in the dataset
- k: int,optional
- the number of folds
- seed: int,optional
- random seed to provide to numpy
- Returns
- -------
- cvinds: list
- list of arrays of length k,each with approximate shape (nsamples /
- k,) of indices. These indices are randomly permuted (without
- replacement) of assignments to each fold.
- cvassigns: ndarray
- array of shape (nsamples,) with each element in [0,k),that can be
- used to assign data to a fold. This corresponds to the indices of
- cvinds.
- """
- np.random.seed(seed)
- pindeces = np.random.permutation(nsamples)
- cvinds = np.array_split(pindeces, k)
- cvassigns = np.zeros(nsamples, dtype=int)
- for n, inds in enumerate(cvinds):
- cvassigns[inds] = n
- return cvinds, cvassigns
- def fit(self, x, y, *args, **kwargs):
- # set a different random seed for each thread
- np.random.seed(self.random_state + mpiops.chunk_index)
- if self.parallel:
- process_rfs = np.array_split(range(self.forests),
- mpiops.chunks)[mpiops.chunk_index]
- else:
- process_rfs = range(self.forests)
- for t in process_rfs:
- print(''training forest {} using ''
- ''process {}''.format(t, mpiops.chunk_index))
- # change random state in each forest
- self.kwargs[''random_state''] = np.random.randint(0, 10000)
- rf = RandomForestTransformed(
- target_transform=self.target_transform,
- n_estimators=self.n_estimators,
- **self.kwargs
- )
- rf.fit(x, y)
- if self.parallel: # used in training
- pk_f = join(self.temp_dir,
- ''rf_model_{}.pk''.format(t))
- else: # used when parallel is false,i.e.,during x-val
- pk_f = join(self.temp_dir,
- ''rf_model_{}_{}.pk''.format(t, mpiops.chunk_index))
- with open(pk_f, ''wb'') as fp:
- pickle.dump(rf, fp)
- if self.parallel:
- mpiops.comm.barrier()
- # Mark that we are Now trained
- self._trained = True
- def kmean_distance2(x, C):
- """Compute squared euclidian distance to the nearest cluster centre
- Parameters
- ----------
- x : ndarray
- (n,d) array of n d-dimensional points
- C : ndarray
- (k,d) array of k cluster centres
- Returns
- -------
- d2_x : ndarray
- (n,) length array of distances from each x to the nearest centre
- """
- # To save memory we partition the computation
- nsplits = max(1, int(x.shape[0]/distance_partition_size))
- splits = np.array_split(x, nsplits)
- d2_x = np.empty(x.shape[0])
- idx = 0
- for x_i in splits:
- n_i = x_i.shape[0]
- D2_x = scipy.spatial.distance.cdist(x_i, C, metric='sqeuclidean')
- d2_x[idx:idx + n_i] = np.amin(D2_x, axis=1)
- idx += n_i
- return d2_x
- def compute_weights(x, C):
- """Number of points in x assigned to each centre c in C
- Parameters
- ----------
- x : ndarray
- (n,d) array of n d-dimensional points
- C : ndarray
- (k,d) array of k cluster centres
- Returns
- -------
- weights : ndarray
- (k,) length array giving number of x closest to each c in C
- """
- nsplits = max(1, int(x.shape[0]/distance_partition_size))
- splits = np.array_split(x, nsplits)
- closests = np.empty(x.shape[0], dtype=int)
- idx = 0
- for x_i in splits:
- n_i = x_i.shape[0]
- D2_x = scipy.spatial.distance.cdist(x_i, C, metric='sqeuclidean')
- closests[idx: idx+n_i] = np.argmin(D2_x, axis=1)
- idx += n_i
- weights = np.bincount(closests, minlength=C.shape[0])
- return weights
- def reseed_point(X, C, index):
- """ Re-initialise the centre of a class if it loses all its members
- This should almost never happen. If it does,find the point furthest
- from all the other cluster centres and use that. Maybe a bad idea but
- a decent first pass
- Parameters
- ----------
- X : ndarray
- (n,d) array of points
- C : ndarray
- (k,d) array of cluster centres
- index : int >= 0
- index between 0..k-1 of the cluster that has lost it''s points
- Returns
- -------
- new_point : ndarray
- d-dimensional point for replacing the empty cluster centre.
- """
- log.info("Reseeding class with no members")
- nsplits = max(1, int(X.shape[0]/distance_partition_size))
- splits = np.array_split(X, nsplits)
- empty_index = np.ones(C.shape[0], dtype=bool)
- empty_index[index] = False
- local_candidate = None
- local_cost = 1e23
- for x_i in splits:
- D2_x = scipy.spatial.distance.cdist(x_i, C, metric='sqeuclidean')
- costs = np.sum(D2_x[:, empty_index], axis=1)
- potential_idx = np.argmax(costs)
- potential_cost = costs[potential_idx]
- if potential_cost < local_cost:
- local_candidate = x_i[potential_idx]
- local_cost = potential_cost
- best_pernode = mpiops.comm.allgather(local_cost)
- best_node = np.argmax(best_pernode)
- new_point = mpiops.comm.bcast(local_candidate, root=best_node)
- return new_point
- def __init__(self, shape, bBox, crs, name, n_subchunks, outputdir,
- band_tags=None):
- # affine
- self.A, _, _ = image.bBox2affine(bBox[1, 0], bBox[0, 0],
- bBox[0, 1], bBox[1, 1],
- shape[0], shape[1])
- self.shape = shape
- self.outbands = len(band_tags)
- self.bBox = bBox
- self.name = name
- self.outputdir = outputdir
- self.n_subchunks = n_subchunks
- self.sub_starts = [k[0] for k in np.array_split(
- np.arange(self.shape[1]),
- mpiops.chunks * self.n_subchunks)]
- # file tags don''t have spaces
- if band_tags:
- file_tags = ["_".join(k.lower().split()) for k in band_tags]
- else:
- file_tags = [str(k) for k in range(self.outbands)]
- band_tags = file_tags
- if mpiops.chunk_index == 0:
- # create a file for each band
- self.files = []
- for band in range(self.outbands):
- output_filename = os.path.join(outputdir, name + "_" +
- file_tags[band] + ".tif")
- f = Rasterio.open(output_filename, ''w'', driver=''GTiff'',
- width=self.shape[0], height=self.shape[1],
- dtype=np.float32, count=1,
- crs=crs,
- transform=self.A,
- nodata=self.nodata_value)
- f.update_tags(1, image_type=band_tags[band])
- self.files.append(f)
- def gdalaverage(input_dir, out_dir, size):
- """
- average data using gdal''s averaging method.
- Parameters
- ----------
- input_dir: str
- input dir name of the tifs that needs to be averaged
- out_dir: str
- output dir name
- size: int,optional
- size of kernel
- Returns
- -------
- """
- input_dir = abspath(input_dir)
- log.info(''Reading tifs from {}''.format(input_dir))
- tifs = glob.glob(join(input_dir, ''*.tif''))
- process_tifs = np.array_split(tifs, mpiops.chunks)[mpiops.chunk_index]
- for tif in process_tifs:
- data_set = gdal.Open(tif, gdal.GA_ReadOnly)
- # band = data_set.GetRasterBand(1)
- # data_type = gdal.GetDataTypeName(band.DataType)
- # data = band.ReadAsArray()
- # no_data_val = band.GetNoDataValue()
- # averaged_data = filter_data(data,size,no_data_val)
- log.info(''Calculated average for {}''.format(basename(tif)))
- output_file = join(out_dir, ''average_'' + basename(tif))
- src_gt = data_set.GetGeoTransform()
- tmp_file = ''/tmp/tmp_{}.tif''.format(mpiops.chunk_index)
- resample_cmd = [TRANSLATE] + [tif, tmp_file] + \\
- [''-tr'', str(src_gt[1]*size), str(src_gt[1]*size)] + \\
- [''-r'', ''bilinear'']
- check_call(resample_cmd)
- rollback_cmd = [TRANSLATE] + [tmp_file, output_file] + \\
- [''-tr'', str(src_gt[1]), str(src_gt[1])]
- check_call(rollback_cmd)
- log.info(''Finished converting {}''.format(basename(tif)))
- def mean(input_dir, size, func, partitions, mask):
- input_dir = abspath(input_dir)
- if isdir(input_dir):
- log.info(''Reading tifs from {}''.format(input_dir))
- tifs = glob.glob(join(input_dir, ''*.tif''))
- else:
- assert isfile(input_dir)
- tifs = [input_dir]
- process_tifs = np.array_split(tifs, mpiops.chunks)[mpiops.chunk_index]
- for tif in process_tifs:
- log.info(''Starting to average {}''.format(basename(tif)))
- treat_file(tif, mask)
- log.info(''Finished averaging {}''.format(basename(tif)))
- def inspect(input_dir, report_file, extension):
- input_dir = abspath(input_dir)
- if isdir(input_dir):
- log.info(''Reading tifs from {}''.format(input_dir))
- tifs = glob.glob(join(input_dir, ''*.'' + extension))
- else:
- log.info(''Reporting geoinfo for {}''.format(input_dir))
- tifs = [input_dir]
- with open(report_file, 'w', newline='') as csvfile:
- writer = csv.writer(csvfile, dialect=''excel'')
- writer.writerow([''FineName'', ''band'', ''NoDataValue'', ''rows'', ''cols'',
- ''Min'', ''Max'', ''Mean'', ''Std'',
- ''DataType'', ''Categories'', ''NanCount''])
- process_tifs = np.array_split(tifs, mpiops.chunks)[mpiops.chunk_index]
- stats = [] # process geotiff stats including multibanded geotif
- for t in process_tifs:
- stats.append(get_stats(t, partitions))
- # gather all process geotif stats in stats dict
- stats = _join_dicts(stats)
- # global gather in root
- stats = _join_dicts(mpiops.comm.gather(stats, root=0))
- if mpiops.chunk_index == 0:
- for k, v in stats.items():
- write_rows(v, writer)
Are the chunks returned by 'np.array_split()' sorted in descending order of size?
When numpy.array_split is used with an integer and the number of sections is not a divisor of the size along the axis in question, some sections come out smaller or larger than others, for example
import numpy as np
[chunk.shape[0] for chunk in np.array_split(np.arange(12),5)]
returns the chunk sizes [3, 3, 2, 2, 2].
Although the documentation does not mention it, the smallest chunks appear to end up at the end of the list, and sampling confirms this for arrays of up to 200 elements, whatever the requested number of chunks:
import numpy as np
not_ordered = 0
for sample_size in np.arange(2,200):
a = np.arange(sample_size)
for n in np.arange(2,sample_size//2):
chunks = np.array_split(a,n)
sizes = [chunk.shape[0] for chunk in chunks]
for i in np.arange(1,len(sizes)):
if sizes[i] > sizes[i-1]:
not_ordered += 1
break
print(f'Not ordered: {not_ordered}')
Does the algorithm behind the function guarantee this descending order, or is it something that cannot be relied upon when using the returned result?
Answer
The numpy.array_split documentation says:
For an array of length l that should be split into n sections, it returns l % n sub-arrays of size l//n + 1 and the rest of size l//n.
Since l and n are fixed for a given call, it follows that no element of the returned list is larger than the one before it.
Edit: if in doubt, since this is Python, we can read the code. If indices_or_sections is an integer, the relevant part is:
Nsections = int(indices_or_sections)
if Nsections <= 0:
    raise ValueError('number sections must be larger than 0.')
Neach_section, extras = divmod(Ntotal, Nsections)
section_sizes = ([0] +
                 extras * [Neach_section+1] +
                 (Nsections-extras) * [Neach_section])
div_points = _nx.array(section_sizes, dtype=_nx.intp).cumsum()
where Ntotal is the number of elements in the input array. As you can see, the Neach_section+1 sizes come first, followed by the Neach_section sizes.
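A quick check consistent with the source above (the comments show what NumPy produces):
import numpy as np
sizes = [c.shape[0] for c in np.array_split(np.arange(12), 5)]
print(sizes)  # [3, 3, 2, 2, 2]
print(sizes == sorted(sizes, reverse=True))  # True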
Numpy error when printing in Jupyter (Python 3.8.8): TypeError: 'numpy.ndarray' object is not callable
Good evening. When trying to print the following, I ran into a numpy problem in Jupyter and got an error; note that the Python version is 3.8.8. I first tested it in Spyder, where it ran correctly and gave the expected results.
With Spyder:
import numpy as np
for i in range (5):
n = np.random.rand ()
print (n)
Results
0.6604903457995978
0.8236300859753154
0.16067650689842816
0.6967868357083673
0.4231597934445466
Now with Jupyter:
import numpy as np
for i in range (5):
n = np.random.rand ()
print (n)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-78-0c6a801b3ea9> in <module>
2 for i in range (5):
3 n = np.random.rand ()
----> 4 print (n)
TypeError: 'numpy.ndarray' object is not callable
I would appreciate any help on how to fix this in Jupyter.
Thank you very much for taking the time.
Regards, John
Answer
No working answer was recorded in the original thread. The traceback itself, however, shows that the name print refers to a numpy.ndarray at the point of the call, i.e. print has been overwritten with an array earlier in the notebook session; restarting the kernel (or deleting that binding with del print) restores the builtin.
np.split() and np.array_split()
From: 爱抠脚的coder
np.split():
The second argument is either an integer (int) or a list of split positions. If you pass a plain int, the array must divide into equal parts, otherwise an error is raised.
np.array_split():
array_split() also handles unequal splits.
When a list is passed, the array is split at the positions given in the list, e.g. at indices 3, 5, 6 and 10.
With np.split() and an int, an uneven division raises an error:
x = np.arange(8)
y = np.split(x, 3)
print(y)
The error is:
ValueError: array split does not result in an equal division
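For comparison, a short sketch of the two behaviours described above (output shown as comments):
import numpy as np
x = np.arange(8)
print(np.array_split(x, 3))  # works: [array([0, 1, 2]), array([3, 4, 5]), array([6, 7])]
y = np.arange(12)
print(np.split(y, [3, 5, 6, 10]))
# [array([0, 1, 2]), array([3, 4]), array([5]), array([6, 7, 8, 9]), array([10, 11])]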
Unequal splits:
For an array of length l split into n sections, array_split returns l % n sub-arrays of size (l // n) + 1 and the remaining sub-arrays of size l // n.
For example, 25 % 7 is 4, so it returns 4 sub-arrays of size (25 // 7) + 1 = 4 and 3 sub-arrays of size 25 // 7 = 3.
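A quick check of the 25-into-7 case (output shown as a comment):
import numpy as np
chunks = np.array_split(np.arange(25), 7)
print([c.shape[0] for c in chunks])  # [4, 4, 4, 4, 3, 3, 3]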
numpy.random.random & numpy.ndarray.astype & numpy.arange
Today I came across code like this:
xb = np.random.random((nb, d)).astype('float32')  # create a 2-D matrix of random numbers (nb rows, d columns)
xb[:, 0] += np.arange(nb) / 1000.  # add an offset to every value in the first column
Understanding these two lines requires three functions:
1. Generating random numbers: numpy.random.random(size=None)
When size is None, it returns a float.
When size is not None, it returns a numpy.ndarray. For example, numpy.random.random((1, 2)) returns a numpy array with 1 row and 2 columns.
2. Converting the type of every element of a numpy array: numpy.ndarray.astype(dtype)
Returns a numpy.ndarray. For example, numpy.array([1, 2, 2.5]).astype(int) returns the numpy array [1, 2, 2].
3. Generating an arithmetic sequence: numpy.arange([start, ]stop[, step], dtype=None)
Similar to Python's built-in range() and to numpy.linspace.
Returns a numpy array. For example, numpy.arange(3) returns the numpy array [0, 1, 2].
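Putting the three together, a small sketch of what the two lines at the top of this section do (nb and d are illustrative values chosen here):
import numpy as np
nb, d = 5, 3  # illustrative sizes
xb = np.random.random((nb, d)).astype('float32')  # nb x d random float32 matrix
xb[:, 0] += np.arange(nb) / 1000.  # offsets the first column by 0.000, 0.001, 0.002, ...
print(xb.dtype, xb.shape)  # float32 (5, 3)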