m2m模型翻译
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1092 lines
37 KiB

6 months ago
  1. """
  2. Record Arrays
  3. =============
  4. Record arrays expose the fields of structured arrays as properties.
  5. Most commonly, ndarrays contain elements of a single type, e.g. floats,
  6. integers, bools etc. However, it is possible for elements to be combinations
  7. of these using structured types, such as::
  8. >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', np.int64), ('y', np.float64)])
  9. >>> a
  10. array([(1, 2.), (1, 2.)], dtype=[('x', '<i8'), ('y', '<f8')])
  11. Here, each element consists of two fields: x (an int), and y (a float).
  12. This is known as a structured array. The different fields are analogous
  13. to columns in a spread-sheet. The different fields can be accessed as
  14. one would a dictionary::
  15. >>> a['x']
  16. array([1, 1])
  17. >>> a['y']
  18. array([2., 2.])
  19. Record arrays allow us to access fields as properties::
  20. >>> ar = np.rec.array(a)
  21. >>> ar.x
  22. array([1, 1])
  23. >>> ar.y
  24. array([2., 2.])
  25. """
  26. import os
  27. import warnings
  28. from collections import Counter
  29. from contextlib import nullcontext
  30. from . import numeric as sb
  31. from . import numerictypes as nt
  32. from numpy.compat import os_fspath
  33. from numpy.core.overrides import set_module
  34. from .arrayprint import get_printoptions
  35. # All of the functions allow formats to be a dtype
# Public names exported by this module.
__all__ = [
    'record', 'recarray', 'format_parser',
    'fromarrays', 'fromrecords', 'fromstring', 'fromfile', 'array',
]


# Local alias for the base array class; recarray derives from it.
ndarray = sb.ndarray

# Lookup table used by format_parser._createdtype: the first character of a
# user-supplied byteorder string is mapped to the code that is then handed
# to ``dtype.newbyteorder``.
_byteorderconv = {'b':'>',
                  'l':'<',
                  'n':'=',
                  'B':'>',
                  'L':'<',
                  'N':'=',
                  'S':'s',
                  's':'s',
                  '>':'>',
                  '<':'<',
                  '=':'=',
                  '|':'|',
                  'I':'|',
                  'i':'|'}

# formats regular expression
# allows multidimension spec with a tuple syntax in front
# of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
# are equally allowed
# NOTE(review): no regular expression is defined in the visible code; the
# comment above may be stale -- confirm before relying on it.

# Alias of the scalar-type dictionary from numerictypes.
numfmt = nt.sctypeDict
  60. def find_duplicate(list):
  61. """Find duplication in a list, return a list of duplicated elements"""
  62. return [
  63. item
  64. for item, counts in Counter(list).items()
  65. if counts > 1
  66. ]
  67. @set_module('numpy')
  68. class format_parser:
  69. """
  70. Class to convert formats, names, titles description to a dtype.
  71. After constructing the format_parser object, the dtype attribute is
  72. the converted data-type:
  73. ``dtype = format_parser(formats, names, titles).dtype``
  74. Attributes
  75. ----------
  76. dtype : dtype
  77. The converted data-type.
  78. Parameters
  79. ----------
  80. formats : str or list of str
  81. The format description, either specified as a string with
  82. comma-separated format descriptions in the form ``'f8, i4, a5'``, or
  83. a list of format description strings in the form
  84. ``['f8', 'i4', 'a5']``.
  85. names : str or list/tuple of str
  86. The field names, either specified as a comma-separated string in the
  87. form ``'col1, col2, col3'``, or as a list or tuple of strings in the
  88. form ``['col1', 'col2', 'col3']``.
  89. An empty list can be used, in that case default field names
  90. ('f0', 'f1', ...) are used.
  91. titles : sequence
  92. Sequence of title strings. An empty list can be used to leave titles
  93. out.
  94. aligned : bool, optional
  95. If True, align the fields by padding as the C-compiler would.
  96. Default is False.
  97. byteorder : str, optional
  98. If specified, all the fields will be changed to the
  99. provided byte-order. Otherwise, the default byte-order is
  100. used. For all available string specifiers, see `dtype.newbyteorder`.
  101. See Also
  102. --------
  103. dtype, typename, sctype2char
  104. Examples
  105. --------
  106. >>> np.format_parser(['<f8', '<i4', '<a5'], ['col1', 'col2', 'col3'],
  107. ... ['T1', 'T2', 'T3']).dtype
  108. dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'), (('T3', 'col3'), 'S5')])
  109. `names` and/or `titles` can be empty lists. If `titles` is an empty list,
  110. titles will simply not appear. If `names` is empty, default field names
  111. will be used.
  112. >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
  113. ... []).dtype
  114. dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')])
  115. >>> np.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
  116. dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])
  117. """
  118. def __init__(self, formats, names, titles, aligned=False, byteorder=None):
  119. self._parseFormats(formats, aligned)
  120. self._setfieldnames(names, titles)
  121. self._createdtype(byteorder)
  122. def _parseFormats(self, formats, aligned=False):
  123. """ Parse the field formats """
  124. if formats is None:
  125. raise ValueError("Need formats argument")
  126. if isinstance(formats, list):
  127. dtype = sb.dtype(
  128. [('f{}'.format(i), format_) for i, format_ in enumerate(formats)],
  129. aligned,
  130. )
  131. else:
  132. dtype = sb.dtype(formats, aligned)
  133. fields = dtype.fields
  134. if fields is None:
  135. dtype = sb.dtype([('f1', dtype)], aligned)
  136. fields = dtype.fields
  137. keys = dtype.names
  138. self._f_formats = [fields[key][0] for key in keys]
  139. self._offsets = [fields[key][1] for key in keys]
  140. self._nfields = len(keys)
  141. def _setfieldnames(self, names, titles):
  142. """convert input field names into a list and assign to the _names
  143. attribute """
  144. if names:
  145. if type(names) in [list, tuple]:
  146. pass
  147. elif isinstance(names, str):
  148. names = names.split(',')
  149. else:
  150. raise NameError("illegal input names %s" % repr(names))
  151. self._names = [n.strip() for n in names[:self._nfields]]
  152. else:
  153. self._names = []
  154. # if the names are not specified, they will be assigned as
  155. # "f0, f1, f2,..."
  156. # if not enough names are specified, they will be assigned as "f[n],
  157. # f[n+1],..." etc. where n is the number of specified names..."
  158. self._names += ['f%d' % i for i in range(len(self._names),
  159. self._nfields)]
  160. # check for redundant names
  161. _dup = find_duplicate(self._names)
  162. if _dup:
  163. raise ValueError("Duplicate field names: %s" % _dup)
  164. if titles:
  165. self._titles = [n.strip() for n in titles[:self._nfields]]
  166. else:
  167. self._titles = []
  168. titles = []
  169. if self._nfields > len(titles):
  170. self._titles += [None] * (self._nfields - len(titles))
  171. def _createdtype(self, byteorder):
  172. dtype = sb.dtype({
  173. 'names': self._names,
  174. 'formats': self._f_formats,
  175. 'offsets': self._offsets,
  176. 'titles': self._titles,
  177. })
  178. if byteorder is not None:
  179. byteorder = _byteorderconv[byteorder[0]]
  180. dtype = dtype.newbyteorder(byteorder)
  181. self.dtype = dtype
  182. class record(nt.void):
  183. """A data-type scalar that allows field access as attribute lookup.
  184. """
  185. # manually set name and module so that this class's type shows up
  186. # as numpy.record when printed
  187. __name__ = 'record'
  188. __module__ = 'numpy'
  189. def __repr__(self):
  190. if get_printoptions()['legacy'] == '1.13':
  191. return self.__str__()
  192. return super().__repr__()
  193. def __str__(self):
  194. if get_printoptions()['legacy'] == '1.13':
  195. return str(self.item())
  196. return super().__str__()
  197. def __getattribute__(self, attr):
  198. if attr in ('setfield', 'getfield', 'dtype'):
  199. return nt.void.__getattribute__(self, attr)
  200. try:
  201. return nt.void.__getattribute__(self, attr)
  202. except AttributeError:
  203. pass
  204. fielddict = nt.void.__getattribute__(self, 'dtype').fields
  205. res = fielddict.get(attr, None)
  206. if res:
  207. obj = self.getfield(*res[:2])
  208. # if it has fields return a record,
  209. # otherwise return the object
  210. try:
  211. dt = obj.dtype
  212. except AttributeError:
  213. #happens if field is Object type
  214. return obj
  215. if dt.names is not None:
  216. return obj.view((self.__class__, obj.dtype))
  217. return obj
  218. else:
  219. raise AttributeError("'record' object has no "
  220. "attribute '%s'" % attr)
  221. def __setattr__(self, attr, val):
  222. if attr in ('setfield', 'getfield', 'dtype'):
  223. raise AttributeError("Cannot set '%s' attribute" % attr)
  224. fielddict = nt.void.__getattribute__(self, 'dtype').fields
  225. res = fielddict.get(attr, None)
  226. if res:
  227. return self.setfield(val, *res[:2])
  228. else:
  229. if getattr(self, attr, None):
  230. return nt.void.__setattr__(self, attr, val)
  231. else:
  232. raise AttributeError("'record' object has no "
  233. "attribute '%s'" % attr)
  234. def __getitem__(self, indx):
  235. obj = nt.void.__getitem__(self, indx)
  236. # copy behavior of record.__getattribute__,
  237. if isinstance(obj, nt.void) and obj.dtype.names is not None:
  238. return obj.view((self.__class__, obj.dtype))
  239. else:
  240. # return a single element
  241. return obj
  242. def pprint(self):
  243. """Pretty-print all fields."""
  244. # pretty-print all fields
  245. names = self.dtype.names
  246. maxlen = max(len(name) for name in names)
  247. fmt = '%% %ds: %%s' % maxlen
  248. rows = [fmt % (name, getattr(self, name)) for name in names]
  249. return "\n".join(rows)
  250. # The recarray is almost identical to a standard array (which supports
  251. # named fields already) The biggest difference is that it can use
  252. # attribute-lookup to find the fields and it is constructed using
  253. # a record.
  254. # If byteorder is given it forces a particular byteorder on all
  255. # the fields (and any subfields)
class recarray(ndarray):
    """Construct an ndarray that allows field access using attributes.

    Arrays may have a data-types containing fields, analogous
    to columns in a spread sheet.  An example is ``[(x, int), (y, float)]``,
    where each entry in the array is a pair of ``(int, float)``.  Normally,
    these attributes are accessed using dictionary lookups such as ``arr['x']``
    and ``arr['y']``.  Record arrays allow the fields to be accessed as members
    of the array, using ``arr.x`` and ``arr.y``.

    Parameters
    ----------
    shape : tuple
        Shape of output array.
    dtype : data-type, optional
        The desired data-type.  By default, the data-type is determined
        from `formats`, `names`, `titles`, `aligned` and `byteorder`.
    formats : list of data-types, optional
        A list containing the data-types for the different columns, e.g.
        ``['i4', 'f8', 'i4']``.  `formats` does *not* support the new
        convention of using types directly, i.e. ``(int, float, int)``.
        Note that `formats` must be a list, not a tuple.
        Given that `formats` is somewhat limited, we recommend specifying
        `dtype` instead.
    names : tuple of str, optional
        The name of each column, e.g. ``('x', 'y', 'z')``.
    buf : buffer, optional
        By default, a new array is created of the given shape and data-type.
        If `buf` is specified and is an object exposing the buffer interface,
        the array will use the memory from the existing buffer.  In this case,
        the `offset` and `strides` keywords are available.

    Other Parameters
    ----------------
    titles : tuple of str, optional
        Aliases for column names.  For example, if `names` were
        ``('x', 'y', 'z')`` and `titles` is
        ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
        ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
    byteorder : {'<', '>', '='}, optional
        Byte-order for all fields.
    aligned : bool, optional
        Align the fields in memory as the C-compiler would.
    strides : tuple of ints, optional
        Buffer (`buf`) is interpreted according to these strides (strides
        define how many bytes each array element, row, column, etc.
        occupy in memory).
    offset : int, optional
        Start reading buffer (`buf`) from this offset onwards.
    order : {'C', 'F'}, optional
        Row-major (C-style) or column-major (Fortran-style) order.

    Returns
    -------
    rec : recarray
        Empty array of the given shape and type.

    See Also
    --------
    core.records.fromrecords : Construct a record array from data.
    record : fundamental data-type for `recarray`.
    format_parser : determine a data-type from formats, names, titles.

    Notes
    -----
    This constructor can be compared to ``empty``: it creates a new record
    array but does not fill it with data.  To create a record array from data,
    use one of the following methods:

    1. Create a standard ndarray and convert it to a record array,
       using ``arr.view(np.recarray)``
    2. Use the `buf` keyword.
    3. Use `np.rec.fromrecords`.

    Examples
    --------
    Create an array with two fields, ``x`` and ``y``:

    >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')])
    >>> x
    array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')])

    >>> x['x']
    array([1., 3.])

    View the array as a record array:

    >>> x = x.view(np.recarray)

    >>> x.x
    array([1., 3.])

    >>> x.y
    array([2, 4])

    Create a new, empty record array:

    >>> np.recarray((2,),
    ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
    rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
           (3471280, 1.2134086255804012e-316, 0)],
          dtype=[('x', '<i4'), ('y', '<f8'), ('z', '<i4')])

    """

    # manually set name and module so that this class's type shows
    # up as "numpy.recarray" when printed
    __name__ = 'recarray'
    __module__ = 'numpy'

    def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
                formats=None, names=None, titles=None,
                byteorder=None, aligned=False, order='C'):
        """Allocate the array.

        The element dtype comes from `dtype` when it is given, otherwise it
        is built by `format_parser` from `formats`, `names`, `titles`,
        `aligned` and `byteorder`.  In both cases it is wrapped as
        ``(record, descr)``.  `offset` and `strides` are only forwarded
        when `buf` is given.
        """

        if dtype is not None:
            descr = sb.dtype(dtype)
        else:
            descr = format_parser(formats, names, titles, aligned, byteorder).dtype

        if buf is None:
            self = ndarray.__new__(subtype, shape, (record, descr), order=order)
        else:
            self = ndarray.__new__(subtype, shape, (record, descr),
                                   buffer=buf, offset=offset,
                                   strides=strides, order=order)
        return self

    def __array_finalize__(self, obj):
        """Re-assign the dtype of a structured, non-record array so that
        `__setattr__` gets the chance to convert it."""
        if self.dtype.type is not record and self.dtype.names is not None:
            # if self.dtype is not np.record, invoke __setattr__ which will
            # convert it to a record if it is a void dtype.
            self.dtype = self.dtype

    def __getattribute__(self, attr):
        """Return a regular attribute if one exists, otherwise the field
        named `attr`.

        Raises
        ------
        AttributeError
            If `attr` is neither an attribute nor a field name.
        """
        # See if ndarray has this attr, and return it if so. (note that this
        # means a field with the same name as an ndarray attr cannot be
        # accessed by attribute).
        try:
            return object.__getattribute__(self, attr)
        except AttributeError:  # attr must be a fieldname
            pass

        # look for a field with this name
        fielddict = ndarray.__getattribute__(self, 'dtype').fields
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError) as e:
            # TypeError covers ``fielddict`` being None (no fields at all).
            raise AttributeError("recarray has no attribute %s" % attr) from e
        obj = self.getfield(*res)

        # At this point obj will always be a recarray, since (see
        # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
        # non-structured, convert it to an ndarray. Then if obj is structured
        # with void type convert it to the same dtype.type (eg to preserve
        # numpy.record type if present), since nested structured fields do not
        # inherit type. Don't do this for non-void structures though.
        if obj.dtype.names is not None:
            if issubclass(obj.dtype.type, nt.void):
                return obj.view(dtype=(self.dtype.type, obj.dtype))
            return obj
        else:
            return obj.view(ndarray)

    # Save the dictionary.
    # If the attr is a field name and not in the saved dictionary
    # Undo any "setting" of the attribute and do a setfield
    # Thus, you can't create attributes on-the-fly that are field names.
    def __setattr__(self, attr, val):
        """Set an attribute, redirecting to ``setfield`` when `attr` is a
        field name.

        Raises
        ------
        AttributeError
            If the field lookup for `attr` fails after the plain attribute
            assignment path has been exhausted.
        """

        # Automatically convert (void) structured types to records
        # (but not non-void structures, subarrays, or non-structured voids)
        if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None:
            val = sb.dtype((record, val))

        # Remember whether the attribute existed before the assignment so a
        # freshly created one can be removed again further down.
        newattr = attr not in self.__dict__
        try:
            ret = object.__setattr__(self, attr, val)
        except Exception:
            # Plain assignment failed: only continue if attr is a field.
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                raise
        else:
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                return ret
            if newattr:
                # We just added this one or this setattr worked on an
                # internal attribute.
                try:
                    object.__delattr__(self, attr)
                except Exception:
                    return ret
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError) as e:
            raise AttributeError(
                "record array has no attribute %s" % attr
            ) from e
        return self.setfield(val, *res)

    def __getitem__(self, indx):
        """Index the array, adjusting the class and dtype of array results
        the same way attribute access does."""
        obj = super().__getitem__(indx)

        # copy behavior of getattr, except that here
        # we might also be returning a single element
        if isinstance(obj, ndarray):
            if obj.dtype.names is not None:
                obj = obj.view(type(self))
                if issubclass(obj.dtype.type, nt.void):
                    return obj.view(dtype=(self.dtype.type, obj.dtype))
                return obj
            else:
                return obj.view(type=ndarray)
        else:
            # return a single element
            return obj

    def __repr__(self):
        """Build the ``rec.array(...)`` (or ``array(...).view(...)``)
        representation of the array."""

        repr_dtype = self.dtype
        if self.dtype.type is record or not issubclass(self.dtype.type, nt.void):
            # If this is a full record array (has numpy.record dtype),
            # or if it has a scalar (non-void) dtype with no records,
            # represent it using the rec.array function. Since rec.array
            # converts dtype to a numpy.record for us, convert back
            # to non-record before printing
            if repr_dtype.type is record:
                repr_dtype = sb.dtype((nt.void, repr_dtype))
            prefix = "rec.array("
            fmt = 'rec.array(%s,%sdtype=%s)'
        else:
            # otherwise represent it using np.array plus a view
            # This should only happen if the user is playing
            # strange games with dtypes.
            prefix = "array("
            fmt = 'array(%s,%sdtype=%s).view(numpy.recarray)'

        # get data/shape string. logic taken from numeric.array_repr
        if self.size > 0 or self.shape == (0,):
            lst = sb.array2string(
                self, separator=', ', prefix=prefix, suffix=',')
        else:
            # show zero-length shape unless it is (0,)
            lst = "[], shape=%s" % (repr(self.shape),)

        # Line break plus indentation that lines ``dtype=`` up under the data.
        lf = '\n'+' '*len(prefix)
        if get_printoptions()['legacy'] == '1.13':
            lf = ' ' + lf  # trailing space
        return fmt % (lst, lf, repr_dtype)

    def field(self, attr, val=None):
        """Get or set a single field.

        Parameters
        ----------
        attr : int or str
            Field name, or the integer position of the field in
            ``dtype.names``.
        val : optional
            When None (the default) the field is read; otherwise `val` is
            written to the field.  A consequence is that None itself cannot
            be written through this method.

        Returns
        -------
        The field contents when reading (viewed as a plain ndarray unless
        the field is itself structured), or the result of ``setfield`` when
        writing.
        """
        if isinstance(attr, int):
            names = ndarray.__getattribute__(self, 'dtype').names
            attr = names[attr]

        fielddict = ndarray.__getattribute__(self, 'dtype').fields

        res = fielddict[attr][:2]

        if val is None:
            obj = self.getfield(*res)
            if obj.dtype.names is not None:
                return obj
            return obj.view(ndarray)
        else:
            return self.setfield(val, *res)
  484. def _deprecate_shape_0_as_None(shape):
  485. if shape == 0:
  486. warnings.warn(
  487. "Passing `shape=0` to have the shape be inferred is deprecated, "
  488. "and in future will be equivalent to `shape=(0,)`. To infer "
  489. "the shape and suppress this warning, pass `shape=None` instead.",
  490. FutureWarning, stacklevel=3)
  491. return None
  492. else:
  493. return shape
  494. def fromarrays(arrayList, dtype=None, shape=None, formats=None,
  495. names=None, titles=None, aligned=False, byteorder=None):
  496. """Create a record array from a (flat) list of arrays
  497. Parameters
  498. ----------
  499. arrayList : list or tuple
  500. List of array-like objects (such as lists, tuples,
  501. and ndarrays).
  502. dtype : data-type, optional
  503. valid dtype for all arrays
  504. shape : int or tuple of ints, optional
  505. Shape of the resulting array. If not provided, inferred from
  506. ``arrayList[0]``.
  507. formats, names, titles, aligned, byteorder :
  508. If `dtype` is ``None``, these arguments are passed to
  509. `numpy.format_parser` to construct a dtype. See that function for
  510. detailed documentation.
  511. Returns
  512. -------
  513. np.recarray
  514. Record array consisting of given arrayList columns.
  515. Examples
  516. --------
  517. >>> x1=np.array([1,2,3,4])
  518. >>> x2=np.array(['a','dd','xyz','12'])
  519. >>> x3=np.array([1.1,2,3,4])
  520. >>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c')
  521. >>> print(r[1])
  522. (2, 'dd', 2.0) # may vary
  523. >>> x1[1]=34
  524. >>> r.a
  525. array([1, 2, 3, 4])
  526. >>> x1 = np.array([1, 2, 3, 4])
  527. >>> x2 = np.array(['a', 'dd', 'xyz', '12'])
  528. >>> x3 = np.array([1.1, 2, 3,4])
  529. >>> r = np.core.records.fromarrays(
  530. ... [x1, x2, x3],
  531. ... dtype=np.dtype([('a', np.int32), ('b', 'S3'), ('c', np.float32)]))
  532. >>> r
  533. rec.array([(1, b'a', 1.1), (2, b'dd', 2. ), (3, b'xyz', 3. ),
  534. (4, b'12', 4. )],
  535. dtype=[('a', '<i4'), ('b', 'S3'), ('c', '<f4')])
  536. """
  537. arrayList = [sb.asarray(x) for x in arrayList]
  538. # NumPy 1.19.0, 2020-01-01
  539. shape = _deprecate_shape_0_as_None(shape)
  540. if shape is None:
  541. shape = arrayList[0].shape
  542. elif isinstance(shape, int):
  543. shape = (shape,)
  544. if formats is None and dtype is None:
  545. # go through each object in the list to see if it is an ndarray
  546. # and determine the formats.
  547. formats = [obj.dtype for obj in arrayList]
  548. if dtype is not None:
  549. descr = sb.dtype(dtype)
  550. else:
  551. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  552. _names = descr.names
  553. # Determine shape from data-type.
  554. if len(descr) != len(arrayList):
  555. raise ValueError("mismatch between the number of fields "
  556. "and the number of arrays")
  557. d0 = descr[0].shape
  558. nn = len(d0)
  559. if nn > 0:
  560. shape = shape[:-nn]
  561. for k, obj in enumerate(arrayList):
  562. nn = descr[k].ndim
  563. testshape = obj.shape[:obj.ndim - nn]
  564. if testshape != shape:
  565. raise ValueError("array-shape mismatch in array %d" % k)
  566. _array = recarray(shape, descr)
  567. # populate the record array (makes a copy)
  568. for i in range(len(arrayList)):
  569. _array[_names[i]] = arrayList[i]
  570. return _array
  571. def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
  572. titles=None, aligned=False, byteorder=None):
  573. """Create a recarray from a list of records in text form.
  574. Parameters
  575. ----------
  576. recList : sequence
  577. data in the same field may be heterogeneous - they will be promoted
  578. to the highest data type.
  579. dtype : data-type, optional
  580. valid dtype for all arrays
  581. shape : int or tuple of ints, optional
  582. shape of each array.
  583. formats, names, titles, aligned, byteorder :
  584. If `dtype` is ``None``, these arguments are passed to
  585. `numpy.format_parser` to construct a dtype. See that function for
  586. detailed documentation.
  587. If both `formats` and `dtype` are None, then this will auto-detect
  588. formats. Use list of tuples rather than list of lists for faster
  589. processing.
  590. Returns
  591. -------
  592. np.recarray
  593. record array consisting of given recList rows.
  594. Examples
  595. --------
  596. >>> r=np.core.records.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
  597. ... names='col1,col2,col3')
  598. >>> print(r[0])
  599. (456, 'dbe', 1.2)
  600. >>> r.col1
  601. array([456, 2])
  602. >>> r.col2
  603. array(['dbe', 'de'], dtype='<U3')
  604. >>> import pickle
  605. >>> pickle.loads(pickle.dumps(r))
  606. rec.array([(456, 'dbe', 1.2), ( 2, 'de', 1.3)],
  607. dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
  608. """
  609. if formats is None and dtype is None: # slower
  610. obj = sb.array(recList, dtype=object)
  611. arrlist = [sb.array(obj[..., i].tolist()) for i in range(obj.shape[-1])]
  612. return fromarrays(arrlist, formats=formats, shape=shape, names=names,
  613. titles=titles, aligned=aligned, byteorder=byteorder)
  614. if dtype is not None:
  615. descr = sb.dtype((record, dtype))
  616. else:
  617. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  618. try:
  619. retval = sb.array(recList, dtype=descr)
  620. except (TypeError, ValueError):
  621. # NumPy 1.19.0, 2020-01-01
  622. shape = _deprecate_shape_0_as_None(shape)
  623. if shape is None:
  624. shape = len(recList)
  625. if isinstance(shape, int):
  626. shape = (shape,)
  627. if len(shape) > 1:
  628. raise ValueError("Can only deal with 1-d array.")
  629. _array = recarray(shape, descr)
  630. for k in range(_array.size):
  631. _array[k] = tuple(recList[k])
  632. # list of lists instead of list of tuples ?
  633. # 2018-02-07, 1.14.1
  634. warnings.warn(
  635. "fromrecords expected a list of tuples, may have received a list "
  636. "of lists instead. In the future that will raise an error",
  637. FutureWarning, stacklevel=2)
  638. return _array
  639. else:
  640. if shape is not None and retval.shape != shape:
  641. retval.shape = shape
  642. res = retval.view(recarray)
  643. return res
  644. def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
  645. names=None, titles=None, aligned=False, byteorder=None):
  646. r"""Create a record array from binary data
  647. Note that despite the name of this function it does not accept `str`
  648. instances.
  649. Parameters
  650. ----------
  651. datastring : bytes-like
  652. Buffer of binary data
  653. dtype : data-type, optional
  654. Valid dtype for all arrays
  655. shape : int or tuple of ints, optional
  656. Shape of each array.
  657. offset : int, optional
  658. Position in the buffer to start reading from.
  659. formats, names, titles, aligned, byteorder :
  660. If `dtype` is ``None``, these arguments are passed to
  661. `numpy.format_parser` to construct a dtype. See that function for
  662. detailed documentation.
  663. Returns
  664. -------
  665. np.recarray
  666. Record array view into the data in datastring. This will be readonly
  667. if `datastring` is readonly.
  668. See Also
  669. --------
  670. numpy.frombuffer
  671. Examples
  672. --------
  673. >>> a = b'\x01\x02\x03abc'
  674. >>> np.core.records.fromstring(a, dtype='u1,u1,u1,S3')
  675. rec.array([(1, 2, 3, b'abc')],
  676. dtype=[('f0', 'u1'), ('f1', 'u1'), ('f2', 'u1'), ('f3', 'S3')])
  677. >>> grades_dtype = [('Name', (np.str_, 10)), ('Marks', np.float64),
  678. ... ('GradeLevel', np.int32)]
  679. >>> grades_array = np.array([('Sam', 33.3, 3), ('Mike', 44.4, 5),
  680. ... ('Aadi', 66.6, 6)], dtype=grades_dtype)
  681. >>> np.core.records.fromstring(grades_array.tobytes(), dtype=grades_dtype)
  682. rec.array([('Sam', 33.3, 3), ('Mike', 44.4, 5), ('Aadi', 66.6, 6)],
  683. dtype=[('Name', '<U10'), ('Marks', '<f8'), ('GradeLevel', '<i4')])
  684. >>> s = '\x01\x02\x03abc'
  685. >>> np.core.records.fromstring(s, dtype='u1,u1,u1,S3')
  686. Traceback (most recent call last)
  687. ...
  688. TypeError: a bytes-like object is required, not 'str'
  689. """
  690. if dtype is None and formats is None:
  691. raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
  692. if dtype is not None:
  693. descr = sb.dtype(dtype)
  694. else:
  695. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  696. itemsize = descr.itemsize
  697. # NumPy 1.19.0, 2020-01-01
  698. shape = _deprecate_shape_0_as_None(shape)
  699. if shape in (None, -1):
  700. shape = (len(datastring) - offset) // itemsize
  701. _array = recarray(shape, descr, buf=datastring, offset=offset)
  702. return _array
  703. def get_remaining_size(fd):
  704. pos = fd.tell()
  705. try:
  706. fd.seek(0, 2)
  707. return fd.tell() - pos
  708. finally:
  709. fd.seek(pos, 0)
  710. def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
  711. names=None, titles=None, aligned=False, byteorder=None):
  712. """Create an array from binary file data
  713. Parameters
  714. ----------
  715. fd : str or file type
  716. If file is a string or a path-like object then that file is opened,
  717. else it is assumed to be a file object. The file object must
  718. support random access (i.e. it must have tell and seek methods).
  719. dtype : data-type, optional
  720. valid dtype for all arrays
  721. shape : int or tuple of ints, optional
  722. shape of each array.
  723. offset : int, optional
  724. Position in the file to start reading from.
  725. formats, names, titles, aligned, byteorder :
  726. If `dtype` is ``None``, these arguments are passed to
  727. `numpy.format_parser` to construct a dtype. See that function for
  728. detailed documentation
  729. Returns
  730. -------
  731. np.recarray
  732. record array consisting of data enclosed in file.
  733. Examples
  734. --------
  735. >>> from tempfile import TemporaryFile
  736. >>> a = np.empty(10,dtype='f8,i4,a5')
  737. >>> a[5] = (0.5,10,'abcde')
  738. >>>
  739. >>> fd=TemporaryFile()
  740. >>> a = a.newbyteorder('<')
  741. >>> a.tofile(fd)
  742. >>>
  743. >>> _ = fd.seek(0)
  744. >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10,
  745. ... byteorder='<')
  746. >>> print(r[5])
  747. (0.5, 10, 'abcde')
  748. >>> r.shape
  749. (10,)
  750. """
  751. if dtype is None and formats is None:
  752. raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")
  753. # NumPy 1.19.0, 2020-01-01
  754. shape = _deprecate_shape_0_as_None(shape)
  755. if shape is None:
  756. shape = (-1,)
  757. elif isinstance(shape, int):
  758. shape = (shape,)
  759. if hasattr(fd, 'readinto'):
  760. # GH issue 2504. fd supports io.RawIOBase or io.BufferedIOBase interface.
  761. # Example of fd: gzip, BytesIO, BufferedReader
  762. # file already opened
  763. ctx = nullcontext(fd)
  764. else:
  765. # open file
  766. ctx = open(os_fspath(fd), 'rb')
  767. with ctx as fd:
  768. if offset > 0:
  769. fd.seek(offset, 1)
  770. size = get_remaining_size(fd)
  771. if dtype is not None:
  772. descr = sb.dtype(dtype)
  773. else:
  774. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  775. itemsize = descr.itemsize
  776. shapeprod = sb.array(shape).prod(dtype=nt.intp)
  777. shapesize = shapeprod * itemsize
  778. if shapesize < 0:
  779. shape = list(shape)
  780. shape[shape.index(-1)] = size // -shapesize
  781. shape = tuple(shape)
  782. shapeprod = sb.array(shape).prod(dtype=nt.intp)
  783. nbytes = shapeprod * itemsize
  784. if nbytes > size:
  785. raise ValueError(
  786. "Not enough bytes left in file for specified shape and type")
  787. # create the array
  788. _array = recarray(shape, descr)
  789. nbytesread = fd.readinto(_array.data)
  790. if nbytesread != nbytes:
  791. raise IOError("Didn't read as many bytes as expected")
  792. return _array
  793. def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
  794. names=None, titles=None, aligned=False, byteorder=None, copy=True):
  795. """
  796. Construct a record array from a wide-variety of objects.
  797. A general-purpose record array constructor that dispatches to the
  798. appropriate `recarray` creation function based on the inputs (see Notes).
  799. Parameters
  800. ----------
  801. obj : any
  802. Input object. See Notes for details on how various input types are
  803. treated.
  804. dtype : data-type, optional
  805. Valid dtype for array.
  806. shape : int or tuple of ints, optional
  807. Shape of each array.
  808. offset : int, optional
  809. Position in the file or buffer to start reading from.
  810. strides : tuple of ints, optional
  811. Buffer (`buf`) is interpreted according to these strides (strides
  812. define how many bytes each array element, row, column, etc.
  813. occupy in memory).
  814. formats, names, titles, aligned, byteorder :
  815. If `dtype` is ``None``, these arguments are passed to
  816. `numpy.format_parser` to construct a dtype. See that function for
  817. detailed documentation.
  818. copy : bool, optional
  819. Whether to copy the input object (True), or to use a reference instead.
  820. This option only applies when the input is an ndarray or recarray.
  821. Defaults to True.
  822. Returns
  823. -------
  824. np.recarray
  825. Record array created from the specified object.
  826. Notes
  827. -----
  828. If `obj` is ``None``, then call the `~numpy.recarray` constructor. If
  829. `obj` is a string, then call the `fromstring` constructor. If `obj` is a
  830. list or a tuple, then if the first object is an `~numpy.ndarray`, call
  831. `fromarrays`, otherwise call `fromrecords`. If `obj` is a
  832. `~numpy.recarray`, then make a copy of the data in the recarray
  833. (if ``copy=True``) and use the new formats, names, and titles. If `obj`
  834. is a file, then call `fromfile`. Finally, if obj is an `ndarray`, then
  835. return ``obj.view(recarray)``, making a copy of the data if ``copy=True``.
  836. Examples
  837. --------
  838. >>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
  839. array([[1, 2, 3],
  840. [4, 5, 6],
  841. [7, 8, 9]])
  842. >>> np.core.records.array(a)
  843. rec.array([[1, 2, 3],
  844. [4, 5, 6],
  845. [7, 8, 9]],
  846. dtype=int32)
  847. >>> b = [(1, 1), (2, 4), (3, 9)]
  848. >>> c = np.core.records.array(b, formats = ['i2', 'f2'], names = ('x', 'y'))
  849. >>> c
  850. rec.array([(1, 1.0), (2, 4.0), (3, 9.0)],
  851. dtype=[('x', '<i2'), ('y', '<f2')])
  852. >>> c.x
  853. rec.array([1, 2, 3], dtype=int16)
  854. >>> c.y
  855. rec.array([ 1.0, 4.0, 9.0], dtype=float16)
  856. >>> r = np.rec.array(['abc','def'], names=['col1','col2'])
  857. >>> print(r.col1)
  858. abc
  859. >>> r.col1
  860. array('abc', dtype='<U3')
  861. >>> r.col2
  862. array('def', dtype='<U3')
  863. """
  864. if ((isinstance(obj, (type(None), str)) or hasattr(obj, 'readinto')) and
  865. formats is None and dtype is None):
  866. raise ValueError("Must define formats (or dtype) if object is "
  867. "None, string, or an open file")
  868. kwds = {}
  869. if dtype is not None:
  870. dtype = sb.dtype(dtype)
  871. elif formats is not None:
  872. dtype = format_parser(formats, names, titles,
  873. aligned, byteorder).dtype
  874. else:
  875. kwds = {'formats': formats,
  876. 'names': names,
  877. 'titles': titles,
  878. 'aligned': aligned,
  879. 'byteorder': byteorder
  880. }
  881. if obj is None:
  882. if shape is None:
  883. raise ValueError("Must define a shape if obj is None")
  884. return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)
  885. elif isinstance(obj, bytes):
  886. return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)
  887. elif isinstance(obj, (list, tuple)):
  888. if isinstance(obj[0], (tuple, list)):
  889. return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
  890. else:
  891. return fromarrays(obj, dtype=dtype, shape=shape, **kwds)
  892. elif isinstance(obj, recarray):
  893. if dtype is not None and (obj.dtype != dtype):
  894. new = obj.view(dtype)
  895. else:
  896. new = obj
  897. if copy:
  898. new = new.copy()
  899. return new
  900. elif hasattr(obj, 'readinto'):
  901. return fromfile(obj, dtype=dtype, shape=shape, offset=offset)
  902. elif isinstance(obj, ndarray):
  903. if dtype is not None and (obj.dtype != dtype):
  904. new = obj.view(dtype)
  905. else:
  906. new = obj
  907. if copy:
  908. new = new.copy()
  909. return new.view(recarray)
  910. else:
  911. interface = getattr(obj, "__array_interface__", None)
  912. if interface is None or not isinstance(interface, dict):
  913. raise ValueError("Unknown input type")
  914. obj = sb.array(obj)
  915. if dtype is not None and (obj.dtype != dtype):
  916. obj = obj.view(dtype)
  917. return obj.view(recarray)