图片解析应用
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

767 lines
28 KiB

"""Test functions for 1D array set operations.

"""
import numpy as np

from numpy.testing import (assert_array_equal, assert_equal,
                           assert_raises, assert_raises_regex)
from numpy.lib.arraysetops import (
    ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d, isin
    )
import pytest
  10. class TestSetOps:
  11. def test_intersect1d(self):
  12. # unique inputs
  13. a = np.array([5, 7, 1, 2])
  14. b = np.array([2, 4, 3, 1, 5])
  15. ec = np.array([1, 2, 5])
  16. c = intersect1d(a, b, assume_unique=True)
  17. assert_array_equal(c, ec)
  18. # non-unique inputs
  19. a = np.array([5, 5, 7, 1, 2])
  20. b = np.array([2, 1, 4, 3, 3, 1, 5])
  21. ed = np.array([1, 2, 5])
  22. c = intersect1d(a, b)
  23. assert_array_equal(c, ed)
  24. assert_array_equal([], intersect1d([], []))
  25. def test_intersect1d_array_like(self):
  26. # See gh-11772
  27. class Test:
  28. def __array__(self):
  29. return np.arange(3)
  30. a = Test()
  31. res = intersect1d(a, a)
  32. assert_array_equal(res, a)
  33. res = intersect1d([1, 2, 3], [1, 2, 3])
  34. assert_array_equal(res, [1, 2, 3])
  35. def test_intersect1d_indices(self):
  36. # unique inputs
  37. a = np.array([1, 2, 3, 4])
  38. b = np.array([2, 1, 4, 6])
  39. c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
  40. ee = np.array([1, 2, 4])
  41. assert_array_equal(c, ee)
  42. assert_array_equal(a[i1], ee)
  43. assert_array_equal(b[i2], ee)
  44. # non-unique inputs
  45. a = np.array([1, 2, 2, 3, 4, 3, 2])
  46. b = np.array([1, 8, 4, 2, 2, 3, 2, 3])
  47. c, i1, i2 = intersect1d(a, b, return_indices=True)
  48. ef = np.array([1, 2, 3, 4])
  49. assert_array_equal(c, ef)
  50. assert_array_equal(a[i1], ef)
  51. assert_array_equal(b[i2], ef)
  52. # non1d, unique inputs
  53. a = np.array([[2, 4, 5, 6], [7, 8, 1, 15]])
  54. b = np.array([[3, 2, 7, 6], [10, 12, 8, 9]])
  55. c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
  56. ui1 = np.unravel_index(i1, a.shape)
  57. ui2 = np.unravel_index(i2, b.shape)
  58. ea = np.array([2, 6, 7, 8])
  59. assert_array_equal(ea, a[ui1])
  60. assert_array_equal(ea, b[ui2])
  61. # non1d, not assumed to be uniqueinputs
  62. a = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]])
  63. b = np.array([[3, 2, 7, 7], [10, 12, 8, 7]])
  64. c, i1, i2 = intersect1d(a, b, return_indices=True)
  65. ui1 = np.unravel_index(i1, a.shape)
  66. ui2 = np.unravel_index(i2, b.shape)
  67. ea = np.array([2, 7, 8])
  68. assert_array_equal(ea, a[ui1])
  69. assert_array_equal(ea, b[ui2])
  70. def test_setxor1d(self):
  71. a = np.array([5, 7, 1, 2])
  72. b = np.array([2, 4, 3, 1, 5])
  73. ec = np.array([3, 4, 7])
  74. c = setxor1d(a, b)
  75. assert_array_equal(c, ec)
  76. a = np.array([1, 2, 3])
  77. b = np.array([6, 5, 4])
  78. ec = np.array([1, 2, 3, 4, 5, 6])
  79. c = setxor1d(a, b)
  80. assert_array_equal(c, ec)
  81. a = np.array([1, 8, 2, 3])
  82. b = np.array([6, 5, 4, 8])
  83. ec = np.array([1, 2, 3, 4, 5, 6])
  84. c = setxor1d(a, b)
  85. assert_array_equal(c, ec)
  86. assert_array_equal([], setxor1d([], []))
  87. def test_ediff1d(self):
  88. zero_elem = np.array([])
  89. one_elem = np.array([1])
  90. two_elem = np.array([1, 2])
  91. assert_array_equal([], ediff1d(zero_elem))
  92. assert_array_equal([0], ediff1d(zero_elem, to_begin=0))
  93. assert_array_equal([0], ediff1d(zero_elem, to_end=0))
  94. assert_array_equal([-1, 0], ediff1d(zero_elem, to_begin=-1, to_end=0))
  95. assert_array_equal([], ediff1d(one_elem))
  96. assert_array_equal([1], ediff1d(two_elem))
  97. assert_array_equal([7, 1, 9], ediff1d(two_elem, to_begin=7, to_end=9))
  98. assert_array_equal([5, 6, 1, 7, 8],
  99. ediff1d(two_elem, to_begin=[5, 6], to_end=[7, 8]))
  100. assert_array_equal([1, 9], ediff1d(two_elem, to_end=9))
  101. assert_array_equal([1, 7, 8], ediff1d(two_elem, to_end=[7, 8]))
  102. assert_array_equal([7, 1], ediff1d(two_elem, to_begin=7))
  103. assert_array_equal([5, 6, 1], ediff1d(two_elem, to_begin=[5, 6]))
  104. @pytest.mark.parametrize("ary, prepend, append, expected", [
  105. # should fail because trying to cast
  106. # np.nan standard floating point value
  107. # into an integer array:
  108. (np.array([1, 2, 3], dtype=np.int64),
  109. None,
  110. np.nan,
  111. 'to_end'),
  112. # should fail because attempting
  113. # to downcast to int type:
  114. (np.array([1, 2, 3], dtype=np.int64),
  115. np.array([5, 7, 2], dtype=np.float32),
  116. None,
  117. 'to_begin'),
  118. # should fail because attempting to cast
  119. # two special floating point values
  120. # to integers (on both sides of ary),
  121. # `to_begin` is in the error message as the impl checks this first:
  122. (np.array([1., 3., 9.], dtype=np.int8),
  123. np.nan,
  124. np.nan,
  125. 'to_begin'),
  126. ])
  127. def test_ediff1d_forbidden_type_casts(self, ary, prepend, append, expected):
  128. # verify resolution of gh-11490
  129. # specifically, raise an appropriate
  130. # Exception when attempting to append or
  131. # prepend with an incompatible type
  132. msg = 'dtype of `{}` must be compatible'.format(expected)
  133. with assert_raises_regex(TypeError, msg):
  134. ediff1d(ary=ary,
  135. to_end=append,
  136. to_begin=prepend)
  137. @pytest.mark.parametrize(
  138. "ary,prepend,append,expected",
  139. [
  140. (np.array([1, 2, 3], dtype=np.int16),
  141. 2**16, # will be cast to int16 under same kind rule.
  142. 2**16 + 4,
  143. np.array([0, 1, 1, 4], dtype=np.int16)),
  144. (np.array([1, 2, 3], dtype=np.float32),
  145. np.array([5], dtype=np.float64),
  146. None,
  147. np.array([5, 1, 1], dtype=np.float32)),
  148. (np.array([1, 2, 3], dtype=np.int32),
  149. 0,
  150. 0,
  151. np.array([0, 1, 1, 0], dtype=np.int32)),
  152. (np.array([1, 2, 3], dtype=np.int64),
  153. 3,
  154. -9,
  155. np.array([3, 1, 1, -9], dtype=np.int64)),
  156. ]
  157. )
  158. def test_ediff1d_scalar_handling(self,
  159. ary,
  160. prepend,
  161. append,
  162. expected):
  163. # maintain backwards-compatibility
  164. # of scalar prepend / append behavior
  165. # in ediff1d following fix for gh-11490
  166. actual = np.ediff1d(ary=ary,
  167. to_end=append,
  168. to_begin=prepend)
  169. assert_equal(actual, expected)
  170. assert actual.dtype == expected.dtype
  171. def test_isin(self):
  172. # the tests for in1d cover most of isin's behavior
  173. # if in1d is removed, would need to change those tests to test
  174. # isin instead.
  175. def _isin_slow(a, b):
  176. b = np.asarray(b).flatten().tolist()
  177. return a in b
  178. isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1})
  179. def assert_isin_equal(a, b):
  180. x = isin(a, b)
  181. y = isin_slow(a, b)
  182. assert_array_equal(x, y)
  183. # multidimensional arrays in both arguments
  184. a = np.arange(24).reshape([2, 3, 4])
  185. b = np.array([[10, 20, 30], [0, 1, 3], [11, 22, 33]])
  186. assert_isin_equal(a, b)
  187. # array-likes as both arguments
  188. c = [(9, 8), (7, 6)]
  189. d = (9, 7)
  190. assert_isin_equal(c, d)
  191. # zero-d array:
  192. f = np.array(3)
  193. assert_isin_equal(f, b)
  194. assert_isin_equal(a, f)
  195. assert_isin_equal(f, f)
  196. # scalar:
  197. assert_isin_equal(5, b)
  198. assert_isin_equal(a, 6)
  199. assert_isin_equal(5, 6)
  200. # empty array-like:
  201. x = []
  202. assert_isin_equal(x, b)
  203. assert_isin_equal(a, x)
  204. assert_isin_equal(x, x)
  205. def test_in1d(self):
  206. # we use two different sizes for the b array here to test the
  207. # two different paths in in1d().
  208. for mult in (1, 10):
  209. # One check without np.array to make sure lists are handled correct
  210. a = [5, 7, 1, 2]
  211. b = [2, 4, 3, 1, 5] * mult
  212. ec = np.array([True, False, True, True])
  213. c = in1d(a, b, assume_unique=True)
  214. assert_array_equal(c, ec)
  215. a[0] = 8
  216. ec = np.array([False, False, True, True])
  217. c = in1d(a, b, assume_unique=True)
  218. assert_array_equal(c, ec)
  219. a[0], a[3] = 4, 8
  220. ec = np.array([True, False, True, False])
  221. c = in1d(a, b, assume_unique=True)
  222. assert_array_equal(c, ec)
  223. a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
  224. b = [2, 3, 4] * mult
  225. ec = [False, True, False, True, True, True, True, True, True,
  226. False, True, False, False, False]
  227. c = in1d(a, b)
  228. assert_array_equal(c, ec)
  229. b = b + [5, 5, 4] * mult
  230. ec = [True, True, True, True, True, True, True, True, True, True,
  231. True, False, True, True]
  232. c = in1d(a, b)
  233. assert_array_equal(c, ec)
  234. a = np.array([5, 7, 1, 2])
  235. b = np.array([2, 4, 3, 1, 5] * mult)
  236. ec = np.array([True, False, True, True])
  237. c = in1d(a, b)
  238. assert_array_equal(c, ec)
  239. a = np.array([5, 7, 1, 1, 2])
  240. b = np.array([2, 4, 3, 3, 1, 5] * mult)
  241. ec = np.array([True, False, True, True, True])
  242. c = in1d(a, b)
  243. assert_array_equal(c, ec)
  244. a = np.array([5, 5])
  245. b = np.array([2, 2] * mult)
  246. ec = np.array([False, False])
  247. c = in1d(a, b)
  248. assert_array_equal(c, ec)
  249. a = np.array([5])
  250. b = np.array([2])
  251. ec = np.array([False])
  252. c = in1d(a, b)
  253. assert_array_equal(c, ec)
  254. assert_array_equal(in1d([], []), [])
  255. def test_in1d_char_array(self):
  256. a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b'])
  257. b = np.array(['a', 'c'])
  258. ec = np.array([True, False, True, False, False, True, False, False])
  259. c = in1d(a, b)
  260. assert_array_equal(c, ec)
  261. def test_in1d_invert(self):
  262. "Test in1d's invert parameter"
  263. # We use two different sizes for the b array here to test the
  264. # two different paths in in1d().
  265. for mult in (1, 10):
  266. a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
  267. b = [2, 3, 4] * mult
  268. assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True))
  269. def test_in1d_ravel(self):
  270. # Test that in1d ravels its input arrays. This is not documented
  271. # behavior however. The test is to ensure consistentency.
  272. a = np.arange(6).reshape(2, 3)
  273. b = np.arange(3, 9).reshape(3, 2)
  274. long_b = np.arange(3, 63).reshape(30, 2)
  275. ec = np.array([False, False, False, True, True, True])
  276. assert_array_equal(in1d(a, b, assume_unique=True), ec)
  277. assert_array_equal(in1d(a, b, assume_unique=False), ec)
  278. assert_array_equal(in1d(a, long_b, assume_unique=True), ec)
  279. assert_array_equal(in1d(a, long_b, assume_unique=False), ec)
  280. def test_in1d_first_array_is_object(self):
  281. ar1 = [None]
  282. ar2 = np.array([1]*10)
  283. expected = np.array([False])
  284. result = np.in1d(ar1, ar2)
  285. assert_array_equal(result, expected)
  286. def test_in1d_second_array_is_object(self):
  287. ar1 = 1
  288. ar2 = np.array([None]*10)
  289. expected = np.array([False])
  290. result = np.in1d(ar1, ar2)
  291. assert_array_equal(result, expected)
  292. def test_in1d_both_arrays_are_object(self):
  293. ar1 = [None]
  294. ar2 = np.array([None]*10)
  295. expected = np.array([True])
  296. result = np.in1d(ar1, ar2)
  297. assert_array_equal(result, expected)
  298. def test_in1d_both_arrays_have_structured_dtype(self):
  299. # Test arrays of a structured data type containing an integer field
  300. # and a field of dtype `object` allowing for arbitrary Python objects
  301. dt = np.dtype([('field1', int), ('field2', object)])
  302. ar1 = np.array([(1, None)], dtype=dt)
  303. ar2 = np.array([(1, None)]*10, dtype=dt)
  304. expected = np.array([True])
  305. result = np.in1d(ar1, ar2)
  306. assert_array_equal(result, expected)
  307. def test_in1d_with_arrays_containing_tuples(self):
  308. ar1 = np.array([(1,), 2], dtype=object)
  309. ar2 = np.array([(1,), 2], dtype=object)
  310. expected = np.array([True, True])
  311. result = np.in1d(ar1, ar2)
  312. assert_array_equal(result, expected)
  313. result = np.in1d(ar1, ar2, invert=True)
  314. assert_array_equal(result, np.invert(expected))
  315. # An integer is added at the end of the array to make sure
  316. # that the array builder will create the array with tuples
  317. # and after it's created the integer is removed.
  318. # There's a bug in the array constructor that doesn't handle
  319. # tuples properly and adding the integer fixes that.
  320. ar1 = np.array([(1,), (2, 1), 1], dtype=object)
  321. ar1 = ar1[:-1]
  322. ar2 = np.array([(1,), (2, 1), 1], dtype=object)
  323. ar2 = ar2[:-1]
  324. expected = np.array([True, True])
  325. result = np.in1d(ar1, ar2)
  326. assert_array_equal(result, expected)
  327. result = np.in1d(ar1, ar2, invert=True)
  328. assert_array_equal(result, np.invert(expected))
  329. ar1 = np.array([(1,), (2, 3), 1], dtype=object)
  330. ar1 = ar1[:-1]
  331. ar2 = np.array([(1,), 2], dtype=object)
  332. expected = np.array([True, False])
  333. result = np.in1d(ar1, ar2)
  334. assert_array_equal(result, expected)
  335. result = np.in1d(ar1, ar2, invert=True)
  336. assert_array_equal(result, np.invert(expected))
  337. def test_union1d(self):
  338. a = np.array([5, 4, 7, 1, 2])
  339. b = np.array([2, 4, 3, 3, 2, 1, 5])
  340. ec = np.array([1, 2, 3, 4, 5, 7])
  341. c = union1d(a, b)
  342. assert_array_equal(c, ec)
  343. # Tests gh-10340, arguments to union1d should be
  344. # flattened if they are not already 1D
  345. x = np.array([[0, 1, 2], [3, 4, 5]])
  346. y = np.array([0, 1, 2, 3, 4])
  347. ez = np.array([0, 1, 2, 3, 4, 5])
  348. z = union1d(x, y)
  349. assert_array_equal(z, ez)
  350. assert_array_equal([], union1d([], []))
  351. def test_setdiff1d(self):
  352. a = np.array([6, 5, 4, 7, 1, 2, 7, 4])
  353. b = np.array([2, 4, 3, 3, 2, 1, 5])
  354. ec = np.array([6, 7])
  355. c = setdiff1d(a, b)
  356. assert_array_equal(c, ec)
  357. a = np.arange(21)
  358. b = np.arange(19)
  359. ec = np.array([19, 20])
  360. c = setdiff1d(a, b)
  361. assert_array_equal(c, ec)
  362. assert_array_equal([], setdiff1d([], []))
  363. a = np.array((), np.uint32)
  364. assert_equal(setdiff1d(a, []).dtype, np.uint32)
  365. def test_setdiff1d_unique(self):
  366. a = np.array([3, 2, 1])
  367. b = np.array([7, 5, 2])
  368. expected = np.array([3, 1])
  369. actual = setdiff1d(a, b, assume_unique=True)
  370. assert_equal(actual, expected)
  371. def test_setdiff1d_char_array(self):
  372. a = np.array(['a', 'b', 'c'])
  373. b = np.array(['a', 'b', 's'])
  374. assert_array_equal(setdiff1d(a, b), np.array(['c']))
  375. def test_manyways(self):
  376. a = np.array([5, 7, 1, 2, 8])
  377. b = np.array([9, 8, 2, 4, 3, 1, 5])
  378. c1 = setxor1d(a, b)
  379. aux1 = intersect1d(a, b)
  380. aux2 = union1d(a, b)
  381. c2 = setdiff1d(aux2, aux1)
  382. assert_array_equal(c1, c2)
  383. class TestUnique:
  384. def test_unique_1d(self):
  385. def check_all(a, b, i1, i2, c, dt):
  386. base_msg = 'check {0} failed for type {1}'
  387. msg = base_msg.format('values', dt)
  388. v = unique(a)
  389. assert_array_equal(v, b, msg)
  390. msg = base_msg.format('return_index', dt)
  391. v, j = unique(a, True, False, False)
  392. assert_array_equal(v, b, msg)
  393. assert_array_equal(j, i1, msg)
  394. msg = base_msg.format('return_inverse', dt)
  395. v, j = unique(a, False, True, False)
  396. assert_array_equal(v, b, msg)
  397. assert_array_equal(j, i2, msg)
  398. msg = base_msg.format('return_counts', dt)
  399. v, j = unique(a, False, False, True)
  400. assert_array_equal(v, b, msg)
  401. assert_array_equal(j, c, msg)
  402. msg = base_msg.format('return_index and return_inverse', dt)
  403. v, j1, j2 = unique(a, True, True, False)
  404. assert_array_equal(v, b, msg)
  405. assert_array_equal(j1, i1, msg)
  406. assert_array_equal(j2, i2, msg)
  407. msg = base_msg.format('return_index and return_counts', dt)
  408. v, j1, j2 = unique(a, True, False, True)
  409. assert_array_equal(v, b, msg)
  410. assert_array_equal(j1, i1, msg)
  411. assert_array_equal(j2, c, msg)
  412. msg = base_msg.format('return_inverse and return_counts', dt)
  413. v, j1, j2 = unique(a, False, True, True)
  414. assert_array_equal(v, b, msg)
  415. assert_array_equal(j1, i2, msg)
  416. assert_array_equal(j2, c, msg)
  417. msg = base_msg.format(('return_index, return_inverse '
  418. 'and return_counts'), dt)
  419. v, j1, j2, j3 = unique(a, True, True, True)
  420. assert_array_equal(v, b, msg)
  421. assert_array_equal(j1, i1, msg)
  422. assert_array_equal(j2, i2, msg)
  423. assert_array_equal(j3, c, msg)
  424. a = [5, 7, 1, 2, 1, 5, 7]*10
  425. b = [1, 2, 5, 7]
  426. i1 = [2, 3, 0, 1]
  427. i2 = [2, 3, 0, 1, 0, 2, 3]*10
  428. c = np.multiply([2, 1, 2, 2], 10)
  429. # test for numeric arrays
  430. types = []
  431. types.extend(np.typecodes['AllInteger'])
  432. types.extend(np.typecodes['AllFloat'])
  433. types.append('datetime64[D]')
  434. types.append('timedelta64[D]')
  435. for dt in types:
  436. aa = np.array(a, dt)
  437. bb = np.array(b, dt)
  438. check_all(aa, bb, i1, i2, c, dt)
  439. # test for object arrays
  440. dt = 'O'
  441. aa = np.empty(len(a), dt)
  442. aa[:] = a
  443. bb = np.empty(len(b), dt)
  444. bb[:] = b
  445. check_all(aa, bb, i1, i2, c, dt)
  446. # test for structured arrays
  447. dt = [('', 'i'), ('', 'i')]
  448. aa = np.array(list(zip(a, a)), dt)
  449. bb = np.array(list(zip(b, b)), dt)
  450. check_all(aa, bb, i1, i2, c, dt)
  451. # test for ticket #2799
  452. aa = [1. + 0.j, 1 - 1.j, 1]
  453. assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])
  454. # test for ticket #4785
  455. a = [(1, 2), (1, 2), (2, 3)]
  456. unq = [1, 2, 3]
  457. inv = [0, 1, 0, 1, 1, 2]
  458. a1 = unique(a)
  459. assert_array_equal(a1, unq)
  460. a2, a2_inv = unique(a, return_inverse=True)
  461. assert_array_equal(a2, unq)
  462. assert_array_equal(a2_inv, inv)
  463. # test for chararrays with return_inverse (gh-5099)
  464. a = np.chararray(5)
  465. a[...] = ''
  466. a2, a2_inv = np.unique(a, return_inverse=True)
  467. assert_array_equal(a2_inv, np.zeros(5))
  468. # test for ticket #9137
  469. a = []
  470. a1_idx = np.unique(a, return_index=True)[1]
  471. a2_inv = np.unique(a, return_inverse=True)[1]
  472. a3_idx, a3_inv = np.unique(a, return_index=True,
  473. return_inverse=True)[1:]
  474. assert_equal(a1_idx.dtype, np.intp)
  475. assert_equal(a2_inv.dtype, np.intp)
  476. assert_equal(a3_idx.dtype, np.intp)
  477. assert_equal(a3_inv.dtype, np.intp)
  478. # test for ticket 2111 - float
  479. a = [2.0, np.nan, 1.0, np.nan]
  480. ua = [1.0, 2.0, np.nan]
  481. ua_idx = [2, 0, 1]
  482. ua_inv = [1, 2, 0, 2]
  483. ua_cnt = [1, 1, 2]
  484. assert_equal(np.unique(a), ua)
  485. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  486. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  487. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  488. # test for ticket 2111 - complex
  489. a = [2.0-1j, np.nan, 1.0+1j, complex(0.0, np.nan), complex(1.0, np.nan)]
  490. ua = [1.0+1j, 2.0-1j, complex(0.0, np.nan)]
  491. ua_idx = [2, 0, 3]
  492. ua_inv = [1, 2, 0, 2, 2]
  493. ua_cnt = [1, 1, 3]
  494. assert_equal(np.unique(a), ua)
  495. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  496. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  497. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  498. # test for ticket 2111 - datetime64
  499. nat = np.datetime64('nat')
  500. a = [np.datetime64('2020-12-26'), nat, np.datetime64('2020-12-24'), nat]
  501. ua = [np.datetime64('2020-12-24'), np.datetime64('2020-12-26'), nat]
  502. ua_idx = [2, 0, 1]
  503. ua_inv = [1, 2, 0, 2]
  504. ua_cnt = [1, 1, 2]
  505. assert_equal(np.unique(a), ua)
  506. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  507. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  508. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  509. # test for ticket 2111 - timedelta
  510. nat = np.timedelta64('nat')
  511. a = [np.timedelta64(1, 'D'), nat, np.timedelta64(1, 'h'), nat]
  512. ua = [np.timedelta64(1, 'h'), np.timedelta64(1, 'D'), nat]
  513. ua_idx = [2, 0, 1]
  514. ua_inv = [1, 2, 0, 2]
  515. ua_cnt = [1, 1, 2]
  516. assert_equal(np.unique(a), ua)
  517. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  518. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  519. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  520. # test for gh-19300
  521. all_nans = [np.nan] * 4
  522. ua = [np.nan]
  523. ua_idx = [0]
  524. ua_inv = [0, 0, 0, 0]
  525. ua_cnt = [4]
  526. assert_equal(np.unique(all_nans), ua)
  527. assert_equal(np.unique(all_nans, return_index=True), (ua, ua_idx))
  528. assert_equal(np.unique(all_nans, return_inverse=True), (ua, ua_inv))
  529. assert_equal(np.unique(all_nans, return_counts=True), (ua, ua_cnt))
  530. def test_unique_axis_errors(self):
  531. assert_raises(TypeError, self._run_axis_tests, object)
  532. assert_raises(TypeError, self._run_axis_tests,
  533. [('a', int), ('b', object)])
  534. assert_raises(np.AxisError, unique, np.arange(10), axis=2)
  535. assert_raises(np.AxisError, unique, np.arange(10), axis=-2)
  536. def test_unique_axis_list(self):
  537. msg = "Unique failed on list of lists"
  538. inp = [[0, 1, 0], [0, 1, 0]]
  539. inp_arr = np.asarray(inp)
  540. assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg)
  541. assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg)
  542. def test_unique_axis(self):
  543. types = []
  544. types.extend(np.typecodes['AllInteger'])
  545. types.extend(np.typecodes['AllFloat'])
  546. types.append('datetime64[D]')
  547. types.append('timedelta64[D]')
  548. types.append([('a', int), ('b', int)])
  549. types.append([('a', int), ('b', float)])
  550. for dtype in types:
  551. self._run_axis_tests(dtype)
  552. msg = 'Non-bitwise-equal booleans test failed'
  553. data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
  554. result = np.array([[False, True], [True, True]], dtype=bool)
  555. assert_array_equal(unique(data, axis=0), result, msg)
  556. msg = 'Negative zero equality test failed'
  557. data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]])
  558. result = np.array([[-0.0, 0.0]])
  559. assert_array_equal(unique(data, axis=0), result, msg)
  560. @pytest.mark.parametrize("axis", [0, -1])
  561. def test_unique_1d_with_axis(self, axis):
  562. x = np.array([4, 3, 2, 3, 2, 1, 2, 2])
  563. uniq = unique(x, axis=axis)
  564. assert_array_equal(uniq, [1, 2, 3, 4])
  565. def test_unique_axis_zeros(self):
  566. # issue 15559
  567. single_zero = np.empty(shape=(2, 0), dtype=np.int8)
  568. uniq, idx, inv, cnt = unique(single_zero, axis=0, return_index=True,
  569. return_inverse=True, return_counts=True)
  570. # there's 1 element of shape (0,) along axis 0
  571. assert_equal(uniq.dtype, single_zero.dtype)
  572. assert_array_equal(uniq, np.empty(shape=(1, 0)))
  573. assert_array_equal(idx, np.array([0]))
  574. assert_array_equal(inv, np.array([0, 0]))
  575. assert_array_equal(cnt, np.array([2]))
  576. # there's 0 elements of shape (2,) along axis 1
  577. uniq, idx, inv, cnt = unique(single_zero, axis=1, return_index=True,
  578. return_inverse=True, return_counts=True)
  579. assert_equal(uniq.dtype, single_zero.dtype)
  580. assert_array_equal(uniq, np.empty(shape=(2, 0)))
  581. assert_array_equal(idx, np.array([]))
  582. assert_array_equal(inv, np.array([]))
  583. assert_array_equal(cnt, np.array([]))
  584. # test a "complicated" shape
  585. shape = (0, 2, 0, 3, 0, 4, 0)
  586. multiple_zeros = np.empty(shape=shape)
  587. for axis in range(len(shape)):
  588. expected_shape = list(shape)
  589. if shape[axis] == 0:
  590. expected_shape[axis] = 0
  591. else:
  592. expected_shape[axis] = 1
  593. assert_array_equal(unique(multiple_zeros, axis=axis),
  594. np.empty(shape=expected_shape))
  595. def test_unique_masked(self):
  596. # issue 8664
  597. x = np.array([64, 0, 1, 2, 3, 63, 63, 0, 0, 0, 1, 2, 0, 63, 0],
  598. dtype='uint8')
  599. y = np.ma.masked_equal(x, 0)
  600. v = np.unique(y)
  601. v2, i, c = np.unique(y, return_index=True, return_counts=True)
  602. msg = 'Unique returned different results when asked for index'
  603. assert_array_equal(v.data, v2.data, msg)
  604. assert_array_equal(v.mask, v2.mask, msg)
  605. def test_unique_sort_order_with_axis(self):
  606. # These tests fail if sorting along axis is done by treating subarrays
  607. # as unsigned byte strings. See gh-10495.
  608. fmt = "sort order incorrect for integer type '%s'"
  609. for dt in 'bhilq':
  610. a = np.array([[-1], [0]], dt)
  611. b = np.unique(a, axis=0)
  612. assert_array_equal(a, b, fmt % dt)
  613. def _run_axis_tests(self, dtype):
  614. data = np.array([[0, 1, 0, 0],
  615. [1, 0, 0, 0],
  616. [0, 1, 0, 0],
  617. [1, 0, 0, 0]]).astype(dtype)
  618. msg = 'Unique with 1d array and axis=0 failed'
  619. result = np.array([0, 1])
  620. assert_array_equal(unique(data), result.astype(dtype), msg)
  621. msg = 'Unique with 2d array and axis=0 failed'
  622. result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
  623. assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)
  624. msg = 'Unique with 2d array and axis=1 failed'
  625. result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
  626. assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)
  627. msg = 'Unique with 3d array and axis=2 failed'
  628. data3d = np.array([[[1, 1],
  629. [1, 0]],
  630. [[0, 1],
  631. [0, 0]]]).astype(dtype)
  632. result = np.take(data3d, [1, 0], axis=2)
  633. assert_array_equal(unique(data3d, axis=2), result, msg)
  634. uniq, idx, inv, cnt = unique(data, axis=0, return_index=True,
  635. return_inverse=True, return_counts=True)
  636. msg = "Unique's return_index=True failed with axis=0"
  637. assert_array_equal(data[idx], uniq, msg)
  638. msg = "Unique's return_inverse=True failed with axis=0"
  639. assert_array_equal(uniq[inv], data)
  640. msg = "Unique's return_counts=True failed with axis=0"
  641. assert_array_equal(cnt, np.array([2, 2]), msg)
  642. uniq, idx, inv, cnt = unique(data, axis=1, return_index=True,
  643. return_inverse=True, return_counts=True)
  644. msg = "Unique's return_index=True failed with axis=1"
  645. assert_array_equal(data[:, idx], uniq)
  646. msg = "Unique's return_inverse=True failed with axis=1"
  647. assert_array_equal(uniq[:, inv], data)
  648. msg = "Unique's return_counts=True failed with axis=1"
  649. assert_array_equal(cnt, np.array([2, 1, 1]), msg)