Commit 0d770a9f authored by Marco Frailis's avatar Marco Frailis

Adding performance test for the chunk example

parent 81d268df
Pipeline #425 failed with stages
in 1 second
......@@ -29,7 +29,7 @@
"\n",
"f[\"dataset_one\"] = data\n",
"\n",
"# We now retrieve the dataset from file\n",
"# We now retrieve the dataset from file (it is still in memory in fact)\n",
"dset = f[\"dataset_one\"]\n"
]
},
......@@ -51,7 +51,7 @@
"source": [
"### Dataset slicing\n",
"\n",
"Datasets provide analogous slicing operations as numpy arrays (with h5py). But these selections are translated by h5py to portion of the dataset and then HDF5 read the data form \"disk\". Slicing into a dataset object returns a NumpPy array.\n"
"Datasets provide slicing operations analogous to NumPy arrays (with h5py). These selections are translated by h5py to portions of the dataset, and then HDF5 reads the data from \"disk\". Slicing into a dataset object returns a NumPy array.\n"
]
},
{
......@@ -60,6 +60,9 @@
"metadata": {},
"outputs": [],
"source": [
"# The ellipsis means \"as many ':' as needed\"\n",
"# here we use it to get a numpy array of the\n",
"# entire dataset\n",
"out = dset[...]\n",
"\n",
"print(out)\n",
......@@ -73,7 +76,13 @@
"outputs": [],
"source": [
"dset[1:5, 1] = 0.0\n",
"dset[...]"
"dset[...]\n",
"\n",
"# but we cannot use negative steps with a dataset\n",
"try:\n",
" dset[0,::-1]\n",
"except: \n",
" print('No no no!')"
]
},
{
......@@ -82,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"# random 2d distribution\n",
"# random 2d distribution in the range (-1,1)\n",
"data = np.random.rand(15, 10)*2 - 1\n",
"\n",
"dset = f.create_dataset('random', data=data)\n",
......@@ -91,7 +100,7 @@
"out = dset[0:10:2, :2]\n",
"print(out)\n",
"\n",
"# clipping to zero all negative values\n",
"# clipping to zero all negative values, using boolean indexing\n",
"dset[data<0] = 0"
]
},
......@@ -107,7 +116,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"dset = f.create_dataset('dataset_two', (1,1000), dtype=np.float32, \n",
......@@ -118,12 +129,9 @@
"num_rows = dset.shape[0]\n",
"dset.resize((num_rows+a.shape[0], 1000))\n",
"\n",
"for row in a:\n",
" dset[num_rows,:] = row\n",
" num_rows +=1\n",
"dset[num_rows:] = a\n",
"\n",
"print(dset[1000,:20])\n",
"\n"
"print(dset[1:5,:20])\n"
]
},
{
......@@ -200,7 +208,7 @@
"metadata": {},
"outputs": [],
"source": [
"f = h5py.File(\"testdata.hdf5\")\n",
"f = h5py.File(\"testdata.hdf5\",'a')\n",
"dt = np.dtype([('source_id', np.uint32), ('ra', np.float32), ('dec', np.float32), ('magnitude', np.float64)])\n",
"\n",
"grp = f.create_group('source_catalog/det11')\n",
......@@ -260,7 +268,57 @@
"metadata": {},
"outputs": [],
"source": [
"dset = f.create_dataset('chunked', (10,2048,2048), dtype=np.uint16, chunks=(1,64,64))"
"rdata = np.random.randint(0,2**16,(100,2048,2048), dtype=np.uint16)\n",
"\n",
"f = h5py.File('image_sequence.hdf5','w')\n",
"dset = f.create_dataset('nochunk', data=rdata)\n",
"f.flush()\n",
"f.close()\n",
"\n",
"f = h5py.File('image_sequence_chunked.hdf5','w')\n",
"dset = f.create_dataset('chunked', data=rdata, chunks=(100,64,64))\n",
"f.flush()\n",
"f.close()\n",
"\n",
"f = h5py.File('image_sequence.hdf5','r')\n",
"dset = f['nochunk']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"for i in range(32):\n",
" for j in range(32):\n",
" block = dset[:,64*i:64*(i+1), 64*j:64*(j+1)]\n",
" np.median(block, 0)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"f.close()\n",
"f = h5py.File('image_sequence_chunked.hdf5','r')\n",
"dset = f['chunked']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"for i in range(32):\n",
" for j in range(32):\n",
" block = dset[:,64*i:64*(i+1), 64*j:64*(j+1)]\n",
" np.median(block, 0)"
]
},
{
......@@ -269,11 +327,20 @@
"metadata": {},
"outputs": [],
"source": [
"f.close()\n",
"f = h5py.File('image_sequence_chunked.hdf5','a')\n",
"dset = f.require_dataset('auto_chunked', (2048,2048), dtype=np.float32, compression=\"gzip\")\n",
"print(dset.compression)\n",
"print(dset.compression_opts)\n",
"print(dset.chunks)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
......@@ -292,7 +359,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
"version": "3.7.4"
}
},
"nbformat": 4,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment