From 90fd503584b5e08bbb8d2b22f1b2bc04106c2293 Mon Sep 17 00:00:00 2001 From: Marius Ciepluch <11855163+norandom@users.noreply.github.com> Date: Fri, 17 May 2024 19:29:52 +0200 Subject: [PATCH] Pandas and Polars comparison improved, added Icicle graphs and profile data --- ...csearch-Pandas-vs-Polars-May-15-2024.ipynb | 3065 ++++++++++++++++- 1 file changed, 3024 insertions(+), 41 deletions(-) diff --git a/Elasticsearch-Pandas-vs-Polars-May-15-2024.ipynb b/Elasticsearch-Pandas-vs-Polars-May-15-2024.ipynb index 1b5de12..4d24e53 100644 --- a/Elasticsearch-Pandas-vs-Polars-May-15-2024.ipynb +++ b/Elasticsearch-Pandas-vs-Polars-May-15-2024.ipynb @@ -450,12 +450,81 @@ "tags": [] }, "source": [ - "## Memory footprint comparison" + "## Memory footprint and profile comparison" + ] + }, + { + "cell_type": "markdown", + "id": "0e433322-5120-4451-9aa4-cfd5795aaa24", + "metadata": {}, + "source": [ + "A JSON schema is provided in both cases to improve the comparison." ] }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 27, + "id": "93a2116d-1fdd-432b-a48c-8be77c67e0e7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Once deleted, variables cannot be recovered. Proceed (y/[n])? 
y\n" + ] + } + ], + "source": [ + "%reset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdf4020a-3b23-47e0-b7c4-3335bf3d5d8c", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install git+https://github.com/H4dr1en/jupyterflame.git" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "427b969f-0e68-44da-b74d-5cada875f74f", + "metadata": {}, + "outputs": [], + "source": [ + "# directly on the shell within the conda env: conda install -y perl" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "08db61e8-d70f-4434-bfcc-6225405b81f2", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The jupyterflame extension is already loaded. To reload it, use:\n", + " %reload_ext jupyterflame\n" + ] + } + ], + "source": [ + "%load_ext jupyterflame" + ] + }, + { + "cell_type": "code", + "execution_count": 29, "id": "eefffe2a-f61c-47c8-90e3-d0de0ab932d6", "metadata": { "tags": [] @@ -488,7 +557,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 46, "id": "0b2be27e-a56c-411b-bbff-dc42e533ca80", "metadata": { "tags": [] @@ -498,7 +567,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'@timestamp': String, 'host.hostname': String, 'host.ip': String, 'log.level': String, 'winlog.event_id': Int64, 'winlog.task': String, 'message': String}\n" + "Polars Schema: {'@timestamp': String, 'host.hostname': String, 'host.ip': String, 'log.level': String, 'winlog.event_id': Int64, 'winlog.task': String, 'message': String}\n", + "Pandas Schema: {'@timestamp': 'str', 'host.hostname': 'str', 'host.ip': 'str', 'log.level': 'str', 'winlog.event_id': 'int64', 'winlog.task': 'str', 'message': 'str'}\n" ] } ], @@ -513,18 +583,57 @@ " # Add more mappings if needed\n", "}\n", "\n", + "pandas_dtype_mapping = {\n", + " \"object\": \"str\",\n", + " \"int64\": \"int64\",\n", + " \"float64\": \"float64\",\n", + " # Add more mappings if 
needed\n", + "}\n", + "\n", + "\n", "# Generate the schema for Polars from Pandas dtype\n", - "schema = {col: dtype_mapping[str(dtype)] for col, dtype in pd_df.dtypes.items()}\n", - "print(schema)\n" + "polars_schema = {col: dtype_mapping[str(dtype)] for col, dtype in pd_df.dtypes.items()}\n", + "print(\"Polars Schema:\", polars_schema)\n", + "\n", + "pandas_schema = {col: pandas_dtype_mapping[str(dtype)] for col, dtype in pd_df.dtypes.items()}\n", + "print(\"Pandas Schema:\", pandas_schema)" ] }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 53, "id": "5ccc9d58-8e27-43d0-bf69-7f2ff44c9874", "metadata": { "tags": [] }, + "outputs": [], + "source": [ + "def test_polars():\n", + " # Read the JSON file using the defined schema\n", + " lazy_df = pl.scan_ndjson(file_path)\n", + "\n", + " # Collect the LazyFrame to a DataFrame\n", + " pl_df = lazy_df.collect()\n", + "\n", + " # Convert columns to the correct data types according to the schema\n", + " pl_df = pl_df.with_columns([pl.col(col).cast(dtype) for col, dtype in polars_schema.items()])\n", + "\n", + " # Print the DataFrame and its memory usage\n", + " print(pl_df)\n", + "\n", + " num_rows_polars = pl_df.shape[0]\n", + "\n", + " print(f\"Polars DataFarme number of rows: {num_rows_polars}\")\n", + " print(f\"Polars DataFrame memory usage: {pl_df.estimated_size() / (1024 ** 2):.2f} MB\")" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "6e1ca70b-9aae-43af-b1c0-cc8d6f19a7ce", + "metadata": { + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -575,36 +684,704 @@ "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n", "└──────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┘\n", "Pandas DataFarme number of rows: 8000\n", - "Polars DataFrame memory usage: 4.76 MB\n" + "Polars DataFrame memory usage: 4.76 MB\n", + " " ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\t\n", + "\t\t\n", + "\t\n", + 
"\n", + "\n", + "\n", + "\n", + "Icicle Graph\n", + " \n", + "Reset Zoom\n", + "Search\n", + " \n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/lazyframe/frame.py:1683:collect (27,454 samples, 84.56%)\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/lazyframe/frame.py:1683:collect\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/ipykernel/iostream.py:532:_schedule_flush (65 samples, 0.20%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method posix.stat> (278 samples, 0.86%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/_utils/parse_expr_input.py:50:_parse_positional_inputs (36 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/zmq/sugar/socket.py:543:send (40 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/_utils/parse_expr_input.py:20:parse_as_list_of_expressions (38 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/_utils/various.py:182:normalize_filepath (147 samples, 0.45%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method builtins.print> (529 samples, 1.63%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/lazyframe/frame.py:1683:collect (599 samples, 1.84%)\n", + "/..\n", + "\n", + "\n", + "/tmp/ipykernel_67611/2832609216.py:1:test_polars (32,221 samples, 99.24%)\n", + "/tmp/ipykernel_67611/2832609216.py:1:test_polars\n", + "\n", + "\n", + "/tmp/ipykernel_67611/2832609216.py:9:<listcomp> (123 samples, 0.38%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/io/ndjson.py:86:scan_ndjson (1,108 samples, 3.41%)\n", + "/ho..\n", + "\n", + "\n", + "<frozen genericpath>:16:exists (140 samples, 0.43%)\n", + "\n", + "\n", + "\n", + "~:0:<method 'collect' of 'builtins.PyLazyFrame' objects> (593 samples, 
1.83%)\n", + "~..\n", + "\n", + "\n", + "~:0:<method 'collect' of 'builtins.PyLazyFrame' objects> (27,400 samples, 84.39%)\n", + "~:0:<method 'collect' of 'builtins.PyLazyFrame' objects>\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/ipykernel/iostream.py:243:schedule (61 samples, 0.19%)\n", + "\n", + "\n", + "\n", + "all (32,467 samples, 100%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method posix.stat> (139 samples, 0.43%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method new_from_ndjson> (951 samples, 2.93%)\n", + "~:..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/ipykernel/iostream.py:610:write (114 samples, 0.35%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/_utils/deprecation.py:130:wrapper (1,112 samples, 3.43%)\n", + "/ho..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/functions/col.py:20:_create_col (39 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/dataframe/frame.py:8164:with_columns (706 samples, 2.17%)\n", + "/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/functions/col.py:145:__new__ (46 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/_utils/various.py:182:normalize_filepath (294 samples, 0.91%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/_utils/deprecation.py:130:wrapper (3,334 samples, 10.27%)\n", + "/home/marius/an..\n", + "\n", + "\n", + "~:0:<built-in method new_from_ndjson> (1,902 samples, 5.86%)\n", + "~:0:<bu..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/dataframe/frame.py:980:__str__ (403 samples, 1.24%)\n", + "\n", + "\n", + "\n", + "<frozen genericpath>:16:exists (280 samples, 0.86%)\n", + "\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/polars/lazyframe/frame.py:4006:with_columns (77 samples, 0.24%)\n", + "\n", + "\n", + "\n", + "<string>:1:<module> (32,424 samples, 99.87%)\n", + "<string>:1:<module>\n", + "\n", + "\n", + "~:0:<built-in method builtins.exec> (32,467 samples, 100.00%)\n", + "~:0:<built-in method builtins.exec>\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/expr/expr.py:1917:cast (50 samples, 0.15%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/polars/io/ndjson.py:86:scan_ndjson (2,217 samples, 6.83%)\n", + "/home/mar..\n", + "\n", + "\n", + "~:0:<method 'as_str' of 'builtins.PyDataFrame' objects> (399 samples, 1.23%)\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# Read the JSON file using the defined schema\n", - "lazy_df = pl.scan_ndjson(file_path)\n", - "\n", - "# Collect the LazyFrame to a DataFrame\n", - "pl_df = lazy_df.collect()\n", - "\n", - "# Convert columns to the correct data types according to the schema\n", - "pl_df = pl_df.with_columns([pl.col(col).cast(dtype) for col, dtype in schema.items()])\n", - "\n", - "# Print the DataFrame and its memory usage\n", - "print(pl_df)\n", - "\n", - "num_rows_polars = pl_df.shape[0]\n", - "\n", - "print(f\"Pandas DataFarme number of rows: {num_rows_polars}\")\n", - "print(f\"Polars DataFrame memory usage: {pl_df.estimated_size() / (1024 ** 2):.2f} MB\")" + "%%flame -q --inverted\n", + "test_polars()" ] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 49, + "id": "87f043b9-6cfa-4c3b-b550-25818e29bd45", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (8_000, 7)\n", + "┌──────────────┬─────────────┬─────────────┬─────────────┬─────────────┬─────────────┬─────────────┐\n", + "│ @timestamp ┆ 
host.hostna ┆ host.ip ┆ log.level ┆ winlog.even ┆ winlog.task ┆ message │\n", + "│ --- ┆ me ┆ --- ┆ --- ┆ t_id ┆ --- ┆ --- │\n", + "│ str ┆ --- ┆ str ┆ str ┆ --- ┆ str ┆ str │\n", + "│ ┆ str ┆ ┆ ┆ i64 ┆ ┆ │\n", + "╞══════════════╪═════════════╪═════════════╪═════════════╪═════════════╪═════════════╪═════════════╡\n", + "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 13 ┆ Registry ┆ Registry │\n", + "│ 5:57:18.471Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ value set ┆ value set: │\n", + "│ ┆ ┆ 8a1 ┆ ┆ ┆ (rule: ┆ RuleName: … │\n", + "│ ┆ ┆ ┆ ┆ ┆ Regi… ┆ │\n", + "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 13 ┆ Registry ┆ Registry │\n", + "│ 5:57:18.471Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ value set ┆ value set: │\n", + "│ ┆ ┆ 8a1 ┆ ┆ ┆ (rule: ┆ RuleName: … │\n", + "│ ┆ ┆ ┆ ┆ ┆ Regi… ┆ │\n", + "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 13 ┆ Registry ┆ Registry │\n", + "│ 5:57:18.471Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ value set ┆ value set: │\n", + "│ ┆ ┆ 8a1 ┆ ┆ ┆ (rule: ┆ RuleName: … │\n", + "│ ┆ ┆ ┆ ┆ ┆ Regi… ┆ │\n", + "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 13 ┆ Registry ┆ Registry │\n", + "│ 5:57:18.471Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ value set ┆ value set: │\n", + "│ ┆ ┆ 8a1 ┆ ┆ ┆ (rule: ┆ RuleName: … │\n", + "│ ┆ ┆ ┆ ┆ ┆ Regi… ┆ │\n", + "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 13 ┆ Registry ┆ Registry │\n", + "│ 5:57:18.471Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ value set ┆ value set: │\n", + "│ ┆ ┆ 8a1 ┆ ┆ ┆ (rule: ┆ RuleName: … │\n", + "│ ┆ ┆ ┆ ┆ ┆ Regi… ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 4663 ┆ Removable ┆ An attempt │\n", + "│ 6:10:07.128Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ Storage ┆ was made to │\n", + "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n", + "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 4663 ┆ Removable ┆ An attempt │\n", + "│ 6:10:07.136Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ Storage ┆ was made to │\n", + "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n", + "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 4663 ┆ Removable ┆ An attempt │\n", + "│ 
6:10:07.136Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ Storage ┆ was made to │\n", + "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n", + "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 4663 ┆ Removable ┆ An attempt │\n", + "│ 6:10:07.149Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ Storage ┆ was made to │\n", + "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n", + "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 4663 ┆ Removable ┆ An attempt │\n", + "│ 6:10:07.149Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ Storage ┆ was made to │\n", + "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n", + "└──────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┘\n", + "Pandas DataFarme number of rows: 8000\n", + "Polars DataFrame memory usage: 4.76 MB\n", + " " + ] + }, + { + "data": { + "text/plain": [ + " 256 function calls (253 primitive calls) in 0.020 seconds\n", + "\n", + " Ordered by: internal time\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 2 0.014 0.007 0.014 0.007 {method 'collect' of 'builtins.PyLazyFrame' objects}\n", + " 1 0.003 0.003 0.003 0.003 {built-in method new_from_ndjson}\n", + " 2 0.001 0.001 0.001 0.001 {built-in method posix.stat}\n", + " 1 0.000 0.000 0.000 0.000 {method 'as_str' of 'builtins.PyDataFrame' objects}\n", + " 1 0.000 0.000 0.020 0.020 :1()\n", + " 1 0.000 0.000 0.020 0.020 2832609216.py:1(test_polars)\n", + " 1 0.000 0.000 0.000 0.000 socket.py:543(send)\n", + " 2 0.000 0.000 0.000 0.000 wrap.py:12(wrap_df)\n", + " 6 0.000 0.000 0.000 0.000 iostream.py:610(write)\n", + " 1 0.000 0.000 0.020 0.020 {built-in method builtins.exec}\n", + " 2 0.000 0.000 0.014 0.007 frame.py:1683(collect)\n", + " 1 0.000 0.000 0.004 0.004 ndjson.py:86(scan_ndjson)\n", + " 1 0.000 0.000 0.000 0.000 2832609216.py:9()\n", + " 2/1 0.000 0.000 0.004 0.004 deprecation.py:130(wrapper)\n", + " 2 0.000 0.000 0.000 0.000 wrap.py:16(wrap_ldf)\n", + " 7 0.000 0.000 0.000 0.000 expr.py:1917(cast)\n", + " 1 0.000 0.000 0.001 0.001 various.py:182(normalize_filepath)\n", + " 41/39 0.000 0.000 
0.000 0.000 {built-in method builtins.isinstance}\n", + " 7 0.000 0.000 0.000 0.000 col.py:20(_create_col)\n", + " 3 0.000 0.000 0.001 0.000 {built-in method builtins.print}\n", + " 1 0.000 0.000 0.000 0.000 frame.py:4006(with_columns)\n", + " 2 0.000 0.000 0.000 0.000 {method 'optimization_toggle' of 'builtins.PyLazyFrame' objects}\n", + " 1 0.000 0.000 0.000 0.000 frame.py:7998(lazy)\n", + " 3 0.000 0.000 0.000 0.000 frame.py:316(_from_pyldf)\n", + " 1 0.000 0.000 0.001 0.001 frame.py:8164(with_columns)\n", + " 7 0.000 0.000 0.000 0.000 {method 'cast' of 'builtins.PyExpr' objects}\n", + " 1 0.000 0.000 0.000 0.000 iostream.py:243(schedule)\n", + " 7 0.000 0.000 0.000 0.000 convert.py:388(py_type_to_dtype)\n", + " 19 0.000 0.000 0.000 0.000 {built-in method __new__ of type object at 0x860f60}\n", + " 6 0.000 0.000 0.000 0.000 iostream.py:505(_is_master_process)\n", + " 1 0.000 0.000 0.000 0.000 {method 'with_columns' of 'builtins.PyLazyFrame' objects}\n", + " 1 0.000 0.000 0.000 0.000 :674(__getitem__)\n", + " 7 0.000 0.000 0.000 0.000 {col}\n", + " 7 0.000 0.000 0.000 0.000 col.py:145(__new__)\n", + " 1 0.000 0.000 0.000 0.000 :39(isdir)\n", + " 7 0.000 0.000 0.000 0.000 wrap.py:24(wrap_expr)\n", + " 1 0.000 0.000 0.000 0.000 :229(expanduser)\n", + " 6 0.000 0.000 0.000 0.000 {built-in method posix.getpid}\n", + " 14 0.000 0.000 0.000 0.000 expr.py:131(_from_pyexpr)\n", + " 1 0.000 0.000 0.000 0.000 {method 'lazy' of 'builtins.PyDataFrame' objects}\n", + " 1 0.000 0.000 0.000 0.000 typing.py:1579(__subclasscheck__)\n", + " 2 0.000 0.000 0.000 0.000 frame.py:439(_from_pydf)\n", + " 1 0.000 0.000 0.000 0.000 parse_expr_input.py:56()\n", + " 7 0.000 0.000 0.000 0.000 convert.py:146(is_polars_dtype)\n", + " 1 0.000 0.000 0.000 0.000 threading.py:1185(is_alive)\n", + " 1 0.000 0.000 0.001 0.001 :16(exists)\n", + " 1 0.000 0.000 0.000 0.000 {method 'estimated_size' of 'builtins.PyDataFrame' objects}\n", + " 6 0.000 0.000 0.000 0.000 iostream.py:532(_schedule_flush)\n", 
+ " 1 0.000 0.000 0.000 0.000 frame.py:980(__str__)\n", + " 1 0.000 0.000 0.000 0.000 parse_expr_input.py:59(_parse_inputs_as_iterable)\n", + " 1 0.000 0.000 0.000 0.000 :771(get)\n", + " 1 0.000 0.000 0.000 0.000 threading.py:1118(_wait_for_tstate_lock)\n", + " 7 0.000 0.000 0.000 0.000 parse_expr_input.py:85(parse_as_expression)\n", + " 1 0.000 0.000 0.000 0.000 frame.py:3600(estimated_size)\n", + " 1 0.000 0.000 0.000 0.000 parse_expr_input.py:50(_parse_positional_inputs)\n", + " 1 0.000 0.000 0.000 0.000 :756(encode)\n", + " 1 0.000 0.000 0.000 0.000 frame.py:591(shape)\n", + " 1 0.000 0.000 0.000 0.000 {built-in method builtins.issubclass}\n", + " 1 0.000 0.000 0.000 0.000 iostream.py:127(_event_pipe)\n", + " 1 0.000 0.000 0.000 0.000 parse_expr_input.py:72(_is_iterable)\n", + " 1 0.000 0.000 0.000 0.000 parse_expr_input.py:20(parse_as_list_of_expressions)\n", + " 1 0.000 0.000 0.000 0.000 typing.py:1304(__instancecheck__)\n", + " 1 0.000 0.000 0.000 0.000 :121(__subclasscheck__)\n", + " 1 0.000 0.000 0.000 0.000 {built-in method _abc._abc_subclasscheck}\n", + " 1 0.000 0.000 0.000 0.000 {method 'shape' of 'builtins.PyDataFrame' objects}\n", + " 1 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.lock' objects}\n", + " 7 0.000 0.000 0.000 0.000 {built-in method builtins.len}\n", + " 6 0.000 0.000 0.000 0.000 {method 'write' of '_io.StringIO' objects}\n", + " 6 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.RLock' objects}\n", + " 1 0.000 0.000 0.000 0.000 {built-in method _stat.S_ISDIR}\n", + " 1 0.000 0.000 0.000 0.000 various.py:210(scale_bytes)\n", + " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", + " 2 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects}\n", + " 2 0.000 0.000 0.000 0.000 deprecation.py:143(_rename_keyword_argument)\n", + " 1 0.000 0.000 0.000 0.000 {method 'encode' of 'str' objects}\n", + " 1 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects}\n", + " 1 0.000 0.000 0.000 0.000 {method 
'startswith' of 'str' objects}\n", + " 1 0.000 0.000 0.000 0.000 _utils.py:58(parse_row_index_args)\n", + " 1 0.000 0.000 0.000 0.000 threading.py:568(is_set)\n", + " 1 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n", + " 1 0.000 0.000 0.000 0.000 {built-in method posix.fspath}" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%prun\n", + "test_polars()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, "id": "547f7253-cd62-44c6-8d7a-840dab2dbbbd", "metadata": { "tags": [] }, + "outputs": [], + "source": [ + "def test_pandas():\n", + " # Load the JSON file into a Pandas DataFrame\n", + " pd_df = pd.read_json(file_path, lines=True, dtype=pandas_schema)\n", + " pd_memory_usage = pd_df.memory_usage(deep=True).sum()\n", + "\n", + " # Get the number of rows in the Pandas DataFrame\n", + " num_rows_pandas = pd_df.shape[0]\n", + "\n", + " print(pd_df)\n", + "\n", + " print(f\"Pandas DataFarme number of rows: {num_rows_pandas}\")\n", + " print(f\"Pandas DataFrame memory usage: {pd_memory_usage / (1024 ** 2):.2f} MB\") \n" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "50230892-0a0e-4144-a17e-27d2714de1e8", + "metadata": { + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -651,34 +1428,2240 @@ "\n", "[8000 rows x 7 columns]\n", "Pandas DataFarme number of rows: 8000\n", - "Pandas DataFrame memory usage: 7.56 MB\n" + "Pandas DataFrame memory usage: 7.56 MB\n", + " " ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\t\n", + "\t\t\n", + "\t\n", + "\n", + "\n", + "\n", + "\n", + "Icicle Graph\n", + " \n", + "Reset Zoom\n", + "Search\n", + " \n", + "\n", + "\n", + "/tmp/ipykernel_67611/1231667944.py:1:test_pandas (102,753 samples, 97.34%)\n", + "/tmp/ipykernel_67611/1231667944.py:1:test_pandas\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2068:create_block_manager_from_column_arrays (2,137 samples, 2.02%)\n", + "/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/nanops.py:76:_f (147 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:7521:ensure_index (207 samples, 0.20%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/generic.py:6337:dtypes (182 samples, 0.17%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/generic.py:6368:astype (306 samples, 0.29%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:500:read_json (82,450 samples, 78.10%)\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:500:read_json\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2194:_stack_arrays (271 samples, 0.26%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2068:create_block_manager_from_column_arrays (285 samples, 0.27%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/series.py:371:__init__ (109 samples, 0.10%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1204:_try_convert_data (379 samples, 0.36%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/base.py:510:find (182 samples, 0.17%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1282:_try_convert_to_date (299 samples, 0.28%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method pandas._libs.json.ujson_loads> (24,688 samples, 23.39%)\n", + 
"~:0:<built-in method pandas._libs.js..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:192:astype_array_safe (5,918 samples, 5.61%)\n", + "/home/m..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:506:nested_data_to_arrays (9,067 samples, 8.59%)\n", + "/home/marius..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:665:__init__ (13,135 samples, 12.44%)\n", + "/home/marius/anaco..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1681:_getitem_axis (125 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/missing.py:184:_isna (117 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/reshape/concat.py:695:new_axes (124 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2224:_merge_blocks (881 samples, 0.83%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1681:_getitem_axis (127 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "~:0:<pandas._libs.lib.dicts_to_array> (2,870 samples, 2.72%)\n", + "~:..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1022:_get_object_parser (56,621 samples, 53.64%)\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1022:_get_o..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/_config/config.py:145:_get_option (122 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method builtins.max> (112 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method pandas._libs.lib.ensure_string_array> (4,514 samples, 4.28%)\n", + "~:0:<..\n", + "\n", 
+ "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/nanops.py:604:nansum (117 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "~:0:<pandas._libs.lib.dicts_to_array> (737 samples, 0.70%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/tools/datetimes.py:216:_maybe_cache (117 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "~:0:<method 'close' of '_io.TextIOWrapper' objects> (117 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:1744:_consolidate_inplace (502 samples, 0.48%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:789:truncate (1,298 samples, 1.23%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:922:<listcomp> (239 samples, 0.23%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/tools/datetimes.py:526:_to_datetime_with_unit (247 samples, 0.23%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:1392:items (667 samples, 0.63%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/common.py:1587:pandas_dtype (365 samples, 0.35%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/missing.py:380:notna (150 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/concat.py:94:concatenate_managers (235 samples, 0.22%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:1006:convert_object_array (164 samples, 0.16%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/reshape/concat.py:618:get_result (779 samples, 
0.74%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:816:__init__ (12,676 samples, 12.01%)\n", + "/home/marius/anaco..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:596:_homogenize (145 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1172:parse (56,590 samples, 53.61%)\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1172:parse\n", + "\n", + "\n", + "all (105,565 samples, 100%)\n", + "\n", + "\n", + "\n", + "<frozen codecs>:319:decode (2,921 samples, 2.77%)\n", + "<f..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/string.py:40:_get_string_representation (5,210 samples, 4.94%)\n", + "/home/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2137:_form_blocks (1,161 samples, 1.10%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1651:_getitem_tuple (305 samples, 0.29%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/common.py:121:close (131 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1139:__getitem__ (1,092 samples, 1.03%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/generic.py:6368:astype (6,712 samples, 6.36%)\n", + "/home/ma..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:56:_astype_nansafe (250 samples, 0.24%)\n", + "\n", + "\n", + "\n", + "~:0:<pandas._libs.lib.fast_unique_multiple_list_gen> (4,722 samples, 4.47%)\n", + "~:0:<..\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:843:_get_strcols_without_index (4,287 samples, 4.06%)\n", + "/hom..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2137:_form_blocks (155 samples, 0.15%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:965:<listcomp> (3,827 samples, 3.63%)\n", + "/hom..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:4402:_get_item_cache (483 samples, 0.46%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2207:_consolidate (477 samples, 0.45%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:611:get_strcols (4,538 samples, 4.30%)\n", + "/home..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:1038:astype (316 samples, 0.30%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/reshape/concat.py:695:new_axes (334 samples, 0.32%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:564:__init__ (1,385 samples, 1.31%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:3776:_ixs (390 samples, 0.37%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:423:dict_to_mgr (706 samples, 0.67%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:405:astype (6,217 samples, 5.89%)\n", + "/home/m..\n", + "\n", + "\n", + "~:0:<built-in method builtins.exec> (105,565 samples, 100.00%)\n", + "~:0:<built-in method builtins.exec>\n", + "\n", + "\n", + 
"~:0:<pandas._libs.lib.fast_unique_multiple_list_gen> (1,213 samples, 1.15%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:991:iget (106 samples, 0.10%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:891:_list_of_dict_to_arrays (2,251 samples, 2.13%)\n", + "/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:793:to_arrays (9,043 samples, 8.57%)\n", + "/home/marius..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/_config/config.py:271:__call__ (134 samples, 0.13%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:487:get_dataframe_repr_params (113 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1651:_getitem_tuple (870 samples, 0.82%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/tools/datetimes.py:216:_maybe_cache (485 samples, 0.46%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/numpy/core/shape_base.py:223:vstack (113 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:915:<genexpr> (796 samples, 0.75%)\n", + "\n", + "\n", + "\n", + "~:0:<method 'split' of 'str' objects> (2,960 samples, 2.80%)\n", + "~:..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1185:_convert_axes (2,110 samples, 2.00%)\n", + "/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:956:_get_formatted_index (245 samples, 0.23%)\n", + "\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:890:format_col (3,355 samples, 3.18%)\n", + "/ho..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1396:_process_converter (576 samples, 0.55%)\n", + "\n", + "\n", + "\n", + "<__array_function__ internals>:177:concatenate (109 samples, 0.10%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method _codecs.utf_8_decode> (2,892 samples, 2.74%)\n", + "~:..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2207:_consolidate (918 samples, 0.87%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/reshape/concat.py:157:concat (893 samples, 0.85%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1282:_try_convert_to_date (1,236 samples, 1.17%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/series.py:3159:_append (365 samples, 0.35%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:4402:_get_item_cache (583 samples, 0.55%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/tools/datetimes.py:721:to_datetime (823 samples, 0.78%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:158:astype_array (1,337 samples, 1.27%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2207:_consolidate (123 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:5422:append (261 samples, 0.25%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2194:_stack_arrays (1,054 samples, 1.00%)\n", + "\n", + "\n", 
+ "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1204:_try_convert_data (8,299 samples, 7.86%)\n", + "/home/mariu..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/series.py:6195:sum (181 samples, 0.17%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:931:_validate_tuple_indexer (188 samples, 0.18%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/string.py:28:to_string (5,230 samples, 4.95%)\n", + "/home/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/generic.py:12070:sum (178 samples, 0.17%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:928:_finalize_columns_and_data (210 samples, 0.20%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:405:astype (283 samples, 0.27%)\n", + "\n", + "\n", + "\n", + "~:0:<pandas._libs.tslib.array_with_unit_to_datetime> (223 samples, 0.21%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:1418:_format_with_header (152 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/generic.py:6368:astype (1,626 samples, 1.54%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:3561:<listcomp> (11,854 samples, 11.23%)\n", + "/home/marius/ana..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:1964:array_values (165 samples, 0.16%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:5519:equals (106 samples, 0.10%)\n", + "\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/printing.py:28:adjoin (195 samples, 0.18%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1396:_process_converter (12,381 samples, 11.73%)\n", + "/home/marius/anac..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:965:<genexpr> (2,854 samples, 2.70%)\n", + "/h..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:1355:_format (590 samples, 0.56%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:1028:convert (157 samples, 0.15%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/reshape/concat.py:618:get_result (290 samples, 0.27%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/reshape/concat.py:698:<listcomp> (326 samples, 0.31%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method numpy.core._multiarray_umath.implement_array_function> (146 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:96:arrays_to_mgr (1,193 samples, 1.13%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:956:_get_formatted_index (210 samples, 0.20%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:960:_combine_lines (9,052 samples, 8.57%)\n", + "/home/marius..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/series.py:371:__init__ (763 samples, 0.72%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2137:_form_blocks (299 samples, 0.28%)\n", + "\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:1217:format_array (2,045 samples, 1.94%)\n", + "/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:1418:_format_with_header (149 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1536:_validate_key (139 samples, 0.13%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:192:astype_array_safe (270 samples, 0.26%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/nanops.py:389:new_func (126 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1429:<lambda> (379 samples, 0.36%)\n", + "\n", + "\n", + "\n", + "~:0:<method 'keys' of 'dict' objects> (378 samples, 0.36%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/series.py:5223:memory_usage (11,165 samples, 10.58%)\n", + "/home/marius/an..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/blocks.py:2241:array_values (135 samples, 0.13%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2194:_stack_arrays (141 samples, 0.13%)\n", + "\n", + "\n", + "\n", + "~:0:<method 'read' of '_io.TextIOWrapper' objects> (11,407 samples, 10.81%)\n", + "~:0:<method 'rea..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:96:arrays_to_mgr (306 samples, 0.29%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:1418:_format_with_header (129 samples, 0.12%)\n", + "\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2224:_merge_blocks (118 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:434:adjoin (205 samples, 0.19%)\n", + "\n", + "\n", + "\n", + "~:0:<method 'join' of 'str' objects> (2,642 samples, 2.50%)\n", + "~:..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2207:_consolidate (236 samples, 0.22%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1681:_getitem_axis (357 samples, 0.34%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:192:astype_array_safe (1,434 samples, 1.36%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:891:_list_of_dict_to_arrays (8,762 samples, 8.30%)\n", + "/home/mariu..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:922:<listcomp> (930 samples, 0.88%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/reshape/concat.py:157:concat (347 samples, 0.33%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method numpy.core._multiarray_umath.implement_array_function> (108 samples, 0.10%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:825:_truncate_vertically (1,285 samples, 1.22%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/ipykernel/iostream.py:610:write (126 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:1077:to_string (5,270 samples, 4.99%)\n", + "/home/..\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/base.py:1135:_memory_usage (11,150 samples, 10.56%)\n", + "/home/marius/an..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1204:_try_convert_data (2,029 samples, 1.92%)\n", + "/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1429:<lambda> (8,308 samples, 7.87%)\n", + "/home/mariu..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:3776:_ixs (196 samples, 0.19%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:96:arrays_to_mgr (2,506 samples, 2.37%)\n", + "/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:308:apply (282 samples, 0.27%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1360:_parse (40,685 samples, 38.54%)\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/base.py:510:find (249 samples, 0.24%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/common.py:1587:pandas_dtype (197 samples, 0.19%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1105:__exit__ (142 samples, 0.13%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:915:_get_formatted_column_labels (490 samples, 0.46%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:405:astype (1,506 samples, 1.43%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/nanops.py:455:newfunc (121 samples, 0.11%)\n", + 
"\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/printing.py:162:pprint_thing (354 samples, 0.34%)\n", + "\n", + "\n", + "\n", + "<__array_function__ internals>:177:concatenate (209 samples, 0.20%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:912:_get_data_from_filepath (1,088 samples, 1.03%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:665:__init__ (154 samples, 0.15%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2233:<listcomp> (138 samples, 0.13%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:3471:memory_usage (13,034 samples, 12.35%)\n", + "/home/marius/anaco..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:665:__init__ (3,382 samples, 3.20%)\n", + "/ho..\n", + "\n", + "\n", + "~:0:<method 'strip' of 'str' objects> (1,140 samples, 1.08%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:308:apply (1,499 samples, 1.42%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:96:arrays_to_mgr (645 samples, 0.61%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:478:__new__ (176 samples, 0.17%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:56:_astype_nansafe (1,326 samples, 1.26%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:478:__new__ (128 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:3776:_ixs (471 samples, 0.45%)\n", + "\n", + 
"\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:506:nested_data_to_arrays (106 samples, 0.10%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/string.py:34:_get_strcols (4,824 samples, 4.57%)\n", + "/home..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/series.py:371:__init__ (451 samples, 0.43%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/missing.py:101:isna (121 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:1396:format (130 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method pandas._libs.lib.ensure_string_array> (1,094 samples, 1.04%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/string.py:119:_join_multiline (363 samples, 0.34%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/series.py:791:array (175 samples, 0.17%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:1332:_format_strings (1,229 samples, 1.16%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/range.py:902:_concat (141 samples, 0.13%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2068:create_block_manager_from_column_arrays (1,111 samples, 1.05%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:915:<genexpr> (3,098 samples, 2.93%)\n", + "/h..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/blocks.py:588:astype (1,476 samples, 1.40%)\n", + "\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2224:_merge_blocks (458 samples, 0.43%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:506:nested_data_to_arrays (2,329 samples, 2.21%)\n", + "/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/blocks.py:588:astype (6,092 samples, 5.77%)\n", + "/home/m..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1049:close (137 samples, 0.13%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/construction.py:519:sanitize_array (181 samples, 0.17%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:978:_getitem_tuple_same_dim (145 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method pandas._libs.lib.ensure_string_array> (206 samples, 0.20%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:1392:items (557 samples, 0.53%)\n", + "\n", + "\n", + "\n", + "<__array_function__ internals>:177:vstack (150 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "~:0:<pandas._libs.lib.map_infer> (143 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/printing.py:193:as_escaped_string (150 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:1744:_consolidate_inplace (966 samples, 0.92%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method io.open> (864 samples, 0.82%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:1229:to_string (6,733 samples, 6.38%)\n", + "/home/ma..\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:158:astype_array (5,516 samples, 5.23%)\n", + "/home/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/series.py:6094:_reduce (158 samples, 0.15%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:4384:_box_col_values (195 samples, 0.18%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:1909:_make_fixed_width (246 samples, 0.23%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1006:_getitem_lowerdim (493 samples, 0.47%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method builtins.print> (7,026 samples, 6.66%)\n", + "~:0:<buil..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/common.py:1268:is_extension_array_dtype (271 samples, 0.26%)\n", + "\n", + "\n", + "\n", + "~:0:<method 'astype' of 'numpy.ndarray' objects> (190 samples, 0.18%)\n", + "\n", + "\n", + "\n", + "~:0:<pandas._libs.lib.memory_usage_of_objects> (10,563 samples, 10.01%)\n", + "~:0:<pandas._l..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:1909:_make_fixed_width (515 samples, 0.49%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/series.py:371:__init__ (150 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "~:0:<method 'astype' of 'numpy.ndarray' objects> (784 samples, 0.74%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2194:_stack_arrays (548 samples, 0.52%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2137:_form_blocks (604 samples, 0.57%)\n", + "\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:793:to_arrays (2,323 samples, 2.20%)\n", + "/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:1938:<listcomp> (122 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/blocks.py:588:astype (278 samples, 0.26%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/reshape/concat.py:773:_concat_indexes (265 samples, 0.25%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:991:iget (128 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/arrays/numpy_.py:98:__init__ (114 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/tools/datetimes.py:721:to_datetime (199 samples, 0.19%)\n", + "\n", + "\n", + "\n", + "~:0:<built-in method numpy.core._multiarray_umath.implement_array_function> (280 samples, 0.27%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:308:apply (6,187 samples, 5.86%)\n", + "/home/m..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:1396:format (151 samples, 0.14%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1006:_getitem_lowerdim (173 samples, 0.16%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/generic.py:12031:_min_count_stat_function (173 samples, 0.16%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/numpy/core/shape_base.py:223:vstack (216 samples, 0.20%)\n", + "\n", + "\n", + "\n", + "<__array_function__ internals>:177:vstack (288 samples, 
0.27%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:1123:__repr__ (6,884 samples, 6.52%)\n", + "/home/ma..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:4384:_box_col_values (235 samples, 0.22%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:1396:format (155 samples, 0.15%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:1744:_consolidate_inplace (129 samples, 0.12%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:1068:<listcomp> (161 samples, 0.15%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:980:read (68,680 samples, 65.06%)\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:980:read\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:423:dict_to_mgr (2,742 samples, 2.60%)\n", + "/h..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/string.py:67:_insert_dot_separators (270 samples, 0.26%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/formats/format.py:1328:get_result (1,829 samples, 1.73%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/common.py:652:get_handle (988 samples, 0.94%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:896:_preprocess_data (11,571 samples, 10.96%)\n", + "/home/marius/ana..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/reshape/concat.py:713:_get_concat_axis (275 samples, 0.26%)\n", + "\n", + "\n", + "\n", + 
"/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/tools/datetimes.py:369:_convert_listlike_datetimes (269 samples, 0.25%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:56:_astype_nansafe (5,471 samples, 5.18%)\n", + "/home/..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2068:create_block_manager_from_column_arrays (550 samples, 0.52%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/reshape/concat.py:698:<listcomp> (121 samples, 0.11%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/base.py:510:find (107 samples, 0.10%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:2224:_merge_blocks (226 samples, 0.21%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1422:_try_convert_types (13,777 samples, 13.05%)\n", + "/home/marius/anacon..\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py:1744:_consolidate_inplace (248 samples, 0.23%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:158:astype_array (252 samples, 0.24%)\n", + "\n", + "\n", + "\n", + "<string>:1:<module> (105,529 samples, 99.97%)\n", + "<string>:1:<module>\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1139:__getitem__ (368 samples, 0.35%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/io/json/_json.py:1432:_try_convert_dates (582 samples, 0.55%)\n", + "\n", + "\n", + "\n", + "/home/marius/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:793:to_arrays (106 samples, 0.10%)\n", + "\n", + "\n", + "\n", + 
"~:0:<built-in method numpy.core._multiarray_umath.implement_array_function> (207 samples, 0.20%)\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# Load the JSON file into a Pandas DataFrame\n", - "pd_df = pd.read_json(file_path, lines=True)\n", - "pd_memory_usage = pd_df.memory_usage(deep=True).sum()\n", - "\n", - "# Get the number of rows in the Pandas DataFrame\n", - "num_rows_pandas = pd_df.shape[0]\n", - "\n", - "print(pd_df)\n", - "\n", - "print(f\"Pandas DataFarme number of rows: {num_rows_pandas}\")\n", - "print(f\"Pandas DataFrame memory usage: {pd_memory_usage / (1024 ** 2):.2f} MB\")" + "%%flame -q --inverted\n", + "test_pandas()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "4d47d7ec-d3f1-4fac-9933-ec330651a6f4", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " @timestamp host.hostname host.ip \\\n", + "0 2024-05-15T15:57:18.471Z win10 fe80::24b4:3691:44a6:38a1 \n", + "1 2024-05-15T15:57:18.471Z win10 fe80::24b4:3691:44a6:38a1 \n", + "2 2024-05-15T15:57:18.471Z win10 fe80::24b4:3691:44a6:38a1 \n", + "3 2024-05-15T15:57:18.471Z win10 fe80::24b4:3691:44a6:38a1 \n", + "4 2024-05-15T15:57:18.471Z win10 fe80::24b4:3691:44a6:38a1 \n", + "... ... ... ... 
\n", + "7995 2024-05-15T16:10:07.128Z win10 fe80::24b4:3691:44a6:38a1 \n", + "7996 2024-05-15T16:10:07.136Z win10 fe80::24b4:3691:44a6:38a1 \n", + "7997 2024-05-15T16:10:07.136Z win10 fe80::24b4:3691:44a6:38a1 \n", + "7998 2024-05-15T16:10:07.149Z win10 fe80::24b4:3691:44a6:38a1 \n", + "7999 2024-05-15T16:10:07.149Z win10 fe80::24b4:3691:44a6:38a1 \n", + "\n", + " log.level winlog.event_id winlog.task \\\n", + "0 information 13 Registry value set (rule: RegistryEvent) \n", + "1 information 13 Registry value set (rule: RegistryEvent) \n", + "2 information 13 Registry value set (rule: RegistryEvent) \n", + "3 information 13 Registry value set (rule: RegistryEvent) \n", + "4 information 13 Registry value set (rule: RegistryEvent) \n", + "... ... ... ... \n", + "7995 information 4663 Removable Storage \n", + "7996 information 4663 Removable Storage \n", + "7997 information 4663 Removable Storage \n", + "7998 information 4663 Removable Storage \n", + "7999 information 4663 Removable Storage \n", + "\n", + " message \n", + "0 Registry value set:\\nRuleName: InvDB-Ver\\nEven... \n", + "1 Registry value set:\\nRuleName: InvDB-Path\\nEve... \n", + "2 Registry value set:\\nRuleName: InvDB-Pub\\nEven... \n", + "3 Registry value set:\\nRuleName: InvDB-CompileTi... \n", + "4 Registry value set:\\nRuleName: InvDB-Ver\\nEven... \n", + "... ... \n", + "7995 An attempt was made to access an object.\\n\\nSu... \n", + "7996 An attempt was made to access an object.\\n\\nSu... \n", + "7997 An attempt was made to access an object.\\n\\nSu... \n", + "7998 An attempt was made to access an object.\\n\\nSu... \n", + "7999 An attempt was made to access an object.\\n\\nSu... 
\n", + "\n", + "[8000 rows x 7 columns]\n", + "Pandas DataFarme number of rows: 8000\n", + "Pandas DataFrame memory usage: 7.56 MB\n", + " " + ] + }, + { + "data": { + "text/plain": [ + " 46681 function calls (46472 primitive calls) in 0.113 seconds\n", + "\n", + " Ordered by: internal time\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 1 0.029 0.029 0.029 0.029 {built-in method pandas._libs.json.ujson_loads}\n", + " 6 0.010 0.002 0.010 0.002 {pandas._libs.lib.memory_usage_of_objects}\n", + " 1 0.009 0.009 0.012 0.012 {method 'read' of '_io.TextIOWrapper' objects}\n", + " 8001 0.005 0.000 0.005 0.000 construction.py:915()\n", + " 1 0.005 0.005 0.005 0.005 {pandas._libs.lib.dicts_to_array}\n", + " 94 0.004 0.000 0.004 0.000 {method 'split' of 'str' objects}\n", + " 1 0.003 0.003 0.009 0.009 {pandas._libs.lib.fast_unique_multiple_list_gen}\n", + " 1 0.003 0.003 0.010 0.010 _json.py:960(_combine_lines)\n", + " 1 0.003 0.003 0.003 0.003 {built-in method _codecs.utf_8_decode}\n", + " 64/8 0.003 0.000 0.003 0.000 {method 'join' of 'str' objects}\n", + " 1 0.003 0.003 0.113 0.113 :1()\n", + " 1 0.002 0.002 0.049 0.049 _json.py:1360(_parse)\n", + " 6 0.002 0.000 0.002 0.000 {built-in method pandas._libs.lib.ensure_string_array}\n", + " 4 0.002 0.000 0.002 0.001 managers.py:2194(_stack_arrays)\n", + " 8002 0.002 0.000 0.003 0.000 _json.py:965()\n", + " 1 0.002 0.002 0.002 0.002 construction.py:922()\n", + " 1 0.001 0.001 0.004 0.004 _json.py:965()\n", + " 1 0.001 0.001 0.091 0.091 _json.py:500(read_json)\n", + " 8001 0.001 0.000 0.001 0.000 {method 'strip' of 'str' objects}\n", + " 1 0.001 0.001 0.001 0.001 {built-in method io.open}\n", + " 9 0.001 0.000 0.001 0.000 {method 'astype' of 'numpy.ndarray' objects}\n", + " 2 0.001 0.001 0.002 0.001 managers.py:2224(_merge_blocks)\n", + " 53/51 0.001 0.000 0.001 0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}\n", + " 1 0.001 0.001 0.010 0.010 
_json.py:1422(_try_convert_types)\n", + "2422/2396 0.001 0.000 0.001 0.000 {built-in method builtins.isinstance}\n", + " 8001 0.001 0.000 0.001 0.000 {method 'keys' of 'dict' objects}\n", + " 32 0.001 0.000 0.001 0.000 generic.py:6147(__finalize__)\n", + " 1 0.001 0.001 0.001 0.001 {built-in method posix.stat}\n", + " 2 0.000 0.000 0.002 0.001 _json.py:1282(_try_convert_to_date)\n", + " 2 0.000 0.000 0.005 0.003 construction.py:96(arrays_to_mgr)\n", + " 98 0.000 0.000 0.000 0.000 generic.py:6206(__setattr__)\n", + " 24 0.000 0.000 0.001 0.000 base.py:510(find)\n", + "1415/1290 0.000 0.000 0.000 0.000 {built-in method builtins.len}\n", + " 29 0.000 0.000 0.001 0.000 {pandas._libs.lib.maybe_convert_objects}\n", + " 1 0.000 0.000 0.016 0.016 construction.py:793(to_arrays)\n", + " 21 0.000 0.000 0.000 0.000 managers.py:991(iget)\n", + " 661 0.000 0.000 0.000 0.000 format.py:428(len)\n", + " 1 0.000 0.000 0.000 0.000 socket.py:543(send)\n", + " 21 0.000 0.000 0.001 0.000 common.py:1587(pandas_dtype)\n", + " 47 0.000 0.000 0.000 0.000 {built-in method numpy.empty}\n", + " 93 0.000 0.000 0.000 0.000 config.py:127(_get_single_key)\n", + " 6 0.000 0.000 0.001 0.000 format.py:1332(_format_strings)\n", + " 198 0.000 0.000 0.000 0.000 base.py:236(construct_from_string)\n", + " 41 0.000 0.000 0.000 0.000 generic.py:274(__init__)\n", + " 5 0.000 0.000 0.001 0.000 base.py:478(__new__)\n", + " 19 0.000 0.000 0.001 0.000 construction.py:519(sanitize_array)\n", + " 7 0.000 0.000 0.001 0.000 series.py:371(__init__)\n", + " 18 0.000 0.000 0.000 0.000 {method 'reduce' of 'numpy.ufunc' objects}\n", + " 67 0.000 0.000 0.000 0.000 printing.py:162(pprint_thing)\n", + " 60 0.000 0.000 0.001 0.000 format.py:1355(_format)\n", + " 4 0.000 0.000 0.000 0.000 {pandas._libs.tslib.array_with_unit_to_datetime}\n", + " 63 0.000 0.000 0.000 0.000 base.py:5350(__getitem__)\n", + " 89 0.000 0.000 0.001 0.000 config.py:145(_get_option)\n", + " 91 0.000 0.000 0.000 0.000 config.py:633(_get_root)\n", + " 
67 0.000 0.000 0.000 0.000 printing.py:193(as_escaped_string)\n", + " 182 0.000 0.000 0.000 0.000 config.py:647(_get_deprecated_option)\n", + " 212 0.000 0.000 0.000 0.000 generic.py:42(_instancecheck)\n", + " 6 0.000 0.000 0.000 0.000 {pandas._libs.lib.map_infer}\n", + " 212 0.000 0.000 0.000 0.000 generic.py:37(_check)\n", + " 21 0.000 0.000 0.001 0.000 frame.py:4402(_get_item_cache)\n", + " 1 0.000 0.000 0.004 0.004 format.py:843(_get_strcols_without_index)\n", + " 21 0.000 0.000 0.001 0.000 frame.py:3776(_ixs)\n", + " 35 0.000 0.000 0.000 0.000 numeric.py:290(full)\n", + " 18 0.000 0.000 0.001 0.000 format.py:1909(_make_fixed_width)\n", + " 2 0.000 0.000 0.002 0.001 managers.py:2137(_form_blocks)\n", + " 9 0.000 0.000 0.002 0.000 format.py:1217(format_array)\n", + " 10 0.000 0.000 0.003 0.000 astype.py:56(_astype_nansafe)\n", + " 61 0.000 0.000 0.000 0.000 {built-in method builtins.max}\n", + " 180 0.000 0.000 0.000 0.000 format.py:1932(just)\n", + " 14 0.000 0.000 0.000 0.000 base.py:5300(__contains__)\n", + " 15 0.000 0.000 0.000 0.000 {built-in method numpy.array}\n", + " 67 0.000 0.000 0.000 0.000 inference.py:373(is_sequence)\n", + " 9 0.000 0.000 0.001 0.000 indexing.py:1006(_getitem_lowerdim)\n", + " 2 0.000 0.000 0.021 0.011 frame.py:665(__init__)\n", + " 198 0.000 0.000 0.000 0.000 format.py:1923()\n", + " 21 0.000 0.000 0.001 0.000 frame.py:4384(_box_col_values)\n", + " 73 0.000 0.000 0.000 0.000 missing.py:184(_isna)\n", + " 24 0.000 0.000 0.000 0.000 warnings.py:466(__enter__)\n", + " 24 0.000 0.000 0.001 0.000 frame.py:1392(items)\n", + " 88 0.000 0.000 0.001 0.000 config.py:271(__call__)\n", + " 7 0.000 0.000 0.003 0.000 format.py:890(format_col)\n", + " 301 0.000 0.000 0.000 0.000 {built-in method builtins.getattr}\n", + " 24 0.000 0.000 0.000 0.000 warnings.py:181(_add_filter)\n", + " 2 0.000 0.000 0.000 0.000 {pandas._libs.lib.array_equivalent_object}\n", + " 17 0.000 0.000 0.000 0.000 cast.py:1147(maybe_infer_to_datetimelike)\n", + " 9 0.000 
0.000 0.007 0.001 _json.py:1204(_try_convert_data)\n", + " 7 0.000 0.000 0.004 0.001 managers.py:308(apply)\n", + " 5 0.000 0.000 0.000 0.000 printing.py:28(adjoin)\n", + " 18 0.000 0.000 0.000 0.000 format.py:1938()\n", + " 56 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_list_like}\n", + " 48 0.000 0.000 0.000 0.000 {built-in method numpy.asarray}\n", + " 62 0.000 0.000 0.000 0.000 {built-in method builtins.all}\n", + " 9 0.000 0.000 0.001 0.000 indexing.py:1139(__getitem__)\n", + " 2 0.000 0.000 0.010 0.005 _json.py:1396(_process_converter)\n", + " 89 0.000 0.000 0.000 0.000 config.py:686(_warn_if_deprecated)\n", + " 35 0.000 0.000 0.000 0.000 <__array_function__ internals>:177(copyto)\n", + " 15 0.000 0.000 0.000 0.000 blocks.py:2388(new_block)\n", + " 15 0.000 0.000 0.000 0.000 inference.py:273(is_dict_like)\n", + " 6 0.000 0.000 0.000 0.000 base.py:836(__iter__)\n", + " 7 0.000 0.000 0.010 0.001 base.py:1135(_memory_usage)\n", + " 24 0.000 0.000 0.000 0.000 {method 'remove' of 'list' objects}\n", + " 20 0.000 0.000 0.000 0.000 printing.py:65()\n", + " 7 0.000 0.000 0.000 0.000 blocks.py:247(make_block)\n", + " 60 0.000 0.000 0.000 0.000 {built-in method _abc._abc_instancecheck}\n", + " 8 0.000 0.000 0.000 0.000 managers.py:1825(from_array)\n", + " 201 0.000 0.000 0.000 0.000 {method 'replace' of 'str' objects}\n", + " 28 0.000 0.000 0.000 0.000 printing.py:69()\n", + " 2 0.000 0.000 0.001 0.000 concat.py:618(get_result)\n", + " 41 0.000 0.000 0.000 0.000 flags.py:53(__init__)\n", + " 1 0.000 0.000 0.000 0.000 construction.py:928(_finalize_columns_and_data)\n", + " 1 0.000 0.000 0.110 0.110 1231667944.py:1(test_pandas)\n", + " 9 0.000 0.000 0.001 0.000 indexing.py:1651(_getitem_tuple)\n", + " 40 0.000 0.000 0.000 0.000 {built-in method builtins.any}\n", + " 31 0.000 0.000 0.000 0.000 generic.py:562(_get_axis)\n", + " 93 0.000 0.000 0.000 0.000 config.py:674(_translate_key)\n", + " 16 0.000 0.000 0.000 0.000 format.py:903(_get_formatter)\n", + " 14 0.000 0.000 
0.000 0.000 numpy_.py:98(__init__)\n", + " 93 0.000 0.000 0.000 0.000 config.py:615(_select_options)\n", + " 32 0.000 0.000 0.000 0.000 managers.py:1960(internal_values)\n", + " 3 0.000 0.000 0.000 0.000 {method '_rebuild_blknos_and_blklocs' of 'pandas._libs.internals.BlockManager' objects}\n", + " 10 0.000 0.000 0.003 0.000 astype.py:158(astype_array)\n", + " 32 0.000 0.000 0.000 0.000 generic.py:335(_from_mgr)\n", + " 16 0.000 0.000 0.000 0.000 blocks.py:2317(maybe_coerce_values)\n", + " 60 0.000 0.000 0.000 0.000 __init__.py:33(using_copy_on_write)\n", + " 14 0.000 0.000 0.000 0.000 {method 'get_loc' of 'pandas._libs.index.IndexEngine' objects}\n", + " 6 0.000 0.000 0.000 0.000 {built-in method pandas._libs.missing.isnaobj}\n", + " 7 0.000 0.000 0.005 0.001 generic.py:6368(astype)\n", + " 72 0.000 0.000 0.000 0.000 base.py:909(__len__)\n", + " 2 0.000 0.000 0.000 0.000 construction.py:596(_homogenize)\n", + " 48 0.000 0.000 0.000 0.000 printing.py:60(justify)\n", + " 1 0.000 0.000 0.000 0.000 {method 'close' of '_io.TextIOWrapper' objects}\n", + " 7 0.000 0.000 0.004 0.001 blocks.py:588(astype)\n", + " 1 0.000 0.000 0.000 0.000 concat.py:94(concatenate_managers)\n", + " 9 0.000 0.000 0.001 0.000 indexing.py:1681(_getitem_axis)\n", + " 21 0.000 0.000 0.000 0.000 frame.py:654(_constructor_sliced_from_mgr)\n", + " 48 0.000 0.000 0.000 0.000 format.py:431(justify)\n", + " 73 0.000 0.000 0.000 0.000 missing.py:101(isna)\n", + " 140 0.000 0.000 0.000 0.000 {built-in method builtins.hasattr}\n", + " 4 0.000 0.000 0.001 0.000 datetimes.py:721(to_datetime)\n", + " 6 0.000 0.000 0.000 0.000 iostream.py:610(write)\n", + " 19 0.000 0.000 0.001 0.000 base.py:7521(ensure_index)\n", + " 20 0.000 0.000 0.000 0.000 blocks.py:2346(get_block_type)\n", + " 63 0.000 0.000 0.000 0.000 common.py:149(cast_scalar_indexer)\n", + " 142 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_scalar}\n", + " 14 0.000 0.000 0.000 0.000 managers.py:1949(dtype)\n", + " 2 0.000 0.000 0.000 0.000 {method 
'get_slice' of 'pandas._libs.internals.BlockManager' objects}\n", + " 136 0.000 0.000 0.000 0.000 {built-in method builtins.issubclass}\n", + " 1 0.000 0.000 0.015 0.015 construction.py:891(_list_of_dict_to_arrays)\n", + " 24 0.000 0.000 0.000 0.000 warnings.py:487(__exit__)\n", + " 9 0.000 0.000 0.000 0.000 indexing.py:931(_validate_tuple_indexer)\n", + " 21 0.000 0.000 0.000 0.000 series.py:1372(_set_as_cached)\n", + " 7 0.000 0.000 0.000 0.000 _dtype.py:344(_name_get)\n", + " 7 0.000 0.000 0.000 0.000 missing.py:261(_isna_array)\n", + " 18 0.000 0.000 0.000 0.000 dtypes.py:1266(construct_from_string)\n", + " 7 0.000 0.000 0.000 0.000 warnings.py:130(filterwarnings)\n", + " 14 0.000 0.000 0.000 0.000 base.py:3763(get_loc)\n", + " 6 0.000 0.000 0.000 0.000 missing.py:380(notna)\n", + " 30 0.000 0.000 0.000 0.000 format.py:1617()\n", + " 27 0.000 0.000 0.000 0.000 construction.py:485(ensure_wrapped_if_datetimelike)\n", + " 3 0.000 0.000 0.000 0.000 base.py:1418(_format_with_header)\n", + " 23 0.000 0.000 0.000 0.000 {method 'match' of 're.Pattern' objects}\n", + " 18 0.000 0.000 0.000 0.000 dtypes.py:814(construct_from_string)\n", + " 9 0.000 0.000 0.000 0.000 indexing.py:1614(_is_scalar_access)\n", + " 18 0.000 0.000 0.000 0.000 dtypes.py:332(construct_from_string)\n", + " 66 0.000 0.000 0.000 0.000 {built-in method pandas._libs.missing.checknull}\n", + " 10 0.000 0.000 0.000 0.000 format.py:479(get_adjustment)\n", + " 7 0.000 0.000 0.000 0.000 base.py:649(_simple_new)\n", + " 217 0.000 0.000 0.000 0.000 {method 'ljust' of 'str' objects}\n", + " 1 0.000 0.000 0.075 0.075 _json.py:980(read)\n", + " 16 0.000 0.000 0.000 0.000 common.py:137(is_object_dtype)\n", + " 32 0.000 0.000 0.000 0.000 flags.py:89(allows_duplicate_labels)\n", + " 24 0.000 0.000 0.000 0.000 warnings.py:440(__init__)\n", + " 1 0.000 0.000 0.113 0.113 {built-in method builtins.exec}\n", + " 18 0.000 0.000 0.000 0.000 dtypes.py:1021(construct_from_string)\n", + " 9 0.000 0.000 0.002 0.000 
format.py:1328(get_result)\n", + " 1 0.000 0.000 0.000 0.000 string.py:119(_join_multiline)\n", + " 3 0.000 0.000 0.000 0.000 _strptime.py:309(_strptime)\n", + " 30 0.000 0.000 0.000 0.000 construction.py:420(extract_array)\n", + " 21 0.000 0.000 0.000 0.000 blocks.py:1007(iget)\n", + " 1 0.000 0.000 0.012 0.012 frame.py:3471(memory_usage)\n", + " 76 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_np_dtype}\n", + " 77 0.000 0.000 0.000 0.000 format.py:884()\n", + " 10 0.000 0.000 0.000 0.000 generic.py:760(_set_axis)\n", + " 18 0.000 0.000 0.000 0.000 indexing.py:1536(_validate_key)\n", + " 17 0.000 0.000 0.000 0.000 warnings.py:165(simplefilter)\n", + " 4 0.000 0.000 0.000 0.000 _asarray.py:31(require)\n", + " 25 0.000 0.000 0.000 0.000 common.py:1425(_is_dtype_type)\n", + " 6 0.000 0.000 0.000 0.000 concat.py:322(_get_block_for_concat_plan)\n", + " 60 0.000 0.000 0.000 0.000 :117(__instancecheck__)\n", + " 7 0.000 0.000 0.004 0.001 astype.py:192(astype_array_safe)\n", + " 25 0.000 0.000 0.000 0.000 common.py:96(is_bool_indexer)\n", + " 1 0.000 0.000 0.001 0.001 common.py:652(get_handle)\n", + " 7 0.000 0.000 0.000 0.000 construction.py:1028(convert)\n", + " 157 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}\n", + " 2 0.000 0.000 0.000 0.000 concat.py:403(__init__)\n", + " 9 0.000 0.000 0.000 0.000 indexing.py:2678(check_dict_or_set_indexers)\n", + " 2 0.000 0.000 0.000 0.000 base.py:1683(_validate_names)\n", + " 3 0.000 0.000 0.000 0.000 cast.py:119(maybe_convert_platform)\n", + " 3 0.000 0.000 0.000 0.000 {built-in method numpy.arange}\n", + " 14 0.000 0.000 0.000 0.000 managers.py:1964(array_values)\n", + " 103 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_integer}\n", + " 4 0.000 0.000 0.001 0.000 base.py:1038(astype)\n", + " 7 0.000 0.000 0.000 0.000 string.py:129()\n", + " 1 0.000 0.000 0.062 0.062 _json.py:1022(_get_object_parser)\n", + " 1 0.000 0.000 0.000 0.000 concat.py:296(_get_combined_plan)\n", + " 47 0.000 0.000 0.000 0.000 
range.py:963(__len__)\n", + " 2 0.000 0.000 0.000 0.000 parse.py:374(urlparse)\n", + " 6 0.000 0.000 0.000 0.000 concat.py:389(is_na)\n", + " 39 0.000 0.000 0.000 0.000 inference.py:334(is_hashable)\n", + " 6 0.000 0.000 0.000 0.000 fromnumeric.py:69(_wrapreduction)\n", + " 7 0.000 0.000 0.004 0.001 managers.py:405(astype)\n", + " 2 0.000 0.000 0.000 0.000 format.py:956(_get_formatted_index)\n", + " 3 0.000 0.000 0.000 0.000 cast.py:1544(construct_1d_object_array_from_listlike)\n", + " 8 0.000 0.000 0.000 0.000 range.py:198(_simple_new)\n", + " 9 0.000 0.000 0.000 0.000 common.py:1066(is_numeric_dtype)\n", + " 5 0.000 0.000 0.000 0.000 format.py:434(adjoin)\n", + " 4 0.000 0.000 0.001 0.000 datetimes.py:216(_maybe_cache)\n", + " 49 0.000 0.000 0.000 0.000 {built-in method __new__ of type object at 0x860f60}\n", + " 36 0.000 0.000 0.000 0.000 managers.py:1799(__init__)\n", + " 1 0.000 0.000 0.000 0.000 format.py:915(_get_formatted_column_labels)\n", + " 2 0.000 0.000 0.000 0.000 generic.py:4296(_slice)\n", + " 9 0.000 0.000 0.000 0.000 series.py:653(name)\n", + " 18 0.000 0.000 0.000 0.000 string_.py:135(construct_from_string)\n", + " 8 0.000 0.000 0.000 0.000 series.py:581(_constructor_from_mgr)\n", + " 6 0.000 0.000 0.000 0.000 __init__.py:272(_compile)\n", + " 61 0.000 0.000 0.000 0.000 printing.py:57()\n", + " 4 0.000 0.000 0.000 0.000 datetimes.py:526(_to_datetime_with_unit)\n", + " 2 0.000 0.000 0.004 0.002 managers.py:2068(create_block_manager_from_column_arrays)\n", + " 27 0.000 0.000 0.000 0.000 indexing.py:1144()\n", + " 2 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_all_arraylike}\n", + " 10 0.000 0.000 0.000 0.000 base.py:73(_validate_set_axis)\n", + " 32 0.000 0.000 0.000 0.000 series.py:750(_values)\n", + " 154 0.000 0.000 0.000 0.000 {method 'rjust' of 'str' objects}\n", + " 18 0.000 0.000 0.000 0.000 dtypes.py:2180(construct_from_string)\n", + " 14 0.000 0.000 0.000 0.000 indexing.py:1629(_validate_integer)\n", + " 6 0.000 0.000 0.000 0.000 {method 
'reshape' of 'numpy.ndarray' objects}\n", + " 1 0.000 0.000 0.011 0.011 frame.py:3561()\n", + " 5 0.000 0.000 0.000 0.000 base.py:574(_ensure_array)\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:666(_parse)\n", + " 1 0.000 0.000 0.007 0.007 frame.py:1229(to_string)\n", + " 72 0.000 0.000 0.000 0.000 {built-in method _warnings._filters_mutated}\n", + " 71 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects}\n", + " 21 0.000 0.000 0.000 0.000 frame.py:651(_sliced_from_mgr)\n", + " 1 0.000 0.000 0.000 0.000 {built-in method _operator.gt}\n", + " 1 0.000 0.000 0.000 0.000 string.py:89(_insert_dot_separator_vertical)\n", + " 4 0.000 0.000 0.000 0.000 _asarray.py:112()\n", + " 1 0.000 0.000 0.003 0.003 :319(decode)\n", + " 3 0.000 0.000 0.000 0.000 format.py:1619()\n", + " 40 0.000 0.000 0.000 0.000 inference.py:300()\n", + " 7 0.000 0.000 0.000 0.000 common.py:1268(is_extension_array_dtype)\n", + " 5 0.000 0.000 0.000 0.000 cast.py:1483(construct_1d_arraylike_from_scalar)\n", + " 14 0.000 0.000 0.000 0.000 blocks.py:2241(array_values)\n", + " 3 0.000 0.000 0.000 0.000 _parser.py:77(get_token)\n", + " 13 0.000 0.000 0.000 0.000 common.py:296(maybe_iterable_to_list)\n", + " 7 0.000 0.000 0.000 0.000 managers.py:1812(from_blocks)\n", + " 18 0.000 0.000 0.000 0.000 dtypes.py:1789(construct_from_string)\n", + " 1 0.000 0.000 0.000 0.000 array_ops.py:290(comparison_op)\n", + " 38 0.000 0.000 0.000 0.000 generic.py:548(_get_axis_number)\n", + " 5 0.000 0.000 0.000 0.000 base.py:69(shape)\n", + " 48 0.000 0.000 0.000 0.000 {method 'startswith' of 'str' objects}\n", + " 14 0.000 0.000 0.000 0.000 dtypes.py:1407(__init__)\n", + " 9 0.000 0.000 0.000 0.000 numerictypes.py:356(issubdtype)\n", + " 12 0.000 0.000 0.000 0.000 base.py:7616(maybe_extract_name)\n", + " 1 0.000 0.000 0.000 0.000 expressions.py:95(_evaluate_numexpr)\n", + " 1 0.000 0.000 0.000 0.000 iostream.py:243(schedule)\n", + " 1 0.000 0.000 0.000 0.000 range.py:902(_concat)\n", + " 14 0.000 0.000 0.000 0.000 
construction.py:695(_sanitize_ndim)\n", + " 7 0.000 0.000 0.000 0.000 _json.py:1442(is_ok)\n", + " 1 0.000 0.000 0.000 0.000 numeric.py:2407(array_equal)\n", + " 7 0.000 0.000 0.000 0.000 series.py:703(name)\n", + " 10 0.000 0.000 0.000 0.000 common.py:1322(is_ea_or_datetimelike_dtype)\n", + " 2 0.000 0.000 0.000 0.000 config.py:153(_set_option)\n", + " 1 0.000 0.000 0.014 0.014 _json.py:816(__init__)\n", + " 4 0.000 0.000 0.000 0.000 blocks.py:297(slice_block_columns)\n", + " 1 0.000 0.000 0.000 0.000 blocks.py:2375(new_block_2d)\n", + " 8 0.000 0.000 0.001 0.000 <__array_function__ internals>:177(concatenate)\n", + " 1 0.000 0.000 0.007 0.007 frame.py:1123(__repr__)\n", + " 10 0.000 0.000 0.000 0.000 common.py:1562(validate_all_hashable)\n", + " 1 0.000 0.000 0.002 0.002 managers.py:2207(_consolidate)\n", + " 9 0.000 0.000 0.000 0.000 indexing.py:948(_is_nested_tuple_indexer)\n", + " 1 0.000 0.000 0.001 0.001 format.py:564(__init__)\n", + " 10 0.000 0.000 0.000 0.000 managers.py:225(set_axis)\n", + " 2 0.000 0.000 0.000 0.000 common.py:1155(_is_binary_mode)\n", + " 42 0.000 0.000 0.000 0.000 base.py:5127(_values)\n", + " 3 0.000 0.000 0.000 0.000 range.py:234(_data)\n", + " 18 0.000 0.000 0.000 0.000 common.py:367(apply_if_callable)\n", + " 1 0.000 0.000 0.000 0.000 common.py:228(asarray_tuplesafe)\n", + " 2 0.000 0.000 0.000 0.000 indexing.py:978(_getitem_tuple_same_dim)\n", + " 1 0.000 0.000 0.000 0.000 {pandas._libs.internals.get_concat_blkno_indexers}\n", + " 4 0.000 0.000 0.000 0.000 datetimes.py:369(_convert_listlike_datetimes)\n", + " 35 0.000 0.000 0.000 0.000 {method 'format' of 'str' objects}\n", + " 9 0.000 0.000 0.000 0.000 blocks.py:2467(extend_blocks)\n", + " 17 0.000 0.000 0.000 0.000 __init__.py:43(using_pyarrow_string_dtype)\n", + " 1 0.000 0.000 0.012 0.012 _json.py:896(_preprocess_data)\n", + " 1 0.000 0.000 0.000 0.000 cast.py:1569(maybe_cast_to_integer_array)\n", + " 1 0.000 0.000 0.000 0.000 cast.py:774(infer_dtype_from_scalar)\n", + " 2 
0.000 0.000 0.000 0.000 base.py:5519(equals)\n", + " 3 0.000 0.000 0.007 0.002 {built-in method builtins.print}\n", + " 6 0.000 0.000 0.000 0.000 missing.py:305(_isna_string_dtype)\n", + " 5 0.000 0.000 0.000 0.000 api.py:379(default_index)\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:62(__init__)\n", + " 1 0.000 0.000 0.004 0.004 construction.py:423(dict_to_mgr)\n", + " 1 0.000 0.000 0.002 0.002 _json.py:1185(_convert_axes)\n", + " 6 0.000 0.000 0.000 0.000 fromnumeric.py:2432(all)\n", + " 1 0.000 0.000 0.000 0.000 common.py:289(_get_filepath_or_buffer)\n", + " 2 0.000 0.000 0.001 0.001 concat.py:157(concat)\n", + " 53 0.000 0.000 0.000 0.000 {built-in method builtins.hash}\n", + " 1 0.000 0.000 0.000 0.000 base.py:2293(is_unique)\n", + " 2 0.000 0.000 0.000 0.000 base.py:5422(append)\n", + " 2 0.000 0.000 0.000 0.000 _json.py:1049(close)\n", + " 36 0.000 0.000 0.000 0.000 {method 'insert' of 'list' objects}\n", + " 1 0.000 0.000 0.000 0.000 common.py:538(infer_compression)\n", + " 7 0.000 0.000 0.010 0.001 series.py:5223(memory_usage)\n", + " 3 0.000 0.000 0.000 0.000 concat.py:572(_is_uniform_join_units)\n", + " 6 0.000 0.000 0.000 0.000 managers.py:2212()\n", + " 7 0.000 0.000 0.000 0.000 generic.py:6189(__getattr__)\n", + " 18 0.000 0.000 0.000 0.000 indexing.py:2651(is_label_like)\n", + " 13 0.000 0.000 0.000 0.000 blocks.py:187(is_extension)\n", + " 1 0.000 0.000 0.000 0.000 base.py:1427()\n", + " 5 0.000 0.000 0.000 0.000 common.py:173(_expand_user)\n", + " 1 0.000 0.000 0.002 0.002 _json.py:912(_get_data_from_filepath)\n", + " 9 0.000 0.000 0.000 0.000 common.py:131()\n", + " 2 0.000 0.000 0.000 0.000 common.py:121(close)\n", + " 1 0.000 0.000 0.000 0.000 construction.py:487()\n", + " 7 0.000 0.000 0.000 0.000 {method 'add_index_reference' of 'pandas._libs.internals.BlockValuesRefs' objects}\n", + " 9 0.000 0.000 0.000 0.000 format.py:1300(__init__)\n", + " 2 0.000 0.000 0.000 0.000 concat.py:492(_clean_keys_and_objs)\n", + " 3 0.000 0.000 0.000 0.000 
blocks.py:198(_consolidate_key)\n", + " 1 0.000 0.000 0.000 0.000 string.py:128()\n", + " 1 0.000 0.000 0.005 0.005 format.py:1077(to_string)\n", + " 1 0.000 0.000 0.000 0.000 range.py:489(copy)\n", + " 18 0.000 0.000 0.000 0.000 numerictypes.py:282(issubclass_)\n", + " 24 0.000 0.000 0.000 0.000 managers.py:169(blknos)\n", + " 14 0.000 0.000 0.000 0.000 construction.py:734(_sanitize_str_dtypes)\n", + " 7 0.000 0.000 0.000 0.000 frame.py:1539(__len__)\n", + " 9 0.000 0.000 0.000 0.000 concat.py:597()\n", + " 66 0.000 0.000 0.000 0.000 generic.py:393(flags)\n", + " 9 0.000 0.000 0.000 0.000 common.py:514(is_string_or_object_np_dtype)\n", + " 63 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_float}\n", + " 34 0.000 0.000 0.000 0.000 {method 'endswith' of 'str' objects}\n", + " 2 0.000 0.000 0.000 0.000 concat.py:478(_get_ndims)\n", + " 68 0.000 0.000 0.000 0.000 {built-in method builtins.iter}\n", + " 1 0.000 0.000 0.062 0.062 _json.py:1172(parse)\n", + " 1 0.000 0.000 0.000 0.000 base.py:842(_engine)\n", + " 20 0.000 0.000 0.000 0.000 common.py:1581()\n", + " 1 0.000 0.000 0.000 0.000 {pandas._libs.missing.is_float_nan}\n", + " 6 0.000 0.000 0.000 0.000 <__array_function__ internals>:177(all)\n", + " 1 0.000 0.000 0.000 0.000 base.py:7092(_cmp_method)\n", + " 1 0.000 0.000 0.001 0.001 format.py:825(_truncate_vertically)\n", + " 18 0.000 0.000 0.000 0.000 indexing.py:966(_validate_key_length)\n", + " 9 0.000 0.000 0.000 0.000 :1207(_handle_fromlist)\n", + " 27 0.000 0.000 0.000 0.000 indexing.py:955()\n", + " 2 0.000 0.000 0.000 0.000 concat.py:52(concat_compat)\n", + " 27 0.000 0.000 0.000 0.000 indexing.py:2685()\n", + " 27 0.000 0.000 0.000 0.000 indexing.py:1143()\n", + " 5 0.000 0.000 0.000 0.000 :229(expanduser)\n", + " 6 0.000 0.000 0.000 0.000 generic.py:487(_validate_dtype)\n", + " 1 0.000 0.000 0.000 0.000 managers.py:1740()\n", + " 1 0.000 0.000 0.000 0.000 series.py:3159(_append)\n", + " 25 0.000 0.000 0.000 0.000 managers.py:185(blklocs)\n", + " 34 0.000 
0.000 0.000 0.000 flags.py:57(allows_duplicate_labels)\n", + " 1 0.000 0.000 0.000 0.000 {method 'take' of 'numpy.ndarray' objects}\n", + " 14 0.000 0.000 0.000 0.000 managers.py:2124(_grouping_func)\n", + " 34 0.000 0.000 0.000 0.000 generic.py:358(attrs)\n", + " 14 0.000 0.000 0.000 0.000 series.py:626(dtype)\n", + " 6 0.000 0.000 0.000 0.000 {built-in method posix.getpid}\n", + " 1 0.000 0.000 0.000 0.000 format.py:487(get_dataframe_repr_params)\n", + " 42 0.000 0.000 0.000 0.000 {method 'lower' of 'str' objects}\n", + " 11 0.000 0.000 0.000 0.000 common.py:306(is_null_slice)\n", + " 1 0.000 0.000 0.000 0.000 console.py:9(get_console_size)\n", + " 1 0.000 0.000 0.000 0.000 {method 'argsort' of 'numpy.ndarray' objects}\n", + " 29 0.000 0.000 0.000 0.000 managers.py:1902(_block)\n", + " 13 0.000 0.000 0.000 0.000 frame.py:949(axes)\n", + " 1 0.000 0.000 0.016 0.016 construction.py:506(nested_data_to_arrays)\n", + " 11 0.000 0.000 0.000 0.000 indexing.py:150(iloc)\n", + " 10 0.000 0.000 0.000 0.000 format.py:425(__init__)\n", + " 15 0.000 0.000 0.000 0.000 base.py:831(_reset_identity)\n", + " 4 0.000 0.000 0.000 0.000 common.py:233(stringify_path)\n", + " 5 0.000 0.000 0.000 0.000 base.py:592(_dtype_to_subclass)\n", + " 27 0.000 0.000 0.000 0.000 indexing.py:2694()\n", + " 2 0.000 0.000 0.000 0.000 base.py:7592(trim_front)\n", + " 7 0.000 0.000 0.000 0.000 _dtype.py:330(_name_includes_bit_suffix)\n", + " 2 0.000 0.000 0.000 0.000 construction.py:765(_try_cast)\n", + " 1 0.000 0.000 0.000 0.000 format.py:1163(save_to_buffer)\n", + " 2 0.000 0.000 0.000 0.000 managers.py:1734(_consolidate_check)\n", + " 7 0.000 0.000 0.005 0.001 _json.py:1429()\n", + " 14 0.000 0.000 0.000 0.000 series.py:791(array)\n", + " 2 0.000 0.000 0.002 0.001 managers.py:1744(_consolidate_inplace)\n", + " 9 0.000 0.000 0.000 0.000 concat.py:587()\n", + " 2 0.000 0.000 0.000 0.000 frozen.py:73(__getitem__)\n", + " 1 0.000 0.000 0.000 0.000 nanops.py:76(_f)\n", + " 1 0.000 0.000 0.000 0.000 
array_ops.py:191(_na_arithmetic_op)\n", + " 35 0.000 0.000 0.000 0.000 multiarray.py:1079(copyto)\n", + " 2 0.000 0.000 0.000 0.000 concat.py:713(_get_concat_axis)\n", + " 9 0.000 0.000 0.000 0.000 common.py:556(require_length_match)\n", + " 16 0.000 0.000 0.000 0.000 common.py:123()\n", + " 1 0.000 0.000 0.000 0.000 string.py:189(_binify)\n", + " 1 0.000 0.000 0.000 0.000 format.py:683(_initialize_justify)\n", + " 21 0.000 0.000 0.000 0.000 common.py:1255(is_1d_only_ea_dtype)\n", + " 6 0.000 0.000 0.000 0.000 iostream.py:505(_is_master_process)\n", + " 2 0.000 0.000 0.000 0.000 indexing.py:1718(_get_slice_axis)\n", + " 1 0.000 0.000 0.001 0.001 format.py:789(truncate)\n", + " 1 0.000 0.000 0.005 0.005 string.py:40(_get_string_representation)\n", + " 24 0.000 0.000 0.000 0.000 blocks.py:583(dtype)\n", + " 18 0.000 0.000 0.000 0.000 indexing.py:1627()\n", + " 12 0.000 0.000 0.000 0.000 format.py:633(is_truncated_horizontally)\n", + " 1 0.000 0.000 0.000 0.000 nanops.py:604(nansum)\n", + " 7 0.000 0.000 0.000 0.000 _json.py:1462()\n", + " 8 0.000 0.000 0.000 0.000 common.py:1366(_is_dtype)\n", + " 1 0.000 0.000 0.005 0.005 string.py:28(to_string)\n", + " 3 0.000 0.000 0.000 0.000 frame.py:641(_constructor_from_mgr)\n", + " 3 0.000 0.000 0.000 0.000 locale.py:396(normalize)\n", + " 14 0.000 0.000 0.000 0.000 {built-in method builtins.setattr}\n", + " 28 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_iterator}\n", + " 1 0.000 0.000 0.000 0.000 format.py:1023(__init__)\n", + " 1 0.000 0.000 0.000 0.000 generic.py:12031(_min_count_stat_function)\n", + " 27 0.000 0.000 0.000 0.000 indexing.py:915()\n", + " 14 0.000 0.000 0.000 0.000 construction.py:754(_maybe_repeat)\n", + " 2 0.000 0.000 0.000 0.000 {method 'all' of 'numpy.ndarray' objects}\n", + " 1 0.000 0.000 0.000 0.000 format.py:949()\n", + " 17 0.000 0.000 0.000 0.000 blocks.py:1003(shape)\n", + " 15 0.000 0.000 0.000 0.000 generic.py:659(ndim)\n", + " 16 0.000 0.000 0.000 0.000 common.py:121(classes)\n", + " 2 0.000 
0.000 0.000 0.000 _methods.py:61(_all)\n", + " 14 0.000 0.000 0.000 0.000 utils.py:62(is_list_like_indexer)\n", + " 2 0.000 0.000 0.000 0.000 concat.py:695(new_axes)\n", + " 9 0.000 0.000 0.000 0.000 indexing.py:909(_expand_ellipsis)\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:199(split)\n", + " 5 0.000 0.000 0.000 0.000 printing.py:48()\n", + " 6 0.000 0.000 0.000 0.000 fromnumeric.py:70()\n", + " 1 0.000 0.000 0.000 0.000 fromnumeric.py:51(_wrapfunc)\n", + " 1 0.000 0.000 0.000 0.000 range.py:341(nbytes)\n", + " 2 0.000 0.000 0.000 0.000 common.py:557(condition)\n", + " 1 0.000 0.000 0.000 0.000 managers.py:918(_verify_integrity)\n", + " 1 0.000 0.000 0.000 0.000 console.py:79(in_ipython_frontend)\n", + " 8 0.000 0.000 0.000 0.000 {method 'max' of 'numpy.ndarray' objects}\n", + " 2 0.000 0.000 0.000 0.000 missing.py:466(array_equivalent)\n", + " 1 0.000 0.000 0.000 0.000 generic.py:6337(dtypes)\n", + " 2 0.000 0.000 0.000 0.000 concat.py:698()\n", + " 6 0.000 0.000 0.000 0.000 enum.py:193(__get__)\n", + " 1 0.000 0.000 0.000 0.000 string.py:67(_insert_dot_separators)\n", + " 1 0.000 0.000 0.000 0.000 base.py:674(_with_infer)\n", + " 15 0.000 0.000 0.000 0.000 base.py:71()\n", + " 2 0.000 0.000 0.000 0.000 common.py:277(is_fsspec_url)\n", + " 3 0.000 0.000 0.000 0.000 format.py:1613(_format_strings)\n", + " 3 0.000 0.000 0.000 0.000 base.py:1396(format)\n", + " 2 0.000 0.000 0.000 0.000 common.py:145(is_url)\n", + " 2 0.000 0.000 0.000 0.000 missing.py:564(_array_equivalent_object)\n", + " 2 0.000 0.000 0.000 0.000 concat.py:565()\n", + " 2 0.000 0.000 0.000 0.000 :1()\n", + " 3 0.000 0.000 0.000 0.000 frame.py:4399(_clear_item_cache)\n", + " 1 0.000 0.000 0.001 0.001 _json.py:1432(_try_convert_dates)\n", + " 1 0.000 0.000 0.000 0.000 construction.py:1068()\n", + " 18 0.000 0.000 0.000 0.000 {method 'search' of 're.Pattern' objects}\n", + " 14 0.000 0.000 0.000 0.000 format.py:877()\n", + " 4 0.000 0.000 0.000 0.000 missing.py:642(na_value_for_dtype)\n", + " 1 
0.000 0.000 0.000 0.000 managers.py:278(get_dtypes)\n", + " 2 0.000 0.000 0.000 0.000 range.py:996(_getitem_slice)\n", + " 5 0.000 0.000 0.000 0.000 format.py:2024(_has_names)\n", + " 4 0.000 0.000 0.000 0.000 base.py:1751(_get_names)\n", + " 1 0.000 0.000 0.000 0.000 common.py:62(new_method)\n", + " 26 0.000 0.000 0.000 0.000 base.py:7606()\n", + " 1 0.000 0.000 0.005 0.005 string.py:34(_get_strcols)\n", + " 1 0.000 0.000 0.000 0.000 series.py:6094(_reduce)\n", + " 44 0.000 0.000 0.000 0.000 typing.py:2256(cast)\n", + " 3 0.000 0.000 0.000 0.000 blocks.py:265(make_block_same_class)\n", + " 3 0.000 0.000 0.000 0.000 locale.py:593(getlocale)\n", + " 2 0.000 0.000 0.000 0.000 parse.py:119(_coerce_args)\n", + " 6 0.000 0.000 0.000 0.000 {built-in method builtins.sum}\n", + " 25 0.000 0.000 0.000 0.000 {built-in method builtins.callable}\n", + " 1 0.000 0.000 0.005 0.005 format.py:611(get_strcols)\n", + " 2 0.000 0.000 0.000 0.000 format.py:974()\n", + " 2 0.000 0.000 0.000 0.000 concat.py:543(_get_sample_object)\n", + " 7 0.000 0.000 0.000 0.000 series.py:784(_references)\n", + " 19 0.000 0.000 0.000 0.000 base.py:1657(name)\n", + " 6 0.000 0.000 0.000 0.000 blocks.py:203(_can_hold_na)\n", + " 6 0.000 0.000 0.000 0.000 __init__.py:225(compile)\n", + " 4 0.000 0.000 0.000 0.000 base.py:448(size)\n", + " 1 0.000 0.000 0.000 0.000 construction.py:1006(convert_object_array)\n", + " 1 0.000 0.000 0.000 0.000 api.py:106(_get_distinct_objs)\n", + " 1 0.000 0.000 0.000 0.000 inference.py:404(is_dataclass)\n", + " 10 0.000 0.000 0.000 0.000 _parser.py:203(isword)\n", + " 7 0.000 0.000 0.000 0.000 inspect.py:292(isclass)\n", + " 1 0.000 0.000 0.000 0.000 base.py:1795(set_names)\n", + " 1 0.000 0.000 0.000 0.000 managers.py:279()\n", + " 1 0.000 0.000 0.000 0.000 threading.py:1185(is_alive)\n", + " 3 0.000 0.000 0.000 0.000 {built-in method _locale.setlocale}\n", + " 6 0.000 0.000 0.000 0.000 generic.py:6182()\n", + " 3 0.000 0.000 0.000 0.000 format.py:645(has_index_names)\n", 
+ " 1 0.000 0.000 0.001 0.001 shape_base.py:223(vstack)\n", + " 1 0.000 0.000 0.000 0.000 construction.py:481()\n", + " 3 0.000 0.000 0.000 0.000 inference.py:105(is_file_like)\n", + " 2 0.000 0.000 0.000 0.000 concat.py:773(_concat_indexes)\n", + " 6 0.000 0.000 0.000 0.000 base.py:6625(_validate_indexer)\n", + " 3 0.000 0.000 0.000 0.000 frame.py:966(shape)\n", + " 3 0.000 0.000 0.000 0.000 format.py:653(show_row_idx_names)\n", + " 2 0.000 0.000 0.000 0.000 managers.py:1726(is_consolidated)\n", + " 1 0.000 0.000 0.000 0.000 arraylike.py:54(__gt__)\n", + " 1 0.000 0.000 0.000 0.000 _json.py:1126(__init__)\n", + " 1 0.000 0.000 0.000 0.000 config.py:469(__init__)\n", + " 1 0.000 0.000 0.000 0.000 nanops.py:389(new_func)\n", + " 1 0.000 0.000 0.000 0.000 contextlib.py:104(__init__)\n", + " 7 0.000 0.000 0.000 0.000 _dtype.py:24(_kind_name)\n", + " 2 0.000 0.000 0.000 0.000 base.py:773(_view)\n", + " 6 0.000 0.000 0.000 0.000 iostream.py:532(_schedule_flush)\n", + " 3 0.000 0.000 0.000 0.000 _strptime.py:26(_getlang)\n", + " 1 0.000 0.000 0.000 0.000 construction.py:950(_validate_or_indexify_columns)\n", + " 1 0.000 0.000 0.000 0.000 base.py:1243(copy)\n", + " 1 0.000 0.000 0.000 0.000 base.py:5458(_concat)\n", + " 9 0.000 0.000 0.000 0.000 common.py:126(_classes_and_not_datetimelike)\n", + " 4 0.000 0.000 0.000 0.000 nanops.py:79()\n", + " 1 0.000 0.000 0.000 0.000 shape_base.py:81(atleast_2d)\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:221(__init__)\n", + " 1 0.000 0.000 0.000 0.000 {built-in method builtins.sorted}\n", + " 4 0.000 0.000 0.000 0.000 {pandas._libs.lib.maybe_indices_to_slice}\n", + " 1 0.000 0.000 0.001 0.001 :16(exists)\n", + " 1 0.000 0.000 0.000 0.000 api.py:120(_get_combined_index)\n", + " 4 0.000 0.000 0.000 0.000 base.py:675(empty)\n", + " 1 0.000 0.000 0.000 0.000 config.py:477(__enter__)\n", + " 2 0.000 0.000 0.000 0.000 _json.py:1105(__exit__)\n", + " 4 0.000 0.000 0.000 0.000 generic.py:568(_get_block_manager_axis)\n", + " 2 0.000 0.000 
0.000 0.000 base.py:4190(_validate_positional_slice)\n", + " 1 0.000 0.000 0.000 0.000 range.py:352(memory_usage)\n", + " 1 0.000 0.000 0.000 0.000 function.py:411(validate_func)\n", + " 8 0.000 0.000 0.000 0.000 common.py:1390(_get_dtype)\n", + " 15 0.000 0.000 0.000 0.000 blocks.py:239(mgr_locs)\n", + " 9 0.000 0.000 0.000 0.000 contextlib.py:428(__init__)\n", + " 1 0.000 0.000 0.000 0.000 string.py:126()\n", + " 8 0.000 0.000 0.000 0.000 _methods.py:39(_amax)\n", + " 1 0.000 0.000 0.000 0.000 range.py:1030(_cmp_method)\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:395(__init__)\n", + " 9 0.000 0.000 0.000 0.000 series.py:577(_constructor)\n", + " 2 0.000 0.000 0.000 0.000 config.py:215(get_default_val)\n", + " 1 0.000 0.000 0.000 0.000 api.py:72(get_objs_combined_axis)\n", + " 13 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}\n", + " 1 0.000 0.000 0.000 0.000 concat.py:202(_maybe_reindex_columns_na_proxy)\n", + " 3 0.000 0.000 0.000 0.000 locale.py:479(_parse_localename)\n", + " 2 0.000 0.000 0.000 0.000 base.py:5453()\n", + " 2 0.000 0.000 0.000 0.000 construction.py:196(mgr_to_mgr)\n", + " 9 0.000 0.000 0.000 0.000 contextlib.py:434(__exit__)\n", + " 4 0.000 0.000 0.000 0.000 construction.py:687(_sanitize_non_ordered)\n", + " 3 0.000 0.000 0.000 0.000 _parser.py:189(__next__)\n", + " 9 0.000 0.000 0.000 0.000 concat.py:584()\n", + " 1 0.000 0.000 0.000 0.000 construction.py:532(treat_as_nested)\n", + " 2 0.000 0.000 0.000 0.000 format.py:657(show_col_idx_names)\n", + " 3 0.000 0.000 0.000 0.000 base.py:791(is_)\n", + " 1 0.000 0.000 0.000 0.000 format.py:751(_adjust_max_rows)\n", + " 1 0.000 0.000 0.000 0.000 generic.py:12070(sum)\n", + " 3 0.000 0.000 0.000 0.000 _strptime.py:565(_strptime_datetime)\n", + " 4 0.000 0.000 0.000 0.000 {built-in method sys.getsizeof}\n", + " 2 0.000 0.000 0.000 0.000 format.py:629(is_truncated)\n", + " 1 0.000 0.000 0.000 0.000 base.py:1754(_set_names)\n", + " 1 0.000 0.000 0.000 0.000 <__array_function__ 
internals>:177(array_equal)\n", + " 4 0.000 0.000 0.000 0.000 format.py:637(is_truncated_vertically)\n", + " 4 0.000 0.000 0.000 0.000 range.py:347()\n", + " 4 0.000 0.000 0.000 0.000 managers.py:920()\n", + " 2 0.000 0.000 0.000 0.000 common.py:521(is_string_dtype)\n", + " 1 0.000 0.000 0.001 0.001 common.py:1141(file_exists)\n", + " 2 0.000 0.000 0.000 0.000 base.py:7607()\n", + " 2 0.000 0.000 0.000 0.000 generic.py:4314(_set_is_copy)\n", + " 1 0.000 0.000 0.000 0.000 base.py:5153(_get_engine_target)\n", + " 5 0.000 0.000 0.000 0.000 managers.py:896(__init__)\n", + " 1 0.000 0.000 0.000 0.000 concat.py:703(_get_comb_axis)\n", + " 11 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects}\n", + " 1 0.000 0.000 0.000 0.000 fromnumeric.py:1038(argsort)\n", + " 4 0.000 0.000 0.000 0.000 blocks.py:1016(_slice)\n", + " 1 0.000 0.000 0.000 0.000 contextlib.py:132(__enter__)\n", + " 2 0.000 0.000 0.000 0.000 format.py:649(has_column_names)\n", + " 1 0.000 0.000 0.000 0.000 expressions.py:226(evaluate)\n", + " 1 0.000 0.000 0.000 0.000 common.py:977(is_numeric_v_string_like)\n", + " 1 0.000 0.000 0.000 0.000 config.py:483(__exit__)\n", + " 3 0.000 0.000 0.000 0.000 generic.py:2073()\n", + " 2 0.000 0.000 0.000 0.000 base.py:782(_rename)\n", + " 1 0.000 0.000 0.000 0.000 threading.py:1118(_wait_for_tstate_lock)\n", + " 3 0.000 0.000 0.000 0.000 nanops.py:72(check)\n", + " 1 0.000 0.000 0.000 0.000 missing.py:131(dispatch_fill_zeros)\n", + " 6 0.000 0.000 0.000 0.000 enum.py:1249(value)\n", + " 4 0.000 0.000 0.000 0.000 datetimes.py:156(should_cache)\n", + " 1 0.000 0.000 0.000 0.000 expressions.py:67(_evaluate_standard)\n", + " 2 0.000 0.000 0.000 0.000 format.py:1179(get_buffer)\n", + " 3 0.000 0.000 0.000 0.000 blocks.py:192(_can_consolidate)\n", + " 1 0.000 0.000 0.000 0.000 iostream.py:127(_event_pipe)\n", + " 1 0.000 0.000 0.000 0.000 range.py:484(_view)\n", + " 4 0.000 0.000 0.000 0.000 generic.py:6177()\n", + " 1 0.000 0.000 0.000 0.000 :309(__init__)\n", + " 1 
0.000 0.000 0.000 0.000 {method 'sum' of 'numpy.ndarray' objects}\n", + " 1 0.000 0.000 0.000 0.000 managers.py:2233()\n", + " 14 0.000 0.000 0.000 0.000 base.py:6612(_maybe_cast_indexer)\n", + " 9 0.000 0.000 0.000 0.000 range.py:377(dtype)\n", + " 1 0.000 0.000 0.000 0.000 common.py:85(consensus_name_attr)\n", + " 1 0.000 0.000 0.000 0.000 _validators.py:450(check_dtype_backend)\n", + " 1 0.000 0.000 0.000 0.000 base.py:459(_engine_type)\n", + " 1 0.000 0.000 0.000 0.000 nanops.py:455(newfunc)\n", + " 1 0.000 0.000 0.001 0.001 <__array_function__ internals>:177(vstack)\n", + " 6 0.000 0.000 0.000 0.000 common.py:1107()\n", + " 1 0.000 0.000 0.000 0.000 {built-in method _codecs.lookup}\n", + " 1 0.000 0.000 0.000 0.000 console.py:54(in_interactive_session)\n", + " 1 0.000 0.000 0.000 0.000 contextlib.py:287(helper)\n", + " 1 0.000 0.000 0.000 0.000 common.py:80(ensure_str)\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:322(weekday)\n", + " 1 0.000 0.000 0.000 0.000 {method 'any' of 'numpy.ndarray' objects}\n", + " 2 0.000 0.000 0.000 0.000 concat.py:689(_get_result_dim)\n", + " 9 0.000 0.000 0.000 0.000 {pandas._libs.lib.item_from_zerodim}\n", + " 7 0.000 0.000 0.000 0.000 managers.py:335()\n", + " 4 0.000 0.000 0.000 0.000 config.py:663(_get_registered_option)\n", + " 6 0.000 0.000 0.000 0.000 concat.py:351(__init__)\n", + " 4 0.000 0.000 0.000 0.000 {method 'upper' of 'str' objects}\n", + " 12 0.000 0.000 0.000 0.000 base.py:363(ndim)\n", + " 1 0.000 0.000 0.000 0.000 string.py:22(__init__)\n", + " 11 0.000 0.000 0.000 0.000 {method 'read' of '_io.StringIO' objects}\n", + " 7 0.000 0.000 0.000 0.000 {method 'write' of '_io.StringIO' objects}\n", + " 3 0.000 0.000 0.000 0.000 concat.py:167()\n", + " 1 0.000 0.000 0.000 0.000 :2(__init__)\n", + " 1 0.000 0.000 0.000 0.000 series.py:6195(sum)\n", + " 1 0.000 0.000 0.000 0.000 <__array_function__ internals>:177(argsort)\n", + " 2 0.000 0.000 0.000 0.000 {built-in method builtins.next}\n", + " 1 0.000 0.000 0.000 0.000 
nanops.py:253(_get_values)\n", + " 1 0.000 0.000 0.000 0.000 base.py:1900(rename)\n", + " 3 0.000 0.000 0.000 0.000 range.py:281(start)\n", + " 2 0.000 0.000 0.000 0.000 base.py:346(shape)\n", + " 1 0.000 0.000 0.000 0.000 nanops.py:324(_get_dtype_max)\n", + " 1 0.000 0.000 0.000 0.000 {method 'fill' of 'numpy.ndarray' objects}\n", + " 1 0.000 0.000 0.000 0.000 concat.py:303()\n", + " 2 0.000 0.000 0.000 0.000 generic.py:638(_info_axis)\n", + " 1 0.000 0.000 0.000 0.000 contextlib.py:141(__exit__)\n", + " 1 0.000 0.000 0.000 0.000 format.py:641(dimensions_info)\n", + " 1 0.000 0.000 0.000 0.000 generic.py:2015(empty)\n", + " 1 0.000 0.000 0.000 0.000 base.py:5462()\n", + " 1 0.000 0.000 0.000 0.000 concat.py:747()\n", + " 1 0.000 0.000 0.000 0.000 dataclasses.py:1256(is_dataclass)\n", + " 4 0.000 0.000 0.000 0.000 {pandas._libs.algos.ensure_object}\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:208(isnum)\n", + " 1 0.000 0.000 0.000 0.000 common.py:1107(_maybe_memory_map)\n", + " 1 0.000 0.000 0.000 0.000 api.py:102()\n", + " 6 0.000 0.000 0.000 0.000 fromnumeric.py:2427(_all_dispatcher)\n", + " 8 0.000 0.000 0.000 0.000 multiarray.py:152(concatenate)\n", + " 10 0.000 0.000 0.000 0.000 {method 'isalpha' of 'str' objects}\n", + " 2 0.000 0.000 0.000 0.000 construction.py:916()\n", + " 1 0.000 0.000 0.000 0.000 frame.py:1114(_info_repr)\n", + " 1 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.lock' objects}\n", + " 2 0.000 0.000 0.000 0.000 config.py:897(is_nonnegative_int)\n", + " 1 0.000 0.000 0.000 0.000 format.py:732(_calc_max_rows_fitted)\n", + " 6 0.000 0.000 0.000 0.000 common.py:175()\n", + " 2 0.000 0.000 0.000 0.000 common.py:171(not_none)\n", + " 1 0.000 0.000 0.000 0.000 shape_base.py:218(_vhstack_dispatcher)\n", + " 1 0.000 0.000 0.000 0.000 generic.py:1948(__iter__)\n", + " 1 0.000 0.000 0.000 0.000 <__array_function__ internals>:177(atleast_2d)\n", + " 1 0.000 0.000 0.000 0.000 range.py:946()\n", + " 6 0.000 0.000 0.000 0.000 {method '__exit__' of 
'_thread.RLock' objects}\n", + " 3 0.000 0.000 0.000 0.000 {method 'clear' of 'dict' objects}\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:342(ampm)\n", + " 2 0.000 0.000 0.000 0.000 series.py:3169()\n", + " 1 0.000 0.000 0.000 0.000 expressions.py:76(_can_use_numexpr)\n", + " 1 0.000 0.000 0.000 0.000 common.py:1025(needs_i8_conversion)\n", + " 1 0.000 0.000 0.000 0.000 format.py:721(_calc_max_cols_fitted)\n", + " 9 0.000 0.000 0.000 0.000 contextlib.py:431(__enter__)\n", + " 3 0.000 0.000 0.000 0.000 range.py:316(step)\n", + " 1 0.000 0.000 0.000 0.000 _methods.py:55(_any)\n", + " 6 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_int_or_none}\n", + " 7 0.000 0.000 0.000 0.000 _json.py:1401()\n", + " 1 0.000 0.000 0.000 0.000 config.py:478()\n", + " 1 0.000 0.000 0.000 0.000 concat.py:631()\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:329(month)\n", + " 3 0.000 0.000 0.000 0.000 range.py:911()\n", + " 1 0.000 0.000 0.000 0.000 format.py:623(should_show_dimensions)\n", + " 2 0.000 0.000 0.000 0.000 base.py:539()\n", + " 7 0.000 0.000 0.000 0.000 series.py:1381(_clear_item_cache)\n", + " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", + " 2 0.000 0.000 0.000 0.000 format.py:765(_is_in_terminal)\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:319(jump)\n", + " 5 0.000 0.000 0.000 0.000 {method 'add' of 'set' objects}\n", + " 3 0.000 0.000 0.000 0.000 frame.py:637(_constructor)\n", + " 2 0.000 0.000 0.000 0.000 concat.py:73()\n", + " 3 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.lock' objects}\n", + " 1 0.000 0.000 0.000 0.000 concat.py:720()\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:213(isspace)\n", + " 4 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_bool}\n", + " 1 0.000 0.000 0.000 0.000 format.py:689(_initialize_columns)\n", + " 1 0.000 0.000 0.000 0.000 {built-in method pandas._libs.lib.is_interval}\n", + " 5 0.000 0.000 0.000 0.000 {built-in method posix.fspath}\n", + " 2 0.000 0.000 0.000 0.000 
{pandas._libs.lib.dtypes_all_equal}\n", + " 1 0.000 0.000 0.000 0.000 common.py:503(get_compression_method)\n", + " 1 0.000 0.000 0.000 0.000 {built-in method builtins.min}\n", + " 1 0.000 0.000 0.000 0.000 _methods.py:47(_sum)\n", + " 1 0.000 0.000 0.000 0.000 dispatch.py:17(should_extension_dispatch)\n", + " 1 0.000 0.000 0.000 0.000 {method 'getvalue' of '_io.StringIO' objects}\n", + " 1 0.000 0.000 0.000 0.000 format.py:665(_initialize_sparsify)\n", + " 1 0.000 0.000 0.000 0.000 range.py:922()\n", + " 1 0.000 0.000 0.000 0.000 threading.py:568(is_set)\n", + " 1 0.000 0.000 0.000 0.000 managers.py:536(nblocks)\n", + " 1 0.000 0.000 0.000 0.000 shape_base.py:207(_arrays_for_stack_dispatcher)\n", + " 1 0.000 0.000 0.000 0.000 inference.py:306(is_named_tuple)\n", + " 1 0.000 0.000 0.000 0.000 string.py:63(_need_to_wrap_around)\n", + " 1 0.000 0.000 0.000 0.000 _validators.py:226(validate_bool_kwarg)\n", + " 2 0.000 0.000 0.000 0.000 base.py:974(dtype)\n", + " 3 0.000 0.000 0.000 0.000 range.py:299(stop)\n", + " 2 0.000 0.000 0.000 0.000 managers.py:235(items)\n", + " 1 0.000 0.000 0.000 0.000 {built-in method _codecs.lookup_error}\n", + " 2 0.000 0.000 0.000 0.000 indexing.py:2665(need_slice)\n", + " 3 0.000 0.000 0.000 0.000 {built-in method builtins.id}\n", + " 2 0.000 0.000 0.000 0.000 format.py:959()\n", + " 1 0.000 0.000 0.000 0.000 managers.py:2242()\n", + " 2 0.000 0.000 0.000 0.000 concat.py:766(_maybe_check_integrity)\n", + " 1 0.000 0.000 0.000 0.000 :260(__init__)\n", + " 1 0.000 0.000 0.000 0.000 nanops.py:209(_maybe_get_mask)\n", + " 1 0.000 0.000 0.000 0.000 {method 'pop' of 'list' objects}\n", + " 1 0.000 0.000 0.000 0.000 base.py:2756(_is_multi)\n", + " 1 0.000 0.000 0.000 0.000 function.py:64(__call__)\n", + " 1 0.000 0.000 0.000 0.000 format.py:697(_initialize_colspace)\n", + " 1 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n", + " 1 0.000 0.000 0.000 0.000 {method 'isdigit' of 'str' objects}\n", + " 2 0.000 0.000 0.000 
0.000 {built-in method numpy.asanyarray}\n", + " 1 0.000 0.000 0.000 0.000 {method 'isspace' of 'str' objects}\n", + " 1 0.000 0.000 0.000 0.000 nanops.py:1491(_maybe_null_out)\n", + " 1 0.000 0.000 0.000 0.000 fromnumeric.py:1034(_argsort_dispatcher)\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:1056(_could_be_tzname)\n", + " 1 0.000 0.000 0.000 0.000 numeric.py:2403(_array_equal_dispatcher)\n", + " 1 0.000 0.000 0.000 0.000 {method 'values' of 'dict' objects}\n", + " 2 0.000 0.000 0.000 0.000 {function FrozenList.__getitem__ at 0x7f0665c34860}\n", + " 1 0.000 0.000 0.000 0.000 _parser.py:186(__iter__)\n", + " 2 0.000 0.000 0.000 0.000 parse.py:108(_noop)\n", + " 1 0.000 0.000 0.000 0.000 {method 'reverse' of 'list' objects}\n", + " 2 0.000 0.000 0.000 0.000 _json.py:1102(__enter__)\n", + " 2 0.000 0.000 0.000 0.000 base.py:1954(nlevels)\n", + " 1 0.000 0.000 0.000 0.000 interactiveshell.py:637(get_ipython)\n", + " 1 0.000 0.000 0.000 0.000 range.py:228(_constructor)\n", + " 1 0.000 0.000 0.000 0.000 format.py:670(_initialize_formatters)\n", + " 1 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_period}\n", + " 1 0.000 0.000 0.000 0.000 shape_base.py:77(_atleast_2d_dispatcher)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%prun\n", + "test_pandas()" ] }, { "cell_type": "markdown", - "id": "04937c37-16b4-4e03-9cf2-ac704e48f60e", + "id": "18519e3f-6d5e-477a-a3e5-1fd0e0b30fcc", "metadata": {}, "source": [ - "# Result\n", + "# Results\n", "\n", - "Polars and Pandas borth processed the same data (8000 rows, categorical data represented as strings).\n", - "\n" + "Polars and Pandas both processed the same data (8000 rows, categorical data represented as strings).\n", + "\n", + "\n", + "## Versions\n", + "\n", + "\n", + "* Pandas: 2.1.4\n", + "* Polars: 0.20.26\n", + "\n", + "## Memory usage comparison\n", + "\n", + "File on disk: 6,0 MB (du -sh), 8000 rows, 7 columns. 
\n", + "\n", + "* Polars: 4,76 MB\n", + "* Pandas: 7,56 MB\n", + "\n", + "-> Polars was more memory efficient: ~ 1,6 times less memory\n", + "\n", + "\n", + "## Profile comparison\n", + "\n", + "* Polars: 256 function calls (253 primitive calls) in 0.020 seconds\n", + "* Pandas: 46681 function calls (46472 primitive calls) in 0.113 seconds\n", + "\n", + "-> Polars was ~ 5,6 times faster and needed ~ 180x fewer function and primitive calls. \n", + "\n", + "\n", + "## Conclusion\n", + "\n", + "Polars should be used whenever possible." ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdbf106d-4117-491b-9773-85dcd9d5914c", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {