diff --git a/Elasticsearch-Pandas-vs-Polars-May-15-2024.ipynb b/Elasticsearch-Pandas-vs-Polars-May-15-2024.ipynb
index 1b5de12..4d24e53 100644
--- a/Elasticsearch-Pandas-vs-Polars-May-15-2024.ipynb
+++ b/Elasticsearch-Pandas-vs-Polars-May-15-2024.ipynb
@@ -450,12 +450,81 @@
"tags": []
},
"source": [
- "## Memory footprint comparison"
+ "## Memory footprint and profile comparison"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0e433322-5120-4451-9aa4-cfd5795aaa24",
+ "metadata": {},
+ "source": [
+ "An explicit column schema (data types) is applied to both the Polars and the Pandas DataFrame so that the comparison is like-for-like."
]
},
{
"cell_type": "code",
- "execution_count": 74,
+ "execution_count": 27,
+ "id": "93a2116d-1fdd-432b-a48c-8be77c67e0e7",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ "Once deleted, variables cannot be recovered. Proceed (y/[n])? y\n"
+ ]
+ }
+ ],
+ "source": [
+ "%reset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bdf4020a-3b23-47e0-b7c4-3335bf3d5d8c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install git+https://github.com/H4dr1en/jupyterflame.git"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "427b969f-0e68-44da-b74d-5cada875f74f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Run this directly in a shell inside the conda env (not from the notebook): conda install -y perl"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "08db61e8-d70f-4434-bfcc-6225405b81f2",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The jupyterflame extension is already loaded. To reload it, use:\n",
+ " %reload_ext jupyterflame\n"
+ ]
+ }
+ ],
+ "source": [
+ "%load_ext jupyterflame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
"id": "eefffe2a-f61c-47c8-90e3-d0de0ab932d6",
"metadata": {
"tags": []
@@ -488,7 +557,7 @@
},
{
"cell_type": "code",
- "execution_count": 69,
+ "execution_count": 46,
"id": "0b2be27e-a56c-411b-bbff-dc42e533ca80",
"metadata": {
"tags": []
@@ -498,7 +567,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "{'@timestamp': String, 'host.hostname': String, 'host.ip': String, 'log.level': String, 'winlog.event_id': Int64, 'winlog.task': String, 'message': String}\n"
+ "Polars Schema: {'@timestamp': String, 'host.hostname': String, 'host.ip': String, 'log.level': String, 'winlog.event_id': Int64, 'winlog.task': String, 'message': String}\n",
+ "Pandas Schema: {'@timestamp': 'str', 'host.hostname': 'str', 'host.ip': 'str', 'log.level': 'str', 'winlog.event_id': 'int64', 'winlog.task': 'str', 'message': 'str'}\n"
]
}
],
@@ -513,18 +583,57 @@
" # Add more mappings if needed\n",
"}\n",
"\n",
+ "pandas_dtype_mapping = {\n",
+ " \"object\": \"str\",\n",
+ " \"int64\": \"int64\",\n",
+ " \"float64\": \"float64\",\n",
+ " # Add more mappings if needed\n",
+ "}\n",
+ "\n",
+ "\n",
"# Generate the schema for Polars from Pandas dtype\n",
- "schema = {col: dtype_mapping[str(dtype)] for col, dtype in pd_df.dtypes.items()}\n",
- "print(schema)\n"
+ "polars_schema = {col: dtype_mapping[str(dtype)] for col, dtype in pd_df.dtypes.items()}\n",
+ "print(\"Polars Schema:\", polars_schema)\n",
+ "\n",
+ "pandas_schema = {col: pandas_dtype_mapping[str(dtype)] for col, dtype in pd_df.dtypes.items()}\n",
+ "print(\"Pandas Schema:\", pandas_schema)"
]
},
{
"cell_type": "code",
- "execution_count": 78,
+ "execution_count": 53,
"id": "5ccc9d58-8e27-43d0-bf69-7f2ff44c9874",
"metadata": {
"tags": []
},
+ "outputs": [],
+ "source": [
+ "def test_polars():\n",
+ " # Lazily scan the NDJSON file (no schema is passed here; dtypes are cast explicitly below)\n",
+ " lazy_df = pl.scan_ndjson(file_path)\n",
+ "\n",
+ " # Collect the LazyFrame to a DataFrame\n",
+ " pl_df = lazy_df.collect()\n",
+ "\n",
+ " # Convert columns to the correct data types according to the schema\n",
+ " pl_df = pl_df.with_columns([pl.col(col).cast(dtype) for col, dtype in polars_schema.items()])\n",
+ "\n",
+ " # Print the DataFrame and its memory usage\n",
+ " print(pl_df)\n",
+ "\n",
+ " num_rows_polars = pl_df.shape[0]\n",
+ "\n",
+ " print(f\"Polars DataFarme number of rows: {num_rows_polars}\")\n",
+ " print(f\"Polars DataFrame memory usage: {pl_df.estimated_size() / (1024 ** 2):.2f} MB\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "6e1ca70b-9aae-43af-b1c0-cc8d6f19a7ce",
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"name": "stdout",
@@ -575,36 +684,704 @@
"│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n",
"└──────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┘\n",
"Pandas DataFarme number of rows: 8000\n",
- "Polars DataFrame memory usage: 4.76 MB\n"
+ "Polars DataFrame memory usage: 4.76 MB\n",
+ " "
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "# Read the JSON file using the defined schema\n",
- "lazy_df = pl.scan_ndjson(file_path)\n",
- "\n",
- "# Collect the LazyFrame to a DataFrame\n",
- "pl_df = lazy_df.collect()\n",
- "\n",
- "# Convert columns to the correct data types according to the schema\n",
- "pl_df = pl_df.with_columns([pl.col(col).cast(dtype) for col, dtype in schema.items()])\n",
- "\n",
- "# Print the DataFrame and its memory usage\n",
- "print(pl_df)\n",
- "\n",
- "num_rows_polars = pl_df.shape[0]\n",
- "\n",
- "print(f\"Pandas DataFarme number of rows: {num_rows_polars}\")\n",
- "print(f\"Polars DataFrame memory usage: {pl_df.estimated_size() / (1024 ** 2):.2f} MB\")"
+ "%%flame -q --inverted\n",
+ "test_polars()"
]
},
{
"cell_type": "code",
- "execution_count": 79,
+ "execution_count": 49,
+ "id": "87f043b9-6cfa-4c3b-b550-25818e29bd45",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "shape: (8_000, 7)\n",
+ "┌──────────────┬─────────────┬─────────────┬─────────────┬─────────────┬─────────────┬─────────────┐\n",
+ "│ @timestamp ┆ host.hostna ┆ host.ip ┆ log.level ┆ winlog.even ┆ winlog.task ┆ message │\n",
+ "│ --- ┆ me ┆ --- ┆ --- ┆ t_id ┆ --- ┆ --- │\n",
+ "│ str ┆ --- ┆ str ┆ str ┆ --- ┆ str ┆ str │\n",
+ "│ ┆ str ┆ ┆ ┆ i64 ┆ ┆ │\n",
+ "╞══════════════╪═════════════╪═════════════╪═════════════╪═════════════╪═════════════╪═════════════╡\n",
+ "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 13 ┆ Registry ┆ Registry │\n",
+ "│ 5:57:18.471Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ value set ┆ value set: │\n",
+ "│ ┆ ┆ 8a1 ┆ ┆ ┆ (rule: ┆ RuleName: … │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ Regi… ┆ │\n",
+ "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 13 ┆ Registry ┆ Registry │\n",
+ "│ 5:57:18.471Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ value set ┆ value set: │\n",
+ "│ ┆ ┆ 8a1 ┆ ┆ ┆ (rule: ┆ RuleName: … │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ Regi… ┆ │\n",
+ "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 13 ┆ Registry ┆ Registry │\n",
+ "│ 5:57:18.471Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ value set ┆ value set: │\n",
+ "│ ┆ ┆ 8a1 ┆ ┆ ┆ (rule: ┆ RuleName: … │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ Regi… ┆ │\n",
+ "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 13 ┆ Registry ┆ Registry │\n",
+ "│ 5:57:18.471Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ value set ┆ value set: │\n",
+ "│ ┆ ┆ 8a1 ┆ ┆ ┆ (rule: ┆ RuleName: … │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ Regi… ┆ │\n",
+ "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 13 ┆ Registry ┆ Registry │\n",
+ "│ 5:57:18.471Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ value set ┆ value set: │\n",
+ "│ ┆ ┆ 8a1 ┆ ┆ ┆ (rule: ┆ RuleName: … │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ Regi… ┆ │\n",
+ "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
+ "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 4663 ┆ Removable ┆ An attempt │\n",
+ "│ 6:10:07.128Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ Storage ┆ was made to │\n",
+ "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n",
+ "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 4663 ┆ Removable ┆ An attempt │\n",
+ "│ 6:10:07.136Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ Storage ┆ was made to │\n",
+ "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n",
+ "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 4663 ┆ Removable ┆ An attempt │\n",
+ "│ 6:10:07.136Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ Storage ┆ was made to │\n",
+ "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n",
+ "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 4663 ┆ Removable ┆ An attempt │\n",
+ "│ 6:10:07.149Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ Storage ┆ was made to │\n",
+ "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n",
+ "│ 2024-05-15T1 ┆ win10 ┆ fe80::24b4: ┆ information ┆ 4663 ┆ Removable ┆ An attempt │\n",
+ "│ 6:10:07.149Z ┆ ┆ 3691:44a6:3 ┆ ┆ ┆ Storage ┆ was made to │\n",
+ "│ ┆ ┆ 8a1 ┆ ┆ ┆ ┆ access … │\n",
+ "└──────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┘\n",
+ "Pandas DataFarme number of rows: 8000\n",
+ "Polars DataFrame memory usage: 4.76 MB\n",
+ " "
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ " 256 function calls (253 primitive calls) in 0.020 seconds\n",
+ "\n",
+ " Ordered by: internal time\n",
+ "\n",
+ " ncalls tottime percall cumtime percall filename:lineno(function)\n",
+ " 2 0.014 0.007 0.014 0.007 {method 'collect' of 'builtins.PyLazyFrame' objects}\n",
+ " 1 0.003 0.003 0.003 0.003 {built-in method new_from_ndjson}\n",
+ " 2 0.001 0.001 0.001 0.001 {built-in method posix.stat}\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'as_str' of 'builtins.PyDataFrame' objects}\n",
+ " 1 0.000 0.000 0.020 0.020 :1()\n",
+ " 1 0.000 0.000 0.020 0.020 2832609216.py:1(test_polars)\n",
+ " 1 0.000 0.000 0.000 0.000 socket.py:543(send)\n",
+ " 2 0.000 0.000 0.000 0.000 wrap.py:12(wrap_df)\n",
+ " 6 0.000 0.000 0.000 0.000 iostream.py:610(write)\n",
+ " 1 0.000 0.000 0.020 0.020 {built-in method builtins.exec}\n",
+ " 2 0.000 0.000 0.014 0.007 frame.py:1683(collect)\n",
+ " 1 0.000 0.000 0.004 0.004 ndjson.py:86(scan_ndjson)\n",
+ " 1 0.000 0.000 0.000 0.000 2832609216.py:9()\n",
+ " 2/1 0.000 0.000 0.004 0.004 deprecation.py:130(wrapper)\n",
+ " 2 0.000 0.000 0.000 0.000 wrap.py:16(wrap_ldf)\n",
+ " 7 0.000 0.000 0.000 0.000 expr.py:1917(cast)\n",
+ " 1 0.000 0.000 0.001 0.001 various.py:182(normalize_filepath)\n",
+ " 41/39 0.000 0.000 0.000 0.000 {built-in method builtins.isinstance}\n",
+ " 7 0.000 0.000 0.000 0.000 col.py:20(_create_col)\n",
+ " 3 0.000 0.000 0.001 0.000 {built-in method builtins.print}\n",
+ " 1 0.000 0.000 0.000 0.000 frame.py:4006(with_columns)\n",
+ " 2 0.000 0.000 0.000 0.000 {method 'optimization_toggle' of 'builtins.PyLazyFrame' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 frame.py:7998(lazy)\n",
+ " 3 0.000 0.000 0.000 0.000 frame.py:316(_from_pyldf)\n",
+ " 1 0.000 0.000 0.001 0.001 frame.py:8164(with_columns)\n",
+ " 7 0.000 0.000 0.000 0.000 {method 'cast' of 'builtins.PyExpr' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 iostream.py:243(schedule)\n",
+ " 7 0.000 0.000 0.000 0.000 convert.py:388(py_type_to_dtype)\n",
+ " 19 0.000 0.000 0.000 0.000 {built-in method __new__ of type object at 0x860f60}\n",
+ " 6 0.000 0.000 0.000 0.000 iostream.py:505(_is_master_process)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'with_columns' of 'builtins.PyLazyFrame' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 :674(__getitem__)\n",
+ " 7 0.000 0.000 0.000 0.000 {col}\n",
+ " 7 0.000 0.000 0.000 0.000 col.py:145(__new__)\n",
+ " 1 0.000 0.000 0.000 0.000 :39(isdir)\n",
+ " 7 0.000 0.000 0.000 0.000 wrap.py:24(wrap_expr)\n",
+ " 1 0.000 0.000 0.000 0.000 :229(expanduser)\n",
+ " 6 0.000 0.000 0.000 0.000 {built-in method posix.getpid}\n",
+ " 14 0.000 0.000 0.000 0.000 expr.py:131(_from_pyexpr)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'lazy' of 'builtins.PyDataFrame' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 typing.py:1579(__subclasscheck__)\n",
+ " 2 0.000 0.000 0.000 0.000 frame.py:439(_from_pydf)\n",
+ " 1 0.000 0.000 0.000 0.000 parse_expr_input.py:56()\n",
+ " 7 0.000 0.000 0.000 0.000 convert.py:146(is_polars_dtype)\n",
+ " 1 0.000 0.000 0.000 0.000 threading.py:1185(is_alive)\n",
+ " 1 0.000 0.000 0.001 0.001 :16(exists)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'estimated_size' of 'builtins.PyDataFrame' objects}\n",
+ " 6 0.000 0.000 0.000 0.000 iostream.py:532(_schedule_flush)\n",
+ " 1 0.000 0.000 0.000 0.000 frame.py:980(__str__)\n",
+ " 1 0.000 0.000 0.000 0.000 parse_expr_input.py:59(_parse_inputs_as_iterable)\n",
+ " 1 0.000 0.000 0.000 0.000 :771(get)\n",
+ " 1 0.000 0.000 0.000 0.000 threading.py:1118(_wait_for_tstate_lock)\n",
+ " 7 0.000 0.000 0.000 0.000 parse_expr_input.py:85(parse_as_expression)\n",
+ " 1 0.000 0.000 0.000 0.000 frame.py:3600(estimated_size)\n",
+ " 1 0.000 0.000 0.000 0.000 parse_expr_input.py:50(_parse_positional_inputs)\n",
+ " 1 0.000 0.000 0.000 0.000 :756(encode)\n",
+ " 1 0.000 0.000 0.000 0.000 frame.py:591(shape)\n",
+ " 1 0.000 0.000 0.000 0.000 {built-in method builtins.issubclass}\n",
+ " 1 0.000 0.000 0.000 0.000 iostream.py:127(_event_pipe)\n",
+ " 1 0.000 0.000 0.000 0.000 parse_expr_input.py:72(_is_iterable)\n",
+ " 1 0.000 0.000 0.000 0.000 parse_expr_input.py:20(parse_as_list_of_expressions)\n",
+ " 1 0.000 0.000 0.000 0.000 typing.py:1304(__instancecheck__)\n",
+ " 1 0.000 0.000 0.000 0.000 :121(__subclasscheck__)\n",
+ " 1 0.000 0.000 0.000 0.000 {built-in method _abc._abc_subclasscheck}\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'shape' of 'builtins.PyDataFrame' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.lock' objects}\n",
+ " 7 0.000 0.000 0.000 0.000 {built-in method builtins.len}\n",
+ " 6 0.000 0.000 0.000 0.000 {method 'write' of '_io.StringIO' objects}\n",
+ " 6 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.RLock' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 {built-in method _stat.S_ISDIR}\n",
+ " 1 0.000 0.000 0.000 0.000 various.py:210(scale_bytes)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 deprecation.py:143(_rename_keyword_argument)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'encode' of 'str' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'startswith' of 'str' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 _utils.py:58(parse_row_index_args)\n",
+ " 1 0.000 0.000 0.000 0.000 threading.py:568(is_set)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 {built-in method posix.fspath}"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "%%prun\n",
+ "test_polars()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
"id": "547f7253-cd62-44c6-8d7a-840dab2dbbbd",
"metadata": {
"tags": []
},
+ "outputs": [],
+ "source": [
+ "def test_pandas():\n",
+ " # Load the JSON file into a Pandas DataFrame\n",
+ " pd_df = pd.read_json(file_path, lines=True, dtype=pandas_schema)\n",
+ " pd_memory_usage = pd_df.memory_usage(deep=True).sum()\n",
+ "\n",
+ " # Get the number of rows in the Pandas DataFrame\n",
+ " num_rows_pandas = pd_df.shape[0]\n",
+ "\n",
+ " print(pd_df)\n",
+ "\n",
+ " print(f\"Pandas DataFarme number of rows: {num_rows_pandas}\")\n",
+ " print(f\"Pandas DataFrame memory usage: {pd_memory_usage / (1024 ** 2):.2f} MB\") \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "50230892-0a0e-4144-a17e-27d2714de1e8",
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"name": "stdout",
@@ -651,34 +1428,2240 @@
"\n",
"[8000 rows x 7 columns]\n",
"Pandas DataFarme number of rows: 8000\n",
- "Pandas DataFrame memory usage: 7.56 MB\n"
+ "Pandas DataFrame memory usage: 7.56 MB\n",
+ " "
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "# Load the JSON file into a Pandas DataFrame\n",
- "pd_df = pd.read_json(file_path, lines=True)\n",
- "pd_memory_usage = pd_df.memory_usage(deep=True).sum()\n",
- "\n",
- "# Get the number of rows in the Pandas DataFrame\n",
- "num_rows_pandas = pd_df.shape[0]\n",
- "\n",
- "print(pd_df)\n",
- "\n",
- "print(f\"Pandas DataFarme number of rows: {num_rows_pandas}\")\n",
- "print(f\"Pandas DataFrame memory usage: {pd_memory_usage / (1024 ** 2):.2f} MB\")"
+ "%%flame -q --inverted\n",
+ "test_pandas()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "4d47d7ec-d3f1-4fac-9933-ec330651a6f4",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " @timestamp host.hostname host.ip \\\n",
+ "0 2024-05-15T15:57:18.471Z win10 fe80::24b4:3691:44a6:38a1 \n",
+ "1 2024-05-15T15:57:18.471Z win10 fe80::24b4:3691:44a6:38a1 \n",
+ "2 2024-05-15T15:57:18.471Z win10 fe80::24b4:3691:44a6:38a1 \n",
+ "3 2024-05-15T15:57:18.471Z win10 fe80::24b4:3691:44a6:38a1 \n",
+ "4 2024-05-15T15:57:18.471Z win10 fe80::24b4:3691:44a6:38a1 \n",
+ "... ... ... ... \n",
+ "7995 2024-05-15T16:10:07.128Z win10 fe80::24b4:3691:44a6:38a1 \n",
+ "7996 2024-05-15T16:10:07.136Z win10 fe80::24b4:3691:44a6:38a1 \n",
+ "7997 2024-05-15T16:10:07.136Z win10 fe80::24b4:3691:44a6:38a1 \n",
+ "7998 2024-05-15T16:10:07.149Z win10 fe80::24b4:3691:44a6:38a1 \n",
+ "7999 2024-05-15T16:10:07.149Z win10 fe80::24b4:3691:44a6:38a1 \n",
+ "\n",
+ " log.level winlog.event_id winlog.task \\\n",
+ "0 information 13 Registry value set (rule: RegistryEvent) \n",
+ "1 information 13 Registry value set (rule: RegistryEvent) \n",
+ "2 information 13 Registry value set (rule: RegistryEvent) \n",
+ "3 information 13 Registry value set (rule: RegistryEvent) \n",
+ "4 information 13 Registry value set (rule: RegistryEvent) \n",
+ "... ... ... ... \n",
+ "7995 information 4663 Removable Storage \n",
+ "7996 information 4663 Removable Storage \n",
+ "7997 information 4663 Removable Storage \n",
+ "7998 information 4663 Removable Storage \n",
+ "7999 information 4663 Removable Storage \n",
+ "\n",
+ " message \n",
+ "0 Registry value set:\\nRuleName: InvDB-Ver\\nEven... \n",
+ "1 Registry value set:\\nRuleName: InvDB-Path\\nEve... \n",
+ "2 Registry value set:\\nRuleName: InvDB-Pub\\nEven... \n",
+ "3 Registry value set:\\nRuleName: InvDB-CompileTi... \n",
+ "4 Registry value set:\\nRuleName: InvDB-Ver\\nEven... \n",
+ "... ... \n",
+ "7995 An attempt was made to access an object.\\n\\nSu... \n",
+ "7996 An attempt was made to access an object.\\n\\nSu... \n",
+ "7997 An attempt was made to access an object.\\n\\nSu... \n",
+ "7998 An attempt was made to access an object.\\n\\nSu... \n",
+ "7999 An attempt was made to access an object.\\n\\nSu... \n",
+ "\n",
+ "[8000 rows x 7 columns]\n",
+ "Pandas DataFarme number of rows: 8000\n",
+ "Pandas DataFrame memory usage: 7.56 MB\n",
+ " "
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ " 46681 function calls (46472 primitive calls) in 0.113 seconds\n",
+ "\n",
+ " Ordered by: internal time\n",
+ "\n",
+ " ncalls tottime percall cumtime percall filename:lineno(function)\n",
+ " 1 0.029 0.029 0.029 0.029 {built-in method pandas._libs.json.ujson_loads}\n",
+ " 6 0.010 0.002 0.010 0.002 {pandas._libs.lib.memory_usage_of_objects}\n",
+ " 1 0.009 0.009 0.012 0.012 {method 'read' of '_io.TextIOWrapper' objects}\n",
+ " 8001 0.005 0.000 0.005 0.000 construction.py:915()\n",
+ " 1 0.005 0.005 0.005 0.005 {pandas._libs.lib.dicts_to_array}\n",
+ " 94 0.004 0.000 0.004 0.000 {method 'split' of 'str' objects}\n",
+ " 1 0.003 0.003 0.009 0.009 {pandas._libs.lib.fast_unique_multiple_list_gen}\n",
+ " 1 0.003 0.003 0.010 0.010 _json.py:960(_combine_lines)\n",
+ " 1 0.003 0.003 0.003 0.003 {built-in method _codecs.utf_8_decode}\n",
+ " 64/8 0.003 0.000 0.003 0.000 {method 'join' of 'str' objects}\n",
+ " 1 0.003 0.003 0.113 0.113 :1()\n",
+ " 1 0.002 0.002 0.049 0.049 _json.py:1360(_parse)\n",
+ " 6 0.002 0.000 0.002 0.000 {built-in method pandas._libs.lib.ensure_string_array}\n",
+ " 4 0.002 0.000 0.002 0.001 managers.py:2194(_stack_arrays)\n",
+ " 8002 0.002 0.000 0.003 0.000 _json.py:965()\n",
+ " 1 0.002 0.002 0.002 0.002 construction.py:922()\n",
+ " 1 0.001 0.001 0.004 0.004 _json.py:965()\n",
+ " 1 0.001 0.001 0.091 0.091 _json.py:500(read_json)\n",
+ " 8001 0.001 0.000 0.001 0.000 {method 'strip' of 'str' objects}\n",
+ " 1 0.001 0.001 0.001 0.001 {built-in method io.open}\n",
+ " 9 0.001 0.000 0.001 0.000 {method 'astype' of 'numpy.ndarray' objects}\n",
+ " 2 0.001 0.001 0.002 0.001 managers.py:2224(_merge_blocks)\n",
+ " 53/51 0.001 0.000 0.001 0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}\n",
+ " 1 0.001 0.001 0.010 0.010 _json.py:1422(_try_convert_types)\n",
+ "2422/2396 0.001 0.000 0.001 0.000 {built-in method builtins.isinstance}\n",
+ " 8001 0.001 0.000 0.001 0.000 {method 'keys' of 'dict' objects}\n",
+ " 32 0.001 0.000 0.001 0.000 generic.py:6147(__finalize__)\n",
+ " 1 0.001 0.001 0.001 0.001 {built-in method posix.stat}\n",
+ " 2 0.000 0.000 0.002 0.001 _json.py:1282(_try_convert_to_date)\n",
+ " 2 0.000 0.000 0.005 0.003 construction.py:96(arrays_to_mgr)\n",
+ " 98 0.000 0.000 0.000 0.000 generic.py:6206(__setattr__)\n",
+ " 24 0.000 0.000 0.001 0.000 base.py:510(find)\n",
+ "1415/1290 0.000 0.000 0.000 0.000 {built-in method builtins.len}\n",
+ " 29 0.000 0.000 0.001 0.000 {pandas._libs.lib.maybe_convert_objects}\n",
+ " 1 0.000 0.000 0.016 0.016 construction.py:793(to_arrays)\n",
+ " 21 0.000 0.000 0.000 0.000 managers.py:991(iget)\n",
+ " 661 0.000 0.000 0.000 0.000 format.py:428(len)\n",
+ " 1 0.000 0.000 0.000 0.000 socket.py:543(send)\n",
+ " 21 0.000 0.000 0.001 0.000 common.py:1587(pandas_dtype)\n",
+ " 47 0.000 0.000 0.000 0.000 {built-in method numpy.empty}\n",
+ " 93 0.000 0.000 0.000 0.000 config.py:127(_get_single_key)\n",
+ " 6 0.000 0.000 0.001 0.000 format.py:1332(_format_strings)\n",
+ " 198 0.000 0.000 0.000 0.000 base.py:236(construct_from_string)\n",
+ " 41 0.000 0.000 0.000 0.000 generic.py:274(__init__)\n",
+ " 5 0.000 0.000 0.001 0.000 base.py:478(__new__)\n",
+ " 19 0.000 0.000 0.001 0.000 construction.py:519(sanitize_array)\n",
+ " 7 0.000 0.000 0.001 0.000 series.py:371(__init__)\n",
+ " 18 0.000 0.000 0.000 0.000 {method 'reduce' of 'numpy.ufunc' objects}\n",
+ " 67 0.000 0.000 0.000 0.000 printing.py:162(pprint_thing)\n",
+ " 60 0.000 0.000 0.001 0.000 format.py:1355(_format)\n",
+ " 4 0.000 0.000 0.000 0.000 {pandas._libs.tslib.array_with_unit_to_datetime}\n",
+ " 63 0.000 0.000 0.000 0.000 base.py:5350(__getitem__)\n",
+ " 89 0.000 0.000 0.001 0.000 config.py:145(_get_option)\n",
+ " 91 0.000 0.000 0.000 0.000 config.py:633(_get_root)\n",
+ " 67 0.000 0.000 0.000 0.000 printing.py:193(as_escaped_string)\n",
+ " 182 0.000 0.000 0.000 0.000 config.py:647(_get_deprecated_option)\n",
+ " 212 0.000 0.000 0.000 0.000 generic.py:42(_instancecheck)\n",
+ " 6 0.000 0.000 0.000 0.000 {pandas._libs.lib.map_infer}\n",
+ " 212 0.000 0.000 0.000 0.000 generic.py:37(_check)\n",
+ " 21 0.000 0.000 0.001 0.000 frame.py:4402(_get_item_cache)\n",
+ " 1 0.000 0.000 0.004 0.004 format.py:843(_get_strcols_without_index)\n",
+ " 21 0.000 0.000 0.001 0.000 frame.py:3776(_ixs)\n",
+ " 35 0.000 0.000 0.000 0.000 numeric.py:290(full)\n",
+ " 18 0.000 0.000 0.001 0.000 format.py:1909(_make_fixed_width)\n",
+ " 2 0.000 0.000 0.002 0.001 managers.py:2137(_form_blocks)\n",
+ " 9 0.000 0.000 0.002 0.000 format.py:1217(format_array)\n",
+ " 10 0.000 0.000 0.003 0.000 astype.py:56(_astype_nansafe)\n",
+ " 61 0.000 0.000 0.000 0.000 {built-in method builtins.max}\n",
+ " 180 0.000 0.000 0.000 0.000 format.py:1932(just)\n",
+ " 14 0.000 0.000 0.000 0.000 base.py:5300(__contains__)\n",
+ " 15 0.000 0.000 0.000 0.000 {built-in method numpy.array}\n",
+ " 67 0.000 0.000 0.000 0.000 inference.py:373(is_sequence)\n",
+ " 9 0.000 0.000 0.001 0.000 indexing.py:1006(_getitem_lowerdim)\n",
+ " 2 0.000 0.000 0.021 0.011 frame.py:665(__init__)\n",
+ " 198 0.000 0.000 0.000 0.000 format.py:1923()\n",
+ " 21 0.000 0.000 0.001 0.000 frame.py:4384(_box_col_values)\n",
+ " 73 0.000 0.000 0.000 0.000 missing.py:184(_isna)\n",
+ " 24 0.000 0.000 0.000 0.000 warnings.py:466(__enter__)\n",
+ " 24 0.000 0.000 0.001 0.000 frame.py:1392(items)\n",
+ " 88 0.000 0.000 0.001 0.000 config.py:271(__call__)\n",
+ " 7 0.000 0.000 0.003 0.000 format.py:890(format_col)\n",
+ " 301 0.000 0.000 0.000 0.000 {built-in method builtins.getattr}\n",
+ " 24 0.000 0.000 0.000 0.000 warnings.py:181(_add_filter)\n",
+ " 2 0.000 0.000 0.000 0.000 {pandas._libs.lib.array_equivalent_object}\n",
+ " 17 0.000 0.000 0.000 0.000 cast.py:1147(maybe_infer_to_datetimelike)\n",
+ " 9 0.000 0.000 0.007 0.001 _json.py:1204(_try_convert_data)\n",
+ " 7 0.000 0.000 0.004 0.001 managers.py:308(apply)\n",
+ " 5 0.000 0.000 0.000 0.000 printing.py:28(adjoin)\n",
+ " 18 0.000 0.000 0.000 0.000 format.py:1938()\n",
+ " 56 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_list_like}\n",
+ " 48 0.000 0.000 0.000 0.000 {built-in method numpy.asarray}\n",
+ " 62 0.000 0.000 0.000 0.000 {built-in method builtins.all}\n",
+ " 9 0.000 0.000 0.001 0.000 indexing.py:1139(__getitem__)\n",
+ " 2 0.000 0.000 0.010 0.005 _json.py:1396(_process_converter)\n",
+ " 89 0.000 0.000 0.000 0.000 config.py:686(_warn_if_deprecated)\n",
+ " 35 0.000 0.000 0.000 0.000 <__array_function__ internals>:177(copyto)\n",
+ " 15 0.000 0.000 0.000 0.000 blocks.py:2388(new_block)\n",
+ " 15 0.000 0.000 0.000 0.000 inference.py:273(is_dict_like)\n",
+ " 6 0.000 0.000 0.000 0.000 base.py:836(__iter__)\n",
+ " 7 0.000 0.000 0.010 0.001 base.py:1135(_memory_usage)\n",
+ " 24 0.000 0.000 0.000 0.000 {method 'remove' of 'list' objects}\n",
+ " 20 0.000 0.000 0.000 0.000 printing.py:65()\n",
+ " 7 0.000 0.000 0.000 0.000 blocks.py:247(make_block)\n",
+ " 60 0.000 0.000 0.000 0.000 {built-in method _abc._abc_instancecheck}\n",
+ " 8 0.000 0.000 0.000 0.000 managers.py:1825(from_array)\n",
+ " 201 0.000 0.000 0.000 0.000 {method 'replace' of 'str' objects}\n",
+ " 28 0.000 0.000 0.000 0.000 printing.py:69()\n",
+ " 2 0.000 0.000 0.001 0.000 concat.py:618(get_result)\n",
+ " 41 0.000 0.000 0.000 0.000 flags.py:53(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 construction.py:928(_finalize_columns_and_data)\n",
+ " 1 0.000 0.000 0.110 0.110 1231667944.py:1(test_pandas)\n",
+ " 9 0.000 0.000 0.001 0.000 indexing.py:1651(_getitem_tuple)\n",
+ " 40 0.000 0.000 0.000 0.000 {built-in method builtins.any}\n",
+ " 31 0.000 0.000 0.000 0.000 generic.py:562(_get_axis)\n",
+ " 93 0.000 0.000 0.000 0.000 config.py:674(_translate_key)\n",
+ " 16 0.000 0.000 0.000 0.000 format.py:903(_get_formatter)\n",
+ " 14 0.000 0.000 0.000 0.000 numpy_.py:98(__init__)\n",
+ " 93 0.000 0.000 0.000 0.000 config.py:615(_select_options)\n",
+ " 32 0.000 0.000 0.000 0.000 managers.py:1960(internal_values)\n",
+ " 3 0.000 0.000 0.000 0.000 {method '_rebuild_blknos_and_blklocs' of 'pandas._libs.internals.BlockManager' objects}\n",
+ " 10 0.000 0.000 0.003 0.000 astype.py:158(astype_array)\n",
+ " 32 0.000 0.000 0.000 0.000 generic.py:335(_from_mgr)\n",
+ " 16 0.000 0.000 0.000 0.000 blocks.py:2317(maybe_coerce_values)\n",
+ " 60 0.000 0.000 0.000 0.000 __init__.py:33(using_copy_on_write)\n",
+ " 14 0.000 0.000 0.000 0.000 {method 'get_loc' of 'pandas._libs.index.IndexEngine' objects}\n",
+ " 6 0.000 0.000 0.000 0.000 {built-in method pandas._libs.missing.isnaobj}\n",
+ " 7 0.000 0.000 0.005 0.001 generic.py:6368(astype)\n",
+ " 72 0.000 0.000 0.000 0.000 base.py:909(__len__)\n",
+ " 2 0.000 0.000 0.000 0.000 construction.py:596(_homogenize)\n",
+ " 48 0.000 0.000 0.000 0.000 printing.py:60(justify)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'close' of '_io.TextIOWrapper' objects}\n",
+ " 7 0.000 0.000 0.004 0.001 blocks.py:588(astype)\n",
+ " 1 0.000 0.000 0.000 0.000 concat.py:94(concatenate_managers)\n",
+ " 9 0.000 0.000 0.001 0.000 indexing.py:1681(_getitem_axis)\n",
+ " 21 0.000 0.000 0.000 0.000 frame.py:654(_constructor_sliced_from_mgr)\n",
+ " 48 0.000 0.000 0.000 0.000 format.py:431(justify)\n",
+ " 73 0.000 0.000 0.000 0.000 missing.py:101(isna)\n",
+ " 140 0.000 0.000 0.000 0.000 {built-in method builtins.hasattr}\n",
+ " 4 0.000 0.000 0.001 0.000 datetimes.py:721(to_datetime)\n",
+ " 6 0.000 0.000 0.000 0.000 iostream.py:610(write)\n",
+ " 19 0.000 0.000 0.001 0.000 base.py:7521(ensure_index)\n",
+ " 20 0.000 0.000 0.000 0.000 blocks.py:2346(get_block_type)\n",
+ " 63 0.000 0.000 0.000 0.000 common.py:149(cast_scalar_indexer)\n",
+ " 142 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_scalar}\n",
+ " 14 0.000 0.000 0.000 0.000 managers.py:1949(dtype)\n",
+ " 2 0.000 0.000 0.000 0.000 {method 'get_slice' of 'pandas._libs.internals.BlockManager' objects}\n",
+ " 136 0.000 0.000 0.000 0.000 {built-in method builtins.issubclass}\n",
+ " 1 0.000 0.000 0.015 0.015 construction.py:891(_list_of_dict_to_arrays)\n",
+ " 24 0.000 0.000 0.000 0.000 warnings.py:487(__exit__)\n",
+ " 9 0.000 0.000 0.000 0.000 indexing.py:931(_validate_tuple_indexer)\n",
+ " 21 0.000 0.000 0.000 0.000 series.py:1372(_set_as_cached)\n",
+ " 7 0.000 0.000 0.000 0.000 _dtype.py:344(_name_get)\n",
+ " 7 0.000 0.000 0.000 0.000 missing.py:261(_isna_array)\n",
+ " 18 0.000 0.000 0.000 0.000 dtypes.py:1266(construct_from_string)\n",
+ " 7 0.000 0.000 0.000 0.000 warnings.py:130(filterwarnings)\n",
+ " 14 0.000 0.000 0.000 0.000 base.py:3763(get_loc)\n",
+ " 6 0.000 0.000 0.000 0.000 missing.py:380(notna)\n",
+ " 30 0.000 0.000 0.000 0.000 format.py:1617()\n",
+ " 27 0.000 0.000 0.000 0.000 construction.py:485(ensure_wrapped_if_datetimelike)\n",
+ " 3 0.000 0.000 0.000 0.000 base.py:1418(_format_with_header)\n",
+ " 23 0.000 0.000 0.000 0.000 {method 'match' of 're.Pattern' objects}\n",
+ " 18 0.000 0.000 0.000 0.000 dtypes.py:814(construct_from_string)\n",
+ " 9 0.000 0.000 0.000 0.000 indexing.py:1614(_is_scalar_access)\n",
+ " 18 0.000 0.000 0.000 0.000 dtypes.py:332(construct_from_string)\n",
+ " 66 0.000 0.000 0.000 0.000 {built-in method pandas._libs.missing.checknull}\n",
+ " 10 0.000 0.000 0.000 0.000 format.py:479(get_adjustment)\n",
+ " 7 0.000 0.000 0.000 0.000 base.py:649(_simple_new)\n",
+ " 217 0.000 0.000 0.000 0.000 {method 'ljust' of 'str' objects}\n",
+ " 1 0.000 0.000 0.075 0.075 _json.py:980(read)\n",
+ " 16 0.000 0.000 0.000 0.000 common.py:137(is_object_dtype)\n",
+ " 32 0.000 0.000 0.000 0.000 flags.py:89(allows_duplicate_labels)\n",
+ " 24 0.000 0.000 0.000 0.000 warnings.py:440(__init__)\n",
+ " 1 0.000 0.000 0.113 0.113 {built-in method builtins.exec}\n",
+ " 18 0.000 0.000 0.000 0.000 dtypes.py:1021(construct_from_string)\n",
+ " 9 0.000 0.000 0.002 0.000 format.py:1328(get_result)\n",
+ " 1 0.000 0.000 0.000 0.000 string.py:119(_join_multiline)\n",
+ " 3 0.000 0.000 0.000 0.000 _strptime.py:309(_strptime)\n",
+ " 30 0.000 0.000 0.000 0.000 construction.py:420(extract_array)\n",
+ " 21 0.000 0.000 0.000 0.000 blocks.py:1007(iget)\n",
+ " 1 0.000 0.000 0.012 0.012 frame.py:3471(memory_usage)\n",
+ " 76 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_np_dtype}\n",
+ " 77 0.000 0.000 0.000 0.000 format.py:884()\n",
+ " 10 0.000 0.000 0.000 0.000 generic.py:760(_set_axis)\n",
+ " 18 0.000 0.000 0.000 0.000 indexing.py:1536(_validate_key)\n",
+ " 17 0.000 0.000 0.000 0.000 warnings.py:165(simplefilter)\n",
+ " 4 0.000 0.000 0.000 0.000 _asarray.py:31(require)\n",
+ " 25 0.000 0.000 0.000 0.000 common.py:1425(_is_dtype_type)\n",
+ " 6 0.000 0.000 0.000 0.000 concat.py:322(_get_block_for_concat_plan)\n",
+ " 60 0.000 0.000 0.000 0.000 :117(__instancecheck__)\n",
+ " 7 0.000 0.000 0.004 0.001 astype.py:192(astype_array_safe)\n",
+ " 25 0.000 0.000 0.000 0.000 common.py:96(is_bool_indexer)\n",
+ " 1 0.000 0.000 0.001 0.001 common.py:652(get_handle)\n",
+ " 7 0.000 0.000 0.000 0.000 construction.py:1028(convert)\n",
+ " 157 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:403(__init__)\n",
+ " 9 0.000 0.000 0.000 0.000 indexing.py:2678(check_dict_or_set_indexers)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:1683(_validate_names)\n",
+ " 3 0.000 0.000 0.000 0.000 cast.py:119(maybe_convert_platform)\n",
+ " 3 0.000 0.000 0.000 0.000 {built-in method numpy.arange}\n",
+ " 14 0.000 0.000 0.000 0.000 managers.py:1964(array_values)\n",
+ " 103 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_integer}\n",
+ " 4 0.000 0.000 0.001 0.000 base.py:1038(astype)\n",
+ " 7 0.000 0.000 0.000 0.000 string.py:129()\n",
+ " 1 0.000 0.000 0.062 0.062 _json.py:1022(_get_object_parser)\n",
+ " 1 0.000 0.000 0.000 0.000 concat.py:296(_get_combined_plan)\n",
+ " 47 0.000 0.000 0.000 0.000 range.py:963(__len__)\n",
+ " 2 0.000 0.000 0.000 0.000 parse.py:374(urlparse)\n",
+ " 6 0.000 0.000 0.000 0.000 concat.py:389(is_na)\n",
+ " 39 0.000 0.000 0.000 0.000 inference.py:334(is_hashable)\n",
+ " 6 0.000 0.000 0.000 0.000 fromnumeric.py:69(_wrapreduction)\n",
+ " 7 0.000 0.000 0.004 0.001 managers.py:405(astype)\n",
+ " 2 0.000 0.000 0.000 0.000 format.py:956(_get_formatted_index)\n",
+ " 3 0.000 0.000 0.000 0.000 cast.py:1544(construct_1d_object_array_from_listlike)\n",
+ " 8 0.000 0.000 0.000 0.000 range.py:198(_simple_new)\n",
+ " 9 0.000 0.000 0.000 0.000 common.py:1066(is_numeric_dtype)\n",
+ " 5 0.000 0.000 0.000 0.000 format.py:434(adjoin)\n",
+ " 4 0.000 0.000 0.001 0.000 datetimes.py:216(_maybe_cache)\n",
+ " 49 0.000 0.000 0.000 0.000 {built-in method __new__ of type object at 0x860f60}\n",
+ " 36 0.000 0.000 0.000 0.000 managers.py:1799(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:915(_get_formatted_column_labels)\n",
+ " 2 0.000 0.000 0.000 0.000 generic.py:4296(_slice)\n",
+ " 9 0.000 0.000 0.000 0.000 series.py:653(name)\n",
+ " 18 0.000 0.000 0.000 0.000 string_.py:135(construct_from_string)\n",
+ " 8 0.000 0.000 0.000 0.000 series.py:581(_constructor_from_mgr)\n",
+ " 6 0.000 0.000 0.000 0.000 __init__.py:272(_compile)\n",
+ " 61 0.000 0.000 0.000 0.000 printing.py:57()\n",
+ " 4 0.000 0.000 0.000 0.000 datetimes.py:526(_to_datetime_with_unit)\n",
+ " 2 0.000 0.000 0.004 0.002 managers.py:2068(create_block_manager_from_column_arrays)\n",
+ " 27 0.000 0.000 0.000 0.000 indexing.py:1144()\n",
+ " 2 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_all_arraylike}\n",
+ " 10 0.000 0.000 0.000 0.000 base.py:73(_validate_set_axis)\n",
+ " 32 0.000 0.000 0.000 0.000 series.py:750(_values)\n",
+ " 154 0.000 0.000 0.000 0.000 {method 'rjust' of 'str' objects}\n",
+ " 18 0.000 0.000 0.000 0.000 dtypes.py:2180(construct_from_string)\n",
+ " 14 0.000 0.000 0.000 0.000 indexing.py:1629(_validate_integer)\n",
+ " 6 0.000 0.000 0.000 0.000 {method 'reshape' of 'numpy.ndarray' objects}\n",
+ " 1 0.000 0.000 0.011 0.011 frame.py:3561()\n",
+ " 5 0.000 0.000 0.000 0.000 base.py:574(_ensure_array)\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:666(_parse)\n",
+ " 1 0.000 0.000 0.007 0.007 frame.py:1229(to_string)\n",
+ " 72 0.000 0.000 0.000 0.000 {built-in method _warnings._filters_mutated}\n",
+ " 71 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects}\n",
+ " 21 0.000 0.000 0.000 0.000 frame.py:651(_sliced_from_mgr)\n",
+ " 1 0.000 0.000 0.000 0.000 {built-in method _operator.gt}\n",
+ " 1 0.000 0.000 0.000 0.000 string.py:89(_insert_dot_separator_vertical)\n",
+ " 4 0.000 0.000 0.000 0.000 _asarray.py:112()\n",
+ " 1 0.000 0.000 0.003 0.003 :319(decode)\n",
+ " 3 0.000 0.000 0.000 0.000 format.py:1619()\n",
+ " 40 0.000 0.000 0.000 0.000 inference.py:300()\n",
+ " 7 0.000 0.000 0.000 0.000 common.py:1268(is_extension_array_dtype)\n",
+ " 5 0.000 0.000 0.000 0.000 cast.py:1483(construct_1d_arraylike_from_scalar)\n",
+ " 14 0.000 0.000 0.000 0.000 blocks.py:2241(array_values)\n",
+ " 3 0.000 0.000 0.000 0.000 _parser.py:77(get_token)\n",
+ " 13 0.000 0.000 0.000 0.000 common.py:296(maybe_iterable_to_list)\n",
+ " 7 0.000 0.000 0.000 0.000 managers.py:1812(from_blocks)\n",
+ " 18 0.000 0.000 0.000 0.000 dtypes.py:1789(construct_from_string)\n",
+ " 1 0.000 0.000 0.000 0.000 array_ops.py:290(comparison_op)\n",
+ " 38 0.000 0.000 0.000 0.000 generic.py:548(_get_axis_number)\n",
+ " 5 0.000 0.000 0.000 0.000 base.py:69(shape)\n",
+ " 48 0.000 0.000 0.000 0.000 {method 'startswith' of 'str' objects}\n",
+ " 14 0.000 0.000 0.000 0.000 dtypes.py:1407(__init__)\n",
+ " 9 0.000 0.000 0.000 0.000 numerictypes.py:356(issubdtype)\n",
+ " 12 0.000 0.000 0.000 0.000 base.py:7616(maybe_extract_name)\n",
+ " 1 0.000 0.000 0.000 0.000 expressions.py:95(_evaluate_numexpr)\n",
+ " 1 0.000 0.000 0.000 0.000 iostream.py:243(schedule)\n",
+ " 1 0.000 0.000 0.000 0.000 range.py:902(_concat)\n",
+ " 14 0.000 0.000 0.000 0.000 construction.py:695(_sanitize_ndim)\n",
+ " 7 0.000 0.000 0.000 0.000 _json.py:1442(is_ok)\n",
+ " 1 0.000 0.000 0.000 0.000 numeric.py:2407(array_equal)\n",
+ " 7 0.000 0.000 0.000 0.000 series.py:703(name)\n",
+ " 10 0.000 0.000 0.000 0.000 common.py:1322(is_ea_or_datetimelike_dtype)\n",
+ " 2 0.000 0.000 0.000 0.000 config.py:153(_set_option)\n",
+ " 1 0.000 0.000 0.014 0.014 _json.py:816(__init__)\n",
+ " 4 0.000 0.000 0.000 0.000 blocks.py:297(slice_block_columns)\n",
+ " 1 0.000 0.000 0.000 0.000 blocks.py:2375(new_block_2d)\n",
+ " 8 0.000 0.000 0.001 0.000 <__array_function__ internals>:177(concatenate)\n",
+ " 1 0.000 0.000 0.007 0.007 frame.py:1123(__repr__)\n",
+ " 10 0.000 0.000 0.000 0.000 common.py:1562(validate_all_hashable)\n",
+ " 1 0.000 0.000 0.002 0.002 managers.py:2207(_consolidate)\n",
+ " 9 0.000 0.000 0.000 0.000 indexing.py:948(_is_nested_tuple_indexer)\n",
+ " 1 0.000 0.000 0.001 0.001 format.py:564(__init__)\n",
+ " 10 0.000 0.000 0.000 0.000 managers.py:225(set_axis)\n",
+ " 2 0.000 0.000 0.000 0.000 common.py:1155(_is_binary_mode)\n",
+ " 42 0.000 0.000 0.000 0.000 base.py:5127(_values)\n",
+ " 3 0.000 0.000 0.000 0.000 range.py:234(_data)\n",
+ " 18 0.000 0.000 0.000 0.000 common.py:367(apply_if_callable)\n",
+ " 1 0.000 0.000 0.000 0.000 common.py:228(asarray_tuplesafe)\n",
+ " 2 0.000 0.000 0.000 0.000 indexing.py:978(_getitem_tuple_same_dim)\n",
+ " 1 0.000 0.000 0.000 0.000 {pandas._libs.internals.get_concat_blkno_indexers}\n",
+ " 4 0.000 0.000 0.000 0.000 datetimes.py:369(_convert_listlike_datetimes)\n",
+ " 35 0.000 0.000 0.000 0.000 {method 'format' of 'str' objects}\n",
+ " 9 0.000 0.000 0.000 0.000 blocks.py:2467(extend_blocks)\n",
+ " 17 0.000 0.000 0.000 0.000 __init__.py:43(using_pyarrow_string_dtype)\n",
+ " 1 0.000 0.000 0.012 0.012 _json.py:896(_preprocess_data)\n",
+ " 1 0.000 0.000 0.000 0.000 cast.py:1569(maybe_cast_to_integer_array)\n",
+ " 1 0.000 0.000 0.000 0.000 cast.py:774(infer_dtype_from_scalar)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:5519(equals)\n",
+ " 3 0.000 0.000 0.007 0.002 {built-in method builtins.print}\n",
+ " 6 0.000 0.000 0.000 0.000 missing.py:305(_isna_string_dtype)\n",
+ " 5 0.000 0.000 0.000 0.000 api.py:379(default_index)\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:62(__init__)\n",
+ " 1 0.000 0.000 0.004 0.004 construction.py:423(dict_to_mgr)\n",
+ " 1 0.000 0.000 0.002 0.002 _json.py:1185(_convert_axes)\n",
+ " 6 0.000 0.000 0.000 0.000 fromnumeric.py:2432(all)\n",
+ " 1 0.000 0.000 0.000 0.000 common.py:289(_get_filepath_or_buffer)\n",
+ " 2 0.000 0.000 0.001 0.001 concat.py:157(concat)\n",
+ " 53 0.000 0.000 0.000 0.000 {built-in method builtins.hash}\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:2293(is_unique)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:5422(append)\n",
+ " 2 0.000 0.000 0.000 0.000 _json.py:1049(close)\n",
+ " 36 0.000 0.000 0.000 0.000 {method 'insert' of 'list' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 common.py:538(infer_compression)\n",
+ " 7 0.000 0.000 0.010 0.001 series.py:5223(memory_usage)\n",
+ " 3 0.000 0.000 0.000 0.000 concat.py:572(_is_uniform_join_units)\n",
+ " 6 0.000 0.000 0.000 0.000 managers.py:2212()\n",
+ " 7 0.000 0.000 0.000 0.000 generic.py:6189(__getattr__)\n",
+ " 18 0.000 0.000 0.000 0.000 indexing.py:2651(is_label_like)\n",
+ " 13 0.000 0.000 0.000 0.000 blocks.py:187(is_extension)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:1427()\n",
+ " 5 0.000 0.000 0.000 0.000 common.py:173(_expand_user)\n",
+ " 1 0.000 0.000 0.002 0.002 _json.py:912(_get_data_from_filepath)\n",
+ " 9 0.000 0.000 0.000 0.000 common.py:131()\n",
+ " 2 0.000 0.000 0.000 0.000 common.py:121(close)\n",
+ " 1 0.000 0.000 0.000 0.000 construction.py:487()\n",
+ " 7 0.000 0.000 0.000 0.000 {method 'add_index_reference' of 'pandas._libs.internals.BlockValuesRefs' objects}\n",
+ " 9 0.000 0.000 0.000 0.000 format.py:1300(__init__)\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:492(_clean_keys_and_objs)\n",
+ " 3 0.000 0.000 0.000 0.000 blocks.py:198(_consolidate_key)\n",
+ " 1 0.000 0.000 0.000 0.000 string.py:128()\n",
+ " 1 0.000 0.000 0.005 0.005 format.py:1077(to_string)\n",
+ " 1 0.000 0.000 0.000 0.000 range.py:489(copy)\n",
+ " 18 0.000 0.000 0.000 0.000 numerictypes.py:282(issubclass_)\n",
+ " 24 0.000 0.000 0.000 0.000 managers.py:169(blknos)\n",
+ " 14 0.000 0.000 0.000 0.000 construction.py:734(_sanitize_str_dtypes)\n",
+ " 7 0.000 0.000 0.000 0.000 frame.py:1539(__len__)\n",
+ " 9 0.000 0.000 0.000 0.000 concat.py:597()\n",
+ " 66 0.000 0.000 0.000 0.000 generic.py:393(flags)\n",
+ " 9 0.000 0.000 0.000 0.000 common.py:514(is_string_or_object_np_dtype)\n",
+ " 63 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_float}\n",
+ " 34 0.000 0.000 0.000 0.000 {method 'endswith' of 'str' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:478(_get_ndims)\n",
+ " 68 0.000 0.000 0.000 0.000 {built-in method builtins.iter}\n",
+ " 1 0.000 0.000 0.062 0.062 _json.py:1172(parse)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:842(_engine)\n",
+ " 20 0.000 0.000 0.000 0.000 common.py:1581()\n",
+ " 1 0.000 0.000 0.000 0.000 {pandas._libs.missing.is_float_nan}\n",
+ " 6 0.000 0.000 0.000 0.000 <__array_function__ internals>:177(all)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:7092(_cmp_method)\n",
+ " 1 0.000 0.000 0.001 0.001 format.py:825(_truncate_vertically)\n",
+ " 18 0.000 0.000 0.000 0.000 indexing.py:966(_validate_key_length)\n",
+ " 9 0.000 0.000 0.000 0.000 :1207(_handle_fromlist)\n",
+ " 27 0.000 0.000 0.000 0.000 indexing.py:955()\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:52(concat_compat)\n",
+ " 27 0.000 0.000 0.000 0.000 indexing.py:2685()\n",
+ " 27 0.000 0.000 0.000 0.000 indexing.py:1143()\n",
+ " 5 0.000 0.000 0.000 0.000 :229(expanduser)\n",
+ " 6 0.000 0.000 0.000 0.000 generic.py:487(_validate_dtype)\n",
+ " 1 0.000 0.000 0.000 0.000 managers.py:1740()\n",
+ " 1 0.000 0.000 0.000 0.000 series.py:3159(_append)\n",
+ " 25 0.000 0.000 0.000 0.000 managers.py:185(blklocs)\n",
+ " 34 0.000 0.000 0.000 0.000 flags.py:57(allows_duplicate_labels)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'take' of 'numpy.ndarray' objects}\n",
+ " 14 0.000 0.000 0.000 0.000 managers.py:2124(_grouping_func)\n",
+ " 34 0.000 0.000 0.000 0.000 generic.py:358(attrs)\n",
+ " 14 0.000 0.000 0.000 0.000 series.py:626(dtype)\n",
+ " 6 0.000 0.000 0.000 0.000 {built-in method posix.getpid}\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:487(get_dataframe_repr_params)\n",
+ " 42 0.000 0.000 0.000 0.000 {method 'lower' of 'str' objects}\n",
+ " 11 0.000 0.000 0.000 0.000 common.py:306(is_null_slice)\n",
+ " 1 0.000 0.000 0.000 0.000 console.py:9(get_console_size)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'argsort' of 'numpy.ndarray' objects}\n",
+ " 29 0.000 0.000 0.000 0.000 managers.py:1902(_block)\n",
+ " 13 0.000 0.000 0.000 0.000 frame.py:949(axes)\n",
+ " 1 0.000 0.000 0.016 0.016 construction.py:506(nested_data_to_arrays)\n",
+ " 11 0.000 0.000 0.000 0.000 indexing.py:150(iloc)\n",
+ " 10 0.000 0.000 0.000 0.000 format.py:425(__init__)\n",
+ " 15 0.000 0.000 0.000 0.000 base.py:831(_reset_identity)\n",
+ " 4 0.000 0.000 0.000 0.000 common.py:233(stringify_path)\n",
+ " 5 0.000 0.000 0.000 0.000 base.py:592(_dtype_to_subclass)\n",
+ " 27 0.000 0.000 0.000 0.000 indexing.py:2694()\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:7592(trim_front)\n",
+ " 7 0.000 0.000 0.000 0.000 _dtype.py:330(_name_includes_bit_suffix)\n",
+ " 2 0.000 0.000 0.000 0.000 construction.py:765(_try_cast)\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:1163(save_to_buffer)\n",
+ " 2 0.000 0.000 0.000 0.000 managers.py:1734(_consolidate_check)\n",
+ " 7 0.000 0.000 0.005 0.001 _json.py:1429()\n",
+ " 14 0.000 0.000 0.000 0.000 series.py:791(array)\n",
+ " 2 0.000 0.000 0.002 0.001 managers.py:1744(_consolidate_inplace)\n",
+ " 9 0.000 0.000 0.000 0.000 concat.py:587()\n",
+ " 2 0.000 0.000 0.000 0.000 frozen.py:73(__getitem__)\n",
+ " 1 0.000 0.000 0.000 0.000 nanops.py:76(_f)\n",
+ " 1 0.000 0.000 0.000 0.000 array_ops.py:191(_na_arithmetic_op)\n",
+ " 35 0.000 0.000 0.000 0.000 multiarray.py:1079(copyto)\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:713(_get_concat_axis)\n",
+ " 9 0.000 0.000 0.000 0.000 common.py:556(require_length_match)\n",
+ " 16 0.000 0.000 0.000 0.000 common.py:123()\n",
+ " 1 0.000 0.000 0.000 0.000 string.py:189(_binify)\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:683(_initialize_justify)\n",
+ " 21 0.000 0.000 0.000 0.000 common.py:1255(is_1d_only_ea_dtype)\n",
+ " 6 0.000 0.000 0.000 0.000 iostream.py:505(_is_master_process)\n",
+ " 2 0.000 0.000 0.000 0.000 indexing.py:1718(_get_slice_axis)\n",
+ " 1 0.000 0.000 0.001 0.001 format.py:789(truncate)\n",
+ " 1 0.000 0.000 0.005 0.005 string.py:40(_get_string_representation)\n",
+ " 24 0.000 0.000 0.000 0.000 blocks.py:583(dtype)\n",
+ " 18 0.000 0.000 0.000 0.000 indexing.py:1627()\n",
+ " 12 0.000 0.000 0.000 0.000 format.py:633(is_truncated_horizontally)\n",
+ " 1 0.000 0.000 0.000 0.000 nanops.py:604(nansum)\n",
+ " 7 0.000 0.000 0.000 0.000 _json.py:1462()\n",
+ " 8 0.000 0.000 0.000 0.000 common.py:1366(_is_dtype)\n",
+ " 1 0.000 0.000 0.005 0.005 string.py:28(to_string)\n",
+ " 3 0.000 0.000 0.000 0.000 frame.py:641(_constructor_from_mgr)\n",
+ " 3 0.000 0.000 0.000 0.000 locale.py:396(normalize)\n",
+ " 14 0.000 0.000 0.000 0.000 {built-in method builtins.setattr}\n",
+ " 28 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_iterator}\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:1023(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 generic.py:12031(_min_count_stat_function)\n",
+ " 27 0.000 0.000 0.000 0.000 indexing.py:915()\n",
+ " 14 0.000 0.000 0.000 0.000 construction.py:754(_maybe_repeat)\n",
+ " 2 0.000 0.000 0.000 0.000 {method 'all' of 'numpy.ndarray' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:949()\n",
+ " 17 0.000 0.000 0.000 0.000 blocks.py:1003(shape)\n",
+ " 15 0.000 0.000 0.000 0.000 generic.py:659(ndim)\n",
+ " 16 0.000 0.000 0.000 0.000 common.py:121(classes)\n",
+ " 2 0.000 0.000 0.000 0.000 _methods.py:61(_all)\n",
+ " 14 0.000 0.000 0.000 0.000 utils.py:62(is_list_like_indexer)\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:695(new_axes)\n",
+ " 9 0.000 0.000 0.000 0.000 indexing.py:909(_expand_ellipsis)\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:199(split)\n",
+ " 5 0.000 0.000 0.000 0.000 printing.py:48()\n",
+ " 6 0.000 0.000 0.000 0.000 fromnumeric.py:70()\n",
+ " 1 0.000 0.000 0.000 0.000 fromnumeric.py:51(_wrapfunc)\n",
+ " 1 0.000 0.000 0.000 0.000 range.py:341(nbytes)\n",
+ " 2 0.000 0.000 0.000 0.000 common.py:557(condition)\n",
+ " 1 0.000 0.000 0.000 0.000 managers.py:918(_verify_integrity)\n",
+ " 1 0.000 0.000 0.000 0.000 console.py:79(in_ipython_frontend)\n",
+ " 8 0.000 0.000 0.000 0.000 {method 'max' of 'numpy.ndarray' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 missing.py:466(array_equivalent)\n",
+ " 1 0.000 0.000 0.000 0.000 generic.py:6337(dtypes)\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:698()\n",
+ " 6 0.000 0.000 0.000 0.000 enum.py:193(__get__)\n",
+ " 1 0.000 0.000 0.000 0.000 string.py:67(_insert_dot_separators)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:674(_with_infer)\n",
+ " 15 0.000 0.000 0.000 0.000 base.py:71()\n",
+ " 2 0.000 0.000 0.000 0.000 common.py:277(is_fsspec_url)\n",
+ " 3 0.000 0.000 0.000 0.000 format.py:1613(_format_strings)\n",
+ " 3 0.000 0.000 0.000 0.000 base.py:1396(format)\n",
+ " 2 0.000 0.000 0.000 0.000 common.py:145(is_url)\n",
+ " 2 0.000 0.000 0.000 0.000 missing.py:564(_array_equivalent_object)\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:565()\n",
+ " 2 0.000 0.000 0.000 0.000 :1()\n",
+ " 3 0.000 0.000 0.000 0.000 frame.py:4399(_clear_item_cache)\n",
+ " 1 0.000 0.000 0.001 0.001 _json.py:1432(_try_convert_dates)\n",
+ " 1 0.000 0.000 0.000 0.000 construction.py:1068()\n",
+ " 18 0.000 0.000 0.000 0.000 {method 'search' of 're.Pattern' objects}\n",
+ " 14 0.000 0.000 0.000 0.000 format.py:877()\n",
+ " 4 0.000 0.000 0.000 0.000 missing.py:642(na_value_for_dtype)\n",
+ " 1 0.000 0.000 0.000 0.000 managers.py:278(get_dtypes)\n",
+ " 2 0.000 0.000 0.000 0.000 range.py:996(_getitem_slice)\n",
+ " 5 0.000 0.000 0.000 0.000 format.py:2024(_has_names)\n",
+ " 4 0.000 0.000 0.000 0.000 base.py:1751(_get_names)\n",
+ " 1 0.000 0.000 0.000 0.000 common.py:62(new_method)\n",
+ " 26 0.000 0.000 0.000 0.000 base.py:7606()\n",
+ " 1 0.000 0.000 0.005 0.005 string.py:34(_get_strcols)\n",
+ " 1 0.000 0.000 0.000 0.000 series.py:6094(_reduce)\n",
+ " 44 0.000 0.000 0.000 0.000 typing.py:2256(cast)\n",
+ " 3 0.000 0.000 0.000 0.000 blocks.py:265(make_block_same_class)\n",
+ " 3 0.000 0.000 0.000 0.000 locale.py:593(getlocale)\n",
+ " 2 0.000 0.000 0.000 0.000 parse.py:119(_coerce_args)\n",
+ " 6 0.000 0.000 0.000 0.000 {built-in method builtins.sum}\n",
+ " 25 0.000 0.000 0.000 0.000 {built-in method builtins.callable}\n",
+ " 1 0.000 0.000 0.005 0.005 format.py:611(get_strcols)\n",
+ " 2 0.000 0.000 0.000 0.000 format.py:974()\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:543(_get_sample_object)\n",
+ " 7 0.000 0.000 0.000 0.000 series.py:784(_references)\n",
+ " 19 0.000 0.000 0.000 0.000 base.py:1657(name)\n",
+ " 6 0.000 0.000 0.000 0.000 blocks.py:203(_can_hold_na)\n",
+ " 6 0.000 0.000 0.000 0.000 __init__.py:225(compile)\n",
+ " 4 0.000 0.000 0.000 0.000 base.py:448(size)\n",
+ " 1 0.000 0.000 0.000 0.000 construction.py:1006(convert_object_array)\n",
+ " 1 0.000 0.000 0.000 0.000 api.py:106(_get_distinct_objs)\n",
+ " 1 0.000 0.000 0.000 0.000 inference.py:404(is_dataclass)\n",
+ " 10 0.000 0.000 0.000 0.000 _parser.py:203(isword)\n",
+ " 7 0.000 0.000 0.000 0.000 inspect.py:292(isclass)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:1795(set_names)\n",
+ " 1 0.000 0.000 0.000 0.000 managers.py:279()\n",
+ " 1 0.000 0.000 0.000 0.000 threading.py:1185(is_alive)\n",
+ " 3 0.000 0.000 0.000 0.000 {built-in method _locale.setlocale}\n",
+ " 6 0.000 0.000 0.000 0.000 generic.py:6182()\n",
+ " 3 0.000 0.000 0.000 0.000 format.py:645(has_index_names)\n",
+ " 1 0.000 0.000 0.001 0.001 shape_base.py:223(vstack)\n",
+ " 1 0.000 0.000 0.000 0.000 construction.py:481()\n",
+ " 3 0.000 0.000 0.000 0.000 inference.py:105(is_file_like)\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:773(_concat_indexes)\n",
+ " 6 0.000 0.000 0.000 0.000 base.py:6625(_validate_indexer)\n",
+ " 3 0.000 0.000 0.000 0.000 frame.py:966(shape)\n",
+ " 3 0.000 0.000 0.000 0.000 format.py:653(show_row_idx_names)\n",
+ " 2 0.000 0.000 0.000 0.000 managers.py:1726(is_consolidated)\n",
+ " 1 0.000 0.000 0.000 0.000 arraylike.py:54(__gt__)\n",
+ " 1 0.000 0.000 0.000 0.000 _json.py:1126(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 config.py:469(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 nanops.py:389(new_func)\n",
+ " 1 0.000 0.000 0.000 0.000 contextlib.py:104(__init__)\n",
+ " 7 0.000 0.000 0.000 0.000 _dtype.py:24(_kind_name)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:773(_view)\n",
+ " 6 0.000 0.000 0.000 0.000 iostream.py:532(_schedule_flush)\n",
+ " 3 0.000 0.000 0.000 0.000 _strptime.py:26(_getlang)\n",
+ " 1 0.000 0.000 0.000 0.000 construction.py:950(_validate_or_indexify_columns)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:1243(copy)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:5458(_concat)\n",
+ " 9 0.000 0.000 0.000 0.000 common.py:126(_classes_and_not_datetimelike)\n",
+ " 4 0.000 0.000 0.000 0.000 nanops.py:79()\n",
+ " 1 0.000 0.000 0.000 0.000 shape_base.py:81(atleast_2d)\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:221(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 {built-in method builtins.sorted}\n",
+ " 4 0.000 0.000 0.000 0.000 {pandas._libs.lib.maybe_indices_to_slice}\n",
+ " 1 0.000 0.000 0.001 0.001 :16(exists)\n",
+ " 1 0.000 0.000 0.000 0.000 api.py:120(_get_combined_index)\n",
+ " 4 0.000 0.000 0.000 0.000 base.py:675(empty)\n",
+ " 1 0.000 0.000 0.000 0.000 config.py:477(__enter__)\n",
+ " 2 0.000 0.000 0.000 0.000 _json.py:1105(__exit__)\n",
+ " 4 0.000 0.000 0.000 0.000 generic.py:568(_get_block_manager_axis)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:4190(_validate_positional_slice)\n",
+ " 1 0.000 0.000 0.000 0.000 range.py:352(memory_usage)\n",
+ " 1 0.000 0.000 0.000 0.000 function.py:411(validate_func)\n",
+ " 8 0.000 0.000 0.000 0.000 common.py:1390(_get_dtype)\n",
+ " 15 0.000 0.000 0.000 0.000 blocks.py:239(mgr_locs)\n",
+ " 9 0.000 0.000 0.000 0.000 contextlib.py:428(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 string.py:126()\n",
+ " 8 0.000 0.000 0.000 0.000 _methods.py:39(_amax)\n",
+ " 1 0.000 0.000 0.000 0.000 range.py:1030(_cmp_method)\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:395(__init__)\n",
+ " 9 0.000 0.000 0.000 0.000 series.py:577(_constructor)\n",
+ " 2 0.000 0.000 0.000 0.000 config.py:215(get_default_val)\n",
+ " 1 0.000 0.000 0.000 0.000 api.py:72(get_objs_combined_axis)\n",
+ " 13 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 concat.py:202(_maybe_reindex_columns_na_proxy)\n",
+ " 3 0.000 0.000 0.000 0.000 locale.py:479(_parse_localename)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:5453()\n",
+ " 2 0.000 0.000 0.000 0.000 construction.py:196(mgr_to_mgr)\n",
+ " 9 0.000 0.000 0.000 0.000 contextlib.py:434(__exit__)\n",
+ " 4 0.000 0.000 0.000 0.000 construction.py:687(_sanitize_non_ordered)\n",
+ " 3 0.000 0.000 0.000 0.000 _parser.py:189(__next__)\n",
+ " 9 0.000 0.000 0.000 0.000 concat.py:584()\n",
+ " 1 0.000 0.000 0.000 0.000 construction.py:532(treat_as_nested)\n",
+ " 2 0.000 0.000 0.000 0.000 format.py:657(show_col_idx_names)\n",
+ " 3 0.000 0.000 0.000 0.000 base.py:791(is_)\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:751(_adjust_max_rows)\n",
+ " 1 0.000 0.000 0.000 0.000 generic.py:12070(sum)\n",
+ " 3 0.000 0.000 0.000 0.000 _strptime.py:565(_strptime_datetime)\n",
+ " 4 0.000 0.000 0.000 0.000 {built-in method sys.getsizeof}\n",
+ " 2 0.000 0.000 0.000 0.000 format.py:629(is_truncated)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:1754(_set_names)\n",
+ " 1 0.000 0.000 0.000 0.000 <__array_function__ internals>:177(array_equal)\n",
+ " 4 0.000 0.000 0.000 0.000 format.py:637(is_truncated_vertically)\n",
+ " 4 0.000 0.000 0.000 0.000 range.py:347()\n",
+ " 4 0.000 0.000 0.000 0.000 managers.py:920()\n",
+ " 2 0.000 0.000 0.000 0.000 common.py:521(is_string_dtype)\n",
+ " 1 0.000 0.000 0.001 0.001 common.py:1141(file_exists)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:7607()\n",
+ " 2 0.000 0.000 0.000 0.000 generic.py:4314(_set_is_copy)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:5153(_get_engine_target)\n",
+ " 5 0.000 0.000 0.000 0.000 managers.py:896(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 concat.py:703(_get_comb_axis)\n",
+ " 11 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 fromnumeric.py:1038(argsort)\n",
+ " 4 0.000 0.000 0.000 0.000 blocks.py:1016(_slice)\n",
+ " 1 0.000 0.000 0.000 0.000 contextlib.py:132(__enter__)\n",
+ " 2 0.000 0.000 0.000 0.000 format.py:649(has_column_names)\n",
+ " 1 0.000 0.000 0.000 0.000 expressions.py:226(evaluate)\n",
+ " 1 0.000 0.000 0.000 0.000 common.py:977(is_numeric_v_string_like)\n",
+ " 1 0.000 0.000 0.000 0.000 config.py:483(__exit__)\n",
+ " 3 0.000 0.000 0.000 0.000 generic.py:2073()\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:782(_rename)\n",
+ " 1 0.000 0.000 0.000 0.000 threading.py:1118(_wait_for_tstate_lock)\n",
+ " 3 0.000 0.000 0.000 0.000 nanops.py:72(check)\n",
+ " 1 0.000 0.000 0.000 0.000 missing.py:131(dispatch_fill_zeros)\n",
+ " 6 0.000 0.000 0.000 0.000 enum.py:1249(value)\n",
+ " 4 0.000 0.000 0.000 0.000 datetimes.py:156(should_cache)\n",
+ " 1 0.000 0.000 0.000 0.000 expressions.py:67(_evaluate_standard)\n",
+ " 2 0.000 0.000 0.000 0.000 format.py:1179(get_buffer)\n",
+ " 3 0.000 0.000 0.000 0.000 blocks.py:192(_can_consolidate)\n",
+ " 1 0.000 0.000 0.000 0.000 iostream.py:127(_event_pipe)\n",
+ " 1 0.000 0.000 0.000 0.000 range.py:484(_view)\n",
+ " 4 0.000 0.000 0.000 0.000 generic.py:6177()\n",
+ " 1 0.000 0.000 0.000 0.000 :309(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'sum' of 'numpy.ndarray' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 managers.py:2233()\n",
+ " 14 0.000 0.000 0.000 0.000 base.py:6612(_maybe_cast_indexer)\n",
+ " 9 0.000 0.000 0.000 0.000 range.py:377(dtype)\n",
+ " 1 0.000 0.000 0.000 0.000 common.py:85(consensus_name_attr)\n",
+ " 1 0.000 0.000 0.000 0.000 _validators.py:450(check_dtype_backend)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:459(_engine_type)\n",
+ " 1 0.000 0.000 0.000 0.000 nanops.py:455(newfunc)\n",
+ " 1 0.000 0.000 0.001 0.001 <__array_function__ internals>:177(vstack)\n",
+ " 6 0.000 0.000 0.000 0.000 common.py:1107()\n",
+ " 1 0.000 0.000 0.000 0.000 {built-in method _codecs.lookup}\n",
+ " 1 0.000 0.000 0.000 0.000 console.py:54(in_interactive_session)\n",
+ " 1 0.000 0.000 0.000 0.000 contextlib.py:287(helper)\n",
+ " 1 0.000 0.000 0.000 0.000 common.py:80(ensure_str)\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:322(weekday)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'any' of 'numpy.ndarray' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:689(_get_result_dim)\n",
+ " 9 0.000 0.000 0.000 0.000 {pandas._libs.lib.item_from_zerodim}\n",
+ " 7 0.000 0.000 0.000 0.000 managers.py:335()\n",
+ " 4 0.000 0.000 0.000 0.000 config.py:663(_get_registered_option)\n",
+ " 6 0.000 0.000 0.000 0.000 concat.py:351(__init__)\n",
+ " 4 0.000 0.000 0.000 0.000 {method 'upper' of 'str' objects}\n",
+ " 12 0.000 0.000 0.000 0.000 base.py:363(ndim)\n",
+ " 1 0.000 0.000 0.000 0.000 string.py:22(__init__)\n",
+ " 11 0.000 0.000 0.000 0.000 {method 'read' of '_io.StringIO' objects}\n",
+ " 7 0.000 0.000 0.000 0.000 {method 'write' of '_io.StringIO' objects}\n",
+ " 3 0.000 0.000 0.000 0.000 concat.py:167()\n",
+ " 1 0.000 0.000 0.000 0.000 :2(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 series.py:6195(sum)\n",
+ " 1 0.000 0.000 0.000 0.000 <__array_function__ internals>:177(argsort)\n",
+ " 2 0.000 0.000 0.000 0.000 {built-in method builtins.next}\n",
+ " 1 0.000 0.000 0.000 0.000 nanops.py:253(_get_values)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:1900(rename)\n",
+ " 3 0.000 0.000 0.000 0.000 range.py:281(start)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:346(shape)\n",
+ " 1 0.000 0.000 0.000 0.000 nanops.py:324(_get_dtype_max)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'fill' of 'numpy.ndarray' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 concat.py:303()\n",
+ " 2 0.000 0.000 0.000 0.000 generic.py:638(_info_axis)\n",
+ " 1 0.000 0.000 0.000 0.000 contextlib.py:141(__exit__)\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:641(dimensions_info)\n",
+ " 1 0.000 0.000 0.000 0.000 generic.py:2015(empty)\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:5462()\n",
+ " 1 0.000 0.000 0.000 0.000 concat.py:747()\n",
+ " 1 0.000 0.000 0.000 0.000 dataclasses.py:1256(is_dataclass)\n",
+ " 4 0.000 0.000 0.000 0.000 {pandas._libs.algos.ensure_object}\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:208(isnum)\n",
+ " 1 0.000 0.000 0.000 0.000 common.py:1107(_maybe_memory_map)\n",
+ " 1 0.000 0.000 0.000 0.000 api.py:102()\n",
+ " 6 0.000 0.000 0.000 0.000 fromnumeric.py:2427(_all_dispatcher)\n",
+ " 8 0.000 0.000 0.000 0.000 multiarray.py:152(concatenate)\n",
+ " 10 0.000 0.000 0.000 0.000 {method 'isalpha' of 'str' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 construction.py:916()\n",
+ " 1 0.000 0.000 0.000 0.000 frame.py:1114(_info_repr)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.lock' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 config.py:897(is_nonnegative_int)\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:732(_calc_max_rows_fitted)\n",
+ " 6 0.000 0.000 0.000 0.000 common.py:175()\n",
+ " 2 0.000 0.000 0.000 0.000 common.py:171(not_none)\n",
+ " 1 0.000 0.000 0.000 0.000 shape_base.py:218(_vhstack_dispatcher)\n",
+ " 1 0.000 0.000 0.000 0.000 generic.py:1948(__iter__)\n",
+ " 1 0.000 0.000 0.000 0.000 <__array_function__ internals>:177(atleast_2d)\n",
+ " 1 0.000 0.000 0.000 0.000 range.py:946()\n",
+ " 6 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.RLock' objects}\n",
+ " 3 0.000 0.000 0.000 0.000 {method 'clear' of 'dict' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:342(ampm)\n",
+ " 2 0.000 0.000 0.000 0.000 series.py:3169()\n",
+ " 1 0.000 0.000 0.000 0.000 expressions.py:76(_can_use_numexpr)\n",
+ " 1 0.000 0.000 0.000 0.000 common.py:1025(needs_i8_conversion)\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:721(_calc_max_cols_fitted)\n",
+ " 9 0.000 0.000 0.000 0.000 contextlib.py:431(__enter__)\n",
+ " 3 0.000 0.000 0.000 0.000 range.py:316(step)\n",
+ " 1 0.000 0.000 0.000 0.000 _methods.py:55(_any)\n",
+ " 6 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_int_or_none}\n",
+ " 7 0.000 0.000 0.000 0.000 _json.py:1401()\n",
+ " 1 0.000 0.000 0.000 0.000 config.py:478()\n",
+ " 1 0.000 0.000 0.000 0.000 concat.py:631()\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:329(month)\n",
+ " 3 0.000 0.000 0.000 0.000 range.py:911()\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:623(should_show_dimensions)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:539()\n",
+ " 7 0.000 0.000 0.000 0.000 series.py:1381(_clear_item_cache)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 format.py:765(_is_in_terminal)\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:319(jump)\n",
+ " 5 0.000 0.000 0.000 0.000 {method 'add' of 'set' objects}\n",
+ " 3 0.000 0.000 0.000 0.000 frame.py:637(_constructor)\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:73()\n",
+ " 3 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.lock' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 concat.py:720()\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:213(isspace)\n",
+ " 4 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_bool}\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:689(_initialize_columns)\n",
+ " 1 0.000 0.000 0.000 0.000 {built-in method pandas._libs.lib.is_interval}\n",
+ " 5 0.000 0.000 0.000 0.000 {built-in method posix.fspath}\n",
+ " 2 0.000 0.000 0.000 0.000 {pandas._libs.lib.dtypes_all_equal}\n",
+ " 1 0.000 0.000 0.000 0.000 common.py:503(get_compression_method)\n",
+ " 1 0.000 0.000 0.000 0.000 {built-in method builtins.min}\n",
+ " 1 0.000 0.000 0.000 0.000 _methods.py:47(_sum)\n",
+ " 1 0.000 0.000 0.000 0.000 dispatch.py:17(should_extension_dispatch)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'getvalue' of '_io.StringIO' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:665(_initialize_sparsify)\n",
+ " 1 0.000 0.000 0.000 0.000 range.py:922()\n",
+ " 1 0.000 0.000 0.000 0.000 threading.py:568(is_set)\n",
+ " 1 0.000 0.000 0.000 0.000 managers.py:536(nblocks)\n",
+ " 1 0.000 0.000 0.000 0.000 shape_base.py:207(_arrays_for_stack_dispatcher)\n",
+ " 1 0.000 0.000 0.000 0.000 inference.py:306(is_named_tuple)\n",
+ " 1 0.000 0.000 0.000 0.000 string.py:63(_need_to_wrap_around)\n",
+ " 1 0.000 0.000 0.000 0.000 _validators.py:226(validate_bool_kwarg)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:974(dtype)\n",
+ " 3 0.000 0.000 0.000 0.000 range.py:299(stop)\n",
+ " 2 0.000 0.000 0.000 0.000 managers.py:235(items)\n",
+ " 1 0.000 0.000 0.000 0.000 {built-in method _codecs.lookup_error}\n",
+ " 2 0.000 0.000 0.000 0.000 indexing.py:2665(need_slice)\n",
+ " 3 0.000 0.000 0.000 0.000 {built-in method builtins.id}\n",
+ " 2 0.000 0.000 0.000 0.000 format.py:959()\n",
+ " 1 0.000 0.000 0.000 0.000 managers.py:2242()\n",
+ " 2 0.000 0.000 0.000 0.000 concat.py:766(_maybe_check_integrity)\n",
+ " 1 0.000 0.000 0.000 0.000 :260(__init__)\n",
+ " 1 0.000 0.000 0.000 0.000 nanops.py:209(_maybe_get_mask)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'pop' of 'list' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 base.py:2756(_is_multi)\n",
+ " 1 0.000 0.000 0.000 0.000 function.py:64(__call__)\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:697(_initialize_colspace)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'isdigit' of 'str' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 {built-in method numpy.asanyarray}\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'isspace' of 'str' objects}\n",
+ " 1 0.000 0.000 0.000 0.000 nanops.py:1491(_maybe_null_out)\n",
+ " 1 0.000 0.000 0.000 0.000 fromnumeric.py:1034(_argsort_dispatcher)\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:1056(_could_be_tzname)\n",
+ " 1 0.000 0.000 0.000 0.000 numeric.py:2403(_array_equal_dispatcher)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'values' of 'dict' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 {function FrozenList.__getitem__ at 0x7f0665c34860}\n",
+ " 1 0.000 0.000 0.000 0.000 _parser.py:186(__iter__)\n",
+ " 2 0.000 0.000 0.000 0.000 parse.py:108(_noop)\n",
+ " 1 0.000 0.000 0.000 0.000 {method 'reverse' of 'list' objects}\n",
+ " 2 0.000 0.000 0.000 0.000 _json.py:1102(__enter__)\n",
+ " 2 0.000 0.000 0.000 0.000 base.py:1954(nlevels)\n",
+ " 1 0.000 0.000 0.000 0.000 interactiveshell.py:637(get_ipython)\n",
+ " 1 0.000 0.000 0.000 0.000 range.py:228(_constructor)\n",
+ " 1 0.000 0.000 0.000 0.000 format.py:670(_initialize_formatters)\n",
+ " 1 0.000 0.000 0.000 0.000 {pandas._libs.lib.is_period}\n",
+ " 1 0.000 0.000 0.000 0.000 shape_base.py:77(_atleast_2d_dispatcher)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "%%prun\n",
+ "test_pandas()"
]
},
{
"cell_type": "markdown",
- "id": "04937c37-16b4-4e03-9cf2-ac704e48f60e",
+ "id": "18519e3f-6d5e-477a-a3e5-1fd0e0b30fcc",
"metadata": {},
"source": [
- "# Result\n",
+ "# Results\n",
"\n",
- "Polars and Pandas borth processed the same data (8000 rows, categorical data represented as strings).\n",
- "\n"
+ "Polars and Pandas both processed the same data (8000 rows, categorical data represented as strings).\n",
+ "\n",
+ "\n",
+ "## Versions\n",
+ "\n",
+ "\n",
+ "* Pandas: 2.1.4\n",
+ "* Polars: 0.20.26\n",
+ "\n",
+ "## Memory usage comparison\n",
+ "\n",
+ "File on disk: 6.0 MB (du -sh), 8000 rows, 7 columns. \n",
+ "\n",
+ "* Polars: 4.76 MB\n",
+ "* Pandas: 7.56 MB\n",
+ "\n",
+ "-> Polars was more memory efficient: it used ~ 1.6 times less memory than Pandas\n",
+ "\n",
+ "\n",
+ "## Profile comparison\n",
+ "\n",
+ "* Polars: 256 function calls (253 primitive calls) in 0.020 seconds\n",
+ "* Pandas: 46681 function calls (46472 primitive calls) in 0.113 seconds\n",
+ "\n",
+ "-> Polars was ~ 5.6 times faster and needed ~ 180x fewer function calls (both total and primitive). \n",
+ "\n",
+ "\n",
+ "## Conclusion\n",
+ "\n",
+ "In this benchmark Polars was both faster and more memory efficient than Pandas, so it should be used whenever possible."
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bdbf106d-4117-491b-9773-85dcd9d5914c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {