Skip to content

Commit

Permalink
Optionally disable verify_mapping_compatibility in pandas_to_eland append
Browse files Browse the repository at this point in the history

Fixes elastic#412
  • Loading branch information
Ashton Sidhu committed Dec 29, 2021
1 parent 5bc1a82 commit 16c5753
Show file tree
Hide file tree
Showing 3 changed files with 394 additions and 9 deletions.
369 changes: 369 additions & 0 deletions eland/Untitled-1.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,369 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"from datetime import datetime, timedelta\n",
"\n",
"dt = datetime.utcnow()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"df1 = pd.DataFrame(\n",
" {\n",
" \"a\": [1, 2, 3],\n",
" \"b\": [1.0, 2.0, 3.0],\n",
" \"c\": [\"A\", \"B\", \"C\"],\n",
" \"d\": [dt, dt + timedelta(1), dt + timedelta(2)],\n",
" },\n",
" index=[\"0\", \"1\", \"2\"],\n",
")\n",
"\n",
"df2 = pd.DataFrame({\"Z\": [3, 2, 1], \"a\": [4, 5, 6]}, index=[\"0\", \"1\", \"2\"])\n",
"es_index = \"test3\""
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"from elasticsearch import Elasticsearch\n",
"\n",
"es_client = Elasticsearch(\"http://localhost:9200\")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"import eland as ed"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sidhuas/.pyenv/versions/3.9.1/envs/eland/lib/python3.9/site-packages/elasticsearch/connection/base.py:200: ElasticsearchWarning: Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.16/security-minimal-setup.html to enable security.\n",
" warnings.warn(message, category=ElasticsearchWarning)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
"<p>0 rows × 4 columns</p>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [a, b, c, d]\n",
"Index: []\n",
"\n",
"[0 rows x 4 columns]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed.pandas_to_eland(df1, es_client, es_index, es_dropna=True, \n",
" es_if_exists='append', use_pandas_index_for_es_ids=False)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sidhuas/.pyenv/versions/3.9.1/envs/eland/lib/python3.9/site-packages/elasticsearch/connection/base.py:200: ElasticsearchWarning: Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.16/security-minimal-setup.html to enable security.\n",
" warnings.warn(message, category=ElasticsearchWarning)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Z</th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>6KaKA34BeVG4RuLOG_yX</th>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>A</td>\n",
" <td>2021-12-29 00:13:36.956031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6aaKA34BeVG4RuLOG_yX</th>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>2.0</td>\n",
" <td>B</td>\n",
" <td>2021-12-30 00:13:36.956031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6qaKA34BeVG4RuLOG_yX</th>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>3.0</td>\n",
" <td>C</td>\n",
" <td>2021-12-31 00:13:36.956031</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
"<p>3 rows × 5 columns</p>"
],
"text/plain": [
" Z a b c d\n",
"6KaKA34BeVG4RuLOG_yX NaN 1 1.0 A 2021-12-29 00:13:36.956031\n",
"6aaKA34BeVG4RuLOG_yX NaN 2 2.0 B 2021-12-30 00:13:36.956031\n",
"6qaKA34BeVG4RuLOG_yX NaN 3 3.0 C 2021-12-31 00:13:36.956031\n",
"\n",
"[3 rows x 5 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed.pandas_to_eland(df2, es_client, es_index, es_dropna=True, enforce_index_schema=False,\n",
" es_if_exists='append', use_pandas_index_for_es_ids=False)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sidhuas/.pyenv/versions/3.9.1/envs/eland/lib/python3.9/site-packages/elasticsearch/connection/base.py:200: ElasticsearchWarning: Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.16/security-minimal-setup.html to enable security.\n",
" warnings.warn(message, category=ElasticsearchWarning)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Z</th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>6KaKA34BeVG4RuLOG_yX</th>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>A</td>\n",
" <td>2021-12-29 00:13:36.956031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6aaKA34BeVG4RuLOG_yX</th>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>2.0</td>\n",
" <td>B</td>\n",
" <td>2021-12-30 00:13:36.956031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6qaKA34BeVG4RuLOG_yX</th>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>3.0</td>\n",
" <td>C</td>\n",
" <td>2021-12-31 00:13:36.956031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66aKA34BeVG4RuLOH_xK</th>\n",
" <td>3.0</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7KaKA34BeVG4RuLOH_xK</th>\n",
" <td>2.0</td>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7aaKA34BeVG4RuLOH_xK</th>\n",
" <td>1.0</td>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
"<p>6 rows × 5 columns</p>"
],
"text/plain": [
" Z a b c d\n",
"6KaKA34BeVG4RuLOG_yX NaN 1 1.0 A 2021-12-29 00:13:36.956031\n",
"6aaKA34BeVG4RuLOG_yX NaN 2 2.0 B 2021-12-30 00:13:36.956031\n",
"6qaKA34BeVG4RuLOG_yX NaN 3 3.0 C 2021-12-31 00:13:36.956031\n",
"66aKA34BeVG4RuLOH_xK 3.0 4 NaN NaN NaT\n",
"7KaKA34BeVG4RuLOH_xK 2.0 5 NaN NaN NaT\n",
"7aaKA34BeVG4RuLOH_xK 1.0 6 NaN NaN NaT\n",
"\n",
"[6 rows x 5 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed.DataFrame(es_client, es_index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "a4e377a64fa06eb866bfeb92d02adfe8420325b7057dd732fb302fa0a637b068"
},
"kernelspec": {
"display_name": "Python 3.9.1 64-bit ('eland': pyenv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
3 changes: 2 additions & 1 deletion eland/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def pandas_to_eland(
es_refresh: bool = False,
es_dropna: bool = False,
es_type_overrides: Optional[Mapping[str, str]] = None,
+ enforce_index_schema: bool = True,
thread_count: int = 4,
chunksize: Optional[int] = None,
use_pandas_index_for_es_ids: bool = True,
Expand Down Expand Up @@ -177,7 +178,7 @@ def pandas_to_eland(
es_client.indices.delete(index=es_dest_index)
es_api_compat(es_client.indices.create, index=es_dest_index, body=mapping)

- elif es_if_exists == "append":
+ elif es_if_exists == "append" and enforce_index_schema:
dest_mapping = es_client.indices.get_mapping(index=es_dest_index)[
es_dest_index
]
Expand Down
Loading

0 comments on commit 16c5753

Please sign in to comment.